microsoft-cognitiveservices-speech-sdk

Version:

Microsoft Cognitive Services Speech SDK for JavaScript

docs.microsoft.com/azure/cognitive-services/speech-service/

Microsoft/cognitive-services-speech-sdk-js

1 lines • 10.2 kB

Source Map (JSON)

{"version":3,"sources":["src/common.speech/SpeakerServiceRecognizer.ts"],"names":[],"mappings":"AAIA,OAAO,EAEH,YAAY,EAIf,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAC5E,OAAO,EACH,qBAAqB,EACrB,kBAAkB,EAClB,wBAAwB,EACxB,iBAAiB,EAKpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAIH,qBAAqB,EACxB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uCAAuC,CAAC;AAYhF,qBAAa,wBAAyB,SAAQ,qBAAqB;IAC/D,OAAO,CAAC,qBAAqB,CAAoB;IACjD,OAAO,CAAC,sBAAsB,CAAe;IAC7C,OAAO,CAAC,kBAAkB,CAAqC;IAC/D,OAAO,CAAC,gBAAgB,CAA0B;gBAG9C,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,kBAAkB,EACrC,WAAW,EAAE,YAAY,EACzB,gBAAgB,EAAE,gBAAgB,EAClC,UAAU,EAAE,iBAAiB;IAQjC,SAAS,CAAC,2BAA2B,CAAC,iBAAiB,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC;IAwCnG,SAAS,CAAC,iBAAiB,CACvB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,kBAAkB,EAAE,kBAAkB,EACtC,SAAS,EAAE,qBAAqB,EAChC,KAAK,EAAE,MAAM,GAAG,IAAI;IAuBX,oBAAoB,CAAC,KAAK,EAAE,uBAAuB,GAAG,OAAO,CAAC,wBAAwB,CAAC;YAiDtF,oBAAoB;YAMpB,sBAAsB;IAUpC,OAAO,CAAC,qBAAqB;CAUhC","file":"SpeakerServiceRecognizer.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\nimport { ReplayableAudioNode } from \"../common.browser/Exports.js\";\r\nimport {\r\n Deferred,\r\n IAudioSource,\r\n IAudioStreamNode,\r\n IConnection,\r\n MessageType,\r\n} from \"../common/Exports.js\";\r\nimport { AudioStreamFormatImpl } from \"../sdk/Audio/AudioStreamFormat.js\";\r\nimport { SpeakerRecognitionModel } from \"../sdk/SpeakerRecognitionModel.js\";\r\nimport {\r\n CancellationErrorCode,\r\n CancellationReason,\r\n SpeakerRecognitionResult,\r\n SpeakerRecognizer,\r\n PropertyCollection,\r\n PropertyId,\r\n ResultReason,\r\n SessionEventArgs,\r\n} from \"../sdk/Exports.js\";\r\nimport {\r\n CancellationErrorCodePropertyName,\r\n ISpeechConfigAudioDevice,\r\n SpeakerResponse,\r\n ServiceRecognizerBase,\r\n} from \"./Exports.js\";\r\nimport { IAuthentication } from \"./IAuthentication.js\";\r\nimport { IConnectionFactory } from \"./IConnectionFactory.js\";\r\nimport { RecognizerConfig } from \"./RecognizerConfig.js\";\r\nimport { SpeechConnectionMessage } from \"./SpeechConnectionMessage.Internal.js\";\r\n\r\ninterface SpeakerContext {\r\n scenario: string;\r\n profileIds: string[];\r\n features: {\r\n interimResult: string;\r\n progressiveDetection: string;\r\n };\r\n}\r\n\r\n// eslint-disable-next-line max-classes-per-file\r\nexport class SpeakerServiceRecognizer extends ServiceRecognizerBase {\r\n private privSpeakerRecognizer: SpeakerRecognizer;\r\n private privSpeakerAudioSource: IAudioSource;\r\n private privResultDeferral: Deferred<SpeakerRecognitionResult>;\r\n private privSpeakerModel: SpeakerRecognitionModel;\r\n\r\n public constructor(\r\n authentication: IAuthentication,\r\n connectionFactory: IConnectionFactory,\r\n audioSource: IAudioSource,\r\n recognizerConfig: RecognizerConfig,\r\n recognizer: SpeakerRecognizer) {\r\n super(authentication, connectionFactory, audioSource, recognizerConfig, recognizer);\r\n this.privSpeakerRecognizer = recognizer;\r\n this.privSpeakerAudioSource = audioSource;\r\n this.recognizeSpeaker = (model: SpeakerRecognitionModel): Promise<SpeakerRecognitionResult> => this.recognizeSpeakerOnce(model);\r\n this.sendPrePayloadJSONOverride = (): Promise<void> => this.noOp();\r\n }\r\n\r\n protected processTypeSpecificMessages(connectionMessage: SpeechConnectionMessage): Promise<boolean> {\r\n\r\n let processed: boolean = false;\r\n\r\n const resultProps: PropertyCollection = new PropertyCollection();\r\n if (connectionMessage.messageType === MessageType.Text) {\r\n resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, connectionMessage.textBody);\r\n }\r\n\r\n switch (connectionMessage.path.toLowerCase()) {\r\n case \"speaker.response\":\r\n const response: SpeakerResponse = JSON.parse(connectionMessage.textBody) as SpeakerResponse;\r\n let result: SpeakerRecognitionResult;\r\n if (response.status.statusCode.toLowerCase() !== \"success\") {\r\n result = new SpeakerRecognitionResult(\r\n response,\r\n ResultReason.Canceled,\r\n CancellationErrorCode.ServiceError,\r\n response.status.reason\r\n );\r\n } else {\r\n result = new SpeakerRecognitionResult(\r\n response,\r\n ResultReason.RecognizedSpeaker,\r\n );\r\n }\r\n if (!!this.privResultDeferral) {\r\n this.privResultDeferral.resolve(result);\r\n }\r\n processed = true;\r\n break;\r\n default:\r\n break;\r\n }\r\n const defferal = new Deferred<boolean>();\r\n defferal.resolve(processed);\r\n return defferal.promise;\r\n }\r\n\r\n // Cancels recognition.\r\n protected cancelRecognition(\r\n sessionId: string,\r\n requestId: string,\r\n cancellationReason: CancellationReason,\r\n errorCode: CancellationErrorCode,\r\n error: string): void {\r\n\r\n const properties: PropertyCollection = new PropertyCollection();\r\n properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);\r\n\r\n if (!!this.privResultDeferral) {\r\n const result: SpeakerRecognitionResult = new SpeakerRecognitionResult(\r\n {\r\n scenario: this.privSpeakerModel.scenario,\r\n status: { statusCode: error, reason: error }\r\n },\r\n ResultReason.Canceled,\r\n errorCode,\r\n error\r\n );\r\n try {\r\n this.privResultDeferral.resolve(result);\r\n } catch (error) {\r\n this.privResultDeferral.reject(error as string);\r\n }\r\n }\r\n }\r\n\r\n public async recognizeSpeakerOnce(model: SpeakerRecognitionModel): Promise<SpeakerRecognitionResult> {\r\n this.privSpeakerModel = model;\r\n this.voiceProfileType = model.scenario;\r\n if (!this.privResultDeferral) {\r\n this.privResultDeferral = new Deferred<SpeakerRecognitionResult>();\r\n }\r\n this.privRequestSession.startNewRecognition();\r\n this.privRequestSession.listenForServiceTelemetry(this.privSpeakerAudioSource.events);\r\n\r\n this.privRecognizerConfig.parameters.setProperty(PropertyId.Speech_SessionId, this.privRequestSession.sessionId);\r\n\r\n // Start the connection to the service. The promise this will create is stored and will be used by configureConnection().\r\n const conPromise: Promise<IConnection> = this.connectImpl();\r\n\r\n const preAudioPromise: Promise<void> = this.sendPreAudioMessages(this.extractSpeakerContext(model));\r\n\r\n const node: IAudioStreamNode = await this.privSpeakerAudioSource.attach(this.privRequestSession.audioNodeId);\r\n const format: AudioStreamFormatImpl = await this.privSpeakerAudioSource.format;\r\n const deviceInfo: ISpeechConfigAudioDevice = await this.privSpeakerAudioSource.deviceInfo;\r\n\r\n const audioNode = new ReplayableAudioNode(node, format.avgBytesPerSec);\r\n await this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false);\r\n\r\n this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };\r\n\r\n try {\r\n await conPromise;\r\n await preAudioPromise;\r\n } catch (err) {\r\n this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.ConnectionFailure, err as string);\r\n }\r\n\r\n const sessionStartEventArgs: SessionEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);\r\n\r\n if (!!this.privRecognizer.sessionStarted) {\r\n this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);\r\n }\r\n\r\n void this.receiveMessage();\r\n const audioSendPromise = this.sendAudio(audioNode);\r\n\r\n // /* eslint-disable no-empty */\r\n audioSendPromise.then((): void => { /* add? return true;*/ }, (error: string): void => {\r\n this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.RuntimeError, error);\r\n });\r\n\r\n return this.privResultDeferral.promise;\r\n }\r\n\r\n private async sendPreAudioMessages(context: SpeakerContext): Promise<void> {\r\n const connection: IConnection = await this.fetchConnection();\r\n await this.sendSpeakerRecognition(connection, context);\r\n // await this.sendWaveHeader(connection);\r\n }\r\n\r\n private async sendSpeakerRecognition(connection: IConnection, context: SpeakerContext): Promise<void> {\r\n const speakerContextJson = JSON.stringify(context);\r\n return connection.send(new SpeechConnectionMessage(\r\n MessageType.Text,\r\n \"speaker.context\",\r\n this.privRequestSession.requestId,\r\n \"application/json; charset=utf-8\",\r\n speakerContextJson));\r\n }\r\n\r\n private extractSpeakerContext(model: SpeakerRecognitionModel): SpeakerContext {\r\n return {\r\n features: {\r\n interimResult: \"enabled\",\r\n progressiveDetection: \"disabled\",\r\n },\r\n profileIds: model.profileIds,\r\n scenario: model.scenario,\r\n };\r\n }\r\n}\r\n"]}