microsoft-cognitiveservices-speech-sdk
Microsoft Cognitive Services Speech SDK for JavaScript
Source Map (JSON)
{"version":3,"sources":["src/sdk/AvatarSynthesizer.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,oBAAoB,EAAE,MAAM,0CAA0C,CAAC;AAChF,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EAEH,eAAe,EACf,2BAA2B,EAC3B,mBAAmB,EACnB,oBAAoB,EACvB,MAAM,6BAA6B,CAAC;AAGrC,OAAO,EACH,YAAY,EACZ,eAAe,EACf,kBAAkB,EAGlB,YAAY,EAEZ,qBAAqB,EACrB,eAAe,EACf,WAAW,EACd,MAAM,cAAc,CAAC;AAItB;;;;;;GAMG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAC9C,SAAS,CAAC,cAAc,EAAE,kBAAkB,CAAC;IAC7C,OAAO,CAAC,gBAAgB,CAAe;IACvC,OAAO,CAAC,cAAc,CAAiB;IACvC;;;;;OAKG;IACI,mBAAmB,EAAE,CAAC,MAAM,EAAE,iBAAiB,EAAE,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC;IAExF;;;;;OAKG;gBACgB,YAAY,EAAE,YAAY,EAAE,YAAY,EAAE,YAAY;IAUzE,SAAS,CAAC,yBAAyB,IAAI,IAAI;IAS3C;;;;;;;OAOG;IACU,gBAAgB,CAAC,cAAc,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IAwD1F;;;;;;;OAOG;IACU,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAUnE;;;;;;;OAOG;IACU,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAUnE;;;;;;OAMG;IACU,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC;IAQ/C;;;;;;;;OAQG;IACU,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC;IAK7C;;;;;OAKG;IACU,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAQnC;;OAEG;IACH,IAAW,UAAU,IAAI,YAAY,EAAE,CAEtC;IAGD,SAAS,CAAC,sBAAsB,CAC5B,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,2BAA2B,EAC9C,iBAAiB,EAAE,iBAAiB,GAAG,oBAAoB;IAS/D,SAAS,CAAC,0BAA0B,CAChC,eAAe,EAAE,eAAe,EAChC,kBAAkB,EAAE,iBAAiB,GAAG,oBAAoB;IAIhE,SAAS,CAAC,uBAAuB,CAAC,YAAY,EAAE,mBAAmB,GAAG,iBAAiB;cAMvE,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,GAAG,OAAO,CAAC,qBAAqB,CAAC;CAgBvF","file":"AvatarSynthesizer.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\nimport { SpeechSynthesisConnectionFactory } from \"../common.speech/SpeechSynthesisConnectionFactory.js\";\r\nimport { SynthesisRestAdapter } from \"../common.speech/SynthesisRestAdapter.js\";\r\nimport { SynthesizerConfig } from \"../common.speech/SynthesizerConfig.js\";\r\nimport {\r\n AvatarSynthesisAdapter,\r\n IAuthentication,\r\n ISynthesisConnectionFactory,\r\n SpeechServiceConfig,\r\n SynthesisAdapterBase\r\n} from \"../common.speech/Exports.js\";\r\nimport { createNoDashGuid, Deferred, Events, EventType, PlatformEvent } from \"../common/Exports.js\";\r\nimport { AudioOutputFormatImpl } from \"./Audio/AudioOutputFormat.js\";\r\nimport {\r\n AvatarConfig,\r\n AvatarEventArgs,\r\n PropertyCollection,\r\n PropertyId,\r\n ResultReason,\r\n SpeechConfig,\r\n SpeechSynthesisOutputFormat,\r\n SpeechSynthesisResult,\r\n SynthesisResult,\r\n Synthesizer\r\n} from \"./Exports.js\";\r\nimport { Contracts } from \"./Contracts.js\";\r\nimport { SynthesisRequest } from \"./Synthesizer.js\";\r\n\r\n/**\r\n * Defines the avatar synthesizer.\r\n * @class AvatarSynthesizer\r\n * Added in version 1.33.0\r\n *\r\n * @experimental This feature is experimental and might change or have limited support.\r\n */\r\nexport class AvatarSynthesizer extends Synthesizer {\r\n protected privProperties: PropertyCollection;\r\n private privAvatarConfig: AvatarConfig;\r\n private privIceServers: RTCIceServer[];\r\n /**\r\n * Defines event handler for avatar events.\r\n * @member AvatarSynthesizer.prototype.avatarEventReceived\r\n * @function\r\n * @public\r\n */\r\n public avatarEventReceived: (sender: AvatarSynthesizer, event: AvatarEventArgs) => void;\r\n\r\n /**\r\n * Creates and initializes an instance of this class.\r\n * @constructor\r\n * @param {SpeechConfig} speechConfig - The speech config.\r\n * @param {AvatarConfig} avatarConfig - The talking avatar config.\r\n */\r\n public constructor(speechConfig: SpeechConfig, avatarConfig: AvatarConfig) {\r\n 
        super(speechConfig);

        Contracts.throwIfNullOrUndefined(avatarConfig, "avatarConfig");

        this.privConnectionFactory = new SpeechSynthesisConnectionFactory();
        this.privAvatarConfig = avatarConfig;
        this.implCommonSynthesizeSetup();
    }

    protected implCommonSynthesizeSetup(): void {
        super.implCommonSynthesizeSetup();

        // The service requires an audio format to be set, even though the
        // setting is ignored in avatar synthesis.
        this.privAdapter.audioOutputFormat = AudioOutputFormatImpl.fromSpeechSynthesisOutputFormat(
            SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
        );
    }

    /**
     * Starts the talking avatar session and establishes the WebRTC connection.
     * @member AvatarSynthesizer.prototype.startAvatarAsync
     * @function
     * @public
     * @param {RTCPeerConnection} peerConnection - The peer connection.
     * @returns {Promise<SynthesisResult>} The promise of the connection result.
     */
    public async startAvatarAsync(peerConnection: RTCPeerConnection): Promise<SynthesisResult> {
        Contracts.throwIfNullOrUndefined(peerConnection, "peerConnection");
        this.privIceServers = peerConnection.getConfiguration().iceServers;
        Contracts.throwIfNullOrUndefined(this.privIceServers, "Ice servers must be set.");
        const iceGatheringDone = new Deferred<void>();
        // https://developer.mozilla.org/en-US/docs/Web/API/RTCPeerConnection/icegatheringstatechange_event
        peerConnection.onicegatheringstatechange = (): void => {
            Events.instance.onEvent(new PlatformEvent("peer connection: ice gathering state: " + peerConnection.iceGatheringState, EventType.Debug));
            if (peerConnection.iceGatheringState === "complete") {
                Events.instance.onEvent(new PlatformEvent("peer connection: ice gathering complete.", EventType.Info));
                iceGatheringDone.resolve();
            }
        };
        peerConnection.onicecandidate = (event: RTCPeerConnectionIceEvent): void => {
            if (event.candidate) {
                Events.instance.onEvent(new PlatformEvent("peer connection: ice candidate: " + event.candidate.candidate, EventType.Debug));
            } else {
                Events.instance.onEvent(new PlatformEvent("peer connection: ice candidate: complete", EventType.Debug));
                iceGatheringDone.resolve();
            }
        };
        // Set a timeout for ICE gathering, currently 2 seconds.
        setTimeout((): void => {
            if (peerConnection.iceGatheringState !== "complete") {
                Events.instance.onEvent(new PlatformEvent("peer connection: ice gathering timeout.", EventType.Warning));
                iceGatheringDone.resolve();
            }
        }, 2000);
        const sdp: RTCSessionDescriptionInit = await peerConnection.createOffer();
        await peerConnection.setLocalDescription(sdp);
        await iceGatheringDone.promise;
        Events.instance.onEvent(new PlatformEvent("peer connection: got local SDP.", EventType.Info));
        this.privProperties.setProperty(PropertyId.TalkingAvatarService_WebRTC_SDP, JSON.stringify(peerConnection.localDescription));

        const result: SpeechSynthesisResult = await this.speak("", false);
        if (result.reason !== ResultReason.SynthesizingAudioCompleted) {
            return new SynthesisResult(
                result.resultId,
                result.reason,
                result.errorDetails,
                result.properties,
            );
        }
        const sdpAnswerString: string = atob(result.properties.getProperty(PropertyId.TalkingAvatarService_WebRTC_SDP));
        const sdpAnswer: RTCSessionDescription = new RTCSessionDescription(
            JSON.parse(sdpAnswerString) as RTCSessionDescriptionInit,
        );
        await peerConnection.setRemoteDescription(sdpAnswer);
        return new SynthesisResult(
            result.resultId,
            result.reason,
            undefined,
            result.properties,
        );
    }

    /**
     * Speaks plain text asynchronously. The rendered audio and video will be sent via the WebRTC connection.
     * @member AvatarSynthesizer.prototype.speakTextAsync
     * @function
     * @public
     * @param {string} text - The plain text to speak.
     * @returns {Promise<SynthesisResult>} The promise of the synthesis result.
     */
    public async speakTextAsync(text: string): Promise<SynthesisResult> {
        const r = await this.speak(text, false);
        return new SynthesisResult(
            r.resultId,
            r.reason,
            r.errorDetails,
            r.properties,
        );
    }

    /**
     * Speaks SSML asynchronously. The rendered audio and video will be sent via the WebRTC connection.
     * @member AvatarSynthesizer.prototype.speakSsmlAsync
     * @function
     * @public
     * @param {string} ssml - The SSML text to speak.
     * @returns {Promise<SynthesisResult>} The promise of the synthesis result.
     */
    public async speakSsmlAsync(ssml: string): Promise<SynthesisResult> {
        const r = await this.speak(ssml, true);
        return new SynthesisResult(
            r.resultId,
            r.reason,
            r.errorDetails,
            r.properties,
        );
    }

    /**
     * Stops speaking asynchronously. The avatar will switch to the idle state.
     * @member AvatarSynthesizer.prototype.stopSpeakingAsync
     * @function
     * @public
     * @returns {Promise<void>} The promise of the void result.
     */
    public async stopSpeakingAsync(): Promise<void> {
        while (this.synthesisRequestQueue.length() > 0) {
            const request = await this.synthesisRequestQueue.dequeue();
            request.err("Synthesis is canceled by user.");
        }
        return this.privAdapter.stopSpeaking();
    }

    /**
     * Stops the talking avatar session and closes the WebRTC connection.
     * For now, this is the same as close().
     * You need to create a new AvatarSynthesizer instance to start a new session.
     * @member AvatarSynthesizer.prototype.stopAvatarAsync
     * @function
     * @public
     * @returns {Promise<void>} The promise of the void result.
     */
    public async stopAvatarAsync(): Promise<void> {
        Contracts.throwIfDisposed(this.privDisposed);
        return this.dispose(true);
    }

    /**
     * Disposes of the associated resources.
     * @member AvatarSynthesizer.prototype.close
     * @function
     * @public
     */
    public async close(): Promise<void> {
        if (this.privDisposed) {
            return;
        }

        return this.dispose(true);
    }

    /**
     * Gets the ICE servers. Internal use only.
     */
    public get iceServers(): RTCIceServer[] {
        return this.privIceServers;
    }

    // Creates the synthesis adapter.
    protected createSynthesisAdapter(
        authentication: IAuthentication,
        connectionFactory: ISynthesisConnectionFactory,
        synthesizerConfig: SynthesizerConfig): SynthesisAdapterBase {
        return new AvatarSynthesisAdapter(
            authentication,
            connectionFactory,
            synthesizerConfig,
            this,
            this.privAvatarConfig);
    }

    protected createRestSynthesisAdapter(
        _authentication: IAuthentication,
        _synthesizerConfig: SynthesizerConfig): SynthesisRestAdapter {
        return undefined;
    }

    protected createSynthesizerConfig(speechConfig: SpeechServiceConfig): SynthesizerConfig {
        const config = super.createSynthesizerConfig(speechConfig);
        config.avatarEnabled = true;
        return config;
    }

    protected async speak(text: string, isSSML: boolean): Promise<SpeechSynthesisResult> {
        const requestId = createNoDashGuid();
        const deferredResult = new Deferred<SpeechSynthesisResult>();
        this.synthesisRequestQueue.enqueue(new SynthesisRequest(requestId, text, isSSML,
            (e: SpeechSynthesisResult): void => {
                deferredResult.resolve(e);
                this.privSynthesizing = false;
                void this.adapterSpeak();
            },
            (e: string): void => {
                deferredResult.reject(e);
                this.privSynthesizing = false;
            }));
        void this.adapterSpeak();
        return deferredResult.promise;
    }
}
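For orientation, here is a minimal, hypothetical usage sketch of the class above, written against the package's public exports: it wires up an RTCPeerConnection, starts the avatar session, and speaks one line. The subscription key, region, STUN server, element IDs, and the AvatarConfig arguments ("lisa", "casual-sitting") are placeholders; this file only shows that the constructor takes a SpeechConfig and an AvatarConfig, so the AvatarConfig construction is an assumption.

import {
    AvatarConfig,
    AvatarSynthesizer,
    ResultReason,
    SpeechConfig,
} from "microsoft-cognitiveservices-speech-sdk";

async function runAvatarSession(): Promise<void> {
    // Placeholders: substitute a real subscription key and service region.
    const speechConfig = SpeechConfig.fromSubscription("<subscription-key>", "<region>");
    // Assumption: AvatarConfig is built from a character name and a style name.
    const avatarConfig = new AvatarConfig("lisa", "casual-sitting");
    const synthesizer = new AvatarSynthesizer(speechConfig, avatarConfig);

    // startAvatarAsync() reads the ICE servers back from the connection's
    // configuration, so they must be set here (placeholder STUN server).
    const peerConnection = new RTCPeerConnection({
        iceServers: [{ urls: ["stun:stun.l.google.com:19302"] }],
    });

    // Render the avatar's media tracks as they arrive; assumes the page has
    // <audio id="audio"> and <video id="video"> elements.
    peerConnection.ontrack = (event: RTCTrackEvent): void => {
        const element = document.getElementById(event.track.kind) as HTMLMediaElement;
        element.srcObject = event.streams[0];
        element.autoplay = true;
    };
    peerConnection.addTransceiver("video", { direction: "sendrecv" });
    peerConnection.addTransceiver("audio", { direction: "sendrecv" });

    // On success the result carries ResultReason.SynthesizingAudioCompleted,
    // matching the check inside startAvatarAsync() above.
    const startResult = await synthesizer.startAvatarAsync(peerConnection);
    if (startResult.reason === ResultReason.SynthesizingAudioCompleted) {
        await synthesizer.speakTextAsync("Hello from the talking avatar.");
    } else {
        console.error("avatar start failed:", startResult.errorDetails);
    }
}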
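When interrupting or ending a session, the distinction in the source above matters: stopSpeakingAsync() only cancels queued and in-progress speech (the avatar switches to idle and the WebRTC session stays up), while stopAvatarAsync() disposes the synthesizer, so a new instance is needed for the next session. A short sketch, reusing the names from the example above; the members of AvatarEventArgs are not shown in this file, so the handler just logs the event:

import { AvatarEventArgs, AvatarSynthesizer } from "microsoft-cognitiveservices-speech-sdk";

// Optional: observe service-side avatar events.
function watchAvatarEvents(synthesizer: AvatarSynthesizer): void {
    synthesizer.avatarEventReceived = (sender: AvatarSynthesizer, event: AvatarEventArgs): void => {
        console.log("avatar event received:", event);
    };
}

async function endAvatarSession(
    synthesizer: AvatarSynthesizer,
    peerConnection: RTCPeerConnection,
): Promise<void> {
    // Rejects queued requests with "Synthesis is canceled by user." and
    // switches the avatar to idle; the session remains usable afterwards.
    await synthesizer.stopSpeakingAsync();

    // Ends the session; per the comments in the source this is currently
    // the same as close(), so the synthesizer cannot be restarted.
    await synthesizer.stopAvatarAsync();
    peerConnection.close();
}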