microsoft-cognitiveservices-speech-sdk
Version:
Microsoft Cognitive Services Speech SDK for JavaScript
1 lines • 18.3 kB
Source Map (JSON)
{"version":3,"sources":["src/sdk/Synthesizer.ts"],"names":[],"mappings":"AAKA,OAAO,EAKH,eAAe,EACf,2BAA2B,EAE3B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,iBAAiB,EAAE,MAAM,6BAA6B,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAqB,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEnF,OAAO,EAAE,kBAAkB,EAAc,YAAY,EAAoB,qBAAqB,EAAE,MAAM,cAAc,CAAC;AAErH,8BAAsB,WAAW;IAC7B,SAAS,CAAC,WAAW,EAAE,oBAAoB,CAAC;IAC5C,SAAS,CAAC,eAAe,EAAE,oBAAoB,CAAC;IAChD,SAAS,CAAC,cAAc,EAAE,kBAAkB,CAAC;IAC7C,SAAS,CAAC,qBAAqB,EAAE,2BAA2B,CAAC;IAC7D,SAAS,CAAC,YAAY,EAAE,OAAO,CAAC;IAChC,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACpC,SAAS,CAAC,qBAAqB,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAEzD;;;;;;OAMG;IACH,IAAW,kBAAkB,IAAI,MAAM,CAEtC;IAED;;;;;;OAMG;IACH,IAAW,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAG1C;IAED;;;;;;OAMG;IACH,IAAW,UAAU,IAAI,kBAAkB,CAE1C;IAED;;;;;;OAMG;IACH,IAAW,wBAAwB,IAAI,OAAO,CAE7C;IAED;;;;OAIG;IACH,SAAS,aAAa,YAAY,EAAE,YAAY;IAUzC,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAoJtC;;;;;;;;;OASG;cACa,OAAO,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;cAc1C,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAgB7C,SAAS,CAAC,QAAQ,CAAC,sBAAsB,CACrC,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,2BAA2B,EAC9C,iBAAiB,EAAE,iBAAiB,GAAG,oBAAoB;IAG/D,SAAS,CAAC,QAAQ,CAAC,0BAA0B,CACzC,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,iBAAiB,GAAG,oBAAoB;IAE/D,SAAS,CAAC,uBAAuB,CAAC,YAAY,EAAE,mBAAmB,GAAG,iBAAiB;IAOvF,SAAS,CAAC,yBAAyB,IAAI,IAAI;IAuC3C,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;CAOnD;AAED,qBAAa,gBAAgB;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,EAAE,EAAE,CAAC,CAAC,EAAE,qBAAqB,KAAK,IAAI,CAAC;IACvC,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IACzB,UAAU,EAAE,iBAAiB,CAAC;gBAElB,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,qBAAqB,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,MAAM,KAAK,IAAI,EAAE,UAAU,CAAC,EAAE,iBAAiB;CAQ1K","file":"Synthesizer.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\n/* eslint-disable max-classes-per-file */\r\n\r\nimport {\r\n AutoDetectSourceLanguagesOpenRangeOptionName,\r\n CognitiveSubscriptionKeyAuthentication,\r\n CognitiveTokenAuthentication,\r\n Context,\r\n IAuthentication,\r\n ISynthesisConnectionFactory,\r\n OS,\r\n SpeechServiceConfig,\r\n SynthesisAdapterBase,\r\n SynthesisRestAdapter,\r\n SynthesizerConfig } from \"../common.speech/Exports.js\";\r\nimport { IAudioDestination, IStringDictionary, Queue } from \"../common/Exports.js\";\r\nimport { Contracts } from \"./Contracts.js\";\r\nimport { PropertyCollection, PropertyId, SpeechConfig, SpeechConfigImpl, SpeechSynthesisResult } from \"./Exports.js\";\r\n\r\nexport abstract class Synthesizer {\r\n protected privAdapter: SynthesisAdapterBase;\r\n protected privRestAdapter: SynthesisRestAdapter;\r\n protected privProperties: PropertyCollection;\r\n protected privConnectionFactory: ISynthesisConnectionFactory;\r\n protected privDisposed: boolean;\r\n protected privSynthesizing: boolean;\r\n protected synthesisRequestQueue: Queue<SynthesisRequest>;\r\n\r\n /**\r\n * Gets the authorization token used to communicate with the service.\r\n * @member Synthesizer.prototype.authorizationToken\r\n * @function\r\n * @public\r\n * @returns {string} Authorization token.\r\n */\r\n public get authorizationToken(): string {\r\n return this.properties.getProperty(PropertyId.SpeechServiceAuthorization_Token);\r\n }\r\n\r\n /**\r\n * Gets/Sets the authorization token used to communicate with the service.\r\n * @member Synthesizer.prototype.authorizationToken\r\n * @function\r\n * @public\r\n * @param {string} token - Authorization token.\r\n */\r\n public set authorizationToken(token: string) {\r\n Contracts.throwIfNullOrWhitespace(token, \"token\");\r\n this.properties.setProperty(PropertyId.SpeechServiceAuthorization_Token, token);\r\n }\r\n\r\n /**\r\n * The collection of properties and their values defined for this Synthesizer.\r\n * @member Synthesizer.prototype.properties\r\n * @function\r\n * @public\r\n * @returns {PropertyCollection} The collection of properties and their values defined for this SpeechSynthesizer.\r\n */\r\n public get properties(): PropertyCollection {\r\n return this.privProperties;\r\n }\r\n\r\n /**\r\n * Indicates if auto detect source language is enabled\r\n * @member Synthesizer.prototype.autoDetectSourceLanguage\r\n * @function\r\n * @public\r\n * @returns {boolean} if auto detect source language is enabled\r\n */\r\n public get autoDetectSourceLanguage(): boolean {\r\n return this.properties.getProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguages) === AutoDetectSourceLanguagesOpenRangeOptionName;\r\n }\r\n\r\n /**\r\n * Creates and initializes an instance of a Recognizer\r\n * @constructor\r\n * @param {SpeechConfig} speechConfig - The speech config to initialize the synthesizer.\r\n */\r\n protected constructor(speechConfig: SpeechConfig) {\r\n const speechConfigImpl: SpeechConfigImpl = speechConfig as SpeechConfigImpl;\r\n Contracts.throwIfNull(speechConfigImpl, \"speechConfig\");\r\n\r\n this.privProperties = speechConfigImpl.properties.clone();\r\n this.privDisposed = false;\r\n this.privSynthesizing = false;\r\n this.synthesisRequestQueue = new Queue<SynthesisRequest>();\r\n }\r\n\r\n public buildSsml(text: string): string {\r\n const languageToDefaultVoice: IStringDictionary<string> = {\r\n [\"af-ZA\"]: \"af-ZA-AdriNeural\",\r\n [\"am-ET\"]: \"am-ET-AmehaNeural\",\r\n [\"ar-AE\"]: \"ar-AE-FatimaNeural\",\r\n [\"ar-BH\"]: \"ar-BH-AliNeural\",\r\n [\"ar-DZ\"]: \"ar-DZ-AminaNeural\",\r\n [\"ar-EG\"]: \"ar-EG-SalmaNeural\",\r\n [\"ar-IQ\"]: \"ar-IQ-BasselNeural\",\r\n [\"ar-JO\"]: \"ar-JO-SanaNeural\",\r\n [\"ar-KW\"]: \"ar-KW-FahedNeural\",\r\n [\"ar-LY\"]: \"ar-LY-ImanNeural\",\r\n [\"ar-MA\"]: \"ar-MA-JamalNeural\",\r\n [\"ar-QA\"]: \"ar-QA-AmalNeural\",\r\n [\"ar-SA\"]: \"ar-SA-HamedNeural\",\r\n [\"ar-SY\"]: \"ar-SY-AmanyNeural\",\r\n [\"ar-TN\"]: \"ar-TN-HediNeural\",\r\n [\"ar-YE\"]: \"ar-YE-MaryamNeural\",\r\n [\"bg-BG\"]: \"bg-BG-BorislavNeural\",\r\n [\"bn-BD\"]: \"bn-BD-NabanitaNeural\",\r\n [\"bn-IN\"]: \"bn-IN-BashkarNeural\",\r\n [\"ca-ES\"]: \"ca-ES-JoanaNeural\",\r\n [\"cs-CZ\"]: \"cs-CZ-AntoninNeural\",\r\n [\"cy-GB\"]: \"cy-GB-AledNeural\",\r\n [\"da-DK\"]: \"da-DK-ChristelNeural\",\r\n [\"de-AT\"]: \"de-AT-IngridNeural\",\r\n [\"de-CH\"]: \"de-CH-JanNeural\",\r\n [\"de-DE\"]: \"de-DE-KatjaNeural\",\r\n [\"el-GR\"]: \"el-GR-AthinaNeural\",\r\n [\"en-AU\"]: \"en-AU-NatashaNeural\",\r\n [\"en-CA\"]: \"en-CA-ClaraNeural\",\r\n [\"en-GB\"]: \"en-GB-LibbyNeural\",\r\n [\"en-HK\"]: \"en-HK-SamNeural\",\r\n [\"en-IE\"]: \"en-IE-ConnorNeural\",\r\n [\"en-IN\"]: \"en-IN-NeerjaNeural\",\r\n [\"en-KE\"]: \"en-KE-AsiliaNeural\",\r\n [\"en-NG\"]: \"en-NG-AbeoNeural\",\r\n [\"en-NZ\"]: \"en-NZ-MitchellNeural\",\r\n [\"en-PH\"]: \"en-PH-JamesNeural\",\r\n [\"en-SG\"]: \"en-SG-LunaNeural\",\r\n [\"en-TZ\"]: \"en-TZ-ElimuNeural\",\r\n [\"en-US\"]: \"en-US-AvaMultilingualNeural\",\r\n [\"en-ZA\"]: \"en-ZA-LeahNeural\",\r\n [\"es-AR\"]: \"es-AR-ElenaNeural\",\r\n [\"es-BO\"]: \"es-BO-MarceloNeural\",\r\n [\"es-CL\"]: \"es-CL-CatalinaNeural\",\r\n [\"es-CO\"]: \"es-CO-GonzaloNeural\",\r\n [\"es-CR\"]: \"es-CR-JuanNeural\",\r\n [\"es-CU\"]: \"es-CU-BelkysNeural\",\r\n [\"es-DO\"]: \"es-DO-EmilioNeural\",\r\n [\"es-EC\"]: \"es-EC-AndreaNeural\",\r\n [\"es-ES\"]: \"es-ES-AlvaroNeural\",\r\n [\"es-GQ\"]: \"es-GQ-JavierNeural\",\r\n [\"es-GT\"]: \"es-GT-AndresNeural\",\r\n [\"es-HN\"]: \"es-HN-CarlosNeural\",\r\n [\"es-MX\"]: \"es-MX-DaliaNeural\",\r\n [\"es-NI\"]: \"es-NI-FedericoNeural\",\r\n [\"es-PA\"]: \"es-PA-MargaritaNeural\",\r\n [\"es-PE\"]: \"es-PE-AlexNeural\",\r\n [\"es-PR\"]: \"es-PR-KarinaNeural\",\r\n [\"es-PY\"]: \"es-PY-MarioNeural\",\r\n [\"es-SV\"]: \"es-SV-LorenaNeural\",\r\n [\"es-US\"]: \"es-US-AlonsoNeural\",\r\n [\"es-UY\"]: \"es-UY-MateoNeural\",\r\n [\"es-VE\"]: \"es-VE-PaolaNeural\",\r\n [\"et-EE\"]: \"et-EE-AnuNeural\",\r\n [\"fa-IR\"]: \"fa-IR-DilaraNeural\",\r\n [\"fi-FI\"]: \"fi-FI-SelmaNeural\",\r\n [\"fil-PH\"]: \"fil-PH-AngeloNeural\",\r\n [\"fr-BE\"]: \"fr-BE-CharlineNeural\",\r\n [\"fr-CA\"]: \"fr-CA-SylvieNeural\",\r\n [\"fr-CH\"]: \"fr-CH-ArianeNeural\",\r\n [\"fr-FR\"]: \"fr-FR-DeniseNeural\",\r\n [\"ga-IE\"]: \"ga-IE-ColmNeural\",\r\n [\"gl-ES\"]: \"gl-ES-RoiNeural\",\r\n [\"gu-IN\"]: \"gu-IN-DhwaniNeural\",\r\n [\"he-IL\"]: \"he-IL-AvriNeural\",\r\n [\"hi-IN\"]: \"hi-IN-MadhurNeural\",\r\n [\"hr-HR\"]: \"hr-HR-GabrijelaNeural\",\r\n [\"hu-HU\"]: \"hu-HU-NoemiNeural\",\r\n [\"id-ID\"]: \"id-ID-ArdiNeural\",\r\n [\"is-IS\"]: \"is-IS-GudrunNeural\",\r\n [\"it-IT\"]: \"it-IT-IsabellaNeural\",\r\n [\"ja-JP\"]: \"ja-JP-NanamiNeural\",\r\n [\"jv-ID\"]: \"jv-ID-DimasNeural\",\r\n [\"kk-KZ\"]: \"kk-KZ-AigulNeural\",\r\n [\"km-KH\"]: \"km-KH-PisethNeural\",\r\n [\"kn-IN\"]: \"kn-IN-GaganNeural\",\r\n [\"ko-KR\"]: \"ko-KR-SunHiNeural\",\r\n [\"lo-LA\"]: \"lo-LA-ChanthavongNeural\",\r\n [\"lt-LT\"]: \"lt-LT-LeonasNeural\",\r\n [\"lv-LV\"]: \"lv-LV-EveritaNeural\",\r\n [\"mk-MK\"]: \"mk-MK-AleksandarNeural\",\r\n [\"ml-IN\"]: \"ml-IN-MidhunNeural\",\r\n [\"mr-IN\"]: \"mr-IN-AarohiNeural\",\r\n [\"ms-MY\"]: \"ms-MY-OsmanNeural\",\r\n [\"mt-MT\"]: \"mt-MT-GraceNeural\",\r\n [\"my-MM\"]: \"my-MM-NilarNeural\",\r\n [\"nb-NO\"]: \"nb-NO-PernilleNeural\",\r\n [\"nl-BE\"]: \"nl-BE-ArnaudNeural\",\r\n [\"nl-NL\"]: \"nl-NL-ColetteNeural\",\r\n [\"pl-PL\"]: \"pl-PL-AgnieszkaNeural\",\r\n [\"ps-AF\"]: \"ps-AF-GulNawazNeural\",\r\n [\"pt-BR\"]: \"pt-BR-FranciscaNeural\",\r\n [\"pt-PT\"]: \"pt-PT-DuarteNeural\",\r\n [\"ro-RO\"]: \"ro-RO-AlinaNeural\",\r\n [\"ru-RU\"]: \"ru-RU-SvetlanaNeural\",\r\n [\"si-LK\"]: \"si-LK-SameeraNeural\",\r\n [\"sk-SK\"]: \"sk-SK-LukasNeural\",\r\n [\"sl-SI\"]: \"sl-SI-PetraNeural\",\r\n [\"so-SO\"]: \"so-SO-MuuseNeural\",\r\n [\"sr-RS\"]: \"sr-RS-NicholasNeural\",\r\n [\"su-ID\"]: \"su-ID-JajangNeural\",\r\n [\"sv-SE\"]: \"sv-SE-SofieNeural\",\r\n [\"sw-KE\"]: \"sw-KE-RafikiNeural\",\r\n [\"sw-TZ\"]: \"sw-TZ-DaudiNeural\",\r\n [\"ta-IN\"]: \"ta-IN-PallaviNeural\",\r\n [\"ta-LK\"]: \"ta-LK-KumarNeural\",\r\n [\"ta-SG\"]: \"ta-SG-AnbuNeural\",\r\n [\"te-IN\"]: \"te-IN-MohanNeural\",\r\n [\"th-TH\"]: \"th-TH-PremwadeeNeural\",\r\n [\"tr-TR\"]: \"tr-TR-AhmetNeural\",\r\n [\"uk-UA\"]: \"uk-UA-OstapNeural\",\r\n [\"ur-IN\"]: \"ur-IN-GulNeural\",\r\n [\"ur-PK\"]: \"ur-PK-AsadNeural\",\r\n [\"uz-UZ\"]: \"uz-UZ-MadinaNeural\",\r\n [\"vi-VN\"]: \"vi-VN-HoaiMyNeural\",\r\n [\"zh-CN\"]: \"zh-CN-XiaoxiaoNeural\",\r\n [\"zh-HK\"]: \"zh-HK-HiuMaanNeural\",\r\n [\"zh-TW\"]: \"zh-TW-HsiaoChenNeural\",\r\n [\"zu-ZA\"]: \"zu-ZA-ThandoNeural\",\r\n };\r\n\r\n let language = this.properties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage, \"en-US\");\r\n let voice = this.properties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice, \"\");\r\n let ssml: string = Synthesizer.XMLEncode(text);\r\n if (this.autoDetectSourceLanguage) {\r\n language = \"en-US\";\r\n } else {\r\n voice = voice || languageToDefaultVoice[language];\r\n }\r\n if (voice) {\r\n ssml = `<voice name='${voice}'>${ssml}</voice>`;\r\n }\r\n ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts' xmlns:emo='http://www.w3.org/2009/10/emotionml' xml:lang='${language}'>${ssml}</speak>`;\r\n return ssml;\r\n }\r\n\r\n /**\r\n * This method performs cleanup of resources.\r\n * The Boolean parameter disposing indicates whether the method is called\r\n * from Dispose (if disposing is true) or from the finalizer (if disposing is false).\r\n * Derived classes should override this method to dispose resource if needed.\r\n * @member Synthesizer.prototype.dispose\r\n * @function\r\n * @public\r\n * @param {boolean} disposing - Flag to request disposal.\r\n */\r\n protected async dispose(disposing: boolean): Promise<void> {\r\n if (this.privDisposed) {\r\n return;\r\n }\r\n\r\n if (disposing) {\r\n if (this.privAdapter) {\r\n await this.privAdapter.dispose();\r\n }\r\n }\r\n\r\n this.privDisposed = true;\r\n }\r\n\r\n protected async adapterSpeak(): Promise<void> {\r\n if (!this.privDisposed && !this.privSynthesizing) {\r\n this.privSynthesizing = true;\r\n const request: SynthesisRequest = await this.synthesisRequestQueue.dequeue();\r\n return this.privAdapter.Speak(request.text, request.isSSML, request.requestId, request.cb, request.err, request.dataStream);\r\n }\r\n }\r\n\r\n //\r\n // ################################################################################################################\r\n // IMPLEMENTATION.\r\n // Move to independent class\r\n // ################################################################################################################\r\n //\r\n\r\n // Creates the synthesis adapter\r\n protected abstract createSynthesisAdapter(\r\n authentication: IAuthentication,\r\n connectionFactory: ISynthesisConnectionFactory,\r\n synthesizerConfig: SynthesizerConfig): SynthesisAdapterBase;\r\n\r\n // Creates the REST synthesis adapter\r\n protected abstract createRestSynthesisAdapter(\r\n authentication: IAuthentication,\r\n synthesizerConfig: SynthesizerConfig): SynthesisRestAdapter;\r\n\r\n protected createSynthesizerConfig(speechConfig: SpeechServiceConfig): SynthesizerConfig {\r\n return new SynthesizerConfig(\r\n speechConfig,\r\n this.privProperties);\r\n }\r\n\r\n // Does the generic synthesizer setup that is common across all synthesizer types.\r\n protected implCommonSynthesizeSetup(): void {\r\n\r\n let osPlatform = (typeof window !== \"undefined\") ? \"Browser\" : \"Node\";\r\n let osName = \"unknown\";\r\n let osVersion = \"unknown\";\r\n\r\n if (typeof navigator !== \"undefined\") {\r\n osPlatform = osPlatform + \"/\" + navigator.platform;\r\n osName = navigator.userAgent;\r\n osVersion = navigator.appVersion;\r\n }\r\n\r\n const synthesizerConfig: SynthesizerConfig = this.createSynthesizerConfig(\r\n new SpeechServiceConfig(\r\n new Context(new OS(osPlatform, osName, osVersion))));\r\n\r\n const subscriptionKey = this.privProperties.getProperty(PropertyId.SpeechServiceConnection_Key, undefined);\r\n const authentication = (subscriptionKey && subscriptionKey !== \"\") ?\r\n new CognitiveSubscriptionKeyAuthentication(subscriptionKey) :\r\n new CognitiveTokenAuthentication(\r\n (): Promise<string> => {\r\n const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);\r\n return Promise.resolve(authorizationToken);\r\n },\r\n (): Promise<string> => {\r\n const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);\r\n return Promise.resolve(authorizationToken);\r\n });\r\n\r\n this.privAdapter = this.createSynthesisAdapter(\r\n authentication,\r\n this.privConnectionFactory,\r\n synthesizerConfig);\r\n\r\n this.privRestAdapter = this.createRestSynthesisAdapter(\r\n authentication,\r\n synthesizerConfig);\r\n }\r\n\r\n protected static XMLEncode(text: string): string {\r\n return text.replace(/&/g, \"&\")\r\n .replace(/</g, \"<\")\r\n .replace(/>/g, \">\")\r\n .replace(/\"/g, \""\")\r\n .replace(/'/g, \"'\");\r\n }\r\n}\r\n\r\nexport class SynthesisRequest {\r\n public requestId: string;\r\n public text: string;\r\n public isSSML: boolean;\r\n public cb: (e: SpeechSynthesisResult) => void;\r\n public err: (e: string) => void;\r\n public dataStream: IAudioDestination;\r\n\r\n public constructor(requestId: string, text: string, isSSML: boolean, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, dataStream?: IAudioDestination) {\r\n this.requestId = requestId;\r\n this.text = text;\r\n this.isSSML = isSSML;\r\n this.cb = cb;\r\n this.err = err;\r\n this.dataStream = dataStream;\r\n }\r\n}\r\n"]}