microsoft-cognitiveservices-speech-sdk
Version:
Microsoft Cognitive Services Speech SDK for JavaScript
1 lines • 19.8 kB
Source Map (JSON)
{"version":3,"sources":["src/common.speech/TranslationServiceRecognizer.ts"],"names":[],"mappings":"AAGA,OAAO,EAEH,YAAY,EAGf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACH,qBAAqB,EACrB,kBAAkB,EAIlB,uBAAuB,EAIvB,qBAAqB,EAIxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAEH,6BAA6B,EAShC,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAEzD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uCAAuC,CAAC;AAGhF,qBAAa,4BAA6B,SAAQ,6BAA6B;IAC3E,OAAO,CAAC,yBAAyB,CAAwB;gBAGrD,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,kBAAkB,EACrC,WAAW,EAAE,YAAY,EACzB,gBAAgB,EAAE,gBAAgB,EAClC,qBAAqB,EAAE,qBAAqB;cAYhC,2BAA2B,CAAC,iBAAiB,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC;IAyMzG,SAAS,CAAC,iBAAiB,CACvB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,kBAAkB,EAAE,kBAAkB,EACtC,SAAS,EAAE,qBAAqB,EAChC,KAAK,EAAE,MAAM,GAAG,IAAI;IAyCxB,SAAS,CAAC,yBAAyB,CAAC,MAAM,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI;IAW7G,SAAS,CAAC,wBAAwB,CAAC,MAAM,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI;IAU5G,OAAO,CAAC,kBAAkB;IAyC1B,OAAO,CAAC,kBAAkB;CAgB7B","file":"TranslationServiceRecognizer.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\nimport {\r\n ConnectionEvent,\r\n IAudioSource,\r\n MessageType,\r\n TranslationStatus,\r\n} from \"../common/Exports.js\";\r\nimport {\r\n CancellationErrorCode,\r\n CancellationReason,\r\n PropertyCollection,\r\n PropertyId,\r\n ResultReason,\r\n SpeechRecognitionResult,\r\n TranslationRecognitionCanceledEventArgs,\r\n TranslationRecognitionEventArgs,\r\n TranslationRecognitionResult,\r\n TranslationRecognizer,\r\n Translations,\r\n TranslationSynthesisEventArgs,\r\n TranslationSynthesisResult,\r\n} from \"../sdk/Exports.js\";\r\nimport {\r\n CancellationErrorCodePropertyName,\r\n ConversationServiceRecognizer,\r\n EnumTranslation,\r\n ITranslationHypothesis,\r\n RecognitionStatus,\r\n SimpleSpeechPhrase,\r\n SynthesisStatus,\r\n TranslationHypothesis,\r\n TranslationPhrase,\r\n TranslationSynthesisEnd,\r\n} from \"./Exports.js\";\r\nimport { IAuthentication } from \"./IAuthentication.js\";\r\nimport { IConnectionFactory } from \"./IConnectionFactory.js\";\r\nimport { RecognizerConfig } from \"./RecognizerConfig.js\";\r\nimport { ITranslationPhrase } from \"./ServiceMessages/TranslationPhrase.js\";\r\nimport { SpeechConnectionMessage } from \"./SpeechConnectionMessage.Internal.js\";\r\n\r\n// eslint-disable-next-line max-classes-per-file\r\nexport class TranslationServiceRecognizer extends ConversationServiceRecognizer {\r\n private privTranslationRecognizer: TranslationRecognizer;\r\n\r\n public constructor(\r\n authentication: IAuthentication,\r\n connectionFactory: IConnectionFactory,\r\n audioSource: IAudioSource,\r\n recognizerConfig: RecognizerConfig,\r\n translationRecognizer: TranslationRecognizer) {\r\n\r\n super(authentication, connectionFactory, audioSource, recognizerConfig, translationRecognizer);\r\n this.privTranslationRecognizer = translationRecognizer;\r\n this.connectionEvents.attach((connectionEvent: ConnectionEvent): void => {\r\n if (connectionEvent.name === \"ConnectionEstablishedEvent\") {\r\n this.privTranslationRecognizer.onConnection();\r\n }\r\n });\r\n\r\n }\r\n\r\n protected async processTypeSpecificMessages(connectionMessage: SpeechConnectionMessage): Promise<boolean> {\r\n\r\n const resultProps: PropertyCollection = new PropertyCollection();\r\n let processed: boolean = await this.processSpeechMessages(connectionMessage);\r\n if (processed) {\r\n return true;\r\n }\r\n\r\n const handleTranslationPhrase = async (translatedPhrase: TranslationPhrase): Promise<void> => {\r\n resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, translatedPhrase.asJson());\r\n this.privRequestSession.onPhraseRecognized(translatedPhrase.Offset + translatedPhrase.Duration);\r\n\r\n if (translatedPhrase.RecognitionStatus === RecognitionStatus.Success) {\r\n\r\n // OK, the recognition was successful. How'd the translation do?\r\n const result: TranslationRecognitionEventArgs = this.fireEventForResult(translatedPhrase, resultProps);\r\n if (!!this.privTranslationRecognizer.recognized) {\r\n try {\r\n this.privTranslationRecognizer.recognized(this.privTranslationRecognizer, result);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n // report result to promise.\r\n if (!!this.privSuccessCallback) {\r\n try {\r\n this.privSuccessCallback(result.result);\r\n } catch (e) {\r\n if (!!this.privErrorCallback) {\r\n this.privErrorCallback(e as string);\r\n }\r\n }\r\n // Only invoke the call back once.\r\n // and if it's successful don't invoke the\r\n // error after that.\r\n this.privSuccessCallback = undefined;\r\n this.privErrorCallback = undefined;\r\n }\r\n } else {\r\n const reason: ResultReason = EnumTranslation.implTranslateRecognitionResult(translatedPhrase.RecognitionStatus);\r\n\r\n const result = new TranslationRecognitionResult(\r\n undefined,\r\n this.privRequestSession.requestId,\r\n reason,\r\n translatedPhrase.Text,\r\n translatedPhrase.Duration,\r\n translatedPhrase.Offset,\r\n translatedPhrase.Language,\r\n translatedPhrase.Confidence,\r\n undefined,\r\n translatedPhrase.asJson(),\r\n resultProps);\r\n\r\n if (reason === ResultReason.Canceled) {\r\n const cancelReason: CancellationReason = EnumTranslation.implTranslateCancelResult(translatedPhrase.RecognitionStatus);\r\n const cancellationErrorCode: CancellationErrorCode = EnumTranslation.implTranslateCancelErrorCode(translatedPhrase.RecognitionStatus);\r\n\r\n await this.cancelRecognitionLocal(\r\n cancelReason,\r\n cancellationErrorCode,\r\n EnumTranslation.implTranslateErrorDetails(cancellationErrorCode));\r\n\r\n } else {\r\n if (translatedPhrase.RecognitionStatus !== RecognitionStatus.EndOfDictation) {\r\n const ev = new TranslationRecognitionEventArgs(result, result.offset, this.privRequestSession.sessionId);\r\n\r\n if (!!this.privTranslationRecognizer.recognized) {\r\n try {\r\n this.privTranslationRecognizer.recognized(this.privTranslationRecognizer, ev);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n // report result to promise.\r\n if (!!this.privSuccessCallback) {\r\n try {\r\n this.privSuccessCallback(result);\r\n } catch (e) {\r\n if (!!this.privErrorCallback) {\r\n this.privErrorCallback(e as string);\r\n }\r\n }\r\n // Only invoke the call back once.\r\n // and if it's successful don't invoke the\r\n // error after that.\r\n this.privSuccessCallback = undefined;\r\n this.privErrorCallback = undefined;\r\n }\r\n }\r\n }\r\n processed = true;\r\n }\r\n\r\n };\r\n\r\n const handleTranslationHypothesis = (hypothesis: TranslationHypothesis): void => {\r\n resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, hypothesis.asJson());\r\n\r\n const result: TranslationRecognitionEventArgs = this.fireEventForResult(hypothesis, resultProps);\r\n this.privRequestSession.onHypothesis(result.offset);\r\n\r\n if (!!this.privTranslationRecognizer.recognizing) {\r\n try {\r\n this.privTranslationRecognizer.recognizing(this.privTranslationRecognizer, result);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n processed = true;\r\n };\r\n\r\n if (connectionMessage.messageType === MessageType.Text) {\r\n resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, connectionMessage.textBody);\r\n }\r\n\r\n switch (connectionMessage.path.toLowerCase()) {\r\n case \"translation.hypothesis\":\r\n handleTranslationHypothesis(TranslationHypothesis.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset));\r\n break;\r\n\r\n case \"translation.response\":\r\n const phrase: { SpeechPhrase: ITranslationPhrase } = JSON.parse(connectionMessage.textBody) as { SpeechPhrase: ITranslationPhrase };\r\n if (!!phrase.SpeechPhrase) {\r\n await handleTranslationPhrase(TranslationPhrase.fromTranslationResponse(phrase, this.privRequestSession.currentTurnAudioOffset));\r\n } else {\r\n const hypothesis: { SpeechHypothesis: ITranslationHypothesis } = JSON.parse(connectionMessage.textBody) as { SpeechHypothesis: ITranslationHypothesis };\r\n if (!!hypothesis.SpeechHypothesis) {\r\n handleTranslationHypothesis(TranslationHypothesis.fromTranslationResponse(hypothesis, this.privRequestSession.currentTurnAudioOffset));\r\n }\r\n }\r\n break;\r\n case \"translation.phrase\":\r\n await handleTranslationPhrase(TranslationPhrase.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset));\r\n break;\r\n\r\n case \"translation.synthesis\":\r\n this.sendSynthesisAudio(connectionMessage.binaryBody, this.privRequestSession.sessionId);\r\n processed = true;\r\n break;\r\n\r\n case \"audio.end\":\r\n case \"translation.synthesis.end\":\r\n const synthEnd: TranslationSynthesisEnd = TranslationSynthesisEnd.fromJSON(connectionMessage.textBody);\r\n\r\n switch (synthEnd.SynthesisStatus) {\r\n case SynthesisStatus.Error:\r\n if (!!this.privTranslationRecognizer.synthesizing) {\r\n const result = new TranslationSynthesisResult(ResultReason.Canceled, undefined);\r\n const retEvent: TranslationSynthesisEventArgs = new TranslationSynthesisEventArgs(result, this.privRequestSession.sessionId);\r\n\r\n try {\r\n this.privTranslationRecognizer.synthesizing(this.privTranslationRecognizer, retEvent);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n if (!!this.privTranslationRecognizer.canceled) {\r\n // And raise a canceled event to send the rich(er) error message back.\r\n const canceledResult: TranslationRecognitionCanceledEventArgs = new TranslationRecognitionCanceledEventArgs(\r\n this.privRequestSession.sessionId,\r\n CancellationReason.Error,\r\n synthEnd.FailureReason,\r\n CancellationErrorCode.ServiceError,\r\n null);\r\n\r\n try {\r\n this.privTranslationRecognizer.canceled(this.privTranslationRecognizer, canceledResult);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n break;\r\n case SynthesisStatus.Success:\r\n this.sendSynthesisAudio(undefined, this.privRequestSession.sessionId);\r\n break;\r\n default:\r\n break;\r\n }\r\n processed = true;\r\n break;\r\n default:\r\n break;\r\n }\r\n return processed;\r\n }\r\n\r\n // Cancels recognition.\r\n protected cancelRecognition(\r\n sessionId: string,\r\n requestId: string,\r\n cancellationReason: CancellationReason,\r\n errorCode: CancellationErrorCode,\r\n error: string): void {\r\n\r\n const properties: PropertyCollection = new PropertyCollection();\r\n properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);\r\n\r\n if (!!this.privTranslationRecognizer.canceled) {\r\n\r\n const cancelEvent: TranslationRecognitionCanceledEventArgs = new TranslationRecognitionCanceledEventArgs(\r\n sessionId,\r\n cancellationReason,\r\n error,\r\n errorCode,\r\n undefined);\r\n\r\n try {\r\n this.privTranslationRecognizer.canceled(this.privTranslationRecognizer, cancelEvent);\r\n /* eslint-disable no-empty */\r\n } catch { }\r\n }\r\n\r\n if (!!this.privSuccessCallback) {\r\n const result: TranslationRecognitionResult = new TranslationRecognitionResult(\r\n undefined, // Translations\r\n requestId,\r\n ResultReason.Canceled,\r\n undefined, // Text\r\n undefined, // Druation\r\n undefined, // Offset\r\n undefined, // Language\r\n undefined, // LanguageDetectionConfidence\r\n error,\r\n undefined, // Json\r\n properties);\r\n try {\r\n this.privSuccessCallback(result);\r\n /* eslint-disable no-empty */\r\n this.privSuccessCallback = undefined;\r\n } catch { }\r\n }\r\n }\r\n\r\n protected handleRecognizingCallback(result: SpeechRecognitionResult, offset: number, sessionId: string): void {\r\n try {\r\n const ev = new TranslationRecognitionEventArgs(TranslationRecognitionResult.fromSpeechRecognitionResult(result), offset, sessionId);\r\n this.privTranslationRecognizer.recognizing(this.privTranslationRecognizer, ev);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n protected handleRecognizedCallback(result: SpeechRecognitionResult, offset: number, sessionId: string): void {\r\n try {\r\n const ev = new TranslationRecognitionEventArgs(TranslationRecognitionResult.fromSpeechRecognitionResult(result), offset, sessionId);\r\n this.privTranslationRecognizer.recognized(this.privTranslationRecognizer, ev);\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n private fireEventForResult(serviceResult: TranslationHypothesis | TranslationPhrase, properties: PropertyCollection): TranslationRecognitionEventArgs {\r\n let translations: Translations;\r\n\r\n if (undefined !== serviceResult.Translation.Translations) {\r\n translations = new Translations();\r\n for (const translation of serviceResult.Translation.Translations) {\r\n translations.set(translation.Language, translation.Text || translation.DisplayText);\r\n }\r\n }\r\n\r\n let resultReason: ResultReason;\r\n let confidence: string;\r\n if (serviceResult instanceof TranslationPhrase) {\r\n if (!!serviceResult.Translation && serviceResult.Translation.TranslationStatus === TranslationStatus.Success) {\r\n resultReason = ResultReason.TranslatedSpeech;\r\n } else {\r\n resultReason = ResultReason.RecognizedSpeech;\r\n }\r\n confidence = serviceResult.Confidence;\r\n } else {\r\n resultReason = ResultReason.TranslatingSpeech;\r\n }\r\n const language = serviceResult.Language;\r\n\r\n const result = new TranslationRecognitionResult(\r\n translations,\r\n this.privRequestSession.requestId,\r\n resultReason,\r\n serviceResult.Text,\r\n serviceResult.Duration,\r\n serviceResult.Offset,\r\n language,\r\n confidence,\r\n serviceResult.Translation.FailureReason,\r\n serviceResult.asJson(),\r\n properties);\r\n\r\n const ev = new TranslationRecognitionEventArgs(result, serviceResult.Offset, this.privRequestSession.sessionId);\r\n return ev;\r\n }\r\n\r\n private sendSynthesisAudio(audio: ArrayBuffer, sessionId: string): void {\r\n const reason = (undefined === audio) ? ResultReason.SynthesizingAudioCompleted : ResultReason.SynthesizingAudio;\r\n const result = new TranslationSynthesisResult(reason, audio);\r\n const retEvent: TranslationSynthesisEventArgs = new TranslationSynthesisEventArgs(result, sessionId);\r\n\r\n if (!!this.privTranslationRecognizer.synthesizing) {\r\n try {\r\n this.privTranslationRecognizer.synthesizing(this.privTranslationRecognizer, retEvent);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n\r\n }\r\n}\r\n"]}