UNPKG

microsoft-cognitiveservices-speech-sdk

Version:

Microsoft Cognitive Services Speech SDK for JavaScript

docs.microsoft.com/azure/cognitive-services/speech-service/

Microsoft/cognitive-services-speech-sdk-js

387 lines (385 loc) • 17.6 kB

JavaScript

"use strict";
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
// Multi-device Conversation is a Preview feature.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ConversationTranslator = exports.SpeechState = void 0;
/* eslint-disable max-classes-per-file */
const Exports_js_1 = require("../../common.speech/Exports.js");
const ConversationTranslatorConnectionFactory_js_1 = require("../../common.speech/Transcription/ConversationTranslatorConnectionFactory.js");
const Exports_js_2 = require("../../common/Exports.js");
const Contracts_js_1 = require("../Contracts.js");
const Exports_js_3 = require("../Exports.js");
const Conversation_js_1 = require("./Conversation.js");
const Exports_js_4 = require("./Exports.js");
/**
 * State of the speech connection owned by ConversationTranslationRecognizer.
 * Emitted as a two-way (name <-> number) enum object.
 */
var SpeechState;
(function (SpeechState) {
    SpeechState[SpeechState["Inactive"] = 0] = "Inactive";
    SpeechState[SpeechState["Connecting"] = 1] = "Connecting";
    SpeechState[SpeechState["Connected"] = 2] = "Connected";
})(SpeechState = exports.SpeechState || (exports.SpeechState = {}));
/**
 * Child class of TranslationRecognizer meant only for use with ConversationTranslator.
 * It tracks a SpeechState and, when a translator is supplied, relays recognizer
 * events to the callbacks set on that translator.
 */
class ConversationTranslationRecognizer extends Exports_js_3.TranslationRecognizer {
    /**
     * @param speechConfig config forwarded to the TranslationRecognizer constructor
     * @param audioConfig audio config forwarded to the TranslationRecognizer constructor
     * @param translator the owning ConversationTranslator; event handlers are only wired when it is truthy
     * @param convGetter function handed to the ConversationTranslatorConnectionFactory
     */
    constructor(speechConfig, audioConfig, translator, convGetter) {
        super(speechConfig, audioConfig, new ConversationTranslatorConnectionFactory_js_1.ConversationTranslatorConnectionFactory(convGetter));
        this.privSpeechState = SpeechState.Inactive;
        if (!!translator) {
            this.privTranslator = translator;
            this.sessionStarted = () => {
                this.privSpeechState = SpeechState.Connected;
            };
            this.sessionStopped = () => {
                this.privSpeechState = SpeechState.Inactive;
            };
            // relay interim results to the translator's handler, reporting the translator as sender
            this.recognizing = (tr, e) => {
                if (!!this.privTranslator.recognizing) {
                    this.privTranslator.recognizing(this.privTranslator, e);
                }
            };
            // eslint-disable-next-line @typescript-eslint/no-misused-promises
            this.recognized = async (tr, e) => {
                // if there is an error connecting to the conversation service from the speech service
                // the error will be returned in the ErrorDetails field.
                if (e.result?.errorDetails) {
                    await this.cancelSpeech();
                    // TODO: format the error message contained in 'errorDetails'
                    this.fireCancelEvent(e.result.errorDetails);
                }
                else {
                    if (!!this.privTranslator.recognized) {
                        this.privTranslator.recognized(this.privTranslator, e);
                    }
                }
                return;
            };
            // eslint-disable-next-line @typescript-eslint/no-misused-promises
            this.canceled = async () => {
                if (this.privSpeechState !== SpeechState.Inactive) {
                    try {
                        await this.cancelSpeech();
                    }
                    catch (error) {
                        // make sure the state is reset even if the teardown failed
                        this.privSpeechState = SpeechState.Inactive;
                    }
                }
            };
        }
    }
    /** Current SpeechState of this recognizer. */
    get state() {
        return this.privSpeechState;
    }
    set state(newState) {
        this.privSpeechState = newState;
    }
    /** Forwards a new authentication token to `privReco` (which is set up outside this file). */
    set authentication(token) {
        this.privReco.authentication = token;
    }
    /** Marks the speech connection as Connected. */
    onConnection() {
        this.privSpeechState = SpeechState.Connected;
    }
    /** Marks the speech connection as Inactive, then tears it down via cancelSpeech(). */
    async onCancelSpeech() {
        this.privSpeechState = SpeechState.Inactive;
        await this.cancelSpeech();
    }
    /**
     * Fire a cancel event on the translator's `canceled` handler, if one is set.
     * Any exception raised while building or delivering the event is swallowed.
     * @param error error details placed in the ConversationTranslationCanceledEventArgs
     */
    fireCancelEvent(error) {
        try {
            if (!!this.privTranslator.canceled) {
                const cancelEvent = new Exports_js_4.ConversationTranslationCanceledEventArgs(Exports_js_3.CancellationReason.Error, error, Exports_js_3.CancellationErrorCode.RuntimeError);
                this.privTranslator.canceled(this.privTranslator, cancelEvent);
            }
        }
        catch (e) {
            // deliberately ignored: a failing user handler must not break the recognizer
        }
    }
    /**
     * Best-effort teardown: requests continuous recognition to stop (not awaited),
     * disconnects `privReco` when present and marks the state Inactive.
     * Errors are ignored.
     */
    async cancelSpeech() {
        try {
            this.stopContinuousRecognitionAsync();
            await this.privReco?.disconnect();
            this.privSpeechState = SpeechState.Inactive;
        }
        catch (e) {
            // ignore the error
        }
    }
}
/**
 * Join, leave or connect to a conversation.
 */
class ConversationTranslator extends Exports_js_4.ConversationCommon {
    /**
     * @param audioConfig audio configuration passed to the ConversationCommon constructor
     */
    constructor(audioConfig) {
        super(audioConfig);
        this.privErrors = Exports_js_1.ConversationConnectionConfig.restErrors;
        this.privIsDisposed = false;
        this.privIsSpeaking = false;
        // placeholder credentials used to build a config for a participant; the key is
        // cleared again in connectTranslatorRecognizer() before the recognizer is created
        this.privPlaceholderKey = "abcdefghijklmnopqrstuvwxyz012345";
        this.privPlaceholderRegion = "westus";
        this.privProperties = new Exports_js_3.PropertyCollection();
    }
    /** Property collection copied into the speech config when joining by conversation id. */
    get properties() {
        return this.privProperties;
    }
    get speechRecognitionLanguage() {
        return this.privSpeechRecognitionLanguage;
    }
    /** Participants of the joined conversation, or undefined when no conversation is set. */
    get participants() {
        return this.privConversation?.participants;
    }
    /**
     * Whether speech can be started right now: requires a connected conversation and an
     * existing recognizer, no speech already in progress, and not being muted by the host.
     */
    get canSpeak() {
        // is there a Conversation websocket available and has the Recognizer been set up
        // (privConversation is undefined before a join, so guard the dereference)
        if (!this.privConversation?.isConnected || !this.privCTRecognizer) {
            return false;
        }
        // is the user already speaking
        if (this.privIsSpeaking || this.privCTRecognizer.state === SpeechState.Connected || this.privCTRecognizer.state === SpeechState.Connecting) {
            return false;
        }
        // is the user muted
        if (this.privConversation.isMutedByHost) {
            return false;
        }
        return true;
    }
    /**
     * Passes a refreshed token to the recognizer.
     * The recognizer only exists between connectTranslatorRecognizer() and cancelSpeech(),
     * so there is nothing to update when it is not set.
     * @param token the new authentication token
     */
    onToken(token) {
        if (this.privCTRecognizer) {
            this.privCTRecognizer.authentication = token;
        }
    }
    /**
     * Stores a service property in the JSON map kept under ServicePropertiesPropertyName;
     * joinConversationAsync later applies every entry as a URI query parameter.
     * @param name property name
     * @param value property value
     */
    setServiceProperty(name, value) {
        const currentProperties = JSON.parse(this.privProperties.getProperty(Exports_js_1.ServicePropertiesPropertyName, "{}"));
        currentProperties[name] = value;
        this.privProperties.setProperty(Exports_js_1.ServicePropertiesPropertyName, JSON.stringify(currentProperties));
    }
    /**
     * Join a conversation, either as a participant (conversation id string) or as the
     * host (an existing conversation object).
     *
     * Errors are reported through `handleError` and successes through `handleCallback`,
     * both inherited from ConversationCommon (not shown in this file).
     *
     * @param conversation conversation id (string) or a conversation object
     * @param nickname display name; must not be null or whitespace
     * @param param1 string form: language code (falls back to the default language code when
     *               undefined, null or ""); object form: success callback
     * @param param2 string form: success callback; object form: error callback
     * @param param3 string form: error callback; unused in object form
     */
    joinConversationAsync(conversation, nickname, param1, param2, param3) {
        try {
            if (typeof conversation === "string") {
                Contracts_js_1.Contracts.throwIfNullOrUndefined(conversation, this.privErrors.invalidArgs.replace("{arg}", "conversation id"));
                Contracts_js_1.Contracts.throwIfNullOrWhitespace(nickname, this.privErrors.invalidArgs.replace("{arg}", "nickname"));
                if (!!this.privConversation) {
                    this.handleError(new Error(this.privErrors.permissionDeniedStart), param3);
                    // handleError lives on the base class; in case it only reports and does not
                    // throw, stop here instead of joining a second conversation.
                    return;
                }
                let lang = param1;
                if (lang === undefined || lang === null || lang === "") {
                    lang = Exports_js_1.ConversationConnectionConfig.defaultLanguageCode;
                }
                // create a placeholder config
                this.privSpeechTranslationConfig = Exports_js_3.SpeechTranslationConfig.fromSubscription(this.privPlaceholderKey, this.privPlaceholderRegion);
                this.privSpeechTranslationConfig.setProfanity(Exports_js_3.ProfanityOption.Masked);
                this.privSpeechTranslationConfig.addTargetLanguage(lang);
                this.privSpeechTranslationConfig.setProperty(Exports_js_3.PropertyId[Exports_js_3.PropertyId.SpeechServiceConnection_RecoLanguage], lang);
                this.privSpeechTranslationConfig.setProperty(Exports_js_3.PropertyId[Exports_js_3.PropertyId.ConversationTranslator_Name], nickname);
                // copy over any connection-related properties the caller set on `properties`
                const propertyIdsToCopy = [
                    Exports_js_3.PropertyId.SpeechServiceConnection_Host,
                    Exports_js_3.PropertyId.ConversationTranslator_Host,
                    Exports_js_3.PropertyId.SpeechServiceConnection_Endpoint,
                    Exports_js_3.PropertyId.SpeechServiceConnection_ProxyHostName,
                    Exports_js_3.PropertyId.SpeechServiceConnection_ProxyPassword,
                    Exports_js_3.PropertyId.SpeechServiceConnection_ProxyPort,
                    Exports_js_3.PropertyId.SpeechServiceConnection_ProxyUserName,
                    "ConversationTranslator_MultiChannelAudio",
                    "ConversationTranslator_Region"
                ];
                for (const prop of propertyIdsToCopy) {
                    const value = this.privProperties.getProperty(prop);
                    if (value) {
                        // entries are either plain string keys or PropertyId members that
                        // are mapped back to their name
                        const key = typeof prop === "string" ? prop : Exports_js_3.PropertyId[prop];
                        this.privSpeechTranslationConfig.setProperty(key, value);
                    }
                }
                const currentProperties = JSON.parse(this.privProperties.getProperty(Exports_js_1.ServicePropertiesPropertyName, "{}"));
                for (const prop of Object.keys(currentProperties)) {
                    this.privSpeechTranslationConfig.setServiceProperty(prop, currentProperties[prop], Exports_js_3.ServicePropertyChannel.UriQueryParameter);
                }
                // join the conversation
                this.privConversation = new Conversation_js_1.ConversationImpl(this.privSpeechTranslationConfig);
                this.privConversation.conversationTranslator = this;
                this.privConversation.joinConversationAsync(conversation, nickname, lang, ((result) => {
                    if (!result) {
                        this.handleError(new Error(this.privErrors.permissionDeniedConnect), param3);
                        // without a join result there is no token to connect with; do not
                        // continue on to open the websocket after reporting the failure
                        return;
                    }
                    this.privSpeechTranslationConfig.authorizationToken = result;
                    this.privConversation.room.isHost = false;
                    // connect to the ws
                    this.privConversation.startConversationAsync((() => {
                        this.handleCallback(param2, param3);
                    }), ((error) => {
                        this.handleError(error, param3);
                    }));
                }), ((error) => {
                    this.handleError(error, param3);
                }));
            }
            else if (typeof conversation === "object") {
                Contracts_js_1.Contracts.throwIfNullOrUndefined(conversation, this.privErrors.invalidArgs.replace("{arg}", "conversation id"));
                Contracts_js_1.Contracts.throwIfNullOrWhitespace(nickname, this.privErrors.invalidArgs.replace("{arg}", "nickname"));
                // save the nickname
                this.privProperties.setProperty(Exports_js_3.PropertyId.ConversationTranslator_Name, nickname);
                // ref the conversation object
                this.privConversation = conversation;
                // ref the conversation translator object
                this.privConversation.conversationTranslator = this;
                this.privConversation.room.isHost = true;
                Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privConversation, this.privErrors.permissionDeniedConnect);
                Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privConversation.room.token, this.privErrors.permissionDeniedConnect);
                this.privSpeechTranslationConfig = conversation.config;
                this.handleCallback(param1, param2);
            }
            else {
                this.handleError(new Error(this.privErrors.invalidArgs.replace("{arg}", "invalid conversation type")), param2);
            }
        }
        catch (error) {
            // The position of the error callback depends on which overload was used, i.e. on
            // the type of `conversation` - not on `param1`, which is an optional language
            // code in the string form and may legitimately be undefined or null.
            this.handleError(error, typeof conversation === "string" ? param3 : param2);
        }
    }
    /**
     * Leave the conversation: cancels speech, ends and deletes the conversation, then
     * disposes this translator.
     * @param cb
     * @param err
     */
    leaveConversationAsync(cb, err) {
        Exports_js_2.marshalPromiseToCallbacks((async () => {
            // stop the speech websocket
            await this.cancelSpeech();
            // stop the websocket
            await this.privConversation.endConversationImplAsync();
            // https delete request
            await this.privConversation.deleteConversationImplAsync();
            this.dispose();
        })(), cb, err);
    }
    /**
     * Send a text message
     * @param message text to send; must not be null or whitespace
     * @param cb
     * @param err
     */
    sendTextMessageAsync(message, cb, err) {
        try {
            Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privConversation, this.privErrors.permissionDeniedSend);
            // name the offending argument; the value itself is null/blank whenever this check fails
            Contracts_js_1.Contracts.throwIfNullOrWhitespace(message, this.privErrors.invalidArgs.replace("{arg}", "message"));
            this.privConversation.sendTextMessageAsync(message, cb, err);
        }
        catch (error) {
            this.handleError(error, err);
        }
    }
    /**
     * Start speaking. Creates the recognizer on first use, then starts continuous
     * recognition. On any failure the speech connection is cancelled and the error is
     * rethrown so that marshalPromiseToCallbacks receives a rejected promise.
     * @param cb
     * @param err
     */
    startTranscribingAsync(cb, err) {
        Exports_js_2.marshalPromiseToCallbacks((async () => {
            try {
                Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privConversation, this.privErrors.permissionDeniedSend);
                Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privConversation.room.token, this.privErrors.permissionDeniedConnect);
                if (this.privCTRecognizer === undefined) {
                    await this.connectTranslatorRecognizer();
                }
                Contracts_js_1.Contracts.throwIfNullOrUndefined(this.privCTRecognizer, this.privErrors.permissionDeniedSend);
                if (!this.canSpeak) {
                    // NOTE(review): unless the inherited handleError throws, execution continues
                    // and recognition is still started below - confirm this is intended.
                    this.handleError(new Error(this.privErrors.permissionDeniedSend), err);
                }
                await this.startContinuousRecognition();
                this.privIsSpeaking = true;
            }
            catch (error) {
                this.privIsSpeaking = false;
                await this.cancelSpeech();
                throw error;
            }
        })(), cb, err);
    }
    /**
     * Stop speaking. When speech is active only the recognition is stopped; otherwise
     * (or on any error) the speech connection is cancelled.
     * @param cb
     * @param err
     */
    stopTranscribingAsync(cb, err) {
        Exports_js_2.marshalPromiseToCallbacks((async () => {
            try {
                if (!this.privIsSpeaking) {
                    // stop speech
                    await this.cancelSpeech();
                    return;
                }
                // stop the recognition but leave the websocket open
                this.privIsSpeaking = false;
                await new Promise((resolve, reject) => {
                    this.privCTRecognizer.stopContinuousRecognitionAsync(resolve, reject);
                });
            }
            catch (error) {
                await this.cancelSpeech();
            }
        })(), cb, err);
    }
    /** @returns true once dispose() has released this translator's resources */
    isDisposed() {
        return this.privIsDisposed;
    }
    /**
     * Release everything held by this translator: cancels speech, closes the speech
     * config, disposes the conversation and clears all references.
     * Does nothing when already disposed and not speaking.
     * @param reason not used by this implementation
     * @param success
     * @param err
     */
    dispose(reason, success, err) {
        Exports_js_2.marshalPromiseToCallbacks((async () => {
            // isDisposed is a method: it has to be called. Testing the bare reference is
            // always truthy and made dispose() a no-op whenever speech was not active.
            if (this.isDisposed() && !this.privIsSpeaking) {
                return;
            }
            await this.cancelSpeech();
            this.privIsDisposed = true;
            // config and conversation are unset before a join and after a previous
            // dispose, so guard both calls
            this.privSpeechTranslationConfig?.close();
            this.privSpeechRecognitionLanguage = undefined;
            this.privProperties = undefined;
            this.privAudioConfig = undefined;
            this.privSpeechTranslationConfig = undefined;
            this.privConversation?.dispose();
            this.privConversation = undefined;
        })(), success, err);
    }
    /**
     * Cancel the speech websocket and drop the recognizer. Errors are ignored.
     */
    async cancelSpeech() {
        try {
            this.privIsSpeaking = false;
            await this.privCTRecognizer?.onCancelSpeech();
            this.privCTRecognizer = undefined;
        }
        catch (e) {
            // ignore the error
        }
    }
    /**
     * Connect to the speech translation recognizer.
     * Currently there is no language validation performed before sending the SpeechLanguage code to the service.
     * If it's an invalid language the raw error will be: 'Error during WebSocket handshake: Unexpected response code: 400'
     * e.g. pass in 'fr' instead of 'fr-FR', or a text-only language 'cy'
     */
    async connectTranslatorRecognizer() {
        try {
            if (this.privAudioConfig === undefined) {
                this.privAudioConfig = Exports_js_3.AudioConfig.fromDefaultMicrophoneInput();
            }
            // clear the temp subscription key if it's a participant joining
            if (this.privSpeechTranslationConfig.getProperty(Exports_js_3.PropertyId[Exports_js_3.PropertyId.SpeechServiceConnection_Key]) === this.privPlaceholderKey) {
                this.privSpeechTranslationConfig.setProperty(Exports_js_3.PropertyId[Exports_js_3.PropertyId.SpeechServiceConnection_Key], "");
            }
            // the recognizer gets a getter rather than the conversation itself, so it
            // reads whatever privConversation is at the time the getter is invoked
            const convGetter = () => this.privConversation;
            this.privCTRecognizer = new ConversationTranslationRecognizer(this.privSpeechTranslationConfig, this.privAudioConfig, this, convGetter);
        }
        catch (error) {
            await this.cancelSpeech();
            throw error;
        }
    }
    /**
     * Handle the start speaking request
     * @returns a promise settled by the recognizer's startContinuousRecognitionAsync callbacks
     */
    startContinuousRecognition() {
        return new Promise((resolve, reject) => {
            this.privCTRecognizer.startContinuousRecognitionAsync(resolve, reject);
        });
    }
}
exports.ConversationTranslator = ConversationTranslator;
//# sourceMappingURL=ConversationTranslator.js.map