UNPKG

@euirim/microsoft-cognitiveservices-speech-sdk

Version:
566 lines (564 loc) 33.7 kB
"use strict";
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
var __extends = (this && this.__extends) || (function () {
    var extendStatics = function (d, b) {
        extendStatics = Object.setPrototypeOf ||
            ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
            function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
        return extendStatics(d, b);
    };
    return function (d, b) {
        extendStatics(d, b);
        function __() { this.constructor = d; }
        d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
var Exports_1 = require("../common.browser/Exports");
var Exports_2 = require("../common/Exports");
var Exports_3 = require("../sdk/Exports");
var DialogServiceTurnStateManager_1 = require("./DialogServiceTurnStateManager");
var Exports_4 = require("./Exports");
var ActivityResponsePayload_1 = require("./ServiceMessages/ActivityResponsePayload");
var SpeechConnectionMessage_Internal_1 = require("./SpeechConnectionMessage.Internal");
/**
 * Service adapter used by a dialog service connector.
 *
 * Extends ServiceRecognizerBase and installs its own implementations for the
 * recognize / connect / configure / fetch-connection / disconnect / receive-message
 * override hooks (see the assignments at the end of the constructor). It sends
 * agent messages, streams audio to the service, and dispatches incoming service
 * messages to the handlers registered on the connector.
 */
var DialogServiceAdapter = /** @class */ (function (_super) {
    __extends(DialogServiceAdapter, _super);
    /**
     * @param authentication Provides `fetch` / `fetchOnExpiry` used when connecting.
     * @param connectionFactory Provides `create(config, authInfo, connectionId)`.
     * @param audioSource Audio source; must expose `id()`, `attach()`, `format`, `deviceInfo`, `events`.
     * @param recognizerConfig Recognizer configuration forwarded to the base class.
     * @param dialogServiceConnector Connector whose event handlers (recognized, recognizing,
     *        canceled, activityReceived) are invoked by this adapter.
     */
    function DialogServiceAdapter(authentication, connectionFactory, audioSource, recognizerConfig, dialogServiceConnector) {
        var _this = _super.call(this, authentication, connectionFactory, audioSource, recognizerConfig, dialogServiceConnector) || this;
        /**
         * Wraps `message` in an agent envelope (fresh interaction id, version 0.5) and
         * sends it on the "agent" path once a configured connection is available.
         */
        _this.sendMessage = function (message) {
            var interactionGuid = Exports_2.createGuid();
            var requestId = Exports_2.createNoDashGuid();
            var agentMessage = {
                context: {
                    interactionId: interactionGuid
                },
                messagePayload: message,
                version: 0.5
            };
            var agentMessageJson = JSON.stringify(agentMessage);
            _this.fetchDialogConnection().onSuccessContinueWith(function (connection) {
                connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "agent", requestId, "application/json", agentMessageJson));
            });
        };
        /**
         * Starts a single recognition turn: kicks off the connection, attaches the audio
         * source, configures the connection and begins streaming audio.
         *
         * @param recoMode Recognition mode stored on the recognizer config.
         * @param successCallback Stored as `privSuccessCallback`; also handed to
         *        cancelRecognition on failure paths.
         * @param errorCallback Not used by this implementation.
         * @returns A promise that yields `true` on success or carries the error.
         */
        _this.listenOnce = function (recoMode, successCallback, errorCallback) {
            _this.privRecognizerConfig.recognitionMode = recoMode;
            _this.privDialogRequestSession.startNewRecognition();
            _this.privDialogRequestSession.listenForServiceTelemetry(_this.privDialogAudioSource.events);
            // Start the connection to the service. The promise this will create is stored and will be used by configureConnection().
            _this.dialogConnectImpl();
            _this.sendPreAudioMessages();
            _this.privSuccessCallback = successCallback;
            return _this.privDialogAudioSource
                .attach(_this.privDialogRequestSession.audioNodeId)
                .continueWithPromise(function (result) {
                var audioNode;
                if (result.isError) {
                    _this.cancelRecognition(_this.privDialogRequestSession.sessionId, _this.privDialogRequestSession.requestId, Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.ConnectionFailure, result.error, successCallback);
                    return Exports_2.PromiseHelper.fromError(result.error);
                }
                else {
                    audioNode = new Exports_1.ReplayableAudioNode(result.result, _this.privDialogAudioSource.format);
                    _this.privDialogRequestSession.onAudioSourceAttachCompleted(audioNode, false);
                }
                return _this.privDialogAudioSource.deviceInfo.onSuccessContinueWithPromise(function (deviceInfo) {
                    _this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
                    return _this.configConnection()
                        .on(function (_) {
                        var sessionStartEventArgs = new Exports_3.SessionEventArgs(_this.privDialogRequestSession.sessionId);
                        if (!!_this.privRecognizer.sessionStarted) {
                            _this.privRecognizer.sessionStarted(_this.privRecognizer, sessionStartEventArgs);
                        }
                        var audioSendPromise = _this.sendAudio(audioNode);
                        // /* tslint:disable:no-empty */
                        audioSendPromise.on(function (_) { }, function (error) {
                            _this.cancelRecognition(_this.privDialogRequestSession.sessionId, _this.privDialogRequestSession.requestId, Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.RuntimeError, error, successCallback);
                        });
                    }, function (error) {
                        _this.cancelRecognition(_this.privDialogRequestSession.sessionId, _this.privDialogRequestSession.requestId, Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.ConnectionFailure, error, successCallback);
                    }).continueWithPromise(function (result) {
                        if (result.isError) {
                            return Exports_2.PromiseHelper.fromError(result.error);
                        }
                        else {
                            return Exports_2.PromiseHelper.fromResult(true);
                        }
                    });
                });
            });
        };
        /**
         * Reads chunks from `audioStreamNode` and uploads them on the "audio" path until
         * the stream ends, speech ends, or the recognition turn changes.
         *
         * @param audioStreamNode Node exposing `read()`; chunks expose `isEnd` and `buffer`.
         * @returns A promise resolved with `true` when uploading finishes, or rejected on error.
         */
        _this.sendAudio = function (audioStreamNode) {
            // NOTE: Home-baked promises crash ios safari during the invocation
            // of the error callback chain (looks like the recursion is way too deep, and
            // it blows up the stack). The following construct is a stop-gap that does not
            // bubble the error up the callback chain and hence circumvents this problem.
            // TODO: rewrite with ES6 promises.
            var deferred = new Exports_2.Deferred();
            // The time we last sent data to the service.
            var nextSendTime = Date.now();
            var audioFormat = _this.privDialogAudioSource.format;
            // Max amount to send before we start to throttle
            var fastLaneSizeMs = _this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
            var maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
            // Remember which recognition this upload belongs to so a newer turn stops this loop.
            var startRecogNumber = _this.privDialogRequestSession.recogNumber;
            var readAndUploadCycle = function () {
                // If speech is done, stop sending audio.
                if (!_this.privDialogIsDisposed &&
                    !_this.privDialogRequestSession.isSpeechEnded &&
                    _this.privDialogRequestSession.isRecognizing &&
                    _this.privDialogRequestSession.recogNumber === startRecogNumber) {
                    _this.fetchDialogConnection().on(function (connection) {
                        audioStreamNode.read().on(function (audioStreamChunk) {
                            // we have a new audio chunk to upload.
                            if (_this.privDialogRequestSession.isSpeechEnded) {
                                // If service already recognized audio end then don't send any more audio
                                deferred.resolve(true);
                                return;
                            }
                            var payload;
                            var sendDelay;
                            if (audioStreamChunk.isEnd) {
                                // A null payload is sent to mark the end of the stream.
                                payload = null;
                                sendDelay = 0;
                            }
                            else {
                                payload = audioStreamChunk.buffer;
                                _this.privDialogRequestSession.onAudioSent(payload.byteLength);
                                if (maxSendUnthrottledBytes >= _this.privDialogRequestSession.bytesSent) {
                                    sendDelay = 0;
                                }
                                else {
                                    sendDelay = Math.max(0, nextSendTime - Date.now());
                                }
                            }
                            // Are we ready to send, or need we delay more?
                            setTimeout(function () {
                                if (payload !== null) {
                                    // Schedules the next send after half the chunk's duration at avgBytesPerSec.
                                    nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
                                }
                                var uploaded = connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Binary, "audio", _this.privDialogRequestSession.requestId, null, payload));
                                if (!audioStreamChunk.isEnd) {
                                    uploaded.continueWith(function (_) {
                                        // Regardless of success or failure, schedule the next upload.
                                        // If the underlying connection was broken, the next cycle will
                                        // get a new connection and re-transmit missing audio automatically.
                                        readAndUploadCycle();
                                    });
                                }
                                else {
                                    // the audio stream has been closed, no need to schedule next
                                    // read-upload cycle.
                                    _this.privDialogRequestSession.onSpeechEnded();
                                    deferred.resolve(true);
                                }
                            }, sendDelay);
                        }, function (error) {
                            if (_this.privDialogRequestSession.isSpeechEnded) {
                                // For whatever reason, Reject is used to remove queue subscribers inside
                                // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which
                                // means that sometimes things can be rejected in normal circumstances, without
                                // any errors.
                                deferred.resolve(true); // TODO: remove the argument, it is completely meaningless.
                            }
                            else {
                                // Only reject, if there was a proper error.
                                deferred.reject(error);
                            }
                        });
                    }, function (error) {
                        deferred.reject(error);
                    });
                }
            };
            readAndUploadCycle();
            return deferred.promise();
        };
        /**
         * Message loop: reads one message from the connection, handles turn and speech
         * detection messages here, delegates everything else to
         * processTypeSpecificMessages, then calls itself for the next message.
         *
         * @param successCallback Forwarded to processTypeSpecificMessages.
         * @param errorCallBack Forwarded to processTypeSpecificMessages.
         * @returns A promise resolved when the loop observes disposal or termination.
         */
        _this.receiveDialogMessageOverride = function (successCallback, errorCallBack) {
            // we won't rely on the cascading promises of the connection since we want to continually be available to receive messages
            var communicationCustodian = new Exports_2.Deferred();
            _this.fetchDialogConnection().on(function (connection) {
                return connection.read()
                    .onSuccessContinueWithPromise(function (message) {
                    var isDisposed = _this.isDisposed();
                    var terminateMessageLoop = (!_this.isDisposed() && _this.terminateMessageLoop);
                    if (isDisposed || terminateMessageLoop) {
                        // We're done.
                        communicationCustodian.resolve(undefined);
                        return Exports_2.PromiseHelper.fromResult(undefined);
                    }
                    if (!message) {
                        // NOTE(review): the recursive calls do not forward the callbacks - confirm intended.
                        return _this.receiveDialogMessageOverride();
                    }
                    var connectionMessage = SpeechConnectionMessage_Internal_1.SpeechConnectionMessage.fromConnectionMessage(message);
                    switch (connectionMessage.path.toLowerCase()) {
                        case "turn.start":
                            {
                                var turnRequestId = connectionMessage.requestId.toUpperCase();
                                var audioSessionReqId = _this.privDialogRequestSession.requestId.toUpperCase();
                                // turn started by the service
                                if (turnRequestId !== audioSessionReqId) {
                                    _this.privTurnStateManager.StartTurn(turnRequestId);
                                }
                            }
                            break;
                        case "speech.startdetected":
                            var speechStartDetected = Exports_4.SpeechDetected.fromJSON(connectionMessage.textBody);
                            var speechStartEventArgs = new Exports_3.RecognitionEventArgs(speechStartDetected.Offset, _this.privDialogRequestSession.sessionId);
                            if (!!_this.privRecognizer.speechStartDetected) {
                                _this.privRecognizer.speechStartDetected(_this.privRecognizer, speechStartEventArgs);
                            }
                            break;
                        case "speech.enddetected":
                            var json = void 0;
                            if (connectionMessage.textBody.length > 0) {
                                json = connectionMessage.textBody;
                            }
                            else {
                                // If the request was empty, the JSON returned is empty.
                                // The fallback must be valid JSON (quoted key); the previous
                                // "{ Offset: 0 }" literal is rejected by JSON.parse.
                                json = "{ \"Offset\": 0 }";
                            }
                            var speechStopDetected = Exports_4.SpeechDetected.fromJSON(json);
                            _this.privDialogRequestSession.onServiceRecognized(speechStopDetected.Offset + _this.privDialogRequestSession.currentTurnAudioOffset);
                            var speechStopEventArgs = new Exports_3.RecognitionEventArgs(speechStopDetected.Offset + _this.privDialogRequestSession.currentTurnAudioOffset, _this.privDialogRequestSession.sessionId);
                            if (!!_this.privRecognizer.speechEndDetected) {
                                _this.privRecognizer.speechEndDetected(_this.privRecognizer, speechStopEventArgs);
                            }
                            break;
                        case "turn.end":
                            {
                                var turnEndRequestId = connectionMessage.requestId.toUpperCase();
                                var audioSessionReqId = _this.privDialogRequestSession.requestId.toUpperCase();
                                // turn started by the service
                                if (turnEndRequestId !== audioSessionReqId) {
                                    _this.privTurnStateManager.CompleteTurn(turnEndRequestId);
                                }
                                else {
                                    // Audio session turn
                                    var sessionStopEventArgs = new Exports_3.SessionEventArgs(_this.privDialogRequestSession.sessionId);
                                    _this.privDialogRequestSession.onServiceTurnEndResponse(false);
                                    if (_this.privDialogRequestSession.isSpeechEnded) {
                                        if (!!_this.privRecognizer.sessionStopped) {
                                            _this.privRecognizer.sessionStopped(_this.privRecognizer, sessionStopEventArgs);
                                        }
                                    }
                                }
                            }
                            break;
                        default:
                            _this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack);
                    }
                    return _this.receiveDialogMessageOverride();
                });
            }, function (error) {
                _this.terminateMessageLoop = true;
            });
            return communicationCustodian.promise();
        };
        /** Returns the configured-connection promise (same as configConnection()). */
        _this.fetchDialogConnection = function () {
            return _this.configConnection();
        };
        /**
         * Sends the agent configuration on the "agent.config" path, at most once until
         * `agentConfigSent` is reset. Returns a resolved promise when there is nothing to send.
         */
        _this.sendAgentConfig = function (connection) {
            if (_this.agentConfig && !_this.agentConfigSent) {
                var agentConfigJson = _this.agentConfig.toJsonString();
                _this.agentConfigSent = true;
                return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "agent.config", _this.privDialogRequestSession.requestId, "application/json", agentConfigJson));
            }
            return Exports_2.PromiseHelper.fromResult(true);
        };
        /** Sends an agent context (fresh interaction id, version 0.5) on "speech.agent.context". */
        _this.sendAgentContext = function (connection) {
            var guid = Exports_2.createGuid();
            var agentContext = {
                channelData: "",
                context: {
                    interactionId: guid
                },
                version: 0.5
            };
            var agentContextJson = JSON.stringify(agentContext);
            return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "speech.agent.context", _this.privDialogRequestSession.requestId, "application/json", agentContextJson));
        };
        _this.privDialogServiceConnector = dialogServiceConnector;
        _this.privDialogAuthentication = authentication;
        // Install this adapter's implementations into the base-class override hooks.
        _this.receiveMessageOverride = _this.receiveDialogMessageOverride;
        _this.privTurnStateManager = new DialogServiceTurnStateManager_1.DialogServiceTurnStateManager();
        _this.recognizeOverride = _this.listenOnce;
        _this.connectImplOverride = _this.dialogConnectImpl;
        _this.configConnectionOverride = _this.configConnection;
        _this.fetchConnectionOverride = _this.fetchDialogConnection;
        _this.disconnectOverride = _this.privDisconnect;
        _this.privDialogAudioSource = audioSource;
        _this.privDialogRequestSession = new Exports_4.RequestSession(audioSource.id());
        _this.privDialogConnectionFactory = connectionFactory;
        _this.privDialogIsDisposed = false;
        _this.agentConfigSent = false;
        return _this;
    }
    /** @returns `true` once dispose() has been called. */
    DialogServiceAdapter.prototype.isDisposed = function () {
        return this.privDialogIsDisposed;
    };
    /**
     * Marks the adapter disposed and, if a configured connection was requested,
     * disposes that connection when it becomes available.
     *
     * @param reason Forwarded to `connection.dispose`.
     */
    DialogServiceAdapter.prototype.dispose = function (reason) {
        this.privDialogIsDisposed = true;
        if (this.privConnectionConfigPromise) {
            this.privConnectionConfigPromise.onSuccessContinueWith(function (connection) {
                connection.dispose(reason);
            });
        }
    };
    /**
     * Cancels the current recognition, stops the message loop and disposes the
     * underlying connection (immediately if already established, otherwise once
     * the pending connection succeeds).
     */
    DialogServiceAdapter.prototype.privDisconnect = function () {
        this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.NoError, "Disconnecting", undefined);
        this.terminateMessageLoop = true;
        this.agentConfigSent = false;
        // Nothing to dispose if no connection was ever started, or if it was already
        // cleared (this method and dialogConnectImpl both reset the promise to null).
        if (!this.privDialogConnectionPromise) {
            return;
        }
        if (this.privDialogConnectionPromise.result().isCompleted) {
            if (!this.privDialogConnectionPromise.result().isError) {
                this.privDialogConnectionPromise.result().result.dispose();
                this.privDialogConnectionPromise = null;
            }
        }
        else {
            this.privDialogConnectionPromise.onSuccessContinueWith(function (connection) {
                connection.dispose();
            });
        }
    };
    /**
     * Handles the dialog-specific message paths: "speech.phrase", "speech.hypothesis",
     * "audio" and "response". Other paths are ignored.
     *
     * @param connectionMessage Parsed SpeechConnectionMessage.
     * @param successCallback Not used directly; the stored `privSuccessCallback` is invoked instead.
     * @param errorCallBack Invoked if `privSuccessCallback` throws.
     */
    DialogServiceAdapter.prototype.processTypeSpecificMessages = function (connectionMessage, successCallback, errorCallBack) {
        var resultProps = new Exports_3.PropertyCollection();
        if (connectionMessage.messageType === Exports_2.MessageType.Text) {
            resultProps.setProperty(Exports_3.PropertyId.SpeechServiceResponse_JsonResult, connectionMessage.textBody);
        }
        var result;
        switch (connectionMessage.path.toLowerCase()) {
            case "speech.phrase":
                var speechPhrase = Exports_4.SimpleSpeechPhrase.fromJSON(connectionMessage.textBody);
                this.privDialogRequestSession.onPhraseRecognized(this.privDialogRequestSession.currentTurnAudioOffset + speechPhrase.Offset + speechPhrase.Duration);
                if (speechPhrase.RecognitionStatus === Exports_4.RecognitionStatus.Success) {
                    var args = this.fireEventForResult(speechPhrase, resultProps);
                    if (!!this.privDialogServiceConnector.recognized) {
                        try {
                            this.privDialogServiceConnector.recognized(this.privDialogServiceConnector, args);
                            /* tslint:disable:no-empty */
                        }
                        catch (error) {
                            // Not going to let errors in the event handler
                            // trip things up.
                        }
                    }
                    // report result to promise.
                    if (!!this.privSuccessCallback) {
                        try {
                            this.privSuccessCallback(args.result);
                        }
                        catch (e) {
                            if (!!errorCallBack) {
                                errorCallBack(e);
                            }
                        }
                        // Only invoke the call back once.
                        // and if it's successful don't invoke the
                        // error after that.
                        this.privSuccessCallback = undefined;
                        errorCallBack = undefined;
                    }
                }
                break;
            case "speech.hypothesis":
                var hypothesis = Exports_4.SpeechHypothesis.fromJSON(connectionMessage.textBody);
                var offset = hypothesis.Offset + this.privDialogRequestSession.currentTurnAudioOffset;
                result = new Exports_3.SpeechRecognitionResult(this.privDialogRequestSession.requestId, Exports_3.ResultReason.RecognizingSpeech, hypothesis.Text, hypothesis.Duration, offset, undefined, connectionMessage.textBody, resultProps);
                this.privDialogRequestSession.onHypothesis(offset);
                // Pass the turn-adjusted offset, matching fireEventForResult; this slot
                // previously received hypothesis.Duration.
                var ev = new Exports_3.SpeechRecognitionEventArgs(result, offset, this.privDialogRequestSession.sessionId);
                if (!!this.privDialogServiceConnector.recognizing) {
                    try {
                        this.privDialogServiceConnector.recognizing(this.privDialogServiceConnector, ev);
                        /* tslint:disable:no-empty */
                    }
                    catch (error) {
                        // Not going to let errors in the event handler
                        // trip things up.
                    }
                }
                break;
            case "audio":
                {
                    var audioRequestId = connectionMessage.requestId.toUpperCase();
                    var turn = this.privTurnStateManager.GetTurn(audioRequestId);
                    try {
                        // Empty binary message signals end of stream.
                        if (!connectionMessage.binaryBody) {
                            turn.endAudioStream();
                        }
                        else {
                            turn.audioStream.write(connectionMessage.binaryBody);
                        }
                    }
                    catch (error) {
                        // Not going to let errors in the event handler
                        // trip things up.
                    }
                }
                break;
            case "response":
                {
                    var responseRequestId = connectionMessage.requestId.toUpperCase();
                    var activityPayload = ActivityResponsePayload_1.ActivityPayloadResponse.fromJSON(connectionMessage.textBody);
                    var turn = this.privTurnStateManager.GetTurn(responseRequestId);
                    // update the conversation Id (agentConfig is optional, as in sendAgentConfig)
                    if (activityPayload.conversationId && this.agentConfig) {
                        var updateAgentConfig = this.agentConfig.get();
                        updateAgentConfig.botInfo.conversationId = activityPayload.conversationId;
                        this.agentConfig.set(updateAgentConfig);
                    }
                    var pullAudioOutputStream = turn.processActivityPayload(activityPayload);
                    var activity = new Exports_3.ActivityReceivedEventArgs(activityPayload.messagePayload, pullAudioOutputStream);
                    if (!!this.privDialogServiceConnector.activityReceived) {
                        try {
                            this.privDialogServiceConnector.activityReceived(this.privDialogServiceConnector, activity);
                            /* tslint:disable:no-empty */
                        }
                        catch (error) {
                            // Not going to let errors in the event handler
                            // trip things up.
                        }
                    }
                }
                break;
            default:
                break;
        }
    };
    /**
     * Cancels recognition: stops the message loop, stops the request session if it
     * is recognizing, and - when a `canceled` handler is registered on the connector -
     * raises the canceled event and then reports a Canceled result to `cancelRecoCallback`.
     *
     * @param sessionId Session id placed on the canceled event.
     * @param requestId Not used by this implementation.
     * @param cancellationReason Reason placed on the canceled event.
     * @param errorCode CancellationErrorCode value; its name is stored in the result properties.
     * @param error Error details placed on the event and the result.
     * @param cancelRecoCallback Optional callback receiving the Canceled result.
     */
    DialogServiceAdapter.prototype.cancelRecognition = function (sessionId, requestId, cancellationReason, errorCode, error, cancelRecoCallback) {
        this.terminateMessageLoop = true;
        if (!!this.privDialogRequestSession.isRecognizing) {
            this.privDialogRequestSession.onStopRecognizing();
        }
        if (!!this.privDialogServiceConnector.canceled) {
            var properties = new Exports_3.PropertyCollection();
            properties.setProperty(Exports_4.CancellationErrorCodePropertyName, Exports_3.CancellationErrorCode[errorCode]);
            var cancelEvent = new Exports_3.SpeechRecognitionCanceledEventArgs(cancellationReason, error, errorCode, undefined, sessionId);
            try {
                this.privDialogServiceConnector.canceled(this.privDialogServiceConnector, cancelEvent);
                /* tslint:disable:no-empty */
            }
            catch (_a) { }
            if (!!cancelRecoCallback) {
                var result = new Exports_3.SpeechRecognitionResult(undefined, // ResultId
                Exports_3.ResultReason.Canceled, undefined, // Text
                undefined, // Duration
                undefined, // Offset
                error, undefined, // Json
                properties);
                try {
                    cancelRecoCallback(result);
                    /* tslint:disable:no-empty */
                }
                catch (_b) { }
            }
        }
    };
    /**
     * Establishes a websocket connection to the end point.
     *
     * Reuses the stored connection promise unless it completed with an error or the
     * connection is Disconnected. On a 403 response the connection is retried once
     * with `fetchOnExpiry` authentication.
     *
     * @param isUnAuthorized When true, authentication uses `fetchOnExpiry`; defaults to false.
     * @returns The promise of the opened connection.
     */
    DialogServiceAdapter.prototype.dialogConnectImpl = function (isUnAuthorized) {
        var _this = this;
        if (isUnAuthorized === void 0) { isUnAuthorized = false; }
        if (this.privDialogConnectionPromise) {
            if (this.privDialogConnectionPromise.result().isCompleted &&
                (this.privDialogConnectionPromise.result().isError
                    || this.privDialogConnectionPromise.result().result.state() === Exports_2.ConnectionState.Disconnected)) {
                // Stale connection: a fresh one needs the agent config sent again.
                this.agentConfigSent = false;
                this.privDialogConnectionPromise = null;
            }
            else {
                return this.privDialogConnectionPromise;
            }
        }
        this.privDialogAuthFetchEventId = Exports_2.createNoDashGuid();
        // keep the connectionId for reconnect events
        if (this.privConnectionId === undefined) {
            this.privConnectionId = Exports_2.createNoDashGuid();
        }
        this.privDialogRequestSession.onPreConnectionStart(this.privDialogAuthFetchEventId, this.privConnectionId);
        var authPromise = isUnAuthorized ? this.privDialogAuthentication.fetchOnExpiry(this.privDialogAuthFetchEventId) : this.privDialogAuthentication.fetch(this.privDialogAuthFetchEventId);
        this.privDialogConnectionPromise = authPromise
            .continueWithPromise(function (result) {
            if (result.isError) {
                _this.privDialogRequestSession.onAuthCompleted(true, result.error);
                throw new Error(result.error);
            }
            else {
                _this.privDialogRequestSession.onAuthCompleted(false);
            }
            var connection = _this.privDialogConnectionFactory.create(_this.privRecognizerConfig, result.result, _this.privConnectionId);
            _this.privDialogRequestSession.listenForServiceTelemetry(connection.events);
            // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
            // it'll stop sending events.
            connection.events.attach(function (event) {
                _this.connectionEvents.onEvent(event);
            });
            return connection.open().onSuccessContinueWithPromise(function (response) {
                if (response.statusCode === 200) {
                    _this.privDialogRequestSession.onPreConnectionStart(_this.privDialogAuthFetchEventId, _this.privConnectionId);
                    _this.privDialogRequestSession.onConnectionEstablishCompleted(response.statusCode);
                    return Exports_2.PromiseHelper.fromResult(connection);
                }
                else if (response.statusCode === 403 && !isUnAuthorized) {
                    return _this.dialogConnectImpl(true);
                }
                else {
                    _this.privDialogRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason);
                    return Exports_2.PromiseHelper.fromError("Unable to contact server. StatusCode: " + response.statusCode + ", " + _this.privRecognizerConfig.parameters.getProperty(Exports_3.PropertyId.SpeechServiceConnection_Endpoint) + " Reason: " + response.reason);
                }
            });
        });
        this.privConnectionLoop = this.startMessageLoop();
        return this.privDialogConnectionPromise;
    };
    /**
     * Clears the terminate flag and starts the receive loop; a loop failure cancels
     * recognition with a RuntimeError.
     */
    DialogServiceAdapter.prototype.startMessageLoop = function () {
        var _this = this;
        this.terminateMessageLoop = false;
        var messageRetrievalPromise = this.receiveDialogMessageOverride();
        return messageRetrievalPromise.on(function (r) {
            return true;
        }, function (error) {
            _this.cancelRecognition(_this.privDialogRequestSession.sessionId, _this.privDialogRequestSession.requestId, Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.RuntimeError, error, _this.privSuccessCallback);
        });
    };
    /**
     * Takes an established websocket connection to the endpoint and sends speech
     * configuration information, followed by the agent configuration.
     *
     * The resulting promise is cached and rebuilt when it completed with an error
     * or its connection is Disconnected.
     *
     * @returns A promise yielding the configured connection.
     */
    DialogServiceAdapter.prototype.configConnection = function () {
        var _this = this;
        if (this.privConnectionConfigPromise) {
            if (this.privConnectionConfigPromise.result().isCompleted &&
                (this.privConnectionConfigPromise.result().isError
                    || this.privConnectionConfigPromise.result().result.state() === Exports_2.ConnectionState.Disconnected)) {
                this.privConnectionConfigPromise = null;
                return this.configConnection();
            }
            else {
                return this.privConnectionConfigPromise;
            }
        }
        this.privConnectionConfigPromise = this.dialogConnectImpl().onSuccessContinueWithPromise(function (connection) {
            return _this.sendSpeechServiceConfig(connection, _this.privDialogRequestSession, _this.privRecognizerConfig.SpeechServiceConfig.serialize())
                .onSuccessContinueWithPromise(function (_) {
                return _this.sendAgentConfig(connection).onSuccessContinueWith(function (_) {
                    return connection;
                });
            });
        });
        return this.privConnectionConfigPromise;
    };
    /** Sends the agent context once a configured connection is available. */
    DialogServiceAdapter.prototype.sendPreAudioMessages = function () {
        var _this = this;
        this.fetchDialogConnection().onSuccessContinueWith(function (connection) {
            _this.sendAgentContext(connection);
        });
    };
    /**
     * Builds the SpeechRecognitionEventArgs for a recognized phrase, with the offset
     * shifted by the session's current turn audio offset.
     *
     * @param serviceResult Parsed phrase (reads RecognitionStatus, Offset, DisplayText, Duration).
     * @param properties Property collection attached to the result.
     * @returns The event args wrapping the new SpeechRecognitionResult.
     */
    DialogServiceAdapter.prototype.fireEventForResult = function (serviceResult, properties) {
        var resultReason = Exports_4.EnumTranslation.implTranslateRecognitionResult(serviceResult.RecognitionStatus);
        var offset = serviceResult.Offset + this.privDialogRequestSession.currentTurnAudioOffset;
        var result = new Exports_3.SpeechRecognitionResult(this.privDialogRequestSession.requestId, resultReason, serviceResult.DisplayText, serviceResult.Duration, offset, undefined, JSON.stringify(serviceResult), properties);
        var ev = new Exports_3.SpeechRecognitionEventArgs(result, offset, this.privDialogRequestSession.sessionId);
        return ev;
    };
    return DialogServiceAdapter;
}(Exports_4.ServiceRecognizerBase));
exports.DialogServiceAdapter = DialogServiceAdapter;
//# sourceMappingURL=DialogServiceAdapter.js.map