UNPKG

@euirim/microsoft-cognitiveservices-speech-sdk

Version:
542 lines (540 loc) 31.6 kB
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { ReplayableAudioNode } from "../common.browser/Exports";
import { ConnectionState, createGuid, createNoDashGuid, Deferred, MessageType, PromiseHelper, } from "../common/Exports";
import { ActivityReceivedEventArgs, CancellationErrorCode, CancellationReason, PropertyCollection, PropertyId, RecognitionEventArgs, ResultReason, SessionEventArgs, SpeechRecognitionCanceledEventArgs, SpeechRecognitionEventArgs, SpeechRecognitionResult, } from "../sdk/Exports";
import { DialogServiceTurnStateManager } from "./DialogServiceTurnStateManager";
import { CancellationErrorCodePropertyName, EnumTranslation, RecognitionStatus, RequestSession, ServiceRecognizerBase, SimpleSpeechPhrase, SpeechDetected, SpeechHypothesis, } from "./Exports";
import { ActivityPayloadResponse } from "./ServiceMessages/ActivityResponsePayload";
import { SpeechConnectionMessage } from "./SpeechConnectionMessage.Internal";

/**
 * Recognizer adapter used by the dialog service connector.
 *
 * It owns the dialog connection promise, the request session, the outgoing
 * audio upload loop and the incoming message loop, and it forwards service
 * messages to the event handlers found on the connector (`recognized`,
 * `recognizing`, `canceled`, `activityReceived`) and on `privRecognizer`
 * (`sessionStarted`, `sessionStopped`, `speechStartDetected`,
 * `speechEndDetected`).
 */
export class DialogServiceAdapter extends ServiceRecognizerBase {
    /**
     * @param authentication Object providing `fetch` / `fetchOnExpiry` for auth info.
     * @param connectionFactory Object providing `create(config, authInfo, connectionId)`.
     * @param audioSource Audio source; `id()`, `attach()`, `events`, `format` and `deviceInfo` are used.
     * @param recognizerConfig Recognizer configuration (`parameters`, `SpeechServiceConfig`, `recognitionMode`).
     * @param dialogServiceConnector Connector whose event handlers receive the results.
     */
    constructor(authentication, connectionFactory, audioSource, recognizerConfig, dialogServiceConnector) {
        super(authentication, connectionFactory, audioSource, recognizerConfig, dialogServiceConnector);

        /**
         * Wraps `message` in an agent envelope and sends it as a text message
         * on the "agent" path once the configured connection is available.
         */
        this.sendMessage = (message) => {
            const interactionGuid = createGuid();
            const requestId = createNoDashGuid();
            const agentMessage = {
                context: {
                    interactionId: interactionGuid
                },
                messagePayload: message,
                version: 0.5
            };
            const agentMessageJson = JSON.stringify(agentMessage);
            this.fetchDialogConnection().onSuccessContinueWith((connection) => {
                connection.send(new SpeechConnectionMessage(MessageType.Text, "agent", requestId, "application/json", agentMessageJson));
            });
        };

        /**
         * Starts a single recognition turn: connects, sends the agent context,
         * attaches the audio source and begins uploading audio.
         *
         * @param recoMode Recognition mode stored on the recognizer config.
         * @param successCallback Receives the recognition result (or a Canceled result on failure).
         * @param errorCallback Not used by this method.
         * @returns A promise resolving to `true`, or failing with the attach / connection error.
         */
        this.listenOnce = (recoMode, successCallback, errorCallback) => {
            this.privRecognizerConfig.recognitionMode = recoMode;
            this.privDialogRequestSession.startNewRecognition();
            this.privDialogRequestSession.listenForServiceTelemetry(this.privDialogAudioSource.events);
            // Start the connection to the service. The promise this will create is stored and will be used by configureConnection().
            this.dialogConnectImpl();
            this.sendPreAudioMessages();
            this.privSuccessCallback = successCallback;
            return this.privDialogAudioSource
                .attach(this.privDialogRequestSession.audioNodeId)
                .continueWithPromise((result) => {
                    let audioNode;
                    if (result.isError) {
                        this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error, successCallback);
                        return PromiseHelper.fromError(result.error);
                    }
                    else {
                        audioNode = new ReplayableAudioNode(result.result, this.privDialogAudioSource.format);
                        this.privDialogRequestSession.onAudioSourceAttachCompleted(audioNode, false);
                    }
                    return this.privDialogAudioSource.deviceInfo.onSuccessContinueWithPromise((deviceInfo) => {
                        this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
                        return this.configConnection()
                            .on((_) => {
                                const sessionStartEventArgs = new SessionEventArgs(this.privDialogRequestSession.sessionId);
                                if (!!this.privRecognizer.sessionStarted) {
                                    this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);
                                }
                                const audioSendPromise = this.sendAudio(audioNode);
                                /* tslint:disable:no-empty */
                                audioSendPromise.on((_) => { }, (error) => {
                                    this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.RuntimeError, error, successCallback);
                                });
                            }, (error) => {
                                this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.ConnectionFailure, error, successCallback);
                            })
                            .continueWithPromise((result) => {
                                if (result.isError) {
                                    return PromiseHelper.fromError(result.error);
                                }
                                else {
                                    return PromiseHelper.fromResult(true);
                                }
                            });
                    });
                });
        };

        /**
         * Reads chunks from `audioStreamNode` and uploads them on the "audio"
         * path until the stream ends, speech end is flagged, the adapter is
         * disposed, or a newer recognition has started.
         *
         * @returns A promise resolved with `true` when uploading finishes, rejected on a read/connection error.
         */
        this.sendAudio = (audioStreamNode) => {
            // NOTE: Home-baked promises crash ios safari during the invocation
            // of the error callback chain (looks like the recursion is way too deep, and
            // it blows up the stack). The following construct is a stop-gap that does not
            // bubble the error up the callback chain and hence circumvents this problem.
            // TODO: rewrite with ES6 promises.
            const deferred = new Deferred();
            // The time we last sent data to the service.
            let nextSendTime = Date.now();
            const audioFormat = this.privDialogAudioSource.format;
            // Max amount to send before we start to throttle
            const fastLaneSizeMs = this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
            const maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
            // Remember which recognition this loop belongs to so a later turn stops it.
            const startRecogNumber = this.privDialogRequestSession.recogNumber;
            const readAndUploadCycle = () => {
                // If speech is done, stop sending audio.
                if (!this.privDialogIsDisposed &&
                    !this.privDialogRequestSession.isSpeechEnded &&
                    this.privDialogRequestSession.isRecognizing &&
                    this.privDialogRequestSession.recogNumber === startRecogNumber) {
                    this.fetchDialogConnection().on((connection) => {
                        audioStreamNode.read().on((audioStreamChunk) => {
                            // we have a new audio chunk to upload.
                            if (this.privDialogRequestSession.isSpeechEnded) {
                                // If service already recognized audio end then don't send any more audio
                                deferred.resolve(true);
                                return;
                            }
                            let payload;
                            let sendDelay;
                            if (audioStreamChunk.isEnd) {
                                // A null payload is sent as the final (empty) audio message.
                                payload = null;
                                sendDelay = 0;
                            }
                            else {
                                payload = audioStreamChunk.buffer;
                                this.privDialogRequestSession.onAudioSent(payload.byteLength);
                                if (maxSendUnthrottledBytes >= this.privDialogRequestSession.bytesSent) {
                                    sendDelay = 0;
                                }
                                else {
                                    sendDelay = Math.max(0, nextSendTime - Date.now());
                                }
                            }
                            // Are we ready to send, or need we delay more?
                            setTimeout(() => {
                                if (payload !== null) {
                                    // Next send is allowed after half the chunk's real-time duration.
                                    nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
                                }
                                const uploaded = connection.send(new SpeechConnectionMessage(MessageType.Binary, "audio", this.privDialogRequestSession.requestId, null, payload));
                                if (!audioStreamChunk.isEnd) {
                                    uploaded.continueWith((_) => {
                                        // Regardless of success or failure, schedule the next upload.
                                        // If the underlying connection was broken, the next cycle will
                                        // get a new connection and re-transmit missing audio automatically.
                                        readAndUploadCycle();
                                    });
                                }
                                else {
                                    // the audio stream has been closed, no need to schedule next
                                    // read-upload cycle.
                                    this.privDialogRequestSession.onSpeechEnded();
                                    deferred.resolve(true);
                                }
                            }, sendDelay);
                        }, (error) => {
                            if (this.privDialogRequestSession.isSpeechEnded) {
                                // For whatever reason, Reject is used to remove queue subscribers inside
                                // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which
                                // means that sometimes things can be rejected in normal circumstances, without
                                // any errors.
                                deferred.resolve(true); // TODO: remove the argument, it is completely meaningless.
                            }
                            else {
                                // Only reject, if there was a proper error.
                                deferred.reject(error);
                            }
                        });
                    }, (error) => {
                        deferred.reject(error);
                    });
                }
            };
            readAndUploadCycle();
            return deferred.promise();
        };

        /**
         * Reads one message from the connection, dispatches it by path, then
         * re-invokes itself to read the next one.
         *
         * NOTE(review): the recursive calls pass no callbacks, so
         * `successCallback` / `errorCallBack` only apply to the first message
         * read by a given invocation - confirm whether that is intended.
         *
         * @returns The promise of this invocation's own deferred; it is
         *          resolved only when this invocation sees the loop terminate.
         */
        this.receiveDialogMessageOverride = (successCallback, errorCallBack) => {
            // we won't rely on the cascading promises of the connection since we want to continually be available to receive messages
            const communicationCustodian = new Deferred();
            this.fetchDialogConnection().on((connection) => {
                return connection.read()
                    .onSuccessContinueWithPromise((message) => {
                        const isDisposed = this.isDisposed();
                        const terminateMessageLoop = (!this.isDisposed() && this.terminateMessageLoop);
                        if (isDisposed || terminateMessageLoop) {
                            // We're done.
                            communicationCustodian.resolve(undefined);
                            return PromiseHelper.fromResult(undefined);
                        }
                        if (!message) {
                            return this.receiveDialogMessageOverride();
                        }
                        const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);
                        switch (connectionMessage.path.toLowerCase()) {
                            case "turn.start":
                                {
                                    const turnRequestId = connectionMessage.requestId.toUpperCase();
                                    const audioSessionReqId = this.privDialogRequestSession.requestId.toUpperCase();
                                    // turn started by the service
                                    if (turnRequestId !== audioSessionReqId) {
                                        this.privTurnStateManager.StartTurn(turnRequestId);
                                    }
                                }
                                break;
                            case "speech.startdetected":
                                {
                                    const speechStartDetected = SpeechDetected.fromJSON(connectionMessage.textBody);
                                    const speechStartEventArgs = new RecognitionEventArgs(speechStartDetected.Offset, this.privDialogRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechStartDetected) {
                                        this.privRecognizer.speechStartDetected(this.privRecognizer, speechStartEventArgs);
                                    }
                                }
                                break;
                            case "speech.enddetected":
                                {
                                    let json;
                                    if (connectionMessage.textBody.length > 0) {
                                        json = connectionMessage.textBody;
                                    }
                                    else {
                                        // If the request was empty, the JSON returned is empty.
                                        // The fallback must be well-formed JSON (quoted key) so that a
                                        // strict JSON parser accepts it.
                                        json = "{ \"Offset\": 0 }";
                                    }
                                    const speechStopDetected = SpeechDetected.fromJSON(json);
                                    const speechStopOffset = speechStopDetected.Offset + this.privDialogRequestSession.currentTurnAudioOffset;
                                    this.privDialogRequestSession.onServiceRecognized(speechStopOffset);
                                    const speechStopEventArgs = new RecognitionEventArgs(speechStopOffset, this.privDialogRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechEndDetected) {
                                        this.privRecognizer.speechEndDetected(this.privRecognizer, speechStopEventArgs);
                                    }
                                }
                                break;
                            case "turn.end":
                                {
                                    const turnEndRequestId = connectionMessage.requestId.toUpperCase();
                                    const audioSessionReqId = this.privDialogRequestSession.requestId.toUpperCase();
                                    // turn started by the service
                                    if (turnEndRequestId !== audioSessionReqId) {
                                        this.privTurnStateManager.CompleteTurn(turnEndRequestId);
                                    }
                                    else {
                                        // Audio session turn
                                        const sessionStopEventArgs = new SessionEventArgs(this.privDialogRequestSession.sessionId);
                                        this.privDialogRequestSession.onServiceTurnEndResponse(false);
                                        if (this.privDialogRequestSession.isSpeechEnded) {
                                            if (!!this.privRecognizer.sessionStopped) {
                                                this.privRecognizer.sessionStopped(this.privRecognizer, sessionStopEventArgs);
                                            }
                                        }
                                    }
                                }
                                break;
                            default:
                                this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack);
                        }
                        return this.receiveDialogMessageOverride();
                    });
            }, (error) => {
                // Without a connection there is nothing to read; flag the loop for termination.
                this.terminateMessageLoop = true;
            });
            return communicationCustodian.promise();
        };

        /** Returns the promise for a connection that has had its configuration sent. */
        this.fetchDialogConnection = () => {
            return this.configConnection();
        };

        /**
         * Sends the agent configuration on the "agent.config" path, at most once
         * per connection (tracked by `agentConfigSent`).
         *
         * @returns The send promise, or an already-resolved promise when there is nothing to send.
         */
        this.sendAgentConfig = (connection) => {
            if (this.agentConfig && !this.agentConfigSent) {
                const agentConfigJson = this.agentConfig.toJsonString();
                this.agentConfigSent = true;
                return connection.send(new SpeechConnectionMessage(MessageType.Text, "agent.config", this.privDialogRequestSession.requestId, "application/json", agentConfigJson));
            }
            return PromiseHelper.fromResult(true);
        };

        /** Sends an agent context message (fresh interaction id) on the "speech.agent.context" path. */
        this.sendAgentContext = (connection) => {
            const guid = createGuid();
            const agentContext = {
                channelData: "",
                context: {
                    interactionId: guid
                },
                version: 0.5
            };
            const agentContextJson = JSON.stringify(agentContext);
            return connection.send(new SpeechConnectionMessage(MessageType.Text, "speech.agent.context", this.privDialogRequestSession.requestId, "application/json", agentContextJson));
        };

        this.privDialogServiceConnector = dialogServiceConnector;
        this.privDialogAuthentication = authentication;
        this.receiveMessageOverride = this.receiveDialogMessageOverride;
        this.privTurnStateManager = new DialogServiceTurnStateManager();
        this.recognizeOverride = this.listenOnce;
        this.connectImplOverride = this.dialogConnectImpl;
        this.configConnectionOverride = this.configConnection;
        this.fetchConnectionOverride = this.fetchDialogConnection;
        this.disconnectOverride = this.privDisconnect;
        this.privDialogAudioSource = audioSource;
        this.privDialogRequestSession = new RequestSession(audioSource.id());
        this.privDialogConnectionFactory = connectionFactory;
        this.privDialogIsDisposed = false;
        this.agentConfigSent = false;
    }

    /** @returns `true` once `dispose()` has been called. */
    isDisposed() {
        return this.privDialogIsDisposed;
    }

    /**
     * Marks the adapter as disposed and disposes the configured connection,
     * if one was requested.
     *
     * @param reason Forwarded to `connection.dispose`.
     */
    dispose(reason) {
        this.privDialogIsDisposed = true;
        if (this.privConnectionConfigPromise) {
            this.privConnectionConfigPromise.onSuccessContinueWith((connection) => {
                connection.dispose(reason);
            });
        }
    }

    /**
     * Cancels the current recognition, stops the message loop and disposes
     * the underlying connection when there is one.
     */
    privDisconnect() {
        this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.NoError, "Disconnecting", undefined);
        this.terminateMessageLoop = true;
        this.agentConfigSent = false;
        const connectionPromise = this.privDialogConnectionPromise;
        // No connection was ever started (or it was already released by an
        // earlier disconnect): there is nothing to dispose.
        if (!connectionPromise) {
            return;
        }
        if (connectionPromise.result().isCompleted) {
            if (!connectionPromise.result().isError) {
                connectionPromise.result().result.dispose();
                this.privDialogConnectionPromise = null;
            }
        }
        else {
            connectionPromise.onSuccessContinueWith((connection) => {
                connection.dispose();
            });
        }
    }

    /**
     * Handles the message paths that the message loop does not process
     * itself: "speech.phrase", "speech.hypothesis", "audio" and "response".
     * Other paths are ignored.
     *
     * @param connectionMessage The parsed service message.
     * @param successCallback Not used here; results go to `privSuccessCallback`.
     * @param errorCallBack Invoked if `privSuccessCallback` throws.
     */
    processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack) {
        const resultProps = new PropertyCollection();
        if (connectionMessage.messageType === MessageType.Text) {
            resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, connectionMessage.textBody);
        }
        let result;
        switch (connectionMessage.path.toLowerCase()) {
            case "speech.phrase":
                {
                    const speechPhrase = SimpleSpeechPhrase.fromJSON(connectionMessage.textBody);
                    this.privDialogRequestSession.onPhraseRecognized(this.privDialogRequestSession.currentTurnAudioOffset + speechPhrase.Offset + speechPhrase.Duration);
                    if (speechPhrase.RecognitionStatus === RecognitionStatus.Success) {
                        const args = this.fireEventForResult(speechPhrase, resultProps);
                        if (!!this.privDialogServiceConnector.recognized) {
                            try {
                                this.privDialogServiceConnector.recognized(this.privDialogServiceConnector, args);
                                /* tslint:disable:no-empty */
                            }
                            catch (error) {
                                // Not going to let errors in the event handler
                                // trip things up.
                            }
                        }
                        // report result to promise.
                        if (!!this.privSuccessCallback) {
                            try {
                                this.privSuccessCallback(args.result);
                            }
                            catch (e) {
                                if (!!errorCallBack) {
                                    errorCallBack(e);
                                }
                            }
                            // Only invoke the call back once.
                            // and if it's successful don't invoke the
                            // error after that.
                            this.privSuccessCallback = undefined;
                            errorCallBack = undefined;
                        }
                    }
                }
                break;
            case "speech.hypothesis":
                {
                    const hypothesis = SpeechHypothesis.fromJSON(connectionMessage.textBody);
                    const offset = hypothesis.Offset + this.privDialogRequestSession.currentTurnAudioOffset;
                    result = new SpeechRecognitionResult(this.privDialogRequestSession.requestId, ResultReason.RecognizingSpeech, hypothesis.Text, hypothesis.Duration, offset, undefined, connectionMessage.textBody, resultProps);
                    this.privDialogRequestSession.onHypothesis(offset);
                    // Pass the offset (not the duration) as the event offset, the
                    // same way fireEventForResult() builds its event args.
                    const ev = new SpeechRecognitionEventArgs(result, offset, this.privDialogRequestSession.sessionId);
                    if (!!this.privDialogServiceConnector.recognizing) {
                        try {
                            this.privDialogServiceConnector.recognizing(this.privDialogServiceConnector, ev);
                            /* tslint:disable:no-empty */
                        }
                        catch (error) {
                            // Not going to let errors in the event handler
                            // trip things up.
                        }
                    }
                }
                break;
            case "audio":
                {
                    const audioRequestId = connectionMessage.requestId.toUpperCase();
                    const turn = this.privTurnStateManager.GetTurn(audioRequestId);
                    try {
                        // Empty binary message signals end of stream.
                        if (!connectionMessage.binaryBody) {
                            turn.endAudioStream();
                        }
                        else {
                            turn.audioStream.write(connectionMessage.binaryBody);
                        }
                    }
                    catch (error) {
                        // Not going to let errors in the event handler
                        // trip things up.
                    }
                }
                break;
            case "response":
                {
                    const responseRequestId = connectionMessage.requestId.toUpperCase();
                    const activityPayload = ActivityPayloadResponse.fromJSON(connectionMessage.textBody);
                    const turn = this.privTurnStateManager.GetTurn(responseRequestId);
                    // update the conversation Id
                    // agentConfig is treated as optional by sendAgentConfig(), so guard it here too.
                    if (activityPayload.conversationId && this.agentConfig) {
                        const updateAgentConfig = this.agentConfig.get();
                        updateAgentConfig.botInfo.conversationId = activityPayload.conversationId;
                        this.agentConfig.set(updateAgentConfig);
                    }
                    const pullAudioOutputStream = turn.processActivityPayload(activityPayload);
                    const activity = new ActivityReceivedEventArgs(activityPayload.messagePayload, pullAudioOutputStream);
                    if (!!this.privDialogServiceConnector.activityReceived) {
                        try {
                            this.privDialogServiceConnector.activityReceived(this.privDialogServiceConnector, activity);
                            /* tslint:disable:no-empty */
                        }
                        catch (error) {
                            // Not going to let errors in the event handler
                            // trip things up.
                        }
                    }
                }
                break;
            default:
                break;
        }
    }

    /**
     * Cancels recognition: stops the message loop, stops the request session
     * if it is recognizing, raises the connector's `canceled` event when a
     * handler is attached, and reports a Canceled result to
     * `cancelRecoCallback` when one is supplied.
     *
     * @param sessionId Session id placed on the canceled event.
     * @param requestId Not used by this method.
     * @param cancellationReason Reason placed on the canceled event.
     * @param errorCode A `CancellationErrorCode` value.
     * @param error Error details for the event and the result.
     * @param cancelRecoCallback Optional callback receiving the Canceled result.
     */
    cancelRecognition(sessionId, requestId, cancellationReason, errorCode, error, cancelRecoCallback) {
        this.terminateMessageLoop = true;
        if (!!this.privDialogRequestSession.isRecognizing) {
            this.privDialogRequestSession.onStopRecognizing();
        }
        if (!!this.privDialogServiceConnector.canceled) {
            const cancelEvent = new SpeechRecognitionCanceledEventArgs(cancellationReason, error, errorCode, undefined, sessionId);
            try {
                this.privDialogServiceConnector.canceled(this.privDialogServiceConnector, cancelEvent);
                /* tslint:disable:no-empty */
            }
            catch (_a) { }
        }
        // The callback must be told about the cancellation even when nobody
        // subscribed to the `canceled` event; otherwise the caller never
        // receives a result for the turn.
        if (!!cancelRecoCallback) {
            const properties = new PropertyCollection();
            properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);
            const result = new SpeechRecognitionResult(undefined, // ResultId
            ResultReason.Canceled, undefined, // Text
            undefined, // Duration
            undefined, // Offset
            error, undefined, // Json
            properties);
            try {
                cancelRecoCallback(result);
                /* tslint:disable:no-empty */
            }
            catch (_b) { }
        }
    }

    /**
     * Establishes a websocket connection to the end point.
     *
     * An existing connection promise is reused unless it has completed with an
     * error or its connection reports `ConnectionState.Disconnected`. A 403
     * response triggers one retry using `fetchOnExpiry`.
     *
     * @param isUnAuthorized `true` when retrying after a 403 response.
     * @returns A promise for the opened connection.
     */
    dialogConnectImpl(isUnAuthorized = false) {
        if (this.privDialogConnectionPromise) {
            if (this.privDialogConnectionPromise.result().isCompleted &&
                (this.privDialogConnectionPromise.result().isError
                    || this.privDialogConnectionPromise.result().result.state() === ConnectionState.Disconnected)) {
                // A new connection needs the agent config to be sent again.
                this.agentConfigSent = false;
                this.privDialogConnectionPromise = null;
            }
            else {
                return this.privDialogConnectionPromise;
            }
        }
        this.privDialogAuthFetchEventId = createNoDashGuid();
        // keep the connectionId for reconnect events
        if (this.privConnectionId === undefined) {
            this.privConnectionId = createNoDashGuid();
        }
        this.privDialogRequestSession.onPreConnectionStart(this.privDialogAuthFetchEventId, this.privConnectionId);
        const authPromise = isUnAuthorized
            ? this.privDialogAuthentication.fetchOnExpiry(this.privDialogAuthFetchEventId)
            : this.privDialogAuthentication.fetch(this.privDialogAuthFetchEventId);
        this.privDialogConnectionPromise = authPromise
            .continueWithPromise((result) => {
                if (result.isError) {
                    this.privDialogRequestSession.onAuthCompleted(true, result.error);
                    throw new Error(result.error);
                }
                else {
                    this.privDialogRequestSession.onAuthCompleted(false);
                }
                const connection = this.privDialogConnectionFactory.create(this.privRecognizerConfig, result.result, this.privConnectionId);
                this.privDialogRequestSession.listenForServiceTelemetry(connection.events);
                // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
                // it'll stop sending events.
                connection.events.attach((event) => {
                    this.connectionEvents.onEvent(event);
                });
                return connection.open().onSuccessContinueWithPromise((response) => {
                    if (response.statusCode === 200) {
                        this.privDialogRequestSession.onPreConnectionStart(this.privDialogAuthFetchEventId, this.privConnectionId);
                        this.privDialogRequestSession.onConnectionEstablishCompleted(response.statusCode);
                        return PromiseHelper.fromResult(connection);
                    }
                    else if (response.statusCode === 403 && !isUnAuthorized) {
                        return this.dialogConnectImpl(true);
                    }
                    else {
                        this.privDialogRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason);
                        return PromiseHelper.fromError(`Unable to contact server. StatusCode: ${response.statusCode}, ${this.privRecognizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${response.reason}`);
                    }
                });
            });
        this.privConnectionLoop = this.startMessageLoop();
        return this.privDialogConnectionPromise;
    }

    /**
     * Clears the termination flag and starts the receive loop; a failure of
     * the loop cancels recognition with a RuntimeError.
     */
    startMessageLoop() {
        this.terminateMessageLoop = false;
        const messageRetrievalPromise = this.receiveDialogMessageOverride();
        return messageRetrievalPromise.on((r) => {
            return true;
        }, (error) => {
            this.cancelRecognition(this.privDialogRequestSession.sessionId, this.privDialogRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.RuntimeError, error, this.privSuccessCallback);
        });
    }

    /**
     * Takes an established websocket connection to the endpoint and sends
     * speech configuration information followed by the agent config.
     *
     * The resulting promise is cached and reused until it has failed or its
     * connection reports `ConnectionState.Disconnected`.
     *
     * @returns A promise for the configured connection.
     */
    configConnection() {
        if (this.privConnectionConfigPromise) {
            if (this.privConnectionConfigPromise.result().isCompleted &&
                (this.privConnectionConfigPromise.result().isError
                    || this.privConnectionConfigPromise.result().result.state() === ConnectionState.Disconnected)) {
                this.privConnectionConfigPromise = null;
                return this.configConnection();
            }
            else {
                return this.privConnectionConfigPromise;
            }
        }
        this.privConnectionConfigPromise = this.dialogConnectImpl().onSuccessContinueWithPromise((connection) => {
            return this.sendSpeechServiceConfig(connection, this.privDialogRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize())
                .onSuccessContinueWithPromise((_) => {
                    return this.sendAgentConfig(connection).onSuccessContinueWith((_) => {
                        return connection;
                    });
                });
        });
        return this.privConnectionConfigPromise;
    }

    /** Sends the agent context message once the configured connection is available. */
    sendPreAudioMessages() {
        this.fetchDialogConnection().onSuccessContinueWith((connection) => {
            this.sendAgentContext(connection);
        });
    }

    /**
     * Builds the event args for a phrase result.
     *
     * @param serviceResult Parsed phrase (`RecognitionStatus`, `Offset`, `Duration`, `DisplayText` are read).
     * @param properties Property collection attached to the result.
     * @returns A `SpeechRecognitionEventArgs` whose offset includes the current turn's audio offset.
     */
    fireEventForResult(serviceResult, properties) {
        const resultReason = EnumTranslation.implTranslateRecognitionResult(serviceResult.RecognitionStatus);
        const offset = serviceResult.Offset + this.privDialogRequestSession.currentTurnAudioOffset;
        const result = new SpeechRecognitionResult(this.privDialogRequestSession.requestId, resultReason, serviceResult.DisplayText, serviceResult.Duration, offset, undefined, JSON.stringify(serviceResult), properties);
        const ev = new SpeechRecognitionEventArgs(result, offset, this.privDialogRequestSession.sessionId);
        return ev;
    }
}
//# sourceMappingURL=DialogServiceAdapter.js.map