UNPKG

@euirim/microsoft-cognitiveservices-speech-sdk

Version:
478 lines (476 loc) 26.3 kB
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { ReplayableAudioNode } from "../common.browser/Exports";
import { ArgumentNullError, ConnectionState, createNoDashGuid, Deferred, EventSource, MessageType, PromiseHelper, } from "../common/Exports";
import { CancellationErrorCode, CancellationReason, PropertyId, RecognitionEventArgs, SessionEventArgs, } from "../sdk/Exports";
import { AgentConfig, DynamicGrammarBuilder, RequestSession, SpeechContext, SpeechDetected, } from "./Exports";
import { SpeechConnectionMessage } from "./SpeechConnectionMessage.Internal";

/**
 * Base class that drives one recognition session against the speech service:
 * it obtains an authenticated connection, sends configuration / context /
 * audio / telemetry messages over it, and dispatches the messages it reads
 * back to the owning recognizer's event callbacks.
 *
 * This class calls `this.cancelRecognition(...)` and
 * `this.processTypeSpecificMessages(...)` but does not define them; they are
 * expected to be supplied by a derived class.
 *
 * The `*Override` properties, when set to a function, replace the
 * corresponding built-in behaviour.
 */
export class ServiceRecognizerBase {
    /**
     * @param authentication    Object exposing `fetch(id)` / `fetchOnExpiry(id)`.
     * @param connectionFactory Object exposing `create(config, authInfo, connectionId)`.
     * @param audioSource       Audio source exposing `id()`, `format`, `events`,
     *                          `attach()`, `deviceInfo` and `turnOff()`.
     * @param recognizerConfig  Recognizer configuration.
     * @param recognizer        Owning recognizer whose event callbacks are invoked.
     * @throws {ArgumentNullError} If any of the first four arguments is falsy.
     */
    constructor(authentication, connectionFactory, audioSource, recognizerConfig, recognizer) {
        this.recognizeOverride = undefined;
        this.disconnectOverride = undefined;

        /**
         * Sends the session's accumulated telemetry to the service.
         * Resolves immediately with `true` when telemetry is disabled, the
         * recognizer is disposed, or there is nothing to send.
         */
        this.sendTelemetryData = () => {
            const telemetryData = this.privRequestSession.getTelemetry();
            if (ServiceRecognizerBase.telemetryDataEnabled !== true ||
                this.privIsDisposed ||
                null === telemetryData) {
                return PromiseHelper.fromResult(true);
            }
            if (!!ServiceRecognizerBase.telemetryData) {
                try {
                    ServiceRecognizerBase.telemetryData(telemetryData);
                }
                catch (_a) {
                    // Best effort: a failing telemetry observer must not block the upload.
                }
            }
            return this.fetchConnection().onSuccessContinueWith((connection) => {
                return connection.send(new SpeechConnectionMessage(MessageType.Text, "telemetry", this.privRequestSession.requestId, "application/json", telemetryData));
            });
        };

        this.receiveMessageOverride = undefined;

        /**
         * Reads one message from the connection, handles it, and re-arms
         * itself until recognition stops or the recognizer is disposed.
         *
         * @param successCallback Forwarded to cancellation / type-specific handlers.
         * @param errorCallBack   Forwarded to type-specific handlers.
         */
        this.receiveMessage = (successCallback, errorCallBack) => {
            return this.fetchConnection().on((connection) => {
                return connection.read()
                    .onSuccessContinueWithPromise((message) => {
                        if (this.receiveMessageOverride !== undefined) {
                            return this.receiveMessageOverride();
                        }
                        if (this.privIsDisposed || !this.privRequestSession.isRecognizing) {
                            // We're done.
                            return PromiseHelper.fromResult(undefined);
                        }
                        // Indicates we are draining the queue and it came with no message.
                        // Recognition is still active here (checked just above), so keep reading.
                        if (!message) {
                            return this.receiveMessage(successCallback, errorCallBack);
                        }
                        const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);
                        // Only messages belonging to the current request are processed.
                        if (connectionMessage.requestId.toLowerCase() === this.privRequestSession.requestId.toLowerCase()) {
                            switch (connectionMessage.path.toLowerCase()) {
                                case "turn.start": {
                                    this.privMustReportEndOfStream = true;
                                    break;
                                }
                                case "speech.startdetected": {
                                    const speechStartDetected = SpeechDetected.fromJSON(connectionMessage.textBody);
                                    const speechStartEventArgs = new RecognitionEventArgs(speechStartDetected.Offset, this.privRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechStartDetected) {
                                        this.privRecognizer.speechStartDetected(this.privRecognizer, speechStartEventArgs);
                                    }
                                    break;
                                }
                                case "speech.enddetected": {
                                    let json;
                                    if (connectionMessage.textBody.length > 0) {
                                        json = connectionMessage.textBody;
                                    }
                                    else {
                                        // If the request was empty, the JSON returned is empty.
                                        // The fallback must be valid JSON (quoted key) so that a
                                        // JSON.parse-based fromJSON does not throw.
                                        json = '{ "Offset": 0 }';
                                    }
                                    const speechStopDetected = SpeechDetected.fromJSON(json);
                                    const stopOffset = speechStopDetected.Offset + this.privRequestSession.currentTurnAudioOffset;
                                    // Only shrink the buffers for continuous recognition.
                                    // For single shot, the speech.phrase message will come after the
                                    // speech.end and it should own buffer shrink.
                                    if (this.privRecognizerConfig.isContinuousRecognition) {
                                        this.privRequestSession.onServiceRecognized(stopOffset);
                                    }
                                    const speechStopEventArgs = new RecognitionEventArgs(stopOffset, this.privRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechEndDetected) {
                                        this.privRecognizer.speechEndDetected(this.privRecognizer, speechStopEventArgs);
                                    }
                                    break;
                                }
                                case "turn.end": {
                                    this.sendTelemetryData();
                                    if (this.privRequestSession.isSpeechEnded && this.privMustReportEndOfStream) {
                                        this.privMustReportEndOfStream = false;
                                        this.cancelRecognitionLocal(CancellationReason.EndOfStream, CancellationErrorCode.NoError, undefined, successCallback);
                                    }
                                    const sessionStopEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);
                                    this.privRequestSession.onServiceTurnEndResponse(this.privRecognizerConfig.isContinuousRecognition);
                                    if (!this.privRecognizerConfig.isContinuousRecognition || this.privRequestSession.isSpeechEnded) {
                                        if (!!this.privRecognizer.sessionStopped) {
                                            this.privRecognizer.sessionStopped(this.privRecognizer, sessionStopEventArgs);
                                        }
                                        return PromiseHelper.fromResult(true);
                                    }
                                    // Continuous recognition: re-send the speech context for the next turn.
                                    this.fetchConnection().onSuccessContinueWith((turnConnection) => {
                                        this.sendSpeechContext(turnConnection);
                                    });
                                }
                                // falls through
                                // NOTE(review): "turn.end" in continuous mode also reaches the
                                // type-specific handler below. This looks like a missing `break`,
                                // but is preserved because derived handlers are not visible here.
                                default:
                                    this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack);
                            }
                        }
                        return this.receiveMessage(successCallback, errorCallBack);
                    });
            }, (error) => {
                // Connection errors are intentionally not surfaced from the receive loop.
            });
        };

        /**
         * Sends the serialized speech context, if any, as a "speech.context" message.
         * Resolves with `true` when there is nothing to send.
         */
        this.sendSpeechContext = (connection) => {
            const speechContextJson = this.speechContext.toJSON();
            if (speechContextJson) {
                return connection.send(new SpeechConnectionMessage(MessageType.Text, "speech.context", this.privRequestSession.requestId, "application/json", speechContextJson));
            }
            return PromiseHelper.fromResult(true);
        };

        this.connectImplOverride = undefined;
        this.configConnectionOverride = undefined;
        this.fetchConnectionOverride = undefined;

        /**
         * Sends the "speech.config" message. When telemetry is disabled only
         * `context.system` of the supplied configuration is transmitted.
         *
         * @param connection              Connection to send on.
         * @param requestSession          Session providing the request id.
         * @param SpeechServiceConfigJson Serialized configuration; when empty
         *                                nothing is sent and the result is `true`.
         */
        this.sendSpeechServiceConfig = (connection, requestSession, SpeechServiceConfigJson) => {
            // Check for an empty config first so JSON.parse below never sees it.
            if (!SpeechServiceConfigJson) {
                return PromiseHelper.fromResult(true);
            }
            let configJson = SpeechServiceConfigJson;
            // Filter out anything that is not required for the service to work.
            if (ServiceRecognizerBase.telemetryDataEnabled !== true) {
                const withTelemetry = JSON.parse(configJson);
                const replacement = {
                    context: {
                        system: withTelemetry.context.system,
                    },
                };
                configJson = JSON.stringify(replacement);
            }
            this.privSpeechServiceConfigConnectionId = this.privConnectionId;
            return connection.send(new SpeechConnectionMessage(MessageType.Text, "speech.config", requestSession.requestId, "application/json", configJson));
        };

        /**
         * Reads chunks from `audioStreamNode` and uploads them as "audio"
         * messages until the stream ends or the service reports end of speech.
         *
         * @param audioStreamNode Node whose `read()` yields `{ buffer, isEnd }` chunks.
         * @returns Promise from an internal Deferred; resolved with `true` on
         *          normal completion and rejected on read / connection errors.
         */
        this.sendAudio = (audioStreamNode) => {
            // NOTE: Home-baked promises crash ios safari during the invocation
            // of the error callback chain (looks like the recursion is way too deep, and
            // it blows up the stack). The following construct is a stop-gap that does not
            // bubble the error up the callback chain and hence circumvents this problem.
            // TODO: rewrite with ES6 promises.
            const deferred = new Deferred();
            // Earliest time at which the next throttled chunk may be sent.
            let nextSendTime = Date.now();
            const audioFormat = this.privAudioSource.format;
            // Max amount to send before we start to throttle.
            const fastLaneSizeMs = this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
            const maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
            // Used to stop this upload loop once a newer recognition has started.
            const startRecogNumber = this.privRequestSession.recogNumber;
            const readAndUploadCycle = () => {
                // If speech is done, stop sending audio.
                if (!this.privIsDisposed &&
                    !this.privRequestSession.isSpeechEnded &&
                    this.privRequestSession.isRecognizing &&
                    this.privRequestSession.recogNumber === startRecogNumber) {
                    this.fetchConnection().on((connection) => {
                        audioStreamNode.read().on((audioStreamChunk) => {
                            // We have a new audio chunk to upload.
                            if (this.privRequestSession.isSpeechEnded) {
                                // If service already recognized audio end then don't send any more audio.
                                deferred.resolve(true);
                                return;
                            }
                            let payload;
                            let sendDelay;
                            if (audioStreamChunk.isEnd) {
                                // A null payload is sent as the final, empty audio message.
                                payload = null;
                                sendDelay = 0;
                            }
                            else {
                                payload = audioStreamChunk.buffer;
                                this.privRequestSession.onAudioSent(payload.byteLength);
                                if (maxSendUnthrottledBytes >= this.privRequestSession.bytesSent) {
                                    sendDelay = 0;
                                }
                                else {
                                    sendDelay = Math.max(0, nextSendTime - Date.now());
                                }
                            }
                            // Are we ready to send, or need we delay more?
                            setTimeout(() => {
                                if (payload !== null) {
                                    // Pace subsequent chunks at twice the average byte rate.
                                    nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
                                }
                                const uploaded = connection.send(new SpeechConnectionMessage(MessageType.Binary, "audio", this.privRequestSession.requestId, null, payload));
                                if (!audioStreamChunk.isEnd) {
                                    uploaded.continueWith((_) => {
                                        // Regardless of success or failure, schedule the next upload.
                                        // If the underlying connection was broken, the next cycle will
                                        // get a new connection and re-transmit missing audio automatically.
                                        readAndUploadCycle();
                                    });
                                }
                                else {
                                    // The audio stream has been closed, no need to schedule next
                                    // read-upload cycle.
                                    this.privRequestSession.onSpeechEnded();
                                    deferred.resolve(true);
                                }
                            }, sendDelay);
                        }, (error) => {
                            if (this.privRequestSession.isSpeechEnded) {
                                // For whatever reason, Reject is used to remove queue subscribers inside
                                // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which
                                // means that sometimes things can be rejected in normal circumstances, without
                                // any errors.
                                deferred.resolve(true); // TODO: remove the argument, it is completely meaningless.
                            }
                            else {
                                // Only reject if there was a proper error.
                                deferred.reject(error);
                            }
                        });
                    }, (error) => {
                        deferred.reject(error);
                    });
                }
            };
            readAndUploadCycle();
            return deferred.promise();
        };

        /**
         * Returns a promise for a configured connection, honouring
         * `fetchConnectionOverride` when it is set.
         */
        this.fetchConnection = () => {
            if (this.fetchConnectionOverride !== undefined) {
                return this.fetchConnectionOverride();
            }
            return this.configureConnection();
        };

        if (!authentication) {
            throw new ArgumentNullError("authentication");
        }
        if (!connectionFactory) {
            throw new ArgumentNullError("connectionFactory");
        }
        if (!audioSource) {
            throw new ArgumentNullError("audioSource");
        }
        if (!recognizerConfig) {
            throw new ArgumentNullError("recognizerConfig");
        }
        this.privMustReportEndOfStream = false;
        this.privAuthentication = authentication;
        this.privConnectionFactory = connectionFactory;
        this.privAudioSource = audioSource;
        this.privRecognizerConfig = recognizerConfig;
        this.privIsDisposed = false;
        this.privRecognizer = recognizer;
        this.privRequestSession = new RequestSession(this.privAudioSource.id());
        this.privConnectionEvents = new EventSource();
        this.privDynamicGrammar = new DynamicGrammarBuilder();
        this.privSpeechContext = new SpeechContext(this.privDynamicGrammar);
        this.privAgentConfig = new AgentConfig();
    }

    /** The audio source supplied at construction. */
    get audioSource() {
        return this.privAudioSource;
    }

    /** The speech context sent to the service with each configuration. */
    get speechContext() {
        return this.privSpeechContext;
    }

    /** The dynamic grammar builder backing the speech context. */
    get dynamicGrammar() {
        return this.privDynamicGrammar;
    }

    /** The agent configuration object created for this recognizer. */
    get agentConfig() {
        return this.privAgentConfig;
    }

    /** @returns {boolean} Whether `dispose()` has been called. */
    isDisposed() {
        return this.privIsDisposed;
    }

    /**
     * Marks the recognizer as disposed and, if a configured connection exists
     * (or is pending), disposes it once available.
     *
     * @param reason Passed through to the connection's `dispose`.
     */
    dispose(reason) {
        this.privIsDisposed = true;
        if (this.privConnectionConfigurationPromise) {
            this.privConnectionConfigurationPromise.onSuccessContinueWith((connection) => {
                connection.dispose(reason);
            });
        }
    }

    /** Event source onto which events from the underlying connection are forwarded. */
    get connectionEvents() {
        return this.privConnectionEvents;
    }

    /** The recognition mode currently stored in the recognizer configuration. */
    get recognitionMode() {
        return this.privRecognizerConfig.recognitionMode;
    }

    /**
     * Starts a recognition: attaches to the audio source, configures the
     * connection, raises `sessionStarted`, then runs the receive loop and the
     * audio upload side by side.
     *
     * @param recoMode        Recognition mode to store in the configuration.
     * @param successCallback Forwarded to cancellation and message handlers.
     * @param errorCallBack   Forwarded to message handlers.
     * @returns Promise resolving to `true`, or failing with the attach /
     *          connection error.
     */
    recognize(recoMode, successCallback, errorCallBack) {
        if (this.recognizeOverride !== undefined) {
            return this.recognizeOverride(recoMode, successCallback, errorCallBack);
        }
        // Clear the existing configuration promise to force a re-transmission of config and context.
        this.privConnectionConfigurationPromise = null;
        this.privRecognizerConfig.recognitionMode = recoMode;
        this.privRequestSession.startNewRecognition();
        this.privRequestSession.listenForServiceTelemetry(this.privAudioSource.events);
        // Start the connection to the service. The promise this will create is stored
        // and will be used by configureConnection().
        this.connectImpl();
        return this.audioSource
            .attach(this.privRequestSession.audioNodeId)
            .continueWithPromise((result) => {
                if (result.isError) {
                    this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error, successCallback);
                    return PromiseHelper.fromError(result.error);
                }
                const audioNode = new ReplayableAudioNode(result.result, this.audioSource.format);
                this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false);
                return this.audioSource.deviceInfo.onSuccessContinueWithPromise((deviceInfo) => {
                    this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
                    return this.configureConnection()
                        .on((_) => {
                            const sessionStartEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);
                            if (!!this.privRecognizer.sessionStarted) {
                                this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);
                            }
                            const messageRetrievalPromise = this.receiveMessage(successCallback, errorCallBack);
                            const audioSendPromise = this.sendAudio(audioNode);
                            audioSendPromise.on((_) => {
                                // Nothing to do on success.
                            }, (error) => {
                                this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.RuntimeError, error, successCallback);
                            });
                            const completionPromise = PromiseHelper.whenAll([messageRetrievalPromise, audioSendPromise]);
                            return completionPromise.on((r) => {
                                return true;
                            }, (error) => {
                                this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.RuntimeError, error, successCallback);
                            });
                        }, (error) => {
                            this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, error, successCallback);
                        })
                        .continueWithPromise((configResult) => {
                            if (configResult.isError) {
                                return PromiseHelper.fromError(configResult.error);
                            }
                            return PromiseHelper.fromResult(true);
                        });
                });
            });
    }

    /**
     * Stops an active recognition: flushes telemetry, turns the audio source
     * off, sends the final empty audio message and disposes the session.
     * Does nothing when no recognition is in progress.
     */
    stopRecognizing() {
        if (this.privRequestSession.isRecognizing) {
            this.privRequestSession.onStopRecognizing();
            this.sendTelemetryData();
            this.audioSource.turnOff();
            this.sendFinalAudio();
            this.privRequestSession.dispose();
        }
    }

    /** Starts (or reuses) the connection to the service. */
    connect() {
        this.connectImpl().result();
    }

    /**
     * Cancels any active recognition and disposes the current connection.
     * Safe to call when no connection attempt was ever made.
     */
    disconnect() {
        if (this.disconnectOverride !== undefined) {
            this.disconnectOverride();
            return;
        }
        this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.NoError, "Disconnecting", undefined);
        // Without a connection promise there is nothing to dispose; dereferencing
        // it unguarded would throw a TypeError.
        if (!this.privConnectionPromise) {
            return;
        }
        const connectionResult = this.privConnectionPromise.result();
        if (connectionResult.isCompleted) {
            if (!connectionResult.isError) {
                connectionResult.result.dispose();
                this.privConnectionPromise = null;
            }
        }
        else {
            // Connection still pending: dispose it as soon as it is established.
            this.privConnectionPromise.onSuccessContinueWith((connection) => {
                connection.dispose();
            });
        }
    }

    /** No-op in the base class. */
    sendMessage(message) { }

    /**
     * Cancels recognition locally: if a recognition is in progress, stops it,
     * flushes telemetry and delegates to `cancelRecognition`.
     *
     * @param cancellationReason Reason reported to the cancellation handler.
     * @param errorCode          Error code reported to the cancellation handler.
     * @param error              Error details (may be undefined).
     * @param cancelRecoCallback Callback forwarded to `cancelRecognition`.
     */
    cancelRecognitionLocal(cancellationReason, errorCode, error, cancelRecoCallback) {
        if (!!this.privRequestSession.isRecognizing) {
            this.privRequestSession.onStopRecognizing();
            this.sendTelemetryData();
            this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, cancellationReason, errorCode, error, cancelRecoCallback);
        }
    }

    /**
     * Establishes a websocket connection to the end point, reusing the cached
     * connection promise unless it has failed or its connection is disconnected.
     *
     * @param isUnAuthorized When true, authentication uses `fetchOnExpiry`
     *                       instead of `fetch`.
     * @returns Promise for the opened connection.
     */
    connectImpl(isUnAuthorized = false) {
        if (this.connectImplOverride !== undefined) {
            return this.connectImplOverride(isUnAuthorized);
        }
        if (this.privConnectionPromise) {
            if (this.privConnectionPromise.result().isCompleted &&
                (this.privConnectionPromise.result().isError
                    || this.privConnectionPromise.result().result.state() === ConnectionState.Disconnected)) {
                // The cached connection is unusable; drop it and start over.
                this.privConnectionId = null;
                this.privConnectionPromise = null;
                return this.connectImpl();
            }
            else {
                return this.privConnectionPromise;
            }
        }
        this.privAuthFetchEventId = createNoDashGuid();
        this.privConnectionId = createNoDashGuid();
        this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
        const authPromise = isUnAuthorized
            ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId)
            : this.privAuthentication.fetch(this.privAuthFetchEventId);
        this.privConnectionPromise = authPromise
            .continueWithPromise((result) => {
                if (result.isError) {
                    this.privRequestSession.onAuthCompleted(true, result.error);
                    throw new Error(result.error);
                }
                else {
                    this.privRequestSession.onAuthCompleted(false);
                }
                const connection = this.privConnectionFactory.create(this.privRecognizerConfig, result.result, this.privConnectionId);
                this.privRequestSession.listenForServiceTelemetry(connection.events);
                // Attach to the underlying event. No need to hold onto the detach pointers as in the
                // event the connection goes away, it'll stop sending events.
                connection.events.attach((event) => {
                    this.connectionEvents.onEvent(event);
                });
                return connection.open().onSuccessContinueWithPromise((response) => {
                    if (response.statusCode === 200) {
                        this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
                        this.privRequestSession.onConnectionEstablishCompleted(response.statusCode);
                        return PromiseHelper.fromResult(connection);
                    }
                    else if (response.statusCode === 403 && !isUnAuthorized) {
                        // NOTE(review): privConnectionPromise appears to still hold the pending
                        // promise this callback belongs to, so this call may return that same
                        // promise instead of re-authenticating — confirm.
                        return this.connectImpl(true);
                    }
                    else {
                        this.privRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason);
                        return PromiseHelper.fromError(`Unable to contact server. StatusCode: ${response.statusCode}, ${this.privRecognizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${response.reason}`);
                    }
                });
            });
        return this.privConnectionPromise;
    }

    /**
     * Sends an empty binary "audio" message for the current request.
     *
     * @returns Promise resolved with `true` once sent, rejected on failure.
     */
    sendFinalAudio() {
        const deferred = new Deferred();
        this.fetchConnection().on((connection) => {
            connection.send(new SpeechConnectionMessage(MessageType.Binary, "audio", this.privRequestSession.requestId, null, null)).on((_) => {
                deferred.resolve(true);
            }, (error) => {
                deferred.reject(error);
            });
        }, (error) => {
            deferred.reject(error);
        });
        return deferred.promise();
    }

    /**
     * Takes an established websocket connection to the endpoint and sends
     * speech configuration information, followed by the speech context.
     * The resulting promise is cached and reused until it fails or its
     * connection is disconnected.
     *
     * @returns Promise for the configured connection.
     */
    configureConnection() {
        if (this.configConnectionOverride !== undefined) {
            return this.configConnectionOverride();
        }
        if (this.privConnectionConfigurationPromise) {
            if (this.privConnectionConfigurationPromise.result().isCompleted &&
                (this.privConnectionConfigurationPromise.result().isError
                    || this.privConnectionConfigurationPromise.result().result.state() === ConnectionState.Disconnected)) {
                // The cached configured connection is unusable; rebuild it.
                this.privConnectionConfigurationPromise = null;
                return this.configureConnection();
            }
            else {
                return this.privConnectionConfigurationPromise;
            }
        }
        this.privConnectionConfigurationPromise = this.connectImpl().onSuccessContinueWithPromise((connection) => {
            return this.sendSpeechServiceConfig(connection, this.privRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize())
                .onSuccessContinueWithPromise((_) => {
                    return this.sendSpeechContext(connection).onSuccessContinueWith((_) => {
                        return connection;
                    });
                });
        });
        return this.privConnectionConfigurationPromise;
    }
}
// Telemetry upload is enabled by default.
ServiceRecognizerBase.telemetryDataEnabled = true;

//# sourceMappingURL=ServiceRecognizerBase.js.map