UNPKG

@euirim/microsoft-cognitiveservices-speech-sdk

Version:
512 lines (510 loc) 28.8 kB
"use strict"; // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. Object.defineProperty(exports, "__esModule", { value: true }); var Exports_1 = require("../common.browser/Exports"); var Exports_2 = require("../common/Exports"); var Exports_3 = require("../sdk/Exports"); var Exports_4 = require("./Exports"); var SpeechConnectionMessage_Internal_1 = require("./SpeechConnectionMessage.Internal"); var ServiceRecognizerBase = /** @class */ (function () { function ServiceRecognizerBase(authentication, connectionFactory, audioSource, recognizerConfig, recognizer) { var _this = this; this.recognizeOverride = undefined; this.disconnectOverride = undefined; this.sendTelemetryData = function () { var telemetryData = _this.privRequestSession.getTelemetry(); // console.warn("Telem: " + telemetryData); if (ServiceRecognizerBase.telemetryDataEnabled !== true || _this.privIsDisposed || null === telemetryData) { return Exports_2.PromiseHelper.fromResult(true); } if (!!ServiceRecognizerBase.telemetryData) { try { ServiceRecognizerBase.telemetryData(telemetryData); /* tslint:disable:no-empty */ } catch (_a) { } } return _this.fetchConnection().onSuccessContinueWith(function (connection) { return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "telemetry", _this.privRequestSession.requestId, "application/json", telemetryData)); }); }; this.receiveMessageOverride = undefined; this.receiveMessage = function (successCallback, errorCallBack) { return _this.fetchConnection().on(function (connection) { return connection.read() .onSuccessContinueWithPromise(function (message) { if (_this.receiveMessageOverride !== undefined) { return _this.receiveMessageOverride(); } if (_this.privIsDisposed || !_this.privRequestSession.isRecognizing) { // We're done. return Exports_2.PromiseHelper.fromResult(undefined); } // indicates we are draining the queue and it came with no message; if (!message) { if (!_this.privRequestSession.isRecognizing) { return Exports_2.PromiseHelper.fromResult(true); } else { return _this.receiveMessage(successCallback, errorCallBack); } } var connectionMessage = SpeechConnectionMessage_Internal_1.SpeechConnectionMessage.fromConnectionMessage(message); if (connectionMessage.requestId.toLowerCase() === _this.privRequestSession.requestId.toLowerCase()) { switch (connectionMessage.path.toLowerCase()) { case "turn.start": _this.privMustReportEndOfStream = true; break; case "speech.startdetected": var speechStartDetected = Exports_4.SpeechDetected.fromJSON(connectionMessage.textBody); var speechStartEventArgs = new Exports_3.RecognitionEventArgs(speechStartDetected.Offset, _this.privRequestSession.sessionId); if (!!_this.privRecognizer.speechStartDetected) { _this.privRecognizer.speechStartDetected(_this.privRecognizer, speechStartEventArgs); } break; case "speech.enddetected": var json = void 0; if (connectionMessage.textBody.length > 0) { json = connectionMessage.textBody; } else { // If the request was empty, the JSON returned is empty. json = "{ Offset: 0 }"; } var speechStopDetected = Exports_4.SpeechDetected.fromJSON(json); // Only shrink the buffers for continuous recognition. // For single shot, the speech.phrase message will come after the speech.end and it should own buffer shrink. if (_this.privRecognizerConfig.isContinuousRecognition) { _this.privRequestSession.onServiceRecognized(speechStopDetected.Offset + _this.privRequestSession.currentTurnAudioOffset); } var speechStopEventArgs = new Exports_3.RecognitionEventArgs(speechStopDetected.Offset + _this.privRequestSession.currentTurnAudioOffset, _this.privRequestSession.sessionId); if (!!_this.privRecognizer.speechEndDetected) { _this.privRecognizer.speechEndDetected(_this.privRecognizer, speechStopEventArgs); } break; case "turn.end": _this.sendTelemetryData(); if (_this.privRequestSession.isSpeechEnded && _this.privMustReportEndOfStream) { _this.privMustReportEndOfStream = false; _this.cancelRecognitionLocal(Exports_3.CancellationReason.EndOfStream, Exports_3.CancellationErrorCode.NoError, undefined, successCallback); } var sessionStopEventArgs = new Exports_3.SessionEventArgs(_this.privRequestSession.sessionId); _this.privRequestSession.onServiceTurnEndResponse(_this.privRecognizerConfig.isContinuousRecognition); if (!_this.privRecognizerConfig.isContinuousRecognition || _this.privRequestSession.isSpeechEnded) { if (!!_this.privRecognizer.sessionStopped) { _this.privRecognizer.sessionStopped(_this.privRecognizer, sessionStopEventArgs); } return Exports_2.PromiseHelper.fromResult(true); } else { _this.fetchConnection().onSuccessContinueWith(function (connection) { _this.sendSpeechContext(connection); }); } default: _this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack); } } return _this.receiveMessage(successCallback, errorCallBack); }); }, function (error) { }); }; this.sendSpeechContext = function (connection) { var speechContextJson = _this.speechContext.toJSON(); if (speechContextJson) { return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "speech.context", _this.privRequestSession.requestId, "application/json", speechContextJson)); } return Exports_2.PromiseHelper.fromResult(true); }; this.connectImplOverride = undefined; this.configConnectionOverride = undefined; this.fetchConnectionOverride = undefined; this.sendSpeechServiceConfig = function (connection, requestSession, SpeechServiceConfigJson) { // filter out anything that is not required for the service to work. if (ServiceRecognizerBase.telemetryDataEnabled !== true) { var withTelemetry = JSON.parse(SpeechServiceConfigJson); var replacement = { context: { system: withTelemetry.context.system, }, }; SpeechServiceConfigJson = JSON.stringify(replacement); } if (SpeechServiceConfigJson) { // && this.privConnectionId !== this.privSpeechServiceConfigConnectionId) { _this.privSpeechServiceConfigConnectionId = _this.privConnectionId; return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "speech.config", requestSession.requestId, "application/json", SpeechServiceConfigJson)); } return Exports_2.PromiseHelper.fromResult(true); }; this.sendAudio = function (audioStreamNode) { // NOTE: Home-baked promises crash ios safari during the invocation // of the error callback chain (looks like the recursion is way too deep, and // it blows up the stack). The following construct is a stop-gap that does not // bubble the error up the callback chain and hence circumvents this problem. // TODO: rewrite with ES6 promises. var deferred = new Exports_2.Deferred(); // The time we last sent data to the service. var nextSendTime = Date.now(); var audioFormat = _this.privAudioSource.format; // Max amount to send before we start to throttle var fastLaneSizeMs = _this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000"); var maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10); var startRecogNumber = _this.privRequestSession.recogNumber; var readAndUploadCycle = function () { // If speech is done, stop sending audio. if (!_this.privIsDisposed && !_this.privRequestSession.isSpeechEnded && _this.privRequestSession.isRecognizing && _this.privRequestSession.recogNumber === startRecogNumber) { _this.fetchConnection().on(function (connection) { audioStreamNode.read().on(function (audioStreamChunk) { // we have a new audio chunk to upload. if (_this.privRequestSession.isSpeechEnded) { // If service already recognized audio end then don't send any more audio deferred.resolve(true); return; } var payload; var sendDelay; if (audioStreamChunk.isEnd) { payload = null; sendDelay = 0; } else { payload = audioStreamChunk.buffer; _this.privRequestSession.onAudioSent(payload.byteLength); if (maxSendUnthrottledBytes >= _this.privRequestSession.bytesSent) { sendDelay = 0; } else { sendDelay = Math.max(0, nextSendTime - Date.now()); } } // Are we ready to send, or need we delay more? setTimeout(function () { if (payload !== null) { nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2)); } var uploaded = connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Binary, "audio", _this.privRequestSession.requestId, null, payload)); if (!audioStreamChunk.isEnd) { uploaded.continueWith(function (_) { // Regardless of success or failure, schedule the next upload. // If the underlying connection was broken, the next cycle will // get a new connection and re-transmit missing audio automatically. readAndUploadCycle(); }); } else { // the audio stream has been closed, no need to schedule next // read-upload cycle. _this.privRequestSession.onSpeechEnded(); deferred.resolve(true); } }, sendDelay); }, function (error) { if (_this.privRequestSession.isSpeechEnded) { // For whatever reason, Reject is used to remove queue subscribers inside // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which // means that sometimes things can be rejected in normal circumstances, without // any errors. deferred.resolve(true); // TODO: remove the argument, it's is completely meaningless. } else { // Only reject, if there was a proper error. deferred.reject(error); } }); }, function (error) { deferred.reject(error); }); } }; readAndUploadCycle(); return deferred.promise(); }; this.fetchConnection = function () { if (_this.fetchConnectionOverride !== undefined) { return _this.fetchConnectionOverride(); } return _this.configureConnection(); }; if (!authentication) { throw new Exports_2.ArgumentNullError("authentication"); } if (!connectionFactory) { throw new Exports_2.ArgumentNullError("connectionFactory"); } if (!audioSource) { throw new Exports_2.ArgumentNullError("audioSource"); } if (!recognizerConfig) { throw new Exports_2.ArgumentNullError("recognizerConfig"); } this.privMustReportEndOfStream = false; this.privAuthentication = authentication; this.privConnectionFactory = connectionFactory; this.privAudioSource = audioSource; this.privRecognizerConfig = recognizerConfig; this.privIsDisposed = false; this.privRecognizer = recognizer; this.privRequestSession = new Exports_4.RequestSession(this.privAudioSource.id()); this.privConnectionEvents = new Exports_2.EventSource(); this.privDynamicGrammar = new Exports_4.DynamicGrammarBuilder(); this.privSpeechContext = new Exports_4.SpeechContext(this.privDynamicGrammar); this.privAgentConfig = new Exports_4.AgentConfig(); } Object.defineProperty(ServiceRecognizerBase.prototype, "audioSource", { get: function () { return this.privAudioSource; }, enumerable: true, configurable: true }); Object.defineProperty(ServiceRecognizerBase.prototype, "speechContext", { get: function () { return this.privSpeechContext; }, enumerable: true, configurable: true }); Object.defineProperty(ServiceRecognizerBase.prototype, "dynamicGrammar", { get: function () { return this.privDynamicGrammar; }, enumerable: true, configurable: true }); Object.defineProperty(ServiceRecognizerBase.prototype, "agentConfig", { get: function () { return this.privAgentConfig; }, enumerable: true, configurable: true }); ServiceRecognizerBase.prototype.isDisposed = function () { return this.privIsDisposed; }; ServiceRecognizerBase.prototype.dispose = function (reason) { this.privIsDisposed = true; if (this.privConnectionConfigurationPromise) { this.privConnectionConfigurationPromise.onSuccessContinueWith(function (connection) { connection.dispose(reason); }); } }; Object.defineProperty(ServiceRecognizerBase.prototype, "connectionEvents", { get: function () { return this.privConnectionEvents; }, enumerable: true, configurable: true }); Object.defineProperty(ServiceRecognizerBase.prototype, "recognitionMode", { get: function () { return this.privRecognizerConfig.recognitionMode; }, enumerable: true, configurable: true }); ServiceRecognizerBase.prototype.recognize = function (recoMode, successCallback, errorCallBack) { var _this = this; if (this.recognizeOverride !== undefined) { return this.recognizeOverride(recoMode, successCallback, errorCallBack); } // Clear the existing configuration promise to force a re-transmission of config and context. this.privConnectionConfigurationPromise = null; this.privRecognizerConfig.recognitionMode = recoMode; this.privRequestSession.startNewRecognition(); this.privRequestSession.listenForServiceTelemetry(this.privAudioSource.events); // Start the connection to the service. The promise this will create is stored and will be used by configureConnection(). this.connectImpl(); return this.audioSource .attach(this.privRequestSession.audioNodeId) .continueWithPromise(function (result) { var audioNode; if (result.isError) { _this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.ConnectionFailure, result.error, successCallback); return Exports_2.PromiseHelper.fromError(result.error); } else { audioNode = new Exports_1.ReplayableAudioNode(result.result, _this.audioSource.format); _this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false); } return _this.audioSource.deviceInfo.onSuccessContinueWithPromise(function (deviceInfo) { _this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo }; return _this.configureConnection() .on(function (_) { var sessionStartEventArgs = new Exports_3.SessionEventArgs(_this.privRequestSession.sessionId); if (!!_this.privRecognizer.sessionStarted) { _this.privRecognizer.sessionStarted(_this.privRecognizer, sessionStartEventArgs); } var messageRetrievalPromise = _this.receiveMessage(successCallback, errorCallBack); var audioSendPromise = _this.sendAudio(audioNode); /* tslint:disable:no-empty */ audioSendPromise.on(function (_) { }, function (error) { _this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.RuntimeError, error, successCallback); }); var completionPromise = Exports_2.PromiseHelper.whenAll([messageRetrievalPromise, audioSendPromise]); return completionPromise.on(function (r) { return true; }, function (error) { _this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.RuntimeError, error, successCallback); }); }, function (error) { _this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.ConnectionFailure, error, successCallback); }).continueWithPromise(function (result) { if (result.isError) { return Exports_2.PromiseHelper.fromError(result.error); } else { return Exports_2.PromiseHelper.fromResult(true); } }); }); }); }; ServiceRecognizerBase.prototype.stopRecognizing = function () { if (this.privRequestSession.isRecognizing) { this.privRequestSession.onStopRecognizing(); this.sendTelemetryData(); this.audioSource.turnOff(); this.sendFinalAudio(); this.privRequestSession.dispose(); } }; ServiceRecognizerBase.prototype.connect = function () { this.connectImpl().result(); }; ServiceRecognizerBase.prototype.disconnect = function () { if (this.disconnectOverride !== undefined) { this.disconnectOverride(); return; } this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.NoError, "Disconnecting", undefined); if (this.privConnectionPromise.result().isCompleted) { if (!this.privConnectionPromise.result().isError) { this.privConnectionPromise.result().result.dispose(); this.privConnectionPromise = null; } } else { this.privConnectionPromise.onSuccessContinueWith(function (connection) { connection.dispose(); }); } }; ServiceRecognizerBase.prototype.sendMessage = function (message) { }; // Cancels recognition. ServiceRecognizerBase.prototype.cancelRecognitionLocal = function (cancellationReason, errorCode, error, cancelRecoCallback) { if (!!this.privRequestSession.isRecognizing) { this.privRequestSession.onStopRecognizing(); this.sendTelemetryData(); this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, cancellationReason, errorCode, error, cancelRecoCallback); } }; // Establishes a websocket connection to the end point. ServiceRecognizerBase.prototype.connectImpl = function (isUnAuthorized) { var _this = this; if (isUnAuthorized === void 0) { isUnAuthorized = false; } if (this.connectImplOverride !== undefined) { return this.connectImplOverride(isUnAuthorized); } if (this.privConnectionPromise) { if (this.privConnectionPromise.result().isCompleted && (this.privConnectionPromise.result().isError || this.privConnectionPromise.result().result.state() === Exports_2.ConnectionState.Disconnected)) { this.privConnectionId = null; this.privConnectionPromise = null; return this.connectImpl(); } else { return this.privConnectionPromise; } } this.privAuthFetchEventId = Exports_2.createNoDashGuid(); this.privConnectionId = Exports_2.createNoDashGuid(); this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId); var authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId); this.privConnectionPromise = authPromise .continueWithPromise(function (result) { if (result.isError) { _this.privRequestSession.onAuthCompleted(true, result.error); throw new Error(result.error); } else { _this.privRequestSession.onAuthCompleted(false); } var connection = _this.privConnectionFactory.create(_this.privRecognizerConfig, result.result, _this.privConnectionId); _this.privRequestSession.listenForServiceTelemetry(connection.events); // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away, // it'll stop sending events. connection.events.attach(function (event) { _this.connectionEvents.onEvent(event); }); return connection.open().onSuccessContinueWithPromise(function (response) { if (response.statusCode === 200) { _this.privRequestSession.onPreConnectionStart(_this.privAuthFetchEventId, _this.privConnectionId); _this.privRequestSession.onConnectionEstablishCompleted(response.statusCode); return Exports_2.PromiseHelper.fromResult(connection); } else if (response.statusCode === 403 && !isUnAuthorized) { return _this.connectImpl(true); } else { _this.privRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason); return Exports_2.PromiseHelper.fromError("Unable to contact server. StatusCode: " + response.statusCode + ", " + _this.privRecognizerConfig.parameters.getProperty(Exports_3.PropertyId.SpeechServiceConnection_Endpoint) + " Reason: " + response.reason); } }); }); return this.privConnectionPromise; }; ServiceRecognizerBase.prototype.sendFinalAudio = function () { var _this = this; var deferred = new Exports_2.Deferred(); this.fetchConnection().on(function (connection) { connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Binary, "audio", _this.privRequestSession.requestId, null, null)).on(function (_) { deferred.resolve(true); }, function (error) { deferred.reject(error); }); }, function (error) { deferred.reject(error); }); return deferred.promise(); }; // Takes an established websocket connection to the endpoint and sends speech configuration information. ServiceRecognizerBase.prototype.configureConnection = function () { var _this = this; if (this.configConnectionOverride !== undefined) { return this.configConnectionOverride(); } if (this.privConnectionConfigurationPromise) { if (this.privConnectionConfigurationPromise.result().isCompleted && (this.privConnectionConfigurationPromise.result().isError || this.privConnectionConfigurationPromise.result().result.state() === Exports_2.ConnectionState.Disconnected)) { this.privConnectionConfigurationPromise = null; return this.configureConnection(); } else { return this.privConnectionConfigurationPromise; } } this.privConnectionConfigurationPromise = this.connectImpl().onSuccessContinueWithPromise(function (connection) { return _this.sendSpeechServiceConfig(connection, _this.privRequestSession, _this.privRecognizerConfig.SpeechServiceConfig.serialize()) .onSuccessContinueWithPromise(function (_) { return _this.sendSpeechContext(connection).onSuccessContinueWith(function (_) { return connection; }); }); }); return this.privConnectionConfigurationPromise; }; ServiceRecognizerBase.telemetryDataEnabled = true; return ServiceRecognizerBase; }()); exports.ServiceRecognizerBase = ServiceRecognizerBase; //# sourceMappingURL=ServiceRecognizerBase.js.map