// @euirim/microsoft-cognitiveservices-speech-sdk
// Version:
// Microsoft Cognitive Services Speech SDK for JavaScript
// 512 lines (510 loc) • 28.8 kB
// JavaScript
"use strict";
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
Object.defineProperty(exports, "__esModule", { value: true });
var Exports_1 = require("../common.browser/Exports");
var Exports_2 = require("../common/Exports");
var Exports_3 = require("../sdk/Exports");
var Exports_4 = require("./Exports");
var SpeechConnectionMessage_Internal_1 = require("./SpeechConnectionMessage.Internal");
var ServiceRecognizerBase = /** @class */ (function () {
/**
 * Base recognizer: owns the request session, the speech context and the
 * per-instance helpers that talk to the service connection (telemetry upload,
 * message pump, config/context upload and audio upload).
 *
 * @param authentication Stored as privAuthentication; required.
 * @param connectionFactory Stored as privConnectionFactory; required.
 * @param audioSource Stored as privAudioSource; its id() seeds the request session; required.
 * @param recognizerConfig Stored as privRecognizerConfig; required.
 * @param recognizer Stored as privRecognizer and used as the sender of raised events; not validated.
 * @throws {ArgumentNullError} When any of the first four arguments is falsy.
 */
function ServiceRecognizerBase(authentication, connectionFactory, audioSource, recognizerConfig, recognizer) {
    var _this = this;
    // Optional hooks: when assigned, the matching method delegates to the hook.
    this.recognizeOverride = undefined;
    this.disconnectOverride = undefined;
    /**
     * Pulls the session telemetry and, unless telemetry is disabled, the
     * recognizer is disposed or there is nothing to report, hands it to the
     * optional static telemetryData hook and sends it as a "telemetry" message.
     */
    this.sendTelemetryData = function () {
        var telemetryData = _this.privRequestSession.getTelemetry();
        if (ServiceRecognizerBase.telemetryDataEnabled !== true ||
            _this.privIsDisposed ||
            null === telemetryData) {
            return Exports_2.PromiseHelper.fromResult(true);
        }
        if (!!ServiceRecognizerBase.telemetryData) {
            try {
                ServiceRecognizerBase.telemetryData(telemetryData);
            }
            catch (_a) {
                // Best effort: a throwing telemetry hook must not prevent the upload below.
            }
        }
        return _this.fetchConnection().onSuccessContinueWith(function (connection) {
            return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "telemetry", _this.privRequestSession.requestId, "application/json", telemetryData));
        });
    };
    this.receiveMessageOverride = undefined;
    /**
     * Message pump: reads one message from the connection, handles the paths
     * common to all recognizers, forwards everything else to
     * processTypeSpecificMessages, then calls itself to read the next message.
     * The pump stops once the recognizer is disposed or the session is no
     * longer recognizing.
     */
    this.receiveMessage = function (successCallback, errorCallBack) {
        return _this.fetchConnection().on(function (connection) {
            return connection.read()
                .onSuccessContinueWithPromise(function (message) {
                if (_this.receiveMessageOverride !== undefined) {
                    return _this.receiveMessageOverride();
                }
                if (_this.privIsDisposed || !_this.privRequestSession.isRecognizing) {
                    // We're done.
                    return Exports_2.PromiseHelper.fromResult(undefined);
                }
                // No message: the queue is being drained. The session is still
                // recognizing (checked just above), so keep reading.
                if (!message) {
                    return _this.receiveMessage(successCallback, errorCallBack);
                }
                var connectionMessage = SpeechConnectionMessage_Internal_1.SpeechConnectionMessage.fromConnectionMessage(message);
                // Messages that belong to another request id are skipped.
                if (connectionMessage.requestId.toLowerCase() === _this.privRequestSession.requestId.toLowerCase()) {
                    switch (connectionMessage.path.toLowerCase()) {
                        case "turn.start":
                            _this.privMustReportEndOfStream = true;
                            break;
                        case "speech.startdetected":
                            var speechStartDetected = Exports_4.SpeechDetected.fromJSON(connectionMessage.textBody);
                            var speechStartEventArgs = new Exports_3.RecognitionEventArgs(speechStartDetected.Offset, _this.privRequestSession.sessionId);
                            if (!!_this.privRecognizer.speechStartDetected) {
                                _this.privRecognizer.speechStartDetected(_this.privRecognizer, speechStartEventArgs);
                            }
                            break;
                        case "speech.enddetected":
                            var json = void 0;
                            if (connectionMessage.textBody.length > 0) {
                                json = connectionMessage.textBody;
                            }
                            else {
                                // If the request was empty, the JSON returned is empty.
                                // The substitute must be valid JSON (quoted property name),
                                // otherwise it cannot be parsed.
                                json = "{ \"Offset\": 0 }";
                            }
                            var speechStopDetected = Exports_4.SpeechDetected.fromJSON(json);
                            // Only shrink the buffers for continuous recognition.
                            // For single shot, the speech.phrase message will come after the speech.end and it should own buffer shrink.
                            if (_this.privRecognizerConfig.isContinuousRecognition) {
                                _this.privRequestSession.onServiceRecognized(speechStopDetected.Offset + _this.privRequestSession.currentTurnAudioOffset);
                            }
                            var speechStopEventArgs = new Exports_3.RecognitionEventArgs(speechStopDetected.Offset + _this.privRequestSession.currentTurnAudioOffset, _this.privRequestSession.sessionId);
                            if (!!_this.privRecognizer.speechEndDetected) {
                                _this.privRecognizer.speechEndDetected(_this.privRecognizer, speechStopEventArgs);
                            }
                            break;
                        case "turn.end":
                            _this.sendTelemetryData();
                            if (_this.privRequestSession.isSpeechEnded && _this.privMustReportEndOfStream) {
                                _this.privMustReportEndOfStream = false;
                                _this.cancelRecognitionLocal(Exports_3.CancellationReason.EndOfStream, Exports_3.CancellationErrorCode.NoError, undefined, successCallback);
                            }
                            var sessionStopEventArgs = new Exports_3.SessionEventArgs(_this.privRequestSession.sessionId);
                            _this.privRequestSession.onServiceTurnEndResponse(_this.privRecognizerConfig.isContinuousRecognition);
                            if (!_this.privRecognizerConfig.isContinuousRecognition || _this.privRequestSession.isSpeechEnded) {
                                if (!!_this.privRecognizer.sessionStopped) {
                                    _this.privRecognizer.sessionStopped(_this.privRecognizer, sessionStopEventArgs);
                                }
                                return Exports_2.PromiseHelper.fromResult(true);
                            }
                            else {
                                // Continuous recognition goes on: re-send the speech context for the next turn.
                                _this.fetchConnection().onSuccessContinueWith(function (connection) {
                                    _this.sendSpeechContext(connection);
                                });
                            }
                            // turn.end is fully handled here; do not fall through to the
                            // type-specific handler.
                            break;
                        default:
                            _this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack);
                    }
                }
                return _this.receiveMessage(successCallback, errorCallBack);
            });
        }, function (error) {
            // Nothing is done here when the connection cannot be fetched.
            // NOTE(review): recognize() attaches its own error handler to
            // configureConnection(); presumably that is where this gets reported.
        });
    };
    /**
     * Sends the serialized speech context as a "speech.context" message;
     * resolves immediately when the context serializes to a falsy value.
     */
    this.sendSpeechContext = function (connection) {
        var speechContextJson = _this.speechContext.toJSON();
        if (speechContextJson) {
            return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "speech.context", _this.privRequestSession.requestId, "application/json", speechContextJson));
        }
        return Exports_2.PromiseHelper.fromResult(true);
    };
    this.connectImplOverride = undefined;
    this.configConnectionOverride = undefined;
    this.fetchConnectionOverride = undefined;
    /**
     * Sends the service configuration as a "speech.config" message. When
     * telemetry is disabled only context.system is kept. An empty/undefined
     * configuration is skipped and the call resolves immediately.
     */
    this.sendSpeechServiceConfig = function (connection, requestSession, SpeechServiceConfigJson) {
        // filter out anything that is not required for the service to work.
        // Only parse when there is something to parse: JSON.parse throws on
        // undefined and on the empty string.
        if (SpeechServiceConfigJson && ServiceRecognizerBase.telemetryDataEnabled !== true) {
            var withTelemetry = JSON.parse(SpeechServiceConfigJson);
            var replacement = {
                context: {
                    system: withTelemetry.context.system,
                },
            };
            SpeechServiceConfigJson = JSON.stringify(replacement);
        }
        if (SpeechServiceConfigJson) {
            _this.privSpeechServiceConfigConnectionId = _this.privConnectionId;
            return connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Text, "speech.config", requestSession.requestId, "application/json", SpeechServiceConfigJson));
        }
        return Exports_2.PromiseHelper.fromResult(true);
    };
    /**
     * Reads chunks from audioStreamNode and uploads them as binary "audio"
     * messages until the stream ends, speech ends, recognition stops or a new
     * recognition is started. The returned promise settles when uploading is
     * over (resolved) or a read/connection error occurred (rejected).
     */
    this.sendAudio = function (audioStreamNode) {
        // NOTE: Home-baked promises crash ios safari during the invocation
        // of the error callback chain (looks like the recursion is way too deep, and
        // it blows up the stack). The following construct is a stop-gap that does not
        // bubble the error up the callback chain and hence circumvents this problem.
        // TODO: rewrite with ES6 promises.
        var deferred = new Exports_2.Deferred();
        // Earliest time at which the next throttled chunk may be sent.
        var nextSendTime = Date.now();
        var audioFormat = _this.privAudioSource.format;
        // Max amount to send before we start to throttle
        var fastLaneSizeMs = _this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
        var maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
        // Lets an upload loop started for an earlier recognition notice it is stale.
        var startRecogNumber = _this.privRequestSession.recogNumber;
        var readAndUploadCycle = function () {
            // If speech is done, stop sending audio.
            if (!_this.privIsDisposed &&
                !_this.privRequestSession.isSpeechEnded &&
                _this.privRequestSession.isRecognizing &&
                _this.privRequestSession.recogNumber === startRecogNumber) {
                _this.fetchConnection().on(function (connection) {
                    audioStreamNode.read().on(function (audioStreamChunk) {
                        // we have a new audio chunk to upload.
                        if (_this.privRequestSession.isSpeechEnded) {
                            // If service already recognized audio end then don't send any more audio
                            deferred.resolve(true);
                            return;
                        }
                        var payload;
                        var sendDelay;
                        if (audioStreamChunk.isEnd) {
                            payload = null;
                            sendDelay = 0;
                        }
                        else {
                            payload = audioStreamChunk.buffer;
                            _this.privRequestSession.onAudioSent(payload.byteLength);
                            if (maxSendUnthrottledBytes >= _this.privRequestSession.bytesSent) {
                                sendDelay = 0;
                            }
                            else {
                                sendDelay = Math.max(0, nextSendTime - Date.now());
                            }
                        }
                        // Are we ready to send, or need we delay more?
                        setTimeout(function () {
                            if (payload !== null) {
                                // Next slot is half of this chunk's play time away,
                                // i.e. throttled upload runs at up to twice real time.
                                nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
                            }
                            var uploaded = connection.send(new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Binary, "audio", _this.privRequestSession.requestId, null, payload));
                            if (!audioStreamChunk.isEnd) {
                                uploaded.continueWith(function (_) {
                                    // Regardless of success or failure, schedule the next upload.
                                    // If the underlying connection was broken, the next cycle will
                                    // get a new connection and re-transmit missing audio automatically.
                                    readAndUploadCycle();
                                });
                            }
                            else {
                                // the audio stream has been closed, no need to schedule next
                                // read-upload cycle.
                                _this.privRequestSession.onSpeechEnded();
                                deferred.resolve(true);
                            }
                        }, sendDelay);
                    }, function (error) {
                        if (_this.privRequestSession.isSpeechEnded) {
                            // For whatever reason, Reject is used to remove queue subscribers inside
                            // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which
                            // means that sometimes things can be rejected in normal circumstances, without
                            // any errors.
                            deferred.resolve(true); // TODO: remove the argument, it is completely meaningless.
                        }
                        else {
                            // Only reject, if there was a proper error.
                            deferred.reject(error);
                        }
                    });
                }, function (error) {
                    deferred.reject(error);
                });
            }
            else {
                // Uploading is over (disposed, speech ended, recognition stopped or
                // superseded). Every other exit of a cycle either schedules the next
                // cycle or settles the deferred, so this is the only place where the
                // promise would otherwise stay pending forever.
                deferred.resolve(true);
            }
        };
        readAndUploadCycle();
        return deferred.promise();
    };
    /**
     * Returns the promise for a configured connection, via the override hook
     * when one is set and configureConnection() otherwise.
     */
    this.fetchConnection = function () {
        if (_this.fetchConnectionOverride !== undefined) {
            return _this.fetchConnectionOverride();
        }
        return _this.configureConnection();
    };
    if (!authentication) {
        throw new Exports_2.ArgumentNullError("authentication");
    }
    if (!connectionFactory) {
        throw new Exports_2.ArgumentNullError("connectionFactory");
    }
    if (!audioSource) {
        throw new Exports_2.ArgumentNullError("audioSource");
    }
    if (!recognizerConfig) {
        throw new Exports_2.ArgumentNullError("recognizerConfig");
    }
    this.privMustReportEndOfStream = false;
    this.privAuthentication = authentication;
    this.privConnectionFactory = connectionFactory;
    this.privAudioSource = audioSource;
    this.privRecognizerConfig = recognizerConfig;
    this.privIsDisposed = false;
    this.privRecognizer = recognizer;
    this.privRequestSession = new Exports_4.RequestSession(this.privAudioSource.id());
    this.privConnectionEvents = new Exports_2.EventSource();
    this.privDynamicGrammar = new Exports_4.DynamicGrammarBuilder();
    this.privSpeechContext = new Exports_4.SpeechContext(this.privDynamicGrammar);
    this.privAgentConfig = new Exports_4.AgentConfig();
}
// Read-only accessor for the audio source stored in privAudioSource (no setter is defined).
Object.defineProperty(ServiceRecognizerBase.prototype, "audioSource", {
    configurable: true,
    enumerable: true,
    get: function () {
        var source = this.privAudioSource;
        return source;
    }
});
// Read-only accessor for the speech context stored in privSpeechContext (no setter is defined).
Object.defineProperty(ServiceRecognizerBase.prototype, "speechContext", {
    configurable: true,
    enumerable: true,
    get: function () {
        var context = this.privSpeechContext;
        return context;
    }
});
// Read-only accessor for the grammar builder stored in privDynamicGrammar (no setter is defined).
Object.defineProperty(ServiceRecognizerBase.prototype, "dynamicGrammar", {
    configurable: true,
    enumerable: true,
    get: function () {
        var grammar = this.privDynamicGrammar;
        return grammar;
    }
});
// Read-only accessor for the agent configuration stored in privAgentConfig (no setter is defined).
Object.defineProperty(ServiceRecognizerBase.prototype, "agentConfig", {
    configurable: true,
    enumerable: true,
    get: function () {
        var agent = this.privAgentConfig;
        return agent;
    }
});
// Reports the current value of the privIsDisposed flag (set by dispose()).
ServiceRecognizerBase.prototype.isDisposed = function () {
    var disposed = this.privIsDisposed;
    return disposed;
};
/**
 * Marks the recognizer as disposed and, when a connection configuration
 * promise exists, disposes the connection it succeeds with.
 * @param reason Passed through unchanged to connection.dispose().
 */
ServiceRecognizerBase.prototype.dispose = function (reason) {
    this.privIsDisposed = true;
    var configured = this.privConnectionConfigurationPromise;
    if (!configured) {
        return;
    }
    configured.onSuccessContinueWith(function (liveConnection) {
        liveConnection.dispose(reason);
    });
};
// Read-only accessor for the event source stored in privConnectionEvents (no setter is defined).
Object.defineProperty(ServiceRecognizerBase.prototype, "connectionEvents", {
    configurable: true,
    enumerable: true,
    get: function () {
        var events = this.privConnectionEvents;
        return events;
    }
});
// Read-only accessor for the recognition mode currently held by the recognizer configuration.
Object.defineProperty(ServiceRecognizerBase.prototype, "recognitionMode", {
    configurable: true,
    enumerable: true,
    get: function () {
        var config = this.privRecognizerConfig;
        return config.recognitionMode;
    }
});
/**
 * Starts a recognition: resets the cached connection configuration, begins a
 * new session, kicks off the connection, attaches the audio source and, once
 * the connection is configured, starts the message pump and the audio upload.
 *
 * @param recoMode Stored on the recognizer configuration as recognitionMode.
 * @param successCallback Passed to receiveMessage and to cancelRecognitionLocal.
 * @param errorCallBack Passed to receiveMessage.
 * @returns The recognizeOverride result when that hook is set; otherwise a
 *          promise that yields true on success and carries the error when
 *          attaching the audio or configuring the connection failed.
 */
ServiceRecognizerBase.prototype.recognize = function (recoMode, successCallback, errorCallBack) {
    var self = this;
    if (self.recognizeOverride !== undefined) {
        return self.recognizeOverride(recoMode, successCallback, errorCallBack);
    }
    // Every failure below is reported the same way; only code and error differ.
    var cancelWithError = function (errorCode, error) {
        self.cancelRecognitionLocal(Exports_3.CancellationReason.Error, errorCode, error, successCallback);
    };
    // Dropping the cached configuration promise forces config and context to be sent again.
    self.privConnectionConfigurationPromise = null;
    self.privRecognizerConfig.recognitionMode = recoMode;
    self.privRequestSession.startNewRecognition();
    self.privRequestSession.listenForServiceTelemetry(self.privAudioSource.events);
    // Kick off the connection now; configureConnection() picks up the promise stored by connectImpl().
    self.connectImpl();
    var attaching = self.audioSource.attach(self.privRequestSession.audioNodeId);
    return attaching.continueWithPromise(function (attachOutcome) {
        if (attachOutcome.isError) {
            cancelWithError(Exports_3.CancellationErrorCode.ConnectionFailure, attachOutcome.error);
            return Exports_2.PromiseHelper.fromError(attachOutcome.error);
        }
        var replayNode = new Exports_1.ReplayableAudioNode(attachOutcome.result, self.audioSource.format);
        self.privRequestSession.onAudioSourceAttachCompleted(replayNode, false);
        return self.audioSource.deviceInfo.onSuccessContinueWithPromise(function (deviceInfo) {
            self.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
            var onConfigured = function (_) {
                var startedArgs = new Exports_3.SessionEventArgs(self.privRequestSession.sessionId);
                if (!!self.privRecognizer.sessionStarted) {
                    self.privRecognizer.sessionStarted(self.privRecognizer, startedArgs);
                }
                var pumping = self.receiveMessage(successCallback, errorCallBack);
                var uploading = self.sendAudio(replayNode);
                uploading.on(function (_) { }, function (error) {
                    cancelWithError(Exports_3.CancellationErrorCode.RuntimeError, error);
                });
                var both = Exports_2.PromiseHelper.whenAll([pumping, uploading]);
                return both.on(function (r) {
                    return true;
                }, function (error) {
                    cancelWithError(Exports_3.CancellationErrorCode.RuntimeError, error);
                });
            };
            var onConfigureFailed = function (error) {
                cancelWithError(Exports_3.CancellationErrorCode.ConnectionFailure, error);
            };
            return self.configureConnection()
                .on(onConfigured, onConfigureFailed)
                .continueWithPromise(function (configureOutcome) {
                return configureOutcome.isError
                    ? Exports_2.PromiseHelper.fromError(configureOutcome.error)
                    : Exports_2.PromiseHelper.fromResult(true);
            });
        });
    });
};
/**
 * Stops an active recognition: marks the session as stopping, flushes
 * telemetry, turns the audio source off, sends the final (empty) audio
 * message and disposes the session. Does nothing when not recognizing.
 */
ServiceRecognizerBase.prototype.stopRecognizing = function () {
    var session = this.privRequestSession;
    if (!session.isRecognizing) {
        return;
    }
    session.onStopRecognizing();
    this.sendTelemetryData();
    this.audioSource.turnOff();
    this.sendFinalAudio();
    this.privRequestSession.dispose();
};
// Starts (or reuses) the service connection via connectImpl(); the value of result() is discarded.
ServiceRecognizerBase.prototype.connect = function () {
    var pendingConnection = this.connectImpl();
    pendingConnection.result();
};
/**
 * Cancels any active recognition and disposes the service connection.
 * Delegates entirely to disconnectOverride when that hook is set.
 *
 * Safe to call when no connection was ever started and safe to call
 * repeatedly: a missing connection promise is treated as "nothing to dispose".
 */
ServiceRecognizerBase.prototype.disconnect = function () {
    if (this.disconnectOverride !== undefined) {
        this.disconnectOverride();
        return;
    }
    this.cancelRecognitionLocal(Exports_3.CancellationReason.Error, Exports_3.CancellationErrorCode.NoError, "Disconnecting", undefined);
    var connectionPromise = this.privConnectionPromise;
    if (!connectionPromise) {
        // Never connected, or a previous disconnect() already released the connection.
        return;
    }
    var outcome = connectionPromise.result();
    if (outcome.isCompleted) {
        // A failed connection attempt has no connection to dispose; its promise is
        // left in place (connectImpl() replaces failed promises on the next attempt).
        if (!outcome.isError) {
            outcome.result.dispose();
            this.privConnectionPromise = null;
        }
    }
    else {
        // Still connecting: dispose the connection as soon as it is established.
        connectionPromise.onSuccessContinueWith(function (connection) {
            connection.dispose();
        });
    }
};
// No-op in this base implementation: the message argument is ignored and nothing is returned.
ServiceRecognizerBase.prototype.sendMessage = function (message) {
    return undefined;
};
/**
 * Cancels the recognition in progress, if any: marks the session as stopping,
 * flushes telemetry and forwards to cancelRecognition() together with the
 * session and request ids. Does nothing when the session is not recognizing.
 */
ServiceRecognizerBase.prototype.cancelRecognitionLocal = function (cancellationReason, errorCode, error, cancelRecoCallback) {
    var session = this.privRequestSession;
    if (!session.isRecognizing) {
        return;
    }
    session.onStopRecognizing();
    this.sendTelemetryData();
    this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, cancellationReason, errorCode, error, cancelRecoCallback);
};
/**
 * Establishes a websocket connection to the end point.
 *
 * The promise for the connection is cached in privConnectionPromise: a pending
 * or healthy promise is reused, one that failed or whose connection reports
 * Disconnected is discarded and a new attempt is started.
 *
 * @param isUnAuthorized When true the authentication is obtained with
 *        fetchOnExpiry() instead of fetch(), and a 403 response is not
 *        retried again. Defaults to false.
 * @returns The promise for the opened connection (or connectImplOverride's result).
 */
ServiceRecognizerBase.prototype.connectImpl = function (isUnAuthorized) {
    var _this = this;
    if (isUnAuthorized === void 0) { isUnAuthorized = false; }
    if (this.connectImplOverride !== undefined) {
        return this.connectImplOverride(isUnAuthorized);
    }
    if (this.privConnectionPromise) {
        var previous = this.privConnectionPromise.result();
        if (previous.isCompleted &&
            (previous.isError
                || previous.result.state() === Exports_2.ConnectionState.Disconnected)) {
            this.privConnectionId = null;
            this.privConnectionPromise = null;
            // Keep the caller's authorization mode for the fresh attempt.
            return this.connectImpl(isUnAuthorized);
        }
        else {
            return this.privConnectionPromise;
        }
    }
    this.privAuthFetchEventId = Exports_2.createNoDashGuid();
    this.privConnectionId = Exports_2.createNoDashGuid();
    this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
    var authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId);
    this.privConnectionPromise = authPromise
        .continueWithPromise(function (result) {
        if (result.isError) {
            _this.privRequestSession.onAuthCompleted(true, result.error);
            throw new Error(result.error);
        }
        else {
            _this.privRequestSession.onAuthCompleted(false);
        }
        var connection = _this.privConnectionFactory.create(_this.privRecognizerConfig, result.result, _this.privConnectionId);
        _this.privRequestSession.listenForServiceTelemetry(connection.events);
        // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
        // it'll stop sending events.
        connection.events.attach(function (event) {
            _this.connectionEvents.onEvent(event);
        });
        return connection.open().onSuccessContinueWithPromise(function (response) {
            if (response.statusCode === 200) {
                _this.privRequestSession.onPreConnectionStart(_this.privAuthFetchEventId, _this.privConnectionId);
                _this.privRequestSession.onConnectionEstablishCompleted(response.statusCode);
                return Exports_2.PromiseHelper.fromResult(connection);
            }
            else if (response.statusCode === 403 && !isUnAuthorized) {
                // Retry once with refreshed credentials. The cached promise is the
                // still-pending promise of this very attempt; unless it is cleared
                // the retry would just get that promise back and this chain would
                // wait on itself forever without ever re-authenticating.
                _this.privConnectionId = null;
                _this.privConnectionPromise = null;
                return _this.connectImpl(true);
            }
            else {
                _this.privRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason);
                return Exports_2.PromiseHelper.fromError("Unable to contact server. StatusCode: " + response.statusCode + ", " + _this.privRecognizerConfig.parameters.getProperty(Exports_3.PropertyId.SpeechServiceConnection_Endpoint) + " Reason: " + response.reason);
            }
        });
    });
    return this.privConnectionPromise;
};
/**
 * Sends a binary "audio" message with a null payload on the current
 * connection. The returned promise resolves with true once the message is
 * sent and is rejected when fetching the connection or sending fails.
 */
ServiceRecognizerBase.prototype.sendFinalAudio = function () {
    var self = this;
    var completion = new Exports_2.Deferred();
    var fail = function (error) {
        completion.reject(error);
    };
    this.fetchConnection().on(function (connection) {
        var endOfAudio = new SpeechConnectionMessage_Internal_1.SpeechConnectionMessage(Exports_2.MessageType.Binary, "audio", self.privRequestSession.requestId, null, null);
        connection.send(endOfAudio).on(function (_) {
            completion.resolve(true);
        }, fail);
    }, fail);
    return completion.promise();
};
/**
 * Takes an established websocket connection to the endpoint and sends speech
 * configuration information (service config first, then the speech context).
 *
 * The resulting promise is cached in privConnectionConfigurationPromise and
 * reused until it has failed or its connection reports Disconnected, in which
 * case a fresh connection is configured.
 *
 * @returns A promise yielding the configured connection (or the
 *          configConnectionOverride result when that hook is set).
 */
ServiceRecognizerBase.prototype.configureConnection = function () {
    var self = this;
    if (self.configConnectionOverride !== undefined) {
        return self.configConnectionOverride();
    }
    var cached = self.privConnectionConfigurationPromise;
    if (cached) {
        var outcome = cached.result();
        var unusable = outcome.isCompleted &&
            (outcome.isError || outcome.result.state() === Exports_2.ConnectionState.Disconnected);
        if (!unusable) {
            return cached;
        }
        // Forget the broken configuration and start over.
        self.privConnectionConfigurationPromise = null;
        return self.configureConnection();
    }
    var configureOpened = function (connection) {
        var serializedConfig = self.privRecognizerConfig.SpeechServiceConfig.serialize();
        return self.sendSpeechServiceConfig(connection, self.privRequestSession, serializedConfig)
            .onSuccessContinueWithPromise(function (_) {
            return self.sendSpeechContext(connection).onSuccessContinueWith(function (_) {
                return connection;
            });
        });
    };
    self.privConnectionConfigurationPromise = self.connectImpl().onSuccessContinueWithPromise(configureOpened);
    return self.privConnectionConfigurationPromise;
};
// Static telemetry switch, on by default. While it is not exactly `true`,
// sendTelemetryData() sends nothing and sendSpeechServiceConfig() reduces the
// service configuration to its context.system part.
ServiceRecognizerBase.telemetryDataEnabled = true;
return ServiceRecognizerBase;
}());
exports.ServiceRecognizerBase = ServiceRecognizerBase;
//# sourceMappingURL=ServiceRecognizerBase.js.map