// @euirim/microsoft-cognitiveservices-speech-sdk
// Version:
// Microsoft Cognitive Services Speech SDK for JavaScript
// 478 lines (476 loc) • 26.3 kB
// JavaScript
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { ReplayableAudioNode } from "../common.browser/Exports";
import { ArgumentNullError, ConnectionState, createNoDashGuid, Deferred, EventSource, MessageType, PromiseHelper, } from "../common/Exports";
import { CancellationErrorCode, CancellationReason, PropertyId, RecognitionEventArgs, SessionEventArgs, } from "../sdk/Exports";
import { AgentConfig, DynamicGrammarBuilder, RequestSession, SpeechContext, SpeechDetected, } from "./Exports";
import { SpeechConnectionMessage } from "./SpeechConnectionMessage.Internal";
/**
 * Shared plumbing for recognizers that talk to the speech service over a
 * connection created by the supplied connection factory.
 *
 * The class wires together authentication, connection setup, upload of the
 * "speech.config" / "speech.context" messages, audio upload, and the receive
 * loop that dispatches service messages to the recognizer's event callbacks.
 * The `*Override` properties let callers replace individual steps.
 *
 * NOTE: this class calls `this.processTypeSpecificMessages(...)` and
 * `this.cancelRecognition(...)`, neither of which is defined here; they are
 * expected to be provided elsewhere (presumably by a subclass).
 */
export class ServiceRecognizerBase {
    /**
     * @param authentication Object exposing `fetch(id)` / `fetchOnExpiry(id)`, used to obtain auth info.
     * @param connectionFactory Object exposing `create(config, authInfo, connectionId)`.
     * @param audioSource Audio source; its `id()`, `format`, `events`, `deviceInfo`, `attach()` and `turnOff()` are used.
     * @param recognizerConfig Recognizer configuration (parameters, SpeechServiceConfig, recognition mode).
     * @param recognizer Recognizer whose event callbacks (sessionStarted, speechStartDetected, ...) are invoked.
     * @throws ArgumentNullError when authentication, connectionFactory, audioSource or recognizerConfig is falsy.
     */
    constructor(authentication, connectionFactory, audioSource, recognizerConfig, recognizer) {
        // Optional hooks; when set they replace recognize() / disconnect() entirely.
        this.recognizeOverride = undefined;
        this.disconnectOverride = undefined;
        /**
         * Hands the session's telemetry to the optional static `telemetryData`
         * callback and sends it to the service as a "telemetry" message.
         * Does nothing (resolves to true) when telemetry is disabled, the
         * recognizer is disposed, or there is no telemetry to send.
         */
        this.sendTelemetryData = () => {
            const telemetryData = this.privRequestSession.getTelemetry();
            if (ServiceRecognizerBase.telemetryDataEnabled !== true ||
                this.privIsDisposed ||
                null === telemetryData) {
                return PromiseHelper.fromResult(true);
            }
            if (!!ServiceRecognizerBase.telemetryData) {
                try {
                    ServiceRecognizerBase.telemetryData(telemetryData);
                }
                catch (_a) {
                    // Deliberately ignored: a failing telemetry callback must not
                    // prevent the telemetry from being sent to the service.
                }
            }
            return this.fetchConnection().onSuccessContinueWith((connection) => {
                return connection.send(new SpeechConnectionMessage(MessageType.Text, "telemetry", this.privRequestSession.requestId, "application/json", telemetryData));
            });
        };
        // Optional hook; when set it replaces the message handling inside receiveMessage.
        this.receiveMessageOverride = undefined;
        /**
         * Receive loop: reads one message from the connection, dispatches it
         * based on its path, then re-arms itself until recognition stops or
         * the recognizer is disposed.
         *
         * @param successCallback Forwarded to cancelRecognitionLocal / processTypeSpecificMessages.
         * @param errorCallBack Forwarded to processTypeSpecificMessages.
         */
        this.receiveMessage = (successCallback, errorCallBack) => {
            return this.fetchConnection().on((connection) => {
                return connection.read()
                    .onSuccessContinueWithPromise((message) => {
                        if (this.receiveMessageOverride !== undefined) {
                            return this.receiveMessageOverride();
                        }
                        if (this.privIsDisposed || !this.privRequestSession.isRecognizing) {
                            // We're done.
                            return PromiseHelper.fromResult(undefined);
                        }
                        // indicates we are draining the queue and it came with no message;
                        if (!message) {
                            if (!this.privRequestSession.isRecognizing) {
                                return PromiseHelper.fromResult(true);
                            }
                            else {
                                return this.receiveMessage(successCallback, errorCallBack);
                            }
                        }
                        const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);
                        // Only messages that belong to the current request are processed.
                        if (connectionMessage.requestId.toLowerCase() === this.privRequestSession.requestId.toLowerCase()) {
                            switch (connectionMessage.path.toLowerCase()) {
                                case "turn.start": {
                                    this.privMustReportEndOfStream = true;
                                    break;
                                }
                                case "speech.startdetected": {
                                    const speechStartDetected = SpeechDetected.fromJSON(connectionMessage.textBody);
                                    const speechStartEventArgs = new RecognitionEventArgs(speechStartDetected.Offset, this.privRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechStartDetected) {
                                        this.privRecognizer.speechStartDetected(this.privRecognizer, speechStartEventArgs);
                                    }
                                    break;
                                }
                                case "speech.enddetected": {
                                    let json;
                                    if (connectionMessage.textBody.length > 0) {
                                        json = connectionMessage.textBody;
                                    }
                                    else {
                                        // If the request was empty, the JSON returned is empty.
                                        // The fallback must be valid JSON (quoted key) so it can be parsed.
                                        json = "{ \"Offset\": 0 }";
                                    }
                                    const speechStopDetected = SpeechDetected.fromJSON(json);
                                    // Only shrink the buffers for continuous recognition.
                                    // For single shot, the speech.phrase message will come after the speech.end and it should own buffer shrink.
                                    if (this.privRecognizerConfig.isContinuousRecognition) {
                                        this.privRequestSession.onServiceRecognized(speechStopDetected.Offset + this.privRequestSession.currentTurnAudioOffset);
                                    }
                                    const speechStopEventArgs = new RecognitionEventArgs(speechStopDetected.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
                                    if (!!this.privRecognizer.speechEndDetected) {
                                        this.privRecognizer.speechEndDetected(this.privRecognizer, speechStopEventArgs);
                                    }
                                    break;
                                }
                                case "turn.end": {
                                    this.sendTelemetryData();
                                    if (this.privRequestSession.isSpeechEnded && this.privMustReportEndOfStream) {
                                        this.privMustReportEndOfStream = false;
                                        this.cancelRecognitionLocal(CancellationReason.EndOfStream, CancellationErrorCode.NoError, undefined, successCallback);
                                    }
                                    const sessionStopEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);
                                    this.privRequestSession.onServiceTurnEndResponse(this.privRecognizerConfig.isContinuousRecognition);
                                    if (!this.privRecognizerConfig.isContinuousRecognition || this.privRequestSession.isSpeechEnded) {
                                        if (!!this.privRecognizer.sessionStopped) {
                                            this.privRecognizer.sessionStopped(this.privRecognizer, sessionStopEventArgs);
                                        }
                                        // Single-shot recognition or end of audio: stop the receive loop.
                                        return PromiseHelper.fromResult(true);
                                    }
                                    else {
                                        // Continuous recognition: re-send the speech context for the next turn.
                                        this.fetchConnection().onSuccessContinueWith((connection) => {
                                            this.sendSpeechContext(connection);
                                        });
                                    }
                                    // turn.end is fully handled here; do not fall through to the
                                    // type-specific handler.
                                    break;
                                }
                                default: {
                                    this.processTypeSpecificMessages(connectionMessage, successCallback, errorCallBack);
                                }
                            }
                        }
                        return this.receiveMessage(successCallback, errorCallBack);
                    });
            }, (error) => {
                // NOTE(review): connection errors are intentionally not propagated from
                // here in the original code; confirm whether they should be surfaced.
            });
        };
        /**
         * Sends the current speech context as a "speech.context" message.
         * Resolves to true without sending when the context serializes to a falsy value.
         */
        this.sendSpeechContext = (connection) => {
            const speechContextJson = this.speechContext.toJSON();
            if (speechContextJson) {
                return connection.send(new SpeechConnectionMessage(MessageType.Text, "speech.context", this.privRequestSession.requestId, "application/json", speechContextJson));
            }
            return PromiseHelper.fromResult(true);
        };
        // Optional hooks replacing connectImpl / configureConnection / fetchConnection.
        this.connectImplOverride = undefined;
        this.configConnectionOverride = undefined;
        this.fetchConnectionOverride = undefined;
        /**
         * Sends the serialized service configuration as a "speech.config" message.
         * When telemetry is disabled only `context.system` is forwarded.
         * Resolves to true without sending when there is no configuration.
         *
         * @param connection Connection to send on.
         * @param requestSession Session providing the request id.
         * @param SpeechServiceConfigJson Serialized configuration (JSON string); may be empty.
         */
        this.sendSpeechServiceConfig = (connection, requestSession, SpeechServiceConfigJson) => {
            // Check for "nothing to send" before parsing, so an empty
            // configuration never reaches JSON.parse.
            if (!SpeechServiceConfigJson) {
                return PromiseHelper.fromResult(true);
            }
            let configJson = SpeechServiceConfigJson;
            // filter out anything that is not required for the service to work.
            if (ServiceRecognizerBase.telemetryDataEnabled !== true) {
                const withTelemetry = JSON.parse(SpeechServiceConfigJson);
                const replacement = {
                    context: {
                        system: withTelemetry.context.system,
                    },
                };
                configJson = JSON.stringify(replacement);
            }
            this.privSpeechServiceConfigConnectionId = this.privConnectionId;
            return connection.send(new SpeechConnectionMessage(MessageType.Text, "speech.config", requestSession.requestId, "application/json", configJson));
        };
        /**
         * Reads chunks from the audio node and uploads them as binary "audio"
         * messages until the stream ends or recognition stops. The first
         * "SPEECH-TransmitLengthBeforThrottleMs" milliseconds of audio are sent
         * unthrottled; afterwards sends are paced at twice the format's average byte rate.
         *
         * @param audioStreamNode Node whose `read()` yields `{ buffer, isEnd }` chunks.
         * @returns Promise of a Deferred that resolves when the end of the stream was sent.
         */
        this.sendAudio = (audioStreamNode) => {
            // NOTE: Home-baked promises crash ios safari during the invocation
            // of the error callback chain (looks like the recursion is way too deep, and
            // it blows up the stack). The following construct is a stop-gap that does not
            // bubble the error up the callback chain and hence circumvents this problem.
            // TODO: rewrite with ES6 promises.
            const deferred = new Deferred();
            // The time we last sent data to the service.
            let nextSendTime = Date.now();
            const audioFormat = this.privAudioSource.format;
            // Max amount to send before we start to throttle
            const fastLaneSizeMs = this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
            const maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
            // Used to detect that a newer recognition has started in the meantime.
            const startRecogNumber = this.privRequestSession.recogNumber;
            const readAndUploadCycle = () => {
                // If speech is done, stop sending audio.
                // NOTE(review): when this condition is false the deferred is left
                // unsettled; confirm whether callers rely on that.
                if (!this.privIsDisposed &&
                    !this.privRequestSession.isSpeechEnded &&
                    this.privRequestSession.isRecognizing &&
                    this.privRequestSession.recogNumber === startRecogNumber) {
                    this.fetchConnection().on((connection) => {
                        audioStreamNode.read().on((audioStreamChunk) => {
                            // we have a new audio chunk to upload.
                            if (this.privRequestSession.isSpeechEnded) {
                                // If service already recognized audio end then don't send any more audio
                                deferred.resolve(true);
                                return;
                            }
                            let payload;
                            let sendDelay;
                            if (audioStreamChunk.isEnd) {
                                // A null payload is sent as the final audio message.
                                payload = null;
                                sendDelay = 0;
                            }
                            else {
                                payload = audioStreamChunk.buffer;
                                this.privRequestSession.onAudioSent(payload.byteLength);
                                if (maxSendUnthrottledBytes >= this.privRequestSession.bytesSent) {
                                    sendDelay = 0;
                                }
                                else {
                                    sendDelay = Math.max(0, nextSendTime - Date.now());
                                }
                            }
                            // Are we ready to send, or need we delay more?
                            setTimeout(() => {
                                if (payload !== null) {
                                    nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
                                }
                                const uploaded = connection.send(new SpeechConnectionMessage(MessageType.Binary, "audio", this.privRequestSession.requestId, null, payload));
                                if (!audioStreamChunk.isEnd) {
                                    uploaded.continueWith((_) => {
                                        // Regardless of success or failure, schedule the next upload.
                                        // If the underlying connection was broken, the next cycle will
                                        // get a new connection and re-transmit missing audio automatically.
                                        readAndUploadCycle();
                                    });
                                }
                                else {
                                    // the audio stream has been closed, no need to schedule next
                                    // read-upload cycle.
                                    this.privRequestSession.onSpeechEnded();
                                    deferred.resolve(true);
                                }
                            }, sendDelay);
                        }, (error) => {
                            if (this.privRequestSession.isSpeechEnded) {
                                // For whatever reason, Reject is used to remove queue subscribers inside
                                // the Queue.DrainAndDispose invoked from DetachAudioNode down below, which
                                // means that sometimes things can be rejected in normal circumstances, without
                                // any errors.
                                deferred.resolve(true); // TODO: remove the argument, it is completely meaningless.
                            }
                            else {
                                // Only reject, if there was a proper error.
                                deferred.reject(error);
                            }
                        });
                    }, (error) => {
                        deferred.reject(error);
                    });
                }
            };
            readAndUploadCycle();
            return deferred.promise();
        };
        /**
         * Returns the promise for a configured connection, or whatever
         * `fetchConnectionOverride` returns when that hook is set.
         */
        this.fetchConnection = () => {
            if (this.fetchConnectionOverride !== undefined) {
                return this.fetchConnectionOverride();
            }
            return this.configureConnection();
        };
        if (!authentication) {
            throw new ArgumentNullError("authentication");
        }
        if (!connectionFactory) {
            throw new ArgumentNullError("connectionFactory");
        }
        if (!audioSource) {
            throw new ArgumentNullError("audioSource");
        }
        if (!recognizerConfig) {
            throw new ArgumentNullError("recognizerConfig");
        }
        this.privMustReportEndOfStream = false;
        this.privAuthentication = authentication;
        this.privConnectionFactory = connectionFactory;
        this.privAudioSource = audioSource;
        this.privRecognizerConfig = recognizerConfig;
        this.privIsDisposed = false;
        this.privRecognizer = recognizer;
        this.privRequestSession = new RequestSession(this.privAudioSource.id());
        this.privConnectionEvents = new EventSource();
        this.privDynamicGrammar = new DynamicGrammarBuilder();
        this.privSpeechContext = new SpeechContext(this.privDynamicGrammar);
        this.privAgentConfig = new AgentConfig();
    }
    /** The audio source passed to the constructor. */
    get audioSource() {
        return this.privAudioSource;
    }
    /** The SpeechContext sent to the service as "speech.context". */
    get speechContext() {
        return this.privSpeechContext;
    }
    /** The DynamicGrammarBuilder backing the speech context. */
    get dynamicGrammar() {
        return this.privDynamicGrammar;
    }
    /** The AgentConfig instance created for this recognizer. */
    get agentConfig() {
        return this.privAgentConfig;
    }
    /** @returns true once dispose() has been called. */
    isDisposed() {
        return this.privIsDisposed;
    }
    /**
     * Marks the recognizer as disposed and, if a configured connection was
     * requested, disposes that connection once it is available.
     *
     * @param reason Passed through to the connection's dispose().
     */
    dispose(reason) {
        this.privIsDisposed = true;
        if (this.privConnectionConfigurationPromise) {
            this.privConnectionConfigurationPromise.onSuccessContinueWith((connection) => {
                connection.dispose(reason);
            });
        }
    }
    /** Event source on which events of the underlying connection are re-raised. */
    get connectionEvents() {
        return this.privConnectionEvents;
    }
    /** The recognition mode currently stored in the recognizer configuration. */
    get recognitionMode() {
        return this.privRecognizerConfig.recognitionMode;
    }
    /**
     * Starts a recognition: connects, attaches the audio source, sends the
     * configuration, then runs the receive loop and the audio upload.
     *
     * @param recoMode Recognition mode stored into the recognizer configuration.
     * @param successCallback Forwarded to the receive loop and to cancellation handling.
     * @param errorCallBack Forwarded to the receive loop.
     * @returns A promise resolving to true, or failing with the underlying error.
     */
    recognize(recoMode, successCallback, errorCallBack) {
        if (this.recognizeOverride !== undefined) {
            return this.recognizeOverride(recoMode, successCallback, errorCallBack);
        }
        // Clear the existing configuration promise to force a re-transmission of config and context.
        this.privConnectionConfigurationPromise = null;
        this.privRecognizerConfig.recognitionMode = recoMode;
        this.privRequestSession.startNewRecognition();
        this.privRequestSession.listenForServiceTelemetry(this.privAudioSource.events);
        // Start the connection to the service. The promise this will create is stored and will be used by configureConnection().
        this.connectImpl();
        return this.audioSource
            .attach(this.privRequestSession.audioNodeId)
            .continueWithPromise((result) => {
                let audioNode;
                if (result.isError) {
                    this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error, successCallback);
                    return PromiseHelper.fromError(result.error);
                }
                else {
                    audioNode = new ReplayableAudioNode(result.result, this.audioSource.format);
                    this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false);
                }
                return this.audioSource.deviceInfo.onSuccessContinueWithPromise((deviceInfo) => {
                    this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
                    return this.configureConnection()
                        .on((_) => {
                            const sessionStartEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);
                            if (!!this.privRecognizer.sessionStarted) {
                                this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);
                            }
                            const messageRetrievalPromise = this.receiveMessage(successCallback, errorCallBack);
                            const audioSendPromise = this.sendAudio(audioNode);
                            // The success handler is intentionally empty; only upload failures are handled.
                            audioSendPromise.on((_) => { }, (error) => {
                                this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.RuntimeError, error, successCallback);
                            });
                            const completionPromise = PromiseHelper.whenAll([messageRetrievalPromise, audioSendPromise]);
                            return completionPromise.on((r) => {
                                return true;
                            }, (error) => {
                                this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.RuntimeError, error, successCallback);
                            });
                        }, (error) => {
                            this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, error, successCallback);
                        }).continueWithPromise((result) => {
                            if (result.isError) {
                                return PromiseHelper.fromError(result.error);
                            }
                            else {
                                return PromiseHelper.fromResult(true);
                            }
                        });
                });
            });
    }
    /**
     * Stops an ongoing recognition: flushes telemetry, turns the audio source
     * off, sends the final (empty) audio message and disposes the request
     * session. Does nothing when no recognition is in progress.
     */
    stopRecognizing() {
        if (this.privRequestSession.isRecognizing) {
            this.privRequestSession.onStopRecognizing();
            this.sendTelemetryData();
            this.audioSource.turnOff();
            this.sendFinalAudio();
            this.privRequestSession.dispose();
        }
    }
    /** Starts establishing the connection to the service (see connectImpl). */
    connect() {
        this.connectImpl().result();
    }
    /**
     * Cancels any ongoing recognition and disposes the connection, if one was
     * started. Safe to call when no connection exists.
     */
    disconnect() {
        if (this.disconnectOverride !== undefined) {
            this.disconnectOverride();
            return;
        }
        this.cancelRecognitionLocal(CancellationReason.Error, CancellationErrorCode.NoError, "Disconnecting", undefined);
        if (!this.privConnectionPromise) {
            // No connection was ever started, or it has already been torn down.
            return;
        }
        const connectionResult = this.privConnectionPromise.result();
        if (connectionResult.isCompleted) {
            if (!connectionResult.isError) {
                connectionResult.result.dispose();
                this.privConnectionPromise = null;
            }
        }
        else {
            // Connection still being established: dispose it as soon as it is available.
            this.privConnectionPromise.onSuccessContinueWith((connection) => {
                connection.dispose();
            });
        }
    }
    /** No-op in this class. */
    sendMessage(message) { }
    /**
     * Cancels recognition: if a recognition is in progress, stops it, flushes
     * telemetry and delegates to `this.cancelRecognition(...)`.
     *
     * @param cancellationReason Reason forwarded to cancelRecognition.
     * @param errorCode Error code forwarded to cancelRecognition.
     * @param error Error detail forwarded to cancelRecognition.
     * @param cancelRecoCallback Callback forwarded to cancelRecognition.
     */
    cancelRecognitionLocal(cancellationReason, errorCode, error, cancelRecoCallback) {
        if (!!this.privRequestSession.isRecognizing) {
            this.privRequestSession.onStopRecognizing();
            this.sendTelemetryData();
            this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, cancellationReason, errorCode, error, cancelRecoCallback);
        }
    }
    /**
     * Establishes a websocket connection to the end point.
     *
     * An existing connection promise is reused unless it has completed with an
     * error or its connection is disconnected. A 403 response triggers one
     * retry with freshly fetched authentication.
     *
     * @param isUnAuthorized true when retrying after a 403; uses `fetchOnExpiry` instead of `fetch`.
     * @returns Promise of the opened connection.
     */
    connectImpl(isUnAuthorized = false) {
        if (this.connectImplOverride !== undefined) {
            return this.connectImplOverride(isUnAuthorized);
        }
        if (this.privConnectionPromise) {
            if (this.privConnectionPromise.result().isCompleted &&
                (this.privConnectionPromise.result().isError
                    || this.privConnectionPromise.result().result.state() === ConnectionState.Disconnected)) {
                // The previous attempt failed or the connection dropped: start over.
                this.privConnectionId = null;
                this.privConnectionPromise = null;
                return this.connectImpl();
            }
            else {
                return this.privConnectionPromise;
            }
        }
        this.privAuthFetchEventId = createNoDashGuid();
        this.privConnectionId = createNoDashGuid();
        this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
        const authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId);
        this.privConnectionPromise = authPromise
            .continueWithPromise((result) => {
                if (result.isError) {
                    this.privRequestSession.onAuthCompleted(true, result.error);
                    throw new Error(result.error);
                }
                else {
                    this.privRequestSession.onAuthCompleted(false);
                }
                const connection = this.privConnectionFactory.create(this.privRecognizerConfig, result.result, this.privConnectionId);
                this.privRequestSession.listenForServiceTelemetry(connection.events);
                // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
                // it'll stop sending events.
                connection.events.attach((event) => {
                    this.connectionEvents.onEvent(event);
                });
                return connection.open().onSuccessContinueWithPromise((response) => {
                    if (response.statusCode === 200) {
                        this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
                        this.privRequestSession.onConnectionEstablishCompleted(response.statusCode);
                        return PromiseHelper.fromResult(connection);
                    }
                    else if (response.statusCode === 403 && !isUnAuthorized) {
                        return this.connectImpl(true);
                    }
                    else {
                        this.privRequestSession.onConnectionEstablishCompleted(response.statusCode, response.reason);
                        return PromiseHelper.fromError(`Unable to contact server. StatusCode: ${response.statusCode}, ${this.privRecognizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${response.reason}`);
                    }
                });
            });
        return this.privConnectionPromise;
    }
    /**
     * Sends a binary "audio" message with a null payload, i.e. the same
     * message sendAudio uses for the end of the stream.
     *
     * @returns Promise that resolves to true once sent, or is rejected with the send/connection error.
     */
    sendFinalAudio() {
        const deferred = new Deferred();
        this.fetchConnection().on((connection) => {
            connection.send(new SpeechConnectionMessage(MessageType.Binary, "audio", this.privRequestSession.requestId, null, null)).on((_) => {
                deferred.resolve(true);
            }, (error) => {
                deferred.reject(error);
            });
        }, (error) => {
            deferred.reject(error);
        });
        return deferred.promise();
    }
    /**
     * Takes an established websocket connection to the endpoint and sends speech configuration information.
     *
     * The resulting promise is cached and reused unless it completed with an
     * error or its connection is disconnected.
     *
     * @returns Promise of the connection after "speech.config" and "speech.context" were sent.
     */
    configureConnection() {
        if (this.configConnectionOverride !== undefined) {
            return this.configConnectionOverride();
        }
        if (this.privConnectionConfigurationPromise) {
            if (this.privConnectionConfigurationPromise.result().isCompleted &&
                (this.privConnectionConfigurationPromise.result().isError
                    || this.privConnectionConfigurationPromise.result().result.state() === ConnectionState.Disconnected)) {
                this.privConnectionConfigurationPromise = null;
                return this.configureConnection();
            }
            else {
                return this.privConnectionConfigurationPromise;
            }
        }
        this.privConnectionConfigurationPromise = this.connectImpl().onSuccessContinueWithPromise((connection) => {
            return this.sendSpeechServiceConfig(connection, this.privRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize())
                .onSuccessContinueWithPromise((_) => {
                    return this.sendSpeechContext(connection).onSuccessContinueWith((_) => {
                        return connection;
                    });
                });
        });
        return this.privConnectionConfigurationPromise;
    }
}
// Telemetry is sent to the service unless this static flag is set to anything other than true.
ServiceRecognizerBase.telemetryDataEnabled = true;
//# sourceMappingURL=ServiceRecognizerBase.js.map