"use strict";
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ServiceRecognizerBase = void 0;
const Exports_js_1 = require("../common.browser/Exports.js");
const Exports_js_2 = require("../common/Exports.js");
const Exports_js_3 = require("../sdk/Exports.js");
const Exports_js_4 = require("./Exports.js");
const SpeechConnectionMessage_Internal_js_1 = require("./SpeechConnectionMessage.Internal.js");
class ServiceRecognizerBase {
constructor(authentication, connectionFactory, audioSource, recognizerConfig, recognizer) {
// A promise for a configured connection.
// Do not consume directly, call fetchConnection instead.
this.privConnectionConfigurationPromise = undefined;
// A promise for a connection, but one that has not had the speech context sent yet.
// Do not consume directly, call fetchConnection instead.
this.privConnectionPromise = undefined;
this.privSetTimeout = setTimeout;
this.privIsLiveAudio = false;
this.privAverageBytesPerMs = 0;
this.privEnableSpeakerId = false;
this.privExpectContentAssessmentResponse = false;
this.recognizeOverride = undefined;
this.recognizeSpeaker = undefined;
this.disconnectOverride = undefined;
this.receiveMessageOverride = undefined;
this.sendPrePayloadJSONOverride = undefined;
this.postConnectImplOverride = undefined;
this.configConnectionOverride = undefined;
this.handleSpeechPhraseMessage = undefined;
this.handleSpeechHypothesisMessage = undefined;
if (!authentication) {
throw new Exports_js_2.ArgumentNullError("authentication");
}
if (!connectionFactory) {
throw new Exports_js_2.ArgumentNullError("connectionFactory");
}
if (!audioSource) {
throw new Exports_js_2.ArgumentNullError("audioSource");
}
if (!recognizerConfig) {
throw new Exports_js_2.ArgumentNullError("recognizerConfig");
}
this.privEnableSpeakerId = recognizerConfig.isSpeakerDiarizationEnabled;
this.privMustReportEndOfStream = false;
this.privAuthentication = authentication;
this.privConnectionFactory = connectionFactory;
this.privAudioSource = audioSource;
this.privRecognizerConfig = recognizerConfig;
this.privIsDisposed = false;
this.privRecognizer = recognizer;
this.privRequestSession = new Exports_js_4.RequestSession(this.privAudioSource.id());
this.privConnectionEvents = new Exports_js_2.EventSource();
this.privServiceEvents = new Exports_js_2.EventSource();
this.privDynamicGrammar = new Exports_js_4.DynamicGrammarBuilder();
this.privSpeechContext = new Exports_js_4.SpeechContext(this.privDynamicGrammar);
this.privAgentConfig = new Exports_js_4.AgentConfig();
const webWorkerLoadType = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.WebWorkerLoadType, "on").toLowerCase();
if (webWorkerLoadType === "on" && typeof (Blob) !== "undefined" && typeof (Worker) !== "undefined") {
this.privSetTimeout = Exports_js_2.Timeout.setTimeout;
}
else {
if (typeof window !== "undefined") {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
this.privSetTimeout = window.setTimeout.bind(window);
}
if (typeof globalThis !== "undefined") {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
this.privSetTimeout = globalThis.setTimeout.bind(globalThis);
}
}
this.connectionEvents.attach((connectionEvent) => {
if (connectionEvent.name === "ConnectionClosedEvent") {
const connectionClosedEvent = connectionEvent;
if (connectionClosedEvent.statusCode === 1003 ||
connectionClosedEvent.statusCode === 1007 ||
connectionClosedEvent.statusCode === 1002 ||
connectionClosedEvent.statusCode === 4000 ||
this.privRequestSession.numConnectionAttempts > this.privRecognizerConfig.maxRetryCount) {
void this.cancelRecognitionLocal(Exports_js_3.CancellationReason.Error, connectionClosedEvent.statusCode === 1007 ? Exports_js_3.CancellationErrorCode.BadRequestParameters : Exports_js_3.CancellationErrorCode.ConnectionFailure, `${connectionClosedEvent.reason} websocket error code: ${connectionClosedEvent.statusCode}`);
}
}
});
if (this.privEnableSpeakerId) {
this.privDiarizationSessionId = Exports_js_2.createNoDashGuid();
}
this.setLanguageIdJson();
this.setOutputDetailLevelJson();
}
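// Populates the "translation" section of the speech context from the configured target
// languages. When a translation voice is also configured, synthesis is requested on
// success and that voice is set as the default for every target language.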
setTranslationJson() {
const targetLanguages = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.SpeechServiceConnection_TranslationToLanguages, undefined);
if (targetLanguages !== undefined) {
const languages = targetLanguages.split(",");
const translationVoice = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.SpeechServiceConnection_TranslationVoice, undefined);
const action = (translationVoice !== undefined) ? "Synthesize" : "None";
this.privSpeechContext.setSection("translation", {
onSuccess: { action },
output: { interimResults: { mode: "Always" } },
targetLanguages: languages,
});
if (translationVoice !== undefined) {
const languageToVoiceMap = {};
for (const lang of languages) {
languageToVoiceMap[lang] = translationVoice;
}
this.privSpeechContext.setSection("synthesis", {
defaultVoices: languageToVoiceMap
});
}
}
}
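// Builds the segmentation settings from the segmentation strategy, silence timeout, and
// maximum segment time properties, and stores them in the "phraseDetection" context
// section under the current recognition mode (CONVERSATION, DICTATION, or INTERACTIVE).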
setSpeechSegmentationTimeoutJson() {
const speechSegmentationSilenceTimeoutMs = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.Speech_SegmentationSilenceTimeoutMs, undefined);
const speechSegmentationMaximumTimeMs = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.Speech_SegmentationMaximumTimeMs, undefined);
const speechSegmentationStrategy = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.Speech_SegmentationStrategy, undefined);
const segmentation = {
segmentation: {
mode: ""
}
};
let configuredSegment = false;
if (speechSegmentationStrategy !== undefined) {
configuredSegment = true;
let segMode = "";
switch (speechSegmentationStrategy.toLowerCase()) {
case "default":
break;
case "time":
segMode = "Custom";
break;
case "semantic":
segMode = "Semantic";
break;
}
segmentation.segmentation.mode = segMode;
}
if (speechSegmentationSilenceTimeoutMs !== undefined) {
configuredSegment = true;
const segmentationSilenceTimeoutMs = parseInt(speechSegmentationSilenceTimeoutMs, 10);
segmentation.segmentation.mode = "Custom";
segmentation.segmentation.segmentationSilenceTimeoutMs = segmentationSilenceTimeoutMs;
}
if (speechSegmentationMaximumTimeMs !== undefined) {
configuredSegment = true;
const segmentationMaximumTimeMs = parseInt(speechSegmentationMaximumTimeMs, 10);
segmentation.segmentation.mode = "Custom";
segmentation.segmentation.segmentationForcedTimeoutMs = segmentationMaximumTimeMs;
}
if (configuredSegment) {
const recoMode = this.recognitionMode === Exports_js_4.RecognitionMode.Conversation ? "CONVERSATION" :
this.recognitionMode === Exports_js_4.RecognitionMode.Dictation ? "DICTATION" : "INTERACTIVE";
const phraseDetection = this.privSpeechContext.getSection("phraseDetection");
phraseDetection.mode = recoMode;
phraseDetection[recoMode] = segmentation;
this.privSpeechContext.setSection("phraseDetection", phraseDetection);
}
}
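// Configures the "languageId" context section when automatic source-language detection is
// enabled (continuous or at-audio-start), applies any custom source-language models, and,
// when translation targets are set, routes interim and final phrases to the translator.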
setLanguageIdJson() {
const phraseDetection = this.privSpeechContext.getSection("phraseDetection");
if (this.privRecognizerConfig.autoDetectSourceLanguages !== undefined) {
const sourceLanguages = this.privRecognizerConfig.autoDetectSourceLanguages.split(",");
let speechContextLidMode;
if (this.privRecognizerConfig.languageIdMode === "Continuous") {
speechContextLidMode = "DetectContinuous";
}
else { // recognizerConfig.languageIdMode === "AtStart"
speechContextLidMode = "DetectAtAudioStart";
}
this.privSpeechContext.setSection("languageId", {
Priority: "PrioritizeLatency",
languages: sourceLanguages,
mode: speechContextLidMode,
onSuccess: { action: "Recognize" },
onUnknown: { action: "None" }
});
this.privSpeechContext.setSection("phraseOutput", {
interimResults: {
resultType: "Auto"
},
phraseResults: {
resultType: "Always"
}
});
const customModels = this.privRecognizerConfig.sourceLanguageModels;
if (customModels !== undefined) {
phraseDetection.customModels = customModels;
phraseDetection.onInterim = { action: "None" };
phraseDetection.onSuccess = { action: "None" };
}
}
const targetLanguages = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.SpeechServiceConnection_TranslationToLanguages, undefined);
if (targetLanguages !== undefined) {
phraseDetection.onInterim = { action: "Translate" };
phraseDetection.onSuccess = { action: "Translate" };
this.privSpeechContext.setSection("phraseOutput", {
interimResults: {
resultType: "None"
},
phraseResults: {
resultType: "None"
}
});
}
this.privSpeechContext.setSection("phraseDetection", phraseDetection);
}
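// When speaker diarization is enabled, requests word-level timings if they were asked for,
// otherwise requests detailed output when the output format is Detailed.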
setOutputDetailLevelJson() {
if (this.privEnableSpeakerId) {
const requestWordLevelTimestamps = this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.SpeechServiceResponse_RequestWordLevelTimestamps, "false").toLowerCase();
if (requestWordLevelTimestamps === "true") {
this.privSpeechContext.setWordLevelTimings();
}
else {
const outputFormat = this.privRecognizerConfig.parameters.getProperty(Exports_js_4.OutputFormatPropertyName, Exports_js_3.OutputFormat[Exports_js_3.OutputFormat.Simple]).toLowerCase();
if (outputFormat === Exports_js_3.OutputFormat[Exports_js_3.OutputFormat.Detailed].toLocaleLowerCase()) {
this.privSpeechContext.setDetailedOutputFormat();
}
}
}
}
get isSpeakerDiarizationEnabled() {
return this.privEnableSpeakerId;
}
get audioSource() {
return this.privAudioSource;
}
get speechContext() {
return this.privSpeechContext;
}
get dynamicGrammar() {
return this.privDynamicGrammar;
}
get agentConfig() {
return this.privAgentConfig;
}
set conversationTranslatorToken(token) {
this.privRecognizerConfig.parameters.setProperty(Exports_js_3.PropertyId.ConversationTranslator_Token, token);
}
set voiceProfileType(type) {
this.privRecognizerConfig.parameters.setProperty(Exports_js_3.PropertyId.SpeechServiceConnection_SpeakerIdMode, type);
}
set authentication(auth) {
this.privAuthentication = auth;
}
isDisposed() {
return this.privIsDisposed;
}
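// Marks this recognizer as disposed and, if a configured connection exists, disposes it.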
async dispose(reason) {
this.privIsDisposed = true;
if (this.privConnectionConfigurationPromise !== undefined) {
try {
const connection = await this.privConnectionConfigurationPromise;
await connection.dispose(reason);
}
catch (error) {
// The connection is in a bad state. But we're trying to kill it, so...
return;
}
}
}
get connectionEvents() {
return this.privConnectionEvents;
}
get serviceEvents() {
return this.privServiceEvents;
}
get recognitionMode() {
return this.privRecognizerConfig.recognitionMode;
}
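// Runs a recognition turn: clears the cached configured connection so config and context
// are re-sent, attaches the audio source, opens the service connection, starts the receive
// loop, and begins streaming audio. Failures are routed to cancelRecognitionLocal.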
async recognize(recoMode, successCallback, errorCallBack) {
if (this.recognizeOverride !== undefined) {
await this.recognizeOverride(recoMode, successCallback, errorCallBack);
return;
}
// Clear the existing configuration promise to force a re-transmission of config and context.
this.privConnectionConfigurationPromise = undefined;
this.privRecognizerConfig.recognitionMode = recoMode;
this.setSpeechSegmentationTimeoutJson();
this.setTranslationJson();
this.privSuccessCallback = successCallback;
this.privErrorCallback = errorCallBack;
this.privRequestSession.startNewRecognition();
this.privRequestSession.listenForServiceTelemetry(this.privAudioSource.events);
// Start the connection to the service. The promise created here is stored and later consumed by configureConnection().
const conPromise = this.connectImpl();
let audioNode;
try {
const audioStreamNode = await this.audioSource.attach(this.privRequestSession.audioNodeId);
const format = await this.audioSource.format;
const deviceInfo = await this.audioSource.deviceInfo;
this.privIsLiveAudio = deviceInfo.type && deviceInfo.type === Exports_js_4.type.Microphones;
audioNode = new Exports_js_1.ReplayableAudioNode(audioStreamNode, format.avgBytesPerSec);
await this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false);
this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };
}
catch (error) {
await this.privRequestSession.onStopRecognizing();
throw error;
}
try {
await conPromise;
}
catch (error) {
await this.cancelRecognitionLocal(Exports_js_3.CancellationReason.Error, Exports_js_3.CancellationErrorCode.ConnectionFailure, error);
return;
}
const sessionStartEventArgs = new Exports_js_3.SessionEventArgs(this.privRequestSession.sessionId);
if (!!this.privRecognizer.sessionStarted) {
this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);
}
void this.receiveMessage();
const audioSendPromise = this.sendAudio(audioNode);
audioSendPromise.catch(async (error) => {
await this.cancelRecognitionLocal(Exports_js_3.CancellationReason.Error, Exports_js_3.CancellationErrorCode.RuntimeError, error);
});
return;
}
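// Stops an active recognition: turns off the audio source, sends the final (empty) audio
// chunk, and waits for the service to finish the turn before disposing the session.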
async stopRecognizing() {
if (this.privRequestSession.isRecognizing) {
try {
await this.audioSource.turnOff();
await this.sendFinalAudio();
await this.privRequestSession.onStopRecognizing();
await this.privRequestSession.turnCompletionPromise;
}
finally {
await this.privRequestSession.dispose();
}
}
return;
}
async connect() {
await this.connectImpl();
return Promise.resolve();
}
connectAsync(cb, err) {
this.connectImpl().then(() => {
try {
if (!!cb) {
cb();
}
}
catch (e) {
if (!!err) {
err(e);
}
}
}, (reason) => {
try {
if (!!err) {
err(reason);
}
/* eslint-disable no-empty */
}
catch (error) {
}
});
}
async disconnect() {
await this.cancelRecognitionLocal(Exports_js_3.CancellationReason.Error, Exports_js_3.CancellationErrorCode.NoError, "Disconnecting");
if (this.disconnectOverride !== undefined) {
await this.disconnectOverride();
}
if (this.privConnectionPromise !== undefined) {
try {
await (await this.privConnectionPromise).dispose();
}
catch (error) {
}
}
this.privConnectionPromise = undefined;
}
// eslint-disable-next-line @typescript-eslint/no-unused-vars
sendMessage(message) {
return;
}
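// Sends a caller-supplied message to the service on the given path; string payloads are
// sent as JSON text, anything else as binary.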
async sendNetworkMessage(path, payload) {
const type = typeof payload === "string" ? Exports_js_2.MessageType.Text : Exports_js_2.MessageType.Binary;
const contentType = typeof payload === "string" ? "application/json" : "";
const connection = await this.fetchConnection();
return connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(type, path, this.privRequestSession.requestId, contentType, payload));
}
set activityTemplate(messagePayload) {
this.privActivityTemplate = messagePayload;
}
get activityTemplate() {
return this.privActivityTemplate;
}
set expectContentAssessmentResponse(value) {
this.privExpectContentAssessmentResponse = value;
}
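// Sends the telemetry collected for the current turn, unless telemetry is disabled,
// the recognizer has been disposed, or there is nothing to send.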
async sendTelemetryData() {
const telemetryData = this.privRequestSession.getTelemetry();
if (ServiceRecognizerBase.telemetryDataEnabled !== true ||
this.privIsDisposed ||
null === telemetryData) {
return;
}
if (!!ServiceRecognizerBase.telemetryData) {
try {
ServiceRecognizerBase.telemetryData(telemetryData);
/* eslint-disable no-empty */
}
catch { }
}
const connection = await this.fetchConnection();
await connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Text, "telemetry", this.privRequestSession.requestId, "application/json", telemetryData));
}
// Cancels recognition.
async cancelRecognitionLocal(cancellationReason, errorCode, error) {
if (!!this.privRequestSession.isRecognizing) {
await this.privRequestSession.onStopRecognizing();
this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, cancellationReason, errorCode, error);
}
}
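// The receive loop: reads messages from the connection and handles the well-known paths
// (turn.start, speech.startdetected, speech.enddetected, turn.end); anything else is
// offered to the derived class via processTypeSpecificMessages and, if unhandled there,
// surfaced through serviceEvents.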
async receiveMessage() {
try {
if (this.privIsDisposed) {
// We're done.
return;
}
let connection = await this.fetchConnection();
const message = await connection.read();
if (this.receiveMessageOverride !== undefined) {
return this.receiveMessageOverride();
}
// A null message indicates the queue is draining and nothing was pending; poll again.
if (!message) {
return this.receiveMessage();
}
this.privServiceHasSentMessage = true;
const connectionMessage = SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage.fromConnectionMessage(message);
if (connectionMessage.requestId.toLowerCase() === this.privRequestSession.requestId.toLowerCase()) {
switch (connectionMessage.path.toLowerCase()) {
case "turn.start":
this.privMustReportEndOfStream = true;
this.privRequestSession.onServiceTurnStartResponse();
break;
case "speech.startdetected":
const speechStartDetected = Exports_js_4.SpeechDetected.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset);
const speechStartEventArgs = new Exports_js_3.RecognitionEventArgs(speechStartDetected.Offset, this.privRequestSession.sessionId);
if (!!this.privRecognizer.speechStartDetected) {
this.privRecognizer.speechStartDetected(this.privRecognizer, speechStartEventArgs);
}
break;
case "speech.enddetected":
let json;
if (connectionMessage.textBody.length > 0) {
json = connectionMessage.textBody;
}
else {
// If the request was empty, the JSON returned is empty.
json = "{ Offset: 0 }";
}
const speechStopDetected = Exports_js_4.SpeechDetected.fromJSON(json, this.privRequestSession.currentTurnAudioOffset);
const speechStopEventArgs = new Exports_js_3.RecognitionEventArgs(speechStopDetected.Offset + this.privRequestSession.currentTurnAudioOffset, this.privRequestSession.sessionId);
if (!!this.privRecognizer.speechEndDetected) {
this.privRecognizer.speechEndDetected(this.privRecognizer, speechStopEventArgs);
}
break;
case "turn.end":
await this.sendTelemetryData();
if (this.privRequestSession.isSpeechEnded && this.privMustReportEndOfStream) {
this.privMustReportEndOfStream = false;
await this.cancelRecognitionLocal(Exports_js_3.CancellationReason.EndOfStream, Exports_js_3.CancellationErrorCode.NoError, undefined);
}
const sessionStopEventArgs = new Exports_js_3.SessionEventArgs(this.privRequestSession.sessionId);
await this.privRequestSession.onServiceTurnEndResponse(this.privRecognizerConfig.isContinuousRecognition);
if (!this.privRecognizerConfig.isContinuousRecognition || this.privRequestSession.isSpeechEnded || !this.privRequestSession.isRecognizing) {
if (!!this.privRecognizer.sessionStopped) {
this.privRecognizer.sessionStopped(this.privRecognizer, sessionStopEventArgs);
}
return;
}
else {
connection = await this.fetchConnection();
await this.sendPrePayloadJSON(connection);
}
break;
default:
if (!await this.processTypeSpecificMessages(connectionMessage)) {
// The derived class did not process this message; dispatch it through the service events.
if (!!this.privServiceEvents) {
this.serviceEvents.onEvent(new Exports_js_2.ServiceEvent(connectionMessage.path.toLowerCase(), connectionMessage.textBody));
}
}
}
}
return this.receiveMessage();
}
catch (error) {
return null;
}
}
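// Derives the current audio offset in milliseconds from the bytes sent so far, using the
// average byte rate captured in sendAudio, and stores it for speaker diarization.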
updateSpeakerDiarizationAudioOffset() {
const bytesSent = this.privRequestSession.recognitionBytesSent;
const audioOffsetMs = this.privAverageBytesPerMs !== 0 ? bytesSent / this.privAverageBytesPerMs : 0;
this.privSpeechContext.setSpeakerDiarizationAudioOffsetMs(audioOffsetMs);
}
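// Serializes the speech context and sends it as a speech.context message; optionally
// rolls to a new request id first so the context applies to a fresh request.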
sendSpeechContext(connection, generateNewRequestId) {
if (this.privEnableSpeakerId) {
this.updateSpeakerDiarizationAudioOffset();
}
const speechContextJson = this.speechContext.toJSON();
if (generateNewRequestId) {
this.privRequestSession.onSpeechContext();
}
if (speechContextJson) {
return connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Text, "speech.context", this.privRequestSession.requestId, "application/json", speechContextJson));
}
return;
}
noOp() {
// operation not supported
return;
}
// Encapsulated for derived service recognizers that need to send additional JSON
async sendPrePayloadJSON(connection, generateNewRequestId = true) {
if (this.sendPrePayloadJSONOverride !== undefined) {
return this.sendPrePayloadJSONOverride(connection);
}
await this.sendSpeechContext(connection, generateNewRequestId);
await this.sendWaveHeader(connection);
return;
}
async sendWaveHeader(connection) {
const format = await this.audioSource.format;
// this.writeBufferToConsole(format.header);
return connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Binary, "audio", this.privRequestSession.requestId, "audio/x-wav", format.header));
}
// Establishes a websocket connection to the end point.
connectImpl() {
if (this.privConnectionPromise !== undefined) {
return this.privConnectionPromise.then((connection) => {
if (connection.state() === Exports_js_2.ConnectionState.Disconnected) {
this.privConnectionId = null;
this.privConnectionPromise = undefined;
this.privServiceHasSentMessage = false;
return this.connectImpl();
}
return this.privConnectionPromise;
}, () => {
this.privConnectionId = null;
this.privConnectionPromise = undefined;
this.privServiceHasSentMessage = false;
return this.connectImpl();
});
}
this.privConnectionPromise = this.retryableConnect();
// Attach an empty handler to allow the promise to run in the background while
// other startup events happen. It'll eventually be awaited on.
// eslint-disable-next-line @typescript-eslint/no-empty-function
this.privConnectionPromise.catch(() => { });
if (this.postConnectImplOverride !== undefined) {
return this.postConnectImplOverride(this.privConnectionPromise);
}
return this.privConnectionPromise;
}
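// Sends the speech.config message. When telemetry is disabled, the config is first
// reduced to just the system context so no optional telemetry fields are transmitted.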
sendSpeechServiceConfig(connection, requestSession, SpeechServiceConfigJson) {
requestSession.onSpeechContext();
// filter out anything that is not required for the service to work.
if (ServiceRecognizerBase.telemetryDataEnabled !== true) {
const withTelemetry = JSON.parse(SpeechServiceConfigJson);
const replacement = {
context: {
system: withTelemetry.context.system,
},
};
SpeechServiceConfigJson = JSON.stringify(replacement);
}
if (this.privRecognizerConfig.parameters.getProperty("f0f5debc-f8c9-4892-ac4b-90a7ab359fd2", "false").toLowerCase() === "true") {
const json = JSON.parse(SpeechServiceConfigJson);
json.context.DisableReferenceChannel = "True";
json.context.MicSpec = "1_0_0";
SpeechServiceConfigJson = JSON.stringify(json);
}
if (SpeechServiceConfigJson) {
return connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Text, "speech.config", requestSession.requestId, "application/json", SpeechServiceConfigJson));
}
return;
}
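// Returns a connection that has already had config and context sent. The configured-
// connection promise is cached and reused until it fails or its connection disconnects,
// in which case a fresh one is created.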
async fetchConnection() {
if (this.privConnectionConfigurationPromise !== undefined) {
return this.privConnectionConfigurationPromise.then((connection) => {
if (connection.state() === Exports_js_2.ConnectionState.Disconnected) {
this.privConnectionId = null;
this.privConnectionConfigurationPromise = undefined;
this.privServiceHasSentMessage = false;
return this.fetchConnection();
}
return this.privConnectionConfigurationPromise;
}, () => {
this.privConnectionId = null;
this.privConnectionConfigurationPromise = undefined;
this.privServiceHasSentMessage = false;
return this.fetchConnection();
});
}
this.privConnectionConfigurationPromise = this.configureConnection();
return await this.privConnectionConfigurationPromise;
}
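// Streams audio to the service. The first "fast lane" window (5000 ms worth of bytes by
// default) is sent unthrottled; after that each chunk schedules the next send at half the
// chunk's duration, pacing uploads at roughly twice real time.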
async sendAudio(audioStreamNode) {
const audioFormat = await this.audioSource.format;
this.privAverageBytesPerMs = audioFormat.avgBytesPerSec / 1000;
// The earliest time the next chunk may be sent to the service.
let nextSendTime = Date.now();
// Max amount to send before we start to throttle
const fastLaneSizeMs = this.privRecognizerConfig.parameters.getProperty("SPEECH-TransmitLengthBeforThrottleMs", "5000");
const maxSendUnthrottledBytes = audioFormat.avgBytesPerSec / 1000 * parseInt(fastLaneSizeMs, 10);
const startRecogNumber = this.privRequestSession.recogNumber;
const readAndUploadCycle = async () => {
// If speech is done, stop sending audio.
if (!this.privIsDisposed &&
!this.privRequestSession.isSpeechEnded &&
this.privRequestSession.isRecognizing &&
this.privRequestSession.recogNumber === startRecogNumber) {
const connection = await this.fetchConnection();
const audioStreamChunk = await audioStreamNode.read();
// we have a new audio chunk to upload.
if (this.privRequestSession.isSpeechEnded) {
// If service already recognized audio end then don't send any more audio
return;
}
let payload;
let sendDelay;
if (!audioStreamChunk || audioStreamChunk.isEnd) {
payload = null;
sendDelay = 0;
}
else {
payload = audioStreamChunk.buffer;
this.privRequestSession.onAudioSent(payload.byteLength);
if (maxSendUnthrottledBytes >= this.privRequestSession.bytesSent) {
sendDelay = 0;
}
else {
sendDelay = Math.max(0, nextSendTime - Date.now());
}
}
if (0 !== sendDelay) {
await this.delay(sendDelay);
}
if (payload !== null) {
nextSendTime = Date.now() + (payload.byteLength * 1000 / (audioFormat.avgBytesPerSec * 2));
}
// Are we still alive?
if (!this.privIsDisposed &&
!this.privRequestSession.isSpeechEnded &&
this.privRequestSession.isRecognizing &&
this.privRequestSession.recogNumber === startRecogNumber) {
connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Binary, "audio", this.privRequestSession.requestId, null, payload)).catch(() => {
// eslint-disable-next-line @typescript-eslint/no-empty-function
this.privRequestSession.onServiceTurnEndResponse(this.privRecognizerConfig.isContinuousRecognition).catch(() => { });
});
if (!audioStreamChunk?.isEnd) {
// this.writeBufferToConsole(payload);
// Regardless of success or failure, schedule the next upload.
// If the underlying connection was broken, the next cycle will
// get a new connection and re-transmit missing audio automatically.
return readAndUploadCycle();
}
else {
// the audio stream has been closed, no need to schedule next
// read-upload cycle.
if (!this.privIsLiveAudio) {
this.privRequestSession.onSpeechEnded();
}
}
}
}
};
return readAndUploadCycle();
}
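// Opens a websocket connection, retrying until maxRetryCount is exceeded. A 1006 close is
// treated as an auth failure and triggers a token refresh (fetchOnExpiry) before the next
// attempt; a 200 response completes the connect.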
async retryableConnect() {
let isUnAuthorized = false;
this.privAuthFetchEventId = Exports_js_2.createNoDashGuid();
const sessionId = this.privRequestSession.sessionId;
this.privConnectionId = (sessionId !== undefined) ? sessionId : Exports_js_2.createNoDashGuid();
this.privRequestSession.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
let lastStatusCode = 0;
let lastReason = "";
while (this.privRequestSession.numConnectionAttempts <= this.privRecognizerConfig.maxRetryCount) {
// Get the auth information for the connection. This is a bit of overkill for the current API
// surface, but the plumbing is left in place to be able to raise a developer-facing event when
// a connection fails, letting the caller provide new auth information.
const authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId);
const auth = await authPromise;
await this.privRequestSession.onAuthCompleted(false);
// Create the connection
const connection = this.privConnectionFactory.create(this.privRecognizerConfig, auth, this.privConnectionId);
// Attach the telemetry handlers.
this.privRequestSession.listenForServiceTelemetry(connection.events);
// Attach to the underlying events. There is no need to hold onto the detach pointers: if the
// connection goes away, it stops sending events.
connection.events.attach((event) => {
this.connectionEvents.onEvent(event);
});
const response = await connection.open();
// 200 == everything is fine.
if (response.statusCode === 200) {
await this.privRequestSession.onConnectionEstablishCompleted(response.statusCode);
return Promise.resolve(connection);
}
else if (response.statusCode === 1006) {
isUnAuthorized = true;
}
lastStatusCode = response.statusCode;
lastReason = response.reason;
this.privRequestSession.onRetryConnection();
}
await this.privRequestSession.onConnectionEstablishCompleted(lastStatusCode, lastReason);
return Promise.reject(`Unable to contact server. StatusCode: ${lastStatusCode}, ${this.privRecognizerConfig.parameters.getProperty(Exports_js_3.PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${lastReason}`);
}
delay(delayMs) {
return new Promise((resolve) => this.privSetTimeout(resolve, delayMs));
}
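// Debug helper: hex-dumps a buffer to the console, sixteen bytes per line.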
writeBufferToConsole(buffer) {
let out = "Buffer Size: ";
if (null === buffer) {
out += "null";
}
else {
const readView = new Uint8Array(buffer);
out += `${buffer.byteLength}\r\n`;
for (let i = 0; i < buffer.byteLength; i++) {
out += readView[i].toString(16).padStart(2, "0") + " ";
if (((i + 1) % 16) === 0) {
// eslint-disable-next-line no-console
console.info(out);
out = "";
}
}
}
// eslint-disable-next-line no-console
console.info(out);
}
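// Signals end-of-audio to the service by sending an audio message with a null body.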
async sendFinalAudio() {
const connection = await this.fetchConnection();
await connection.send(new SpeechConnectionMessage_Internal_js_1.SpeechConnectionMessage(Exports_js_2.MessageType.Binary, "audio", this.privRequestSession.requestId, null, null));
return;
}
// Takes an established websocket connection to the endpoint and sends speech configuration information.
async configureConnection() {
const connection = await this.connectImpl();
if (this.configConnectionOverride !== undefined) {
return this.configConnectionOverride(connection);
}
await this.sendSpeechServiceConfig(connection, this.privRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize());
await this.sendPrePayloadJSON(connection, false);
return connection;
}
}
exports.ServiceRecognizerBase = ServiceRecognizerBase;
ServiceRecognizerBase.telemetryDataEnabled = true;
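// A minimal sketch of a derived recognizer (hypothetical example; processTypeSpecificMessages
// and cancelRecognition are not defined in this file and must be supplied by subclasses):
//
//   class ExampleRecognizer extends ServiceRecognizerBase {
//       async processTypeSpecificMessages(connectionMessage) {
//           // Return true when the message was consumed; returning false routes it to
//           // serviceEvents (see the default branch of receiveMessage above).
//           return false;
//       }
//       cancelRecognition(sessionId, requestId, cancellationReason, errorCode, error) {
//           // Surface the cancellation to the public recognizer object.
//       }
//   }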
//# sourceMappingURL=ServiceRecognizerBase.js.map