microsoft-cognitiveservices-speech-sdk
Microsoft Cognitive Services Speech SDK for JavaScript
1 line • 35.3 kB
Source Map (JSON)
{"version":3,"sources":["src/common.speech/DialogServiceAdapter.ts"],"names":[],"mappings":"AAOA,OAAO,EAUH,YAAY,EAKf,MAAM,sBAAsB,CAAC;AAI9B,OAAO,EAEH,qBAAqB,EACrB,kBAAkB,EAElB,sBAAsB,EAQtB,uBAAuB,EAE1B,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EAKH,qBAAqB,EAKxB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE1E,OAAO,EAAE,uBAAuB,EAAE,MAAM,uCAAuC,CAAC;AAEhF,qBAAa,oBAAqB,SAAQ,qBAAqB;IAC3D,OAAO,CAAC,0BAA0B,CAAyB;IAE3D,OAAO,CAAC,qBAAqB,CAAe;IAE5C,OAAO,CAAC,kBAAkB,CAAgB;IAC1C,OAAO,CAAC,oBAAoB,CAAU;IACtC,OAAO,CAAC,eAAe,CAAU;IACjC,OAAO,CAAC,cAAc,CAA0B;IAChD,OAAO,CAAC,UAAU,CAA2B;IAK7C,OAAO,CAAC,oBAAoB,CAAgC;gBAGxD,cAAc,EAAE,eAAe,EAC/B,iBAAiB,EAAE,kBAAkB,EACrC,WAAW,EAAE,YAAY,EACzB,gBAAgB,EAAE,gBAAgB,EAClC,sBAAsB,EAAE,sBAAsB;IAyBrC,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;cAwBxC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAY/C,SAAS,CAAC,2BAA2B,CAAC,iBAAiB,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC;cAmInF,iBAAiB,CAC7B,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,kBAAkB,EAAE,kBAAkB,EACtC,SAAS,EAAE,qBAAqB,EAChC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;cA8CjB,UAAU,CACtB,QAAQ,EAAE,eAAe,EACzB,eAAe,EAAE,CAAC,CAAC,EAAE,uBAAuB,KAAK,IAAI,EACrD,aAAa,EAAE,CAAC,CAAC,EAAE,MAAM,KAAK,IAAI,GACnC,OAAO,CAAC,IAAI,CAAC;IAgDhB,OAAO,CAAC,iBAAiB;IAKzB,OAAO,CAAC,4BAA4B;YA6ItB,gBAAgB;YAchB,gBAAgB;YAWhB,oBAAoB;IAQlC,OAAO,CAAC,eAAe;IA4BvB,OAAO,CAAC,gBAAgB;IAwBxB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,qBAAqB;IAqD7B,OAAO,CAAC,OAAO;IAKf,OAAO,CAAC,qBAAqB;CAmChC","file":"DialogServiceAdapter.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\r\n// Licensed under the MIT license.\r\n\r\nimport {\r\n ReplayableAudioNode\r\n} from \"../common.browser/Exports.js\";\r\nimport { SendingAgentContextMessageEvent } from \"../common/DialogEvents.js\";\r\nimport {\r\n BackgroundEvent,\r\n ConnectionEvent,\r\n ConnectionMessage,\r\n createGuid,\r\n createNoDashGuid,\r\n Deferred,\r\n DialogEvent,\r\n Events,\r\n EventSource,\r\n IAudioSource,\r\n IAudioStreamNode,\r\n IConnection,\r\n MessageType,\r\n ServiceEvent,\r\n} from \"../common/Exports.js\";\r\nimport { AudioOutputFormatImpl } from \"../sdk/Audio/AudioOutputFormat.js\";\r\nimport { PullAudioOutputStreamImpl } from \"../sdk/Audio/AudioOutputStream.js\";\r\nimport { AudioStreamFormatImpl } from \"../sdk/Audio/AudioStreamFormat.js\";\r\nimport {\r\n ActivityReceivedEventArgs,\r\n CancellationErrorCode,\r\n CancellationReason,\r\n DialogServiceConfig,\r\n DialogServiceConnector,\r\n PropertyCollection,\r\n PropertyId,\r\n RecognitionEventArgs,\r\n ResultReason,\r\n SessionEventArgs,\r\n SpeechRecognitionCanceledEventArgs,\r\n SpeechRecognitionEventArgs,\r\n SpeechRecognitionResult,\r\n TurnStatusReceivedEventArgs,\r\n} from \"../sdk/Exports.js\";\r\nimport { DialogServiceTurnStateManager } from \"./DialogServiceTurnStateManager.js\";\r\nimport {\r\n CancellationErrorCodePropertyName,\r\n EnumTranslation,\r\n ISpeechConfigAudioDevice,\r\n RecognitionStatus,\r\n ServiceRecognizerBase,\r\n SimpleSpeechPhrase,\r\n SpeechDetected,\r\n SpeechHypothesis,\r\n SpeechKeyword,\r\n} from \"./Exports.js\";\r\nimport { IAuthentication } from \"./IAuthentication.js\";\r\nimport { IConnectionFactory } from \"./IConnectionFactory.js\";\r\nimport { RecognitionMode, RecognizerConfig } from \"./RecognizerConfig.js\";\r\nimport { ActivityPayloadResponse } from \"./ServiceMessages/ActivityResponsePayload.js\";\r\nimport { SpeechConnectionMessage } from 
\"./SpeechConnectionMessage.Internal.js\";\r\n\r\nexport class DialogServiceAdapter extends ServiceRecognizerBase {\r\n private privDialogServiceConnector: DialogServiceConnector;\r\n\r\n private privDialogAudioSource: IAudioSource;\r\n\r\n private privConnectionLoop: Promise<void>;\r\n private terminateMessageLoop: boolean;\r\n private agentConfigSent: boolean;\r\n private privLastResult: SpeechRecognitionResult;\r\n private privEvents: EventSource<DialogEvent>;\r\n\r\n // Turns are of two kinds:\r\n // 1: SR turns, end when the SR result is returned and then turn end.\r\n // 2: Service turns where an activity is sent by the service along with the audio.\r\n private privTurnStateManager: DialogServiceTurnStateManager;\r\n\r\n public constructor(\r\n authentication: IAuthentication,\r\n connectionFactory: IConnectionFactory,\r\n audioSource: IAudioSource,\r\n recognizerConfig: RecognizerConfig,\r\n dialogServiceConnector: DialogServiceConnector) {\r\n\r\n super(authentication, connectionFactory, audioSource, recognizerConfig, dialogServiceConnector);\r\n\r\n this.privEvents = new EventSource<DialogEvent>();\r\n this.privDialogServiceConnector = dialogServiceConnector;\r\n this.receiveMessageOverride = (): Promise<void> => this.receiveDialogMessageOverride();\r\n this.privTurnStateManager = new DialogServiceTurnStateManager();\r\n this.recognizeOverride =\r\n (recoMode: RecognitionMode, successCallback: (e: SpeechRecognitionResult) => void, errorCallback: (e: string) => void): Promise<void> =>\r\n this.listenOnce(recoMode, successCallback, errorCallback);\r\n this.postConnectImplOverride = (connection: Promise<IConnection>): Promise<IConnection> => this.dialogConnectImpl(connection);\r\n this.configConnectionOverride = (connection: IConnection): Promise<IConnection> => this.configConnection(connection);\r\n this.disconnectOverride = (): Promise<void> => this.privDisconnect();\r\n this.privDialogAudioSource = audioSource;\r\n\r\n this.agentConfigSent = false;\r\n this.privLastResult = null;\r\n this.connectionEvents.attach((connectionEvent: ConnectionEvent): void => {\r\n if (connectionEvent.name === \"ConnectionClosedEvent\") {\r\n this.terminateMessageLoop = true;\r\n }\r\n });\r\n }\r\n\r\n public async sendMessage(message: string): Promise<void> {\r\n const interactionGuid: string = createGuid();\r\n const requestId: string = createNoDashGuid();\r\n\r\n const agentMessage: any = {\r\n context: {\r\n interactionId: interactionGuid\r\n },\r\n // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment\r\n messagePayload: JSON.parse(message),\r\n version: 0.5\r\n };\r\n\r\n const agentMessageJson = JSON.stringify(agentMessage);\r\n const connection: IConnection = await this.fetchConnection();\r\n await connection.send(new SpeechConnectionMessage(\r\n MessageType.Text,\r\n \"agent\",\r\n requestId,\r\n \"application/json\",\r\n agentMessageJson));\r\n\r\n }\r\n\r\n protected async privDisconnect(): Promise<void> {\r\n await this.cancelRecognition(this.privRequestSession.sessionId,\r\n this.privRequestSession.requestId,\r\n CancellationReason.Error,\r\n CancellationErrorCode.NoError,\r\n \"Disconnecting\");\r\n\r\n this.terminateMessageLoop = true;\r\n this.agentConfigSent = false;\r\n return;\r\n }\r\n\r\n protected processTypeSpecificMessages(connectionMessage: SpeechConnectionMessage): Promise<boolean> {\r\n\r\n const resultProps: PropertyCollection = new PropertyCollection();\r\n if (connectionMessage.messageType === MessageType.Text) {\r\n 
resultProps.setProperty(PropertyId.SpeechServiceResponse_JsonResult, connectionMessage.textBody);\r\n }\r\n\r\n let result: SpeechRecognitionResult;\r\n let processed: boolean;\r\n\r\n switch (connectionMessage.path.toLowerCase()) {\r\n case \"speech.phrase\":\r\n const speechPhrase: SimpleSpeechPhrase = SimpleSpeechPhrase.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset);\r\n\r\n this.privRequestSession.onPhraseRecognized(speechPhrase.Offset + speechPhrase.Duration);\r\n\r\n if (speechPhrase.RecognitionStatus !== RecognitionStatus.TooManyRequests && speechPhrase.RecognitionStatus !== RecognitionStatus.Error) {\r\n const args: SpeechRecognitionEventArgs = this.fireEventForResult(speechPhrase, resultProps);\r\n this.privLastResult = args.result;\r\n\r\n if (!!this.privDialogServiceConnector.recognized) {\r\n try {\r\n this.privDialogServiceConnector.recognized(this.privDialogServiceConnector, args);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n }\r\n processed = true;\r\n break;\r\n case \"speech.hypothesis\":\r\n const hypothesis: SpeechHypothesis = SpeechHypothesis.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset);\r\n\r\n result = new SpeechRecognitionResult(\r\n this.privRequestSession.requestId,\r\n ResultReason.RecognizingSpeech,\r\n hypothesis.Text,\r\n hypothesis.Duration,\r\n hypothesis.Offset,\r\n hypothesis.Language,\r\n hypothesis.LanguageDetectionConfidence,\r\n undefined,\r\n undefined,\r\n hypothesis.asJson(),\r\n resultProps);\r\n\r\n this.privRequestSession.onHypothesis(hypothesis.Offset);\r\n\r\n const ev = new SpeechRecognitionEventArgs(result, hypothesis.Offset, this.privRequestSession.sessionId);\r\n\r\n if (!!this.privDialogServiceConnector.recognizing) {\r\n try {\r\n this.privDialogServiceConnector.recognizing(this.privDialogServiceConnector, ev);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n processed = true;\r\n break;\r\n case \"speech.keyword\":\r\n const keyword: SpeechKeyword = SpeechKeyword.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset);\r\n\r\n result = new SpeechRecognitionResult(\r\n this.privRequestSession.requestId,\r\n keyword.Status === \"Accepted\" ? 
ResultReason.RecognizedKeyword : ResultReason.NoMatch,\r\n keyword.Text,\r\n keyword.Duration,\r\n keyword.Offset,\r\n undefined,\r\n undefined,\r\n undefined,\r\n undefined,\r\n keyword.asJson(),\r\n resultProps);\r\n\r\n if (keyword.Status !== \"Accepted\") {\r\n this.privLastResult = result;\r\n }\r\n\r\n const event = new SpeechRecognitionEventArgs(result, result.duration, result.resultId);\r\n\r\n if (!!this.privDialogServiceConnector.recognized) {\r\n try {\r\n this.privDialogServiceConnector.recognized(this.privDialogServiceConnector, event);\r\n /* eslint-disable no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n processed = true;\r\n break;\r\n case \"audio\":\r\n {\r\n const audioRequestId = connectionMessage.requestId.toUpperCase();\r\n const turn = this.privTurnStateManager.GetTurn(audioRequestId);\r\n try {\r\n // Empty binary message signals end of stream.\r\n if (!connectionMessage.binaryBody) {\r\n turn.endAudioStream();\r\n } else {\r\n turn.audioStream.write(connectionMessage.binaryBody);\r\n }\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n processed = true;\r\n break;\r\n\r\n case \"response\":\r\n {\r\n this.handleResponseMessage(connectionMessage);\r\n }\r\n processed = true;\r\n break;\r\n\r\n default:\r\n break;\r\n }\r\n const defferal = new Deferred<boolean>();\r\n defferal.resolve(processed);\r\n return defferal.promise;\r\n }\r\n\r\n // Cancels recognition.\r\n protected async cancelRecognition(\r\n sessionId: string,\r\n requestId: string,\r\n cancellationReason: CancellationReason,\r\n errorCode: CancellationErrorCode,\r\n error: string): Promise<void> {\r\n\r\n this.terminateMessageLoop = true;\r\n\r\n if (!!this.privRequestSession.isRecognizing) {\r\n await this.privRequestSession.onStopRecognizing();\r\n }\r\n\r\n if (!!this.privDialogServiceConnector.canceled) {\r\n const properties: PropertyCollection = new PropertyCollection();\r\n properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);\r\n\r\n const cancelEvent: SpeechRecognitionCanceledEventArgs = new SpeechRecognitionCanceledEventArgs(\r\n cancellationReason,\r\n error,\r\n errorCode,\r\n undefined,\r\n sessionId);\r\n\r\n try {\r\n this.privDialogServiceConnector.canceled(this.privDialogServiceConnector, cancelEvent);\r\n /* eslint-disable no-empty */\r\n } catch { }\r\n\r\n if (!!this.privSuccessCallback) {\r\n const result: SpeechRecognitionResult = new SpeechRecognitionResult(\r\n undefined, // ResultId\r\n ResultReason.Canceled,\r\n undefined, // Text\r\n undefined, // Duration\r\n undefined, // Offset\r\n undefined, // Language\r\n undefined, // Language Detection Confidence\r\n undefined, // Speaker Id\r\n error,\r\n undefined, // Json\r\n properties);\r\n try {\r\n this.privSuccessCallback(result);\r\n this.privSuccessCallback = undefined;\r\n /* eslint-disable no-empty */\r\n } catch { }\r\n }\r\n }\r\n }\r\n\r\n protected async listenOnce(\r\n recoMode: RecognitionMode,\r\n successCallback: (e: SpeechRecognitionResult) => void,\r\n errorCallback: (e: string) => void\r\n ): Promise<void> {\r\n this.privRecognizerConfig.recognitionMode = recoMode;\r\n\r\n this.privSuccessCallback = successCallback;\r\n this.privErrorCallback = errorCallback;\r\n\r\n this.privRequestSession.startNewRecognition();\r\n this.privRequestSession.listenForServiceTelemetry(this.privDialogAudioSource.events);\r\n\r\n 
this.privRecognizerConfig.parameters.setProperty(PropertyId.Speech_SessionId, this.privRequestSession.sessionId);\r\n\r\n // Start the connection to the service. The promise this will create is stored and will be used by configureConnection().\r\n const conPromise: Promise<IConnection> = this.connectImpl();\r\n\r\n const preAudioPromise: Promise<void> = this.sendPreAudioMessages();\r\n\r\n const node: IAudioStreamNode = await this.privDialogAudioSource.attach(this.privRequestSession.audioNodeId);\r\n const format: AudioStreamFormatImpl = await this.privDialogAudioSource.format;\r\n const deviceInfo: ISpeechConfigAudioDevice = await this.privDialogAudioSource.deviceInfo;\r\n\r\n const audioNode = new ReplayableAudioNode(node, format.avgBytesPerSec);\r\n await this.privRequestSession.onAudioSourceAttachCompleted(audioNode, false);\r\n\r\n this.privRecognizerConfig.SpeechServiceConfig.Context.audio = { source: deviceInfo };\r\n\r\n try {\r\n await conPromise;\r\n await preAudioPromise;\r\n } catch (error) {\r\n await this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.ConnectionFailure, error as string);\r\n return Promise.resolve();\r\n }\r\n\r\n const sessionStartEventArgs: SessionEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);\r\n\r\n if (!!this.privRecognizer.sessionStarted) {\r\n this.privRecognizer.sessionStarted(this.privRecognizer, sessionStartEventArgs);\r\n }\r\n\r\n const audioSendPromise = this.sendAudio(audioNode);\r\n\r\n // /* eslint-disable no-empty */\r\n audioSendPromise.then((): void => { /* add? return true;*/ }, async (error: string): Promise<void> => {\r\n await this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.RuntimeError, error);\r\n });\r\n }\r\n\r\n // Establishes a websocket connection to the end point.\r\n private dialogConnectImpl(connection: Promise<IConnection>): Promise<IConnection> {\r\n this.privConnectionLoop = this.startMessageLoop();\r\n return connection;\r\n }\r\n\r\n private receiveDialogMessageOverride(): Promise<void> {\r\n\r\n // we won't rely on the cascading promises of the connection since we want to continually be available to receive messages\r\n const communicationCustodian: Deferred<void> = new Deferred<void>();\r\n\r\n const loop = async (): Promise<void> => {\r\n try {\r\n const isDisposed: boolean = this.isDisposed();\r\n const terminateMessageLoop = (!this.isDisposed() && this.terminateMessageLoop);\r\n if (isDisposed || terminateMessageLoop) {\r\n // We're done.\r\n communicationCustodian.resolve(undefined);\r\n return;\r\n }\r\n\r\n const connection: IConnection = await this.fetchConnection();\r\n const message: ConnectionMessage = await connection.read();\r\n\r\n if (!message) {\r\n return loop();\r\n }\r\n\r\n const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);\r\n\r\n switch (connectionMessage.path.toLowerCase()) {\r\n case \"turn.start\":\r\n {\r\n const turnRequestId = connectionMessage.requestId.toUpperCase();\r\n const audioSessionReqId = this.privRequestSession.requestId.toUpperCase();\r\n\r\n // turn started by the service\r\n if (turnRequestId !== audioSessionReqId) {\r\n this.privTurnStateManager.StartTurn(turnRequestId);\r\n } else {\r\n this.privRequestSession.onServiceTurnStartResponse();\r\n }\r\n }\r\n break;\r\n\r\n case \"speech.startdetected\":\r\n const speechStartDetected: SpeechDetected = 
SpeechDetected.fromJSON(connectionMessage.textBody, this.privRequestSession.currentTurnAudioOffset);\r\n\r\n const speechStartEventArgs = new RecognitionEventArgs(speechStartDetected.Offset, this.privRequestSession.sessionId);\r\n\r\n if (!!this.privRecognizer.speechStartDetected) {\r\n this.privRecognizer.speechStartDetected(this.privRecognizer, speechStartEventArgs);\r\n }\r\n\r\n break;\r\n\r\n case \"speech.enddetected\":\r\n\r\n let json: string;\r\n\r\n if (connectionMessage.textBody.length > 0) {\r\n json = connectionMessage.textBody;\r\n } else {\r\n // If the request was empty, the JSON returned is empty.\r\n json = \"{ Offset: 0 }\";\r\n }\r\n\r\n const speechStopDetected: SpeechDetected = SpeechDetected.fromJSON(json, this.privRequestSession.currentTurnAudioOffset);\r\n\r\n this.privRequestSession.onServiceRecognized(speechStopDetected.Offset);\r\n\r\n const speechStopEventArgs = new RecognitionEventArgs(speechStopDetected.Offset, this.privRequestSession.sessionId);\r\n\r\n if (!!this.privRecognizer.speechEndDetected) {\r\n this.privRecognizer.speechEndDetected(this.privRecognizer, speechStopEventArgs);\r\n }\r\n break;\r\n\r\n case \"turn.end\":\r\n {\r\n const turnEndRequestId = connectionMessage.requestId.toUpperCase();\r\n\r\n const audioSessionReqId = this.privRequestSession.requestId.toUpperCase();\r\n\r\n // turn started by the service\r\n if (turnEndRequestId !== audioSessionReqId) {\r\n this.privTurnStateManager.CompleteTurn(turnEndRequestId);\r\n } else {\r\n // Audio session turn\r\n\r\n const sessionStopEventArgs: SessionEventArgs = new SessionEventArgs(this.privRequestSession.sessionId);\r\n await this.privRequestSession.onServiceTurnEndResponse(false);\r\n\r\n if (!this.privRecognizerConfig.isContinuousRecognition || this.privRequestSession.isSpeechEnded || !this.privRequestSession.isRecognizing) {\r\n if (!!this.privRecognizer.sessionStopped) {\r\n this.privRecognizer.sessionStopped(this.privRecognizer, sessionStopEventArgs);\r\n }\r\n }\r\n\r\n // report result to promise.\r\n if (!!this.privSuccessCallback && this.privLastResult) {\r\n try {\r\n this.privSuccessCallback(this.privLastResult);\r\n this.privLastResult = null;\r\n } catch (e) {\r\n if (!!this.privErrorCallback) {\r\n this.privErrorCallback(e as string);\r\n }\r\n }\r\n // Only invoke the call back once.\r\n // and if it's successful don't invoke the\r\n // error after that.\r\n this.privSuccessCallback = undefined;\r\n this.privErrorCallback = undefined;\r\n }\r\n }\r\n }\r\n break;\r\n\r\n default:\r\n try {\r\n const processed = await this.processTypeSpecificMessages(connectionMessage);\r\n if (!processed) {\r\n if (!!this.serviceEvents) {\r\n this.serviceEvents.onEvent(new ServiceEvent(connectionMessage.path.toLowerCase(), connectionMessage.textBody));\r\n }\r\n }\r\n } catch (e) {\r\n //\r\n }\r\n }\r\n const ret: Promise<void> = loop();\r\n\r\n return ret;\r\n } catch (error) {\r\n this.terminateMessageLoop = true;\r\n communicationCustodian.resolve();\r\n }\r\n };\r\n\r\n loop().catch((reason: string): void => {\r\n Events.instance.onEvent(new BackgroundEvent(reason));\r\n });\r\n\r\n return communicationCustodian.promise;\r\n }\r\n\r\n private async startMessageLoop(): Promise<void> {\r\n\r\n this.terminateMessageLoop = false;\r\n\r\n try {\r\n await this.receiveDialogMessageOverride();\r\n } catch (error) {\r\n await this.cancelRecognition(this.privRequestSession.sessionId, this.privRequestSession.requestId, CancellationReason.Error, CancellationErrorCode.RuntimeError, error as string);\r\n 
}\r\n\r\n return Promise.resolve();\r\n }\r\n\r\n // Takes an established websocket connection to the endpoint and sends speech configuration information.\r\n private async configConnection(connection: IConnection): Promise<IConnection> {\r\n if (this.terminateMessageLoop) {\r\n this.terminateMessageLoop = false;\r\n return Promise.reject(\"Connection to service terminated.\");\r\n }\r\n\r\n await this.sendSpeechServiceConfig(connection, this.privRequestSession, this.privRecognizerConfig.SpeechServiceConfig.serialize());\r\n await this.sendAgentConfig(connection);\r\n return connection;\r\n }\r\n\r\n private async sendPreAudioMessages(): Promise<void> {\r\n const connection: IConnection = await this.fetchConnection();\r\n this.addKeywordContextData();\r\n await this.sendSpeechContext(connection, true);\r\n await this.sendAgentContext(connection);\r\n await this.sendWaveHeader(connection);\r\n }\r\n\r\n private sendAgentConfig(connection: IConnection): Promise<void> {\r\n if (this.agentConfig && !this.agentConfigSent) {\r\n\r\n if (this.privRecognizerConfig\r\n .parameters\r\n .getProperty(PropertyId.Conversation_DialogType) === DialogServiceConfig.DialogTypes.CustomCommands) {\r\n const config = this.agentConfig.get();\r\n config.botInfo.commandsCulture = this.privRecognizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_RecoLanguage, \"en-us\");\r\n this.agentConfig.set(config);\r\n }\r\n this.onEvent(new SendingAgentContextMessageEvent(this.agentConfig));\r\n\r\n const agentConfigJson = this.agentConfig.toJsonString();\r\n\r\n // guard against sending this multiple times on one connection\r\n this.agentConfigSent = true;\r\n\r\n return connection.send(new SpeechConnectionMessage(\r\n MessageType.Text,\r\n \"agent.config\",\r\n this.privRequestSession.requestId,\r\n \"application/json\",\r\n agentConfigJson));\r\n }\r\n\r\n return;\r\n }\r\n\r\n private sendAgentContext(connection: IConnection): Promise<void> {\r\n const guid: string = createGuid();\r\n\r\n const speechActivityTemplate = this.privDialogServiceConnector.properties.getProperty(PropertyId.Conversation_Speech_Activity_Template);\r\n\r\n const agentContext: any = {\r\n channelData: \"\",\r\n context: {\r\n interactionId: guid\r\n },\r\n messagePayload: typeof speechActivityTemplate === undefined ? 
undefined : speechActivityTemplate,\r\n version: 0.5\r\n };\r\n\r\n const agentContextJson = JSON.stringify(agentContext);\r\n\r\n return connection.send(new SpeechConnectionMessage(\r\n MessageType.Text,\r\n \"speech.agent.context\",\r\n this.privRequestSession.requestId,\r\n \"application/json\",\r\n agentContextJson));\r\n }\r\n\r\n private fireEventForResult(serviceResult: SimpleSpeechPhrase, properties: PropertyCollection): SpeechRecognitionEventArgs {\r\n const resultReason: ResultReason = EnumTranslation.implTranslateRecognitionResult(serviceResult.RecognitionStatus);\r\n\r\n const result = new SpeechRecognitionResult(\r\n this.privRequestSession.requestId,\r\n resultReason,\r\n serviceResult.DisplayText,\r\n serviceResult.Duration,\r\n serviceResult.Offset,\r\n serviceResult.Language,\r\n serviceResult.LanguageDetectionConfidence,\r\n undefined,\r\n undefined,\r\n serviceResult.asJson(),\r\n properties);\r\n\r\n const ev = new SpeechRecognitionEventArgs(result, serviceResult.Offset, this.privRequestSession.sessionId);\r\n return ev;\r\n }\r\n\r\n private handleResponseMessage(responseMessage: SpeechConnectionMessage): void {\r\n // \"response\" messages can contain either \"message\" (activity) or \"MessageStatus\" data. Fire the appropriate\r\n // event according to the message type that's specified.\r\n const responsePayload: { messageType: string } = JSON.parse(responseMessage.textBody) as { messageType: string };\r\n switch (responsePayload.messageType.toLowerCase()) {\r\n case \"message\":\r\n const responseRequestId = responseMessage.requestId.toUpperCase();\r\n const activityPayload: ActivityPayloadResponse = ActivityPayloadResponse.fromJSON(responseMessage.textBody);\r\n const turn = this.privTurnStateManager.GetTurn(responseRequestId);\r\n\r\n // update the conversation Id\r\n if (activityPayload.conversationId) {\r\n const updateAgentConfig = this.agentConfig.get();\r\n updateAgentConfig.botInfo.conversationId = activityPayload.conversationId;\r\n this.agentConfig.set(updateAgentConfig);\r\n }\r\n\r\n const pullAudioOutputStream: PullAudioOutputStreamImpl = turn.processActivityPayload(\r\n activityPayload,\r\n AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString(this.privDialogServiceConnector.properties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)));\r\n const activity = new ActivityReceivedEventArgs(activityPayload.messagePayload, pullAudioOutputStream);\r\n if (!!this.privDialogServiceConnector.activityReceived) {\r\n try {\r\n this.privDialogServiceConnector.activityReceived(this.privDialogServiceConnector, activity);\r\n /* eslint-disable-next-line no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n break;\r\n\r\n case \"messagestatus\":\r\n if (!!this.privDialogServiceConnector.turnStatusReceived) {\r\n try {\r\n this.privDialogServiceConnector.turnStatusReceived(\r\n this.privDialogServiceConnector,\r\n new TurnStatusReceivedEventArgs(responseMessage.textBody));\r\n /* eslint-disable-next-line no-empty */\r\n } catch (error) {\r\n // Not going to let errors in the event handler\r\n // trip things up.\r\n }\r\n }\r\n break;\r\n\r\n default:\r\n Events.instance.onEvent(\r\n new BackgroundEvent(`Unexpected response of type ${responsePayload.messageType}. 
Ignoring.`));\r\n break;\r\n }\r\n }\r\n\r\n private onEvent(event: DialogEvent): void {\r\n this.privEvents.onEvent(event);\r\n Events.instance.onEvent(event);\r\n }\r\n\r\n private addKeywordContextData(): void {\r\n const keywordPropertyValue: string = this.privRecognizerConfig.parameters.getProperty(\"SPEECH-KeywordsToDetect\");\r\n if (keywordPropertyValue === undefined) {\r\n return;\r\n }\r\n\r\n const keywordOffsetPropertyValue: string = this.privRecognizerConfig.parameters\r\n .getProperty(\"SPEECH-KeywordsToDetect-Offsets\");\r\n const keywordDurationPropertyValue: string = this.privRecognizerConfig.parameters\r\n .getProperty(\"SPEECH-KeywordsToDetect-Durations\");\r\n\r\n const keywords = keywordPropertyValue.split(\";\");\r\n const keywordOffsets = keywordOffsetPropertyValue === undefined ? [] : keywordOffsetPropertyValue.split(\";\");\r\n const keywordDurations = keywordDurationPropertyValue === undefined ? [] : keywordDurationPropertyValue.split(\";\");\r\n\r\n const keywordDefinitionArray = [];\r\n for (let i = 0; i < keywords.length; i++) {\r\n const definition: { [section: string]: any } = {};\r\n definition.text = keywords[i];\r\n if (i < keywordOffsets.length) {\r\n definition.offset = Number(keywordOffsets[i]);\r\n }\r\n if (i < keywordDurations.length) {\r\n definition.duration = Number(keywordDurations[i]);\r\n }\r\n keywordDefinitionArray.push(definition);\r\n }\r\n\r\n this.speechContext.setSection(\"invocationSource\", \"VoiceActivationWithKeyword\");\r\n this.speechContext.setSection(\"keywordDetection\", [{\r\n clientDetectedKeywords: keywordDefinitionArray,\r\n onReject: { action: \"EndOfTurn\" },\r\n type: \"startTrigger\"\r\n }]);\r\n }\r\n}\r\n"]}
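The sourcesContent above embeds the TypeScript for DialogServiceAdapter, the internal service recognizer behind the SDK's public DialogServiceConnector: it forwards the recognizing/recognized/canceled/activityReceived callbacks and relays agent messages over the "agent" and "speech.agent.context" paths. A minimal usage sketch of that public surface follows; it is illustrative only, assumes a Direct Line Speech (Bot Framework) subscription, and the key/region values are placeholders, not real credentials.

Usage sketch (TypeScript)
// Minimal sketch (assumptions noted above): drives the public DialogServiceConnector,
// which internally delegates to the DialogServiceAdapter shown in the source map above.
import {
    AudioConfig,
    BotFrameworkConfig,
    DialogServiceConnector,
    ResultReason,
} from "microsoft-cognitiveservices-speech-sdk";

// Placeholder credentials for illustration only.
const config = BotFrameworkConfig.fromSubscription("YOUR_SPEECH_KEY", "YOUR_REGION");
const audio = AudioConfig.fromDefaultMicrophoneInput();
const connector = new DialogServiceConnector(config, audio);

// These are the callbacks DialogServiceAdapter invokes while processing
// speech.hypothesis / speech.phrase / response messages from the service.
connector.recognizing = (_sender, e): void => {
    console.log(`RECOGNIZING: ${e.result.text}`);
};
connector.recognized = (_sender, e): void => {
    if (e.result.reason === ResultReason.RecognizedSpeech) {
        console.log(`RECOGNIZED: ${e.result.text}`);
    }
};
connector.activityReceived = (_sender, e): void => {
    // e.activity is the bot activity payload relayed by the adapter's "response" path handler.
    console.log("ACTIVITY:", JSON.stringify(e.activity));
};
connector.canceled = (_sender, e): void => {
    console.log(`CANCELED: ${e.errorDetails}`);
};

// listenOnceAsync starts a single listen turn (listenOnce in the adapter source).
connector.listenOnceAsync(
    (result): void => console.log(`Final result reason: ${ResultReason[result.reason]}`),
    (err): void => console.error(err),
);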