microsoft-cognitiveservices-speech-sdk
Microsoft Cognitive Services Speech SDK for JavaScript
Source Map (JSON) for PCMRecorder.d.ts
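The file below is a version-3 source map whose sourcesContent field embeds the full original TypeScript. As a minimal, hedged sketch of recovering that embedded source programmatically (the CDN URL and package path are assumptions, not part of this package's documentation):

// Hypothetical sketch: fetch a .d.ts.map file and print the original sources it embeds.
interface SourceMapV3 {
    version: number;
    file?: string;
    sources: string[];
    sourcesContent?: string[];
    names: string[];
    mappings: string;
}

async function printEmbeddedSources(mapUrl: string): Promise<void> {
    const response = await fetch(mapUrl);
    const map = (await response.json()) as SourceMapV3;
    map.sources.forEach((source: string, i: number): void => {
        // sourcesContent[i], when present, holds the complete text of sources[i].
        console.log(`// ${source}`);
        console.log(map.sourcesContent?.[i] ?? "(source not embedded)");
    });
}

// The URL below is illustrative only.
void printEmbeddedSources("https://unpkg.com/microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common.browser/PCMRecorder.d.ts.map");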
Version 3 source map for PCMRecorder.d.ts, mapping back to src/common.browser/PCMRecorder.ts. Embedded original source (sourcesContent):

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { RiffPcmEncoder, Stream } from "../common/Exports";
import { IRecorder } from "./IRecorder";

export class PcmRecorder implements IRecorder {
    private privMediaResources: IMediaResources;
    private privSpeechProcessorScript: string; // speech-processor.js Url
    private privStopInputOnRelease: boolean;

    public constructor(stopInputOnRelease: boolean) {
        this.privStopInputOnRelease = stopInputOnRelease;
    }

    public record(context: AudioContext, mediaStream: MediaStream, outputStream: Stream<ArrayBuffer>): void {
        const desiredSampleRate = 16000;

        const waveStreamEncoder = new RiffPcmEncoder(context.sampleRate, desiredSampleRate);

        const micInput = context.createMediaStreamSource(mediaStream);

        const attachScriptProcessor = (): void => {
            // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
            const scriptNode = (() => {
                let bufferSize = 0;
                try {
                    return context.createScriptProcessor(bufferSize, 1, 1);
                } catch (error) {
                    // Webkit (<= version 31) requires a valid bufferSize.
                    bufferSize = 2048;
                    let audioSampleRate = context.sampleRate;
                    while (bufferSize < 16384 && audioSampleRate >= (2 * desiredSampleRate)) {
                        bufferSize <<= 1;
                        audioSampleRate >>= 1;
                    }
                    return context.createScriptProcessor(bufferSize, 1, 1);
                }
            })();
            scriptNode.onaudioprocess = (event: AudioProcessingEvent): void => {
                const inputFrame = event.inputBuffer.getChannelData(0);

                if (outputStream && !outputStream.isClosed) {
                    const waveFrame = waveStreamEncoder.encode(inputFrame);
                    if (!!waveFrame) {
                        outputStream.writeStreamChunk({
                            buffer: waveFrame,
                            isEnd: false,
                            timeReceived: Date.now(),
                        });
                    }
                }
            };
            micInput.connect(scriptNode);
            scriptNode.connect(context.destination);
            this.privMediaResources = {
                scriptProcessorNode: scriptNode,
                source: micInput,
                stream: mediaStream,
            };
        };

        // https://webaudio.github.io/web-audio-api/#audioworklet
        // Using AudioWorklet to improve audio quality and avoid audio glitches due to blocking the UI thread
        const skipAudioWorklet = !!this.privSpeechProcessorScript && this.privSpeechProcessorScript.toLowerCase() === "ignore";

        if (!!context.audioWorklet && !skipAudioWorklet) {
            if (!this.privSpeechProcessorScript) {
                const workletScript = `class SP extends AudioWorkletProcessor {
                    constructor(options) {
                        super(options);
                    }
                    process(inputs, outputs) {
                        const input = inputs[0];
                        const output = [];
                        for (let channel = 0; channel < input.length; channel += 1) {
                            output[channel] = input[channel];
                        }
                        this.port.postMessage(output[0]);
                        return true;
                    }
                }
                registerProcessor('speech-processor', SP);`;
                const blob = new Blob([workletScript], { type: "application/javascript; charset=utf-8" });
                this.privSpeechProcessorScript = URL.createObjectURL(blob);
            }

            context.audioWorklet
                .addModule(this.privSpeechProcessorScript)
                .then((): void => {
                    const workletNode = new AudioWorkletNode(context, "speech-processor");
                    workletNode.port.onmessage = (ev: MessageEvent): void => {
                        const inputFrame: Float32Array = ev.data as Float32Array;

                        if (outputStream && !outputStream.isClosed) {
                            const waveFrame = waveStreamEncoder.encode(inputFrame);
                            if (!!waveFrame) {
                                outputStream.writeStreamChunk({
                                    buffer: waveFrame,
                                    isEnd: false,
                                    timeReceived: Date.now(),
                                });
                            }
                        }
                    };
                    micInput.connect(workletNode);
                    workletNode.connect(context.destination);
                    this.privMediaResources = {
                        scriptProcessorNode: workletNode,
                        source: micInput,
                        stream: mediaStream,
                    };
                })
                .catch((): void => {
                    attachScriptProcessor();
                });
        } else {
            try {
                attachScriptProcessor();
            } catch (err) {
                throw new Error(`Unable to start audio worklet node for PCMRecorder: ${err as string}`);
            }
        }
    }

    public releaseMediaResources(context: AudioContext): void {
        if (this.privMediaResources) {
            if (this.privMediaResources.scriptProcessorNode) {
                this.privMediaResources.scriptProcessorNode.disconnect(context.destination);
                this.privMediaResources.scriptProcessorNode = null;
            }
            if (this.privMediaResources.source) {
                this.privMediaResources.source.disconnect();
                if (this.privStopInputOnRelease) {
                    this.privMediaResources.stream.getTracks().forEach((track: MediaStreamTrack): void => track.stop());
                }
                this.privMediaResources.source = null;
            }
        }
    }

    public setWorkletUrl(url: string): void {
        this.privSpeechProcessorScript = url;
    }
}

interface IMediaResources {
    source: MediaStreamAudioSourceNode;
    scriptProcessorNode: ScriptProcessorNode | AudioWorkletNode;
    stream: MediaStream;
}
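Taken together, record() prefers an AudioWorkletNode, which captures audio on the audio rendering thread instead of the UI thread, and falls back to the deprecated ScriptProcessorNode when audioWorklet is unavailable or addModule() fails; setWorkletUrl() lets a host serve its own speech-processor script in place of the generated blob: URL, and the special value "ignore" forces the ScriptProcessorNode path. A minimal usage sketch follows; PcmRecorder and Stream are internal SDK classes, so the deep import paths and the no-argument Stream constructor shown here are assumptions, while getUserMedia and AudioContext are standard browser APIs.

// Hypothetical usage sketch; the deep import paths assume the package's published
// distrib/lib layout and are not a documented public API.
import { PcmRecorder } from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common.browser/PCMRecorder";
import { Stream } from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common/Exports";

async function startMicrophoneCapture(): Promise<() => void> {
    // Standard browser APIs: a microphone MediaStream and an audio graph to process it in.
    const mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const context = new AudioContext();

    // record() downsamples to 16 kHz PCM and writes chunks into this stream via writeStreamChunk().
    // (Assumes Stream<ArrayBuffer> can be constructed without arguments.)
    const outputStream = new Stream<ArrayBuffer>();

    // true => also stop the microphone tracks when releaseMediaResources() runs.
    const recorder = new PcmRecorder(true);
    recorder.record(context, mediaStream, outputStream);

    // Teardown: disconnects the worklet/script-processor node and the mic source.
    return (): void => {
        recorder.releaseMediaResources(context);
    };
}

In the SDK itself this wiring is done by its microphone audio source rather than by application code; the sketch only mirrors the call sequence visible in the file above.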