UNPKG

speech-to-element

Version:

Add real-time speech-to-text functionality to your website with no effort

160 lines (159 loc) 7.62 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Azure = void 0; const preventConnectionStop_1 = require("./preventConnectionStop"); const azureSpeechConfig_1 = require("./azureSpeechConfig"); const stopTimeout_1 = require("../../utils/stopTimeout"); const azureAudioConfig_1 = require("./azureAudioConfig"); const azureTranscript_1 = require("./azureTranscript"); const speech_1 = require("../../speech"); // REF - https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/js/browser/index.html#L240 class Azure extends speech_1.Speech { constructor() { super(...arguments); this._newTextPadding = ''; // Unlike webspeech there is no automatic space between final results } start(options, isDuringReset) { this._newTextPadding = ''; if (this.stopTimeout === undefined) stopTimeout_1.StopTimeout.reset(this, options === null || options === void 0 ? void 0 : options.stopAfterSilenceMs); this.prepareBeforeStart(options); // need to prepare before validation to set onError this.startAsync(options); if (!isDuringReset) preventConnectionStop_1.PreventConnectionStop.applyPrevention(this); } async startAsync(options) { var _a; if (this.validate(options)) { await this.instantiateService(options); this._translations = options === null || options === void 0 ? void 0 : options.translations; (_a = this._service) === null || _a === void 0 ? 
void 0 : _a.startContinuousRecognitionAsync(() => { }, this.error); } } validate(options) { if (!Azure.getAPI()) { this.moduleNotFound(); return false; } return azureSpeechConfig_1.AzureSpeechConfig.validateOptions(this.error.bind(this), options); } async instantiateService(options) { const speechSDK = Azure.getAPI(); const audioConfig = azureAudioConfig_1.AzureAudioConfig.get(speechSDK.AudioConfig, options.deviceId); const speechConfig = await azureSpeechConfig_1.AzureSpeechConfig.get(speechSDK.SpeechConfig, options); if (speechConfig) { let recognizer; if (options.autoLanguage && options.autoLanguage.languages.length > 0) { // https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-identification const { type, languages } = options.autoLanguage; const maxLanguages = languages.slice(0, type === 'Continuous' ? 10 : 4); const autoDetectLanguageConfig = speechSDK.AutoDetectSourceLanguageConfig.fromLanguages(maxLanguages); if (type === 'Continuous') autoDetectLanguageConfig.mode = 1; recognizer = speechSDK.SpeechRecognizer.FromConfig(speechConfig, autoDetectLanguageConfig, audioConfig); } else { recognizer = new speechSDK.SpeechRecognizer(speechConfig, audioConfig); } this.setEvents(recognizer); this._service = recognizer; if (options.retrieveToken) this.retrieveTokenInterval(options.retrieveToken); } else { this.error('Unable to contact Azure server'); } } setEvents(recognizer) { recognizer.recognizing = this.onRecognizing.bind(this); recognizer.recognized = this.onRecognized.bind(this); recognizer.sessionStarted = this.onSessionStarted.bind(this); recognizer.canceled = this.onCanceled.bind(this); recognizer.sessionStopped = this.onSessionStopped.bind(this); // PhraseListGrammar allows for the customization of recognizer vocabulary. // The semicolon-delimited list of words or phrases will be treated as additional, more likely components // of recognition results when applied to the recognizer. 
// // See https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-speech-to-text#improve-recognition-accuracy // if (phrases.value) { // const phraseListGrammar = sdk.PhraseListGrammar.fromRecognizer(recognizer); // phraseListGrammar.addPhrases(phrases.value.split(';')); // } } // prettier-ignore onRecognizing(_, event) { if (this._stopping) return; const { interimTranscript, finalTranscript, newText } = azureTranscript_1.AzureTranscript.extract(this._newTextPadding + event.result.text, this.finalTranscript, false, this._translations); stopTimeout_1.StopTimeout.reset(this, this.stopTimeoutMS); this.updateElements(interimTranscript, finalTranscript, newText); } // prettier-ignore onRecognized(_, event) { const result = event.result; switch (result.reason) { case window.SpeechSDK.ResultReason.Canceled: break; case window.SpeechSDK.ResultReason.RecognizedSpeech: if (result.text && !this._stopping) { const { interimTranscript, finalTranscript, newText } = azureTranscript_1.AzureTranscript.extract(this._newTextPadding + result.text, this.finalTranscript, true, this._translations); stopTimeout_1.StopTimeout.reset(this, this.stopTimeoutMS); this.updateElements(interimTranscript, finalTranscript, newText); if (finalTranscript !== '') this._newTextPadding = ' '; } break; } } onCanceled(_, event) { if (event.reason === window.SpeechSDK.CancellationReason.Error) { this.error(event.errorDetails); } } onSessionStarted() { preventConnectionStop_1.PreventConnectionStop.clearPrevention(this); this.setStateOnStart(); } onSessionStopped() { if (!this._retrieveTokenInterval) clearInterval(this._retrieveTokenInterval); this._stopping = false; this.setStateOnStop(); } retrieveTokenInterval(retrieveToken) { this._retrieveTokenInterval = setInterval(() => { retrieveToken === null || retrieveToken === void 0 ? void 0 : retrieveToken().then((token) => { if (this._service) this._service.authorizationToken = (token === null || token === void 0 ? 
void 0 : token.trim()) || ''; }).catch((error) => { this.error(error); }); }, 10000); } stop(isDuringReset) { var _a; if (!isDuringReset && this._retrieveTokenInterval) clearInterval(this._retrieveTokenInterval); this._stopping = true; (_a = this._service) === null || _a === void 0 ? void 0 : _a.stopContinuousRecognitionAsync(); stopTimeout_1.StopTimeout.stop(this); this.finalise(isDuringReset); } static getAPI() { return window.SpeechSDK; } moduleNotFound() { console.error('speech recognition module not found:'); console.error("please install the 'microsoft-cognitiveservices-speech-sdk' npm package " + 'or add a script tag: <script src="https://aka.ms/csspeech/jsbrowserpackageraw"></script>'); this.setStateOnError('speech recognition module not found'); } error(details) { if (this._retrieveTokenInterval) clearInterval(this._retrieveTokenInterval); console.error(details); this.setStateOnError(details); this.stop(); } } exports.Azure = Azure;