speech-to-element
Version:
Add real-time speech to text functionality into your website with no effort
160 lines (159 loc) • 7.62 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Azure = void 0;
const preventConnectionStop_1 = require("./preventConnectionStop");
const azureSpeechConfig_1 = require("./azureSpeechConfig");
const stopTimeout_1 = require("../../utils/stopTimeout");
const azureAudioConfig_1 = require("./azureAudioConfig");
const azureTranscript_1 = require("./azureTranscript");
const speech_1 = require("../../speech");
// REF - https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/js/browser/index.html#L240
/**
 * Speech-to-text service backed by the Azure Cognitive Services Speech SDK.
 *
 * The SDK is looked up on `window.SpeechSDK` (see `getAPI`). Recognition results are
 * converted by `AzureTranscript.extract` and forwarded to `updateElements`, which is
 * inherited from `Speech`.
 */
class Azure extends speech_1.Speech {
    constructor() {
        super(...arguments);
        // Unlike webspeech there is no automatic space between final results
        this._newTextPadding = '';
    }

    /**
     * Begins a recognition session.
     *
     * @param options - Azure options; `stopAfterSilenceMs`, `translations`, `deviceId`,
     *   `autoLanguage` and `retrieveToken` are read by this class.
     * @param isDuringReset - truthy when called as part of a restart; the connection-stop
     *   prevention is then not re-applied.
     */
    start(options, isDuringReset) {
        this._newTextPadding = '';
        if (this.stopTimeout === undefined) {
            const silenceMs = options === null || options === undefined ? undefined : options.stopAfterSilenceMs;
            stopTimeout_1.StopTimeout.reset(this, silenceMs);
        }
        this.prepareBeforeStart(options); // need to prepare before validation to set onError
        // startAsync is asynchronous; route any rejection into the normal error path
        // instead of leaving it as an unhandled promise rejection.
        this.startAsync(options).catch((error) => this.error(error));
        if (!isDuringReset) {
            preventConnectionStop_1.PreventConnectionStop.applyPrevention(this);
        }
    }

    /**
     * Validates the options, builds the recognizer and starts continuous recognition.
     * Does nothing further when validation fails.
     */
    async startAsync(options) {
        if (!this.validate(options)) {
            return;
        }
        await this.instantiateService(options);
        this._translations = options === null || options === undefined ? undefined : options.translations;
        if (this._service !== null && this._service !== undefined) {
            // The failure callback is invoked by the SDK, so it must be bound to this instance.
            this._service.startContinuousRecognitionAsync(() => { }, this.error.bind(this));
        }
    }

    /**
     * @returns false (after reporting the problem) when the SDK is missing; otherwise the
     *   result of `AzureSpeechConfig.validateOptions`.
     */
    validate(options) {
        if (!Azure.getAPI()) {
            this.moduleNotFound();
            return false;
        }
        return azureSpeechConfig_1.AzureSpeechConfig.validateOptions(this.error.bind(this), options);
    }

    /**
     * Creates the SDK recognizer, wires its events and stores it in `this._service`.
     * Reports an error when no speech config could be obtained.
     */
    async instantiateService(options) {
        const speechSDK = Azure.getAPI();
        const audioConfig = azureAudioConfig_1.AzureAudioConfig.get(speechSDK.AudioConfig, options.deviceId);
        const speechConfig = await azureSpeechConfig_1.AzureSpeechConfig.get(speechSDK.SpeechConfig, options);
        if (!speechConfig) {
            this.error('Unable to contact Azure server');
            return;
        }
        let recognizer;
        if (options.autoLanguage && options.autoLanguage.languages.length > 0) {
            // https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-identification
            const { type, languages } = options.autoLanguage;
            const isContinuous = type === 'Continuous';
            // Candidate list is capped at 10 entries for 'Continuous' and 4 otherwise.
            const candidateLanguages = languages.slice(0, isContinuous ? 10 : 4);
            const autoDetectLanguageConfig = speechSDK.AutoDetectSourceLanguageConfig.fromLanguages(candidateLanguages);
            if (isContinuous) {
                // NOTE(review): 1 presumably maps to the SDK's LanguageIdMode.Continuous - confirm.
                autoDetectLanguageConfig.mode = 1;
            }
            recognizer = speechSDK.SpeechRecognizer.FromConfig(speechConfig, autoDetectLanguageConfig, audioConfig);
        }
        else {
            recognizer = new speechSDK.SpeechRecognizer(speechConfig, audioConfig);
        }
        this.setEvents(recognizer);
        this._service = recognizer;
        if (options.retrieveToken) {
            this.retrieveTokenInterval(options.retrieveToken);
        }
    }

    /** Attaches this instance's handlers to the recognizer's event properties. */
    setEvents(recognizer) {
        recognizer.recognizing = this.onRecognizing.bind(this);
        recognizer.recognized = this.onRecognized.bind(this);
        recognizer.sessionStarted = this.onSessionStarted.bind(this);
        recognizer.canceled = this.onCanceled.bind(this);
        recognizer.sessionStopped = this.onSessionStopped.bind(this);
        // PhraseListGrammar allows for the customization of recognizer vocabulary.
        // The semicolon-delimited list of words or phrases will be treated as additional, more likely components
        // of recognition results when applied to the recognizer.
        //
        // See https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-speech-to-text#improve-recognition-accuracy
        // if (phrases.value) {
        //   const phraseListGrammar = sdk.PhraseListGrammar.fromRecognizer(recognizer);
        //   phraseListGrammar.addPhrases(phrases.value.split(';'));
        // }
    }

    /** Handles an in-progress recognition result; ignored while stopping. */
    // prettier-ignore
    onRecognizing(_, event) {
        if (this._stopping) {
            return;
        }
        const { interimTranscript, finalTranscript, newText } = azureTranscript_1.AzureTranscript.extract(
            this._newTextPadding + event.result.text, this.finalTranscript, false, this._translations);
        stopTimeout_1.StopTimeout.reset(this, this.stopTimeoutMS);
        this.updateElements(interimTranscript, finalTranscript, newText);
    }

    /**
     * Handles a completed recognition result. Only results whose reason is
     * `RecognizedSpeech` with non-empty text are processed, and only while not stopping.
     */
    // prettier-ignore
    onRecognized(_, event) {
        const result = event.result;
        if (result.reason !== Azure.getAPI().ResultReason.RecognizedSpeech) {
            return;
        }
        if (!result.text || this._stopping) {
            return;
        }
        const { interimTranscript, finalTranscript, newText } = azureTranscript_1.AzureTranscript.extract(
            this._newTextPadding + result.text, this.finalTranscript, true, this._translations);
        stopTimeout_1.StopTimeout.reset(this, this.stopTimeoutMS);
        this.updateElements(interimTranscript, finalTranscript, newText);
        // Once there is final text, later results need a separating space.
        if (finalTranscript !== '') {
            this._newTextPadding = ' ';
        }
    }

    /** Reports the error details when recognition was cancelled because of an error. */
    onCanceled(_, event) {
        if (event.reason === Azure.getAPI().CancellationReason.Error) {
            this.error(event.errorDetails);
        }
    }

    onSessionStarted() {
        preventConnectionStop_1.PreventConnectionStop.clearPrevention(this);
        this.setStateOnStart();
    }

    /**
     * Cleans up after a session ends: stops the token refresh timer, clears the
     * stopping flag and updates state.
     *
     * @param sender - first argument supplied by the SDK event; assumed to be the
     *   recognizer that raised the event (TODO confirm against SDK docs). When it is a
     *   recognizer other than the active `_service` (a late event from before a reset),
     *   the current token refresh timer is left running.
     */
    onSessionStopped(sender) {
        const isFromReplacedRecognizer = sender !== undefined
            && this._service !== undefined
            && sender !== this._service;
        if (!isFromReplacedRecognizer) {
            this.clearRetrieveTokenInterval();
        }
        this._stopping = false;
        this.setStateOnStop();
    }

    /**
     * Periodically fetches a fresh authorization token and assigns it to the recognizer.
     *
     * @param retrieveToken - function returning a promise of the token string.
     */
    retrieveTokenInterval(retrieveToken) {
        // A reset keeps the previous timer alive (see stop), so drop it before creating
        // a new one; otherwise its handle is overwritten and it can never be cleared.
        this.clearRetrieveTokenInterval();
        const refreshPeriodMs = 10000;
        this._retrieveTokenInterval = setInterval(() => {
            if (retrieveToken === null || retrieveToken === undefined) {
                return;
            }
            retrieveToken().then((token) => {
                if (this._service) {
                    const trimmed = token === null || token === undefined ? undefined : token.trim();
                    this._service.authorizationToken = trimmed || '';
                }
            }).catch((error) => {
                this.error(error);
            });
        }, refreshPeriodMs);
    }

    /** Stops the token refresh timer if one is running and forgets its handle. */
    clearRetrieveTokenInterval() {
        if (this._retrieveTokenInterval) {
            clearInterval(this._retrieveTokenInterval);
            this._retrieveTokenInterval = undefined;
        }
    }

    /**
     * Stops recognition.
     *
     * @param isDuringReset - truthy when stopping as part of a restart; the token
     *   refresh timer is then left running.
     */
    stop(isDuringReset) {
        if (!isDuringReset) {
            this.clearRetrieveTokenInterval();
        }
        this._stopping = true;
        if (this._service !== null && this._service !== undefined) {
            this._service.stopContinuousRecognitionAsync();
        }
        stopTimeout_1.StopTimeout.stop(this);
        this.finalise(isDuringReset);
    }

    /** @returns the Speech SDK object exposed on `window`, or undefined when absent. */
    static getAPI() {
        return window.SpeechSDK;
    }

    /** Logs installation instructions and moves to the error state. */
    moduleNotFound() {
        console.error('speech recognition module not found:');
        console.error("please install the 'microsoft-cognitiveservices-speech-sdk' npm package " +
            'or add a script tag: <script src="https://aka.ms/csspeech/jsbrowserpackageraw"></script>');
        this.setStateOnError('speech recognition module not found');
    }

    /** Logs the failure, moves to the error state and stops recognition. */
    error(details) {
        this.clearRetrieveTokenInterval();
        console.error(details);
        this.setStateOnError(details);
        this.stop();
    }
}
exports.Azure = Azure;