UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

91 lines 3.56 kB
import { request } from 'gaxios';
import { Logger } from '../utilities/Logger.js';
import { extendDeep } from '../utilities/ObjectUtilities.js';
import * as FFMpegTranscoder from '../codecs/FFMpegTranscoder.js';

/**
 * Transcribes audio using the Deepgram `/v1/listen` HTTP endpoint.
 *
 * The audio is encoded through `FFMpegTranscoder` (using its default speech options for
 * 'opus'), POSTed to Deepgram, and the first alternative of the first channel in the
 * response is converted to a transcript plus a word-level timeline.
 *
 * @param rawAudio - Audio handed to `FFMpegTranscoder.encodeFromChannels`; its exact shape
 *   is defined by that helper.
 * @param {string} [languageCode] - Sent as the `language` query parameter. When falsy,
 *   `detect_language=true` is sent instead.
 * @param {object} [options] - Overrides combined with `defaultDeepgramSTTOptions` through
 *   `extendDeep` (`apiKey`, `model`, `punctuate`).
 * @returns {Promise<{ transcript: string, timeline: object[] }>} The transcript (empty
 *   string when the response has no usable alternative) and one `word` entry per
 *   recognized word with `text`, `startTime`, `endTime` and `confidence`.
 * @throws {Error} When no API key is configured, or when a word cannot be located in the
 *   transcript while `punctuate` is enabled. Request failures are logged and rethrown
 *   unchanged.
 */
export async function recognize(rawAudio, languageCode, options) {
    const logger = new Logger();
    logger.start('Initialize Deepgram recognition');

    const resolvedOptions = extendDeep(defaultDeepgramSTTOptions, options);

    if (!resolvedOptions.apiKey) {
        throw new Error('No Deepgram API key provided');
    }

    // Prepare API request query parameters
    const params = {
        model: resolvedOptions.model,
        encoding: 'opus',
        punctuate: resolvedOptions.punctuate ? 'true' : 'false',
    };

    // Set language or enable auto-detection
    if (languageCode) {
        params.language = languageCode;
    } else {
        params.detect_language = 'true';
    }

    // Transcode the input audio before uploading it
    logger.start('Convert audio to Opus format');
    const audioData = await FFMpegTranscoder.encodeFromChannels(
        rawAudio,
        FFMpegTranscoder.getDefaultFFMpegOptionsForSpeech('opus'),
    );

    logger.start('Send request to Deepgram API');

    let response;

    try {
        response = await request({
            method: 'POST',
            url: 'https://api.deepgram.com/v1/listen',
            params,
            headers: {
                'Authorization': `Token ${resolvedOptions.apiKey}`,
                'Content-Type': 'audio/ogg',
                'Accept': 'application/json',
            },
            body: audioData,
            responseType: 'json',
        });
    } catch (e) {
        // Surface whatever the server sent back before propagating the original error
        const errorResponse = e?.response;

        if (errorResponse) {
            logger.log(`Request failed with status code ${errorResponse.status}`);

            if (errorResponse.data) {
                logger.log(`Server responded with:`);
                logger.log(errorResponse.data);
            }
        }

        throw e;
    }

    const deepgramResponse = response.data;

    // Every level below `results` is optional-chained so that a response without channels
    // or alternatives yields an empty result instead of a TypeError.
    const firstAlternative = deepgramResponse.results?.channels?.[0]?.alternatives?.[0];

    // Extract transcript
    const transcript = firstAlternative?.transcript || '';

    // Extract word-level timing information if available
    const words = firstAlternative?.words || [];

    const timeline = words.map((wordEntry) => ({
        type: 'word',
        text: wordEntry.word,
        startTime: wordEntry.start,
        endTime: wordEntry.end,
        confidence: wordEntry.confidence,
    }));

    // If `punctuate` is set to `true`, modify the text of all words to match their exact case in the transcript.
    // This is required, otherwise it would later fail deriving word offsets.
    if (resolvedOptions.punctuate) {
        applyTranscriptCasing(timeline, transcript);
    }

    logger.end();

    return { transcript, timeline };
}

/**
 * Replaces the `text` of each timeline entry, in place, with the substring of `transcript`
 * that it matches case-insensitively. Entries are matched left to right, each search
 * starting where the previous match ended.
 *
 * @param {object[]} timeline - Word entries whose `text` is rewritten.
 * @param {string} transcript - Full transcript text to take the casing from.
 * @throws {Error} When an entry's text cannot be found at or after the current offset.
 */
function applyTranscriptCasing(timeline, transcript) {
    // NOTE(review): positions found in the lowercased transcript are applied to the original
    // transcript, which assumes lowercasing does not change string length — confirm for
    // non-ASCII input.
    const lowerCaseTranscript = transcript.toLocaleLowerCase();

    let readOffset = 0;

    for (const wordEntry of timeline) {
        const lowerCaseWord = wordEntry.text.toLocaleLowerCase();
        const matchPosition = lowerCaseTranscript.indexOf(lowerCaseWord, readOffset);

        if (matchPosition === -1) {
            throw new Error(`Couldn't match the word '${wordEntry.text}' in the lowercase transcript`);
        }

        wordEntry.text = transcript.substring(matchPosition, matchPosition + lowerCaseWord.length);
        readOffset = matchPosition + wordEntry.text.length;
    }
}

/**
 * Default option values used by `recognize`. `apiKey` has no default and must be supplied
 * by the caller.
 */
export const defaultDeepgramSTTOptions = {
    apiKey: undefined,
    model: 'nova-2',
    punctuate: true,
};
//# sourceMappingURL=DeepgramSTT.js.map