echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
91 lines • 3.56 kB
JavaScript
import { request } from 'gaxios';
import { Logger } from '../utilities/Logger.js';
import { extendDeep } from '../utilities/ObjectUtilities.js';
import * as FFMpegTranscoder from '../codecs/FFMpegTranscoder.js';
/**
 * Transcribes audio with the Deepgram speech-to-text REST API.
 *
 * The audio is encoded through `FFMpegTranscoder` (using its default speech options for
 * 'opus'), posted to `https://api.deepgram.com/v1/listen`, and the first alternative of
 * the first channel in the JSON response is converted to a transcript and a word timeline.
 *
 * @param {*} rawAudio - Audio handed unchanged to `FFMpegTranscoder.encodeFromChannels`.
 * @param {string} [languageCode] - Sent as the `language` query parameter. When falsy,
 *   `detect_language=true` is sent instead.
 * @param {object} [options] - Overrides merged over `defaultDeepgramSTTOptions` via
 *   `extendDeep` (`apiKey`, `model`, `punctuate`).
 * @returns {Promise<{ transcript: string, timeline: object[] }>} The transcript (empty string
 *   when the response carries no alternative) and one `{ type: 'word', text, startTime,
 *   endTime, confidence }` entry per word reported by the service.
 * @throws {Error} When no API key is configured, or when `punctuate` is enabled and a reported
 *   word cannot be located in the transcript. Request failures are logged and rethrown as-is.
 */
export async function recognize(rawAudio, languageCode, options) {
    const logger = new Logger();

    logger.start('Initialize Deepgram recognition');

    // Keep the caller's argument untouched; work on the merged copy.
    const resolvedOptions = extendDeep(defaultDeepgramSTTOptions, options);

    if (!resolvedOptions.apiKey) {
        throw new Error('No Deepgram API key provided');
    }

    // Query parameters for the request. Boolean flags are sent as the strings 'true' / 'false'.
    const params = {
        model: resolvedOptions.model,
        encoding: 'opus',
        punctuate: resolvedOptions.punctuate ? 'true' : 'false',
    };

    // Either pin the language or ask the service to detect it.
    if (languageCode) {
        params.language = languageCode;
    }
    else {
        params.detect_language = 'true';
    }

    logger.start('Convert audio to Opus format');

    const audioData = await FFMpegTranscoder.encodeFromChannels(
        rawAudio,
        FFMpegTranscoder.getDefaultFFMpegOptionsForSpeech('opus'));

    logger.start('Send request to Deepgram API');

    let response;

    try {
        response = await request({
            method: 'POST',
            url: 'https://api.deepgram.com/v1/listen',
            params,
            headers: {
                'Authorization': `Token ${resolvedOptions.apiKey}`,
                'Content-Type': 'audio/ogg',
                'Accept': 'application/json'
            },
            body: audioData,
            responseType: 'json',
        });
    }
    catch (e) {
        // Surface whatever the server said before propagating the original error.
        const errorResponse = e?.response;

        if (errorResponse) {
            logger.log(`Request failed with status code ${errorResponse.status}`);

            if (errorResponse.data) {
                logger.log(`Server responded with:`);
                logger.log(errorResponse.data);
            }
        }

        throw e;
    }

    // Every step of the lookup is optional: a response without `results`, `channels`
    // or `alternatives` yields an empty transcript instead of a TypeError.
    const firstAlternative = response.data?.results?.channels?.[0]?.alternatives?.[0];

    const transcript = firstAlternative?.transcript ?? '';
    const words = firstAlternative?.words ?? [];

    // Convert the reported words to timeline entries
    const timeline = words.map((wordEntry) => ({
        type: 'word',
        text: wordEntry.word,
        startTime: wordEntry.start,
        endTime: wordEntry.end,
        confidence: wordEntry.confidence,
    }));

    // If `punctuate` is set to `true`, modify the text of all words to match their exact case in the transcript.
    // This is required, otherwise it would later fail deriving word offsets.
    if (resolvedOptions.punctuate) {
        const lowerCaseTranscript = transcript.toLocaleLowerCase();

        // Search position only moves forward, so repeated words map to successive occurrences.
        let readOffset = 0;

        for (const wordEntry of timeline) {
            const wordEntryTextLowercase = wordEntry.text.toLocaleLowerCase();
            const matchPosition = lowerCaseTranscript.indexOf(wordEntryTextLowercase, readOffset);

            if (matchPosition === -1) {
                throw new Error(`Couldn't match the word '${wordEntry.text}' in the lowercase transcript`);
            }

            // Take the same span from the original transcript to recover its casing.
            wordEntry.text = transcript.substring(matchPosition, matchPosition + wordEntryTextLowercase.length);
            readOffset = matchPosition + wordEntry.text.length;
        }
    }

    logger.end();

    return { transcript, timeline };
}
/**
 * Baseline options for Deepgram recognition. `recognize` merges the caller's
 * options over these values before building the request.
 */
export const defaultDeepgramSTTOptions = {
    // No key by default; `recognize` throws unless the caller supplies one.
    apiKey: undefined,

    // Sent as the `model` query parameter.
    model: 'nova-2',

    // Sent as the `punctuate` query parameter; when enabled, `recognize` also
    // re-cases each word so that it matches the returned transcript.
    punctuate: true,
};
//# sourceMappingURL=DeepgramSTT.js.map