UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

67 lines 2.44 kB
import { request } from 'gaxios';
import * as FFMpegTranscoder from '../codecs/FFMpegTranscoder.js';
import { Logger } from '../utilities/Logger.js';
import { encodeBase64 } from '../encodings/Base64.js';

/**
 * Transcribes audio by sending it to the Google Cloud Speech-to-Text
 * `speech:recognize` endpoint (v1p1beta1).
 *
 * The input is first re-encoded to 16 kHz, signed 16-bit, mono FLAC via
 * `FFMpegTranscoder.encodeFromChannels`, then base64-encoded and embedded in
 * the JSON request body.
 *
 * @param {*} rawAudio - Audio passed as-is to `FFMpegTranscoder.encodeFromChannels`
 *   (NOTE(review): exact shape is defined by that helper — confirm against it).
 * @param {string} apiKey - API key, sent as the `key` query parameter.
 * @param {string} [languageCode='en-US'] - Language code placed in the recognition config.
 * @returns {Promise<{ transcript: string, timeline: Array<object> }>} The concatenated
 *   transcript and a flat list of word entries with timing and confidence.
 */
export async function recognize(rawAudio, apiKey, languageCode = 'en-US') {
	const flac16Khz16bitMonoAudio = await FFMpegTranscoder.encodeFromChannels(rawAudio, {
		format: 'flac',
		sampleRate: 16000,
		sampleFormat: 's16',
		channelCount: 1
	});

	const logger = new Logger();
	logger.start('Request recognition from Google Cloud');

	const requestBody = {
		config: {
			encoding: 'FLAC',
			sampleRateHertz: 16000,
			audioChannelCount: 1,
			languageCode,
			alternativeLanguageCodes: [],
			maxAlternatives: 1,
			profanityFilter: false,
			enableWordTimeOffsets: true,
			enableWordConfidence: true,
			enableAutomaticPunctuation: true,
			model: 'latest_long',
			useEnhanced: true
		},
		audio: {
			content: encodeBase64(flac16Khz16bitMonoAudio)
		}
	};

	const response = await request({
		method: 'POST',
		url: `https://speech.googleapis.com/v1p1beta1/speech:recognize`,
		params: {
			'key': apiKey
		},
		headers: {
			'User-Agent': ''
		},
		data: requestBody,
		responseType: 'json'
	});

	logger.start('Parse response body');
	const result = parseResponseBody(response.data);
	logger.end();

	return result;
}

/**
 * Converts a `speech:recognize` response body into a transcript string and a
 * word timeline.
 *
 * Only the first alternative of each result is used. Results without a
 * non-empty first-alternative transcript are skipped.
 *
 * @param {object} responseBody - Parsed JSON body of the recognize response.
 * @returns {{ transcript: string, timeline: Array<object> }} Transcript built by
 *   concatenating each result's first-alternative transcript, and one
 *   `{ type: 'word', text, startTime, endTime, confidence }` entry per word.
 */
function parseResponseBody(responseBody) {
	// A response without a `results` field (for example an empty `{}` body)
	// must yield an empty transcript rather than a "not iterable" TypeError.
	const results = (responseBody && responseBody.results) || [];

	let transcript = '';
	const timeline = [];

	for (const result of results) {
		const firstAlternative = result.alternatives && result.alternatives[0];

		if (!firstAlternative || !firstAlternative.transcript) {
			continue;
		}

		transcript += firstAlternative.transcript;

		// Guard against an alternative that carries a transcript but no word list.
		const wordEvents = firstAlternative.words || [];

		for (const wordEvent of wordEvents) {
			timeline.push({
				type: 'word',
				text: wordEvent.word,
				// Offsets are strings with a trailing 's' (stripped before parsing).
				startTime: parseFloat(wordEvent.startTime.replace('s', '')),
				endTime: parseFloat(wordEvent.endTime.replace('s', '')),
				confidence: wordEvent.confidence
			});
		}
	}

	return { transcript, timeline };
}
//# sourceMappingURL=GoogleCloudSTT.js.map