UNPKG

audio-to-text-node

Version:

Backend audio file to text transcription using Web Speech API with Puppeteer

138 lines 5.63 kB
import chalk from "chalk";
import puppeteer from "puppeteer-core";
import { playAudio } from "./routing";
import * as fs from 'fs';

/** Shared Puppeteer browser: created lazily by getOrCreateBrowser, reset by closeBrowser. */
let browserInstance = null;

/**
 * Gets or creates the shared Puppeteer browser instance.
 * If an instance is already cached it is returned as-is; otherwise a new
 * headless browser is launched and cached.
 * @param {string} [executablePath] Optional path to the browser executable.
 *   When omitted, a fixed list of common Linux install locations is probed.
 * @returns {Promise<object>} A Promise that resolves to the Puppeteer browser instance.
 * @throws {Error} If no executable path is given and none of the default paths exist.
 */
async function getOrCreateBrowser(executablePath) {
    if (browserInstance) {
        return browserInstance;
    }
    const defaultPaths = [
        // Microsoft Edge
        '/usr/bin/microsoft-edge',
        '/usr/bin/microsoft-edge-stable',
        // Google Chrome
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        // Chromium
        '/usr/bin/chromium-browser',
        '/usr/bin/chromium',
    ];
    // If no path was provided, fall back to the first default location that exists on disk
    const browserPath = executablePath || defaultPaths.find((candidate) => fs.existsSync(candidate));
    if (!browserPath) {
        throw new Error('No browser executable found. Please provide executablePath parameter or install Microsoft Edge, Chrome, or Chromium.');
    }
    browserInstance = await puppeteer.launch({
        headless: true,
        executablePath: browserPath,
        args: [
            // Auto-accept the media (microphone) permission prompt so recognition can start unattended
            '--use-fake-ui-for-media-stream',
            '--no-sandbox',
            '--disable-setuid-sandbox',
        ],
    });
    return browserInstance;
}

/**
 * Launches the speech recognition process in the browser.
 * For every chunk a fresh page is opened, a `webkitSpeechRecognition` session
 * is started inside it, and `playAudio(file.path)` is invoked on the Node side
 * while the session is listening. Final recognition results are accumulated
 * into a single transcript.
 *
 * The page of each chunk and the shared browser are always closed before this
 * function settles, including when an error is thrown part-way through.
 *
 * @param {Array<{path: string, start: *}>} files Array of audio file chunks with
 *   their paths and start times (`start` is only used for logging here).
 * @param {string} language Language code for the speech recognition
 * @param {string} [executablePath] Optional path to the browser executable
 * @returns {Promise<string>} A Promise that resolves to the final transcript
 *   text, with whitespace runs collapsed to single spaces and trimmed.
 * @throws {Error} If no browser executable can be found, or if Puppeteer or
 *   the in-page script fails.
 */
export async function launchRecognizer(files, language, executablePath) {
    console.log(chalk.blue("- Launching browser and setting up recognizer..."));
    const browser = await getOrCreateBrowser(executablePath);
    let transcriptText = "";
    try {
        console.log(chalk.yellow("- Playing audio..."));
        for (const file of files) {
            console.log(chalk.green(`- Processing audio chunk: ${file.path} (start: ${file.start}s)`));
            const page = await browser.newPage();
            try {
                // Bridges from the page back into Node.
                // NOTE(review): playAudio comes from ./routing; presumably it resolves once playback finishes - confirm.
                await page.exposeFunction("playAudio", async () => await playAudio(file.path));
                // Debug helper: lets in-page code print to the Node console via window.log(...)
                await page.exposeFunction("log", (c) => console.log(c));
                await page.exposeFunction("onSpeechResult", (text) => {
                    // A space should be added at the beginning to prevent sentences from sticking together
                    transcriptText += ` ${text}`;
                });
                await page.exposeFunction("onSpeechError", (e) => {
                    console.error(chalk.red("Speech recognition error:"), e);
                });
                // Everything inside this callback runs in the browser page, not in Node,
                // so it must not reference variables from the enclosing scope.
                await page.evaluate(async (lang) => {
                    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
                    const recognition = new window.webkitSpeechRecognition();
                    recognition.lang = lang;
                    recognition.continuous = true;
                    recognition.interimResults = true;
                    recognition.onresult = (event) => {
                        let finalText = '';
                        for (let i = event.resultIndex; i < event.results.length; ++i) {
                            if (event.results[i].isFinal) {
                                finalText += event.results[i][0].transcript;
                            }
                        }
                        // Interim-only events carry no final text; skip the round-trip to Node for them
                        if (finalText) {
                            window.onSpeechResult(finalText);
                        }
                    };
                    recognition.onerror = (e) => {
                        // Event objects do not serialize across the bridge, so copy the fields explicitly
                        window.onSpeechError({
                            name: 'SpeechRecognitionErrorEvent',
                            isTrusted: e.isTrusted,
                            bubbles: e.bubbles,
                            cancelBubble: e.cancelBubble,
                            cancelable: e.cancelable,
                            composed: e.composed,
                            defaultPrevented: e.defaultPrevented,
                            error: e.error,
                            eventPhase: e.eventPhase,
                            message: e.message,
                            returnValue: e.returnValue,
                            timeStamp: e.timeStamp,
                            type: e.type,
                            date: new Date(),
                        });
                    };
                    recognition.start();
                    // Wait 500 milliseconds for the page to load and the audio to be ready
                    await sleep(500);
                    await window.playAudio();
                    // Leave a short tail after playback before stopping the recognizer
                    await sleep(500);
                    recognition.stop();
                    // Allow pending result callbacks to be delivered before the page is closed
                    await sleep(500);
                }, language);
            } finally {
                // Always release the page, even if setup or recognition failed
                await page.close();
            }
        }
    } finally {
        // Always shut the browser down so a failure does not leave a stray browser process
        await closeBrowser();
    }
    return transcriptText.replace(/\s+/g, ' ').trim();
}

/**
 * Closes the Puppeteer browser instance if it exists.
 * The cached reference is cleared before the close is awaited, so a failing
 * `close()` never leaves an unusable instance cached for the next launch.
 * @returns {Promise<void>} A Promise that resolves when the browser is closed.
 */
export async function closeBrowser() {
    if (!browserInstance) {
        return;
    }
    const browser = browserInstance;
    browserInstance = null;
    await browser.close();
}
//# sourceMappingURL=puppeteer.js.map