UNPKG

@pr0gramm/fluester

Version:

Node.js bindings for OpenAI's Whisper. Optimized for CPU.

118 lines (117 loc) 4.29 kB
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { execute } from "./execute.js";
import { defaultExecutablePath, nodeModulesModelPath } from "./interop.js";
import { modelFileNames } from "./model.js";
import transcriptToArray, { parseDetectedLanguage } from "./transcript.js";

export { createWebVttString } from "./webvtt.js";
export { convertFileToProcessableFile } from "./ffmpeg.js";

/**
 * Creates a client that runs the whisper executable against a model file.
 *
 * @param {object} options Client options. Must contain either `modelPath`
 *   (used as-is) or `modelName` (looked up in `modelFileNames` and resolved
 *   below `nodeModulesModelPath`). May contain `executablePath`; when it is
 *   absent or nullish, `defaultExecutablePath` is used.
 * @returns {{
 *   translate: (filePath: string, options?: object) => Promise<unknown>,
 *   transcribe: (filePath: string, options?: object) => Promise<unknown>,
 *   detectLanguage: (filePath: string) => Promise<unknown>,
 * }} An object with the three operations. `translate`/`transcribe` resolve to
 *   the result of `transcriptToArray`, `detectLanguage` to the result of
 *   `parseDetectedLanguage`.
 */
export function createWhisperClient(options) {
  const effectiveOptions = {
    ...options,
    // `??` instead of relying on spread order: an explicit
    // `executablePath: undefined` must not clobber the default.
    executablePath: options.executablePath ?? defaultExecutablePath,
    modelPath: getModelPath(options),
  };

  /**
   * Verifies that the model file exists before the executable is spawned.
   *
   * `fs.stat` rejects (rather than resolving to a falsy value) when the path
   * does not exist, so the rejection is translated into a descriptive error.
   *
   * @throws {Error} "Model not found" (with the stat error as `cause`) when
   *   the path does not exist; any other stat failure is rethrown unchanged.
   */
  async function ensureModel() {
    try {
      await fs.stat(effectiveOptions.modelPath);
    } catch (cause) {
      if (cause?.code === "ENOENT") {
        throw new Error(`Model not found at "${effectiveOptions.modelPath}".`, {
          cause,
        });
      }
      throw cause;
    }
  }

  /**
   * Shared implementation of `translate` and `transcribe`.
   *
   * @param {string} filePath Input file handed to the executable via `-f`.
   * @param {object} [options] Per-call options: `whisperOptions` (see
   *   `getFlags`), `sourceLanguage` (passed as `--language`) and `signal`
   *   (forwarded to `execute` and checked for abortion afterwards).
   * @param {string[]} modeArgs Extra arguments placed between the generated
   *   flags and `-m` (e.g. `["-tr"]` for translation).
   * @returns {Promise<unknown>} Result of `transcriptToArray` applied to the
   *   process' stdout.
   * @throws {Error} "Error during whisper operation" wrapping any failure of
   *   the run (including an abort) as `cause`. A missing model is reported
   *   by `ensureModel` before that and is not wrapped.
   */
  async function runWhisper(filePath, options, modeArgs) {
    await ensureModel();
    try {
      const flags = options?.whisperOptions
        ? getFlags(options.whisperOptions)
        : [];
      const languageArgs = options?.sourceLanguage
        ? ["--language", options.sourceLanguage]
        : [];
      const args = [
        ...flags,
        ...modeArgs,
        "-m",
        effectiveOptions.modelPath,
        ...languageArgs,
        "-f",
        filePath,
      ];
      // TODO: add return for continually updated progress value
      // NOTE(review): the third argument is `false` here and `true` in
      // detectLanguage; its meaning is defined in ./execute.js.
      const result = await execute(
        effectiveOptions.executablePath,
        args,
        false,
        options?.signal,
      );
      if (options?.signal?.aborted) {
        throw new Error("Operation aborted");
      }
      return transcriptToArray(result.stdout.toString());
    } catch (cause) {
      throw new Error("Error during whisper operation", { cause });
    }
  }

  return {
    translate: (filePath, options) => runWhisper(filePath, options, ["-tr"]),
    transcribe: (filePath, options) => runWhisper(filePath, options, []),
    detectLanguage: async (filePath) => {
      await ensureModel();
      const result = await execute(
        effectiveOptions.executablePath,
        ["--detect-language", "-m", effectiveOptions.modelPath, filePath],
        true,
      );
      // TODO: Check for probability threshold
      // The detection result is parsed from stderr, not stdout.
      return parseDetectedLanguage(result.stderr.toString());
    },
  };
}

/**
 * Resolves the model file path from the client options.
 *
 * @param {object} options Client options containing either `modelPath` or
 *   `modelName`.
 * @returns {string} `options.modelPath` when that key is present, otherwise
 *   the file name registered for `options.modelName` joined onto
 *   `nodeModulesModelPath`.
 */
function getModelPath(options) {
  return "modelPath" in options
    ? options.modelPath
    : path.join(nodeModulesModelPath, modelFileNames[options.modelName]);
}

// option flags list: https://github.com/ggerganov/whisper.cpp/blob/master/README.md?plain=1#L91
/**
 * Translates the `whisperOptions` object into command line arguments.
 *
 * @param {object} flags Options object; the truthy fields `generateTxt`,
 *   `generateSubtitles`, `generateVtt`, `timestampSize` and `wordTimestamps`
 *   are honoured.
 * @returns {string[]} Arguments in a fixed order: output-file flags first,
 *   then the timestamp (`-ml`) flags.
 */
function getFlags(flags) {
  const s = [];

  // output files
  if (flags.generateTxt) {
    s.push("-otxt");
  }
  if (flags.generateSubtitles) {
    s.push("-osrt");
  }
  if (flags.generateVtt) {
    s.push("-ovtt");
  }

  // timestamps
  if (flags.timestampSize) {
    s.push("-ml", flags.timestampSize.toString());
  }
  // NOTE(review): when both timestampSize and wordTimestamps are set, "-ml"
  // is emitted twice; which one the executable honours should be confirmed.
  if (flags.wordTimestamps) {
    s.push("-ml", "1");
  }

  return s;
}