@pr0gramm/fluester
Version:
Node.js bindings for OpenAI's Whisper. Optimized for CPU.
118 lines (117 loc) • 4.29 kB
JavaScript
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { execute } from "./execute.js";
import { defaultExecutablePath, nodeModulesModelPath } from "./interop.js";
import { modelFileNames } from "./model.js";
import transcriptToArray, { parseDetectedLanguage, } from "./transcript.js";
export { createWebVttString } from "./webvtt.js";
export { convertFileToProcessableFile } from "./ffmpeg.js";
/**
 * Creates a client bound to one whisper executable and one model file.
 *
 * The effective options are `defaultExecutablePath`, overridden by anything in
 * `options`, with `modelPath` always replaced by the result of
 * `getModelPath(options)`.
 *
 * @param options Client options; must identify a model via `modelPath` or
 *   `modelName` (see `getModelPath`) and may override `executablePath`.
 * @returns An object exposing `translate`, `transcribe` and `detectLanguage`.
 */
export function createWhisperClient(options) {
    const effectiveOptions = {
        executablePath: defaultExecutablePath,
        ...options,
        modelPath: getModelPath(options),
    };

    /**
     * Verifies that the configured model path exists.
     *
     * `fs.stat` rejects (rather than resolving to a falsy value) when the path
     * is missing, so the rejection has to be caught to produce the intended
     * "Model not found" message.
     *
     * @throws {Error} "Model not found at ..." when the path does not exist;
     *   the underlying file system error is attached as `cause`. Any other
     *   stat failure is rethrown unchanged.
     */
    async function ensureModel() {
        try {
            await fs.stat(effectiveOptions.modelPath);
        }
        catch (cause) {
            if (cause?.code === "ENOENT" || cause?.code === "ENOTDIR") {
                throw new Error(`Model not found at "${effectiveOptions.modelPath}".`, { cause });
            }
            throw cause;
        }
    }

    /**
     * Shared implementation of `translate` and `transcribe`.
     *
     * @param modeArgs Extra arguments placed between the user flags and `-m`
     *   (`["-tr"]` for translation, `[]` for transcription).
     * @param filePath Path of the audio file handed to the executable via `-f`.
     * @param callOptions Optional per-call options: `whisperOptions`,
     *   `sourceLanguage` and `signal`.
     * @returns Whatever `transcriptToArray` produces from the process stdout.
     * @throws {Error} "Error during whisper operation" wrapping any failure
     *   (including an aborted signal) as `cause`.
     */
    async function runWhisper(modeArgs, filePath, callOptions) {
        await ensureModel();
        try {
            const flags = callOptions?.whisperOptions
                ? getFlags(callOptions.whisperOptions)
                : [];
            const languageArgs = callOptions?.sourceLanguage
                ? ["--language", callOptions.sourceLanguage]
                : [];
            const args = [
                ...flags,
                ...modeArgs,
                "-m",
                effectiveOptions.modelPath,
                ...languageArgs,
                "-f",
                filePath,
            ];
            // TODO: add return for continually updated progress value
            const result = await execute(effectiveOptions.executablePath, args, false, callOptions?.signal);
            // Checked after the process finished so an abort is never reported as a valid result.
            if (callOptions?.signal?.aborted) {
                throw new Error("Operation aborted");
            }
            return transcriptToArray(result.stdout.toString());
        }
        catch (cause) {
            throw new Error("Error during whisper operation", { cause });
        }
    }

    return {
        /** Runs the executable with `-tr` on `filePath` and parses its stdout. */
        translate: async (filePath, options) => runWhisper(["-tr"], filePath, options),
        /** Runs the executable on `filePath` and parses its stdout. */
        transcribe: async (filePath, options) => runWhisper([], filePath, options),
        /**
         * Runs the executable with `--detect-language` on `filePath` and parses
         * the language from its stderr via `parseDetectedLanguage`.
         */
        detectLanguage: async (filePath) => {
            await ensureModel();
            const result = await execute(effectiveOptions.executablePath, [
                "--detect-language",
                "-m",
                effectiveOptions.modelPath,
                filePath,
            ], true);
            // TODO: Check for probability threshold
            return parseDetectedLanguage(result.stderr.toString());
        },
    };
}
/**
 * Resolves the model file path for the given client options.
 *
 * @param options Client options carrying either a `modelPath` key or a `modelName`.
 * @returns `options.modelPath` when that key is present; otherwise the file name
 *   looked up in `modelFileNames` for `options.modelName`, joined onto
 *   `nodeModulesModelPath`.
 */
function getModelPath(options) {
    // An explicit path always wins, even over a simultaneously given model name.
    if ("modelPath" in options) {
        return options.modelPath;
    }
    const modelFileName = modelFileNames[options.modelName];
    return path.join(nodeModulesModelPath, modelFileName);
}
// option flags list: https://github.com/ggerganov/whisper.cpp/blob/master/README.md?plain=1#L91
/**
 * Translates the whisper option object into command-line arguments.
 *
 * @param flags Option object; the properties read are `generateTxt`,
 *   `generateSubtitles`, `generateVtt`, `timestampSize` and `wordTimestamps`.
 * @returns Array of argument strings, output-file switches first, then the
 *   `-ml` settings.
 */
function getFlags(flags) {
    // Output files: each truthy option contributes its switch, in this order.
    const outputSwitches = [
        [flags.generateTxt, "-otxt"],
        [flags.generateSubtitles, "-osrt"],
        [flags.generateVtt, "-ovtt"],
    ];
    const args = outputSwitches
        .filter(([enabled]) => enabled)
        .map(([, cliSwitch]) => cliSwitch);

    // Timestamps: both options map onto "-ml"; when both are set, both pairs
    // are emitted, with the word-level one last.
    if (flags.timestampSize) {
        args.push("-ml", flags.timestampSize.toString());
    }
    if (flags.wordTimestamps) {
        args.push("-ml", "1");
    }
    return args;
}