UNPKG

@remotion/install-whisper-cpp

Version:

Helpers for installing and using Whisper.cpp

190 lines (189 loc) 8.91 kB
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers (logic unchanged, only reformatted).
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.transcribe = exports.modelToDtw = void 0;
const node_child_process_1 = require("node:child_process");
const node_fs_1 = __importStar(require("node:fs"));
const node_os_1 = __importDefault(require("node:os"));
const node_path_1 = __importDefault(require("node:path"));
const download_whisper_model_1 = require("./download-whisper-model");
const install_whisper_cpp_1 = require("./install-whisper-cpp");

/**
 * Reports whether a path has a `.wav` file extension.
 * The comparison is case-insensitive, so `.WAV` is accepted as well.
 *
 * @param {string} inputPath - Path of the file to check.
 * @returns {boolean} `true` if the extension is `.wav`.
 */
const isWavFile = (inputPath) => {
    return node_path_1.default.extname(inputPath).toLowerCase() === '.wav';
};

/**
 * Reads a UTF-8 file and parses its content as JSON.
 *
 * @param {string} jsonPath - Path of the JSON file.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
const readJson = async (jsonPath) => {
    const data = await node_fs_1.default.promises.readFile(jsonPath, 'utf8');
    return JSON.parse(data);
};

// Model names whose `--dtw` preset is spelled with dots instead of dashes.
// https://github.com/ggerganov/whisper.cpp/blob/fe36c909715e6751277ddb020e7892c7670b61d4/examples/main/main.cpp#L989-L999
// https://github.com/remotion-dev/remotion/issues/4168
const DTW_PRESET_BY_MODEL = new Map([
    ['large-v3-turbo', 'large.v3.turbo'],
    ['large-v3', 'large.v3'],
    ['large-v2', 'large.v2'],
    ['large-v1', 'large.v1'],
]);

/**
 * Maps a model name to the value passed to the `--dtw` flag.
 *
 * @param {string} model - Model name.
 * @returns {string} The dotted preset for the `large-*` models listed above,
 *   otherwise `model` unchanged.
 */
const modelToDtw = (model) => {
    const preset = DTW_PRESET_BY_MODEL.get(model);
    return preset === undefined ? model : preset;
};
exports.modelToDtw = modelToDtw;

/**
 * Spawns the whisper executable and resolves with the path of the JSON file it wrote.
 *
 * @param {object} options
 * @param {string} options.fileToTranscribe - Passed to the executable via `-f`.
 * @param {string} options.whisperPath - Whisper folder; used as the working directory of
 *   the child process and as the model folder when `modelFolder` is nullish.
 * @param {string} options.whisperCppVersion - Forwarded to `getWhisperExecutablePath()`.
 * @param {string} options.model - Model name.
 * @param {string} options.tmpJSONPath - Output path WITHOUT extension; the result is
 *   expected at `${tmpJSONPath}.json`.
 * @param {string|null} options.modelFolder - Optional folder to look the model up in.
 * @param {boolean} options.translate - Adds `-tr` when truthy.
 * @param {boolean} options.tokenLevelTimestamps - Adds `--dtw <preset>` when truthy.
 * @param {boolean} options.printOutput - Mirror child stdout/stderr to this process.
 * @param {number} options.tokensPerItem - Adds `--max-len <n>` when truthy.
 * @param {string|null} options.language - Adds `-l <language in lower case>` when truthy.
 * @param {*} options.splitOnWord - Adds `--split-on-word <value>` when truthy.
 * @param {AbortSignal|null} options.signal - Optional signal handed to `spawn()`.
 * @param {((progress: number) => void)|null} options.onProgress - Called with
 *   `percent / 100` for every parsed progress line and with `1` on success.
 * @param {boolean} options.flashAttention - Adds `--flash-attn true` when truthy.
 * @param {string[]} [options.additionalArgs] - Extra arguments appended verbatim.
 * @returns {Promise<{outputPath: string}>}
 * @throws {Error} If the model file is missing, the process cannot be spawned or is
 *   aborted, or no output file exists after the process exited.
 */
const transcribeToTemporaryFile = async ({ fileToTranscribe, whisperPath, whisperCppVersion, model, tmpJSONPath, modelFolder, translate, tokenLevelTimestamps, printOutput, tokensPerItem, language, splitOnWord, signal, onProgress, flashAttention, additionalArgs, }) => {
    const modelPath = (0, download_whisper_model_1.getModelPath)(modelFolder !== null && modelFolder !== void 0 ? modelFolder : whisperPath, model);
    if (!node_fs_1.default.existsSync(modelPath)) {
        throw new Error(`Error: Model ${model} does not exist at ${modelFolder ? modelFolder : modelPath}. Check out the downloadWhisperModel() API at https://www.remotion.dev/docs/install-whisper-cpp/download-whisper-model to see how to install whisper models`);
    }
    const executable = (0, install_whisper_cpp_1.getWhisperExecutablePath)(whisperPath, whisperCppVersion);
    // `null` entries are placeholders for disabled options; they are dropped by the
    // `.filter(Boolean)` below after nested pairs have been flattened.
    const args = [
        '-f',
        fileToTranscribe,
        '--output-file',
        tmpJSONPath,
        '--output-json',
        tokensPerItem ? ['--max-len', String(tokensPerItem)] : null,
        '-ojf', // Output full JSON
        tokenLevelTimestamps ? ['--dtw', (0, exports.modelToDtw)(model)] : null,
        model ? [`-m`, `${modelPath}`] : null,
        ['-pp'], // print progress
        translate ? '-tr' : null,
        language ? ['-l', language.toLowerCase()] : null,
        splitOnWord ? ['--split-on-word', String(splitOnWord)] : null,
        flashAttention ? ['--flash-attn', 'true'] : null,
        ...(additionalArgs !== null && additionalArgs !== void 0 ? additionalArgs : []),
    ]
        .flat(1)
        .filter(Boolean);
    const reportProgress = (value) => {
        if (onProgress !== null && onProgress !== undefined) {
            onProgress(value);
        }
    };
    const outputPath = await new Promise((resolve, reject) => {
        const task = (0, node_child_process_1.spawn)(executable, args, {
            cwd: node_path_1.default.resolve(process.cwd(), whisperPath),
            signal: signal !== null && signal !== void 0 ? signal : undefined,
        });
        const predictedPath = `${tmpJSONPath}.json`;
        let output = '';
        const onData = (data) => {
            const str = data.toString('utf-8');
            if (str.includes('progress =')) {
                const progress = parseFloat(str.split('progress =')[1].trim());
                // A chunk may end right after the marker; skip it instead of reporting NaN.
                if (!Number.isNaN(progress)) {
                    reportProgress(progress / 100);
                }
            }
            output += str;
            // Sometimes it hangs here
            if (str.includes('ggml_metal_free: deallocating')) {
                task.kill();
            }
        };
        let stderr = '';
        const onStderr = (data) => {
            onData(data);
            const utf8 = data.toString('utf-8');
            stderr += utf8;
            if (printOutput) {
                process.stderr.write(utf8);
            }
        };
        const onStdout = (data) => {
            onData(data);
            if (printOutput) {
                process.stdout.write(data.toString('utf-8'));
            }
        };
        task.stdout.on('data', onStdout);
        task.stderr.on('data', onStderr);
        // Without this listener a spawn failure (e.g. ENOENT) or an abort via `signal`
        // surfaces as an unhandled 'error' event instead of a rejected promise.
        task.on('error', (err) => {
            reject(err);
        });
        task.on('exit', (code, exitSignal) => {
            // Whisper sometimes fails also with error code 0
            // https://github.com/ggerganov/whisper.cpp/pull/1952/files
            // so the presence of the output file decides success, not the exit code.
            if ((0, node_fs_1.existsSync)(predictedPath)) {
                resolve(predictedPath);
                reportProgress(1);
                return;
            }
            if (exitSignal) {
                reject(new Error(`Process was killed with signal ${exitSignal}: ${output}`));
                return;
            }
            if (stderr.includes('must be 16 kHz')) {
                reject(new Error('wav file must be 16 kHz - See https://www.remotion.dev/docs/webcodecs/resample-audio-16khz#on-the-server on how to convert your audio to a 16-bit, 16KHz, WAVE file'));
                return;
            }
            reject(new Error(`No transcription was created (process exited with code ${code}): ${output}`));
        });
    });
    return { outputPath };
};

/**
 * Transcribes a `.wav` file with the whisper executable and returns the parsed JSON output.
 *
 * The executable writes its result into a unique temporary directory that is removed
 * again once the result has been read or an error occurred.
 *
 * @param {object} options
 * @param {string} options.inputPath - Path of the `.wav` file to transcribe.
 * @param {string} options.whisperPath - Folder of the whisper installation.
 * @param {string} options.whisperCppVersion - Forwarded to `getWhisperExecutablePath()`.
 * @param {string} options.model - Model name.
 * @param {string} [options.modelFolder] - Folder containing the model; defaults to `whisperPath`.
 * @param {boolean} [options.translateToEnglish=false] - Adds the `-tr` flag.
 * @param {boolean} [options.tokenLevelTimestamps] - Adds `--dtw`; forces `tokensPerItem` to 1.
 * @param {boolean} [options.printOutput=true] - Mirror whisper's stdout/stderr.
 * @param {number} [options.tokensPerItem] - Value for `--max-len`; defaults to 1.
 * @param {string} [options.language] - Value for `-l`.
 * @param {*} [options.splitOnWord] - Value for `--split-on-word`.
 * @param {AbortSignal} [options.signal] - Signal handed to the child process.
 * @param {(progress: number) => void} [options.onProgress] - Progress callback (0..1).
 * @param {boolean} [options.flashAttention] - Adds `--flash-attn true`.
 * @param {string[]} [options.additionalArgs] - Extra arguments appended verbatim.
 * @returns {Promise<any>} The parsed JSON written by the executable.
 * @throws {Error} If `whisperPath` or `inputPath` do not exist, the input is not a
 *   `.wav` file, or the transcription fails.
 */
const transcribe = async ({ inputPath, whisperPath, whisperCppVersion, model, modelFolder, translateToEnglish = false, tokenLevelTimestamps, printOutput = true, tokensPerItem, language, splitOnWord, signal, onProgress, flashAttention, additionalArgs, }) => {
    if (!(0, node_fs_1.existsSync)(whisperPath)) {
        throw new Error(`Whisper does not exist at ${whisperPath}. Double-check the passed whisperPath. If you havent installed whisper, check out the installWhisperCpp() API at https://www.remotion.dev/docs/install-whisper-cpp/install-whisper-cpp to see how to install whisper programatically.`);
    }
    if (!(0, node_fs_1.existsSync)(inputPath)) {
        throw new Error(`Input file does not exist at ${inputPath}`);
    }
    if (!isWavFile(inputPath)) {
        throw new Error('Invalid inputFile type. The provided file is not a wav file! Convert the file to a 16KHz wav file first: "ffmpeg -i input.mp4 -ar 16000 output.wav -y"');
    }
    // A fresh directory per call: concurrent transcriptions cannot overwrite each other
    // and a stale file from an earlier crashed run cannot be mistaken for the new result.
    const tmpDir = await node_fs_1.default.promises.mkdtemp(node_path_1.default.join(node_os_1.default.tmpdir(), 'remotion-whisper-'));
    try {
        const { outputPath: tmpJSONPath } = await transcribeToTemporaryFile({
            fileToTranscribe: inputPath,
            whisperPath,
            whisperCppVersion,
            model,
            tmpJSONPath: node_path_1.default.join(tmpDir, 'tmp'),
            modelFolder: modelFolder !== null && modelFolder !== void 0 ? modelFolder : null,
            translate: translateToEnglish,
            tokenLevelTimestamps,
            printOutput,
            tokensPerItem: tokenLevelTimestamps ? 1 : (tokensPerItem !== null && tokensPerItem !== void 0 ? tokensPerItem : 1),
            language: language !== null && language !== void 0 ? language : null,
            signal: signal !== null && signal !== void 0 ? signal : null,
            splitOnWord: splitOnWord !== null && splitOnWord !== void 0 ? splitOnWord : null,
            onProgress: onProgress !== null && onProgress !== void 0 ? onProgress : null,
            flashAttention,
            additionalArgs,
        });
        return await readJson(tmpJSONPath);
    }
    finally {
        // Runs on success and on every failure path, so the temp output never leaks.
        node_fs_1.default.rmSync(tmpDir, { recursive: true, force: true });
    }
};
exports.transcribe = transcribe;