@remotion/install-whisper-cpp
Version:
Helpers for installing and using Whisper.cpp
190 lines (189 loc) • 8.91 kB
JavaScript
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under the name `alias` (which defaults to `key`). An already-installed
// helper on `this` is reused when present.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Engines without Object.create: copy the current value.
        return function (target, source, key, alias) {
            if (alias === undefined) {
                alias = key;
            }
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused or whether a
        // forwarding getter has to be installed instead.
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
// TypeScript interop helper: stores `value` as the `default` property of
// `target`. An already-installed helper on `this` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    var viaDefineProperty = function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    };
    var viaAssignment = function (target, value) {
        target["default"] = value;
    };
    // Plain assignment is only used on engines that lack Object.create.
    return Object.create ? viaDefineProperty : viaAssignment;
})();
// TypeScript interop helper used for `import * as x` style imports.
// If `mod` is flagged with `__esModule` it is returned unchanged. Otherwise a
// new object is built that has a binding for every own property name of `mod`
// except "default", and whose `default` property is `mod` itself.
// An already-installed helper on `this` is reused when present.
var __importStar = (this && this.__importStar) || (function () {
// Returns the own property names of `o`. On its first call it replaces itself
// with Object.getOwnPropertyNames, or with a for-in/hasOwnProperty fallback
// when that function is not available, and then delegates to the replacement.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Modules flagged as ES modules are passed through as-is.
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own key except "default"; `default` is set separately below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// TypeScript interop helper for default imports: values flagged with
// `__esModule` are returned as-is, anything else is wrapped as `{ default: mod }`.
// An already-installed helper on `this` is reused when present.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.transcribe = exports.modelToDtw = void 0;
const node_child_process_1 = require("node:child_process");
const node_fs_1 = __importStar(require("node:fs"));
const node_path_1 = __importDefault(require("node:path"));
const download_whisper_model_1 = require("./download-whisper-model");
const install_whisper_cpp_1 = require("./install-whisper-cpp");
/**
 * Checks whether a path ends with the `.wav` file extension.
 *
 * Only the name is inspected; the file contents are not read. The comparison
 * is case-insensitive, so `AUDIO.WAV` is accepted. A name without a dot
 * (for example a file literally called `wav`) is not treated as a wav file.
 *
 * @param {string} inputPath Path of the file to check.
 * @returns {boolean} `true` if the path ends with `.wav` in any letter case.
 */
const isWavFile = (inputPath) => {
    return inputPath.toLowerCase().endsWith('.wav');
};
/**
 * Reads the file at `jsonPath` as UTF-8 text and parses it as JSON.
 *
 * @param {string} jsonPath Path of the JSON file to load.
 * @returns {Promise<any>} The parsed JSON value. The promise rejects if the
 * file cannot be read or its content is not valid JSON.
 */
const readJson = async (jsonPath) => JSON.parse(await node_fs_1.default.promises.readFile(jsonPath, 'utf8'));
// https://github.com/ggerganov/whisper.cpp/blob/fe36c909715e6751277ddb020e7892c7670b61d4/examples/main/main.cpp#L989-L999
// https://github.com/remotion-dev/remotion/issues/4168
/**
 * Maps a model name to the value passed to whisper.cpp's `--dtw` flag.
 * The versioned `large-*` names use dots instead of dashes there; every other
 * value is returned unchanged.
 *
 * @param {string} model Model name.
 * @returns {string} The `--dtw` value for that model.
 */
const modelToDtw = (model) => {
    switch (model) {
        case 'large-v3-turbo':
            return 'large.v3.turbo';
        case 'large-v3':
            return 'large.v3';
        case 'large-v2':
            return 'large.v2';
        case 'large-v1':
            return 'large.v1';
        default:
            return model;
    }
};
exports.modelToDtw = modelToDtw;
/**
 * Spawns the whisper executable for one audio file and waits until it exits.
 *
 * The process is started with `whisperPath` (resolved against the current
 * working directory) as its cwd. The JSON result is expected at
 * `${tmpJSONPath}.json`; if that file exists when the process exits, the call
 * succeeds regardless of the exit code.
 *
 * @param {object} options
 * @param {string} options.fileToTranscribe Passed to the executable via `-f`.
 * @param {string} options.whisperPath Folder used as cwd and, when `modelFolder` is nullish, as model folder.
 * @param {string} options.whisperCppVersion Forwarded to `getWhisperExecutablePath()`.
 * @param {string} options.model Model name; resolved to a file through `getModelPath()`.
 * @param {string} options.tmpJSONPath Value for `--output-file`; `.json` is appended to predict the result path.
 * @param {string|null} [options.modelFolder] Folder to look up the model in.
 * @param {boolean} [options.translate] Adds `-tr` when truthy.
 * @param {boolean} [options.tokenLevelTimestamps] Adds `--dtw <model>` when truthy.
 * @param {boolean} [options.printOutput] Mirrors the child's stdout/stderr to this process when truthy.
 * @param {number} [options.tokensPerItem] Adds `--max-len <n>` when truthy.
 * @param {string|null} [options.language] Adds `-l <language lower-cased>` when truthy.
 * @param {*} [options.splitOnWord] Adds `--split-on-word <value>` when truthy.
 * @param {AbortSignal|null} [options.signal] Forwarded to `spawn()`.
 * @param {((progress: number) => void)|null} [options.onProgress] Receives the parsed progress divided by 100, and `1` on success.
 * @param {boolean} [options.flashAttention] Adds `--flash-attn true` when truthy.
 * @param {string[]} [options.additionalArgs] Extra arguments appended to the command line.
 * @returns {Promise<{outputPath: string}>} Path of the JSON file that was found after the process exited.
 * @throws {Error} If the model file is missing, the process cannot be spawned or is aborted,
 * or the process exits without the JSON file being present.
 */
const transcribeToTemporaryFile = async ({ fileToTranscribe, whisperPath, whisperCppVersion, model, tmpJSONPath, modelFolder, translate, tokenLevelTimestamps, printOutput, tokensPerItem, language, splitOnWord, signal, onProgress, flashAttention, additionalArgs, }) => {
    const modelPath = (0, download_whisper_model_1.getModelPath)(modelFolder !== null && modelFolder !== void 0 ? modelFolder : whisperPath, model);
    if (!node_fs_1.default.existsSync(modelPath)) {
        throw new Error(`Error: Model ${model} does not exist at ${modelFolder ? modelFolder : modelPath}. Check out the downloadWhisperModel() API at https://www.remotion.dev/docs/install-whisper-cpp/download-whisper-model to see how to install whisper models`);
    }
    const executable = (0, install_whisper_cpp_1.getWhisperExecutablePath)(whisperPath, whisperCppVersion);
    // Optional flags are expressed as `null` or nested pairs, then flattened
    // and stripped of falsy entries.
    const args = [
        '-f',
        fileToTranscribe,
        '--output-file',
        tmpJSONPath,
        '--output-json',
        tokensPerItem ? ['--max-len', tokensPerItem] : null,
        '-ojf', // Output full JSON
        tokenLevelTimestamps ? ['--dtw', (0, exports.modelToDtw)(model)] : null,
        model ? [`-m`, `${modelPath}`] : null,
        ['-pp'], // print progress
        translate ? '-tr' : null,
        language ? ['-l', language.toLowerCase()] : null,
        splitOnWord ? ['--split-on-word', splitOnWord] : null,
        flashAttention ? ['--flash-attn', 'true'] : null,
        ...(additionalArgs !== null && additionalArgs !== void 0 ? additionalArgs : []),
    ]
        .flat(1)
        .filter(Boolean);
    // Invokes the optional progress callback.
    const reportProgress = (value) => {
        if (onProgress !== null && onProgress !== undefined) {
            onProgress(value);
        }
    };
    const outputPath = await new Promise((resolve, reject) => {
        const task = (0, node_child_process_1.spawn)(executable, args, {
            cwd: node_path_1.default.resolve(process.cwd(), whisperPath),
            signal: signal !== null && signal !== void 0 ? signal : undefined,
        });
        const predictedPath = `${tmpJSONPath}.json`;
        let output = '';
        let stderr = '';
        // Set once the promise was rejected because of an 'error' event, so
        // the later 'exit' event does not report success or progress.
        let failed = false;
        // Shared handling for both streams; returns the decoded chunk.
        const onData = (data) => {
            const str = data.toString('utf-8');
            if (str.includes('progress =')) {
                const progress = parseFloat(str.split('progress =')[1].trim());
                // A chunk may end right after "progress =", which parses to NaN.
                if (!Number.isNaN(progress)) {
                    reportProgress(progress / 100);
                }
            }
            output += str;
            // Sometimes it hangs here
            if (str.includes('ggml_metal_free: deallocating')) {
                task.kill();
            }
            return str;
        };
        const onStderr = (data) => {
            const utf8 = onData(data);
            stderr += utf8;
            if (printOutput) {
                process.stderr.write(utf8);
            }
        };
        const onStdout = (data) => {
            const utf8 = onData(data);
            if (printOutput) {
                process.stdout.write(utf8);
            }
        };
        task.stdout.on('data', onStdout);
        task.stderr.on('data', onStderr);
        // Without this listener a spawn failure or an abort through `signal`
        // would surface as an unhandled 'error' event and leave the promise pending.
        task.on('error', (err) => {
            failed = true;
            reject(err);
        });
        task.on('exit', (code, exitSignal) => {
            if (failed) {
                return;
            }
            // Whisper sometimes fails also with error code 0
            // https://github.com/ggerganov/whisper.cpp/pull/1952/files
            if ((0, node_fs_1.existsSync)(predictedPath)) {
                resolve(predictedPath);
                reportProgress(1);
                return;
            }
            if (exitSignal) {
                reject(new Error(`Process was killed with signal ${exitSignal}: ${output}`));
                return;
            }
            if (stderr.includes('must be 16 kHz')) {
                reject(new Error('wav file must be 16 kHz - See https://www.remotion.dev/docs/webcodecs/resample-audio-16khz#on-the-server on how to convert your audio to a 16-bit, 16KHz, WAVE file'));
                return;
            }
            reject(new Error(`No transcription was created (process exited with code ${code}): ${output}`));
        });
    });
    return { outputPath };
};
/**
 * Transcribes a wav file with the whisper executable and returns the parsed JSON result.
 *
 * The arguments are validated first: `whisperPath` and `inputPath` must exist
 * and `inputPath` must pass `isWavFile()`. The executable writes its JSON to a
 * temporary file in the current working directory, which is read, deleted and
 * returned as a parsed value.
 *
 * @param {object} options
 * @param {string} options.inputPath Path of the wav file to transcribe.
 * @param {string} options.whisperPath Folder of the whisper installation.
 * @param {string} options.whisperCppVersion Forwarded unchanged to the transcription step.
 * @param {string} options.model Name of the model to use.
 * @param {string|null} [options.modelFolder] Folder containing the model; `null` is forwarded when omitted.
 * @param {boolean} [options.translateToEnglish=false] Forwarded as `translate`.
 * @param {boolean} [options.tokenLevelTimestamps] When truthy, `tokensPerItem` is forced to 1.
 * @param {boolean} [options.printOutput=true] Forwarded unchanged.
 * @param {number} [options.tokensPerItem] Defaults to 1 when nullish.
 * @param {string|null} [options.language] Forwarded; `null` when omitted.
 * @param {*} [options.splitOnWord] Forwarded; `null` when omitted.
 * @param {AbortSignal|null} [options.signal] Forwarded; `null` when omitted.
 * @param {((progress: number) => void)|null} [options.onProgress] Forwarded; `null` when omitted.
 * @param {boolean} [options.flashAttention] Forwarded unchanged.
 * @param {string[]} [options.additionalArgs] Forwarded unchanged.
 * @returns {Promise<any>} The parsed content of the JSON file produced by the executable.
 * @throws {Error} If validation fails, the transcription step fails, or the JSON cannot be read or parsed.
 */
const transcribe = async ({ inputPath, whisperPath, whisperCppVersion, model, modelFolder, translateToEnglish = false, tokenLevelTimestamps, printOutput = true, tokensPerItem, language, splitOnWord, signal, onProgress, flashAttention, additionalArgs, }) => {
    if (!(0, node_fs_1.existsSync)(whisperPath)) {
        throw new Error(`Whisper does not exist at ${whisperPath}. Double-check the passed whisperPath. If you havent installed whisper, check out the installWhisperCpp() API at https://www.remotion.dev/docs/install-whisper-cpp/install-whisper-cpp to see how to install whisper programatically.`);
    }
    if (!(0, node_fs_1.existsSync)(inputPath)) {
        throw new Error(`Input file does not exist at ${inputPath}`);
    }
    if (!isWavFile(inputPath)) {
        throw new Error('Invalid inputFile type. The provided file is not a wav file! Convert the file to a 16KHz wav file first: "ffmpeg -i input.mp4 -ar 16000 output.wav -y"');
    }
    // Use a per-call output name. A fixed name would let concurrent calls
    // overwrite each other's result, and a leftover file from an earlier run
    // could be mistaken for the output of this one.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const tmpJSONDir = node_path_1.default.join(process.cwd(), `tmp-${uniqueSuffix}`);
    const { outputPath: tmpJSONPath } = await transcribeToTemporaryFile({
        fileToTranscribe: inputPath,
        whisperPath,
        whisperCppVersion,
        model,
        tmpJSONPath: tmpJSONDir,
        modelFolder: modelFolder !== null && modelFolder !== void 0 ? modelFolder : null,
        translate: translateToEnglish,
        tokenLevelTimestamps,
        printOutput,
        tokensPerItem: tokenLevelTimestamps ? 1 : (tokensPerItem !== null && tokensPerItem !== void 0 ? tokensPerItem : 1),
        language: language !== null && language !== void 0 ? language : null,
        signal: signal !== null && signal !== void 0 ? signal : null,
        splitOnWord: splitOnWord !== null && splitOnWord !== void 0 ? splitOnWord : null,
        onProgress: onProgress !== null && onProgress !== void 0 ? onProgress : null,
        flashAttention,
        additionalArgs,
    });
    let json;
    try {
        json = (await readJson(tmpJSONPath));
    }
    catch (err) {
        // Best-effort cleanup so a failed read does not leave the temporary
        // file behind; the read/parse error is the one reported to the caller.
        try {
            node_fs_1.default.unlinkSync(tmpJSONPath);
        }
        catch (cleanupError) {
            // Ignored on purpose: `err` is the more useful failure.
        }
        throw err;
    }
    node_fs_1.default.unlinkSync(tmpJSONPath);
    return json;
};
exports.transcribe = transcribe;
;