@lumen-labs-dev/whisper-node
Version:
Local audio transcription on CPU. Node.js bindings for OpenAI's Whisper.
150 lines (149 loc) • 7.96 kB
JavaScript
// TypeScript emit helper: re-export property `k` of module `m` on target `o`
// under the name `k2` (defaults to `k`). When Object.create is available it
// installs a live getter so the re-export tracks later mutations of m[k];
// otherwise it falls back to a one-time value copy.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Synthesize an enumerable getter when the source property has no usable
    // descriptor, or when it is a mutable data property on a transpiled
    // (non-__esModule) module and must therefore be read lazily.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Legacy engines: plain snapshot copy, no live binding.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// TypeScript emit helper: attach `v` as the `default` property of a module
// namespace object `o`. The implementation is chosen once at load time:
// a non-writable, enumerable data property via defineProperty when
// Object.create exists, otherwise a plain assignment for legacy engines.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
// TypeScript emit helper: adapt a CommonJS module object to the namespace
// shape expected by `import * as ns from '...'`.
var __importStar = (this && this.__importStar) || (function () {
    // `ownKeys` replaces itself on first call with the best available key
    // enumerator (getOwnPropertyNames, or a for-in/hasOwnProperty fallback),
    // caching the feature detection for subsequent imports.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        // Genuine ES modules pass through untouched.
        if (mod && mod.__esModule) return mod;
        var result = {};
        // Re-bind every own key except `default`, then point `default` at the
        // original module object itself.
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
// TypeScript emit helper: run a generator function as an async function.
// Each yielded value is awaited; the returned promise (of type P, defaulting
// to the global Promise) settles with the generator's final return value.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-promise yielded values so `.then` is always available.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the awaited result, or throw into it on
        // rejection; either path rejects the outer promise if stepping throws.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        // Trampoline: settle when the generator is done, otherwise await the
        // yielded value and loop via fulfilled/rejected.
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// TypeScript emit helper: make a CommonJS export usable via `.default`.
// Real ES modules (flagged with __esModule) are returned as-is; anything
// else is wrapped in a synthetic namespace object.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Mark the compiled module as an ES-module interop target and pre-declare
// the named export.
Object.defineProperty(exports, "__esModule", { value: true });
exports.whisper = void 0;
// Node built-in: promise-based fs for the initial file-size probe.
const fs_1 = require("fs");
// Project modules: shell execution, whisper.cpp command construction,
// transcript parsing/merging, audio conversion, config defaults,
// speaker diarization, error types, and structured logging.
const shell_1 = __importDefault(require("./infra/shell"));
const whisper_1 = require("./core/whisper");
const transcription_1 = __importStar(require("./utils/transcription"));
const convert_1 = __importDefault(require("./utils/convert"));
const config_1 = __importDefault(require("./config/config"));
const diarization_1 = require("./core/diarization");
const errors_1 = require("./utils/errors");
const logger_1 = require("./utils/logger");
// Module-scoped logger tagged with this entry point's name.
const logger = (0, logger_1.createLogger)('main');
/**
 * Transcribe an audio file using whisper.cpp via this Node wrapper.
 *
 * @param filePath Path to the audio file (.wav 16kHz recommended)
 * @param options Optional configuration including model selection and flags
 * @returns Array of transcript lines: {start, end, speech}
 */
const whisper = (filePath, options) => __awaiter(void 0, void 0, void 0, function* () {
    // Fail fast on anything that is not a usable path string.
    if (!filePath || typeof filePath !== 'string') {
        throw new errors_1.ValidationError('File path must be a non-empty string');
    }
    try {
        logger.info('Starting transcription', { filePath, options });
        // Best-effort size probe: warn about very large inputs, never block them.
        try {
            const stats = yield fs_1.promises.stat(filePath);
            const fileSizeGB = stats.size / (1024 * 1024 * 1024);
            logger.debug('Input file size', { fileSizeGB: fileSizeGB.toFixed(3) });
            if (fileSizeGB > 2) { // 2GB limit
                logger.warn('Large audio file detected', { fileSizeGB });
                // Don't throw, just warn - let the user decide
            }
        }
        catch (error) {
            // File doesn't exist or can't be accessed - will be caught by validateFilePath
            logger.debug('Could not get file stats during initial validation', { error });
        }
        // Convert/normalize the input into whisper-friendly audio.
        const preparedFilePath = yield (0, errors_1.safeAsync)(() => (0, convert_1.default)(filePath), "Failed to prepare audio file");
        logger.debug('Audio file prepared', { preparedFilePath });
        // Merge caller options over configured defaults, key by key; for the
        // nested option bags, caller-supplied entries win per-property.
        const cfg = (0, config_1.default)();
        const effectiveOptions = {
            modelName: options?.modelName ?? cfg.modelName,
            modelPath: options?.modelPath ?? cfg.modelPath,
            whisperOptions: { ...(cfg.whisperOptions || {}), ...(options?.whisperOptions || {}) },
            shellOptions: { ...(cfg.shellOptions || {}), ...(options?.shellOptions || {}) },
            diarization: options?.diarization,
        };
        const command = yield (0, whisper_1.createCppCommand)({
            filePath: preparedFilePath, // Already normalized by createCppCommand
            modelName: effectiveOptions.modelName,
            modelPath: effectiveOptions.modelPath,
            options: effectiveOptions.whisperOptions,
        });
        // Run whisper.cpp and capture its raw textual output.
        const transcript = yield (0, errors_1.safeAsync)(() => (0, shell_1.default)(command, effectiveOptions.shellOptions), "Failed to execute whisper transcription");
        logger.debug('Whisper transcription completed');
        let transcriptArray = (0, transcription_1.default)(transcript);
        logger.info('Transcript parsed', { lineCount: transcriptArray.length });
        // Word-per-line output gets merged back into sentence-level entries.
        if ((0, transcription_1.looksLikeWordLevelTranscript)(transcriptArray)) {
            const before = transcriptArray.length;
            transcriptArray = (0, transcription_1.mergeWordLevelTranscript)(transcriptArray);
            logger.info('Merged word-level lines into sentences', { before, after: transcriptArray.length });
        }
        // Diarization is opt-in and best-effort: any failure is logged and the
        // plain (speaker-less) transcript is returned instead.
        const diarize = effectiveOptions.diarization;
        if (diarize?.enabled) {
            try {
                logger.info('Starting diarization');
                const dia = yield (0, diarization_1.runDiarization)(preparedFilePath, diarize);
                transcriptArray = (0, diarization_1.assignSpeakersToTranscript)(transcriptArray, dia);
                logger.info('Diarization completed successfully');
            }
            catch (e) {
                logger.warn('Diarization failed', { error: e instanceof Error ? e.message : e });
            }
        }
        logger.info('Transcription completed successfully', {
            lineCount: transcriptArray.length,
            hasSpeakers: transcriptArray.some(line => line.speaker)
        });
        return transcriptArray;
    }
    catch (error) {
        // Project errors pass through untouched so callers can match on `code`.
        if ((0, errors_1.isWhisperNodeError)(error)) {
            logger.error('Whisper-node error occurred', { error: error.message, code: error.code });
            // Re-throw our custom errors as-is
            throw error;
        }
        // Heuristic match on the message to surface a model-setup hint for the
        // most common failure mode (model not downloaded/configured).
        const msg = String(error || "");
        if (msg.includes("not downloaded") || msg.includes("not found") || msg.includes("modelName")) {
            logger.error('Model issue detected. Run `npx whisper-node download` to fetch models, or configure { modelPath: \'.../ggml-*.bin\' }.', { error: msg });
        }
        else {
            logger.error('Transcription failed with unknown error', { error });
        }
        // Wrap unknown errors in our error type
        throw new errors_1.WhisperNodeError(error instanceof Error ? error.message : String(error));
    }
});
exports.whisper = whisper;
// Expose the same function as both the named and the default export.
exports.default = exports.whisper;