UNPKG

@lumen-labs-dev/whisper-node

Version:

Local audio transcription on CPU. Node.js bindings for OpenAI's Whisper.

www.npmjs.com/@lumen-labs-dev/whisper-node

LumenLabsDev/whisper-node

150 lines (149 loc) • 7.96 kB

JavaScript

"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.whisper = void 0; const fs_1 = require("fs"); const shell_1 = __importDefault(require("./infra/shell")); const whisper_1 = require("./core/whisper"); const transcription_1 = __importStar(require("./utils/transcription")); const convert_1 = __importDefault(require("./utils/convert")); const config_1 = __importDefault(require("./config/config")); const diarization_1 = require("./core/diarization"); const errors_1 = require("./utils/errors"); const logger_1 = require("./utils/logger"); const logger = (0, logger_1.createLogger)('main'); /** * Transcribe an audio file using whisper.cpp via this Node wrapper. * * @param filePath Path to the audio file (.wav 16kHz recommended) * @param options Optional configuration including model selection and flags * @returns Array of transcript lines: {start, end, speech} */ const whisper = (filePath, options) => __awaiter(void 0, void 0, void 0, function* () { var _a, _b; // Input validation if (!filePath || typeof filePath !== 'string') { throw new errors_1.ValidationError('File path must be a non-empty string'); } try { logger.info('Starting transcription', { filePath, options }); // Early file size validation try { const stats = yield fs_1.promises.stat(filePath); const fileSizeGB = stats.size / (1024 * 1024 * 1024); logger.debug('Input file size', { fileSizeGB: fileSizeGB.toFixed(3) }); if (fileSizeGB > 2) { // 2GB limit logger.warn('Large audio file detected', { fileSizeGB }); // Don't throw, just warn - let the user decide } } catch (error) { // File doesn't exist or can't be accessed - will be caught by validateFilePath logger.debug('Could not get file stats during initial validation', { error }); } const preparedFilePath = yield (0, errors_1.safeAsync)(() => (0, convert_1.default)(filePath), "Failed to prepare audio file"); logger.debug('Audio file prepared', { preparedFilePath }); const cfg = (0, config_1.default)(); const effectiveOptions = { modelName: (_a = options === null || options === void 0 ? void 0 : options.modelName) !== null && _a !== void 0 ? _a : cfg.modelName, modelPath: (_b = options === null || options === void 0 ? void 0 : options.modelPath) !== null && _b !== void 0 ? _b : cfg.modelPath, whisperOptions: Object.assign(Object.assign({}, (cfg.whisperOptions || {})), ((options === null || options === void 0 ? void 0 : options.whisperOptions) || {})), shellOptions: Object.assign(Object.assign({}, (cfg.shellOptions || {})), ((options === null || options === void 0 ? void 0 : options.shellOptions) || {})), diarization: options === null || options === void 0 ? void 0 : options.diarization, }; const command = yield (0, whisper_1.createCppCommand)({ filePath: preparedFilePath, // Already normalized by createCppCommand modelName: effectiveOptions.modelName, modelPath: effectiveOptions.modelPath, options: effectiveOptions.whisperOptions, }); const transcript = yield (0, errors_1.safeAsync)(() => (0, shell_1.default)(command, effectiveOptions.shellOptions), "Failed to execute whisper transcription"); logger.debug('Whisper transcription completed'); let transcriptArray = (0, transcription_1.default)(transcript); logger.info('Transcript parsed', { lineCount: transcriptArray.length }); if ((0, transcription_1.looksLikeWordLevelTranscript)(transcriptArray)) { const before = transcriptArray.length; transcriptArray = (0, transcription_1.mergeWordLevelTranscript)(transcriptArray); logger.info('Merged word-level lines into sentences', { before, after: transcriptArray.length }); } const diarize = effectiveOptions.diarization; if (diarize === null || diarize === void 0 ? void 0 : diarize.enabled) { try { logger.info('Starting diarization'); const dia = yield (0, diarization_1.runDiarization)(preparedFilePath, diarize); transcriptArray = (0, diarization_1.assignSpeakersToTranscript)(transcriptArray, dia); logger.info('Diarization completed successfully'); } catch (e) { logger.warn('Diarization failed', { error: e instanceof Error ? e.message : e }); } } logger.info('Transcription completed successfully', { lineCount: transcriptArray.length, hasSpeakers: transcriptArray.some(line => line.speaker) }); return transcriptArray; } catch (error) { if ((0, errors_1.isWhisperNodeError)(error)) { logger.error('Whisper-node error occurred', { error: error.message, code: error.code }); // Re-throw our custom errors as-is throw error; } const msg = String(error || ""); if (msg.includes("not downloaded") || msg.includes("not found") || msg.includes("modelName")) { logger.error('Model issue detected. Run `npx whisper-node download` to fetch models, or configure { modelPath: \'.../ggml-*.bin\' }.', { error: msg }); } else { logger.error('Transcription failed with unknown error', { error }); } // Wrap unknown errors in our error type throw new errors_1.WhisperNodeError(error instanceof Error ? error.message : String(error)); } }); exports.whisper = whisper; exports.default = exports.whisper;