UNPKG

whisper-nodejs-wrapper

Version:

Node.js wrapper for OpenAI Whisper speech recognition with TypeScript support

219 lines 8.23 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.whisper = exports.WhisperTranscriber = void 0; const child_process_1 = require("child_process"); const path = __importStar(require("path")); const fs = __importStar(require("fs")); class WhisperTranscriber { constructor(options) { this.isInitialized = false; // Use virtual environment Python if available, otherwise system Python this.pythonPath = options?.pythonPath || this.findPython(); this.pythonScriptPath = path.join(__dirname, '..', 'python', 'whisper_transcribe.py'); } findPython() { // Check for virtual environment first const packageRoot = path.join(__dirname, '..'); const venvPaths = process.platform === 'win32' ? [ path.join(packageRoot, '.venv', 'Scripts', 'python.exe'), path.join(process.cwd(), '.venv', 'Scripts', 'python.exe'), ] : [ path.join(packageRoot, '.venv', 'bin', 'python'), path.join(packageRoot, '.venv', 'bin', 'python3'), path.join(process.cwd(), '.venv', 'bin', 'python'), path.join(process.cwd(), '.venv', 'bin', 'python3'), ]; for (const venvPath of venvPaths) { if (fs.existsSync(venvPath)) { return venvPath; } } // Fall back to system Python return process.platform === 'win32' ? 'python' : 'python3'; } /** * Check if Whisper and dependencies are installed */ async checkDependencies() { return new Promise((resolve) => { const checkScript = ` import sys try: import whisper import torch print("OK") sys.exit(0) except ImportError as e: print(f"Missing: {e}") sys.exit(1) `; const pythonProcess = (0, child_process_1.spawn)(this.pythonPath, ['-c', checkScript]); pythonProcess.on('close', (code) => { resolve(code === 0); }); }); } /** * Install Whisper and required dependencies */ async installDependencies() { return new Promise((resolve, reject) => { console.log('Installing Whisper dependencies...'); const packages = ['openai-whisper', 'torch']; const pipProcess = (0, child_process_1.spawn)(this.pythonPath, [ '-m', 'pip', 'install', '--upgrade', ...packages ]); pipProcess.stdout.on('data', (data) => { if (process.env.WHISPER_VERBOSE === 'true') { console.log(data.toString()); } }); pipProcess.stderr.on('data', (data) => { console.error(data.toString()); }); pipProcess.on('close', (code) => { if (code === 0) { console.log('Whisper dependencies installed successfully'); resolve(); } else { reject(new Error(`Failed to install dependencies (exit code: ${code})`)); } }); }); } /** * Initialize the transcriber (check/install dependencies) */ async initialize() { if (this.isInitialized) return; const depsInstalled = await this.checkDependencies(); if (!depsInstalled) { console.log('Whisper dependencies not found. Installing...'); await this.installDependencies(); } this.isInitialized = true; } /** * Transcribe an audio file */ async transcribe(audioPath, options) { // Ensure dependencies are installed if (!this.isInitialized) { await this.initialize(); } return new Promise((resolve, reject) => { if (!fs.existsSync(audioPath)) { reject(new Error(`Audio file not found: ${audioPath}`)); return; } const args = [ this.pythonScriptPath, audioPath, options?.language || 'en', options?.modelSize || 'base' ]; const env = { ...process.env, WHISPER_CPU_ONLY: options?.cpuOnly ? '1' : '0' }; const pythonProcess = (0, child_process_1.spawn)(this.pythonPath, args, { env }); let outputData = ''; let errorData = ''; pythonProcess.stdout.on('data', (data) => { outputData += data.toString(); }); pythonProcess.stderr.on('data', (data) => { errorData += data.toString(); if (options?.verbose) { console.warn('Whisper stderr:', data.toString()); } }); pythonProcess.on('close', (code) => { if (code !== 0) { reject(new Error(`Whisper process exited with code ${code}: ${errorData}`)); return; } try { const result = JSON.parse(outputData); if (result.error) { reject(new Error(result.error)); return; } resolve(result); } catch (e) { const error = e; reject(new Error(`Failed to parse Whisper output: ${error.message}`)); } }); }); } /** * Transcribe with automatic retry on failure */ async transcribeWithRetry(audioPath, options, maxRetries = 3) { let lastError = null; for (let attempt = 1; attempt <= maxRetries; attempt++) { try { if (options?.verbose) { console.log(`Transcription attempt ${attempt}/${maxRetries}`); } return await this.transcribe(audioPath, options); } catch (error) { lastError = error; if (attempt < maxRetries) { const waitTime = Math.min(1000 * Math.pow(2, attempt - 1), 10000); if (options?.verbose) { console.log(`Retrying in ${waitTime}ms...`); } await new Promise(resolve => setTimeout(resolve, waitTime)); } } } throw new Error(`Transcription failed after ${maxRetries} attempts: ${lastError?.message}`); } } exports.WhisperTranscriber = WhisperTranscriber; // Export a default instance exports.whisper = new WhisperTranscriber(); // Also export for convenience exports.default = WhisperTranscriber; //# sourceMappingURL=index.js.map