UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

1,114 lines (1,113 loc) 62.7 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.SherpaOnnxTTS = exports.SherpaOnnxTTSClient = void 0; const fs = __importStar(require("node:fs")); const os = __importStar(require("node:os")); const path = __importStar(require("node:path")); // Import necessary modules for ESM path resolution // import { fileURLToPath } from 'url'; // No longer needed const decompress_1 = __importDefault(require("decompress")); const decompress_tarbz2_1 = __importDefault(require("decompress-tarbz2")); const abstract_tts_1 = require("../core/abstract-tts"); // Capture native fetch at module level const nativeFetch = globalThis.fetch; // Import the generated models config const generated_models_1 = require("./sherpaonnx/generated_models"); // Import the sherpaonnx-loader const sherpaOnnxLoaderModule = __importStar(require("../utils/sherpaonnx-loader")); // Module scope variables to hold the imported modules let sherpa; let sherpaOnnxLoader = null; let sherpaOnnxEnvironmentCheck = null; // Try to initialize the loader and check environment try { sherpaOnnxLoader = sherpaOnnxLoaderModule; sherpaOnnxEnvironmentCheck = sherpaOnnxLoader.canRunSherpaOnnx(); if (!sherpaOnnxEnvironmentCheck.canRun) { console.warn("SherpaOnnx environment check failed:", sherpaOnnxEnvironmentCheck.issues.join(", ")); console.warn("SherpaOnnx will use mock implementation. Install required packages to enable native TTS."); // Provide specific installation guidance if (sherpaOnnxLoader.getInstallationInstructions) { console.warn("Installation instructions:"); console.warn(sherpaOnnxLoader.getInstallationInstructions()); } } } catch (error) { console.warn("Could not load sherpaonnx-loader:", error); } /** * SherpaOnnx TTS client */ class SherpaOnnxTTSClient extends abstract_tts_1.AbstractTTSClient { /** * Get comprehensive diagnostics for SherpaOnnx setup * @returns Detailed diagnostic information */ static getDiagnostics() { if (sherpaOnnxLoader?.getSherpaOnnxDiagnostics) { return sherpaOnnxLoader.getSherpaOnnxDiagnostics(); } return { platform: "unknown", expectedPackage: null, hasMainPackage: false, hasPlatformPackage: false, hasNativeModule: false, environmentVariables: {}, recommendations: ["SherpaOnnx loader not available"], canRun: false, }; } /** * Create a new SherpaOnnx TTS client * @param credentials SherpaOnnx credentials */ constructor(credentials) { super(credentials); /** * Path to the model file */ Object.defineProperty(this, "modelPath", { enumerable: true, configurable: true, writable: true, value: null }); /** * Voice model ID */ Object.defineProperty(this, "modelId", { enumerable: true, configurable: true, writable: true, value: null }); /** * Base directory for models */ Object.defineProperty(this, "baseDir", { enumerable: true, configurable: true, writable: true, value: void 0 }); /** * SherpaOnnx TTS instance */ Object.defineProperty(this, "tts", { enumerable: true, configurable: true, writable: true, value: null }); /** * Model configuration */ Object.defineProperty(this, "jsonModels", { enumerable: true, configurable: true, writable: true, value: {} }); // Initialize instance variables this.modelPath = credentials.modelPath || null; this.modelId = credentials.modelId || null; // Use a dedicated models directory if modelPath is not provided if (this.modelPath) { this.baseDir = this.modelPath; } else { // Create a models directory in the user's home directory const homeDir = os.homedir(); const modelsDir = path.join(homeDir, ".js-tts-wrapper", "models"); // Create the models directory if it doesn't exist if (!fs.existsSync(modelsDir)) { fs.mkdirSync(modelsDir, { recursive: true }); } this.baseDir = modelsDir; console.log("Using default models directory:", modelsDir); } // Set the library path environment variable this.setLibraryPath(); // Load model configuration this.jsonModels = this.loadModelsAndVoices(); // Only set up voice if we have a modelId or auto-download is enabled if (this.modelId || !credentials.noDefaultDownload) { this.modelId = this.modelId || "mms_eng"; // Default to English if not specified this.setVoice(this.modelId); } else { console.log("Skipping automatic model download (noDefaultDownload=true)"); } } /** * Load models and voices from the JSON configuration file * @returns Record of model configurations */ loadModelsAndVoices() { try { // Return the embedded models config directly return generated_models_1.SHERPA_MODELS_CONFIG; } catch (error) { // This should ideally not happen if the generation script ran correctly throw new Error(`Could not load embedded models configuration. Build might be broken. Error: ${error.message}`); } } /** * Download a file from a URL to a destination path * @param url URL to download from * @param destination Destination path * @returns Promise resolving when the download is complete */ async downloadFile(url, destination) { try { console.log(`Downloading file from ${url}`); // Diagnostic log to check the NATIVE fetch implementation we captured console.log(`DEBUG: typeof nativeFetch = ${typeof nativeFetch}`); if (typeof nativeFetch === "function" && nativeFetch.toString) { console.log(`DEBUG: nativeFetch.toString() = ${nativeFetch.toString().substring(0, 200)}...`); // Log first 200 chars } // Use the captured native fetch const response = await nativeFetch(url); if (!response.ok) { throw new Error(`Failed to download file: ${response.statusText}`); } // Add check before calling arrayBuffer if (typeof response.arrayBuffer !== "function") { console.error("DEBUG: response object does NOT have arrayBuffer method. Response keys:", Object.keys(response)); throw new Error("response.arrayBuffer is not a function"); } const buffer = await response.arrayBuffer(); fs.writeFileSync(destination, Buffer.from(buffer)); console.log(`File downloaded to ${destination}`); } catch (error) { const err = error; console.error(`Error downloading file: ${err.message}`); // Log the full error object for more details console.error("DEBUG: Full download error stack:", err.stack); throw err; } } /** * Extract a tar.bz2 archive to a destination directory * @param archivePath Path to the archive file * @param destinationDir Destination directory * @returns Promise resolving to a map of extracted file paths */ async extractTarBz2(archivePath, destinationDir) { try { console.log(`Extracting archive ${archivePath} to ${destinationDir}`); // Create the destination directory if it doesn't exist if (!fs.existsSync(destinationDir)) { fs.mkdirSync(destinationDir, { recursive: true }); } // Use the decompress library to extract the archive const files = await (0, decompress_1.default)(archivePath, destinationDir, { plugins: [(0, decompress_tarbz2_1.default)()], }); console.log(`Extracted ${files.length} files from ${archivePath}`); // Create a map to store the extracted file paths const extractedFiles = new Map(); // Store the file paths in the map for (const file of files) { const filePath = path.join(destinationDir, file.path); extractedFiles.set(file.path, filePath); console.log(`Extracted ${file.path} to ${filePath}`); } console.log(`Extraction of ${archivePath} completed successfully`); return extractedFiles; } catch (error) { const err = error; console.error(`Error extracting archive: ${err.message}`); throw err; } } /** * Check if model and token files exist * @param modelPath Path to model file * @param tokensPath Path to tokens file * @param modelId Optional model ID to determine voice type requirements * @returns True if all required files exist and are not empty */ checkFilesExist(modelPath, tokensPath, modelId) { try { // Check that both files exist if (!fs.existsSync(modelPath) || !fs.existsSync(tokensPath)) { return false; } // Check that both files are not empty const modelStats = fs.statSync(modelPath); const tokensStats = fs.statSync(tokensPath); if (modelStats.size === 0 || tokensStats.size === 0) { return false; } // For Piper voices, check for espeak-ng-data directory if (modelId && this.isPiperVoice(modelId)) { const voiceDir = path.dirname(modelPath); const espeakDataDir = path.join(voiceDir, "espeak-ng-data"); // Check if espeak-ng-data directory exists and has content if (!fs.existsSync(espeakDataDir) || !fs.statSync(espeakDataDir).isDirectory()) { console.log(`Piper voice ${modelId} missing espeak-ng-data directory at ${espeakDataDir}`); return false; } // Check if espeak-ng-data directory has content try { const espeakFiles = fs.readdirSync(espeakDataDir); if (espeakFiles.length === 0) { console.log(`Piper voice ${modelId} has empty espeak-ng-data directory`); return false; } } catch (error) { console.log(`Piper voice ${modelId} cannot read espeak-ng-data directory: ${error}`); return false; } } // For Kokoro voices, check for additional required files if (modelId && this.isKokoroVoice(modelId)) { const voiceDir = path.dirname(modelPath); const voicesPath = path.join(voiceDir, "voices.bin"); const espeakDataDir = path.join(voiceDir, "espeak-ng-data"); // Check for voices.bin file if (!fs.existsSync(voicesPath) || fs.statSync(voicesPath).size === 0) { console.log(`Kokoro voice ${modelId} missing or empty voices.bin file at ${voicesPath}`); return false; } // Check for espeak-ng-data directory if (!fs.existsSync(espeakDataDir) || !fs.statSync(espeakDataDir).isDirectory()) { console.log(`Kokoro voice ${modelId} missing espeak-ng-data directory at ${espeakDataDir}`); return false; } } return true; } catch (error) { console.error("Error checking files:", error); return false; } } /** * Check if a voice is a Piper voice based on its ID * @param modelId Voice model ID * @returns True if this is a Piper voice */ isPiperVoice(modelId) { return (modelId.startsWith("piper-") || (this.jsonModels[modelId] && this.jsonModels[modelId].developer === "piper")); } /** * Check if a voice is a Kokoro voice based on its ID * @param modelId Voice model ID * @returns True if this is a Kokoro voice */ isKokoroVoice(modelId) { return (modelId.startsWith("kokoro-") || (this.jsonModels[modelId] && this.jsonModels[modelId].model_type === "kokoro")); } /** * Check if a voice is a Matcha voice based on its ID * @param modelId Voice model ID * @returns True if this is a Matcha voice */ isMatchaVoice(modelId) { return (this.jsonModels[modelId] && this.jsonModels[modelId].model_type === "matcha"); } /** * Get the model type for a given model ID * @param modelId Voice model ID * @returns Model type (vits, kokoro, matcha) */ getModelType(modelId) { if (this.isKokoroVoice(modelId)) return "kokoro"; if (this.isMatchaVoice(modelId)) return "matcha"; return "vits"; // Default to vits for backward compatibility } /** * Find files matching a pattern in a directory recursively * @param dir Directory to search * @param pattern Regex pattern to match * @returns Array of matching file paths */ findFilesInDirectory(dir, pattern) { const results = []; const searchRecursive = (currentDir) => { try { const items = fs.readdirSync(currentDir); for (const item of items) { const itemPath = path.join(currentDir, item); const stat = fs.statSync(itemPath); if (stat.isDirectory()) { searchRecursive(itemPath); } else if (pattern.test(item)) { results.push(itemPath); } } } catch (error) { // Ignore errors and continue } }; searchRecursive(dir); return results; } /** * Find a specific file in a directory recursively * @param dir Directory to search * @param filename Filename to find * @returns Path to the file or null if not found */ findFileInDirectory(dir, filename) { const files = this.findFilesInDirectory(dir, new RegExp(`^${filename}$`)); return files.length > 0 ? files[0] : null; } /** * Find a specific directory in a directory recursively * @param dir Directory to search * @param dirname Directory name to find * @returns Path to the directory or null if not found */ findDirectoryInDestination(dir, dirname) { const searchRecursive = (currentDir) => { try { const items = fs.readdirSync(currentDir); for (const item of items) { const itemPath = path.join(currentDir, item); const stat = fs.statSync(itemPath); if (stat.isDirectory()) { if (item === dirname) { return itemPath; } const result = searchRecursive(itemPath); if (result) { return result; } } } } catch (error) { // Ignore errors and continue } return null; }; return searchRecursive(dir); } /** * Check if a model is from GitHub (archive-based) * @param modelId Voice model ID * @returns True if this is a GitHub model */ isGitHubModel(modelId) { const githubPrefixes = [ "piper-", "coqui-", "icefall-", "mimic3-", "melo-", "vctk-", "zh-", "ljs-", "cantonese-", "kokoro-" ]; return githubPrefixes.some(prefix => modelId.startsWith(prefix)); } /** * Get dict directory from voice directory * @param voiceDir Voice directory path * @returns Dict directory path or empty string */ getDictDir(voiceDir) { try { const items = fs.readdirSync(voiceDir); for (const item of items) { const itemPath = path.join(voiceDir, item); const stat = fs.statSync(itemPath); if (stat.isDirectory()) { // Check if this directory contains .txt files (dict files) const subItems = fs.readdirSync(itemPath); if (subItems.some(subItem => subItem.endsWith('.txt'))) { return itemPath; } } } } catch (error) { // Ignore errors and return empty string } return ""; } /** * Ensure vocoder is downloaded for Matcha models * @returns Path to vocoder file */ async ensureVocoderDownloaded() { const vocoderFilename = "vocos-22khz-univ.onnx"; const vocoderPath = path.join(this.baseDir, vocoderFilename); if (fs.existsSync(vocoderPath)) { console.log(`Vocoder already exists: ${vocoderPath}`); return vocoderPath; } // Download vocoder from sherpa-onnx releases const vocoderUrl = "https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx"; console.log(`Downloading vocoder from ${vocoderUrl}`); try { await this.downloadFile(vocoderUrl, vocoderPath); console.log(`Vocoder downloaded to ${vocoderPath}`); return vocoderPath; } catch (error) { console.error(`Failed to download vocoder: ${error}`); // Return empty string if download fails - let sherpa-onnx handle the error return ""; } } /** * Recursively copy a directory and all its contents * @param src Source directory path * @param dest Destination directory path */ copyDirectoryRecursive(src, dest) { try { // Create destination directory if it doesn't exist if (!fs.existsSync(dest)) { fs.mkdirSync(dest, { recursive: true }); } // Read the source directory const entries = fs.readdirSync(src, { withFileTypes: true }); for (const entry of entries) { const srcPath = path.join(src, entry.name); const destPath = path.join(dest, entry.name); if (entry.isDirectory()) { // Recursively copy subdirectory this.copyDirectoryRecursive(srcPath, destPath); } else { // Copy file fs.copyFileSync(srcPath, destPath); } } } catch (error) { console.error(`Error copying directory from ${src} to ${dest}:`, error); throw error; } } /** * Download model and token files to voice-specific directory * @param destinationDir Base directory for model files * @param modelId Voice model ID * @returns Tuple of (model_path, tokens_path, lexicon_path, dict_dir) */ async downloadModelAndTokens(destinationDir, modelId) { let lexiconPath = ""; let dictDir = ""; // Handle null modelId const safeModelId = modelId || "default"; // Get model URL from JSON config if (!(safeModelId in this.jsonModels)) { throw new Error(`Model ID ${safeModelId} not found in configuration`); } const modelConfig = this.jsonModels[safeModelId]; const modelUrl = modelConfig.url; // Set paths in voice directory const modelPath = path.join(destinationDir, "model.onnx"); const tokensPath = path.join(destinationDir, "tokens.txt"); if (modelConfig.compression) { // Handle compressed archive console.log("Downloading compressed model from", modelUrl); // Download to a temporary file const archivePath = path.join(destinationDir, "model.tar.bz2"); await this.downloadFile(modelUrl, archivePath); console.log("Compressed model downloaded to", archivePath); // Extract the archive console.log("Extracting model archive..."); try { // Extract the archive const extractedFiles = await this.extractTarBz2(archivePath, destinationDir); // Find the model and tokens files in the extracted files let modelFile = ""; let tokensFile = ""; let espeakDataDir = ""; // Look for model.onnx, tokens.txt, and espeak-ng-data in the extracted files for (const [fileName, filePath] of extractedFiles.entries()) { if (fileName.endsWith(".onnx")) { modelFile = filePath; } else if (fileName.endsWith("tokens.txt")) { tokensFile = filePath; } else if (fileName.includes("espeak-ng-data/") && !espeakDataDir) { // Find the espeak-ng-data directory (take the parent directory of any file in espeak-ng-data) const parts = fileName.split("/"); const espeakIndex = parts.findIndex((part) => part === "espeak-ng-data"); if (espeakIndex >= 0) { const espeakRelativePath = parts.slice(0, espeakIndex + 1).join("/"); espeakDataDir = path.join(destinationDir, espeakRelativePath); } } } // If we found the required files, update the paths if (modelFile && tokensFile) { console.log(`Found model file: ${modelFile}`); console.log(`Found tokens file: ${tokensFile}`); // Copy the basic files fs.copyFileSync(modelFile, modelPath); fs.copyFileSync(tokensFile, tokensPath); console.log(`Copied model file to ${modelPath}`); console.log(`Copied tokens file to ${tokensPath}`); // For Piper voices, copy espeak-ng-data directory if (this.isPiperVoice(safeModelId)) { if (espeakDataDir && fs.existsSync(espeakDataDir)) { const espeakDestDir = path.join(destinationDir, "espeak-ng-data"); this.copyDirectoryRecursive(espeakDataDir, espeakDestDir); console.log(`Copied espeak-ng-data directory to ${espeakDestDir}`); } else { console.warn(`Piper voice ${safeModelId} missing espeak-ng-data directory in archive`); } } // For Kokoro voices, copy additional required files if (this.isKokoroVoice(safeModelId)) { // Copy voices.bin file const voicesFile = this.findFileInDirectory(destinationDir, "voices.bin"); if (voicesFile) { const voicesDestPath = path.join(destinationDir, "voices.bin"); if (voicesFile !== voicesDestPath) { fs.copyFileSync(voicesFile, voicesDestPath); console.log(`Copied voices.bin file to ${voicesDestPath}`); } } else { console.warn(`Kokoro voice ${safeModelId} missing voices.bin file in archive`); } // Copy espeak-ng-data directory if (espeakDataDir && fs.existsSync(espeakDataDir)) { const espeakDestDir = path.join(destinationDir, "espeak-ng-data"); this.copyDirectoryRecursive(espeakDataDir, espeakDestDir); console.log(`Copied espeak-ng-data directory to ${espeakDestDir}`); } else { console.warn(`Kokoro voice ${safeModelId} missing espeak-ng-data directory in archive`); } // Copy lexicon files if they exist const lexiconFiles = this.findFilesInDirectory(destinationDir, /lexicon.*\.txt$/); lexiconFiles.forEach((lexiconFile) => { const lexiconName = path.basename(lexiconFile); const lexiconDestPath = path.join(destinationDir, lexiconName); if (lexiconFile !== lexiconDestPath) { fs.copyFileSync(lexiconFile, lexiconDestPath); console.log(`Copied lexicon file to ${lexiconDestPath}`); } }); // Copy other potential files (dict directory, fst files, etc.) const dictDir = this.findDirectoryInDestination(destinationDir, "dict"); if (dictDir) { const dictDestDir = path.join(destinationDir, "dict"); if (dictDir !== dictDestDir) { this.copyDirectoryRecursive(dictDir, dictDestDir); console.log(`Copied dict directory to ${dictDestDir}`); } } const fstFiles = this.findFilesInDirectory(destinationDir, /\.fst$/); fstFiles.forEach((fstFile) => { const fstName = path.basename(fstFile); const fstDestPath = path.join(destinationDir, fstName); if (fstFile !== fstDestPath) { fs.copyFileSync(fstFile, fstDestPath); console.log(`Copied FST file to ${fstDestPath}`); } }); } } else { throw new Error("Could not find model.onnx and tokens.txt in the extracted files"); } } catch (error) { const err = error; console.error(`Error extracting archive: ${err.message}`); throw new Error(`Failed to extract model files for ${safeModelId}: ${err.message}`); } } else { // Check if the URL is from the merged_models.json file // The URL format in merged_models.json is different from the hardcoded URLs const isFromMergedModels = modelUrl.includes("willwade/mms-tts-multilingual-models-onnx"); if (isFromMergedModels) { // Handle direct files from willwade/mms-tts-multilingual-models-onnx // The URL format is different, it points to a directory const baseUrl = modelUrl; const modelFileUrl = `${baseUrl}/model.onnx`; const tokensFileUrl = `${baseUrl}/tokens.txt`; // Download model file console.log("Downloading model from", modelFileUrl); await this.downloadFile(modelFileUrl, modelPath); console.log("Model downloaded to", modelPath); // Download tokens file console.log("Downloading tokens from", tokensFileUrl); await this.downloadFile(tokensFileUrl, tokensPath); console.log("Tokens downloaded to", tokensPath); } else { // Handle direct files from other sources const baseUrl = modelUrl; const directModelUrl = `${baseUrl}/model.onnx?download=true`; const tokensUrl = `${baseUrl}/tokens.txt`; // Download model file console.log("Downloading model from", directModelUrl); await this.downloadFile(directModelUrl, modelPath); console.log("Model downloaded to", modelPath); // Download tokens file console.log("Downloading tokens from", tokensUrl); await this.downloadFile(tokensUrl, tokensPath); console.log("Tokens downloaded to", tokensPath); } } // Set additional paths lexiconPath = path.join(destinationDir, "lexicon.txt"); dictDir = this.getDictDir(destinationDir); return [modelPath, tokensPath, lexiconPath, dictDir]; } /** * Check if model exists and download if not * @param modelId Voice model ID * @returns Tuple of (model_path, tokens_path, lexicon_path, dict_dir) */ async checkAndDownloadModel(modelId) { // Create voice-specific directory const voiceDir = path.join(this.baseDir, modelId); if (!fs.existsSync(voiceDir)) { fs.mkdirSync(voiceDir, { recursive: true }); } console.log("Using voice directory:", voiceDir); // Expected paths for this voice const modelPath = path.join(voiceDir, "model.onnx"); const tokensPath = path.join(voiceDir, "tokens.txt"); // Check if files exist in voice directory if (this.checkFilesExist(modelPath, tokensPath, modelId)) { const lexiconPath = path.join(voiceDir, "lexicon.txt"); const dictDir = this.getDictDir(voiceDir); return [modelPath, tokensPath, lexiconPath, dictDir]; } console.log("Downloading model and tokens languages for", modelId, "because we can't find it"); // Download to voice-specific directory const [_modelPath, _tokensPath, lexiconPath, dictDir] = await this.downloadModelAndTokens(voiceDir, modelId); // Verify files were downloaded correctly if (!this.checkFilesExist(modelPath, tokensPath, modelId)) { throw new Error(`Failed to download model files for ${modelId}`); } return [modelPath, tokensPath, lexiconPath, dictDir]; } /** * Set the platform-specific library path environment variable for SherpaOnnx * @returns True if the environment variable was set successfully */ setLibraryPath() { try { // Only needed in Node.js environment if (typeof process === "undefined" || typeof process.env === "undefined") { return false; } // Determine platform-specific library paths and environment variables let libPathEnvVar = ""; let possiblePaths = []; const pathSeparator = process.platform === "win32" ? ";" : ":"; if (process.platform === "darwin") { // macOS uses DYLD_LIBRARY_PATH libPathEnvVar = "DYLD_LIBRARY_PATH"; possiblePaths = [ path.join(process.cwd(), "node_modules", "sherpa-onnx-darwin-arm64"), path.join(process.cwd(), "node_modules", "sherpa-onnx-darwin-x64"), ]; } else if (process.platform === "linux") { // Linux uses LD_LIBRARY_PATH libPathEnvVar = "LD_LIBRARY_PATH"; possiblePaths = [ path.join(process.cwd(), "node_modules", "sherpa-onnx-linux-arm64"), path.join(process.cwd(), "node_modules", "sherpa-onnx-linux-x64"), ]; } else if (process.platform === "win32") { // Windows uses PATH libPathEnvVar = "PATH"; possiblePaths = [path.join(process.cwd(), "node_modules", "sherpa-onnx-win-x64")]; } else { console.warn(`Unsupported platform: ${process.platform}`); return false; } // Find the sherpa-onnx library directory if (libPathEnvVar) { let sherpaOnnxPath = ""; for (const libPath of possiblePaths) { if (fs.existsSync(libPath)) { console.log(`Found sherpa-onnx library at ${libPath}`); sherpaOnnxPath = libPath; break; } } if (sherpaOnnxPath) { // Set the environment variable const currentPath = process.env[libPathEnvVar] || ""; if (!currentPath.includes(sherpaOnnxPath)) { process.env[libPathEnvVar] = sherpaOnnxPath + (currentPath ? pathSeparator + currentPath : ""); console.log(`Set ${libPathEnvVar} to ${process.env[libPathEnvVar]}`); return true; } // Already set correctly return true; } console.warn(`Could not find sherpa-onnx library directory for ${process.platform}. SherpaOnnx TTS may not work correctly.`); return false; } return false; } catch (error) { console.error("Error setting library path:", error); return false; } } /** * Initialize the SherpaOnnx TTS engine * @param modelPath Path to model file * @param tokensPath Path to tokens file */ async initializeTTS(modelPath, tokensPath) { try { // Set the library path environment variable this.setLibraryPath(); // Dynamically import sherpa-onnx-node if not already loaded if (!sherpa) { console.log("Attempting to load sherpa-onnx-node..."); if (!sherpaOnnxLoader) { throw new Error("SherpaOnnx loader not available"); } // Use the safe loader that provides detailed error information const loadResult = await sherpaOnnxLoader.loadSherpaOnnxNodeSafe(); if (loadResult.success && loadResult.module) { sherpa = loadResult.module; console.log("Successfully loaded sherpa-onnx-node"); } else { // Log detailed environment information console.error("Failed to load sherpa-onnx-node:"); console.error("Environment check:", loadResult.environmentCheck); if (loadResult.error) { console.error("Load error:", loadResult.error.message); } // Provide specific installation instructions based on what's missing if (!loadResult.environmentCheck.hasMainPackage) { console.error("Missing main package: sherpa-onnx-node"); } if (!loadResult.environmentCheck.hasPlatformPackage) { console.error(`Missing platform package: ${loadResult.environmentCheck.expectedPackage}`); } if (!loadResult.environmentCheck.hasNativeModule) { console.error("Native module (.node file) not found"); } // Provide installation instructions if (sherpaOnnxLoader.getInstallationInstructions) { console.error(sherpaOnnxLoader.getInstallationInstructions()); } throw new Error(`SherpaOnnx native module loading failed: ${loadResult.error?.message || "Unknown error"}`); } } // Create the TTS configuration based on model type const modelType = this.modelId ? this.getModelType(this.modelId) : "vits"; const voiceDir = path.dirname(modelPath); let modelConfig = {}; if (modelType === "kokoro") { // Kokoro model configuration - matches Python implementation const voicesPath = path.join(voiceDir, "voices.bin"); const espeakDataDir = path.join(voiceDir, "espeak-ng-data"); modelConfig = { model: modelPath, voices: voicesPath, tokens: tokensPath, dataDir: fs.existsSync(espeakDataDir) ? espeakDataDir : "", }; console.log(`Using Kokoro model configuration with voices: ${voicesPath}`); } else if (modelType === "matcha") { // Matcha model configuration - matches Python implementation const espeakDataDir = path.join(voiceDir, "espeak-ng-data"); const vocoderPath = await this.ensureVocoderDownloaded(); modelConfig = { acousticModel: modelPath, vocoder: vocoderPath, lexicon: "", // Matcha models typically don't use lexicon tokens: tokensPath, dataDir: fs.existsSync(espeakDataDir) ? espeakDataDir : "", }; console.log(`Using Matcha model configuration with vocoder: ${vocoderPath}`); } else { // VITS model configuration (default) - matches Python implementation const lexiconPath = path.join(voiceDir, "lexicon.txt"); const dictDir = this.getDictDir(voiceDir); modelConfig = { model: modelPath, lexicon: fs.existsSync(lexiconPath) ? lexiconPath : "", tokens: tokensPath, dataDir: "", dictDir: "", }; // For Piper voices and GitHub models, use dataDir instead of dictDir if (this.modelId && (this.isPiperVoice(this.modelId) || this.isGitHubModel(this.modelId))) { const espeakDataDir = path.join(voiceDir, "espeak-ng-data"); if (fs.existsSync(espeakDataDir)) { modelConfig.dataDir = espeakDataDir; modelConfig.dictDir = ""; // Avoid jieba warnings console.log(`Using espeak-ng-data directory: ${espeakDataDir}`); } } else if (dictDir) { // For other models, use dictDir modelConfig.dictDir = dictDir; } } const config = { model: { [modelType]: modelConfig, debug: false, numThreads: 1, provider: "cpu", }, maxNumSentences: 1, }; // Log the config for debugging (only in non-test environments) if (process.env.NODE_ENV !== "test") { console.log("SherpaOnnx TTS config:", JSON.stringify(config, null, 2)); // Log what sherpa contains console.log("sherpa object keys:", Object.keys(sherpa)); } // Handle different module export formats let OfflineTts = sherpa.OfflineTts; if (!OfflineTts && sherpa.default && sherpa.default.OfflineTts) { if (process.env.NODE_ENV !== "test") { console.log("Using sherpa.default.OfflineTts"); } OfflineTts = sherpa.default.OfflineTts; } else if (OfflineTts) { if (process.env.NODE_ENV !== "test") { console.log("Using sherpa.OfflineTts"); } } else { if (process.env.NODE_ENV !== "test") { console.log("sherpa.OfflineTts does not exist"); console.log("Available sherpa properties:", Object.keys(sherpa)); if (sherpa.default) { console.log("Available sherpa.default properties:", Object.keys(sherpa.default)); } } } if (!OfflineTts) { throw new Error("OfflineTts constructor not found in sherpa-onnx-node package"); } // Create the TTS instance try { if (process.env.NODE_ENV !== "test") { console.log("Creating OfflineTts instance..."); } this.tts = new OfflineTts(config); if (process.env.NODE_ENV !== "test") { console.log("SherpaOnnx TTS initialized successfully"); } } catch (instanceError) { console.error("Error creating OfflineTts instance:", instanceError); console.error("Error details:", instanceError instanceof Error ? instanceError.stack : String(instanceError)); throw instanceError; } } catch (error) { console.error("Error initializing SherpaOnnx TTS:", error); console.error("Error stack:", error instanceof Error ? error.stack : "No stack trace available"); throw new Error(`Failed to initialize SherpaOnnx TTS. ${error instanceof Error ? error.message : String(error)}`); } } /** * Get available voices from the provider * @returns Promise resolving to an array of voice objects */ async _getVoices() { // Convert the JSON models to an array of voice objects return Object.entries(this.jsonModels).map(([id, config]) => ({ id, name: config.name, language: config.language, gender: config.gender, description: config.description, })); } /** * Map SherpaOnnx voice objects to unified format * @param rawVoices Array of SherpaOnnx voice objects * @returns Promise resolving to an array of unified voice objects */ async _mapVoicesToUnified(rawVoices) { return rawVoices.map((voice) => { // Get language code and ensure it's a string let langCode = "en-US"; if (voice.language) { // Handle different language formats from merged_models.json if (typeof voice.language === "string") { langCode = voice.language; } else if (Array.isArray(voice.language) && voice.language.length > 0) { // Handle the format from merged_models.json where language is an array of objects const firstLang = voice.language[0]; if (firstLang && typeof firstLang === "object") { // Try to get language code from different possible properties if (firstLang["Iso Code"]) { langCode = firstLang["Iso Code"]; } else if (firstLang.lang_code) { langCode = firstLang.lang_code; } // If we have a language name but no code, use the name if (langCode === "en-US" && firstLang["Language Name"]) { langCode = firstLang["Language Name"].toLowerCase().substring(0, 2); } } } } // Ensure langCode is in BCP-47 format (e.g., en-US) if (!langCode.includes("-")) { // Convert ISO 639-3 to BCP-47 format if (langCode === "eng") { langCode = "en-US"; } else if (langCode.length === 3) { // For other 3-letter codes, use first 2 letters and add country langCode = `${langCode.substring(0, 2)}-${langCode.substring(0, 2).toUpperCase()}`; } else if (langCode.length === 2) { // For 2-letter codes, add country langCode = `${langCode}-${langCode.toUpperCase()}`; } } // Create language code object const languageCode = { bcp47: langCode, iso639_3: langCode.split("-")[0], display: langCode, }; return { id: voice.id, name: voice.name, gender: voice.gender, provider: "sherpaonnx", languageCodes: [languageCode], }; }); } /** * Get a property value * @param property Property name * @returns Property value */ getProperty(property) { if (property === "voice") { return this.voiceId; } return super.getProperty(property); } /** * Set the voice to use for synthesis * @param voiceId Voice ID to use */ async setVoice(voiceId) { try { // Check if the voice exists in the configuration if (!(voiceId in this.jsonModels)) { throw new Error(`Voice ID ${voiceId} not found in configuration`); } // Set the voice ID this.voiceId = voiceId; this.modelId = voiceId; try { // Check and download the model if needed const [modelPath, tokensPath, _lexiconPath, _dictDir] = await this.checkAndDownloadModel(voiceId); // Initialize the TTS engine await this.initializeTTS(modelPath, tokensPath); // Set the model path this.modelPath = modelPath; } catch (downloadError) { const err = downloadError; console.warn(`Could not download or initialize model for voice ${voiceId}: ${err.message}`); console.warn("Using mock implementation for example."); // We'll continue without the model for the example // In a real application, you might want to throw an error here } } catch (error) { const err = error; console.error("Error setting voice:", err.message); // Don't throw the error, just log it and continue } } /** * Convert text to audio bytes * @param text Text to synthesize * @returns Promise resolving to audio bytes */ async synthToBytes(text) { try { // Remove SSML tags if present (SherpaOnnx doesn't support SSML) let plainText = text; if (this._isSSML(plainText)) { plainText = this.stripSSML(plainText); } // Ensure TTS is initialized before synthesis if (!this.tts) { // Try to initialize with default model if not already initialized try { await this.checkCredentials(); // This will initialize the TTS if possible } catch (initError) { console.warn("Failed to initialize SherpaOnnx TTS:", initError); } } if (!this.tts) { // Check if we have environment information to provide better error messages