UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

899 lines (898 loc) 39.3 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.SherpaOnnxTTSClient = void 0; const abstract_tts_1 = require("../core/abstract-tts"); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const os = __importStar(require("os")); const node_fetch_1 = __importDefault(require("node-fetch")); const decompress_1 = __importDefault(require("decompress")); const decompress_tarbz2_1 = __importDefault(require("decompress-tarbz2")); /** * SherpaOnnx TTS client */ class SherpaOnnxTTSClient extends abstract_tts_1.AbstractTTSClient { /** * Create a new SherpaOnnx TTS client * @param credentials SherpaOnnx credentials */ constructor(credentials) { super(credentials); /** * Path to the model file */ Object.defineProperty(this, "modelPath", { enumerable: true, configurable: true, writable: true, value: null }); /** * Voice model ID */ Object.defineProperty(this, "modelId", { enumerable: true, configurable: true, writable: true, value: null }); /** * Base directory for models */ Object.defineProperty(this, "baseDir", { enumerable: true, configurable: true, writable: true, value: void 0 }); /** * SherpaOnnx TTS instance */ Object.defineProperty(this, "tts", { enumerable: true, configurable: true, writable: true, value: null }); /** * Sample rate */ Object.defineProperty(this, "sampleRate", { enumerable: true, configurable: true, writable: true, value: 16000 }); /** * Model configuration */ Object.defineProperty(this, "jsonModels", { enumerable: true, configurable: true, writable: true, value: {} }); // Initialize instance variables this.modelPath = credentials.modelPath || null; this.modelId = credentials.modelId || null; // Use a dedicated models directory if modelPath is not provided if (this.modelPath) { this.baseDir = path.dirname(this.modelPath); } else { // Create a models directory in the user's home directory const homeDir = os.homedir(); const modelsDir = path.join(homeDir, ".js-tts-wrapper", "models"); // Create the models directory if it doesn't exist if (!fs.existsSync(modelsDir)) { fs.mkdirSync(modelsDir, { recursive: true }); } this.baseDir = modelsDir; console.log("Using default models directory:", modelsDir); } // Load model configuration this.jsonModels = this.loadModelsAndVoices(); // Only set up voice if we have a modelId or auto-download is enabled if (this.modelId || !credentials.noDefaultDownload) { this.modelId = this.modelId || "mms_eng"; // Default to English if not specified this.setVoice(this.modelId); } else { console.log("Skipping automatic model download (noDefaultDownload=true)"); } } /** * Load models and voices from the JSON configuration file * @returns Record of model configurations */ loadModelsAndVoices() { try { // First try to load from the package directory const packageDir = path.dirname(__filename); const modelsFilePath = path.join(packageDir, "sherpaonnx", SherpaOnnxTTSClient.MODELS_FILE); if (fs.existsSync(modelsFilePath)) { const modelsJson = fs.readFileSync(modelsFilePath, "utf-8"); return JSON.parse(modelsJson); } // If that fails, try to load from the models directory const modelsFilePathInModels = path.join(this.baseDir, SherpaOnnxTTSClient.MODELS_FILE); if (fs.existsSync(modelsFilePathInModels)) { const modelsJson = fs.readFileSync(modelsFilePathInModels, "utf-8"); return JSON.parse(modelsJson); } // If that fails too, download the models file console.log("Models file not found, downloading..."); // Create a temporary models file with a default configuration const defaultModels = { "mms_eng": { url: "https://huggingface.co/willwade/mms-tts-multilingual-models-onnx/resolve/main/eng", name: "MMS English", language: "en-US", gender: "Female", description: "MMS English TTS model" } }; // Save the default models file fs.writeFileSync(path.join(this.baseDir, SherpaOnnxTTSClient.MODELS_FILE), JSON.stringify(defaultModels, null, 2)); // Download the actual models file this.downloadModelsFile(); return defaultModels; } catch (error) { console.error("Error loading models and voices:", error); return {}; } } /** * Download the models file from the repository */ async downloadModelsFile() { try { // Try to download the models file from the repository const url = "https://raw.githubusercontent.com/willwade/tts-wrapper/main/tts_wrapper/engines/sherpaonnx/merged_models.json"; try { const response = await (0, node_fetch_1.default)(url); if (!response.ok) { throw new Error(`Failed to download models file: ${response.statusText}`); } const modelsJson = await response.text(); fs.writeFileSync(path.join(this.baseDir, SherpaOnnxTTSClient.MODELS_FILE), modelsJson); console.log("Models file downloaded successfully"); // Update the models configuration this.jsonModels = JSON.parse(modelsJson); return; } catch (downloadError) { console.warn("Could not download models file from repository, using default configuration"); } // If download fails, use our default configuration const defaultModels = { "mms_eng": { url: "https://huggingface.co/willwade/mms-tts-multilingual-models-onnx/resolve/main/eng", name: "MMS English", language: "en-US", gender: "Female", description: "MMS English TTS model" } }; // Save the models file const modelsJson = JSON.stringify(defaultModels, null, 2); fs.writeFileSync(path.join(this.baseDir, SherpaOnnxTTSClient.MODELS_FILE), modelsJson); console.log("Models file created successfully"); // Update the models configuration this.jsonModels = defaultModels; } catch (error) { console.error("Error creating models file:", error); } } /** * Download a file from a URL to a destination path * @param url URL to download from * @param destination Destination path * @returns Promise resolving when the download is complete */ async downloadFile(url, destination) { try { console.log(`Downloading file from ${url}`); const response = await (0, node_fetch_1.default)(url); if (!response.ok) { throw new Error(`Failed to download file: ${response.statusText}`); } const buffer = await response.arrayBuffer(); fs.writeFileSync(destination, Buffer.from(buffer)); console.log(`File downloaded to ${destination}`); } catch (error) { const err = error; console.error(`Error downloading file: ${err.message}`); throw err; } } /** * Extract a tar.bz2 archive to a destination directory * @param archivePath Path to the archive file * @param destinationDir Destination directory * @returns Promise resolving to a map of extracted file paths */ async extractTarBz2(archivePath, destinationDir) { try { console.log(`Extracting archive ${archivePath} to ${destinationDir}`); // Create the destination directory if it doesn't exist if (!fs.existsSync(destinationDir)) { fs.mkdirSync(destinationDir, { recursive: true }); } // Use the decompress library to extract the archive const files = await (0, decompress_1.default)(archivePath, destinationDir, { plugins: [(0, decompress_tarbz2_1.default)()] }); console.log(`Extracted ${files.length} files from ${archivePath}`); // Create a map to store the extracted file paths const extractedFiles = new Map(); // Store the file paths in the map for (const file of files) { const filePath = path.join(destinationDir, file.path); extractedFiles.set(file.path, filePath); console.log(`Extracted ${file.path} to ${filePath}`); } console.log(`Extraction of ${archivePath} completed successfully`); return extractedFiles; } catch (error) { const err = error; console.error(`Error extracting archive: ${err.message}`); throw err; } } /** * Check if model and token files exist * @param modelPath Path to model file * @param tokensPath Path to tokens file * @returns True if both files exist and are not empty */ checkFilesExist(modelPath, tokensPath) { try { // Check that both files exist if (!fs.existsSync(modelPath) || !fs.existsSync(tokensPath)) { return false; } // Check that both files are not empty const modelStats = fs.statSync(modelPath); const tokensStats = fs.statSync(tokensPath); return modelStats.size > 0 && tokensStats.size > 0; } catch (error) { console.error("Error checking files:", error); return false; } } /** * Get dict_dir from extracted model * @param destinationDir Destination directory * @returns Path to dict_dir */ getDictDir(destinationDir) { try { // Walk through directory tree const walkSync = (dir) => { const files = fs.readdirSync(dir); // Check if any file ends with .txt if (files.some((file) => file.endsWith(".txt"))) { return dir; } // Check subdirectories for (const file of files) { const filePath = path.join(dir, file); const stats = fs.statSync(filePath); if (stats.isDirectory()) { const result = walkSync(filePath); if (result) { return result; } } } return ""; }; return walkSync(destinationDir); } catch (error) { console.error("Error getting dict_dir:", error); return ""; } } /** * Download model and token files to voice-specific directory * @param destinationDir Base directory for model files * @param modelId Voice model ID * @returns Tuple of (model_path, tokens_path, lexicon_path, dict_dir) */ async downloadModelAndTokens(destinationDir, modelId) { let lexiconPath = ""; let dictDir = ""; // Handle null modelId const safeModelId = modelId || "default"; // Get model URL from JSON config if (!(safeModelId in this.jsonModels)) { throw new Error(`Model ID ${safeModelId} not found in configuration`); } const modelConfig = this.jsonModels[safeModelId]; const modelUrl = modelConfig.url; // Set paths in voice directory const modelPath = path.join(destinationDir, "model.onnx"); const tokensPath = path.join(destinationDir, "tokens.txt"); if (modelConfig.compression) { // Handle compressed archive console.log("Downloading compressed model from", modelUrl); // Download to a temporary file const archivePath = path.join(destinationDir, "model.tar.bz2"); await this.downloadFile(modelUrl, archivePath); console.log("Compressed model downloaded to", archivePath); // Extract the archive console.log("Extracting model archive..."); try { // Extract the archive const extractedFiles = await this.extractTarBz2(archivePath, destinationDir); // Find the model and tokens files in the extracted files let modelFile = ""; let tokensFile = ""; // Look for model.onnx and tokens.txt in the extracted files for (const [fileName, filePath] of extractedFiles.entries()) { if (fileName.endsWith(".onnx")) { modelFile = filePath; } else if (fileName.endsWith("tokens.txt")) { tokensFile = filePath; } } // If we found the files, update the paths if (modelFile && tokensFile) { console.log(`Found model file: ${modelFile}`); console.log(`Found tokens file: ${tokensFile}`); // Update the paths fs.copyFileSync(modelFile, modelPath); fs.copyFileSync(tokensFile, tokensPath); console.log(`Copied model file to ${modelPath}`); console.log(`Copied tokens file to ${tokensPath}`); } else { throw new Error("Could not find model.onnx and tokens.txt in the extracted files"); } } catch (error) { const err = error; console.error(`Error extracting archive: ${err.message}`); throw new Error(`Failed to extract model files for ${safeModelId}: ${err.message}`); } } else { // Check if the URL is from the merged_models.json file // The URL format in merged_models.json is different from the hardcoded URLs const isFromMergedModels = modelUrl.includes("willwade/mms-tts-multilingual-models-onnx"); if (isFromMergedModels) { // Handle direct files from willwade/mms-tts-multilingual-models-onnx // The URL format is different, it points to a directory const baseUrl = modelUrl; const modelFileUrl = `${baseUrl}/model.onnx`; const tokensFileUrl = `${baseUrl}/tokens.txt`; // Download model file console.log("Downloading model from", modelFileUrl); await this.downloadFile(modelFileUrl, modelPath); console.log("Model downloaded to", modelPath); // Download tokens file console.log("Downloading tokens from", tokensFileUrl); await this.downloadFile(tokensFileUrl, tokensPath); console.log("Tokens downloaded to", tokensPath); } else { // Handle direct files from other sources const baseUrl = modelUrl; const directModelUrl = `${baseUrl}/model.onnx?download=true`; const tokensUrl = `${baseUrl}/tokens.txt`; // Download model file console.log("Downloading model from", directModelUrl); await this.downloadFile(directModelUrl, modelPath); console.log("Model downloaded to", modelPath); // Download tokens file console.log("Downloading tokens from", tokensUrl); await this.downloadFile(tokensUrl, tokensPath); console.log("Tokens downloaded to", tokensPath); } } // Set additional paths lexiconPath = path.join(destinationDir, "lexicon.txt"); dictDir = this.getDictDir(destinationDir); return [modelPath, tokensPath, lexiconPath, dictDir]; } /** * Check if model exists and download if not * @param modelId Voice model ID * @returns Tuple of (model_path, tokens_path, lexicon_path, dict_dir) */ async checkAndDownloadModel(modelId) { // Create voice-specific directory const voiceDir = path.join(this.baseDir, modelId); if (!fs.existsSync(voiceDir)) { fs.mkdirSync(voiceDir, { recursive: true }); } console.log("Using voice directory:", voiceDir); // Expected paths for this voice const modelPath = path.join(voiceDir, "model.onnx"); const tokensPath = path.join(voiceDir, "tokens.txt"); // Check if files exist in voice directory if (!this.checkFilesExist(modelPath, tokensPath)) { console.log("Downloading model and tokens languages for", modelId, "because we can't find it"); // Download to voice-specific directory const [, , lexiconPath, dictDir] = await this.downloadModelAndTokens(voiceDir, modelId); // Verify files were downloaded correctly if (!this.checkFilesExist(modelPath, tokensPath)) { throw new Error(`Failed to download model files for ${modelId}`); } return [modelPath, tokensPath, lexiconPath, dictDir]; } else { const lexiconPath = path.join(voiceDir, "lexicon.txt"); const dictDir = this.getDictDir(voiceDir); return [modelPath, tokensPath, lexiconPath, dictDir]; } } /** * Initialize the SherpaOnnx TTS engine * @param modelPath Path to model file * @param tokensPath Path to tokens file */ initializeTTS(modelPath, tokensPath) { try { // Dynamically import sherpa-onnx-node let sherpaOnnx; try { // Try to set library path environment variable based on platform let libPathEnvVar = ""; let possiblePaths = []; let pathSeparator = process.platform === "win32" ? ";" : ":"; if (process.platform === "darwin") { // macOS uses DYLD_LIBRARY_PATH libPathEnvVar = "DYLD_LIBRARY_PATH"; possiblePaths = [ path.join(process.cwd(), "node_modules", "sherpa-onnx-darwin-arm64"), path.join(process.cwd(), "node_modules", "sherpa-onnx-darwin-x64"), path.join(__dirname, "..", "..", "node_modules", "sherpa-onnx-darwin-arm64"), path.join(__dirname, "..", "..", "node_modules", "sherpa-onnx-darwin-x64") ]; } else if (process.platform === "linux") { // Linux uses LD_LIBRARY_PATH libPathEnvVar = "LD_LIBRARY_PATH"; possiblePaths = [ path.join(process.cwd(), "node_modules", "sherpa-onnx-linux-arm64"), path.join(process.cwd(), "node_modules", "sherpa-onnx-linux-x64"), path.join(__dirname, "..", "..", "node_modules", "sherpa-onnx-linux-arm64"), path.join(__dirname, "..", "..", "node_modules", "sherpa-onnx-linux-x64") ]; } else if (process.platform === "win32") { // Windows uses PATH libPathEnvVar = "PATH"; possiblePaths = [ path.join(process.cwd(), "node_modules", "sherpa-onnx-win32-x64"), path.join(__dirname, "..", "..", "node_modules", "sherpa-onnx-win32-x64") ]; } if (libPathEnvVar) { for (const libPath of possiblePaths) { if (fs.existsSync(libPath)) { console.log(`Found sherpa-onnx library at ${libPath}`); // Set environment variable const currentPath = process.env[libPathEnvVar] || ""; if (!currentPath.includes(libPath)) { process.env[libPathEnvVar] = libPath + (currentPath ? pathSeparator + currentPath : ""); console.log(`Set ${libPathEnvVar} to ${process.env[libPathEnvVar]}`); } break; } } } // Try to import the module sherpaOnnx = require("sherpa-onnx-node"); } catch (importError) { const err = importError; if (err.code === "MODULE_NOT_FOUND") { throw new Error("The sherpa-onnx-node module is not installed. " + "This is an optional dependency that provides offline TTS capabilities. " + "You can install it with 'npm install sherpa-onnx-node'. " + "The wrapper will continue to function with a mock implementation for testing purposes."); } else { // For other errors, suggest setting the appropriate environment variable let envVarName = ""; let libPath = ""; let exportCmd = ""; if (process.platform === "darwin" && err.message && err.message.includes("sherpa-onnx-darwin")) { envVarName = "DYLD_LIBRARY_PATH"; libPath = path.join(process.cwd(), "node_modules", "sherpa-onnx-darwin-arm64"); exportCmd = `export ${envVarName}=${libPath}:$${envVarName}`; } else if (process.platform === "linux" && err.message && err.message.includes("sherpa-onnx-linux")) { envVarName = "LD_LIBRARY_PATH"; libPath = path.join(process.cwd(), "node_modules", "sherpa-onnx-linux-x64"); exportCmd = `export ${envVarName}=${libPath}:$${envVarName}`; } else if (process.platform === "win32" && err.message && err.message.includes("sherpa-onnx-win32")) { envVarName = "PATH"; libPath = path.join(process.cwd(), "node_modules", "sherpa-onnx-win32-x64"); exportCmd = `set ${envVarName}=${libPath};%${envVarName}%`; } if (envVarName) { throw new Error(`Could not load sherpa-onnx-node. Please set the ${envVarName} environment variable:\n` + `${exportCmd}\n\n` + `Or use the provided helper script:\n` + `node scripts/run-with-sherpaonnx.js your-script.js\n\n` + `Original error: ${err.message}`); } throw err; } } // Create the TTS configuration const config = { model: { vits: { model: modelPath, tokens: tokensPath, }, debug: false, numThreads: 1, provider: "cpu", }, maxNumSentences: 1, }; // Create the TTS instance this.tts = new sherpaOnnx.OfflineTts(config); console.log("SherpaOnnx TTS initialized successfully"); } catch (error) { console.error("Error initializing SherpaOnnx TTS:", error); throw new Error("Failed to initialize SherpaOnnx TTS. " + (error instanceof Error ? error.message : String(error))); } } /** * Estimate word boundaries from text * @param text Text to estimate word boundaries for * @param audioDuration Duration of the audio in seconds * @returns Array of word boundaries */ estimateWordBoundaries(text, audioDuration) { // Split text into words const words = text.split(/\\s+/); // Estimate duration per word (simple approach) const durationPerWord = audioDuration / words.length; // Create word boundaries const wordBoundaries = []; let currentOffset = 0; for (const word of words) { if (word.trim()) { wordBoundaries.push({ text: word, offset: currentOffset * 1000, // Convert to milliseconds duration: durationPerWord * 1000, // Convert to milliseconds }); currentOffset += durationPerWord; } } return wordBoundaries; } /** * Get available voices from the provider * @returns Promise resolving to an array of voice objects */ async _getVoices() { // Convert the JSON models to an array of voice objects return Object.entries(this.jsonModels).map(([id, config]) => ({ id, name: config.name, language: config.language, gender: config.gender, description: config.description, })); } /** * Map SherpaOnnx voice objects to unified format * @param rawVoices Array of SherpaOnnx voice objects * @returns Promise resolving to an array of unified voice objects */ async _mapVoicesToUnified(rawVoices) { return rawVoices.map((voice) => { // Get language code and ensure it's a string let langCode = "en-US"; if (voice.language) { // Handle different language formats from merged_models.json if (typeof voice.language === "string") { langCode = voice.language; } else if (Array.isArray(voice.language) && voice.language.length > 0) { // Handle the format from merged_models.json where language is an array of objects const firstLang = voice.language[0]; if (firstLang && typeof firstLang === "object") { // Try to get language code from different possible properties if (firstLang["Iso Code"]) { langCode = firstLang["Iso Code"]; } else if (firstLang["lang_code"]) { langCode = firstLang["lang_code"]; } // If we have a language name but no code, use the name if (langCode === "en-US" && firstLang["Language Name"]) { langCode = firstLang["Language Name"].toLowerCase().substring(0, 2); } } } } // Ensure langCode is in BCP-47 format (e.g., en-US) if (!langCode.includes("-")) { // Convert ISO 639-3 to BCP-47 format if (langCode === "eng") { langCode = "en-US"; } else if (langCode.length === 3) { // For other 3-letter codes, use first 2 letters and add country langCode = `${langCode.substring(0, 2)}-${langCode.substring(0, 2).toUpperCase()}`; } else if (langCode.length === 2) { // For 2-letter codes, add country langCode = `${langCode}-${langCode.toUpperCase()}`; } } // Create language code object const languageCode = { bcp47: langCode, iso639_3: langCode.split("-")[0], display: langCode, }; return { id: voice.id, name: voice.name, gender: voice.gender, provider: "sherpaonnx", languageCodes: [languageCode], }; }); } /** * Set the voice to use for synthesis * @param voiceId Voice ID to use */ async setVoice(voiceId) { try { // Check if the voice exists in the configuration if (!(voiceId in this.jsonModels)) { throw new Error(`Voice ID ${voiceId} not found in configuration`); } // Set the voice ID this.voiceId = voiceId; this.modelId = voiceId; try { // Check and download the model if needed const [modelPath, tokensPath, ,] = await this.checkAndDownloadModel(voiceId); // Initialize the TTS engine this.initializeTTS(modelPath, tokensPath); // Set the model path this.modelPath = modelPath; } catch (downloadError) { const err = downloadError; console.warn(`Could not download or initialize model for voice ${voiceId}: ${err.message}`); console.warn("Using mock implementation for example."); // We'll continue without the model for the example // In a real application, you might want to throw an error here } } catch (error) { const err = error; console.error("Error setting voice:", err.message); // Don't throw the error, just log it and continue } } /** * Convert text to audio bytes * @param text Text to synthesize * @returns Promise resolving to audio bytes */ async synthToBytes(text) { try { // Remove SSML tags if present (SherpaOnnx doesn't support SSML) let plainText = text; if (this._isSSML(plainText)) { plainText = this.stripSSML(plainText); } if (!this.tts) { console.warn("SherpaOnnx TTS is not initialized. Using mock implementation for example."); // Generate mock audio data for example purposes // In a real application, you would want to throw an error here const mockSamples = new Float32Array(16000); // 1 second of silence at 16kHz // Add some noise to make it sound like something for (let i = 0; i < mockSamples.length; i++) { mockSamples[i] = (Math.random() - 0.5) * 0.01; // Very quiet noise } // Convert Float32Array to Uint8Array (16-bit PCM) const pcmData = new Int16Array(mockSamples.length); for (let i = 0; i < mockSamples.length; i++) { // Convert float to 16-bit PCM const sample = Math.max(-1, Math.min(1, mockSamples[i])); pcmData[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF; } // Convert Int16Array to Uint8Array const buffer = new Uint8Array(pcmData.buffer); // Set the sample rate this.sampleRate = 16000; return buffer; } // Generate audio using the real TTS engine const audio = this.tts.generate({ text: plainText, sid: 0, // Default speaker ID speed: this.properties.rate === "slow" ? 0.8 : this.properties.rate === "medium" ? 1.0 : this.properties.rate === "fast" ? 1.2 : 1.0, }); // Convert Float32Array to Uint8Array (16-bit PCM) const pcmData = new Int16Array(audio.samples.length); for (let i = 0; i < audio.samples.length; i++) { // Convert float to 16-bit PCM const sample = Math.max(-1, Math.min(1, audio.samples[i])); pcmData[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF; } // Convert Int16Array to Uint8Array const buffer = new Uint8Array(pcmData.buffer); // Set the sample rate this.sampleRate = audio.sampleRate; return buffer; } catch (error) { console.error("Error synthesizing speech:", error); throw error; } } /** * Synthesize text to a byte stream with word boundaries * @param text Text to synthesize * @param options Synthesis options * @returns Promise resolving to a readable stream of audio bytes with word boundaries */ async synthToBytestream(text, options) { try { // Check if word boundary information is requested const useWordBoundary = options?.useWordBoundary !== false; // Get audio bytes const audioBytes = await this.synthToBytes(text); if (useWordBoundary) { // Remove SSML tags if present let plainText = text; if (this._isSSML(plainText)) { plainText = this.stripSSML(plainText); } // Estimate audio duration in seconds const audioDuration = audioBytes.length / (this.sampleRate * 2); // 16-bit = 2 bytes per sample // Estimate word boundaries const wordBoundaries = this.estimateWordBoundaries(plainText, audioDuration); // Create a readable stream from the audio bytes const audioStream = new ReadableStream({ start(controller) { controller.enqueue(audioBytes); controller.close(); }, }); // Return both the audio stream and word boundaries return { audioStream, wordBoundaries, }; } else { // If word boundaries are not needed, just return the audio as a stream return new ReadableStream({ start(controller) { controller.enqueue(audioBytes); controller.close(); }, }); } } catch (error) { console.error("Error synthesizing speech stream:", error); throw error; } } /** * Strip SSML tags from text * @param text Text with SSML tags * @returns Plain text without SSML tags */ stripSSML(text) { // Remove all XML tags return text.replace(/<[^>]*>/g, ""); } /** * Check if credentials are valid * @returns Promise resolving to true if credentials are valid */ async checkCredentials() { try { // For SherpaOnnx, we'll consider credentials valid if we can initialize the engine // or if we have the model files available if (this.tts) { return true; } // If we don't have the engine initialized, check if we can initialize it if (this.modelId) { try { // Check if the model files exist const voiceDir = path.join(this.baseDir, this.modelId); const modelPath = path.join(voiceDir, "model.onnx"); const tokensPath = path.join(voiceDir, "tokens.txt"); if (this.checkFilesExist(modelPath, tokensPath)) { // Try to initialize the engine this.initializeTTS(modelPath, tokensPath); return !!this.tts; } } catch (error) { console.error("Error initializing SherpaOnnx TTS:", error); } } // For the example, we'll return true to allow the example to continue // In a real application, you might want to return false here console.log("SherpaOnnx model files not available. Using mock implementation for example."); return true; } catch (error) { console.error("Error checking SherpaOnnx credentials:", error); return false; } } } exports.SherpaOnnxTTSClient = SherpaOnnxTTSClient; /** * Path to the models file */ Object.defineProperty(SherpaOnnxTTSClient, "MODELS_FILE", { enumerable: true, configurable: true, writable: true, value: "merged_models.json" });