js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
1,086 lines • 62.9 kB
JavaScript
"use strict";
/**
* SherpaOnnx WebAssembly TTS Client
*
* Enhanced version with multi-model support for browser environments.
* Supports dynamic loading of Kokoro, Matcha, and VITS models.
*
* BACKWARD COMPATIBILITY: Maintains full compatibility with existing API.
* New multi-model features are opt-in via constructor options.
*/
// TypeScript-emitted interop helper: re-exports property `k` of module `m`
// onto object `o` under the name `k2` (defaulting to `k`). When the source
// is a CommonJS module (no __esModule flag) or the property is a plain
// writable/configurable value, it is re-wrapped as a live getter so later
// mutations of m[k] remain visible through the re-export.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
// Replace the descriptor with a live getter bound to the source module.
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Legacy engines without Object.create: plain copy (binding is not live).
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// TypeScript-emitted helper: attaches the whole CommonJS module object as the
// `default` export of the namespace object that __importStar builds.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// TypeScript-emitted helper implementing `import * as ns from "..."` for
// CommonJS modules: a real ES module passes through untouched; otherwise every
// own key except "default" is re-bound onto a fresh namespace object and the
// module itself is installed as `ns.default`.
var __importStar = (this && this.__importStar) || (function () {
// Lazily picks the best own-key enumerator on first use, then memoizes it.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.SherpaOnnxWasmTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
const SpeechMarkdown = __importStar(require("../markdown/converter"));
const SSMLUtils = __importStar(require("../core/ssml-utils"));
const environment_1 = require("../utils/environment");
const word_timing_estimator_1 = require("../utils/word-timing-estimator");
/**
* Enhanced SherpaOnnx WebAssembly TTS Client
*
* Supports both legacy single-model mode and new multi-model mode.
* Maintains full backward compatibility with existing API.
*/
class SherpaOnnxWasmTTSClient extends abstract_tts_1.AbstractTTSClient {
/**
 * Create a new SherpaOnnx WebAssembly TTS client.
 *
 * Reads optional configuration from `credentials`:
 * - `wasmPath`: path/URL to the Emscripten glue JS
 * - `wasmBaseUrl`: base URL for the glue + .wasm files
 * - `mergedModelsUrl` / `modelsUrl`: URL of the merged models index JSON
 *
 * @param credentials Optional credentials object (see above)
 * @param enhancedOptions Optional enhanced options for multi-model support
 *   (`enableMultiModel`, `maxCachedModels`)
 */
constructor(credentials = {}, enhancedOptions = {}) {
super(credentials);
// Fields are declared via Object.defineProperty (TypeScript's
// "useDefineForClassFields" emit), which defines own data properties
// directly rather than going through any setters on the prototype chain.
// wasmModule: the Emscripten Module object once the glue script has run.
Object.defineProperty(this, "wasmModule", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
// tts: the active offline TTS instance created from the WASM module.
Object.defineProperty(this, "tts", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
// wasmPath: resolved URL/path of the glue JS script.
Object.defineProperty(this, "wasmPath", {
enumerable: true,
configurable: true,
writable: true,
value: ""
});
// wasmLoaded: true after initializeWasm() completed successfully.
Object.defineProperty(this, "wasmLoaded", {
enumerable: true,
configurable: true,
writable: true,
value: false
});
// wasmBaseUrl: base URL used by Module.locateFile to find the .wasm binary.
Object.defineProperty(this, "wasmBaseUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// mergedModelsUrl: URL of the merged voice-models index JSON.
Object.defineProperty(this, "mergedModelsUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// Enhanced multi-model support
Object.defineProperty(this, "enhancedOptions", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelRepository", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelManager", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "currentVoiceId", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// Capabilities: Browser-only engine, requires WASM runtime
this.capabilities = { browserSupported: true, nodeSupported: false, needsWasm: true };
// Set default sample rate for the Piper model
this.sampleRate = 22050;
// Optional configuration from credentials
this.wasmPath = credentials.wasmPath || ""; // JS glue path (if provided)
this.wasmBaseUrl = credentials.wasmBaseUrl || undefined; // Base URL for glue+wasm
this.mergedModelsUrl = credentials.mergedModelsUrl || credentials.modelsUrl || undefined;
// Enhanced options with defaults for backward compatibility
this.enhancedOptions = {
enableMultiModel: false, // Disabled by default for backward compatibility
maxCachedModels: 3,
...enhancedOptions
};
// Initialize multi-model components if enabled
// NOTE(review): ModelRepository is not imported/required anywhere in this
// visible chunk — presumably it is defined or required elsewhere in the
// file; verify it is actually in scope at runtime.
if (this.enhancedOptions.enableMultiModel) {
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
}
/**
* Get the list of required credential types for this engine
* @returns Array of required credential field names
*/
getRequiredCredentials() {
return []; // SherpaOnnx WASM doesn't require credentials, only WASM files
}
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid
*/
async checkCredentials() {
try {
// First check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (status.isInitialized) {
return true;
}
// In a browser environment, we can't check if the WASM file exists
// so we'll check if it's likely to be loaded later
if (typeof window !== "undefined") {
if (status.issues.length > 0) {
console.warn("SherpaOnnx not yet initialized:", status.issues.join(', '));
}
return true; // Assume it will be loaded later in browser
}
// In Node.js, check if the WASM file exists
if (environment_1.isNode && this.wasmPath && environment_1.fileSystem.existsSync(this.wasmPath)) {
if (status.issues.length > 0) {
console.warn("SherpaOnnx WASM file exists but not initialized:", status.issues.join(', '));
}
return true;
}
// If no WASM path is provided, assume it will be loaded later
if (!this.wasmPath) {
console.warn("No WASM path provided. SherpaOnnx WebAssembly TTS will need to be initialized manually.");
return true;
}
console.warn(`WASM file not found at ${this.wasmPath}`);
return false;
}
catch (error) {
console.error("Error checking SherpaOnnx WebAssembly credentials:", error);
return false;
}
}
/**
* Get available voices
* @returns Promise resolving to an array of unified voice objects
*/
async _getVoices() {
try {
// Enhanced multi-model support
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log("Using enhanced multi-model voice repository");
try {
const models = this.modelRepository.getAvailableModels();
return models.map((model) => ({
id: model.id,
name: model.name,
gender: model.gender,
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: model.language,
iso639_3: model.language.split("-")[0],
display: model.language,
},
],
}));
}
catch (error) {
console.error("Error getting voices from enhanced repository:", error);
// Fall through to legacy mode
}
}
// Legacy voice loading (backward compatibility)
console.log("Using legacy voice loading mode");
// Load the voice models JSON file
let voiceModels = [];
try {
// In Node.js, read from the file system
if (environment_1.isNode) {
const modelsJsonPath = environment_1.pathUtils.join(__dirname, "..", "data", "merged_models.json");
if (environment_1.fileSystem.existsSync(modelsJsonPath)) {
const modelsJson = environment_1.fileSystem.readFileSync(modelsJsonPath);
voiceModels = JSON.parse(modelsJson);
}
}
else {
// In browser environments, try to fetch from a URL
try {
const response = await fetch(this.mergedModelsUrl || "./data/merged_models.json");
if (response.ok) {
const modelsJson = await response.text();
voiceModels = JSON.parse(modelsJson);
}
else {
console.warn("Voice models JSON file not available in browser environment.");
// Return a default voice for testing
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
}
catch (fetchError) {
console.warn("Failed to fetch voice models JSON file:", fetchError);
// Return a default voice for testing
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
}
}
catch (error) {
console.error("Error loading voice models:", error);
}
// Filter for SherpaOnnx models and map to unified format
const sherpaOnnxModels = voiceModels.filter((model) => model.engine === "sherpaonnx" || model.engine === "sherpaonnx-wasm");
console.log("Found SherpaOnnx models:", sherpaOnnxModels);
const voices = sherpaOnnxModels.map((model) => ({
id: model.id,
name: model.name,
gender: model.gender || "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: model.language || "en-US",
iso639_3: model.language ? model.language.split("-")[0] : "eng",
display: model.language_display || "English (US)",
},
],
}));
// If no voices found, return a default voice for backward compatibility
if (voices.length === 0) {
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
return voices;
}
catch (error) {
console.error("Error getting SherpaOnnx WebAssembly voices:", error);
return [];
}
}
/**
* Initialize the WebAssembly module
* @param wasmUrl URL to the WebAssembly file
* @returns Promise resolving when the module is initialized
*/
async initializeWasm(wasmUrl) {
if (this.wasmLoaded) {
return;
}
try {
// In browser environments, load the WebAssembly module
if (environment_1.isBrowser) {
if (!wasmUrl) {
console.warn("No WebAssembly URL provided for browser environment.");
this.wasmLoaded = false;
return;
}
console.log("Loading WebAssembly module from", wasmUrl);
console.log(`Current state: wasmLoaded=${this.wasmLoaded}, wasmModule=${!!this.wasmModule}`);
try {
// Store the URL for later use
this.wasmPath = wasmUrl;
console.log("Setting wasmPath to:", this.wasmPath);
// Auto-load JS glue and WASM if not present
const w = window;
let baseUrl = this.wasmBaseUrl;
let scriptUrl;
const provided = wasmUrl || this.wasmPath || "";
if (provided) {
if (/\.js($|\?)/.test(provided)) {
scriptUrl = provided;
if (!baseUrl) {
const idx = provided.lastIndexOf("/");
if (idx > -1)
baseUrl = provided.slice(0, idx);
}
}
else {
baseUrl = provided;
}
}
if (!scriptUrl && baseUrl) {
const b = baseUrl.replace(/\/$/, "");
// Default glue filename (can be overridden by passing full wasmPath)
scriptUrl = `${b}/sherpaonnx.js`;
}
if (!scriptUrl) {
console.warn("No WASM script URL provided; attempting default ./sherpaonnx.js");
scriptUrl = "./sherpaonnx.js";
}
// Persist the resolved script URL
this.wasmPath = scriptUrl;
console.log("Resolved wasmPath to:", this.wasmPath);
// Ensure Module.locateFile points to the base for .wasm
w.Module = w.Module || {};
if (baseUrl) {
const b = baseUrl.replace(/\/$/, "");
w.Module.locateFile = (p) => `${b}/${p}`;
}
// Load the glue JS if createOfflineTts is not available
if (typeof w.createOfflineTts !== "function") {
await new Promise((resolve, reject) => {
const s = document.createElement("script");
s.src = scriptUrl;
s.async = true;
s.onload = () => resolve();
s.onerror = () => reject(new Error(`Failed to load SherpaONNX glue: ${scriptUrl}`));
document.head.appendChild(s);
});
}
// Wait for Module.calledRun and createOfflineTts to be ready
await new Promise((resolve, reject) => {
const giveUpAt = Date.now() + 15000; // 15s
const checkReady = () => {
if (typeof w.createOfflineTts === "function" &&
typeof w.Module !== "undefined" &&
w.Module.calledRun) {
resolve();
}
else if (Date.now() > giveUpAt) {
reject(new Error("Timed out waiting for SherpaONNX WASM to initialize"));
}
else {
setTimeout(checkReady, 200);
}
};
checkReady();
});
// Now that we know createOfflineTts and Module are available, store them
console.log("Storing Module and createOfflineTts");
this.wasmModule = window.Module;
this.wasmLoaded = true;
// Store the createOfflineTts function
if (this.wasmModule && !this.wasmModule.createOfflineTts) {
this.wasmModule.createOfflineTts = window.createOfflineTts;
}
// Initialize multi-model support if enabled
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log("Initializing enhanced multi-model support...");
try {
// Load models index
await this.modelRepository.loadModelsIndex();
// Initialize model manager
if (this.wasmModule) {
this.modelManager = new WasmModelManager(this.wasmModule, this.enhancedOptions.maxCachedModels);
}
console.log("Enhanced multi-model support initialized successfully");
}
catch (error) {
console.error("Error initializing multi-model support:", error);
console.log("Falling back to legacy single-model mode");
this.enhancedOptions.enableMultiModel = false;
}
}
console.log("WebAssembly module initialized successfully");
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
}
else {
// In Node.js, we can't directly use WebAssembly in the same way
console.warn("WebAssembly loading not implemented for Node.js environments.");
this.wasmLoaded = false;
}
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
console.log("End of initializeWasm method. wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available at end of initializeWasm:", typeof window.createOfflineTts === "function");
console.log("window.Module available at end of initializeWasm:", typeof window.Module !== "undefined");
if (typeof window.Module !== "undefined") {
console.log("window.Module.calledRun at end of initializeWasm:", window.Module.calledRun);
}
}
/**
 * Synthesize text to speech and return the audio as a byte array.
 *
 * Accepts plain text, SSML (tags are stripped — SherpaOnnx has no SSML
 * support) or Speech Markdown (converted to SSML, then stripped). Tries, in
 * order: the enhanced multi-model WASM interface, a fresh TTS instance built
 * from the global createOfflineTts, and finally the cached `this.tts`
 * instance (creating it on demand).
 *
 * @param text Text to synthesize
 * @param _options Options for synthesis (only `useSpeechMarkdown` is read here)
 * @returns Promise resolving to a byte array of WAV audio data
 * @throws Error if the WASM runtime is not initialized or generation fails
 */
async synthToBytes(text, _options) {
// Prepare text for synthesis (handle Speech Markdown and SSML)
let processedText = text;
// Convert from Speech Markdown if requested
if (_options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
// Convert to SSML first, then strip SSML tags since SherpaOnnx doesn't support SSML
const ssml = await SpeechMarkdown.toSSML(processedText);
processedText = SSMLUtils.stripSSML(ssml);
}
// If text is SSML, strip the tags as SherpaOnnx doesn't support SSML
if (SSMLUtils.isSSML(processedText)) {
processedText = SSMLUtils.stripSSML(processedText);
}
console.log("synthToBytes called with text:", processedText);
// Ensure runtime is initialized before attempting synthesis
if (environment_1.isBrowser) {
const status = this.getInitializationStatus();
if (!status.isInitialized) {
await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || "");
}
}
// Enhanced multi-model synthesis
// NOTE(review): _GenerateAudio is assumed to take (text, speakerId, speed)
// and return { samples, sampleRate } — provided by the enhanced WASM build;
// confirm against the glue code.
if (this.enhancedOptions.enableMultiModel && this.wasmModule && this.currentVoiceId) {
console.log(`Using enhanced multi-model synthesis for voice ${this.currentVoiceId}`);
try {
if (!this.wasmModule._GenerateAudio) {
throw new Error('Enhanced WASM module not loaded - _GenerateAudio not available');
}
// Generate audio using the enhanced WASM interface
const result = this.wasmModule._GenerateAudio(processedText, 0, 1.0); // text, speaker_id, speed
if (!result || !result.samples) {
throw new Error('Failed to generate audio with enhanced interface');
}
console.log(`Enhanced synthesis generated ${result.samples.length} samples at ${result.sampleRate}Hz`);
// Update sample rate if provided
if (result.sampleRate) {
this.sampleRate = result.sampleRate;
}
// Convert to WAV format
return this._convertAudioFormat(result.samples);
}
catch (error) {
console.error('Error with enhanced multi-model synthesis:', error);
console.log('Falling back to legacy synthesis mode');
// Fall through to legacy mode
}
}
// Legacy synthesis mode (backward compatibility)
console.log("Using legacy synthesis mode");
// IMPORTANT: We need to access the global window object directly
// This is because our code is bundled and the window object might not be accessible in the same way
const globalWindow = typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : {};
console.log("Global window type:", typeof globalWindow);
// Check if we're in a browser environment
if (typeof globalWindow !== "undefined" && typeof document !== "undefined") {
console.log("Browser environment detected");
// Check if createOfflineTts is available in the global scope
const createOfflineTtsFn = globalWindow.createOfflineTts;
const moduleObj = globalWindow.Module;
console.log("createOfflineTts available in global scope:", typeof createOfflineTtsFn === "function");
console.log("Module available in global scope:", typeof moduleObj !== "undefined");
console.log("Module.calledRun:", moduleObj?.calledRun);
// Try to use the global createOfflineTts function directly
if (typeof createOfflineTtsFn === "function" &&
typeof moduleObj !== "undefined" &&
moduleObj.calledRun) {
console.log("Using global createOfflineTts function directly");
try {
// Create a new TTS instance directly
// Note: this instance is intentionally NOT cached on this.tts.
console.log("About to call createOfflineTts...");
const directTts = createOfflineTtsFn(moduleObj);
console.log("createOfflineTts call successful, tts object:", directTts);
console.log("TTS initialized with default configuration");
console.log(`Sample rate: ${directTts?.sampleRate}`);
console.log(`Number of speakers: ${directTts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (directTts && typeof directTts.sampleRate === "number") {
this.sampleRate = directTts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
// Generate audio
console.log("Generating audio directly...");
const result = directTts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Audio generated directly:", result);
console.log(`Generated ${result?.samples?.length} samples at ${result?.sampleRate}Hz`);
// Convert to WAV
const audioBytes = this._convertAudioFormat(result.samples);
console.log("Converted audio to WAV format, returning bytes");
return audioBytes;
}
catch (directError) {
console.error("Error using direct approach:", directError);
console.log("Falling back to standard approach");
}
}
else {
console.log("Direct approach not available, reason:");
if (typeof createOfflineTtsFn !== "function")
console.log("- createOfflineTts is not a function");
if (typeof moduleObj === "undefined")
console.log("- Module is undefined");
if (moduleObj && !moduleObj.calledRun)
console.log("- Module.calledRun is false");
}
}
else {
console.log("Not in a browser environment, skipping direct approach");
}
// If direct approach failed or not available, try the standard approach
console.log("Using standard approach");
console.log("Current state - wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available:", typeof globalWindow.createOfflineTts === "function");
// Check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (!status.isInitialized) {
const errorMessage = this.getInitializationErrorMessage();
console.error(errorMessage);
throw new Error(errorMessage);
}
try {
// Use the SherpaOnnx WebAssembly API to generate audio
console.log("Using SherpaOnnx WebAssembly to generate audio");
// Create a TTS instance if it doesn't exist (cached across calls).
if (!this.tts) {
console.log("Creating TTS instance");
try {
// Create the TTS instance
if (typeof window.createOfflineTts === "function") {
// Using the sherpa-onnx-tts.js API
console.log("Using createOfflineTts API from global scope");
console.log("createOfflineTts:", window.createOfflineTts);
console.log("Module:", window.Module);
try {
// Create the TTS instance with default configuration
console.log("About to call createOfflineTts...");
this.tts = window.createOfflineTts(window.Module);
console.log("createOfflineTts call successful, tts object:", this.tts);
console.log("TTS initialized with default configuration");
console.log(`Sample rate: ${this.tts?.sampleRate}`);
console.log(`Number of speakers: ${this.tts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (this.tts && typeof this.tts.sampleRate === "number") {
this.sampleRate = this.tts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
}
catch (error) {
console.error("Error creating TTS instance with createOfflineTts:", error);
throw error;
}
}
else if (this.wasmModule?.OfflineTts) {
// Using the Module.OfflineTts API
console.log("Using Module.OfflineTts API");
this.tts = new this.wasmModule.OfflineTts();
}
else {
throw new Error("No compatible TTS API found");
}
console.log("TTS instance created successfully");
}
catch (error) {
console.error("Error creating TTS instance:", error);
throw new Error(`Failed to create SherpaOnnx TTS instance: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Generate the audio
// Note: the raw `text` is logged here, but `processedText` is synthesized.
console.log("Generating audio for text:", text);
let samples;
if (typeof this.tts.generate === "function") {
// Using the generate method from sherpa-onnx-tts.js
console.log("Using generate method");
console.log("this.tts.generate:", this.tts.generate);
try {
console.log("Calling generate with:", { text: processedText, sid: 0, speed: 1.0 });
const result = this.tts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Generate call successful, result:", result);
samples = result.samples;
console.log(`Generated audio with sample rate: ${result.sampleRate} and samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generate:", error);
throw error;
}
}
else if (typeof this.tts.generateWithText === "function") {
// Using the generateWithText method
console.log("Using generateWithText method");
console.log("this.tts.generateWithText:", this.tts.generateWithText);
try {
console.log("Calling generateWithText with:", processedText);
samples = this.tts.generateWithText(processedText);
console.log(`Generated audio with samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generateWithText:", error);
throw error;
}
}
else {
console.error("No compatible generate method found");
console.log("Available methods on this.tts:", Object.keys(this.tts).filter((key) => typeof this.tts[key] === "function"));
throw new Error("No compatible generate method found");
}
console.log("Audio generated successfully, samples:", samples.length);
// Convert the samples to the requested format
const audioBytes = this._convertAudioFormat(samples);
return audioBytes;
}
catch (error) {
console.error("Error synthesizing text:", error);
throw new Error(`SherpaOnnx synthesis failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Convert audio samples to the requested format
* @param samples Float32Array of audio samples
* @returns Uint8Array of audio data in the requested format
*/
_convertAudioFormat(samples) {
// For now, we'll just return a WAV file
// In a real implementation, we would use a library like audioEncoder
// to convert to the requested format
// Convert Float32Array to Int16Array
const int16Samples = new Int16Array(samples.length);
for (let i = 0; i < samples.length; i++) {
// Scale to 16-bit range and clamp
const sample = Math.max(-1, Math.min(1, samples[i]));
int16Samples[i] = Math.floor(sample * 32767);
}
// Create a WAV file header
const wavHeader = new ArrayBuffer(44);
const view = new DataView(wavHeader);
// "RIFF" chunk descriptor
view.setUint8(0, "R".charCodeAt(0));
view.setUint8(1, "I".charCodeAt(0));
view.setUint8(2, "F".charCodeAt(0));
view.setUint8(3, "F".charCodeAt(0));
// Chunk size (file size - 8)
view.setUint32(4, 36 + int16Samples.length * 2, true);
// Format ("WAVE")
view.setUint8(8, "W".charCodeAt(0));
view.setUint8(9, "A".charCodeAt(0));
view.setUint8(10, "V".charCodeAt(0));
view.setUint8(11, "E".charCodeAt(0));
// "fmt " sub-chunk
view.setUint8(12, "f".charCodeAt(0));
view.setUint8(13, "m".charCodeAt(0));
view.setUint8(14, "t".charCodeAt(0));
view.setUint8(15, " ".charCodeAt(0));
// Sub-chunk size (16 for PCM)
view.setUint32(16, 16, true);
// Audio format (1 for PCM)
view.setUint16(20, 1, true);
// Number of channels (1 for mono)
view.setUint16(22, 1, true);
// Sample rate
view.setUint32(24, this.sampleRate, true);
// Byte rate (sample rate * channels * bytes per sample)
view.setUint32(28, this.sampleRate * 1 * 2, true);
// Block align (channels * bytes per sample)
view.setUint16(32, 1 * 2, true);
// Bits per sample
view.setUint16(34, 16, true);
// "data" sub-chunk
view.setUint8(36, "d".charCodeAt(0));
view.setUint8(37, "a".charCodeAt(0));
view.setUint8(38, "t".charCodeAt(0));
view.setUint8(39, "a".charCodeAt(0));
// Sub-chunk size (number of samples * channels * bytes per sample)
view.setUint32(40, int16Samples.length * 1 * 2, true);
// Combine the header and the samples
const wavBytes = new Uint8Array(wavHeader.byteLength + int16Samples.length * 2);
wavBytes.set(new Uint8Array(wavHeader), 0);
// Convert Int16Array to Uint8Array
const samplesBytes = new Uint8Array(int16Samples.buffer);
wavBytes.set(samplesBytes, wavHeader.byteLength);
return wavBytes;
}
/**
* Check if SherpaOnnx is properly initialized
* @returns Object with initialization status and details
*/
getInitializationStatus() {
const globalWindow = (typeof window !== 'undefined' ? window : global);
const issues = [];
if (!this.wasmLoaded) {
issues.push("WebAssembly module not loaded");
}
if (!this.wasmModule) {
issues.push("WebAssembly module is null");
}
if (typeof globalWindow.createOfflineTts !== "function") {
issues.push("createOfflineTts function not available");
}
return {
isInitialized: issues.length === 0,
wasmLoaded: this.wasmLoaded,
wasmModule: !!this.wasmModule,
createOfflineTts: typeof globalWindow.createOfflineTts === "function",
issues
};
}
/**
* Get detailed error message for initialization issues
* @returns Detailed error message with troubleshooting steps
*/
getInitializationErrorMessage() {
const status = this.getInitializationStatus();
let message = "SherpaOnnx WebAssembly TTS is not properly initialized.\n\n";
message += "Issues found:\n";
status.issues.forEach(issue => {
message += `- ${issue}\n`;
});
message += "\nTroubleshooting steps:\n";
message += "1. Ensure the SherpaOnnx WebAssembly files are properly loaded\n";
message += "2. Check that the WebAssembly module initialization completed successfully\n";
message += "3. Verify that createOfflineTts function is available in the global scope\n";
message += "4. Check browser console for WebAssembly loading errors\n";
message += "5. Ensure you're running in a supported environment (browser with WebAssembly support)\n";
return message;
}
/**
* Synthesize text to speech and stream the audio
* @param text Text to synthesize
* @param onAudioBuffer Callback for audio buffers
* @param onStart Callback for when synthesis starts
* @param onEnd Callback for when synthesis ends
* @param onWord Callback for word boundary events
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToStream(text, onAudioBuffer, onStart, onEnd, onWord, options) {
try {
// Call onStart callback
if (onStart) {
onStart();
}
// Synthesize the entire audio
const audioBytes = await this.synthToBytes(text, options);
// Estimate word boundaries
if (onWord) {
const wordBoundaries = (0, word_timing_estimator_1.estimateWordBoundaries)(text);
// Schedule word boundary events
for (const boundary of wordBoundaries) {
setTimeout(() => {
onWord(boundary.word, boundary.start, boundary.end);
}, boundary.start * 1000);
}
}
// Send the audio buffer
onAudioBuffer(audioBytes);
// Call onEnd callback
if (onEnd) {
onEnd();
}
}
catch (error) {
console.error("Error synthesizing text to stream:", error);
// Call onEnd callback even if there's an error
if (onEnd) {
onEnd();
}
// Re-throw the error so it can be caught by the caller
throw error;
}
}
/**
* Synthesize text to speech and save to a file
* @param text Text to synthesize
* @param filename Filename to save as
* @param format Audio format (mp3 or wav)
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToFile(text, filename, format = "wav", // Override base class to only allow 'wav'
options // Use specific options type
) {
try {
let outputFormat = format;
// Sherpa-ONNX only supports WAV output
if (outputFormat !== "wav") {
console.warn("SherpaOnnx WebAssembly TTS only supports WAV output. Using WAV instead of", outputFormat);
outputFormat = "wav";
}
// Use the base class's file saving logic (which detects Node/Browser)
await super.synthToFile(text, filename, outputFormat, options);
}
catch (error) {
console.error("Error synthesizing text to file:", error);
throw error;
}
}
/**
* Get a property value
* @param property Property name
* @returns Property value
*/
getProperty(property) {
switch (property) {
case "voice":
return this.currentVoiceId || this.voiceId || undefined;
case "sampleRate":
return this.sampleRate;
case "wasmLoaded":
return this.wasmLoaded;
case "wasmPath":
return this.wasmPath;
case "wasmBaseUrl":
return this.wasmBaseUrl;
case "mergedModelsUrl":
return this.mergedModelsUrl;
case "multiModelEnabled":
return this.enhancedOptions.enableMultiModel;
case "maxCachedModels":
return this.enhancedOptions.maxCachedModels;
case "loadedModels":
return this.modelManager ? Array.from(this.modelManager['loadedModels'].keys()) : [];
case "currentModel":
return this.modelManager?.getCurrentModel();
case "availableModels":
return this.modelRepository?.getAvailableModels() || [];
default:
return super.getProperty(property);
}
}
/**
* Set a property value
* @param property Property name
* @param value Property value
*/
setProperty(property, value) {
switch (property) {
case "voice":
this.setVoice(value);
break;
case "wasmPath":
this.wasmPath = value;
break;
case "wasmBaseUrl":
this.wasmBaseUrl = value;
break;
case "mergedModelsUrl":
this.mergedModelsUrl = value;
if (this.modelRepository) {
// Recreate repository with new URL on the fly
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
break;
default:
super.setProperty(property, value);
break;
}
}
/**
 * Set the voice to use for synthesis.
 *
 * Enhanced multi-model mode downloads the model archive, decompresses
 * (.bz2) and untars it, and mounts the files into the Emscripten virtual
 * filesystem under /assets, then resets the cached TTS instance so the
 * next synthesis picks up the new model. Legacy mode only resets the TTS
 * instance. Maintains backward compatibility with the previous API.
 *
 * @param voiceId Voice ID to use
 * @throws Error in multi-model mode when the WASM module, model URL,
 *   decompressor, or Emscripten FS is unavailable
 */
async setVoice(voiceId) {
// Call the parent method to set the voiceId
super.setVoice(voiceId);
console.log(`Setting voice to ${voiceId}`);
// Enhanced multi-model support (loader-only runtime: fetch, extract, mount into /assets)
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log(`Using enhanced multi-model mode for voice ${voiceId}`);
// Ensure WASM is initialized to access FS
if (!this.wasmLoaded) {
await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || "");
}
if (!this.wasmModule) {
throw new Error("WASM module not initialized");
}
// Resolve model config and URL
const cfg = this.modelRepository.getModelConfig(voiceId);
if (!cfg || !cfg.url) {
throw new Error(`No URL found for model ${voiceId}`);
}
// Fetch archive
console.log(`Fetching model archive: ${cfg.url}`);
const res = await fetch(cfg.url);
if (!res.ok)
throw new Error(`Failed to fetch model: ${res.status} ${res.statusText}`);
const archiveBuf = await res.arrayBuffer();
// Decompress .bz2 if needed
// NOTE(review): "compressjs" is resolved dynamically at call time — it is a
// runtime dependency that must be installed/bundled; verify packaging.
let tarBuffer = archiveBuf;
if (cfg.compressed) {
const compressjsMod = await Promise.resolve().then(() => __importStar(require("compressjs")));
const Bzip2 = compressjsMod.Bzip2 || compressjsMod.BZ2;
if (!Bzip2 || typeof Bzip2.decompressFile !== "function") {
throw new Error("Bzip2 decompressor not available");
}
const outArr = Bzip2.decompressFile(new Uint8Array(archiveBuf));
tarBuffer = new Uint8Array(outArr).buffer;
}
// Extract tar
// NOTE(review): "js-untar" is likewise a dynamic runtime dependency.
const untarMod = await Promise.resolve().then(() => __importStar(require("js-untar")));
const untar = untarMod.default || untarMod;
const entries = await untar(tarBuffer);
console.log(`Extracted ${entries.length} entries from TAR`);
const M = this.wasmModule;
const FS = M.FS;
if (!FS)
throw new Error("Emscripten FS not available");
// Ensure /assets exists
try {
FS.mkdir("/assets");
}
catch { /* already exists — ignore */ }
try {
FS.mkdir("/assets/espeak-ng-data");
}
catch { /* already exists — ignore */ }
// Helper to create directories recursively
const mkdirp = (dir) => {
// Prefer Emscripten's native mkdirTree when available.
if (FS.mkdirTree) {
try {
FS.mkdirTree(dir);
return;
}
catch { /* fall back to manual segment-by-segment creation */ }
}
const parts = dir.split("/").filter(Boolean);
let cur = "";
for (const p of parts) {
cur += "/" + p;
try {
FS.mkdir(cur);
}
catch { /* segment already exists — ignore */ }
}
};
// Map archive files to expected /assets layout.
// Heuristic filename matching: model.onnx, tokens.txt, espeak-ng-data/*,
// voices*.bin and vocoder*.onnx are relocated to fixed /assets paths.
for (const e of entries) {
if (!e || !e.name)
continue;
const name = String(e.name).replace(/^\.\//, "");
const lower = name.toLowerCase();
// Only write file entries (js-untar uses `buffer` for file data)
if (!e.buffer)
continue;
let outPath = null;
if (lower.endsWith("/model.onnx") || lower === "model.onnx") {
outPath = "/assets/model.onnx";
}
else if (lower.endsWith("/tokens.txt") || lower === "tokens.txt") {
outPath = "/assets/tokens.txt";
}
else if (lower.includes("/espeak-ng-data/") || lower.startsWith("espeak-ng-data/")) {
outPath = "/assets/" + name.substring(name.toLowerCase().indexOf("espeak-ng-data/"));
}
else if (lower.endsWith("voices.bin") || (lower.includes("voices") && lower.endsWith(".bin"))) {
outPath = "/assets/voices.bin";
}
else if (lower.includes("vocoder") && lower.endsWith(".onnx")) {
outPath = "/assets/vocoder.onnx";
}
if (outPath) {
const dir = outPath.substring(0, outPath.lastIndexOf("/"));
mkdirp(dir);
FS.writeFile(outPath, new Uint8Array(e.buffer));
}
}
// Reset TTS so next synthesis uses the new assets
if (this.tts) {
try {
if (typeof this.wasmModule._ttsDestroyOffline === "function")
this.wasmModule._ttsDestroyOffline(this.tts);
}
catch { /* best-effort destroy — ignore failures */ }
this.tts = null;
}
this.currentVoiceId = voiceId;
console.log(`Prepared /assets for voice ${voiceId}`);
return;
}
// Legacy single-model mode (backward compatibility)
console.log(`Using legacy single-model mode for voice ${voiceId}`);
// Reset the TTS instance so it will be recreated with the new voice
if (this.tts) {
console.log("Resetting TTS instance for new voice");
this.tts = null;
}
}
/**
* Clean up resources
* Enhanced to handle multi-model cleanup
*/
dispose() {
// Clean up multi-model resources
if (this.modelManager) {
console.log("Disposing multi-model resources");
this.modelManager.dispose();
this.modelManager = undefined;
}
// Clean up legacy TTS instance
if (this.wasmModule && this.tts !== 0) {
if (typeof this.wasmModule._ttsDestroyOffline === "function") {
this.wasmModule._ttsDestroyOffline(this.tts);
}
this.tts = null;
}
// Reset state
this.currentVoiceId = undefined;
this.wasmLoaded = false;
this.wasmModule = null;
}
/**
* Synthesize text to a byte stream
*