UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

1,031 lines 79.9 kB
"use strict"; /** * SherpaOnnx WebAssembly TTS Client * * Enhanced version with multi-model support for browser environments. * Supports dynamic loading of Kokoro, Matcha, and VITS models. * * BACKWARD COMPATIBILITY: Maintains full compatibility with existing API. * New multi-model features are opt-in via constructor options. */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.SherpaOnnxWasmTTSClient = void 0; const abstract_tts_1 = require("../core/abstract-tts"); const SSMLUtils = __importStar(require("../core/ssml-utils")); const SpeechMarkdown = __importStar(require("../markdown/converter")); const bzip2_1 = require("../utils/bzip2"); const environment_1 = require("../utils/environment"); const word_timing_estimator_1 = require("../utils/word-timing-estimator"); /** * Enhanced SherpaOnnx WebAssembly TTS Client * * Supports both legacy single-model mode and new multi-model mode. * Maintains full backward compatibility with existing API. */ class SherpaOnnxWasmTTSClient extends abstract_tts_1.AbstractTTSClient { /** * Create a new SherpaOnnx WebAssembly TTS client * @param credentials Optional credentials object * @param enhancedOptions Optional enhanced options for multi-model support */ constructor(credentials = {}, enhancedOptions = {}) { super(credentials); Object.defineProperty(this, "wasmModule", { enumerable: true, configurable: true, writable: true, value: null }); Object.defineProperty(this, "tts", { enumerable: true, configurable: true, writable: true, value: null }); Object.defineProperty(this, "wasmPath", { enumerable: true, configurable: true, writable: true, value: "" }); Object.defineProperty(this, "wasmLoaded", { enumerable: true, configurable: true, writable: true, value: false }); Object.defineProperty(this, "wasmBaseUrl", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "mergedModelsUrl", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "modelsMirrorUrl", { enumerable: true, configurable: true, writable: true, value: void 0 }); // Enhanced multi-model support Object.defineProperty(this, "enhancedOptions", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "modelRepository", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "modelManager", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "currentVoiceId", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "currentVoiceConfig", { enumerable: true, configurable: true, writable: true, value: void 0 }); this._models = [{ id: "sherpaonnx-wasm", features: ["open-source"] }]; // Capabilities: Browser-only engine, requires WASM runtime this.capabilities = { browserSupported: true, nodeSupported: false, needsWasm: true }; // Set default sample rate for the Piper model this.sampleRate = 22050; // Optional configuration from credentials this.wasmPath = credentials.wasmPath || ""; // JS glue path (if provided) this.wasmBaseUrl = credentials.wasmBaseUrl || undefined; // Base URL for glue+wasm this.mergedModelsUrl = credentials.mergedModelsUrl || credentials.modelsUrl || undefined; this.modelsMirrorUrl = credentials.modelsMirrorUrl || enhancedOptions?.modelsMirrorBaseUrl || undefined; // Enhanced options with defaults for backward compatibility this.enhancedOptions = { enableMultiModel: false, // Disabled by default for backward compatibility maxCachedModels: 3, modelsMirrorBaseUrl: this.modelsMirrorUrl, ...enhancedOptions, }; // Initialize multi-model components if enabled this.modelRepository = new ModelRepository(this.mergedModelsUrl); } /** * Get the list of required credential types for this engine * @returns Array of required credential field names */ getRequiredCredentials() { return []; // SherpaOnnx WASM doesn't require credentials, only WASM files } /** * Check if the credentials are valid * @returns Promise resolving to true if credentials are valid */ async checkCredentials() { try { // First check if SherpaOnnx is properly initialized const status = this.getInitializationStatus(); if (status.isInitialized) { return true; } // In a browser environment, we can't check if the WASM file exists // so we'll check if it's likely to be loaded later if (typeof window !== "undefined") { if (status.issues.length > 0) { console.warn("SherpaOnnx not yet initialized:", status.issues.join(", ")); } return true; // Assume it will be loaded later in browser } // In Node.js, check if the WASM file exists if (environment_1.isNode && this.wasmPath && environment_1.fileSystem.existsSync(this.wasmPath)) { if (status.issues.length > 0) { console.warn("SherpaOnnx WASM file exists but not initialized:", status.issues.join(", ")); } return true; } // If no WASM path is provided, assume it will be loaded later if (!this.wasmPath) { console.warn("No WASM path provided. SherpaOnnx WebAssembly TTS will need to be initialized manually."); return true; } console.warn(`WASM file not found at ${this.wasmPath}`); return false; } catch (error) { console.error("Error checking SherpaOnnx WebAssembly credentials:", error); return false; } } /** * Get available voices * @returns Promise resolving to an array of unified voice objects */ async _getVoices() { try { if (this.modelRepository) { console.log("Loading voices from model repository"); await this.modelRepository.loadModelsIndex(); const models = this.modelRepository.getAvailableModels(); if (models.length > 0) { return models.map((model) => { const langCode = model.language || "en"; const iso = langCode.split("-")[0] || "en"; return { id: model.id, name: model.name, gender: (model.gender || "Unknown"), provider: "sherpaonnx-wasm", languageCodes: [ { bcp47: langCode, iso639_3: iso, display: langCode, }, ], }; }); } } console.warn("Model repository unavailable or empty; falling back to default voice"); return [ { id: "sherpa_en", name: "Sherpa English", gender: "Unknown", provider: "sherpaonnx-wasm", languageCodes: [ { bcp47: "en-US", iso639_3: "eng", display: "English (US)", }, ], }, ]; } catch (error) { console.error("Error getting SherpaOnnx WebAssembly voices:", error); return []; } } /** * Initialize the WebAssembly module * @param wasmUrl URL to the WebAssembly file * @returns Promise resolving when the module is initialized */ async initializeWasm(wasmUrl) { if (this.wasmLoaded) { return; } try { // In browser environments, load the WebAssembly module if (environment_1.isBrowser) { const requestedUrl = wasmUrl || this.wasmPath || this.wasmBaseUrl || "./sherpaonnx.js"; if (!wasmUrl && !this.wasmPath && !this.wasmBaseUrl) { console.warn("No WebAssembly URL provided for browser environment; defaulting to ./sherpaonnx.js"); } console.log("Loading WebAssembly module from", requestedUrl); console.log(`Current state: wasmLoaded=${this.wasmLoaded}, wasmModule=${!!this.wasmModule}`); try { // Auto-load JS glue and WASM if not present const w = window; let baseUrl = this.wasmBaseUrl; let scriptUrl; const provided = wasmUrl || this.wasmPath || this.wasmBaseUrl || ""; if (provided) { if (/\.js($|\?)/.test(provided)) { scriptUrl = provided; if (!baseUrl) { const idx = provided.lastIndexOf("/"); if (idx > -1) baseUrl = provided.slice(0, idx); } } else { baseUrl = provided; } } if (!scriptUrl && baseUrl) { const b = baseUrl.replace(/\/$/, ""); // Default glue filename (can be overridden by passing full wasmPath) scriptUrl = `${b}/sherpaonnx.js`; } if (!scriptUrl) { console.warn("No WASM script URL provided; attempting default ./sherpaonnx.js"); } const resolvedScriptUrl = scriptUrl ?? "./sherpaonnx.js"; // Persist the resolved script URL this.wasmPath = resolvedScriptUrl; console.log("Resolved wasmPath to:", this.wasmPath); // Ensure Module.locateFile points to the base for .wasm w.Module = w.Module || {}; if (baseUrl) { const b = baseUrl.replace(/\/$/, ""); w.Module.locateFile = (p) => `${b}/${p}`; } const deriveBaseFromScript = () => { const lastSlash = resolvedScriptUrl.lastIndexOf("/"); return lastSlash >= 0 ? resolvedScriptUrl.slice(0, lastSlash) : "."; }; const normalizedBase = (baseUrl ?? deriveBaseFromScript()).replace(/\/$/, ""); // Determine if we're using the wrapper glue (sherpa-onnx-tts.js) const isWrapper = /sherpa-onnx-tts\.js($|\?)/.test(resolvedScriptUrl); const mainGlueUrl = `${normalizedBase}/sherpa-onnx-wasm-main-tts.js`; // If a compatible module is already present, don't inject again const moduleReady = () => { const hasModule = typeof w.Module !== "undefined"; const hasCreate = typeof w.createOfflineTts === "function"; const hasOffline = !!(hasModule && w.Module && (w.Module.OfflineTts || w.Module.calledRun)); const hasUtf8 = !!(hasModule && typeof w.Module.lengthBytesUTF8 === "function"); const hasMalloc = !!(hasModule && typeof w.Module._malloc === "function"); const hasRun = !!(hasModule && w.Module && w.Module.calledRun === true); // Wrapper requires full runtime ready: createOfflineTts + lengthBytesUTF8 + _malloc + calledRun return (hasModule && (isWrapper ? hasCreate && hasUtf8 && hasMalloc && hasRun : hasCreate || hasOffline || hasMalloc)); }; if (!moduleReady()) { if (isWrapper) { // Ensure main Emscripten glue is loaded first const existingMain = document.querySelector('script[data-sherpa-main-glue="true"]'); if (!existingMain) { await new Promise((resolve, reject) => { const sMain = document.createElement("script"); sMain.setAttribute("data-sherpa-main-glue", "true"); sMain.src = mainGlueUrl; sMain.async = true; sMain.onload = () => resolve(); sMain.onerror = () => reject(new Error(`Failed to load SherpaONNX main glue: ${mainGlueUrl}`)); document.head.appendChild(sMain); }); } // Then load the wrapper glue that exposes createOfflineTts const existingWrapper = document.querySelector('script[data-sherpa-wrapper-glue="true"]'); if (!existingWrapper) { await new Promise((resolve, reject) => { const sWrap = document.createElement("script"); sWrap.setAttribute("data-sherpa-wrapper-glue", "true"); sWrap.src = resolvedScriptUrl; sWrap.async = true; sWrap.onload = () => resolve(); sWrap.onerror = () => reject(new Error(`Failed to load SherpaONNX wrapper glue: ${resolvedScriptUrl}`)); document.head.appendChild(sWrap); }); } } else { // Single-file glue path const existing = document.querySelector('script[data-sherpa-glue="true"]'); if (!existing) { await new Promise((resolve, reject) => { const s = document.createElement("script"); s.setAttribute("data-sherpa-glue", "true"); s.src = resolvedScriptUrl; s.async = true; s.onload = () => resolve(); s.onerror = () => reject(new Error(`Failed to load SherpaONNX glue: ${resolvedScriptUrl}`)); document.head.appendChild(s); }); } } } // Wait for glue + Module to be ready. For wrapper, require createOfflineTts and Module.lengthBytesUTF8 await new Promise((resolve, reject) => { const giveUpAt = Date.now() + 25000; // 25s const checkReady = () => { const hasModule = typeof w.Module !== "undefined"; const hasCreate = typeof w.createOfflineTts === "function"; const hasOffline = !!(hasModule && w.Module && (w.Module.OfflineTts || w.Module.calledRun)); const hasUtf8 = !!(hasModule && typeof w.Module.lengthBytesUTF8 === "function"); const hasMalloc = !!(hasModule && typeof w.Module._malloc === "function"); const hasRun = !!(hasModule && w.Module && w.Module.calledRun === true); const ready = hasModule && (isWrapper ? hasCreate && hasUtf8 && hasMalloc && hasRun : hasCreate || hasOffline || hasMalloc); if (ready) { resolve(); return; } if (Date.now() > giveUpAt) { reject(new Error("Timed out waiting for SherpaONNX WASM to initialize")); } else { setTimeout(checkReady, 200); } }; checkReady(); }); // Now that we know Module is available, store it console.log("Storing Module (and createOfflineTts if present)"); this.wasmModule = window.Module; this.wasmLoaded = true; // Store the createOfflineTts function reference for convenience if present if (this.wasmModule && !this.wasmModule.createOfflineTts && typeof window.createOfflineTts === "function") { this.wasmModule.createOfflineTts = window.createOfflineTts; } // Initialize multi-model support if enabled if (this.enhancedOptions.enableMultiModel && this.modelRepository) { console.log("Initializing enhanced multi-model support..."); try { // Load models index await this.modelRepository.loadModelsIndex(); // Initialize model manager if (this.wasmModule) { const maxCached = this.enhancedOptions.maxCachedModels ?? 3; this.modelManager = new WasmModelManager(this.wasmModule, maxCached); } console.log("Enhanced multi-model support initialized successfully"); } catch (error) { console.error("Error initializing multi-model support:", error); console.log("Falling back to legacy single-model mode"); this.enhancedOptions.enableMultiModel = false; } } console.log("WebAssembly module initialized successfully"); } catch (error) { console.error("Error initializing WebAssembly:", error); this.wasmLoaded = false; } } else { // In Node.js, we can't directly use WebAssembly in the same way console.warn("WebAssembly loading not implemented for Node.js environments."); this.wasmLoaded = false; } } catch (error) { console.error("Error initializing WebAssembly:", error); this.wasmLoaded = false; } console.log("End of initializeWasm method. wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule); console.log("createOfflineTts available at end of initializeWasm:", typeof window.createOfflineTts === "function"); console.log("window.Module available at end of initializeWasm:", typeof window.Module !== "undefined"); if (typeof window.Module !== "undefined") { console.log("window.Module.calledRun at end of initializeWasm:", window.Module.calledRun); } } /** * Synthesize text to speech and return the audio as a byte array * @param text Text to synthesize * @param options Options for synthesis * @returns Promise resolving to a byte array of audio data */ async synthToBytes(text, _options) { // Prepare text for synthesis (handle Speech Markdown and SSML) let processedText = text; // Convert from Speech Markdown if requested if (_options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) { // Convert to SSML first, then strip SSML tags since SherpaOnnx doesn't support SSML // Use "w3c" platform for generic SSML (will be stripped anyway) const ssml = await SpeechMarkdown.toSSML(processedText, "w3c"); processedText = SSMLUtils.stripSSML(ssml); } // If text is SSML, strip the tags as SherpaOnnx doesn't support SSML if (SSMLUtils.isSSML(processedText)) { processedText = SSMLUtils.stripSSML(processedText); } console.log("synthToBytes called with text:", processedText); // Ensure runtime is initialized before attempting synthesis if (environment_1.isBrowser) { const status = this.getInitializationStatus(); if (!status.isInitialized) { await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || ""); } } // Ensure the selected model files are mounted before synthesis if (this.wasmModule && this.modelRepository) { try { const FS = this.wasmModule.FS; const needModel = (() => { try { return !FS.lookupPath("/model.onnx", { follow: true }); } catch { return true; } })(); const needTokens = (() => { try { return !FS.lookupPath("/tokens.txt", { follow: true }); } catch { return true; } })(); const needVoices = (() => { try { return !FS.lookupPath("/voices.bin", { follow: true }); } catch { return true; } })(); const needVocoder = (() => { try { return !FS.lookupPath("/vocoder.onnx", { follow: true }); } catch { return true; } })(); if (needModel || needTokens || needVoices || needVocoder) { // Decide which voice to mount let targetVoice = this.currentVoiceId || this.voiceId; if (!targetVoice && this.modelRepository) { const models = this.modelRepository.getAvailableModels(); // Prefer MMS English to avoid CORS issues (then any MMS, then any English, else first) const isEn = (x) => (x.language || "").toLowerCase().startsWith("en"); const preferred = models.find((m) => m.type === "mms" && isEn(m)) || models.find((m) => m.type === "mms") || models.find((m) => isEn(m)) || models[0]; targetVoice = preferred?.id; } if (!targetVoice) { throw new Error("No voice selected and no models available to mount"); } console.log("Model files not present; mounting for voice", targetVoice); await this.setVoice(targetVoice); } } catch (e) { console.warn("Could not verify/mount model files before synthesis:", e); } } // Enhanced multi-model synthesis path (if the enhanced WASM exports are available) if (this.enhancedOptions.enableMultiModel && this.wasmModule && this.currentVoiceId) { console.log(`Using enhanced multi-model synthesis for voice ${this.currentVoiceId}`); try { if (!this.wasmModule._GenerateAudio) { throw new Error("Enhanced WASM module not loaded - _GenerateAudio not available"); } // Generate audio using the enhanced WASM interface const result = this.wasmModule._GenerateAudio(processedText, 0, 1.0); // text, speaker_id, speed if (!result || !result.samples) { throw new Error("Failed to generate audio with enhanced interface"); } console.log(`Enhanced synthesis generated ${result.samples.length} samples at ${result.sampleRate}Hz`); // Update sample rate if provided if (result.sampleRate) { this.sampleRate = result.sampleRate; } // Convert to WAV format return this._convertAudioFormat(result.samples); } catch (error) { console.error("Error with enhanced multi-model synthesis:", error); console.log("Falling back to legacy synthesis mode"); // Fall through to legacy mode } } // Legacy synthesis mode (backward compatibility) console.log("Using legacy synthesis mode"); // IMPORTANT: We need to access the global window object directly // This is because our code is bundled and the window object might not be accessible in the same way const globalWindow = typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : {}; console.log("Global window type:", typeof globalWindow); // Check if we're in a browser environment if (typeof globalWindow !== "undefined" && typeof document !== "undefined") { console.log("Browser environment detected"); // Check if createOfflineTts is available in the global scope const createOfflineTtsFn = globalWindow.createOfflineTts; // Prefer the stored module instance captured during readiness const moduleObj = (this.wasmModule || globalWindow.Module); console.log("createOfflineTts available in global scope:", typeof createOfflineTtsFn === "function"); console.log("Module available (stored or global):", !!moduleObj); console.log("Module._malloc exists:", typeof moduleObj?._malloc === "function"); // Try to use the createOfflineTts function directly when we have a real module instance if (typeof createOfflineTtsFn === "function" && moduleObj && typeof moduleObj._malloc === "function") { console.log("Using global createOfflineTts function directly"); try { // Ensure model files are mounted for legacy path too if (this.currentVoiceId && this.modelRepository) { try { const FS = this.wasmModule.FS; const needModel = (() => { try { return !FS.lookupPath("/model.onnx", { follow: true }); } catch { return true; } })(); const needTokens = (() => { try { return !FS.lookupPath("/tokens.txt", { follow: true }); } catch { return true; } })(); if (needModel || needTokens) { console.log("Legacy path: mounting model files for voice", this.currentVoiceId); await this.setVoice(this.currentVoiceId); } } catch { } } // Create a new TTS instance directly console.log("About to call createOfflineTts..."); const offlineConfig = this.buildOfflineTtsConfig(); const directTts = createOfflineTtsFn(moduleObj, offlineConfig); console.log("createOfflineTts call successful, tts object:", directTts); console.log("TTS initialized with default configuration"); console.log(`Sample rate: ${directTts?.sampleRate}`); console.log(`Number of speakers: ${directTts?.numSpeakers}`); // Update the sample rate from the TTS engine if (directTts && typeof directTts.sampleRate === "number") { this.sampleRate = directTts.sampleRate; console.log(`Updated sample rate to ${this.sampleRate}`); } else { console.warn("Could not update sample rate, using default"); } // Generate audio console.log("Generating audio directly..."); const result = directTts.generate({ text: processedText, sid: 0, speed: 1.0 }); console.log("Audio generated directly:", result); console.log(`Generated ${result?.samples?.length} samples at ${result?.sampleRate}Hz`); // Convert to WAV const audioBytes = this._convertAudioFormat(result.samples); console.log("Converted audio to WAV format, returning bytes"); return audioBytes; } catch (directError) { console.error("Error using direct approach:", directError); console.log("Falling back to standard approach"); } } else { console.log("Direct approach not available, reason:"); if (typeof createOfflineTtsFn !== "function") console.log("- createOfflineTts is not a function"); if (typeof moduleObj === "undefined") console.log("- Module is undefined"); if (moduleObj && !moduleObj.calledRun) console.log("- Module.calledRun is false"); } } else { console.log("Not in a browser environment, skipping direct approach"); } // If direct approach failed or not available, try the standard approach console.log("Using standard approach"); console.log("Current state - wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule); console.log("createOfflineTts available:", typeof globalWindow.createOfflineTts === "function"); // Check if SherpaOnnx is properly initialized const status = this.getInitializationStatus(); if (!status.isInitialized) { const errorMessage = this.getInitializationErrorMessage(); console.error(errorMessage); throw new Error(errorMessage); } try { // Use the SherpaOnnx WebAssembly API to generate audio console.log("Using SherpaOnnx WebAssembly to generate audio"); // Create a TTS instance if it doesn't exist if (!this.tts) { console.log("Creating TTS instance"); try { // Create the TTS instance if (typeof window.createOfflineTts === "function") { // Using the sherpa-onnx-tts.js API console.log("Using createOfflineTts API from global scope"); console.log("createOfflineTts:", window.createOfflineTts); console.log("Module:", window.Module); try { const offlineConfig = this.buildOfflineTtsConfig(); console.log("About to call createOfflineTts with derived config"); this.tts = window.createOfflineTts(window.Module, offlineConfig); console.log("createOfflineTts call successful, tts object:", this.tts); console.log("TTS initialized"); console.log(`Sample rate: ${this.tts?.sampleRate}`); console.log(`Number of speakers: ${this.tts?.numSpeakers}`); // Update the sample rate from the TTS engine if (this.tts && typeof this.tts.sampleRate === "number") { this.sampleRate = this.tts.sampleRate; console.log(`Updated sample rate to ${this.sampleRate}`); } else { console.warn("Could not update sample rate, using default"); } } catch (error) { console.error("Error creating TTS instance with createOfflineTts:", error); throw error; } } else if (this.wasmModule?.OfflineTts) { // Using the Module.OfflineTts API console.log("Using Module.OfflineTts API"); this.tts = new this.wasmModule.OfflineTts(); } else { throw new Error("No compatible TTS API found"); } console.log("TTS instance created successfully"); } catch (error) { console.error("Error creating TTS instance:", error); throw new Error(`Failed to create SherpaOnnx TTS instance: ${error instanceof Error ? error.message : String(error)}`); } } // Generate the audio console.log("Generating audio for text:", text); let samples; if (typeof this.tts.generate === "function") { // Using the generate method from sherpa-onnx-tts.js console.log("Using generate method"); console.log("this.tts.generate:", this.tts.generate); try { console.log("Calling generate with:", { text: processedText, sid: 0, speed: 1.0 }); const result = this.tts.generate({ text: processedText, sid: 0, speed: 1.0 }); console.log("Generate call successful, result:", result); samples = result.samples; console.log(`Generated audio with sample rate: ${result.sampleRate} and samples: ${samples.length}`); } catch (error) { console.error("Error calling generate:", error); throw error; } } else if (typeof this.tts.generateWithText === "function") { // Using the generateWithText method console.log("Using generateWithText method"); console.log("this.tts.generateWithText:", this.tts.generateWithText); try { console.log("Calling generateWithText with:", processedText); samples = this.tts.generateWithText(processedText); console.log(`Generated audio with samples: ${samples.length}`); } catch (error) { console.error("Error calling generateWithText:", error); throw error; } } else { console.error("No compatible generate method found"); console.log("Available methods on this.tts:", Object.keys(this.tts).filter((key) => typeof this.tts[key] === "function")); throw new Error("No compatible generate method found"); } console.log("Audio generated successfully, samples:", samples.length); // Convert the samples to the requested format const audioBytes = this._convertAudioFormat(samples); return audioBytes; } catch (error) { console.error("Error synthesizing text:", error); throw new Error(`SherpaOnnx synthesis failed: ${error instanceof Error ? error.message : String(error)}`); } } /** * Convert audio samples to the requested format * @param samples Float32Array of audio samples * @returns Uint8Array of audio data in the requested format */ _convertAudioFormat(samples) { // For now, we'll just return a WAV file // In a real implementation, we would use a library like audioEncoder // to convert to the requested format // Convert Float32Array to Int16Array const int16Samples = new Int16Array(samples.length); for (let i = 0; i < samples.length; i++) { // Scale to 16-bit range and clamp const sample = Math.max(-1, Math.min(1, samples[i])); int16Samples[i] = Math.floor(sample * 32767); } // Create a WAV file header const wavHeader = new ArrayBuffer(44); const view = new DataView(wavHeader); // "RIFF" chunk descriptor view.setUint8(0, "R".charCodeAt(0)); view.setUint8(1, "I".charCodeAt(0)); view.setUint8(2, "F".charCodeAt(0)); view.setUint8(3, "F".charCodeAt(0)); // Chunk size (file size - 8) view.setUint32(4, 36 + int16Samples.length * 2, true); // Format ("WAVE") view.setUint8(8, "W".charCodeAt(0)); view.setUint8(9, "A".charCodeAt(0)); view.setUint8(10, "V".charCodeAt(0)); view.setUint8(11, "E".charCodeAt(0)); // "fmt " sub-chunk view.setUint8(12, "f".charCodeAt(0)); view.setUint8(13, "m".charCodeAt(0)); view.setUint8(14, "t".charCodeAt(0)); view.setUint8(15, " ".charCodeAt(0)); // Sub-chunk size (16 for PCM) view.setUint32(16, 16, true); // Audio format (1 for PCM) view.setUint16(20, 1, true); // Number of channels (1 for mono) view.setUint16(22, 1, true); // Sample rate view.setUint32(24, this.sampleRate, true); // Byte rate (sample rate * channels * bytes per sample) view.setUint32(28, this.sampleRate * 1 * 2, true); // Block align (channels * bytes per sample) view.setUint16(32, 1 * 2, true); // Bits per sample view.setUint16(34, 16, true); // "data" sub-chunk view.setUint8(36, "d".charCodeAt(0)); view.setUint8(37, "a".charCodeAt(0)); view.setUint8(38, "t".charCodeAt(0)); view.setUint8(39, "a".charCodeAt(0)); // Sub-chunk size (number of samples * channels * bytes per sample) view.setUint32(40, int16Samples.length * 1 * 2, true); // Combine the header and the samples const wavBytes = new Uint8Array(wavHeader.byteLength + int16Samples.length * 2); wavBytes.set(new Uint8Array(wavHeader), 0); // Convert Int16Array to Uint8Array const samplesBytes = new Uint8Array(int16Samples.buffer); wavBytes.set(samplesBytes, wavHeader.byteLength); return wavBytes; } /** * Check if SherpaOnnx is properly initialized * @returns Object with initialization status and details */ getInitializationStatus() { const globalWindow = (typeof window !== "undefined" ? window : global); const issues = []; const hasModule = !!this.wasmModule; const winMod = (globalWindow && globalWindow.Module) || null; const hasGlobalModule = !!winMod; const hasCreate = typeof globalWindow.createOfflineTts === "function"; const hasOffline = !!(winMod && (winMod.OfflineTts || winMod.calledRun)); if (!this.wasmLoaded) { issues.push("WebAssembly module not loaded"); } if (!hasModule && !hasGlobalModule) { issues.push("WebAssembly module is null"); } if (!hasCreate && !hasOffline) { issues.push("No SherpaONNX TTS API found (neither createOfflineTts nor Module.OfflineTts)"); } const ready = this.wasmLoaded && (hasModule || hasGlobalModule) && (hasCreate || hasOffline); return { isInitialized: ready && issues.length === 0 ? true : ready, // consider ready if runtime is present wasmLoaded: this.wasmLoaded, wasmModule: !!(hasModule || hasGlobalModule), createOfflineTts: hasCreate, issues, }; } /** * Get detailed error message for initialization issues * @returns Detailed error message with troubleshooting steps */ getInitializationErrorMessage() { const status = this.getInitializationStatus(); let message = "SherpaOnnx WebAssembly TTS is not properly initialized.\n\n"; message += "Issues found:\n"; for (const issue of status.issues) { message += `- ${issue}\n`; } message += "\nTroubleshooting steps:\n"; message += "1. Ensure the SherpaOnnx WebAssembly files are properly loaded\n"; message += "2. Check that the WebAssembly module initialization completed successfully\n"; message += "3. Verify that createOfflineTts function is available in the global scope\n"; message += "4. Check browser console for WebAssembly loading errors\n"; message += "5. Ensure you're running in a supported environment (browser with WebAssembly support)\n"; return message; } /** * Synthesize text to speech and stream the audio * @param text Text to synthesize * @param onAudioBuffer Callback for audio buffers * @param onStart Callback for when synthesis starts * @param onEnd Callback for when synthesis ends * @param onWord Callback for word boundary events * @param options Options for synthesis * @returns Promise resolving when synthesis is complete */ async synthToStream(text, onAudioBuffer, onStart, onEnd, onWord, options) { try { // Call onStart callback if (onStart) { onStart(); } // Synthesize the entire audio const audioBytes = await this.synthToBytes(text, options); // Estimate word boundaries if (onWord) { const wordBoundaries = (0, word_timing_estimator_1.estimateWordBoundaries)(text); // Schedule word boundary events for (const boundary of wordBoundaries) { setTimeout(() => { onWord(boundary.word, boundary.start, boundary.end); }, boundary.start * 1000); } } // Send the audio buffer onAudioBuffer(audioBytes); // Call onEnd callback if (onEnd) { onEnd(); } } catch (error) { console.error("Error synthesizing text to stream:", error); // Call onEnd callback even if there's an error if (onEnd) { onEnd(); } // Re-throw the error so it can be caught by the caller throw error; } } /** * Synthesize text to speech and save to a file * @param text Text to synthesize * @param filename Filename to save as * @param format Audio format (mp3 or wav) * @param options Options for synthesis * @returns Promise resolving when synthesis is complete */ async synthToFile(text, filename, format = "wav", // Override base class to only allow 'wav' options // Use specific options type ) { try { let outputFormat = format; // Sherpa-ONNX only supports WAV output if (outputFormat !== "wav") { console.warn("SherpaOnnx WebAssembly TTS only supports WAV output. Using WAV instead of", outputFormat); outputFormat = "wav"; } // Use the base class's file saving logic (which detects Node/Browser) await super.synthToFile(text, filename, outputFormat, options); } catch (error) { console.error("Error synthesizing text to file:", error); throw error; } } /** * Get a property value * @param property Property name * @returns Property value */ getProperty(property) { switch (property) { case "voice": return this.currentVoiceId || this.voiceId || undefined; case "sampleRate": return this.sampleRate; case "wasmLoaded": return this.wasmLoaded; case "wasmPath": return this.wasmPath; case "wasmBaseUrl": return this.wasmBaseUrl; case "mergedModelsUrl": return this.mergedModelsUrl; case "multiModelEnabled": return this.enhancedOptions.enableMultiModel; case "maxCachedModels": return this.enhancedOptions.maxCachedModels; case "loadedModels": return this.modelManager?.getLoadedModelIds() ?? []; case "currentModel": return this.modelManager?.getCurrentModel(); case "availableModels": return this.modelRepository?.getAvailableModels() || []; default: return super.getProperty(property); } } /** * Set a property value * @param property Property name * @param value Property value */ setProperty(property, value) { switch (property) { case "voice": this.setVoice(value); break; case "wasmPath": this.wasmPath = value; break; case "wasmBaseUrl": this.wasmBaseUrl = value; break; case "mergedModelsUrl": this.mergedModelsUrl = value; if (this.modelRepository) { // Recreate repository with new URL on the fly this.modelRepository = new ModelRepository(this.mergedModelsUrl); } break; default: super.setProperty(property, value); break; } } /** * Build the OfflineTts configuration object expected by sherpa-onnx-tts.js. * Uses the currently selected voice metadata to decide which model block * (vits/ko