js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
1,031 lines • 79.9 kB
JavaScript
"use strict";
/**
* SherpaOnnx WebAssembly TTS Client
*
* Enhanced version with multi-model support for browser environments.
* Supports dynamic loading of Kokoro, Matcha, and VITS models.
*
* BACKWARD COMPATIBILITY: Maintains full compatibility with existing API.
* New multi-model features are opt-in via constructor options.
*/
// Module-interop helper: exposes property `k` of module object `m` on `o`
// under the name `k2` (which defaults to `k`). An already-defined
// `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Substitute a live getter when `m` has no own descriptor for `k`, when the
// descriptor is an accessor but `m` is not flagged `__esModule`, or when it is
// a writable/configurable data property. Otherwise the descriptor is reused as-is.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when Object.create is unavailable: copy the current value once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: sets `o.default` to `v`. Uses an enumerable
// defineProperty when Object.create exists, plain assignment otherwise.
// An already-defined `this.__setModuleDefault` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Module-interop helper: returns `mod` unchanged when it is flagged
// `__esModule`; otherwise builds a namespace-like object that re-exposes every
// own property of `mod` (except "default") and carries `mod` itself as `default`.
// An already-defined `this.__importStar` is reused when present.
var __importStar = (this && this.__importStar) || (function () {
// On first use `ownKeys` replaces itself with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback, and then delegates to the replacement.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// Bind each own key except "default"; a null/undefined `mod` yields only `default`.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.SherpaOnnxWasmTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
const SSMLUtils = __importStar(require("../core/ssml-utils"));
const SpeechMarkdown = __importStar(require("../markdown/converter"));
const bzip2_1 = require("../utils/bzip2");
const environment_1 = require("../utils/environment");
const word_timing_estimator_1 = require("../utils/word-timing-estimator");
/**
* Enhanced SherpaOnnx WebAssembly TTS Client
*
* Supports both legacy single-model mode and new multi-model mode.
* Maintains full backward compatibility with existing API.
*/
class SherpaOnnxWasmTTSClient extends abstract_tts_1.AbstractTTSClient {
/**
* Create a new SherpaOnnx WebAssembly TTS client
* @param credentials Optional credentials object
* @param enhancedOptions Optional enhanced options for multi-model support
*/
constructor(credentials = {}, enhancedOptions = {}) {
super(credentials);
Object.defineProperty(this, "wasmModule", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
Object.defineProperty(this, "tts", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
Object.defineProperty(this, "wasmPath", {
enumerable: true,
configurable: true,
writable: true,
value: ""
});
Object.defineProperty(this, "wasmLoaded", {
enumerable: true,
configurable: true,
writable: true,
value: false
});
Object.defineProperty(this, "wasmBaseUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "mergedModelsUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelsMirrorUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// Enhanced multi-model support
Object.defineProperty(this, "enhancedOptions", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelRepository", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelManager", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "currentVoiceId", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "currentVoiceConfig", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this._models = [{ id: "sherpaonnx-wasm", features: ["open-source"] }];
// Capabilities: Browser-only engine, requires WASM runtime
this.capabilities = { browserSupported: true, nodeSupported: false, needsWasm: true };
// Set default sample rate for the Piper model
this.sampleRate = 22050;
// Optional configuration from credentials
this.wasmPath = credentials.wasmPath || ""; // JS glue path (if provided)
this.wasmBaseUrl = credentials.wasmBaseUrl || undefined; // Base URL for glue+wasm
this.mergedModelsUrl =
credentials.mergedModelsUrl || credentials.modelsUrl || undefined;
this.modelsMirrorUrl =
credentials.modelsMirrorUrl || enhancedOptions?.modelsMirrorBaseUrl || undefined;
// Enhanced options with defaults for backward compatibility
this.enhancedOptions = {
enableMultiModel: false, // Disabled by default for backward compatibility
maxCachedModels: 3,
modelsMirrorBaseUrl: this.modelsMirrorUrl,
...enhancedOptions,
};
// Initialize multi-model components if enabled
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
/**
* Get the list of required credential types for this engine
* @returns Array of required credential field names
*/
getRequiredCredentials() {
return []; // SherpaOnnx WASM doesn't require credentials, only WASM files
}
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid
*/
async checkCredentials() {
try {
// First check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (status.isInitialized) {
return true;
}
// In a browser environment, we can't check if the WASM file exists
// so we'll check if it's likely to be loaded later
if (typeof window !== "undefined") {
if (status.issues.length > 0) {
console.warn("SherpaOnnx not yet initialized:", status.issues.join(", "));
}
return true; // Assume it will be loaded later in browser
}
// In Node.js, check if the WASM file exists
if (environment_1.isNode && this.wasmPath && environment_1.fileSystem.existsSync(this.wasmPath)) {
if (status.issues.length > 0) {
console.warn("SherpaOnnx WASM file exists but not initialized:", status.issues.join(", "));
}
return true;
}
// If no WASM path is provided, assume it will be loaded later
if (!this.wasmPath) {
console.warn("No WASM path provided. SherpaOnnx WebAssembly TTS will need to be initialized manually.");
return true;
}
console.warn(`WASM file not found at ${this.wasmPath}`);
return false;
}
catch (error) {
console.error("Error checking SherpaOnnx WebAssembly credentials:", error);
return false;
}
}
/**
* Get available voices
* @returns Promise resolving to an array of unified voice objects
*/
async _getVoices() {
try {
if (this.modelRepository) {
console.log("Loading voices from model repository");
await this.modelRepository.loadModelsIndex();
const models = this.modelRepository.getAvailableModels();
if (models.length > 0) {
return models.map((model) => {
const langCode = model.language || "en";
const iso = langCode.split("-")[0] || "en";
return {
id: model.id,
name: model.name,
gender: (model.gender || "Unknown"),
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: langCode,
iso639_3: iso,
display: langCode,
},
],
};
});
}
}
console.warn("Model repository unavailable or empty; falling back to default voice");
return [
{
id: "sherpa_en",
name: "Sherpa English",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
catch (error) {
console.error("Error getting SherpaOnnx WebAssembly voices:", error);
return [];
}
}
/**
* Initialize the WebAssembly module
* @param wasmUrl URL to the WebAssembly file
* @returns Promise resolving when the module is initialized
*/
async initializeWasm(wasmUrl) {
if (this.wasmLoaded) {
return;
}
try {
// In browser environments, load the WebAssembly module
if (environment_1.isBrowser) {
const requestedUrl = wasmUrl || this.wasmPath || this.wasmBaseUrl || "./sherpaonnx.js";
if (!wasmUrl && !this.wasmPath && !this.wasmBaseUrl) {
console.warn("No WebAssembly URL provided for browser environment; defaulting to ./sherpaonnx.js");
}
console.log("Loading WebAssembly module from", requestedUrl);
console.log(`Current state: wasmLoaded=${this.wasmLoaded}, wasmModule=${!!this.wasmModule}`);
try {
// Auto-load JS glue and WASM if not present
const w = window;
let baseUrl = this.wasmBaseUrl;
let scriptUrl;
const provided = wasmUrl || this.wasmPath || this.wasmBaseUrl || "";
if (provided) {
if (/\.js($|\?)/.test(provided)) {
scriptUrl = provided;
if (!baseUrl) {
const idx = provided.lastIndexOf("/");
if (idx > -1)
baseUrl = provided.slice(0, idx);
}
}
else {
baseUrl = provided;
}
}
if (!scriptUrl && baseUrl) {
const b = baseUrl.replace(/\/$/, "");
// Default glue filename (can be overridden by passing full wasmPath)
scriptUrl = `${b}/sherpaonnx.js`;
}
if (!scriptUrl) {
console.warn("No WASM script URL provided; attempting default ./sherpaonnx.js");
}
const resolvedScriptUrl = scriptUrl ?? "./sherpaonnx.js";
// Persist the resolved script URL
this.wasmPath = resolvedScriptUrl;
console.log("Resolved wasmPath to:", this.wasmPath);
// Ensure Module.locateFile points to the base for .wasm
w.Module = w.Module || {};
if (baseUrl) {
const b = baseUrl.replace(/\/$/, "");
w.Module.locateFile = (p) => `${b}/${p}`;
}
const deriveBaseFromScript = () => {
const lastSlash = resolvedScriptUrl.lastIndexOf("/");
return lastSlash >= 0 ? resolvedScriptUrl.slice(0, lastSlash) : ".";
};
const normalizedBase = (baseUrl ?? deriveBaseFromScript()).replace(/\/$/, "");
// Determine if we're using the wrapper glue (sherpa-onnx-tts.js)
const isWrapper = /sherpa-onnx-tts\.js($|\?)/.test(resolvedScriptUrl);
const mainGlueUrl = `${normalizedBase}/sherpa-onnx-wasm-main-tts.js`;
// If a compatible module is already present, don't inject again
const moduleReady = () => {
const hasModule = typeof w.Module !== "undefined";
const hasCreate = typeof w.createOfflineTts === "function";
const hasOffline = !!(hasModule &&
w.Module &&
(w.Module.OfflineTts || w.Module.calledRun));
const hasUtf8 = !!(hasModule && typeof w.Module.lengthBytesUTF8 === "function");
const hasMalloc = !!(hasModule && typeof w.Module._malloc === "function");
const hasRun = !!(hasModule && w.Module && w.Module.calledRun === true);
// Wrapper requires full runtime ready: createOfflineTts + lengthBytesUTF8 + _malloc + calledRun
return (hasModule &&
(isWrapper
? hasCreate && hasUtf8 && hasMalloc && hasRun
: hasCreate || hasOffline || hasMalloc));
};
if (!moduleReady()) {
if (isWrapper) {
// Ensure main Emscripten glue is loaded first
const existingMain = document.querySelector('script[data-sherpa-main-glue="true"]');
if (!existingMain) {
await new Promise((resolve, reject) => {
const sMain = document.createElement("script");
sMain.setAttribute("data-sherpa-main-glue", "true");
sMain.src = mainGlueUrl;
sMain.async = true;
sMain.onload = () => resolve();
sMain.onerror = () => reject(new Error(`Failed to load SherpaONNX main glue: ${mainGlueUrl}`));
document.head.appendChild(sMain);
});
}
// Then load the wrapper glue that exposes createOfflineTts
const existingWrapper = document.querySelector('script[data-sherpa-wrapper-glue="true"]');
if (!existingWrapper) {
await new Promise((resolve, reject) => {
const sWrap = document.createElement("script");
sWrap.setAttribute("data-sherpa-wrapper-glue", "true");
sWrap.src = resolvedScriptUrl;
sWrap.async = true;
sWrap.onload = () => resolve();
sWrap.onerror = () => reject(new Error(`Failed to load SherpaONNX wrapper glue: ${resolvedScriptUrl}`));
document.head.appendChild(sWrap);
});
}
}
else {
// Single-file glue path
const existing = document.querySelector('script[data-sherpa-glue="true"]');
if (!existing) {
await new Promise((resolve, reject) => {
const s = document.createElement("script");
s.setAttribute("data-sherpa-glue", "true");
s.src = resolvedScriptUrl;
s.async = true;
s.onload = () => resolve();
s.onerror = () => reject(new Error(`Failed to load SherpaONNX glue: ${resolvedScriptUrl}`));
document.head.appendChild(s);
});
}
}
}
// Wait for glue + Module to be ready. For wrapper, require createOfflineTts and Module.lengthBytesUTF8
await new Promise((resolve, reject) => {
const giveUpAt = Date.now() + 25000; // 25s
const checkReady = () => {
const hasModule = typeof w.Module !== "undefined";
const hasCreate = typeof w.createOfflineTts === "function";
const hasOffline = !!(hasModule &&
w.Module &&
(w.Module.OfflineTts || w.Module.calledRun));
const hasUtf8 = !!(hasModule && typeof w.Module.lengthBytesUTF8 === "function");
const hasMalloc = !!(hasModule && typeof w.Module._malloc === "function");
const hasRun = !!(hasModule && w.Module && w.Module.calledRun === true);
const ready = hasModule &&
(isWrapper
? hasCreate && hasUtf8 && hasMalloc && hasRun
: hasCreate || hasOffline || hasMalloc);
if (ready) {
resolve();
return;
}
if (Date.now() > giveUpAt) {
reject(new Error("Timed out waiting for SherpaONNX WASM to initialize"));
}
else {
setTimeout(checkReady, 200);
}
};
checkReady();
});
// Now that we know Module is available, store it
console.log("Storing Module (and createOfflineTts if present)");
this.wasmModule = window.Module;
this.wasmLoaded = true;
// Store the createOfflineTts function reference for convenience if present
if (this.wasmModule &&
!this.wasmModule.createOfflineTts &&
typeof window.createOfflineTts === "function") {
this.wasmModule.createOfflineTts = window.createOfflineTts;
}
// Initialize multi-model support if enabled
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log("Initializing enhanced multi-model support...");
try {
// Load models index
await this.modelRepository.loadModelsIndex();
// Initialize model manager
if (this.wasmModule) {
const maxCached = this.enhancedOptions.maxCachedModels ?? 3;
this.modelManager = new WasmModelManager(this.wasmModule, maxCached);
}
console.log("Enhanced multi-model support initialized successfully");
}
catch (error) {
console.error("Error initializing multi-model support:", error);
console.log("Falling back to legacy single-model mode");
this.enhancedOptions.enableMultiModel = false;
}
}
console.log("WebAssembly module initialized successfully");
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
}
else {
// In Node.js, we can't directly use WebAssembly in the same way
console.warn("WebAssembly loading not implemented for Node.js environments.");
this.wasmLoaded = false;
}
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
console.log("End of initializeWasm method. wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available at end of initializeWasm:", typeof window.createOfflineTts === "function");
console.log("window.Module available at end of initializeWasm:", typeof window.Module !== "undefined");
if (typeof window.Module !== "undefined") {
console.log("window.Module.calledRun at end of initializeWasm:", window.Module.calledRun);
}
}
/**
* Synthesize text to speech and return the audio as a byte array
* @param text Text to synthesize
* @param options Options for synthesis
* @returns Promise resolving to a byte array of audio data
*/
async synthToBytes(text, _options) {
// Prepare text for synthesis (handle Speech Markdown and SSML)
let processedText = text;
// Convert from Speech Markdown if requested
if (_options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
// Convert to SSML first, then strip SSML tags since SherpaOnnx doesn't support SSML
// Use "w3c" platform for generic SSML (will be stripped anyway)
const ssml = await SpeechMarkdown.toSSML(processedText, "w3c");
processedText = SSMLUtils.stripSSML(ssml);
}
// If text is SSML, strip the tags as SherpaOnnx doesn't support SSML
if (SSMLUtils.isSSML(processedText)) {
processedText = SSMLUtils.stripSSML(processedText);
}
console.log("synthToBytes called with text:", processedText);
// Ensure runtime is initialized before attempting synthesis
if (environment_1.isBrowser) {
const status = this.getInitializationStatus();
if (!status.isInitialized) {
await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || "");
}
}
// Ensure the selected model files are mounted before synthesis
if (this.wasmModule && this.modelRepository) {
try {
const FS = this.wasmModule.FS;
const needModel = (() => {
try {
return !FS.lookupPath("/model.onnx", { follow: true });
}
catch {
return true;
}
})();
const needTokens = (() => {
try {
return !FS.lookupPath("/tokens.txt", { follow: true });
}
catch {
return true;
}
})();
const needVoices = (() => {
try {
return !FS.lookupPath("/voices.bin", { follow: true });
}
catch {
return true;
}
})();
const needVocoder = (() => {
try {
return !FS.lookupPath("/vocoder.onnx", { follow: true });
}
catch {
return true;
}
})();
if (needModel || needTokens || needVoices || needVocoder) {
// Decide which voice to mount
let targetVoice = this.currentVoiceId || this.voiceId;
if (!targetVoice && this.modelRepository) {
const models = this.modelRepository.getAvailableModels();
// Prefer MMS English to avoid CORS issues (then any MMS, then any English, else first)
const isEn = (x) => (x.language || "").toLowerCase().startsWith("en");
const preferred = models.find((m) => m.type === "mms" && isEn(m)) ||
models.find((m) => m.type === "mms") ||
models.find((m) => isEn(m)) ||
models[0];
targetVoice = preferred?.id;
}
if (!targetVoice) {
throw new Error("No voice selected and no models available to mount");
}
console.log("Model files not present; mounting for voice", targetVoice);
await this.setVoice(targetVoice);
}
}
catch (e) {
console.warn("Could not verify/mount model files before synthesis:", e);
}
}
// Enhanced multi-model synthesis path (if the enhanced WASM exports are available)
if (this.enhancedOptions.enableMultiModel && this.wasmModule && this.currentVoiceId) {
console.log(`Using enhanced multi-model synthesis for voice ${this.currentVoiceId}`);
try {
if (!this.wasmModule._GenerateAudio) {
throw new Error("Enhanced WASM module not loaded - _GenerateAudio not available");
}
// Generate audio using the enhanced WASM interface
const result = this.wasmModule._GenerateAudio(processedText, 0, 1.0); // text, speaker_id, speed
if (!result || !result.samples) {
throw new Error("Failed to generate audio with enhanced interface");
}
console.log(`Enhanced synthesis generated ${result.samples.length} samples at ${result.sampleRate}Hz`);
// Update sample rate if provided
if (result.sampleRate) {
this.sampleRate = result.sampleRate;
}
// Convert to WAV format
return this._convertAudioFormat(result.samples);
}
catch (error) {
console.error("Error with enhanced multi-model synthesis:", error);
console.log("Falling back to legacy synthesis mode");
// Fall through to legacy mode
}
}
// Legacy synthesis mode (backward compatibility)
console.log("Using legacy synthesis mode");
// IMPORTANT: We need to access the global window object directly
// This is because our code is bundled and the window object might not be accessible in the same way
const globalWindow = typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : {};
console.log("Global window type:", typeof globalWindow);
// Check if we're in a browser environment
if (typeof globalWindow !== "undefined" && typeof document !== "undefined") {
console.log("Browser environment detected");
// Check if createOfflineTts is available in the global scope
const createOfflineTtsFn = globalWindow.createOfflineTts;
// Prefer the stored module instance captured during readiness
const moduleObj = (this.wasmModule || globalWindow.Module);
console.log("createOfflineTts available in global scope:", typeof createOfflineTtsFn === "function");
console.log("Module available (stored or global):", !!moduleObj);
console.log("Module._malloc exists:", typeof moduleObj?._malloc === "function");
// Try to use the createOfflineTts function directly when we have a real module instance
if (typeof createOfflineTtsFn === "function" &&
moduleObj &&
typeof moduleObj._malloc === "function") {
console.log("Using global createOfflineTts function directly");
try {
// Ensure model files are mounted for legacy path too
if (this.currentVoiceId && this.modelRepository) {
try {
const FS = this.wasmModule.FS;
const needModel = (() => {
try {
return !FS.lookupPath("/model.onnx", { follow: true });
}
catch {
return true;
}
})();
const needTokens = (() => {
try {
return !FS.lookupPath("/tokens.txt", { follow: true });
}
catch {
return true;
}
})();
if (needModel || needTokens) {
console.log("Legacy path: mounting model files for voice", this.currentVoiceId);
await this.setVoice(this.currentVoiceId);
}
}
catch { }
}
// Create a new TTS instance directly
console.log("About to call createOfflineTts...");
const offlineConfig = this.buildOfflineTtsConfig();
const directTts = createOfflineTtsFn(moduleObj, offlineConfig);
console.log("createOfflineTts call successful, tts object:", directTts);
console.log("TTS initialized with default configuration");
console.log(`Sample rate: ${directTts?.sampleRate}`);
console.log(`Number of speakers: ${directTts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (directTts && typeof directTts.sampleRate === "number") {
this.sampleRate = directTts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
// Generate audio
console.log("Generating audio directly...");
const result = directTts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Audio generated directly:", result);
console.log(`Generated ${result?.samples?.length} samples at ${result?.sampleRate}Hz`);
// Convert to WAV
const audioBytes = this._convertAudioFormat(result.samples);
console.log("Converted audio to WAV format, returning bytes");
return audioBytes;
}
catch (directError) {
console.error("Error using direct approach:", directError);
console.log("Falling back to standard approach");
}
}
else {
console.log("Direct approach not available, reason:");
if (typeof createOfflineTtsFn !== "function")
console.log("- createOfflineTts is not a function");
if (typeof moduleObj === "undefined")
console.log("- Module is undefined");
if (moduleObj && !moduleObj.calledRun)
console.log("- Module.calledRun is false");
}
}
else {
console.log("Not in a browser environment, skipping direct approach");
}
// If direct approach failed or not available, try the standard approach
console.log("Using standard approach");
console.log("Current state - wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available:", typeof globalWindow.createOfflineTts === "function");
// Check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (!status.isInitialized) {
const errorMessage = this.getInitializationErrorMessage();
console.error(errorMessage);
throw new Error(errorMessage);
}
try {
// Use the SherpaOnnx WebAssembly API to generate audio
console.log("Using SherpaOnnx WebAssembly to generate audio");
// Create a TTS instance if it doesn't exist
if (!this.tts) {
console.log("Creating TTS instance");
try {
// Create the TTS instance
if (typeof window.createOfflineTts === "function") {
// Using the sherpa-onnx-tts.js API
console.log("Using createOfflineTts API from global scope");
console.log("createOfflineTts:", window.createOfflineTts);
console.log("Module:", window.Module);
try {
const offlineConfig = this.buildOfflineTtsConfig();
console.log("About to call createOfflineTts with derived config");
this.tts = window.createOfflineTts(window.Module, offlineConfig);
console.log("createOfflineTts call successful, tts object:", this.tts);
console.log("TTS initialized");
console.log(`Sample rate: ${this.tts?.sampleRate}`);
console.log(`Number of speakers: ${this.tts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (this.tts && typeof this.tts.sampleRate === "number") {
this.sampleRate = this.tts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
}
catch (error) {
console.error("Error creating TTS instance with createOfflineTts:", error);
throw error;
}
}
else if (this.wasmModule?.OfflineTts) {
// Using the Module.OfflineTts API
console.log("Using Module.OfflineTts API");
this.tts = new this.wasmModule.OfflineTts();
}
else {
throw new Error("No compatible TTS API found");
}
console.log("TTS instance created successfully");
}
catch (error) {
console.error("Error creating TTS instance:", error);
throw new Error(`Failed to create SherpaOnnx TTS instance: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Generate the audio
console.log("Generating audio for text:", text);
let samples;
if (typeof this.tts.generate === "function") {
// Using the generate method from sherpa-onnx-tts.js
console.log("Using generate method");
console.log("this.tts.generate:", this.tts.generate);
try {
console.log("Calling generate with:", { text: processedText, sid: 0, speed: 1.0 });
const result = this.tts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Generate call successful, result:", result);
samples = result.samples;
console.log(`Generated audio with sample rate: ${result.sampleRate} and samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generate:", error);
throw error;
}
}
else if (typeof this.tts.generateWithText === "function") {
// Using the generateWithText method
console.log("Using generateWithText method");
console.log("this.tts.generateWithText:", this.tts.generateWithText);
try {
console.log("Calling generateWithText with:", processedText);
samples = this.tts.generateWithText(processedText);
console.log(`Generated audio with samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generateWithText:", error);
throw error;
}
}
else {
console.error("No compatible generate method found");
console.log("Available methods on this.tts:", Object.keys(this.tts).filter((key) => typeof this.tts[key] === "function"));
throw new Error("No compatible generate method found");
}
console.log("Audio generated successfully, samples:", samples.length);
// Convert the samples to the requested format
const audioBytes = this._convertAudioFormat(samples);
return audioBytes;
}
catch (error) {
console.error("Error synthesizing text:", error);
throw new Error(`SherpaOnnx synthesis failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Convert audio samples to the requested format
* @param samples Float32Array of audio samples
* @returns Uint8Array of audio data in the requested format
*/
_convertAudioFormat(samples) {
// For now, we'll just return a WAV file
// In a real implementation, we would use a library like audioEncoder
// to convert to the requested format
// Convert Float32Array to Int16Array
const int16Samples = new Int16Array(samples.length);
for (let i = 0; i < samples.length; i++) {
// Scale to 16-bit range and clamp
const sample = Math.max(-1, Math.min(1, samples[i]));
int16Samples[i] = Math.floor(sample * 32767);
}
// Create a WAV file header
const wavHeader = new ArrayBuffer(44);
const view = new DataView(wavHeader);
// "RIFF" chunk descriptor
view.setUint8(0, "R".charCodeAt(0));
view.setUint8(1, "I".charCodeAt(0));
view.setUint8(2, "F".charCodeAt(0));
view.setUint8(3, "F".charCodeAt(0));
// Chunk size (file size - 8)
view.setUint32(4, 36 + int16Samples.length * 2, true);
// Format ("WAVE")
view.setUint8(8, "W".charCodeAt(0));
view.setUint8(9, "A".charCodeAt(0));
view.setUint8(10, "V".charCodeAt(0));
view.setUint8(11, "E".charCodeAt(0));
// "fmt " sub-chunk
view.setUint8(12, "f".charCodeAt(0));
view.setUint8(13, "m".charCodeAt(0));
view.setUint8(14, "t".charCodeAt(0));
view.setUint8(15, " ".charCodeAt(0));
// Sub-chunk size (16 for PCM)
view.setUint32(16, 16, true);
// Audio format (1 for PCM)
view.setUint16(20, 1, true);
// Number of channels (1 for mono)
view.setUint16(22, 1, true);
// Sample rate
view.setUint32(24, this.sampleRate, true);
// Byte rate (sample rate * channels * bytes per sample)
view.setUint32(28, this.sampleRate * 1 * 2, true);
// Block align (channels * bytes per sample)
view.setUint16(32, 1 * 2, true);
// Bits per sample
view.setUint16(34, 16, true);
// "data" sub-chunk
view.setUint8(36, "d".charCodeAt(0));
view.setUint8(37, "a".charCodeAt(0));
view.setUint8(38, "t".charCodeAt(0));
view.setUint8(39, "a".charCodeAt(0));
// Sub-chunk size (number of samples * channels * bytes per sample)
view.setUint32(40, int16Samples.length * 1 * 2, true);
// Combine the header and the samples
const wavBytes = new Uint8Array(wavHeader.byteLength + int16Samples.length * 2);
wavBytes.set(new Uint8Array(wavHeader), 0);
// Convert Int16Array to Uint8Array
const samplesBytes = new Uint8Array(int16Samples.buffer);
wavBytes.set(samplesBytes, wavHeader.byteLength);
return wavBytes;
}
/**
* Check if SherpaOnnx is properly initialized
* @returns Object with initialization status and details
*/
getInitializationStatus() {
const globalWindow = (typeof window !== "undefined" ? window : global);
const issues = [];
const hasModule = !!this.wasmModule;
const winMod = (globalWindow && globalWindow.Module) || null;
const hasGlobalModule = !!winMod;
const hasCreate = typeof globalWindow.createOfflineTts === "function";
const hasOffline = !!(winMod && (winMod.OfflineTts || winMod.calledRun));
if (!this.wasmLoaded) {
issues.push("WebAssembly module not loaded");
}
if (!hasModule && !hasGlobalModule) {
issues.push("WebAssembly module is null");
}
if (!hasCreate && !hasOffline) {
issues.push("No SherpaONNX TTS API found (neither createOfflineTts nor Module.OfflineTts)");
}
const ready = this.wasmLoaded && (hasModule || hasGlobalModule) && (hasCreate || hasOffline);
return {
isInitialized: ready && issues.length === 0 ? true : ready, // consider ready if runtime is present
wasmLoaded: this.wasmLoaded,
wasmModule: !!(hasModule || hasGlobalModule),
createOfflineTts: hasCreate,
issues,
};
}
/**
* Get detailed error message for initialization issues
* @returns Detailed error message with troubleshooting steps
*/
getInitializationErrorMessage() {
const status = this.getInitializationStatus();
let message = "SherpaOnnx WebAssembly TTS is not properly initialized.\n\n";
message += "Issues found:\n";
for (const issue of status.issues) {
message += `- ${issue}\n`;
}
message += "\nTroubleshooting steps:\n";
message += "1. Ensure the SherpaOnnx WebAssembly files are properly loaded\n";
message += "2. Check that the WebAssembly module initialization completed successfully\n";
message += "3. Verify that createOfflineTts function is available in the global scope\n";
message += "4. Check browser console for WebAssembly loading errors\n";
message +=
"5. Ensure you're running in a supported environment (browser with WebAssembly support)\n";
return message;
}
/**
* Synthesize text to speech and stream the audio
* @param text Text to synthesize
* @param onAudioBuffer Callback for audio buffers
* @param onStart Callback for when synthesis starts
* @param onEnd Callback for when synthesis ends
* @param onWord Callback for word boundary events
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToStream(text, onAudioBuffer, onStart, onEnd, onWord, options) {
try {
// Call onStart callback
if (onStart) {
onStart();
}
// Synthesize the entire audio
const audioBytes = await this.synthToBytes(text, options);
// Estimate word boundaries
if (onWord) {
const wordBoundaries = (0, word_timing_estimator_1.estimateWordBoundaries)(text);
// Schedule word boundary events
for (const boundary of wordBoundaries) {
setTimeout(() => {
onWord(boundary.word, boundary.start, boundary.end);
}, boundary.start * 1000);
}
}
// Send the audio buffer
onAudioBuffer(audioBytes);
// Call onEnd callback
if (onEnd) {
onEnd();
}
}
catch (error) {
console.error("Error synthesizing text to stream:", error);
// Call onEnd callback even if there's an error
if (onEnd) {
onEnd();
}
// Re-throw the error so it can be caught by the caller
throw error;
}
}
/**
* Synthesize text to speech and save to a file
* @param text Text to synthesize
* @param filename Filename to save as
* @param format Audio format (mp3 or wav)
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToFile(text, filename, format = "wav", // Override base class to only allow 'wav'
options // Use specific options type
) {
try {
let outputFormat = format;
// Sherpa-ONNX only supports WAV output
if (outputFormat !== "wav") {
console.warn("SherpaOnnx WebAssembly TTS only supports WAV output. Using WAV instead of", outputFormat);
outputFormat = "wav";
}
// Use the base class's file saving logic (which detects Node/Browser)
await super.synthToFile(text, filename, outputFormat, options);
}
catch (error) {
console.error("Error synthesizing text to file:", error);
throw error;
}
}
/**
* Get a property value
* @param property Property name
* @returns Property value
*/
getProperty(property) {
switch (property) {
case "voice":
return this.currentVoiceId || this.voiceId || undefined;
case "sampleRate":
return this.sampleRate;
case "wasmLoaded":
return this.wasmLoaded;
case "wasmPath":
return this.wasmPath;
case "wasmBaseUrl":
return this.wasmBaseUrl;
case "mergedModelsUrl":
return this.mergedModelsUrl;
case "multiModelEnabled":
return this.enhancedOptions.enableMultiModel;
case "maxCachedModels":
return this.enhancedOptions.maxCachedModels;
case "loadedModels":
return this.modelManager?.getLoadedModelIds() ?? [];
case "currentModel":
return this.modelManager?.getCurrentModel();
case "availableModels":
return this.modelRepository?.getAvailableModels() || [];
default:
return super.getProperty(property);
}
}
/**
* Set a property value
* @param property Property name
* @param value Property value
*/
setProperty(property, value) {
switch (property) {
case "voice":
this.setVoice(value);
break;
case "wasmPath":
this.wasmPath = value;
break;
case "wasmBaseUrl":
this.wasmBaseUrl = value;
break;
case "mergedModelsUrl":
this.mergedModelsUrl = value;
if (this.modelRepository) {
// Recreate repository with new URL on the fly
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
break;
default:
super.setProperty(property, value);
break;
}
}
/**
* Build the OfflineTts configuration object expected by sherpa-onnx-tts.js.
* Uses the currently selected voice metadata to decide which model block
* (vits/ko