js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
1,086 lines • 62.9 kB
JavaScript
"use strict";
/**
* SherpaOnnx WebAssembly TTS Client
*
* Enhanced version with multi-model support for browser environments.
* Supports dynamic loading of Kokoro, Matcha, and VITS models.
*
* BACKWARD COMPATIBILITY: Maintains full compatibility with existing API.
* New multi-model features are opt-in via constructor options.
*/
// TypeScript-emitted interop helper: re-exports property `k` of module `m`
// onto object `o` under the name `k2` (defaulting to `k`). When the source
// is a CommonJS module (no __esModule flag) or the property is a plain
// writable/configurable value, it is re-wrapped as a live getter so later
// mutations of m[k] remain visible through the re-export.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
// Replace the descriptor with a live getter bound to the source module.
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Legacy engines without Object.create: plain copy (binding is not live).
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// TypeScript-emitted helper: attaches the whole CommonJS module object as the
// `default` export of the namespace object that __importStar builds.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// TypeScript-emitted helper implementing `import * as ns from "..."` for
// CommonJS modules: a real ES module passes through untouched; otherwise every
// own key except "default" is re-bound onto a fresh namespace object and the
// module itself is installed as `ns.default`.
var __importStar = (this && this.__importStar) || (function () {
// Lazily picks the best own-key enumerator on first use, then memoizes it.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.SherpaOnnxWasmTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
const SpeechMarkdown = __importStar(require("../markdown/converter"));
const SSMLUtils = __importStar(require("../core/ssml-utils"));
const environment_1 = require("../utils/environment");
const word_timing_estimator_1 = require("../utils/word-timing-estimator");
/**
* Enhanced SherpaOnnx WebAssembly TTS Client
*
* Supports both legacy single-model mode and new multi-model mode.
* Maintains full backward compatibility with existing API.
*/
class SherpaOnnxWasmTTSClient extends abstract_tts_1.AbstractTTSClient {
/**
 * Create a new SherpaOnnx WebAssembly TTS client.
 *
 * Reads optional configuration from `credentials`:
 * - `wasmPath`: path/URL to the Emscripten glue JS
 * - `wasmBaseUrl`: base URL for the glue + .wasm files
 * - `mergedModelsUrl` / `modelsUrl`: URL of the merged models index JSON
 *
 * @param credentials Optional credentials object (see above)
 * @param enhancedOptions Optional enhanced options for multi-model support
 *   (`enableMultiModel`, `maxCachedModels`)
 */
constructor(credentials = {}, enhancedOptions = {}) {
super(credentials);
// Fields are declared via Object.defineProperty (TypeScript's
// "useDefineForClassFields" emit), which defines own data properties
// directly rather than going through any setters on the prototype chain.
// wasmModule: the Emscripten Module object once the glue script has run.
Object.defineProperty(this, "wasmModule", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
// tts: the active offline TTS instance created from the WASM module.
Object.defineProperty(this, "tts", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
// wasmPath: resolved URL/path of the glue JS script.
Object.defineProperty(this, "wasmPath", {
enumerable: true,
configurable: true,
writable: true,
value: ""
});
// wasmLoaded: true after initializeWasm() completed successfully.
Object.defineProperty(this, "wasmLoaded", {
enumerable: true,
configurable: true,
writable: true,
value: false
});
// wasmBaseUrl: base URL used by Module.locateFile to find the .wasm binary.
Object.defineProperty(this, "wasmBaseUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// mergedModelsUrl: URL of the merged voice-models index JSON.
Object.defineProperty(this, "mergedModelsUrl", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// Enhanced multi-model support
Object.defineProperty(this, "enhancedOptions", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelRepository", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "modelManager", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "currentVoiceId", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
// Capabilities: Browser-only engine, requires WASM runtime
this.capabilities = { browserSupported: true, nodeSupported: false, needsWasm: true };
// Set default sample rate for the Piper model
this.sampleRate = 22050;
// Optional configuration from credentials
this.wasmPath = credentials.wasmPath || ""; // JS glue path (if provided)
this.wasmBaseUrl = credentials.wasmBaseUrl || undefined; // Base URL for glue+wasm
this.mergedModelsUrl = credentials.mergedModelsUrl || credentials.modelsUrl || undefined;
// Enhanced options with defaults for backward compatibility
this.enhancedOptions = {
enableMultiModel: false, // Disabled by default for backward compatibility
maxCachedModels: 3,
...enhancedOptions
};
// Initialize multi-model components if enabled
// NOTE(review): ModelRepository is not imported/required anywhere in this
// visible chunk — presumably it is defined or required elsewhere in the
// file; verify it is actually in scope at runtime.
if (this.enhancedOptions.enableMultiModel) {
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
}
/**
* Get the list of required credential types for this engine
* @returns Array of required credential field names
*/
getRequiredCredentials() {
return []; // SherpaOnnx WASM doesn't require credentials, only WASM files
}
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid
*/
async checkCredentials() {
try {
// First check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (status.isInitialized) {
return true;
}
// In a browser environment, we can't check if the WASM file exists
// so we'll check if it's likely to be loaded later
if (typeof window !== "undefined") {
if (status.issues.length > 0) {
console.warn("SherpaOnnx not yet initialized:", status.issues.join(', '));
}
return true; // Assume it will be loaded later in browser
}
// In Node.js, check if the WASM file exists
if (environment_1.isNode && this.wasmPath && environment_1.fileSystem.existsSync(this.wasmPath)) {
if (status.issues.length > 0) {
console.warn("SherpaOnnx WASM file exists but not initialized:", status.issues.join(', '));
}
return true;
}
// If no WASM path is provided, assume it will be loaded later
if (!this.wasmPath) {
console.warn("No WASM path provided. SherpaOnnx WebAssembly TTS will need to be initialized manually.");
return true;
}
console.warn(`WASM file not found at ${this.wasmPath}`);
return false;
}
catch (error) {
console.error("Error checking SherpaOnnx WebAssembly credentials:", error);
return false;
}
}
/**
* Get available voices
* @returns Promise resolving to an array of unified voice objects
*/
async _getVoices() {
try {
// Enhanced multi-model support
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log("Using enhanced multi-model voice repository");
try {
const models = this.modelRepository.getAvailableModels();
return models.map((model) => ({
id: model.id,
name: model.name,
gender: model.gender,
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: model.language,
iso639_3: model.language.split("-")[0],
display: model.language,
},
],
}));
}
catch (error) {
console.error("Error getting voices from enhanced repository:", error);
// Fall through to legacy mode
}
}
// Legacy voice loading (backward compatibility)
console.log("Using legacy voice loading mode");
// Load the voice models JSON file
let voiceModels = [];
try {
// In Node.js, read from the file system
if (environment_1.isNode) {
const modelsJsonPath = environment_1.pathUtils.join(__dirname, "..", "data", "merged_models.json");
if (environment_1.fileSystem.existsSync(modelsJsonPath)) {
const modelsJson = environment_1.fileSystem.readFileSync(modelsJsonPath);
voiceModels = JSON.parse(modelsJson);
}
}
else {
// In browser environments, try to fetch from a URL
try {
const response = await fetch(this.mergedModelsUrl || "./data/merged_models.json");
if (response.ok) {
const modelsJson = await response.text();
voiceModels = JSON.parse(modelsJson);
}
else {
console.warn("Voice models JSON file not available in browser environment.");
// Return a default voice for testing
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
}
catch (fetchError) {
console.warn("Failed to fetch voice models JSON file:", fetchError);
// Return a default voice for testing
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
}
}
catch (error) {
console.error("Error loading voice models:", error);
}
// Filter for SherpaOnnx models and map to unified format
const sherpaOnnxModels = voiceModels.filter((model) => model.engine === "sherpaonnx" || model.engine === "sherpaonnx-wasm");
console.log("Found SherpaOnnx models:", sherpaOnnxModels);
const voices = sherpaOnnxModels.map((model) => ({
id: model.id,
name: model.name,
gender: model.gender || "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: model.language || "en-US",
iso639_3: model.language ? model.language.split("-")[0] : "eng",
display: model.language_display || "English (US)",
},
],
}));
// If no voices found, return a default voice for backward compatibility
if (voices.length === 0) {
return [
{
id: "piper_en_US",
name: "Piper English (US)",
gender: "Unknown",
provider: "sherpaonnx-wasm",
languageCodes: [
{
bcp47: "en-US",
iso639_3: "eng",
display: "English (US)",
},
],
},
];
}
return voices;
}
catch (error) {
console.error("Error getting SherpaOnnx WebAssembly voices:", error);
return [];
}
}
/**
* Initialize the WebAssembly module
* @param wasmUrl URL to the WebAssembly file
* @returns Promise resolving when the module is initialized
*/
async initializeWasm(wasmUrl) {
if (this.wasmLoaded) {
return;
}
try {
// In browser environments, load the WebAssembly module
if (environment_1.isBrowser) {
if (!wasmUrl) {
console.warn("No WebAssembly URL provided for browser environment.");
this.wasmLoaded = false;
return;
}
console.log("Loading WebAssembly module from", wasmUrl);
console.log(`Current state: wasmLoaded=${this.wasmLoaded}, wasmModule=${!!this.wasmModule}`);
try {
// Store the URL for later use
this.wasmPath = wasmUrl;
console.log("Setting wasmPath to:", this.wasmPath);
// Auto-load JS glue and WASM if not present
const w = window;
let baseUrl = this.wasmBaseUrl;
let scriptUrl;
const provided = wasmUrl || this.wasmPath || "";
if (provided) {
if (/\.js($|\?)/.test(provided)) {
scriptUrl = provided;
if (!baseUrl) {
const idx = provided.lastIndexOf("/");
if (idx > -1)
baseUrl = provided.slice(0, idx);
}
}
else {
baseUrl = provided;
}
}
if (!scriptUrl && baseUrl) {
const b = baseUrl.replace(/\/$/, "");
// Default glue filename (can be overridden by passing full wasmPath)
scriptUrl = `${b}/sherpaonnx.js`;
}
if (!scriptUrl) {
console.warn("No WASM script URL provided; attempting default ./sherpaonnx.js");
scriptUrl = "./sherpaonnx.js";
}
// Persist the resolved script URL
this.wasmPath = scriptUrl;
console.log("Resolved wasmPath to:", this.wasmPath);
// Ensure Module.locateFile points to the base for .wasm
w.Module = w.Module || {};
if (baseUrl) {
const b = baseUrl.replace(/\/$/, "");
w.Module.locateFile = (p) => `${b}/${p}`;
}
// Load the glue JS if createOfflineTts is not available
if (typeof w.createOfflineTts !== "function") {
await new Promise((resolve, reject) => {
const s = document.createElement("script");
s.src = scriptUrl;
s.async = true;
s.onload = () => resolve();
s.onerror = () => reject(new Error(`Failed to load SherpaONNX glue: ${scriptUrl}`));
document.head.appendChild(s);
});
}
// Wait for Module.calledRun and createOfflineTts to be ready
await new Promise((resolve, reject) => {
const giveUpAt = Date.now() + 15000; // 15s
const checkReady = () => {
if (typeof w.createOfflineTts === "function" &&
typeof w.Module !== "undefined" &&
w.Module.calledRun) {
resolve();
}
else if (Date.now() > giveUpAt) {
reject(new Error("Timed out waiting for SherpaONNX WASM to initialize"));
}
else {
setTimeout(checkReady, 200);
}
};
checkReady();
});
// Now that we know createOfflineTts and Module are available, store them
console.log("Storing Module and createOfflineTts");
this.wasmModule = window.Module;
this.wasmLoaded = true;
// Store the createOfflineTts function
if (this.wasmModule && !this.wasmModule.createOfflineTts) {
this.wasmModule.createOfflineTts = window.createOfflineTts;
}
// Initialize multi-model support if enabled
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log("Initializing enhanced multi-model support...");
try {
// Load models index
await this.modelRepository.loadModelsIndex();
// Initialize model manager
if (this.wasmModule) {
this.modelManager = new WasmModelManager(this.wasmModule, this.enhancedOptions.maxCachedModels);
}
console.log("Enhanced multi-model support initialized successfully");
}
catch (error) {
console.error("Error initializing multi-model support:", error);
console.log("Falling back to legacy single-model mode");
this.enhancedOptions.enableMultiModel = false;
}
}
console.log("WebAssembly module initialized successfully");
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
}
else {
// In Node.js, we can't directly use WebAssembly in the same way
console.warn("WebAssembly loading not implemented for Node.js environments.");
this.wasmLoaded = false;
}
}
catch (error) {
console.error("Error initializing WebAssembly:", error);
this.wasmLoaded = false;
}
console.log("End of initializeWasm method. wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available at end of initializeWasm:", typeof window.createOfflineTts === "function");
console.log("window.Module available at end of initializeWasm:", typeof window.Module !== "undefined");
if (typeof window.Module !== "undefined") {
console.log("window.Module.calledRun at end of initializeWasm:", window.Module.calledRun);
}
}
/**
 * Synthesize text to speech and return the audio as a byte array.
 *
 * Accepts plain text, SSML (tags are stripped — SherpaOnnx has no SSML
 * support) or Speech Markdown (converted to SSML, then stripped). Tries, in
 * order: the enhanced multi-model WASM interface, a fresh TTS instance built
 * from the global createOfflineTts, and finally the cached `this.tts`
 * instance (creating it on demand).
 *
 * @param text Text to synthesize
 * @param _options Options for synthesis (only `useSpeechMarkdown` is read here)
 * @returns Promise resolving to a byte array of WAV audio data
 * @throws Error if the WASM runtime is not initialized or generation fails
 */
async synthToBytes(text, _options) {
// Prepare text for synthesis (handle Speech Markdown and SSML)
let processedText = text;
// Convert from Speech Markdown if requested
if (_options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
// Convert to SSML first, then strip SSML tags since SherpaOnnx doesn't support SSML
const ssml = await SpeechMarkdown.toSSML(processedText);
processedText = SSMLUtils.stripSSML(ssml);
}
// If text is SSML, strip the tags as SherpaOnnx doesn't support SSML
if (SSMLUtils.isSSML(processedText)) {
processedText = SSMLUtils.stripSSML(processedText);
}
console.log("synthToBytes called with text:", processedText);
// Ensure runtime is initialized before attempting synthesis
if (environment_1.isBrowser) {
const status = this.getInitializationStatus();
if (!status.isInitialized) {
await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || "");
}
}
// Enhanced multi-model synthesis
// NOTE(review): _GenerateAudio is assumed to take (text, speakerId, speed)
// and return { samples, sampleRate } — provided by the enhanced WASM build;
// confirm against the glue code.
if (this.enhancedOptions.enableMultiModel && this.wasmModule && this.currentVoiceId) {
console.log(`Using enhanced multi-model synthesis for voice ${this.currentVoiceId}`);
try {
if (!this.wasmModule._GenerateAudio) {
throw new Error('Enhanced WASM module not loaded - _GenerateAudio not available');
}
// Generate audio using the enhanced WASM interface
const result = this.wasmModule._GenerateAudio(processedText, 0, 1.0); // text, speaker_id, speed
if (!result || !result.samples) {
throw new Error('Failed to generate audio with enhanced interface');
}
console.log(`Enhanced synthesis generated ${result.samples.length} samples at ${result.sampleRate}Hz`);
// Update sample rate if provided
if (result.sampleRate) {
this.sampleRate = result.sampleRate;
}
// Convert to WAV format
return this._convertAudioFormat(result.samples);
}
catch (error) {
console.error('Error with enhanced multi-model synthesis:', error);
console.log('Falling back to legacy synthesis mode');
// Fall through to legacy mode
}
}
// Legacy synthesis mode (backward compatibility)
console.log("Using legacy synthesis mode");
// IMPORTANT: We need to access the global window object directly
// This is because our code is bundled and the window object might not be accessible in the same way
const globalWindow = typeof window !== "undefined" ? window : typeof global !== "undefined" ? global : {};
console.log("Global window type:", typeof globalWindow);
// Check if we're in a browser environment
if (typeof globalWindow !== "undefined" && typeof document !== "undefined") {
console.log("Browser environment detected");
// Check if createOfflineTts is available in the global scope
const createOfflineTtsFn = globalWindow.createOfflineTts;
const moduleObj = globalWindow.Module;
console.log("createOfflineTts available in global scope:", typeof createOfflineTtsFn === "function");
console.log("Module available in global scope:", typeof moduleObj !== "undefined");
console.log("Module.calledRun:", moduleObj?.calledRun);
// Try to use the global createOfflineTts function directly
if (typeof createOfflineTtsFn === "function" &&
typeof moduleObj !== "undefined" &&
moduleObj.calledRun) {
console.log("Using global createOfflineTts function directly");
try {
// Create a new TTS instance directly
// Note: this instance is intentionally NOT cached on this.tts.
console.log("About to call createOfflineTts...");
const directTts = createOfflineTtsFn(moduleObj);
console.log("createOfflineTts call successful, tts object:", directTts);
console.log("TTS initialized with default configuration");
console.log(`Sample rate: ${directTts?.sampleRate}`);
console.log(`Number of speakers: ${directTts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (directTts && typeof directTts.sampleRate === "number") {
this.sampleRate = directTts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
// Generate audio
console.log("Generating audio directly...");
const result = directTts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Audio generated directly:", result);
console.log(`Generated ${result?.samples?.length} samples at ${result?.sampleRate}Hz`);
// Convert to WAV
const audioBytes = this._convertAudioFormat(result.samples);
console.log("Converted audio to WAV format, returning bytes");
return audioBytes;
}
catch (directError) {
console.error("Error using direct approach:", directError);
console.log("Falling back to standard approach");
}
}
else {
console.log("Direct approach not available, reason:");
if (typeof createOfflineTtsFn !== "function")
console.log("- createOfflineTts is not a function");
if (typeof moduleObj === "undefined")
console.log("- Module is undefined");
if (moduleObj && !moduleObj.calledRun)
console.log("- Module.calledRun is false");
}
}
else {
console.log("Not in a browser environment, skipping direct approach");
}
// If direct approach failed or not available, try the standard approach
console.log("Using standard approach");
console.log("Current state - wasmLoaded:", this.wasmLoaded, "wasmModule:", !!this.wasmModule);
console.log("createOfflineTts available:", typeof globalWindow.createOfflineTts === "function");
// Check if SherpaOnnx is properly initialized
const status = this.getInitializationStatus();
if (!status.isInitialized) {
const errorMessage = this.getInitializationErrorMessage();
console.error(errorMessage);
throw new Error(errorMessage);
}
try {
// Use the SherpaOnnx WebAssembly API to generate audio
console.log("Using SherpaOnnx WebAssembly to generate audio");
// Create a TTS instance if it doesn't exist (cached across calls).
if (!this.tts) {
console.log("Creating TTS instance");
try {
// Create the TTS instance
if (typeof window.createOfflineTts === "function") {
// Using the sherpa-onnx-tts.js API
console.log("Using createOfflineTts API from global scope");
console.log("createOfflineTts:", window.createOfflineTts);
console.log("Module:", window.Module);
try {
// Create the TTS instance with default configuration
console.log("About to call createOfflineTts...");
this.tts = window.createOfflineTts(window.Module);
console.log("createOfflineTts call successful, tts object:", this.tts);
console.log("TTS initialized with default configuration");
console.log(`Sample rate: ${this.tts?.sampleRate}`);
console.log(`Number of speakers: ${this.tts?.numSpeakers}`);
// Update the sample rate from the TTS engine
if (this.tts && typeof this.tts.sampleRate === "number") {
this.sampleRate = this.tts.sampleRate;
console.log(`Updated sample rate to ${this.sampleRate}`);
}
else {
console.warn("Could not update sample rate, using default");
}
}
catch (error) {
console.error("Error creating TTS instance with createOfflineTts:", error);
throw error;
}
}
else if (this.wasmModule?.OfflineTts) {
// Using the Module.OfflineTts API
console.log("Using Module.OfflineTts API");
this.tts = new this.wasmModule.OfflineTts();
}
else {
throw new Error("No compatible TTS API found");
}
console.log("TTS instance created successfully");
}
catch (error) {
console.error("Error creating TTS instance:", error);
throw new Error(`Failed to create SherpaOnnx TTS instance: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Generate the audio
// Note: the raw `text` is logged here, but `processedText` is synthesized.
console.log("Generating audio for text:", text);
let samples;
if (typeof this.tts.generate === "function") {
// Using the generate method from sherpa-onnx-tts.js
console.log("Using generate method");
console.log("this.tts.generate:", this.tts.generate);
try {
console.log("Calling generate with:", { text: processedText, sid: 0, speed: 1.0 });
const result = this.tts.generate({ text: processedText, sid: 0, speed: 1.0 });
console.log("Generate call successful, result:", result);
samples = result.samples;
console.log(`Generated audio with sample rate: ${result.sampleRate} and samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generate:", error);
throw error;
}
}
else if (typeof this.tts.generateWithText === "function") {
// Using the generateWithText method
console.log("Using generateWithText method");
console.log("this.tts.generateWithText:", this.tts.generateWithText);
try {
console.log("Calling generateWithText with:", processedText);
samples = this.tts.generateWithText(processedText);
console.log(`Generated audio with samples: ${samples.length}`);
}
catch (error) {
console.error("Error calling generateWithText:", error);
throw error;
}
}
else {
console.error("No compatible generate method found");
console.log("Available methods on this.tts:", Object.keys(this.tts).filter((key) => typeof this.tts[key] === "function"));
throw new Error("No compatible generate method found");
}
console.log("Audio generated successfully, samples:", samples.length);
// Convert the samples to the requested format
const audioBytes = this._convertAudioFormat(samples);
return audioBytes;
}
catch (error) {
console.error("Error synthesizing text:", error);
throw new Error(`SherpaOnnx synthesis failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Convert audio samples to the requested format
* @param samples Float32Array of audio samples
* @returns Uint8Array of audio data in the requested format
*/
_convertAudioFormat(samples) {
// For now, we'll just return a WAV file
// In a real implementation, we would use a library like audioEncoder
// to convert to the requested format
// Convert Float32Array to Int16Array
const int16Samples = new Int16Array(samples.length);
for (let i = 0; i < samples.length; i++) {
// Scale to 16-bit range and clamp
const sample = Math.max(-1, Math.min(1, samples[i]));
int16Samples[i] = Math.floor(sample * 32767);
}
// Create a WAV file header
const wavHeader = new ArrayBuffer(44);
const view = new DataView(wavHeader);
// "RIFF" chunk descriptor
view.setUint8(0, "R".charCodeAt(0));
view.setUint8(1, "I".charCodeAt(0));
view.setUint8(2, "F".charCodeAt(0));
view.setUint8(3, "F".charCodeAt(0));
// Chunk size (file size - 8)
view.setUint32(4, 36 + int16Samples.length * 2, true);
// Format ("WAVE")
view.setUint8(8, "W".charCodeAt(0));
view.setUint8(9, "A".charCodeAt(0));
view.setUint8(10, "V".charCodeAt(0));
view.setUint8(11, "E".charCodeAt(0));
// "fmt " sub-chunk
view.setUint8(12, "f".charCodeAt(0));
view.setUint8(13, "m".charCodeAt(0));
view.setUint8(14, "t".charCodeAt(0));
view.setUint8(15, " ".charCodeAt(0));
// Sub-chunk size (16 for PCM)
view.setUint32(16, 16, true);
// Audio format (1 for PCM)
view.setUint16(20, 1, true);
// Number of channels (1 for mono)
view.setUint16(22, 1, true);
// Sample rate
view.setUint32(24, this.sampleRate, true);
// Byte rate (sample rate * channels * bytes per sample)
view.setUint32(28, this.sampleRate * 1 * 2, true);
// Block align (channels * bytes per sample)
view.setUint16(32, 1 * 2, true);
// Bits per sample
view.setUint16(34, 16, true);
// "data" sub-chunk
view.setUint8(36, "d".charCodeAt(0));
view.setUint8(37, "a".charCodeAt(0));
view.setUint8(38, "t".charCodeAt(0));
view.setUint8(39, "a".charCodeAt(0));
// Sub-chunk size (number of samples * channels * bytes per sample)
view.setUint32(40, int16Samples.length * 1 * 2, true);
// Combine the header and the samples
const wavBytes = new Uint8Array(wavHeader.byteLength + int16Samples.length * 2);
wavBytes.set(new Uint8Array(wavHeader), 0);
// Convert Int16Array to Uint8Array
const samplesBytes = new Uint8Array(int16Samples.buffer);
wavBytes.set(samplesBytes, wavHeader.byteLength);
return wavBytes;
}
/**
* Check if SherpaOnnx is properly initialized
* @returns Object with initialization status and details
*/
getInitializationStatus() {
const globalWindow = (typeof window !== 'undefined' ? window : global);
const issues = [];
if (!this.wasmLoaded) {
issues.push("WebAssembly module not loaded");
}
if (!this.wasmModule) {
issues.push("WebAssembly module is null");
}
if (typeof globalWindow.createOfflineTts !== "function") {
issues.push("createOfflineTts function not available");
}
return {
isInitialized: issues.length === 0,
wasmLoaded: this.wasmLoaded,
wasmModule: !!this.wasmModule,
createOfflineTts: typeof globalWindow.createOfflineTts === "function",
issues
};
}
/**
* Get detailed error message for initialization issues
* @returns Detailed error message with troubleshooting steps
*/
getInitializationErrorMessage() {
const status = this.getInitializationStatus();
let message = "SherpaOnnx WebAssembly TTS is not properly initialized.\n\n";
message += "Issues found:\n";
status.issues.forEach(issue => {
message += `- ${issue}\n`;
});
message += "\nTroubleshooting steps:\n";
message += "1. Ensure the SherpaOnnx WebAssembly files are properly loaded\n";
message += "2. Check that the WebAssembly module initialization completed successfully\n";
message += "3. Verify that createOfflineTts function is available in the global scope\n";
message += "4. Check browser console for WebAssembly loading errors\n";
message += "5. Ensure you're running in a supported environment (browser with WebAssembly support)\n";
return message;
}
/**
* Synthesize text to speech and stream the audio
* @param text Text to synthesize
* @param onAudioBuffer Callback for audio buffers
* @param onStart Callback for when synthesis starts
* @param onEnd Callback for when synthesis ends
* @param onWord Callback for word boundary events
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToStream(text, onAudioBuffer, onStart, onEnd, onWord, options) {
try {
// Call onStart callback
if (onStart) {
onStart();
}
// Synthesize the entire audio
const audioBytes = await this.synthToBytes(text, options);
// Estimate word boundaries
if (onWord) {
const wordBoundaries = (0, word_timing_estimator_1.estimateWordBoundaries)(text);
// Schedule word boundary events
for (const boundary of wordBoundaries) {
setTimeout(() => {
onWord(boundary.word, boundary.start, boundary.end);
}, boundary.start * 1000);
}
}
// Send the audio buffer
onAudioBuffer(audioBytes);
// Call onEnd callback
if (onEnd) {
onEnd();
}
}
catch (error) {
console.error("Error synthesizing text to stream:", error);
// Call onEnd callback even if there's an error
if (onEnd) {
onEnd();
}
// Re-throw the error so it can be caught by the caller
throw error;
}
}
/**
* Synthesize text to speech and save to a file
* @param text Text to synthesize
* @param filename Filename to save as
* @param format Audio format (mp3 or wav)
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
async synthToFile(text, filename, format = "wav", // Override base class to only allow 'wav'
options // Use specific options type
) {
try {
let outputFormat = format;
// Sherpa-ONNX only supports WAV output
if (outputFormat !== "wav") {
console.warn("SherpaOnnx WebAssembly TTS only supports WAV output. Using WAV instead of", outputFormat);
outputFormat = "wav";
}
// Use the base class's file saving logic (which detects Node/Browser)
await super.synthToFile(text, filename, outputFormat, options);
}
catch (error) {
console.error("Error synthesizing text to file:", error);
throw error;
}
}
/**
* Get a property value
* @param property Property name
* @returns Property value
*/
getProperty(property) {
switch (property) {
case "voice":
return this.currentVoiceId || this.voiceId || undefined;
case "sampleRate":
return this.sampleRate;
case "wasmLoaded":
return this.wasmLoaded;
case "wasmPath":
return this.wasmPath;
case "wasmBaseUrl":
return this.wasmBaseUrl;
case "mergedModelsUrl":
return this.mergedModelsUrl;
case "multiModelEnabled":
return this.enhancedOptions.enableMultiModel;
case "maxCachedModels":
return this.enhancedOptions.maxCachedModels;
case "loadedModels":
return this.modelManager ? Array.from(this.modelManager['loadedModels'].keys()) : [];
case "currentModel":
return this.modelManager?.getCurrentModel();
case "availableModels":
return this.modelRepository?.getAvailableModels() || [];
default:
return super.getProperty(property);
}
}
/**
* Set a property value
* @param property Property name
* @param value Property value
*/
setProperty(property, value) {
switch (property) {
case "voice":
this.setVoice(value);
break;
case "wasmPath":
this.wasmPath = value;
break;
case "wasmBaseUrl":
this.wasmBaseUrl = value;
break;
case "mergedModelsUrl":
this.mergedModelsUrl = value;
if (this.modelRepository) {
// Recreate repository with new URL on the fly
this.modelRepository = new ModelRepository(this.mergedModelsUrl);
}
break;
default:
super.setProperty(property, value);
break;
}
}
/**
 * Set the voice to use for synthesis.
 *
 * Enhanced multi-model mode downloads the model archive, decompresses
 * (.bz2) and untars it, and mounts the files into the Emscripten virtual
 * filesystem under /assets, then resets the cached TTS instance so the
 * next synthesis picks up the new model. Legacy mode only resets the TTS
 * instance. Maintains backward compatibility with the previous API.
 *
 * @param voiceId Voice ID to use
 * @throws Error in multi-model mode when the WASM module, model URL,
 *   decompressor, or Emscripten FS is unavailable
 */
async setVoice(voiceId) {
// Call the parent method to set the voiceId
super.setVoice(voiceId);
console.log(`Setting voice to ${voiceId}`);
// Enhanced multi-model support (loader-only runtime: fetch, extract, mount into /assets)
if (this.enhancedOptions.enableMultiModel && this.modelRepository) {
console.log(`Using enhanced multi-model mode for voice ${voiceId}`);
// Ensure WASM is initialized to access FS
if (!this.wasmLoaded) {
await this.initializeWasm(this.wasmPath || this.wasmBaseUrl || "");
}
if (!this.wasmModule) {
throw new Error("WASM module not initialized");
}
// Resolve model config and URL
const cfg = this.modelRepository.getModelConfig(voiceId);
if (!cfg || !cfg.url) {
throw new Error(`No URL found for model ${voiceId}`);
}
// Fetch archive
console.log(`Fetching model archive: ${cfg.url}`);
const res = await fetch(cfg.url);
if (!res.ok)
throw new Error(`Failed to fetch model: ${res.status} ${res.statusText}`);
const archiveBuf = await res.arrayBuffer();
// Decompress .bz2 if needed
// NOTE(review): "compressjs" is resolved dynamically at call time — it is a
// runtime dependency that must be installed/bundled; verify packaging.
let tarBuffer = archiveBuf;
if (cfg.compressed) {
const compressjsMod = await Promise.resolve().then(() => __importStar(require("compressjs")));
const Bzip2 = compressjsMod.Bzip2 || compressjsMod.BZ2;
if (!Bzip2 || typeof Bzip2.decompressFile !== "function") {
throw new Error("Bzip2 decompressor not available");
}
const outArr = Bzip2.decompressFile(new Uint8Array(archiveBuf));
tarBuffer = new Uint8Array(outArr).buffer;
}
// Extract tar
// NOTE(review): "js-untar" is likewise a dynamic runtime dependency.
const untarMod = await Promise.resolve().then(() => __importStar(require("js-untar")));
const untar = untarMod.default || untarMod;
const entries = await untar(tarBuffer);
console.log(`Extracted ${entries.length} entries from TAR`);
const M = this.wasmModule;
const FS = M.FS;
if (!FS)
throw new Error("Emscripten FS not available");
// Ensure /assets exists
try {
FS.mkdir("/assets");
}
catch { /* already exists — ignore */ }
try {
FS.mkdir("/assets/espeak-ng-data");
}
catch { /* already exists — ignore */ }
// Helper to create directories recursively
const mkdirp = (dir) => {
// Prefer Emscripten's native mkdirTree when available.
if (FS.mkdirTree) {
try {
FS.mkdirTree(dir);
return;
}
catch { /* fall back to manual segment-by-segment creation */ }
}
const parts = dir.split("/").filter(Boolean);
let cur = "";
for (const p of parts) {
cur += "/" + p;
try {
FS.mkdir(cur);
}
catch { /* segment already exists — ignore */ }
}
};
// Map archive files to expected /assets layout.
// Heuristic filename matching: model.onnx, tokens.txt, espeak-ng-data/*,
// voices*.bin and vocoder*.onnx are relocated to fixed /assets paths.
for (const e of entries) {
if (!e || !e.name)
continue;
const name = String(e.name).replace(/^\.\//, "");
const lower = name.toLowerCase();
// Only write file entries (js-untar uses `buffer` for file data)
if (!e.buffer)
continue;
let outPath = null;
if (lower.endsWith("/model.onnx") || lower === "model.onnx") {
outPath = "/assets/model.onnx";
}
else if (lower.endsWith("/tokens.txt") || lower === "tokens.txt") {
outPath = "/assets/tokens.txt";
}
else if (lower.includes("/espeak-ng-data/") || lower.startsWith("espeak-ng-data/")) {
outPath = "/assets/" + name.substring(name.toLowerCase().indexOf("espeak-ng-data/"));
}
else if (lower.endsWith("voices.bin") || (lower.includes("voices") && lower.endsWith(".bin"))) {
outPath = "/assets/voices.bin";
}
else if (lower.includes("vocoder") && lower.endsWith(".onnx")) {
outPath = "/assets/vocoder.onnx";
}
if (outPath) {
const dir = outPath.substring(0, outPath.lastIndexOf("/"));
mkdirp(dir);
FS.writeFile(outPath, new Uint8Array(e.buffer));
}
}
// Reset TTS so next synthesis uses the new assets
if (this.tts) {
try {
if (typeof this.wasmModule._ttsDestroyOffline === "function")
this.wasmModule._ttsDestroyOffline(this.tts);
}
catch { /* best-effort destroy — ignore failures */ }
this.tts = null;
}
this.currentVoiceId = voiceId;
console.log(`Prepared /assets for voice ${voiceId}`);
return;
}
// Legacy single-model mode (backward compatibility)
console.log(`Using legacy single-model mode for voice ${voiceId}`);
// Reset the TTS instance so it will be recreated with the new voice
if (this.tts) {
console.log("Resetting TTS instance for new voice");
this.tts = null;
}
}
/**
* Clean up resources
* Enhanced to handle multi-model cleanup
*/
dispose() {
// Clean up multi-model resources
if (this.modelManager) {
console.log("Disposing multi-model resources");
this.modelManager.dispose();
this.modelManager = undefined;
}
// Clean up legacy TTS instance
if (this.wasmModule && this.tts !== 0) {
if (typeof this.wasmModule._ttsDestroyOffline === "function") {
this.wasmModule._ttsDestroyOffline(this.tts);
}
this.tts = null;
}
// Reset state
this.currentVoiceId = undefined;
this.wasmLoaded = false;
this.wasmModule = null;
}
/**
* Synthesize text to a byte stream
*