js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
171 lines (170 loc) • 5.69 kB
TypeScript
/**
* SherpaOnnx WebAssembly TTS Client
*
* Enhanced version with multi-model support for browser environments.
* Supports dynamic loading of Kokoro, Matcha, and VITS models.
*
* BACKWARD COMPATIBILITY: Maintains full compatibility with existing API.
* New multi-model features are opt-in via constructor options.
*/
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice, WordBoundaryCallback } from "../types";
/**
* String-literal tag identifying which model family a model belongs to:
* Kokoro, Matcha, or VITS.
*/
export type ModelType = 'kokoro' | 'matcha' | 'vits';
/**
* Static description of a single TTS model: its identity, voice metadata,
* and the names of the files it is made of.
*/
export interface ModelConfig {
/** Identifier for this model. NOTE(review): presumably unique and usable as a voice ID - confirm. */
id: string;
/** Model family; one of the ModelType literals. */
type: ModelType;
/** Model name. NOTE(review): presumably a human-readable display name - confirm. */
name: string;
/** Language of the voice. NOTE(review): the expected code format is not visible in this declaration - confirm. */
language: string;
/** Gender of the voice, as a free-form string (no literal union is declared). */
gender: string;
/** Audio sample rate. NOTE(review): presumably in Hz - confirm. */
sampleRate: number;
/**
* String references to the model's files. `model` and `tokens` are required;
* `voices`, `vocoder` and `dataDir` are optional.
* NOTE(review): whether these are URLs, relative paths, or file names is not
* visible in this declaration - confirm against the implementation.
*/
files: {
model: string;
tokens: string;
voices?: string;
vocoder?: string;
dataDir?: string;
};
/** Size of the model. NOTE(review): the unit is not stated here (presumably bytes) - confirm. */
size: number;
}
/**
* Binary contents of a model's files. The keys are the same as those of
* `ModelConfig.files`, but each value is an ArrayBuffer instead of a string.
* `model` and `tokens` are required; the remaining entries are optional.
*/
export interface ModelFiles {
model: ArrayBuffer;
tokens: ArrayBuffer;
voices?: ArrayBuffer;
vocoder?: ArrayBuffer;
dataDir?: ArrayBuffer;
}
/**
* Bookkeeping record for a model together with its configuration.
*/
export interface LoadedModel {
/** Configuration the model was described by. */
config: ModelConfig;
/** Numeric handle for the model. NOTE(review): presumably a handle/pointer into the WASM runtime - confirm. */
handle: number;
/** Whether the model is currently loaded. */
loaded: boolean;
/** NOTE(review): presumably a timestamp of last use, used for cache eviction - unit and purpose to be confirmed. */
lastUsed: number;
}
/**
* Opt-in options accepted by the SherpaOnnxWasmTTSClient constructor for the
* multi-model features. Both fields are optional; their default values are
* not visible in this declaration.
*/
export interface EnhancedWasmOptions {
/** Turns on multi-model mode when true. */
enableMultiModel?: boolean;
/** Upper bound on the number of models kept cached. NOTE(review): eviction policy is not visible here - confirm. */
maxCachedModels?: number;
}
/**
* Global augmentation: adds an optional `SherpaOnnx` property to the browser
* `Window` interface so that `window.SherpaOnnx` type-checks.
* The property is typed `any`, so accesses through it are not type-checked.
* NOTE(review): presumably populated by the SherpaOnnx WebAssembly glue
* script at runtime - confirm.
*/
declare global {
interface Window {
SherpaOnnx?: any;
}
}
/**
* Extended options for SherpaOnnxWasm TTS.
*
* Inherits everything from SpeakOptions and declares `format` as the single
* literal "wav"; no other audio format is accepted by this type.
*/
export interface SherpaOnnxWasmTTSOptions extends SpeakOptions {
/** Output audio format; only "wav" is allowed. */
format?: "wav";
}
/**
* Enhanced SherpaOnnx WebAssembly TTS Client
*
* Supports both legacy single-model mode and new multi-model mode.
* Maintains full backward compatibility with existing API.
*
* This is a declaration only: method bodies are not part of this file, so the
* behavioral notes below describe the declared contract, not the implementation.
*/
export declare class SherpaOnnxWasmTTSClient extends AbstractTTSClient {
// Private state. The types of these members are not present in this
// declaration output, so only their names are visible here.
private wasmModule;
private tts;
private wasmPath;
private wasmLoaded;
private enhancedOptions;
// The three members below are optional (declared with `?`).
private modelRepository?;
private modelManager?;
private currentVoiceId?;
/**
* Create a new SherpaOnnx WebAssembly TTS client
* @param credentials Optional credentials object
* @param enhancedOptions Optional enhanced options for multi-model support
*/
constructor(credentials?: TTSCredentials, enhancedOptions?: EnhancedWasmOptions);
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid
*/
checkCredentials(): Promise<boolean>;
/**
* Get available voices
* @returns Promise resolving to an array of unified voice objects
*/
protected _getVoices(): Promise<UnifiedVoice[]>;
/**
* Initialize the WebAssembly module
* @param wasmUrl URL to the WebAssembly file
* @returns Promise resolving when the module is initialized
*/
initializeWasm(wasmUrl: string): Promise<void>;
/**
* Synthesize text to speech and return the audio as a byte array
* @param text Text to synthesize
* @param _options Options for synthesis. NOTE(review): the leading underscore
* suggests the implementation may ignore this argument - confirm.
* @returns Promise resolving to a byte array of audio data
*/
synthToBytes(text: string, _options?: SpeakOptions): Promise<Uint8Array>;
/**
* Convert audio samples to the requested format
* @param samples Float32Array of audio samples
* @returns Uint8Array of audio data in the requested format
*/
private _convertAudioFormat;
/**
* Mock implementation for synthToBytes
* @returns Promise resolving to a byte array of audio data
*/
private _mockSynthToBytes;
/**
* Synthesize text to speech and stream the audio
* @param text Text to synthesize
* @param onAudioBuffer Callback for audio buffers
* @param onStart Callback for when synthesis starts
* @param onEnd Callback for when synthesis ends
* @param onWord Callback for word boundary events
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
synthToStream(text: string, onAudioBuffer: (audioBuffer: Uint8Array) => void, onStart?: () => void, onEnd?: () => void, onWord?: WordBoundaryCallback, options?: SpeakOptions): Promise<void>;
/**
* Synthesize text to speech and save to a file
* @param text Text to synthesize
* @param filename Filename to save as
* @param format Audio format; this signature accepts only "wav"
* @param options Options for synthesis
* @returns Promise resolving when synthesis is complete
*/
synthToFile(text: string, filename: string, format?: "wav", // Override base class to only allow 'wav'
options?: SherpaOnnxWasmTTSOptions): Promise<void>;
/**
* Get a property value
* @param property Property name
* @returns Property value (typed `any`, so callers must validate it themselves)
*/
getProperty(property: string): any;
/**
* Set a property value
* @param property Property name
* @param value Property value (typed `any`; no validation is expressed by this signature)
*/
setProperty(property: string, value: any): void;
/**
* Set the voice to use for synthesis
* Enhanced with multi-model support while maintaining backward compatibility
* @param voiceId Voice ID to use
* @returns Promise resolving when the voice has been set; callers should await it
*/
setVoice(voiceId: string): Promise<void>;
/**
* Clean up resources
* Enhanced to handle multi-model cleanup
*/
dispose(): void;
/**
* Synthesize text to a byte stream
* @param text Text to synthesize
* @param options Options for synthesis
* @returns Promise resolving to an object containing the audio stream and an empty word boundaries array
*/
synthToBytestream(text: string, options?: SpeakOptions): Promise<{
audioStream: ReadableStream<Uint8Array>;
wordBoundaries: Array<{
text: string;
offset: number;
duration: number;
}>;
}>;
}