UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

171 lines (170 loc) 5.69 kB
/**
 * SherpaOnnx WebAssembly TTS Client
 *
 * Enhanced version with multi-model support for browser environments.
 * Supports dynamic loading of Kokoro, Matcha, and VITS models.
 *
 * BACKWARD COMPATIBILITY: Maintains full compatibility with existing API.
 * New multi-model features are opt-in via constructor options.
 */
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice, WordBoundaryCallback } from "../types";

/** Model families that the client can load. */
export type ModelType = 'kokoro' | 'matcha' | 'vits';

/**
 * Static description of one model: its identity, voice metadata and the
 * files it consists of.
 */
export interface ModelConfig {
    /** Identifier of the model. */
    id: string;
    /** Model family; one of the {@link ModelType} literals. */
    type: ModelType;
    /** Name of the model. */
    name: string;
    /** Language of the model. NOTE(review): code format (e.g. BCP 47) is not visible here - confirm. */
    language: string;
    /** Gender associated with the model's voice. */
    gender: string;
    /** Sample rate of the model's audio. NOTE(review): presumably in Hz - confirm. */
    sampleRate: number;
    /**
     * References to the files that make up the model. Only `model` and
     * `tokens` are required; the remaining entries are optional.
     * NOTE(review): presumably URLs or paths - confirm against the loader.
     */
    files: {
        model: string;
        tokens: string;
        voices?: string;
        vocoder?: string;
        dataDir?: string;
    };
    /** Size of the model. NOTE(review): unit (bytes?) is not visible here - confirm. */
    size: number;
}

/**
 * Binary contents of a model's files. The keys mirror those of
 * `ModelConfig.files`, with the same required/optional split.
 */
export interface ModelFiles {
    model: ArrayBuffer;
    tokens: ArrayBuffer;
    voices?: ArrayBuffer;
    vocoder?: ArrayBuffer;
    dataDir?: ArrayBuffer;
}

/** Bookkeeping record that pairs a model's configuration with its load state. */
export interface LoadedModel {
    /** Configuration the model was created from. */
    config: ModelConfig;
    /** Numeric handle for the model. NOTE(review): presumably a WASM-side handle - confirm. */
    handle: number;
    /** Whether the model is loaded. */
    loaded: boolean;
    /** Last-use marker. NOTE(review): presumably a timestamp used for cache eviction - confirm. */
    lastUsed: number;
}

/** Opt-in options for the multi-model features of the client. */
export interface EnhancedWasmOptions {
    /** Turns on multi-model mode when set. */
    enableMultiModel?: boolean;
    /** NOTE(review): presumably an upper bound on models kept loaded at once - confirm. */
    maxCachedModels?: number;
}

declare global {
    interface Window {
        /**
         * Optional global, typed as `any`.
         * NOTE(review): presumably installed by the SherpaOnnx browser script - confirm.
         */
        SherpaOnnx?: any;
    }
}

/**
 * Extended options for SherpaOnnxWasm TTS.
 * The only output format that can be requested is "wav".
 */
export interface SherpaOnnxWasmTTSOptions extends SpeakOptions {
    format?: "wav";
}

/**
 * Enhanced SherpaOnnx WebAssembly TTS Client
 *
 * Supports both legacy single-model mode and new multi-model mode.
 * Maintains full backward compatibility with existing API.
 */
export declare class SherpaOnnxWasmTTSClient extends AbstractTTSClient {
    private wasmModule;
    private tts;
    private wasmPath;
    private wasmLoaded;
    private enhancedOptions;
    private modelRepository?;
    private modelManager?;
    private currentVoiceId?;
    /**
     * Create a new SherpaOnnx WebAssembly TTS client
     * @param credentials Optional credentials object
     * @param enhancedOptions Optional enhanced options for multi-model support
     */
    constructor(credentials?: TTSCredentials, enhancedOptions?: EnhancedWasmOptions);
    /**
     * Check if the credentials are valid
     * @returns Promise resolving to true if credentials are valid
     */
    checkCredentials(): Promise<boolean>;
    /**
     * Get available voices
     * @returns Promise resolving to an array of unified voice objects
     */
    protected _getVoices(): Promise<UnifiedVoice[]>;
    /**
     * Initialize the WebAssembly module
     * @param wasmUrl URL to the WebAssembly file
     * @returns Promise resolving when the module is initialized
     */
    initializeWasm(wasmUrl: string): Promise<void>;
    /**
     * Synthesize text to speech and return the audio as a byte array
     * @param text Text to synthesize
     * @param _options Options for synthesis
     * @returns Promise resolving to a byte array of audio data
     */
    synthToBytes(text: string, _options?: SpeakOptions): Promise<Uint8Array>;
    /**
     * Convert audio samples to the requested format
     * @param samples Float32Array of audio samples
     * @returns Uint8Array of audio data in the requested format
     */
    private _convertAudioFormat;
    /**
     * Mock implementation for synthToBytes
     * @returns Promise resolving to a byte array of audio data
     */
    private _mockSynthToBytes;
    /**
     * Synthesize text to speech and stream the audio
     * @param text Text to synthesize
     * @param onAudioBuffer Callback for audio buffers
     * @param onStart Callback for when synthesis starts
     * @param onEnd Callback for when synthesis ends
     * @param onWord Callback for word boundary events
     * @param options Options for synthesis
     * @returns Promise resolving when synthesis is complete
     */
    synthToStream(text: string, onAudioBuffer: (audioBuffer: Uint8Array) => void, onStart?: () => void, onEnd?: () => void, onWord?: WordBoundaryCallback, options?: SpeakOptions): Promise<void>;
    /**
     * Synthesize text to speech and save to a file
     * @param text Text to synthesize
     * @param filename Filename to save as
     * @param format Audio format; this client only accepts "wav"
     * @param options Options for synthesis
     * @returns Promise resolving when synthesis is complete
     */
    synthToFile(text: string, filename: string, format?: "wav", // Override base class to only allow 'wav'
    options?: SherpaOnnxWasmTTSOptions): Promise<void>;
    /**
     * Get a property value
     * @param property Property name
     * @returns Property value
     */
    getProperty(property: string): any;
    /**
     * Set a property value
     * @param property Property name
     * @param value Property value
     */
    setProperty(property: string, value: any): void;
    /**
     * Set the voice to use for synthesis
     * Enhanced with multi-model support while maintaining backward compatibility
     * @param voiceId Voice ID to use
     * @returns Promise resolving once the voice has been set
     */
    setVoice(voiceId: string): Promise<void>;
    /**
     * Clean up resources
     * Enhanced to handle multi-model cleanup
     */
    dispose(): void;
    /**
     * Synthesize text to a byte stream
     * @param text Text to synthesize
     * @param options Options for synthesis
     * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array
     */
    synthToBytestream(text: string, options?: SpeakOptions): Promise<{
        audioStream: ReadableStream<Uint8Array>;
        wordBoundaries: Array<{
            text: string;
            offset: number;
            duration: number;
        }>;
    }>;
}