UNPKG

kitten-tts-webgpu

Version:

Run Kitten TTS (80M) locally in the browser via WebGPU. One function call: textToSpeech('Hello!') → WAV blob.

43 lines (42 loc) 1.72 kB
/**
 * Browser-side phonemization for Kitten TTS.
 *
 * Primary: espeak-ng via WASM (phonemizer.js by xenova) — matches the official kittentts package.
 * Fallback: 234K-word espeak-ng dictionary + rule engine for unknown words.
 *
 * This file contains type declarations only; the behavior described below is
 * taken from the declarations' own documentation, not from visible code.
 */

/**
 * Convert pre-phonemized text (an IPA string) to input_ids.
 *
 * The phonemizer splits tokens with the regex `/\w+|[^\w\s]/g`,
 * joins them with spaces, looks up each character in the symbol table,
 * and wraps the result with start/end tokens.
 *
 * @param phonemes - Pre-phonemized text as an IPA string.
 * @returns The input_ids for `phonemes`, as an array of numbers.
 */
export declare function phonemesToInputIds(phonemes: string): number[];

/**
 * Convert English text to IPA phonemes using espeak-ng WASM.
 *
 * Preserves punctuation to match the official kittentts package (which uses
 * phonemizer.backend.EspeakBackend with preserve_punctuation=True).
 *
 * espeak-ng strips punctuation, so we:
 *   1. Extract punctuation + positions from the original text
 *   2. Phonemize the stripped text
 *   3. Re-insert punctuation at their original word boundaries
 *
 * @param text - English text to phonemize.
 * @returns A promise resolving to the IPA phoneme string with punctuation preserved.
 */
export declare function textToPhonemesEspeak(text: string): Promise<string>;

/**
 * Convert English text to IPA phonemes using the large dictionary + rule engine.
 *
 * Preserves punctuation (matching espeak preserve_punctuation=True):
 * espeak-ng with preserve_punctuation=True attaches punctuation directly to
 * the adjacent phoneme word (no space before sentence-final punctuation).
 *
 * @param text - English text to phonemize.
 * @returns A promise resolving to the IPA phoneme string with punctuation preserved.
 */
export declare function textToPhonemesDictRules(text: string): Promise<string>;

/**
 * Convert English text to input_ids.
 *
 * Primary: espeak-ng WASM (exact match with official kittentts).
 * Fallback: large dictionary + rule engine (for Safari where WASM fails).
 *
 * @param text - English text to convert.
 * @returns A promise resolving to an object with `ids` (the input_ids) and
 *   `method`, either `'wasm'` or `'dictionary'`.
 *   NOTE(review): `method` presumably reports which of the two paths above
 *   produced `ids` — confirm against the implementation.
 */
export declare function textToInputIds(text: string): Promise<{ ids: number[]; method: 'wasm' | 'dictionary'; }>;