kitten-tts-webgpu
Version:
Run Kitten TTS (80M) locally in the browser via WebGPU. One function call: textToSpeech('Hello!') → WAV blob.
43 lines (42 loc) • 1.72 kB
TypeScript
/**
* Browser-side phonemization for Kitten TTS.
*
* Primary: espeak-ng via WASM (phonemizer.js by xenova) — matches official kittentts package.
* Fallback: 234K-word espeak-ng dictionary + rule engine for unknown words.
*/
/**
* Convert pre-phonemized text (an IPA string) to model input_ids.
*
* The phoneme string is tokenized with the regex /\w+|[^\w\s]/g, the tokens
* are joined back together with spaces, each character of the joined string
* is looked up in the symbol table, and the resulting id sequence is wrapped
* with start/end tokens.
*
* NOTE(review): how characters that are missing from the symbol table are
* handled (skipped vs. error) is not visible from this declaration — confirm
* against the implementation.
*
* @param phonemes - IPA phoneme string, such as the strings produced by
* textToPhonemesEspeak or textToPhonemesDictRules.
* @returns The id sequence for the phonemes, including the start/end tokens.
*/
export declare function phonemesToInputIds(phonemes: string): number[];
/**
* Convert English text to IPA phonemes using espeak-ng WASM.
* Preserves punctuation to match official kittentts (which uses
* phonemizer.backend.EspeakBackend with preserve_punctuation=True).
*
* espeak-ng itself strips punctuation, so the conversion works in three steps:
* 1. Extract punctuation marks and their positions from the original text
* 2. Phonemize the punctuation-stripped text
* 3. Re-insert each punctuation mark at its original word boundary
*
* NOTE(review): presumably the returned promise rejects when the WASM backend
* cannot be loaded or run (textToInputIds documents a fallback for that
* case) — confirm against the implementation.
*
* @param text - English text to phonemize; may contain punctuation.
* @returns A promise resolving to the IPA phoneme string with punctuation
* re-inserted.
*/
export declare function textToPhonemesEspeak(text: string): Promise<string>;
/**
* Convert English text to IPA phonemes using the large dictionary + rule engine
* (the rule engine covers words that are not in the dictionary).
* Preserves punctuation (matching espeak preserve_punctuation=True).
*
* The output format mirrors the reference behaviour being matched: with
* preserve_punctuation=True, punctuation is attached directly to the adjacent
* phoneme word (no space before sentence-final punctuation).
*
* This path does not rely on espeak-ng WASM; textToInputIds uses it as the
* fallback when the WASM path is unavailable.
*
* @param text - English text to phonemize; may contain punctuation.
* @returns A promise resolving to the IPA phoneme string with punctuation
* preserved.
*/
export declare function textToPhonemesDictRules(text: string): Promise<string>;
/**
* Convert English text to input_ids.
* Primary: espeak-ng WASM (exact match with official kittentts).
* Fallback: large dictionary + rule engine (for Safari where WASM fails).
*
* NOTE(review): the exact condition that triggers the fallback (load failure,
* runtime error, browser detection) is not visible from this declaration —
* confirm against the implementation.
*
* @param text - English text to convert.
* @returns A promise resolving to an object with:
* - ids: the input id sequence for the text.
* - method: which phonemization path produced the ids — 'wasm' for the
* espeak-ng WASM path, 'dictionary' for the dictionary + rule engine fallback.
*/
export declare function textToInputIds(text: string): Promise<{
ids: number[];
method: 'wasm' | 'dictionary';
}>;