kitten-tts-webgpu
Version:
Run Kitten TTS (80M) locally in the browser via WebGPU. One function call: textToSpeech('Hello!') → WAV blob.
60 lines (59 loc) • 2.05 kB
TypeScript
/**
* kitten-tts-webgpu — Run Kitten TTS (80M) in the browser via WebGPU.
*
* One function is all you need:
*
* ```typescript
* import { textToSpeech } from 'kitten-tts-webgpu';
*
* const wav = await textToSpeech('Hello, world!');
* const audio = new Audio(URL.createObjectURL(wav));
* audio.play();
* ```
*/
/**
 * Identifies which Kitten TTS model variant to load.
 * When no variant is specified, `'nano'` is used.
 */
export type ModelSize =
  | 'nano'
  | 'micro'
  | 'mini';
/** Optional settings accepted by `textToSpeech`. Every field may be omitted. */
export interface TextToSpeechOptions {
  /**
   * Name of the voice to speak with.
   * Defaults to `'Bella'`.
   */
  voice?: string;

  /**
   * Multiplier applied to the speaking speed.
   * Defaults to `1.0`; the documented range is 0.5 – 2.0.
   */
  speed?: number;

  /**
   * Which model variant to use:
   * - `'nano'` — 15M, fastest, 24 MB (the default)
   * - `'micro'` — 40M
   * - `'mini'` — 80M, best quality
   *
   * `'micro'` and `'mini'` are larger than `'nano'`.
   */
  model?: ModelSize;

  /**
   * Progress hook, invoked with a stage description such as `'Loading model…'`.
   */
  onProgress?: (stage: string) => void;
}
/**
 * Convert text to a WAV audio blob using Kitten TTS.
 *
 * On the first call, this downloads the model and initializes WebGPU.
 * The download size depends on `options.model`; the default `'nano'` variant
 * is documented as 24 MB on `TextToSpeechOptions.model`.
 * NOTE(review): this comment used to quote ~75 MB, which does not match the
 * default variant — presumably it is the size of a larger variant; confirm.
 * Subsequent calls reuse the loaded model and are fast (~1 second).
 *
 * @param text - English text to synthesize (up to ~500 characters recommended)
 * @param options - Optional voice, speed, model size, and progress callback
 *   (see `TextToSpeechOptions` for the defaults)
 * @returns A promise that resolves to a WAV audio Blob (16-bit PCM, 24 kHz, mono)
 *
 * @example
 * ```typescript
 * import { textToSpeech } from 'kitten-tts-webgpu';
 *
 * // Simple usage
 * const wav = await textToSpeech('Hello, world!');
 *
 * // Play it
 * const audio = new Audio(URL.createObjectURL(wav));
 * audio.play();
 *
 * // With options
 * const wav2 = await textToSpeech('Slow and steady wins the race.', {
 *   voice: 'Bella',
 *   speed: 0.8,
 *   onProgress: (stage) => console.log(stage),
 * });
 * ```
 */
export declare function textToSpeech(text: string, options?: TextToSpeechOptions): Promise<Blob>;
// Re-exports from sibling modules, so these names can also be imported from
// this entry point. Their declarations live in './engine.js',
// './phonemizer.js' and './wav.js' respectively (not visible in this file).
export { KittenTTSEngine } from './engine.js';
export { textToInputIds } from './phonemizer.js';
export { float32ToWav } from './wav.js';