kitten-tts-webgpu

Version:

Run Kitten TTS (80M) locally in the browser via WebGPU. One async function call: `await textToSpeech('Hello!')` resolves to a WAV Blob.

60 lines (59 loc) • 2.05 kB

TypeScript

/**
 * kitten-tts-webgpu — Run Kitten TTS (80M) in the browser via WebGPU.
 *
 * Public type declarations for the package entry point.
 *
 * NOTE(review): "80M" matches the `'mini'` variant described on
 * `TextToSpeechOptions.model`; the documented default is `'nano'` (15M).
 *
 * One function is all you need:
 *
 * ```typescript
 * import { textToSpeech } from 'kitten-tts-webgpu';
 *
 * const wav = await textToSpeech('Hello, world!');
 * const audio = new Audio(URL.createObjectURL(wav));
 * audio.play();
 * ```
 */

/**
 * Model size variant. Default: `'nano'`.
 *
 * See `TextToSpeechOptions.model` for the parameter count quoted for each variant.
 */
export type ModelSize = 'mini' | 'micro' | 'nano';

/**
 * Options for the `textToSpeech` function.
 *
 * Every field is optional; the documented default applies when a field is omitted.
 */
export interface TextToSpeechOptions {
  /** Voice name. Default: `'Bella'`. */
  voice?: string;

  /**
   * Speaking speed multiplier. Default: `1.0`. Range: 0.5 – 2.0.
   *
   * NOTE(review): how out-of-range values are handled is not visible from
   * this declaration — confirm against the implementation.
   */
  speed?: number;

  /**
   * Model size variant. Default: `'nano'` (15M, fastest, 24 MB).
   * `'micro'` (40M) and `'mini'` (80M, best quality) are larger.
   */
  model?: ModelSize;

  /**
   * Progress callback, called with stage descriptions like `'Loading model…'`.
   *
   * The stage is a free-form string, not an enumerated set of values.
   */
  onProgress?: (stage: string) => void;
}

/**
 * Convert text to a WAV audio blob using Kitten TTS.
 *
 * On the first call, this downloads the model (~75 MB) and initializes WebGPU.
 * Subsequent calls reuse the loaded model and are fast (~1 second).
 *
 * NOTE(review): the ~75 MB figure disagrees with the 24 MB quoted for the
 * default `'nano'` variant on `TextToSpeechOptions.model` — TODO confirm the
 * per-variant download sizes.
 *
 * @param text - English text to synthesize (up to ~500 characters recommended)
 * @param options - Optional voice, speed, model size, and progress callback
 * @returns A promise resolving to a WAV audio Blob (16-bit PCM, 24 kHz, mono)
 *
 * @example
 * ```typescript
 * import { textToSpeech } from 'kitten-tts-webgpu';
 *
 * // Simple usage
 * const wav = await textToSpeech('Hello, world!');
 *
 * // Play it
 * const audio = new Audio(URL.createObjectURL(wav));
 * audio.play();
 *
 * // With options
 * const wav2 = await textToSpeech('Slow and steady wins the race.', {
 *   voice: 'Bella',
 *   speed: 0.8,
 *   onProgress: (stage) => console.log(stage),
 * });
 * ```
 */
export declare function textToSpeech(text: string, options?: TextToSpeechOptions): Promise<Blob>;

// Additional named exports forwarded from sibling modules. Presumably these
// are the lower-level pieces `textToSpeech` is built from — verify their
// contracts in ./engine.js, ./phonemizer.js and ./wav.js.
export { KittenTTSEngine } from './engine.js';
export { textToInputIds } from './phonemizer.js';
export { float32ToWav } from './wav.js';