kitten-tts-webgpu

Version:

Run Kitten TTS (80M) locally in the browser via WebGPU. One function call: textToSpeech('Hello!') → WAV blob.

56 lines (55 loc) • 1.8 kB

TypeScript

/**
 * ONNX weight parser for Kitten TTS.
 * Loads weights from the ONNX protobuf file and dequantizes them for WebGPU.
 *
 * This file contains type declarations only (`declare`); the implementations
 * live in the corresponding compiled module and are not visible here.
 */

/**
 * One initializer tensor as returned by {@link OnnxParser.parseInitializers}.
 * Module-private: the interface is not exported, but it appears in the return
 * type of an exported method.
 */
interface OnnxTensor {
    /** Tensor name. NOTE(review): presumably also the key in the map returned by parseInitializers — confirm. */
    name: string;
    /** Tensor dimensions. */
    dims: number[];
    /** Numeric element-type code. NOTE(review): presumably the ONNX TensorProto data_type enum value — confirm against the implementation. */
    dataType: number;
    /** Tensor payload as bytes. NOTE(review): presumably undecoded (still quantized / little-endian packed) — confirm. */
    rawData: Uint8Array;
}

/**
 * Minimal ONNX protobuf parser.
 * Only parses TensorProto initializers from the model graph.
 * Avoids pulling in a full protobuf library.
 */
export declare class OnnxParser {
    // Private members are listed without types in this declaration; they are
    // implementation details and not part of the public API.
    private buffer;
    private view;
    /**
     * @param buffer - Contents of the ONNX model file.
     */
    constructor(buffer: ArrayBuffer);
    /**
     * Parse all initializer tensors from the ONNX model.
     *
     * @returns A map from string key to the parsed tensor record.
     */
    parseInitializers(): Map<string, OnnxTensor>;
    // Internal protobuf-decoding helpers (signatures not visible in this declaration).
    private parseTensorProto;
    private readTag;
    private readVarint;
    private skipField;
    private findField;
}

/**
 * Dequantize INT8 weights to float32.
 * ONNX uses: float_val = (int8_val - zero_point) * scale
 *
 * @param quantized - Quantized INT8 values.
 * @param scale - Scale factor(s). NOTE(review): whether this is per-tensor
 *   (length 1) or per-channel is not visible here — confirm.
 * @param zeroPoint - Zero point(s), or `null`. NOTE(review): `null` is
 *   presumably treated as a zero point of 0 — confirm.
 * @param shape - Tensor shape. NOTE(review): presumably used to map elements
 *   to per-channel scales — confirm.
 * @returns The dequantized values as float32.
 */
export declare function dequantizeInt8(quantized: Int8Array, scale: Float32Array, zeroPoint: Int8Array | null, shape: number[]): Float32Array;

/**
 * Dequantize UINT8 weights to float32.
 *
 * NOTE(review): presumably applies the same formula as {@link dequantizeInt8}
 * with unsigned inputs — confirm against the implementation.
 *
 * @param quantized - Quantized UINT8 values.
 * @param scale - Scale factor(s).
 * @param zeroPoint - Zero point(s), or `null`.
 * @param shape - Tensor shape.
 * @returns The dequantized values as float32.
 */
export declare function dequantizeUint8(quantized: Uint8Array, scale: Float32Array, zeroPoint: Uint8Array | null, shape: number[]): Float32Array;

/**
 * Convert float16 to float32.
 *
 * @param f16 - Half-precision values, one 16-bit pattern per element.
 * @returns The converted float32 values. NOTE(review): presumably the same
 *   length as the input — confirm.
 */
export declare function float16ToFloat32(f16: Uint16Array): Float32Array;

/**
 * Parse NPZ file (NumPy compressed archive) for voice embeddings.
 * NPZ is just a ZIP file containing .npy files.
 *
 * @param buffer - Contents of the .npz file.
 * @returns A promise resolving to a map from string key to the array's shape
 *   and float32 data. NOTE(review): keys are presumably the archive entry
 *   names (with or without the `.npy` suffix) — confirm.
 */
export declare function parseNpz(buffer: ArrayBuffer): Promise<Map<string, {
    shape: number[];
    data: Float32Array;
}>>;

/**
 * Parse a .npy file, handling float32, float16, and int64 dtypes. Always returns Float32Array.
 *
 * NOTE(review): because the result is always float32, int64 inputs with
 * magnitude above 2^24 cannot be represented exactly — confirm this is
 * acceptable for callers.
 *
 * @param buffer - Buffer containing the .npy data.
 * @param byteOffset - Optional offset into `buffer`. NOTE(review): presumably
 *   where the .npy header starts, defaulting to 0 — confirm.
 * @returns The array shape, its data as float32, and a dtype string.
 *   NOTE(review): `dtype` is presumably the original NumPy dtype descriptor
 *   from the header — confirm.
 */
export declare function parseNpyGeneric(buffer: ArrayBuffer, byteOffset?: number): {
    shape: number[];
    data: Float32Array;
    dtype: string;
};

export {};