kitten-tts-webgpu
Version:
Run Kitten TTS (80M) locally in the browser via WebGPU. One function call: textToSpeech('Hello!') → WAV blob.
56 lines (55 loc) • 1.8 kB
TypeScript
/**
* ONNX weight parser for Kitten TTS.
* Loads weights from the ONNX protobuf file and dequantizes them for WebGPU.
*/
/**
* One tensor entry as handed back by the parser: a name, a shape, a numeric
* type code, and a byte payload.
* Not exported; callers see it only through the return type of
* OnnxParser.parseInitializers().
*/
interface OnnxTensor {
/** Tensor name (presumably the TensorProto `name` field; confirm in the implementation). */
name: string;
/** Tensor shape, one entry per dimension. */
dims: number[];
/**
* Numeric element-type code. Presumably the ONNX `TensorProto.DataType`
* enum value; TODO confirm against the implementation.
*/
dataType: number;
/**
* Tensor payload as bytes. Presumably still encoded according to `dataType`
* and not yet converted to a typed array; TODO confirm.
*/
rawData: Uint8Array;
}
/**
* Minimal ONNX protobuf parser.
* Only parses TensorProto initializers from the model graph.
* Avoids pulling in a full protobuf library.
*
* Only the constructor and parseInitializers() are public. Every other member
* is private and has no type in this declaration, so its behaviour cannot be
* read from here.
*/
export declare class OnnxParser {
/** Private state; presumably the ArrayBuffer passed to the constructor (type not visible here). */
private buffer;
/** Private state; presumably a view over `buffer` used for reads (type not visible here). */
private view;
/**
* @param buffer Bytes to parse; presumably the complete ONNX model file,
* since initializers are read from the model graph.
*/
constructor(buffer: ArrayBuffer);
/**
* Parse all initializer tensors from the ONNX model.
*
* @returns A map of parsed tensors. Keys are presumably the tensor names
* (matching each entry's `name`); confirm in the implementation.
*/
parseInitializers(): Map<string, OnnxTensor>;
// The helpers below are private. Their names suggest protobuf wire-format
// primitives (tag/varint decoding, skipping and locating fields), but their
// signatures and behaviour are not visible in this declaration.
private parseTensorProto;
private readTag;
private readVarint;
private skipField;
private findField;
}
/**
* Dequantize INT8 weights to float32.
* ONNX uses: float_val = (int8_val - zero_point) * scale
*
* @param quantized Signed 8-bit quantized values.
* @param scale Scale factor(s). Whether this is a single per-tensor value or
* one value per channel is not visible from the signature; TODO confirm.
* @param zeroPoint Zero point(s), or null. A null is presumably treated as a
* zero point of 0; confirm in the implementation.
* @param shape Shape of the tensor. Presumably used to map elements to their
* scale/zero-point entry; how it is used is not visible here.
* @returns The dequantized values as float32.
*/
export declare function dequantizeInt8(quantized: Int8Array, scale: Float32Array, zeroPoint: Int8Array | null, shape: number[]): Float32Array;
/**
* Dequantize UINT8 weights to float32.
* Unsigned counterpart of dequantizeInt8 with the same parameter layout;
* presumably applies the same (value - zero_point) * scale formula (confirm
* in the implementation).
*
* @param quantized Unsigned 8-bit quantized values.
* @param scale Scale factor(s). Per-tensor versus per-channel is not visible
* from the signature; TODO confirm.
* @param zeroPoint Zero point(s), or null. A null is presumably treated as a
* zero point of 0; confirm in the implementation.
* @param shape Shape of the tensor; how it is used is not visible here.
* @returns The dequantized values as float32.
*/
export declare function dequantizeUint8(quantized: Uint8Array, scale: Float32Array, zeroPoint: Uint8Array | null, shape: number[]): Float32Array;
/**
* Convert float16 to float32.
*
* @param f16 Half-precision input held in a Uint16Array; presumably each
* element is the raw 16-bit pattern of one float16 value.
* @returns The converted values as float32; presumably one output element per
* input element (confirm in the implementation).
*/
export declare function float16ToFloat32(f16: Uint16Array): Float32Array;
/**
* Parse NPZ file (NumPy compressed archive) for voice embeddings.
* NPZ is just a ZIP file containing .npy files.
*
* @param buffer Bytes of the .npz archive.
* @returns A promise resolving to a map with one entry per array in the
* archive. Keys are presumably the archive entry names; whether the `.npy`
* suffix is stripped is not visible here (TODO confirm).
*/
export declare function parseNpz(buffer: ArrayBuffer): Promise<Map<string, {
/** Array shape, one entry per dimension. */
shape: number[];
/** Array contents as float32. */
data: Float32Array;
}>>;
/**
* Parse a .npy file, handling float32, float16, and int64 dtypes. Always returns Float32Array.
*
* Because the result is always float32, int64 inputs cannot all be
* represented exactly; large integer values will be rounded.
*
* @param buffer Buffer containing the .npy data.
* @param byteOffset Optional. Presumably the offset within `buffer` at which
* the .npy data starts, defaulting to the beginning; confirm in the
* implementation.
* @returns The array's shape, its values as float32, and a dtype string
* (presumably the dtype descriptor found in the file, i.e. the type before
* conversion; TODO confirm).
*/
export declare function parseNpyGeneric(buffer: ArrayBuffer, byteOffset?: number): {
/** Array shape, one entry per dimension. */
shape: number[];
/** Array contents as float32. */
data: Float32Array;
/** Dtype string; presumably describes the source dtype rather than the returned one. */
dtype: string;
};
// Explicit empty export: marks this file as a module and, in a declaration
// file, keeps declarations without an `export` modifier (here OnnxTensor)
// from being treated as exported.
export {};