@steelbrain/media-speech-detection-web

Version:

Production-ready speech detection using Silero VAD ONNX model for web browsers

github.com/steelbrain/media-toolkit/tree/main/projects/media-speech-detection-web

76 lines • 2.69 kB

text/typescript

/**
 * Optional callbacks accepted by the simple VAD.
 * Every handler may be left out.
 */
export interface VADEventHandlers {
  /** Speech-start callback; invoked without arguments. */
  onSpeechStart?: () => void;
  /** Speech-end callback; receives the speech audio as a `Float32Array`. */
  onSpeechEnd?: (speechAudio: Float32Array) => void;
  /**
   * Misfire callback; invoked without arguments.
   * NOTE(review): presumably fired for a detection that turned out too short
   * to count as speech — confirm against the implementation.
   */
  onMisfire?: () => void;
  /** Error callback; receives the `Error` being reported. */
  onError?: (error: Error) => void;
  /** Debug-log callback; receives the log message as a string. */
  onDebugLog?: (message: string) => void;
}

/**
 * Tunable settings of the simple VAD.
 * Every field is optional and falls back to the default documented on it.
 */
export interface VADConfig {
  /** Threshold for detecting speech, in the range 0-1. Default: 0.5 */
  threshold?: number;
  /** Shortest speech duration, in milliseconds. Default: 160ms */
  minSpeechDurationMs?: number;
  /** Grace period, in milliseconds, that must pass before a speech end is confirmed. Default: 400ms */
  redemptionDurationMs?: number;
  /** Length of the lookback buffer, in milliseconds, used for a smooth speech start. Default: 384ms */
  lookBackDurationMs?: number;
}

/**
 * The full option bag of the simple VAD interface: event handlers and
 * configuration merged into one object.
 */
export interface VADOptions extends VADEventHandlers, VADConfig {
  /** When true, speech chunks are not emitted downstream; only the callbacks are triggered. Default: false */
  noEmit?: boolean;
}

/**
 * Warms the browser cache with the Silero VAD ONNX model.
 *
 * The model file is fetched up front so that the browser's HTTP cache can
 * store it and serve it for subsequent requests, which removes the network
 * delay from the first use of speech detection.
 *
 * @returns Promise that resolves once the model file has been fetched and cached
 * @throws Error if the model file cannot be fetched
 *
 * @example
 * ```typescript
 * // Preload during app initialization
 * await preloadModel();
 *
 * // Later, speech filters will load faster from browser cache
 * const speechTransform = speechFilter({
 *   onSpeechStart: () => console.log('🎤 Speech started')
 * });
 * ```
 */
export declare function preloadModel(): Promise<void>;

/**
 * Creates a transform stream that filters audio so that only speech chunks
 * are output.
 *
 * @param options - Optional event handlers and VAD configuration
 * @returns A `TransformStream` that reads and writes `Float32Array` chunks
 *
 * @example
 * ```typescript
 * const speechTransform = speechFilter({
 *   onSpeechStart: () => console.log('🎤 Speech started'),
 *   onSpeechEnd: () => console.log('🔇 Speech ended')
 * });
 *
 * audioStream.pipeThrough(speechTransform).pipeTo(speechProcessor);
 *
 * // .tee() pattern for events-only processing
 * const [liveStream, eventsStream] = audioStream.tee();
 * liveStream.pipeTo(speechProcessor);
 * eventsStream.pipeThrough(speechFilter({
 *   noEmit: true, // Don't emit chunks
 *   onSpeechStart: () => showRecordingIndicator(),
 *   onSpeechEnd: () => hideRecordingIndicator()
 * }));
 * ```
 */
export declare function speechFilter(
  options?: VADOptions,
): TransformStream<Float32Array, Float32Array>;
//# sourceMappingURL=index.d.ts.map