@steelbrain/media-speech-detection-web
Version:
Production-ready speech detection using Silero VAD ONNX model for web browsers
76 lines • 2.69 kB
TypeScript
/**
* Optional callbacks for voice activity detection (VAD) events.
*
* Every handler is optional; supply only the ones you need.
*/
export interface VADEventHandlers {
/** Invoked with no arguments. Presumably fired when speech is first detected — confirm against the implementation. */
onSpeechStart?: () => void;
/** Invoked with a `Float32Array`. Presumably the audio samples of the speech segment that just ended — confirm against the implementation. */
onSpeechEnd?: (speechAudio: Float32Array) => void;
/** Invoked with no arguments. NOTE(review): likely signals a detection that was discarded as too short (see `minSpeechDurationMs`) — confirm. */
onMisfire?: () => void;
/** Invoked with the `Error` being reported. */
onError?: (error: Error) => void;
/** Invoked with a diagnostic message string. */
onDebugLog?: (message: string) => void;
}
/**
* Tuning options for voice activity detection.
*
* All fields are optional; an omitted field uses the default stated on it.
*/
export interface VADConfig {
/** Speech detection threshold, in the range 0-1. Default: 0.5 */
threshold?: number;
/** Minimum speech duration, in milliseconds. Default: 160ms */
minSpeechDurationMs?: number;
/** Grace period, in milliseconds, to wait before confirming that speech has ended. Default: 400ms */
redemptionDurationMs?: number;
/** Duration, in milliseconds, of the lookback buffer used to smooth the start of speech. Default: 384ms */
lookBackDurationMs?: number;
}
/**
* Options accepted by `speechFilter`: the event handlers from
* `VADEventHandlers` and the tuning values from `VADConfig`, plus `noEmit`.
*/
export interface VADOptions extends VADEventHandlers, VADConfig {
/** If true, speech chunks are not emitted downstream; only the callbacks are triggered. Default: false */
noEmit?: boolean;
}
/**
* Preloads the Silero VAD ONNX model by fetching it into the browser cache.
*
* Fetching the model file ahead of time lets the browser's HTTP cache store it,
* which removes the network delay the first time speech detection is used.
* Subsequent requests for the model are then served from that cache.
*
* @returns Promise that resolves once the model file has been fetched and cached
* @throws Error if the model file cannot be fetched
*
* @example
* ```typescript
* // Preload during app initialization
* await preloadModel();
*
* // Later, speech filters will load faster from the browser cache
* const speechTransform = speechFilter({
*   onSpeechStart: () => console.log('🎤 Speech started')
* });
* ```
*/
export declare function preloadModel(): Promise<void>;
/**
* Creates a speech filter transform stream that passes through only the
* audio chunks containing speech.
*
* @param options Optional event handlers and detection settings; see `VADOptions`.
* @returns A `TransformStream` that reads `Float32Array` chunks and writes `Float32Array` chunks
*
* @example
* ```typescript
* const speechTransform = speechFilter({
*   onSpeechStart: () => console.log('🎤 Speech started'),
*   onSpeechEnd: () => console.log('🔇 Speech ended')
* });
*
* audioStream.pipeThrough(speechTransform).pipeTo(speechProcessor);
*
* // .tee() pattern for events-only processing
* const [liveStream, eventsStream] = audioStream.tee();
* liveStream.pipeTo(speechProcessor);
* eventsStream.pipeThrough(speechFilter({
*   noEmit: true, // Don't emit chunks
*   onSpeechStart: () => showRecordingIndicator(),
*   onSpeechEnd: () => hideRecordingIndicator()
* }));
* ```
*
* NOTE(review): in the events-only example the readable side returned by
* `pipeThrough` is never consumed. Confirm whether it must be drained (for
* example piped to a no-op sink) for the transform to keep processing input.
*/
export declare function speechFilter(options?: VADOptions): TransformStream<Float32Array, Float32Array>;
//# sourceMappingURL=index.d.ts.map