/*
 * @ricky0123/vad-web
 * Version:
 * Powerful, user-friendly voice activity detector (VAD) for the browser
 * 77 lines • 3.46 kB
 * TypeScript
 */
import { Message } from "./messages";
import { SpeechProbabilities } from "./models";
/**
 * Tunable parameters for the frame processor.
 * The two thresholds are compared against values returned by the Silero VAD model;
 * the `*Ms` fields are durations in milliseconds.
 */
export interface FrameProcessorOptions {
/** Threshold over which values returned by the Silero VAD model will be considered as positively indicating speech.
* The Silero VAD model is run on each frame. This number should be between 0 and 1.
*/
positiveSpeechThreshold: number;
/** Threshold under which values returned by the Silero VAD model will be considered as indicating an absence of speech.
* Note that the creators of the Silero VAD have historically set this number at 0.15 less than `positiveSpeechThreshold`.
*/
negativeSpeechThreshold: number;
/** After a VAD value under the `negativeSpeechThreshold` is observed, the algorithm will wait `redemptionMs` ms
* before running `onSpeechEnd`. If the model returns a value over `positiveSpeechThreshold` during this grace period,
* the algorithm will treat the previously detected "speech end" as a false negative.
*/
redemptionMs: number;
/** Number of ms of audio to prepend to the audio segment that will be passed to `onSpeechEnd`. */
preSpeechPadMs: number;
/** If an audio segment is detected as a speech segment according to the initial algorithm but is shorter than
* `minSpeechMs`, it will be discarded and `onVADMisfire` will be run instead of `onSpeechEnd`.
*/
minSpeechMs: number;
/**
* If true, pausing the VAD may trigger `onSpeechEnd`.
* NOTE(review): the exact conditions under which it fires are not visible in this declaration file.
*/
submitUserSpeechOnPause: boolean;
}
/**
 * Default `FrameProcessorOptions` exported by the implementation module.
 * The concrete values are not part of this declaration file.
 */
export declare const defaultFrameProcessorOptions: FrameProcessorOptions;
/**
 * Validates a complete `FrameProcessorOptions` object. Returns nothing.
 * NOTE(review): how invalid values are reported (throwing vs. logging) is not visible
 * in this declaration; confirm against the implementation before relying on it.
 *
 * @param options - The options object to check.
 */
export declare function validateOptions(options: FrameProcessorOptions): void;
/**
 * Minimal contract for a frame processor: it can be resumed, fed audio frames,
 * and asked to end the current segment. Events are delivered through the
 * `handleEvent` callback as `FrameProcessorEvent` values.
 */
export interface FrameProcessorInterface {
/** Resumes processing. Takes no arguments and returns nothing. */
resume: () => void;
/**
* Processes one chunk of audio samples asynchronously.
* `handleEvent` receives any `FrameProcessorEvent` produced while handling `arr`.
*/
process: (arr: Float32Array, handleEvent: (event: FrameProcessorEvent) => void) => Promise<void>;
/**
* Ends the current segment. Events may be reported via `handleEvent`; the returned
* object optionally carries a `msg` and an `audio` buffer (both may be absent).
*/
endSegment: (handleEvent: (event: FrameProcessorEvent) => void) => {
msg?: Message;
audio?: Float32Array;
};
}
/**
 * Frame processor class, declared as implementing `FrameProcessorInterface`.
 * Only the type surface is visible in this declaration file; the behavior of each
 * member lives in the compiled implementation.
 */
export declare class FrameProcessor implements FrameProcessorInterface {
/** Async function mapping one audio frame to `SpeechProbabilities` (same name and type as the first constructor parameter). */
modelProcessFunc: (frame: Float32Array) => Promise<SpeechProbabilities>;
/** Function that resets the model (same name and type as the second constructor parameter). */
modelResetFunc: () => void;
/** Current options for this processor. */
options: FrameProcessorOptions;
/** Presumably the duration of one frame in ms, used to convert the `*Ms` options to frame counts — TODO confirm. */
msPerFrame: number;
/** Presumably `options.redemptionMs` expressed in frames — TODO confirm against implementation. */
redemptionFrames: number;
/** Presumably `options.preSpeechPadMs` expressed in frames — TODO confirm against implementation. */
preSpeechPadFrames: number;
/** Presumably `options.minSpeechMs` expressed in frames — TODO confirm against implementation. */
minSpeechFrames: number;
/** Presumably true while a speech segment is in progress — TODO confirm. */
speaking: boolean;
/** Buffered frames, each stored together with an `isSpeech` flag. */
audioBuffer: {
frame: Float32Array;
isSpeech: boolean;
}[];
/** Presumably counts frames elapsed in the redemption (grace) period — TODO confirm. */
redemptionCounter: number;
/** Presumably counts frames classified as speech in the current segment — TODO confirm. */
speechFrameCount: number;
/** Presumably whether the processor is currently running (toggled by `pause`/`resume`) — TODO confirm. */
active: boolean;
/** Presumably records whether a `Message.SpeechRealStart` event has already been emitted for the current segment — TODO confirm. */
speechRealStartFired: boolean;
/**
* @param modelProcessFunc - Async function returning `SpeechProbabilities` for a frame.
* @param modelResetFunc - Function that resets the model.
* @param options - Frame processor options.
* @param msPerFrame - Number of ms per frame (per the parameter name).
*/
constructor(modelProcessFunc: (frame: Float32Array) => Promise<SpeechProbabilities>, modelResetFunc: () => void, options: FrameProcessorOptions, msPerFrame: number);
/** Applies a partial update to the options; any subset of `FrameProcessorOptions` fields may be supplied. */
setOptions: (update: Partial<FrameProcessorOptions>) => void;
/** Resets the processor. NOTE(review): which fields are cleared is not visible in this declaration. */
reset: () => void;
/** Pauses the processor; events produced while pausing are reported through `handleEvent`. */
pause: (handleEvent: (event: FrameProcessorEvent) => void) => void;
/** Resumes the processor. */
resume: () => void;
/**
* Ends the current segment, reporting events through `handleEvent`.
* NOTE(review): the declared return type here is `{}`, whereas `FrameProcessorInterface.endSegment`
* declares `{ msg?: Message; audio?: Float32Array }` — confirm whether the implementation's
* return type should be narrowed to match.
*/
endSegment: (handleEvent: (event: FrameProcessorEvent) => void) => {};
/** Processes one audio frame asynchronously, reporting resulting events through `handleEvent`. */
process: (frame: Float32Array, handleEvent: (event: FrameProcessorEvent) => void) => Promise<void>;
}
/**
 * Events passed to the `handleEvent` callbacks, modeled as a union discriminated on `msg`.
 * Only `SpeechEnd` and `FrameProcessed` carry a payload beyond the discriminant.
 */
export type FrameProcessorEvent = {
// Misfire event; no payload.
msg: Message.VADMisfire;
} | {
// Speech-start event; no payload.
msg: Message.SpeechStart;
} | {
// "Real" speech-start event; no payload.
msg: Message.SpeechRealStart;
} | {
// Speech-end event; `audio` holds the samples for the ended segment.
msg: Message.SpeechEnd;
audio: Float32Array;
} | {
// Per-frame event; carries the model's probabilities and the frame they belong to.
msg: Message.FrameProcessed;
probs: SpeechProbabilities;
frame: Float32Array;
};
//# sourceMappingURL=frame-processor.d.ts.map