UNPKG

@ericedouard/vad-node-realtime

Version:

Powerful, user-friendly realtime voice activity detector (VAD) for Node.js

165 lines 6.11 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RealTimeVAD = exports.getDefaultRealTimeVADOptions = exports.DEFAULT_MODEL = void 0; const frame_processor_1 = require("./common/frame-processor"); const messages_1 = require("./common/messages"); const models_1 = require("./common/models"); const resampler_1 = require("./common/resampler"); exports.DEFAULT_MODEL = "v5"; /** * Build default options based on chosen model */ function getDefaultRealTimeVADOptions(model = exports.DEFAULT_MODEL) { const frameOpts = model === "v5" ? frame_processor_1.defaultV5FrameProcessorOptions : frame_processor_1.defaultLegacyFrameProcessorOptions; return { ...frameOpts, sampleRate: 16000, onFrameProcessed: () => { }, onVADMisfire: () => { /* no-op */ }, onSpeechStart: () => { /* no-op */ }, onSpeechRealStart: () => { /* no-op */ }, onSpeechEnd: () => { /* no-op */ }, ortConfig: undefined, model, }; } exports.getDefaultRealTimeVADOptions = getDefaultRealTimeVADOptions; /** * RealTimeVAD processes raw audio buffers, frames, and emits events */ class RealTimeVAD { /** * Construct a new instance with provided options and loaded model */ constructor(options, modelInstance) { this.options = options; this.buffer = new Float32Array(0); this.active = false; this.resampler = null; /** Handle events emitted by the frame processor */ this.handleEvent = (ev) => { switch (ev.msg) { case messages_1.Message.FrameProcessed: this.options.onFrameProcessed(ev.probs, ev.frame); break; case messages_1.Message.SpeechStart: this.options.onSpeechStart(); break; case messages_1.Message.SpeechRealStart: this.options.onSpeechRealStart(); break; case messages_1.Message.VADMisfire: this.options.onVADMisfire(); break; case messages_1.Message.SpeechEnd: this.options.onSpeechEnd(ev.audio); break; } }; this.modelInstance = modelInstance; this.frameSize = options.frameSamples; this.frameProcessor = new frame_processor_1.FrameProcessor(modelInstance.process, modelInstance.reset_state, 
{ frameSamples: options.frameSamples, positiveSpeechThreshold: options.positiveSpeechThreshold, negativeSpeechThreshold: options.negativeSpeechThreshold, redemptionFrames: options.redemptionFrames, preSpeechPadFrames: options.preSpeechPadFrames, minSpeechFrames: options.minSpeechFrames, submitUserSpeechOnPause: options.submitUserSpeechOnPause, }); if (options.sampleRate !== 16000) { this.resampler = new resampler_1.Resampler({ nativeSampleRate: options.sampleRate, targetSampleRate: 16000, targetFrameSize: this.frameSize, }); } } /** * Create and initialize a RealTimeVAD instance */ static async new(ort, modelFetcher, opts = {}) { const fullOpts = { ...getDefaultRealTimeVADOptions(opts.model), ...opts, }; (0, frame_processor_1.validateOptions)(fullOpts); if (fullOpts.ortConfig) fullOpts.ortConfig(ort); const modelVersion = fullOpts.model || exports.DEFAULT_MODEL; const factory = modelVersion === "v5" ? models_1.SileroV5.new : models_1.SileroLegacy.new; const modelInstance = await factory(ort, modelFetcher); return new RealTimeVAD(fullOpts, modelInstance); } /** Start processing incoming frames */ start() { this.active = true; this.frameProcessor.resume(); } /** Pause processing; may emit end-segment on pause */ pause() { this.active = false; this.frameProcessor.pause(this.handleEvent); } /** Feed raw audio (any sample rate) into the VAD */ async processAudio(audioData) { if (!this.active) return; let data = audioData; if (this.resampler) { const chunks = []; for await (const frame of this.resampler.stream(audioData)) { chunks.push(frame); } data = new Float32Array(chunks.reduce((sum, c) => sum + c.length, 0)); let off = 0; for (const c of chunks) { data.set(c, off); off += c.length; } } // append to internal buffer const tmp = new Float32Array(this.buffer.length + data.length); tmp.set(this.buffer); tmp.set(data, this.buffer.length); this.buffer = tmp; // process complete frames while (this.buffer.length >= this.frameSize) { const frame = this.buffer.subarray(0, 
this.frameSize); this.buffer = this.buffer.subarray(this.frameSize); await this.frameProcessor.process(frame, this.handleEvent); } } /** Flush any remaining audio and end segment */ async flush() { if (this.buffer.length > 0 && this.buffer.length < this.frameSize) { const pad = new Float32Array(this.frameSize); pad.set(this.buffer); await this.frameProcessor.process(pad, this.handleEvent); } this.frameProcessor.endSegment(this.handleEvent); this.buffer = new Float32Array(0); } /** Reset internal state */ reset() { this.buffer = new Float32Array(0); this.modelInstance.reset_state(); } /** Clean up resources */ destroy() { this.pause(); this.reset(); } } exports.RealTimeVAD = RealTimeVAD; //# sourceMappingURL=real-time-vad.js.map