echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
57 lines • 2.59 kB
JavaScript
import { concatFloat32Arrays } from '../utilities/Utilities.js';
import { getOnnxSessionOptions } from '../utilities/OnnxUtilities.js';
export async function detectVoiceActivity(rawAudio, modelPath, frameDuration, executionProviders) {
if (rawAudio.sampleRate != 16000) {
throw new Error('Audio sample rate must be 16KHz');
}
const audioSamples = rawAudio.audioChannels[0];
const frameLength = Math.floor(16000 * (frameDuration / 1000));
const sileroVad = new SileroVAD(modelPath, executionProviders);
const frameProbabilities = [];
for (let position = 0; position < audioSamples.length; position += frameLength) {
let chunk = audioSamples.subarray(position, position + frameLength);
if (chunk.length < frameLength) {
chunk = concatFloat32Arrays([chunk, new Float32Array(frameLength - chunk.length)]);
}
const probability = await sileroVad.predictAudioFrame(chunk);
frameProbabilities.push(probability);
}
return frameProbabilities;
}
export class SileroVAD {
modelPath;
executionProviders;
session;
modelStateH;
modelStateC;
modelSampleRate;
constructor(modelPath, executionProviders) {
this.modelPath = modelPath;
this.executionProviders = executionProviders;
}
async predictAudioFrame(frame) {
await this.initializeIfNeeded();
const Onnx = await import('onnxruntime-node');
const inputTensor = new Onnx.Tensor('float32', frame, [1, frame.length]);
const inputs = { input: inputTensor, sr: this.modelSampleRate, h: this.modelStateH, c: this.modelStateC };
const results = await this.session.run(inputs);
const probability = results['output'].data[0];
this.modelStateH = results['hn'];
this.modelStateC = results['cn'];
return probability;
}
async initializeIfNeeded() {
if (this.session) {
return;
}
const Onnx = await import('onnxruntime-node');
const h = new Float32Array(2 * 1 * 64);
const c = new Float32Array(2 * 1 * 64);
this.modelStateH = new Onnx.Tensor('float32', h, [2, 1, 64]);
this.modelStateC = new Onnx.Tensor('float32', c, [2, 1, 64]);
this.modelSampleRate = new Onnx.Tensor('int64', new BigInt64Array([BigInt(16000)]), []);
const onnxSessionOptions = getOnnxSessionOptions({ executionProviders: this.executionProviders });
this.session = await Onnx.InferenceSession.create(this.modelPath, onnxSessionOptions);
}
}
//# sourceMappingURL=SileroVAD.js.map