@jaehyun-ko/speaker-verification
Version:
Real-time speaker verification in the browser using NeXt-TDNN models
113 lines (112 loc) • 4.08 kB
JavaScript
;
// CommonJS export preamble: flags the module as an ES-module interop target.
Object.defineProperty(exports, "__esModule", { value: true });
// FFT is a class declaration, which cannot be referenced before it is evaluated,
// so its export slot starts out undefined and is assigned after the class body.
exports.FFT = void 0;
// Function declarations are hoisted, so they can be exported ahead of their definitions.
exports.nextPowerOf2 = nextPowerOf2;
exports.normalizeAudio = normalizeAudio;
exports.resampleAudio = resampleAudio;
/**
 * In-place radix-2 Cooley-Tukey FFT with precomputed twiddle-factor and
 * bit-reversal tables for a fixed transform length.
 *
 * Sign convention: the butterflies multiply by cos(θ) + i·sin(θ), i.e. the
 * transform kernel is e^{+2πi·nk/N}. For purely real input this is the complex
 * conjugate of the e^{-2πi·nk/N} convention, so magnitudes are identical but
 * imaginary parts carry the opposite sign.
 */
class FFT {
    /**
     * @param {number} size - Transform length; a positive integer power of two.
     * @throws {Error} If `size` is not a positive integer power of two.
     */
    constructor(size) {
        const log2Size = Math.log2(size);
        // A bare "log2 is an integer" test is not enough: log2(0) is -Infinity
        // (equal to its own floor) and log2(0.5) is -1, so both would slip through.
        if (!Number.isInteger(size) || size < 1 || !Number.isInteger(log2Size)) {
            throw new Error('FFT size must be a power of 2');
        }
        this.size = size;
        // Twiddle tables cover angles 2π·i/size for i in [0, size/2).
        const half = Math.floor(size / 2);
        this.cosTable = new Float32Array(half);
        this.sinTable = new Float32Array(half);
        for (let i = 0; i < half; i++) {
            const angle = 2 * Math.PI * i / size;
            this.cosTable[i] = Math.cos(angle);
            this.sinTable[i] = Math.sin(angle);
        }
        // Bit-reversal permutation: reverse all 32 bits, then keep the top log2(size) of them.
        this.reverseTable = new Uint32Array(size);
        const shift = 32 - log2Size;
        for (let i = 0; i < size; i++) {
            this.reverseTable[i] = (this.reverseBits(i) >>> shift);
        }
    }
    /**
     * Reverses the bit order of a 32-bit integer by swapping progressively
     * larger bit groups (1, 2, 4, 8, then 16 bits).
     *
     * @param {number} x - Value whose low 32 bits are reversed.
     * @returns {number} Reversed bits as a signed 32-bit value (may be negative);
     *   apply `>>>` to reinterpret it as unsigned.
     */
    reverseBits(x) {
        x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >>> 1);
        x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >>> 2);
        x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >>> 4);
        x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >>> 8);
        x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >>> 16);
        return x;
    }
    /**
     * Transforms the first `size` entries of `real` / `imag` in place.
     *
     * @param {Float32Array|number[]} real - Real parts; overwritten with the result.
     * @param {Float32Array|number[]} imag - Imaginary parts; overwritten with the result.
     * @throws {Error} If either buffer holds fewer than `size` entries.
     */
    forward(real, imag) {
        const n = this.size;
        // Short typed arrays would otherwise read undefined and silently fill the output with NaN.
        if (real.length < n || imag.length < n) {
            throw new Error(`FFT input buffers must hold at least ${n} samples`);
        }
        // Bit-reversal reordering; `j > i` ensures each pair is swapped only once.
        for (let i = 0; i < n; i++) {
            const j = this.reverseTable[i];
            if (j > i) {
                const swapReal = real[i];
                real[i] = real[j];
                real[j] = swapReal;
                const swapImag = imag[i];
                imag[i] = imag[j];
                imag[j] = swapImag;
            }
        }
        // Butterfly stages: sub-transform length doubles each pass.
        for (let size = 2; size <= n; size *= 2) {
            const halfSize = size / 2;
            // Stride into the twiddle tables, which are laid out for the full length n.
            const tableStep = n / size;
            for (let i = 0; i < n; i += size) {
                for (let j = i, k = 0; j < i + halfSize; j++, k += tableStep) {
                    const l = j + halfSize;
                    const cos = this.cosTable[k];
                    const sin = this.sinTable[k];
                    // (real[l] + i·imag[l]) · (cos + i·sin)
                    const tReal = real[l] * cos - imag[l] * sin;
                    const tImag = real[l] * sin + imag[l] * cos;
                    real[l] = real[j] - tReal;
                    imag[l] = imag[j] - tImag;
                    real[j] += tReal;
                    imag[j] += tImag;
                }
            }
        }
    }
    /**
     * Computes the magnitude of bins 0 through floor(size / 2) inclusive.
     *
     * @param {Float32Array|number[]} real - Real parts of the spectrum.
     * @param {Float32Array|number[]} imag - Imaginary parts of the spectrum.
     * @returns {Float32Array} Magnitudes, length floor(size / 2) + 1.
     */
    getMagnitudeSpectrum(real, imag) {
        const halfSize = Math.floor(this.size / 2) + 1;
        const magnitude = new Float32Array(halfSize);
        for (let i = 0; i < halfSize; i++) {
            magnitude[i] = Math.sqrt(real[i] * real[i] + imag[i] * imag[i]);
        }
        return magnitude;
    }
}
exports.FFT = FFT;
// Utility functions for audio processing
/**
 * Returns the smallest power of two (2^k with integer k >= 0) that is >= n.
 *
 * Uses exact doubling rather than ceil(log2(n)), which can round down for
 * large inputs just above a power of two and yields 0, fractions, or NaN for
 * inputs below 1.
 *
 * @param {number} n - Minimum required size.
 * @returns {number} 1 for any n <= 1; NaN for NaN; Infinity for Infinity.
 */
function nextPowerOf2(n) {
    const value = Number(n);
    if (Number.isNaN(value)) {
        return NaN;
    }
    if (value === Infinity) {
        return Infinity;
    }
    let power = 1;
    // Doubling is exact in floating point, so the comparison never suffers rounding.
    while (power < value) {
        power *= 2;
    }
    return power;
}
/**
 * Peak-normalizes audio so that the largest absolute sample becomes 1.
 *
 * The peak is found with a plain loop: spreading every sample into
 * `Math.max(...)` passes each one as a call argument and throws a RangeError
 * once the buffer exceeds the engine's argument limit.
 *
 * @param {Float32Array|number[]} audio - Input samples; not modified.
 * @returns {Float32Array|number[]} A new Float32Array of scaled samples, or
 *   the input itself when no sample has a non-zero magnitude (silent or empty).
 */
function normalizeAudio(audio) {
    let peak = 0;
    for (let i = 0; i < audio.length; i++) {
        const magnitude = Math.abs(audio[i]);
        // NaN never compares greater, so NaN samples do not become the peak.
        if (magnitude > peak) {
            peak = magnitude;
        }
    }
    if (peak === 0) {
        return audio;
    }
    const scale = 1.0 / peak;
    const normalized = new Float32Array(audio.length);
    for (let i = 0; i < audio.length; i++) {
        normalized[i] = audio[i] * scale;
    }
    return normalized;
}
/**
 * Converts audio between sample rates using linear interpolation between
 * neighbouring source samples. No low-pass filtering is applied.
 *
 * @param {Float32Array|number[]} audio - Source samples.
 * @param {number} fromSampleRate - Sample rate of `audio`.
 * @param {number} toSampleRate - Desired sample rate.
 * @returns {Float32Array|number[]} The input itself when the rates are equal,
 *   otherwise a new Float32Array of round(audio.length * to / from) samples.
 */
function resampleAudio(audio, fromSampleRate, toSampleRate) {
    if (fromSampleRate === toSampleRate) {
        return audio;
    }
    const ratio = toSampleRate / fromSampleRate;
    const sourceLength = audio.length;
    const output = new Float32Array(Math.round(sourceLength * ratio));
    for (let outIdx = 0; outIdx < output.length; outIdx += 1) {
        // Fractional position of this output sample on the source timeline.
        const position = outIdx / ratio;
        const left = Math.floor(position);
        const right = left + 1;
        if (right >= sourceLength) {
            // No right-hand neighbour to blend with: hold the last reachable sample.
            output[outIdx] = audio[left];
            continue;
        }
        const weight = position - left;
        output[outIdx] = audio[left] * (1 - weight) + audio[right] * weight;
    }
    return output;
}