UNPKG

@jaehyun-ko/speaker-verification

Version:

Real-time speaker verification in the browser using NeXt-TDNN models

113 lines (112 loc) 4.08 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.FFT = void 0;
exports.nextPowerOf2 = nextPowerOf2;
exports.normalizeAudio = normalizeAudio;
exports.resampleAudio = resampleAudio;

/**
 * In-place radix-2 Cooley-Tukey transform with precomputed twiddle and
 * bit-reversal tables.
 *
 * Note on sign convention: the butterflies multiply by `cos + i*sin`, so
 * `forward` evaluates `X[k] = sum_n x[n] * e^{+2*pi*i*k*n/N}`. For real input
 * this is the complex conjugate of the negative-exponent DFT, so magnitudes
 * are unaffected.
 */
class FFT {
    /**
     * @param {number} size - Transform length; must be a power of 2 (>= 1).
     * @throws {Error} If `size` is not a power of 2.
     */
    constructor(size) {
        this.size = size;
        const log2Size = Math.log2(size);
        // `size >= 1` additionally rejects 0 (log2 = -Infinity, which equals its
        // own floor) and fractional powers such as 0.5 (log2 = -1).
        if (!(size >= 1) || log2Size !== Math.floor(log2Size)) {
            throw new Error('FFT size must be a power of 2');
        }
        // Precompute twiddle tables for angles 2*pi*i/size, i in [0, size/2)
        this.cosTable = new Float32Array(size / 2);
        this.sinTable = new Float32Array(size / 2);
        for (let i = 0; i < size / 2; i++) {
            const angle = 2 * Math.PI * i / size;
            this.cosTable[i] = Math.cos(angle);
            this.sinTable[i] = Math.sin(angle);
        }
        // Bit reversal table: reverse all 32 bits, then keep the top log2Size bits
        this.reverseTable = new Uint32Array(size);
        const shift = 32 - log2Size;
        for (let i = 0; i < size; i++) {
            this.reverseTable[i] = (this.reverseBits(i) >>> shift);
        }
    }

    /**
     * Reverse the bit order of a 32-bit integer by swapping progressively
     * larger groups (1, 2, 4, 8, then 16 bits).
     *
     * @param {number} x - Value to reverse.
     * @returns {number} The bit-reversed value as produced by the bitwise OR
     *   (a signed 32-bit integer; the constructor applies `>>>` to it).
     */
    reverseBits(x) {
        x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >>> 1);
        x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >>> 2);
        x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >>> 4);
        x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >>> 8);
        x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >>> 16);
        return x;
    }

    /**
     * Transform `real`/`imag` in place. Both arrays are overwritten with the
     * result; the first `this.size` entries of each are read and written.
     *
     * @param {Float32Array|number[]} real - Real parts.
     * @param {Float32Array|number[]} imag - Imaginary parts.
     * @returns {void}
     */
    forward(real, imag) {
        const n = this.size;
        // Bit reversal permutation; `j > i` ensures each pair is swapped once
        for (let i = 0; i < n; i++) {
            const j = this.reverseTable[i];
            if (j > i) {
                const tmpReal = real[i];
                real[i] = real[j];
                real[j] = tmpReal;
                const tmpImag = imag[i];
                imag[i] = imag[j];
                imag[j] = tmpImag;
            }
        }
        // Cooley-Tukey butterflies, doubling the sub-transform size each pass
        for (let size = 2; size <= n; size *= 2) {
            const halfSize = size / 2;
            // Stride into the twiddle tables for this pass
            const tableStep = n / size;
            for (let i = 0; i < n; i += size) {
                for (let j = i, k = 0; j < i + halfSize; j++, k += tableStep) {
                    const l = j + halfSize;
                    const cos = this.cosTable[k];
                    const sin = this.sinTable[k];
                    // (real[l] + i*imag[l]) * (cos + i*sin)
                    const tReal = real[l] * cos - imag[l] * sin;
                    const tImag = real[l] * sin + imag[l] * cos;
                    real[l] = real[j] - tReal;
                    imag[l] = imag[j] - tImag;
                    real[j] += tReal;
                    imag[j] += tImag;
                }
            }
        }
    }

    /**
     * Compute the magnitude of the first `floor(size / 2) + 1` bins.
     *
     * @param {Float32Array|number[]} real - Real parts.
     * @param {Float32Array|number[]} imag - Imaginary parts.
     * @returns {Float32Array} `sqrt(real[i]^2 + imag[i]^2)` for each bin.
     */
    getMagnitudeSpectrum(real, imag) {
        const halfSize = Math.floor(this.size / 2) + 1;
        const magnitude = new Float32Array(halfSize);
        for (let i = 0; i < halfSize; i++) {
            magnitude[i] = Math.sqrt(real[i] * real[i] + imag[i] * imag[i]);
        }
        return magnitude;
    }
}
exports.FFT = FFT;

// Utility functions for audio processing

/**
 * Smallest power of 2 that is greater than or equal to `n`.
 *
 * @param {number} n - Value to round up.
 * @returns {number} `2 ** ceil(log2(n))`.
 */
function nextPowerOf2(n) {
    return Math.pow(2, Math.ceil(Math.log2(n)));
}

/**
 * Scale audio so that its largest absolute sample becomes 1.
 *
 * @param {Float32Array|number[]} audio - Input samples.
 * @returns {Float32Array|number[]} A new Float32Array of scaled samples, or
 *   the input array itself (not a copy) when its peak is exactly 0.
 */
function normalizeAudio(audio) {
    // Running maximum instead of Math.max(...samples): spreading a long buffer
    // as call arguments exceeds the engine's argument limit and throws
    // RangeError. Starting at -Infinity and using Math.max keeps the same
    // results for empty input and for NaN samples.
    let maxValue = -Infinity;
    for (let i = 0; i < audio.length; i++) {
        maxValue = Math.max(maxValue, Math.abs(audio[i]));
    }
    if (maxValue === 0) {
        return audio;
    }
    const normalized = new Float32Array(audio.length);
    const scale = 1.0 / maxValue;
    for (let i = 0; i < audio.length; i++) {
        normalized[i] = audio[i] * scale;
    }
    return normalized;
}

/**
 * Resample audio by linear interpolation between neighbouring samples.
 *
 * @param {Float32Array|number[]} audio - Input samples.
 * @param {number} fromSampleRate - Sample rate of `audio`.
 * @param {number} toSampleRate - Desired sample rate.
 * @returns {Float32Array|number[]} The input array itself when the rates are
 *   equal; otherwise a new Float32Array of
 *   `round(audio.length * toSampleRate / fromSampleRate)` samples.
 */
function resampleAudio(audio, fromSampleRate, toSampleRate) {
    if (fromSampleRate === toSampleRate) {
        return audio;
    }
    const ratio = toSampleRate / fromSampleRate;
    const newLength = Math.round(audio.length * ratio);
    const resampled = new Float32Array(newLength);
    // Simple linear interpolation
    for (let i = 0; i < newLength; i++) {
        const srcIndex = i / ratio;
        const srcIndexInt = Math.floor(srcIndex);
        const fraction = srcIndex - srcIndexInt;
        if (srcIndexInt + 1 < audio.length) {
            resampled[i] = audio[srcIndexInt] * (1 - fraction) + audio[srcIndexInt + 1] * fraction;
        }
        else {
            // No right-hand neighbour to interpolate towards: copy the sample
            resampled[i] = audio[srcIndexInt];
        }
    }
    return resampled;
}