@qgustavor/stream-audio-fingerprint
Version:
Audio landmark fingerprinting as a JavaScript module
258 lines (256 loc) • 8.35 kB
JavaScript
// src/lib/fft.ts
/**
 * Iterative radix-2 FFT working in place on separate real/imaginary arrays
 * (bit-reversal permutation followed by butterfly passes).
 *
 * After every transform a magnitude spectrum is stored in `spectrum`; only the
 * first `n / 4` bins are computed. `peak` / `peakBand` track the largest
 * magnitude seen so far and are never reset between calls.
 */
var FFTNayuki = class {
  /**
   * @param {number} n - Transform length; must be a power of two.
   * @throws {Error} When `n` is not a power of two.
   */
  constructor(n) {
    this.peakBand = 0;
    this.peak = 0;
    this.n = n;

    // Find the exponent e such that 2^e === n (stays -1 when none exists).
    let exponent = -1;
    for (let shift = 0; shift < 32; shift++) {
      if (1 << shift === n) {
        exponent = shift;
      }
    }
    this.levels = exponent;
    if (exponent === -1) {
      throw Error("Length is not a power of 2");
    }

    // Twiddle factors for the first half of the unit circle.
    const tableSize = n / 2;
    const cosines = new Array(tableSize);
    const sines = new Array(tableSize);
    for (let k = 0; k < tableSize; k++) {
      const angle = 2 * Math.PI * k / n;
      cosines[k] = Math.cos(angle);
      sines[k] = Math.sin(angle);
    }
    this.cosTable = cosines;
    this.sinTable = sines;
    this.spectrum = new Array(n / 4);
  }

  /**
   * Transforms `real` / `imag` in place, then refreshes `spectrum`.
   *
   * @param {number[]} real - Real parts; overwritten with the result.
   * @param {number[]} imag - Imaginary parts; overwritten with the result.
   */
  forward(real, imag) {
    const { n, levels, cosTable, sinTable } = this;

    // Mirrors the lowest `levels` bits of `value`.
    const mirrorBits = (value) => {
      let mirrored = 0;
      for (let bit = 0; bit < levels; bit++) {
        mirrored = mirrored << 1 | value & 1;
        value >>>= 1;
      }
      return mirrored;
    };

    // Bit-reversal permutation; each pair is swapped once (only when partner > index).
    for (let index = 0; index < n; index++) {
      const partner = mirrorBits(index);
      if (partner <= index) {
        continue;
      }
      [real[index], real[partner]] = [real[partner], real[index]];
      [imag[index], imag[partner]] = [imag[partner], imag[index]];
    }

    // Butterfly passes over blocks of doubling size.
    for (let blockSize = 2; blockSize <= n; blockSize *= 2) {
      const halfBlock = blockSize / 2;
      const twiddleStride = n / blockSize;
      for (let blockStart = 0; blockStart < n; blockStart += blockSize) {
        let twiddle = 0;
        for (let upper = blockStart; upper < blockStart + halfBlock; upper++) {
          const lower = upper + halfBlock;
          const rotatedRe = real[lower] * cosTable[twiddle] + imag[lower] * sinTable[twiddle];
          const rotatedIm = -real[lower] * sinTable[twiddle] + imag[lower] * cosTable[twiddle];
          real[lower] = real[upper] - rotatedRe;
          imag[lower] = imag[upper] - rotatedIm;
          real[upper] = real[upper] + rotatedRe;
          imag[upper] = imag[upper] + rotatedIm;
          twiddle += twiddleStride;
        }
      }
    }

    this.calculateSpectrum(real, imag);
  }

  /**
   * Stores `4 / n * |X[k]|` for the first `n / 4` bins into `spectrum` and
   * updates the running `peak` / `peakBand`.
   *
   * @param {number[]} real - Real parts of the transformed signal.
   * @param {number[]} imag - Imaginary parts of the transformed signal.
   */
  calculateSpectrum(real, imag) {
    const scale = 4 / this.n;
    const binCount = this.n / 4;
    for (let bin = 0; bin < binCount; bin++) {
      const magnitude = scale * Math.sqrt(real[bin] ** 2 + imag[bin] ** 2);
      if (magnitude > this.peak) {
        this.peakBand = bin;
        this.peak = magnitude;
      }
      this.spectrum[bin] = magnitude;
    }
  }
};
var fft_default = FFTNayuki;
// src/codegen_landmark.ts
/**
 * Resolves user-supplied fingerprinting options against their defaults.
 *
 * A field falls back to its default only when it is `null` or `undefined`
 * (so explicit `0` / `false` values are kept). Several defaults derive from
 * previously resolved fields: `step` and `ifMax` from `nfft`, `dt` from
 * `samplingRate` and `step`, `hwin` from `nfft`, `eww` from `nfft` and `maskDf`.
 *
 * @param {object} options - Partial option set; must be an object.
 * @returns {object} Fully populated options (verbose, samplingRate, bps, mnlm,
 *   mppp, nfft, step, dt, hwin, maskDecayLog, ifMin, ifMax, windowDf, windowDt,
 *   pruningDt, maskDf, eww).
 */
var buildOptions = (options) => {
  // Builds an array of the given length whose entries are produced by `valueAt(index)`.
  const tabulate = (length, valueAt) => {
    const table = new Array(length);
    for (let index = 0; index < table.length; index += 1) {
      table[index] = valueAt(index);
    }
    return table;
  };

  const settings = {};
  settings.verbose = options.verbose ?? false;
  settings.samplingRate = options.samplingRate ?? 22050;
  settings.bps = options.bps ?? 2;
  settings.mnlm = options.mnlm ?? 5;
  settings.mppp = options.mppp ?? 3;
  settings.nfft = options.nfft ?? 512;

  const frameSize = settings.nfft;
  settings.step = options.step ?? frameSize / 2;
  settings.dt = options.dt ?? 1 / (settings.samplingRate / settings.step);

  // Default analysis window: Hann window spanning one frame.
  settings.hwin = options.hwin ?? tabulate(
    frameSize,
    (sample) => 0.5 * (1 - Math.cos(2 * Math.PI * sample / (frameSize - 1)))
  );

  settings.maskDecayLog = options.maskDecayLog ?? Math.log(0.995);
  settings.ifMin = options.ifMin ?? 0;
  settings.ifMax = options.ifMax ?? frameSize / 2;
  settings.windowDf = options.windowDf ?? 60;
  settings.windowDt = options.windowDt ?? 96;
  settings.pruningDt = options.pruningDt ?? 24;
  settings.maskDf = options.maskDf ?? 3;

  // Default masking table: for each bin `row`, a log-domain Gaussian-shaped
  // falloff over every bin `col`, widening with sqrt(row + 3).
  const spread = settings.maskDf;
  settings.eww = options.eww ?? tabulate(frameSize / 2, (row) => {
    const rowWidth = Math.sqrt(row + 3);
    return tabulate(
      frameSize / 2,
      (col) => -0.5 * Math.pow((col - row) / spread / rowWidth, 2)
    );
  });

  return settings;
};
/**
 * Streaming landmark fingerprinter.
 *
 * Raw PCM bytes are fed through `process()`. The instance keeps the unconsumed
 * bytes, a per-bin masking threshold and a sliding list of spectral peaks
 * ("marks"), and emits fingerprints as pairs of nearby peaks.
 */
var Codegen = class {
  /**
   * @param {object} [options] - Partial options; resolved by `buildOptions`.
   */
  constructor(options) {
    this.options = buildOptions(options ?? {});
    // Bytes received but not yet discarded.
    this.buffer = new Uint8Array(0);
    // Number of bytes already dropped from the front of `buffer`.
    this.bufferDelta = 0;
    // Sample index at which the next analysis frame starts.
    this.stepIndex = 0;
    // Recent per-frame peak lists: { t, i: bin indices, v: peak values }.
    this.marks = [];
    // Log-domain masking threshold per bin, started low so early peaks pass.
    this.threshold = new Array(this.options.nfft).fill(-3);
    this.fft = new fft_default(this.options.nfft);
  }

  /**
   * Consumes a chunk of PCM bytes and returns the fingerprints it completes.
   *
   * Samples are always read as little-endian signed 16-bit integers at a
   * stride of `bps` bytes and normalised by 2^(8 * bps - 1).
   * NOTE(review): with `bps` other than 2 the 16-bit read does not match the
   * stride — confirm whether other sample widths are meant to be supported.
   * NOTE(review): the FFT class in this file fills only `nfft / 4` spectrum
   * bins, so with the default `ifMax` (`nfft / 2`) the upper bins evaluate to
   * NaN here and can never become peaks — confirm this is intended.
   *
   * @param {Uint8Array} chunk - Raw audio bytes (any array-like of bytes that
   *   `Uint8Array.prototype.set` accepts).
   * @returns {{tcodes: number[], hcodes: number[]}} Parallel arrays: frame
   *   time of the anchor peak and the combined hash of each peak pair.
   */
  process(chunk) {
    const {
      verbose,
      bps,
      mnlm,
      mppp,
      nfft,
      step,
      hwin,
      maskDecayLog,
      ifMin,
      ifMax,
      windowDf,
      windowDt,
      pruningDt,
      eww
    } = this.options;

    if (verbose) {
      const t = Math.round(this.stepIndex / step).toString();
      const received = chunk.length.toString();
      console.log(`t=${t} received ${received} bytes`);
    }

    const tcodes = [];
    const hcodes = [];

    // Append the new bytes to whatever was left over from previous calls.
    const concatedBuffer = new Uint8Array(this.buffer.length + chunk.length);
    concatedBuffer.set(this.buffer, 0);
    concatedBuffer.set(chunk, this.buffer.length);
    this.buffer = concatedBuffer;
    const bufferView = new DataView(concatedBuffer.buffer);

    const sampleScale = Math.pow(2, 8 * bps - 1);
    const halfNfft = nfft / 2;

    // One iteration per analysis frame that is fully available in the buffer.
    while ((this.stepIndex + nfft) * bps < this.buffer.length + this.bufferDelta) {
      // Windowed, normalised frame (imaginary part is all zeros).
      const data = new Array(nfft);
      const image = new Array(nfft).fill(0);
      for (let i = 0; i < nfft; i += 1) {
        const readInt = bufferView.getInt16((this.stepIndex + i) * bps - this.bufferDelta, true);
        data[i] = hwin[i] * readInt / sampleScale;
      }
      this.stepIndex += step;
      this.fft.forward(data, image);
      const { spectrum } = this.fft;

      // Weight bins by sqrt(i + 16), boosting higher bins.
      for (let i = ifMin; i < ifMax; i += 1) {
        spectrum[i] = Math.abs(spectrum[i]) * Math.sqrt(i + 16);
      }

      // Positive part of the log spectrum above the masking threshold.
      const diff = new Array(halfNfft);
      for (let i = ifMin; i < ifMax; i += 1) {
        diff[i] = Math.max(Math.log(Math.max(1e-6, spectrum[i])) - this.threshold[i], 0);
      }

      // Keep the `mnlm` strongest local maxima of `diff`, sorted by descending
      // spectrum value (insertion into a fixed-size sorted list).
      const iLocMax = new Array(mnlm).fill(NaN);
      const vLocMax = new Array(mnlm).fill(Number.NEGATIVE_INFINITY);
      for (let i = ifMin + 1; i < ifMax - 1; i += 1) {
        if (diff[i] > diff[i - 1] && diff[i] > diff[i + 1] && spectrum[i] > vLocMax[mnlm - 1]) {
          for (let j = mnlm - 1; j >= 0; j -= 1) {
            // Walk up until the slot above holds a value at least as large.
            if (j >= 1 && spectrum[i] > vLocMax[j - 1]) {
              continue;
            }
            for (let k = mnlm - 1; k >= j + 1; k -= 1) {
              iLocMax[k] = iLocMax[k - 1];
              vLocMax[k] = vLocMax[k - 1];
            }
            iLocMax[j] = i;
            vLocMax[j] = spectrum[i];
            break;
          }
        }
      }

      // Raise the threshold around every peak found; drop unused slots.
      for (let i = 0; i < mnlm; i += 1) {
        if (vLocMax[i] > Number.NEGATIVE_INFINITY) {
          for (let j = ifMin; j < ifMax; j += 1) {
            this.threshold[j] = Math.max(
              this.threshold[j],
              Math.log(spectrum[iLocMax[i]]) + eww[iLocMax[i]][j]
            );
          }
        } else {
          vLocMax.splice(i, mnlm - i);
          iLocMax.splice(i, mnlm - i);
          break;
        }
      }

      this.marks.push({ t: Math.round(this.stepIndex / step), i: iLocMax, v: vLocMax });

      // Backward pruning: invalidate recent peaks that the current threshold
      // (aged by the decay they would have experienced) now masks.
      const nm = this.marks.length;
      const t0 = nm - pruningDt - 1;
      for (let i = nm - 1; i >= Math.max(t0 + 1, 0); i -= 1) {
        const mark = this.marks[i];
        for (let j = 0; j < mark.v.length; j += 1) {
          if (mark.i[j] !== 0 && Math.log(mark.v[j]) < this.threshold[mark.i[j]] + maskDecayLog * (nm - 1 - i)) {
            mark.v[j] = Number.NEGATIVE_INFINITY;
            mark.i[j] = Number.NEGATIVE_INFINITY;
          }
        }
      }

      // Pair each peak of the frame that just left the pruning window with up
      // to `mppp` earlier peaks within `windowDt` frames and `windowDf` bins.
      let nFingersTotal = 0;
      if (t0 >= 0) {
        const m = this.marks[t0];
        for (let i = 0; i < m.i.length; i += 1) {
          let nFingers = 0;
          pairing:
          for (let j = t0; j >= Math.max(0, t0 - windowDt); j -= 1) {
            const m2 = this.marks[j];
            for (let k = 0; k < m2.i.length; k += 1) {
              if (m2.i[k] !== m.i[i] && Math.abs(m2.i[k] - m.i[i]) < windowDf) {
                tcodes.push(m.t);
                // Hash packs (paired bin, anchor bin, frame distance) in base nfft / 2.
                hcodes.push(m2.i[k] + halfNfft * (m.i[i] + halfNfft * (t0 - j)));
                nFingers += 1;
                nFingersTotal += 1;
                if (nFingers >= mppp) {
                  break pairing;
                }
              }
            }
          }
        }
      }
      if (nFingersTotal > 0 && verbose) {
        console.log(`t=${Math.round(this.stepIndex / step)} generated ${nFingersTotal} fingerprints`);
      }

      // Forget marks that can no longer be paired (negative counts remove nothing).
      this.marks.splice(0, t0 + 1 - windowDt);

      // Let the masking threshold decay before the next frame.
      for (let j = 0; j < this.threshold.length; j += 1) {
        this.threshold[j] += maskDecayLog;
      }
    }

    // Bound memory: once the buffer is large, keep roughly the last 20000
    // bytes, but never drop bytes the next frame still has to read. Without
    // that cap, a frame larger than 20000 bytes would lose samples and the
    // next read would use a negative offset.
    if (this.buffer.length > 1e6) {
      const consumed = Math.max(0, this.stepIndex * bps - this.bufferDelta);
      const delta = Math.min(this.buffer.length - 2e4, consumed);
      if (delta > 0) {
        this.bufferDelta += delta;
        this.buffer = this.buffer.slice(delta);
      }
    }

    return { tcodes, hcodes };
  }
};
var codegen_landmark_default = Codegen;
export {
codegen_landmark_default as default
};