UNPKG

@qgustavor/stream-audio-fingerprint

Version:

Audio landmark fingerprinting as a JavaScript module

258 lines (256 loc) 8.35 kB
// src/lib/fft.ts

/**
 * Reverses the lowest `bits` bits of `x`.
 *
 * @param {number} x - Non-negative integer whose low bits are reversed.
 * @param {number} bits - Number of low-order bits to reverse.
 * @returns {number} The bit-reversed value.
 */
function reverseBits(x, bits) {
  let y = 0;
  for (let i = 0; i < bits; i++) {
    y = (y << 1) | (x & 1);
    x >>>= 1;
  }
  return y;
}

/**
 * In-place radix-2 FFT for a fixed power-of-two length that also keeps a
 * magnitude spectrum of the most recent transform.
 *
 * Instance fields:
 * - `n`: transform length.
 * - `levels`: log2(n).
 * - `cosTable` / `sinTable`: n/2 precomputed twiddle factors.
 * - `spectrum`: magnitudes written by `calculateSpectrum`.
 * - `peak` / `peakBand`: largest magnitude seen so far and its bin; they are
 *   never reset between transforms.
 */
class FFTNayuki {
  /**
   * @param {number} n - Transform length; must be a power of two below 2^32.
   * @throws {Error} If `n` is not a power of two.
   */
  constructor(n) {
    this.peakBand = 0;
    this.peak = 0;
    this.n = n;
    this.levels = -1;
    for (let i = 0; i < 32; i++) {
      if (1 << i === n) {
        this.levels = i;
      }
    }
    if (this.levels === -1) {
      throw new Error("Length is not a power of 2");
    }
    this.cosTable = new Array(n / 2);
    this.sinTable = new Array(n / 2);
    for (let i = 0; i < n / 2; i++) {
      this.cosTable[i] = Math.cos((2 * Math.PI * i) / n);
      this.sinTable[i] = Math.sin((2 * Math.PI * i) / n);
    }
    this.spectrum = new Array(n / 4);
  }

  /**
   * Computes the forward transform in place, then refreshes `spectrum`.
   *
   * @param {number[]} real - Real parts, length `n`; overwritten with the result.
   * @param {number[]} imag - Imaginary parts, length `n`; overwritten with the result.
   */
  forward(real, imag) {
    // Bit-reversal permutation so the butterflies below can run in place.
    for (let i = 0; i < this.n; i++) {
      const j = reverseBits(i, this.levels);
      if (j > i) {
        let temp = real[i];
        real[i] = real[j];
        real[j] = temp;
        temp = imag[i];
        imag[i] = imag[j];
        imag[j] = temp;
      }
    }
    // Butterfly passes over blocks of doubling size.
    for (let size = 2; size <= this.n; size *= 2) {
      const halfsize = size / 2;
      const tablestep = this.n / size;
      for (let i = 0; i < this.n; i += size) {
        for (let j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
          const l = j + halfsize;
          const tpre = real[l] * this.cosTable[k] + imag[l] * this.sinTable[k];
          const tpim = -real[l] * this.sinTable[k] + imag[l] * this.cosTable[k];
          real[l] = real[j] - tpre;
          imag[l] = imag[j] - tpim;
          real[j] += tpre;
          imag[j] += tpim;
        }
      }
    }
    this.calculateSpectrum(real, imag);
  }

  /**
   * Writes scaled magnitudes of the first n/4 bins into `spectrum` and updates
   * `peak` / `peakBand` when a larger magnitude is seen.
   *
   * NOTE(review): only bins [0, n/4) are filled, while Codegen scans bins up to
   * `ifMax` (default nfft/2). Bins at or above n/4 evaluate to NaN there and
   * can never become peaks. Left as is because changing it would alter the
   * fingerprints produced - confirm whether this cut-off is intentional.
   *
   * @param {number[]} real - Real parts of the transformed signal.
   * @param {number[]} imag - Imaginary parts of the transformed signal.
   */
  calculateSpectrum(real, imag) {
    const bSi = 4 / this.n;
    for (let i = 0, N = this.n / 4; i < N; i++) {
      const mag = bSi * Math.sqrt(real[i] ** 2 + imag[i] ** 2);
      if (mag > this.peak) {
        this.peakBand = i;
        this.peak = mag;
      }
      this.spectrum[i] = mag;
    }
  }
}
const fft_default = FFTNayuki;

// src/codegen_landmark.ts

/**
 * Fills in defaults for every option that is `null` or `undefined`.
 *
 * @param {object} options - Partial option set.
 * @returns {object} Complete option set with these keys:
 *   - `verbose` (false): log progress with `console.log`.
 *   - `samplingRate` (22050): only used here to derive `dt`.
 *   - `bps` (2): bytes per sample.
 *   - `mnlm` (5): maximum number of spectral peaks kept per frame.
 *   - `mppp` (3): maximum number of fingerprints emitted per peak.
 *   - `nfft` (512): FFT length in samples.
 *   - `step` (nfft / 2): samples advanced between frames.
 *   - `dt` (step / samplingRate): frame duration in seconds.
 *   - `hwin`: analysis window of `nfft` coefficients (defaults to a Hann shape).
 *   - `maskDecayLog` (log 0.995): amount added to the log threshold per frame.
 *   - `ifMin` (0) / `ifMax` (nfft / 2): range of bins that is scanned.
 *   - `windowDf` (60): maximum bin distance between paired peaks.
 *   - `windowDt` (96): maximum frame distance between paired peaks.
 *   - `pruningDt` (24): number of recent frames whose peaks may still be pruned.
 *   - `maskDf` (3): width factor of the default masking table.
 *   - `eww`: table where `eww[i][j]` is the log-domain mask a peak at bin `i`
 *     casts on bin `j`.
 */
const buildOptions = (options) => {
  const verbose = options.verbose ?? false;
  const samplingRate = options.samplingRate ?? 22050;
  const bps = options.bps ?? 2;
  const mnlm = options.mnlm ?? 5;
  const mppp = options.mppp ?? 3;
  const nfft = options.nfft ?? 512;
  const step = options.step ?? nfft / 2;
  const dt = options.dt ?? 1 / (samplingRate / step);
  const hwin =
    options.hwin ??
    Array(nfft)
      .fill(null)
      .map((_f, i) => 0.5 * (1 - Math.cos((2 * Math.PI * i) / (nfft - 1))));
  const maskDecayLog = options.maskDecayLog ?? Math.log(0.995);
  const ifMin = options.ifMin ?? 0;
  const ifMax = options.ifMax ?? nfft / 2;
  const windowDf = options.windowDf ?? 60;
  const windowDt = options.windowDt ?? 96;
  const pruningDt = options.pruningDt ?? 24;
  const maskDf = options.maskDf ?? 3;
  const eww =
    options.eww ??
    Array(nfft / 2)
      .fill(null)
      .map((_f, i) =>
        Array(nfft / 2)
          .fill(null)
          .map((_f2, j) => -0.5 * Math.pow((j - i) / maskDf / Math.sqrt(i + 3), 2)),
      );
  return {
    verbose,
    samplingRate,
    bps,
    mnlm,
    mppp,
    nfft,
    step,
    dt,
    hwin,
    maskDecayLog,
    ifMin,
    ifMax,
    windowDf,
    windowDt,
    pruningDt,
    maskDf,
    eww,
  };
};

/**
 * Streaming landmark fingerprinter: feed it consecutive chunks of raw PCM
 * bytes and it returns the fingerprints completed by each chunk.
 */
class Codegen {
  /**
   * @param {object} [options] - Partial option set; see `buildOptions`.
   */
  constructor(options) {
    this.options = buildOptions(options ?? {});
    // Bytes received but not yet fully consumed, plus the absolute stream
    // position (in bytes) of the first byte held in `buffer`.
    this.buffer = new Uint8Array(0);
    this.bufferDelta = 0;
    // Absolute index, in samples, of the first sample of the next frame.
    this.stepIndex = 0;
    // Recent frames, each { t, i: peak bins, v: peak values }.
    this.marks = [];
    // Per-bin log-domain masking threshold.
    this.threshold = Array(this.options.nfft)
      .fill(null)
      .map(() => -3);
    this.fft = new fft_default(this.options.nfft);
  }

  /**
   * Consumes one chunk of PCM bytes and returns the fingerprints it completes.
   *
   * Samples are always decoded as little-endian signed 16-bit integers,
   * whatever `bps` is; `bps` only controls the byte stride and the scaling.
   *
   * @param {Uint8Array} chunk - Next bytes of the stream.
   * @returns {{tcodes: number[], hcodes: number[]}} Parallel arrays: the frame
   *   number and the hash of each fingerprint.
   */
  process(chunk) {
    const {
      verbose,
      bps,
      mnlm,
      mppp,
      nfft,
      step,
      hwin,
      maskDecayLog,
      ifMin,
      ifMax,
      windowDf,
      windowDt,
      pruningDt,
      eww,
    } = this.options;

    if (verbose) {
      const t = Math.round(this.stepIndex / step).toString();
      const received = chunk.length.toString();
      console.log(`t=${t} received ${received} bytes`);
    }

    const tcodes = [];
    const hcodes = [];

    // Append the chunk to the bytes left over from previous calls.
    const joined = new Uint8Array(this.buffer.length + chunk.length);
    joined.set(this.buffer, 0);
    joined.set(chunk, this.buffer.length);
    this.buffer = joined;
    const bufferView = new DataView(joined.buffer);

    const sampleScale = Math.pow(2, 8 * bps - 1);
    const halfFft = nfft / 2;
    const threshold = this.threshold;

    // Run one frame for as long as a full window of samples is buffered.
    while ((this.stepIndex + nfft) * bps < this.buffer.length + this.bufferDelta) {
      // Window the samples and scale them by 2^(8 * bps - 1).
      const data = new Array(nfft);
      const image = new Array(nfft).fill(0);
      for (let i = 0; i < nfft; i++) {
        const readInt = bufferView.getInt16((this.stepIndex + i) * bps - this.bufferDelta, true);
        data[i] = (hwin[i] * readInt) / sampleScale;
      }
      this.stepIndex += step;

      this.fft.forward(data, image);
      const spectrum = this.fft.spectrum;

      // Weight each bin by sqrt(bin + 16).
      for (let i = ifMin; i < ifMax; i += 1) {
        spectrum[i] = Math.abs(spectrum[i]) * Math.sqrt(i + 16);
      }

      // Positive part of the log spectrum above the masking threshold.
      const diff = new Array(halfFft);
      for (let i = ifMin; i < ifMax; i += 1) {
        diff[i] = Math.max(Math.log(Math.max(1e-6, spectrum[i])) - threshold[i], 0);
      }

      // Keep the `mnlm` strongest local maxima of `diff`, ordered by
      // decreasing weighted magnitude.
      const iLocMax = new Array(mnlm);
      const vLocMax = new Array(mnlm);
      for (let i = 0; i < mnlm; i += 1) {
        iLocMax[i] = NaN;
        vLocMax[i] = Number.NEGATIVE_INFINITY;
      }
      for (let i = ifMin + 1; i < ifMax - 1; i += 1) {
        const isPeak = diff[i] > diff[i - 1] && diff[i] > diff[i + 1];
        if (isPeak && spectrum[i] > vLocMax[mnlm - 1]) {
          for (let j = mnlm - 1; j >= 0; j -= 1) {
            // Walk up until the slot whose predecessor is not weaker.
            if (j >= 1 && spectrum[i] > vLocMax[j - 1]) continue;
            for (let k = mnlm - 1; k >= j + 1; k -= 1) {
              iLocMax[k] = iLocMax[k - 1];
              vLocMax[k] = vLocMax[k - 1];
            }
            iLocMax[j] = i;
            vLocMax[j] = spectrum[i];
            break;
          }
        }
      }

      // Raise the threshold around every peak found; drop the unused slots.
      for (let i = 0; i < mnlm; i += 1) {
        if (vLocMax[i] > Number.NEGATIVE_INFINITY) {
          const peakLog = Math.log(spectrum[iLocMax[i]]);
          const mask = eww[iLocMax[i]];
          for (let j = ifMin; j < ifMax; j += 1) {
            threshold[j] = Math.max(threshold[j], peakLog + mask[j]);
          }
        } else {
          vLocMax.splice(i, mnlm - i);
          iLocMax.splice(i, mnlm - i);
          break;
        }
      }

      this.marks.push({
        t: Math.round(this.stepIndex / step),
        i: iLocMax,
        v: vLocMax,
      });

      // Invalidate peaks from the last `pruningDt` frames that now sit below
      // the threshold (relaxed by the decay accumulated since their frame).
      const nm = this.marks.length;
      const t0 = nm - pruningDt - 1;
      for (let i = nm - 1; i >= Math.max(t0 + 1, 0); i -= 1) {
        const mark = this.marks[i];
        for (let j = 0; j < mark.v.length; j += 1) {
          if (
            mark.i[j] !== 0 &&
            Math.log(mark.v[j]) < threshold[mark.i[j]] + maskDecayLog * (nm - 1 - i)
          ) {
            mark.v[j] = Number.NEGATIVE_INFINITY;
            mark.i[j] = Number.NEGATIVE_INFINITY;
          }
        }
      }

      // Pair every peak of frame t0 with peaks of the same or earlier frames
      // that lie within windowDt frames and windowDf bins.
      let nFingersTotal = 0;
      if (t0 >= 0) {
        const m = this.marks[t0];
        for (let i = 0; i < m.i.length; i += 1) {
          let nFingers = 0;
          search: for (let j = t0; j >= Math.max(0, t0 - windowDt); j -= 1) {
            const m2 = this.marks[j];
            for (let k = 0; k < m2.i.length; k += 1) {
              if (m2.i[k] !== m.i[i] && Math.abs(m2.i[k] - m.i[i]) < windowDf) {
                tcodes.push(m.t);
                hcodes.push(m2.i[k] + halfFft * (m.i[i] + halfFft * (t0 - j)));
                nFingers += 1;
                nFingersTotal += 1;
                // At most `mppp` fingerprints per peak.
                if (nFingers >= mppp) break search;
              }
            }
          }
        }
      }
      if (nFingersTotal > 0 && verbose) {
        console.log(
          `t=${Math.round(this.stepIndex / step)} generated ${nFingersTotal} fingerprints`,
        );
      }

      // Forget frames too old to be paired again, then let the threshold decay.
      this.marks.splice(0, t0 + 1 - windowDt);
      for (let j = 0; j < threshold.length; j += 1) {
        threshold[j] += maskDecayLog;
      }
    }

    // Release the bytes that precede the next window. Previously the buffer
    // was cut to a fixed 2e4-byte tail once it passed 1e6 bytes, which could
    // discard unread samples when nfft * bps exceeded 2e4 and made the next
    // call read at a negative offset. Clamping to the buffer length keeps
    // `bufferDelta` exact even when `step` jumps past the buffered data.
    const consumed = Math.floor(
      Math.min(this.stepIndex * bps - this.bufferDelta, this.buffer.length),
    );
    if (consumed > 0) {
      this.buffer = this.buffer.slice(consumed);
      this.bufferDelta += consumed;
    }

    return { tcodes, hcodes };
  }
}
const codegen_landmark_default = Codegen;
export { codegen_landmark_default as default };