UNPKG

museaikit

Version: (value not captured in this extract)

A powerful music-focused AI toolkit

310 lines 11.3 kB
import * as tf from '@tensorflow/tfjs';
import FFTModule from 'fft.js';
import * as ndarray from 'ndarray';
import * as resample from 'ndarray-resample';
import { fetch, getOfflineAudioContext, isSafari } from '../core/compat/global';
import * as logging from './logging';

// All audio is decoded/resampled to this rate before feature extraction.
const SAMPLE_RATE = 16000;
const offlineCtx = getOfflineAudioContext(SAMPLE_RATE);

/**
 * Fetches audio from a URL and decodes it into an AudioBuffer.
 * @param {string} url Location of the audio resource.
 * @returns {Promise<AudioBuffer>} The decoded audio.
 * @throws {Error} If the HTTP request reports a failure status.
 */
export async function loadAudioFromUrl(url) {
  const response = await fetch(url);
  // The original piped straight into arrayBuffer(); surface HTTP failures
  // explicitly. Guarded with `=== false` so fetch shims that lack `ok`
  // keep the old behavior.
  if (response.ok === false) {
    throw new Error(`Failed to fetch audio from ${url}: HTTP ${response.status}`);
  }
  const buffer = await response.arrayBuffer();
  return offlineCtx.decodeAudioData(buffer);
}

/**
 * Reads a Blob (e.g. a File from an <input>) and decodes it as audio.
 * @param {Blob} blob The audio data to decode.
 * @returns {Promise<AudioBuffer>} The decoded audio.
 */
export async function loadAudioFromFile(blob) {
  const arrayBuffer = await new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = () => {
      fileReader.abort();
      reject(new DOMException('Something went wrong reading that file.'));
    };
    fileReader.onload = () => {
      resolve(fileReader.result);
    };
    fileReader.readAsArrayBuffer(blob);
  });
  return offlineCtx.decodeAudioData(arrayBuffer);
}

/**
 * Computes a mel spectrogram of an audio signal.
 * NOTE: mutates `params` (fills in `power` and `nFft`) — this matches the
 * original contract, since createMelFilterbank reads `params.nFft` back.
 * @param {Float32Array} y Mono audio samples.
 * @param {Object} params STFT/mel parameters (sampleRate, nFft, hopLength,
 *     winLength, power, fMin, fMax, nMels).
 * @returns {Float32Array[]} One mel vector per STFT frame.
 */
export function melSpectrogram(y, params) {
  if (!params.power) {
    params.power = 2.0;
  }
  const stftMatrix = stft(y, params);
  const [spec, nFft] = magSpectrogram(stftMatrix, params.power);
  params.nFft = nFft;
  const melBasis = createMelFilterbank(params);
  return applyWholeFilterbank(spec, melBasis);
}

/**
 * Converts a power spectrogram to decibels, clamping each frame's dynamic
 * range to `topDb` below that frame's maximum.
 * @param {Float32Array[]} spec Power spectrogram frames.
 * @param {number} amin Floor applied before taking the log (avoids log(0)).
 * @param {number} topDb Maximum dB below the per-frame peak; must be >= 0.
 * @returns {Float32Array[]} Log-scaled spectrogram.
 * @throws {Error} If `topDb` is negative.
 */
export function powerToDb(spec, amin = 1e-10, topDb = 80.0) {
  const width = spec.length;
  const height = spec[0].length;
  const logSpec = [];
  for (let i = 0; i < width; i++) {
    logSpec[i] = new Float32Array(height);
    for (let j = 0; j < height; j++) {
      logSpec[i][j] = 10.0 * Math.log10(Math.max(amin, spec[i][j]));
    }
  }
  if (topDb) {
    if (topDb < 0) {
      throw new Error(`topDb must be non-negative.`);
    }
    for (let i = 0; i < width; i++) {
      // Hoisted: the per-frame floor is loop-invariant over j.
      const floor = max(logSpec[i]) - topDb;
      for (let j = 0; j < height; j++) {
        logSpec[i][j] = Math.max(logSpec[i][j], floor);
      }
    }
  }
  return logSpec;
}

/**
 * Returns a mono signal: channel 0 for mono input, the sample-wise average
 * of both channels for stereo input.
 * @throws {Error} For channel counts other than 1 or 2.
 */
function getMonoAudio(audioBuffer) {
  if (audioBuffer.numberOfChannels === 1) {
    return audioBuffer.getChannelData(0);
  }
  if (audioBuffer.numberOfChannels !== 2) {
    throw Error(`${audioBuffer.numberOfChannels} channel audio is not supported.`);
  }
  const ch0 = audioBuffer.getChannelData(0);
  const ch1 = audioBuffer.getChannelData(1);
  const mono = new Float32Array(audioBuffer.length);
  for (let i = 0; i < audioBuffer.length; ++i) {
    mono[i] = (ch0[i] + ch1[i]) / 2;
  }
  return mono;
}

/**
 * Resamples an AudioBuffer to `targetSr` and mixes it down to mono.
 * Uses WebAudio's OfflineAudioContext where available; falls back to
 * ndarray-resample on Safari.
 * @param {AudioBuffer} audioBuffer Input audio.
 * @param {number} targetSr Target sample rate (defaults to SAMPLE_RATE).
 * @returns {Promise<Float32Array>|Float32Array} Mono samples at targetSr.
 */
export async function resampleAndMakeMono(audioBuffer, targetSr = SAMPLE_RATE) {
  if (audioBuffer.sampleRate === targetSr) {
    return getMonoAudio(audioBuffer);
  }
  const sourceSr = audioBuffer.sampleRate;
  // Typed arrays require an integer length; the original could compute a
  // fractional value here and throw a RangeError in the Safari branch.
  const lengthRes = Math.floor((audioBuffer.length * targetSr) / sourceSr);
  if (!isSafari) {
    const _offlineCtx = new OfflineAudioContext(
        audioBuffer.numberOfChannels, audioBuffer.duration * targetSr, targetSr);
    const bufferSource = _offlineCtx.createBufferSource();
    bufferSource.buffer = audioBuffer;
    bufferSource.connect(_offlineCtx.destination);
    bufferSource.start();
    return _offlineCtx.startRendering().then((buffer) => buffer.getChannelData(0));
  } else {
    logging.log('Safari does not support WebAudio resampling, so this may be slow.', 'O&F', 5);
    const originalAudio = getMonoAudio(audioBuffer);
    const resampledAudio = new Float32Array(lengthRes);
    resample(ndarray(resampledAudio, [lengthRes]), ndarray(originalAudio, [originalAudio.length]));
    return resampledAudio;
  }
}

/**
 * Raises the magnitude of each STFT frame to `power`.
 * @returns {[Float32Array[], number]} [spectrogram, derived nFft value].
 */
function magSpectrogram(stft, power) {
  const spec = stft.map((fft) => pow(mag(fft), power));
  const nFft = stft[0].length - 1;
  return [spec, nFft];
}

/**
 * Short-time Fourier transform: Hann-windowed, centered (reflect-padded)
 * frames, each transformed with fft.js.
 * @returns {Float32Array[]} One row per frame of nFft/2 + 1 interleaved
 *     (re, im) pairs, i.e. nFft + 2 floats.
 */
function stft(y, params) {
  const nFft = params.nFft || 2048;
  const winLength = params.winLength || nFft;
  const hopLength = params.hopLength || Math.floor(winLength / 4);
  let fftWindow = hannWindow(winLength);
  fftWindow = padCenterToLength(fftWindow, nFft);
  // Center frames by reflect-padding half an FFT length on each side.
  y = padReflect(y, Math.floor(nFft / 2));
  const yFrames = frame(y, nFft, hopLength);
  const width = yFrames.length;
  const height = nFft + 2;
  const stftMatrix = [];
  for (let i = 0; i < width; i++) {
    const windowed = applyWindow(yFrames[i], fftWindow);
    stftMatrix[i] = new Float32Array(height);
    // Keep only the non-negative-frequency half of the complex output.
    stftMatrix[i].set(fft(windowed).slice(0, height));
  }
  return stftMatrix;
}

/** Applies the mel filterbank to every spectrogram frame. */
function applyWholeFilterbank(spec, filterbank) {
  const out = [];
  for (let i = 0; i < spec.length; i++) {
    out[i] = applyFilterbank(spec[i], filterbank);
  }
  return out;
}

/**
 * Projects one magnitude frame onto each filter: dot product of `mags`
 * with every row of `filterbank`.
 * @throws {Error} If dimensions do not match.
 */
function applyFilterbank(mags, filterbank) {
  if (mags.length !== filterbank[0].length) {
    throw new Error(`Each entry in filterbank should have dimensions ` +
        `matching FFT. |mags| = ${mags.length}, ` +
        `|filterbank[0]| = ${filterbank[0].length}.`);
  }
  const out = new Float32Array(filterbank.length);
  for (let i = 0; i < filterbank.length; i++) {
    const win = applyWindow(mags, filterbank[i]);
    out[i] = win.reduce((a, b) => a + b);
  }
  return out;
}

/**
 * Element-wise product of `buffer` and `win`.
 * Returns null (after logging) on a length mismatch — preserved from the
 * original; callers treat matching lengths as an invariant.
 */
export function applyWindow(buffer, win) {
  if (buffer.length !== win.length) {
    console.error(`Buffer length ${buffer.length} != window length ${win.length}.`);
    return null;
  }
  const out = new Float32Array(buffer.length);
  for (let i = 0; i < buffer.length; i++) {
    out[i] = win[i] * buffer[i];
  }
  return out;
}

/**
 * Zero-pads `data` symmetrically out to `length` (extra sample on the right
 * when the difference is odd).
 * @throws {Error} If data is already longer than length.
 */
export function padCenterToLength(data, length) {
  if (data.length > length) {
    throw new Error('Data is longer than length.');
  }
  const paddingLeft = Math.floor((length - data.length) / 2);
  const paddingRight = length - data.length - paddingLeft;
  return padConstant(data, [paddingLeft, paddingRight]);
}

/**
 * Zero-pads `data`. `padding` is either a single count applied to both
 * sides or a [left, right] pair.
 */
export function padConstant(data, padding) {
  let padLeft, padRight;
  if (typeof padding === 'object') {
    [padLeft, padRight] = padding;
  } else {
    padLeft = padRight = padding;
  }
  const out = new Float32Array(data.length + padLeft + padRight);
  out.set(data, padLeft);
  return out;
}

/** Pads `data` by `padding` samples on each side, mirroring edge samples. */
function padReflect(data, padding) {
  const out = padConstant(data, padding);
  for (let i = 0; i < padding; i++) {
    out[i] = out[2 * padding - i];
    out[out.length - i - 1] = out[out.length - 2 * padding + i - 1];
  }
  return out;
}

/**
 * Slices `data` into overlapping frames of `frameLength`, advancing by
 * `hopLength` samples per frame. Trailing samples that do not fill a whole
 * frame are dropped.
 */
export function frame(data, frameLength, hopLength) {
  const bufferCount = Math.floor((data.length - frameLength) / hopLength) + 1;
  const buffers = Array.from(
      { length: bufferCount }, () => new Float32Array(frameLength));
  for (let i = 0; i < bufferCount; i++) {
    const start = i * hopLength;
    // Removed a dead short-length check: `continue` as the final statement
    // of the loop body (after set() had already run) had no effect.
    buffers[i].set(data.slice(start, start + frameLength));
  }
  return buffers;
}

/**
 * Builds a triangular mel filterbank (Slaney-style area normalization),
 * mirroring librosa.filters.mel.
 * @returns {Float32Array[]} nMels rows of per-FFT-bin weights.
 */
function createMelFilterbank(params) {
  const fMin = params.fMin || 0;
  const fMax = params.fMax || params.sampleRate / 2;
  const nMels = params.nMels || 128;
  const nFft = params.nFft || 2048;
  // Center frequencies of FFT bins and of the mel bands (+2 band edges).
  const fftFreqs = calculateFftFreqs(params.sampleRate, nFft);
  const melFreqs = calculateMelFreqs(nMels + 2, fMin, fMax);
  const melDiff = internalDiff(melFreqs);
  const ramps = outerSubtract(melFreqs, fftFreqs);
  const filterSize = ramps[0].length;
  const weights = [];
  for (let i = 0; i < nMels; i++) {
    weights[i] = new Float32Array(filterSize);
    for (let j = 0; j < ramps[i].length; j++) {
      // Rising edge of triangle i meets the falling edge of triangle i+2.
      const lower = -ramps[i][j] / melDiff[i];
      const upper = ramps[i + 2][j] / melDiff[i + 1];
      weights[i][j] = Math.max(0, Math.min(lower, upper));
    }
  }
  // Normalize each filter to constant area across its bandwidth.
  for (let i = 0; i < weights.length; i++) {
    const enorm = 2.0 / (melFreqs[2 + i] - melFreqs[i]);
    weights[i] = weights[i].map((val) => val * enorm);
  }
  return weights;
}

/**
 * Forward FFT of a real signal via fft.js.
 * @returns Interleaved (re, im) complex array of length 2 * y.length.
 */
function fft(y) {
  // Renamed from `fft` — the original local shadowed this function's name.
  const fftCalc = new FFTModule(y.length);
  const out = fftCalc.createComplexArray();
  const data = fftCalc.toComplexArray(y);
  fftCalc.transform(out, data);
  return out;
}

/** Symmetric Hann window of the given length (denominator length - 1). */
export function hannWindow(length) {
  const win = new Float32Array(length);
  for (let i = 0; i < length; i++) {
    win[i] = 0.5 * (1 - Math.cos((2 * Math.PI * i) / (length - 1)));
  }
  return win;
}

/** `count` evenly spaced values from `start` to `end`, inclusive. */
function linearSpace(start, end, count) {
  const delta = (end - start) / (count - 1);
  const out = new Float32Array(count);
  for (let i = 0; i < count; i++) {
    out[i] = start + delta * i;
  }
  return out;
}

/** Magnitudes of an interleaved (re, im) complex array. */
function mag(y) {
  const out = new Float32Array(y.length / 2);
  for (let i = 0; i < y.length / 2; i++) {
    out[i] = Math.sqrt(y[i * 2] * y[i * 2] + y[i * 2 + 1] * y[i * 2 + 1]);
  }
  return out;
}
export function midiToHz(notes) { let notesTensor = tf.sub(notes, 69.0); notesTensor = tf.div(notesTensor, 12.0); notesTensor = tf.pow(2.0, notesTensor); notesTensor = tf.mul(440.0, notesTensor); return notesTensor; } export async function hzToMidi(frequencies) { let frequenciesTensor = tf.sub(tf.div(tf.log(frequencies), tf.log(2)), tf.div(tf.log(440.0), tf.log(2))); frequenciesTensor = tf.mul(12, frequenciesTensor); frequenciesTensor = tf.add(frequenciesTensor, 69); const frequenciesVal = await frequenciesTensor.array(); return frequenciesVal; } function hzToMel(hz) { return 1125.0 * Math.log(1 + hz / 700.0); } function melToHz(mel) { return 700.0 * (Math.exp(mel / 1125.0) - 1); } function calculateFftFreqs(sampleRate, nFft) { return linearSpace(0, sampleRate / 2, Math.floor(1 + nFft / 2)); } function calculateMelFreqs(nMels, fMin, fMax) { const melMin = hzToMel(fMin); const melMax = hzToMel(fMax); const mels = linearSpace(melMin, melMax, nMels); const hzs = mels.map((mel) => melToHz(mel)); return hzs; } function internalDiff(arr) { const out = new Float32Array(arr.length - 1); for (let i = 0; i < arr.length; i++) { out[i] = arr[i + 1] - arr[i]; } return out; } function outerSubtract(arr, arr2) { const out = []; for (let i = 0; i < arr.length; i++) { out[i] = new Float32Array(arr2.length); } for (let i = 0; i < arr.length; i++) { for (let j = 0; j < arr2.length; j++) { out[i][j] = arr[i] - arr2[j]; } } return out; } function pow(arr, power) { return arr.map((v) => Math.pow(v, power)); } function max(arr) { return arr.reduce((a, b) => Math.max(a, b)); } //# sourceMappingURL=audio_utils.js.map