// meyda
// Version:
// Real-time feature extraction for the web audio api
// 682 lines (644 loc) • 23.8 kB
// JavaScript
'use strict';
/**
 * Reports whether `num` is an exact power of two (1, 2, 4, 8, ...).
 *
 * The value is halved for as long as it is even and greater than one;
 * only a power of two ends that process at exactly 1.
 *
 * @param {number} num - Value to test.
 * @returns {boolean} `true` when `num` is a power of two, otherwise `false`.
 */
function isPowerOfTwo(num) {
  var remaining = num;
  for (; remaining > 1 && remaining % 2 === 0; remaining = remaining / 2) {
    // keep halving while the value is still even
  }
  return remaining === 1;
}
/**
 * Builds a lookup table mapping FFT bin index to its Bark-scale value.
 *
 * Each bin's frequency is `(bin * sampleRate) / bufferSize`; that frequency
 * is converted with `13 * atan(f / 1315.8) + 3.5 * atan((f / 7518)^2)`.
 *
 * @param {number} length - Number of entries in the returned table.
 * @param {number} sampleRate - Sample rate used to derive bin frequencies.
 * @param {number} bufferSize - Divisor used to derive bin frequencies.
 * @returns {Float32Array} Bark value for every bin index below `length`.
 */
function createBarkScale(length, sampleRate, bufferSize) {
  var scale = new Float32Array(length);
  scale.forEach(function (_unused, bin) {
    // The frequency is held at single precision before conversion, matching
    // a value that has been stored in (and read back from) a Float32Array.
    var hz = Math.fround((bin * sampleRate) / bufferSize);
    scale[bin] =
      13 * Math.atan(hz / 1315.8) +
      3.5 * Math.atan(Math.pow(hz / 7518, 2));
  });
  return scale;
}
/**
 * Converts a value on the Mel scale to a frequency.
 *
 * @param {number} melValue - Value on the Mel scale.
 * @returns {number} `700 * (exp(melValue / 1125) - 1)`.
 */
function _melToFreq(melValue) {
  return 700 * (Math.exp(melValue / 1125) - 1);
}
/**
 * Converts a frequency to its value on the Mel scale.
 *
 * @param {number} freqValue - Frequency to convert.
 * @returns {number} `1125 * ln(1 + freqValue / 700)`.
 */
function _freqToMel(freqValue) {
  return 1125 * Math.log(1 + freqValue / 700);
}
/**
 * Builds a bank of triangular filters spaced evenly on the Mel scale.
 *
 * `numFilters + 2` edge points are spread linearly in Mel between 0 Hz and
 * `sampleRate / 2`, converted back to Hz and mapped to FFT bin indices.
 * Filter `f` ramps up from edge `f` to edge `f + 1` and back down to
 * edge `f + 2`.
 *
 * @param {number} numFilters - Number of triangular filters to create.
 * @param {number} sampleRate - Sample rate of the analysed audio.
 * @param {number} bufferSize - FFT size; each filter has `bufferSize / 2 + 1` weights.
 * @returns {Array<Array<number>>} One array of per-bin weights for each filter.
 */
function createMelFilterBank(numFilters, sampleRate, bufferSize) {
  // Two extra points: the lower and upper limits.
  var melPoints = new Float32Array(numFilters + 2);
  var hzPoints = new Float32Array(numFilters + 2);
  var binEdges = new Array(numFilters + 2);

  // Limits of the bank in Mel: 0 Hz up to the Nyquist frequency.
  var melFloor = _freqToMel(0);
  var melCeiling = _freqToMel(sampleRate / 2);
  // Spacing between neighbouring edge points on the Mel axis.
  var melStep = (melCeiling - melFloor) / (numFilters + 1);

  var point;
  for (point = 0; point < melPoints.length; point++) {
    melPoints[point] = point * melStep;
    hzPoints[point] = _melToFreq(melPoints[point]);
    binEdges[point] = Math.floor(
      ((bufferSize + 1) * hzPoints[point]) / sampleRate
    );
  }

  var bank = new Array(numFilters);
  var filter;
  var bin;
  for (filter = 0; filter < bank.length; filter++) {
    var left = binEdges[filter];
    var peak = binEdges[filter + 1];
    var right = binEdges[filter + 2];
    // Every filter starts as all zeros across bufferSize / 2 + 1 bins.
    var weights = new Array(bufferSize / 2 + 1).fill(0);
    // Rising slope.
    for (bin = left; bin < peak; bin++) {
      weights[bin] = (bin - left) / (peak - left);
    }
    // Falling slope.
    for (bin = peak; bin < right; bin++) {
      weights[bin] = (right - bin) / (right - peak);
    }
    bank[filter] = weights;
  }
  return bank;
}
/**
 * Splits `buffer` into consecutive frames of `frameLength` samples, each
 * starting `hopLength` samples after the previous one. Trailing samples that
 * do not fill a whole frame are dropped.
 *
 * @param {Array|Float32Array} buffer - Samples to split (must support `slice`).
 * @param {number} frameLength - Number of samples per frame.
 * @param {number} hopLength - Offset between the starts of successive frames.
 * @returns {Array} The frames, in order.
 * @throws {Error} When the buffer is shorter than one frame, or when
 *   `hopLength` or `frameLength` is below 1.
 */
function frame(buffer, frameLength, hopLength) {
  if (buffer.length < frameLength) {
    throw new Error("Buffer is too short for frame length");
  }
  if (hopLength < 1) {
    throw new Error("Hop length cannot be less that 1");
  }
  if (frameLength < 1) {
    throw new Error("Frame length cannot be less that 1");
  }
  var frameCount = 1 + Math.floor((buffer.length - frameLength) / hopLength);
  var frames = new Array(frameCount);
  for (var index = 0; index < frameCount; index++) {
    var start = index * hopLength;
    frames[index] = buffer.slice(start, start + frameLength);
  }
  return frames;
}
/**
 * Root-mean-square level of a signal: the square root of the mean of the
 * squared samples.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.signal - Samples to measure.
 * @returns {number} The RMS value (`NaN` for an empty signal).
 * @throws {TypeError} When `signal` is not of type "object".
 */
function rms(_a) {
  var samples = _a.signal;
  // Deliberately loose runtime check, kept for consistency with the
  // other extractors.
  if (typeof samples !== "object") {
    throw new TypeError();
  }
  var sumOfSquares = 0;
  var n = 0;
  while (n < samples.length) {
    sumOfSquares += Math.pow(samples[n], 2);
    n += 1;
  }
  var meanSquare = sumOfSquares / samples.length;
  return Math.sqrt(meanSquare);
}
/**
 * Total energy of a signal: the sum of the squared absolute sample values.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.signal - Samples to measure.
 * @returns {number} Sum of `|sample|^2` over the signal (0 when empty).
 * @throws {TypeError} When `signal` is not of type "object".
 */
function energy(_a) {
  var samples = _a.signal;
  if (typeof samples !== "object") {
    throw new TypeError();
  }
  var total = 0;
  var n = 0;
  while (n < samples.length) {
    var magnitude = Math.abs(samples[n]);
    total += Math.pow(magnitude, 2);
    n += 1;
  }
  return total;
}
/**
 * Spectral slope: a linear-regression style measure of how amplitude changes
 * with frequency across the spectrum, normalised by the summed amplitude.
 *
 * Bin `i` is assigned the frequency `(i * sampleRate) / bufferSize`.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {number} _a.sampleRate - Sample rate used to derive bin frequencies.
 * @param {number} _a.bufferSize - Divisor used to derive bin frequencies.
 * @returns {number} The slope value.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralSlope (_a) {
  var ampSpectrum = _a.ampSpectrum, sampleRate = _a.sampleRate, bufferSize = _a.bufferSize;
  if (typeof ampSpectrum !== "object") {
    throw new TypeError();
  }
  // Running sums for the regression. The per-bin frequencies are consumed
  // immediately, so no intermediate frequency buffer is allocated.
  var ampSum = 0;
  var freqSum = 0;
  var powFreqSum = 0;
  var ampFreqSum = 0;
  for (var i = 0; i < ampSpectrum.length; i++) {
    var curFreq = (i * sampleRate) / bufferSize;
    ampSum += ampSpectrum[i];
    powFreqSum += curFreq * curFreq;
    freqSum += curFreq;
    ampFreqSum += curFreq * ampSpectrum[i];
  }
  // NOTE(review): a textbook least-squares slope divides by
  // N * sum(f^2) - sum(f)^2; the N factor is absent here. The existing
  // expression is kept so returned values do not change - confirm intent.
  return ((ampSpectrum.length * ampFreqSum - freqSum * ampSum) /
    (ampSum * (powFreqSum - Math.pow(freqSum, 2))));
}
/**
 * Amplitude-weighted raw moment of order `i` over bin indices.
 *
 * The numerator sums `k^i * |amplitude[k]|`; the denominator sums the
 * amplitudes as given (without taking the absolute value).
 *
 * @param {number} i - Order of the moment.
 * @param {Array|Float32Array} amplitudeSpect - Amplitude per bin.
 * @returns {number} The moment (`NaN` when the amplitudes sum to zero).
 */
function mu(i, amplitudeSpect) {
  var weightedSum = 0;
  var amplitudeSum = 0;
  var bin = 0;
  while (bin < amplitudeSpect.length) {
    var amplitude = amplitudeSpect[bin];
    weightedSum += Math.pow(bin, i) * Math.abs(amplitude);
    amplitudeSum += amplitude;
    bin += 1;
  }
  return weightedSum / amplitudeSum;
}
/**
 * Spectral centroid: the amplitude-weighted mean bin index of the spectrum
 * (the first moment computed by `mu`).
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} The centroid, expressed as a bin index.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralCentroid(_a) {
  var spectrum = _a.ampSpectrum;
  if (typeof spectrum !== "object") {
    throw new TypeError();
  }
  var firstMoment = mu(1, spectrum);
  return firstMoment;
}
function spectralRolloff (_a) {
var ampSpectrum = _a.ampSpectrum, sampleRate = _a.sampleRate;
if (typeof ampSpectrum !== "object") {
throw new TypeError();
}
var ampspec = ampSpectrum;
//calculate nyquist bin
var nyqBin = sampleRate / (2 * (ampspec.length - 1));
var ec = 0;
for (var i = 0; i < ampspec.length; i++) {
ec += ampspec[i];
}
var threshold = 0.99 * ec;
var n = ampspec.length - 1;
while (ec > threshold && n >= 0) {
ec -= ampspec[n];
--n;
}
return (n + 1) * nyqBin;
}
/**
 * Spectral flatness: the ratio of the geometric mean of the spectrum to its
 * arithmetic mean.
 *
 * The geometric mean is obtained as `exp(mean(log(amplitude)))`.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} The flatness value.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralFlatness(_a) {
  var spectrum = _a.ampSpectrum;
  if (typeof spectrum !== "object") {
    throw new TypeError();
  }
  var logSum = 0;
  var linearSum = 0;
  var bin = 0;
  while (bin < spectrum.length) {
    logSum += Math.log(spectrum[bin]);
    linearSum += spectrum[bin];
    bin += 1;
  }
  var geometricMean = Math.exp(logSum / spectrum.length);
  return (geometricMean * spectrum.length) / linearSum;
}
/**
 * Spectral spread: the amplitude-weighted standard deviation of bin indices
 * around the spectral centroid, `sqrt(mu2 - mu1^2)`.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} The spread, expressed in bins.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralSpread(_a) {
  var spectrum = _a.ampSpectrum;
  if (typeof spectrum !== "object") {
    throw new TypeError();
  }
  var secondMoment = mu(2, spectrum);
  var firstMoment = mu(1, spectrum);
  var variance = secondMoment - Math.pow(firstMoment, 2);
  return Math.sqrt(variance);
}
/**
 * Spectral skewness: the third central moment of the amplitude-weighted bin
 * distribution divided by the cube of its standard deviation.
 *
 * The central moment is expanded from raw moments as
 * `2*mu1^3 - 3*mu1*mu2 + mu3`.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} The skewness value.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralSkewness(_a) {
  var spectrum = _a.ampSpectrum;
  if (typeof spectrum !== "object") {
    throw new TypeError();
  }
  var first = mu(1, spectrum);
  var second = mu(2, spectrum);
  var third = mu(3, spectrum);
  var thirdCentral = 2 * Math.pow(first, 3) - 3 * first * second + third;
  var deviation = Math.sqrt(second - Math.pow(first, 2));
  return thirdCentral / Math.pow(deviation, 3);
}
/**
 * Spectral kurtosis: the fourth central moment of the amplitude-weighted bin
 * distribution divided by the squared variance.
 *
 * The raw moments mu1..mu4 are gathered in one pass over the spectrum, using
 * `k^order * |amplitude[k]|` in the numerators and the plain amplitude sum as
 * the shared denominator. The fourth central moment is then expanded as
 * `mu4 - 4*mu1*mu3 + 6*mu1^2*mu2 - 3*mu1^4`.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} The kurtosis value.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralKurtosis (_a) {
  var ampSpectrum = _a.ampSpectrum;
  if (typeof ampSpectrum !== "object") {
    throw new TypeError();
  }
  var amplitudeSum = 0;
  var sum1 = 0;
  var sum2 = 0;
  var sum3 = 0;
  var sum4 = 0;
  for (var k = 0; k < ampSpectrum.length; k++) {
    var magnitude = Math.abs(ampSpectrum[k]);
    amplitudeSum += ampSpectrum[k];
    sum1 += Math.pow(k, 1) * magnitude;
    sum2 += Math.pow(k, 2) * magnitude;
    sum3 += Math.pow(k, 3) * magnitude;
    sum4 += Math.pow(k, 4) * magnitude;
  }
  var mu1 = sum1 / amplitudeSum;
  var mu2 = sum2 / amplitudeSum;
  var mu3 = sum3 / amplitudeSum;
  var mu4 = sum4 / amplitudeSum;
  // The mu2 term carries mu1 squared; with a bare mu1 the expression is not
  // the fourth central moment.
  var numerator = -3 * Math.pow(mu1, 4) +
    6 * Math.pow(mu1, 2) * mu2 -
    4 * mu1 * mu3 +
    mu4;
  var denominator = Math.pow(Math.sqrt(mu2 - Math.pow(mu1, 2)), 4);
  return numerator / denominator;
}
/**
 * Zero-crossing rate: counts how many times consecutive samples move between
 * the non-negative side (`>= 0`) and the negative side (`< 0`).
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.signal - Samples to inspect.
 * @returns {number} The number of crossings.
 * @throws {TypeError} When `signal` is not of type "object".
 */
function zcr(_a) {
  var samples = _a.signal;
  if (typeof samples !== "object") {
    throw new TypeError();
  }
  var crossings = 0;
  for (var n = 1; n < samples.length; n++) {
    var before = samples[n - 1];
    var after = samples[n];
    var wentNegative = before >= 0 && after < 0;
    var wentNonNegative = before < 0 && after >= 0;
    if (wentNegative || wentNonNegative) {
      crossings += 1;
    }
  }
  return crossings;
}
/**
 * Per-band and total loudness of a spectrum.
 *
 * The Bark range covered by the spectrum is split into equally wide bands.
 * The amplitudes falling into each band are summed and raised to the power
 * 0.23 to give that band's specific loudness; the total is the sum of all
 * specific values.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array|Float32Array} _a.barkScale - Bark value per frequency bin.
 * @param {number} [_a.numberOfBarkBands=24] - Number of bands to produce.
 * @returns {{specific: Float32Array, total: number}} Specific loudness per
 *   band and their sum.
 * @throws {TypeError} When `ampSpectrum` or `barkScale` is not of type "object".
 */
function loudness(_a) {
  var ampSpectrum = _a.ampSpectrum;
  var barkScale = _a.barkScale;
  var requestedBands = _a.numberOfBarkBands;
  var bandCount = requestedBands === void 0 ? 24 : requestedBands;
  if (typeof ampSpectrum !== "object" || typeof barkScale !== "object") {
    throw new TypeError();
  }
  var specific = new Float32Array(bandCount);
  // bandEdges[b] is the first bin of band b; entry 0 stays at its initial 0.
  var bandEdges = new Int32Array(bandCount + 1);
  var lastBin = ampSpectrum.length - 1;
  var topBark = barkScale[lastBin];

  // Walk the bins and record the bin at which each band boundary is passed.
  var nextEdge = 1;
  var bandCeiling = topBark / bandCount;
  for (var bin = 0; bin < ampSpectrum.length; bin++) {
    while (barkScale[bin] > bandCeiling) {
      bandEdges[nextEdge] = bin;
      nextEdge += 1;
      bandCeiling = (nextEdge * topBark) / bandCount;
    }
  }
  bandEdges[bandCount] = lastBin;

  // Specific loudness of every band.
  for (var band = 0; band < bandCount; band++) {
    var bandSum = 0;
    for (var k = bandEdges[band]; k < bandEdges[band + 1]; k++) {
      bandSum += ampSpectrum[k];
    }
    specific[band] = Math.pow(bandSum, 0.23);
  }

  // Total loudness.
  var total = 0;
  for (var s = 0; s < specific.length; s++) {
    total += specific[s];
  }
  return { specific: specific, total: total };
}
/**
 * Perceptual spread: the squared share of total loudness that lies outside
 * the loudest band, `((total - max) / total)^2`.
 *
 * Loudness is computed with the default number of bands.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array|Float32Array} _a.barkScale - Bark value per frequency bin.
 * @returns {number} The spread value.
 */
function perceptualSpread(_a) {
  var ampSpectrum = _a.ampSpectrum;
  var barkScale = _a.barkScale;
  var result = loudness({ ampSpectrum: ampSpectrum, barkScale: barkScale });
  var bands = result.specific;
  var loudest = 0;
  for (var band = 0; band < bands.length; band++) {
    var value = bands[band];
    if (value > loudest) {
      loudest = value;
    }
  }
  var remainderShare = (result.total - loudest) / result.total;
  return Math.pow(remainderShare, 2);
}
/**
 * Perceptual sharpness derived from the specific loudness of the spectrum.
 *
 * For the first 15 band indices the accumulator receives
 * `(i + 1) * specific[i + 1]`; for the remaining ones it receives
 * `0.066 * exp(0.171 * (i + 1))`. The sum is scaled by `0.11 / total`.
 *
 * NOTE(review): the `i + 1` index into the specific-loudness array, and the
 * fact that the upper-band term does not involve the band's loudness, look
 * unusual - confirm against the intended sharpness model before changing.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array|Float32Array} _a.barkScale - Bark value per frequency bin.
 * @returns {number} The sharpness value.
 */
function perceptualSharpness(_a) {
  var ampSpectrum = _a.ampSpectrum;
  var barkScale = _a.barkScale;
  var result = loudness({ ampSpectrum: ampSpectrum, barkScale: barkScale });
  var bands = result.specific;
  var weighted = 0;
  for (var band = 0; band < bands.length; band++) {
    var position = band + 1;
    weighted += band < 15
      ? position * bands[position]
      : 0.066 * Math.exp(0.171 * position);
  }
  weighted *= 0.11 / result.total;
  return weighted;
}
/**
 * Power spectrum: every amplitude of the input spectrum squared.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {Float32Array} Squared amplitude per bin, same length as the input.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function extractPowerSpectrum(_a) {
  var amplitudes = _a.ampSpectrum;
  if (typeof amplitudes !== "object") {
    throw new TypeError();
  }
  var power = new Float32Array(amplitudes.length);
  var bin = 0;
  while (bin < power.length) {
    power[bin] = Math.pow(amplitudes[bin], 2);
    bin += 1;
  }
  return power;
}
/**
 * Log-scaled Mel band energies of a spectrum.
 *
 * For every filter in `melFilterBank`, the first `bufferSize / 2` bins of the
 * power spectrum (amplitude squared) are weighted by the filter and summed;
 * the band value is `log(sum + 1)`.
 *
 * Squaring, weighting and summing are done in a single loop, so no
 * per-call scratch buffers are allocated. Intermediate values are rounded
 * with `Math.fround` so results are the same as when the intermediates are
 * held in Float32Arrays.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array<Array<number>>} _a.melFilterBank - Per-bin weights for each filter.
 * @param {number} _a.bufferSize - FFT size; `bufferSize / 2` bins are used.
 * @returns {Array<number>} One log-scaled value per filter.
 * @throws {TypeError} When `ampSpectrum` or `melFilterBank` is not of type "object".
 */
function extractMelBands (_a) {
  var ampSpectrum = _a.ampSpectrum, melFilterBank = _a.melFilterBank, bufferSize = _a.bufferSize;
  if (typeof ampSpectrum !== "object") {
    throw new TypeError("Valid ampSpectrum is required to generate melBands");
  }
  if (typeof melFilterBank !== "object") {
    throw new TypeError("Valid melFilterBank is required to generate melBands");
  }
  var numFilters = melFilterBank.length;
  var binCount = bufferSize / 2;
  var loggedMelBands = [];
  for (var i = 0; i < numFilters; i++) {
    var filter = melFilterBank[i];
    var bandEnergy = 0;
    for (var j = 0; j < binCount; j++) {
      // Power of this bin, weighted by the filter, added to the band total.
      var power = Math.fround(Math.pow(ampSpectrum[j], 2));
      var weighted = Math.fround(filter[j] * power);
      bandEnergy = Math.fround(bandEnergy + weighted);
    }
    // Log each band; the +1 keeps an empty band at 0.
    loggedMelBands.push(Math.fround(Math.log(bandEnergy + 1)));
  }
  return loggedMelBands;
}
/**
 * Unwraps the `default` export of a module object.
 *
 * @param {*} x - A module object, or any other value.
 * @returns {*} `x.default` when `x` is truthy, has a truthy `__esModule`
 *   flag and owns a `default` property; otherwise `x` itself.
 */
function getDefaultExportFromCjs(x) {
  var hasOwnDefault =
    x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default');
  if (hasOwnDefault) {
    return x['default'];
  }
  return x;
}
/*===========================================================================*\
* Discrete Cosine Transform
*
* (c) Vail Systems. Joshua Jung and Ben Bryan. 2015
*
* This code is not designed to be highly optimized but as an educational
* tool to understand the Mel-scale and its related coefficients used in
* human speech analysis.
\*===========================================================================*/
// Cache of cosine tables keyed by input size; created on first use.
var cosMap = null;
/**
 * Builds and caches the N x N cosine table for inputs of size N, so repeated
 * DCT calls on same-sized inputs can reuse it. Entry `n + k * N` holds
 * `cos((PI / N) * (n + 0.5) * k)`.
 *
 * @param {number} N - Input size the table is built for.
 */
var memoizeCosines = function (N) {
  if (!cosMap) {
    cosMap = {};
  }
  var table = new Array(N * N);
  cosMap[N] = table;
  var step = Math.PI / N;
  for (var row = 0; row < N; row++) {
    var rowStart = row * N;
    for (var col = 0; col < N; col++) {
      table[col + rowStart] = Math.cos(step * (col + 0.5) * row);
    }
  }
};
/**
 * Discrete cosine transform of `signal`, using the cached cosine table for
 * the signal's length (built on first use).
 *
 * Coefficient `k` is `scale * sum(signal[n] * cos((PI / L) * (n + 0.5) * k))`.
 *
 * @param {Array|Float32Array} signal - Input values.
 * @param {number} [scale=2] - Multiplier applied to every coefficient; any
 *   falsy value falls back to 2.
 * @returns {Array|Float32Array} The coefficients, as produced by `signal.map`.
 */
function dct$2(signal, scale) {
  var size = signal.length;
  var gain = scale || 2;
  if (!cosMap || !cosMap[size]) {
    memoizeCosines(size);
  }
  var table = cosMap[size];
  return signal.map(function (_unused, k) {
    var rowStart = k * size;
    var projection = signal.reduce(function (sum, sample, n) {
      return sum + (sample * table[n + rowStart]);
    }, 0);
    return gain * projection;
  });
}
// Alias chain for the DCT implementation. `dct$1` is the binding that `mfcc`
// calls; it is `dct` passed through getDefaultExportFromCjs, which returns a
// value unchanged unless it is an `__esModule` object owning a `default`.
var dct_1 = dct$2;
var dct = dct_1;
var dct$1 = /*@__PURE__*/getDefaultExportFromCjs(dct);
/**
 * Mel-frequency cepstral coefficients of a spectrum: the leading DCT
 * coefficients of its log Mel band energies.
 *
 * Follows the tutorial at
 * http://practicalcryptography.com/miscellaneous/machine-learning
 * /guide-mel-frequency-cepstral-coefficients-mfccs/
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array<Array<number>>} _a.melFilterBank - Per-bin weights for each filter.
 * @param {number} [_a.numberOfMFCCCoefficients] - Requested coefficient count;
 *   falsy values fall back to 13 and the result is clamped to 1..40.
 * @param {number} _a.bufferSize - FFT size, forwarded to the Mel band extractor.
 * @returns {Array<number>} The requested number of coefficients.
 * @throws {Error} When the filter bank has fewer filters than the number of
 *   coefficients requested.
 */
function mfcc(_a) {
  // @ts-ignore
  var ampSpectrum = _a.ampSpectrum;
  var melFilterBank = _a.melFilterBank;
  var numberOfMFCCCoefficients = _a.numberOfMFCCCoefficients;
  var bufferSize = _a.bufferSize;
  var requested = numberOfMFCCCoefficients || 13;
  var coefficientCount = Math.min(40, Math.max(1, requested));
  if (melFilterBank.length < coefficientCount) {
    throw new Error("Insufficient filter bank for requested number of coefficients");
  }
  var melBands = extractMelBands({
    ampSpectrum: ampSpectrum,
    melFilterBank: melFilterBank,
    bufferSize: bufferSize,
  });
  return dct$1(melBands).slice(0, coefficientCount);
}
/**
 * Chromagram of a spectrum: the spectrum projected onto each row of the
 * chroma filter bank, then scaled so the largest band equals 1.
 *
 * When the largest band is 0 (or otherwise falsy) the unscaled bands are
 * returned.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @param {Array<Array<number>>} _a.chromaFilterBank - Per-bin weights for each chroma band.
 * @returns {Array<number>} One value per chroma band.
 * @throws {TypeError} When `ampSpectrum` or `chromaFilterBank` is not of type "object".
 */
function chroma(_a) {
  var ampSpectrum = _a.ampSpectrum;
  var chromaFilterBank = _a.chromaFilterBank;
  if (typeof ampSpectrum !== "object") {
    throw new TypeError("Valid ampSpectrum is required to generate chroma");
  }
  if (typeof chromaFilterBank !== "object") {
    throw new TypeError("Valid chromaFilterBank is required to generate chroma");
  }
  var bands = chromaFilterBank.map(function (weights) {
    return ampSpectrum.reduce(function (sum, amplitude, bin) {
      return sum + amplitude * weights[bin];
    }, 0);
  });
  var peak = Math.max.apply(Math, bands);
  if (!peak) {
    return bands;
  }
  return bands.map(function (value) {
    return value / peak;
  });
}
// This file isn't being typechecked at all because there are major issues with it.
// See #852 for details. Once that's merged, this file should be typechecked.
// @ts-nocheck
/**
 * Spectral flux: the summed positive change in magnitude between the
 * previous frame and the current one.
 *
 * For every compared index the difference `|signal[i]| - |previousSignal[i]|`
 * is half-wave rectified (negative changes count as 0) and accumulated.
 *
 * Indices `0` up to (but not including) `signal.length / 2 - 1` are compared.
 * Negative indices are never visited: they do not exist on arrays and would
 * turn the whole result into NaN.
 *
 * NOTE(review): the upper bound is kept as it was; confirm whether the
 * intent is to cover only this lower part of `signal` or all of it.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.signal - Values of the current frame.
 * @param {Array|Float32Array} _a.previousSignal - Values of the previous frame.
 * @param {number} [_a.bufferSize] - Accepted for signature compatibility; not used.
 * @returns {number} The accumulated positive change.
 * @throws {TypeError} When `signal` or `previousSignal` is not of type "object".
 */
function spectralFlux (_a) {
  var signal = _a.signal, previousSignal = _a.previousSignal;
  if (typeof signal !== "object" || typeof previousSignal !== "object") {
    throw new TypeError();
  }
  var sf = 0;
  var end = signal.length / 2 - 1;
  for (var i = 0; i < end; i++) {
    // Declared locally: assigning to an undeclared name throws in strict mode.
    var x = Math.abs(signal[i]) - Math.abs(previousSignal[i]);
    // (x + |x|) / 2 is x when positive and 0 otherwise.
    sf += (x + Math.abs(x)) / 2;
  }
  return sf;
}
/**
 * Spectral crest: the ratio of the largest amplitude in the spectrum to the
 * spectrum's root-mean-square amplitude.
 *
 * @param {Object} _a - Extractor arguments.
 * @param {Array|Float32Array} _a.ampSpectrum - Amplitude per frequency bin.
 * @returns {number} `peak / rms` of the spectrum.
 * @throws {TypeError} When `ampSpectrum` is not of type "object".
 */
function spectralCrest(_a) {
  var spectrum = _a.ampSpectrum;
  if (typeof spectrum !== "object") {
    throw new TypeError();
  }
  var sumOfSquares = 0;
  var largest = -Infinity;
  spectrum.forEach(function (amplitude) {
    sumOfSquares += Math.pow(amplitude, 2);
    if (amplitude > largest) {
      largest = amplitude;
    }
  });
  var rootMeanSquare = Math.sqrt(sumOfSquares / spectrum.length);
  return largest / rootMeanSquare;
}
/**
 * Pass-through extractor: hands back the raw signal it was given.
 *
 * @param {Object} args - Extractor arguments.
 * @returns {*} `args.signal`, untouched.
 */
var buffer = function (args) {
  var signal = args.signal;
  return signal;
};
/**
 * Pass-through extractor: hands back the complex spectrum it was given.
 *
 * @param {Object} args - Extractor arguments.
 * @returns {*} `args.complexSpectrum`, untouched.
 */
var complexSpectrum = function (args) {
  var spectrum = args.complexSpectrum;
  return spectrum;
};
/**
 * Pass-through extractor: hands back the amplitude spectrum it was given.
 *
 * @param {Object} args - Extractor arguments.
 * @returns {*} `args.ampSpectrum`, untouched.
 */
var amplitudeSpectrum = function (args) {
  var spectrum = args.ampSpectrum;
  return spectrum;
};
// Frozen, prototype-less lookup table from public feature name to the
// function that computes it. Note that some public names differ from the
// implementing function's name (`melBands` -> extractMelBands,
// `powerSpectrum` -> extractPowerSpectrum). MeydaAnalyzer stores this table
// on its shared state object as `featureExtractors`.
var featureExtractors = /*#__PURE__*/Object.freeze({
__proto__: null,
amplitudeSpectrum: amplitudeSpectrum,
buffer: buffer,
chroma: chroma,
complexSpectrum: complexSpectrum,
energy: energy,
loudness: loudness,
melBands: extractMelBands,
mfcc: mfcc,
perceptualSharpness: perceptualSharpness,
perceptualSpread: perceptualSpread,
powerSpectrum: extractPowerSpectrum,
rms: rms,
spectralCentroid: spectralCentroid,
spectralCrest: spectralCrest,
spectralFlatness: spectralFlatness,
spectralFlux: spectralFlux,
spectralKurtosis: spectralKurtosis,
spectralRolloff: spectralRolloff,
spectralSkewness: spectralSkewness,
spectralSlope: spectralSlope,
spectralSpread: spectralSpread,
zcr: zcr
});
/**
* Meyda's interface to the Web Audio API. MeydaAnalyzer abstracts an API on
* top of the Web Audio API's ScriptProcessorNode, running the Meyda audio
* feature extractors inside that context.
*
* MeydaAnalyzer's constructor should not be called directly - MeydaAnalyzer
* objects should be generated using the {@link createMeydaAnalyzer}
* factory function in the main Meyda class.
*
* Options are of type {@link MeydaAnalyzerOptions}.
*
* @example
* ```javascript
* const analyzer = Meyda.createMeydaAnalyzer({
* "audioContext": audioContext,
* "source": source,
* "bufferSize": 512,
* "featureExtractors": ["rms"],
* "inputs": 2,
* "numberOfMFCCCoefficients": 20,
* "callback": features => {
* levelRangeElement.value = features.rms;
* }
* });
* ```
*/
var MeydaAnalyzer = /** @class */ (function () {
  /**
   * @hidden
   * Validates `options`, copies the resulting configuration onto the shared
   * state object, creates a ScriptProcessorNode connected to the context's
   * destination, connects the source to it, and installs the audio callback
   * that frames incoming audio and runs the configured extractors.
   *
   * @param {Object} options - Analyzer options (audioContext, source,
   *   bufferSize, hopSize, sampleRate, callback, windowingFunction,
   *   startImmediately, channel, inputs, outputs, numberOfMFCCCoefficients,
   *   numberOfBarkBands, featureExtractors).
   * @param {Object} _this - Shared state object; must provide `_errors` and
   *   `extract`, and receives every configured value.
   * @throws The matching entry of `_this._errors` when the audio context or
   *   the source is missing, or when `bufferSize` is not a power of two.
   */
  function MeydaAnalyzer(options, _this) {
    var _this_1 = this;
    this._m = _this;
    if (!options.audioContext) {
      // Same `_errors` table as the two branches below.
      throw this._m._errors.noAC;
    }
    else if (options.bufferSize &&
      !isPowerOfTwo(options.bufferSize)) {
      throw this._m._errors.notPow2;
    }
    else if (!options.source) {
      throw this._m._errors.noSource;
    }
    this._m.audioContext = options.audioContext;
    // TODO: validate options
    // Each setting prefers the option, then any value already present on the
    // shared state, then a built-in default.
    this._m.bufferSize = options.bufferSize || this._m.bufferSize || 256;
    this._m.hopSize = options.hopSize || this._m.hopSize || this._m.bufferSize;
    this._m.sampleRate =
      options.sampleRate || this._m.audioContext.sampleRate || 44100;
    this._m.callback = options.callback;
    this._m.windowingFunction = options.windowingFunction || "hanning";
    this._m.featureExtractors = featureExtractors;
    this._m.EXTRACTION_STARTED = options.startImmediately || false;
    this._m.channel = typeof options.channel === "number" ? options.channel : 0;
    this._m.inputs = options.inputs || 1;
    this._m.outputs = options.outputs || 1;
    this._m.numberOfMFCCCoefficients =
      options.numberOfMFCCCoefficients ||
        this._m.numberOfMFCCCoefficients ||
        13;
    this._m.numberOfBarkBands =
      options.numberOfBarkBands || this._m.numberOfBarkBands || 24;
    // create nodes
    this._m.spn = this._m.audioContext.createScriptProcessor(this._m.bufferSize, this._m.inputs, this._m.outputs);
    this._m.spn.connect(this._m.audioContext.destination);
    this._m._featuresToExtract = options.featureExtractors || [];
    // always recalculate BS and MFB when a new Meyda analyzer is created.
    this._m.barkScale = createBarkScale(this._m.bufferSize, this._m.sampleRate, this._m.bufferSize);
    this._m.melFilterBank = createMelFilterBank(Math.max(this._m.melBands, this._m.numberOfMFCCCoefficients), this._m.sampleRate, this._m.bufferSize);
    this._m.inputData = null;
    this._m.previousInputData = null;
    this._m.frame = null;
    this._m.previousFrame = null;
    this.setSource(options.source);
    this._m.spn.onaudioprocess = function (e) {
      var buffer;
      if (_this_1._m.inputData !== null) {
        _this_1._m.previousInputData = _this_1._m.inputData;
      }
      _this_1._m.inputData = e.inputBuffer.getChannelData(_this_1._m.channel);
      if (!_this_1._m.previousInputData) {
        buffer = _this_1._m.inputData;
      }
      else {
        // Prepend the part of the previous block that lies after the first
        // hop, so frames overlapping the block boundary can be formed.
        buffer = new Float32Array(_this_1._m.previousInputData.length +
          _this_1._m.inputData.length -
          _this_1._m.hopSize);
        buffer.set(_this_1._m.previousInputData.slice(_this_1._m.hopSize));
        buffer.set(_this_1._m.inputData, _this_1._m.previousInputData.length - _this_1._m.hopSize);
      }
      var frames = frame(buffer, _this_1._m.bufferSize, _this_1._m.hopSize);
      frames.forEach(function (f) {
        _this_1._m.frame = f;
        var features = _this_1._m.extract(_this_1._m._featuresToExtract, _this_1._m.frame, _this_1._m.previousFrame);
        // call callback if applicable
        if (typeof _this_1._m.callback === "function" &&
          _this_1._m.EXTRACTION_STARTED) {
          _this_1._m.callback(features);
        }
        _this_1._m.previousFrame = _this_1._m.frame;
      });
    };
  }
  /**
   * Start feature extraction
   * The audio features will be passed to the callback function that was defined
   * in the MeydaOptions that were passed to the factory when constructing the
   * MeydaAnalyzer.
   * @param {(string|Array.<string>)} [features]
   * Change the features that Meyda is extracting. Defaults to the features that
   * were set upon construction in the options parameter.
   * @example
   * ```javascript
   * analyzer.start('chroma');
   * ```
   */
  MeydaAnalyzer.prototype.start = function (features) {
    this._m._featuresToExtract = features || this._m._featuresToExtract;
    this._m.EXTRACTION_STARTED = true;
  };
  /**
   * Stop feature extraction.
   * @example
   * ```javascript
   * analyzer.stop();
   * ```
   */
  MeydaAnalyzer.prototype.stop = function () {
    this._m.EXTRACTION_STARTED = false;
  };
  /**
   * Set the Audio Node for Meyda to listen to.
   * Any previously set source is disconnected from the processor node first.
   * @param {AudioNode} source - The Audio Node for Meyda to listen to
   * @example
   * ```javascript
   * analyzer.setSource(audioSourceNode);
   * ```
   */
  MeydaAnalyzer.prototype.setSource = function (source) {
    this._m.source && this._m.source.disconnect(this._m.spn);
    this._m.source = source;
    this._m.source.connect(this._m.spn);
  };
  /**
   * Set the channel of the audio node for Meyda to listen to.
   * Channel indices are zero-based, so the channel must be strictly below the
   * configured number of inputs; otherwise the current channel is kept and an
   * error is logged.
   * @param {number} channel - the index of the channel on the input audio node
   * for Meyda to listen to.
   * @example
   * ```javascript
   * analyzer.setChannel(0);
   * ```
   */
  MeydaAnalyzer.prototype.setChannel = function (channel) {
    if (channel < this._m.inputs) {
      this._m.channel = channel;
    }
    else {
      console.error("Channel ".concat(channel, " does not exist. Make sure you've provided a value for 'inputs' that is greater than ").concat(channel, " when instantiating the MeydaAnalyzer"));
    }
  };
  /**
   * Get a set of features from the most recently received input block.
   * @param {(string|Array.<string>)} [features]
   * Features to extract; defaults to the currently configured features.
   * @returns The extracted features, or `null` when no audio has been
   * received yet.
   * @example
   * ```javascript
   * analyzer.get('spectralFlatness');
   * ```
   */
  MeydaAnalyzer.prototype.get = function (features) {
    if (this._m.inputData) {
      return this._m.extract(features || this._m._featuresToExtract, this._m.inputData, this._m.previousInputData);
    }
    else {
      return null;
    }
  };
  return MeydaAnalyzer;
}());
// Expose the analyzer constructor as a named property of `exports`.
exports.MeydaAnalyzer = MeydaAnalyzer;