essentia.js
JavaScript library for music/audio analysis and processing, powered by Essentia WebAssembly
/**
* @license
* Copyright (C) 2006-2020 Music Technology Group - Universitat Pompeu Fabra
*
* This file is part of Essentia
*
* Essentia is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation (FSF), either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the Affero GNU General Public License
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
*/
/**
* essentia.js-core JS API
* @class
* @example
* const essentia = new Essentia(EssentiaWASM);
*/
declare class Essentia {
EssentiaWASM: any;
isDebug: boolean;
/**
* @property {EssentiaEmscriptenModule} this.module Essentia WASM emscripten global module object
* @property {string} this.version Essentia WASM backend version
* @property {string} this.algorithmNames List of available Essentia algorithms from the WASM backend
*/
private algorithms;
module: any;
version: string;
algorithmNames: string;
/**
* @constructs
* @param {EssentiaWASM} EssentiaWASM Essentia WASM backend (emscripten global module object), loaded from an 'essentia-wasm.*.js' file
* @param {boolean} [isDebug=false]
*/
constructor(EssentiaWASM: any, isDebug?: boolean);
/**
* Decodes and returns the audio buffer of a given audio URL or blob URI using the Web Audio API.
* (NOTE: This method doesn't work in the Safari browser)
* @async
* @method
* @param {string} audioURL web URL or blob URI of an audio file
* @param {AudioContext} webAudioCtx an instance of Web Audio API `AudioContext`
* @returns {AudioBuffer} decoded audio buffer object
* @memberof Essentia
*/
getAudioBufferFromURL(audioURL: string, webAudioCtx: AudioContext): Promise<AudioBuffer>;
/**
* Decodes and returns the audio channel data from a given audio URL or blob URI using the Web Audio API.
* (NOTE: This method doesn't work in the Safari browser)
* @async
* @method
* @param {string} audioURL web URL or blob URI of an audio file
* @param {AudioContext} webAudioCtx an instance of Web Audio API `AudioContext`
* @param {number} [channel=0] audio channel number
* @returns {Float32Array} the decoded audio data as a Float32Array for the given channel
* @memberof Essentia
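* @example
* // A minimal sketch (assumes a browser context inside an async function; the URL is a placeholder):
* const audioCtx = new AudioContext();
* const audioData = await essentia.getAudioChannelDataFromURL('audio.wav', audioCtx, 0);
* // audioData is a Float32Array with the samples of channel 0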
*/
getAudioChannelDataFromURL(audioURL: string, webAudioCtx: AudioContext, channel?: number): Promise<Float32Array>;
/**
* Convert an AudioBuffer object to a mono audio signal array. The audio signal is downmixed
* to mono using the Essentia `MonoMixer` algorithm if the audio buffer has 2 channels of audio.
* Throws an exception if the input AudioBuffer object has more than 2 channels of audio.
* @method
* @param {AudioBuffer} buffer `AudioBuffer` object decoded from an audio file.
* @returns {Float32Array} audio channel data (downmixed to mono if it is a stereo signal)
* @memberof Essentia
*/
audioBufferToMonoSignal(buffer: AudioBuffer): Float32Array;
/**
* Method to shut down the Essentia algorithm instance after its use
* @method
* @memberof Essentia
*/
shutdown(): void;
/**
* Method for re-instantiating the Essentia algorithms instance after using the shutdown method
* @method
* @memberof Essentia
*/
reinstantiate(): void;
/**
* Delete the essentia.js class instance
* @method
* @memberof Essentia
*/
delete(): void;
/**
* Convert an input JS array into VectorFloat type
* @method
* @param {Float32Array} inputArray input JS typed array
* @returns {VectorFloat} returns vector float
* @memberof Essentia
*/
arrayToVector(inputArray: any): any;
/**
* Convert an input VectorFloat array into typed JS Float32Array
* @method
* @param {VectorFloat} inputVector input VectorFloat array
* @returns {Float32Array} returns converted JS typed array
* @memberof Essentia
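* @example
* // A minimal sketch of a round trip between a JS typed array and an Essentia vector:
* const inputVector = essentia.arrayToVector(new Float32Array([0.1, 0.2, 0.3]));
* const outputArray = essentia.vectorToArray(inputVector); // Float32Array [0.1, 0.2, 0.3]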
*/
vectorToArray(inputVector: any): Float32Array;
/**
* Cuts audio signal data into overlapping frames given a frame size and hop size
* @method
* @param {Float32Array} inputAudioData single-channel audio data
* @param {number} [frameSize=2048] frame size for cutting the audio signal
* @param {number} [hopSize=1024] hop size between consecutive overlapping frames
* @returns {VectorVectorFloat} Returns a 2D vector float of sliced audio frames
* @memberof Essentia
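* @example
* // A sketch of frame-wise processing (assumes `audioData` is a Float32Array of mono samples,
* // and that the returned emscripten vector exposes size(), get() and delete()):
* const frames = essentia.FrameGenerator(audioData, 2048, 1024);
* for (let i = 0; i < frames.size(); i++) {
*   const frame = essentia.vectorToArray(frames.get(i));
*   // ...per-frame analysis goes here
* }
* frames.delete(); // free the WASM-side memory when done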
*/
FrameGenerator(inputAudioData: Float32Array, frameSize?: number, hopSize?: number): any;
/**
* This algorithm downmixes the signal into a single channel given a stereo signal. It is a wrapper around https://essentia.upf.edu/reference/std_MonoMixer.html.
* @method
* @param {VectorFloat} leftSignal the left channel of the stereo audio signal
* @param {VectorFloat} rightSignal the right channel of the stereo audio signal
* @returns {object} {audio: 'the downmixed mono signal'}
* @memberof Essentia
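* @example
* // A sketch (assumes `leftVector` and `rightVector` are VectorFloat channels, e.g. from arrayToVector):
* const mono = essentia.MonoMixer(leftVector, rightVector).audio; // VectorFloat
* const monoArray = essentia.vectorToArray(mono);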
*/
MonoMixer(leftSignal: any, rightSignal: any): any;
/**
* This algorithm computes the EBUR128 loudness descriptors of an audio signal. It is a wrapper around https://essentia.upf.edu/reference/std_LoudnessEBUR128.html.
* @method
* @param {VectorFloat} leftSignal the left channel of the stereo audio signal
* @param {VectorFloat} rightSignal the right channel of the stereo audio signal
* @param {number} [hopSize=0.1] the hop size with which the loudness is computed [s]
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @param {boolean} [startAtZero=false] start momentary/short-term loudness estimation at time 0 (zero-centered loudness estimation windows) if true; otherwise start both windows at time 0 (time positions for momentary and short-term values will not be synchronized)
* @returns {object} {momentaryLoudness: 'momentary loudness (over 400ms) (LUFS)', shortTermLoudness: 'short-term loudness (over 3 seconds) (LUFS)', integratedLoudness: 'integrated loudness (overall) (LUFS)', loudnessRange: 'loudness range over an arbitrary long time interval [3] (dB, LU)'}
* @memberof Essentia
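* @example
* // A sketch (assumes `buffer` is a stereo AudioBuffer decoded beforehand):
* const left = essentia.arrayToVector(buffer.getChannelData(0));
* const right = essentia.arrayToVector(buffer.getChannelData(1));
* const loudness = essentia.LoudnessEBUR128(left, right, 0.1, buffer.sampleRate);
* console.log(loudness.integratedLoudness); // overall loudness in LUFS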
*/
LoudnessEBUR128(leftSignal: any, rightSignal: any, hopSize?: number, sampleRate?: number, startAtZero?: boolean): any;
/**
* This algorithm computes the ratio between the pitch energy after the pitch maximum and the pitch energy before the pitch maximum. Sounds having a monotonically ascending pitch or one unique pitch will show a value of (0,1], while sounds having a monotonically descending pitch will show a value of [1,inf). In case there is no energy before the max pitch, the algorithm will return the energy after the maximum pitch. Check https://essentia.upf.edu/reference/std_AfterMaxToBeforeMaxEnergyRatio.html for more details.
* @method
* @param {VectorFloat} pitch the array of pitch values [Hz]
* @returns {object} {afterMaxToBeforeMaxEnergyRatio: 'the ratio between the pitch energy after the pitch maximum to the pitch energy before the pitch maximum'}
* @memberof Essentia
*/
AfterMaxToBeforeMaxEnergyRatio(pitch: any): any;
/**
* This algorithm implements an IIR all-pass filter of order 1 or 2. Because of its dependence on IIR, IIR's requirements are inherited. Check https://essentia.upf.edu/reference/std_AllPass.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @param {number} [bandwidth=500] the bandwidth of the filter [Hz] (used only for 2nd-order filters)
* @param {number} [cutoffFrequency=1500] the cutoff frequency for the filter [Hz]
* @param {number} [order=1] the order of the filter
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {signal: 'the filtered signal'}
* @memberof Essentia
*/
AllPass(signal: any, bandwidth?: number, cutoffFrequency?: number, order?: number, sampleRate?: number): any;
/**
* This algorithm creates a wave file in which a given audio signal is mixed with a series of time onsets. The sonification of the onsets can be heard as beeps, or as short white noise pulses if configured to do so. Check https://essentia.upf.edu/reference/std_AudioOnsetsMarker.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @param {any[]} [onsets=[]] the list of onset locations [s]
* @param {number} [sampleRate=44100] the sampling rate of the output signal [Hz]
* @param {string} [type=beep] the type of sound to be added on the event
* @returns {object} {signal: 'the input signal mixed with bursts at onset locations'}
* @memberof Essentia
*/
AudioOnsetsMarker(signal: any, onsets?: any[], sampleRate?: number, type?: string): any;
/**
* This algorithm computes the autocorrelation vector of a signal.
It uses the version most commonly used in signal processing, which doesn't remove the mean from the observations.
Using the 'generalized' option this algorithm computes autocorrelation as described in [3]. Check https://essentia.upf.edu/reference/std_AutoCorrelation.html for more details.
* @method
* @param {VectorFloat} array the array to be analyzed
* @param {number} [frequencyDomainCompression=0.5] factor at which FFT magnitude is compressed (only used if 'generalized' is set to true, see [3])
* @param {boolean} [generalized=false] bool value to indicate whether to compute the 'generalized' autocorrelation as described in [3]
* @param {string} [normalization=standard] type of normalization to compute: either 'standard' (default) or 'unbiased'
* @returns {object} {autoCorrelation: 'the autocorrelation vector'}
* @memberof Essentia
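* @example
* // A sketch on a toy signal:
* const signal = essentia.arrayToVector(new Float32Array([1, -1, 1, -1]));
* const ac = essentia.vectorToArray(essentia.AutoCorrelation(signal).autoCorrelation);
* // ac[0] is the zero-lag term (the energy of the signal)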
*/
AutoCorrelation(array: any, frequencyDomainCompression?: number, generalized?: boolean, normalization?: string): any;
/**
* This algorithm computes the bark-frequency cepstrum coefficients of a spectrum. Bark bands and their subsequent usage in cepstral analysis have been shown to be useful in percussive content [1, 2].
This algorithm is implemented using the Bark scaling approach in the Rastamat version of the MFCC algorithm and in a similar manner to the MFCC-FB40 default specs: Check https://essentia.upf.edu/reference/std_BFCC.html for more details.
* @method
* @param {VectorFloat} spectrum the audio spectrum
* @param {number} [dctType=2] the DCT type
* @param {number} [highFrequencyBound=11000] the upper bound of the frequency range [Hz]
* @param {number} [inputSize=1025] the size of input spectrum
* @param {number} [liftering=0] the liftering coefficient. Use '0' to bypass it
* @param {string} [logType=dbamp] logarithmic compression type. Use 'dbpow' if working with power and 'dbamp' if working with magnitudes
* @param {number} [lowFrequencyBound=0] the lower bound of the frequency range [Hz]
* @param {string} [normalize=unit_sum] 'unit_max' makes the vertex of all the triangles equal to 1, 'unit_sum' makes the area of all the triangles equal to 1
* @param {number} [numberBands=40] the number of bark bands in the filter
* @param {number} [numberCoefficients=13] the number of output cepstrum coefficients
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @param {string} [type=power] use magnitude or power spectrum
* @param {string} [weighting=warping] type of weighting function for determining triangle area
* @returns {object} {bands: 'the energies in bark bands', bfcc: 'the bark frequency cepstrum coefficients'}
* @memberof Essentia
*/
BFCC(spectrum: any, dctType?: number, highFrequencyBound?: number, inputSize?: number, liftering?: number, logType?: string, lowFrequencyBound?: number, normalize?: string, numberBands?: number, numberCoefficients?: number, sampleRate?: number, type?: string, weighting?: string): any;
/**
* This algorithm implements a break point function which linearly interpolates between discrete xy-coordinates to construct a continuous function. Check https://essentia.upf.edu/reference/std_BPF.html for more details.
* @method
* @param {number} x the input coordinate (x-axis)
* @param {any[]} [xPoints=[0, 1]] the x-coordinates of the points forming the break-point function (the points must be arranged in ascending order and cannot contain duplicates)
* @param {any[]} [yPoints=[0, 1]] the y-coordinates of the points forming the break-point function
* @returns {object} {y: 'the output coordinate (y-axis)'}
* @memberof Essentia
*/
BPF(x: number, xPoints?: any[], yPoints?: any[]): any;
/**
* This algorithm implements a 2nd order IIR band-pass filter. Because of its dependence on IIR, IIR's requirements are inherited. Check https://essentia.upf.edu/reference/std_BandPass.html for more details.
* @method
* @param {VectorFloat} signal the input audio signal
* @param {number} [bandwidth=500] the bandwidth of the filter [Hz]
* @param {number} [cutoffFrequency=1500] the cutoff frequency for the filter [Hz]
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {signal: 'the filtered signal'}
* @memberof Essentia
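* @example
* // A sketch (assumes `signalVector` is a VectorFloat of audio samples at 44100 Hz):
* const filtered = essentia.BandPass(signalVector, 500, 1500, 44100).signal;
* const filteredArray = essentia.vectorToArray(filtered);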
*/
BandPass(signal: any, bandwidth?: number, cutoffFrequency?: number, sampleRate?: number): any;
/**
* This algorithm implements a 2nd order IIR band-reject filter. Because of its dependence on IIR, IIR's requirements are inherited. Check https://essentia.upf.edu/reference/std_BandReject.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @param {number} [bandwidth=500] the bandwidth of the filter [Hz]
* @param {number} [cutoffFrequency=1500] the cutoff frequency for the filter [Hz]
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {signal: 'the filtered signal'}
* @memberof Essentia
*/
BandReject(signal: any, bandwidth?: number, cutoffFrequency?: number, sampleRate?: number): any;
/**
* This algorithm computes energy in Bark bands of a spectrum. The band frequencies are: [0.0, 50.0, 100.0, 150.0, 200.0, 300.0, 400.0, 510.0, 630.0, 770.0, 920.0, 1080.0, 1270.0, 1480.0, 1720.0, 2000.0, 2320.0, 2700.0, 3150.0, 3700.0, 4400.0, 5300.0, 6400.0, 7700.0, 9500.0, 12000.0, 15500.0, 20500.0, 27000.0]. The first two Bark bands [0,100] and [100,200] have been split in half for better resolution (because of an observed better performance in beat detection). For each bark band the power-spectrum (mag-squared) is summed. Check https://essentia.upf.edu/reference/std_BarkBands.html for more details.
* @method
* @param {VectorFloat} spectrum the input spectrum
* @param {number} [numberBands=27] the number of desired barkbands
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {bands: 'the energy of the bark bands'}
* @memberof Essentia
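* @example
* // A sketch of a typical frame-wise pipeline (assumes `audioFrame` is a Float32Array of
* // 2048 samples; Windowing and Spectrum are the corresponding Essentia wrappers):
* const windowed = essentia.Windowing(essentia.arrayToVector(audioFrame)).frame;
* const spectrum = essentia.Spectrum(windowed).spectrum;
* const bands = essentia.vectorToArray(essentia.BarkBands(spectrum, 27, 44100).bands);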
*/
BarkBands(spectrum: any, numberBands?: number, sampleRate?: number): any;
/**
* This algorithm estimates the beat positions given an input signal. It computes the 'complex spectral difference' onset detection function and utilizes the beat tracking algorithm (TempoTapDegara) to extract beats [1]. The algorithm works with the optimized settings of 2048/1024 frame/hop size for the computation of the detection function, with its posterior x2 resampling. While it has a lower accuracy than BeatTrackerMultiFeature (see the evaluation results in [2]), its computational speed is significantly higher, which makes it reasonable to apply this algorithm for batch processing of large amounts of audio signals. Check https://essentia.upf.edu/reference/std_BeatTrackerDegara.html for more details.
* @method
* @param {VectorFloat} signal the audio input signal
* @param {number} [maxTempo=208] the fastest tempo to detect [bpm]
* @param {number} [minTempo=40] the slowest tempo to detect [bpm]
* @returns {object} {ticks: ' the estimated tick locations [s]'}
* @memberof Essentia
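* @example
* // A sketch (assumes `signalVector` is a VectorFloat holding a mono audio signal):
* const ticks = essentia.vectorToArray(essentia.BeatTrackerDegara(signalVector).ticks);
* // ticks holds the estimated beat positions in seconds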
*/
BeatTrackerDegara(signal: any, maxTempo?: number, minTempo?: number): any;
/**
* This algorithm estimates the beat positions given an input signal. It computes a number of onset detection functions and estimates beat location candidates from them using TempoTapDegara algorithm. Thereafter the best candidates are selected using TempoTapMaxAgreement. The employed detection functions, and the optimal frame/hop sizes used for their computation are:
- complex spectral difference (see 'complex' method in OnsetDetection algorithm, 2048/1024 with posterior x2 upsample of the detection function)
- energy flux (see 'rms' method in OnsetDetection algorithm, the same settings)
- spectral flux in Mel-frequency bands (see 'melflux' method in OnsetDetection algorithm, the same settings)
- beat emphasis function (see 'beat_emphasis' method in OnsetDetectionGlobal algorithm, 2048/512)
- spectral flux between histogrammed spectrum frames, measured by the modified information gain (see 'infogain' method in OnsetDetectionGlobal algorithm, 2048/512) Check https://essentia.upf.edu/reference/std_BeatTrackerMultiFeature.html for more details.
* @method
* @param {VectorFloat} signal the audio input signal
* @param {number} [maxTempo=208] the fastest tempo to detect [bpm]
* @param {number} [minTempo=40] the slowest tempo to detect [bpm]
* @returns {object} {ticks: ' the estimated tick locations [s]', confidence: 'confidence of the beat tracker [0, 5.32]'}
* @memberof Essentia
*/
BeatTrackerMultiFeature(signal: any, maxTempo?: number, minTempo?: number): any;
/**
* This algorithm filters the loudness matrix given by BeatsLoudness algorithm in order to keep only the most salient beat band representation.
This algorithm has been found to be useful for estimating time signatures. Check https://essentia.upf.edu/reference/std_Beatogram.html for more details.
* @method
* @param {VectorFloat} loudness the loudness at each beat
* @param {VectorVectorFloat} loudnessBandRatio matrix of loudness ratios at each band and beat
* @param {number} [size=16] number of beats for dynamic filtering
* @returns {object} {beatogram: 'filtered matrix loudness'}
* @memberof Essentia
*/
Beatogram(loudness: any, loudnessBandRatio: any, size?: number): any;
/**
* This algorithm computes the spectrum energy of beats in an audio signal given their positions. The energy is computed both on the whole frequency range and for each of the specified frequency bands. See the SingleBeatLoudness algorithm for a more detailed explanation. Check https://essentia.upf.edu/reference/std_BeatsLoudness.html for more details.
* @method
* @param {VectorFloat} signal the input audio signal
* @param {number} [beatDuration=0.05] the duration of the window in which the beat will be restricted [s]
* @param {number} [beatWindowDuration=0.1] the duration of the window in which to look for the beginning of the beat (centered around the positions in 'beats') [s]
* @param {any[]} [beats=[]] the list of beat positions (each position is in seconds)
* @param {any[]} [frequencyBands=[20, 150, 400, 3200, 7000, 22000]] the list of bands to compute energy ratios [Hz]
* @param {number} [sampleRate=44100] the audio sampling rate [Hz]
* @returns {object} {loudness: 'the beat's energy in the whole spectrum', loudnessBandRatio: 'the ratio of the beat's energy on each frequency band'}
* @memberof Essentia
*/
BeatsLoudness(signal: any, beatDuration?: number, beatWindowDuration?: number, beats?: any[], frequencyBands?: any[], sampleRate?: number): any;
/**
* This algorithm performs basic arithmetical operations element by element given two arrays.
Note:
- using this algorithm in streaming mode can cause diamond-shape graphs which have not been tested with the current scheduler. There is NO GUARANTEE that it works correctly for diamond-shape graphs.
- for y<0, x/y is invalid Check https://essentia.upf.edu/reference/std_BinaryOperator.html for more details.
* @method
* @param {VectorFloat} array1 the first operand input array
* @param {VectorFloat} array2 the second operand input array
* @param {string} [type=add] the type of the binary operator to apply to the input arrays
* @returns {object} {array: 'the array containing the result of binary operation'}
* @memberof Essentia
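* @example
* // Element-wise addition of two toy arrays (a sketch):
* const result = essentia.BinaryOperator(
*   essentia.arrayToVector(new Float32Array([1, 2, 3])),
*   essentia.arrayToVector(new Float32Array([4, 5, 6])),
*   'add'
* ).array;
* essentia.vectorToArray(result); // Float32Array [5, 7, 9]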
*/
BinaryOperator(array1: any, array2: any, type?: string): any;
/**
* This algorithm performs basic arithmetical operations element by element given two arrays.
Note:
- using this algorithm in streaming mode can cause diamond-shape graphs which have not been tested with the current scheduler. There is NO GUARANTEE that it works correctly for diamond-shape graphs.
- for y<0, x/y is invalid Check https://essentia.upf.edu/reference/std_BinaryOperatorStream.html for more details.
* @method
* @param {VectorFloat} array1 the first operand input array
* @param {VectorFloat} array2 the second operand input array
* @param {string} [type=add] the type of the binary operator to apply to the input arrays
* @returns {object} {array: 'the array containing the result of binary operation'}
* @memberof Essentia
*/
BinaryOperatorStream(array1: any, array2: any, type?: string): any;
/**
* This algorithm computes the beats per minute histogram and its statistics for the highest and second highest peaks.
Note: the histogram vector contains the occurrence frequency for each bpm value; the 0-th element corresponds to a bpm value of 0. Check https://essentia.upf.edu/reference/std_BpmHistogramDescriptors.html for more details.
* @method
* @param {VectorFloat} bpmIntervals the list of bpm intervals [s]
* @returns {object} {firstPeakBPM: 'value for the highest peak [bpm]', firstPeakWeight: 'weight of the highest peak', firstPeakSpread: 'spread of the highest peak', secondPeakBPM: 'value for the second highest peak [bpm]', secondPeakWeight: 'weight of the second highest peak', secondPeakSpread: 'spread of the second highest peak', histogram: 'bpm histogram [bpm]'}
* @memberof Essentia
*/
BpmHistogramDescriptors(bpmIntervals: any): any;
/**
* This algorithm extracts the locations of large tempo changes from a list of beat ticks. Check https://essentia.upf.edu/reference/std_BpmRubato.html for more details.
* @method
* @param {VectorFloat} beats list of detected beat ticks [s]
* @param {number} [longRegionsPruningTime=20] time for the longest constant tempo region inside a rubato region [s]
* @param {number} [shortRegionsMergingTime=4] time for the shortest constant tempo region from one tempo region to another [s]
* @param {number} [tolerance=0.08] minimum tempo deviation to look for
* @returns {object} {rubatoStart: 'list of timestamps where the start of a rubato region was detected [s]', rubatoStop: 'list of timestamps where the end of a rubato region was detected [s]', rubatoNumber: 'number of detected rubato regions'}
* @memberof Essentia
*/
BpmRubato(beats: any, longRegionsPruningTime?: number, shortRegionsMergingTime?: number, tolerance?: number): any;
/**
* This algorithm extracts the 0th, 1st, 2nd, 3rd and 4th central moments of an array. It returns a 5-tuple in which the index corresponds to the order of the moment. Check https://essentia.upf.edu/reference/std_CentralMoments.html for more details.
* @method
* @param {VectorFloat} array the input array
* @param {string} [mode=pdf] compute central moments considering array values as a probability density function over array index or as sample points of a distribution
* @param {number} [range=1] the range of the input array, used for normalizing the results in the 'pdf' mode
* @returns {object} {centralMoments: 'the central moments of the input array'}
* @memberof Essentia
*/
CentralMoments(array: any, mode?: string, range?: number): any;
/**
* This algorithm computes the centroid of an array. The centroid is normalized to a specified range. This algorithm can be used to compute spectral centroid or temporal centroid. Check https://essentia.upf.edu/reference/std_Centroid.html for more details.
* @method
* @param {VectorFloat} array the input array
* @param {number} [range=1] the range of the input array, used for normalizing the results
* @returns {object} {centroid: 'the centroid of the array'}
* @memberof Essentia
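* @example
* // A sketch of a spectral centroid in Hz (assumes `spectrumVector` is a VectorFloat
* // magnitude spectrum of audio sampled at 44100 Hz, so range is set to Nyquist):
* const centroid = essentia.Centroid(spectrumVector, 44100 / 2).centroid;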
*/
Centroid(array: any, range?: number): any;
/**
* Given a chord progression this algorithm describes it by means of key, scale, histogram, and rate of change.
Note:
- chordsHistogram indexes follow the circle of fifths order, while being shifted to the input key and scale
- key and scale are taken from the most frequent chord. In the case where multiple chords are equally frequent, the chord is hierarchically chosen from the circle of fifths.
- chords should follow this name convention `<A-G>[<#/b><m>]` (i.e. C, C# or C#m are valid chords). Chord names not fitting this convention will throw an exception. Check https://essentia.upf.edu/reference/std_ChordsDescriptors.html for more details.
* @method
* @param {VectorString} chords the chord progression
* @param {string} key the key of the whole song, from A to G
* @param {string} scale the scale of the whole song (major or minor)
* @returns {object} {chordsHistogram: 'the normalized histogram of chords', chordsNumberRate: 'the ratio of different chords from the total number of chords in the progression', chordsChangesRate: 'the rate at which chords change in the progression', chordsKey: 'the most frequent chord of the progression', chordsScale: 'the scale of the most frequent chord of the progression (either 'major' or 'minor')'}
* @memberof Essentia
*/
ChordsDescriptors(chords: any, key: string, scale: string): any;
/**
* This algorithm estimates chords given an input sequence of harmonic pitch class profiles (HPCPs). It finds the best matching major or minor triad and outputs the result as a string (e.g. A#, Bm, G#m, C). This algorithm uses the Sharp versions of each Flatted note (i.e. Bb -> A#). Check https://essentia.upf.edu/reference/std_ChordsDetection.html for more details.
* @method
* @param {VectorVectorFloat} pcp the pitch class profile from which to detect the chord
* @param {number} [hopSize=2048] the hop size with which the input PCPs were computed
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @param {number} [windowSize=2] the size of the window on which to estimate the chords [s]
* @returns {object} {chords: 'the resulting chords, from A to G', strength: 'the strength of the chord'}
* @memberof Essentia
*/
ChordsDetection(pcp: any, hopSize?: number, sampleRate?: number, windowSize?: number): any;
/**
* This algorithm estimates chords using pitch profile classes on segments between beats. It is similar to ChordsDetection algorithm, but the chords are estimated on audio segments between each pair of consecutive beats. For each segment the estimation is done based on a chroma (HPCP) vector characterizing it, which can be computed by two methods:
- 'interbeat_median', each resulting chroma vector component is a median of all the component values in the segment
- 'starting_beat', chroma vector is sampled from the start of the segment (that is, its starting beat position) using its first frame. It makes sense if the chroma is smoothed beforehand. Check https://essentia.upf.edu/reference/std_ChordsDetectionBeats.html for more details.
* @method
* @param {VectorVectorFloat} pcp the pitch class profile from which to detect the chord
* @param {VectorFloat} ticks the list of beat positions (in seconds)
* @param {string} [chromaPick=interbeat_median] method of calculating singleton chroma for interbeat interval
* @param {number} [hopSize=2048] the hop size with which the input PCPs were computed
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {chords: 'the resulting chords, from A to G', strength: 'the strength of the chords'}
* @memberof Essentia
*/
ChordsDetectionBeats(pcp: any, ticks: any, chromaPick?: string, hopSize?: number, sampleRate?: number): any;
/**
* This algorithm computes a binary cross-similarity matrix from two chromagram feature vectors of a query and reference song. Check https://essentia.upf.edu/reference/std_ChromaCrossSimilarity.html for more details.
* @method
* @param {VectorVectorFloat} queryFeature frame-wise chromagram of the query song (e.g., a HPCP)
* @param {VectorVectorFloat} referenceFeature frame-wise chromagram of the reference song (e.g., a HPCP)
* @param {number} [binarizePercentile=0.095] maximum percent of distance values to consider as similar in each row and each column
* @param {number} [frameStackSize=9] number of input frames to stack together and treat as a feature vector for similarity computation. Choose 'frameStackSize=1' to use the original input frames without stacking
* @param {number} [frameStackStride=1] stride size to form a stack of frames (e.g., 'frameStackStride'=1 to use consecutive frames; 'frameStackStride'=2 for using every second frame)
* @param {number} [noti=12] number of circular shifts to be checked for Optimal Transposition Index [1]
* @param {boolean} [oti=true] whether to transpose the key of the reference song to the query song by Optimal Transposition Index [1]
* @param {boolean} [otiBinary=false] whether to use the OTI-based chroma binary similarity method [3]
* @param {boolean} [streaming=false] whether to accumulate the input 'queryFeature' in the euclidean similarity matrix calculation on each compute() method call
* @returns {object} {csm: '2D binary cross-similarity matrix of the query and reference features'}
* @memberof Essentia
*/
ChromaCrossSimilarity(queryFeature: any, referenceFeature: any, binarizePercentile?: number, frameStackSize?: number, frameStackStride?: number, noti?: number, oti?: boolean, otiBinary?: boolean, streaming?: boolean): any;
/**
* This algorithm computes the Constant-Q chromagram using FFT. See ConstantQ algorithm for more details.
Check https://essentia.upf.edu/reference/std_Chromagram.html for more details.
* @method
* @param {VectorFloat} frame the input audio frame
* @param {number} [binsPerOctave=12] number of bins per octave
* @param {number} [minFrequency=32.7] minimum frequency [Hz]
* @param {number} [minimumKernelSize=4] minimum size allowed for frequency kernels
* @param {string} [normalizeType=unit_max] normalize type
* @param {number} [numberBins=84] number of frequency bins, starting at minFrequency
* @param {number} [sampleRate=44100] FFT sampling rate [Hz]
* @param {number} [scale=1] filters scale. Larger values use longer windows
* @param {number} [threshold=0.01] bins whose magnitude is below this quantile are discarded
* @param {string} [windowType=hann] the window type, which can be 'hamming', 'hann', 'triangular', 'square' or 'blackmanharrisXX'
* @param {boolean} [zeroPhase=true] a boolean value that enables zero-phase windowing. Input audio frames should be windowed with the same phase mode
* @returns {object} {chromagram: 'the magnitude constant-Q chromagram'}
* @memberof Essentia
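* @example
* // A sketch (assumes `audioFrame` is a Float32Array whose length matches the input size
* // expected by the underlying ConstantQ transform; see the linked reference):
* const chroma = essentia.Chromagram(essentia.arrayToVector(audioFrame)).chromagram;
* const chromaArray = essentia.vectorToArray(chroma);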
*/
Chromagram(frame: any, binsPerOctave?: number, minFrequency?: number, minimumKernelSize?: number, normalizeType?: string, numberBins?: number, sampleRate?: number, scale?: number, threshold?: number, windowType?: string, zeroPhase?: boolean): any;
/**
* This algorithm detects the locations of impulsive noises (clicks and pops) on the input audio frame. It relies on LPC coefficients to inverse-filter the audio in order to attenuate the stationary part and enhance the prediction error (or excitation noise)[1]. After this, a matched filter is used to further enhance the impulsive peaks. The detection threshold is obtained from a robust estimate of the excitation noise power [2] plus a parametric gain value. Check https://essentia.upf.edu/reference/std_ClickDetector.html for more details.
* @method
* @param {VectorFloat} frame the input frame (must be non-empty)
* @param {number} [detectionThreshold=30] the threshold is based on the instant power of the noisy excitation signal plus detectionThreshold dBs
* @param {number} [frameSize=512] the expected size of the input audio signal (this is an optional parameter to optimize memory allocation)
* @param {number} [hopSize=256] hop size used for the analysis. This parameter must be set correctly as it cannot be obtained from the input data
* @param {number} [order=12] scalar giving the number of LPCs to use
* @param {number} [powerEstimationThreshold=10] the noisy excitation is clipped to 'powerEstimationThreshold' times its median.
* @param {number} [sampleRate=44100] sample rate used for the analysis
* @param {number} [silenceThreshold=-50] threshold to skip silent frames
* @returns {object} {starts: 'starting indexes of the clicks', ends: 'ending indexes of the clicks'}
* @memberof Essentia
*/
ClickDetector(frame: any, detectionThreshold?: number, frameSize?: number, hopSize?: number, order?: number, powerEstimationThreshold?: number, sampleRate?: number, silenceThreshold?: number): any;
/**
* This algorithm clips the input signal to fit its values into a specified interval. Check https://essentia.upf.edu/reference/std_Clipper.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @param {number} [max=1] the maximum value above which the signal will be clipped
* @param {number} [min=-1] the minimum value below which the signal will be clipped
* @returns {object} {signal: 'the clipped output signal'}
* @memberof Essentia
*/
Clipper(signal: any, max?: number, min?: number): any;
/**
* This algorithm computes a cover song similarity measure from a binary cross-similarity matrix input between two chroma vectors of a query and reference song using various alignment constraints of the Smith-Waterman local-alignment algorithm. Check https://essentia.upf.edu/reference/std_CoverSongSimilarity.html for more details.
* @method
* @param {VectorVectorFloat} inputArray a 2D binary cross-similarity matrix between two audio chroma vectors (query vs reference song) (refer to the 'ChromaCrossSimilarity' algorithm).
* @param {string} [alignmentType=serra09] choose either one of the given local-alignment constraints for the Smith-Waterman algorithm as described in [2] or [3] respectively.
* @param {number} [disExtension=0.5] penalty for disruption extension
* @param {number} [disOnset=0.5] penalty for disruption onset
* @param {string} [distanceType=asymmetric] choose the type of distance. By default the algorithm outputs an asymmetric distance obtained by normalizing the maximum score in the alignment score matrix by the length of the reference song
* @returns {object} {scoreMatrix: 'a 2D smith-waterman alignment score matrix from the input binary cross-similarity matrix', distance: 'cover song similarity distance between the query and reference song from the input similarity matrix. Either 'asymmetric' (as described in [2]) or 'symmetric' (maximum score in the alignment score matrix).'}
* @memberof Essentia
*/
CoverSongSimilarity(inputArray: any, alignmentType?: string, disExtension?: number, disOnset?: number, distanceType?: string): any;
/**
* This algorithm computes the crest of an array. The crest is defined as the ratio between the maximum value and the arithmetic mean of an array. Typically it is used on the magnitude spectrum. Check https://essentia.upf.edu/reference/std_Crest.html for more details.
* @method
* @param {VectorFloat} array the input array (cannot contain negative values, and must be non-empty)
* @returns {object} {crest: 'the crest of the input array'}
* @memberof Essentia
*/
Crest(array: any): any;
/**
* This algorithm computes the cross-correlation vector of two signals. It accepts 2 parameters, minLag and maxLag, which define the range of the computation of the inner product. Check https://essentia.upf.edu/reference/std_CrossCorrelation.html for more details.
* @method
* @param {VectorFloat} arrayX the first input array
* @param {VectorFloat} arrayY the second input array
* @param {number} [maxLag=1] the maximum lag to be computed between the two vectors
* @param {number} [minLag=0] the minimum lag to be computed between the two vectors
* @returns {object} {crossCorrelation: 'the cross-correlation vector between the two input arrays (its size is equal to maxLag - minLag + 1)'}
* @memberof Essentia
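* @example
* // A sketch on toy arrays (with minLag=0 and maxLag=1 the output has maxLag - minLag + 1 = 2 values):
* const x = essentia.arrayToVector(new Float32Array([1, 2, 3, 4]));
* const y = essentia.arrayToVector(new Float32Array([4, 3, 2, 1]));
* const xcorr = essentia.vectorToArray(essentia.CrossCorrelation(x, y, 1, 0).crossCorrelation);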
*/
CrossCorrelation(arrayX: any, arrayY: any, maxLag?: number, minLag?: number): any;
/**
* This algorithm computes a euclidean cross-similarity matrix of two sequences of frame features. Similarity values can be optionally binarized. Check https://essentia.upf.edu/reference/std_CrossSimilarityMatrix.html for more details.
* @method
* @param {VectorVectorFloat} queryFeature input frame features of the query song (e.g., a chromagram)
* @param {VectorVectorFloat} referenceFeature input frame features of the reference song (e.g., a chromagram)
* @param {boolean} [binarize=false] whether to binarize the euclidean cross-similarity matrix
* @param {number} [binarizePercentile=0.095] maximum percent of distance values to consider as similar in each row and each column
* @param {number} [frameStackSize=1] number of input frames to stack together and treat as a feature vector for similarity computation. Choose 'frameStackSize=1' to use the original input frames without stacking
* @param {number} [frameStackStride=1] stride size to form a stack of frames (e.g., 'frameStackStride'=1 to use consecutive frames; 'frameStackStride'=2 for using every second frame)
* @returns {object} {csm: '2D cross-similarity matrix of two input frame sequences (query vs reference)'}
* @memberof Essentia
*/
CrossSimilarityMatrix(queryFeature: any, referenceFeature: any, binarize?: boolean, binarizePercentile?: number, frameStackSize?: number, frameStackStride?: number): any;
/**
* Computes the second derivatives of a piecewise cubic spline.
The input value, i.e. the point at which the spline is to be evaluated, typically should be between xPoints[0] and xPoints[size-1]. If the value lies outside this range, extrapolation is used.
Regarding [left/right] boundary condition flag parameters:
- 0: the cubic spline should be a quadratic over the first interval
- 1: the first derivative at the [left/right] endpoint should be [left/right]BoundaryFlag
- 2: the second derivative at the [left/right] endpoint should be [left/right]BoundaryFlag
References:
[1] Spline interpolation - Wikipedia, the free encyclopedia,
http://en.wikipedia.org/wiki/Spline_interpolation Check https://essentia.upf.edu/reference/std_CubicSpline.html for more details.
* @method
* @param {number} x the input coordinate (x-axis)
* @param {number} [leftBoundaryFlag=0] type of boundary condition for the left boundary
* @param {number} [leftBoundaryValue=0] the value to be used in the left boundary, when leftBoundaryFlag is 1 or 2
* @param {number} [rightBoundaryFlag=0] type of boundary condition for the right boundary
* @param {number} [rightBoundaryValue=0] the value to be used in the right boundary, when rightBoundaryFlag is 1 or 2
* @param {any[]} [xPoints=[0, 1]] the x-coordinates where data is specified (the points must be arranged in ascending order and cannot contain duplicates)
* @param {any[]} [yPoints=[0, 1]] the y-coordinates to be interpolated (i.e. the known data)
* @returns {object} {y: 'the value of the spline at x', dy: 'the first derivative of the spline at x', ddy: 'the second derivative of the spline at x'}
* @memberof Essentia
*/
CubicSpline(x: number, leftBoundaryFlag?: number, leftBoundaryValue?: number, rightBoundaryFlag?: number, rightBoundaryValue?: number, xPoints?: any[], yPoints?: any[]): any;
/**
* This algorithm removes the DC offset from a signal using a 1st order IIR highpass filter. Because of its dependence on IIR, IIR's requirements are inherited. Check https://essentia.upf.edu/reference/std_DCRemoval.html for more details.
* @method
* @param {VectorFloat} signal the input audio signal
* @param {number} [cutoffFrequency=40] the cutoff frequency for the filter [Hz]
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @returns {object} {signal: 'the filtered signal, with the DC component removed'}
* @memberof Essentia
*/
DCRemoval(signal: any, cutoffFrequency?: number, sampleRate?: number): any;
/**
* This algorithm computes the Discrete Cosine Transform of an array.
It uses the DCT-II form, with the 1/sqrt(2) scaling factor for the first coefficient. Check https://essentia.upf.edu/reference/std_DCT.html for more details.
* @method
* @param {VectorFloat} array the input array
* @param {number} [dctType=2] the DCT type
* @param {number} [inputSize=10] the size of the input array
* @param {number} [liftering=0] the liftering coefficient. Use '0' to bypass it
* @param {number} [outputSize=10] the number of output coefficients
* @returns {object} {dct: 'the discrete cosine transform of the input array'}
* @memberof Essentia
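* @example
* // DCT-II of a 10-point impulse (a sketch; 10 matches the default inputSize/outputSize):
* const input = essentia.arrayToVector(new Float32Array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]));
* const coeffs = essentia.vectorToArray(essentia.DCT(input).dct);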
*/
DCT(array: any, dctType?: number, inputSize?: number, liftering?: number, outputSize?: number): any;
/**
* This algorithm estimates danceability of a given audio signal. The algorithm is derived from Detrended Fluctuation Analysis (DFA) described in [1]. The parameters minTau and maxTau are used to define the range of time over which DFA will be performed. The output of this algorithm is the danceability of the audio signal. These values usually range from 0 to 3 (higher values meaning more danceable). Check https://essentia.upf.edu/reference/std_Danceability.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @param {number} [maxTau=8800] maximum segment length to consider [ms]
* @param {number} [minTau=310] minimum segment length to consider [ms]
* @param {number} [sampleRate=44100] the sampling rate of the audio signal [Hz]
* @param {number} [tauMultiplier=1.1] multiplier to increment from min to max tau
* @returns {object} {danceability: 'the danceability value. Normal values range from 0 to ~3. The higher, the more danceable.', dfa: 'the DFA exponent vector for considered segment length (tau) values'}
* @memberof Essentia
*/
Danceability(signal: any, maxTau?: number, minTau?: number, sampleRate?: number, tauMultiplier?: number): any;
/**
* This algorithm computes the decrease of an array defined as the linear regression coefficient. The range parameter is used to normalize the result. For a spectral centroid, the range should be equal to Nyquist and for an audio centroid the range should be equal to (audiosize - 1) / samplerate.
The size of the input array must be at least two elements for "decrease" to be computed, otherwise an exception is thrown.
References:
[1] Least Squares Fitting -- from Wolfram MathWorld,
http://mathworld.wolfram.com/LeastSquaresFitting.html Check https://essentia.upf.edu/reference/std_Decrease.html for more details.
* @method
* @param {VectorFloat} array the input array
* @param {number} [range=1] the range of the input array, used for normalizing the results
* @returns {object} {decrease: 'the decrease of the input array'}
* @memberof Essentia
*/
Decrease(array: any, range?: number): any;
/**
* This algorithm returns the first-order derivative of an input signal. That is, for each input value it returns the value minus the previous one. Check https://essentia.upf.edu/reference/std_Derivative.html for more details.
* @method
* @param {VectorFloat} signal the input signal
* @returns {object} {signal: 'the derivative of the input signal'}
* @memberof Essentia
*/
Derivative(signal: any): any;
/**
* This algorithm computes two descriptors that are based on the derivative of a signal envelope. Check https://essentia.upf.edu/reference/std_DerivativeSFX.html for more details.
* @method
* @param {VectorFloat} envelope the envelope of the signal
* @returns {object} {derAvAfterMax: 'the weighted average of the derivative after the maximum amplitude', maxDerBeforeMax: 'the maximum derivative before the maximum amplitude'}
* @memberof Essentia
*/
DerivativeSFX(envelope: any): any;
/**
* This algorithm uses LPC and some heuristics to detect discontinuities in an audio signal [1]. Check https://essentia.upf.edu/reference/std_DiscontinuityDetector.html for more details.
* @method
* @param {VectorFloat} frame the input frame (must be non-empty)
* @param {number} [detectionThreshold=8] 'detectionThreshold' times the standard deviation plus the median of the frame is used as detection threshold
* @param {number} [energyThreshold=-60] threshold in dB to detect silent subframes
* @param {number} [frameSize=512] the expected size of the input audio signal (this is an optional parameter to optimize memory allocation)
* @param {number} [hopSize=256] hop size used for the analysis. This parameter must be set correctly as it cannot be obtained from the input data
* @param {number} [kernelSize=7] scalar giving the size of the median filter window. Must be odd
* @param {number} [order=3] scalar giving the number of LPCs to use
* @param {number} [silenceThreshold=-50] threshold to skip silent frames
* @param {number} [subFrameSize=32] size of the window used to compute silent subframes
* @returns {object} {discontinuityLocations: 'the index of the detected discontinuities (if any)', discontinuityAmplitudes: 'the peak values of the prediction error for the discontinuities (if any)'}
* @memberof Essentia
*/
DiscontinuityDetector(frame: any, detectionThreshold?: number, energyThreshold?: number, frameSize?: number, hopSize?: number, kernelSize?: number, order?: number, silenceThreshold?: number, subFrameSize?: number): any;
/**
* This algorithm computes the sensory dissonance of an audio signal given its spectral peaks. Sensory dissonance (to be distinguished from musical or theoretical dissonance) measures perceptual roughness of the sound and is based on the roughness of its spectral peaks. Given the spectral peaks, the algorithm estimates total dissonance by summing up the normalized dissonance values for each pair of peaks. These values are computed using dissonance curves, which define dissonance between two spectral peaks according to their frequency and amplitude relations. The dissonance curves are based on perceptual experiments conducted in [1].
Exceptions are thrown when the sizes of the input vectors are not equal or if the input frequencies are not in ascending order.
References:
[1] R. Plomp and W. J. M. Levelt, "Tonal Consonance and Critical
Bandwidth," The Journal of the Acoustical Society of America, vol. 38,
no. 4, pp. 548–560, 1965. Check https://essentia.upf.edu/reference/std_Dissonance.html for more details.
* @method
* @param {VectorFloat} frequencies the frequencies of the spectral peaks (must be sorted by frequency)
* @param {VectorFloat} magnitudes the magnitudes of the spectral peaks (must be sorted by frequency)
* @returns {object} {dissonance: 'the dissonance of the audio signal (0 meaning completely consonant, and 1 meaning completely dissonant)'}
* @memberof Essentia
*/
Dissonance(frequencies: any, magnitudes: any): any;
/**
* This algorithm computes the spread (variance), skewness and kurtosis of an array given its central moments. The extracted features are good indicators of the shape of the distribution. For the required input see CentralMoments algorithm.
The size of the input array must be at least 5. An exception will be thrown otherwise. Check https://essentia.upf.edu/reference/std_DistributionShape.