// formantanalyzer
// Version:
// Extract formant features such as frequency, power, energy, and bandwidth of formants at the syllable or word level from audio sources in a web browser using the Web Audio API.
// 487 lines (376 loc) • 13.5 kB
// JavaScript
// analyzernode.main.js
// Module-level state shared by SpectrumProcessor and the helper functions in this file.
// The initial values below are placeholders: the SpectrumProcessor constructor calls
// set_settings(), which overwrites almost all of them.
let spec_type = 1; //1=mel, 2=power, 3=freq (selects which spectrum the port handler posts back)
let window_step = 0.025; // hop between windows, in seconds (set_settings divides the configured value by 1000)
let window_width = 0.040; // window length, in seconds
let f_min = 50; // passed to construct_mel_banks as the lowest filter frequency (Hz)
let f_max = 4000; // upper edge of the analysed range (Hz); also the highest mel frequency
let N_FFT_cut = 256; // number of DFT bins actually computed per window
let N_mels = 128; // number of mel filters
let mel_bins_Hz = []; // rounded centre frequency of each mel filter; filled by construct_mel_banks
let P_GAIN = 1000; // multiplier applied to the spectra in get_melbanks / get_fft
//Autoset variables
let high_f_emph = Math.round(10/N_mels) // evaluates to 0 for N_mels = 128; replaced by the configured value in set_settings
// sampleRate is not declared in this file; presumably the AudioWorkletGlobalScope global.
let N_FFT_max = parseInt((sampleRate/f_max)*(N_FFT_cut-1));
let fft_const = -2 * Math.PI / N_FFT_max; // per-bin, per-sample rotation angle used by the DFT loops
let hconst = 2*Math.PI/(1024 - 1); //Hamming constant, 1024 is temp window_buff_size
let window_buff_size = 1024; //it will autoset according to the window size
let step_buff_size = 512; // hop length in samples; recomputed in set_settings
let mel_filters; // array of per-filter weight arrays built by construct_mel_banks
let fi = 0; //current buffer frame index (only reset in set_settings in the code visible here)
let WindowsIn = []; // queue of assembled windows waiting to be analysed
let curInBuff = new Float32Array(step_buff_size); // accumulates incoming samples until one hop is complete
//let current_window = new Float32Array(window_buff_size);
let lastWindowBuff = new Float32Array(window_buff_size).fill(0); // previous window; its tail is reused as overlap
let analyzer_running = false; // returned from process(); set true in the constructor, false on message 22
let curInBuff_offset = 0; // write position inside curInBuff
// t0_frame / t1_frame are not read or written anywhere else in the code visible here.
let t0_frame = -1;
let t1_frame = -1;
/**
 * AudioWorklet processor that cuts channel 0 of its first input into
 * (optionally overlapping) analysis windows and serves their spectra over
 * the message port.
 *
 * Port protocol as implemented below:
 *   in : object with a truthy `spec_type` -> reconfigure, out: result of set_settings()
 *   in : 22                               -> stop processing, out: 22
 *   in : anything else, windows queued    -> out: spectrum of the oldest queued window
 *   in : 0, nothing queued                -> out: 0
 *   in : 1, nothing queued                -> ignored
 *   out: 1 every time a new window has been queued
 */
class SpectrumProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    analyzer_running = true;
    // Put the module state into a consistent shape before the first process()
    // call; the main thread can replace these settings through the port.
    const start_settings = {
      spec_type: 1,
      f_min: 50,
      f_max: 4000,
      N_fft_bins: 256,
      N_mel_bins: 128,
      window_width: 25,
      window_step: 25,
      pre_norm_gain: 1000,
      high_f_emph: 0.05,
      start_frame: -1,
      end_frame: -1,
    };
    set_settings(start_settings);
    this.port.onmessage = (e) => this.handle_message(e.data);
  }

  /**
   * Dispatch one message received from the main thread.
   * @param {*} data - payload of the MessageEvent (settings object or numeric code).
   */
  handle_message(data) {
    if (data != null && data.spec_type) {
      this.port.postMessage(set_settings(data));
      return;
    }
    // The stop request is checked before the window queue: otherwise it would be
    // answered with a spectrum (and the processor kept alive) whenever windows
    // are still pending.
    if (data === 22) {
      analyzer_running = false;
      this.port.postMessage(22);
      return;
    }
    if (WindowsIn.length > 0) {
      const first_window = WindowsIn.shift();
      switch (Number(spec_type)) {
        case 1: // mel bands
          this.port.postMessage(get_melbanks(hamming(first_window)));
          break;
        case 2: // power spectrum
          this.port.postMessage(get_fft(hamming(first_window), 1));
          break;
        case 3: // magnitude spectrum
          this.port.postMessage(get_fft(hamming(first_window), 0));
          break;
        default: // unknown type: the window is dropped, nothing is posted
          break;
      }
      return;
    }
    if (data === 0) {
      this.port.postMessage(0);
      return;
    }
    if (data !== 1) {
      console.log("Unrecognized Rx on worklet port");
      console.log(data);
    }
  }

  /**
   * Append the incoming samples of channel 0 to the hop buffer and emit one
   * window per completed hop.
   * @param {Float32Array[][]} inputs - only inputs[0][0] is read.
   * @param {Float32Array[][]} outputs - unused.
   * @param {Object} parameters - unused.
   * @returns {boolean} the current value of analyzer_running.
   */
  process(inputs, outputs, parameters) {
    const channel = inputs[0] && inputs[0][0];
    if (channel && step_buff_size > 0) {
      // A settings change may have shrunk the hop buffer since the last call.
      if (curInBuff_offset >= step_buff_size) {
        curInBuff_offset = 0;
      }
      // Consume the input in pieces so that a hop shorter than the incoming
      // block (which then completes several hops per call) cannot overflow
      // curInBuff.
      let consumed = 0;
      while (consumed < channel.length) {
        const room = step_buff_size - curInBuff_offset;
        const take = Math.min(room, channel.length - consumed);
        curInBuff.set(channel.subarray(consumed, consumed + take), curInBuff_offset);
        curInBuff_offset += take;
        consumed += take;
        if (curInBuff_offset === step_buff_size) {
          this.create_segment(curInBuff);
          curInBuff_offset = 0;
        }
      }
    }
    return analyzer_running;
  }

  /**
   * Build the next analysis window from the tail of the previous window plus
   * the freshly completed hop, queue it, and notify the main thread with 1.
   * @param {Float32Array} NewInbuff - samples of the completed hop (copied, not retained).
   */
  create_segment(NewInbuff) {
    const current_window = new Float32Array(window_buff_size);
    // Number of samples carried over from the previous window; 0 when the hop
    // is at least as long as the window.
    const overlap = Math.max(window_buff_size - NewInbuff.length, 0);
    if (overlap > 0) {
      current_window.set(lastWindowBuff.subarray(lastWindowBuff.length - overlap), 0);
      current_window.set(NewInbuff, overlap);
    } else {
      // Hop >= window: keep only the newest window_buff_size samples instead of
      // passing a negative offset to set().
      current_window.set(NewInbuff.subarray(NewInbuff.length - window_buff_size), 0);
    }
    WindowsIn.push(current_window);
    lastWindowBuff = current_window;
    this.port.postMessage(1);
  }
}
// Bin labels and reusable output buffers; all of them are (re)allocated by set_settings().
let fft_bins_Hz = []; // rounded frequency (Hz) assigned to each computed DFT bin
let power_bins = null;// Float32Array(N_FFT_cut); per-bin power written by get_melbanks
let mel_bins = null;//new Uint32Array(N_mels); buffer returned by get_melbanks
let power_bins_2 = null; // Uint32Array(N_FFT_cut); buffer returned by get_fft
/**
 * Apply a configuration object to the module-level analyser state and
 * reallocate every buffer that depends on it.
 *
 * Fields read from this_config: window_width and window_step (milliseconds),
 * f_min, f_max, N_fft_bins, N_mel_bins, high_f_emph, pre_norm_gain, spec_type.
 *
 * Any partially filled hop and all queued windows are discarded.
 *
 * @param {Object} this_config - settings object as described above.
 * @returns {{bins_Hz: number[]}} mel centre frequencies when spec_type is 1,
 *   otherwise the frequency of each DFT bin.
 */
function set_settings(this_config)
{
  // Window geometry: configured in milliseconds, kept in seconds and in samples.
  window_width = this_config.window_width / 1000;
  window_step = this_config.window_step / 1000;
  // Math.trunc instead of parseInt: the operands are numbers, not strings.
  window_buff_size = Math.trunc(sampleRate * window_width);
  step_buff_size = Math.trunc(sampleRate * window_step);

  f_min = this_config.f_min;
  f_max = this_config.f_max;
  N_FFT_cut = this_config.N_fft_bins;
  N_FFT_max = Math.trunc((sampleRate / f_max) * (N_FFT_cut - 1));
  fft_const = -2 * Math.PI / N_FFT_max;
  hconst = 2 * Math.PI / (window_buff_size - 1);

  // Only N_FFT_cut bins are computed, spread over 0..f_max.
  fft_bins_Hz = [];
  for (let i = 0; i < N_FFT_cut; i += 1)
  {
    fft_bins_Hz[i] = Math.round(i * f_max / N_FFT_cut);
  }

  N_mels = this_config.N_mel_bins;
  mel_filters = construct_mel_banks(N_FFT_cut, f_max, N_mels, f_min, f_max);
  high_f_emph = this_config.high_f_emph;
  P_GAIN = this_config.pre_norm_gain;

  // Restart sample accumulation. The write offset has to be reset together
  // with the buffer: a stale offset from a longer hop would point past the end
  // of the new curInBuff and make the next process() call throw.
  curInBuff = new Float32Array(step_buff_size);
  curInBuff_offset = 0;
  lastWindowBuff = new Float32Array(window_buff_size); // zero-filled on allocation
  fi = 0;
  WindowsIn = [];
  spec_type = this_config.spec_type;

  // Output buffers reused by get_melbanks / get_fft.
  power_bins = new Float32Array(N_FFT_cut);
  mel_bins = new Uint32Array(N_mels);
  power_bins_2 = new Uint32Array(N_FFT_cut);

  return { bins_Hz: Number(spec_type) === 1 ? mel_bins_Hz : fft_bins_Hz };
}
/**
 * Compute the mel-band energies of one (already windowed) signal frame.
 *
 * Step 1 evaluates a direct DFT for the first N_FFT_cut bins and stores the
 * gained power of each bin in the shared power_bins buffer. Step 2 weights
 * those powers with every filter in mel_filters and applies a linear
 * high-frequency emphasis of (1 + high_f_emph * filterIndex).
 *
 * @param {ArrayLike<number>} sig - samples of the frame.
 * @returns the shared mel_bins buffer (overwritten on every call).
 */
function get_melbanks(sig)
{
  const sampleCount = sig.length;

  for (let bin = 0; bin < N_FFT_cut; bin++)
  {
    let re = 0;
    let im = 0;
    for (let n = 0; n < sampleCount; n++)
    {
      const sample = sig[n];
      if (!sample)
      {
        continue; // falsy samples (e.g. 0) add nothing, skip the trig calls
      }
      const angle = fft_const * bin * n;
      re += Math.cos(angle) * sample;
      im += Math.sin(angle) * sample;
    }
    const magnitude = Math.sqrt((re * re) + (im * im));
    power_bins[bin] = P_GAIN * magnitude * magnitude;
  }

  mel_filters.forEach((weights, melIx) => {
    let acc = 0;
    for (let p = 0; p < power_bins.length; p++)
    {
      acc += power_bins[p] * weights[p];
    }
    const emphasis = 1 + (high_f_emph * melIx);
    mel_bins[melIx] = acc * emphasis;
  });

  return mel_bins;
}
/**
 * Compute the first N_FFT_cut bins of a direct DFT of one (already windowed)
 * frame and scale them either as magnitude or as power.
 *
 * Every bin is additionally multiplied by (1 + high_f_emph * binIndex).
 *
 * @param {ArrayLike<number>} sig - samples of the frame.
 * @param {*} return_power - truthy: power scaled by 100 * P_GAIN / N_FFT_cut;
 *   falsy: magnitude scaled by P_GAIN / 100.
 * @returns the shared power_bins_2 buffer (overwritten on every call).
 */
function get_fft(sig, return_power)
{
  const sampleCount = sig.length;

  for (let bin = 0; bin < N_FFT_cut; bin++)
  {
    let re = 0;
    let im = 0;
    for (let n = 0; n < sampleCount; n++)
    {
      const sample = sig[n];
      if (!sample)
      {
        continue; // falsy samples (e.g. 0) add nothing, skip the trig calls
      }
      const angle = fft_const * bin * n;
      re += Math.cos(angle) * sample;
      im += Math.sin(angle) * sample;
    }

    const magnitude = Math.sqrt((re * re) + (im * im));
    const scaled = return_power
      ? 100 * (P_GAIN / N_FFT_cut) * magnitude * magnitude
      : P_GAIN / 100 * magnitude;
    const emphasis = 1 + (high_f_emph * bin);
    power_bins_2[bin] = scaled * emphasis;
  }

  return power_bins_2;
}
/**
 * Apply a Hamming-type window (coefficients 0.53836 / 0.46164) to a frame.
 * The angular step per sample comes from the module-level hconst.
 *
 * @param {ArrayLike<number>} signal - input samples; not modified.
 * @returns {number[]} a new plain array with the weighted samples.
 */
function hamming(signal)
{
  return Array.from(signal, (sample, n) => {
    const weight = 0.53836 - (0.46164 * Math.cos(hconst * n));
    return weight * sample;
  });
}
/**
 * Smallest element of an array-like, scanning from the last index to the first.
 * Elements that do not compare as smaller (e.g. NaN) are ignored.
 *
 * @param {ArrayLike<number>} arr
 * @returns {number} the minimum, or Infinity when nothing qualifies.
 */
function arrayMin(arr) {
  let smallest = Infinity;
  for (let idx = arr.length - 1; idx >= 0; idx -= 1) {
    const candidate = arr[idx];
    if (candidate < smallest) {
      smallest = candidate;
    }
  }
  return smallest;
}
/**
 * Largest element of an array-like, scanning from the last index to the first.
 * Elements that do not compare as greater (e.g. NaN) are ignored.
 *
 * @param {ArrayLike<number>} arr
 * @returns {number} the maximum, or -Infinity when nothing qualifies.
 */
function arrayMax(arr) {
  let largest = -Infinity;
  for (let idx = arr.length - 1; idx >= 0; idx -= 1) {
    const candidate = arr[idx];
    if (candidate > largest) {
      largest = candidate;
    }
  }
  return largest;
}
/**
 * Arithmetic mean of the indexed elements of an array or typed array.
 *
 * Only index positions are summed. The previous for...in loop also added any
 * other enumerable property of the array (for example the `filterRange` tag
 * that construct_mel_banks attaches to its filter arrays) to the total.
 *
 * @param {ArrayLike<number>} arr
 * @returns {number} sum / arr.length; NaN for an empty input (0 / 0).
 */
function arrayAverage(arr) {
  // reduce.call works for typed arrays as well and, like for...in, skips holes.
  const total = Array.prototype.reduce.call(arr, (acc, value) => acc + value, 0);
  return total / arr.length;
}
/**
 * Weight a power spectrum with every filter in mel_filters.
 *
 * @param {number[]|Float32Array} power_spec_bands - one power value per DFT bin.
 * @returns {Array<number>} array of length N_mels; entry k is the sum of
 *   power_spec_bands[p] * mel_filters[k][p] over all p.
 */
function get_mel_filters(power_spec_bands)
{
  const bandEnergies = new Array(N_mels);
  mel_filters.forEach((weights, melIx) => {
    bandEnergies[melIx] = power_spec_bands.reduce(
      (acc, power, binIx) => acc + power * weights[binIx],
      0
    );
  });
  return bandEnergies;
}
/**
 * Convert a mel value to Hertz: 700 * (e^(mels / 1127) - 1).
 * @param {number} mels
 * @returns {number} frequency in Hz.
 */
function mels2Hz(mels) {
  const ratio = Math.exp(mels / 1127);
  return 700 * (ratio - 1);
}
/**
 * Convert a frequency in Hertz to mels: 1127 * ln(1 + hertz / 700).
 * @param {number} hertz
 * @returns {number} value on the mel scale.
 */
function Hz2mels(hertz) {
  const scaled = 1 + hertz / 700;
  return 1127 * Math.log(scaled);
}
/**
 * Build triangular mel filters over a spectrum of fftSize bins.
 *
 * Filter centres are spaced evenly on the mel scale, starting at lowF with a
 * spacing of (mel(highF) - mel(lowF)) / (nFilters + 1). Each filter peaks
 * with weight 1 at its centre bin and falls off linearly to 0 over
 * `filterRange` bins on both sides, where filterRange is the distance to the
 * next centre bin (for the last filter: to the previous one).
 *
 * Side effect: mel_bins_Hz is replaced with the rounded centre frequencies.
 *
 * @param {number} fftSize - number of spectrum bins each filter spans.
 * @param {number} fft_max_freq - frequency (Hz) the fftSize bins are spread over.
 * @param {number|string} nFilters - number of filters (parsed as a base-10 integer).
 * @param {number} lowF - lowest centre frequency in Hz.
 * @param {number} highF - upper end of the mel range in Hz.
 * @returns {Array<number[]>} one weight array per filter; each array also
 *   carries its `filterRange` as an extra property.
 */
function construct_mel_banks(fftSize, fft_max_freq, nFilters, lowF, highF)
{
  // Normalise once so that the spacing and the loops agree on the count.
  const filterCount = Number.parseInt(nFilters, 10);
  const lowM = Hz2mels(lowF);
  const highM = Hz2mels(highF);
  const delMel = (highM - lowM) / (filterCount + 1);
  const binWidthHz = fft_max_freq / fftSize;

  const centreBins = [];
  const centresHz = [];
  for (let i = 0; i < filterCount; i++)
  {
    const centreHz = mels2Hz(lowM + (i * delMel)); // between lowM and highM
    centreBins.push(Math.floor(centreHz / binWidthHz));
    centresHz.push(Math.round(centreHz));
  }
  mel_bins_Hz = centresHz;

  const filters = [];
  for (let i = 0; i < centreBins.length; i++)
  {
    const centre = centreBins[i];
    let filterRange;
    if (i !== centreBins.length - 1)
    {
      filterRange = centreBins[i + 1] - centre;
    }
    else if (i > 0)
    {
      filterRange = centre - centreBins[i - 1];
    }
    else
    {
      // A single filter has no neighbour to measure against. Width 0 makes it
      // a one-bin filter (the same shape two centres in one bin already
      // produce) instead of NaN weights.
      filterRange = 0;
    }

    const weights = [];
    weights.filterRange = filterRange;
    for (let f = 0; f < fftSize; f++)
    {
      // Branch order matters: with filterRange 0 the divisions are never reached.
      if (f > centre + filterRange) weights[f] = 0.0;
      else if (f > centre) weights[f] = 1.0 - ((f - centre) / filterRange);
      else if (f == centre) weights[f] = 1.0;
      else if (f >= centre - filterRange) weights[f] = 1.0 - (centre - f) / filterRange;
      else weights[f] = 0.0;
    }
    filters.push(weights);
  }
  return filters;
}
// Register the class under the name 'spectrum-processor'.
// NOTE(review): presumably the main thread creates its AudioWorkletNode with this same name; confirm against the caller.
registerProcessor('spectrum-processor', SpectrumProcessor)
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
/*
//FFT bins: Sample_Rate / (N_FFT_max + 1)
let ret = new Array(signal.length);
for (let i = 0; i < signal.length; i += 1)
{
ret[i] = (( 0.53836 - ( 0.46164 * Math.cos( hconst * i ) ) ) * signal[i]);
}
let ret = signal.map(function(elm, i) {
return (( 0.53836 - ( 0.46164 * Math.cos( hconst * i ) ) ) * elm);
});
*/
/*
//this.port.postMessage(ci);
// the sample rate is not going to change ever,
// because it's a read-only property of a BaseAudioContext
// and is set only during its instantiation
//console.log(sampleRate)
// you can declare any variables and use them in you processors
// for example it may be an ArrayBuffer with a wavetable
//const usefulVariable = 42
//console.log(usefulVariable)
curInBuff = curInBuff.concat(Array.from(inputs[0][0]).map(function(element) {
return element;
}) );
const now_time = currentTime;
const real_srate = parseInt(1/((now_time-last_timestamp)/step_buff_size));
last_timestamp = now_time;
function complexToAbs(complex_re, complex_im)
{
return Math.sqrt (Math.pow(complex_re, 2) + Math.pow(complex_im, 2));
}
function do_dfft(sig)
{
const N_samples = sig.length;
let ret = new Array(N_FFT_cut);
for (let fbn = 0; fbn < N_FFT_cut; fbxx += 1) //N_FFT_cut only within f_max range
{
let rex = 0;
let imx = 0;
for (let i = 0; i < N_samples; i += 1)
{
if(sig[i])
{
const rotAngle = fft_const*fbxx*i;//-1 * (2 * 3.1416) * fbxx * (i / N_FFT_max);
rex += Math.cos(rotAngle) * sig[i];
imx += Math.sin(rotAngle) * sig[i];
}
}
//fft-magnitude
//ret[fbxx] = ( Math.sqrt(Math.pow(rex, 2) + Math.pow(imx, 2)) );
//power spectrum bands
var yo = Math.sqrt((rex*rex) + (imx*imx));
ret[fbxx] = ( yo * yo )/N_FFT_cut;
}
return ret;
}
*/