/*
 * echogarden
 * Version:
 * An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
 * 227 lines • 10.2 kB
 * JavaScript
 */
import { concatFloat32Arrays, isWasmSimdSupported } from '../utilities/Utilities.js';
import { createWasmHeapManager } from 'wasm-heap-manager';
/**
 * Compute the short-term Fourier transform (real-valued) of a sample buffer.
 *
 * Drains the generator returned by `createStftrGenerator` and collects every
 * frame it yields, in order, into a plain array.
 *
 * @param samples Input samples (forwarded unchanged)
 * @param fftOrder FFT size (forwarded unchanged)
 * @param windowSize Analysis window length (forwarded unchanged)
 * @param hopSize Distance between consecutive frames (forwarded unchanged)
 * @param windowType Window function name (forwarded unchanged)
 * @returns Promise resolving to an array containing one entry per generated frame
 */
export async function stftr(samples, fftOrder, windowSize, hopSize, windowType) {
	const frameGenerator = await createStftrGenerator(samples, fftOrder, windowSize, hopSize, windowType);

	return Array.from(frameGenerator);
}
/**
 * Incrementally generate short-term Fourier transform frames (real-valued).
 *
 * The input is zero-padded with `windowSize / 2` samples on both sides. One frame
 * is produced for every `hopSize` step over the padded input: the frame is multiplied
 * by the window weights, zero-extended to `fftOrder` samples and passed to PFFFT.
 *
 * Native resources (the PFFFT setup and the WASM heap buffers) are acquired when the
 * generator is first advanced and are always released when it finishes, is closed
 * early (for example by `break` in a `for...of` loop or by calling `return()`),
 * or throws.
 *
 * @param samples Input samples
 * @param fftOrder FFT size. Must be a multiple of 2
 * @param windowSize Analysis window length. Must be a multiple of 2 and not exceed `fftOrder`
 * @param hopSize Distance between consecutive frames. Must be greater than 0 and not exceed `windowSize`
 * @param windowType Window function name accepted by `getWindowWeights`
 * @returns Promise resolving to a generator yielding one bins buffer of `fftOrder + 2` values per frame
 * @throws {Error} If any of the size constraints above is violated
 */
export async function createStftrGenerator(samples, fftOrder, windowSize, hopSize, windowType) {
	if (fftOrder % 2 !== 0 || windowSize % 2 !== 0) {
		throw new Error('FFT order and window size must be multiples of 2');
	}

	if (windowSize > fftOrder) {
		throw new Error('Window size must be lesser or equal to the FFT size');
	}

	if (hopSize > windowSize) {
		throw new Error('Hop size must be lesser or equal to the window size');
	}

	// A hop of zero or less never advances the frame offset, so the generator would never terminate
	if (hopSize <= 0) {
		throw new Error('Hop size must be greater than 0');
	}

	const halfWindowSize = windowSize / 2;
	const padding = new Float32Array(halfWindowSize);
	const paddedSamples = concatFloat32Arrays([padding, samples, padding]);
	const sampleCount = paddedSamples.length;

	const windowWeights = getWindowWeights(windowType, windowSize);

	const m = await getPFFFTInstance(await isPffftSimdSupportedForFFTOrder(fftOrder));

	function* stftrGenerator() {
		const wasmHeap = createWasmHeapManager(() => m.HEAPU8.buffer, m._pffft_aligned_malloc, m._pffft_aligned_free);

		// NOTE(review): the second argument (0) presumably selects the real-valued transform - confirm against pffft.h
		const statePtr = m._pffft_new_setup(fftOrder, 0);

		try {
			const frameBufferRef = wasmHeap.allocFloat32Array(fftOrder);
			const binsBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2);
			const workBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2);

			for (let offset = 0; offset < sampleCount; offset += hopSize) {
				// The last windows may be shorter than `windowSize`; the cleared frame buffer keeps the remainder at zero
				const windowSamples = paddedSamples.subarray(offset, offset + windowSize);

				frameBufferRef.clear();

				const frameBufferView = frameBufferRef.view;

				for (let i = 0; i < windowSamples.length; i++) {
					frameBufferView[i] = windowSamples[i] * windowWeights[i];
				}

				binsBufferRef.clear();

				// NOTE(review): the final argument (0) presumably selects the forward direction - confirm against pffft.h
				m._pffft_transform_ordered(statePtr, frameBufferRef.address, binsBufferRef.address, workBufferRef.address, 0);

				// `slice` copies the values, so the yielded frame does not alias the reused bins buffer
				const bins = binsBufferRef.view.slice(0, fftOrder + 2);

				yield bins;
			}
		}
		finally {
			// Runs on normal completion, on early close of the generator, and on errors
			m._pffft_destroy_setup(statePtr);
			wasmHeap.freeAll();
		}
	}

	return stftrGenerator();
}
/**
 * Compute the short-term inverse Fourier transform (real-valued).
 *
 * Each bins frame is transformed back to samples with PFFFT, multiplied by the
 * window weights and overlap-added at `frameIndex * hopSize`. Every output sample
 * is then divided by the sum of the squared window weights that contributed to it,
 * and the first `windowSize / 2` samples are trimmed from the start of the result.
 *
 * Native resources (the PFFFT setup and the WASM heap buffers) are released even
 * if processing a frame throws.
 *
 * @param binsForFrames Array of bins buffers, one per frame
 * @param fftOrder FFT size. Must be a multiple of 2
 * @param windowSize Synthesis window length. Must be a multiple of 2 and not exceed `fftOrder`
 * @param hopSize Distance between consecutive frames. Must not exceed `windowSize`
 * @param windowType Window function name accepted by `getWindowWeights`
 * @param expectedOutputLength Maximum number of samples to return. Defaults to `hopSize * frameCount`
 * @returns Promise resolving to a Float32Array with the reconstructed samples
 * @throws {Error} If any of the size constraints above is violated
 */
export async function stiftr(binsForFrames, fftOrder, windowSize, hopSize, windowType, expectedOutputLength) {
	if (fftOrder % 2 !== 0 || windowSize % 2 !== 0) {
		throw new Error('FFT order and window size must be multiples of 2');
	}

	if (windowSize > fftOrder) {
		throw new Error('Window size must be lesser or equal to the FFT size');
	}

	if (hopSize > windowSize) {
		throw new Error('Hop size must be lesser or equal to the window size');
	}

	const frameCount = binsForFrames.length;
	const halfWindowSize = windowSize / 2;

	const windowWeights = getWindowWeights(windowType, windowSize);

	const outSampleCount = hopSize * frameCount;
	const outputLength = expectedOutputLength ?? outSampleCount;

	const outSamples = new Float32Array(outSampleCount);
	const sumOfSquaredWeightsForSample = new Float32Array(outSampleCount);

	const m = await getPFFFTInstance(await isPffftSimdSupportedForFFTOrder(fftOrder));
	const wasmHeap = createWasmHeapManager(() => m.HEAPU8.buffer, m._pffft_aligned_malloc, m._pffft_aligned_free);

	// NOTE(review): the second argument (0) presumably selects the real-valued transform - confirm against pffft.h
	const statePtr = m._pffft_new_setup(fftOrder, 0);

	try {
		const frameBufferRef = wasmHeap.allocFloat32Array(fftOrder);
		const binsRef = wasmHeap.allocFloat32Array(fftOrder * 2);
		const workBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2);

		for (let frameIndex = 0; frameIndex < frameCount; frameIndex++) {
			const binsForFrame = binsForFrames[frameIndex];

			binsRef.clear();
			binsRef.view.set(binsForFrame);

			frameBufferRef.clear();

			// NOTE(review): the final argument (1) presumably selects the backward direction - confirm against pffft.h
			m._pffft_transform_ordered(statePtr, binsRef.address, frameBufferRef.address, workBufferRef.address, 1);

			const frameSamples = frameBufferRef.view;
			const frameStartOffset = frameIndex * hopSize;

			// Overlap-add. Positions at or beyond `outSampleCount` are silently dropped,
			// since writes past the end of a typed array are ignored
			for (let windowOffset = 0; windowOffset < windowSize; windowOffset++) {
				const frameSample = frameSamples[windowOffset];
				const weight = windowWeights[windowOffset];

				const writePosition = frameStartOffset + windowOffset;

				outSamples[writePosition] += frameSample * weight;
				sumOfSquaredWeightsForSample[writePosition] += weight ** 2;
			}
		}
	}
	finally {
		// Release native resources whether or not a frame failed to process
		m._pffft_destroy_setup(statePtr);
		wasmHeap.freeAll();
	}

	// Divide each output sample by the sum of squared weights (offset slightly to avoid division by zero)
	for (let i = 0; i < outSamples.length; i++) {
		outSamples[i] /= sumOfSquaredWeightsForSample[i] + 1e-8;
	}

	const outSamplesTrimmed = outSamples.slice(halfWindowSize, halfWindowSize + outputLength);

	return outSamplesTrimmed;
}
/**
 * Get bin frequency thresholds for a particular bin count and maximum frequency.
 *
 * The first bin is 0 and each following bin is the previous one plus
 * `maxFrequency / (binCount - 1)`.
 *
 * @param binCount Number of bins
 * @param maxFrequency Frequency divided evenly across `binCount - 1` steps
 * @returns Float32Array of `binCount` frequencies
 */
export function getBinFrequencies(binCount, maxFrequency) {
	const result = new Float32Array(binCount);
	const step = maxFrequency / (binCount - 1);

	let runningFrequency = 0;

	for (let binIndex = 0; binIndex < result.length; binIndex++) {
		result[binIndex] = runningFrequency;
		runningFrequency += step;
	}

	return result;
}
/**
 * Convert an array of raw FFT frames to a power spectrogram.
 *
 * @param fftFrames Array of raw FFT frames
 * @returns Array holding the result of `fftFrameToPowerSpectrum` for each frame, in the same order
 */
export function fftFramesToPowerSpectrogram(fftFrames) {
	return fftFrames.map(function (fftFrame) {
		return fftFrameToPowerSpectrum(fftFrame);
	});
}
/**
 * Convert a raw FFT frame to a power spectrum.
 *
 * The frame is read as consecutive (real, imaginary) pairs; the power of each
 * pair is `real² + imaginary²`.
 *
 * @param fftFrame Raw FFT frame with interleaved real and imaginary parts
 * @returns Float32Array with one power value per pair
 */
export function fftFrameToPowerSpectrum(fftFrame) {
	const binCount = fftFrame.length / 2;
	const powers = new Float32Array(binCount);

	let readIndex = 0;

	for (let binIndex = 0; binIndex < powers.length; binIndex++) {
		const realPart = fftFrame[readIndex++];
		const imaginaryPart = fftFrame[readIndex++];

		powers[binIndex] = (realPart ** 2) + (imaginaryPart ** 2);
	}

	return powers;
}
/**
 * Convert a raw FFT bins buffer to an array of complex numbers.
 *
 * Consecutive (real, imaginary) pairs of the buffer become `{ real, imaginary }` objects.
 * With `extendAndMirror` enabled, the array is extended up to `bins.length - 2` entries,
 * where each added entry at index `i` is the complex conjugate of the entry at
 * index `(bins.length - 2) - i`.
 *
 * @param bins Raw bins buffer with interleaved real and imaginary parts
 * @param extendAndMirror Whether to append the conjugate-mirrored bins. Defaults to `false`
 * @returns Array of `{ real, imaginary }` objects
 */
export function binBufferToComplex(bins, extendAndMirror = false) {
	const result = [];

	for (let readOffset = 0; readOffset < bins.length; readOffset += 2) {
		const real = bins[readOffset];
		const imaginary = bins[readOffset + 1];

		result.push({ real, imaginary });
	}

	if (!extendAndMirror) {
		return result;
	}

	const fftSize = bins.length - 2;

	let targetIndex = result.length;

	while (targetIndex < fftSize) {
		const source = result[fftSize - targetIndex];

		result.push({ real: source.real, imaginary: -source.imaginary });

		targetIndex++;
	}

	return result;
}
/**
 * Convert an array of complex numbers to a raw FFT bins buffer.
 *
 * @param complexBins Array of `{ real, imaginary }` objects
 * @returns Float32Array of twice the input length, holding each bin's real part followed by its imaginary part
 */
export function complexToBinBuffer(complexBins) {
	const interleaved = new Float32Array(complexBins.length * 2);

	for (let binIndex = 0; binIndex < complexBins.length; binIndex++) {
		const { real, imaginary } = complexBins[binIndex];
		const writeOffset = binIndex * 2;

		interleaved[writeOffset] = real;
		interleaved[writeOffset + 1] = imaginary;
	}

	return interleaved;
}
/**
 * Convert a complex bin to magnitude and phase.
 *
 * @param real Real part of the bin
 * @param imaginary Imaginary part of the bin
 * @returns Object with `magnitude` (square root of `real² + imaginary²`) and `phase` (`atan2(imaginary, real)`)
 */
export function complexToMagnitudeAndPhase(real, imaginary) {
	const squaredMagnitude = (real ** 2) + (imaginary ** 2);

	return {
		magnitude: Math.sqrt(squaredMagnitude),
		phase: Math.atan2(imaginary, real)
	};
}
/**
 * Convert magnitude and phase to a complex bin.
 *
 * @param magnitude Magnitude of the bin
 * @param phase Phase angle, in radians
 * @returns Object with `real` (`magnitude * cos(phase)`) and `imaginary` (`magnitude * sin(phase)`)
 */
export function magnitudeAndPhaseToComplex(magnitude, phase) {
	return {
		real: magnitude * Math.cos(phase),
		imaginary: magnitude * Math.sin(phase)
	};
}
/**
 * Get window weights for a particular window function.
 *
 * With `phase(i) = 2π · i / (windowSize - 1)`, the supported window types are:
 * - `'hann'`: `0.5 · (1 - cos(phase))`
 * - `'hann-sqrt'`: square root of the Hann weight
 * - `'hamming'`: `0.54 - 0.46 · cos(phase)`
 * - `'povey'`: Hann weight (at 32-bit precision) raised to the power of 0.85
 *
 * A window of size 1 has the single weight 1 (the formulas above would otherwise
 * divide by zero and produce NaN).
 *
 * @param windowType Name of the window function
 * @param windowSize Number of weights to generate
 * @returns Float32Array of `windowSize` weights
 * @throws {Error} If `windowType` is not one of the supported names
 */
export function getWindowWeights(windowType, windowSize) {
	const weights = new Float32Array(windowSize);

	// Select the weight formula, expressed as a function of the phase
	let weightForPhase;

	if (windowType === 'hann') {
		weightForPhase = (phase) => 0.5 * (1 - Math.cos(phase));
	}
	else if (windowType === 'hann-sqrt') {
		weightForPhase = (phase) => Math.sqrt(0.5 * (1 - Math.cos(phase)));
	}
	else if (windowType === 'hamming') {
		weightForPhase = (phase) => 0.54 - (0.46 * Math.cos(phase));
	}
	else if (windowType === 'povey') {
		// The Hann weight is rounded to 32-bit precision before exponentiation, which gives
		// the same values as deriving the weights from a Float32Array of Hann weights
		weightForPhase = (phase) => Math.fround(0.5 * (1 - Math.cos(phase))) ** 0.85;
	}
	else {
		throw new Error(`Unsupported window function type: ${windowType}`);
	}

	if (windowSize === 1) {
		// The phase step is undefined for a single sample (division by zero)
		weights[0] = 1;

		return weights;
	}

	const innerFactor = (2 * Math.PI) / (windowSize - 1);

	for (let i = 0; i < windowSize; i++) {
		weights[i] = weightForPhase(innerFactor * i);
	}

	return weights;
}
/**
 * Check whether the SIMD build of PFFFT can be used for a given FFT order.
 *
 * Resolves to `false` when `isWasmSimdSupported()` resolves to `false`.
 * Otherwise resolves to whether `fftOrder` is a multiple of 32.
 *
 * @param fftOrder FFT size
 * @returns Promise resolving to a boolean
 */
export async function isPffftSimdSupportedForFFTOrder(fftOrder) {
	const hasWasmSimd = await isWasmSimdSupported();

	return hasWasmSimd !== false && fftOrder % 32 === 0;
}
// Module-level slots holding the most recently initialized PFFFT instances
let pffftNonSimdInstance;
let pffftSimdInstance;

/**
 * Get a PFFFT instance.
 *
 * @param enableSimd When truthy, the SIMD instance is requested; otherwise the non-SIMD instance
 * @returns Promise resolving to the requested PFFFT instance
 */
export async function getPFFFTInstance(enableSimd) {
	if (enableSimd) {
		return getSimdPFFFTInstance();
	}

	return getNonSimdPFFFTInstance();
}
/**
 * Get the non-SIMD PFFFT instance (initialize new if not exists).
 *
 * The module is imported and initialized on the first call only; later calls
 * return the instance stored in `pffftNonSimdInstance` instead of creating
 * another WebAssembly instance.
 *
 * @returns Promise resolving to the non-SIMD PFFFT instance
 */
async function getNonSimdPFFFTInstance() {
	if (pffftNonSimdInstance === undefined) {
		const { default: initializer } = await import('@echogarden/pffft-wasm');

		pffftNonSimdInstance = await initializer();
	}

	return pffftNonSimdInstance;
}
/**
 * Get the SIMD PFFFT instance (initialize new if not exists).
 *
 * The module is imported and initialized on the first call only; later calls
 * return the instance stored in `pffftSimdInstance` instead of creating
 * another WebAssembly instance.
 *
 * @returns Promise resolving to the SIMD PFFFT instance
 */
async function getSimdPFFFTInstance() {
	if (pffftSimdInstance === undefined) {
		const { default: initializer } = await import('@echogarden/pffft-wasm/simd');

		pffftSimdInstance = await initializer();
	}

	return pffftSimdInstance;
}
//# sourceMappingURL=FFT.js.map