UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

227 lines 10.2 kB
import { concatFloat32Arrays, isWasmSimdSupported } from '../utilities/Utilities.js'; import { createWasmHeapManager } from 'wasm-heap-manager'; // Compute short-term Fourier transform (real-valued) export async function stftr(samples, fftOrder, windowSize, hopSize, windowType) { const frames = []; const stftrGenerator = await createStftrGenerator(samples, fftOrder, windowSize, hopSize, windowType); for (const frame of stftrGenerator) { frames.push(frame); } return frames; } // Incrementally generate short-term Fourier transform frames (real-valued) export async function createStftrGenerator(samples, fftOrder, windowSize, hopSize, windowType) { if (fftOrder % 2 !== 0 || windowSize % 2 !== 0) { throw new Error('FFT order and window size must be multiples of 2'); } if (windowSize > fftOrder) { throw new Error('Window size must be lesser or equal to the FFT size'); } if (hopSize > windowSize) { throw new Error('Hop size must be lesser or equal to the window size'); } const halfWindowSize = windowSize / 2; const padding = new Float32Array(halfWindowSize); samples = concatFloat32Arrays([padding, samples, padding]); const windowWeights = getWindowWeights(windowType, windowSize); const m = await getPFFFTInstance(await isPffftSimdSupportedForFFTOrder(fftOrder)); const wasmHeap = createWasmHeapManager(() => m.HEAPU8.buffer, m._pffft_aligned_malloc, m._pffft_aligned_free); const statePtr = m._pffft_new_setup(fftOrder, 0); const sampleCount = samples.length; const frameBufferRef = wasmHeap.allocFloat32Array(fftOrder); const binsBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2); const workBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2); function* stftrGenerator() { for (let offset = 0; offset < sampleCount; offset += hopSize) { const windowSamples = samples.subarray(offset, offset + windowSize); frameBufferRef.clear(); const frameBufferView = frameBufferRef.view; for (let i = 0; i < windowSamples.length; i++) { frameBufferView[i] = windowSamples[i] * windowWeights[i]; } binsBufferRef.clear(); m._pffft_transform_ordered(statePtr, frameBufferRef.address, binsBufferRef.address, workBufferRef.address, 0); const bins = binsBufferRef.view.slice(0, fftOrder + 2); yield bins; } m._pffft_destroy_setup(statePtr); wasmHeap.freeAll(); } return stftrGenerator(); } // Compute short-term inverse Fourier transform (real-valued) export async function stiftr(binsForFrames, fftOrder, windowSize, hopSize, windowType, expectedOutputLength) { if (fftOrder % 2 !== 0 || windowSize % 2 !== 0) { throw new Error('FFT order and window size must multiples of 2'); } if (windowSize > fftOrder) { throw new Error('Window size must be lesser or equal to the FFT size'); } if (hopSize > windowSize) { throw new Error('Hop size must be lesser or equal to the window size'); } const frameCount = binsForFrames.length; const halfWindowSize = windowSize / 2; const windowWeights = getWindowWeights(windowType, windowSize); const outSampleCount = hopSize * frameCount; if (expectedOutputLength == null) { expectedOutputLength = outSampleCount; } const outSamples = new Float32Array(outSampleCount); const m = await getPFFFTInstance(await isPffftSimdSupportedForFFTOrder(fftOrder)); const wasmHeap = createWasmHeapManager(() => m.HEAPU8.buffer, m._pffft_aligned_malloc, m._pffft_aligned_free); const statePtr = m._pffft_new_setup(fftOrder, 0); const frameBufferRef = wasmHeap.allocFloat32Array(fftOrder); const binsRef = wasmHeap.allocFloat32Array(fftOrder * 2); const workBufferRef = wasmHeap.allocFloat32Array(fftOrder * 2); const sumOfSquaredWeightsForSample = new Float32Array(outSampleCount); for (let frameIndex = 0; frameIndex < frameCount; frameIndex++) { const binsForFrame = binsForFrames[frameIndex]; binsRef.clear(); binsRef.view.set(binsForFrame); frameBufferRef.clear(); m._pffft_transform_ordered(statePtr, binsRef.address, frameBufferRef.address, workBufferRef.address, 1); const frameSamples = frameBufferRef.view; const frameStartOffset = frameIndex * hopSize; for (let windowOffset = 0; windowOffset < windowSize; windowOffset++) { const frameSample = frameSamples[windowOffset]; const weight = windowWeights[windowOffset]; const writePosition = frameStartOffset + windowOffset; outSamples[writePosition] += frameSample * weight; sumOfSquaredWeightsForSample[writePosition] += weight ** 2; } } m._pffft_destroy_setup(statePtr); wasmHeap.freeAll(); // Divide each output sample by the sum of squared weights for (let i = 0; i < outSamples.length; i++) { outSamples[i] /= sumOfSquaredWeightsForSample[i] + 1e-8; } const outSamplesTrimmed = outSamples.slice(halfWindowSize, halfWindowSize + expectedOutputLength); return outSamplesTrimmed; } // Get bin frequency thresholds for a particular bin count and maximum frequency export function getBinFrequencies(binCount, maxFrequency) { const binFrequencies = new Float32Array(binCount); const frequencyStep = maxFrequency / (binCount - 1); for (let i = 0, frequency = 0; i < binFrequencies.length; i++, frequency += frequencyStep) { binFrequencies[i] = frequency; } return binFrequencies; } // Convert an array of raw FFT frames to a power spectrum export function fftFramesToPowerSpectrogram(fftFrames) { return fftFrames.map(fftFrame => fftFrameToPowerSpectrum(fftFrame)); } // Convert a raw FFT frame to a power spectrum export function fftFrameToPowerSpectrum(fftFrame) { const powerSpectrum = new Float32Array(fftFrame.length / 2); for (let i = 0; i < powerSpectrum.length; i++) { const binOffset = i * 2; const fftCoefficientRealPart = fftFrame[binOffset]; const fftCoefficientImaginaryPart = fftFrame[binOffset + 1]; const binPower = (fftCoefficientRealPart ** 2) + (fftCoefficientImaginaryPart ** 2); powerSpectrum[i] = binPower; } return powerSpectrum; } // Convert raw FFT frames to an array of complex numbers export function binBufferToComplex(bins, extendAndMirror = false) { const complexBins = []; for (let i = 0; i < bins.length; i += 2) { complexBins.push({ real: bins[i], imaginary: bins[i + 1] }); } if (extendAndMirror) { const fftSize = bins.length - 2; for (let i = complexBins.length; i < fftSize; i++) { const complexBinToMirror = complexBins[fftSize - i]; complexBins.push({ real: complexBinToMirror.real, imaginary: -complexBinToMirror.imaginary }); } } return complexBins; } // Convert an array of complex numbers to raw FFT frames export function complexToBinBuffer(complexBins) { const binBuffer = new Float32Array(complexBins.length * 2); for (let i = 0, outIndex = 0; i < complexBins.length; i++) { const complexBin = complexBins[i]; binBuffer[outIndex++] = complexBin.real; binBuffer[outIndex++] = complexBin.imaginary; } return binBuffer; } // Convert complex bin to magnitude and phase export function complexToMagnitudeAndPhase(real, imaginary) { const magnitude = Math.sqrt((real ** 2) + (imaginary ** 2)); const phase = Math.atan2(imaginary, real); return { magnitude, phase }; } // Convert magnitude and phase to complex bin export function magnitudeAndPhaseToComplex(magnitude, phase) { const real = magnitude * Math.cos(phase); const imaginary = magnitude * Math.sin(phase); return { real, imaginary }; } // Get window weights for a particular window function export function getWindowWeights(windowType, windowSize) { const weights = new Float32Array(windowSize); const innerFactor = (2 * Math.PI) / (windowSize - 1); if (windowType == 'hann') { for (let i = 0; i < windowSize; i++) { //weights[i] = 0.5 * (1 - Math.cos(2 * Math.PI * (i / (windowSize - 1)))) weights[i] = 0.5 * (1 - Math.cos(innerFactor * i)); } } else if (windowType == 'hann-sqrt') { for (let i = 0; i < windowSize; i++) { weights[i] = Math.sqrt(0.5 * (1 - Math.cos(innerFactor * i))); } } else if (windowType == 'hamming') { for (let i = 0; i < windowSize; i++) { //weights[i] = 0.54 - (0.46 * Math.cos(2 * Math.PI * (i / (windowSize - 1)))) weights[i] = 0.54 - (0.46 * Math.cos(innerFactor * i)); } } else if (windowType == 'povey') { const hannWeights = getWindowWeights('hann', windowSize); for (let i = 0; i < windowSize; i++) { weights[i] = hannWeights[i] ** 0.85; } } else { throw new Error(`Unsupported window function type: ${windowType}`); } return weights; } export async function isPffftSimdSupportedForFFTOrder(fftOrder) { const simdSupported = await isWasmSimdSupported(); if (simdSupported === false) { return false; } return fftOrder % 32 === 0; } let pffftNonSimdInstance; let pffftSimdInstance; export async function getPFFFTInstance(enableSimd) { return enableSimd ? getSimdPFFFTInstance() : getNonSimdPFFFTInstance(); } // Get non-SIMD PFFFT instance (initialize new if not exists) async function getNonSimdPFFFTInstance() { const { default: initializer } = await import('@echogarden/pffft-wasm'); pffftNonSimdInstance = await initializer(); return pffftNonSimdInstance; } // Get SIMD PFFFT instance (initialize new if not exists) async function getSimdPFFFTInstance() { const { default: initializer } = await import('@echogarden/pffft-wasm/simd'); pffftSimdInstance = await initializer(); return pffftSimdInstance; } //# sourceMappingURL=FFT.js.map