UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

108 lines 5.6 kB
import { Logger } from '../utilities/Logger.js';
import * as FFT from './FFT.js';

/**
 * Builds a bank of triangular Mel filters for the requested frequency range and
 * then computes the Mel spectrogram of the given audio with it.
 *
 * @param {object} rawAudio - Audio container; `sampleRate` and `audioChannels[0]` are read.
 * @param {number} fftOrder - FFT size, forwarded to `FFT.stftr`.
 * @param {number} windowSize - Analysis window size, forwarded to `FFT.stftr`.
 * @param {number} hopLength - Hop between successive frames, forwarded to `FFT.stftr`.
 * @param {number} filterbankCount - Number of Mel bands to produce.
 * @param {number} lowerFrequencyHz - Lower edge of the Mel range, in Hertz.
 * @param {number} upperFrequencyHz - Upper edge of the Mel range, in Hertz.
 * @param {string} [windowType='hann'] - Window name, forwarded to `FFT.stftr`.
 * @returns {Promise<{melSpectrogram: *, fftFrames: *}>} Result of
 *     `computeMelSpectrogramUsingFilterbanks`.
 */
export async function computeMelSpectrogram(rawAudio, fftOrder, windowSize, hopLength, filterbankCount, lowerFrequencyHz, upperFrequencyHz, windowType = 'hann') {
    const logger = new Logger();

    logger.start('Compute Mel filterbank');

    // NOTE(review): a real FFT of size N normally yields N / 2 + 1 bins; this asks for
    // N / 2 + 2 bin frequencies. Confirm against FFT.getBinFrequencies / FFT.stftr.
    const spectrumBinFrequenciesHz = FFT.getBinFrequencies((fftOrder / 2) + 2, rawAudio.sampleRate / 2);

    const melRangeStart = hertzToMel(lowerFrequencyHz);
    const melRangeEnd = hertzToMel(upperFrequencyHz);
    const bandCentersMel = getMelFilterbanksCenterFrequencies(filterbankCount, melRangeStart, melRangeEnd);
    const builtFilterbanks = getMelFilterbanks(spectrumBinFrequenciesHz, bandCentersMel, melRangeStart, melRangeEnd);

    logger.end();

    return computeMelSpectrogramUsingFilterbanks(rawAudio, fftOrder, windowSize, hopLength, builtFilterbanks, windowType);
}

/**
 * Computes short-time FFT frames for the first audio channel and converts them
 * to a Mel spectrogram using precomputed filterbanks.
 *
 * @param {object} rawAudio - Audio container; only `audioChannels[0]` is read.
 * @param {number} fftOrder - FFT size, forwarded to `FFT.stftr`.
 * @param {number} windowSize - Analysis window size, forwarded to `FFT.stftr`.
 * @param {number} hopLength - Hop between successive frames, forwarded to `FFT.stftr`.
 * @param {Array<{startIndex: number, weights: number[]}>} filterbanks - Filters as
 *     produced by `getMelFilterbanks`.
 * @param {string} [windowType='hann'] - Window name, forwarded to `FFT.stftr`.
 * @returns {Promise<{melSpectrogram: *, fftFrames: *}>} The Mel spectrogram together
 *     with the FFT frames it was derived from.
 */
export async function computeMelSpectrogramUsingFilterbanks(rawAudio, fftOrder, windowSize, hopLength, filterbanks, windowType = 'hann') {
    const logger = new Logger();

    logger.start('Compute short-time FFTs');
    const fftFrames = await FFT.stftr(rawAudio.audioChannels[0], fftOrder, windowSize, hopLength, windowType);

    // Presumably Logger.start() closes the previously started stage — confirm in Logger.js.
    logger.start('Convert FFT frames to Mel spectrogram');
    const melSpectrogram = fftFramesToMelSpectrogram(fftFrames, filterbanks);

    logger.end();

    return { melSpectrogram, fftFrames };
}

/**
 * Converts each FFT frame to a power spectrum and then to a Mel spectrum.
 *
 * @param {Array} fftFrames - FFT frames accepted by `FFT.fftFrameToPowerSpectrum`.
 * @param {Array<{startIndex: number, weights: number[]}>} melFilterbanks - Mel filters.
 * @returns {Array<Float32Array>} One Mel spectrum per input frame (the result of
 *     `fftFrames.map`).
 */
export function fftFramesToMelSpectrogram(fftFrames, melFilterbanks) {
    return fftFrames.map((frame) => powerSpectrumToMelSpectrum(FFT.fftFrameToPowerSpectrum(frame), melFilterbanks));
}

/**
 * Applies every Mel filter to a power spectrum.
 *
 * @param {ArrayLike<number>} powerSpectrum - Power value per spectrum bin.
 * @param {Array<{startIndex: number, weights: number[]}>} filterbanks - Mel filters;
 *     a `startIndex` of -1 marks a filter that covers no bin.
 * @returns {Float32Array} One value per filter. Filters marked with -1 yield 0.
 */
export function powerSpectrumToMelSpectrum(powerSpectrum, filterbanks) {
    const bandCount = filterbanks.length;
    const melSpectrum = new Float32Array(bandCount);

    for (let bandIndex = 0; bandIndex < bandCount; bandIndex += 1) {
        const { startIndex, weights } = filterbanks[bandIndex];

        if (startIndex === -1) {
            // Empty filter: keep the zero the typed array was initialized with.
            continue;
        }

        // Weights that would reach past the end of the spectrum are ignored.
        const usableWeightCount = Math.min(weights.length, powerSpectrum.length - startIndex);

        let bandEnergy = 0;

        for (let offset = 0; offset < usableWeightCount; offset += 1) {
            bandEnergy += weights[offset] * powerSpectrum[startIndex + offset];
        }

        melSpectrum[bandIndex] = bandEnergy;
    }

    return melSpectrum;
}

/**
 * Weight of a single triangular filter at the given Mel frequency.
 * Both slopes are scaled by the same half-width, so the peak reaches 1 only when
 * the center lies midway between the left and right edges.
 */
function triangularWeight(frequencyMel, leftMel, centerMel, rightMel, halfWidthMel) {
    if (frequencyMel >= leftMel && frequencyMel <= centerMel) {
        return (frequencyMel - leftMel) / halfWidthMel;
    }

    if (frequencyMel > centerMel && frequencyMel <= rightMel) {
        return (rightMel - frequencyMel) / halfWidthMel;
    }

    return 0;
}

/**
 * Builds one filter: the index of the first bin with a positive weight and the
 * run of consecutive positive weights from there, normalized to sum to 1.
 */
function buildTriangularFilter(binFrequenciesMel, leftMel, centerMel, rightMel) {
    const halfWidthMel = (rightMel - leftMel) / 2;
    const rawWeights = [];

    let startIndex = -1;
    let weightSum = 0;

    for (let binIndex = 0; binIndex < binFrequenciesMel.length; binIndex += 1) {
        const weight = triangularWeight(binFrequenciesMel[binIndex], leftMel, centerMel, rightMel, halfWidthMel);

        if (!(weight > 0)) {
            if (startIndex !== -1) {
                // The run of positive weights has ended; later bins are not scanned.
                break;
            }

            continue;
        }

        if (startIndex === -1) {
            startIndex = binIndex;
        }

        rawWeights.push(weight);
        weightSum += weight;
    }

    return { startIndex, weights: rawWeights.map((weight) => weight / weightSum) };
}

/**
 * Builds triangular Mel filters over the bins of a power spectrum.
 *
 * Each filter spans from the previous center (or `lowerFrequencyMel` for the first
 * filter) to the next center (or `upperFrequencyMel` for the last one).
 *
 * @param {ArrayLike<number>} powerSpectrumFrequenciesHz - Frequency of each spectrum
 *     bin, in Hertz. Must provide `.map`.
 * @param {ArrayLike<number>} centerFrequenciesMel - Filter center frequencies, in Mel.
 * @param {number} lowerFrequencyMel - Left edge of the first filter, in Mel.
 * @param {number} upperFrequencyMel - Right edge of the last filter, in Mel.
 * @returns {Array<{startIndex: number, weights: number[]}>} One entry per center.
 *     `startIndex` is -1 and `weights` is empty when no bin falls inside the filter.
 */
export function getMelFilterbanks(powerSpectrumFrequenciesHz, centerFrequenciesMel, lowerFrequencyMel, upperFrequencyMel) {
    const filterbankCount = centerFrequenciesMel.length;

    // `.map` keeps the container type of the input, so a typed-array input yields
    // Mel values stored at that typed array's element precision.
    const binFrequenciesMel = powerSpectrumFrequenciesHz.map(hertzToMel);

    return Array.from({ length: filterbankCount }, (_, filterbankIndex) => {
        const isFirst = !(filterbankIndex > 0);
        const isLast = !(filterbankIndex < filterbankCount - 1);

        const leftMel = isFirst ? lowerFrequencyMel : centerFrequenciesMel[filterbankIndex - 1];
        const rightMel = isLast ? upperFrequencyMel : centerFrequenciesMel[filterbankIndex + 1];

        return buildTriangularFilter(binFrequenciesMel, leftMel, centerFrequenciesMel[filterbankIndex], rightMel);
    });
}

/**
 * Returns `melBandCount` center frequencies evenly spaced strictly between the
 * lower and upper Mel bounds (neither bound is itself a center).
 *
 * @param {number} melBandCount - Number of centers to generate.
 * @param {number} lowerFrequencyMel - Lower bound, in Mel.
 * @param {number} upperFrequencyMel - Upper bound, in Mel.
 * @returns {Float32Array} Center frequencies, in Mel.
 */
export function getMelFilterbanksCenterFrequencies(melBandCount, lowerFrequencyMel, upperFrequencyMel) {
    const stepSizeMel = (upperFrequencyMel - lowerFrequencyMel) / (melBandCount + 1);

    return new Float32Array(melBandCount).map((_, index) => lowerFrequencyMel + ((index + 1) * stepSizeMel));
}

/**
 * Converts a frequency in Hertz to Mel: 2595 * log10(1 + f / 700).
 *
 * @param {number} frequency - Frequency in Hertz.
 * @returns {number} Frequency in Mel.
 */
export function hertzToMel(frequency) {
    const scaledFrequency = frequency / 700.0;

    return 2595.0 * Math.log10(1.0 + scaledFrequency);
}

/**
 * Converts a Mel value back to Hertz: 700 * (10^(mel / 2595) - 1).
 *
 * @param {number} mel - Frequency in Mel.
 * @returns {number} Frequency in Hertz.
 */
export function melToHertz(mel) {
    const exponent = mel / 2595.0;

    return 700.0 * (Math.pow(10.0, exponent) - 1.0);
}
//# sourceMappingURL=MelSpectrogram.js.map