UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

127 lines 6.33 kB
import { extendDeep } from '../utilities/ObjectUtilities.js';
import { applyGainDecibels, applyGainDecibelsInPlace, attenuateIfClippingInPlace, ensureRawAudio, getSamplePeakDecibels, mixAudio, normalizeAudioLevelInPlace } from '../audio/AudioUtilities.js';
import { Logger } from '../utilities/Logger.js';
import { logToStderr } from '../utilities/Utilities.js';
import { resampleAudioSpeex } from '../dsp/SpeexResampler.js';
import chalk from 'chalk';
import { defaultNSNet2Options } from '../denoising/NSNet2.js';
import { loadPackage } from '../utilities/PackageManager.js';

const log = logToStderr;

/**
 * Denoises audio with the engine selected in `options.engine`, then
 * post-processes the result (resample back, dry mix, level adjustment).
 *
 * Steps, in order:
 *  1. Merge `options` over `defaultDenoisingOptions` using `extendDeep`.
 *  2. Convert `input` with `ensureRawAudio`.
 *  3. Resample to the engine's processing rate and denoise every channel
 *     separately.
 *  4. Pass the denoised audio through `ensureRawAudio` with the input's
 *     sample rate and channel count.
 *  5. Mix in the original input with `postProcessing.dryMixGain` (dB) applied,
 *     then either normalize, or restore the peak level measured before mixing.
 *
 * @param input - Audio to denoise; anything `ensureRawAudio` accepts.
 * @param options - Partial denoising options; see `defaultDenoisingOptions`.
 * @returns An object `{ denoisedAudio, inputRawAudio }`.
 * @throws {Error} If `options.engine` is not `'rnnoise'` or `'nsnet2'`, or if
 *   `options.nsnet2.model` is not one of the known NSNet2 model names.
 */
export async function denoise(input, options) {
    const logger = new Logger();
    const startTime = logger.getTimestamp();

    options = extendDeep(defaultDenoisingOptions, options);

    const inputRawAudio = await ensureRawAudio(input);

    logger.start(`Initialize ${options.engine} module`);

    let denoisedAudio;

    switch (options.engine) {
        case 'rnnoise': {
            const RNNoise = await import('../denoising/RNNoise.js');
            logger.end();

            const processingSampleRate = 48000;

            denoisedAudio = await resampleAndDenoiseChannels(logger, inputRawAudio, processingSampleRate, async (channelRawAudio) => {
                const result = await RNNoise.denoiseAudio(channelRawAudio);

                return result.denoisedRawAudio;
            });

            break;
        }

        case 'nsnet2': {
            const NSNet2 = await import('../denoising/NSNet2.js');
            logger.end();

            const nsnet2Options = options.nsnet2;

            let processingSampleRate;
            let packageName;

            if (nsnet2Options.model === 'baseline-16khz') {
                processingSampleRate = 16000;
                packageName = 'nsnet2-20ms-baseline';
            }
            else if (nsnet2Options.model === 'baseline-48khz') {
                processingSampleRate = 48000;
                packageName = 'nsnet2-20ms-48k-baseline';
            }
            else {
                throw new Error(`Unknown model name: ${nsnet2Options.model}`);
            }

            // Only load the model package when the caller did not supply a directory.
            if (!nsnet2Options.modelDirectoryPath) {
                nsnet2Options.modelDirectoryPath = await loadPackage(packageName);
            }

            denoisedAudio = await resampleAndDenoiseChannels(logger, inputRawAudio, processingSampleRate, async (channelRawAudio) => {
                const result = await NSNet2.denoiseAudio(channelRawAudio, nsnet2Options);

                return result.denoisedAudio;
            });

            break;
        }

        default: {
            throw new Error(`Engine '${options.engine}' is not recognized.`);
        }
    }

    logger.logTitledMessage(`Postprocess`, ``, chalk.magentaBright);
    logger.start(`Resample denoised audio (${denoisedAudio.sampleRate} Hz) back to original sample rate (${inputRawAudio.sampleRate} Hz)`);

    denoisedAudio = await ensureRawAudio(denoisedAudio, inputRawAudio.sampleRate, inputRawAudio.audioChannels.length);

    logger.start('Postprocess audio');

    const {
        normalizeAudio: shouldNormalize,
        targetPeak: targetPeakDecibels,
        maxGainIncrease: maxGainIncreaseDecibels,
        dryMixGain: dryMixGainDecibels,
    } = options.postProcessing;

    attenuateIfClippingInPlace(denoisedAudio);

    // Peak is measured before and after the dry mix so that, when not
    // normalizing, the mixed result can be brought back to the pre-mix peak.
    const preMixPeakDecibels = getSamplePeakDecibels(denoisedAudio.audioChannels);

    const inputRawAudioWithGain = applyGainDecibels(inputRawAudio, dryMixGainDecibels);
    denoisedAudio = mixAudio(denoisedAudio, inputRawAudioWithGain);

    const postMixPeakDecibels = getSamplePeakDecibels(denoisedAudio.audioChannels);

    if (shouldNormalize) {
        normalizeAudioLevelInPlace(denoisedAudio, targetPeakDecibels, maxGainIncreaseDecibels);
    }
    else {
        const restoreGainDecibels = preMixPeakDecibels - postMixPeakDecibels;

        // The difference is NaN when both peaks are the same infinity
        // (presumably the case for completely silent audio, if the peak helper
        // reports -Infinity for it - TODO confirm). Skip the gain in that case
        // rather than passing NaN on to the samples.
        if (!Number.isNaN(restoreGainDecibels)) {
            applyGainDecibelsInPlace(denoisedAudio, restoreGainDecibels);
        }
    }

    logger.end();
    logger.log('');
    logger.logDuration('Total denoising time', startTime, chalk.magentaBright);

    return { denoisedAudio, inputRawAudio };
}

/**
 * Resamples `inputRawAudio` to `processingSampleRate` and denoises each of its
 * channels independently, one after the other.
 *
 * @param logger - Logger used for progress messages.
 * @param inputRawAudio - Raw audio (`{ audioChannels, sampleRate }`) to process.
 * @param processingSampleRate - Sample rate, in Hz, the engine operates at.
 * @param denoiseSingleChannel - Async callback that receives a single-channel
 *   raw audio object and resolves to the denoised raw audio for that channel.
 * @returns Raw audio holding the denoised channels at `processingSampleRate`.
 */
async function resampleAndDenoiseChannels(logger, inputRawAudio, processingSampleRate, denoiseSingleChannel) {
    logger.start(`Resample audio to ${processingSampleRate} Hz`);

    const resampledAudio = await resampleAudioSpeex(inputRawAudio, processingSampleRate, 0);

    const denoisedChannels = [];

    for (const [channelIndex, audioChannel] of resampledAudio.audioChannels.entries()) {
        const channelRawAudio = { audioChannels: [audioChannel], sampleRate: processingSampleRate };

        logger.end();
        logger.logTitledMessage(`Denoise audio channel`, `${channelIndex}`, chalk.magentaBright);

        const denoisedChannelAudio = await denoiseSingleChannel(channelRawAudio);

        logger.end();

        denoisedChannels.push(denoisedChannelAudio.audioChannels[0]);
    }

    return { audioChannels: denoisedChannels, sampleRate: processingSampleRate };
}

/**
 * Default denoising options; `denoise` merges caller options over these.
 *
 * - `engine`: identifier of the denoising engine to use.
 * - `postProcessing.normalizeAudio`: when true, the mixed output is normalized
 *   using `targetPeak` and `maxGainIncrease`; otherwise its pre-mix peak level
 *   is restored.
 * - `postProcessing.targetPeak` / `maxGainIncrease`: decibel values handed to
 *   `normalizeAudioLevelInPlace`.
 * - `postProcessing.dryMixGain`: gain, in decibels, applied to the original
 *   input before it is mixed into the denoised audio.
 * - `nsnet2`: options for the NSNet2 engine.
 */
export const defaultDenoisingOptions = {
    engine: 'rnnoise',
    postProcessing: {
        normalizeAudio: false,
        targetPeak: -3,
        maxGainIncrease: 30,
        dryMixGain: -100,
    },
    nsnet2: defaultNSNet2Options,
};

/** Descriptive metadata for the denoising engines `denoise` recognizes. */
export const denoisingEngines = [
    {
        id: 'rnnoise',
        name: 'RNNoise',
        description: 'A noise suppression library based on a recurrent neural network.',
        type: 'local'
    },
    {
        id: 'nsnet2',
        name: 'Noise Suppression Net 2',
        description: 'Noise suppression models used as baselines for the ICASSP 2021 Deep Noise Suppression challenge.',
        type: 'local'
    }
];
//# sourceMappingURL=Denoising.js.map