// echogarden — Denoising module
// An easy-to-use speech toolset. Includes tools for synthesis, recognition,
// alignment, speech translation, language detection, source separation and more.
import { extendDeep } from '../utilities/ObjectUtilities.js';
import { applyGainDecibels, applyGainDecibelsInPlace, attenuateIfClippingInPlace, ensureRawAudio, getSamplePeakDecibels, mixAudio, normalizeAudioLevelInPlace } from '../audio/AudioUtilities.js';
import { Logger } from '../utilities/Logger.js';
import { logToStderr } from '../utilities/Utilities.js';
import { resampleAudioSpeex } from '../dsp/SpeexResampler.js';
import chalk from 'chalk';
import { defaultNSNet2Options } from '../denoising/NSNet2.js';
import { loadPackage } from '../utilities/PackageManager.js';
const log = logToStderr;
export async function denoise(input, options) {
const logger = new Logger();
const startTime = logger.getTimestamp();
options = extendDeep(defaultDenoisingOptions, options);
const inputRawAudio = await ensureRawAudio(input);
logger.start(`Initialize ${options.engine} module`);
let denoisedAudio;
switch (options.engine) {
case 'rnnoise': {
const RNNoise = await import('../denoising/RNNoise.js');
logger.end();
const processingSampleRate = 48000;
logger.start(`Resample audio to ${processingSampleRate} Hz`);
const inputRawAudioResampled = await resampleAudioSpeex(inputRawAudio, processingSampleRate, 0);
const denoisedAudioChannels = [];
for (let channelIndex = 0; channelIndex < inputRawAudioResampled.audioChannels.length; channelIndex++) {
const audioChannel = inputRawAudioResampled.audioChannels[channelIndex];
const audioChannelRawAudio = { audioChannels: [audioChannel], sampleRate: processingSampleRate };
logger.end();
logger.logTitledMessage(`Denoise audio channel`, `${channelIndex}`, chalk.magentaBright);
const { denoisedRawAudio, frameVadProbabilities } = await RNNoise.denoiseAudio(audioChannelRawAudio);
logger.end();
denoisedAudioChannels.push(denoisedRawAudio.audioChannels[0]);
}
denoisedAudio = { audioChannels: denoisedAudioChannels, sampleRate: processingSampleRate };
break;
}
case 'nsnet2': {
const NSNet2 = await import('../denoising/NSNet2.js');
logger.end();
const nsnet2Options = options.nsnet2;
let processingSampleRate;
let packageName;
if (nsnet2Options.model === 'baseline-16khz') {
processingSampleRate = 16000;
packageName = 'nsnet2-20ms-baseline';
}
else if (nsnet2Options.model === 'baseline-48khz') {
processingSampleRate = 48000;
packageName = 'nsnet2-20ms-48k-baseline';
}
else {
throw new Error(`Unknown model name: ${nsnet2Options.model}`);
}
if (!nsnet2Options.modelDirectoryPath) {
nsnet2Options.modelDirectoryPath = await loadPackage(packageName);
}
logger.start(`Resample audio to ${processingSampleRate} Hz`);
const inputRawAudioResampled = await resampleAudioSpeex(inputRawAudio, processingSampleRate, 0);
const denoisedAudioChannels = [];
for (let channelIndex = 0; channelIndex < inputRawAudioResampled.audioChannels.length; channelIndex++) {
const audioChannel = inputRawAudioResampled.audioChannels[channelIndex];
const audioChannelRawAudio = { audioChannels: [audioChannel], sampleRate: processingSampleRate };
logger.end();
logger.logTitledMessage(`Denoise audio channel`, `${channelIndex}`, chalk.magentaBright);
const { denoisedAudio } = await NSNet2.denoiseAudio(audioChannelRawAudio, nsnet2Options);
logger.end();
denoisedAudioChannels.push(denoisedAudio.audioChannels[0]);
}
denoisedAudio = { audioChannels: denoisedAudioChannels, sampleRate: processingSampleRate };
break;
}
default: {
throw new Error(`Engine '${options.engine}' is not recognized.`);
}
}
logger.logTitledMessage(`Postprocess`, ``, chalk.magentaBright);
logger.start(`Resample denoised audio (${denoisedAudio.sampleRate} Hz) back to original sample rate (${inputRawAudio.sampleRate} Hz)`);
denoisedAudio = await ensureRawAudio(denoisedAudio, inputRawAudio.sampleRate, inputRawAudio.audioChannels.length);
logger.start('Postprocess audio');
const shouldNormalize = options.postProcessing.normalizeAudio;
const targetPeakDecibels = options.postProcessing.targetPeak;
const maxGainIncreaseDecibels = options.postProcessing.maxGainIncrease;
const dryMixGainDecibels = options.postProcessing.dryMixGain;
attenuateIfClippingInPlace(denoisedAudio);
const preMixPeakDecibels = getSamplePeakDecibels(denoisedAudio.audioChannels);
const inputRawAudioWithGain = applyGainDecibels(inputRawAudio, dryMixGainDecibels);
denoisedAudio = mixAudio(denoisedAudio, inputRawAudioWithGain);
const postMixPeakDecibels = getSamplePeakDecibels(denoisedAudio.audioChannels);
if (shouldNormalize) {
normalizeAudioLevelInPlace(denoisedAudio, targetPeakDecibels, maxGainIncreaseDecibels);
}
else {
applyGainDecibelsInPlace(denoisedAudio, preMixPeakDecibels - postMixPeakDecibels);
}
logger.end();
logger.log('');
logger.logDuration('Total denoising time', startTime, chalk.magentaBright);
return {
denoisedAudio,
inputRawAudio
};
}
export const defaultDenoisingOptions = {
engine: 'rnnoise',
postProcessing: {
normalizeAudio: false,
targetPeak: -3,
maxGainIncrease: 30,
dryMixGain: -100,
},
nsnet2: defaultNSNet2Options,
};
export const denoisingEngines = [
{
id: 'rnnoise',
name: 'RNNoise',
description: 'A noise suppression library based on a recurrent neural network.',
type: 'local'
},
{
id: 'nsnet2',
name: 'Noise Suppression Net 2',
description: 'Noise suppression models used as baselines for the ICASSP 2021 Deep Noise Suppression challenge.',
type: 'local'
}
];
//# sourceMappingURL=Denoising.js.map