UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

250 lines (190 loc) 6.85 kB
import { RawAudio } from '../audio/AudioUtilities.js' import { extendDeep } from '../utilities/ObjectUtilities.js' import { concatFloat32Arrays } from '../utilities/Utilities.js' import { Float32ArrayRef, wrapEmscriptenModuleHeap } from 'wasm-heap-manager' let rubberbandInstance: any export async function stretchTimePitch(rawAudio: RawAudio, speed: number, pitchScale: number, options: RubberbandOptions) { options = extendDeep(defaultRubberbandOptions, options) const channels = rawAudio.audioChannels const channelCount = channels.length const sampleCount = channels[0].length const sampleRate = rawAudio.sampleRate const m = await getRubberbandInstance() const wasmHeap = wrapEmscriptenModuleHeap(m) const optionFlags = rubberBandOptionsToFlags(options) const statePtr = m._rubberband_new(sampleRate, channelCount, optionFlags, 1, 1) m._rubberband_set_time_ratio(statePtr, 1 / speed) m._rubberband_set_pitch_scale(statePtr, pitchScale) const samplesRequired = m._rubberband_get_samples_required(statePtr) const bufferSize = Math.min(samplesRequired, sampleCount) const bufferChannelPtrsRef = wasmHeap.allocUint32Array(bufferSize) const bufferChannelRefs: Float32ArrayRef[] = [] for (let i = 0; i < channelCount; i++) { const bufferChannelRef = wasmHeap.allocFloat32Array(bufferSize) bufferChannelPtrsRef.view[i] = bufferChannelRef.address bufferChannelRefs.push(bufferChannelRef) } m._rubberband_set_expected_input_duration(statePtr, sampleCount) //m._rubberband_set_max_process_size(statePtr, bufferSize) for (let offset = 0; offset < sampleCount; offset += bufferSize) { let writtenSize: number let isFinal: 0 | 1 if (sampleCount - offset > bufferSize) { writtenSize = bufferSize isFinal = 0 } else { writtenSize = sampleCount - offset isFinal = 1 } for (let i = 0; i < channelCount; i++) { const samplesToWrite = channels[i].subarray(offset, offset + writtenSize) bufferChannelRefs[i].view.set(samplesToWrite) } m._rubberband_study(statePtr, bufferChannelPtrsRef.address, writtenSize, isFinal) } const outputAudioChannelChunks: Float32Array[][] = [] for (let i = 0; i < channelCount; i++) { outputAudioChannelChunks.push([]) } for (let readOffset = 0; readOffset < sampleCount; readOffset += bufferSize) { let writtenSize: number let isFinal: 0 | 1 if (sampleCount - readOffset > bufferSize) { writtenSize = bufferSize isFinal = 0 } else { writtenSize = sampleCount - readOffset isFinal = 1 } for (let i = 0; i < channelCount; i++) { const samplesToWrite = channels[i].subarray(readOffset, readOffset + writtenSize) bufferChannelRefs[i].view.set(samplesToWrite) } m._rubberband_process(statePtr, bufferChannelPtrsRef.address, writtenSize, isFinal) while (true) { const samplesAvailable = m._rubberband_available(statePtr) if (samplesAvailable <= 0) { break } const sizeToRead = Math.min(samplesAvailable, bufferSize) const readCount = m._rubberband_retrieve(statePtr, bufferChannelPtrsRef.address, sizeToRead) for (let i = 0; i < channelCount; i++) { const readSamplesForChannel = bufferChannelRefs[i].view.slice(0, readCount) outputAudioChannelChunks[i].push(readSamplesForChannel) } } } m._rubberband_delete(statePtr) wasmHeap.freeAll() const outputAudioChannels = outputAudioChannelChunks.map(chunks => concatFloat32Arrays(chunks)) const outputRawAudio: RawAudio = { audioChannels: outputAudioChannels, sampleRate } return outputRawAudio } export async function getRubberbandInstance() { if (!rubberbandInstance) { const { default: RubberbandInitializer } = await import('@echogarden/rubberband-wasm') rubberbandInstance = await RubberbandInitializer() } return rubberbandInstance } export function rubberBandOptionsToFlags(options: RubberbandOptions) { let flags = 0 if (options.stretch == 'precise') { flags += RubberBandOptionFlag.StretchPrecise } if (options.transients == 'mixed') { flags += RubberBandOptionFlag.TransientsMixed } else if (options.transients == 'smooth') { flags += RubberBandOptionFlag.TransientsSmooth } if (options.detector == 'percussive') { flags += RubberBandOptionFlag.DetectorPercussive } else if (options.detector == 'soft') { flags += RubberBandOptionFlag.DetectorSoft } if (options.phase == 'independent') { flags += RubberBandOptionFlag.PhaseIndependent } if (options.window == 'short') { flags += RubberBandOptionFlag.WindowShort } else if (options.window == 'long') { flags += RubberBandOptionFlag.WindowLong } if (options.smoothing == 'on') { flags += RubberBandOptionFlag.SmoothingOn } if (options.formant == 'preserved') { flags += RubberBandOptionFlag.FormantPreserved } if (options.pitch == 'high-quality') { flags += RubberBandOptionFlag.PitchHighQuality } else if (options.pitch == 'high-consistency') { flags += RubberBandOptionFlag.PitchHighConsistency } if (options.channels == 'together') { flags += RubberBandOptionFlag.ChannelsTogether } if (options.engine == 'finer') { flags += RubberBandOptionFlag.EngineFiner } return flags } export enum RubberBandOptionFlag { ProcessOffline = 0x00000000, ProcessRealTime = 0x00000001, StretchElastic = 0x00000000, StretchPrecise = 0x00000010, TransientsCrisp = 0x00000000, TransientsMixed = 0x00000100, TransientsSmooth = 0x00000200, DetectorCompound = 0x00000000, DetectorPercussive = 0x00000400, DetectorSoft = 0x00000800, PhaseLaminar = 0x00000000, PhaseIndependent = 0x00002000, ThreadingAuto = 0x00000000, ThreadingNever = 0x00010000, ThreadingAlways = 0x00020000, WindowStandard = 0x00000000, WindowShort = 0x00100000, WindowLong = 0x00200000, SmoothingOff = 0x00000000, SmoothingOn = 0x00800000, FormantShifted = 0x00000000, FormantPreserved = 0x01000000, PitchHighSpeed = 0x00000000, PitchHighQuality = 0x02000000, PitchHighConsistency = 0x04000000, ChannelsApart = 0x00000000, ChannelsTogether = 0x10000000, EngineFaster = 0x00000000, EngineFiner = 0x20000000 } export enum RubberBandPresetOption { DefaultOptions = 0x00000000, PercussiveOptions = 0x00102000, } export const defaultRubberbandOptions: RubberbandOptions = { stretch: 'elastic', transients: 'crisp', detector: 'compound', phase: 'laminar', window: 'standard', smoothing: 'off', formant: 'shited', pitch: 'high-speed', channels: 'apart', engine: 'faster' } export type RubberbandOptions = { stretch?: 'elastic' | 'precise' transients?: 'crisp' | 'mixed' | 'smooth' detector?: 'compound' | 'percussive' | 'soft' phase?: 'laminar' | 'independent' window?: 'standard' | 'long' | 'short' smoothing?: 'off' | 'on' formant?: 'shited' | 'preserved' pitch?: 'high-speed' | 'high-quality' | 'high-consistency' channels?: 'apart' | 'together' engine?: 'faster' | 'finer' }