speechflow
Version:
Speech Processing Flow Graph
308 lines (278 loc) • 10.8 kB
text/typescript
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* standard dependencies */
import path from "node:path"
/* external dependencies */
import { AudioContext, AudioWorkletNode } from "node-web-audio-api"
/*  determine the playback duration (in seconds) of a raw PCM buffer:
    the byte length is divided by the size of one frame
    (bytes per sample times channels) and then by the sample rate;
    throws an Error if any parameter is unusable  */
export function audioBufferDuration (
    buffer: Buffer,
    sampleRate = 48000,
    bitDepth = 16,
    channels = 1,
    littleEndian = true
) {
    /*  parameter rules, evaluated in order: the first violated one raises  */
    const rules: Array<[ boolean, string ]> = [
        [ !Buffer.isBuffer(buffer),            "invalid input (Buffer expected)" ],
        [ littleEndian !== true,               "only Little Endian supported" ],
        [ sampleRate <= 0,                     "sample rate must be positive" ],
        [ bitDepth <= 0 || bitDepth % 8 !== 0, "bit depth must be positive and multiple of 8" ],
        [ channels <= 0,                       "channels must be positive" ]
    ]
    for (const [ violated, message ] of rules)
        if (violated)
            throw new Error(message)

    /*  bytes -> frames -> seconds  */
    const bytesPerFrame = (bitDepth / 8) * channels
    const frames = buffer.length / bytesPerFrame
    return frames / sampleRate
}
/*  determine the playback duration (in seconds) of a sample array:
    the element count is divided by the channel count and the sample rate;
    an empty array yields 0 before any parameter is validated  */
export function audioArrayDuration (
    arr: Float32Array,
    sampleRate = 48000,
    channels = 1
) {
    const { length } = arr

    /*  short-circuit: nothing to measure  */
    if (length === 0)
        return 0

    /*  reject unusable parameters  */
    if (sampleRate <= 0)
        throw new Error("sample rate must be positive")
    if (channels <= 0)
        throw new Error("channels must be positive")

    /*  elements -> frames -> seconds  */
    const frames = length / channels
    return frames / sampleRate
}
/*  helper function: convert Buffer in PCM/I16 to Float32Array in PCM/F32 format
    - buf:          raw 16-bit signed integer samples (byte length must be even)
    - littleEndian: byte order of the samples inside buf
    returns one float per sample, scaled by 1/32768 into [-1.0, 1.0);
    throws an Error if the byte length is odd  */
export function convertBufToF32 (buf: Buffer, littleEndian = true) {
    if (buf.length % 2 !== 0)
        throw new Error("buffer length must be even for 16-bit samples")

    /*  a Buffer can be a window into a larger (pooled or sliced) ArrayBuffer,
        so the view has to be restricted to the window of this Buffer  */
    const dataView = new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
    const arr = new Float32Array(buf.length / 2)
    for (let i = 0; i < arr.length; i++)
        arr[i] = dataView.getInt16(i * 2, littleEndian) / 32768
    return arr
}
/*  helper function: convert Float32Array in PCM/F32 to Buffer in PCM/I16 format
    (each sample is scaled by 32768, rounded and clamped to the signed 16-bit
    range; NaN samples become 0; the resulting Buffer wraps the memory of an
    Int16Array and hence carries the samples in platform byte order)  */
export function convertF32ToBuf (arr: Float32Array) {
    /*  short-circuit: nothing to convert  */
    if (arr.length === 0)
        return Buffer.alloc(0)

    /*  quantize sample by sample  */
    const pcm = new Int16Array(arr.length)
    arr.forEach((value, idx) => {
        const scaled = Number.isNaN(value) ? 0 : Math.round(value * 32768)
        pcm[idx] = Math.min(32767, Math.max(-32768, scaled))
    })
    return Buffer.from(pcm.buffer)
}
/*  helper function: convert Buffer in PCM/I16 to Int16Array
    (decodes consecutive 16-bit signed integers in the requested byte order;
    throws an Error if the byte length is odd)  */
export function convertBufToI16 (buf: Buffer, littleEndian = true) {
    if (buf.length % 2 !== 0)
        throw new Error("buffer length must be even for 16-bit samples")

    /*  decode one sample per two bytes  */
    const count = buf.length / 2
    const samples = new Int16Array(count)
    let offset = 0
    for (let idx = 0; idx < count; idx++) {
        samples[idx] = littleEndian ? buf.readInt16LE(offset) : buf.readInt16BE(offset)
        offset += 2
    }
    return samples
}
/*  helper function: convert Int16Array in PCM/I16 to Buffer
    (encodes every sample as two bytes in the requested byte order;
    an empty array yields an empty Buffer)  */
export function convertI16ToBuf (arr: Int16Array, littleEndian = true) {
    const count = arr.length

    /*  short-circuit: nothing to encode  */
    if (count === 0)
        return Buffer.alloc(0)

    /*  every byte is overwritten below, so no zero-filling is needed  */
    const out = Buffer.allocUnsafe(count * 2)
    arr.forEach((sample, idx) => {
        const offset = idx * 2
        if (littleEndian)
            out.writeInt16LE(sample, offset)
        else
            out.writeInt16BE(sample, offset)
    })
    return out
}
/*  process Int16Array in fixed-size segments
    - data:        samples to process; they are overwritten IN PLACE
    - segmentSize: number of samples per segment (positive integer)
    - processor:   async callback receiving a private copy of one segment
                   and returning the processed samples for it
    The segments are processed strictly one after another. A trailing
    partial segment is zero-padded to segmentSize before it is passed to
    the processor and only the non-padded part of the result is kept.
    Resolves to the very same (mutated) data array and rejects with an
    Error if segmentSize is not a positive integer.  */
export async function processInt16ArrayInSegments (
    data: Int16Array<ArrayBuffer>,
    segmentSize: number,
    processor: (segment: Int16Array<ArrayBuffer>) => Promise<Int16Array<ArrayBuffer>>
): Promise<Int16Array<ArrayBuffer>> {
    /*  sanity check parameters: a size of zero would never advance the
        loop below, and negative or fractional sizes yield broken offsets  */
    if (!Number.isInteger(segmentSize) || segmentSize <= 0)
        throw new Error("segment size must be a positive integer")

    /*  process full segments  */
    let offset = 0
    while (offset + segmentSize <= data.length) {
        const segment = data.slice(offset, offset + segmentSize)
        const result = await processor(segment)
        data.set(result, offset)
        offset += segmentSize
    }

    /*  process final partial segment if it exists  */
    const remaining = data.length - offset
    if (remaining > 0) {
        /*  a fresh typed array is already zero-filled, so copying the
            remaining samples to its start leaves the padding in place  */
        const segment = new Int16Array(segmentSize)
        segment.set(data.subarray(offset))
        const result = await processor(segment)
        data.set(result.subarray(0, remaining), offset)
    }
    return data
}
/*  update envelope (smoothed amplitude contour) for single channel
    - env:        per-channel envelope state (smoothed squared amplitude);
                  env[chan] is initialized to 1e-12 on first use and is
                  updated in place, so consecutive calls continue smoothly
    - sampleRate: value multiplied with attack/release to derive the
                  smoothing coefficients
    - chan:       index into env
    - samples:    samples of this channel to feed into the envelope
    - attack:     time constant used while the signal power exceeds the envelope
    - release:    time constant used otherwise
    returns the square root of the updated envelope (floored at 1e-12
    before the square root is taken)  */
export function updateEnvelopeForChannel(
    env: number[],
    sampleRate: number,
    chan: number,
    samples: Float32Array,
    attack: number,
    release: number
): number {
    /*  fetch old envelope value  */
    let currentEnv = env[chan] ?? 1e-12

    /*  calculate attack/release alpha values  */
    const alphaA = Math.exp(-1 / (attack * sampleRate))
    const alphaR = Math.exp(-1 / (release * sampleRate))

    /*  iterate over all samples and smooth their power (squared amplitude)  */
    for (const s of samples) {
        const det = s * s
        const alpha = det > currentEnv ? alphaA : alphaR
        currentEnv = alpha * currentEnv + (1 - alpha) * det
    }

    /*  store updated envelope value back, as otherwise every
        call would start again from the initial state  */
    env[chan] = currentEnv

    return Math.sqrt(Math.max(currentEnv, 1e-12))
}
/*  helper function: convert a linear amplitude factor to decibel
    (the input is floored at 1e-12, so the result never drops below -240 dB)  */
export function lin2dB (x: number): number {
    const floored = Math.max(x, 1e-12)
    return 20 * Math.log10(floored)
}
/*  helper function: convert decibel to a linear amplitude factor (10^(dB/20))  */
export function dB2lin (db: number): number {
    const exponent = db / 20
    return Math.pow(10, exponent)
}
/*  Web Audio helper: owns an AudioContext plus a "source" and a "capture"
    audio worklet node and allows pushing PCM/I16 chunks in through the
    source node and receiving the corresponding chunks from the capture node.
    This class never connects the two nodes to anything -- presumably the
    caller wires its processing graph between them (TODO confirm).  */
export class WebAudio {
    /*  internal state  */
    public audioContext: AudioContext
    public sourceNode: AudioWorkletNode | null = null
    public captureNode: AudioWorkletNode | null = null

    /*  chunks which were sent but not yet captured, keyed by chunk id  */
    private pendingPromises = new Map<string, {
        resolve: (value: Int16Array) => void
        reject: (error: Error) => void
        timeout: ReturnType<typeof setTimeout>
    }>()

    /*  construct object  */
    constructor(
        public sampleRate: number,
        public channels: number
    ) {
        /*  create new audio context  */
        this.audioContext = new AudioContext({
            sampleRate,
            latencyHint: "interactive"
        })
    }

    /*  setup object: resume the context, load the worklet module,
        create both worklet nodes and start listening for captured chunks  */
    public async setup (): Promise<void> {
        /*  ensure audio context is not suspended  */
        if (this.audioContext.state === "suspended")
            await this.audioContext.resume()

        /*  add audio worklet module
            (expected to register the "source" and "capture" processors)  */
        const url = path.resolve(__dirname, "speechflow-util-audio-wt.js")
        await this.audioContext.audioWorklet.addModule(url)

        /*  create source node  */
        const sourceNode = new AudioWorkletNode(this.audioContext, "source", {
            numberOfInputs: 0,
            numberOfOutputs: 1,
            outputChannelCount: [ this.channels ]
        })
        this.sourceNode = sourceNode

        /*  create capture node  */
        const captureNode = new AudioWorkletNode(this.audioContext, "capture", {
            numberOfInputs: 1,
            numberOfOutputs: 0
        })
        this.captureNode = captureNode

        /*  settle the pending promise of a chunk once it was captured  */
        captureNode.port.addEventListener("message", (event) => {
            const { type, chunkId, data } = event.data ?? {}
            if (type !== "capture-complete")
                return
            const pending = this.pendingPromises.get(chunkId)
            if (pending === undefined)
                return
            clearTimeout(pending.timeout)
            this.pendingPromises.delete(chunkId)
            try {
                /*  convert captured PCM/F32 samples back to PCM/I16
                    NOTE(review): this scales by 32767 while process() divides
                    by 32768, so a value passed through unmodified does not
                    round-trip bit-exactly -- confirm whether this is intended  */
                const int16Data = new Int16Array(data.length)
                for (let i = 0; i < data.length; i++)
                    int16Data[i] = Math.max(-32768, Math.min(32767, Math.round(data[i] * 32767)))
                pending.resolve(int16Data)
            }
            catch (error) {
                /*  the timeout is already cleared at this point, so a malformed
                    payload has to reject explicitly or the promise never settles  */
                pending.reject(new Error(`failed to convert captured chunk: ${error}`))
            }
        })

        /*  start ports  */
        sourceNode.port.start()
        captureNode.port.start()
    }

    /*  process single audio chunk: send the samples to the source node and
        resolve with the samples reported back by the capture node; rejects
        if the object is not set up, if sending fails, if the object is
        destroyed meanwhile, or if no result arrives within the duration
        of the chunk plus 250ms  */
    public async process (int16Array: Int16Array): Promise<Int16Array> {
        const chunkId = `chunk_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`
        return new Promise<Int16Array>((resolve, reject) => {
            /*  fail fast: without both nodes no result can ever arrive  */
            const captureNode = this.captureNode
            if (this.sourceNode === null || captureNode === null) {
                reject(new Error("failed to process chunk: WebAudio not set up or already destroyed"))
                return
            }

            /*  register pending promise, guarded by a timeout  */
            const timeout = setTimeout(() => {
                this.pendingPromises.delete(chunkId)
                reject(new Error("processing timeout"))
            }, (int16Array.length / this.audioContext.sampleRate) * 1000 + 250)
            this.pendingPromises.set(chunkId, { resolve, reject, timeout })

            /*  common failure path for immediate and deferred sending  */
            const fail = (error: unknown) => {
                clearTimeout(timeout)
                this.pendingPromises.delete(chunkId)
                reject(new Error(`failed to process chunk: ${error}`))
            }

            try {
                /*  convert PCM/I16 samples to PCM/F32  */
                const float32Data = new Float32Array(int16Array.length)
                for (let i = 0; i < int16Array.length; i++)
                    float32Data[i] = int16Array[i] / 32768.0

                /*  start capture first  */
                captureNode.port.postMessage({
                    type: "start-capture",
                    chunkId,
                    expectedSamples: int16Array.length
                })

                /*  small delay to ensure capture is ready before sending data  */
                setTimeout(() => {
                    /*  skip chunks which were settled meanwhile (e.g. by destroy)  */
                    if (!this.pendingPromises.has(chunkId))
                        return
                    try {
                        /*  send input to source node (the sample memory is
                            transferred to the worklet instead of copied)  */
                        this.sourceNode?.port.postMessage({
                            type: "input-chunk",
                            chunkId,
                            data: { pcmData: float32Data, channels: this.channels }
                        }, [ float32Data.buffer ])
                    }
                    catch (error) {
                        /*  this runs outside of the outer try/catch, so the
                            error has to be routed into the promise here  */
                        fail(error)
                    }
                }, 5)
            }
            catch (error) {
                fail(error)
            }
        })
    }

    /*  destroy object: reject everything still pending, drop both nodes
        and close the audio context (safe to be called more than once)  */
    public async destroy (): Promise<void> {
        /*  reject all pending promises  */
        for (const { reject, timeout } of this.pendingPromises.values()) {
            clearTimeout(timeout)
            reject(new Error("WebAudio destroyed"))
        }
        this.pendingPromises.clear()

        /*  disconnect nodes  */
        if (this.sourceNode !== null) {
            this.sourceNode.disconnect()
            this.sourceNode = null
        }
        if (this.captureNode !== null) {
            this.captureNode.disconnect()
            this.captureNode = null
        }

        /*  stop context (closing an already closed context would fail)  */
        if (this.audioContext.state !== "closed")
            await this.audioContext.close()
    }
}