speechflow
Version:
Speech Processing Flow Graph
205 lines (186 loc) • 9.17 kB
text/typescript
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* standard dependencies */
import Stream from "node:stream"
/* internal dependencies */
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
/* write WAV header */
const writeWavHeader = (
length: number,
options?: { audioFormat?: number, channels?: number, sampleRate?: number, bitDepth?: number }
) => {
const audioFormat = options?.audioFormat ?? 0x001 /* PCM */
const channels = options?.channels ?? 1 /* mono */
const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
const headerLength = 44
const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
const dataLength = length ?? maxDataSize
const fileSize = dataLength + headerLength
const header = Buffer.alloc(headerLength)
const byteRate = (sampleRate * channels * bitDepth) / 8
const blockAlign = (channels * bitDepth) / 8
let offset = 0
header.write("RIFF", offset); offset += 4
header.writeUInt32LE(fileSize - 8, offset); offset += 4
header.write("WAVE", offset); offset += 4
header.write("fmt ", offset); offset += 4
header.writeUInt32LE(16, offset); offset += 4
header.writeUInt16LE(audioFormat, offset); offset += 2
header.writeUInt16LE(channels, offset); offset += 2
header.writeUInt32LE(sampleRate, offset); offset += 4
header.writeUInt32LE(byteRate, offset); offset += 4
header.writeUInt16LE(blockAlign, offset); offset += 2
header.writeUInt16LE(bitDepth, offset); offset += 2
header.write("data", offset); offset += 4
header.writeUInt32LE(dataLength, offset); offset += 4
return header
}
/* read WAV header */
const readWavHeader = (buffer: Buffer) => {
if (buffer.length < 44)
throw new Error("WAV header too short, expected at least 44 bytes")
let offset = 0
const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
const fileSize = buffer.readUInt32LE(offset); offset += 4
const waveHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
const fmtHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
const formatLength = buffer.readUInt32LE(offset); offset += 4
const audioFormat = buffer.readUInt16LE(offset); offset += 2
const channels = buffer.readUInt16LE(offset); offset += 2
const sampleRate = buffer.readUInt32LE(offset); offset += 4
const byteRate = buffer.readUInt32LE(offset); offset += 4
const blockAlign = buffer.readUInt16LE(offset); offset += 2
const bitDepth = buffer.readUInt16LE(offset); offset += 2
const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
const dataLength = buffer.readUInt32LE(offset); offset += 4
if (riffHead !== "RIFF")
throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
if (waveHead !== "WAVE")
throw new Error(`Invalid WAV file: expected WAVE header, got "${waveHead}"`)
if (fmtHead !== "fmt ")
throw new Error(`Invalid WAV file: expected "fmt " header, got "${fmtHead}"`)
if (data !== "data")
throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
return {
riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
}
}
/* SpeechFlow node for WAV format conversion */
export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
/* declare official node name */
public static name = "a2a-wav"
/* construct node */
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
super(id, cfg, opts, args)
/* declare node configuration parameters */
this.configure({
mode: { type: "string", pos: 1, val: "encode", match: /^(?:encode|decode)$/ }
})
/* declare node input/output format */
this.input = "audio"
this.output = "audio"
}
/* open node */
async open () {
/* establish a transform stream */
const self = this
let firstChunk = true
this.stream = new Stream.Transform({
readableObjectMode: true,
writableObjectMode: true,
decodeStrings: false,
highWaterMark: 1,
transform (chunk: SpeechFlowChunk, encoding, callback) {
if (!Buffer.isBuffer(chunk.payload))
callback(new Error("invalid chunk payload type"))
else if (firstChunk) {
if (self.params.mode === "encode") {
/* convert raw/PCM to WAV/PCM
(NOTICE: as this is a continuous stream, the
resulting WAV header is not 100% conforming
to the WAV standard, as it has to use a zero
duration information. This cannot be changed in
a stream-based processing.) */
const headerBuffer = writeWavHeader(0, {
audioFormat: 0x0001 /* PCM */,
channels: self.config.audioChannels,
sampleRate: self.config.audioSampleRate,
bitDepth: self.config.audioBitDepth
})
const headerChunk = chunk.clone()
headerChunk.payload = headerBuffer
this.push(headerChunk)
this.push(chunk)
callback()
}
else if (self.params.mode === "decode") {
/* convert WAV/PCM to raw/PCM */
if (chunk.payload.length < 44) {
callback(new Error("WAV header too short, expected at least 44 bytes"))
return
}
const header = readWavHeader(chunk.payload)
self.log("info", "WAV audio stream: " +
`audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
"0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
`channels=${header.channels} ` +
`sampleRate=${header.sampleRate} ` +
`bitDepth=${header.bitDepth}`)
if (header.audioFormat !== 0x0001 /* PCM */) {
callback(new Error("WAV not based on PCM format"))
return
}
if (header.bitDepth !== self.config.audioBitDepth) {
callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
return
}
if (header.sampleRate !== self.config.audioSampleRate) {
callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
return
}
if (header.channels !== self.config.audioChannels) {
callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
return
}
chunk.payload = chunk.payload.subarray(44)
this.push(chunk)
callback()
}
else {
callback(new Error(`invalid operation mode "${self.params.mode}"`))
return
}
firstChunk = false
}
else {
/* pass-through original chunk */
this.push(chunk)
callback()
}
},
final (callback) {
this.push(null)
callback()
}
})
}
/* close node */
async close () {
/* shutdown stream */
if (this.stream !== null) {
await new Promise<void>((resolve) => {
if (this.stream instanceof Stream.Duplex)
this.stream.end(() => { resolve() })
else
resolve()
})
this.stream.destroy()
this.stream = null
}
}
}