UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

224 lines 11.4 kB
import * as AudioBufferConversion from '../audio/AudioBufferConversion.js';
import { readUint16LE, readUint32LE, writeAscii, writeUint16LE, writeUint32LE } from '../utilities/BinaryUtilities.js';
import { encodeHex, decodeHex } from '../encodings/Hex.js';
import { concatUint8Arrays, logToStderr } from '../utilities/Utilities.js';
import { decodeAscii } from '../encodings/Ascii.js';

const log = logToStderr;

// Largest value that fits in a 32-bit RIFF size field.
const maxUint32Value = 4294967295;

// Format id written / recognized for the extensible variant of the format subchunk.
const extensibleFormatId = 65534;

// A subchunk header is a 4-byte ASCII identifier followed by a 4-byte LE size.
const subChunkHeaderSize = 8;

/**
 * Encodes raw audio to a RIFF/WAVE byte buffer.
 *
 * The extensible format subchunk is used when `bitDepth` exceeds 16 or there
 * are more than two channels.
 *
 * @param rawAudio Object with `audioChannels` (array of channels) and `sampleRate`.
 * @param bitDepth Bits per sample to encode with (default 16).
 * @param sampleFormat One of the `SampleFormat` values (default PCM).
 * @param speakerPositionMask Mask written only to the extensible format subchunk (default 0).
 * @returns A Uint8Array with the RIFF header, format subchunk and data subchunk.
 * @throws Error if the extensible format is required for a sample format other than PCM or Float.
 */
export function encodeWave(rawAudio, bitDepth = 16, sampleFormat = SampleFormat.PCM, speakerPositionMask = 0) {
  const audioChannels = rawAudio.audioChannels;
  const sampleRate = rawAudio.sampleRate;

  const audioBuffer = AudioBufferConversion.encodeToAudioBuffer(audioChannels, bitDepth, sampleFormat);
  const audioDataLength = audioBuffer.length;

  const shouldUseExtensibleFormat = bitDepth > 16 || audioChannels.length > 2;

  const formatSubChunk = new WaveFormat(audioChannels.length, sampleRate, bitDepth, sampleFormat, speakerPositionMask);
  const formatSubChunkBuffer = formatSubChunk.serialize(shouldUseExtensibleFormat);

  const dataSubChunkBuffer = new Uint8Array(4 + 4 + audioDataLength);
  writeAscii(dataSubChunkBuffer, 'data', 0);

  // Ensure large data chunk length is clipped to max
  const dataChunkLength = Math.min(audioDataLength, maxUint32Value);
  writeUint32LE(dataSubChunkBuffer, dataChunkLength, 4);
  dataSubChunkBuffer.set(audioBuffer, 8);

  const riffChunkHeaderBuffer = new Uint8Array(12);
  writeAscii(riffChunkHeaderBuffer, 'RIFF', 0);

  // Ensure large RIFF chunk length is clipped to max
  const riffChunkLength = Math.min(4 + formatSubChunkBuffer.length + dataSubChunkBuffer.length, maxUint32Value);
  writeUint32LE(riffChunkHeaderBuffer, riffChunkLength, 4);
  writeAscii(riffChunkHeaderBuffer, 'WAVE', 8);

  return concatUint8Arrays([riffChunkHeaderBuffer, formatSubChunkBuffer, dataSubChunkBuffer]);
}

/**
 * Decodes a RIFF/WAVE byte buffer to raw audio.
 *
 * @param waveData Uint8Array containing the complete wave file.
 * @param ignoreTruncatedChunks When true (default), chunks whose declared size
 *   exceeds the remaining buffer are read up to the end of the buffer instead of
 *   causing an error.
 * @param ignoreOverflowingDataChunks When true (default), a RIFF or data size of
 *   4294967295 is treated as "extends to the end of the buffer".
 * @returns `{ rawAudio: { audioChannels, sampleRate }, sourceSampleFormat,
 *   sourceBitDepth, sourceSpeakerPositionMask }`.
 * @throws Error on missing RIFF/WAVE ids, inconsistent chunk sizes, a data
 *   subchunk preceding the format subchunk, missing format or data subchunks,
 *   or an unsupported format.
 */
export function decodeWave(waveData, ignoreTruncatedChunks = true, ignoreOverflowingDataChunks = true) {
  let readOffset = 0;

  const riffId = decodeAscii(waveData.subarray(readOffset, readOffset + 4));

  if (riffId !== 'RIFF') {
    throw new Error('Not a valid wave file. No RIFF id found at offset 0.');
  }

  readOffset += 4;

  let riffChunkSize = readUint32LE(waveData, readOffset);

  readOffset += 4;

  const waveId = decodeAscii(waveData.subarray(readOffset, readOffset + 4));

  if (waveId !== 'WAVE') {
    throw new Error('Not a valid wave file. No WAVE id found at offset 8.');
  }

  const sizeAfterRiffHeader = waveData.length - 8;

  if (ignoreOverflowingDataChunks && riffChunkSize === maxUint32Value) {
    riffChunkSize = sizeAfterRiffHeader;
  }

  if (riffChunkSize < sizeAfterRiffHeader) {
    throw new Error(`RIFF chunk length ${riffChunkSize} is smaller than the remaining size of the buffer (${sizeAfterRiffHeader})`);
  }

  if (!ignoreTruncatedChunks && riffChunkSize > sizeAfterRiffHeader) {
    throw new Error(`RIFF chunk length (${riffChunkSize}) is greater than the remaining size of the buffer (${sizeAfterRiffHeader})`);
  }

  readOffset += 4;

  let formatSubChunkBodyBuffer;
  const dataBuffers = [];

  while (readOffset < waveData.length) {
    const remainingSize = waveData.length - readOffset;

    // A complete subchunk header is required before anything can be read from it
    if (remainingSize < subChunkHeaderSize) {
      if (!ignoreTruncatedChunks) {
        throw new Error(`Encountered a truncated subchunk header at offset ${readOffset}: only ${remainingSize} bytes remain in the buffer`);
      }

      break;
    }

    const subChunkIdentifier = decodeAscii(waveData.subarray(readOffset, readOffset + 4));
    readOffset += 4;

    let subChunkSize = readUint32LE(waveData, readOffset);
    readOffset += 4;

    const isDataSubChunk = subChunkIdentifier === 'data';

    // If the data chunk is extended beyond 4 GiB, the data is read up to the end
    // of the buffer. This is resolved before the truncation check (as is done for
    // the RIFF chunk size above) so that the two options don't conflict.
    if (isDataSubChunk && ignoreOverflowingDataChunks && subChunkSize === maxUint32Value) {
      subChunkSize = waveData.length - readOffset;
    }

    if (!ignoreTruncatedChunks && subChunkSize > waveData.length - readOffset) {
      throw new Error(`Encountered a '${subChunkIdentifier}' subchunk with a size of ${subChunkSize} which is greater than the remaining size of the buffer (${waveData.length - readOffset})`);
    }

    if (subChunkIdentifier === 'fmt ') {
      formatSubChunkBodyBuffer = waveData.subarray(readOffset, readOffset + subChunkSize);
    } else if (isDataSubChunk) {
      if (!formatSubChunkBodyBuffer) {
        throw new Error('A data subchunk was encountered before a format subchunk');
      }

      // subarray clamps to the buffer end, so a truncated data chunk yields
      // whatever bytes are actually available
      const subChunkData = waveData.subarray(readOffset, readOffset + subChunkSize);

      dataBuffers.push(subChunkData);
    }

    // All sub chunks other than 'fmt ' and 'data' (e.g. 'LIST', 'fact', 'plst', 'junk' etc.) are ignored

    // This addition can't overflow: both operands fit in 32 bits, and JavaScript
    // numbers represent integers exactly up to 2^53 - 1.
    readOffset += subChunkSize;

    // RIFF chunks are word-aligned: an odd-sized chunk body is followed by one
    // zero pad byte that is not counted in the chunk size. Chunk identifiers are
    // ASCII and never start with a zero byte, so skipping only a zero byte also
    // keeps files that omit the padding readable.
    if (subChunkSize % 2 === 1 && readOffset < waveData.length && waveData[readOffset] === 0) {
      readOffset += 1;
    }
  }

  if (!formatSubChunkBodyBuffer) {
    throw new Error('No format subchunk was found in the wave file');
  }

  if (dataBuffers.length === 0) {
    throw new Error('No data subchunks were found in the wave file');
  }

  const waveFormat = WaveFormat.deserializeFrom(formatSubChunkBodyBuffer);

  const sampleFormat = waveFormat.sampleFormat;
  const channelCount = waveFormat.channelCount;
  const sampleRate = waveFormat.sampleRate;
  const bitDepth = waveFormat.bitDepth;
  const speakerPositionMask = waveFormat.speakerPositionMask;

  const concatenatedDataBuffers = concatUint8Arrays(dataBuffers);
  dataBuffers.length = 0; // Allow the garbage collector to free up memory held by the data buffers

  const audioChannels = AudioBufferConversion.decodeToChannels(concatenatedDataBuffers, channelCount, bitDepth, sampleFormat);

  return {
    rawAudio: { audioChannels, sampleRate },
    sourceSampleFormat: sampleFormat,
    sourceBitDepth: bitDepth,
    sourceSpeakerPositionMask: speakerPositionMask
  };
}

/**
 * Decodes a wave buffer with the default (tolerant) options and re-encodes it
 * using the source bit depth, sample format and speaker position mask.
 *
 * @param waveData Uint8Array containing the wave file to repair.
 * @returns A newly encoded wave buffer.
 */
export function repairWave(waveData) {
  const { rawAudio, sourceSampleFormat, sourceBitDepth, sourceSpeakerPositionMask } = decodeWave(waveData);

  return encodeWave(rawAudio, sourceBitDepth, sourceSampleFormat, sourceSpeakerPositionMask);
}

/**
 * In-memory representation of the wave format ('fmt ') subchunk, with
 * serialization to and deserialization from its binary layout.
 */
class WaveFormat {
  sampleFormat; // 2 bytes LE
  channelCount; // 2 bytes LE
  sampleRate; // 4 bytes LE
  get byteRate() { return this.sampleRate * this.bytesPerSample * this.channelCount; } // 4 bytes LE
  get blockAlign() { return this.bytesPerSample * this.channelCount; } // 2 bytes LE
  bitDepth; // 2 bytes LE
  speakerPositionMask; // 4 bytes LE
  get guid() { return sampleFormatToGuid[this.sampleFormat]; } // 16 bytes BE

  // helpers:
  get bytesPerSample() { return this.bitDepth / 8; }

  constructor(channelCount, sampleRate, bitDepth, sampleFormat, speakerPositionMask = 0) {
    this.sampleFormat = sampleFormat;
    this.channelCount = channelCount;
    this.sampleRate = sampleRate;
    this.bitDepth = bitDepth;
    this.speakerPositionMask = speakerPositionMask;
  }

  /**
   * Serializes the format as a complete 'fmt ' subchunk (header included).
   *
   * @param useExtensibleFormat When truthy, writes format id 65534 followed by
   *   the extension (valid bits, speaker position mask and format GUID).
   * @returns A Uint8Array holding the subchunk.
   * @throws Error if the sample format has no known serialized size, or if the
   *   extensible format is requested for a sample format without a GUID.
   */
  serialize(useExtensibleFormat) {
    const sampleFormatId = useExtensibleFormat ? extensibleFormatId : this.sampleFormat;

    const serializedSize = sampleFormatToSerializedSize[sampleFormatId];

    if (typeof serializedSize !== 'number') {
      throw new Error(`Wave audio format id ${this.sampleFormat} is not supported`);
    }

    // Only PCM and Float have a non-empty GUID in the table
    const formatGuid = useExtensibleFormat ? this.guid : undefined;

    if (useExtensibleFormat && !formatGuid) {
      throw new Error(`Extensible format is not supported for sample format ${this.sampleFormat}`);
    }

    const result = new Uint8Array(serializedSize);

    writeAscii(result, 'fmt ', 0); // + 4
    writeUint32LE(result, serializedSize - 8, 4); // + 4

    writeUint16LE(result, sampleFormatId, 8); // + 2
    writeUint16LE(result, this.channelCount, 10); // + 2
    writeUint32LE(result, this.sampleRate, 12); // + 4
    writeUint32LE(result, this.byteRate, 16); // + 4
    writeUint16LE(result, this.blockAlign, 20); // + 2
    writeUint16LE(result, this.bitDepth, 22); // + 2

    if (useExtensibleFormat) {
      writeUint16LE(result, serializedSize - 26, 24); // + 2 (extension size)
      writeUint16LE(result, this.bitDepth, 26); // + 2 (valid bits per sample)
      writeUint32LE(result, this.speakerPositionMask, 28); // + 4 (speaker position mask)
      result.set(decodeHex(formatGuid), 32); // + 16 (format GUID)
    }

    return result;
  }

  /**
   * Parses the body of a 'fmt ' subchunk (without the 8-byte subchunk header).
   *
   * @param formatChunkBody Uint8Array holding the subchunk body.
   * @returns A WaveFormat. For the extensible format, the sample format is
   *   resolved from the GUID.
   * @throws Error if the body is too short, the format or GUID is unsupported,
   *   or the bit depth is not valid for the sample format.
   */
  static deserializeFrom(formatChunkBody) {
    if (formatChunkBody.length < 16) {
      throw new Error(`Format subchunk body size is ${formatChunkBody.length} bytes, which is smaller than the minimum expected of 16 bytes`);
    }

    let sampleFormat = readUint16LE(formatChunkBody, 0); // + 2
    const channelCount = readUint16LE(formatChunkBody, 2); // + 2
    const sampleRate = readUint32LE(formatChunkBody, 4); // + 4
    // Byte rate (offset 8) and block align (offset 12) are derived values and are not read
    const bitDepth = readUint16LE(formatChunkBody, 14);

    let speakerPositionMask = 0;

    if (sampleFormat === extensibleFormatId) {
      if (formatChunkBody.length < 40) {
        throw new Error(`Format subchunk specifies a format id of 65534 (extensible) but its body size is ${formatChunkBody.length} bytes, which is smaller than the minimum expected of 40 bytes`);
      }

      // The mask occupies 4 bytes (serialize() writes it with writeUint32LE at
      // the matching offset), so it must be read back as a 32-bit value
      speakerPositionMask = readUint32LE(formatChunkBody, 20);

      const guid = encodeHex(formatChunkBody.subarray(24, 40));

      if (guid === sampleFormatToGuid[SampleFormat.PCM]) {
        sampleFormat = SampleFormat.PCM;
      } else if (guid === sampleFormatToGuid[SampleFormat.Float]) {
        sampleFormat = SampleFormat.Float;
      } else {
        throw new Error(`Unsupported format GUID in extended format subchunk: ${guid}`);
      }
    }

    if (sampleFormat === SampleFormat.PCM) {
      if (bitDepth !== 8 && bitDepth !== 16 && bitDepth !== 24 && bitDepth !== 32) {
        throw new Error(`PCM audio has a bit depth of ${bitDepth}, which is not supported`);
      }
    } else if (sampleFormat === SampleFormat.Float) {
      if (bitDepth !== 32 && bitDepth !== 64) {
        throw new Error(`IEEE float audio has a bit depth of ${bitDepth}, which is not supported`);
      }
    } else if (sampleFormat === SampleFormat.Alaw) {
      if (bitDepth !== 8) {
        throw new Error(`Alaw audio has a bit depth of ${bitDepth}, which is not supported`);
      }
    } else if (sampleFormat === SampleFormat.Mulaw) {
      if (bitDepth !== 8) {
        throw new Error(`Mulaw audio has a bit depth of ${bitDepth}, which is not supported`);
      }
    } else {
      throw new Error(`Wave audio format id ${sampleFormat} is not supported`);
    }

    return new WaveFormat(channelCount, sampleRate, bitDepth, sampleFormat, speakerPositionMask);
  }
}

// Numeric format ids with a reverse (id -> name) mapping. The `var` + IIFE shape
// is kept as-is so the exported binding behaves exactly as before.
export var SampleFormat;
(function (SampleFormat) {
  SampleFormat[SampleFormat["PCM"] = 1] = "PCM";
  SampleFormat[SampleFormat["Float"] = 3] = "Float";
  SampleFormat[SampleFormat["Alaw"] = 6] = "Alaw";
  SampleFormat[SampleFormat["Mulaw"] = 7] = "Mulaw";
})(SampleFormat || (SampleFormat = {}));

// Total size in bytes of the serialized 'fmt ' subchunk (8-byte header included), per format id
const sampleFormatToSerializedSize = {
  [SampleFormat.PCM]: 24,
  [SampleFormat.Float]: 26,
  [SampleFormat.Alaw]: 26,
  [SampleFormat.Mulaw]: 26,
  65534: 48
};

// Hex-encoded format GUIDs used by the extensible format. An empty string marks
// a sample format that can't be written in extensible form.
const sampleFormatToGuid = {
  [SampleFormat.PCM]: '0100000000001000800000aa00389b71',
  [SampleFormat.Float]: '0300000000001000800000aa00389b71',
  [SampleFormat.Alaw]: '',
  [SampleFormat.Mulaw]: '',
};
//# sourceMappingURL=WaveCodec.js.map