UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

545 lines (544 loc) 23.8 kB
/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */
import { FlacBlockType, readVorbisComments } from '../codec-data.js';
import { Demuxer } from '../demuxer.js';
import { assert, AsyncMutex, binarySearchLessOrEqual, textDecoder, UNDETERMINED_LANGUAGE, } from '../misc.js';
import { EncodedPacket, PLACEHOLDER_DATA } from '../packet.js';
import { readBytes, readU24Be, readU32Be, readU8, } from '../reader.js';
import { DEFAULT_TRACK_DISPOSITION } from '../metadata.js';
import { calculateCrc8, readBlockSize, getBlockSizeOrUncommon, readCodedNumber, readSampleRate, getSampleRateOrUncommon, } from './flac-misc.js';
import { Bitstream } from '../../shared/bitstream.js';

/**
 * Demuxer for native FLAC files.
 *
 * Metadata blocks are parsed once (see `readMetadata`). Audio frames are then discovered
 * lazily, one per `advanceReader` call, and recorded in `loadedSamples`.
 */
export class FlacDemuxer extends Demuxer {
    constructor(input) {
        super(input);
        this.loadedSamples = []; // All samples from the start of the file to lastLoadedPos
        this.metadataPromise = null; // Memoized result of readMetadata()
        this.trackBacking = null;
        this.metadataTags = {};
        this.audioInfo = null; // Filled from the STREAMINFO block
        this.lastLoadedPos = null; // File position right after the last loaded frame (or after the metadata)
        this.blockingBit = null; // Blocking strategy bit taken from the first frame header
        this.readingMutex = new AsyncMutex();
        this.lastSampleLoaded = false;
        this.reader = input._reader;
    }

    /** Resolves to the tags collected from the VORBIS_COMMENT and PICTURE blocks. */
    async getMetadataTags() {
        await this.readMetadata();
        return this.metadataTags;
    }

    /** Resolves to a one-element array holding the audio track backing. */
    async getTrackBackings() {
        await this.readMetadata();
        assert(this.trackBacking);
        return [this.trackBacking];
    }

    async getMimeType() {
        return 'audio/flac';
    }

    /**
     * Parses the metadata blocks that follow the 'fLaC' marker. The work runs only once; every
     * call returns the same memoized promise.
     *
     * @returns {Promise<void>}
     * @throws {Error} If a block header or the STREAMINFO body cannot be read, or if no
     *     STREAMINFO block was found.
     */
    async readMetadata() {
        return (this.metadataPromise ??= (async () => {
            let currentPos = 4; // Skip 'fLaC'

            while (this.reader.fileSize === null || currentPos < this.reader.fileSize) {
                let sizeSlice = this.reader.requestSlice(currentPos, 4);
                if (sizeSlice instanceof Promise) {
                    sizeSlice = await sizeSlice;
                }
                currentPos += 4;

                if (sizeSlice === null) {
                    throw new Error(`Metadata block at position ${currentPos} is too small! Corrupted file.`);
                }
                assert(sizeSlice);

                const byte = readU8(sizeSlice); // first bit: isLastMetadata, remaining 7 bits: metaBlockType
                const size = readU24Be(sizeSlice);
                const isLastMetadata = (byte & 0x80) !== 0;
                const metaBlockType = byte & 0x7f;

                switch (metaBlockType) {
                    case FlacBlockType.STREAMINFO: {
                        // Parse streaminfo block
                        // https://www.rfc-editor.org/rfc/rfc9639.html#section-8.2
                        let streamInfoBlock = this.reader.requestSlice(currentPos, size);
                        if (streamInfoBlock instanceof Promise) {
                            streamInfoBlock = await streamInfoBlock;
                        }
                        // Check before anything else so that the descriptive error is actually reachable
                        if (!streamInfoBlock) {
                            throw new Error(`StreamInfo block at position ${currentPos} is too small! Corrupted file.`);
                        }

                        const streamInfoBytes = readBytes(streamInfoBlock, 34);
                        const bitstream = new Bitstream(streamInfoBytes);
                        const minimumBlockSize = bitstream.readBits(16);
                        const maximumBlockSize = bitstream.readBits(16);
                        const minimumFrameSize = bitstream.readBits(24);
                        const maximumFrameSize = bitstream.readBits(24);
                        const sampleRate = bitstream.readBits(20);
                        const numberOfChannels = bitstream.readBits(3) + 1;
                        bitstream.readBits(5); // bitsPerSample - 1
                        const totalSamples = bitstream.readBits(36);
                        bitstream.skipBits(16 * 8); // md5 hash

                        // https://www.w3.org/TR/webcodecs-flac-codec-registration/#audiodecoderconfig-description
                        // description is required, and has to be the following:
                        // 1. The bytes 0x66 0x4C 0x61 0x43 ("fLaC" in ASCII)
                        // 2. A metadata block (called the STREAMINFO block) as described in section 7 of [FLAC]
                        // 3. Optionaly (sic) other metadata blocks, that are not used by the specification
                        const description = new Uint8Array(42);
                        // 1. "fLaC"
                        description.set(new Uint8Array([0x66, 0x4c, 0x61, 0x43]), 0);
                        // 2a. STREAMINFO block header (last-metadata flag set, type 0, length 34)
                        description.set(new Uint8Array([128, 0, 0, 34]), 4);
                        // 2b. STREAMINFO block body; no other metadata blocks are appended
                        description.set(streamInfoBytes, 8);

                        this.audioInfo = {
                            numberOfChannels,
                            sampleRate,
                            totalSamples,
                            minimumBlockSize,
                            maximumBlockSize,
                            minimumFrameSize,
                            maximumFrameSize,
                            description,
                        };
                        this.trackBacking = new FlacAudioTrackBacking(this);
                        break;
                    }
                    case FlacBlockType.VORBIS_COMMENT: {
                        // Parse vorbis comment block
                        // https://www.rfc-editor.org/rfc/rfc9639.html#name-vorbis-comment
                        let vorbisCommentBlock = this.reader.requestSlice(currentPos, size);
                        if (vorbisCommentBlock instanceof Promise) {
                            vorbisCommentBlock = await vorbisCommentBlock;
                        }
                        assert(vorbisCommentBlock);

                        readVorbisComments(readBytes(vorbisCommentBlock, size), this.metadataTags);
                        break;
                    }
                    case FlacBlockType.PICTURE: {
                        // Parse picture block
                        // https://www.rfc-editor.org/rfc/rfc9639.html#name-picture
                        let pictureBlock = this.reader.requestSlice(currentPos, size);
                        if (pictureBlock instanceof Promise) {
                            pictureBlock = await pictureBlock;
                        }
                        assert(pictureBlock);

                        const pictureType = readU32Be(pictureBlock);
                        const mediaTypeLength = readU32Be(pictureBlock);
                        const mediaType = textDecoder.decode(readBytes(pictureBlock, mediaTypeLength));
                        const descriptionLength = readU32Be(pictureBlock);
                        const description = textDecoder.decode(readBytes(pictureBlock, descriptionLength));
                        pictureBlock.skip(4 + 4 + 4 + 4); // Skip width, height, color depth, number of indexed colors
                        const dataLength = readU32Be(pictureBlock);
                        const data = readBytes(pictureBlock, dataLength);

                        this.metadataTags.images ??= [];
                        this.metadataTags.images.push({
                            data,
                            mimeType: mediaType,
                            // https://www.rfc-editor.org/rfc/rfc9639.html#table13
                            kind: pictureType === 3
                                ? 'coverFront'
                                : pictureType === 4
                                    ? 'coverBack'
                                    : 'unknown',
                            description,
                        });
                        break;
                    }
                    default:
                        break;
                }

                currentPos += size;

                if (isLastMetadata) {
                    // The first audio frame starts right after the last metadata block
                    this.lastLoadedPos = currentPos;
                    break;
                }
            }

            if (!this.audioInfo) {
                throw new Error('Missing STREAMINFO metadata block! Corrupted FLAC file.');
            }
        })());
    }

    /**
     * Reads the frame that starts at `startPos` and determines its byte length by scanning for
     * the header of the following frame.
     *
     * @param {{ startPos: number, isFirstPacket: boolean }} args
     * @returns {Promise<{ num: number, blockSize: number, sampleRate: number, size: number,
     *     isLastFrame: boolean } | null>} `null` if no slice could be obtained or no valid frame
     *     header is found at `startPos`.
     */
    async readNextFlacFrame({ startPos, isFirstPacket, }) {
        assert(this.audioInfo);

        // We expect that there are at least `minimumFrameSize` bytes left in the file.
        // Ideally we also want to validate that the next header is valid
        // to throw out an accidental sync word.

        // The shortest valid FLAC header I can think of, based off the code
        // of readFlacFrameHeader:
        // 4 bytes used for bitstream from syncword to bit depth
        // 1 byte coded number
        // (uncommon values, no bytes read)
        // 1 byte crc
        // --> 6 bytes
        const minimumHeaderLength = 6;
        // If we read everything in readFlacFrameHeader, we read 16 bytes
        const maximumHeaderLength = 16;

        // The shortest valid FLAC frame per RFC 9639:
        // 6 bytes header (see minimumHeaderLength above)
        // 2 bytes subframe (constant subframe with minimum bit depth,
        //     padded to byte boundary)
        // 2 bytes footer (CRC-16)
        // --> 10 bytes
        const minimumFrameLength = 10;

        // The longest valid FLAC frame per RFC 9639:
        // https://www.rfc-editor.org/rfc/rfc9639.html#name-prediction
        // https://www.rfc-editor.org/rfc/rfc9639.html#name-frame-structure
        // maximumBlockSize * numberOfChannels * 4 bytes (max 32 bps verbatim)
        // + 16 bytes header (see maximumHeaderLength above)
        // + 2 bytes footer (CRC-16)
        const maximumFrameLength = this.audioInfo.maximumBlockSize * this.audioInfo.numberOfChannels * 4
            + maximumHeaderLength
            + 2;

        // Per RFC 9639, a value of 0 means "unknown" for frame sizes, hence `||` rather than `??`.
        const effectiveMinFrameSize = this.audioInfo.minimumFrameSize || minimumFrameLength;
        const effectiveMaxFrameSize = this.audioInfo.maximumFrameSize || maximumFrameLength;
        const maximumSliceLength = effectiveMaxFrameSize + maximumHeaderLength;

        const slice = await this.reader.requestSliceRange(startPos, maximumHeaderLength, maximumSliceLength);
        if (!slice) {
            return null;
        }

        const frameHeader = this.readFlacFrameHeader({
            slice,
            isFirstPacket: isFirstPacket,
        });
        if (!frameHeader) {
            return null;
        }

        // We don't know exactly how long the packet is, we only know the `minimumFrameSize` and `maximumFrameSize`.
        // The packet is over if the next 2 bytes are the sync word followed by a valid header
        // or the end of the file is reached.

        // The next sync word is expected at earliest when `minimumFrameSize` is reached,
        // we can skip over anything before that
        slice.filePos = startPos + effectiveMinFrameSize;

        while (true) {
            // Reached end of the file, packet is over
            if (slice.filePos > slice.end - minimumHeaderLength) {
                return {
                    num: frameHeader.num,
                    blockSize: frameHeader.blockSize,
                    sampleRate: frameHeader.sampleRate,
                    size: slice.end - startPos,
                    isLastFrame: true,
                };
            }

            const nextByte = readU8(slice);
            if (nextByte === 0xff) {
                // Remember where to resume scanning if this candidate turns out to be a false positive
                const positionBeforeReading = slice.filePos;

                const byteAfterNextByte = readU8(slice);
                const expected = this.blockingBit === 1 ? 0b1111_1001 : 0b1111_1000;
                if (byteAfterNextByte !== expected) {
                    slice.filePos = positionBeforeReading;
                    continue;
                }

                // Step back onto the first sync byte so the header parser sees the whole header
                slice.skip(-2);
                const lengthIfNextFlacFrameHeaderIsLegit = slice.filePos - startPos;

                const nextFrameHeader = this.readFlacFrameHeader({
                    slice,
                    isFirstPacket: false,
                });
                if (!nextFrameHeader) {
                    slice.filePos = positionBeforeReading;
                    continue;
                }

                // Ensure the frameOrSampleNum is consecutive.
                // https://github.com/Vanilagy/mediabunny/issues/194
                if (this.blockingBit === 0) {
                    // Case A: If the stream is fixed block size, this is the frame number, which increments by 1
                    if (nextFrameHeader.num - frameHeader.num !== 1) {
                        slice.filePos = positionBeforeReading;
                        continue;
                    }
                } else {
                    // Case B: If the stream is variable block size, this is the sample number, which increments by
                    // amount of samples in a frame.
                    if (nextFrameHeader.num - frameHeader.num !== frameHeader.blockSize) {
                        slice.filePos = positionBeforeReading;
                        continue;
                    }
                }

                return {
                    num: frameHeader.num,
                    blockSize: frameHeader.blockSize,
                    sampleRate: frameHeader.sampleRate,
                    size: lengthIfNextFlacFrameHeaderIsLegit,
                    isLastFrame: false,
                };
            }
        }
    }

    /**
     * Tries to parse a FLAC frame header at the slice's current position.
     *
     * On the first packet, the blocking strategy bit is stored in `this.blockingBit`; later
     * headers must carry the same bit.
     *
     * @param {{ slice: object, isFirstPacket: boolean }} args
     * @returns {{ num: number, blockSize: number, sampleRate: number } | null} `null` if the bytes
     *     do not form a valid frame header for this stream.
     */
    readFlacFrameHeader({ slice, isFirstPacket, }) {
        // In this function, generally it is not safe to throw errors.
        // We might end up here because we stumbled upon a syncword,
        // but the data might not actually be a FLAC frame, it might be random bitstream
        // data, in that case we should return null and continue.
        const startOffset = slice.filePos;

        // https://www.rfc-editor.org/rfc/rfc9639.html#section-9.1
        // Each frame MUST start on a byte boundary and start with the 15-bit frame
        // sync code 0b111111111111100. Following the sync code is the blocking strategy
        // bit, which MUST NOT change during the audio stream.
        const bytes = readBytes(slice, 4);
        const bitstream = new Bitstream(bytes);

        const bits = bitstream.readBits(15);
        if (bits !== 0b111111111111100) {
            // This cannot be a valid FLAC frame, must start with the syncword
            return null;
        }

        if (this.blockingBit === null) {
            assert(isFirstPacket);
            this.blockingBit = bitstream.readBits(1);
        } else if (this.blockingBit === 0 || this.blockingBit === 1) {
            assert(!isFirstPacket);
            const newBlockingBit = bitstream.readBits(1);
            if (newBlockingBit !== this.blockingBit) {
                // This cannot be a valid FLAC frame, the blocking strategy must not change mid-stream
                return null;
            }
        } else {
            throw new Error('Invalid blocking bit');
        }

        const blockSizeOrUncommon = getBlockSizeOrUncommon(bitstream.readBits(4));
        if (!blockSizeOrUncommon) {
            // This cannot be a valid FLAC frame, the syncword was just coincidental
            return null;
        }

        assert(this.audioInfo);
        const sampleRateOrUncommon = getSampleRateOrUncommon(bitstream.readBits(4), this.audioInfo.sampleRate);
        if (!sampleRateOrUncommon) {
            // This cannot be a valid FLAC frame, the syncword was just coincidental
            return null;
        }

        bitstream.readBits(4); // channel count
        bitstream.readBits(3); // bit depth
        const reservedZero = bitstream.readBits(1); // reserved zero
        if (reservedZero !== 0) {
            // This cannot be a valid FLAC frame, the syncword was just coincidental
            return null;
        }

        const num = readCodedNumber(slice);
        const blockSize = readBlockSize(slice, blockSizeOrUncommon);
        const sampleRate = readSampleRate(slice, sampleRateOrUncommon);
        if (sampleRate === null) {
            // This cannot be a valid FLAC frame, the syncword was just coincidental
            return null;
        }
        if (sampleRate !== this.audioInfo.sampleRate) {
            // This cannot be a valid FLAC frame, the sample rate is not the same as in the stream info
            return null;
        }

        // Rewind to the start of the header and checksum every byte that precedes the CRC byte
        const size = slice.filePos - startOffset;
        const crc = readU8(slice);
        slice.skip(-size);
        slice.skip(-1);
        const crcCalculated = calculateCrc8(readBytes(slice, size));
        if (crc !== crcCalculated) {
            // Maybe this wasn't a FLAC frame at all, the syncword was just coincidentally
            // in the bitstream
            return null;
        }

        return { num, blockSize, sampleRate };
    }

    /**
     * Loads the next frame after `lastLoadedPos` and appends it to `loadedSamples`. Sets
     * `lastSampleLoaded` when the final frame was read or when no further frame can be read.
     *
     * @returns {Promise<void>}
     */
    async advanceReader() {
        await this.readMetadata();
        assert(this.lastLoadedPos !== null);
        assert(this.audioInfo);

        if (this.lastSampleLoaded) {
            // Nothing left to read; avoid re-parsing past the end of the stream
            return;
        }

        const startPos = this.lastLoadedPos;
        const frame = await this.readNextFlacFrame({
            startPos,
            isFirstPacket: this.loadedSamples.length === 0,
        });

        if (!frame) {
            // Unexpected case, failed to read next FLAC frame
            // handling gracefully
            this.lastSampleLoaded = true;
            return;
        }

        const lastSample = this.loadedSamples[this.loadedSamples.length - 1];
        const blockOffset = lastSample
            ? lastSample.blockOffset + lastSample.blockSize
            : 0;

        const sample = {
            blockOffset,
            blockSize: frame.blockSize,
            byteOffset: startPos,
            byteSize: frame.size,
        };

        this.lastLoadedPos = this.lastLoadedPos + frame.size;
        this.loadedSamples.push(sample);

        if (frame.isLastFrame) {
            this.lastSampleLoaded = true;
            return;
        }
    }
}

/**
 * Track backing for the single audio track of a FLAC file. It reads everything from the owning
 * `FlacDemuxer` (`audioInfo`, `loadedSamples`, `reader`, `readingMutex`).
 */
class FlacAudioTrackBacking {
    constructor(demuxer) {
        this.demuxer = demuxer;
    }

    getType() {
        return 'audio';
    }

    getId() {
        return 1;
    }

    getNumber() {
        return 1;
    }

    getCodec() {
        return 'flac';
    }

    getInternalCodecId() {
        return null;
    }

    getNumberOfChannels() {
        assert(this.demuxer.audioInfo);
        return this.demuxer.audioInfo.numberOfChannels;
    }

    getSampleRate() {
        assert(this.demuxer.audioInfo);
        return this.demuxer.audioInfo.sampleRate;
    }

    getName() {
        return null;
    }

    getLanguageCode() {
        return UNDETERMINED_LANGUAGE;
    }

    getTimeResolution() {
        assert(this.demuxer.audioInfo);
        return this.demuxer.audioInfo.sampleRate;
    }

    isRelativeToUnixEpoch() {
        return false;
    }

    getPairingMask() {
        return 1n;
    }

    getBitrate() {
        return null;
    }

    getAverageBitrate() {
        return null;
    }

    /**
     * @returns {Promise<number | null>} `totalSamples / sampleRate` from STREAMINFO, or `null`
     *     when `totalSamples` is 0.
     */
    async getDurationFromMetadata() {
        assert(this.demuxer.audioInfo);
        if (this.demuxer.audioInfo.totalSamples === 0) {
            return null;
        }
        return this.demuxer.audioInfo.totalSamples / this.demuxer.audioInfo.sampleRate;
    }

    async getLiveRefreshInterval() {
        return null;
    }

    getDisposition() {
        return {
            ...DEFAULT_TRACK_DISPOSITION,
        };
    }

    async getDecoderConfig() {
        assert(this.demuxer.audioInfo);
        return {
            codec: 'flac',
            numberOfChannels: this.demuxer.audioInfo.numberOfChannels,
            sampleRate: this.demuxer.audioInfo.sampleRate,
            description: this.demuxer.audioInfo.description,
        };
    }

    /**
     * Finds the packet whose time span covers `timestamp`, loading further frames as needed.
     *
     * @param {number} timestamp Timestamp, compared against `blockOffset / sampleRate`.
     * @param {object} options Passed through to `getPacketAtIndex`.
     * @returns {Promise<EncodedPacket | null>} The matching packet; the last packet if `timestamp`
     *     lies beyond the final sample; `null` if `timestamp` is negative or no sample could be
     *     loaded at all.
     */
    async getPacket(timestamp, options) {
        assert(this.demuxer.audioInfo);
        if (timestamp < 0) {
            return null;
        }

        const release = await this.demuxer.readingMutex.acquire();
        try {
            while (true) {
                const packetIndex = binarySearchLessOrEqual(
                    this.demuxer.loadedSamples,
                    timestamp,
                    x => x.blockOffset / this.demuxer.audioInfo.sampleRate,
                );

                if (packetIndex === -1) {
                    if (this.demuxer.lastSampleLoaded) {
                        // Everything readable has been read and nothing matches (e.g. the file
                        // contains no audio frames); advancing again would loop forever.
                        return null;
                    }
                    await this.demuxer.advanceReader();
                    continue;
                }

                const packet = this.demuxer.loadedSamples[packetIndex];
                const sampleTimestamp = packet.blockOffset / this.demuxer.audioInfo.sampleRate;
                const sampleDuration = packet.blockSize / this.demuxer.audioInfo.sampleRate;

                if (sampleTimestamp + sampleDuration <= timestamp) {
                    // The candidate ends before the requested timestamp
                    if (this.demuxer.lastSampleLoaded) {
                        return this.getPacketAtIndex(this.demuxer.loadedSamples.length - 1, options);
                    }
                    await this.demuxer.advanceReader();
                    continue;
                }

                return this.getPacketAtIndex(packetIndex, options);
            }
        } finally {
            release();
        }
    }

    /**
     * @param {EncodedPacket} packet Packet whose `sequenceNumber` identifies the current sample.
     * @param {object} options Passed through to `getPacketAtIndex`.
     * @returns {Promise<EncodedPacket | null>} The following packet, or `null` at the end.
     */
    async getNextPacket(packet, options) {
        const release = await this.demuxer.readingMutex.acquire();
        try {
            const nextIndex = packet.sequenceNumber + 1;
            if (this.demuxer.lastSampleLoaded
                && nextIndex >= this.demuxer.loadedSamples.length) {
                return null;
            }

            // Ensure the next sample exists
            while (nextIndex >= this.demuxer.loadedSamples.length
                && !this.demuxer.lastSampleLoaded) {
                await this.demuxer.advanceReader();
            }

            return this.getPacketAtIndex(nextIndex, options);
        } finally {
            release();
        }
    }

    // Every packet is created with type 'key', so key-packet lookups are plain lookups
    getKeyPacket(timestamp, options) {
        return this.getPacket(timestamp, options);
    }

    getNextKeyPacket(packet, options) {
        return this.getNextPacket(packet, options);
    }

    /**
     * Builds an `EncodedPacket` for the already-loaded sample at `sampleIndex`.
     *
     * @param {number} sampleIndex Index into `demuxer.loadedSamples`.
     * @param {object} options If `options.metadataOnly` is truthy, `PLACEHOLDER_DATA` is used
     *     instead of reading the sample's bytes.
     * @returns {Promise<EncodedPacket | null>} `null` if the sample does not exist or its bytes
     *     could not be read.
     */
    async getPacketAtIndex(sampleIndex, options) {
        const rawSample = this.demuxer.loadedSamples[sampleIndex];
        if (!rawSample) {
            return null;
        }

        let data;
        if (options.metadataOnly) {
            data = PLACEHOLDER_DATA;
        } else {
            let slice = this.demuxer.reader.requestSlice(rawSample.byteOffset, rawSample.byteSize);
            if (slice instanceof Promise) {
                slice = await slice;
            }
            if (!slice) {
                return null; // Data didn't fit into the rest of the file
            }
            data = readBytes(slice, rawSample.byteSize);
        }

        assert(this.demuxer.audioInfo);
        const timestamp = rawSample.blockOffset / this.demuxer.audioInfo.sampleRate;
        const duration = rawSample.blockSize / this.demuxer.audioInfo.sampleRate;

        return new EncodedPacket(data, 'key', timestamp, duration, sampleIndex, rawSample.byteSize);
    }

    /**
     * @param {object} options Passed through to `getPacketAtIndex`.
     * @returns {Promise<EncodedPacket | null>} The first packet, or `null` if none could be loaded.
     */
    async getFirstPacket(options) {
        // Hold the reading mutex like getPacket/getNextPacket do, so that concurrent callers
        // cannot run advanceReader() in parallel and record the same frame twice.
        const release = await this.demuxer.readingMutex.acquire();
        try {
            // Ensure the first sample exists
            while (this.demuxer.loadedSamples.length === 0
                && !this.demuxer.lastSampleLoaded) {
                await this.demuxer.advanceReader();
            }

            return this.getPacketAtIndex(0, options);
        } finally {
            release();
        }
    }
}