UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

1,576 lines (1,334 loc) 87.3 kB
/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, VideoCodec, AudioCodec } from './codec';
import {
	concatAvcNalUnits,
	deserializeAvcDecoderConfigurationRecord,
	determineVideoPacketType,
	extractNalUnitTypeForAvc,
	extractNalUnitTypeForHevc,
	HevcNalUnitType,
	iterateAvcNalUnits,
	iterateHevcNalUnits,
	parseAvcSps,
	sanitizeHevcPacketForChromium,
} from './codec-data';
import { CustomVideoDecoder, customVideoDecoders, CustomAudioDecoder, customAudioDecoders } from './custom-coder';
import { InputDisposedError } from './input';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
	AnyIterable,
	assert,
	assertNever,
	CallSerializer,
	getInt24,
	getUint24,
	insertSorted,
	isChromium,
	isFirefox,
	isNumber,
	isWebKit,
	last,
	mapAsyncGenerator,
	promiseWithResolvers,
	Rotation,
	toAsyncIterator,
	toDataView,
	toUint8Array,
	validateAnyIterable,
} from './misc';
import { EncodedPacket } from './packet';
import { fromAlaw, fromUlaw } from './pcm';
import {
	AudioSample,
	clampCropRectangle,
	CropRectangle,
	validateCropRectangle,
	VideoSample,
	VideoSamplePixelFormat,
} from './sample';

/**
 * Additional options for controlling packet retrieval.
 * @group Media sinks
 * @public
 */
export type PacketRetrievalOptions = {
	/**
	 * When set to `true`, only packet metadata (like timestamp) will be retrieved - the actual packet data will not
	 * be loaded.
	 */
	metadataOnly?: boolean;

	/**
	 * When set to `true`, key packets will be verified upon retrieval by looking into the packet's bitstream.
	 * If not enabled, the packet types will be determined solely by what's stored in the containing file and may be
	 * incorrect, potentially leading to decoder errors. Since determining a packet's actual type requires looking into
	 * its data, this option cannot be enabled together with `metadataOnly`.
	 */
	verifyKeyPackets?: boolean;

	/**
	 * When querying packets in live media that are in the future relative to the current live edge, Mediabunny will,
	 * by default, wait for the stream to advance until the query can be satisfied. In a sense, Mediabunny simply treats
	 * live streams as media files that are still being written, and any read that depends on future information will
	 * wait until it can be fulfilled.
	 *
	 * If you want to query packets based only on the currently known information, set this field to `true` - this way,
	 * Mediabunny will never wait for the live stream to catch up.
	 *
	 * For non-live media, this field has no effect.
	 */
	skipLiveWait?: boolean;
};

/**
 * Throws a `TypeError` if `options` is not an object, if any defined flag is not a boolean, or if
 * `verifyKeyPackets` and `metadataOnly` are both enabled.
 */
const validatePacketRetrievalOptions = (options: PacketRetrievalOptions) => {
	if (!options || typeof options !== 'object') {
		throw new TypeError('options must be an object.');
	}
	if (options.metadataOnly !== undefined && typeof options.metadataOnly !== 'boolean') {
		throw new TypeError('options.metadataOnly, when defined, must be a boolean.');
	}
	if (options.verifyKeyPackets !== undefined && typeof options.verifyKeyPackets !== 'boolean') {
		throw new TypeError('options.verifyKeyPackets, when defined, must be a boolean.');
	}
	if (options.verifyKeyPackets && options.metadataOnly) {
		throw new TypeError('options.verifyKeyPackets and options.metadataOnly cannot be enabled together.');
	}
	if (options.skipLiveWait !== undefined && typeof options.skipLiveWait !== 'boolean') {
		throw new TypeError('options.skipLiveWait, when defined, must be a boolean.');
	}
};

/** Throws a `TypeError` unless `timestamp` passes the `isNumber` check. */
const validateTimestamp = (timestamp: number) => {
	if (!isNumber(timestamp)) {
		throw new TypeError('timestamp must be a number.'); // It can be non-finite, that's fine
	}
};

/**
 * When `options.verifyKeyPackets` is enabled, re-checks every packet labeled `'key'` via
 * `track.determinePacketType` and overwrites the packet's type with the determined one (if any). Packets labeled
 * `'delta'` and `null` results are passed through untouched. Without the option, the promise is returned as-is.
 */
const maybeFixPacketType = (
	track: InputTrack,
	promise: Promise<EncodedPacket | null>,
	options: PacketRetrievalOptions,
) => {
	if (options.verifyKeyPackets) {
		return promise.then(async (packet) => {
			if (!packet || packet.type === 'delta') {
				return packet;
			}

			const determinedType = await track.determinePacketType(packet);
			if (determinedType) {
				// @ts-expect-error Technically readonly
				packet.type = determinedType;
			}

			return packet;
		});
	} else {
		return promise;
	}
};

/**
 * Sink for retrieving encoded packets from an input track.
 * @group Media sinks
 * @public
 */
export class EncodedPacketSink {
	/** @internal */
	_track: InputTrack;

	/** Creates a new {@link EncodedPacketSink} for the given {@link InputTrack}. */
	constructor(track: InputTrack) {
		if (!(track instanceof InputTrack)) {
			throw new TypeError('track must be an InputTrack.');
		}

		this._track = track;
	}

	/**
	 * Retrieves the track's first packet (in decode order), or null if it has no packets. The first packet is very
	 * likely to be a key packet, but it doesn't have to be.
	 */
	async getFirstPacket(options: PacketRetrievalOptions = {}) {
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getFirstPacket(options), options);
	}

	/** Retrieves the track's first key packet (in decode order), or null if it has no key packets. */
	async getFirstKeyPacket(options: PacketRetrievalOptions = {}) {
		validatePacketRetrievalOptions(options);

		const firstPacket = await this.getFirstPacket(options);
		if (!firstPacket) {
			return null;
		}

		if (firstPacket.type === 'key') {
			// Great
			return firstPacket;
		}

		return this.getNextKeyPacket(firstPacket, options);
	}

	/**
	 * Retrieves the packet corresponding to the given timestamp, in seconds. More specifically, returns the last packet
	 * (in presentation order) with a start timestamp less than or equal to the given timestamp. This method can be
	 * used to retrieve a track's last packet using `getPacket(Infinity)`. The method returns null if the timestamp
	 * is before the first packet in the track.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getPacket(timestamp: number, options: PacketRetrievalOptions = {}) {
		validateTimestamp(timestamp);
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getPacket(timestamp, options), options);
	}

	/**
	 * Retrieves the packet following the given packet (in decode order), or null if the given packet is the
	 * last packet.
	 */
	async getNextPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}) {
		if (!(packet instanceof EncodedPacket)) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getNextPacket(packet, options), options);
	}

	/**
	 * Retrieves the key packet corresponding to the given timestamp, in seconds. More specifically, returns the last
	 * key packet (in presentation order) with a start timestamp less than or equal to the given timestamp. A key packet
	 * is a packet that doesn't require previous packets to be decoded. This method can be used to retrieve a track's
	 * last key packet using `getKeyPacket(Infinity)`. The method returns null if the timestamp is before the first
	 * key packet in the track.
	 *
	 * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getKeyPacket(timestamp: number, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
		validateTimestamp(timestamp);
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		if (!options.verifyKeyPackets) {
			return this._track._backing.getKeyPacket(timestamp, options);
		}

		const packet = await this._track._backing.getKeyPacket(timestamp, options);
		if (!packet) {
			return packet;
		}

		assert(packet.type === 'key');

		const determinedType = await this._track.determinePacketType(packet);
		if (determinedType === 'delta') {
			// Try returning the previous key packet (in hopes that it's actually a key packet)
			return this.getKeyPacket(packet.timestamp - 1 / await this._track.getTimeResolution(), options);
		}

		return packet;
	}

	/**
	 * Retrieves the key packet following the given packet (in decode order), or null if the given packet is the last
	 * key packet.
	 *
	 * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
	 */
	async getNextKeyPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
		if (!(packet instanceof EncodedPacket)) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		if (!options.verifyKeyPackets) {
			return this._track._backing.getNextKeyPacket(packet, options);
		}

		const nextPacket = await this._track._backing.getNextKeyPacket(packet, options);
		if (!nextPacket) {
			return nextPacket;
		}

		assert(nextPacket.type === 'key');

		const determinedType = await this._track.determinePacketType(nextPacket);
		if (determinedType === 'delta') {
			// Try returning the next key packet (in hopes that it's actually a key packet)
			return this.getNextKeyPacket(nextPacket, options);
		}

		return nextPacket;
	}

	/**
	 * Creates an async iterator that yields the packets in this track in decode order. To enable fast iteration, this
	 * method will intelligently preload packets based on the speed of the consumer.
	 *
	 * @param startPacket - (optional) The packet from which iteration should begin. This packet will also be yielded.
	 * @param endPacket - (optional) The packet at which iteration should end. This packet will _not_ be yielded.
	 */
	packets(
		startPacket?: EncodedPacket,
		endPacket?: EncodedPacket,
		options: PacketRetrievalOptions = {},
	): AsyncGenerator<EncodedPacket, void, unknown> {
		if (startPacket !== undefined && !(startPacket instanceof EncodedPacket)) {
			throw new TypeError('startPacket must be an EncodedPacket.');
		}
		if (startPacket !== undefined && startPacket.isMetadataOnly && !options?.metadataOnly) {
			throw new TypeError('startPacket can only be metadata-only if options.metadataOnly is enabled.');
		}
		if (endPacket !== undefined && !(endPacket instanceof EncodedPacket)) {
			throw new TypeError('endPacket must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		const packetQueue: EncodedPacket[] = [];

		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let ended = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		// performance.now() values of the dequeues that happened within the last second (pruned in next())
		const timestamps: number[] = [];

		// The queue should always be big enough to hold 1 second worth of packets
		const maxQueueSize = () => Math.max(2, timestamps.length);

		// The following is the "pump" process that keeps pumping packets into the queue
		(async () => {
			let packet = startPacket ?? await this.getFirstPacket(options);

			while (packet && !terminated && !this._track.input._disposed) {
				if (endPacket && packet.sequenceNumber >= endPacket.sequenceNumber) {
					break;
				}

				if (packetQueue.length > maxQueueSize()) {
					// Queue is full; wait until the consumer dequeues (or terminates), then re-check everything
					({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
					await queueDequeue;
					continue;
				}

				packetQueue.push(packet);

				onQueueNotEmpty();
				({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());

				packet = await this.getNextPacket(packet, options);
			}

			ended = true;
			onQueueNotEmpty();
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;

		return {
			async next() {
				while (true) {
					if (track.input._disposed) {
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						throw outOfBandError;
					} else if (packetQueue.length > 0) {
						const value = packetQueue.shift()!;

						// Track consumer speed: remember this dequeue and forget those older than one second
						const now = performance.now();
						timestamps.push(now);

						while (timestamps.length > 0 && now - timestamps[0]! >= 1000) {
							timestamps.shift();
						}

						onQueueDequeue();
						return { value, done: false };
					} else if (ended) {
						return { value: undefined, done: true };
					} else {
						await queueNotEmpty;
					}
				}
			},
			async return() {
				terminated = true;
				// Wake up both the pump and any pending next() call so they can observe the termination
				onQueueDequeue();
				onQueueNotEmpty();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}
}

/**
 * Common interface through which the sample sinks drive a decoder. Decoded samples are handed to `onSample`,
 * errors to `onError`.
 */
abstract class DecoderWrapper<
	MediaSample extends VideoSample | AudioSample,
> {
	constructor(
		public onSample: (sample: MediaSample) => unknown,
		public onError: (error: Error) => unknown,
	) {}

	abstract getDecodeQueueSize(): number;
	abstract decode(packet: EncodedPacket): void;
	abstract flush(): Promise<void>;
	abstract close(): void;
}

/**
 * Base class for decoded media sample sinks.
* @group Media sinks * @public */ export abstract class BaseMediaSampleSink< MediaSample extends VideoSample | AudioSample, > { /** @internal */ abstract _track: InputTrack; /** @internal */ abstract _createDecoder( onSample: (sample: MediaSample) => unknown, onError: (error: Error) => unknown ): Promise<DecoderWrapper<MediaSample>>; /** @internal */ abstract _createPacketSink(): EncodedPacketSink; /** @internal */ protected mediaSamplesInRange( startTimestamp = -Infinity, endTimestamp = Infinity, options: PacketRetrievalOptions, ): AsyncGenerator<MediaSample, void, unknown> { validateTimestamp(startTimestamp); validateTimestamp(endTimestamp); const sampleQueue: MediaSample[] = []; let firstSampleQueued = false; let lastSample: MediaSample | null = null; let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers(); let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers(); let decoderIsFlushed = false; let ended = false; let terminated = false; // This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this // method but instead in a different context. This error should not go unnoticed and must be bubbled up to // the consumer. let outOfBandError = null as Error | null; const packetRetrievalOptions: PacketRetrievalOptions = { ...options, verifyKeyPackets: true, metadataOnly: false, }; // The following is the "pump" process that keeps pumping packets into the decoder (async () => { const decoder = await this._createDecoder((sample) => { onQueueDequeue(); if (sample.timestamp >= endTimestamp) { ended = true; } if (ended) { sample.close(); return; } if (lastSample) { if (sample.timestamp > startTimestamp) { // We don't know ahead of time what the first first is. This is because the first first is the // last first whose timestamp is less than or equal to the start timestamp. 
Therefore we need to // wait for the first first after the start timestamp, and then we'll know that the previous // first was the first first. sampleQueue.push(lastSample); firstSampleQueued = true; } else { lastSample.close(); } } if (sample.timestamp >= startTimestamp) { sampleQueue.push(sample); firstSampleQueued = true; } lastSample = firstSampleQueued ? null : sample; if (sampleQueue.length > 0) { onQueueNotEmpty(); ({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers()); } }, (error) => { if (!outOfBandError) { outOfBandError = error; onQueueNotEmpty(); } }); const packetSink = this._createPacketSink(); const keyPacket = await packetSink.getKeyPacket(startTimestamp, packetRetrievalOptions) ?? await packetSink.getFirstKeyPacket(packetRetrievalOptions); let currentPacket: EncodedPacket | null = keyPacket; // B-frames make it exceedingly difficult to properly define an upper bound for packet iteration if an end // timestamp is set, so we just don't do it. The case that makes it especially tricky is when the frames // following a key frame have a lower timestamp than the keyframe; something that quite frequently happens // in HEVC streams. The price to pay for not upper-bounding the packet iterator is a slight increase in // decoder work at the end of the range, but the added correctness and reliability makes this tradeoff worth // it. const endPacket = undefined; const packets = packetSink.packets(keyPacket ?? 
undefined, endPacket, packetRetrievalOptions); await packets.next(); // Skip the start packet as we already have it while (currentPacket && !ended && !this._track.input._disposed) { const maxQueueSize = computeMaxQueueSize(sampleQueue.length); if (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize) { ({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers()); await queueDequeue; continue; } decoder.decode(currentPacket); const packetResult = await packets.next(); if (packetResult.done) { break; } currentPacket = packetResult.value; } await packets.return(); if (!terminated && !this._track.input._disposed) { await decoder.flush(); } decoder.close(); if (!firstSampleQueued && lastSample) { sampleQueue.push(lastSample); } decoderIsFlushed = true; onQueueNotEmpty(); // To unstuck the generator })().catch((error: Error) => { if (!outOfBandError) { outOfBandError = error; onQueueNotEmpty(); } }); const track = this._track; const closeSamples = () => { lastSample?.close(); for (const sample of sampleQueue) { sample.close(); } }; return { async next() { while (true) { if (track.input._disposed) { closeSamples(); throw new InputDisposedError(); } else if (terminated) { return { value: undefined, done: true }; } else if (outOfBandError) { closeSamples(); throw outOfBandError; } else if (sampleQueue.length > 0) { const value = sampleQueue.shift()!; onQueueDequeue(); return { value, done: false }; } else if (!decoderIsFlushed) { await queueNotEmpty; } else { return { value: undefined, done: true }; } } }, async return() { terminated = true; ended = true; onQueueDequeue(); onQueueNotEmpty(); closeSamples(); return { value: undefined, done: true }; }, async throw(error) { throw error; }, [Symbol.asyncIterator]() { return this; }, }; } /** @internal */ protected mediaSamplesAtTimestamps( timestamps: AnyIterable<number>, options: PacketRetrievalOptions, ): AsyncGenerator<MediaSample | null, void, unknown> { validateAnyIterable(timestamps); const 
timestampIterator = toAsyncIterator(timestamps); const timestampsOfInterest: number[] = []; const sampleQueue: (MediaSample | null)[] = []; let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers(); let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers(); let decoderIsFlushed = false; let terminated = false; // This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this // method but instead in a different context. This error should not go unnoticed and must be bubbled up to // the consumer. let outOfBandError = null as Error | null; const pushToQueue = (sample: MediaSample | null) => { sampleQueue.push(sample); onQueueNotEmpty(); ({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers()); }; const retrievalOptions: PacketRetrievalOptions = { ...options, verifyKeyPackets: true, metadataOnly: false, }; // The following is the "pump" process that keeps pumping packets into the decoder (async () => { const decoder = await this._createDecoder((sample) => { onQueueDequeue(); if (terminated) { sample.close(); return; } let sampleUses = 0; while ( timestampsOfInterest.length > 0 && sample.timestamp - timestampsOfInterest[0]! > -1e-10 // Give it a little epsilon ) { sampleUses++; timestampsOfInterest.shift(); } if (sampleUses > 0) { for (let i = 0; i < sampleUses; i++) { // Clone the sample if we need to emit it multiple times pushToQueue((i < sampleUses - 1 ? sample.clone() : sample) as MediaSample); } } else { sample.close(); } }, (error) => { if (!outOfBandError) { outOfBandError = error; onQueueNotEmpty(); } }); const packetSink = this._createPacketSink(); let lastPacket: EncodedPacket | null = null; let lastKeyPacket: EncodedPacket | null = null; // The end sequence number (inclusive) in the next batch of packets that will be decoded. The batch starts // at the last key frame and goes until this sequence number. 
let maxSequenceNumber = -1; const decodePackets = async () => { assert(lastKeyPacket); // Start at the current key packet let currentPacket = lastKeyPacket; decoder.decode(currentPacket); while (currentPacket.sequenceNumber < maxSequenceNumber) { const maxQueueSize = computeMaxQueueSize(sampleQueue.length); while (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize && !terminated) { ({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers()); await queueDequeue; } if (terminated) { break; } const nextPacket = await packetSink.getNextPacket(currentPacket, retrievalOptions); assert(nextPacket); decoder.decode(nextPacket); currentPacket = nextPacket; } maxSequenceNumber = -1; }; const flushDecoder = async () => { await decoder.flush(); // We don't expect this list to have any elements in it anymore, but in case it does, let's emit // nulls for every remaining element, then clear it. for (let i = 0; i < timestampsOfInterest.length; i++) { pushToQueue(null); } timestampsOfInterest.length = 0; }; for await (const timestamp of timestampIterator) { validateTimestamp(timestamp); if (terminated || this._track.input._disposed) { break; } const targetPacket = await packetSink.getPacket(timestamp, retrievalOptions); const keyPacket = targetPacket && await packetSink.getKeyPacket(timestamp, retrievalOptions); if (!keyPacket) { if (maxSequenceNumber !== -1) { await decodePackets(); await flushDecoder(); } pushToQueue(null); lastPacket = null; continue; } // Check if the key packet has changed or if we're going back in time if ( lastPacket && ( keyPacket.sequenceNumber !== lastKeyPacket!.sequenceNumber || targetPacket.timestamp < lastPacket.timestamp ) ) { await decodePackets(); await flushDecoder(); // Always flush here, improves decoder compatibility } timestampsOfInterest.push(targetPacket.timestamp); maxSequenceNumber = Math.max(targetPacket.sequenceNumber, maxSequenceNumber); lastPacket = targetPacket; lastKeyPacket = keyPacket; } if 
(!terminated && !this._track.input._disposed) { if (maxSequenceNumber !== -1) { // We still need to decode packets await decodePackets(); } await flushDecoder(); } decoder.close(); decoderIsFlushed = true; onQueueNotEmpty(); // To unstuck the generator })().catch((error: Error) => { if (!outOfBandError) { outOfBandError = error; onQueueNotEmpty(); } }); const track = this._track; const closeSamples = () => { for (const sample of sampleQueue) { sample?.close(); } }; return { async next() { while (true) { if (track.input._disposed) { closeSamples(); throw new InputDisposedError(); } else if (terminated) { return { value: undefined, done: true }; } else if (outOfBandError) { closeSamples(); throw outOfBandError; } else if (sampleQueue.length > 0) { const value = sampleQueue.shift(); assert(value !== undefined); onQueueDequeue(); return { value, done: false }; } else if (!decoderIsFlushed) { await queueNotEmpty; } else { return { value: undefined, done: true }; } } }, async return() { terminated = true; onQueueDequeue(); onQueueNotEmpty(); closeSamples(); return { value: undefined, done: true }; }, async throw(error) { throw error; }, [Symbol.asyncIterator]() { return this; }, }; } } const computeMaxQueueSize = (decodedSampleQueueSize: number) => { // If we have decoded samples lying around, limit the total queue size to a small value (decoded samples can use up // a lot of memory). If not, we're fine with a much bigger queue of encoded packets waiting to be decoded. In fact, // some decoders only start flushing out decoded chunks when the packet queue is large enough. return decodedSampleQueueSize === 0 ? 40 : 8; }; class VideoDecoderWrapper extends DecoderWrapper<VideoSample> { decoder: VideoDecoder | null = null; customDecoder: CustomVideoDecoder | null = null; customDecoderCallSerializer = new CallSerializer(); customDecoderQueueSize = 0; inputTimestamps: number[] = []; // Timestamps input into the decoder, sorted. 
sampleQueue: VideoSample[] = []; // Safari-specific thing, check usage. currentPacketIndex = 0; raslSkipped = false; // For HEVC stuff // Alpha stuff alphaDecoder: VideoDecoder | null = null; alphaHadKeyframe = false; colorQueue: VideoFrame[] = []; alphaQueue: (VideoFrame | null)[] = []; merger: ColorAlphaMerger | null = null; decodedAlphaChunkCount = 0; alphaDecoderQueueSize = 0; /** Each value is the number of decoded alpha chunks at which a null alpha frame should be added. */ nullAlphaFrameQueue: number[] = []; currentAlphaPacketIndex = 0; alphaRaslSkipped = false; // For HEVC stuff frameHandlerSerializer = new CallSerializer(); constructor( onSample: (sample: VideoSample) => unknown, onError: (error: Error) => unknown, public codec: VideoCodec, public decoderConfig: VideoDecoderConfig, public rotation: Rotation, public timeResolution: number, ) { super(onSample, onError); const MatchingCustomDecoder = customVideoDecoders.find(x => x.supports(codec, decoderConfig)); if (MatchingCustomDecoder) { // @ts-expect-error "Can't create instance of abstract class 🤓" this.customDecoder = new MatchingCustomDecoder() as CustomVideoDecoder; // @ts-expect-error It's technically readonly this.customDecoder.codec = codec; // @ts-expect-error It's technically readonly this.customDecoder.config = decoderConfig; // @ts-expect-error It's technically readonly this.customDecoder.onSample = (sample) => { if (!(sample instanceof VideoSample)) { throw new TypeError('The argument passed to onSample must be a VideoSample.'); } this.finalizeAndEmitSample(sample); }; void this.customDecoderCallSerializer.call(() => this.customDecoder!.init()); } else { const colorHandler = (frame: VideoFrame) => { this.frameHandlerSerializer.call(async () => { if (this.alphaQueue.length > 0) { // Even when no alpha data is present (most of the time), there will be nulls in this queue const alphaFrame = this.alphaQueue.shift(); assert(alphaFrame !== undefined); await this.mergeAlpha(frame, alphaFrame); } 
else { this.colorQueue.push(frame); } }).catch((error: Error) => this.onError(error)); }; if (codec === 'avc' && this.decoderConfig.description && isChromium()) { // Chromium has/had a bug with playing interlaced AVC (https://issues.chromium.org/issues/456919096) // which can be worked around by requesting that software decoding be used. So, here we peek into the // AVC description, if present, and switch to software decoding if we find interlaced content. const record = deserializeAvcDecoderConfigurationRecord(toUint8Array(this.decoderConfig.description)); if (record && record.sequenceParameterSets.length > 0) { const sps = parseAvcSps(record.sequenceParameterSets[0]!); if (sps && sps.frameMbsOnlyFlag === 0) { this.decoderConfig = { ...this.decoderConfig, hardwareAcceleration: 'prefer-software', }; } } } const stack = new Error('Decoding error').stack; this.decoder = new VideoDecoder({ output: (frame) => { try { colorHandler(frame); } catch (error) { this.onError(error as Error); } }, error: (error) => { error.stack = stack; // Provide a more useful stack trace, the default one sucks this.onError(error); }, }); this.decoder.configure(this.decoderConfig); } } getDecodeQueueSize() { if (this.customDecoder) { return this.customDecoderQueueSize; } else { assert(this.decoder); return Math.max( this.decoder.decodeQueueSize, this.alphaDecoder?.decodeQueueSize ?? 
0, ); } } decode(packet: EncodedPacket) { if (this.codec === 'hevc' && this.currentPacketIndex > 0 && !this.raslSkipped) { if (this.hasHevcRaslPicture(packet.data)) { return; // Drop } this.raslSkipped = true; } if (this.customDecoder) { this.customDecoderQueueSize++; void this.customDecoderCallSerializer .call(() => this.customDecoder!.decode(packet)) .then(() => this.customDecoderQueueSize--); } else { assert(this.decoder); if (!isWebKit()) { insertSorted(this.inputTimestamps, packet.timestamp, x => x); } if (isChromium() && this.currentPacketIndex === 0) { if (this.codec === 'avc') { // Workaround for https://issues.chromium.org/issues/470109459 const filteredNalUnits: Uint8Array[] = []; for (const loc of iterateAvcNalUnits(packet.data, this.decoderConfig)) { const type = extractNalUnitTypeForAvc(packet.data[loc.offset]!); // These trip up Chromium's key frame detection, so let's strip them if (!(type >= 20 && type <= 31)) { filteredNalUnits.push(packet.data.subarray(loc.offset, loc.offset + loc.length)); } } const newData = concatAvcNalUnits(filteredNalUnits, this.decoderConfig); packet = new EncodedPacket(newData, packet.type, packet.timestamp, packet.duration); } else if (this.codec === 'hevc') { // Workaround for https://issues.chromium.org/issues/507611247 const sanitizedData = sanitizeHevcPacketForChromium(packet.data, this.decoderConfig); if (sanitizedData) { packet = new EncodedPacket(sanitizedData, packet.type, packet.timestamp, packet.duration); } } } this.decoder.decode(packet.toEncodedVideoChunk()); this.decodeAlphaData(packet); } this.currentPacketIndex++; } decodeAlphaData(packet: EncodedPacket) { if (!packet.sideData.alpha) { // No alpha side data in the packet, most common case this.pushNullAlphaFrame(); return; } if (!this.merger) { this.merger = new ColorAlphaMerger(); } // Check if we need to set up the alpha decoder if (!this.alphaDecoder) { const alphaHandler = (frame: VideoFrame) => { this.frameHandlerSerializer.call(async () => { if 
(this.colorQueue.length > 0) { const colorFrame = this.colorQueue.shift(); assert(colorFrame !== undefined); await this.mergeAlpha(colorFrame, frame); } else { this.alphaQueue.push(frame); } // Check if any null frames have been queued for this point this.decodedAlphaChunkCount++; while ( this.nullAlphaFrameQueue.length > 0 && this.nullAlphaFrameQueue[0] === this.decodedAlphaChunkCount ) { this.nullAlphaFrameQueue.shift(); if (this.colorQueue.length > 0) { const colorFrame = this.colorQueue.shift(); assert(colorFrame !== undefined); await this.mergeAlpha(colorFrame, null); } else { this.alphaQueue.push(null); } } this.alphaDecoderQueueSize--; }).catch((error: Error) => this.onError(error)); }; const stack = new Error('Decoding error').stack; this.alphaDecoder = new VideoDecoder({ output: (frame) => { try { alphaHandler(frame); } catch (error) { this.onError(error as Error); } }, error: (error) => { error.stack = stack; // Provide a more useful stack trace, the default one sucks this.onError(error); }, }); this.alphaDecoder.configure(this.decoderConfig); } const type = determineVideoPacketType(this.codec, this.decoderConfig, packet.sideData.alpha); // Alpha packets might follow a different key frame rhythm than the main packets. Therefore, before we start // decoding, we must first find a packet that's actually a key frame. Until then, we treat the image as opaque. if (!this.alphaHadKeyframe) { this.alphaHadKeyframe = type === 'key'; } if (this.alphaHadKeyframe) { // Same RASL skipping logic as for color, unlikely to be hit (since who uses HEVC with separate alpha??) but // here for symmetry. if (this.codec === 'hevc' && this.currentAlphaPacketIndex > 0 && !this.alphaRaslSkipped) { if (this.hasHevcRaslPicture(packet.sideData.alpha)) { this.pushNullAlphaFrame(); return; } this.alphaRaslSkipped = true; } this.currentAlphaPacketIndex++; this.alphaDecoder.decode(packet.alphaToEncodedVideoChunk(type ?? 
packet.type));
			this.alphaDecoderQueueSize++;
		} else {
			this.pushNullAlphaFrame();
		}
	}

	/**
	 * Records that the current packet has no alpha frame. When no alpha chunks are in flight, `null` goes straight
	 * into the alpha queue. Otherwise, the position at which the `null` belongs is remembered so that it can be
	 * inserted once the decoder has caught up to it.
	 */
	pushNullAlphaFrame() {
		if (this.alphaDecoderQueueSize !== 0) {
			// Alpha chunks are still being decoded; emitting `null` right now would put it ahead of their frames.
			// Store the decoded-chunk count at which the null frame becomes due instead.
			const dueAtChunkCount = this.decodedAlphaChunkCount + this.alphaDecoderQueueSize;
			this.nullAlphaFrameQueue.push(dueAtChunkCount);
			return;
		}

		// Nothing in flight, so ordering is trivially preserved
		this.alphaQueue.push(null);
	}

	/**
	 * Returns `true` if the packet contains at least one RASL NAL unit (RASL_N or RASL_R).
	 *
	 * With HEVC, RASL slices following a non-IDR key frame such as CRA_NUT must be skipped, since they cannot be
	 * decoded without data from before the CRA_NUT. Browsers disagree on what to do with them: Chromium drops the
	 * packets, Safari throws a decoder error. Both cause bugs upstream, so callers use this check to drop such
	 * packets themselves.
	 */
	hasHevcRaslPicture(packetData: Uint8Array) {
		for (const nalUnit of iterateHevcNalUnits(packetData, this.decoderConfig)) {
			const nalType = extractNalUnitTypeForHevc(packetData[nalUnit.offset]!);
			const isRasl = nalType === HevcNalUnitType.RASL_N || nalType === HevcNalUnitType.RASL_R;

			if (isRasl) {
				return true;
			}
		}

		return false;
	}

	/** Receives every decoded sample and irons out browser differences before it is emitted. */
	sampleHandler(sample: VideoSample) {
		if (!isWebKit()) {
			// Hand the sample the next earliest timestamp from the input. By spec, browsers must emit decoded
			// frames in presentation order *while* keeping the timestamp of the originating EncodedVideoChunk.
			// For files with B-frames but no out-of-order timestamps (a missing ctts box, for example), those two
			// requirements contradict each other. Reassigning the timestamps here fixes that and keeps them
			// sorted.
			const nextTimestamp = this.inputTimestamps.shift();
			// We can't have more decoded frames than encoded packets passed in; the correspondence should be 1:1.
			assert(nextTimestamp !== undefined);

			sample.setTimestamp(nextTimestamp);
			this.finalizeAndEmitSample(sample);
			return;
		}

		// WebKit: to handle B-frames correctly, frames are not passed on directly but collected in a queue that is
		// kept sorted by timestamp, so that frames get emitted in presentation order. The queue is flushed each
		// time a frame arrives whose timestamp is at least as large as the highest one queued, as we can be sure
		// that such a frame is not a B-frame. WebCodecs normally guarantees presentation order on its own, but
		// Safari doesn't always follow this rule.
		const reachedNewMaximum = this.sampleQueue.length > 0
			&& sample.timestamp >= last(this.sampleQueue)!.timestamp;

		if (reachedNewMaximum) {
			for (const queuedSample of this.sampleQueue) {
				this.finalizeAndEmitSample(queuedSample);
			}
			this.sampleQueue.length = 0;
		}

		insertSorted(this.sampleQueue, sample, x => x.timestamp);
	}

	/** Snaps timestamp and duration to the time resolution, applies the rotation and emits the sample. */
	finalizeAndEmitSample(sample: VideoSample) {
		const resolution = this.timeResolution;

		const snappedTimestamp = Math.round(sample.timestamp * resolution) / resolution;
		sample.setTimestamp(snappedTimestamp);

		const snappedDuration = Math.round(sample.duration * resolution) / resolution;
		sample.setDuration(snappedDuration);

		sample.setRotation(this.rotation);
		this.onSample(sample);
	}

	/**
	 * Combines a color frame with its alpha frame and passes the result to the sample handler. If `alpha` is
	 * `null`, the color frame is passed on as is.
	 */
	async mergeAlpha(color: VideoFrame, alpha: VideoFrame | null) {
		let outputFrame = color;

		if (alpha) {
			assert(this.merger);
			// Ownership of both frames moves to the merger, so they must not be closed here
			outputFrame = await this.merger.update(color, alpha);
		}

		this.sampleHandler(new VideoSample(outputFrame));
	}

	async flush() {
		if (this.customDecoder) {
			await this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
		} else {
			assert(this.decoder);

			await Promise.all([
				this.decoder.flush(),
				this.alphaDecoder?.flush(),
			]);
			await
this.frameHandlerSerializer.currentPromise;

			this.colorQueue.forEach(x => x.close());
			this.colorQueue.length = 0;
			this.alphaQueue.forEach(x => x?.close());
			this.alphaQueue.length = 0;

			this.alphaHadKeyframe = false;
			this.decodedAlphaChunkCount = 0;
			this.alphaDecoderQueueSize = 0;
			this.nullAlphaFrameQueue.length = 0;
			this.currentAlphaPacketIndex = 0;
			this.alphaRaslSkipped = false;
		}

		if (isWebKit()) {
			for (const sample of this.sampleQueue) {
				this.finalizeAndEmitSample(sample);
			}
			this.sampleQueue.length = 0;
		}

		this.currentPacketIndex = 0;
		this.raslSkipped = false;
	}

	close() {
		if (this.customDecoder) {
			void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
		} else {
			assert(this.decoder);
			this.decoder.close();
			this.alphaDecoder?.close();

			this.colorQueue.forEach(x => x.close());
			this.colorQueue.length = 0;
			this.alphaQueue.forEach(x => x?.close());
			this.alphaQueue.length = 0;

			this.merger?.close();
		}

		for (const sample of this.sampleQueue) {
			sample.close();
		}
		this.sampleQueue.length = 0;
	}
}

let mergerGpuUnavailable = false;

/**
 * Utility class that merges together color and alpha information, either on the GPU using simple WebGL 2 shaders
 * or on the CPU inside a dedicated worker.
 *
 * The merger takes ownership of every frame passed to `update`: callers must not close them, and the merger makes
 * sure they get closed whether or not the merge succeeds.
 */
export class ColorAlphaMerger {
	/** When `true`, the GPU path is never attempted and all merging goes through the CPU worker. */
	static forceCpu = true;

	canvas: OffscreenCanvas | HTMLCanvasElement | null = null;
	private gl: WebGL2RenderingContext | null = null;
	private program: WebGLProgram | null = null;
	private vao: WebGLVertexArrayObject | null = null;
	private colorTexture: WebGLTexture | null = null;
	private alphaTexture: WebGLTexture | null = null;

	private worker: Worker | null = null;
	/** CPU merge requests that have been posted to the worker and not yet answered, keyed by request ID. */
	private pendingRequests = new Map<number, ReturnType<typeof promiseWithResolvers<VideoFrame>>>();
	private nextRequestId = 0;

	constructor() {
		const canMakeCanvas = typeof OffscreenCanvas !== 'undefined'
			// eslint-disable-next-line @typescript-eslint/no-deprecated
			|| (typeof document !== 'undefined' && typeof document.createElement === 'function');

		if (!ColorAlphaMerger.forceCpu && canMakeCanvas && !mergerGpuUnavailable) {
			// Try the GPU path. If anything goes wrong (no context, shader compilation or program linking
			// failure), we fall back to the CPU path and don't try the GPU again for later instances.
			try {
				// Canvas will be resized later
				if (typeof OffscreenCanvas !== 'undefined') {
					// Prefer OffscreenCanvas for Worker environments
					this.canvas = new OffscreenCanvas(300, 150);
				} else {
					this.canvas = document.createElement('canvas');
				}

				const gl = this.canvas.getContext('webgl2', {
					premultipliedAlpha: false,
				}) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness

				if (!gl) {
					throw new Error('Couldn\'t acquire WebGL 2 context.');
				}

				this.gl = gl;
				this.program = this.createProgram();
				this.vao = this.createVAO();
				this.colorTexture = this.createTexture();
				this.alphaTexture = this.createTexture();

				this.gl.useProgram(this.program);
				this.gl.uniform1i(this.gl.getUniformLocation(this.program, 'u_colorTexture'), 0);
				this.gl.uniform1i(this.gl.getUniformLocation(this.program, 'u_alphaTexture'), 1);
			} catch (error) {
				this.gl = null;
				this.canvas = null;
				mergerGpuUnavailable = true;
				console.warn('Falling back to CPU for color/alpha merging.', error);
			}
		}
	}

	/**
	 * Merges `color` and `alpha` into a single frame that carries the timestamp and duration of `color`. Both
	 * input frames are consumed by this call.
	 */
	async update(color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> {
		if (this.gl) {
			return this.updateGpu(color, alpha);
		} else {
			return this.updateCpu(color, alpha);
		}
	}

	/**
	 * Compiles both shaders and links them into the merge program.
	 * Throws if linking fails, so that the constructor can fall back to the CPU path.
	 */
	private createProgram(): WebGLProgram {
		assert(this.gl);

		const vertexShader = this.createShader(this.gl.VERTEX_SHADER, `#version 300 es
			in vec2 a_position;
			in vec2 a_texCoord;
			out vec2 v_texCoord;

			void main() {
				gl_Position = vec4(a_position, 0.0, 1.0);
				v_texCoord = a_texCoord;
			}
		`);

		const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
			precision highp float;

			uniform sampler2D u_colorTexture;
			uniform sampler2D u_alphaTexture;
			in vec2 v_texCoord;
			out vec4 fragColor;

			void main() {
				vec3 color = texture(u_colorTexture, v_texCoord).rgb;
				float alpha = texture(u_alphaTexture, v_texCoord).r;
				fragColor = vec4(color, alpha);
			}
		`);

		const program = this.gl.createProgram();
		this.gl.attachShader(program, vertexShader);
		this.gl.attachShader(program, fragmentShader);
		this.gl.linkProgram(program);

		// linkProgram doesn't throw on failure, the status must be queried explicitly
		if (!this.gl.getProgramParameter(program, this.gl.LINK_STATUS)) {
			const log = this.gl.getProgramInfoLog(program);
			this.gl.deleteProgram(program);
			throw new Error(`Couldn't link WebGL program: ${log ?? 'unknown error'}`);
		}

		return program;
	}

	/**
	 * Creates and compiles a shader of the given type.
	 * Throws if the shader can't be created or fails to compile.
	 */
	private createShader(type: number, source: string): WebGLShader {
		assert(this.gl);

		const shader = this.gl.createShader(type);
		if (!shader) {
			throw new Error('Couldn\'t create WebGL shader.');
		}

		this.gl.shaderSource(shader, source);
		this.gl.compileShader(shader);

		// compileShader doesn't throw on failure, the status must be queried explicitly
		if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) {
			const log = this.gl.getShaderInfoLog(shader);
			this.gl.deleteShader(shader);
			throw new Error(`Couldn't compile WebGL shader: ${log ?? 'unknown error'}`);
		}

		return shader;
	}

	/** Sets up the vertex array for a full-viewport quad drawn as a triangle strip. */
	private createVAO(): WebGLVertexArrayObject {
		assert(this.gl);
		assert(this.program);

		const vao = this.gl.createVertexArray();
		this.gl.bindVertexArray(vao);

		// Four vertices, each made up of a 2D position followed by a 2D texture coordinate
		const vertices = new Float32Array([
			-1, -1, 0, 1,
			1, -1, 1, 1,
			-1, 1, 0, 0,
			1, 1, 1, 0,
		]);

		const buffer = this.gl.createBuffer();
		this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer);
		this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW);

		const positionLocation = this.gl.getAttribLocation(this.program, 'a_position');
		const texCoordLocation = this.gl.getAttribLocation(this.program, 'a_texCoord');

		// Stride of 16 bytes = 4 floats per vertex; the texture coordinate starts 8 bytes in
		this.gl.enableVertexAttribArray(positionLocation);
		this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0);

		this.gl.enableVertexAttribArray(texCoordLocation);
		this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8);

		return vao;
	}

	/** Creates a 2D texture with edge clamping and linear filtering. */
	private createTexture(): WebGLTexture {
		assert(this.gl);

		const texture = this.gl.createTexture();
		this.gl.bindTexture(this.gl.TEXTURE_2D, texture);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR);

		return texture;
	}

	/** GPU path: uploads both frames as textures, renders the merged image and wraps the canvas in a new frame. */
	private updateGpu(color: VideoFrame, alpha: VideoFrame): VideoFrame {
		assert(this.gl);
		assert(this.canvas);

		try {
			if (color.displayWidth !== this.canvas.width || color.displayHeight !== this.canvas.height) {
				this.canvas.width = color.displayWidth;
				this.canvas.height = color.displayHeight;
			}

			this.gl.activeTexture(this.gl.TEXTURE0);
			this.gl.bindTexture(this.gl.TEXTURE_2D, this.colorTexture);
			this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, color);

			this.gl.activeTexture(this.gl.TEXTURE1);
			this.gl.bindTexture(this.gl.TEXTURE_2D, this.alphaTexture);
			this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, alpha);

			this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
			this.gl.clear(this.gl.COLOR_BUFFER_BIT);

			this.gl.bindVertexArray(this.vao);
			this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);

			return new VideoFrame(this.canvas, {
				timestamp: color.timestamp,
				duration: color.duration ?? undefined,
			});
		} finally {
			// We own the input frames, so they must be released even if uploading or frame creation threw
			color.close();
			alpha.close();
		}
	}

	/**
	 * CPU path: transfers both frames to the merge worker and resolves with the frame it sends back. The returned
	 * promise rejects if the worker reports an error or if the merger is closed before a response arrives.
	 */
	private updateCpu(color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> {
		let id: number | null = null;

		try {
			const worker = this.getOrCreateWorker();

			id = this.nextRequestId++;
			const pending = promiseWithResolvers<VideoFrame>();
			this.pendingRequests.set(id, pending);

			worker.postMessage({ id, color, alpha }, { transfer: [color, alpha] });

			return pending.promise;
		} catch (error) {
			// The request never made it to the worker, so nobody else is going to clean up after it: forget the
			// request and release the frames we own.
			if (id !== null) {
				this.pendingRequests.delete(id);
			}
			color.close();
			alpha.close();

			throw error;
		}
	}

	/** Returns the merge worker, spawning it and wiring up its response handling on first use. */
	private getOrCreateWorker(): Worker {
		if (this.worker) {
			return this.worker;
		}

		const blob = new Blob(
			[`(${colorAlphaMergerWorkerCode.toString()})()`],
			{ type: 'application/javascript' },
		);
		const url = URL.createObjectURL(blob);

		try {
			const worker = new Worker(url);

			worker.addEventListener('message', (event: MessageEvent<ColorAlphaMergerWorkerResponse>) => {
				const data = event.data;
				const pending = this.pendingRequests.get(data.id);
				if (!pending) {
					return;
				}

				this.pendingRequests.delete(data.id);

				if ('error' in data) {
					pending.reject(new Error(data.error));
				} else {
					pending.resolve(data.frame);
				}
			});

			worker.addEventListener('error', (event) => {
				// An error event isn't tied to a specific request, so all outstanding requests are failed
				const error = new Error(event.message || 'Color/alpha merge worker error.');
				for (const pending of this.pendingRequests.values()) {
					pending.reject(error);
				}
				this.pendingRequests.clear();
			});

			this.worker = worker;
			return worker;
		} finally {
			// Release the object URL in every case, including when the Worker constructor throws
			URL.revokeObjectURL(url);
		}
	}

	/** Releases the WebGL context and the worker, and rejects all CPU merge requests that are still pending. */
	close() {
		this.gl?.getExtension('WEBGL_lose_context')?.loseContext();
		this.gl = null;
		this.canvas = null;

		this.worker?.terminate();
		this.worker = null;

		const error = new Error('Color/alpha merger closed.');
		for (const pending of this.pendingRequests.values()) {
			pending.reject(error);
		}
		this.pendingRequests.clear();
	}
}

type ColorAlphaMergerWorkerRequest = {
	id: number;
	color: VideoFrame;
	alpha: VideoFrame;
};

type ColorAlphaMergerWorkerResponse =
	| { id: number; frame: VideoFrame }
	| { id: number; error: string };

const colorAlphaMergerWorkerCode = () => {
	// These buffers are reused across frames as long as the size matches, since consecutive frames usually share
	// dimensions
	let cpuAlphaBuffer: Uint8Array | null = null;
	let cpuColorBuffer: Uint8Array | null = null;

	// Serialize execution internally so concurrent requests don't race on the shared cpu*Buffer state.
	let chain: Promise<void> = Promise.resolve();

	self.addEventListener('message', (event: MessageEvent<ColorAlphaMergerWorkerRequest>) => {
		const { id, color, alpha } = event.data;

		chain = chain.then(async () => {
			try {
				const frame = await merge(color, alpha);
				self.postMessage({ id, frame }, { transfer: [frame] });
			} catch (error) {
				self.postMessage({ id, error: (error as Error).message });
			} finally {
				// We took ownership of the inputs via transfer; close them now that the merge (or its error) is done.
color.close(); alpha.close(); } }); }); const merge = async (color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> => { const format = color.format as VideoSamplePixelFormat | null; const alphaFormat = alpha.format as VideoSamplePixelFormat | null; if (!format || !alphaFormat) { throw new Error('CPU color/alpha merging requires a known VideoFrame format.'); } // The alpha frame must have the same bit depth as the color frame const colorIs10 = format.includes('P10'); const colorIs12 = format.includes('P12'); const alphaIs10 = alphaFormat.includes('P10'); const alphaIs12 = alphaFormat.includes('P12'); if (alphaIs10 !== colorIs10 || alphaIs12 !== colorIs12) { throw new Error( `CPU color/alpha merging requires the alpha frame to have the same bit depth as the color frame` + ` (color: '${format}', alpha: '${alphaFormat}').`, ); } const width = color.codedWidth; const height = color.codedHeight; if (format === 'RGBX' || format === 'RGBA' || format === 'BGRX' || format === 'BGRA') { return await mergeInterleavedRgba(color, alpha, width, height, format); } else if ( format === 'I420' || format === 'I420P10' || format === 'I420P12' || format === 'I422' || format === 'I422P10' || format === 'I422P12' || format === 'I444' || format === 'I444P10' || format === 'I444P12' )