mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,576 lines (1,334 loc) • 87.3 kB
text/typescript
/*!
* Copyright (c) 2026-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import { parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, VideoCodec, AudioCodec } from './codec';
import {
concatAvcNalUnits,
deserializeAvcDecoderConfigurationRecord,
determineVideoPacketType,
extractNalUnitTypeForAvc,
extractNalUnitTypeForHevc,
HevcNalUnitType,
iterateAvcNalUnits,
iterateHevcNalUnits,
parseAvcSps,
sanitizeHevcPacketForChromium,
} from './codec-data';
import { CustomVideoDecoder, customVideoDecoders, CustomAudioDecoder, customAudioDecoders } from './custom-coder';
import { InputDisposedError } from './input';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
AnyIterable,
assert,
assertNever,
CallSerializer,
getInt24,
getUint24,
insertSorted,
isChromium,
isFirefox,
isNumber,
isWebKit,
last,
mapAsyncGenerator,
promiseWithResolvers,
Rotation,
toAsyncIterator,
toDataView,
toUint8Array,
validateAnyIterable,
} from './misc';
import { EncodedPacket } from './packet';
import { fromAlaw, fromUlaw } from './pcm';
import {
AudioSample,
clampCropRectangle,
CropRectangle,
validateCropRectangle,
VideoSample,
VideoSamplePixelFormat,
} from './sample';
/**
* Additional options for controlling packet retrieval.
* @group Media sinks
* @public
*/
export type PacketRetrievalOptions = {
/**
* When set to `true`, only packet metadata (like timestamp) will be retrieved - the actual packet data will not
* be loaded. Cannot be enabled together with `verifyKeyPackets`; doing so is rejected with a `TypeError` when the
* options are validated.
*/
metadataOnly?: boolean;
/**
* When set to `true`, key packets will be verified upon retrieval by looking into the packet's bitstream.
* If not enabled, the packet types will be determined solely by what's stored in the containing file and may be
* incorrect, potentially leading to decoder errors. Since determining a packet's actual type requires looking into
* its data, this option cannot be enabled together with `metadataOnly`.
*/
verifyKeyPackets?: boolean;
/**
* When querying packets in live media that are in the future relative to the current live edge, Mediabunny will,
* by default, wait for the stream to advance until the query can be satisfied. In a sense, Mediabunny simply treats
* live streams as media files that are still being written, and any read that depends on future information will
* wait until it can be fulfilled.
*
* If you want to query packets based only on the currently known information, set this field to `true` - this way,
* Mediabunny will never wait for the live stream to catch up.
*
* For non-live media, this field has no effect.
*/
skipLiveWait?: boolean;
};
/**
 * Ensures that `options` is a well-formed {@link PacketRetrievalOptions} object. The checks run in a fixed order
 * and a `TypeError` describing the first violation is thrown; nothing is returned when the options are valid.
 */
const validatePacketRetrievalOptions = (options: PacketRetrievalOptions) => {
	// A field passes if it is absent (undefined) or an actual boolean
	const isOptionalBoolean = (value: unknown) => value === undefined || typeof value === 'boolean';

	if (typeof options !== 'object' || options === null) {
		throw new TypeError('options must be an object.');
	}

	if (!isOptionalBoolean(options.metadataOnly)) {
		throw new TypeError('options.metadataOnly, when defined, must be a boolean.');
	}

	if (!isOptionalBoolean(options.verifyKeyPackets)) {
		throw new TypeError('options.verifyKeyPackets, when defined, must be a boolean.');
	}

	// Verifying a packet's type needs its data, which metadata-only retrieval does not load
	if (options.metadataOnly && options.verifyKeyPackets) {
		throw new TypeError('options.verifyKeyPackets and options.metadataOnly cannot be enabled together.');
	}

	if (!isOptionalBoolean(options.skipLiveWait)) {
		throw new TypeError('options.skipLiveWait, when defined, must be a boolean.');
	}
};
/**
 * Throws a `TypeError` unless `timestamp` passes the `isNumber` check. Non-finite values are deliberately let
 * through.
 */
const validateTimestamp = (timestamp: number) => {
	if (isNumber(timestamp)) {
		// It can be non-finite, that's fine
		return;
	}

	throw new TypeError('timestamp must be a number.');
};
/**
 * Optionally post-processes a pending packet so that its `type` reflects what `track.determinePacketType` reports.
 *
 * When `options.verifyKeyPackets` is not set, the given promise is returned untouched. Otherwise, a derived promise
 * is returned: packets that are missing or already marked as `'delta'` pass through unchanged, while any other
 * packet is inspected and, if a type could be determined, has its `type` overwritten in place.
 */
const maybeFixPacketType = (
	track: InputTrack,
	promise: Promise<EncodedPacket | null>,
	options: PacketRetrievalOptions,
) => {
	if (!options.verifyKeyPackets) {
		// Nothing to verify; hand back the very same promise
		return promise;
	}

	const applyDeterminedType = async (packet: EncodedPacket | null) => {
		if (packet && packet.type !== 'delta') {
			const actualType = await track.determinePacketType(packet);

			// A falsy result means the type could not be determined, so the stored type is kept
			if (actualType) {
				// @ts-expect-error Technically readonly
				packet.type = actualType;
			}
		}

		return packet;
	};

	return promise.then(applyDeterminedType);
};
/**
* Sink for retrieving encoded packets from an input track.
*
* Every retrieval method validates its arguments first and fails with an {@link InputDisposedError} if the track's
* input has already been disposed. The actual lookups are delegated to the track's backing (`_track._backing`).
* @group Media sinks
* @public
*/
export class EncodedPacketSink {
/** @internal */
_track: InputTrack;
/**
* Creates a new {@link EncodedPacketSink} for the given {@link InputTrack}. Throws a `TypeError` if `track` is not
* an {@link InputTrack} instance.
*/
constructor(track: InputTrack) {
if (!(track instanceof InputTrack)) {
throw new TypeError('track must be an InputTrack.');
}
this._track = track;
}
/**
* Retrieves the track's first packet (in decode order), or null if it has no packets. The first packet is very
* likely to be a key packet, but it doesn't have to be.
*/
async getFirstPacket(options: PacketRetrievalOptions = {}) {
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
return maybeFixPacketType(this._track, this._track._backing.getFirstPacket(options), options);
}
/** Retrieves the track's first key packet (in decode order), or null if it has no key packets. */
async getFirstKeyPacket(options: PacketRetrievalOptions = {}) {
validatePacketRetrievalOptions(options);
const firstPacket = await this.getFirstPacket(options);
if (!firstPacket) {
return null;
}
if (firstPacket.type === 'key') {
// Great
return firstPacket;
}
// The first packet is not a key packet, so search forward from it
return this.getNextKeyPacket(firstPacket, options);
}
/**
* Retrieves the packet corresponding to the given timestamp, in seconds. More specifically, returns the last packet
* (in presentation order) with a start timestamp less than or equal to the given timestamp. This method can be
* used to retrieve a track's last packet using `getPacket(Infinity)`. The method returns null if the timestamp
* is before the first packet in the track.
*
* @param timestamp - The timestamp used for retrieval, in seconds.
*/
async getPacket(timestamp: number, options: PacketRetrievalOptions = {}) {
validateTimestamp(timestamp);
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
return maybeFixPacketType(this._track, this._track._backing.getPacket(timestamp, options), options);
}
/**
* Retrieves the packet following the given packet (in decode order), or null if the given packet is the
* last packet.
*/
async getNextPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}) {
if (!(packet instanceof EncodedPacket)) {
throw new TypeError('packet must be an EncodedPacket.');
}
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
return maybeFixPacketType(this._track, this._track._backing.getNextPacket(packet, options), options);
}
/**
* Retrieves the key packet corresponding to the given timestamp, in seconds. More specifically, returns the last
* key packet (in presentation order) with a start timestamp less than or equal to the given timestamp. A key packet
* is a packet that doesn't require previous packets to be decoded. This method can be used to retrieve a track's
* last key packet using `getKeyPacket(Infinity)`. The method returns null if the timestamp is before the first
* key packet in the track.
*
* To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
*
* @param timestamp - The timestamp used for retrieval, in seconds.
*/
async getKeyPacket(timestamp: number, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
validateTimestamp(timestamp);
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
if (!options.verifyKeyPackets) {
// No verification requested: return whatever the backing considers a key packet
return this._track._backing.getKeyPacket(timestamp, options);
}
const packet = await this._track._backing.getKeyPacket(timestamp, options);
if (!packet) {
return packet;
}
assert(packet.type === 'key');
const determinedType = await this._track.determinePacketType(packet);
if (determinedType === 'delta') {
// Try returning the previous key packet (in hopes that it's actually a key packet). The query timestamp is
// moved to just before this packet by subtracting the reciprocal of the track's time resolution
// (presumably one tick of the track's timescale - TODO confirm).
return this.getKeyPacket(packet.timestamp - 1 / await this._track.getTimeResolution(), options);
}
return packet;
}
/**
* Retrieves the key packet following the given packet (in decode order), or null if the given packet is the last
* key packet.
*
* To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
*/
async getNextKeyPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
if (!(packet instanceof EncodedPacket)) {
throw new TypeError('packet must be an EncodedPacket.');
}
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
if (!options.verifyKeyPackets) {
// No verification requested: return whatever the backing considers the next key packet
return this._track._backing.getNextKeyPacket(packet, options);
}
const nextPacket = await this._track._backing.getNextKeyPacket(packet, options);
if (!nextPacket) {
return nextPacket;
}
assert(nextPacket.type === 'key');
const determinedType = await this._track.determinePacketType(nextPacket);
if (determinedType === 'delta') {
// Try returning the next key packet (in hopes that it's actually a key packet)
return this.getNextKeyPacket(nextPacket, options);
}
return nextPacket;
}
/**
* Creates an async iterator that yields the packets in this track in decode order. To enable fast iteration, this
* method will intelligently preload packets based on the speed of the consumer.
*
* Note that this is a regular method returning an iterator object: argument validation and the disposed check run
* synchronously at call time, and packet retrieval starts immediately rather than on the first `next()` call.
*
* @param startPacket - (optional) The packet from which iteration should begin. This packet will also be yielded.
* @param endPacket - (optional) The packet at which iteration should end. This packet will _not_ be yielded.
* @param options - (optional) Retrieval options applied to every packet lookup made by the iterator.
*/
packets(
startPacket?: EncodedPacket,
endPacket?: EncodedPacket,
options: PacketRetrievalOptions = {},
): AsyncGenerator<EncodedPacket, void, unknown> {
if (startPacket !== undefined && !(startPacket instanceof EncodedPacket)) {
throw new TypeError('startPacket must be an EncodedPacket.');
}
if (startPacket !== undefined && startPacket.isMetadataOnly && !options?.metadataOnly) {
throw new TypeError('startPacket can only be metadata-only if options.metadataOnly is enabled.');
}
if (endPacket !== undefined && !(endPacket instanceof EncodedPacket)) {
throw new TypeError('endPacket must be an EncodedPacket.');
}
validatePacketRetrievalOptions(options);
if (this._track.input._disposed) {
throw new InputDisposedError();
}
// Packets that have been retrieved by the pump but not yet handed to the consumer
const packetQueue: EncodedPacket[] = [];
// Two single-use signals: the first wakes a waiting consumer, the second wakes a waiting pump. Each is replaced
// by a fresh promise whenever a new wait cycle begins.
let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
// Set by the pump once it has run out of packets (or was stopped)
let ended = false;
// Set when the consumer calls return() on the iterator
let terminated = false;
// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
// the consumer.
let outOfBandError = null as Error | null;
// performance.now() readings of recent dequeues; entries at least 1000 ms older than the newest are dropped
const timestamps: number[] = [];
// The queue should always be big enough to hold 1 second worth of packets
const maxQueueSize = () => Math.max(2, timestamps.length);
// The following is the "pump" process that keeps pumping packets into the queue
(async () => {
let packet = startPacket ?? await this.getFirstPacket(options);
while (packet && !terminated && !this._track.input._disposed) {
if (endPacket && packet.sequenceNumber >= endPacket?.sequenceNumber) {
break;
}
if (packetQueue.length > maxQueueSize()) {
// Queue is full: wait until the consumer takes a packet (or terminates), then re-check everything
({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
await queueDequeue;
continue;
}
packetQueue.push(packet);
// Wake the consumer if it is waiting, then arm a fresh signal for the next wait
onQueueNotEmpty();
({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
packet = await this.getNextPacket(packet, options);
}
ended = true;
onQueueNotEmpty();
})().catch((error: Error) => {
// Only the first error is kept; wake the consumer so it can rethrow it
if (!outOfBandError) {
outOfBandError = error;
onQueueNotEmpty();
}
});
const track = this._track;
return {
async next() {
// Conditions are checked in priority order: disposal, termination, pump error, buffered packets, end of
// stream. If none applies, wait for the pump to signal and check again.
while (true) {
if (track.input._disposed) {
throw new InputDisposedError();
} else if (terminated) {
return { value: undefined, done: true };
} else if (outOfBandError) {
throw outOfBandError;
} else if (packetQueue.length > 0) {
const value = packetQueue.shift()!;
// Record when this dequeue happened and forget dequeues from a second or more ago; the number of
// remaining entries drives maxQueueSize()
const now = performance.now();
timestamps.push(now);
while (timestamps.length > 0 && now - timestamps[0]! >= 1000) {
timestamps.shift();
}
onQueueDequeue();
return { value, done: false };
} else if (ended) {
return { value: undefined, done: true };
} else {
await queueNotEmpty;
}
}
},
async return() {
// Stop the pump and release anything currently waiting on either signal
terminated = true;
onQueueDequeue();
onQueueNotEmpty();
return { value: undefined, done: true };
},
async throw(error) {
// Simply rethrows; the iterator is not marked as terminated here
throw error;
},
[Symbol.asyncIterator]() {
return this;
},
};
}
}
/**
 * Abstract decoder interface used by the sample sinks: packets go in via `decode`, and the two callbacks stored on
 * the instance are the channels through which samples and errors are reported back.
 */
abstract class DecoderWrapper<
	MediaSample extends VideoSample | AudioSample,
> {
	/** Callback that receives the samples produced by this decoder. */
	onSample: (sample: MediaSample) => unknown;
	/** Callback that receives errors raised by this decoder. */
	onError: (error: Error) => unknown;

	constructor(
		onSample: (sample: MediaSample) => unknown,
		onError: (error: Error) => unknown,
	) {
		this.onSample = onSample;
		this.onError = onError;
	}

	/** Returns the current size of the decode queue; callers use it to apply backpressure. */
	abstract getDecodeQueueSize(): number;
	/** Submits a packet for decoding. */
	abstract decode(packet: EncodedPacket): void;
	/** Returns a promise for the completion of a decoder flush. */
	abstract flush(): Promise<void>;
	/** Closes the decoder. */
	abstract close(): void;
}
/**
 * Base class for decoded media sample sinks.
 *
 * Subclasses provide the track, a decoder factory and a packet sink factory; this class implements the two shared
 * retrieval strategies (a contiguous time range, and a list of individual timestamps) on top of them. Both
 * strategies run a background "pump" that feeds packets into the decoder while the returned iterator hands decoded
 * samples to the consumer.
 * @group Media sinks
 * @public
 */
export abstract class BaseMediaSampleSink<
	MediaSample extends VideoSample | AudioSample,
> {
	/** @internal */
	abstract _track: InputTrack;
	/** @internal */
	abstract _createDecoder(
		onSample: (sample: MediaSample) => unknown,
		onError: (error: Error) => unknown
	): Promise<DecoderWrapper<MediaSample>>;
	/** @internal */
	abstract _createPacketSink(): EncodedPacketSink;

	/**
	 * Creates an async iterator that yields the decoded samples covering the given time range.
	 *
	 * Decoding starts at the key packet for `startTimestamp` (or the first key packet if there is none). The first
	 * yielded sample is the last one whose timestamp is less than or equal to `startTimestamp`; samples with a
	 * timestamp greater than or equal to `endTimestamp` are closed and never yielded. If no decoded sample reaches
	 * `startTimestamp`, the last decoded sample is yielded once the decoder has been flushed.
	 *
	 * Samples returned from `next()` belong to the consumer. Samples that are still buffered are closed when the
	 * iterator is returned, when an error surfaces, or when the input is found to be disposed.
	 *
	 * @param startTimestamp - Start of the range.
	 * @param endTimestamp - End of the range (exclusive).
	 * @param options - Packet retrieval options; `verifyKeyPackets` is forced on and `metadataOnly` forced off.
	 * @internal
	 */
	protected mediaSamplesInRange(
		startTimestamp = -Infinity,
		endTimestamp = Infinity,
		options: PacketRetrievalOptions,
	): AsyncGenerator<MediaSample, void, unknown> {
		validateTimestamp(startTimestamp);
		validateTimestamp(endTimestamp);

		// Decoded samples waiting to be picked up by the consumer
		const sampleQueue: MediaSample[] = [];
		let firstSampleQueued = false;
		// The most recent sample seen before anything was queued; it is the candidate for being the first sample.
		// It is owned by this method until it is either closed or moved into sampleQueue.
		let lastSample: MediaSample | null = null;
		// Single-use signals: the first wakes a waiting consumer, the second wakes a waiting pump
		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let decoderIsFlushed = false;
		let ended = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		const packetRetrievalOptions: PacketRetrievalOptions = {
			...options,
			verifyKeyPackets: true,
			metadataOnly: false,
		};

		// The following is the "pump" process that keeps pumping packets into the decoder
		(async () => {
			const decoder = await this._createDecoder((sample) => {
				onQueueDequeue();

				if (sample.timestamp >= endTimestamp) {
					ended = true;
				}

				if (ended) {
					sample.close();
					return;
				}

				if (lastSample) {
					if (sample.timestamp > startTimestamp) {
						// We don't know ahead of time what the first sample is. This is because the first sample is
						// the last sample whose timestamp is less than or equal to the start timestamp. Therefore we
						// need to wait for the first sample after the start timestamp, and then we'll know that the
						// previous sample was the first sample.
						sampleQueue.push(lastSample);
						firstSampleQueued = true;
					} else {
						lastSample.close();
					}
				}

				if (sample.timestamp >= startTimestamp) {
					sampleQueue.push(sample);
					firstSampleQueued = true;
				}

				// Once something has been queued there is no candidate to track anymore
				lastSample = firstSampleQueued ? null : sample;

				if (sampleQueue.length > 0) {
					onQueueNotEmpty();
					({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
				}
			}, (error) => {
				if (!outOfBandError) {
					outOfBandError = error;
					onQueueNotEmpty();
				}
			});

			const packetSink = this._createPacketSink();
			const keyPacket = await packetSink.getKeyPacket(startTimestamp, packetRetrievalOptions)
				?? await packetSink.getFirstKeyPacket(packetRetrievalOptions);

			let currentPacket: EncodedPacket | null = keyPacket;

			// B-frames make it exceedingly difficult to properly define an upper bound for packet iteration if an end
			// timestamp is set, so we just don't do it. The case that makes it especially tricky is when the frames
			// following a key frame have a lower timestamp than the keyframe; something that quite frequently happens
			// in HEVC streams. The price to pay for not upper-bounding the packet iterator is a slight increase in
			// decoder work at the end of the range, but the added correctness and reliability makes this tradeoff worth
			// it.
			const endPacket = undefined;

			const packets = packetSink.packets(keyPacket ?? undefined, endPacket, packetRetrievalOptions);
			await packets.next(); // Skip the start packet as we already have it

			while (currentPacket && !ended && !this._track.input._disposed) {
				const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
				if (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize) {
					// Too much work in flight: wait for a sample to be output or consumed, then re-check
					({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
					await queueDequeue;
					continue;
				}

				decoder.decode(currentPacket);

				const packetResult = await packets.next();
				if (packetResult.done) {
					break;
				}

				currentPacket = packetResult.value;
			}

			await packets.return();

			if (!terminated && !this._track.input._disposed) {
				await decoder.flush();
			}
			decoder.close();

			if (!firstSampleQueued && lastSample) {
				// No sample ever reached the start timestamp, so the final candidate is the one to deliver
				sampleQueue.push(lastSample);
				// Ownership has moved to the queue (and from there to the consumer). Drop our reference so that
				// closeSamples() cannot later close a sample the consumer has already received.
				lastSample = null;
			}

			decoderIsFlushed = true;
			onQueueNotEmpty(); // To unstuck the generator
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;

		// Closes every sample this method still owns: the pending first-sample candidate and all queued samples
		const closeSamples = () => {
			lastSample?.close();
			for (const sample of sampleQueue) {
				sample.close();
			}
		};

		return {
			async next() {
				// Checked in priority order: disposal, termination, pump/decoder error, buffered samples, end of
				// stream. If none applies, wait for the pump to signal and check again.
				while (true) {
					if (track.input._disposed) {
						closeSamples();
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						closeSamples();
						throw outOfBandError;
					} else if (sampleQueue.length > 0) {
						const value = sampleQueue.shift()!;
						onQueueDequeue();
						return { value, done: false };
					} else if (!decoderIsFlushed) {
						await queueNotEmpty;
					} else {
						return { value: undefined, done: true };
					}
				}
			},
			async return() {
				// Stop the pump, release anything waiting on either signal, and close what is still buffered
				terminated = true;
				ended = true;
				onQueueDequeue();
				onQueueNotEmpty();
				closeSamples();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}

	/**
	 * Creates an async iterator that yields one entry per input timestamp, in input order: the decoded sample for
	 * the packet returned by the packet sink's `getPacket(timestamp)`, or `null` if no packet or key packet exists
	 * for that timestamp (or the decoder did not produce a matching sample).
	 *
	 * Consecutive timestamps that share a key packet and do not go back in time are decoded as one batch starting at
	 * that key packet. When several timestamps resolve to the same sample, clones are emitted so that each yielded
	 * sample can be closed independently.
	 *
	 * @param timestamps - Sync or async iterable of timestamps to look up.
	 * @param options - Packet retrieval options; `verifyKeyPackets` is forced on and `metadataOnly` forced off.
	 * @internal
	 */
	protected mediaSamplesAtTimestamps(
		timestamps: AnyIterable<number>,
		options: PacketRetrievalOptions,
	): AsyncGenerator<MediaSample | null, void, unknown> {
		validateAnyIterable(timestamps);
		const timestampIterator = toAsyncIterator(timestamps);

		// Packet timestamps whose samples still need to be emitted, in request order
		const timestampsOfInterest: number[] = [];
		const sampleQueue: (MediaSample | null)[] = [];
		// Single-use signals: the first wakes a waiting consumer, the second wakes a waiting pump
		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let decoderIsFlushed = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		const pushToQueue = (sample: MediaSample | null) => {
			sampleQueue.push(sample);
			onQueueNotEmpty();
			({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
		};

		const retrievalOptions: PacketRetrievalOptions = {
			...options,
			verifyKeyPackets: true,
			metadataOnly: false,
		};

		// The following is the "pump" process that keeps pumping packets into the decoder
		(async () => {
			const decoder = await this._createDecoder((sample) => {
				onQueueDequeue();

				if (terminated) {
					sample.close();
					return;
				}

				// Count how many pending requests this sample satisfies
				let sampleUses = 0;
				while (
					timestampsOfInterest.length > 0
					&& sample.timestamp - timestampsOfInterest[0]! > -1e-10 // Give it a little epsilon
				) {
					sampleUses++;
					timestampsOfInterest.shift();
				}

				if (sampleUses > 0) {
					for (let i = 0; i < sampleUses; i++) {
						// Clone the sample if we need to emit it multiple times
						pushToQueue((i < sampleUses - 1 ? sample.clone() : sample) as MediaSample);
					}
				} else {
					sample.close();
				}
			}, (error) => {
				if (!outOfBandError) {
					outOfBandError = error;
					onQueueNotEmpty();
				}
			});

			const packetSink = this._createPacketSink();
			let lastPacket: EncodedPacket | null = null;
			let lastKeyPacket: EncodedPacket | null = null;

			// The end sequence number (inclusive) in the next batch of packets that will be decoded. The batch starts
			// at the last key frame and goes until this sequence number.
			let maxSequenceNumber = -1;

			const decodePackets = async () => {
				assert(lastKeyPacket);

				// Start at the current key packet
				let currentPacket = lastKeyPacket;
				decoder.decode(currentPacket);

				while (currentPacket.sequenceNumber < maxSequenceNumber) {
					// NOTE(review): this limit is computed once per packet and is not refreshed while waiting
					// below, unlike the loop in mediaSamplesInRange - confirm this is intended.
					const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
					while (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize && !terminated) {
						({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
						await queueDequeue;
					}

					if (terminated) {
						break;
					}

					const nextPacket = await packetSink.getNextPacket(currentPacket, retrievalOptions);
					assert(nextPacket);

					decoder.decode(nextPacket);
					currentPacket = nextPacket;
				}

				maxSequenceNumber = -1;
			};

			const flushDecoder = async () => {
				await decoder.flush();

				// We don't expect this list to have any elements in it anymore, but in case it does, let's emit
				// nulls for every remaining element, then clear it.
				for (let i = 0; i < timestampsOfInterest.length; i++) {
					pushToQueue(null);
				}
				timestampsOfInterest.length = 0;
			};

			for await (const timestamp of timestampIterator) {
				validateTimestamp(timestamp);

				if (terminated || this._track.input._disposed) {
					break;
				}

				const targetPacket = await packetSink.getPacket(timestamp, retrievalOptions);
				const keyPacket = targetPacket && await packetSink.getKeyPacket(timestamp, retrievalOptions);

				if (!keyPacket) {
					// Nothing can be decoded for this timestamp. Finish any batch collected so far first so that the
					// null lands in the right position of the output.
					if (maxSequenceNumber !== -1) {
						await decodePackets();
						await flushDecoder();
					}

					pushToQueue(null);
					lastPacket = null;

					continue;
				}

				// Check if the key packet has changed or if we're going back in time
				if (
					lastPacket
					&& (
						keyPacket.sequenceNumber !== lastKeyPacket!.sequenceNumber
						|| targetPacket.timestamp < lastPacket.timestamp
					)
				) {
					await decodePackets();
					await flushDecoder(); // Always flush here, improves decoder compatibility
				}

				timestampsOfInterest.push(targetPacket.timestamp);
				maxSequenceNumber = Math.max(targetPacket.sequenceNumber, maxSequenceNumber);

				lastPacket = targetPacket;
				lastKeyPacket = keyPacket;
			}

			if (!terminated && !this._track.input._disposed) {
				if (maxSequenceNumber !== -1) {
					// We still need to decode packets
					await decodePackets();
				}

				await flushDecoder();
			}
			decoder.close();

			decoderIsFlushed = true;
			onQueueNotEmpty(); // To unstuck the generator
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;

		// Closes every sample that is still queued (null placeholders are skipped)
		const closeSamples = () => {
			for (const sample of sampleQueue) {
				sample?.close();
			}
		};

		return {
			async next() {
				// Checked in priority order: disposal, termination, pump/decoder error, buffered entries, end of
				// stream. If none applies, wait for the pump to signal and check again.
				while (true) {
					if (track.input._disposed) {
						closeSamples();
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						closeSamples();
						throw outOfBandError;
					} else if (sampleQueue.length > 0) {
						const value = sampleQueue.shift();
						assert(value !== undefined);
						onQueueDequeue();
						return { value, done: false };
					} else if (!decoderIsFlushed) {
						await queueNotEmpty;
					} else {
						return { value: undefined, done: true };
					}
				}
			},
			async return() {
				// Stop the pump, release anything waiting on either signal, and close what is still buffered
				terminated = true;
				onQueueDequeue();
				onQueueNotEmpty();
				closeSamples();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}
}
/**
 * Returns the upper bound that callers compare against the sum of buffered decoded samples and the decoder's own
 * queue size.
 *
 * @param decodedSampleQueueSize - Number of decoded samples currently buffered.
 */
const computeMaxQueueSize = (decodedSampleQueueSize: number) => {
	if (decodedSampleQueueSize === 0) {
		// Nothing decoded is lying around, so we're fine with a much bigger queue of encoded packets waiting to be
		// decoded. In fact, some decoders only start flushing out decoded chunks when the packet queue is large
		// enough.
		return 40;
	}

	// Decoded samples can use up a lot of memory, so keep the total queue size small while any are buffered
	return 8;
};
class VideoDecoderWrapper extends DecoderWrapper<VideoSample> {
decoder: VideoDecoder | null = null;
customDecoder: CustomVideoDecoder | null = null;
customDecoderCallSerializer = new CallSerializer();
customDecoderQueueSize = 0;
inputTimestamps: number[] = []; // Timestamps input into the decoder, sorted.
sampleQueue: VideoSample[] = []; // Safari-specific thing, check usage.
currentPacketIndex = 0;
raslSkipped = false; // For HEVC stuff
// Alpha stuff
alphaDecoder: VideoDecoder | null = null;
alphaHadKeyframe = false;
colorQueue: VideoFrame[] = [];
alphaQueue: (VideoFrame | null)[] = [];
merger: ColorAlphaMerger | null = null;
decodedAlphaChunkCount = 0;
alphaDecoderQueueSize = 0;
/** Each value is the number of decoded alpha chunks at which a null alpha frame should be added. */
nullAlphaFrameQueue: number[] = [];
currentAlphaPacketIndex = 0;
alphaRaslSkipped = false; // For HEVC stuff
frameHandlerSerializer = new CallSerializer();
constructor(
onSample: (sample: VideoSample) => unknown,
onError: (error: Error) => unknown,
public codec: VideoCodec,
public decoderConfig: VideoDecoderConfig,
public rotation: Rotation,
public timeResolution: number,
) {
super(onSample, onError);
const MatchingCustomDecoder = customVideoDecoders.find(x => x.supports(codec, decoderConfig));
if (MatchingCustomDecoder) {
// @ts-expect-error "Can't create instance of abstract class 🤓"
this.customDecoder = new MatchingCustomDecoder() as CustomVideoDecoder;
// @ts-expect-error It's technically readonly
this.customDecoder.codec = codec;
// @ts-expect-error It's technically readonly
this.customDecoder.config = decoderConfig;
// @ts-expect-error It's technically readonly
this.customDecoder.onSample = (sample) => {
if (!(sample instanceof VideoSample)) {
throw new TypeError('The argument passed to onSample must be a VideoSample.');
}
this.finalizeAndEmitSample(sample);
};
void this.customDecoderCallSerializer.call(() => this.customDecoder!.init());
} else {
const colorHandler = (frame: VideoFrame) => {
this.frameHandlerSerializer.call(async () => {
if (this.alphaQueue.length > 0) {
// Even when no alpha data is present (most of the time), there will be nulls in this queue
const alphaFrame = this.alphaQueue.shift();
assert(alphaFrame !== undefined);
await this.mergeAlpha(frame, alphaFrame);
} else {
this.colorQueue.push(frame);
}
}).catch((error: Error) => this.onError(error));
};
if (codec === 'avc' && this.decoderConfig.description && isChromium()) {
// Chromium has/had a bug with playing interlaced AVC (https://issues.chromium.org/issues/456919096)
// which can be worked around by requesting that software decoding be used. So, here we peek into the
// AVC description, if present, and switch to software decoding if we find interlaced content.
const record = deserializeAvcDecoderConfigurationRecord(toUint8Array(this.decoderConfig.description));
if (record && record.sequenceParameterSets.length > 0) {
const sps = parseAvcSps(record.sequenceParameterSets[0]!);
if (sps && sps.frameMbsOnlyFlag === 0) {
this.decoderConfig = {
...this.decoderConfig,
hardwareAcceleration: 'prefer-software',
};
}
}
}
const stack = new Error('Decoding error').stack;
this.decoder = new VideoDecoder({
output: (frame) => {
try {
colorHandler(frame);
} catch (error) {
this.onError(error as Error);
}
},
error: (error) => {
error.stack = stack; // Provide a more useful stack trace, the default one sucks
this.onError(error);
},
});
this.decoder.configure(this.decoderConfig);
}
}
getDecodeQueueSize() {
if (this.customDecoder) {
return this.customDecoderQueueSize;
} else {
assert(this.decoder);
return Math.max(
this.decoder.decodeQueueSize,
this.alphaDecoder?.decodeQueueSize ?? 0,
);
}
}
decode(packet: EncodedPacket) {
if (this.codec === 'hevc' && this.currentPacketIndex > 0 && !this.raslSkipped) {
if (this.hasHevcRaslPicture(packet.data)) {
return; // Drop
}
this.raslSkipped = true;
}
if (this.customDecoder) {
this.customDecoderQueueSize++;
void this.customDecoderCallSerializer
.call(() => this.customDecoder!.decode(packet))
.then(() => this.customDecoderQueueSize--);
} else {
assert(this.decoder);
if (!isWebKit()) {
insertSorted(this.inputTimestamps, packet.timestamp, x => x);
}
if (isChromium() && this.currentPacketIndex === 0) {
if (this.codec === 'avc') {
// Workaround for https://issues.chromium.org/issues/470109459
const filteredNalUnits: Uint8Array[] = [];
for (const loc of iterateAvcNalUnits(packet.data, this.decoderConfig)) {
const type = extractNalUnitTypeForAvc(packet.data[loc.offset]!);
// These trip up Chromium's key frame detection, so let's strip them
if (!(type >= 20 && type <= 31)) {
filteredNalUnits.push(packet.data.subarray(loc.offset, loc.offset + loc.length));
}
}
const newData = concatAvcNalUnits(filteredNalUnits, this.decoderConfig);
packet = new EncodedPacket(newData, packet.type, packet.timestamp, packet.duration);
} else if (this.codec === 'hevc') {
// Workaround for https://issues.chromium.org/issues/507611247
const sanitizedData = sanitizeHevcPacketForChromium(packet.data, this.decoderConfig);
if (sanitizedData) {
packet = new EncodedPacket(sanitizedData, packet.type, packet.timestamp, packet.duration);
}
}
}
this.decoder.decode(packet.toEncodedVideoChunk());
this.decodeAlphaData(packet);
}
this.currentPacketIndex++;
}
decodeAlphaData(packet: EncodedPacket) {
if (!packet.sideData.alpha) {
// No alpha side data in the packet, most common case
this.pushNullAlphaFrame();
return;
}
if (!this.merger) {
this.merger = new ColorAlphaMerger();
}
// Check if we need to set up the alpha decoder
if (!this.alphaDecoder) {
const alphaHandler = (frame: VideoFrame) => {
this.frameHandlerSerializer.call(async () => {
if (this.colorQueue.length > 0) {
const colorFrame = this.colorQueue.shift();
assert(colorFrame !== undefined);
await this.mergeAlpha(colorFrame, frame);
} else {
this.alphaQueue.push(frame);
}
// Check if any null frames have been queued for this point
this.decodedAlphaChunkCount++;
while (
this.nullAlphaFrameQueue.length > 0
&& this.nullAlphaFrameQueue[0] === this.decodedAlphaChunkCount
) {
this.nullAlphaFrameQueue.shift();
if (this.colorQueue.length > 0) {
const colorFrame = this.colorQueue.shift();
assert(colorFrame !== undefined);
await this.mergeAlpha(colorFrame, null);
} else {
this.alphaQueue.push(null);
}
}
this.alphaDecoderQueueSize--;
}).catch((error: Error) => this.onError(error));
};
const stack = new Error('Decoding error').stack;
this.alphaDecoder = new VideoDecoder({
output: (frame) => {
try {
alphaHandler(frame);
} catch (error) {
this.onError(error as Error);
}
},
error: (error) => {
error.stack = stack; // Provide a more useful stack trace, the default one sucks
this.onError(error);
},
});
this.alphaDecoder.configure(this.decoderConfig);
}
const type = determineVideoPacketType(this.codec, this.decoderConfig, packet.sideData.alpha);
// Alpha packets might follow a different key frame rhythm than the main packets. Therefore, before we start
// decoding, we must first find a packet that's actually a key frame. Until then, we treat the image as opaque.
if (!this.alphaHadKeyframe) {
this.alphaHadKeyframe = type === 'key';
}
if (this.alphaHadKeyframe) {
// Same RASL skipping logic as for color, unlikely to be hit (since who uses HEVC with separate alpha??) but
// here for symmetry.
if (this.codec === 'hevc' && this.currentAlphaPacketIndex > 0 && !this.alphaRaslSkipped) {
if (this.hasHevcRaslPicture(packet.sideData.alpha)) {
this.pushNullAlphaFrame();
return;
}
this.alphaRaslSkipped = true;
}
this.currentAlphaPacketIndex++;
this.alphaDecoder.decode(packet.alphaToEncodedVideoChunk(type ?? packet.type));
this.alphaDecoderQueueSize++;
} else {
this.pushNullAlphaFrame();
}
}
/**
 * Registers a `null` alpha frame (meaning "treat the matching color frame as opaque") while keeping it in order
 * relative to alpha chunks that are still being decoded.
 */
pushNullAlphaFrame() {
	const chunksInFlight = this.alphaDecoderQueueSize;

	if (chunksInFlight !== 0) {
		// The alpha decoder still has work queued up, and pushing `null` right now would place it ahead of those
		// frames. Instead, remember the decoded-chunk count at which the current workload will have completed, so
		// the `null` can be inserted at that point.
		this.nullAlphaFrameQueue.push(this.decodedAlphaChunkCount + chunksInFlight);
		return;
	}

	// Nothing is in flight, so the `null` can go straight into the alpha queue
	this.alphaQueue.push(null);
}
/**
* If we're using HEVC, we need to make sure to skip any RASL slices that follow a non-IDR key frame such as
* CRA_NUT. This is because RASL slices cannot be decoded without data before the CRA_NUT. Browsers behave
* differently here: Chromium drops the packets, Safari throws a decoder error. Either way, it's not good
* and causes bugs upstream. So, let's take the dropping into our own hands.
*/
hasHevcRaslPicture(packetData: Uint8Array) {
	// Walk over every NAL unit in the packet; finding a single RASL unit is enough
	for (const { offset } of iterateHevcNalUnits(packetData, this.decoderConfig)) {
		// The NAL unit type is extracted from the first byte of the NAL unit
		const nalUnitType = extractNalUnitTypeForHevc(packetData[offset]!);
		const isRasl = nalUnitType === HevcNalUnitType.RASL_N || nalUnitType === HevcNalUnitType.RASL_R;

		if (isRasl) {
			return true;
		}
	}

	return false;
}
/** Handler for the WebCodecs VideoDecoder for ironing out browser differences. */
sampleHandler(sample: VideoSample) {
	if (!isWebKit()) {
		// Browsers, by spec, must emit decoded frames in presentation order *while* keeping the timestamp of the
		// EncodedVideoChunk each frame originated from. For files that contain B-frames but no out-of-order
		// timestamps (for example, due to a missing ctts box), the two don't line up. To fix the timestamps and
		// guarantee they are sorted, each sample is handed the next earliest timestamp from the input.
		const nextInputTimestamp = this.inputTimestamps.shift();

		// We can't have received more decoded frames than the encoded packets we passed in; in fact, the
		// correspondence should be 1:1.
		assert(nextInputTimestamp !== undefined);

		sample.setTimestamp(nextInputTimestamp);
		this.finalizeAndEmitSample(sample);
		return;
	}

	// WebKit: Typically, WebCodecs guarantees that frames are emitted in presentation order, but Safari doesn't
	// always follow this rule. For correct B-frame handling, frames are therefore not handed over directly but are
	// first collected in a sorted queue. The queue is flushed each time a frame arrives whose timestamp is at least
	// as large as the highest one queued so far, as we can be sure that such a frame is not a B-frame.
	const queue = this.sampleQueue;
	const reachesNewHighest = queue.length > 0 && sample.timestamp >= last(queue)!.timestamp;

	if (reachesNewHighest) {
		for (const queuedSample of queue) {
			this.finalizeAndEmitSample(queuedSample);
		}

		queue.length = 0;
	}

	insertSorted(queue, sample, x => x.timestamp);
}
/**
 * Applies the final adjustments to a sample (timestamp/duration rounding and rotation) and then passes it to
 * `onSample`.
 */
finalizeAndEmitSample(sample: VideoSample) {
	const resolution = this.timeResolution;
	// Snaps a time value to the nearest multiple of 1 / resolution
	const snapToResolution = (value: number) => Math.round(value * resolution) / resolution;

	sample.setTimestamp(snapToResolution(sample.timestamp));
	sample.setDuration(snapToResolution(sample.duration));
	sample.setRotation(this.rotation);

	this.onSample(sample);
}
/**
 * Combines a color frame with its alpha frame (if there is one) and forwards the resulting sample to
 * `sampleHandler`. With a `null` alpha frame, the color frame is forwarded unchanged.
 */
async mergeAlpha(color: VideoFrame, alpha: VideoFrame | null) {
	let outputFrame = color;

	if (alpha) {
		assert(this.merger);

		// No need to close the input frames ourselves, the merger takes ownership of them
		outputFrame = await this.merger.update(color, alpha);
	}

	this.sampleHandler(new VideoSample(outputFrame));
}
/**
 * Flushes the underlying decoder(s), emits any samples that are still being held back, and resets the per-sequence
 * bookkeeping so that decoding can start afresh.
 */
async flush() {
	if (!this.customDecoder) {
		assert(this.decoder);

		// Flush the color decoder and, if present, the alpha decoder concurrently
		const pendingFlushes = [
			this.decoder.flush(),
			this.alphaDecoder?.flush(),
		];
		await Promise.all(pendingFlushes);

		// Wait on the frame handler serializer before touching the queues (presumably so that frame handlers which
		// are still running get to finish first)
		await this.frameHandlerSerializer.currentPromise;

		// Whatever is still queued at this point never found its counterpart, so release it
		for (const colorFrame of this.colorQueue) {
			colorFrame.close();
		}
		this.colorQueue.length = 0;

		for (const alphaFrame of this.alphaQueue) {
			alphaFrame?.close();
		}
		this.alphaQueue.length = 0;

		// Reset all alpha-related bookkeeping
		this.nullAlphaFrameQueue.length = 0;
		this.alphaDecoderQueueSize = 0;
		this.decodedAlphaChunkCount = 0;
		this.currentAlphaPacketIndex = 0;
		this.alphaHadKeyframe = false;
		this.alphaRaslSkipped = false;
	} else {
		await this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
	}

	if (isWebKit()) {
		// Emit the samples that were being held back for reordering
		for (const queuedSample of this.sampleQueue) {
			this.finalizeAndEmitSample(queuedSample);
		}

		this.sampleQueue.length = 0;
	}

	this.raslSkipped = false;
	this.currentPacketIndex = 0;
}
/** Closes the underlying decoder(s) and releases every frame and sample that is still queued. */
close() {
	if (!this.customDecoder) {
		assert(this.decoder);

		this.decoder.close();
		this.alphaDecoder?.close();

		for (const colorFrame of this.colorQueue) {
			colorFrame.close();
		}
		this.colorQueue.length = 0;

		for (const alphaFrame of this.alphaQueue) {
			alphaFrame?.close();
		}
		this.alphaQueue.length = 0;

		this.merger?.close();
	} else {
		// Closing goes through the call serializer; the resulting promise is intentionally not awaited
		void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
	}

	// Samples that were never emitted must still be closed
	this.sampleQueue.forEach(queuedSample => queuedSample.close());
	this.sampleQueue.length = 0;
}
}
// Set to `true` the first time GPU initialization fails inside a ColorAlphaMerger constructor, so that mergers
// created afterwards skip the GPU attempt entirely and use the CPU path.
let mergerGpuUnavailable = false;
/**
 * Utility class that merges color and alpha frames into a single frame, either with simple WebGL 2 shaders or, when
 * the GPU path is not used, on the CPU inside a Web Worker.
 */
export class ColorAlphaMerger {
	/**
	 * When `true`, the constructor never attempts the WebGL 2 path, so every merge goes through the worker-based
	 * CPU path.
	 *
	 * NOTE(review): This defaults to `true`, which leaves the GPU path unused unless a caller flips the flag before
	 * constructing a merger. Confirm that this default is intended.
	 */
	static forceCpu = true;

	/** Canvas backing the WebGL 2 context. `null` whenever the GPU path is not active. */
	canvas: OffscreenCanvas | HTMLCanvasElement | null = null;
	private gl: WebGL2RenderingContext | null = null;
	private program: WebGLProgram | null = null;
	private vao: WebGLVertexArrayObject | null = null;
	private colorTexture: WebGLTexture | null = null;
	private alphaTexture: WebGLTexture | null = null;

	/** Worker performing the CPU merges; created lazily on the first CPU merge. */
	private worker: Worker | null = null;
	/** CPU merge requests that have not been answered yet, keyed by the ID sent to the worker. */
	private pendingRequests = new Map<number, ReturnType<typeof promiseWithResolvers<VideoFrame>>>();
	private nextRequestId = 0;

	/**
	 * Sets up the GPU path if it is allowed and available. Any failure during GPU setup is caught, logged as a
	 * warning and remembered module-wide; the instance then uses the CPU path.
	 */
	constructor() {
		const canMakeCanvas = typeof OffscreenCanvas !== 'undefined'
			// eslint-disable-next-line @typescript-eslint/no-deprecated
			|| (typeof document !== 'undefined' && typeof document.createElement === 'function');

		if (ColorAlphaMerger.forceCpu || !canMakeCanvas || mergerGpuUnavailable) {
			// CPU path; nothing to set up here since the worker is created on demand
			return;
		}

		// Try the GPU path. If anything goes wrong, we fall back to the CPU path.
		try {
			// Canvas will be resized later
			if (typeof OffscreenCanvas !== 'undefined') {
				// Prefer OffscreenCanvas for Worker environments
				this.canvas = new OffscreenCanvas(300, 150);
			} else {
				this.canvas = document.createElement('canvas');
			}

			const gl = this.canvas.getContext('webgl2', {
				premultipliedAlpha: false,
			}) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness
			if (!gl) {
				throw new Error('Couldn\'t acquire WebGL 2 context.');
			}

			this.gl = gl;
			this.program = this.createProgram();
			this.vao = this.createVAO();
			this.colorTexture = this.createTexture();
			this.alphaTexture = this.createTexture();

			// Color is sampled from texture unit 0, alpha from texture unit 1
			gl.useProgram(this.program);
			gl.uniform1i(gl.getUniformLocation(this.program, 'u_colorTexture'), 0);
			gl.uniform1i(gl.getUniformLocation(this.program, 'u_alphaTexture'), 1);
		} catch (error) {
			// Don't keep a half-initialized context around
			this.releaseGpuResources();
			mergerGpuUnavailable = true;

			console.warn('Falling back to CPU for color/alpha merging.', error);
		}
	}

	/**
	 * Merges `color` and `alpha` into a new frame that carries the timestamp and duration of `color`. The merger
	 * takes ownership of both input frames; callers must not close or reuse them.
	 */
	async update(color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> {
		return this.gl
			? this.updateGpu(color, alpha)
			: this.updateCpu(color, alpha);
	}

	/**
	 * Compiles and links the merging program.
	 * @throws If the program fails to link, which includes the case of a shader failing to compile.
	 */
	private createProgram(): WebGLProgram {
		assert(this.gl);
		const gl = this.gl;

		// The `#version` directive must be the very first thing in the shader source, which is why the sources start
		// right after the opening backtick
		const vertexShader = this.createShader(gl.VERTEX_SHADER, `#version 300 es
in vec2 a_position;
in vec2 a_texCoord;
out vec2 v_texCoord;
void main() {
gl_Position = vec4(a_position, 0.0, 1.0);
v_texCoord = a_texCoord;
}
`);
		const fragmentShader = this.createShader(gl.FRAGMENT_SHADER, `#version 300 es
precision highp float;
uniform sampler2D u_colorTexture;
uniform sampler2D u_alphaTexture;
in vec2 v_texCoord;
out vec4 fragColor;
void main() {
vec3 color = texture(u_colorTexture, v_texCoord).rgb;
float alpha = texture(u_alphaTexture, v_texCoord).r;
fragColor = vec4(color, alpha);
}
`);

		const program = gl.createProgram();
		if (!program) {
			throw new Error('Couldn\'t create WebGL program.');
		}

		gl.attachShader(program, vertexShader);
		gl.attachShader(program, fragmentShader);
		gl.linkProgram(program);

		// Without this check, a broken program would go unnoticed and the constructor's CPU fallback would never
		// kick in. Checking only the link status suffices, since linking fails when a shader didn't compile.
		if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
			const details = [
				gl.getProgramInfoLog(program),
				gl.getShaderInfoLog(vertexShader),
				gl.getShaderInfoLog(fragmentShader),
			].filter(Boolean).join('\n');

			throw new Error(`Couldn't link color/alpha merging program. ${details}`.trim());
		}

		return program;
	}

	/**
	 * Creates a shader of the given type and kicks off compilation of `source`. Compilation errors surface when the
	 * program is linked.
	 * @throws If the shader object cannot be created.
	 */
	private createShader(type: number, source: string): WebGLShader {
		assert(this.gl);

		const shader = this.gl.createShader(type);
		if (!shader) {
			throw new Error('Couldn\'t create WebGL shader.');
		}

		this.gl.shaderSource(shader, source);
		this.gl.compileShader(shader);

		return shader;
	}

	/** Creates the vertex array describing the full-viewport quad that is drawn for every merge. */
	private createVAO(): WebGLVertexArrayObject {
		assert(this.gl);
		assert(this.program);
		const gl = this.gl;

		const vao = gl.createVertexArray();
		gl.bindVertexArray(vao);

		// Four vertices, each laid out as (x, y, u, v). The v coordinate runs opposite to y.
		const vertices = new Float32Array([
			-1, -1, 0, 1,
			1, -1, 1, 1,
			-1, 1, 0, 0,
			1, 1, 1, 0,
		]);
		const buffer = gl.createBuffer();
		gl.bindBuffer(gl.ARRAY_BUFFER, buffer);
		gl.bufferData(gl.ARRAY_BUFFER, vertices, gl.STATIC_DRAW);

		const positionLocation = gl.getAttribLocation(this.program, 'a_position');
		const texCoordLocation = gl.getAttribLocation(this.program, 'a_texCoord');

		// Stride is 16 bytes (four floats); positions start at byte 0, texture coordinates at byte 8
		gl.enableVertexAttribArray(positionLocation);
		gl.vertexAttribPointer(positionLocation, 2, gl.FLOAT, false, 16, 0);
		gl.enableVertexAttribArray(texCoordLocation);
		gl.vertexAttribPointer(texCoordLocation, 2, gl.FLOAT, false, 16, 8);

		return vao;
	}

	/** Creates a 2D texture with edge clamping and linear filtering. */
	private createTexture(): WebGLTexture {
		assert(this.gl);
		const gl = this.gl;

		const texture = gl.createTexture();
		gl.bindTexture(gl.TEXTURE_2D, texture);
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);

		return texture;
	}

	/** GPU merge: uploads both frames as textures, draws the quad, and captures the canvas as a new frame. */
	private updateGpu(color: VideoFrame, alpha: VideoFrame): VideoFrame {
		try {
			assert(this.gl);
			assert(this.canvas);
			const gl = this.gl;
			const canvas = this.canvas;

			if (color.displayWidth !== canvas.width || color.displayHeight !== canvas.height) {
				canvas.width = color.displayWidth;
				canvas.height = color.displayHeight;
			}

			gl.activeTexture(gl.TEXTURE0);
			gl.bindTexture(gl.TEXTURE_2D, this.colorTexture);
			gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, color);

			gl.activeTexture(gl.TEXTURE1);
			gl.bindTexture(gl.TEXTURE_2D, this.alphaTexture);
			gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, alpha);

			gl.viewport(0, 0, canvas.width, canvas.height);
			gl.clear(gl.COLOR_BUFFER_BIT);

			gl.bindVertexArray(this.vao);
			gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);

			return new VideoFrame(canvas, {
				timestamp: color.timestamp,
				duration: color.duration ?? undefined,
			});
		} finally {
			// We own the input frames, so they must be released even if something above threw
			color.close();
			alpha.close();
		}
	}

	/**
	 * CPU merge: transfers both frames to the worker and resolves with the frame it sends back. If the request
	 * cannot be dispatched, both frames are closed and the error is rethrown.
	 */
	private updateCpu(color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> {
		const id = this.nextRequestId++;
		const pending = promiseWithResolvers<VideoFrame>();

		try {
			const worker = this.getWorker();

			this.pendingRequests.set(id, pending);
			worker.postMessage({ id, color, alpha }, { transfer: [color, alpha] });
		} catch (error) {
			// The request never reached the worker, so the frames are still ours to release
			this.pendingRequests.delete(id);
			color.close();
			alpha.close();

			throw error;
		}

		return pending.promise;
	}

	/** Returns the merge worker, creating it and wiring up its event handlers on first use. */
	private getWorker(): Worker {
		if (this.worker) {
			return this.worker;
		}

		const blob = new Blob(
			[`(${colorAlphaMergerWorkerCode.toString()})()`],
			{ type: 'application/javascript' },
		);
		const url = URL.createObjectURL(blob);

		let worker: Worker;
		try {
			worker = new Worker(url);
		} finally {
			// Revoke the URL whether or not the Worker constructor threw
			URL.revokeObjectURL(url);
		}

		worker.addEventListener('message', (event: MessageEvent<ColorAlphaMergerWorkerResponse>) => {
			const data = event.data;
			const pending = this.pendingRequests.get(data.id);

			if (!pending) {
				// Nobody is waiting for this result anymore (the request was already rejected), so make sure the
				// merged frame doesn't leak
				if ('frame' in data) {
					data.frame.close();
				}

				return;
			}

			this.pendingRequests.delete(data.id);

			if ('error' in data) {
				pending.reject(new Error(data.error));
			} else {
				pending.resolve(data.frame);
			}
		});

		worker.addEventListener('error', (event) => {
			// A worker-level error cannot be attributed to a single request, so fail all of them
			this.rejectAllPending(new Error(event.message || 'Color/alpha merge worker error.'));
		});

		this.worker = worker;
		return worker;
	}

	/** Rejects every outstanding CPU merge request with `error` and forgets about them. */
	private rejectAllPending(error: Error) {
		for (const pending of this.pendingRequests.values()) {
			pending.reject(error);
		}

		this.pendingRequests.clear();
	}

	/** Drops the WebGL context (if any) along with all references to GPU objects. */
	private releaseGpuResources() {
		this.gl?.getExtension('WEBGL_lose_context')?.loseContext();

		this.gl = null;
		this.canvas = null;
		this.program = null;
		this.vao = null;
		this.colorTexture = null;
		this.alphaTexture = null;
	}

	/** Releases the WebGL context, terminates the worker, and rejects all merges that are still pending. */
	close() {
		this.releaseGpuResources();

		this.worker?.terminate();
		this.worker = null;

		this.rejectAllPending(new Error('Color/alpha merger closed.'));
	}
}
/**
 * Message sent to the color/alpha merge worker: the two frames to merge, tagged with an ID that the worker echoes
 * back in its response.
 */
type ColorAlphaMergerWorkerRequest = {
id: number;
color: VideoFrame;
alpha: VideoFrame;
};
/**
 * Message sent back by the color/alpha merge worker for the request with the given ID: either the merged frame or
 * the message of the error that occurred.
 */
type ColorAlphaMergerWorkerResponse =
| { id: number; frame: VideoFrame }
| { id: number; error: string };
const colorAlphaMergerWorkerCode = () => {
// These buffers are reused across frames as long as the size matches, since consecutive frames usually share
// dimensions
let cpuAlphaBuffer: Uint8Array | null = null;
let cpuColorBuffer: Uint8Array | null = null;
// Serialize execution internally so concurrent requests don't race on the shared cpu*Buffer state.
let chain: Promise<void> = Promise.resolve();
self.addEventListener('message', (event: MessageEvent<ColorAlphaMergerWorkerRequest>) => {
const { id, color, alpha } = event.data;
chain = chain.then(async () => {
try {
const frame = await merge(color, alpha);
self.postMessage({ id, frame }, { transfer: [frame] });
} catch (error) {
self.postMessage({ id, error: (error as Error).message });
} finally {
// We took ownership of the inputs via transfer; close them now that the merge (or its error) is done.
color.close();
alpha.close();
}
});
});
const merge = async (color: VideoFrame, alpha: VideoFrame): Promise<VideoFrame> => {
const format = color.format as VideoSamplePixelFormat | null;
const alphaFormat = alpha.format as VideoSamplePixelFormat | null;
if (!format || !alphaFormat) {
throw new Error('CPU color/alpha merging requires a known VideoFrame format.');
}
// The alpha frame must have the same bit depth as the color frame
const colorIs10 = format.includes('P10');
const colorIs12 = format.includes('P12');
const alphaIs10 = alphaFormat.includes('P10');
const alphaIs12 = alphaFormat.includes('P12');
if (alphaIs10 !== colorIs10 || alphaIs12 !== colorIs12) {
throw new Error(
`CPU color/alpha merging requires the alpha frame to have the same bit depth as the color frame`
+ ` (color: '${format}', alpha: '${alphaFormat}').`,
);
}
const width = color.codedWidth;
const height = color.codedHeight;
if (format === 'RGBX' || format === 'RGBA' || format === 'BGRX' || format === 'BGRA') {
return await mergeInterleavedRgba(color, alpha, width, height, format);
} else if (
format === 'I420' || format === 'I420P10' || format === 'I420P12'
|| format === 'I422' || format === 'I422P10' || format === 'I422P12'
|| format === 'I444' || format === 'I444P10' || format === 'I444P12'
)