mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,521 lines (1,319 loc) • 55.9 kB
text/typescript
/*!
* Copyright (c) 2025-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import { parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, VideoCodec, AudioCodec } from './codec';
import { CustomVideoDecoder, customVideoDecoders, CustomAudioDecoder, customAudioDecoders } from './custom-coder';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
AnyIterable,
assert,
assertNever,
CallSerializer,
getInt24,
getUint24,
insertSorted,
isSafari,
last,
mapAsyncGenerator,
promiseWithResolvers,
Rotation,
toAsyncIterator,
toDataView,
validateAnyIterable,
} from './misc';
import { EncodedPacket } from './packet';
import { fromAlaw, fromUlaw } from './pcm';
import { AudioSample, VideoSample } from './sample';
/**
* Additional options for controlling packet retrieval. All fields are optional; the packet sink methods in this
* file default the whole object to `{}`.
* @public
*/
export type PacketRetrievalOptions = {
/**
* When set to true, only packet metadata (like timestamp) will be retrieved - the actual packet data will not
* be loaded. Must be a boolean when defined.
*/
metadataOnly?: boolean;
/**
* When set to true, key packets will be verified upon retrieval by looking into the packet's bitstream.
* If not enabled, the packet types will be determined solely by what's stored in the containing file and may be
* incorrect, potentially leading to decoder errors. Since determining a packet's actual type requires looking into
* its data, this option cannot be enabled together with `metadataOnly`; enabling both is rejected with a
* TypeError during option validation. Must be a boolean when defined.
*/
verifyKeyPackets?: boolean;
};
/**
 * Checks that `options` is a well-formed {@link PacketRetrievalOptions} object.
 *
 * @param options - The options object to check.
 * @throws TypeError if `options` is not an object, if one of its flags is defined but not a boolean, or if
 * `verifyKeyPackets` and `metadataOnly` are both enabled.
 */
const validatePacketRetrievalOptions = (options: PacketRetrievalOptions) => {
    const isObject = typeof options === 'object' && options !== null;
    if (!isObject) {
        throw new TypeError('options must be an object.');
    }

    // Both flags follow the same rule: either absent or a real boolean
    for (const key of ['metadataOnly', 'verifyKeyPackets'] as const) {
        const flag = options[key];
        if (flag !== undefined && typeof flag !== 'boolean') {
            throw new TypeError(`options.${key}, when defined, must be a boolean.`);
        }
    }

    // Verifying a packet's type requires its data, which metadata-only retrieval doesn't load
    if (options.verifyKeyPackets && options.metadataOnly) {
        throw new TypeError('options.verifyKeyPackets and options.metadataOnly cannot be enabled together.');
    }
};
/**
 * Checks that `timestamp` is usable as a timestamp. Non-finite values (such as `Infinity`) are accepted; only
 * non-numbers and `NaN` are rejected.
 *
 * @param timestamp - The value to check.
 * @throws TypeError if `timestamp` is not a number or is `NaN`.
 */
const validateTimestamp = (timestamp: number) => {
    const isUsable = typeof timestamp === 'number' && !Number.isNaN(timestamp);
    if (isUsable) {
        return;
    }

    throw new TypeError('timestamp must be a number.');
};
/**
 * Optionally corrects the type of a retrieved packet. When `options.verifyKeyPackets` is not enabled, the given
 * promise is returned untouched. Otherwise, a packet that isn't already marked 'delta' is passed to
 * `track.determinePacketType`, and if that yields a type, the packet's `type` field is overwritten with it.
 *
 * @param track - The track the packet belongs to; used to determine the actual packet type.
 * @param promise - Promise resolving to the retrieved packet, or null if there is none.
 * @param options - The retrieval options the packet was requested with.
 * @returns A promise resolving to the same packet instance (or null).
 */
const maybeFixPacketType = (
    track: InputTrack,
    promise: Promise<EncodedPacket | null>,
    options: PacketRetrievalOptions,
) => {
    if (!options.verifyKeyPackets) {
        return promise;
    }

    return promise.then(async (retrieved) => {
        // Nothing to verify for a missing packet or one that is already marked as a delta packet
        if (retrieved && retrieved.type !== 'delta') {
            const actualType = await track.determinePacketType(retrieved);
            if (actualType) {
                // @ts-expect-error Technically readonly
                retrieved.type = actualType;
            }
        }

        return retrieved;
    });
};
/**
 * Sink for retrieving encoded packets from an input track.
 * @public
 */
export class EncodedPacketSink {
    /** @internal */
    _track: InputTrack;

    /**
     * Creates a packet sink for the given track.
     *
     * @param track - The track whose packets should be retrieved.
     * @throws TypeError if `track` is not an `InputTrack`.
     */
    constructor(track: InputTrack) {
        if (!(track instanceof InputTrack)) {
            throw new TypeError('track must be an InputTrack.');
        }

        this._track = track;
    }

    /**
     * Retrieves the track's first packet (in decode order), or null if it has no packets. The first packet is very
     * likely to be a key packet.
     *
     * @param options - Additional options controlling packet retrieval.
     */
    getFirstPacket(options: PacketRetrievalOptions = {}) {
        validatePacketRetrievalOptions(options);

        return maybeFixPacketType(this._track, this._track._backing.getFirstPacket(options), options);
    }

    /**
     * Retrieves the packet corresponding to the given timestamp, in seconds. More specifically, returns the last packet
     * (in presentation order) with a start timestamp less than or equal to the given timestamp. This method can be
     * used to retrieve a track's last packet using `getPacket(Infinity)`. The method returns null if the timestamp
     * is before the first packet in the track.
     *
     * @param timestamp - The timestamp used for retrieval, in seconds.
     * @param options - Additional options controlling packet retrieval.
     */
    getPacket(timestamp: number, options: PacketRetrievalOptions = {}) {
        validateTimestamp(timestamp);
        validatePacketRetrievalOptions(options);

        return maybeFixPacketType(this._track, this._track._backing.getPacket(timestamp, options), options);
    }

    /**
     * Retrieves the packet following the given packet (in decode order), or null if the given packet is the
     * last packet.
     *
     * @param packet - The packet whose successor should be retrieved.
     * @param options - Additional options controlling packet retrieval.
     */
    getNextPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}) {
        if (!(packet instanceof EncodedPacket)) {
            throw new TypeError('packet must be an EncodedPacket.');
        }
        validatePacketRetrievalOptions(options);

        return maybeFixPacketType(this._track, this._track._backing.getNextPacket(packet, options), options);
    }

    /**
     * Retrieves the key packet corresponding to the given timestamp, in seconds. More specifically, returns the last
     * key packet (in presentation order) with a start timestamp less than or equal to the given timestamp. A key packet
     * is a packet that doesn't require previous packets to be decoded. This method can be used to retrieve a track's
     * last key packet using `getKeyPacket(Infinity)`. The method returns null if the timestamp is before the first
     * key packet in the track.
     *
     * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
     *
     * @param timestamp - The timestamp used for retrieval, in seconds.
     * @param options - Additional options controlling packet retrieval.
     */
    async getKeyPacket(timestamp: number, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
        validateTimestamp(timestamp);
        validatePacketRetrievalOptions(options);

        if (!options.verifyKeyPackets) {
            return this._track._backing.getKeyPacket(timestamp, options);
        }

        const packet = await this._track._backing.getKeyPacket(timestamp, options);
        if (!packet || packet.type === 'delta') {
            return packet;
        }

        const determinedType = await this._track.determinePacketType(packet);
        if (determinedType === 'delta') {
            // Try returning the previous key packet (in hopes that it's actually a key packet). Stepping back by one
            // tick of the track's time resolution makes the search land strictly before the rejected packet.
            return this.getKeyPacket(packet.timestamp - 1 / this._track.timeResolution, options);
        }

        return packet;
    }

    /**
     * Retrieves the key packet following the given packet (in decode order), or null if the given packet is the last
     * key packet.
     *
     * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
     *
     * @param packet - The packet after which to search for a key packet.
     * @param options - Additional options controlling packet retrieval.
     */
    async getNextKeyPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
        if (!(packet instanceof EncodedPacket)) {
            throw new TypeError('packet must be an EncodedPacket.');
        }
        validatePacketRetrievalOptions(options);

        if (!options.verifyKeyPackets) {
            return this._track._backing.getNextKeyPacket(packet, options);
        }

        const nextPacket = await this._track._backing.getNextKeyPacket(packet, options);
        if (!nextPacket || nextPacket.type === 'delta') {
            return nextPacket;
        }

        const determinedType = await this._track.determinePacketType(nextPacket);
        if (determinedType === 'delta') {
            // Try returning the next key packet (in hopes that it's actually a key packet)
            return this.getNextKeyPacket(nextPacket, options);
        }

        return nextPacket;
    }

    /**
     * Creates an async iterator that yields the packets in this track in decode order. To enable fast iteration, this
     * method will intelligently preload packets based on the speed of the consumer.
     *
     * Terminating the iterator via `return()` or `throw()` stops the preloading.
     *
     * @param startPacket - (optional) The packet from which iteration should begin. This packet will also be yielded.
     * @param endPacket - (optional) The packet at which iteration should end. This packet will _not_ be yielded.
     * @param options - Additional options controlling packet retrieval.
     */
    packets(
        startPacket?: EncodedPacket,
        endPacket?: EncodedPacket,
        options: PacketRetrievalOptions = {},
    ): AsyncGenerator<EncodedPacket, void, unknown> {
        if (startPacket !== undefined && !(startPacket instanceof EncodedPacket)) {
            throw new TypeError('startPacket must be an EncodedPacket.');
        }
        if (startPacket !== undefined && startPacket.isMetadataOnly && !options?.metadataOnly) {
            throw new TypeError('startPacket can only be metadata-only if options.metadataOnly is enabled.');
        }
        if (endPacket !== undefined && !(endPacket instanceof EncodedPacket)) {
            throw new TypeError('endPacket must be an EncodedPacket.');
        }
        validatePacketRetrievalOptions(options);

        const packetQueue: EncodedPacket[] = [];

        let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
        let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
        let ended = false;
        let terminated = false;

        // This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
        // method but instead in a different context. This error should not go unnoticed and must be bubbled up to
        // the consumer.
        let outOfBandError = null as Error | null;

        // Times (from performance.now()) at which the consumer dequeued a packet within the last second
        const timestamps: number[] = [];
        // The queue should always be big enough to hold 1 second worth of packets
        const maxQueueSize = () => Math.max(2, timestamps.length);

        // Stops the pump and wakes up both sides so that nothing stays parked on a promise
        const terminate = () => {
            terminated = true;
            onQueueDequeue();
            onQueueNotEmpty();
        };

        // The following is the "pump" process that keeps pumping packets into the queue
        (async () => {
            let packet = startPacket ?? await this.getFirstPacket(options);

            while (packet && !terminated) {
                if (endPacket && packet.sequenceNumber >= endPacket.sequenceNumber) {
                    break;
                }

                if (packetQueue.length > maxQueueSize()) {
                    // The consumer is lagging behind; wait until it takes a packet (or terminates)
                    ({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
                    await queueDequeue;
                    continue;
                }

                packetQueue.push(packet);

                onQueueNotEmpty();
                ({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());

                packet = await this.getNextPacket(packet, options);
            }

            ended = true;
            onQueueNotEmpty();
        })().catch((error: Error) => {
            if (!outOfBandError) {
                outOfBandError = error;
                onQueueNotEmpty();
            }
        });

        return {
            async next() {
                while (true) {
                    if (terminated) {
                        return { value: undefined, done: true };
                    } else if (outOfBandError) {
                        throw outOfBandError;
                    } else if (packetQueue.length > 0) {
                        const value = packetQueue.shift()!;
                        const now = performance.now();
                        timestamps.push(now);

                        // Forget dequeue times that are older than one second
                        while (timestamps.length > 0 && now - timestamps[0]! >= 1000) {
                            timestamps.shift();
                        }

                        onQueueDequeue();

                        return { value, done: false };
                    } else if (ended) {
                        return { value: undefined, done: true };
                    } else {
                        await queueNotEmpty;
                    }
                }
            },
            async return() {
                terminate();

                return { value: undefined, done: true };
            },
            async throw(error) {
                // Like a native generator, the iterator is finished after throw(); make sure the pump stops too
                // instead of preloading packets nobody will consume.
                terminate();

                throw error;
            },
            [Symbol.asyncIterator]() {
                return this;
            },
        };
    }
}
/**
 * Minimal decoder interface used by the sample sinks in this file. Implementations hand decoded samples to
 * `onSample` and report decoder failures through `onError`.
 */
abstract class DecoderWrapper<
    MediaSample extends VideoSample | AudioSample,
> {
    /** Callback that receives the decoded samples. */
    public onSample: (sample: MediaSample) => unknown;
    /** Callback that receives decoder errors. */
    public onError: (error: DOMException) => unknown;

    constructor(
        onSample: (sample: MediaSample) => unknown,
        onError: (error: DOMException) => unknown,
    ) {
        this.onSample = onSample;
        this.onError = onError;
    }

    /** Submits an encoded packet for decoding. */
    abstract decode(packet: EncodedPacket): void;

    /** Returns the number of submitted packets still waiting to be decoded. */
    abstract getDecodeQueueSize(): number;

    /** Resolves once the decoder has been flushed. */
    abstract flush(): Promise<void>;

    /** Closes the decoder. */
    abstract close(): void;
}
/**
 * Base class for decoded media sample sinks.
 *
 * Subclasses provide a decoder and a packet sink; this class implements the shared "pump" logic that feeds encoded
 * packets into the decoder and hands the decoded samples to the consumer through async iterators.
 * @public
 */
export abstract class BaseMediaSampleSink<
    MediaSample extends VideoSample | AudioSample,
> {
    /**
     * Creates the decoder used by the pump. The decoder is expected to call `onSample` for each decoded sample and
     * `onError` when decoding fails.
     * @internal
     */
    abstract _createDecoder(
        onSample: (sample: MediaSample) => unknown,
        onError: (error: DOMException) => unknown
    ): Promise<DecoderWrapper<MediaSample>>;

    /**
     * Creates the packet sink from which the pump reads encoded packets.
     * @internal
     */
    abstract _createPacketSink(): EncodedPacketSink;

    /**
     * Decodes and yields the samples of the given time range. Iteration starts with the last sample whose timestamp
     * is less than or equal to `startTimestamp` (if there is one) and stops before the first sample whose timestamp
     * is greater than or equal to `endTimestamp`. If the track has no packets, the iterator finishes immediately.
     *
     * Samples that were decoded but not yet handed to the consumer are closed when the iterator is terminated via
     * `return()` or `throw()`.
     *
     * @param startTimestamp - The timestamp in seconds at which to start yielding samples (inclusive).
     * @param endTimestamp - The timestamp in seconds at which to stop yielding samples (exclusive).
     * @internal
     */
    protected mediaSamplesInRange(
        startTimestamp = 0,
        endTimestamp = Infinity,
    ): AsyncGenerator<MediaSample, void, unknown> {
        validateTimestamp(startTimestamp);
        validateTimestamp(endTimestamp);

        const sampleQueue: MediaSample[] = [];
        let firstSampleQueued = false;
        let lastSample: MediaSample | null = null;
        let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
        let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
        let decoderIsFlushed = false;
        let ended = false;
        let terminated = false;

        // This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
        // method but instead in a different context. This error should not go unnoticed and must be bubbled up to
        // the consumer.
        let outOfBandError = null as Error | null;

        // Stops the pump, wakes up anything that is waiting and releases the samples nobody will consume anymore
        const terminate = () => {
            terminated = true;
            ended = true;
            onQueueDequeue();
            onQueueNotEmpty();

            lastSample?.close();

            for (const sample of sampleQueue) {
                sample.close();
            }
        };

        // The following is the "pump" process that keeps pumping packets into the decoder
        (async () => {
            const decoderError = new Error();
            const decoder = await this._createDecoder((sample) => {
                onQueueDequeue();

                if (sample.timestamp >= endTimestamp) {
                    ended = true;
                }

                if (ended) {
                    sample.close();
                    return;
                }

                if (lastSample) {
                    if (sample.timestamp > startTimestamp) {
                        // We don't know ahead of time what the first sample is. This is because the first sample is
                        // the last sample whose timestamp is less than or equal to the start timestamp. Therefore we
                        // need to wait for the first sample after the start timestamp, and then we'll know that the
                        // previous sample was the first sample.
                        sampleQueue.push(lastSample);
                        firstSampleQueued = true;
                    } else {
                        lastSample.close();
                    }
                }

                if (sample.timestamp >= startTimestamp) {
                    sampleQueue.push(sample);
                    firstSampleQueued = true;
                }

                // Until the first sample has been queued, hold on to the most recent candidate for it
                lastSample = firstSampleQueued ? null : sample;

                if (sampleQueue.length > 0) {
                    onQueueNotEmpty();
                    ({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
                }
            }, (error) => {
                if (!outOfBandError) {
                    error.stack = decoderError.stack; // Provide a more useful stack trace
                    outOfBandError = error;
                    onQueueNotEmpty();
                }
            });

            const packetSink = this._createPacketSink();
            const keyPacket = await packetSink.getKeyPacket(startTimestamp, { verifyKeyPackets: true })
                ?? await packetSink.getFirstPacket();
            if (!keyPacket) {
                // The track has no packets, so there is nothing to decode. Release the decoder and mark the
                // pipeline as finished; otherwise the consumer would wait forever for a sample that never comes.
                decoder.close();
                decoderIsFlushed = true;
                onQueueNotEmpty();
                return;
            }

            let currentPacket: EncodedPacket | null = keyPacket;

            let endPacket: EncodedPacket | undefined = undefined;
            if (endTimestamp < Infinity) {
                // When an end timestamp is set, we cannot simply use that for the packet iterator due to out-of-order
                // frames (B-frames). Instead, we'll need to keep decoding packets until we get a frame that exceeds
                // this end time. However, we can still put a bound on it: Since key frames are by definition never
                // out of order, we can stop at the first key frame after the end timestamp.
                const packet = await packetSink.getPacket(endTimestamp);
                const endKeyPacket = !packet
                    ? null
                    : packet.type === 'key' && packet.timestamp === endTimestamp
                        ? packet
                        : await packetSink.getNextKeyPacket(packet, { verifyKeyPackets: true });

                if (endKeyPacket) {
                    endPacket = endKeyPacket;
                }
            }

            const packets = packetSink.packets(keyPacket, endPacket);
            await packets.next(); // Skip the start packet as we already have it

            while (currentPacket && !ended) {
                const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
                if (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize) {
                    // Too much is in flight; wait until a sample is output or consumed
                    ({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
                    await queueDequeue;
                    continue;
                }

                decoder.decode(currentPacket);

                const packetResult = await packets.next();
                if (packetResult.done) {
                    break;
                }

                currentPacket = packetResult.value;
            }

            await packets.return();

            if (!terminated) await decoder.flush();
            decoder.close();

            if (!firstSampleQueued && lastSample) {
                // No sample after the start timestamp ever arrived, so the last candidate is the first sample
                sampleQueue.push(lastSample);
            }

            decoderIsFlushed = true;
            onQueueNotEmpty(); // To unstuck the generator
        })().catch((error: Error) => {
            if (!outOfBandError) {
                outOfBandError = error;
                onQueueNotEmpty();
            }
        });

        return {
            async next() {
                while (true) {
                    if (terminated) {
                        return { value: undefined, done: true };
                    } else if (outOfBandError) {
                        throw outOfBandError;
                    } else if (sampleQueue.length > 0) {
                        const value = sampleQueue.shift()!;
                        onQueueDequeue();
                        return { value, done: false };
                    } else if (!decoderIsFlushed) {
                        await queueNotEmpty;
                    } else {
                        return { value: undefined, done: true };
                    }
                }
            },
            async return() {
                terminate();

                return { value: undefined, done: true };
            },
            async throw(error) {
                // The iterator is finished after throw(), so release the pending samples and stop the pump
                terminate();

                throw error;
            },
            [Symbol.asyncIterator]() {
                return this;
            },
        };
    }

    /**
     * Decodes and yields one sample per timestamp supplied by `timestamps`, in the order the timestamps are
     * provided. `null` is yielded for a timestamp for which no packet (or no key packet) can be found. Consecutive
     * timestamps that share a key packet and don't go back in time are decoded in one batch.
     *
     * Samples that were decoded but not yet handed to the consumer are closed when the iterator is terminated via
     * `return()` or `throw()`.
     *
     * @param timestamps - An iterable or async iterable of timestamps in seconds.
     * @internal
     */
    protected mediaSamplesAtTimestamps(
        timestamps: AnyIterable<number>,
    ): AsyncGenerator<MediaSample | null, void, unknown> {
        validateAnyIterable(timestamps);
        const timestampIterator = toAsyncIterator(timestamps);
        // Packet timestamps for which a decoded sample still has to be emitted, in request order
        const timestampsOfInterest: number[] = [];

        const sampleQueue: (MediaSample | null)[] = [];
        let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
        let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
        let decoderIsFlushed = false;
        let terminated = false;

        // This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
        // method but instead in a different context. This error should not go unnoticed and must be bubbled up to
        // the consumer.
        let outOfBandError = null as Error | null;

        const pushToQueue = (sample: MediaSample | null) => {
            sampleQueue.push(sample);
            onQueueNotEmpty();
            ({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
        };

        // Stops the pump, wakes up anything that is waiting and releases the samples nobody will consume anymore
        const terminate = () => {
            terminated = true;
            onQueueDequeue();
            onQueueNotEmpty();

            for (const sample of sampleQueue) {
                sample?.close();
            }
        };

        // The following is the "pump" process that keeps pumping packets into the decoder
        (async () => {
            const decoderError = new Error();
            const decoder = await this._createDecoder((sample) => {
                onQueueDequeue();

                if (terminated) {
                    sample.close();
                    return;
                }

                // Count how many of the requested timestamps this sample satisfies
                let sampleUses = 0;
                while (
                    timestampsOfInterest.length > 0
                    && sample.timestamp - timestampsOfInterest[0]! > -1e-10 // Give it a little epsilon
                ) {
                    sampleUses++;
                    timestampsOfInterest.shift();
                }

                if (sampleUses > 0) {
                    for (let i = 0; i < sampleUses; i++) {
                        // Clone the sample if we need to emit it multiple times
                        pushToQueue((i < sampleUses - 1 ? sample.clone() : sample) as MediaSample);
                    }
                } else {
                    sample.close();
                }
            }, (error) => {
                if (!outOfBandError) {
                    error.stack = decoderError.stack; // Provide a more useful stack trace
                    outOfBandError = error;
                    onQueueNotEmpty();
                }
            });

            const packetSink = this._createPacketSink();
            let lastPacket: EncodedPacket | null = null;
            let lastKeyPacket: EncodedPacket | null = null;

            // The end sequence number (inclusive) in the next batch of packets that will be decoded. The batch starts
            // at the last key frame and goes until this sequence number.
            let maxSequenceNumber = -1;

            const decodePackets = async () => {
                assert(lastKeyPacket);

                // Start at the current key packet
                let currentPacket = lastKeyPacket;
                decoder.decode(currentPacket);

                while (currentPacket.sequenceNumber < maxSequenceNumber) {
                    const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
                    while (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize && !terminated) {
                        ({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
                        await queueDequeue;
                    }

                    if (terminated) {
                        break;
                    }

                    const nextPacket = await packetSink.getNextPacket(currentPacket);
                    assert(nextPacket);

                    currentPacket = nextPacket;
                    decoder.decode(nextPacket);
                }

                maxSequenceNumber = -1;
            };

            const flushDecoder = async () => {
                await decoder.flush();

                // We don't expect this list to have any elements in it anymore, but in case it does, let's emit
                // nulls for every remaining element, then clear it.
                for (let i = 0; i < timestampsOfInterest.length; i++) {
                    pushToQueue(null);
                }
                timestampsOfInterest.length = 0;
            };

            for await (const timestamp of timestampIterator) {
                validateTimestamp(timestamp);

                if (terminated) {
                    break;
                }

                const targetPacket = await packetSink.getPacket(timestamp);
                const keyPacket = targetPacket && await packetSink.getKeyPacket(timestamp, { verifyKeyPackets: true });

                if (!keyPacket) {
                    // Emit everything requested so far first, so that the null ends up in the right position
                    if (maxSequenceNumber !== -1) {
                        await decodePackets();
                        await flushDecoder();
                    }

                    pushToQueue(null);
                    lastPacket = null;
                    continue;
                }

                // Check if the key packet has changed or if we're going back in time
                if (
                    lastPacket
                    && (
                        keyPacket.sequenceNumber !== lastKeyPacket!.sequenceNumber
                        || targetPacket.timestamp < lastPacket.timestamp
                    )
                ) {
                    await decodePackets();
                    await flushDecoder(); // Always flush here, improves decoder compatibility
                }

                timestampsOfInterest.push(targetPacket.timestamp);
                maxSequenceNumber = Math.max(targetPacket.sequenceNumber, maxSequenceNumber);

                lastPacket = targetPacket;
                lastKeyPacket = keyPacket;
            }

            if (!terminated) {
                if (maxSequenceNumber !== -1) {
                    // We still need to decode packets
                    await decodePackets();
                }

                await flushDecoder();
            }
            decoder.close();

            decoderIsFlushed = true;
            onQueueNotEmpty(); // To unstuck the generator
        })().catch((error: Error) => {
            if (!outOfBandError) {
                outOfBandError = error;
                onQueueNotEmpty();
            }
        });

        return {
            async next() {
                while (true) {
                    if (terminated) {
                        return { value: undefined, done: true };
                    } else if (outOfBandError) {
                        throw outOfBandError;
                    } else if (sampleQueue.length > 0) {
                        const value = sampleQueue.shift();
                        assert(value !== undefined);
                        onQueueDequeue();
                        return { value, done: false };
                    } else if (!decoderIsFlushed) {
                        await queueNotEmpty;
                    } else {
                        return { value: undefined, done: true };
                    }
                }
            },
            async return() {
                terminate();

                return { value: undefined, done: true };
            },
            async throw(error) {
                // The iterator is finished after throw(), so release the pending samples and stop the pump
                terminate();

                throw error;
            },
            [Symbol.asyncIterator]() {
                return this;
            },
        };
    }
}
/**
 * Computes the upper bound for the combined number of queued decoded samples and packets pending in the decoder.
 *
 * @param decodedSampleQueueSize - The number of decoded samples currently waiting to be consumed.
 * @returns 40 when no decoded samples are waiting, otherwise 8.
 */
const computeMaxQueueSize = (decodedSampleQueueSize: number) => {
    // With no decoded samples lying around, a much bigger queue of encoded packets waiting to be decoded is fine. In
    // fact, some decoders only start flushing out decoded chunks when the packet queue is large enough.
    if (decodedSampleQueueSize === 0) {
        return 40;
    }

    // Decoded samples can use up a lot of memory, so keep the total queue size small once some exist
    return 8;
};
/**
* Video decoder used by the video sample sink. It is backed either by a registered custom decoder (the first entry
* of `customVideoDecoders` that reports support for the codec and decoder config) or, if there is none, by a
* WebCodecs `VideoDecoder`. Every emitted sample passes through `finalizeAndEmitSample`.
*/
class VideoDecoderWrapper extends DecoderWrapper<VideoSample> {
// WebCodecs decoder; stays null when a custom decoder is used.
decoder: VideoDecoder | null = null;
// Custom decoder; stays null when the WebCodecs decoder is used.
customDecoder: CustomVideoDecoder | null = null;
// All calls into the custom decoder (init, decode, flush, close) are routed through this serializer.
customDecoderCallSerializer = new CallSerializer();
// Number of decode calls handed to the custom decoder that have not completed yet.
customDecoderQueueSize = 0;
inputTimestamps: number[] = []; // Timestamps input into the decoder, sorted.
sampleQueue: VideoSample[] = []; // Safari-specific thing, check usage.
/**
* @param onSample - Called with each finalized sample.
* @param onError - Passed to the WebCodecs decoder as its error callback.
* @param codec - The codec of the track; used to find a matching custom decoder.
* @param decoderConfig - The configuration used to find a custom decoder or to configure the WebCodecs decoder.
* @param rotation - The rotation assigned to every emitted sample.
* @param timeResolution - Emitted timestamps and durations are rounded to multiples of `1 / timeResolution`.
*/
constructor(
onSample: (sample: VideoSample) => unknown,
onError: (error: DOMException) => unknown,
codec: VideoCodec,
decoderConfig: VideoDecoderConfig,
public rotation: Rotation,
public timeResolution: number,
) {
super(onSample, onError);
const MatchingCustomDecoder = customVideoDecoders.find(x => x.supports(codec, decoderConfig));
if (MatchingCustomDecoder) {
// @ts-expect-error "Can't create instance of abstract class 🤓"
this.customDecoder = new MatchingCustomDecoder() as CustomVideoDecoder;
// @ts-expect-error It's technically readonly
this.customDecoder.codec = codec;
// @ts-expect-error It's technically readonly
this.customDecoder.config = decoderConfig;
// @ts-expect-error It's technically readonly
this.customDecoder.onSample = (sample) => {
if (!(sample instanceof VideoSample)) {
throw new TypeError('The argument passed to onSample must be a VideoSample.');
}
this.finalizeAndEmitSample(sample);
};
void this.customDecoderCallSerializer.call(() => this.customDecoder!.init());
} else {
// Specific handler for the WebCodecs VideoDecoder to iron out browser differences
const sampleHandler = (sample: VideoSample) => {
if (isSafari()) {
// For correct B-frame handling, we don't just hand over the frames directly but instead add them to
// a queue, because we want to ensure frames are emitted in presentation order. We flush the queue
// each time we receive a frame with a timestamp larger than the highest we've seen so far, as we
// can be sure that is not a B-frame. Typically, WebCodecs automatically guarantees that frames are
// emitted in presentation order, but Safari doesn't always follow this rule.
if (this.sampleQueue.length > 0 && (sample.timestamp >= last(this.sampleQueue)!.timestamp)) {
for (const sample of this.sampleQueue) {
this.finalizeAndEmitSample(sample);
}
this.sampleQueue.length = 0;
}
insertSorted(this.sampleQueue, sample, x => x.timestamp);
} else {
// Assign it the next earliest timestamp from the input. We do this because browsers, by spec, are
// required to emit decoded frames in presentation order *while* retaining the timestamp of their
// originating EncodedVideoChunk. For files with B-frames but no out-of-order timestamps (like a
// missing ctts box, for example), this causes a mismatch. We therefore fix the timestamps and
// ensure they are sorted by doing this.
const timestamp = this.inputTimestamps.shift();
// There's no way we'd have more decoded frames than encoded packets we passed in. Actually, the
// correspondence should be 1:1.
assert(timestamp !== undefined);
sample.setTimestamp(timestamp);
this.finalizeAndEmitSample(sample);
}
};
this.decoder = new VideoDecoder({
output: frame => sampleHandler(new VideoSample(frame)),
error: onError,
});
this.decoder.configure(decoderConfig);
}
}
/**
* Rounds the sample's timestamp and duration to the time resolution, applies the configured rotation and passes
* the sample to `onSample`.
*/
finalizeAndEmitSample(sample: VideoSample) {
// Round the timestamps to the time resolution
sample.setTimestamp(Math.round(sample.timestamp * this.timeResolution) / this.timeResolution);
sample.setDuration(Math.round(sample.duration * this.timeResolution) / this.timeResolution);
sample.setRotation(this.rotation);
this.onSample(sample);
}
/**
* Returns the number of packets waiting to be decoded: the locally tracked count for a custom decoder, or the
* WebCodecs decoder's `decodeQueueSize` otherwise.
*/
getDecodeQueueSize() {
if (this.customDecoder) {
return this.customDecoderQueueSize;
} else {
assert(this.decoder);
return this.decoder.decodeQueueSize;
}
}
/**
* Submits a packet for decoding. For the WebCodecs decoder on non-Safari browsers, the packet's timestamp is
* also recorded in `inputTimestamps` so that it can be reassigned to the output frames in sorted order.
*/
decode(packet: EncodedPacket) {
if (this.customDecoder) {
// The counter is decremented again once the serialized decode call has resolved
this.customDecoderQueueSize++;
void this.customDecoderCallSerializer
.call(() => this.customDecoder!.decode(packet))
.then(() => this.customDecoderQueueSize--);
} else {
assert(this.decoder);
if (!isSafari()) {
insertSorted(this.inputTimestamps, packet.timestamp, x => x);
}
this.decoder.decode(packet.toEncodedVideoChunk());
}
}
/**
* Flushes the underlying decoder. On Safari, the samples still held back in `sampleQueue` are emitted afterwards.
*/
async flush() {
if (this.customDecoder) {
await this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
} else {
assert(this.decoder);
await this.decoder.flush();
}
if (isSafari()) {
for (const sample of this.sampleQueue) {
this.finalizeAndEmitSample(sample);
}
this.sampleQueue.length = 0;
}
}
/**
* Closes the underlying decoder and closes (without emitting) any samples still held in `sampleQueue`.
*/
close() {
if (this.customDecoder) {
void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
} else {
assert(this.decoder);
this.decoder.close();
}
for (const sample of this.sampleQueue) {
sample.close();
}
this.sampleQueue.length = 0;
}
}
/**
 * Sink that decodes a video track and hands out its video samples (video frames).
 * @public
 */
export class VideoSampleSink extends BaseMediaSampleSink<VideoSample> {
    /** @internal */
    _videoTrack: InputVideoTrack;

    /**
     * @param videoTrack - The video track to decode samples from.
     * @throws TypeError if `videoTrack` is not an `InputVideoTrack`.
     */
    constructor(videoTrack: InputVideoTrack) {
        const isVideoTrack = videoTrack instanceof InputVideoTrack;
        if (!isVideoTrack) {
            throw new TypeError('videoTrack must be an InputVideoTrack.');
        }

        super();

        this._videoTrack = videoTrack;
    }

    /**
     * Builds the decoder for this sink's track. Rejects if the track reports that it cannot be decoded.
     * @internal
     */
    async _createDecoder(
        onSample: (sample: VideoSample) => unknown,
        onError: (error: DOMException) => unknown,
    ) {
        const decodable = await this._videoTrack.canDecode();
        if (!decodable) {
            const message
                = 'This video track cannot be decoded by this browser. Make sure to check decodability before using'
                + ' a track.';
            throw new Error(message);
        }

        const { codec, rotation } = this._videoTrack;
        const decoderConfig = await this._videoTrack.getDecoderConfig();
        const { timeResolution } = this._videoTrack;
        assert(codec && decoderConfig);

        return new VideoDecoderWrapper(onSample, onError, codec, decoderConfig, rotation, timeResolution);
    }

    /**
     * Builds the packet sink that reads this sink's track.
     * @internal
     */
    _createPacketSink() {
        const packetSink = new EncodedPacketSink(this._videoTrack);
        return packetSink;
    }

    /**
     * Looks up the video sample (frame) that is on display at the given timestamp, i.e. the last video sample (in
     * presentation order) whose start timestamp is less than or equal to the given timestamp. Resolves to null if
     * the timestamp lies before the track's first timestamp.
     *
     * @param timestamp - The timestamp used for retrieval, in seconds.
     */
    async getSample(timestamp: number) {
        validateTimestamp(timestamp);

        // A single-timestamp request yields exactly one result; leaving the loop early also ends the iterator
        const singleRequest = this.mediaSamplesAtTimestamps([timestamp]);
        for await (const result of singleRequest) {
            return result;
        }

        throw new Error('Internal error: Iterator returned nothing.');
    }

    /**
     * Returns an async iterator over the video samples (frames) of this track in presentation order. A few frames
     * are pre-decoded ahead of the consumer to keep iteration fast.
     *
     * @param startTimestamp - The timestamp in seconds at which to start yielding samples (inclusive).
     * @param endTimestamp - The timestamp in seconds at which to stop yielding samples (exclusive).
     */
    samples(startTimestamp = 0, endTimestamp = Infinity) {
        const iterator = this.mediaSamplesInRange(startTimestamp, endTimestamp);
        return iterator;
    }

    /**
     * Returns an async iterator that yields one video sample (frame) per timestamp in the argument. When the
     * timestamps are monotonically sorted, an optimized pipeline decodes each packet at most once, which makes this
     * more efficient than requesting the sample for every timestamp individually. The iterator may yield null if no
     * frame is available for a given timestamp.
     *
     * @param timestamps - An iterable or async iterable of timestamps in seconds.
     */
    samplesAtTimestamps(timestamps: AnyIterable<number>) {
        const iterator = this.mediaSamplesAtTimestamps(timestamps);
        return iterator;
    }
}
/**
* A canvas with additional timing information (timestamp & duration).
* @public
*/
export type WrappedCanvas = {
/**
* A canvas element or offscreen canvas. `CanvasSink` in this file creates an HTMLCanvasElement when `document`
* is defined and an OffscreenCanvas otherwise.
*/
canvas: HTMLCanvasElement | OffscreenCanvas;
/** The timestamp of the corresponding video sample, in seconds. */
timestamp: number;
/** The duration of the corresponding video sample, in seconds. */
duration: number;
};
/**
* Options for constructing a CanvasSink. All fields are optional; the CanvasSink constructor validates them and
* throws a TypeError for invalid values.
* @public
*/
export type CanvasSinkOptions = {
/**
* The width of the output canvas in pixels, defaulting to the display width of the video track. If height is not
* set, it will be deduced automatically based on aspect ratio. Must be a positive integer when defined.
*/
width?: number;
/**
* The height of the output canvas in pixels, defaulting to the display height of the video track. If width is not
* set, it will be deduced automatically based on aspect ratio. Must be a positive integer when defined.
*/
height?: number;
/**
* The fitting algorithm in case both width and height are set. It must be provided whenever both width and
* height are provided; when omitted otherwise, the sink falls back to 'fill'.
*
* - 'fill' will stretch the image to fill the entire box, potentially altering aspect ratio.
* - 'contain' will contain the entire image within the box while preserving aspect ratio. This may lead to
* letterboxing.
* - 'cover' will scale the image until the entire box is filled, while preserving aspect ratio.
*/
fit?: 'fill' | 'contain' | 'cover';
/**
* The clockwise rotation by which to rotate the raw video frame. Defaults to the rotation set in the file metadata.
* Rotation is applied before resizing. Must be 0, 90, 180 or 270 when defined.
*/
rotation?: Rotation;
/**
* When set, specifies the number of canvases in the pool. These canvases will be reused in a ring buffer /
* round-robin type fashion. This keeps the amount of allocated VRAM constant and relieves the browser from
* constantly allocating/deallocating canvases. A pool size of 0 or `undefined` disables the pool and means a new
* canvas is created each time. Must be a non-negative integer when defined.
*/
poolSize?: number;
};
/**
* A sink that renders video samples (frames) of the given video track to canvases. This is often more useful than
* directly retrieving frames, as it comes with common preprocessing steps such as resizing or applying rotation
* metadata.
*
* This sink will yield HTMLCanvasElements when in a DOM context, and OffscreenCanvases otherwise.
* @public
*/
export class CanvasSink {
	/** @internal */
	_videoTrack: InputVideoTrack;
	/** @internal */
	_width: number;
	/** @internal */
	_height: number;
	/** @internal */
	_fit: 'fill' | 'contain' | 'cover';
	/** @internal */
	_rotation: Rotation;
	/** @internal */
	_videoSampleSink: VideoSampleSink;
	/** @internal */
	_canvasPool: (HTMLCanvasElement | OffscreenCanvas | null)[];
	/** @internal */
	_nextCanvasIndex = 0;

	/**
	 * Creates a new CanvasSink for the given video track.
	 *
	 * The canvas dimensions default to the track's coded dimensions (swapped when the rotation is 90 or 270 degrees).
	 * When only one of `options.width` and `options.height` is given, the other one is derived from the aspect ratio
	 * of those rotated dimensions. When both are given, `options.fit` is required.
	 *
	 * @param videoTrack - The video track to retrieve frames from.
	 * @param options - Optional sizing, fitting, rotation and canvas pooling settings.
	 * @throws TypeError if `videoTrack` is not an InputVideoTrack or if any option is invalid.
	 */
	constructor(videoTrack: InputVideoTrack, options: CanvasSinkOptions = {}) {
		if (!(videoTrack instanceof InputVideoTrack)) {
			throw new TypeError('videoTrack must be an InputVideoTrack.');
		}
		// `null` must be rejected here as well, otherwise the property accesses below fail with an opaque error
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (options.width !== undefined && (!Number.isInteger(options.width) || options.width <= 0)) {
			throw new TypeError('options.width, when defined, must be a positive integer.');
		}
		if (options.height !== undefined && (!Number.isInteger(options.height) || options.height <= 0)) {
			throw new TypeError('options.height, when defined, must be a positive integer.');
		}
		if (options.fit !== undefined && !['fill', 'contain', 'cover'].includes(options.fit)) {
			throw new TypeError('options.fit, when provided, must be one of "fill", "contain", or "cover".');
		}
		if (
			options.width !== undefined
			&& options.height !== undefined
			&& options.fit === undefined
		) {
			throw new TypeError(
				'When both options.width and options.height are provided, options.fit must also be provided.',
			);
		}
		if (options.rotation !== undefined && ![0, 90, 180, 270].includes(options.rotation)) {
			throw new TypeError('options.rotation, when provided, must be 0, 90, 180 or 270.');
		}
		if (
			options.poolSize !== undefined
			&& (typeof options.poolSize !== 'number' || !Number.isInteger(options.poolSize) || options.poolSize < 0)
		) {
			throw new TypeError('poolSize must be a non-negative integer.');
		}

		// An explicitly provided rotation takes precedence over the one stored on the track
		const rotation = options.rotation ?? videoTrack.rotation;
		let [width, height] = rotation % 180 === 0
			? [videoTrack.codedWidth, videoTrack.codedHeight]
			: [videoTrack.codedHeight, videoTrack.codedWidth];
		const originalAspectRatio = width / height;

		// If width and height aren't defined together, deduce the missing value using the aspect ratio
		if (options.width !== undefined && options.height === undefined) {
			width = options.width;
			height = Math.round(width / originalAspectRatio);
		} else if (options.width === undefined && options.height !== undefined) {
			height = options.height;
			width = Math.round(height * originalAspectRatio);
		} else if (options.width !== undefined && options.height !== undefined) {
			width = options.width;
			height = options.height;
		}

		this._videoTrack = videoTrack;
		this._width = width;
		this._height = height;
		this._rotation = rotation;
		this._fit = options.fit ?? 'fill';
		this._videoSampleSink = new VideoSampleSink(videoTrack);
		// Pool slots start out empty and are filled lazily as canvases get created
		this._canvasPool = Array.from({ length: options.poolSize ?? 0 }, () => null);
	}

	/**
	 * Draws the sample onto a canvas (taken from the pool when one is available, freshly created otherwise), applying
	 * the configured rotation and fit, and returns the canvas together with the sample's timestamp and duration.
	 * The sample is always closed, even if drawing fails.
	 *
	 * @internal
	 */
	_videoSampleToWrappedCanvas(sample: VideoSample): WrappedCanvas {
		try {
			let canvas = this._canvasPool[this._nextCanvasIndex];
			let canvasIsReused = true;

			if (!canvas) {
				canvasIsReused = false;

				if (typeof document !== 'undefined') {
					// Prefer an HTMLCanvasElement
					canvas = document.createElement('canvas');
					canvas.width = this._width;
					canvas.height = this._height;
				} else {
					canvas = new OffscreenCanvas(this._width, this._height);
				}

				if (this._canvasPool.length > 0) {
					this._canvasPool[this._nextCanvasIndex] = canvas;
				}
			}

			if (this._canvasPool.length > 0) {
				// Cycle through the pool in a ring
				this._nextCanvasIndex = (this._nextCanvasIndex + 1) % this._canvasPool.length;
			}

			const context
				= canvas.getContext('2d', { alpha: false }) as CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D;
			assert(context);

			context.resetTransform();

			if (canvasIsReused) {
				// A pooled canvas still holds the previously drawn frame; wipe it so that regions not covered by this
				// sample don't show stale pixels
				context.clearRect(0, 0, this._width, this._height);
			}

			// These variables specify where the final sample will be drawn on the canvas
			let dx: number;
			let dy: number;
			let newWidth: number;
			let newHeight: number;

			if (this._fit === 'fill') {
				// Stretch the sample over the entire canvas
				dx = 0;
				dy = 0;
				newWidth = this._width;
				newHeight = this._height;
			} else {
				const [sampleWidth, sampleHeight] = this._rotation % 180 === 0
					? [sample.codedWidth, sample.codedHeight]
					: [sample.codedHeight, sample.codedWidth];

				// 'contain' uses the smaller scale factor so the whole sample fits, 'cover' uses the larger one so the
				// whole canvas is covered
				const scale = this._fit === 'contain'
					? Math.min(this._width / sampleWidth, this._height / sampleHeight)
					: Math.max(this._width / sampleWidth, this._height / sampleHeight);
				newWidth = sampleWidth * scale;
				newHeight = sampleHeight * scale;

				// Center the scaled sample on the canvas
				dx = (this._width - newWidth) / 2;
				dy = (this._height - newHeight) / 2;
			}

			const aspectRatioChange = this._rotation % 180 === 0 ? 1 : newWidth / newHeight;

			// Rotate around the center of the canvas
			context.translate(this._width / 2, this._height / 2);
			context.rotate(this._rotation * Math.PI / 180);
			// This aspect ratio compensation is done so that we can draw the sample with the intended dimensions and
			// don't need to think about how those dimensions change after the rotation
			context.scale(1 / aspectRatioChange, aspectRatioChange);
			context.translate(-this._width / 2, -this._height / 2);

			context.drawImage(sample.toCanvasImageSource(), dx, dy, newWidth, newHeight);

			return {
				canvas,
				timestamp: sample.timestamp,
				duration: sample.duration,
			};
		} finally {
			// Release the sample regardless of whether drawing succeeded
			sample.close();
		}
	}

	/**
	 * Retrieves a canvas with the video frame corresponding to the given timestamp, in seconds. More specifically,
	 * returns the last video frame (in presentation order) with a start timestamp less than or equal to the given
	 * timestamp. Returns null if the timestamp is before the track's first timestamp.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getCanvas(timestamp: number) {
		validateTimestamp(timestamp);
		const sample = await this._videoSampleSink.getSample(timestamp);
		return sample && this._videoSampleToWrappedCanvas(sample);
	}

	/**
	 * Creates an async iterator that yields canvases with the video frames of this track in presentation order. This
	 * method will intelligently pre-decode a few frames ahead to enable fast iteration.
	 *
	 * @param startTimestamp - The timestamp in seconds at which to start yielding canvases (inclusive).
	 * @param endTimestamp - The timestamp in seconds at which to stop yielding canvases (exclusive).
	 */
	canvases(startTimestamp = 0, endTimestamp = Infinity) {
		return mapAsyncGenerator(
			this._videoSampleSink.samples(startTimestamp, endTimestamp),
			sample => this._videoSampleToWrappedCanvas(sample),
		);
	}

	/**
	 * Creates an async iterator that yields a canvas for each timestamp in the argument. This method uses an optimized
	 * decoding pipeline if these timestamps are monotonically sorted, decoding each packet at most once, and is
	 * therefore more efficient than manually getting the canvas for every timestamp. The iterator may yield null if
	 * no frame is available for a given timestamp.
	 *
	 * @param timestamps - An iterable or async iterable of timestamps in seconds.
	 */
	canvasesAtTimestamps(timestamps: AnyIterable<number>) {
		return mapAsyncGenerator(
			this._videoSampleSink.samplesAtTimestamps(timestamps),
			sample => sample && this._videoSampleToWrappedCanvas(sample),
		);
	}
}
class AudioDecoderWrapper extends DecoderWrapper<AudioSample> {
	/** Native decoder; only created when no registered custom decoder supports the codec and config. */
	decoder: AudioDecoder | null = null;
	/** Instance of the first registered custom decoder that reports support for the codec and config, if any. */
	customDecoder: CustomAudioDecoder | null = null;
	/** Every call into the custom decoder (init, decode, flush, close) is routed through this serializer. */
	customDecoderCallSerializer = new CallSerializer();
	/** Number of decode calls handed to the custom decoder whose serialized call has not resolved yet. */
	customDecoderQueueSize = 0;

	constructor(
		onSample: (sample: AudioSample) => unknown,
		onError: (error: DOMException) => unknown,
		codec: AudioCodec,
		decoderConfig: AudioDecoderConfig,
	) {
		super(onSample, onError);

		// Shared output path for both the native and the custom decoder
		const forwardSample = (decoded: AudioSample) => {
			if (decoded.numberOfFrames !== 0) {
				// Snap the timestamp onto the sample-rate grid before handing the sample on
				const rate = decoderConfig.sampleRate;
				decoded.setTimestamp(Math.round(decoded.timestamp * rate) / rate);
				onSample(decoded);
				return;
			}

			// Empty AudioSamples carry no data and are dropped. These are sometimes emitted, for example, by Firefox
			// when it decodes Vorbis (at the start).
			decoded.close();
		};

		const CustomDecoderClass = customAudioDecoders.find(candidate => candidate.supports(codec, decoderConfig));

		if (!CustomDecoderClass) {
			// No custom decoder is applicable, fall back to the native AudioDecoder
			this.decoder = new AudioDecoder({
				error: onError,
				output: data => forwardSample(new AudioSample(data)),
			});
			this.decoder.configure(decoderConfig);
			return;
		}

		// @ts-expect-error "Can't create instance of abstract class 🤓"
		this.customDecoder = new CustomDecoderClass() as CustomAudioDecoder;
		// @ts-expect-error It's technically readonly
		this.customDecoder.codec = codec;
		// @ts-expect-error It's technically readonly
		this.customDecoder.config = decoderConfig;
		// @ts-expect-error It's technically readonly
		this.customDecoder.onSample = (emitted) => {
			if (emitted instanceof AudioSample) {
				forwardSample(emitted);
				return;
			}

			throw new TypeError('The argument passed to onSample must be an AudioSample.');
		};

		void this.customDecoderCallSerializer.call(() => this.customDecoder!.init());
	}

	/** Returns the number of pending decode calls of whichever decoder is in use. */
	getDecodeQueueSize() {
		if (!this.customDecoder) {
			assert(this.decoder);
			return this.decoder.decodeQueueSize;
		}

		return this.customDecoderQueueSize;
	}

	/** Hands the packet to the active decoder. */
	decode(packet: EncodedPacket) {
		if (!this.customDecoder) {
			assert(this.decoder);
			this.decoder.decode(packet.toEncodedAudioChunk());
			return;
		}

		// Counted as queued until the serialized decode call has resolved
		this.customDecoderQueueSize++;
		const pendingDecode = this.customDecoderCallSerializer.call(() => this.customDecoder!.decode(packet));
		void pendingDecode.then(() => this.customDecoderQueueSize--);
	}

	/** Flushes the active decoder and returns the result of that flush call. */
	flush() {
		if (!this.customDecoder) {
			assert(this.decoder);
			return this.decoder.flush();
		}

		return this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
	}

	/** Closes the active decoder. */
	close() {
		if (!this.customDecoder) {
			assert(this.decoder);
			this.decoder.close();
			return;
		}

		void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
	}
}
// There are a lot of PCM variants not natively supported by the browser and by AudioData. Therefore we need a simple
// decoder that maps any input PCM format into a PCM format supported by the browser.
class PcmAudioDecoderWrapper extends DecoderWrapper<AudioSample> {
	/** The PCM codec taken from the decoder config. */
	codec: PcmAudioCodec;
	/** Size in bytes of a single input value (one channel of one frame). */
	inputSampleSize: 1 | 2 | 3 | 4 | 8;
	/** Reads one input value at the given byte offset. Unsigned integer formats are shifted into the signed range. */
	readInputValue: (view: DataView, byteOffset: number) => number;
	/** Size in bytes of a single output value. */
	outputSampleSize: 1 | 2 | 4;
	/** Sample format of the emitted AudioSamples. */
	outputFormat: 'u8' | 's16' | 's32' | 'f32';
	/** Writes one value, as produced by `readInputValue`, at the given byte offset in the output format. */
	writeOutputValue: (view: DataView, byteOffset: number, value: number) => void;

	// Internal state to accumulate a precise current timestamp based on audio durations, not the (potentially
	// inaccurate) sample timestamps.
	currentTimestamp: number | null = null;

	/**
	 * @param onSample - Called synchronously from `decode` with each converted sample.
	 * @param onError - Passed on to the base class; not used by this class itself.
	 * @param decoderConfig - Decoder config; its `codec` must be one of the PCM audio codecs.
	 */
	constructor(
		onSample: (sample: AudioSample) => unknown,
		onError: (error: DOMException) => unknown,
		public decoderConfig: AudioDecoderConfig,
	) {
		super(onSample, onError);

		assert((PCM_AUDIO_CODECS as readonly string[]).includes(decoderConfig.codec));
		this.codec = decoderConfig.codec as PcmAudioCodec;

		const { dataType, sampleSize, littleEndian } = parsePcmCodec(this.codec);
		this.inputSampleSize = sampleSize;

		// Pick the reader matching the input format
		switch (sampleSize) {
			case 1: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint8(byteOffset) - 2 ** 7;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt8(byteOffset);
				} else if (dataType === 'ulaw') {
					this.readInputValue = (view, byteOffset) => fromUlaw(view.getUint8(byteOffset));
				} else if (dataType === 'alaw') {
					this.readInputValue = (view, byteOffset) => fromAlaw(view.getUint8(byteOffset));
				} else {
					assert(false);
				}
				break;
			}
			case 2: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint16(byteOffset, littleEndian) - 2 ** 15;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt16(byteOffset, littleEndian);
				} else {
					assert(false);
				}
				break;
			}
			case 3: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => getUint24(view, byteOffset, littleEndian) - 2 ** 23;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => getInt24(view, byteOffset, littleEndian);
				} else {
					assert(false);
				}
				break;
			}
			case 4: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint32(byteOffset, littleEndian) - 2 ** 31;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt32(byteOffset, littleEndian);
				} else if (dataType === 'float') {
					this.readInputValue = (view, byteOffset) => view.getFloat32(byteOffset, littleEndian);
				} else {
					assert(false);
				}
				break;
			}
			case 8: {
				if (dataType === 'float') {
					this.readInputValue = (view, byteOffset) => view.getFloat64(byteOffset, littleEndian);
				} else {
					assert(false);
				}
				break;
			}
			default: {
				assertNever(sampleSize);
				assert(false);
			}
		}

		// Pick the output format and writer; output values are always written little-endian
		switch (sampleSize) {
			case 1: {
				if (dataType === 'ulaw' || dataType === 'alaw') {
					// The μ-law/A-law readers' output is written as 16-bit signed values
					this.outputSampleSize = 2;
					this.outputFormat = 's16';
					this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, value, true);
				} else {
					// Shift the signed value back into the unsigned 8-bit range
					this.outputSampleSize = 1;
					this.outputFormat = 'u8';
					this.writeOutputValue = (view, byteOffset, value) => view.setUint8(byteOffset, value + 2 ** 7);
				}
				break;
			}
			case 2: {
				this.outputSampleSize = 2;
				this.outputFormat = 's16';
				this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, value, true);
				break;
			}
			case 3: {
				this.outputSampleSize = 4;
				this.outputFormat = 's32';
				// From https://www.w3.org/TR/webcodecs:
				// AudioData containing 24-bit samples SHOULD store those samples in s32 or f32. When samples are
				// stored in s32, each sample MUST be left-shifted by 8 bits.
				this.writeOutputValue = (view, byteOffset, value) => view.setInt32(byteOffset, value << 8, true);
				break;
			}
			case 4: {
				this.outputSampleSize = 4;
				if (dataType === 'float') {
					this.outputFormat = 'f32';
					this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, true);
				} else {
					this.outputFormat = 's32';
					this.writeOutputValue = (view, byteOffset, value) => view.setInt32(byteOffset, value, true);
				}
				break;
			}
			case 8: {
				// 64-bit floats are written as 32-bit floats
				this.outputSampleSize = 4;
				this.outputFormat = 'f32';
				this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, true);
				break;
			}
			default: {
				assertNever(sampleSize);
				assert(false);
			}
		}
	}

	/** Always 0, since `decode` converts the packet synchronously. */
	getDecodeQueueSize() {
		return 0;
	}

	/**
	 * Converts the packet's PCM data into the output format and synchronously emits it via `onSample`.
	 * Only whole frames are converted; trailing bytes that don't make up a complete frame are ignored.
	 */
	decode(packet: EncodedPacket) {
		const inputView = toDataView(packet.data);
		const { numberOfChannels, sampleRate } = this.decoderConfig;

		// Floor to whole frames: a fractional frame count would make the loop below read past the end of the input
		// and would hand a non-integer frame count to AudioSample
		const numberOfFrames = Math.floor(packet.byteLength / numberOfChannels / this.inputSampleSize);
		const numberOfValues = numberOfFrames * numberOfChannels;

		const outputBuffer = new ArrayBuffer(numberOfValues * this.outputSampleSize);
		const outputView = new DataView(outputBuffer);

		// Values are converted one by one, keeping their order (and thereby the channel layout) unchanged
		for (let i = 0; i < numberOfValues; i++) {
			const inputIndex = i * this.inputSampleSize;
			const outputIndex = i * this.outputSampleSize;

			const value = this.readInputValue(inputView, inputIndex);
			this.writeOutputValue(outputView, outputIndex, value);
		}

		const preciseDuration = numberOfFrames / sampleRate;
		if (this.currentTimestamp === null || Math.abs(packet.timestamp - this.currentTimestamp) >= preciseDuration) {
			// We need to sync with the packet timestamp again
			this.currentTimestamp = packet.timestamp;
		}

		const preciseTimestamp = this.currentTimestamp;
		this.currentTimestamp += preciseDuration;

		const audioSample = new AudioSample({
			format: this.outputFormat,
			data: outputBuffer,
			numberOfChannels,
			sampleRate,
			numberOfFrames,
			timestamp: preciseTimestamp,
		});

		this.onSample(audioSample);
	}

	async flush() {
		// Do nothing
	}

	close() {
		// Do nothing
	}
}
/**
* Sink for retrieving decoded audio samples from an audio track.
* @public
*/
export class AudioSampleSink extends BaseMediaSampleSink<AudioSample