mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,502 lines (1,270 loc) • 103 kB
text/typescript
/*!
* Copyright (c) 2026-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import { buildAacAudioSpecificConfig, parseAacAudioSpecificConfig } from '../shared/aac-misc';
import {
AUDIO_CODECS,
AudioCodec,
MediaCodec,
parsePcmCodec,
PCM_AUDIO_CODECS,
PcmAudioCodec,
SUBTITLE_CODECS,
SubtitleCodec,
VIDEO_CODECS,
VideoCodec,
} from './codec';
import { OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from './output';
import {
assert,
assertNever,
binarySearchLessOrEqual,
CallSerializer,
clamp,
clearIntervalUnthrottled,
floorToDivisor,
last,
promiseWithResolvers,
roundToDivisor,
setInt24,
setIntervalUnthrottled,
setUint24,
toUint8Array,
UnthrottledTimerHandle,
} from './misc';
import { Muxer } from './muxer';
import { SubtitleParser } from './subtitles';
import { toAlaw, toUlaw } from './pcm';
import {
CustomVideoEncoder,
CustomAudioEncoder,
customVideoEncoders,
customAudioEncoders,
} from './custom-coder';
import { EncodedPacket, EncodedPacketSideData, PacketType } from './packet';
import {
AudioSample,
audioSampleToInterleavedFormat,
toInterleavedAudioFormat,
VideoSample,
VideoSamplePixelFormat,
} from './sample';
import {
AudioEncodingConfig,
buildAudioEncoderConfig,
buildVideoEncoderConfig,
validateAudioEncodingConfig,
validateVideoEncodingConfig,
VideoEncodingConfig,
} from './encode';
import { AudioResampler } from './resample';
import { determineVideoPacketType } from './codec-data';
/**
 * Abstract foundation shared by all media sources. A media source is what feeds media samples into a track of an
 * output file.
 * @group Media sources
 * @public
 */
export abstract class MediaSource {
    /** @internal */
    abstract readonly _codec: MediaCodec;
    /** @internal */
    _connectedTrack: OutputTrack | null = null;
    /** @internal */
    _closingPromise: Promise<void> | null = null;
    /** @internal */
    _closed = false;

    /**
     * Throws unless a sample may be added right now: the source must be connected to a track, the output must be
     * neither canceled, finalizing/finalized nor still pending, and the source must not be closed.
     * @internal
     */
    _ensureValidAdd() {
        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Source is not connected to an output track.');
        }

        switch (track.output.state) {
            case 'canceled':
                throw new Error('Output has been canceled.');
            case 'finalizing':
            case 'finalized':
                throw new Error('Output has been finalized.');
            case 'pending':
                throw new Error('Output has not started.');
        }

        if (this._closed) {
            throw new Error('Source is closed.');
        }
    }

    /**
     * Start hook; a no-op in the base class.
     * @internal
     */
    async _start() {}

    /**
     * Flush-and-close hook; a no-op in the base class.
     * @internal
     */
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    async _flushAndClose(forceClose: boolean) {}

    /**
     * Closes this source. This prevents future samples from being added and signals to the output file that no further
     * samples will come in for this track. Calling `.close()` is optional but recommended after adding the
     * last sample - for improved performance and reduced memory usage.
     */
    close() {
        // A close is already underway (or done); nothing more to do
        if (this._closingPromise) {
            return;
        }

        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Cannot call close without connecting the source to an output track.');
        }
        if (track.output.state === 'pending') {
            throw new Error('Cannot call close before output has been started.');
        }

        const finishClosing = async () => {
            await this._flushAndClose(false);
            this._closed = true;

            // Only notify the muxer while the output is not already being finalized
            const { state } = track.output;
            if (state !== 'finalizing' && state !== 'finalized') {
                track.output._muxer.onTrackClose(track);
            }
        };
        this._closingPromise = finishClosing();
    }

    /**
     * Returns the promise of the close operation already in progress, or starts a flush-and-close of its own (which
     * does not notify the muxer) and returns that.
     * @internal
     */
    async _flushOrWaitForOngoingClose(forceClose: boolean) {
        let closing = this._closingPromise;
        if (closing == null) {
            closing = (async () => {
                await this._flushAndClose(forceClose);
                this._closed = true;
            })();
            this._closingPromise = closing;
        }
        return closing;
    }
}
/**
 * Common parent of all video sources, i.e. of every source that feeds a video track.
 * @group Media sources
 * @public
 */
export abstract class VideoSource extends MediaSource {
    /** @internal */
    override _connectedTrack: OutputVideoTrack | null = null;
    /** @internal */
    override readonly _codec: VideoCodec;

    /** Internal constructor. */
    constructor(codec: VideoCodec) {
        super();

        // Reject anything that is not listed in VIDEO_CODECS
        const isKnownCodec = VIDEO_CODECS.includes(codec);
        if (!isKnownCodec) {
            const supportedList = VIDEO_CODECS.join(', ');
            throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${supportedList}.`);
        }

        this._codec = codec;
    }
}
/** Throws when `track` is flagged as `hasOnlyKeyPackets` but `packet` is not a key packet; otherwise does nothing. */
const maybeEnsureIsKeyPacket = (track: OutputVideoTrack, packet: EncodedPacket) => {
    if (!track.metadata.hasOnlyKeyPackets) {
        return;
    }
    if (packet.type === 'key') {
        return;
    }

    throw new Error('Cannot add non-key packets to a hasOnlyKeyPackets video track.');
};
/**
 * The simplest video source there is: it forwards already-encoded packets straight into the output file.
 * @group Media sources
 * @public
 */
export class EncodedVideoPacketSource extends VideoSource {
    /** Creates a new {@link EncodedVideoPacketSource} whose packets are encoded using `codec`. */
    constructor(codec: VideoCodec) {
        super(codec);
    }

    /**
     * Adds an encoded packet to the output video track. Packets must be added in *decode order*, while a packet's
     * timestamp must be its *presentation timestamp*. B-frames are handled automatically.
     *
     * @param packet - The encoded packet to add. Must be an {@link EncodedPacket} that is not metadata-only.
     * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid
     * decoder config.
     *
     * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
     * to respect writer and encoder backpressure.
     */
    add(packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) {
        const packetIsValid = packet instanceof EncodedPacket;
        if (!packetIsValid) {
            throw new TypeError('packet must be an EncodedPacket.');
        }
        if (packet.isMetadataOnly) {
            throw new TypeError('Metadata-only packets cannot be added.');
        }

        const metaIsInvalid = meta !== undefined && (!meta || typeof meta !== 'object');
        if (metaIsInvalid) {
            throw new TypeError('meta, when provided, must be an object.');
        }

        this._ensureValidAdd();

        const track = this._connectedTrack!;
        maybeEnsureIsKeyPacket(track, packet);

        return track.output._muxer.addEncodedVideoPacket(track, packet, meta);
    }
}
class VideoEncoderWrapper {
private ensureEncoderPromise: Promise<void> | null = null;
private encoderInitialized = false;
private encoder: VideoEncoder | null = null;
private muxer: Muxer | null = null;
private lastMultipleOfKeyFrameInterval = -1;
private emittedEncoderPackets = 0;
// Tracks the input dimensions of the first frame
private codedWidth: number | null = null;
private codedHeight: number | null = null;
// Tracks the output dimensions of the first frame (used to lock dimensions for fill/contain/cover)
private outputWidth: number | null = null;
private outputHeight: number | null = null;
// Frame rate normalization state
private frameRateLastSample: VideoSample | null = null;
private frameRateLastTimestamp: number | null = null;
private frameRateLastEndTimestamp: number | null = null;
// VideoEncoder converts everything to microseconds, so we need to do some bookkeeping to restore the original
// timing information
private preciseTimings: {
microsecondTimestamp: number;
timestamp: number;
duration: number;
timestampIsValid: boolean;
durationIsValid: boolean;
}[] = [];
private customEncoder: CustomVideoEncoder | null = null;
private customEncoderCallSerializer = new CallSerializer();
private customEncoderQueueSize = 0;
// Alpha stuff
private alphaEncoder: VideoEncoder | null = null;
private splitter: ColorAlphaSplitter | null = null;
private splitterCreationFailed = false;
private alphaFrameQueue: (VideoFrame | null)[] = [];
/**
* Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
* However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught.
* So, we keep track of the encoder error and throw it as soon as we get the chance.
*/
private error: Error | null = null;
private lastMuxerPromise: Promise<void> = Promise.resolve();
constructor(private source: VideoSource, private encodingConfig: VideoEncodingConfig) {}
/**
 * Takes one video sample through size-change handling, the optional transform and the optional frame rate
 * normalization, and then passes it on to `processAndEncode`.
 *
 * @param videoSample - The sample to encode.
 * @param shouldClose - When `true`, this method closes the passed sample once it is no longer needed.
 * @param encodeOptions - Per-sample options that are forwarded to the encoding step.
 * @throws A previously recorded encoder error, any error raised by the source's `_ensureValidAdd`, or an error when
 * the sample size changes while `sizeChangeBehavior` is `'deny'`.
 */
async add(videoSample: VideoSample, shouldClose: boolean, encodeOptions?: VideoEncoderEncodeOptions) {
    const originalSample = videoSample;

    try {
        this.checkForEncoderError();
        this.source._ensureValidAdd();

        const config = this.encodingConfig;
        const sizeChangeBehavior = config.sizeChangeBehavior ?? 'deny';
        let isSizeChange = false;

        // Ensure video sample size remains constant or handle the change
        if (this.codedWidth !== null && this.codedHeight !== null) {
            if (videoSample.codedWidth !== this.codedWidth || videoSample.codedHeight !== this.codedHeight) {
                isSizeChange = true;

                if (sizeChangeBehavior === 'deny') {
                    throw new Error(
                        `Video sample size must remain constant. Expected ${this.codedWidth}x${this.codedHeight},`
                        + ` got ${videoSample.codedWidth}x${videoSample.codedHeight}. To allow the sample size to`
                        + ` change over time, set \`sizeChangeBehavior\` to a value other than 'deny' in the`
                        + ` encoding options.`,
                    );
                }
            }
        } else {
            // First sample: remember its input dimensions as the reference
            this.codedWidth = videoSample.codedWidth;
            this.codedHeight = videoSample.codedHeight;
        }

        // Determine if we need to apply transformations via canvas
        const hasTransformConfig = config.transform?.width !== undefined
            || config.transform?.height !== undefined
            || config.transform?.rotate !== undefined
            || config.transform?.crop !== undefined
            || config.transform?.force === true;
        const needsTransform = hasTransformConfig || (isSizeChange && sizeChangeBehavior !== 'passThrough');

        if (needsTransform) {
            let targetWidth = config.transform?.width;
            let targetHeight = config.transform?.height;
            let appliedFit: 'fill' | 'contain' | 'cover' = config.transform?.fit ?? 'fill';

            // If the size changed and behavior is fill/contain/cover, lock to the original output dimensions
            if (isSizeChange && sizeChangeBehavior !== 'passThrough') {
                assert(this.outputWidth);
                assert(this.outputHeight);
                assert(sizeChangeBehavior !== 'deny');

                targetWidth = this.outputWidth!;
                targetHeight = this.outputHeight!;
                appliedFit = sizeChangeBehavior;
            }

            const transformed = await videoSample.transform({
                width: targetWidth,
                height: targetHeight,
                roundDimensionsTo: 2,
                crop: config.transform?.crop,
                rotate: config.transform?.rotate,
                fit: appliedFit,
                alpha: config.alpha,
            });

            // Save the output dimensions of the first frame
            if (this.outputWidth === null || this.outputHeight === null) {
                this.outputWidth = transformed.displayWidth;
                this.outputHeight = transformed.displayHeight;
            }

            // The input is no longer needed; from here on we work with (and own) the transformed sample
            if (shouldClose) {
                videoSample.close();
            }
            videoSample = transformed;
            shouldClose = true;
        } else {
            // If no canvas is needed, we still need to record the output dimensions for the first frame
            if (this.outputWidth === null || this.outputHeight === null) {
                this.outputWidth = videoSample.codedWidth;
                this.outputHeight = videoSample.codedHeight;
            }
        }

        const frameRate = config.transform?.frameRate;
        if (frameRate !== undefined) {
            // Apply frame rate normalization
            const originalEndTimestamp = videoSample.timestamp + videoSample.duration;
            const alignedTimestamp = floorToDivisor(videoSample.timestamp, frameRate);

            if (this.frameRateLastSample !== null) {
                if (alignedTimestamp <= this.frameRateLastTimestamp!) {
                    // Same frame rate slot, replace stored sample with the newer one
                    this.frameRateLastSample.close();
                    this.frameRateLastSample = videoSample.clone();
                    this.frameRateLastEndTimestamp = originalEndTimestamp;

                    return;
                } else {
                    // Pad the gap by repeating the previous frame
                    await this.padFrameRate(alignedTimestamp, encodeOptions);
                }
            }

            // Clone if the sample is still the one that was passed in, so the retiming below never touches it
            if (videoSample === originalSample) {
                const retimableCopy = videoSample.clone();

                if (shouldClose) {
                    // We were asked to close the passed sample. Since only the copy is tracked from here on, the
                    // original must be closed now or it would never be closed at all.
                    originalSample.close();
                }

                videoSample = retimableCopy;
                shouldClose = true;
            }

            videoSample.setTimestamp(alignedTimestamp);
            videoSample.setDuration(1 / frameRate);

            // Keep a copy of the retimed sample around; it is what padFrameRate repeats to fill gaps
            this.frameRateLastSample?.close();
            this.frameRateLastSample = videoSample.clone();
            this.frameRateLastTimestamp = alignedTimestamp;
            this.frameRateLastEndTimestamp = originalEndTimestamp;
        }

        await this.processAndEncode(videoSample, encodeOptions);
    } finally {
        if (shouldClose) {
            videoSample.close();
        }
    }
}
/**
* Runs the process function (if any) and encodes the resulting samples.
*
* The user-defined `transform.process` function may return its result directly or as a Promise. A `null` result
* drops the sample; a single value is treated like a one-element array. Each entry that is not already a
* `VideoSample` gets wrapped in one. The encoder is created lazily when the first sample arrives here.
*
* @param videoSample - The sample to process and encode. It is not closed by this method.
* @param encodeOptions - Options merged into the per-sample encode options (the `keyFrame` flag is recomputed).
*/
private async processAndEncode(
videoSample: VideoSample,
encodeOptions?: VideoEncoderEncodeOptions,
) {
const config = this.encodingConfig;
let samplesToEncode: VideoSample[];
// Apply the user-defined process function, if any
if (config.transform?.process) {
let processed = config.transform.process(videoSample);
if (processed instanceof Promise) {
processed = await processed;
}
// A null result means: skip this sample entirely
if (processed === null) {
return;
}
if (!Array.isArray(processed)) {
processed = [processed];
}
samplesToEncode = processed.map((x) => {
if (x instanceof VideoSample) {
return x;
}
if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) {
return new VideoSample(x);
}
// Anything else is handed to VideoSample as an image source, with the timing of the input sample
return new VideoSample(x as CanvasImageSource, {
timestamp: videoSample.timestamp,
duration: videoSample.duration,
});
});
} else {
samplesToEncode = [videoSample];
}
try {
for (const sampleToEncode of samplesToEncode) {
if (!this.encoderInitialized) {
if (!this.ensureEncoderPromise) {
this.ensureEncoder(sampleToEncode);
}
// No, this "if" statement is not useless. Sometimes, the above call to
// `ensureEncoder` might have synchronously completed and the encoder is
// already initialized. In this case, we don't need to await the promise
// anymore. This also fixes nasty async race condition bugs when multiple
// code paths are calling this method: It's important that the call that
// initialized the encoder go through this code first.
if (!this.encoderInitialized) {
await this.ensureEncoderPromise;
}
}
assert(this.encoderInitialized);
const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 2;
// Index of the key frame interval this sample's timestamp falls into
const multipleOfKeyFrameInterval = Math.floor(sampleToEncode.timestamp / keyFrameInterval);
// Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks
// follow the same "key frame" rhythm, because aligned key frames are required to start new
// fragments in ISOBMFF or clusters in Matroska (or at least desirable).
const finalEncodeOptions = {
...encodeOptions,
keyFrame: encodeOptions?.keyFrame
|| keyFrameInterval === 0
|| multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval,
};
this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval;
if (this.customEncoder) {
this.customEncoderQueueSize++;
// We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
const clonedSample = sampleToEncode.clone();
// Encode calls go through the call serializer; a failure is recorded in `this.error` instead of rejecting
const promise = this.customEncoderCallSerializer
.call(() => this.customEncoder!.encode(clonedSample, finalEncodeOptions))
.then(() => this.customEncoderQueueSize--)
.catch((error: Error) => this.error ??= error)
.finally(() => {
clonedSample.close();
});
// Backpressure: only wait for this encode once four or more are pending
if (this.customEncoderQueueSize >= 4) {
await promise;
}
} else {
assert(this.encoder);
const videoFrame = sampleToEncode.toVideoFrame();
// Record this sample's timestamp and duration under the frame's timestamp, so the encoder output
// callback can look them up again. The list is kept sorted by microsecondTimestamp.
const preciseTimingIndex = binarySearchLessOrEqual(
this.preciseTimings,
videoFrame.timestamp,
x => x.microsecondTimestamp,
);
const existingEntry = preciseTimingIndex !== -1
? this.preciseTimings[preciseTimingIndex]
: null;
if (existingEntry && existingEntry.microsecondTimestamp === videoFrame.timestamp) {
if (existingEntry.timestamp !== sampleToEncode.timestamp) {
// Mapping isn't unique, can't use the timestamp
existingEntry.timestampIsValid = false;
}
if (existingEntry.duration !== sampleToEncode.duration) {
// Mapping isn't unique, can't use the duration
existingEntry.durationIsValid = false;
}
} else {
this.preciseTimings.splice(preciseTimingIndex + 1, 0, {
microsecondTimestamp: videoFrame.timestamp,
timestamp: sampleToEncode.timestamp,
duration: sampleToEncode.duration,
timestampIsValid: true,
durationIsValid: true,
});
// Make sure it doesn't grow indefinitely
if (this.preciseTimings.length > 128) {
this.preciseTimings.shift();
}
}
if (!this.alphaEncoder) {
// No alpha encoder, simple case
this.encoder.encode(videoFrame, finalEncodeOptions);
videoFrame.close();
} else {
// We're expected to encode alpha as well
const frameDefinitelyHasNoAlpha = !!videoFrame.format && !videoFrame.format.includes('A');
if (frameDefinitelyHasNoAlpha || this.splitterCreationFailed) {
// A null entry tells the color encoder's output callback that this frame has no alpha part
this.alphaFrameQueue.push(null);
this.encoder.encode(videoFrame, finalEncodeOptions);
videoFrame.close();
} else {
const width = videoFrame.displayWidth;
const height = videoFrame.displayHeight;
if (!this.splitter) {
this.splitter = new ColorAlphaSplitter(width, height);
}
// The splitter takes ownership, so no need to close the frames ourselves
const { colorFrame, alphaFrame } = await this.splitter.update(videoFrame);
// The alpha frame waits in the queue until the color encoder has emitted the matching chunk
this.alphaFrameQueue.push(alphaFrame);
this.encoder.encode(colorFrame, finalEncodeOptions);
colorFrame.close();
}
}
// We need to do this after sending the frame to the encoder as the frame otherwise might be closed
if (this.encoder.encodeQueueSize >= 4) {
await new Promise(resolve =>
this.encoder!.addEventListener('dequeue', resolve, { once: true }),
);
}
}
await this.lastMuxerPromise; // Allow the writer to apply backpressure
}
} finally {
// Close every sample produced by the process function; the input sample itself is left open
for (const sample of samplesToEncode) {
if (sample !== videoSample) {
sample.close();
}
}
}
}
/**
 * Repeats the last frame rate sample to fill the gap up to the given timestamp.
 *
 * One copy of `frameRateLastSample` is encoded for every frame slot that lies strictly between
 * `frameRateLastTimestamp` and `until`; neither of the two boundary slots gets a frame here.
 *
 * @param until - Timestamp of the slot at which the padding stops (exclusive).
 * @param encodeOptions - Options forwarded to the encoding step for each padding frame.
 */
private async padFrameRate(until: number, encodeOptions?: VideoEncoderEncodeOptions) {
    const frameRate = this.encodingConfig.transform!.frameRate!;
    assert(this.frameRateLastSample);

    const frameDifference = Math.round((until - this.frameRateLastTimestamp!) * frameRate);

    for (let i = 1; i < frameDifference; i++) {
        const sample = this.frameRateLastSample.clone();

        try {
            sample.setTimestamp(this.frameRateLastTimestamp! + i / frameRate);
            sample.setDuration(1 / frameRate);

            await this.processAndEncode(sample, encodeOptions);
        } finally {
            // Close the copy even when encoding throws, so a failing encoder doesn't leak it
            sample.close();
        }
    }
}
/**
* Starts encoder creation and stores the resulting promise in `ensureEncoderPromise`.
*
* The encoder config is built from the encoding config and the dimensions of the given sample. If a registered
* custom encoder reports support for that config, it is used; otherwise a WebCodecs `VideoEncoder` is created,
* plus a second one for the alpha channel when `alpha` is `'keep'`. Failures reject the stored promise.
*
* @param videoSample - The sample whose dimensions the encoder is configured with.
*/
private ensureEncoder(videoSample: VideoSample) {
this.ensureEncoderPromise = (async () => {
const encoderConfig = buildVideoEncoderConfig({
...this.encodingConfig,
width: videoSample.codedWidth,
height: videoSample.codedHeight,
squarePixelWidth: videoSample.squarePixelWidth,
squarePixelHeight: videoSample.squarePixelHeight,
framerate: this.source._connectedTrack?.metadata.frameRate,
});
// Let the user observe the final encoder config
this.encodingConfig.onEncoderConfig?.(encoderConfig);
// The first registered custom encoder that supports this codec and config wins
const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports(
this.encodingConfig.codec,
encoderConfig,
));
if (MatchingCustomEncoder) {
// @ts-expect-error "Can't create instance of abstract class 🤓"
this.customEncoder = new MatchingCustomEncoder() as CustomVideoEncoder;
// @ts-expect-error It's technically readonly
this.customEncoder.codec = this.encodingConfig.codec;
// @ts-expect-error It's technically readonly
this.customEncoder.config = encoderConfig;
// @ts-expect-error It's technically readonly
this.customEncoder.onPacket = (packet, meta) => {
if (!(packet instanceof EncodedPacket)) {
throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
}
if (meta !== undefined && (!meta || typeof meta !== 'object')) {
throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
}
maybeEnsureIsKeyPacket(this.source._connectedTrack!, packet);
this.encodingConfig.onEncodedPacket?.(packet, meta);
// Muxer failures are recorded in `this.error` rather than left to reject this promise
this.lastMuxerPromise
= this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta)
.catch((error) => {
this.error ??= error;
});
};
await this.customEncoder.init();
} else {
if (typeof VideoEncoder === 'undefined') {
throw new Error('VideoEncoder is not supported by this browser.');
}
encoderConfig.alpha = 'discard'; // Since we handle alpha ourselves
if (this.encodingConfig.alpha === 'keep') {
// Encoding alpha requires using two parallel encoders, so we need to make sure they stay in sync
// and that neither of them drops frames. Setting latencyMode to 'quality' achieves this, because
// "User Agents MUST not drop frames to achieve the target bitrate and/or framerate."
encoderConfig.latencyMode = 'quality';
}
const hasOddDimension = encoderConfig.width % 2 === 1 || encoderConfig.height % 2 === 1;
if (
hasOddDimension
&& (this.encodingConfig.codec === 'avc' || this.encodingConfig.codec === 'hevc')
) {
// Throw a special error for this case as it gets hit often
throw new Error(
`The dimensions ${encoderConfig.width}x${encoderConfig.height} are not supported for codec`
+ ` '${this.encodingConfig.codec}'; both width and height must be even numbers. Make sure to`
+ ` round your dimensions to the nearest even number.`,
);
}
const support = await VideoEncoder.isConfigSupported(encoderConfig);
if (!support.supported) {
throw new Error(
`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
+ ` ${encoderConfig.width}x${encoderConfig.height}, hardware acceleration:`
+ ` ${encoderConfig.hardwareAcceleration ?? 'no-preference'}) is not supported by this browser.`
+ ` Consider using another codec or changing your video parameters.`,
);
}
/** Queue of color chunks waiting for their alpha counterpart. */
const colorChunkQueue: {
chunk: EncodedVideoChunk;
meta: EncodedVideoChunkMetadata | undefined;
}[] = [];
/** Each value is the number of encoded alpha chunks at which a null alpha chunk should be added. */
const nullAlphaChunkQueue: number[] = [];
// Number of alpha chunks the alpha encoder has emitted so far
let encodedAlphaChunkCount = 0;
// Number of alpha frames submitted to the alpha encoder that have not come out yet
let alphaEncoderQueue = 0;
// Builds the final packet from a color chunk (and its alpha chunk, if any) and hands it to the muxer
const addPacket = (
colorChunk: EncodedVideoChunk,
alphaChunk: EncodedVideoChunk | null,
meta: EncodedVideoChunkMetadata | undefined,
) => {
const sideData: EncodedPacketSideData = {};
if (alphaChunk) {
const alphaData = new Uint8Array(alphaChunk.byteLength);
alphaChunk.copyTo(alphaData);
sideData.alpha = alphaData;
}
let packet = EncodedPacket.fromEncodedChunk(colorChunk, sideData);
// See if there's a relevant timing entry to refine the packet's timing data
const preciseTimingIndex = binarySearchLessOrEqual(
this.preciseTimings,
colorChunk.timestamp,
x => x.microsecondTimestamp,
);
const entry = preciseTimingIndex !== -1
? this.preciseTimings[preciseTimingIndex]
: null;
let actualType: PacketType | null = null;
if (this.emittedEncoderPackets === 0 && packet.type === 'delta' && meta?.decoderConfig) {
// https://github.com/Vanilagy/mediabunny/issues/365
// We expect the first packet to be a key packet. If it's not, let's actually verify that it's
// not by getting the actual type.
actualType = determineVideoPacketType(
this.encodingConfig.codec,
meta.decoderConfig,
packet.data,
);
}
// Define the packet
if ((entry && entry.microsecondTimestamp === colorChunk.timestamp) || actualType !== null) {
// Fields passed as undefined are presumably left unchanged by clone() - TODO confirm
packet = packet.clone({
timestamp: entry?.timestampIsValid ? entry.timestamp : undefined,
duration: entry?.durationIsValid ? entry.duration : undefined,
type: actualType ?? undefined,
});
}
maybeEnsureIsKeyPacket(this.source._connectedTrack!, packet);
this.encodingConfig.onEncodedPacket?.(packet, meta);
this.lastMuxerPromise
= this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta)
.catch((error) => {
this.error ??= error;
});
this.emittedEncoderPackets++;
};
// Captured here so that encoder errors can carry a stack trace pointing at this call site
const stack = new Error('Encoding error').stack;
this.encoder = new VideoEncoder({
output: (chunk, meta) => {
if (!this.alphaEncoder) {
// We're done
addPacket(chunk, null, meta);
return;
}
// Entries were pushed in encode order by processAndEncode; null means "this frame has no alpha"
const alphaFrame = this.alphaFrameQueue.shift();
assert(alphaFrame !== undefined);
if (alphaFrame) {
this.alphaEncoder.encode(alphaFrame, {
// Crucial: The alpha frame is forced to be a key frame whenever the color frame
// also is. Without this, playback can glitch and even crash in some browsers.
// This is the reason why the two encoders are wired in series and not in parallel.
keyFrame: chunk.type === 'key',
});
alphaEncoderQueue++;
alphaFrame.close();
colorChunkQueue.push({ chunk, meta });
} else {
// There was no alpha component for this frame
if (alphaEncoderQueue === 0) {
// No pending alpha encodes either, so we're done
addPacket(chunk, null, meta);
} else {
// There are still alpha encodes pending, so we can't add the packet immediately since
// we'd end up with out-of-order packets. Instead, let's queue a null alpha chunk to be
// added in the future, after the current encoder workload has completed:
nullAlphaChunkQueue.push(encodedAlphaChunkCount + alphaEncoderQueue);
colorChunkQueue.push({ chunk, meta });
}
}
},
error: (error) => {
error.stack = stack; // Provide a more useful stack trace, the default one sucks
this.error ??= error;
},
});
this.encoder.configure(encoderConfig);
if (this.encodingConfig.alpha === 'keep') {
const stack = new Error('Encoding error').stack;
// We need to encode alpha as well, which we do with a separate encoder
this.alphaEncoder = new VideoEncoder({
// We ignore the alpha chunk's metadata
// eslint-disable-next-line @typescript-eslint/no-unused-vars
output: (chunk, meta) => {
alphaEncoderQueue--;
// There has to be a color chunk because the encoders are wired in series
const colorChunk = colorChunkQueue.shift();
assert(colorChunk !== undefined);
addPacket(colorChunk.chunk, chunk, colorChunk.meta);
// See if there are any null alpha chunks queued up
encodedAlphaChunkCount++;
while (
nullAlphaChunkQueue.length > 0
&& nullAlphaChunkQueue[0] === encodedAlphaChunkCount
) {
nullAlphaChunkQueue.shift();
const colorChunk = colorChunkQueue.shift();
assert(colorChunk !== undefined);
addPacket(colorChunk.chunk, null, colorChunk.meta);
}
},
error: (error) => {
error.stack = stack; // Provide a more useful stack trace
this.error ??= error;
},
});
// The alpha encoder gets the very same config as the color encoder
this.alphaEncoder.configure(encoderConfig);
}
}
assert(this.source._connectedTrack);
this.muxer = this.source._connectedTrack.output._muxer;
// Set last: processAndEncode only proceeds once this flag is true
this.encoderInitialized = true;
})();
}
/**
 * Shuts the wrapper down: emits the final frame rate padding, then flushes and closes whichever encoder is in use.
 *
 * @param forceClose - When `true`, padding, flushing and the encoder error checks are all skipped and the
 * encoders are just closed.
 * @throws The recorded encoder error, if there is one and `forceClose` is `false`.
 */
async flushAndClose(forceClose: boolean) {
    if (!forceClose) {
        this.checkForEncoderError();

        // Final frame rate padding: fill remaining frames up to the last sample's original end timestamp
        if (this.frameRateLastSample) {
            const frameRate = this.encodingConfig.transform!.frameRate!;
            const alignedEnd = floorToDivisor(this.frameRateLastEndTimestamp!, frameRate);
            await this.padFrameRate(alignedEnd);
        }
    }

    // The stored frame is no longer needed in either case
    this.frameRateLastSample?.close();
    this.frameRateLastSample = null;

    if (this.customEncoder) {
        if (!forceClose) {
            // Not awaited on its own; the close call below is queued behind it on the same serializer
            void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush());
        }

        await this.customEncoderCallSerializer.call(() => this.customEncoder!.close());
    } else if (this.encoder) {
        if (!forceClose) {
            // These are wired in series, therefore they must also be flushed in series
            await this.encoder.flush();
            await this.alphaEncoder?.flush();
        }

        if (this.encoder.state !== 'closed') {
            this.encoder.close();
        }
        if (this.alphaEncoder && this.alphaEncoder.state !== 'closed') {
            this.alphaEncoder.close();
        }

        // Release the alpha frames that never made it to the alpha encoder
        for (const pendingAlphaFrame of this.alphaFrameQueue) {
            pendingAlphaFrame?.close();
        }

        this.splitter?.close();
    }

    if (!forceClose) {
        this.checkForEncoderError();
    }
}
/**
 * Returns the number of pending encodes: the custom encoder's own counter when a custom encoder is in use,
 * otherwise the `encodeQueueSize` of the color encoder (0 if there is no encoder yet).
 */
getQueueSize() {
    // Because the color and alpha encoders are wired in series, there's no need to also include the alpha
    // encoder's queue size here
    return this.customEncoder
        ? this.customEncoderQueueSize
        : (this.encoder?.encodeQueueSize ?? 0);
}
/** Rethrows the encoder error recorded in `this.error`, if there is one. */
checkForEncoderError() {
    const recordedError = this.error;
    if (recordedError) {
        throw recordedError;
    }
}
}
// Module-wide flag: set to true once setting up the WebGL path of a ColorAlphaSplitter has failed. Splitters created
// afterwards skip the GPU attempt.
let splitterGpuUnavailable = false;
/** Utility class for splitting a composite frame into separate color and alpha components. */
export class ColorAlphaSplitter {
static forceCpu = true;
canvas: OffscreenCanvas | HTMLCanvasElement | null = null;
private gl: WebGL2RenderingContext | null = null;
private colorProgram: WebGLProgram | null = null;
private alphaProgram: WebGLProgram | null = null;
private vao: WebGLVertexArrayObject | null = null;
private sourceTexture: WebGLTexture | null = null;
private alphaResolutionLocation: WebGLUniformLocation | null = null;
private worker: Worker | null = null;
private pendingRequests = new Map<
number,
ReturnType<typeof promiseWithResolvers<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }>>
>();
private nextRequestId = 0;
/**
 * Creates the splitter and tries to set up the WebGL 2 path. The attempt is skipped when `forceCpu` is set, when
 * no canvas can be created, or when an earlier attempt has already failed. If the setup throws, the GPU state is
 * dropped again and a warning is logged.
 *
 * @param initialWidth - Initial width of the canvas used for the GPU path.
 * @param initialHeight - Initial height of the canvas used for the GPU path.
 */
constructor(initialWidth: number, initialHeight: number) {
    const hasOffscreenCanvas = typeof OffscreenCanvas !== 'undefined';
    const canMakeCanvas = hasOffscreenCanvas
        // eslint-disable-next-line @typescript-eslint/no-deprecated
        || (typeof document !== 'undefined' && typeof document.createElement === 'function');

    if (ColorAlphaSplitter.forceCpu || !canMakeCanvas || splitterGpuUnavailable) {
        return;
    }

    // Try the GPU path. If anything goes wrong, we silently fall back to the CPU path.
    try {
        if (hasOffscreenCanvas) {
            this.canvas = new OffscreenCanvas(initialWidth, initialHeight);
        } else {
            const canvasElement = document.createElement('canvas');
            canvasElement.width = initialWidth;
            canvasElement.height = initialHeight;
            this.canvas = canvasElement;
        }

        const gl = this.canvas.getContext('webgl2', {
            alpha: true, // Needed due to the YUV thing we do for alpha
        }) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness
        if (!gl) {
            throw new Error('Couldn\'t acquire WebGL 2 context.');
        }
        this.gl = gl;

        this.colorProgram = this.createColorProgram();
        this.alphaProgram = this.createAlphaProgram();
        this.vao = this.createVAO();
        this.sourceTexture = this.createTexture();
        this.alphaResolutionLocation = gl.getUniformLocation(this.alphaProgram, 'u_resolution')!;

        // Both programs sample the source frame from texture unit 0
        for (const program of [this.colorProgram, this.alphaProgram]) {
            gl.useProgram(program);
            gl.uniform1i(gl.getUniformLocation(program, 'u_sourceTexture'), 0);
        }
    } catch (error) {
        this.gl = null;
        this.canvas = null;
        splitterGpuUnavailable = true;
        console.warn('Falling back to CPU for color/alpha splitting.', error);
    }
}
/**
 * Splits `sourceFrame` into a color frame and an alpha frame, using the GPU path when a WebGL context is
 * available and the CPU path otherwise.
 */
async update(sourceFrame: VideoFrame) {
    return this.gl
        ? this.updateGpu(sourceFrame)
        : this.updateCpu(sourceFrame);
}
/**
 * GPU implementation of the split: uploads the frame as a texture, runs the color and the alpha program over it,
 * then closes the source frame.
 */
private updateGpu(sourceFrame: VideoFrame) {
    const gl = this.gl;
    const canvas = this.canvas;
    assert(gl);
    assert(canvas);

    // Make the canvas match the frame before rendering into it
    const { displayWidth, displayHeight } = sourceFrame;
    if (displayWidth !== canvas.width || displayHeight !== canvas.height) {
        canvas.width = displayWidth;
        canvas.height = displayHeight;
    }

    gl.activeTexture(gl.TEXTURE0);
    gl.bindTexture(gl.TEXTURE_2D, this.sourceTexture);
    gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, sourceFrame);

    const colorFrame = this.runColorProgram(sourceFrame);
    const alphaFrame = this.runAlphaProgram(sourceFrame);

    sourceFrame.close();

    return { colorFrame, alphaFrame };
}
/** Compiles the vertex shader, which passes the position through and forwards the texture coordinate. */
private createVertexShader(): WebGLShader {
    const gl = this.gl;
    assert(gl);

    const vertexSource = `#version 300 es
in vec2 a_position;
in vec2 a_texCoord;
out vec2 v_texCoord;
void main() {
gl_Position = vec4(a_position, 0.0, 1.0);
v_texCoord = a_texCoord;
}
`;

    return this.createShader(gl.VERTEX_SHADER, vertexSource);
}
/** Builds and links the program that produces the color component of a frame. */
private createColorProgram(): WebGLProgram {
    const gl = this.gl;
    assert(gl);

    const vertexStage = this.createVertexShader();

    // Nothing fancy here: the color channels are copied as they are, with alpha forced to 1
    const fragmentSource = `#version 300 es
precision highp float;
uniform sampler2D u_sourceTexture;
in vec2 v_texCoord;
out vec4 fragColor;
void main() {
vec4 source = texture(u_sourceTexture, v_texCoord);
fragColor = vec4(source.rgb, 1.0);
}
`;
    const fragmentStage = this.createShader(gl.FRAGMENT_SHADER, fragmentSource);

    const linkedProgram = gl.createProgram();
    for (const stage of [vertexStage, fragmentStage]) {
        gl.attachShader(linkedProgram, stage);
    }
    gl.linkProgram(linkedProgram);

    return linkedProgram;
}
/** Builds and links the program that produces the alpha component of a frame. */
private createAlphaProgram(): WebGLProgram {
    const gl = this.gl;
    assert(gl);

    const vertexStage = this.createVertexShader();

    // This shader is the more involved one, mainly because it writes its data in I420 (yuv420) pixel format rather
    // than regular RGBA. Put differently, the shader writes I420 bytes into an RGBA canvas, and those bytes are
    // read back out with JavaScript later. We do this because browsers encode canvases in odd ways and mess up the
    // color spaces; outputting YUV data directly (and thereby skipping the RGB conversion) is the only way to keep
    // full control over the color space. Converting in JS would be painfully slow, so we utilize the GPU, which is
    // being used anyway.
    const fragmentSource = `#version 300 es
precision highp float;
uniform sampler2D u_sourceTexture;
uniform vec2 u_resolution; // The width and height of the canvas
in vec2 v_texCoord;
out vec4 fragColor;
// This function determines the value for a single byte in the YUV stream
float getByteValue(float byteOffset) {
float width = u_resolution.x;
float height = u_resolution.y;
float yPlaneSize = width * height;
if (byteOffset < yPlaneSize) {
// This byte is in the luma plane. Find the corresponding pixel coordinates to sample from
float y = floor(byteOffset / width);
float x = mod(byteOffset, width);
// Add 0.5 to sample the center of the texel
vec2 sampleCoord = (vec2(x, y) + 0.5) / u_resolution;
// The luma value is the alpha from the source texture
return texture(u_sourceTexture, sampleCoord).a;
} else {
// Write a fixed value for chroma and beyond
return 128.0 / 255.0;
}
}
void main() {
// Each fragment writes 4 bytes (R, G, B, A)
float pixelIndex = floor(gl_FragCoord.y) * u_resolution.x + floor(gl_FragCoord.x);
float baseByteOffset = pixelIndex * 4.0;
vec4 result;
for (int i = 0; i < 4; i++) {
float currentByteOffset = baseByteOffset + float(i);
result[i] = getByteValue(currentByteOffset);
}
fragColor = result;
}
`;
    const fragmentStage = this.createShader(gl.FRAGMENT_SHADER, fragmentSource);

    const linkedProgram = gl.createProgram();
    for (const stage of [vertexStage, fragmentStage]) {
        gl.attachShader(linkedProgram, stage);
    }
    gl.linkProgram(linkedProgram);

    return linkedProgram;
}
/**
 * Creates and compiles a single shader stage.
 *
 * @param type - The shader stage, e.g. `gl.VERTEX_SHADER` or `gl.FRAGMENT_SHADER`.
 * @param source - The GLSL source code to compile.
 * @returns The shader object. A compile failure is only logged to the console; the shader is returned regardless.
 */
private createShader(type: number, source: string): WebGLShader {
	assert(this.gl);
	const gl = this.gl;

	const compiledShader = gl.createShader(type)!;
	gl.shaderSource(compiledShader, source);
	gl.compileShader(compiledShader);

	const didCompile = gl.getShaderParameter(compiledShader, gl.COMPILE_STATUS);
	if (!didCompile) {
		console.error('Shader compile error:', gl.getShaderInfoLog(compiledShader));
	}

	return compiledShader;
}
/**
 * Creates the vertex array object describing a full-viewport quad, drawn as a four-vertex triangle strip. The
 * vertex data is interleaved: two floats of clip-space position followed by two floats of texture coordinate.
 *
 * NOTE(review): attribute locations are queried from the color program only, yet the resulting vertex array is also
 * bound when drawing with the alpha program. This relies on both programs using identical attribute locations -
 * confirm.
 */
private createVAO(): WebGLVertexArrayObject {
	assert(this.gl);
	assert(this.colorProgram);
	const gl = this.gl;
	const program = this.colorProgram;

	const vertexArray = gl.createVertexArray();
	gl.bindVertexArray(vertexArray);

	// Each row is (x, y, u, v). The v coordinate runs opposite to clip-space y, so the quad is vertically flipped
	// relative to texture space.
	const quadVertices = new Float32Array([
		-1, -1, 0, 1,
		1, -1, 1, 1,
		-1, 1, 0, 0,
		1, 1, 1, 0,
	]);
	const vertexBuffer = gl.createBuffer();
	gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
	gl.bufferData(gl.ARRAY_BUFFER, quadVertices, gl.STATIC_DRAW);

	const componentsPerAttribute = 2;
	const strideBytes = 4 * Float32Array.BYTES_PER_ELEMENT; // Four floats per vertex = 16 bytes
	const texCoordOffsetBytes = 2 * Float32Array.BYTES_PER_ELEMENT; // Texture coordinate follows position = 8 bytes

	const positionLocation = gl.getAttribLocation(program, 'a_position');
	const texCoordLocation = gl.getAttribLocation(program, 'a_texCoord');

	gl.enableVertexAttribArray(positionLocation);
	gl.vertexAttribPointer(positionLocation, componentsPerAttribute, gl.FLOAT, false, strideBytes, 0);

	gl.enableVertexAttribArray(texCoordLocation);
	gl.vertexAttribPointer(
		texCoordLocation,
		componentsPerAttribute,
		gl.FLOAT,
		false,
		strideBytes,
		texCoordOffsetBytes,
	);

	return vertexArray;
}
/**
 * Creates a 2D texture configured with clamp-to-edge wrapping and linear filtering on both axes. The texture is left
 * bound to `TEXTURE_2D` when this method returns.
 */
private createTexture(): WebGLTexture {
	assert(this.gl);
	const gl = this.gl;

	const texture = gl.createTexture();
	gl.bindTexture(gl.TEXTURE_2D, texture);

	const parameters: [name: number, value: number][] = [
		[gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE],
		[gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE],
		[gl.TEXTURE_MIN_FILTER, gl.LINEAR],
		[gl.TEXTURE_MAG_FILTER, gl.LINEAR],
	];
	for (const [name, value] of parameters) {
		gl.texParameteri(gl.TEXTURE_2D, name, value);
	}

	return texture;
}
/**
 * Renders the color program across the whole canvas and captures the result as a new, alpha-less `VideoFrame`.
 *
 * @param sourceFrame - Only consulted for its timestamp and duration, which are carried over to the new frame.
 * @returns A frame created from the canvas contents with its alpha channel discarded.
 */
private runColorProgram(sourceFrame: VideoFrame) {
	assert(this.gl);
	assert(this.canvas);
	const gl = this.gl;
	const canvas = this.canvas;

	gl.useProgram(this.colorProgram);
	gl.viewport(0, 0, canvas.width, canvas.height);
	gl.clear(gl.COLOR_BUFFER_BIT);
	gl.bindVertexArray(this.vao);
	gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);

	const frameInit: VideoFrameInit = {
		timestamp: sourceFrame.timestamp,
		duration: sourceFrame.duration ?? undefined,
		alpha: 'discard',
	};

	return new VideoFrame(canvas, frameInit);
}
/**
 * Renders the alpha program, reads the generated I420 byte stream back from the canvas and wraps it in a new
 * `VideoFrame`.
 *
 * @param sourceFrame - Only consulted for its timestamp and duration, which are carried over to the new frame.
 * @returns An I420 frame with the same dimensions as the canvas.
 */
private runAlphaProgram(sourceFrame: VideoFrame) {
	assert(this.gl);
	assert(this.canvas);
	const gl = this.gl;
	const { width, height } = this.canvas;

	gl.useProgram(this.alphaProgram);
	gl.uniform2f(this.alphaResolutionLocation, width, height);
	gl.viewport(0, 0, width, height);
	gl.clear(gl.COLOR_BUFFER_BIT);
	gl.bindVertexArray(this.vao);
	gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);

	// I420 layout: a full-resolution luma plane followed by two chroma planes subsampled by two in each direction
	const lumaSize = width * height;
	const chromaPlaneSize = Math.ceil(width / 2) * Math.ceil(height / 2);
	const i420Size = lumaSize + 2 * chromaPlaneSize;

	// Each canvas pixel carries four bytes of the stream, so only the first few rows need to be read back
	const bytesPerRow = 4 * width;
	const rowsToRead = Math.ceil(i420Size / bytesPerRow);

	const readback = new Uint8Array(bytesPerRow * rowsToRead);
	gl.readPixels(0, 0, width, rowsToRead, gl.RGBA, gl.UNSIGNED_BYTE, readback);

	// Drop the excess bytes of the last row
	const yuv = readback.subarray(0, i420Size);

	assert(yuv[lumaSize] === 128); // First chroma byte
	assert(yuv[yuv.length - 1] === 128); // Last byte; verifies the stream has been written in full

	// Kept as a separate object since TypeScript doesn't know about `transfer` in the init dictionary
	const frameInit = {
		format: 'I420' as const,
		codedWidth: width,
		codedHeight: height,
		timestamp: sourceFrame.timestamp,
		duration: sourceFrame.duration ?? undefined,
		transfer: [yuv.buffer],
	};

	return new VideoFrame(yuv, frameInit);
}
/**
 * Splits a frame into its color and alpha parts on a worker thread.
 *
 * The worker is created lazily on first use from the stringified `colorAlphaSplitterWorkerCode` function. Each
 * request gets a unique ID under which its resolvers are stored until the worker answers.
 *
 * @param sourceFrame - The frame to split. It is transferred to the worker, so it's no longer usable on this thread
 * afterwards.
 * @returns A Promise resolving to the color and alpha frames, or rejecting if the worker reports an error, the
 * worker itself errors, or the splitter is closed first.
 * @throws Whatever `postMessage` throws if the frame cannot be sent to the worker.
 */
private updateCpu(sourceFrame: VideoFrame): Promise<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }> {
	if (!this.worker) {
		// Wrap the function's source in an IIFE so that it runs as the worker's top-level script
		const blob = new Blob(
			[`(${colorAlphaSplitterWorkerCode.toString()})()`],
			{ type: 'application/javascript' },
		);
		const url = URL.createObjectURL(blob);
		this.worker = new Worker(url);
		URL.revokeObjectURL(url);

		this.worker.addEventListener('message', (event: MessageEvent<ColorAlphaSplitterWorkerResponse>) => {
			const data = event.data;
			const pending = this.pendingRequests.get(data.id);
			if (!pending) {
				// No such request anymore (e.g. it has already been rejected); nothing to settle
				return;
			}

			this.pendingRequests.delete(data.id);

			if ('error' in data) {
				pending.reject(new Error(data.error));
			} else {
				pending.resolve({ colorFrame: data.colorFrame, alphaFrame: data.alphaFrame });
			}
		});

		this.worker.addEventListener('error', (event) => {
			// A worker-level error can't be attributed to one request, so fail all outstanding ones
			const error = new Error(event.message || 'Color/alpha splitter worker error.');
			for (const pending of this.pendingRequests.values()) {
				pending.reject(error);
			}
			this.pendingRequests.clear();
		});
	}

	const id = this.nextRequestId++;
	const pending = promiseWithResolvers<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }>();
	this.pendingRequests.set(id, pending);

	try {
		this.worker.postMessage({ id, sourceFrame }, { transfer: [sourceFrame] });
	} catch (error) {
		// The request never reached the worker, so it must not linger in the map: its promise was never handed to
		// anyone, and rejecting it later (on close or worker error) would surface as an unhandled rejection.
		this.pendingRequests.delete(id);
		throw error;
	}

	return pending.promise;
}
/**
 * Tears the splitter down: loses the WebGL context (if any), terminates the worker (if any) and rejects every
 * request that is still waiting for a worker response.
 */
close() {
	const loseContextExtension = this.gl?.getExtension('WEBGL_lose_context');
	loseContextExtension?.loseContext();
	this.gl = null;
	this.canvas = null;

	if (this.worker) {
		this.worker.terminate();
	}
	this.worker = null;

	// The terminated worker will never answer, so settle whatever is still outstanding
	const closedError = new Error('Color/alpha splitter closed.');
	this.pendingRequests.forEach((pending) => {
		pending.reject(closedError);
	});
	this.pendingRequests.clear();
}
}
/** Message sent to the color/alpha splitter worker, asking it to split a single frame. */
type ColorAlphaSplitterWorkerRequest = {
// Identifies the request; the worker echoes it back in its response
id: number;
// The frame to split
sourceFrame: VideoFrame;
};
/**
 * Message sent back by the color/alpha splitter worker. It carries the `id` of the request it answers and either
 * the two resulting frames or an error message.
 */
type ColorAlphaSplitterWorkerResponse =
| { id: number; colorFrame: VideoFrame; alphaFrame: VideoFrame }
| { id: number; error: string };
/**
 * The script run by the color/alpha splitter worker. For each request, it splits the received frame into an opaque
 * color frame and an I420-family frame whose luma plane holds the alpha channel, then posts both back (or posts an
 * error message on failure).
 *
 * IMPORTANT: This function gets stringified with `toString()` and evaluated inside a worker, so it must be fully
 * self-contained. It may not reference any runtime value from the surrounding module; type-only references are fine.
 */
const colorAlphaSplitterWorkerCode = () => {
	// Reused across frames as long as the size matches, since consecutive frames usually share dimensions.
	let cpuSourceBuffer: Uint8Array | null = null;
	// Serialize execution internally so concurrent requests don't race on the shared cpuSourceBuffer.
	let chain: Promise<void> = Promise.resolve();

	self.addEventListener('message', (event: MessageEvent<ColorAlphaSplitterWorkerRequest>) => {
		const { id, sourceFrame } = event.data;

		chain = chain.then(async () => {
			try {
				const { colorFrame, alphaFrame } = await split(sourceFrame);
				self.postMessage({ id, colorFrame, alphaFrame }, { transfer: [colorFrame, alphaFrame] });
			} catch (error) {
				// Not everything that can be thrown is an Error, so don't blindly read `.message`
				const message = error instanceof Error ? error.message : String(error);
				self.postMessage({ id, error: message });
			} finally {
				// The frame was transferred to this worker, which makes closing it our responsibility
				sourceFrame.close();
			}
		});
	});

	/** Copies the frame's pixel data into the shared buffer and dispatches to the splitter matching its format. */
	const split = async (sourceFrame: VideoFrame) => {
		const format = sourceFrame.format as VideoSamplePixelFormat | null;
		if (!format) {
			throw new Error('CPU color/alpha splitting requires a known VideoFrame format.');
		}

		// Without an explicit `rect`, allocationSize() and copyTo() operate on the frame's *visible* rect, not its
		// coded size. The dimensions used for all plane math below must therefore be the visible ones; using the
		// coded size breaks as soon as the two differ (e.g. frames with padded coded dimensions).
		const width = sourceFrame.visibleRect?.width ?? sourceFrame.codedWidth;
		const height = sourceFrame.visibleRect?.height ?? sourceFrame.codedHeight;

		const sourceSize = sourceFrame.allocationSize();
		if (!cpuSourceBuffer || cpuSourceBuffer.byteLength !== sourceSize) {
			cpuSourceBuffer = new Uint8Array(sourceSize);
		}

		await sourceFrame.copyTo(cpuSourceBuffer);

		if (format === 'RGBA' || format === 'BGRA') {
			return splitInterleavedRgba(cpuSourceBuffer, width, height, format, sourceFrame);
		} else if (
			format === 'I420A' || format === 'I420AP10' || format === 'I420AP12'
			|| format === 'I422A' || format === 'I422AP10' || format === 'I422AP12'
			|| format === 'I444A' || format === 'I444AP10' || format === 'I444AP12'
		) {
			return splitPlanarYuvA(cpuSourceBuffer, width, height, format, sourceFrame);
		}

		throw new Error(`CPU color/alpha splitting does not support format '${format}'.`);
	};

	/** Splits tightly packed, interleaved 8-bit RGBA/BGRA data of the given dimensions. */
	const splitInterleavedRgba = (
		source: Uint8Array,
		width: number,
		height: number,
		format: 'RGBA' | 'BGRA',
		sourceFrame: VideoFrame,
	) => {
		const pixelCount = width * height;
		const chromaW = Math.ceil(width / 2);
		const chromaH = Math.ceil(height / 2);
		const alphaSize = pixelCount + chromaW * chromaH * 2;

		// Encode alpha as I420: Y = source A bytes, UV = 128
		const alphaBuffer = new Uint8Array(alphaSize);
		for (let i = 0, j = 3; i < pixelCount; i++, j += 4) {
			alphaBuffer[i] = source[j]!;
		}
		alphaBuffer.fill(128, pixelCount);

		// Hand the source buffer straight to VideoFrame as RGBX/BGRX so the A bytes are ignored
		const colorFrame = new VideoFrame(source, {
			format: format === 'RGBA' ? 'RGBX' : 'BGRX',
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			// No transfer! The source buffer is reused for subsequent frames.
		});

		// Kept as a separate object since TypeScript doesn't know about `transfer` in the init dictionary
		const alphaInit = {
			format: 'I420' as const,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			transfer: [alphaBuffer.buffer],
		};
		const alphaFrame = new VideoFrame(alphaBuffer, alphaInit);

		return { colorFrame, alphaFrame };
	};

	/**
	 * Splits tightly packed planar YUV+A data (plane order Y, U, V, A) of the given dimensions. The alpha frame is
	 * always 4:2:0 at the source's bit depth, no matter the source's chroma subsampling.
	 */
	const splitPlanarYuvA = (
		source: Uint8Array,
		width: number,
		height: number,
		format:
			| 'I420A' | 'I420AP10' | 'I420AP12'
			| 'I422A' | 'I422AP10' | 'I422AP12'
			| 'I444A' | 'I444AP10' | 'I444AP12',
		sourceFrame: VideoFrame,
	) => {
		const is10 = format.includes('P10');
		const is12 = format.includes('P12');
		const bytesPerSample = (is10 || is12) ? 2 : 1;

		// Dimensions of the source's chroma planes, which depend on its subsampling
		let chromaW: number;
		let chromaH: number;
		if (format.startsWith('I420')) {
			chromaW = Math.ceil(width / 2);
			chromaH = Math.ceil(height / 2);
		} else if (format.startsWith('I422')) {
			chromaW = Math.ceil(width / 2);
			chromaH = height;
		} else {
			chromaW = width;
			chromaH = height;
		}

		const ySamples = width * height;
		const uvSamples = chromaW * chromaH;
		const yBytes = ySamples * bytesPerSample;
		const uvBytes = uvSamples * bytesPerSample;
		const aBytes = ySamples * bytesPerSample;
		const colorBytes = yBytes + uvBytes * 2;

		// The only 'A' in these format names is the alpha marker, e.g. 'I420AP10' -> 'I420P10'
		const colorFormat = format.replace('A', '') as VideoPixelFormat;

		// The alpha frame's own chroma planes are always 4:2:0
		const alphaChromaW = Math.ceil(width / 2);
		const alphaChromaH = Math.ceil(height / 2);
		const alphaUvSamples = alphaChromaW * alphaChromaH;
		const alphaUvBytes = alphaUvSamples * bytesPerSample;
		const alphaSize = aBytes + 2 * alphaUvBytes;
		const alphaBuffer = new Uint8Array(alphaSize);

		// The alpha plane follows the three color planes and becomes the alpha frame's luma plane
		const aPlaneStart = colorBytes;
		alphaBuffer.set(source.subarray(aPlaneStart, aPlaneStart + aBytes), 0);

		// Fill UV planes with the neutral chroma value
		const uvOffset = aBytes;
		const neutralChroma = is10 ? 512 : (is12 ? 2048 : 128);
		if (bytesPerSample === 1) {
			alphaBuffer.fill(neutralChroma, uvOffset);
		} else {
			// uvOffset is a multiple of two here, so the 16-bit view is properly aligned
			const uvView = new Uint16Array(alphaBuffer.buffer, uvOffset, 2 * alphaUvSamples);
			uvView.fill(neutralChroma);
		}

		const alphaFormat = (is10 ? 'I420P10' : (is12 ? 'I420P12' : 'I420')) as VideoPixelFormat;

		// Color frame is simply a prefix of the combined bytes
		const colorFrame = new VideoFrame(source.subarray(0, colorBytes), {
			format: colorFormat,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
		});

		// Kept as a separate object since TypeScript doesn't know about `transfer` in the init dictionary
		const alphaInit = {
			format: alphaFormat,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			transfer: [alphaBuffer.buffer],
		};
		const alphaFrame = new VideoFrame(alphaBuffer, alphaInit);

		return { colorFrame, alphaFrame };
	};
};
/**
 * A video source that accepts raw, unencoded video samples (frames). Every sample handed to it gets encoded and the
 * result is then passed on to the output's video track.
 * @group Media sources
 * @public
 */
export class VideoSampleSource extends VideoSource {
	/** @internal */
	private _encoder: VideoEncoderWrapper;

	/**
	 * Constructs a {@link VideoSampleSource}. The given {@link VideoEncodingConfig} determines how added samples are
	 * encoded.
	 */
	constructor(encodingConfig: VideoEncodingConfig) {
		validateVideoEncodingConfig(encodingConfig);

		const { codec } = encodingConfig;
		super(codec);

		this._encoder = new VideoEncoderWrapper(this, encodingConfig);
	}

	/**
	 * Encodes the given video sample (frame) and adds the result to the output.
	 *
	 * @returns A Promise that resolves as soon as the output can accept more samples. Await it in order to respect
	 * writer and encoder backpressure.
	 */
	add(videoSample: VideoSample, encodeOptions?: VideoEncoderEncodeOptions) {
		if (videoSample instanceof VideoSample) {
			return this._encoder.add(videoSample, false, encodeOptions);
		}

		throw new TypeError('videoSample must be a VideoSample.');
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		const encoder = this._encoder;
		return encoder.flushAndClose(forceClose);
	}
}
/**
* This source can be used to add video frames to the output track from a fixed canvas element. Since canvases are often
* used for rendering, this source provides a convenient wrapper around {@link VideoSampleSource}.
* @group Media sources
* @public
*/
export class CanvasSource extends VideoSource {
/** @internal */
private _encoder: VideoEncoderWrapper;
/** @internal */
private _canvas: HTMLCanvasElement | OffscreenCanvas;
/**
* Creates a new {@link CanvasSource} from a canvas element or `OffscreenCanvas` whose samples are encoded
* according to the specified {@link VideoEncodingConfig}.
*/
constructor(canvas: HTMLCanvasElement | OffscreenCanvas, encodingConfig: VideoEncodingConfig) {
if (
!(typeof HTMLCanvasElement !== 'undefined' && canvas instanceof HTMLCanvasElement)
&&