mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,025 lines (1,013 loc) • 116 kB
JavaScript
/*!
* Copyright (c) 2026-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import { buildAacAudioSpecificConfig, parseAacAudioSpecificConfig } from '../shared/aac-misc.js';
import { AUDIO_CODECS, parsePcmCodec, PCM_AUDIO_CODECS, SUBTITLE_CODECS, VIDEO_CODECS, } from './codec.js';
import { assert, assertNever, binarySearchLessOrEqual, CallSerializer, clamp, clearIntervalUnthrottled, floorToDivisor, last, promiseWithResolvers, roundToDivisor, setInt24, setIntervalUnthrottled, setUint24, toUint8Array, } from './misc.js';
import { SubtitleParser } from './subtitles.js';
import { toAlaw, toUlaw } from './pcm.js';
import { customVideoEncoders, customAudioEncoders, } from './custom-coder.js';
import { EncodedPacket } from './packet.js';
import { AudioSample, audioSampleToInterleavedFormat, toInterleavedAudioFormat, VideoSample, } from './sample.js';
import { buildAudioEncoderConfig, buildVideoEncoderConfig, validateAudioEncodingConfig, validateVideoEncodingConfig, } from './encode.js';
import { AudioResampler } from './resample.js';
import { determineVideoPacketType } from './codec-data.js';
/**
* Base class for media sources. Media sources are used to add media samples to an output file.
* @group Media sources
* @public
*/
export class MediaSource {
    constructor() {
        /** @internal The output track this source feeds; stays `null` until one is assigned. */
        this._connectedTrack = null;
        /** @internal Promise of the close operation once one has been started, `null` before that. */
        this._closingPromise = null;
        /** @internal Becomes `true` after the source has been flushed as part of closing. */
        this._closed = false;
    }
    /**
     * @internal
     * Throws a descriptive error unless the source is connected to a track whose output is currently accepting
     * samples and the source itself has not been closed yet.
     */
    _ensureValidAdd() {
        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Source is not connected to an output track.');
        }
        switch (track.output.state) {
            case 'canceled':
                throw new Error('Output has been canceled.');
            case 'finalizing':
            case 'finalized':
                throw new Error('Output has been finalized.');
            case 'pending':
                throw new Error('Output has not started.');
            default:
                break;
        }
        if (this._closed) {
            throw new Error('Source is closed.');
        }
    }
    /** @internal Start hook; a no-op in the base class. */
    async _start() { }
    /** @internal Flush-and-close hook; a no-op in the base class. */
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    async _flushAndClose(forceClose) { }
    /**
     * Closes this source. After closing, no more samples can be added, and the output is told that this track will
     * not receive further samples. Calling `.close()` is optional, but doing so after the last sample is recommended
     * for improved performance and reduced memory usage.
     *
     * Calling it again while (or after) a close is in progress does nothing.
     *
     * @throws If the source is not connected to an output track, or if the output has not been started yet.
     */
    close() {
        if (this._closingPromise) {
            return;
        }
        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Cannot call close without connecting the source to an output track.');
        }
        if (track.output.state === 'pending') {
            throw new Error('Cannot call close before output has been started.');
        }
        const flushThenNotify = async () => {
            await this._flushAndClose(false);
            this._closed = true;
            // An output that is already finalizing (or done) no longer needs to hear about the track closing
            const outputIsWrappingUp = track.output.state === 'finalizing'
                || track.output.state === 'finalized';
            if (!outputIsWrappingUp) {
                track.output._muxer.onTrackClose(track);
            }
        };
        this._closingPromise = flushThenNotify();
    }
    /**
     * @internal
     * Resolves once the source has been flushed and closed. Reuses the pending close operation if there is one;
     * otherwise starts a flush with the given `forceClose` flag (without notifying the muxer).
     */
    async _flushOrWaitForOngoingClose(forceClose) {
        this._closingPromise ??= (async () => {
            await this._flushAndClose(forceClose);
            this._closed = true;
        })();
        return this._closingPromise;
    }
}
/**
* Base class for video sources - sources for video tracks.
* @group Media sources
* @public
*/
export class VideoSource extends MediaSource {
    /**
     * Internal constructor.
     *
     * @param codec - Codec of the video track; must be one of the entries of `VIDEO_CODECS`.
     * @throws TypeError if `codec` is not a known video codec.
     */
    constructor(codec) {
        super();
        /** @internal */
        this._connectedTrack = null;
        const isKnownCodec = VIDEO_CODECS.includes(codec);
        if (!isKnownCodec) {
            const allowedCodecs = VIDEO_CODECS.join(', ');
            throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${allowedCodecs}.`);
        }
        this._codec = codec;
    }
}
/**
 * Guards tracks whose metadata sets `hasOnlyKeyPackets`: throws if `packet` is not a key packet. Does nothing (and
 * does not look at the packet) when the flag is not set.
 */
const maybeEnsureIsKeyPacket = (track, packet) => {
    const { hasOnlyKeyPackets } = track.metadata;
    if (!hasOnlyKeyPackets) {
        return;
    }
    if (packet.type === 'key') {
        return;
    }
    throw new Error('Cannot add non-key packets to a hasOnlyKeyPackets video track.');
};
/**
* The most basic video source; can be used to directly pipe encoded packets into the output file.
* @group Media sources
* @public
*/
export class EncodedVideoPacketSource extends VideoSource {
    /** Creates a new {@link EncodedVideoPacketSource} whose packets are encoded using `codec`. */
    constructor(codec) {
        super(codec);
    }
    /**
     * Hands an already-encoded packet to the output video track. Add packets in *decode order*; each packet's
     * timestamp must be its *presentation timestamp*. B-frames are handled automatically.
     *
     * @param packet - The {@link EncodedPacket} to add. Metadata-only packets are rejected.
     * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a
     * valid decoder config.
     *
     * @returns A Promise that resolves once the output is ready to receive more samples. You should await this
     * Promise to respect writer and encoder backpressure.
     * @throws TypeError for an invalid `packet` or `meta`; Error if the source cannot currently accept packets or
     * if a non-key packet is added to a key-packets-only track.
     */
    add(packet, meta) {
        const isEncodedPacket = packet instanceof EncodedPacket;
        if (!isEncodedPacket) {
            throw new TypeError('packet must be an EncodedPacket.');
        }
        if (packet.isMetadataOnly) {
            throw new TypeError('Metadata-only packets cannot be added.');
        }
        const metaIsInvalid = meta !== undefined && (meta === null || typeof meta !== 'object');
        if (metaIsInvalid) {
            throw new TypeError('meta, when provided, must be an object.');
        }
        this._ensureValidAdd();
        const track = this._connectedTrack;
        maybeEnsureIsKeyPacket(track, packet);
        const muxer = track.output._muxer;
        return muxer.addEncodedVideoPacket(track, packet, meta);
    }
}
/**
 * Internal helper that sits between a video source and the muxer: it optionally transforms incoming video samples
 * (resize/crop/rotate, frame rate normalization, user `process` hook), lazily creates an encoder (a registered
 * custom encoder or the browser's `VideoEncoder`, plus a second `VideoEncoder` for alpha when requested) and
 * forwards the resulting packets to the muxer of the connected track.
 */
class VideoEncoderWrapper {
    /**
     * @param source - The owning source; used for `_ensureValidAdd()` and to reach `_connectedTrack`.
     * @param encodingConfig - The encoding configuration (codec, transform, alpha, key frame interval, callbacks).
     */
    constructor(source, encodingConfig) {
        this.source = source;
        this.encodingConfig = encodingConfig;
        // Promise of the one-time encoder setup started by ensureEncoder(); null until the first sample arrives
        this.ensureEncoderPromise = null;
        this.encoderInitialized = false;
        this.encoder = null;
        this.muxer = null;
        this.lastMultipleOfKeyFrameInterval = -1;
        this.emittedEncoderPackets = 0;
        // Tracks the input dimensions of the first frame
        this.codedWidth = null;
        this.codedHeight = null;
        // Tracks the output dimensions of the first frame (used to lock dimensions for fill/contain/cover)
        this.outputWidth = null;
        this.outputHeight = null;
        // Frame rate normalization state
        this.frameRateLastSample = null;
        this.frameRateLastTimestamp = null;
        this.frameRateLastEndTimestamp = null;
        // VideoEncoder converts everything to microseconds, so we need to do some bookkeeping to restore the original
        // timing information
        this.preciseTimings = [];
        this.customEncoder = null;
        this.customEncoderCallSerializer = new CallSerializer();
        this.customEncoderQueueSize = 0;
        // Alpha stuff
        this.alphaEncoder = null;
        this.splitter = null;
        // NOTE(review): nothing in this class ever sets this flag to true; it is only read in processAndEncode()
        this.splitterCreationFailed = false;
        this.alphaFrameQueue = [];
        /**
         * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
         * However, we want to surface these errors to the user within the normal control flow, so they don't go
         * uncaught. So, we keep track of the encoder error and throw it as soon as we get the chance.
         */
        this.error = null;
        this.lastMuxerPromise = Promise.resolve();
    }
    /**
     * Takes one video sample, applies the configured transformations and frame rate normalization, and sends the
     * result to the encoder.
     *
     * @param videoSample - The sample to encode.
     * @param shouldClose - Whether this method is responsible for closing `videoSample`. When `false`, the passed
     * sample is neither closed nor mutated; internal copies are used instead.
     * @param encodeOptions - Options forwarded to the encoder's `encode` call.
     * @throws The stored encoder error (if any), validation errors from the source, or an error when the sample
     * size changes while `sizeChangeBehavior` is 'deny'.
     */
    async add(videoSample, shouldClose, encodeOptions) {
        const originalSample = videoSample;
        try {
            this.checkForEncoderError();
            this.source._ensureValidAdd();
            const config = this.encodingConfig;
            const sizeChangeBehavior = config.sizeChangeBehavior ?? 'deny';
            let isSizeChange = false;
            // Ensure video sample size remains constant or handle the change
            if (this.codedWidth !== null && this.codedHeight !== null) {
                if (videoSample.codedWidth !== this.codedWidth || videoSample.codedHeight !== this.codedHeight) {
                    isSizeChange = true;
                    if (sizeChangeBehavior === 'deny') {
                        throw new Error(`Video sample size must remain constant. Expected ${this.codedWidth}x${this.codedHeight},`
                            + ` got ${videoSample.codedWidth}x${videoSample.codedHeight}. To allow the sample size to`
                            + ` change over time, set \`sizeChangeBehavior\` to a value other than 'deny' in the`
                            + ` encoding options.`);
                    }
                }
            }
            else {
                this.codedWidth = videoSample.codedWidth;
                this.codedHeight = videoSample.codedHeight;
            }
            // Determine if we need to apply transformations via canvas
            const hasTransformConfig = config.transform?.width !== undefined
                || config.transform?.height !== undefined
                || config.transform?.rotate !== undefined
                || config.transform?.crop !== undefined
                || config.transform?.force === true;
            const needsTransform = hasTransformConfig || (isSizeChange && sizeChangeBehavior !== 'passThrough');
            if (needsTransform) {
                let targetWidth = config.transform?.width;
                let targetHeight = config.transform?.height;
                let appliedFit = config.transform?.fit ?? 'fill';
                // If the size changed and behavior is fill/contain/cover, lock to the original output dimensions
                if (isSizeChange && sizeChangeBehavior !== 'passThrough') {
                    assert(this.outputWidth);
                    assert(this.outputHeight);
                    assert(sizeChangeBehavior !== 'deny');
                    targetWidth = this.outputWidth;
                    targetHeight = this.outputHeight;
                    appliedFit = sizeChangeBehavior;
                }
                const transformed = await videoSample.transform({
                    width: targetWidth,
                    height: targetHeight,
                    roundDimensionsTo: 2,
                    crop: config.transform?.crop,
                    rotate: config.transform?.rotate,
                    fit: appliedFit,
                    alpha: config.alpha,
                });
                // Save the output dimensions of the first frame
                if (this.outputWidth === null || this.outputHeight === null) {
                    this.outputWidth = transformed.displayWidth;
                    this.outputHeight = transformed.displayHeight;
                }
                if (shouldClose) {
                    videoSample.close();
                }
                // From here on we work with (and own) the transformed sample
                videoSample = transformed;
                shouldClose = true;
            }
            else {
                // If no canvas is needed, we still need to record the output dimensions for the first frame
                if (this.outputWidth === null || this.outputHeight === null) {
                    this.outputWidth = videoSample.codedWidth;
                    this.outputHeight = videoSample.codedHeight;
                }
            }
            const frameRate = config.transform?.frameRate;
            if (frameRate !== undefined) {
                // Apply frame rate normalization
                const originalEndTimestamp = videoSample.timestamp + videoSample.duration;
                const alignedTimestamp = floorToDivisor(videoSample.timestamp, frameRate);
                if (this.frameRateLastSample !== null) {
                    if (alignedTimestamp <= this.frameRateLastTimestamp) {
                        // Same frame rate slot, replace stored sample with the newer one
                        this.frameRateLastSample.close();
                        this.frameRateLastSample = videoSample.clone();
                        this.frameRateLastEndTimestamp = originalEndTimestamp;
                        return;
                    }
                    else {
                        // Pad the gap by repeating the previous frame
                        await this.padFrameRate(alignedTimestamp, encodeOptions);
                    }
                }
                // The timestamp/duration are rewritten below, so never do that on the sample that was passed in:
                // work on a private copy instead.
                if (videoSample === originalSample) {
                    const ownedCopy = originalSample.clone();
                    if (shouldClose) {
                        // We were asked to dispose of the passed-in sample. Since the `finally` block below will
                        // only see the copy from now on, the original must be closed here or it would leak.
                        originalSample.close();
                    }
                    videoSample = ownedCopy;
                    shouldClose = true;
                }
                videoSample.setTimestamp(alignedTimestamp);
                videoSample.setDuration(1 / frameRate);
                this.frameRateLastSample?.close();
                this.frameRateLastSample = videoSample.clone();
                this.frameRateLastTimestamp = alignedTimestamp;
                this.frameRateLastEndTimestamp = originalEndTimestamp;
            }
            await this.processAndEncode(videoSample, encodeOptions);
        }
        finally {
            if (shouldClose) {
                videoSample.close();
            }
        }
    }
    /**
     * Runs the process function (if any) and encodes the resulting samples.
     *
     * The passed `videoSample` itself is not closed here; samples produced by the process function are closed once
     * they have been handed to the encoder (or when an error occurs).
     *
     * @param videoSample - The (already transformed) sample to process and encode.
     * @param encodeOptions - Options forwarded to the encoder; `keyFrame` is additionally forced according to
     * the key frame interval.
     */
    async processAndEncode(videoSample, encodeOptions) {
        const config = this.encodingConfig;
        let samplesToEncode;
        // Apply the user-defined process function, if any
        if (config.transform?.process) {
            let processed = config.transform.process(videoSample);
            if (processed instanceof Promise) {
                processed = await processed;
            }
            if (processed === null) {
                // The process function returned null: nothing is encoded for this sample
                return;
            }
            if (!Array.isArray(processed)) {
                processed = [processed];
            }
            samplesToEncode = processed.map((x) => {
                if (x instanceof VideoSample) {
                    return x;
                }
                if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) {
                    return new VideoSample(x);
                }
                // Any other value is wrapped in a VideoSample that inherits the input's timing
                return new VideoSample(x, {
                    timestamp: videoSample.timestamp,
                    duration: videoSample.duration,
                });
            });
        }
        else {
            samplesToEncode = [videoSample];
        }
        try {
            for (const sampleToEncode of samplesToEncode) {
                if (!this.encoderInitialized) {
                    if (!this.ensureEncoderPromise) {
                        this.ensureEncoder(sampleToEncode);
                    }
                    // No, this "if" statement is not useless. Sometimes, the above call to
                    // `ensureEncoder` might have synchronously completed and the encoder is
                    // already initialized. In this case, we don't need to await the promise
                    // anymore. This also fixes nasty async race condition bugs when multiple
                    // code paths are calling this method: It's important that the call that
                    // initialized the encoder go through this code first.
                    if (!this.encoderInitialized) {
                        await this.ensureEncoderPromise;
                    }
                }
                assert(this.encoderInitialized);
                const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 2;
                const multipleOfKeyFrameInterval = Math.floor(sampleToEncode.timestamp / keyFrameInterval);
                // Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks
                // follow the same "key frame" rhythm, because aligned key frames are required to start new
                // fragments in ISOBMFF or clusters in Matroska (or at least desirable).
                const finalEncodeOptions = {
                    ...encodeOptions,
                    keyFrame: encodeOptions?.keyFrame
                        || keyFrameInterval === 0
                        || multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval,
                };
                this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval;
                if (this.customEncoder) {
                    this.customEncoderQueueSize++;
                    // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
                    const clonedSample = sampleToEncode.clone();
                    const promise = this.customEncoderCallSerializer
                        .call(() => this.customEncoder.encode(clonedSample, finalEncodeOptions))
                        .then(() => this.customEncoderQueueSize--)
                        .catch((error) => this.error ??= error)
                        .finally(() => {
                            clonedSample.close();
                        });
                    // Backpressure: only wait for this encode call once enough calls have piled up
                    if (this.customEncoderQueueSize >= 4) {
                        await promise;
                    }
                }
                else {
                    assert(this.encoder);
                    const videoFrame = sampleToEncode.toVideoFrame();
                    // Remember the sample's exact timing, keyed by the frame's microsecond timestamp, so it can be
                    // restored on the encoded packet later (see addPacket in ensureEncoder)
                    const preciseTimingIndex = binarySearchLessOrEqual(this.preciseTimings, videoFrame.timestamp, x => x.microsecondTimestamp);
                    const existingEntry = preciseTimingIndex !== -1
                        ? this.preciseTimings[preciseTimingIndex]
                        : null;
                    if (existingEntry && existingEntry.microsecondTimestamp === videoFrame.timestamp) {
                        if (existingEntry.timestamp !== sampleToEncode.timestamp) {
                            // Mapping isn't unique, can't use the timestamp
                            existingEntry.timestampIsValid = false;
                        }
                        if (existingEntry.duration !== sampleToEncode.duration) {
                            // Mapping isn't unique, can't use the duration
                            existingEntry.durationIsValid = false;
                        }
                    }
                    else {
                        this.preciseTimings.splice(preciseTimingIndex + 1, 0, {
                            microsecondTimestamp: videoFrame.timestamp,
                            timestamp: sampleToEncode.timestamp,
                            duration: sampleToEncode.duration,
                            timestampIsValid: true,
                            durationIsValid: true,
                        });
                        // Make sure it doesn't grow indefinitely
                        if (this.preciseTimings.length > 128) {
                            this.preciseTimings.shift();
                        }
                    }
                    if (!this.alphaEncoder) {
                        // No alpha encoder, simple case
                        this.encoder.encode(videoFrame, finalEncodeOptions);
                        videoFrame.close();
                    }
                    else {
                        // We're expected to encode alpha as well
                        const frameDefinitelyHasNoAlpha = !!videoFrame.format && !videoFrame.format.includes('A');
                        if (frameDefinitelyHasNoAlpha || this.splitterCreationFailed) {
                            // A null entry tells the color encoder's output callback that there is no alpha frame
                            this.alphaFrameQueue.push(null);
                            this.encoder.encode(videoFrame, finalEncodeOptions);
                            videoFrame.close();
                        }
                        else {
                            const width = videoFrame.displayWidth;
                            const height = videoFrame.displayHeight;
                            if (!this.splitter) {
                                this.splitter = new ColorAlphaSplitter(width, height);
                            }
                            // The splitter takes ownership, so no need to close the frames ourselves
                            const { colorFrame, alphaFrame } = await this.splitter.update(videoFrame);
                            this.alphaFrameQueue.push(alphaFrame);
                            this.encoder.encode(colorFrame, finalEncodeOptions);
                            colorFrame.close();
                        }
                    }
                    // We need to do this after sending the frame to the encoder as the frame otherwise might be closed
                    if (this.encoder.encodeQueueSize >= 4) {
                        await new Promise(resolve => this.encoder.addEventListener('dequeue', resolve, { once: true }));
                    }
                }
                await this.lastMuxerPromise; // Allow the writer to apply backpressure
            }
        }
        finally {
            // Close whatever the process function produced; the input sample stays the caller's responsibility
            for (const sample of samplesToEncode) {
                if (sample !== videoSample) {
                    sample.close();
                }
            }
        }
    }
    /**
     * Repeats the last frame rate sample to fill the gap up to the given timestamp.
     *
     * Frames are emitted for every frame slot strictly between the last emitted slot and `until`. Each repeated
     * frame is a clone of the stored sample and is closed again afterwards, even if encoding it fails.
     *
     * @param until - Aligned timestamp (in seconds) of the next real frame, exclusive.
     * @param encodeOptions - Options forwarded to the encoder.
     */
    async padFrameRate(until, encodeOptions) {
        const frameRate = this.encodingConfig.transform.frameRate;
        assert(this.frameRateLastSample);
        const frameDifference = Math.round((until - this.frameRateLastTimestamp) * frameRate);
        for (let i = 1; i < frameDifference; i++) {
            const sample = this.frameRateLastSample.clone();
            try {
                sample.setTimestamp(this.frameRateLastTimestamp + i / frameRate);
                sample.setDuration(1 / frameRate);
                await this.processAndEncode(sample, encodeOptions);
            }
            finally {
                // processAndEncode never closes the sample it is given, so the clone must be released here
                sample.close();
            }
        }
    }
    /**
     * Starts the one-time, asynchronous encoder setup and stores its promise in `ensureEncoderPromise`. Uses the
     * first registered custom encoder that supports the configuration, falling back to the browser's `VideoEncoder`
     * (plus a second one for alpha when `alpha` is 'keep'). Sets `encoderInitialized` once done.
     *
     * @param videoSample - The first sample to encode; its dimensions determine the encoder configuration.
     */
    ensureEncoder(videoSample) {
        this.ensureEncoderPromise = (async () => {
            const encoderConfig = buildVideoEncoderConfig({
                ...this.encodingConfig,
                width: videoSample.codedWidth,
                height: videoSample.codedHeight,
                squarePixelWidth: videoSample.squarePixelWidth,
                squarePixelHeight: videoSample.squarePixelHeight,
                framerate: this.source._connectedTrack?.metadata.frameRate,
            });
            this.encodingConfig.onEncoderConfig?.(encoderConfig);
            const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports(this.encodingConfig.codec, encoderConfig));
            if (MatchingCustomEncoder) {
                // @ts-expect-error "Can't create instance of abstract class 🤓"
                this.customEncoder = new MatchingCustomEncoder();
                // @ts-expect-error It's technically readonly
                this.customEncoder.codec = this.encodingConfig.codec;
                // @ts-expect-error It's technically readonly
                this.customEncoder.config = encoderConfig;
                // @ts-expect-error It's technically readonly
                this.customEncoder.onPacket = (packet, meta) => {
                    if (!(packet instanceof EncodedPacket)) {
                        throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
                    }
                    if (meta !== undefined && (!meta || typeof meta !== 'object')) {
                        throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
                    }
                    maybeEnsureIsKeyPacket(this.source._connectedTrack, packet);
                    this.encodingConfig.onEncodedPacket?.(packet, meta);
                    // Muxer failures are stored and rethrown later by checkForEncoderError()
                    this.lastMuxerPromise
                        = this.muxer.addEncodedVideoPacket(this.source._connectedTrack, packet, meta)
                            .catch((error) => {
                                this.error ??= error;
                            });
                };
                await this.customEncoder.init();
            }
            else {
                if (typeof VideoEncoder === 'undefined') {
                    throw new Error('VideoEncoder is not supported by this browser.');
                }
                encoderConfig.alpha = 'discard'; // Since we handle alpha ourselves
                if (this.encodingConfig.alpha === 'keep') {
                    // Encoding alpha requires using two parallel encoders, so we need to make sure they stay in sync
                    // and that neither of them drops frames. Setting latencyMode to 'quality' achieves this, because
                    // "User Agents MUST not drop frames to achieve the target bitrate and/or framerate."
                    encoderConfig.latencyMode = 'quality';
                }
                const hasOddDimension = encoderConfig.width % 2 === 1 || encoderConfig.height % 2 === 1;
                if (hasOddDimension
                    && (this.encodingConfig.codec === 'avc' || this.encodingConfig.codec === 'hevc')) {
                    // Throw a special error for this case as it gets hit often
                    throw new Error(`The dimensions ${encoderConfig.width}x${encoderConfig.height} are not supported for codec`
                        + ` '${this.encodingConfig.codec}'; both width and height must be even numbers. Make sure to`
                        + ` round your dimensions to the nearest even number.`);
                }
                const support = await VideoEncoder.isConfigSupported(encoderConfig);
                if (!support.supported) {
                    throw new Error(`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
                        + ` ${encoderConfig.width}x${encoderConfig.height}, hardware acceleration:`
                        + ` ${encoderConfig.hardwareAcceleration ?? 'no-preference'}) is not supported by this browser.`
                        + ` Consider using another codec or changing your video parameters.`);
                }
                /** Queue of color chunks waiting for their alpha counterpart. */
                const colorChunkQueue = [];
                /** Each value is the number of encoded alpha chunks at which a null alpha chunk should be added. */
                const nullAlphaChunkQueue = [];
                let encodedAlphaChunkCount = 0;
                let alphaEncoderQueue = 0;
                /** Builds the final packet from a color chunk (and optional alpha chunk) and sends it to the muxer. */
                const addPacket = (colorChunk, alphaChunk, meta) => {
                    const sideData = {};
                    if (alphaChunk) {
                        const alphaData = new Uint8Array(alphaChunk.byteLength);
                        alphaChunk.copyTo(alphaData);
                        sideData.alpha = alphaData;
                    }
                    let packet = EncodedPacket.fromEncodedChunk(colorChunk, sideData);
                    // See if there's a relevant timing entry to refine the packet's timing data
                    const preciseTimingIndex = binarySearchLessOrEqual(this.preciseTimings, colorChunk.timestamp, x => x.microsecondTimestamp);
                    const entry = preciseTimingIndex !== -1
                        ? this.preciseTimings[preciseTimingIndex]
                        : null;
                    let actualType = null;
                    if (this.emittedEncoderPackets === 0 && packet.type === 'delta' && meta?.decoderConfig) {
                        // https://github.com/Vanilagy/mediabunny/issues/365
                        // We expect the first packet to be a key packet. If it's not, let's actually verify that it's
                        // not by getting the actual type.
                        actualType = determineVideoPacketType(this.encodingConfig.codec, meta.decoderConfig, packet.data);
                    }
                    // Define the packet
                    if ((entry && entry.microsecondTimestamp === colorChunk.timestamp) || actualType !== null) {
                        packet = packet.clone({
                            timestamp: entry?.timestampIsValid ? entry.timestamp : undefined,
                            duration: entry?.durationIsValid ? entry.duration : undefined,
                            type: actualType ?? undefined,
                        });
                    }
                    maybeEnsureIsKeyPacket(this.source._connectedTrack, packet);
                    this.encodingConfig.onEncodedPacket?.(packet, meta);
                    this.lastMuxerPromise
                        = this.muxer.addEncodedVideoPacket(this.source._connectedTrack, packet, meta)
                            .catch((error) => {
                                this.error ??= error;
                            });
                    this.emittedEncoderPackets++;
                };
                const stack = new Error('Encoding error').stack;
                this.encoder = new VideoEncoder({
                    output: (chunk, meta) => {
                        if (!this.alphaEncoder) {
                            // We're done
                            addPacket(chunk, null, meta);
                            return;
                        }
                        const alphaFrame = this.alphaFrameQueue.shift();
                        assert(alphaFrame !== undefined);
                        if (alphaFrame) {
                            this.alphaEncoder.encode(alphaFrame, {
                                // Crucial: The alpha frame is forced to be a key frame whenever the color frame
                                // also is. Without this, playback can glitch and even crash in some browsers.
                                // This is the reason why the two encoders are wired in series and not in parallel.
                                keyFrame: chunk.type === 'key',
                            });
                            alphaEncoderQueue++;
                            alphaFrame.close();
                            colorChunkQueue.push({ chunk, meta });
                        }
                        else {
                            // There was no alpha component for this frame
                            if (alphaEncoderQueue === 0) {
                                // No pending alpha encodes either, so we're done
                                addPacket(chunk, null, meta);
                            }
                            else {
                                // There are still alpha encodes pending, so we can't add the packet immediately since
                                // we'd end up with out-of-order packets. Instead, let's queue a null alpha chunk to be
                                // added in the future, after the current encoder workload has completed:
                                nullAlphaChunkQueue.push(encodedAlphaChunkCount + alphaEncoderQueue);
                                colorChunkQueue.push({ chunk, meta });
                            }
                        }
                    },
                    error: (error) => {
                        error.stack = stack; // Provide a more useful stack trace, the default one sucks
                        this.error ??= error;
                    },
                });
                this.encoder.configure(encoderConfig);
                if (this.encodingConfig.alpha === 'keep') {
                    const stack = new Error('Encoding error').stack;
                    // We need to encode alpha as well, which we do with a separate encoder
                    this.alphaEncoder = new VideoEncoder({
                        // We ignore the alpha chunk's metadata
                        // eslint-disable-next-line @typescript-eslint/no-unused-vars
                        output: (chunk, meta) => {
                            alphaEncoderQueue--;
                            // There has to be a color chunk because the encoders are wired in series
                            const colorChunk = colorChunkQueue.shift();
                            assert(colorChunk !== undefined);
                            addPacket(colorChunk.chunk, chunk, colorChunk.meta);
                            // See if there are any null alpha chunks queued up
                            encodedAlphaChunkCount++;
                            while (nullAlphaChunkQueue.length > 0
                                && nullAlphaChunkQueue[0] === encodedAlphaChunkCount) {
                                nullAlphaChunkQueue.shift();
                                const colorChunk = colorChunkQueue.shift();
                                assert(colorChunk !== undefined);
                                addPacket(colorChunk.chunk, null, colorChunk.meta);
                            }
                        },
                        error: (error) => {
                            error.stack = stack; // Provide a more useful stack trace
                            this.error ??= error;
                        },
                    });
                    this.alphaEncoder.configure(encoderConfig);
                }
            }
            assert(this.source._connectedTrack);
            this.muxer = this.source._connectedTrack.output._muxer;
            this.encoderInitialized = true;
        })();
    }
    /**
     * Emits any remaining frame rate padding, flushes the encoder(s) and releases all held resources.
     *
     * @param forceClose - When `true`, pending errors are not thrown, no final padding is emitted and the encoders
     * are closed without being flushed first.
     */
    async flushAndClose(forceClose) {
        if (!forceClose) {
            this.checkForEncoderError();
        }
        // Final frame rate padding: fill remaining frames up to the last sample's original end timestamp
        if (!forceClose && this.frameRateLastSample) {
            const frameRate = this.encodingConfig.transform.frameRate;
            const alignedEnd = floorToDivisor(this.frameRateLastEndTimestamp, frameRate);
            await this.padFrameRate(alignedEnd);
        }
        this.frameRateLastSample?.close();
        this.frameRateLastSample = null;
        if (this.customEncoder) {
            if (!forceClose) {
                // Not awaited on its own: the serializer runs it before the close() call queued right below
                void this.customEncoderCallSerializer.call(() => this.customEncoder.flush());
            }
            await this.customEncoderCallSerializer.call(() => this.customEncoder.close());
        }
        else if (this.encoder) {
            if (!forceClose) {
                // These are wired in series, therefore they must also be flushed in series
                await this.encoder.flush();
                await this.alphaEncoder?.flush();
            }
            if (this.encoder.state !== 'closed') {
                this.encoder.close();
            }
            if (this.alphaEncoder && this.alphaEncoder.state !== 'closed') {
                this.alphaEncoder.close();
            }
            // Release alpha frames that never made it to the alpha encoder
            this.alphaFrameQueue.forEach(x => x?.close());
            this.splitter?.close();
        }
        if (!forceClose) {
            this.checkForEncoderError();
        }
    }
    /** Returns the number of encode calls currently queued in the active encoder (0 if there is none yet). */
    getQueueSize() {
        if (this.customEncoder) {
            return this.customEncoderQueueSize;
        }
        else {
            // Because the color and alpha encoders are wired in series, there's no need to also include the alpha
            // encoder's queue size here
            return this.encoder?.encodeQueueSize ?? 0;
        }
    }
    /** Rethrows the first error recorded from an encoder or muxer callback, if any. */
    checkForEncoderError() {
        if (this.error) {
            throw this.error;
        }
    }
}
// Module-wide flag: set to true when setting up the WebGL path in ColorAlphaSplitter's constructor throws, so that
// splitters created afterwards skip the GPU attempt.
let splitterGpuUnavailable = false;
/** Utility class for splitting a composite frame into separate color and alpha components. */
export class ColorAlphaSplitter {
constructor(initialWidth, initialHeight) {
this.canvas = null;
this.gl = null;
this.colorProgram = null;
this.alphaProgram = null;
this.vao = null;
this.sourceTexture = null;
this.alphaResolutionLocation = null;
this.worker = null;
this.pendingRequests = new Map();
this.nextRequestId = 0;
const canMakeCanvas = typeof OffscreenCanvas !== 'undefined'
// eslint-disable-next-line @typescript-eslint/no-deprecated
|| (typeof document !== 'undefined' && typeof document.createElement === 'function');
if (!ColorAlphaSplitter.forceCpu && canMakeCanvas && !splitterGpuUnavailable) {
// Try the GPU path. If anything goes wrong, we silently fall back to the CPU path.
try {
if (typeof OffscreenCanvas !== 'undefined') {
this.canvas = new OffscreenCanvas(initialWidth, initialHeight);
}
else {
this.canvas = document.createElement('canvas');
this.canvas.width = initialWidth;
this.canvas.height = initialHeight;
}
const gl = this.canvas.getContext('webgl2', {
alpha: true, // Needed due to the YUV thing we do for alpha
}); // Casting because of some TypeScript weirdness
if (!gl) {
throw new Error('Couldn\'t acquire WebGL 2 context.');
}
this.gl = gl;
this.colorProgram = this.createColorProgram();
this.alphaProgram = this.createAlphaProgram();
this.vao = this.createVAO();
this.sourceTexture = this.createTexture();
this.alphaResolutionLocation = this.gl.getUniformLocation(this.alphaProgram, 'u_resolution');
this.gl.useProgram(this.colorProgram);
this.gl.uniform1i(this.gl.getUniformLocation(this.colorProgram, 'u_sourceTexture'), 0);
this.gl.useProgram(this.alphaProgram);
this.gl.uniform1i(this.gl.getUniformLocation(this.alphaProgram, 'u_sourceTexture'), 0);
}
catch (error) {
this.gl = null;
this.canvas = null;
splitterGpuUnavailable = true;
console.warn('Falling back to CPU for color/alpha splitting.', error);
}
}
}
async update(sourceFrame) {
if (this.gl) {
return this.updateGpu(sourceFrame);
}
else {
return this.updateCpu(sourceFrame);
}
}
updateGpu(sourceFrame) {
assert(this.gl);
assert(this.canvas);
if (sourceFrame.displayWidth !== this.canvas.width || sourceFrame.displayHeight !== this.canvas.height) {
this.canvas.width = sourceFrame.displayWidth;
this.canvas.height = sourceFrame.displayHeight;
}
this.gl.activeTexture(this.gl.TEXTURE0);
this.gl.bindTexture(this.gl.TEXTURE_2D, this.sourceTexture);
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, sourceFrame);
const colorFrame = this.runColorProgram(sourceFrame);
const alphaFrame = this.runAlphaProgram(sourceFrame);
sourceFrame.close();
return { colorFrame, alphaFrame };
}
createVertexShader() {
assert(this.gl);
return this.createShader(this.gl.VERTEX_SHADER, `#version 300 es
in vec2 a_position;
in vec2 a_texCoord;
out vec2 v_texCoord;
void main() {
gl_Position = vec4(a_position, 0.0, 1.0);
v_texCoord = a_texCoord;
}
`);
}
createColorProgram() {
assert(this.gl);
const vertexShader = this.createVertexShader();
// This shader is simple, simply copy the color information while setting alpha to 1
const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
precision highp float;
uniform sampler2D u_sourceTexture;
in vec2 v_texCoord;
out vec4 fragColor;
void main() {
vec4 source = texture(u_sourceTexture, v_texCoord);
fragColor = vec4(source.rgb, 1.0);
}
`);
const program = this.gl.createProgram();
this.gl.attachShader(program, vertexShader);
this.gl.attachShader(program, fragmentShader);
this.gl.linkProgram(program);
return program;
}
createAlphaProgram() {
assert(this.gl);
const vertexShader = this.createVertexShader();
// This shader's more complex. The main reason is that this shader writes data in I420 (yuv420) pixel format
// instead of regular RGBA. In other words, we use the shader to write out I420 data into an RGBA canvas, which
// we then later read out with JavaScript. The reason being that browsers weirdly encode canvases and mess up
// the color spaces, and the only way to have full control over the color space is by outputting YUV data
// directly (avoiding the RGB conversion). Doing this conversion in JS is painfully slow, so let's utilize the
// GPU since we're already calling it anyway.
const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
precision highp float;
uniform sampler2D u_sourceTexture;
uniform vec2 u_resolution; // The width and height of the canvas
in vec2 v_texCoord;
out vec4 fragColor;
// This function determines the value for a single byte in the YUV stream
float getByteValue(float byteOffset) {
float width = u_resolution.x;
float height = u_resolution.y;
float yPlaneSize = width * height;
if (byteOffset < yPlaneSize) {
// This byte is in the luma plane. Find the corresponding pixel coordinates to sample from
float y = floor(byteOffset / width);
float x = mod(byteOffset, width);
// Add 0.5 to sample the center of the texel
vec2 sampleCoord = (vec2(x, y) + 0.5) / u_resolution;
// The luma value is the alpha from the source texture
return texture(u_sourceTexture, sampleCoord).a;
} else {
// Write a fixed value for chroma and beyond
return 128.0 / 255.0;
}
}
void main() {
// Each fragment writes 4 bytes (R, G, B, A)
float pixelIndex = floor(gl_FragCoord.y) * u_resolution.x + floor(gl_FragCoord.x);
float baseByteOffset = pixelIndex * 4.0;
vec4 result;
for (int i = 0; i < 4; i++) {
float currentByteOffset = baseByteOffset + float(i);
result[i] = getByteValue(currentByteOffset);
}
fragColor = result;
}
`);
const program = this.gl.createProgram();
this.gl.attachShader(program, vertexShader);
this.gl.attachShader(program, fragmentShader);
this.gl.linkProgram(program);
return program;
}
createShader(type, source) {
assert(this.gl);
const shader = this.gl.createShader(type);
this.gl.shaderSource(shader, source);
this.gl.compileShader(shader);
if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) {
console.error('Shader compile error:', this.gl.getShaderInfoLog(shader));
}
return shader;
}
createVAO() {
assert(this.gl);
assert(this.colorProgram);
const vao = this.gl.createVertexArray();
this.gl.bindVertexArray(vao);
const vertices = new Float32Array([
-1, -1, 0, 1,
1, -1, 1, 1,
-1, 1, 0, 0,
1, 1, 1, 0,
]);
const buffer = this.gl.createBuffer();
this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer);
this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW);
const positionLocation = this.gl.getAttribLocation(this.colorProgram, 'a_position');
const texCoordLocation = this.gl.getAttribLocation(this.colorProgram, 'a_texCoord');
this.gl.enableVertexAttribArray(positionLocation);
this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0);
this.gl.enableVertexAttribArray(texCoordLocation);
this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8);
return vao;
}
createTexture() {
assert(this.gl);
const texture = this.gl.createTexture();
this.gl.bindTexture(this.gl.TEXTURE_2D, texture);
this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE);
this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE);
this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR);
this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR);
return texture;
}
runColorProgram(sourceFrame) {
assert(this.gl);
assert(this.canvas);
this.gl.useProgram(this.colorProgram);
this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
this.gl.clear(this.gl.COLOR_BUFFER_BIT);
this.gl.bindVertexArray(this.vao);
this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);
return new VideoFrame(this.canvas, {
timestamp: sourceFrame.timestamp,
duration: sourceFrame.duration ?? undefined,
alpha: 'discard',
});
}
runAlphaProgram(sourceFrame) {
assert(this.gl);
assert(this.canvas);
this.gl.useProgram(this.alphaProgram);
this.gl.uniform2f(this.alphaResolutionLocation, this.canvas.width, this.canvas.height);
this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
this.gl.clear(this.gl.COLOR_BUFFER_BIT);
this.gl.bindVertexArray(this.vao);
this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);
const { width, height } = this.canvas;
const chromaSamples = Math.ceil(width / 2) * Math.ceil(height / 2);
const yuvSize = width * height + chromaSamples * 2;
const requiredHeight = Math.ceil(yuvSize / (width * 4));
let yuv = new Uint8Array(4 * width * requiredHeight);
this.gl.readPixels(0, 0, width, requiredHeight, this.gl.RGBA, this.gl.UNSIGNED_BYTE, yuv);
yuv = yuv.subarray(0, yuvSize);
assert(yuv[width * height] === 128); // Where chroma data starts
assert(yuv[yuv.length - 1] === 128); // Assert the YUV data has been fully written
// Defining this separately because TypeScript doesn't know `transfer` and I can't be bothered to do declaration
// merging right now
const init = {
format: 'I420',
codedWidth: width,
codedHeight: height,
timestamp: sourceFrame.timestamp,
duration: sourceFrame.duration ?? undefined,
transfer: [yuv.buffer],
};
return new VideoFrame(yuv, init);
}
updateCpu(sourceFrame) {
if (!this.worker) {
const blob = new Blob([`(${colorAlphaSplitterWorkerCode.toString()})()`], { type: 'application/javascript' });
const url = URL.createObjectURL(blob);
this.worker = new Worker(url);
URL.revokeObjectURL(url);
this.worker.addEventListener('message', (event) => {
const data = event.data;
const pending = this.pendingRequests.get(data.id);
if (!pending) {
return;
}
this.pendingRequests.delete(data.id);
if ('error' in data) {
pending.reject(new Error(data.error));
}
else {
pending.resolve({ colorFrame: data.colorFrame, alphaFrame: data.alphaFrame });
}
});
this.worker.addEventListener('error', (event) => {
const error = new Error(event.message || 'Color/alpha splitter worker error.');
for (const pending of this.pendingRequests.values()) {
pending.reject(error);
}
this.pendingRequests.clear();
});
}
const id = this.nextRequestId++;
const pending = promiseWithResolvers();
this.pendingRequests.set(id, pending);
this.worker.postMessage({ id, sourceFrame }, { transfer: [sourceFrame] });
return pending.promise;
}
close() {
this.gl?.getExtension('WEBGL_lose_context')?.loseContext();
this.gl = null;
this.canvas = null;
this.worker?.terminate();
this.worker = null;
const error = new Error('Color/alpha splitter closed.');
for (const pending of this.pendingRequests.values()) {
pending.reject(error);
}
this.pendingRequests.clear();
}
}
// Static flag on ColorAlphaSplitter, initialized to `true`.
// NOTE(review): presumably makes the splitter use the CPU worker path instead of WebGL - the code reading
// this flag is not visible here; confirm against the constructor/caller.
ColorAlphaSplitter.forceCpu = true;
const colorAlphaSplitterWorkerCode = () => {
// Reused across frames as long as the size matches, since consecutive frames usually share dimensions.
let cpuSourceBuffer = null;
// Serialize execution internally so concurrent requests don't race on the shared cpuSourceBuffer.
let chain = Promise.resolve();
self.addEventListener('message', (event) => {
const { id, sourceFrame } = event.data;
chain = chain.then(async () => {
try {
const { colorFrame, alphaFrame } = await split(sourceFrame);
self.postMessage({ id, colorFrame, alphaFrame }, { transfer: [colorFrame, alphaFrame] });
}
catch (error) {
self.postMessage({ id, error: error.message });
}
finally {
sourceFrame.close();
}
});
});
const split = async (sourceFrame) => {
const format = sourceFrame.format;
if (!format) {
throw new Error('CPU color/alpha splitting requires a known VideoFrame format.');
}
const width = sourceFrame.codedWidth;
const height = sourceFrame.codedHeight;
const sourceSize = sourceFrame.allocati