mediabunny

/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { buildAacAudioSpecificConfig, parseAacAudioSpecificConfig } from '../shared/aac-misc';
import {
	AUDIO_CODECS,
	AudioCodec,
	MediaCodec,
	parsePcmCodec,
	PCM_AUDIO_CODECS,
	PcmAudioCodec,
	SUBTITLE_CODECS,
	SubtitleCodec,
	VIDEO_CODECS,
	VideoCodec,
} from './codec';
import { OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from './output';
import {
	assert,
	assertNever,
	binarySearchLessOrEqual,
	CallSerializer,
	clamp,
	clearIntervalUnthrottled,
	floorToDivisor,
	last,
	promiseWithResolvers,
	roundToDivisor,
	setInt24,
	setIntervalUnthrottled,
	setUint24,
	toUint8Array,
	UnthrottledTimerHandle,
} from './misc';
import { Muxer } from './muxer';
import { SubtitleParser } from './subtitles';
import { toAlaw, toUlaw } from './pcm';
import {
	CustomVideoEncoder,
	CustomAudioEncoder,
	customVideoEncoders,
	customAudioEncoders,
} from './custom-coder';
import { EncodedPacket, EncodedPacketSideData, PacketType } from './packet';
import {
	AudioSample,
	audioSampleToInterleavedFormat,
	toInterleavedAudioFormat,
	VideoSample,
	VideoSamplePixelFormat,
} from './sample';
import {
	AudioEncodingConfig,
	buildAudioEncoderConfig,
	buildVideoEncoderConfig,
	validateAudioEncodingConfig,
	validateVideoEncodingConfig,
	VideoEncodingConfig,
} from './encode';
import { AudioResampler } from './resample';
import { determineVideoPacketType } from './codec-data';

/**
 * Base class for media sources. Media sources are used to add media samples to an output file.
 * @group Media sources
 * @public
 */
export abstract class MediaSource {
	/** @internal */
	abstract readonly _codec: MediaCodec;
	/** @internal */
	_connectedTrack: OutputTrack | null = null;
	/** @internal */
	_closingPromise: Promise<void> | null = null;
	/** @internal */
	_closed = false;

	/**
	 * Throws unless a sample may currently be added: the source must be connected to a track, the track's output
	 * must be neither canceled, finalizing/finalized nor still pending, and the source must not be closed.
	 * @internal
	 */
	_ensureValidAdd() {
		const track = this._connectedTrack;
		if (!track) {
			throw new Error('Source is not connected to an output track.');
		}

		// The case order mirrors the precedence of the error messages
		switch (track.output.state) {
			case 'canceled':
				throw new Error('Output has been canceled.');
			case 'finalizing':
			case 'finalized':
				throw new Error('Output has been finalized.');
			case 'pending':
				throw new Error('Output has not started.');
		}

		if (this._closed) {
			throw new Error('Source is closed.');
		}
	}

	/** @internal */
	async _start() {}

	/** @internal */
	// eslint-disable-next-line @typescript-eslint/no-unused-vars
	async _flushAndClose(forceClose: boolean) {}

	/**
	 * Closes this source. This prevents future samples from being added and signals to the output file that no further
	 * samples will come in for this track. Calling `.close()` is optional but recommended after adding the
	 * last sample - for improved performance and reduced memory usage.
	 */
	close() {
		// A close is already underway (or done); nothing more to do
		if (this._closingPromise) {
			return;
		}

		const track = this._connectedTrack;
		if (!track) {
			throw new Error('Cannot call close without connecting the source to an output track.');
		}
		if (track.output.state === 'pending') {
			throw new Error('Cannot call close before output has been started.');
		}

		const finishClosing = async () => {
			await this._flushAndClose(false);
			this._closed = true;

			// The output state is read only after the flush completed, as it may have changed in the meantime
			const { state } = track.output;
			const outputIsWrappingUp = state === 'finalizing' || state === 'finalized';
			if (!outputIsWrappingUp) {
				track.output._muxer.onTrackClose(track);
			}
		};

		this._closingPromise = finishClosing();
	}

	/**
	 * Returns the promise of an ongoing close if there is one; otherwise starts flushing and closing the source
	 * (without notifying the muxer) and returns that promise.
	 * @internal
	 */
	async _flushOrWaitForOngoingClose(forceClose: boolean) {
		if (!this._closingPromise) {
			const flushThenMarkClosed = async () => {
				await this._flushAndClose(forceClose);
				this._closed = true;
			};

			this._closingPromise = flushThenMarkClosed();
		}

		return this._closingPromise;
	}
}

/**
 * Base class for video sources - sources for video tracks.
 * @group Media sources
 * @public
 */
export abstract class VideoSource extends MediaSource {
	/** @internal */
	override _connectedTrack: OutputVideoTrack | null = null;
	/** @internal */
	override readonly _codec: VideoCodec;

	/** Internal constructor. */
	constructor(codec: VideoCodec) {
		super();

		const codecIsKnown = VIDEO_CODECS.includes(codec);
		if (!codecIsKnown) {
			throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`);
		}

		this._codec = codec;
	}
}

/** Throws if `packet` is not a key packet although `track` is declared to consist of key packets only. */
const maybeEnsureIsKeyPacket = (track: OutputVideoTrack, packet: EncodedPacket) => {
	if (!track.metadata.hasOnlyKeyPackets) {
		return;
	}
	if (packet.type === 'key') {
		return;
	}

	throw new Error('Cannot add non-key packets to a hasOnlyKeyPackets video track.');
};

/**
 * The most basic video source; can be used to directly pipe encoded packets into the output file.
 * @group Media sources
 * @public
 */
export class EncodedVideoPacketSource extends VideoSource {
	/** Creates a new {@link EncodedVideoPacketSource} whose packets are encoded using `codec`. */
	constructor(codec: VideoCodec) {
		super(codec);
	}

	/**
	 * Adds an encoded packet to the output video track.
Packets must be added in *decode order*, while a packet's
	 * timestamp must be its *presentation timestamp*. B-frames are handled automatically.
	 *
	 * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid
	 * decoder config.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) {
		if (!(packet instanceof EncodedPacket)) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		if (packet.isMetadataOnly) {
			throw new TypeError('Metadata-only packets cannot be added.');
		}
		if (meta !== undefined && (!meta || typeof meta !== 'object')) {
			throw new TypeError('meta, when provided, must be an object.');
		}

		this._ensureValidAdd();
		maybeEnsureIsKeyPacket(this._connectedTrack!, packet);

		return this._connectedTrack!.output._muxer.addEncodedVideoPacket(this._connectedTrack!, packet, meta);
	}
}

/**
 * Drives the encoding of video samples for a {@link VideoSource}: applies the configured transform and frame rate
 * normalization, lazily creates the encoder (a registered custom encoder or a WebCodecs `VideoEncoder`, plus a second
 * `VideoEncoder` for alpha when `alpha` is `'keep'`), and forwards the encoded packets to the muxer.
 */
class VideoEncoderWrapper {
	private ensureEncoderPromise: Promise<void> | null = null;
	private encoderInitialized = false;
	private encoder: VideoEncoder | null = null;
	private muxer: Muxer | null = null;
	private lastMultipleOfKeyFrameInterval = -1;
	private emittedEncoderPackets = 0;

	// Tracks the input dimensions of the first frame
	private codedWidth: number | null = null;
	private codedHeight: number | null = null;
	// Tracks the output dimensions of the first frame (used to lock dimensions for fill/contain/cover)
	private outputWidth: number | null = null;
	private outputHeight: number | null = null;

	// Frame rate normalization state
	private frameRateLastSample: VideoSample | null = null;
	private frameRateLastTimestamp: number | null = null;
	private frameRateLastEndTimestamp: number | null = null;

	// VideoEncoder converts everything to microseconds, so we need to do some bookkeeping to restore the original
	// timing information
	private preciseTimings: {
		microsecondTimestamp: number;
		timestamp: number;
		duration: number;
		timestampIsValid: boolean;
		durationIsValid: boolean;
	}[] = [];

	private customEncoder: CustomVideoEncoder | null = null;
	private customEncoderCallSerializer = new CallSerializer();
	private customEncoderQueueSize = 0;

	// Alpha stuff
	private alphaEncoder: VideoEncoder | null = null;
	private splitter: ColorAlphaSplitter | null = null;
	private splitterCreationFailed = false;
	private alphaFrameQueue: (VideoFrame | null)[] = [];

	/**
	 * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
	 * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught.
	 * So, we keep track of the encoder error and throw it as soon as we get the chance.
	 */
	private error: Error | null = null;
	private lastMuxerPromise: Promise<void> = Promise.resolve();

	constructor(private source: VideoSource, private encodingConfig: VideoEncodingConfig) {}

	/**
	 * Validates, optionally transforms and frame-rate-normalizes a video sample, then encodes it.
	 *
	 * @param videoSample - The sample to encode.
	 * @param shouldClose - Whether this wrapper is responsible for closing `videoSample` once it is done with it.
	 * Intermediate samples created in here (transformed samples, clones) are always closed by the wrapper.
	 * @param encodeOptions - Options forwarded to the encoder; `keyFrame` may additionally be forced by the key
	 * frame interval logic.
	 * @throws A previously recorded encoder error, an error from the source's validity check, or an error if the
	 * sample size changed while `sizeChangeBehavior` is `'deny'`.
	 */
	async add(videoSample: VideoSample, shouldClose: boolean, encodeOptions?: VideoEncoderEncodeOptions) {
		const originalSample = videoSample;

		try {
			this.checkForEncoderError();
			this.source._ensureValidAdd();

			const config = this.encodingConfig;
			const sizeChangeBehavior = config.sizeChangeBehavior ?? 'deny';
			let isSizeChange = false;

			// Ensure video sample size remains constant or handle the change
			if (this.codedWidth !== null && this.codedHeight !== null) {
				if (videoSample.codedWidth !== this.codedWidth || videoSample.codedHeight !== this.codedHeight) {
					isSizeChange = true;

					if (sizeChangeBehavior === 'deny') {
						throw new Error(
							`Video sample size must remain constant. Expected ${this.codedWidth}x${this.codedHeight},`
							+ ` got ${videoSample.codedWidth}x${videoSample.codedHeight}. To allow the sample size to`
							+ ` change over time, set \`sizeChangeBehavior\` to a value other than 'deny' in the`
							+ ` encoding options.`,
						);
					}
				}
			} else {
				this.codedWidth = videoSample.codedWidth;
				this.codedHeight = videoSample.codedHeight;
			}

			// Determine if we need to apply transformations via canvas
			const hasTransformConfig = config.transform?.width !== undefined
				|| config.transform?.height !== undefined
				|| config.transform?.rotate !== undefined
				|| config.transform?.crop !== undefined
				|| config.transform?.force === true;
			const needsTransform = hasTransformConfig || (isSizeChange && sizeChangeBehavior !== 'passThrough');

			if (needsTransform) {
				let targetWidth = config.transform?.width;
				let targetHeight = config.transform?.height;
				let appliedFit: 'fill' | 'contain' | 'cover' = config.transform?.fit ?? 'fill';

				// If the size changed and behavior is fill/contain/cover, lock to the original output dimensions
				if (isSizeChange && sizeChangeBehavior !== 'passThrough') {
					assert(this.outputWidth);
					assert(this.outputHeight);
					assert(sizeChangeBehavior !== 'deny');

					targetWidth = this.outputWidth!;
					targetHeight = this.outputHeight!;
					appliedFit = sizeChangeBehavior;
				}

				const transformed = await videoSample.transform({
					width: targetWidth,
					height: targetHeight,
					roundDimensionsTo: 2,
					crop: config.transform?.crop,
					rotate: config.transform?.rotate,
					fit: appliedFit,
					alpha: config.alpha,
				});

				// Save the output dimensions of the first frame
				if (this.outputWidth === null || this.outputHeight === null) {
					this.outputWidth = transformed.displayWidth;
					this.outputHeight = transformed.displayHeight;
				}

				if (shouldClose) {
					videoSample.close();
				}

				// From here on we work with the transformed sample, which we own and must therefore close
				videoSample = transformed;
				shouldClose = true;
			} else {
				// If no canvas is needed, we still need to record the output dimensions for the first frame
				if (this.outputWidth === null || this.outputHeight === null) {
					this.outputWidth = videoSample.codedWidth;
					this.outputHeight = videoSample.codedHeight;
				}
			}

			const frameRate = config.transform?.frameRate;
			if (frameRate !== undefined) {
				// Apply frame rate normalization
				const originalEndTimestamp = videoSample.timestamp + videoSample.duration;
				const alignedTimestamp = floorToDivisor(videoSample.timestamp, frameRate);

				if (this.frameRateLastSample !== null) {
					if (alignedTimestamp <= this.frameRateLastTimestamp!) {
						// Same frame rate slot, replace stored sample with the newer one
						this.frameRateLastSample.close();
						this.frameRateLastSample = videoSample.clone();
						this.frameRateLastEndTimestamp = originalEndTimestamp;

						return;
					} else {
						// Pad the gap by repeating the previous frame
						await this.padFrameRate(alignedTimestamp, encodeOptions);
					}
				}

				// Clone if the sample is still the user's, to avoid mutating externally-owned data
				if (videoSample === originalSample) {
					const ownedCopy = videoSample.clone();

					if (shouldClose) {
						// We were asked to close the incoming sample. Since only the copy is used from here on,
						// close it now; otherwise nobody would ever close it.
						originalSample.close();
					}

					videoSample = ownedCopy;
					shouldClose = true;
				}

				videoSample.setTimestamp(alignedTimestamp);
				videoSample.setDuration(1 / frameRate);

				this.frameRateLastSample?.close();
				this.frameRateLastSample = videoSample.clone();
				this.frameRateLastTimestamp = alignedTimestamp;
				this.frameRateLastEndTimestamp = originalEndTimestamp;
			}

			await this.processAndEncode(videoSample, encodeOptions);
		} finally {
			if (shouldClose) {
				videoSample.close();
			}
		}
	}

	/**
	 * Runs the process function (if any) and encodes the resulting samples.
	 *
	 * `videoSample` itself is never closed in here; samples produced by the process function are closed once encoding
	 * of the batch has finished or failed.
	 */
	private async processAndEncode(
		videoSample: VideoSample,
		encodeOptions?: VideoEncoderEncodeOptions,
	) {
		const config = this.encodingConfig;
		let samplesToEncode: VideoSample[];

		// Apply the user-defined process function, if any
		if (config.transform?.process) {
			let processed = config.transform.process(videoSample);
			if (processed instanceof Promise) {
				processed = await processed;
			}

			if (processed === null) {
				// The process function decided to drop this sample
				return;
			}

			if (!Array.isArray(processed)) {
				processed = [processed];
			}

			samplesToEncode = processed.map((x) => {
				if (x instanceof VideoSample) {
					return x;
				}

				if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) {
					return new VideoSample(x);
				}

				// Other image sources carry no timing information, so inherit it from the input sample
				return new VideoSample(x as CanvasImageSource, {
					timestamp: videoSample.timestamp,
					duration: videoSample.duration,
				});
			});
		} else {
			samplesToEncode = [videoSample];
		}

		try {
			for (const sampleToEncode of samplesToEncode) {
				if (!this.encoderInitialized) {
					if (!this.ensureEncoderPromise) {
						this.ensureEncoder(sampleToEncode);
					}

					// No, this "if" statement is not useless. Sometimes, the above call to
					// `ensureEncoder` might have synchronously completed and the encoder is
					// already initialized. In this case, we don't need to await the promise
					// anymore. This also fixes nasty async race condition bugs when multiple
					// code paths are calling this method: It's important that the call that
					// initialized the encoder go through this code first.
					if (!this.encoderInitialized) {
						await this.ensureEncoderPromise;
					}
				}
				assert(this.encoderInitialized);

				const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 2;
				const multipleOfKeyFrameInterval = Math.floor(sampleToEncode.timestamp / keyFrameInterval);

				// Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks
				// follow the same "key frame" rhythm, because aligned key frames are required to start new
				// fragments in ISOBMFF or clusters in Matroska (or at least desirable).
				const finalEncodeOptions = {
					...encodeOptions,
					keyFrame: encodeOptions?.keyFrame
						|| keyFrameInterval === 0
						|| multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval,
				};
				this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval;

				if (this.customEncoder) {
					this.customEncoderQueueSize++;

					// We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
					const clonedSample = sampleToEncode.clone();

					const promise = this.customEncoderCallSerializer
						.call(() => this.customEncoder!.encode(clonedSample, finalEncodeOptions))
						.catch((error: Error) => {
							this.error ??= error;
						})
						.finally(() => {
							// Decrement regardless of the outcome; a failed encode is no longer queued either
							this.customEncoderQueueSize--;
							clonedSample.close();
						});

					if (this.customEncoderQueueSize >= 4) {
						await promise;
					}
				} else {
					assert(this.encoder);

					const videoFrame = sampleToEncode.toVideoFrame();

					const preciseTimingIndex = binarySearchLessOrEqual(
						this.preciseTimings,
						videoFrame.timestamp,
						x => x.microsecondTimestamp,
					);
					const existingEntry = preciseTimingIndex !== -1
						? this.preciseTimings[preciseTimingIndex]
						: null;

					if (existingEntry && existingEntry.microsecondTimestamp === videoFrame.timestamp) {
						if (existingEntry.timestamp !== sampleToEncode.timestamp) {
							// Mapping isn't unique, can't use the timestamp
							existingEntry.timestampIsValid = false;
						}
						if (existingEntry.duration !== sampleToEncode.duration) {
							// Mapping isn't unique, can't use the duration
							existingEntry.durationIsValid = false;
						}
					} else {
						// Insert right after the found entry so the list stays sorted by microsecond timestamp
						this.preciseTimings.splice(preciseTimingIndex + 1, 0, {
							microsecondTimestamp: videoFrame.timestamp,
							timestamp: sampleToEncode.timestamp,
							duration: sampleToEncode.duration,
							timestampIsValid: true,
							durationIsValid: true,
						});

						// Make sure it doesn't grow indefinitely
						if (this.preciseTimings.length > 128) {
							this.preciseTimings.shift();
						}
					}

					if (!this.alphaEncoder) {
						// No alpha encoder, simple case
						try {
							this.encoder.encode(videoFrame, finalEncodeOptions);
						} finally {
							// Close even if encode() throws synchronously, so the frame doesn't leak
							videoFrame.close();
						}
					} else {
						// We're expected to encode alpha as well
						const frameDefinitelyHasNoAlpha = !!videoFrame.format && !videoFrame.format.includes('A');

						if (frameDefinitelyHasNoAlpha || this.splitterCreationFailed) {
							this.alphaFrameQueue.push(null);
							try {
								this.encoder.encode(videoFrame, finalEncodeOptions);
							} finally {
								videoFrame.close();
							}
						} else {
							const width = videoFrame.displayWidth;
							const height = videoFrame.displayHeight;

							if (!this.splitter) {
								this.splitter = new ColorAlphaSplitter(width, height);
							}

							// The splitter takes ownership, so no need to close the frames ourselves
							const { colorFrame, alphaFrame } = await this.splitter.update(videoFrame);

							this.alphaFrameQueue.push(alphaFrame);
							try {
								this.encoder.encode(colorFrame, finalEncodeOptions);
							} finally {
								colorFrame.close();
							}
						}
					}

					// We need to do this after sending the frame to the encoder as the frame otherwise might be closed
					if (this.encoder.encodeQueueSize >= 4) {
						await new Promise(resolve =>
							this.encoder!.addEventListener('dequeue', resolve, { once: true }),
						);
					}
				}

				await this.lastMuxerPromise; // Allow the writer to apply backpressure
			}
		} finally {
			// Close everything the process function produced, but never the caller-managed input sample
			for (const sample of samplesToEncode) {
				if (sample !== videoSample) {
					sample.close();
				}
			}
		}
	}

	/** Repeats the last frame rate sample to fill the gap up to the given timestamp. */
	private async padFrameRate(until: number, encodeOptions?: VideoEncoderEncodeOptions) {
		const frameRate = this.encodingConfig.transform!.frameRate!;
		assert(this.frameRateLastSample);

		const frameDifference = Math.round((until - this.frameRateLastTimestamp!) * frameRate);

		// Start at 1: slot 0 is the last sample itself. The slot at `until` is not filled in here.
		for (let i = 1; i < frameDifference; i++) {
			const sample = this.frameRateLastSample.clone();

			try {
				sample.setTimestamp(this.frameRateLastTimestamp! + i / frameRate);
				sample.setDuration(1 / frameRate);

				await this.processAndEncode(sample, encodeOptions);
			} finally {
				// Close the padding frame even if encoding failed
				sample.close();
			}
		}
	}

	/**
	 * Kicks off encoder creation based on the properties of the first sample and stores the resulting promise in
	 * `ensureEncoderPromise`. Uses a matching custom encoder if one is registered, otherwise a WebCodecs
	 * `VideoEncoder` (plus a second one for alpha if `alpha` is `'keep'`).
	 */
	private ensureEncoder(videoSample: VideoSample) {
		this.ensureEncoderPromise = (async () => {
			const encoderConfig = buildVideoEncoderConfig({
				...this.encodingConfig,
				width: videoSample.codedWidth,
				height: videoSample.codedHeight,
				squarePixelWidth: videoSample.squarePixelWidth,
				squarePixelHeight: videoSample.squarePixelHeight,
				framerate: this.source._connectedTrack?.metadata.frameRate,
			});
			this.encodingConfig.onEncoderConfig?.(encoderConfig);

			const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports(
				this.encodingConfig.codec,
				encoderConfig,
			));

			if (MatchingCustomEncoder) {
				// @ts-expect-error "Can't create instance of abstract class 🤓"
				this.customEncoder = new MatchingCustomEncoder() as CustomVideoEncoder;
				// @ts-expect-error It's technically readonly
				this.customEncoder.codec = this.encodingConfig.codec;
				// @ts-expect-error It's technically readonly
				this.customEncoder.config = encoderConfig;
				// @ts-expect-error It's technically readonly
				this.customEncoder.onPacket = (packet, meta) => {
					if (!(packet instanceof EncodedPacket)) {
						throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
					}
					if (meta !== undefined && (!meta || typeof meta !== 'object')) {
						throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
					}

					maybeEnsureIsKeyPacket(this.source._connectedTrack!, packet);

					this.encodingConfig.onEncodedPacket?.(packet, meta);
					this.lastMuxerPromise = this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta)
						.catch((error) => {
							this.error ??= error;
						});
				};

				await this.customEncoder.init();
			} else {
				if (typeof VideoEncoder === 'undefined') {
					throw new Error('VideoEncoder is not supported by this browser.');
				}

				encoderConfig.alpha = 'discard'; // Since we handle alpha ourselves

				if (this.encodingConfig.alpha === 'keep') {
					// Encoding alpha requires using two parallel encoders, so we need to make sure they stay in sync
					// and that neither of them drops frames. Setting latencyMode to 'quality' achieves this, because
					// "User Agents MUST not drop frames to achieve the target bitrate and/or framerate."
					encoderConfig.latencyMode = 'quality';
				}

				const hasOddDimension = encoderConfig.width % 2 === 1 || encoderConfig.height % 2 === 1;
				if (
					hasOddDimension
					&& (this.encodingConfig.codec === 'avc' || this.encodingConfig.codec === 'hevc')
				) {
					// Throw a special error for this case as it gets hit often
					throw new Error(
						`The dimensions ${encoderConfig.width}x${encoderConfig.height} are not supported for codec`
						+ ` '${this.encodingConfig.codec}'; both width and height must be even numbers. Make sure to`
						+ ` round your dimensions to the nearest even number.`,
					);
				}

				const support = await VideoEncoder.isConfigSupported(encoderConfig);
				if (!support.supported) {
					throw new Error(
						`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
						+ ` ${encoderConfig.width}x${encoderConfig.height}, hardware acceleration:`
						+ ` ${encoderConfig.hardwareAcceleration ?? 'no-preference'}) is not supported by this browser.`
						+ ` Consider using another codec or changing your video parameters.`,
					);
				}

				/** Queue of color chunks waiting for their alpha counterpart. */
				const colorChunkQueue: {
					chunk: EncodedVideoChunk;
					meta: EncodedVideoChunkMetadata | undefined;
				}[] = [];
				/** Each value is the number of encoded alpha chunks at which a null alpha chunk should be added. */
				const nullAlphaChunkQueue: number[] = [];
				let encodedAlphaChunkCount = 0;
				let alphaEncoderQueue = 0;

				const addPacket = (
					colorChunk: EncodedVideoChunk,
					alphaChunk: EncodedVideoChunk | null,
					meta: EncodedVideoChunkMetadata | undefined,
				) => {
					const sideData: EncodedPacketSideData = {};

					if (alphaChunk) {
						const alphaData = new Uint8Array(alphaChunk.byteLength);
						alphaChunk.copyTo(alphaData);

						sideData.alpha = alphaData;
					}

					let packet = EncodedPacket.fromEncodedChunk(colorChunk, sideData);

					// See if there's a relevant timing entry to refine the packet's timing data
					const preciseTimingIndex = binarySearchLessOrEqual(
						this.preciseTimings,
						colorChunk.timestamp,
						x => x.microsecondTimestamp,
					);
					const entry = preciseTimingIndex !== -1 ? this.preciseTimings[preciseTimingIndex] : null;

					let actualType: PacketType | null = null;
					if (this.emittedEncoderPackets === 0 && packet.type === 'delta' && meta?.decoderConfig) {
						// https://github.com/Vanilagy/mediabunny/issues/365
						// We expect the first packet to be a key packet. If it's not, let's actually verify that it's
						// not by getting the actual type.
						actualType = determineVideoPacketType(
							this.encodingConfig.codec,
							meta.decoderConfig,
							packet.data,
						);
					}

					// Refine the packet with the precise timing data and/or the corrected type
					if ((entry && entry.microsecondTimestamp === colorChunk.timestamp) || actualType !== null) {
						packet = packet.clone({
							timestamp: entry?.timestampIsValid ? entry.timestamp : undefined,
							duration: entry?.durationIsValid ? entry.duration : undefined,
							type: actualType ?? undefined,
						});
					}

					maybeEnsureIsKeyPacket(this.source._connectedTrack!, packet);

					this.encodingConfig.onEncodedPacket?.(packet, meta);
					this.lastMuxerPromise = this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta)
						.catch((error) => {
							this.error ??= error;
						});

					this.emittedEncoderPackets++;
				};

				const stack = new Error('Encoding error').stack;
				this.encoder = new VideoEncoder({
					output: (chunk, meta) => {
						if (!this.alphaEncoder) {
							// We're done
							addPacket(chunk, null, meta);
							return;
						}

						const alphaFrame = this.alphaFrameQueue.shift();
						assert(alphaFrame !== undefined);

						if (alphaFrame) {
							this.alphaEncoder.encode(alphaFrame, {
								// Crucial: The alpha frame is forced to be a key frame whenever the color frame
								// also is. Without this, playback can glitch and even crash in some browsers.
								// This is the reason why the two encoders are wired in series and not in parallel.
								keyFrame: chunk.type === 'key',
							});
							alphaEncoderQueue++;
							alphaFrame.close();
							colorChunkQueue.push({ chunk, meta });
						} else {
							// There was no alpha component for this frame
							if (alphaEncoderQueue === 0) {
								// No pending alpha encodes either, so we're done
								addPacket(chunk, null, meta);
							} else {
								// There are still alpha encodes pending, so we can't add the packet immediately since
								// we'd end up with out-of-order packets. Instead, let's queue a null alpha chunk to be
								// added in the future, after the current encoder workload has completed:
								nullAlphaChunkQueue.push(encodedAlphaChunkCount + alphaEncoderQueue);
								colorChunkQueue.push({ chunk, meta });
							}
						}
					},
					error: (error) => {
						error.stack = stack; // Provide a more useful stack trace, the default one sucks
						this.error ??= error;
					},
				});
				this.encoder.configure(encoderConfig);

				if (this.encodingConfig.alpha === 'keep') {
					const stack = new Error('Encoding error').stack;

					// We need to encode alpha as well, which we do with a separate encoder
					this.alphaEncoder = new VideoEncoder({
						// We ignore the alpha chunk's metadata
						// eslint-disable-next-line @typescript-eslint/no-unused-vars
						output: (chunk, meta) => {
							alphaEncoderQueue--;

							// There has to be a color chunk because the encoders are wired in series
							const colorChunk = colorChunkQueue.shift();
							assert(colorChunk !== undefined);

							addPacket(colorChunk.chunk, chunk, colorChunk.meta);

							// See if there are any null alpha chunks queued up
							encodedAlphaChunkCount++;
							while (
								nullAlphaChunkQueue.length > 0
								&& nullAlphaChunkQueue[0] === encodedAlphaChunkCount
							) {
								nullAlphaChunkQueue.shift();
								const colorChunk = colorChunkQueue.shift();
								assert(colorChunk !== undefined);

								addPacket(colorChunk.chunk, null, colorChunk.meta);
							}
						},
						error: (error) => {
							error.stack = stack; // Provide a more useful stack trace
							this.error ??= error;
						},
					});
					this.alphaEncoder.configure(encoderConfig);
				}
			}

			assert(this.source._connectedTrack);
			this.muxer = this.source._connectedTrack.output._muxer;

			this.encoderInitialized = true;
		})();
	}

	/**
	 * Flushes (unless `forceClose` is set) and closes the encoder(s) and releases all retained frames.
	 *
	 * @param forceClose - When `true`, skips the final frame rate padding, the flush and the encoder error checks,
	 * and only tears things down.
	 */
	async flushAndClose(forceClose: boolean) {
		if (!forceClose) {
			this.checkForEncoderError();
		}

		// Final frame rate padding: fill remaining frames up to the last sample's original end timestamp
		if (!forceClose && this.frameRateLastSample) {
			const frameRate = this.encodingConfig.transform!.frameRate!;
			const alignedEnd = floorToDivisor(this.frameRateLastEndTimestamp!, frameRate);
			await this.padFrameRate(alignedEnd);
		}

		this.frameRateLastSample?.close();
		this.frameRateLastSample = null;

		if (this.customEncoder) {
			if (!forceClose) {
				// Not awaited: the call serializer is what orders this flush before the close below
				void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush());
			}

			await this.customEncoderCallSerializer.call(() => this.customEncoder!.close());
		} else if (this.encoder) {
			if (!forceClose) {
				// These are wired in series, therefore they must also be flushed in series
				await this.encoder.flush();
				await this.alphaEncoder?.flush();
			}

			if (this.encoder.state !== 'closed') {
				this.encoder.close();
			}
			if (this.alphaEncoder && this.alphaEncoder.state !== 'closed') {
				this.alphaEncoder.close();
			}

			this.alphaFrameQueue.forEach(x => x?.close());

			this.splitter?.close();
		}

		if (!forceClose) {
			this.checkForEncoderError();
		}
	}

	/** Returns the number of samples currently queued in the active encoder, or 0 if there is no encoder yet. */
	getQueueSize() {
		if (this.customEncoder) {
			return this.customEncoderQueueSize;
		} else {
			// Because the color and alpha encoders are wired in series, there's no need to also include the alpha
			// encoder's queue size here
			return this.encoder?.encodeQueueSize ?? 0;
		}
	}

	/** Rethrows the first error recorded from an encoder or muxer callback, if any. */
	checkForEncoderError() {
		if (this.error) {
			throw this.error;
		}
	}
}

let splitterGpuUnavailable = false;

/** Utility class for splitting a composite frame into separate color and alpha components.
*/ export class ColorAlphaSplitter { static forceCpu = true; canvas: OffscreenCanvas | HTMLCanvasElement | null = null; private gl: WebGL2RenderingContext | null = null; private colorProgram: WebGLProgram | null = null; private alphaProgram: WebGLProgram | null = null; private vao: WebGLVertexArrayObject | null = null; private sourceTexture: WebGLTexture | null = null; private alphaResolutionLocation: WebGLUniformLocation | null = null; private worker: Worker | null = null; private pendingRequests = new Map< number, ReturnType<typeof promiseWithResolvers<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }>> >(); private nextRequestId = 0; constructor(initialWidth: number, initialHeight: number) { const canMakeCanvas = typeof OffscreenCanvas !== 'undefined' // eslint-disable-next-line @typescript-eslint/no-deprecated || (typeof document !== 'undefined' && typeof document.createElement === 'function'); if (!ColorAlphaSplitter.forceCpu && canMakeCanvas && !splitterGpuUnavailable) { // Try the GPU path. If anything goes wrong, we silently fall back to the CPU path. 
try {
				if (typeof OffscreenCanvas !== 'undefined') {
					this.canvas = new OffscreenCanvas(initialWidth, initialHeight);
				} else {
					this.canvas = document.createElement('canvas');
					this.canvas.width = initialWidth;
					this.canvas.height = initialHeight;
				}

				const gl = this.canvas.getContext('webgl2', {
					alpha: true, // Needed due to the YUV thing we do for alpha
				}) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness

				if (!gl) {
					throw new Error('Couldn\'t acquire WebGL 2 context.');
				}

				this.gl = gl;

				this.colorProgram = this.createColorProgram();
				this.alphaProgram = this.createAlphaProgram();
				this.vao = this.createVAO();
				this.sourceTexture = this.createTexture();

				this.alphaResolutionLocation = this.gl.getUniformLocation(this.alphaProgram, 'u_resolution')!;

				this.gl.useProgram(this.colorProgram);
				this.gl.uniform1i(this.gl.getUniformLocation(this.colorProgram, 'u_sourceTexture'), 0);
				this.gl.useProgram(this.alphaProgram);
				this.gl.uniform1i(this.gl.getUniformLocation(this.alphaProgram, 'u_sourceTexture'), 0);
			} catch (error) {
				this.gl = null;
				this.canvas = null;
				splitterGpuUnavailable = true;
				console.warn('Falling back to CPU for color/alpha splitting.', error);
			}
		}
	}

	/**
	 * Splits a frame into an opaque color frame and a frame carrying the alpha channel as luma. Uses the GPU path
	 * when a WebGL context is held, and the worker-based CPU path otherwise.
	 *
	 * The source frame is consumed: the GPU path closes it, the CPU path transfers it to the worker which closes it.
	 */
	async update(sourceFrame: VideoFrame) {
		if (this.gl) {
			return this.updateGpu(sourceFrame);
		} else {
			return this.updateCpu(sourceFrame);
		}
	}

	/**
	 * GPU path: uploads the frame as a texture and renders the color and alpha outputs from it.
	 *
	 * The source frame is always closed, even when a GL call or assertion throws, so a failing frame doesn't leak. This
	 * mirrors the CPU path, where the worker closes the frame in a `finally`.
	 */
	private updateGpu(sourceFrame: VideoFrame): { colorFrame: VideoFrame; alphaFrame: VideoFrame } {
		let colorFrame: VideoFrame | null = null;

		try {
			assert(this.gl);
			assert(this.canvas);

			if (sourceFrame.displayWidth !== this.canvas.width || sourceFrame.displayHeight !== this.canvas.height) {
				this.canvas.width = sourceFrame.displayWidth;
				this.canvas.height = sourceFrame.displayHeight;
			}

			this.gl.activeTexture(this.gl.TEXTURE0);
			this.gl.bindTexture(this.gl.TEXTURE_2D, this.sourceTexture);
			this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, sourceFrame);

			colorFrame = this.runColorProgram(sourceFrame);
			const alphaFrame = this.runAlphaProgram(sourceFrame);

			return { colorFrame, alphaFrame };
		} catch (error) {
			// The color frame may already exist when the alpha pass fails; don't leak it
			colorFrame?.close();
			throw error;
		} finally {
			sourceFrame.close();
		}
	}

	/** Compiles the vertex shader shared by both programs; it passes position and texture coordinate through. */
	private createVertexShader(): WebGLShader {
		assert(this.gl);

		return this.createShader(this.gl.VERTEX_SHADER, `#version 300 es
			in vec2 a_position;
			in vec2 a_texCoord;
			out vec2 v_texCoord;

			void main() {
				gl_Position = vec4(a_position, 0.0, 1.0);
				v_texCoord = a_texCoord;
			}
		`);
	}

	/** Builds the program that renders the color output. */
	private createColorProgram(): WebGLProgram {
		assert(this.gl);

		const vertexShader = this.createVertexShader();

		// This shader is simple, simply copy the color information while setting alpha to 1
		const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
			precision highp float;

			uniform sampler2D u_sourceTexture;
			in vec2 v_texCoord;
			out vec4 fragColor;

			void main() {
				vec4 source = texture(u_sourceTexture, v_texCoord);
				fragColor = vec4(source.rgb, 1.0);
			}
		`);

		return this.linkShaderProgram(vertexShader, fragmentShader);
	}

	/** Builds the program that renders the alpha output as an I420 byte stream. */
	private createAlphaProgram(): WebGLProgram {
		assert(this.gl);

		const vertexShader = this.createVertexShader();

		// This shader's more complex. The main reason is that this shader writes data in I420 (yuv420) pixel format
		// instead of regular RGBA. In other words, we use the shader to write out I420 data into an RGBA canvas, which
		// we then later read out with JavaScript. The reason being that browsers weirdly encode canvases and mess up
		// the color spaces, and the only way to have full control over the color space is by outputting YUV data
		// directly (avoiding the RGB conversion). Doing this conversion in JS is painfully slow, so let's utilize the
		// GPU since we're already calling it anyway.
		const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
			precision highp float;

			uniform sampler2D u_sourceTexture;
			uniform vec2 u_resolution; // The width and height of the canvas
			in vec2 v_texCoord;
			out vec4 fragColor;

			// This function determines the value for a single byte in the YUV stream
			float getByteValue(float byteOffset) {
				float width = u_resolution.x;
				float height = u_resolution.y;

				float yPlaneSize = width * height;

				if (byteOffset < yPlaneSize) {
					// This byte is in the luma plane. Find the corresponding pixel coordinates to sample from
					float y = floor(byteOffset / width);
					float x = mod(byteOffset, width);

					// Add 0.5 to sample the center of the texel
					vec2 sampleCoord = (vec2(x, y) + 0.5) / u_resolution;

					// The luma value is the alpha from the source texture
					return texture(u_sourceTexture, sampleCoord).a;
				} else {
					// Write a fixed value for chroma and beyond
					return 128.0 / 255.0;
				}
			}

			void main() {
				// Each fragment writes 4 bytes (R, G, B, A)
				float pixelIndex = floor(gl_FragCoord.y) * u_resolution.x + floor(gl_FragCoord.x);
				float baseByteOffset = pixelIndex * 4.0;

				vec4 result;
				for (int i = 0; i < 4; i++) {
					float currentByteOffset = baseByteOffset + float(i);
					result[i] = getByteValue(currentByteOffset);
				}

				fragColor = result;
			}
		`);

		return this.linkShaderProgram(vertexShader, fragmentShader);
	}

	/**
	 * Links a vertex and fragment shader into a program.
	 *
	 * Attribute locations are pinned before linking because a single VAO (set up from the color program's locations)
	 * is used to draw with both programs, and WebGL doesn't guarantee that two programs assign the same locations.
	 *
	 * @throws If linking fails. The constructor catches this and falls back to the CPU path.
	 */
	private linkShaderProgram(vertexShader: WebGLShader, fragmentShader: WebGLShader): WebGLProgram {
		assert(this.gl);

		const program = this.gl.createProgram();
		this.gl.attachShader(program, vertexShader);
		this.gl.attachShader(program, fragmentShader);
		this.gl.bindAttribLocation(program, 0, 'a_position');
		this.gl.bindAttribLocation(program, 1, 'a_texCoord');
		this.gl.linkProgram(program);

		if (!this.gl.getProgramParameter(program, this.gl.LINK_STATUS)) {
			const log = this.gl.getProgramInfoLog(program);
			this.gl.deleteProgram(program);
			throw new Error(`Shader program link error: ${log ?? 'unknown error'}`);
		}

		return program;
	}

	/**
	 * Compiles a shader of the given type from GLSL source.
	 *
	 * @throws If the shader can't be created or fails to compile. Throwing (instead of only logging) lets the
	 * constructor's try/catch fall back to the CPU path rather than continuing with a broken program.
	 */
	private createShader(type: number, source: string): WebGLShader {
		assert(this.gl);

		const shader = this.gl.createShader(type);
		if (!shader) {
			throw new Error('Couldn\'t create WebGL shader.');
		}

		this.gl.shaderSource(shader, source);
		this.gl.compileShader(shader);

		if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) {
			const log = this.gl.getShaderInfoLog(shader);
			this.gl.deleteShader(shader);
			throw new Error(`Shader compile error: ${log ?? 'unknown error'}`);
		}

		return shader;
	}

	/** Creates the VAO holding a four-vertex triangle strip with interleaved position and texture coordinates. */
	private createVAO(): WebGLVertexArrayObject {
		assert(this.gl);
		assert(this.colorProgram);

		const vao = this.gl.createVertexArray();
		this.gl.bindVertexArray(vao);

		// Per vertex: x, y, u, v (4 floats = 16 bytes stride, texture coordinates at byte offset 8)
		const vertices = new Float32Array([
			-1, -1, 0, 1,
			1, -1, 1, 1,
			-1, 1, 0, 0,
			1, 1, 1, 0,
		]);

		const buffer = this.gl.createBuffer();
		this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer);
		this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW);

		const positionLocation = this.gl.getAttribLocation(this.colorProgram, 'a_position');
		const texCoordLocation = this.gl.getAttribLocation(this.colorProgram, 'a_texCoord');

		this.gl.enableVertexAttribArray(positionLocation);
		this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0);

		this.gl.enableVertexAttribArray(texCoordLocation);
		this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8);

		return vao;
	}

	/** Creates the texture that source frames are uploaded into (clamped to edge, linear filtering). */
	private createTexture(): WebGLTexture {
		assert(this.gl);

		const texture = this.gl.createTexture();

		this.gl.bindTexture(this.gl.TEXTURE_2D, texture);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR);

		return texture;
	}

	/** Draws with the color program and captures the canvas as a frame with its alpha discarded. */
	private runColorProgram(sourceFrame: VideoFrame) {
		assert(this.gl);
		assert(this.canvas);

		this.gl.useProgram(this.colorProgram);
		this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
		this.gl.clear(this.gl.COLOR_BUFFER_BIT);

		this.gl.bindVertexArray(this.vao);
		this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);

		return new VideoFrame(this.canvas, {
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			alpha: 'discard',
		});
	}

	/**
	 * Draws with the alpha program, reads the rendered bytes back and wraps them in an I420 frame whose luma plane is
	 * the source alpha and whose chroma is the fixed value 128.
	 */
	private runAlphaProgram(sourceFrame: VideoFrame) {
		assert(this.gl);
		assert(this.canvas);

		this.gl.useProgram(this.alphaProgram);
		this.gl.uniform2f(this.alphaResolutionLocation, this.canvas.width, this.canvas.height);

		this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
		this.gl.clear(this.gl.COLOR_BUFFER_BIT);

		this.gl.bindVertexArray(this.vao);
		this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);

		const { width, height } = this.canvas;

		const chromaSamples = Math.ceil(width / 2) * Math.ceil(height / 2);
		const yuvSize = width * height + chromaSamples * 2;
		// Each canvas pixel carries 4 bytes of the stream, so only this many rows need to be read back
		const requiredHeight = Math.ceil(yuvSize / (width * 4));

		let yuv = new Uint8Array(4 * width * requiredHeight);
		this.gl.readPixels(0, 0, width, requiredHeight, this.gl.RGBA, this.gl.UNSIGNED_BYTE, yuv);
		yuv = yuv.subarray(0, yuvSize);

		assert(yuv[width * height] === 128); // Where chroma data starts
		assert(yuv[yuv.length - 1] === 128); // Assert the YUV data has been fully written

		// Defining this separately because TypeScript doesn't know `transfer` and I can't be bothered to do declaration
		// merging right now
		const init = {
			format: 'I420' as const,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			transfer: [yuv.buffer],
		};

		return new VideoFrame(yuv, init);
	}

	/**
	 * CPU path: hands the frame to a lazily created worker and resolves with the frames it sends back.
	 *
	 * The source frame is transferred to the worker. Requests are matched to responses by id; a worker `error` event
	 * rejects every request that is still pending.
	 */
	private updateCpu(sourceFrame: VideoFrame): Promise<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }> {
		if (!this.worker) {
			// The worker body is the stringified source of colorAlphaSplitterWorkerCode, invoked immediately
			const blob = new Blob(
				[`(${colorAlphaSplitterWorkerCode.toString()})()`],
				{ type: 'application/javascript' },
			);
			const url = URL.createObjectURL(blob);
			this.worker = new Worker(url);
			URL.revokeObjectURL(url);

			this.worker.addEventListener('message', (event: MessageEvent<ColorAlphaSplitterWorkerResponse>) => {
				const data = event.data;
				const pending = this.pendingRequests.get(data.id);
				if (!pending) {
					return;
				}

				this.pendingRequests.delete(data.id);

				if ('error' in data) {
					pending.reject(new Error(data.error));
				} else {
					pending.resolve({ colorFrame: data.colorFrame, alphaFrame: data.alphaFrame });
				}
			});

			this.worker.addEventListener('error', (event) => {
				const error = new Error(event.message || 'Color/alpha splitter worker error.');
				for (const pending of this.pendingRequests.values()) {
					pending.reject(error);
				}
				this.pendingRequests.clear();
			});
		}

		const id = this.nextRequestId++;
		const pending = promiseWithResolvers<{ colorFrame: VideoFrame; alphaFrame: VideoFrame }>();
		this.pendingRequests.set(id, pending);

		try {
			this.worker.postMessage({ id, sourceFrame }, { transfer: [sourceFrame] });
		} catch (error) {
			// The request never reached the worker, so no response will ever remove it from the map
			this.pendingRequests.delete(id);
			throw error;
		}

		return pending.promise;
	}

	/** Releases the WebGL context and the worker, and rejects all requests that are still pending. */
	close() {
		this.gl?.getExtension('WEBGL_lose_context')?.loseContext();
		this.gl = null;
		this.canvas = null;

		this.worker?.terminate();
		this.worker = null;

		const error = new Error('Color/alpha splitter closed.');
		for (const pending of this.pendingRequests.values()) {
			pending.reject(error);
		}
		this.pendingRequests.clear();
	}
}

/** Message sent to the splitter worker; `sourceFrame` is transferred along with it. */
type ColorAlphaSplitterWorkerRequest = {
	id: number;
	sourceFrame: VideoFrame;
};

/** Message sent back by the splitter worker: either the two result frames or an error message, keyed by request id. */
type ColorAlphaSplitterWorkerResponse =
	| { id: number; colorFrame: VideoFrame; alphaFrame: VideoFrame }
	| { id: number; error: string };

/**
 * Body of the CPU fallback worker. This function is turned into a string with `toString()` and run inside a worker, so
 * it must be fully self-contained: it can't reference anything from the surrounding module at runtime (type-only
 * references are fine).
 */
const colorAlphaSplitterWorkerCode = () => {
	// Reused across frames as long as the size matches, since consecutive frames usually share dimensions.
	let cpuSourceBuffer: Uint8Array | null = null;

	// Serialize execution internally so concurrent requests don't race on the shared cpuSourceBuffer.
	let chain: Promise<void> = Promise.resolve();

	self.addEventListener('message', (event: MessageEvent<ColorAlphaSplitterWorkerRequest>) => {
		const { id, sourceFrame } = event.data;

		chain = chain.then(async () => {
			let frames: { colorFrame: VideoFrame; alphaFrame: VideoFrame } | null = null;

			try {
				frames = await split(sourceFrame);
				self.postMessage(
					{ id, colorFrame: frames.colorFrame, alphaFrame: frames.alphaFrame },
					{ transfer: [frames.colorFrame, frames.alphaFrame] },
				);
			} catch (error) {
				// If posting the result failed, the frames were not transferred and would otherwise leak
				frames?.colorFrame.close();
				frames?.alphaFrame.close();

				self.postMessage({ id, error: error instanceof Error ? error.message : String(error) });
			} finally {
				sourceFrame.close();
			}
		});
	});

	// Copies the frame's pixel data into the shared buffer and dispatches on its pixel format
	const split = async (sourceFrame: VideoFrame) => {
		const format = sourceFrame.format as VideoSamplePixelFormat | null;
		if (!format) {
			throw new Error('CPU color/alpha splitting requires a known VideoFrame format.');
		}

		// allocationSize() and copyTo() without options operate on the frame's visible rect with a tightly packed
		// layout, so the copied data has the visible rect's dimensions, which can be smaller than the coded size.
		const visibleRect = sourceFrame.visibleRect;
		const width = visibleRect?.width ?? sourceFrame.codedWidth;
		const height = visibleRect?.height ?? sourceFrame.codedHeight;

		const sourceSize = sourceFrame.allocationSize();
		if (!cpuSourceBuffer || cpuSourceBuffer.byteLength !== sourceSize) {
			cpuSourceBuffer = new Uint8Array(sourceSize);
		}

		await sourceFrame.copyTo(cpuSourceBuffer);

		if (format === 'RGBA' || format === 'BGRA') {
			return splitInterleavedRgba(cpuSourceBuffer, width, height, format, sourceFrame);
		} else if (
			format === 'I420A' || format === 'I420AP10' || format === 'I420AP12'
			|| format === 'I422A' || format === 'I422AP10' || format === 'I422AP12'
			|| format === 'I444A' || format === 'I444AP10' || format === 'I444AP12'
		) {
			return splitPlanarYuvA(cpuSourceBuffer, width, height, format, sourceFrame);
		}

		throw new Error(`CPU color/alpha splitting does not support format '${format}'.`);
	};

	// Splits tightly packed RGBA/BGRA data into an RGBX/BGRX color frame and an 8-bit I420 alpha frame
	const splitInterleavedRgba = (
		source: Uint8Array,
		width: number,
		height: number,
		format: 'RGBA' | 'BGRA',
		sourceFrame: VideoFrame,
	) => {
		const pixelCount = width * height;
		const chromaW = Math.ceil(width / 2);
		const chromaH = Math.ceil(height / 2);
		const alphaSize = pixelCount + chromaW * chromaH * 2;

		// Encode alpha as I420: Y = source A bytes, UV = 128
		const alphaBuffer = new Uint8Array(alphaSize);
		for (let i = 0, j = 3; i < pixelCount; i++, j += 4) {
			alphaBuffer[i] = source[j]!;
		}
		alphaBuffer.fill(128, pixelCount);

		// Hand the source buffer straight to VideoFrame as RGBX/BGRX so the A bytes are ignored
		const colorFrame = new VideoFrame(source, {
			format: format === 'RGBA' ? 'RGBX' : 'BGRX',
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			// No transfer!
		});

		const alphaInit = {
			format: 'I420' as const,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			transfer: [alphaBuffer.buffer],
		};
		const alphaFrame = new VideoFrame(alphaBuffer, alphaInit);

		return { colorFrame, alphaFrame };
	};

	// Splits tightly packed planar YUVA data (Y, U, V, A planes in that order) into a YUV color frame of the same
	// subsampling and bit depth, and an I420 alpha frame of the same bit depth
	const splitPlanarYuvA = (
		source: Uint8Array,
		width: number,
		height: number,
		format:
			| 'I420A' | 'I420AP10' | 'I420AP12'
			| 'I422A' | 'I422AP10' | 'I422AP12'
			| 'I444A' | 'I444AP10' | 'I444AP12',
		sourceFrame: VideoFrame,
	) => {
		const is10 = format.includes('P10');
		const is12 = format.includes('P12');
		const bytesPerSample = (is10 || is12) ? 2 : 1;

		let chromaW: number;
		let chromaH: number;
		if (format.startsWith('I420')) {
			chromaW = Math.ceil(width / 2);
			chromaH = Math.ceil(height / 2);
		} else if (format.startsWith('I422')) {
			chromaW = Math.ceil(width / 2);
			chromaH = height;
		} else {
			chromaW = width;
			chromaH = height;
		}

		const ySamples = width * height;
		const uvSamples = chromaW * chromaH;
		const yBytes = ySamples * bytesPerSample;
		const uvBytes = uvSamples * bytesPerSample;
		const aBytes = ySamples * bytesPerSample;
		const colorBytes = yBytes + uvBytes * 2;

		// Dropping the first 'A' turns e.g. 'I420AP10' into 'I420P10'
		const colorFormat = format.replace('A', '') as VideoPixelFormat;

		// The alpha frame is always 4:2:0, regardless of the source's chroma subsampling
		const alphaChromaW = Math.ceil(width / 2);
		const alphaChromaH = Math.ceil(height / 2);
		const alphaUvSamples = alphaChromaW * alphaChromaH;
		const alphaUvBytes = alphaUvSamples * bytesPerSample;
		const alphaSize = aBytes + 2 * alphaUvBytes;

		const alphaBuffer = new Uint8Array(alphaSize);
		const aPlaneStart = colorBytes;
		alphaBuffer.set(source.subarray(aPlaneStart, aPlaneStart + aBytes), 0);

		// Fill UV planes with the neutral chroma value
		const uvOffset = aBytes;
		const neutralChroma = is10 ? 512 : (is12 ? 2048 : 128);
		if (bytesPerSample === 1) {
			alphaBuffer.fill(neutralChroma, uvOffset);
		} else {
			const uvView = new Uint16Array(alphaBuffer.buffer, uvOffset, 2 * alphaUvSamples);
			uvView.fill(neutralChroma);
		}

		const alphaFormat = (is10 ? 'I420P10' : (is12 ? 'I420P12' : 'I420')) as VideoPixelFormat;

		// Color frame is simply a prefix of the combined bytes
		const colorFrame = new VideoFrame(source.subarray(0, colorBytes), {
			format: colorFormat,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
		});

		const alphaInit = {
			format: alphaFormat,
			codedWidth: width,
			codedHeight: height,
			timestamp: sourceFrame.timestamp,
			duration: sourceFrame.duration ?? undefined,
			transfer: [alphaBuffer.buffer],
		};
		const alphaFrame = new VideoFrame(alphaBuffer, alphaInit);

		return { colorFrame, alphaFrame };
	};
};

/**
 * This source can be used to add raw, unencoded video samples (frames) to an output video track. These frames will
 * automatically be encoded and then piped into the output.
 * @group Media sources
 * @public
 */
export class VideoSampleSource extends VideoSource {
	/** @internal */
	private _encoder: VideoEncoderWrapper;

	/**
	 * Creates a new {@link VideoSampleSource} whose samples are encoded according to the specified
	 * {@link VideoEncodingConfig}.
	 */
	constructor(encodingConfig: VideoEncodingConfig) {
		validateVideoEncodingConfig(encodingConfig);

		super(encodingConfig.codec);
		this._encoder = new VideoEncoderWrapper(this, encodingConfig);
	}

	/**
	 * Encodes a video sample (frame) and then adds it to the output.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(videoSample: VideoSample, encodeOptions?: VideoEncoderEncodeOptions) {
		if (!(videoSample instanceof VideoSample)) {
			throw new TypeError('videoSample must be a VideoSample.');
		}

		return this._encoder.add(videoSample, false, encodeOptions);
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		return this._encoder.flushAndClose(forceClose);
	}
}

/**
 * This source can be used to add video frames to the output track from a fixed canvas element. Since canvases are often
 * used for rendering, this source provides a convenient wrapper around {@link VideoSampleSource}.
 * @group Media sources
 * @public
 */
export class CanvasSource extends VideoSource {
	/** @internal */
	private _encoder: VideoEncoderWrapper;
	/** @internal */
	private _canvas: HTMLCanvasElement | OffscreenCanvas;

	/**
	 * Creates a new {@link CanvasSource} from a canvas element or `OffscreenCanvas` whose samples are encoded
	 * according to the specified {@link VideoEncodingConfig}.
*/ constructor(canvas: HTMLCanvasElement | OffscreenCanvas, encodingConfig: VideoEncodingConfig) { if ( !(typeof HTMLCanvasElement !== 'undefined' && canvas instanceof HTMLCanvasElement) &&