UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

1,025 lines (1,013 loc) 116 kB
/*! * Copyright (c) 2026-present, Vanilagy and contributors * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { buildAacAudioSpecificConfig, parseAacAudioSpecificConfig } from '../shared/aac-misc.js'; import { AUDIO_CODECS, parsePcmCodec, PCM_AUDIO_CODECS, SUBTITLE_CODECS, VIDEO_CODECS, } from './codec.js'; import { assert, assertNever, binarySearchLessOrEqual, CallSerializer, clamp, clearIntervalUnthrottled, floorToDivisor, last, promiseWithResolvers, roundToDivisor, setInt24, setIntervalUnthrottled, setUint24, toUint8Array, } from './misc.js'; import { SubtitleParser } from './subtitles.js'; import { toAlaw, toUlaw } from './pcm.js'; import { customVideoEncoders, customAudioEncoders, } from './custom-coder.js'; import { EncodedPacket } from './packet.js'; import { AudioSample, audioSampleToInterleavedFormat, toInterleavedAudioFormat, VideoSample, } from './sample.js'; import { buildAudioEncoderConfig, buildVideoEncoderConfig, validateAudioEncodingConfig, validateVideoEncodingConfig, } from './encode.js'; import { AudioResampler } from './resample.js'; import { determineVideoPacketType } from './codec-data.js'; /** * Base class for media sources. Media sources are used to add media samples to an output file. 
/**
 * Base class for media sources. Media sources are used to add media samples to an output file.
 * @group Media sources
 * @public
 */
export class MediaSource {
    constructor() {
        /** @internal */
        this._connectedTrack = null;
        /** @internal */
        this._closingPromise = null;
        /** @internal */
        this._closed = false;
    }

    /**
     * Throws unless this source is connected to a track whose output is in a state that accepts samples
     * (not pending, canceled, finalizing or finalized) and the source has not been closed yet.
     * @internal
     */
    _ensureValidAdd() {
        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Source is not connected to an output track.');
        }

        switch (track.output.state) {
            case 'canceled':
                throw new Error('Output has been canceled.');
            case 'finalizing':
            case 'finalized':
                throw new Error('Output has been finalized.');
            case 'pending':
                throw new Error('Output has not started.');
            default:
                break;
        }

        if (this._closed) {
            throw new Error('Source is closed.');
        }
    }

    /** @internal */
    async _start() { }

    /** @internal */
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    async _flushAndClose(forceClose) { }

    /**
     * Closes this source. This prevents future samples from being added and signals to the output file that no further
     * samples will come in for this track. Calling `.close()` is optional but recommended after adding the
     * last sample - for improved performance and reduced memory usage.
     */
    close() {
        // A close is already underway (or done); nothing more to do
        if (this._closingPromise) {
            return;
        }

        const track = this._connectedTrack;
        if (!track) {
            throw new Error('Cannot call close without connecting the source to an output track.');
        }
        if (track.output.state === 'pending') {
            throw new Error('Cannot call close before output has been started.');
        }

        const flushThenNotifyMuxer = async () => {
            await this._flushAndClose(false);
            this._closed = true;

            // The output state is re-read after the flush, since it may have changed in the meantime
            const outputIsWrappingUp = ['finalizing', 'finalized'].includes(track.output.state);
            if (!outputIsWrappingUp) {
                track.output._muxer.onTrackClose(track);
            }
        };

        this._closingPromise = flushThenNotifyMuxer();
    }

    /**
     * Returns the promise of an ongoing close if there is one; otherwise starts a flush-and-close (without
     * notifying the muxer) and returns its promise.
     * @internal
     */
    async _flushOrWaitForOngoingClose(forceClose) {
        if (!this._closingPromise) {
            const flushOnly = async () => {
                await this._flushAndClose(forceClose);
                this._closed = true;
            };
            this._closingPromise = flushOnly();
        }

        return this._closingPromise;
    }
}

/**
 * Base class for video sources - sources for video tracks.
 * @group Media sources
 * @public
 */
export class VideoSource extends MediaSource {
    /** Internal constructor. */
    constructor(codec) {
        super();
        /** @internal */
        this._connectedTrack = null;

        const codecIsKnown = VIDEO_CODECS.includes(codec);
        if (!codecIsKnown) {
            throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`);
        }

        this._codec = codec;
    }
}

/** Throws if `packet` is not a key packet while `track` is declared to contain only key packets. */
const maybeEnsureIsKeyPacket = (track, packet) => {
    if (!track.metadata.hasOnlyKeyPackets) {
        return;
    }
    if (packet.type === 'key') {
        return;
    }

    throw new Error('Cannot add non-key packets to a hasOnlyKeyPackets video track.');
};

/**
 * The most basic video source; can be used to directly pipe encoded packets into the output file.
 * @group Media sources
 * @public
 */
export class EncodedVideoPacketSource extends VideoSource {
    /** Creates a new {@link EncodedVideoPacketSource} whose packets are encoded using `codec`. */
    constructor(codec) {
        super(codec);
    }

    /**
     * Adds an encoded packet to the output video track. Packets must be added in *decode order*, while a packet's
     * timestamp must be its *presentation timestamp*. B-frames are handled automatically.
     *
     * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid
     * decoder config.
     *
     * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
     * to respect writer and encoder backpressure.
     */
    add(packet, meta) {
        const isEncodedPacket = packet instanceof EncodedPacket;
        if (!isEncodedPacket) {
            throw new TypeError('packet must be an EncodedPacket.');
        }
        if (packet.isMetadataOnly) {
            throw new TypeError('Metadata-only packets cannot be added.');
        }

        const metaIsInvalid = meta !== undefined && (meta === null || typeof meta !== 'object');
        if (metaIsInvalid) {
            throw new TypeError('meta, when provided, must be an object.');
        }

        this._ensureValidAdd();

        const track = this._connectedTrack;
        maybeEnsureIsKeyPacket(track, packet);

        return track.output._muxer.addEncodedVideoPacket(track, packet, meta);
    }
}
/**
 * Drives video encoding on behalf of a source: optionally transforms and frame-rate-normalizes incoming samples,
 * lazily creates an encoder on the first sample (a registered custom encoder if one supports the configuration,
 * otherwise a WebCodecs `VideoEncoder`, plus a second one for alpha when `alpha === 'keep'`), and forwards the
 * resulting packets to the muxer of the source's connected track.
 */
class VideoEncoderWrapper {
    /**
     * @param source - The source this wrapper encodes for; its `_ensureValidAdd` and `_connectedTrack` are used.
     * @param encodingConfig - The video encoding configuration (codec, transform, alpha, callbacks, ...).
     */
    constructor(source, encodingConfig) {
        this.source = source;
        this.encodingConfig = encodingConfig;
        this.ensureEncoderPromise = null;
        this.encoderInitialized = false;
        this.encoder = null;
        this.muxer = null;
        this.lastMultipleOfKeyFrameInterval = -1;
        this.emittedEncoderPackets = 0;
        // Tracks the input dimensions of the first frame
        this.codedWidth = null;
        this.codedHeight = null;
        // Tracks the output dimensions of the first frame (used to lock dimensions for fill/contain/cover)
        this.outputWidth = null;
        this.outputHeight = null;
        // Frame rate normalization state
        this.frameRateLastSample = null;
        this.frameRateLastTimestamp = null;
        this.frameRateLastEndTimestamp = null;
        // VideoEncoder converts everything to microseconds, so we need to do some bookkeeping to restore the original
        // timing information
        this.preciseTimings = [];
        this.customEncoder = null;
        this.customEncoderCallSerializer = new CallSerializer();
        this.customEncoderQueueSize = 0;
        // Alpha stuff
        this.alphaEncoder = null;
        this.splitter = null;
        this.splitterCreationFailed = false;
        this.alphaFrameQueue = [];
        /**
         * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
         * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught.
         * So, we keep track of the encoder error and throw it as soon as we get the chance.
         */
        this.error = null;
        this.lastMuxerPromise = Promise.resolve();
    }

    /**
     * Validates, optionally transforms and frame-rate-aligns a sample, then encodes it.
     *
     * @param videoSample - The sample to encode.
     * @param shouldClose - Whether this method is responsible for closing `videoSample`. When false, the passed
     * sample is neither closed nor mutated.
     * @param encodeOptions - Options forwarded to the encoder; `keyFrame` is additionally forced at every
     * key frame interval boundary.
     * @throws If a previous encoder error is pending, if the source cannot accept samples, or if the sample size
     * changes while `sizeChangeBehavior` is `'deny'`.
     */
    async add(videoSample, shouldClose, encodeOptions) {
        const originalSample = videoSample;
        try {
            this.checkForEncoderError();
            this.source._ensureValidAdd();
            const config = this.encodingConfig;
            const sizeChangeBehavior = config.sizeChangeBehavior ?? 'deny';
            let isSizeChange = false;
            // Ensure video sample size remains constant or handle the change
            if (this.codedWidth !== null && this.codedHeight !== null) {
                if (videoSample.codedWidth !== this.codedWidth || videoSample.codedHeight !== this.codedHeight) {
                    isSizeChange = true;
                    if (sizeChangeBehavior === 'deny') {
                        throw new Error(`Video sample size must remain constant. Expected ${this.codedWidth}x${this.codedHeight},`
                            + ` got ${videoSample.codedWidth}x${videoSample.codedHeight}. To allow the sample size to`
                            + ` change over time, set \`sizeChangeBehavior\` to a value other than 'deny' in the`
                            + ` encoding options.`);
                    }
                }
            } else {
                this.codedWidth = videoSample.codedWidth;
                this.codedHeight = videoSample.codedHeight;
            }
            // Determine if we need to apply transformations via canvas
            const hasTransformConfig = config.transform?.width !== undefined
                || config.transform?.height !== undefined
                || config.transform?.rotate !== undefined
                || config.transform?.crop !== undefined
                || config.transform?.force === true;
            const needsTransform = hasTransformConfig || (isSizeChange && sizeChangeBehavior !== 'passThrough');
            if (needsTransform) {
                let targetWidth = config.transform?.width;
                let targetHeight = config.transform?.height;
                let appliedFit = config.transform?.fit ?? 'fill';
                // If the size changed and behavior is fill/contain/cover, lock to the original output dimensions
                if (isSizeChange && sizeChangeBehavior !== 'passThrough') {
                    assert(this.outputWidth);
                    assert(this.outputHeight);
                    assert(sizeChangeBehavior !== 'deny');
                    targetWidth = this.outputWidth;
                    targetHeight = this.outputHeight;
                    appliedFit = sizeChangeBehavior;
                }
                const transformed = await videoSample.transform({
                    width: targetWidth,
                    height: targetHeight,
                    roundDimensionsTo: 2,
                    crop: config.transform?.crop,
                    rotate: config.transform?.rotate,
                    fit: appliedFit,
                    alpha: config.alpha,
                });
                // Save the output dimensions of the first frame
                if (this.outputWidth === null || this.outputHeight === null) {
                    this.outputWidth = transformed.displayWidth;
                    this.outputHeight = transformed.displayHeight;
                }
                if (shouldClose) {
                    videoSample.close();
                }
                videoSample = transformed;
                shouldClose = true;
            } else {
                // If no canvas is needed, we still need to record the output dimensions for the first frame
                if (this.outputWidth === null || this.outputHeight === null) {
                    this.outputWidth = videoSample.codedWidth;
                    this.outputHeight = videoSample.codedHeight;
                }
            }
            const frameRate = config.transform?.frameRate;
            if (frameRate !== undefined) {
                // Apply frame rate normalization
                const originalEndTimestamp = videoSample.timestamp + videoSample.duration;
                const alignedTimestamp = floorToDivisor(videoSample.timestamp, frameRate);
                if (this.frameRateLastSample !== null) {
                    if (alignedTimestamp <= this.frameRateLastTimestamp) {
                        // Same frame rate slot, replace stored sample with the newer one
                        this.frameRateLastSample.close();
                        this.frameRateLastSample = videoSample.clone();
                        this.frameRateLastEndTimestamp = originalEndTimestamp;
                        return;
                    } else {
                        // Pad the gap by repeating the previous frame
                        await this.padFrameRate(alignedTimestamp, encodeOptions);
                    }
                }
                // Work on a private clone if the sample is still the one that was passed in, so that the retiming
                // below never mutates it
                if (videoSample === originalSample) {
                    const privateCopy = videoSample.clone();
                    if (shouldClose) {
                        // We were asked to close the passed-in sample. Since the `finally` below will only see the
                        // clone from here on, the original must be released now or it would never be closed.
                        videoSample.close();
                    }
                    videoSample = privateCopy;
                    shouldClose = true;
                }
                videoSample.setTimestamp(alignedTimestamp);
                videoSample.setDuration(1 / frameRate);
                this.frameRateLastSample?.close();
                this.frameRateLastSample = videoSample.clone();
                this.frameRateLastTimestamp = alignedTimestamp;
                this.frameRateLastEndTimestamp = originalEndTimestamp;
            }
            await this.processAndEncode(videoSample, encodeOptions);
        } finally {
            if (shouldClose) {
                videoSample.close();
            }
        }
    }

    /**
     * Runs the process function (if any) and encodes the resulting samples.
     *
     * Samples produced by the process function (i.e. anything other than `videoSample` itself) are closed before
     * this method returns; `videoSample` is left for the caller to close.
     */
    async processAndEncode(videoSample, encodeOptions) {
        const config = this.encodingConfig;
        let samplesToEncode;
        // Apply the user-defined process function, if any
        if (config.transform?.process) {
            let processed = config.transform.process(videoSample);
            if (processed instanceof Promise) {
                processed = await processed;
            }
            if (processed === null) {
                // The process function dropped this sample
                return;
            }
            if (!Array.isArray(processed)) {
                processed = [processed];
            }
            samplesToEncode = processed.map((x) => {
                if (x instanceof VideoSample) {
                    return x;
                }
                if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) {
                    return new VideoSample(x);
                }
                return new VideoSample(x, {
                    timestamp: videoSample.timestamp,
                    duration: videoSample.duration,
                });
            });
        } else {
            samplesToEncode = [videoSample];
        }
        try {
            for (const sampleToEncode of samplesToEncode) {
                if (!this.encoderInitialized) {
                    if (!this.ensureEncoderPromise) {
                        this.ensureEncoder(sampleToEncode);
                    }
                    // No, this "if" statement is not useless. Sometimes, the above call to
                    // `ensureEncoder` might have synchronously completed and the encoder is
                    // already initialized. In this case, we don't need to await the promise
                    // anymore. This also fixes nasty async race condition bugs when multiple
                    // code paths are calling this method: It's important that the call that
                    // initialized the encoder go through this code first.
                    if (!this.encoderInitialized) {
                        await this.ensureEncoderPromise;
                    }
                }
                assert(this.encoderInitialized);
                const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 2;
                const multipleOfKeyFrameInterval = Math.floor(sampleToEncode.timestamp / keyFrameInterval);
                // Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks
                // follow the same "key frame" rhythm, because aligned key frames are required to start new
                // fragments in ISOBMFF or clusters in Matroska (or at least desirable).
                const finalEncodeOptions = {
                    ...encodeOptions,
                    keyFrame: encodeOptions?.keyFrame
                        || keyFrameInterval === 0
                        || multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval,
                };
                this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval;
                if (this.customEncoder) {
                    this.customEncoderQueueSize++;
                    // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
                    const clonedSample = sampleToEncode.clone();
                    const promise = this.customEncoderCallSerializer
                        .call(() => this.customEncoder.encode(clonedSample, finalEncodeOptions))
                        .catch((error) => {
                            this.error ??= error;
                        })
                        .finally(() => {
                            // Runs on success and on failure alike, so a failed encode does not leave the
                            // queue size permanently inflated
                            this.customEncoderQueueSize--;
                            clonedSample.close();
                        });
                    if (this.customEncoderQueueSize >= 4) {
                        await promise;
                    }
                } else {
                    assert(this.encoder);
                    const videoFrame = sampleToEncode.toVideoFrame();
                    const preciseTimingIndex = binarySearchLessOrEqual(this.preciseTimings, videoFrame.timestamp, x => x.microsecondTimestamp);
                    const existingEntry = preciseTimingIndex !== -1
                        ? this.preciseTimings[preciseTimingIndex]
                        : null;
                    if (existingEntry && existingEntry.microsecondTimestamp === videoFrame.timestamp) {
                        if (existingEntry.timestamp !== sampleToEncode.timestamp) {
                            // Mapping isn't unique, can't use the timestamp
                            existingEntry.timestampIsValid = false;
                        }
                        if (existingEntry.duration !== sampleToEncode.duration) {
                            // Mapping isn't unique, can't use the duration
                            existingEntry.durationIsValid = false;
                        }
                    } else {
                        this.preciseTimings.splice(preciseTimingIndex + 1, 0, {
                            microsecondTimestamp: videoFrame.timestamp,
                            timestamp: sampleToEncode.timestamp,
                            duration: sampleToEncode.duration,
                            timestampIsValid: true,
                            durationIsValid: true,
                        });
                        // Make sure it doesn't grow indefinitely
                        if (this.preciseTimings.length > 128) {
                            this.preciseTimings.shift();
                        }
                    }
                    if (!this.alphaEncoder) {
                        // No alpha encoder, simple case
                        this.encoder.encode(videoFrame, finalEncodeOptions);
                        videoFrame.close();
                    } else {
                        // We're expected to encode alpha as well
                        const frameDefinitelyHasNoAlpha = !!videoFrame.format && !videoFrame.format.includes('A');
                        if (frameDefinitelyHasNoAlpha || this.splitterCreationFailed) {
                            this.alphaFrameQueue.push(null);
                            this.encoder.encode(videoFrame, finalEncodeOptions);
                            videoFrame.close();
                        } else {
                            const width = videoFrame.displayWidth;
                            const height = videoFrame.displayHeight;
                            if (!this.splitter) {
                                this.splitter = new ColorAlphaSplitter(width, height);
                            }
                            // The splitter takes ownership, so no need to close the frames ourselves
                            const { colorFrame, alphaFrame } = await this.splitter.update(videoFrame);
                            this.alphaFrameQueue.push(alphaFrame);
                            this.encoder.encode(colorFrame, finalEncodeOptions);
                            colorFrame.close();
                        }
                    }
                    // We need to do this after sending the frame to the encoder as the frame otherwise might be closed
                    if (this.encoder.encodeQueueSize >= 4) {
                        await new Promise(resolve => this.encoder.addEventListener('dequeue', resolve, { once: true }));
                    }
                }
                await this.lastMuxerPromise; // Allow the writer to apply backpressure
            }
        } finally {
            for (const sample of samplesToEncode) {
                if (sample !== videoSample) {
                    sample.close();
                }
            }
        }
    }

    /**
     * Repeats the last frame rate sample to fill the gap up to the given timestamp.
     *
     * @param until - The (frame-aligned) timestamp up to which, exclusively, frames are repeated.
     * @param encodeOptions - Options forwarded to {@link processAndEncode} for every repeated frame.
     */
    async padFrameRate(until, encodeOptions) {
        const frameRate = this.encodingConfig.transform.frameRate;
        assert(this.frameRateLastSample);
        const frameDifference = Math.round((until - this.frameRateLastTimestamp) * frameRate);
        // Starts at 1: slot 0 is the last sample itself, which has already been encoded
        for (let i = 1; i < frameDifference; i++) {
            const sample = this.frameRateLastSample.clone();
            try {
                sample.setTimestamp(this.frameRateLastTimestamp + i / frameRate);
                sample.setDuration(1 / frameRate);
                await this.processAndEncode(sample, encodeOptions);
            } finally {
                // Release the padding clone even if encoding it failed
                sample.close();
            }
        }
    }

    /**
     * Starts encoder creation and stores the resulting promise in `ensureEncoderPromise`. The encoder is configured
     * from the encoding config and the dimensions of `videoSample`; once done, `muxer` is set and
     * `encoderInitialized` becomes true.
     */
    ensureEncoder(videoSample) {
        this.ensureEncoderPromise = (async () => {
            const encoderConfig = buildVideoEncoderConfig({
                ...this.encodingConfig,
                width: videoSample.codedWidth,
                height: videoSample.codedHeight,
                squarePixelWidth: videoSample.squarePixelWidth,
                squarePixelHeight: videoSample.squarePixelHeight,
                framerate: this.source._connectedTrack?.metadata.frameRate,
            });
            this.encodingConfig.onEncoderConfig?.(encoderConfig);
            const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports(this.encodingConfig.codec, encoderConfig));
            if (MatchingCustomEncoder) {
                // @ts-expect-error "Can't create instance of abstract class 🤓"
                this.customEncoder = new MatchingCustomEncoder();
                // @ts-expect-error It's technically readonly
                this.customEncoder.codec = this.encodingConfig.codec;
                // @ts-expect-error It's technically readonly
                this.customEncoder.config = encoderConfig;
                // @ts-expect-error It's technically readonly
                this.customEncoder.onPacket = (packet, meta) => {
                    if (!(packet instanceof EncodedPacket)) {
                        throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
                    }
                    if (meta !== undefined && (!meta || typeof meta !== 'object')) {
                        throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
                    }
                    maybeEnsureIsKeyPacket(this.source._connectedTrack, packet);
                    this.encodingConfig.onEncodedPacket?.(packet, meta);
                    this.lastMuxerPromise = this.muxer.addEncodedVideoPacket(this.source._connectedTrack, packet, meta)
                        .catch((error) => {
                            this.error ??= error;
                        });
                };
                await this.customEncoder.init();
            } else {
                if (typeof VideoEncoder === 'undefined') {
                    throw new Error('VideoEncoder is not supported by this browser.');
                }
                encoderConfig.alpha = 'discard'; // Since we handle alpha ourselves
                if (this.encodingConfig.alpha === 'keep') {
                    // Encoding alpha requires using two parallel encoders, so we need to make sure they stay in sync
                    // and that neither of them drops frames. Setting latencyMode to 'quality' achieves this, because
                    // "User Agents MUST not drop frames to achieve the target bitrate and/or framerate."
                    encoderConfig.latencyMode = 'quality';
                }
                const hasOddDimension = encoderConfig.width % 2 === 1 || encoderConfig.height % 2 === 1;
                if (hasOddDimension
                    && (this.encodingConfig.codec === 'avc' || this.encodingConfig.codec === 'hevc')) {
                    // Throw a special error for this case as it gets hit often
                    throw new Error(`The dimensions ${encoderConfig.width}x${encoderConfig.height} are not supported for codec`
                        + ` '${this.encodingConfig.codec}'; both width and height must be even numbers. Make sure to`
                        + ` round your dimensions to the nearest even number.`);
                }
                const support = await VideoEncoder.isConfigSupported(encoderConfig);
                if (!support.supported) {
                    throw new Error(`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
                        + ` ${encoderConfig.width}x${encoderConfig.height}, hardware acceleration:`
                        + ` ${encoderConfig.hardwareAcceleration ?? 'no-preference'}) is not supported by this browser.`
                        + ` Consider using another codec or changing your video parameters.`);
                }
                /** Queue of color chunks waiting for their alpha counterpart. */
                const colorChunkQueue = [];
                /** Each value is the number of encoded alpha chunks at which a null alpha chunk should be added. */
                const nullAlphaChunkQueue = [];
                let encodedAlphaChunkCount = 0;
                let alphaEncoderQueue = 0;
                const addPacket = (colorChunk, alphaChunk, meta) => {
                    const sideData = {};
                    if (alphaChunk) {
                        const alphaData = new Uint8Array(alphaChunk.byteLength);
                        alphaChunk.copyTo(alphaData);
                        sideData.alpha = alphaData;
                    }
                    let packet = EncodedPacket.fromEncodedChunk(colorChunk, sideData);
                    // See if there's a relevant timing entry to refine the packet's timing data
                    const preciseTimingIndex = binarySearchLessOrEqual(this.preciseTimings, colorChunk.timestamp, x => x.microsecondTimestamp);
                    const entry = preciseTimingIndex !== -1
                        ? this.preciseTimings[preciseTimingIndex]
                        : null;
                    let actualType = null;
                    if (this.emittedEncoderPackets === 0 && packet.type === 'delta' && meta?.decoderConfig) {
                        // https://github.com/Vanilagy/mediabunny/issues/365
                        // We expect the first packet to be a key packet. If it's not, let's actually verify that it's
                        // not by getting the actual type.
                        actualType = determineVideoPacketType(this.encodingConfig.codec, meta.decoderConfig, packet.data);
                    }
                    // Define the packet
                    if ((entry && entry.microsecondTimestamp === colorChunk.timestamp) || actualType !== null) {
                        packet = packet.clone({
                            timestamp: entry?.timestampIsValid ? entry.timestamp : undefined,
                            duration: entry?.durationIsValid ? entry.duration : undefined,
                            type: actualType ?? undefined,
                        });
                    }
                    maybeEnsureIsKeyPacket(this.source._connectedTrack, packet);
                    this.encodingConfig.onEncodedPacket?.(packet, meta);
                    this.lastMuxerPromise = this.muxer.addEncodedVideoPacket(this.source._connectedTrack, packet, meta)
                        .catch((error) => {
                            this.error ??= error;
                        });
                    this.emittedEncoderPackets++;
                };
                const stack = new Error('Encoding error').stack;
                this.encoder = new VideoEncoder({
                    output: (chunk, meta) => {
                        if (!this.alphaEncoder) {
                            // We're done
                            addPacket(chunk, null, meta);
                            return;
                        }
                        const alphaFrame = this.alphaFrameQueue.shift();
                        assert(alphaFrame !== undefined);
                        if (alphaFrame) {
                            this.alphaEncoder.encode(alphaFrame, {
                                // Crucial: The alpha frame is forced to be a key frame whenever the color frame
                                // also is. Without this, playback can glitch and even crash in some browsers.
                                // This is the reason why the two encoders are wired in series and not in parallel.
                                keyFrame: chunk.type === 'key',
                            });
                            alphaEncoderQueue++;
                            alphaFrame.close();
                            colorChunkQueue.push({ chunk, meta });
                        } else {
                            // There was no alpha component for this frame
                            if (alphaEncoderQueue === 0) {
                                // No pending alpha encodes either, so we're done
                                addPacket(chunk, null, meta);
                            } else {
                                // There are still alpha encodes pending, so we can't add the packet immediately since
                                // we'd end up with out-of-order packets. Instead, let's queue a null alpha chunk to be
                                // added in the future, after the current encoder workload has completed:
                                nullAlphaChunkQueue.push(encodedAlphaChunkCount + alphaEncoderQueue);
                                colorChunkQueue.push({ chunk, meta });
                            }
                        }
                    },
                    error: (error) => {
                        error.stack = stack; // Provide a more useful stack trace, the default one sucks
                        this.error ??= error;
                    },
                });
                this.encoder.configure(encoderConfig);
                if (this.encodingConfig.alpha === 'keep') {
                    const stack = new Error('Encoding error').stack;
                    // We need to encode alpha as well, which we do with a separate encoder
                    this.alphaEncoder = new VideoEncoder({
                        // We ignore the alpha chunk's metadata
                        // eslint-disable-next-line @typescript-eslint/no-unused-vars
                        output: (chunk, meta) => {
                            alphaEncoderQueue--;
                            // There has to be a color chunk because the encoders are wired in series
                            const colorChunk = colorChunkQueue.shift();
                            assert(colorChunk !== undefined);
                            addPacket(colorChunk.chunk, chunk, colorChunk.meta);
                            // See if there are any null alpha chunks queued up
                            encodedAlphaChunkCount++;
                            while (nullAlphaChunkQueue.length > 0
                                && nullAlphaChunkQueue[0] === encodedAlphaChunkCount) {
                                nullAlphaChunkQueue.shift();
                                const colorChunk = colorChunkQueue.shift();
                                assert(colorChunk !== undefined);
                                addPacket(colorChunk.chunk, null, colorChunk.meta);
                            }
                        },
                        error: (error) => {
                            error.stack = stack; // Provide a more useful stack trace
                            this.error ??= error;
                        },
                    });
                    this.alphaEncoder.configure(encoderConfig);
                }
            }
            assert(this.source._connectedTrack);
            this.muxer = this.source._connectedTrack.output._muxer;
            this.encoderInitialized = true;
        })();
    }

    /**
     * Pads the frame rate up to the end of the last sample, flushes the encoder(s) and closes them.
     *
     * @param forceClose - When true, skips error checks, final padding and flushing, and only releases resources.
     * @throws The stored encoder error, if any, unless `forceClose` is true.
     */
    async flushAndClose(forceClose) {
        if (!forceClose) {
            this.checkForEncoderError();
        }
        // Final frame rate padding: fill remaining frames up to the last sample's original end timestamp
        if (!forceClose && this.frameRateLastSample) {
            const frameRate = this.encodingConfig.transform.frameRate;
            const alignedEnd = floorToDivisor(this.frameRateLastEndTimestamp, frameRate);
            await this.padFrameRate(alignedEnd);
        }
        this.frameRateLastSample?.close();
        this.frameRateLastSample = null;
        if (this.customEncoder) {
            if (!forceClose) {
                // Not awaited here; the flush is queued on the same serializer as the close call below
                void this.customEncoderCallSerializer.call(() => this.customEncoder.flush());
            }
            await this.customEncoderCallSerializer.call(() => this.customEncoder.close());
        } else if (this.encoder) {
            if (!forceClose) {
                // These are wired in series, therefore they must also be flushed in series
                await this.encoder.flush();
                await this.alphaEncoder?.flush();
            }
            if (this.encoder.state !== 'closed') {
                this.encoder.close();
            }
            if (this.alphaEncoder && this.alphaEncoder.state !== 'closed') {
                this.alphaEncoder.close();
            }
            this.alphaFrameQueue.forEach(x => x?.close());
            this.splitter?.close();
        }
        if (!forceClose) {
            this.checkForEncoderError();
        }
    }

    /** Returns the number of encodes currently queued on the active encoder (0 if no encoder exists yet). */
    getQueueSize() {
        if (this.customEncoder) {
            return this.customEncoderQueueSize;
        } else {
            // Because the color and alpha encoders are wired in series, there's no need to also include the alpha
            // encoder's queue size here
            return this.encoder?.encodeQueueSize ?? 0;
        }
    }

    /** Throws the stored out-of-band encoder error, if there is one. */
    checkForEncoderError() {
        if (this.error) {
            throw this.error;
        }
    }
}

// Checked before ColorAlphaSplitter attempts its GPU path, and set to true once that path has failed
let splitterGpuUnavailable = false;
try { if (typeof OffscreenCanvas !== 'undefined') { this.canvas = new OffscreenCanvas(initialWidth, initialHeight); } else { this.canvas = document.createElement('canvas'); this.canvas.width = initialWidth; this.canvas.height = initialHeight; } const gl = this.canvas.getContext('webgl2', { alpha: true, // Needed due to the YUV thing we do for alpha }); // Casting because of some TypeScript weirdness if (!gl) { throw new Error('Couldn\'t acquire WebGL 2 context.'); } this.gl = gl; this.colorProgram = this.createColorProgram(); this.alphaProgram = this.createAlphaProgram(); this.vao = this.createVAO(); this.sourceTexture = this.createTexture(); this.alphaResolutionLocation = this.gl.getUniformLocation(this.alphaProgram, 'u_resolution'); this.gl.useProgram(this.colorProgram); this.gl.uniform1i(this.gl.getUniformLocation(this.colorProgram, 'u_sourceTexture'), 0); this.gl.useProgram(this.alphaProgram); this.gl.uniform1i(this.gl.getUniformLocation(this.alphaProgram, 'u_sourceTexture'), 0); } catch (error) { this.gl = null; this.canvas = null; splitterGpuUnavailable = true; console.warn('Falling back to CPU for color/alpha splitting.', error); } } } async update(sourceFrame) { if (this.gl) { return this.updateGpu(sourceFrame); } else { return this.updateCpu(sourceFrame); } } updateGpu(sourceFrame) { assert(this.gl); assert(this.canvas); if (sourceFrame.displayWidth !== this.canvas.width || sourceFrame.displayHeight !== this.canvas.height) { this.canvas.width = sourceFrame.displayWidth; this.canvas.height = sourceFrame.displayHeight; } this.gl.activeTexture(this.gl.TEXTURE0); this.gl.bindTexture(this.gl.TEXTURE_2D, this.sourceTexture); this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, sourceFrame); const colorFrame = this.runColorProgram(sourceFrame); const alphaFrame = this.runAlphaProgram(sourceFrame); sourceFrame.close(); return { colorFrame, alphaFrame }; } createVertexShader() { assert(this.gl); return 
this.createShader(this.gl.VERTEX_SHADER, `#version 300 es in vec2 a_position; in vec2 a_texCoord; out vec2 v_texCoord; void main() { gl_Position = vec4(a_position, 0.0, 1.0); v_texCoord = a_texCoord; } `); } createColorProgram() { assert(this.gl); const vertexShader = this.createVertexShader(); // This shader is simple, simply copy the color information while setting alpha to 1 const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es precision highp float; uniform sampler2D u_sourceTexture; in vec2 v_texCoord; out vec4 fragColor; void main() { vec4 source = texture(u_sourceTexture, v_texCoord); fragColor = vec4(source.rgb, 1.0); } `); const program = this.gl.createProgram(); this.gl.attachShader(program, vertexShader); this.gl.attachShader(program, fragmentShader); this.gl.linkProgram(program); return program; } createAlphaProgram() { assert(this.gl); const vertexShader = this.createVertexShader(); // This shader's more complex. The main reason is that this shader writes data in I420 (yuv420) pixel format // instead of regular RGBA. In other words, we use the shader to write out I420 data into an RGBA canvas, which // we then later read out with JavaScript. The reason being that browsers weirdly encode canvases and mess up // the color spaces, and the only way to have full control over the color space is by outputting YUV data // directly (avoiding the RGB conversion). Doing this conversion in JS is painfully slow, so let's utlize the // GPU since we're already calling it anyway. 
const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es precision highp float; uniform sampler2D u_sourceTexture; uniform vec2 u_resolution; // The width and height of the canvas in vec2 v_texCoord; out vec4 fragColor; // This function determines the value for a single byte in the YUV stream float getByteValue(float byteOffset) { float width = u_resolution.x; float height = u_resolution.y; float yPlaneSize = width * height; if (byteOffset < yPlaneSize) { // This byte is in the luma plane. Find the corresponding pixel coordinates to sample from float y = floor(byteOffset / width); float x = mod(byteOffset, width); // Add 0.5 to sample the center of the texel vec2 sampleCoord = (vec2(x, y) + 0.5) / u_resolution; // The luma value is the alpha from the source texture return texture(u_sourceTexture, sampleCoord).a; } else { // Write a fixed value for chroma and beyond return 128.0 / 255.0; } } void main() { // Each fragment writes 4 bytes (R, G, B, A) float pixelIndex = floor(gl_FragCoord.y) * u_resolution.x + floor(gl_FragCoord.x); float baseByteOffset = pixelIndex * 4.0; vec4 result; for (int i = 0; i < 4; i++) { float currentByteOffset = baseByteOffset + float(i); result[i] = getByteValue(currentByteOffset); } fragColor = result; } `); const program = this.gl.createProgram(); this.gl.attachShader(program, vertexShader); this.gl.attachShader(program, fragmentShader); this.gl.linkProgram(program); return program; } createShader(type, source) { assert(this.gl); const shader = this.gl.createShader(type); this.gl.shaderSource(shader, source); this.gl.compileShader(shader); if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) { console.error('Shader compile error:', this.gl.getShaderInfoLog(shader)); } return shader; } createVAO() { assert(this.gl); assert(this.colorProgram); const vao = this.gl.createVertexArray(); this.gl.bindVertexArray(vao); const vertices = new Float32Array([ -1, -1, 0, 1, 1, -1, 1, 1, -1, 1, 0, 0, 1, 1, 1, 0, 
]); const buffer = this.gl.createBuffer(); this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer); this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW); const positionLocation = this.gl.getAttribLocation(this.colorProgram, 'a_position'); const texCoordLocation = this.gl.getAttribLocation(this.colorProgram, 'a_texCoord'); this.gl.enableVertexAttribArray(positionLocation); this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0); this.gl.enableVertexAttribArray(texCoordLocation); this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8); return vao; } createTexture() { assert(this.gl); const texture = this.gl.createTexture(); this.gl.bindTexture(this.gl.TEXTURE_2D, texture); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR); return texture; } runColorProgram(sourceFrame) { assert(this.gl); assert(this.canvas); this.gl.useProgram(this.colorProgram); this.gl.viewport(0, 0, this.canvas.width, this.canvas.height); this.gl.clear(this.gl.COLOR_BUFFER_BIT); this.gl.bindVertexArray(this.vao); this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4); return new VideoFrame(this.canvas, { timestamp: sourceFrame.timestamp, duration: sourceFrame.duration ?? 
undefined, alpha: 'discard', }); } runAlphaProgram(sourceFrame) { assert(this.gl); assert(this.canvas); this.gl.useProgram(this.alphaProgram); this.gl.uniform2f(this.alphaResolutionLocation, this.canvas.width, this.canvas.height); this.gl.viewport(0, 0, this.canvas.width, this.canvas.height); this.gl.clear(this.gl.COLOR_BUFFER_BIT); this.gl.bindVertexArray(this.vao); this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4); const { width, height } = this.canvas; const chromaSamples = Math.ceil(width / 2) * Math.ceil(height / 2); const yuvSize = width * height + chromaSamples * 2; const requiredHeight = Math.ceil(yuvSize / (width * 4)); let yuv = new Uint8Array(4 * width * requiredHeight); this.gl.readPixels(0, 0, width, requiredHeight, this.gl.RGBA, this.gl.UNSIGNED_BYTE, yuv); yuv = yuv.subarray(0, yuvSize); assert(yuv[width * height] === 128); // Where chroma data starts assert(yuv[yuv.length - 1] === 128); // Assert the YUV data has been fully written // Defining this separately because TypeScript doesn't know `transfer` and I can't be bothered to do declaration // merging right now const init = { format: 'I420', codedWidth: width, codedHeight: height, timestamp: sourceFrame.timestamp, duration: sourceFrame.duration ?? 
undefined, transfer: [yuv.buffer], }; return new VideoFrame(yuv, init); } updateCpu(sourceFrame) { if (!this.worker) { const blob = new Blob([`(${colorAlphaSplitterWorkerCode.toString()})()`], { type: 'application/javascript' }); const url = URL.createObjectURL(blob); this.worker = new Worker(url); URL.revokeObjectURL(url); this.worker.addEventListener('message', (event) => { const data = event.data; const pending = this.pendingRequests.get(data.id); if (!pending) { return; } this.pendingRequests.delete(data.id); if ('error' in data) { pending.reject(new Error(data.error)); } else { pending.resolve({ colorFrame: data.colorFrame, alphaFrame: data.alphaFrame }); } }); this.worker.addEventListener('error', (event) => { const error = new Error(event.message || 'Color/alpha splitter worker error.'); for (const pending of this.pendingRequests.values()) { pending.reject(error); } this.pendingRequests.clear(); }); } const id = this.nextRequestId++; const pending = promiseWithResolvers(); this.pendingRequests.set(id, pending); this.worker.postMessage({ id, sourceFrame }, { transfer: [sourceFrame] }); return pending.promise; } close() { this.gl?.getExtension('WEBGL_lose_context')?.loseContext(); this.gl = null; this.canvas = null; this.worker?.terminate(); this.worker = null; const error = new Error('Color/alpha splitter closed.'); for (const pending of this.pendingRequests.values()) { pending.reject(error); } this.pendingRequests.clear(); } } ColorAlphaSplitter.forceCpu = true; const colorAlphaSplitterWorkerCode = () => { // Reused across frames as long as the size matches, since consecutive frames usually share dimensions. let cpuSourceBuffer = null; // Serialize execution internally so concurrent requests don't race on the shared cpuSourceBuffer. 
let chain = Promise.resolve(); self.addEventListener('message', (event) => { const { id, sourceFrame } = event.data; chain = chain.then(async () => { try { const { colorFrame, alphaFrame } = await split(sourceFrame); self.postMessage({ id, colorFrame, alphaFrame }, { transfer: [colorFrame, alphaFrame] }); } catch (error) { self.postMessage({ id, error: error.message }); } finally { sourceFrame.close(); } }); }); const split = async (sourceFrame) => { const format = sourceFrame.format; if (!format) { throw new Error('CPU color/alpha splitting requires a known VideoFrame format.'); } const width = sourceFrame.codedWidth; const height = sourceFrame.codedHeight; const sourceSize = sourceFrame.allocati