// mediabunny

/*! * Copyright (c) 2026-present, Vanilagy and contributors * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { AUDIO_CODECS, AudioCodec, NON_PCM_AUDIO_CODECS, VIDEO_CODECS, VideoCodec, } from './codec'; import { getEncodableAudioCodecs, getFirstEncodableVideoCodec, Quality, QUALITY_HIGH, VideoEncodingConfig, } from './encode'; import { Input } from './input'; import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track'; import { AudioSampleSink, EncodedPacketSink, VideoSampleSink, } from './media-sink'; import { AudioSource, EncodedVideoPacketSource, EncodedAudioPacketSource, VideoSource, VideoSampleSource, AudioSampleSource, } from './media-source'; import { assert, assertNever, ceilToMultipleOfTwo, clamp, isIso639Dash2LanguageCode, MaybePromise, normalizeRotation, promiseWithResolvers, Rotation, } from './misc'; import { Output, OutputTrackGroup, TrackType } from './output'; import { Mp4OutputFormat } from './output-format'; import { AudioSample, audioSampleToInterleavedFormat, clampCropRectangle, CropRectangle, toInterleavedAudioFormat, validateCropRectangle, VideoSample, VideoSampleResource, } from './sample'; import { MetadataTags, validateMetadataTags } from './metadata'; import { NullTarget } from './target'; import { AudioResampler } from './resample'; /** * The options for media file conversion. * @group Conversion * @public */ export type ConversionOptions = { /** The input file. */ input: Input; /** The output file. */ output: Output; /** * Defines which input tracks are used for conversion. Defaults to `'all'` unless the input is an HLS input, in * which case it defaults to `'primary'`. * * - `'all'`: All input tracks are eligible for conversion. * - `'primary'`: Only the primary video and audio track from the input are eligible for conversion. 
*/ tracks?: 'all' | 'primary'; /** * Video-specific options. When passing an object, the same options are applied to all video tracks. When passing a * function, it will be invoked for each video track and is expected to return or resolve to the options * for that specific track. The function is passed an instance of {@link InputVideoTrack} as well as a number `n`, * which is the 1-based index of the track in the list of all video tracks. Using `n` is deprecated, prefer the * identical `track.number` instead. * * When passing an array or a function that returns an array, one output track per array element will be created, * allowing for "fan-out". Useful for creating multiple variants from a single track, for example with different * resolutions. */ video?: ConversionVideoOptions | ConversionVideoOptions[] | ((track: InputVideoTrack, n: number) => MaybePromise< ConversionVideoOptions | ConversionVideoOptions[] | undefined >); /** * Audio-specific options. When passing an object, the same options are applied to all audio tracks. When passing a * function, it will be invoked for each audio track and is expected to return or resolve to the options * for that specific track. The function is passed an instance of {@link InputAudioTrack} as well as a number `n`, * which is the 1-based index of the track in the list of all audio tracks. Using `n` is deprecated, prefer the * identical `track.number` instead. * * When passing an array or a function that returns an array, one output track per array element will be created, * allowing for "fan-out". Useful for creating multiple variants from a single track, for example with different * bitrates. */ audio?: ConversionAudioOptions | ConversionAudioOptions[] | ((track: InputAudioTrack, n: number) => MaybePromise< ConversionAudioOptions | ConversionAudioOptions[] | undefined >); /** Options to trim the input file. */ trim?: { /** * The time in the input file in seconds at which the output file should start. 
Must be less than `end`. * When omitted, defaults to the earliest start timestamp of the non-discarded tracks, or to 0, whichever * is higher. */ start?: number; /** * The time in the input file in seconds at which the output file should end. Must be greater than `start`. * Defaults to the duration of the input when omitted. */ end?: number; }; /** * An object or a callback that returns or resolves to an object containing the descriptive metadata tags that * should be written to the output file. If a function is passed, it will be passed the tags of the input file as * its first argument, allowing you to modify, augment or extend them. * * If this field is not set, the input's metadata tags will be copied to the output. */ tags?: MetadataTags | ((inputTags: MetadataTags) => MaybePromise<MetadataTags>); /** * Whether to show potential console warnings about discarded tracks after calling `Conversion.init()`, defaults to * `true`. Set this to `false` if you're properly handling the `discardedTracks` and `isValid` fields already and * want to keep the console output clean. */ showWarnings?: boolean; }; /** * Video-specific options. * @group Conversion * @public */ export type ConversionVideoOptions = { /** If `true`, all video tracks will be discarded and will not be present in the output. */ discard?: boolean; /** * The desired width of the output video in pixels, defaulting to the video's natural display width. If height * is not set, it will be deduced automatically based on aspect ratio. */ width?: number; /** * The desired height of the output video in pixels, defaulting to the video's natural display height. If width * is not set, it will be deduced automatically based on aspect ratio. */ height?: number; /** * The fitting algorithm in case both width and height are set, or if the input video changes its size over time. * * - `'fill'` will stretch the image to fill the entire box, potentially altering aspect ratio. 
* - `'contain'` will contain the entire image within the box while preserving aspect ratio. This may lead to * letterboxing. * - `'cover'` will scale the image until the entire box is filled, while preserving aspect ratio. */ fit?: 'fill' | 'contain' | 'cover'; /** * The angle in degrees to rotate the input video by, clockwise. Rotation is applied before cropping and resizing. * This rotation is _in addition to_ the natural rotation of the input video as specified in the input file's metadata. */ rotate?: Rotation; /** * Defaults to `true`. When enabled, Mediabunny will use the rotation metadata in the output file to perform video * rotation whenever possible. Set this field to `false` if you want to ensure the output file does not make use of * rotation metadata and that any rotation is baked into the video frames directly. */ allowRotationMetadata?: boolean; /** * Specifies the rectangular region of the input video to crop to. The crop region will automatically be clamped to * the dimensions of the input video track. Cropping is performed after rotation but before resizing. */ crop?: CropRectangle; /** * The desired frame rate of the output video, in hertz. If not specified, the original input frame rate will * be used (which may be variable). */ frameRate?: number; /** The desired output video codec. */ codec?: VideoCodec; /** The desired bitrate of the output video. */ bitrate?: number | Quality; /** * Whether to discard or keep the transparency information of the input video. The default is `'discard'`. Note that * for `'keep'` to produce a transparent video, you must use an output config that supports it, such as WebM with * VP9. */ alpha?: 'discard' | 'keep'; /** * The interval, in seconds, of how often frames are encoded as a key frame. The default is 5 seconds. Frequent key * frames improve seeking behavior but increase file size. When using multiple video tracks, you should give them * all the same key frame interval. 
* * Setting this field forces a transcode. */ keyFrameInterval?: number; /** * A hint that configures the hardware acceleration method used when transcoding. This is best left on * `'no-preference'`, the default. */ hardwareAcceleration?: 'no-preference' | 'prefer-hardware' | 'prefer-software'; /** When `true`, video will always be re-encoded instead of directly copying over the encoded samples. */ forceTranscode?: boolean; /** * Allows for custom user-defined processing of video frames, e.g. for applying overlays, color transformations, or * timestamp modifications. Will be called for each input video sample after transformations and frame rate * corrections. * * Must return a {@link VideoSample}, a {@link VideoSampleResource} or a `CanvasImageSource`, an array of them, or * `null` for dropping the frame. When non-timestamped data is returned, the timestamp and duration from the source * sample will be used. Rotation metadata of the returned sample will be ignored. * * This function can also be used to manually resize frames. When doing so, you should signal the post-process * dimensions using the `processedWidth` and `processedHeight` fields, which enables the encoder to better know what * to expect. If these fields aren't set, Mediabunny will assume you won't perform any resizing. */ process?: (sample: VideoSample) => MaybePromise< CanvasImageSource | VideoSample | VideoSampleResource | (CanvasImageSource | VideoSample | VideoSampleResource)[] | null >; /** * An optional hint specifying the width of video samples returned by the `process` function, for better * encoder configuration. */ processedWidth?: number; /** * An optional hint specifying the height of video samples returned by the `process` function, for better * encoder configuration. */ processedHeight?: number; /** * Defines the group(s) the output track is a part of. For more, see {@link BaseTrackMetadata.group}. 
* * If left blank, tracks will internally be assigned to groups such that the output track pairability graph exactly * matches the input track pairability graph. */ group?: OutputTrackGroup | OutputTrackGroup[]; }; /** * Audio-specific options. * @group Conversion * @public */ export type ConversionAudioOptions = { /** If `true`, all audio tracks will be discarded and will not be present in the output. */ discard?: boolean; /** The desired channel count of the output audio. */ numberOfChannels?: number; /** The desired sample rate of the output audio, in hertz. */ sampleRate?: number; /** * The desired sample format (and therefore bit depth) of the audio samples before they are passed to the encoder. * Can be used to control bit depth with certain output codecs such as FLAC. * * Setting this field forces audio transcoding. */ sampleFormat?: 'u8' | 's16' | 's32' | 'f32'; /** The desired output audio codec. */ codec?: AudioCodec; /** The desired bitrate of the output audio. */ bitrate?: number | Quality; /** When `true`, audio will always be re-encoded instead of directly copying over the encoded samples. */ forceTranscode?: boolean; /** * Allows for custom user-defined processing of audio samples, e.g. for applying audio effects, transformations, or * timestamp modifications. Will be called for each input audio sample after remixing and resampling. * * Must return an {@link AudioSample}, an array of them, or `null` for dropping the sample. * * This function can also be used to manually perform remixing or resampling. When doing so, you should signal the * post-process parameters using the `processedNumberOfChannels` and `processedSampleRate` fields, which enables the * encoder to better know what to expect. If these fields aren't set, Mediabunny will assume you won't perform * remixing or resampling. 
*/
	process?: (sample: AudioSample) => MaybePromise<
		AudioSample | AudioSample[] | null
	>;
	/**
	 * An optional hint specifying the channel count of audio samples returned by the `process` function, for better
	 * encoder configuration.
	 */
	processedNumberOfChannels?: number;
	/**
	 * An optional hint specifying the sample rate of audio samples returned by the `process` function, for better
	 * encoder configuration.
	 */
	processedSampleRate?: number;

	/**
	 * Defines the group(s) the output track is a part of. For more, see {@link BaseTrackMetadata.group}.
	 *
	 * If left blank, tracks will internally be assigned to groups such that the output track pairability graph exactly
	 * matches the input track pairability graph.
	 */
	group?: OutputTrackGroup | OutputTrackGroup[];
};

/**
 * Validates a single set of video conversion options and throws a `TypeError` describing the first invalid field
 * it encounters. Fields are checked in a fixed order; fields that are `undefined` are skipped.
 *
 * Callers are expected to unwrap option arrays and call this once per element, which is why an array passed here is
 * rejected instead of being treated as an (empty) options object.
 */
const validateVideoOptions = (videoOptions: ConversionVideoOptions) => {
	// `typeof [] === 'object'`, so arrays must be excluded explicitly
	if (!videoOptions || typeof videoOptions !== 'object' || Array.isArray(videoOptions)) {
		throw new TypeError('options.video, when provided, must be an object.');
	}
	if (videoOptions.discard !== undefined && typeof videoOptions.discard !== 'boolean') {
		throw new TypeError('options.video.discard, when provided, must be a boolean.');
	}
	if (videoOptions.forceTranscode !== undefined && typeof videoOptions.forceTranscode !== 'boolean') {
		throw new TypeError('options.video.forceTranscode, when provided, must be a boolean.');
	}
	if (videoOptions.codec !== undefined && !VIDEO_CODECS.includes(videoOptions.codec)) {
		throw new TypeError(
			`options.video.codec, when provided, must be one of: ${VIDEO_CODECS.join(', ')}.`,
		);
	}
	if (
		videoOptions.bitrate !== undefined
		&& !(videoOptions.bitrate instanceof Quality)
		&& (!Number.isInteger(videoOptions.bitrate) || videoOptions.bitrate <= 0)
	) {
		throw new TypeError('options.video.bitrate, when provided, must be a positive integer or a quality.');
	}
	if (
		videoOptions.width !== undefined
		&& (!Number.isInteger(videoOptions.width) || videoOptions.width <= 0)
	) {
		throw new TypeError('options.video.width, when provided, must be a positive integer.');
	}
	if (
		videoOptions.height !== undefined
		&& (!Number.isInteger(videoOptions.height) || videoOptions.height <= 0)
	) {
		throw new TypeError('options.video.height, when provided, must be a positive integer.');
	}
	if (videoOptions.fit !== undefined && !['fill', 'contain', 'cover'].includes(videoOptions.fit)) {
		throw new TypeError('options.video.fit, when provided, must be one of \'fill\', \'contain\', or \'cover\'.');
	}
	// With both dimensions fixed, the fitting algorithm must be chosen explicitly
	if (
		videoOptions.width !== undefined
		&& videoOptions.height !== undefined
		&& videoOptions.fit === undefined
	) {
		throw new TypeError(
			'When both options.video.width and options.video.height are provided, options.video.fit must also be'
			+ ' provided.',
		);
	}
	if (videoOptions.rotate !== undefined && ![0, 90, 180, 270].includes(videoOptions.rotate)) {
		throw new TypeError('options.video.rotate, when provided, must be 0, 90, 180 or 270.');
	}
	if (videoOptions.allowRotationMetadata !== undefined && typeof videoOptions.allowRotationMetadata !== 'boolean') {
		throw new TypeError('options.video.allowRotationMetadata, when provided, must be a boolean.');
	}
	if (videoOptions.crop !== undefined) {
		// Crop rectangle validation is delegated; the second argument is the field path prefix
		validateCropRectangle(videoOptions.crop, 'options.video.');
	}
	if (
		videoOptions.frameRate !== undefined
		&& (!Number.isFinite(videoOptions.frameRate) || videoOptions.frameRate <= 0)
	) {
		throw new TypeError('options.video.frameRate, when provided, must be a finite positive number.');
	}
	if (videoOptions.alpha !== undefined && !['discard', 'keep'].includes(videoOptions.alpha)) {
		throw new TypeError('options.video.alpha, when provided, must be either \'discard\' or \'keep\'.');
	}
	// Zero is accepted here, only negative and non-finite values are rejected
	if (
		videoOptions.keyFrameInterval !== undefined
		&& (!Number.isFinite(videoOptions.keyFrameInterval) || videoOptions.keyFrameInterval < 0)
	) {
		throw new TypeError('options.video.keyFrameInterval, when provided, must be a non-negative number.');
	}
	if (videoOptions.process !== undefined && typeof videoOptions.process !== 'function') {
		throw new TypeError('options.video.process, when provided, must be a function.');
	}
	if (
		videoOptions.processedWidth !== undefined
		&& (!Number.isInteger(videoOptions.processedWidth) || videoOptions.processedWidth <= 0)
	) {
		throw new TypeError('options.video.processedWidth, when provided, must be a positive integer.');
	}
	if (
		videoOptions.processedHeight !== undefined
		&& (!Number.isInteger(videoOptions.processedHeight) || videoOptions.processedHeight <= 0)
	) {
		throw new TypeError('options.video.processedHeight, when provided, must be a positive integer.');
	}
	if (
		videoOptions.hardwareAcceleration !== undefined
		&& !['no-preference', 'prefer-hardware', 'prefer-software'].includes(videoOptions.hardwareAcceleration)
	) {
		throw new TypeError(
			'options.video.hardwareAcceleration, when provided, must be \'no-preference\', \'prefer-hardware\' or'
			+ ' \'prefer-software\'.',
		);
	}
	if (
		videoOptions.group !== undefined
		&& !(
			videoOptions.group instanceof OutputTrackGroup
			|| (Array.isArray(videoOptions.group) && videoOptions.group.every(x => x instanceof OutputTrackGroup))
		)
	) {
		throw new TypeError(
			'options.video.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.',
		);
	}
};

/**
 * Validates a single set of audio conversion options and throws a `TypeError` describing the first invalid field
 * it encounters. Fields are checked in a fixed order; fields that are `undefined` are skipped.
 *
 * Callers are expected to unwrap option arrays and call this once per element, which is why an array passed here is
 * rejected instead of being treated as an (empty) options object.
 */
const validateAudioOptions = (audioOptions: ConversionAudioOptions) => {
	// `typeof [] === 'object'`, so arrays must be excluded explicitly
	if (!audioOptions || typeof audioOptions !== 'object' || Array.isArray(audioOptions)) {
		throw new TypeError('options.audio, when provided, must be an object.');
	}
	if (audioOptions.discard !== undefined && typeof audioOptions.discard !== 'boolean') {
		throw new TypeError('options.audio.discard, when provided, must be a boolean.');
	}
	if (audioOptions.forceTranscode !== undefined && typeof audioOptions.forceTranscode !== 'boolean') {
		throw new TypeError('options.audio.forceTranscode, when provided, must be a boolean.');
	}
	if (audioOptions.codec !== undefined && !AUDIO_CODECS.includes(audioOptions.codec)) {
		throw new TypeError(
			`options.audio.codec, when provided, must be one of: ${AUDIO_CODECS.join(', ')}.`,
		);
	}
	if (
		audioOptions.bitrate !== undefined
		&& !(audioOptions.bitrate instanceof Quality)
		&& (!Number.isInteger(audioOptions.bitrate) || audioOptions.bitrate <= 0)
	) {
		throw new TypeError('options.audio.bitrate, when provided, must be a positive integer or a quality.');
	}
	if (
		audioOptions.numberOfChannels !== undefined
		&& (!Number.isInteger(audioOptions.numberOfChannels) || audioOptions.numberOfChannels <= 0)
	) {
		throw new TypeError('options.audio.numberOfChannels, when provided, must be a positive integer.');
	}
	if (
		audioOptions.sampleRate !== undefined
		&& (!Number.isInteger(audioOptions.sampleRate) || audioOptions.sampleRate <= 0)
	) {
		throw new TypeError('options.audio.sampleRate, when provided, must be a positive integer.');
	}
	if (
		audioOptions.sampleFormat !== undefined
		&& !['u8', 's16', 's32', 'f32'].includes(audioOptions.sampleFormat)
	) {
		throw new TypeError('options.audio.sampleFormat, when provided, must be one of: u8, s16, s32, f32.');
	}
	if (audioOptions.process !== undefined && typeof audioOptions.process !== 'function') {
		throw new TypeError('options.audio.process, when provided, must be a function.');
	}
	if (
		audioOptions.processedNumberOfChannels !== undefined
		&& (!Number.isInteger(audioOptions.processedNumberOfChannels) || audioOptions.processedNumberOfChannels <= 0)
	) {
		throw new TypeError('options.audio.processedNumberOfChannels, when provided, must be a positive integer.');
	}
	if (
		audioOptions.processedSampleRate !== undefined
		&& (!Number.isInteger(audioOptions.processedSampleRate) || audioOptions.processedSampleRate <= 0)
	) {
		throw new TypeError('options.audio.processedSampleRate, when provided, must be a positive integer.');
	}
	if (
		audioOptions.group !== undefined
		&& !(
			audioOptions.group instanceof OutputTrackGroup
			|| (Array.isArray(audioOptions.group) && audioOptions.group.every(x => x instanceof OutputTrackGroup))
		)
	) {
		throw new TypeError(
			'options.audio.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.',
		);
	}
};

const FALLBACK_NUMBER_OF_CHANNELS = 2;
const FALLBACK_SAMPLE_RATE = 48000;

/** 
* An input track that was discarded (excluded) from a {@link Conversion} alongside the discard reason. * @group Conversion * @public */ export type DiscardedTrack = { /** The track that was discarded. */ track: InputTrack; /** * The reason for discarding the track. * * - `'discarded_by_user'`: You discarded this track by setting `discard: true`. * - `'max_track_count_reached'`: The output had no more room for another track. * - `'max_track_count_of_type_reached'`: The output had no more room for another track of this type, or the output * doesn't support this track type at all. * - `'unknown_source_codec'`: We don't know the codec of the input track and therefore don't know what to do * with it. * - `'undecodable_source_codec'`: The input track's codec is known, but we are unable to decode it. * - `'no_encodable_target_codec'`: We can't find a codec that we are able to encode and that can be contained * within the output format. This reason can be hit if the environment doesn't support the necessary encoders, or if * you requested a codec that cannot be contained within the output format. */ reason: | 'discarded_by_user' | 'max_track_count_reached' | 'max_track_count_of_type_reached' | 'unknown_source_codec' | 'undecodable_source_codec' | 'no_encodable_target_codec'; /** The options that were provided for this track, or `{}` if none were provided. */ trackOptions: ConversionVideoOptions | ConversionAudioOptions; }; /** * Represents a media file conversion process, used to convert one media file into another. In addition to conversion, * this class can be used to resize and rotate video, resample audio, drop tracks, or trim to a specific time range. * @group Conversion * @public */ export class Conversion { /** The input file. */ readonly input: Input; /** The output file. 
*/ readonly output: Output; /** @internal */ _options: ConversionOptions; /** @internal */ _startTimestamp!: number; /** @internal */ _endTimestamp!: number; /** @internal */ _addedCounts: Record<TrackType, number> = { video: 0, audio: 0, subtitle: 0, }; /** @internal */ _totalTrackCount = 0; /** @internal */ _nextOutputTrackId = 0; /** @internal */ _outputTrackIds: number[] = []; /** @internal */ _outputOwnTrackGroups: (OutputTrackGroup | null)[] = []; /** @internal */ _trackPromises: Promise<void>[] = []; /** @internal */ _started: Promise<void>; /** @internal */ _start: () => void; /** @internal */ _executed = false; /** @internal */ _synchronizer = new TrackSynchronizer(); /** @internal */ _totalDuration: number | null = null; /** @internal */ _maxTimestamps = new Map<number, number>(); // Track ID -> timestamp /** @internal */ _canceled = false; /** * A callback that is fired whenever the conversion progresses. Gets passed as first argument a number between * 0 and 1, indicating the completion of the conversion. Note that a progress of 1 doesn't necessarily mean the * conversion is complete; the conversion is complete once `execute()` resolves. * * As second argument, this callback receives the input time in seconds that has been processed. * * In order for progress to be computed, this property must be set before `execute` is called. */ onProgress?: (progress: number, processedTime: number) => unknown = undefined; /** @internal */ _computeProgress = false; /** @internal */ _lastProgress = 0; /** * Whether this conversion, as it has been configured, is valid and can be executed. If this field is `false`, check * the `discardedTracks` field for reasons. * * Note: a conversion having discarded tracks does not automatically mean it is invalid; if the remaining, utilized * tracks make for a valid output file, the conversion is still allowed. */ isValid = false; /** * The list of tracks that are included in the output file. 
When fan-out is used, the same track appears in this * array multiple times. */ readonly utilizedTracks: InputTrack[] = []; /** The list of tracks from the input file that have been discarded, alongside the discard reason. */ readonly discardedTracks: DiscardedTrack[] = []; /** Initializes a new conversion process without starting the conversion. */ static async init(options: ConversionOptions) { const conversion = new Conversion(options); await conversion._init(); return conversion; } /** Creates a new Conversion instance (duh). */ private constructor(options: ConversionOptions) { if (!options || typeof options !== 'object') { throw new TypeError('options must be an object.'); } if (!(options.input instanceof Input)) { throw new TypeError('options.input must be an Input.'); } if (!(options.output instanceof Output)) { throw new TypeError('options.output must be an Output.'); } if ( options.tracks !== undefined && options.tracks !== 'all' && options.tracks !== 'primary' ) { throw new TypeError( 'options.tracks, when provided, must be either \'all\' or \'primary\'.', ); } if ( options.output._tracks.length > 0 || Object.keys(options.output._metadataTags).length > 0 || options.output.state !== 'pending' ) { throw new TypeError('options.output must be fresh: no tracks or metadata tags added and not started.'); } if (options.video !== undefined && typeof options.video !== 'function') { if (Array.isArray(options.video)) { for (const obj of options.video) { validateVideoOptions(obj); } } else { validateVideoOptions(options.video); } } else { // We'll validate the return value later } if (options.audio !== undefined && typeof options.audio !== 'function') { if (Array.isArray(options.audio)) { for (const obj of options.audio) { validateAudioOptions(obj); } } else { validateAudioOptions(options.audio); } } else { // We'll validate the return value later } if (options.trim !== undefined && (!options.trim || typeof options.trim !== 'object')) { throw new 
TypeError('options.trim, when provided, must be an object.'); } if (options.trim?.start !== undefined && (!Number.isFinite(options.trim.start))) { throw new TypeError('options.trim.start, when provided, must be a finite number.'); } if (options.trim?.end !== undefined && (!Number.isFinite(options.trim.end))) { throw new TypeError('options.trim.end, when provided, must be a finite number.'); } if ( options.trim?.start !== undefined && options.trim.end !== undefined && options.trim.start >= options.trim.end) { throw new TypeError('options.trim.start must be less than options.trim.end.'); } if ( options.tags !== undefined && (typeof options.tags !== 'object' || !options.tags) && typeof options.tags !== 'function' ) { throw new TypeError('options.tags, when provided, must be an object or a function.'); } if (typeof options.tags === 'object') { validateMetadataTags(options.tags); } if (options.showWarnings !== undefined && typeof options.showWarnings !== 'boolean') { throw new TypeError('options.showWarnings, when provided, must be a boolean.'); } this._options = options; this.input = options.input; this.output = options.output; const { promise: started, resolve: start } = promiseWithResolvers(); this._started = started; this._start = start; } /** @internal */ async _init() { const inputFormat = await this.input.getFormat(); let tracks: InputTrack[]; let trackMode = this._options.tracks; if (trackMode === undefined) { // HACK to keep bundle size low, temp for now const defaultTrackMode = inputFormat.name.includes('(HLS)') ? 
'primary' : 'all'; trackMode = defaultTrackMode; } if (trackMode === 'all') { tracks = await this.input.getTracks(); } else if (trackMode === 'primary') { const primaryVideoTrack = await this.input.getPrimaryVideoTrack(); const primaryAudioTrack = await this.input.getPrimaryAudioTrack(); tracks = [primaryVideoTrack, primaryAudioTrack].filter(x => x !== null); } else { assertNever(trackMode); assert(false); } const outputTrackCounts = this.output.format.getSupportedTrackCounts(); // Input track counters let nVideo = 1; let nAudio = 1; // All tracks that aren't discarded by the user const filteredTracks: InputTrack[] = []; const filteredTrackOptions: (ConversionVideoOptions | ConversionAudioOptions)[][] = []; for (const track of tracks) { let trackOptions: (ConversionVideoOptions | ConversionAudioOptions)[]; if (track.isVideoTrack()) { if (this._options.video) { if (typeof this._options.video === 'function') { const returnedTrackOptions = await this._options.video(track, nVideo) ?? {}; if (Array.isArray(returnedTrackOptions)) { for (const obj of returnedTrackOptions) { validateVideoOptions(obj); } } else { validateVideoOptions(returnedTrackOptions); } trackOptions = Array.isArray(returnedTrackOptions) ? returnedTrackOptions : [returnedTrackOptions]; nVideo++; } else { // Already validated trackOptions = Array.isArray(this._options.video) ? this._options.video : [this._options.video]; } } else { trackOptions = [{}]; } } else if (track.isAudioTrack()) { if (this._options.audio) { if (typeof this._options.audio === 'function') { const returnedTrackOptions = await this._options.audio(track, nAudio) ?? {}; if (Array.isArray(returnedTrackOptions)) { for (const obj of returnedTrackOptions) { validateAudioOptions(obj); } } else { validateAudioOptions(returnedTrackOptions); } trackOptions = Array.isArray(returnedTrackOptions) ? returnedTrackOptions : [returnedTrackOptions]; nAudio++; } else { // Already validated trackOptions = Array.isArray(this._options.audio) ? 
this._options.audio : [this._options.audio]; } } else { trackOptions = [{}]; } } else { assert(false); } const discardOptions = trackOptions.filter(x => x.discard); for (const discardOption of discardOptions) { this.discardedTracks.push({ track, reason: 'discarded_by_user', trackOptions: discardOption, }); } if (trackOptions.length === discardOptions.length) { if (trackOptions.length === 0) { this.discardedTracks.push({ track, reason: 'discarded_by_user', trackOptions: {}, }); } continue; } const nonDiscardOptions = trackOptions.filter(x => !x.discard); filteredTracks.push(track); filteredTrackOptions.push(nonDiscardOptions); } if (this._options.trim?.start !== undefined) { this._startTimestamp = this._options.trim.start; } else { // Compute the start timestamp from the set of filtered tracks. Techncially these can still be narrowed // down later due to discarded tracks, but we need to fix the start timestamp now due to track processing // depending on it. this._startTimestamp = Math.max( await this.input.getFirstTimestamp(filteredTracks), // Samples can also have negative timestamps, but the meaning typically is "don't present me", so let's // cut those out by default. 0, ); } this._endTimestamp = Math.max(this._options.trim?.end ?? 
Infinity, this._startTimestamp); // Run these sequentially so that output tracks have a deterministic order for (let i = 0; i < filteredTracks.length; i++) { const track = filteredTracks[i]!; const options = filteredTrackOptions[i]!; for (const option of options) { if (this._totalTrackCount === outputTrackCounts.total.max) { this.discardedTracks.push({ track, reason: 'max_track_count_reached', trackOptions: option, }); continue; } if (this._addedCounts[track.type] === outputTrackCounts[track.type].max) { this.discardedTracks.push({ track, reason: 'max_track_count_of_type_reached', trackOptions: option, }); continue; } const outputTrackId = this._nextOutputTrackId++; if (track.isVideoTrack()) { await this._processVideoTrack(track, option as ConversionVideoOptions, outputTrackId); } else if (track.isAudioTrack()) { await this._processAudioTrack(track, option as ConversionAudioOptions, outputTrackId); } else { assert(false); } } } // When no track groups are set by the user, then the output track pairability should be *identical* to the // input's. We do the naive algorithm to achieve this: assign each track to its own group, and pair groups with // each other based on input track pairability. for (let i = 0; i < this.utilizedTracks.length - 1; i++) { for (let j = i + 1; j < this.utilizedTracks.length; j++) { const trackA = this.utilizedTracks[i]!; const trackB = this.utilizedTracks[j]!; const ownGroupA = this._outputOwnTrackGroups[i]; const ownGroupB = this._outputOwnTrackGroups[j]; assert(ownGroupA !== undefined); assert(ownGroupB !== undefined); if (ownGroupA && ownGroupB && trackA.canBePairedWith(trackB)) { ownGroupA.pairWith(ownGroupB); } } } // Now, let's deal with metadata tags const inputTags = await this.input.getMetadataTags(); let outputTags: MetadataTags; if (this._options.tags) { const result = typeof this._options.tags === 'function' ? 
await this._options.tags(inputTags)
				: this._options.tags;
			validateMetadataTags(result);

			outputTags = result;
		} else {
			outputTags = inputTags;
		}

		// Somewhat dirty but pragmatic
		const inputAndOutputFormatMatch = inputFormat.mimeType === this.output.format.mimeType;
		const rawTagsAreUnchanged = inputTags.raw === outputTags.raw;

		if (inputTags.raw && rawTagsAreUnchanged && !inputAndOutputFormatMatch) {
			// If the input and output formats aren't the same, copying over raw metadata tags makes no sense and only
			// results in junk tags, so let's cut them out.
			delete outputTags.raw;
		}

		this.output.setMetadataTags(outputTags);

		// Let's check if the conversion can actually be executed
		this.isValid = this._totalTrackCount >= outputTrackCounts.total.min
			&& this._addedCounts.video >= outputTrackCounts.video.min
			&& this._addedCounts.audio >= outputTrackCounts.audio.min
			&& this._addedCounts.subtitle >= outputTrackCounts.subtitle.min;

		if (this._options.showWarnings ?? true) {
			const warnElements: unknown[] = [];

			const unintentionallyDiscardedTracks = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user');
			if (unintentionallyDiscardedTracks.length > 0) {
				// Let's give the user a notice/warning about discarded tracks so they aren't confused
				warnElements.push(
					'Some tracks had to be discarded from the conversion:',
					unintentionallyDiscardedTracks,
				);
			}

			if (!this.isValid) {
				if (warnElements.length > 0) {
					warnElements.push('\n\n');
				}
				warnElements.push(this._getInvalidityExplanation().join(''));
			}

			if (warnElements.length > 0) {
				console.warn(...warnElements);
			}
		}
	}

	/**
	 * Builds the explanation of why this conversion cannot be executed, as a list of text fragments that
	 * callers concatenate with `join('')`.
	 *
	 * @internal
	 */
	_getInvalidityExplanation() {
		// No track was discarded at all, so the explanation points at missing tracks instead
		if (this.discardedTracks.length === 0) {
			return ['Due to missing tracks, this conversion cannot be executed.'];
		}

		const fragments = ['Due to discarded tracks, this conversion cannot be executed.'];

		// Tracks the user discarded on purpose are ignored; encodability is only "the problem" when every
		// remaining discard (and at least one exists) failed for lack of an encodable target codec.
		const involuntaryDiscards = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user');
		const onlyEncodabilityFailures = involuntaryDiscards.length > 0
			&& involuntaryDiscards.every(x => x.reason === 'no_encodable_target_codec');

		if (!onlyEncodabilityFailures) {
			fragments.push('\nCheck the discardedTracks field for more info.');
			return fragments;
		}

		// Collect, without duplicates, the codecs the output format supports for each affected track type
		const candidateCodecs = new Set(involuntaryDiscards.flatMap((x) => {
			switch (x.track.type) {
				case 'video':
					return this.output.format.getSupportedVideoCodecs();
				case 'audio':
					return this.output.format.getSupportedAudioCodecs();
				default:
					return this.output.format.getSupportedSubtitleCodecs();
			}
		}));

		if (candidateCodecs.size === 1) {
			const [onlyCodec] = candidateCodecs;
			fragments.push(
				`\nTracks were discarded because your environment is not able to encode '${onlyCodec}'.`,
			);
		} else {
			const quotedCodecs = [...candidateCodecs].map(x => `'${x}'`).join(', ');
			fragments.push(
				'\nTracks were discarded because your environment is not able to encode any of the following'
				+ ` codecs: ${quotedCodecs}.`,
			);
		}

		// Point the user at extension packages that add encoders for the affected codecs
		if (candidateCodecs.has('mp3')) {
			fragments.push(
				'\nThe @mediabunny/mp3-encoder extension package provides support for encoding MP3.',
			);
		}
		if (candidateCodecs.has('aac')) {
			fragments.push(
				'\nThe @mediabunny/aac-encoder extension package provides support for encoding AAC.',
			);
		}
		if (candidateCodecs.has('ac3') || candidateCodecs.has('eac3')) {
			fragments.push(
				'\nThe @mediabunny/ac3 extension package provides support for encoding and decoding AC-3/E-AC-3.',
			);
		}
		if (candidateCodecs.has('flac')) {
			fragments.push(
				'\nThe @mediabunny/flac-encoder extension package provides support for encoding FLAC.',
			);
		}

		return fragments;
	}

	/**
	 * Runs the conversion and resolves once it has completed.
	 *
	 * Throws if `isValid` is `false` or if the conversion has already been executed. Errors raised while
	 * processing tracks are rethrown, and a `ConversionCanceledError` is thrown if the conversion was canceled.
	 */
	async execute() {
		if (!this.isValid) {
			const explanation = this._getInvalidityExplanation().join('');
			throw new Error(
				'Cannot execute this conversion because its output configuration is invalid. Make sure to always check'
				+ ' the isValid field before executing a conversion.\n'
				+ explanation,
			);
		}

		if (this._executed) {
			throw new Error('Conversion cannot be executed twice.');
		}
		this._executed = true;

		this._outputTrackIds.forEach((id) => {
			this._synchronizer.declareTrack(id);
		});

		if (this.onProgress) {
			// Only the tracks that are actually utilized contribute to the duration
			const distinctTracks = Array.from(new Set(this.utilizedTracks));
			const trackDurations = await Promise.all(distinctTracks.map(async (track) => {
				const live = await track.isLive();
				if (live) {
					return Infinity; // Upper bound (assuming no universe heat death)
				}

				const metadataDuration = await track.getDurationFromMetadata();
				return metadataDuration ?? (await track.computeDuration());
			}));
			const longestDuration = Math.max(0, ...trackDurations);

			this._computeProgress = true;

			// Progress is measured against whichever ends first: the media itself or the trim end
			const trimmedSpan = this._endTimestamp - this._startTimestamp;
			this._totalDuration = Math.min(longestDuration - this._startTimestamp, trimmedSpan);

			this._outputTrackIds.forEach((id) => {
				this._maxTimestamps.set(id, 0);
			});

			this.onProgress?.(0, 0);
		}

		await this.output.start();
		this._start();

		try {
			await Promise.all(this._trackPromises);
		} catch (error) {
			const alreadyCanceled = this._canceled;
			if (!alreadyCanceled) {
				// Make sure to cancel to stop other encoding processes and clean up resources
				void this.cancel();
			}

			throw error;
		}

		if (this._canceled) {
			throw new ConversionCanceledError();
		}

		await this.output.finalize();

		if (this._computeProgress) {
			const finalTimestamp = Math.min(...this._maxTimestamps.values());
			this.onProgress?.(1, finalTimestamp);
		}
	}

	/**
	 * Cancels the conversion process, causing any ongoing `execute` call to throw a `ConversionCanceledError`.
	 * Does nothing if the conversion is already complete.
*/
	async cancel() {
		// Once the output is finalizing (or already finalized), there is nothing left to cancel
		if (this.output.state === 'finalizing' || this.output.state === 'finalized') {
			return;
		}

		if (this._canceled) {
			console.warn('Conversion already canceled.');
			return;
		}

		this._canceled = true;
		await this.output.cancel();
	}

	/**
	 * Prepares one output video track for the given input track and options, and queues the work that feeds it
	 * once the conversion has started.
	 *
	 * If the encoded packets can be reused as-is, they are copied straight to the output; otherwise the track is
	 * decoded and reencoded. When the source codec is unknown, the track cannot be decoded, or no encodable
	 * target codec is found, the track is recorded in `discardedTracks` and no output track is added.
	 *
	 * @param track - The input video track to convert.
	 * @param trackOptions - The video options that apply to this output track.
	 * @param outputTrackId - The ID used for this output track when reporting progress and synchronizing.
	 * @internal
	 */
	async _processVideoTrack(track: InputVideoTrack, trackOptions: ConversionVideoOptions, outputTrackId: number) {
		const sourceCodec = await track.getCodec();
		if (!sourceCodec) {
			this.discardedTracks.push({
				track,
				reason: 'unknown_source_codec',
				trackOptions,
			});
			return;
		}

		let videoSource: VideoSource;

		const innateRotation = await track.getRotation();
		const totalRotation = normalizeRotation(innateRotation + (trackOptions.rotate ?? 0));
		let outputTrackRotation = totalRotation;
		const canUseRotationMetadata = this.output.format.supportsVideoRotationMetadata
			&& (trackOptions.allowRotationMetadata ?? true);

		const squarePixelWidth = await track.getSquarePixelWidth();
		const squarePixelHeight = await track.getSquarePixelHeight();

		// A quarter-turn rotation swaps the roles of width and height
		const [rotatedWidth, rotatedHeight] = totalRotation % 180 === 0
			? [squarePixelWidth, squarePixelHeight]
			: [squarePixelHeight, squarePixelWidth];

		let crop = trackOptions.crop;
		if (crop) {
			crop = clampCropRectangle(crop, rotatedWidth, rotatedHeight);
		}

		const [originalWidth, originalHeight] = crop
			? [crop.width, crop.height]
			: [rotatedWidth, rotatedHeight];

		let width = originalWidth;
		let height = originalHeight;
		const aspectRatio = width / height;

		// A lot of video encoders require that the dimensions be multiples of 2
		if (trackOptions.width !== undefined && trackOptions.height === undefined) {
			width = ceilToMultipleOfTwo(trackOptions.width);
			height = ceilToMultipleOfTwo(Math.round(width / aspectRatio));
		} else if (trackOptions.width === undefined && trackOptions.height !== undefined) {
			height = ceilToMultipleOfTwo(trackOptions.height);
			width = ceilToMultipleOfTwo(Math.round(height * aspectRatio));
		} else if (trackOptions.width !== undefined && trackOptions.height !== undefined) {
			width = ceilToMultipleOfTwo(trackOptions.width);
			height = ceilToMultipleOfTwo(trackOptions.height);
		}

		const firstTimestamp = await track.getFirstTimestamp();
		let videoCodecs = this.output.format.getSupportedVideoCodecs();

		const needsTranscode = !!trackOptions.forceTranscode
			|| firstTimestamp < this._startTimestamp
			|| !!trackOptions.frameRate
			|| trackOptions.keyFrameInterval !== undefined
			|| trackOptions.process !== undefined
			|| trackOptions.bitrate !== undefined
			|| !videoCodecs.includes(sourceCodec)
			|| (trackOptions.codec && trackOptions.codec !== sourceCodec)
			|| width !== originalWidth
			|| height !== originalHeight
			// TODO This is suboptimal: Forcing a rerender when both rotation and process are set is not
			// performance-optimal, but right now there's no other way because we can't change the track rotation
			// metadata after the output has already started. Should be possible with API changes in v2, though!
			|| (totalRotation !== 0 && !canUseRotationMetadata)
			|| !!crop;
		const alpha = trackOptions.alpha ?? 'discard';

		if (!needsTranscode) {
			// Fast path, we can simply copy over the encoded packets

			const source = new EncodedVideoPacketSource(sourceCodec);
			videoSource = source;

			this._trackPromises.push((async () => {
				await this._started;

				const sink = new EncodedPacketSink(track);
				const decoderConfig = await track.getDecoderConfig();
				const meta: EncodedVideoChunkMetadata = { decoderConfig: decoderConfig ?? undefined };

				for await (const packet of sink.packets(undefined, undefined, { verifyKeyPackets: true })) {
					if (this._canceled) {
						return;
					}

					if (packet.timestamp >= this._endTimestamp) {
						break;
					}

					// Shift the packet so that the output timeline begins at the conversion's start timestamp
					const modifiedPacket = packet.clone({
						timestamp: packet.timestamp - this._startTimestamp,
						sideData: alpha === 'discard'
							? {} // Remove alpha side data
							: packet.sideData,
					});
					assert(modifiedPacket.timestamp >= 0);

					this._reportProgress(outputTrackId, modifiedPacket.timestamp + modifiedPacket.duration);
					await source.add(modifiedPacket, meta);

					if (this._synchronizer.shouldWait(outputTrackId, modifiedPacket.timestamp)) {
						await this._synchronizer.wait(modifiedPacket.timestamp);
					}
				}

				source.close();
				this._synchronizer.closeTrack(outputTrackId);
			})());
		} else {
			// We need to decode & reencode the video

			const canDecode = await track.canDecode();
			if (!canDecode) {
				this.discardedTracks.push({
					track,
					reason: 'undecodable_source_codec',
					trackOptions,
				});
				return;
			}

			if (trackOptions.codec) {
				videoCodecs = videoCodecs.filter(codec => codec === trackOptions.codec);
			}

			const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;

			const encodableCodec = await getFirstEncodableVideoCodec(videoCodecs, {
				width: trackOptions.process && trackOptions.processedWidth
					? trackOptions.processedWidth
					: width,
				height: trackOptions.process && trackOptions.processedHeight
					? trackOptions.processedHeight
					: height,
				bitrate,
			});
			if (!encodableCodec) {
				this.discardedTracks.push({
					track,
					reason: 'no_encodable_target_codec',
					trackOptions,
				});
				return;
			}

			const encodingConfig: VideoEncodingConfig = {
				codec: encodableCodec,
				bitrate,
				keyFrameInterval: trackOptions.keyFrameInterval,
				sizeChangeBehavior: trackOptions.fit ?? 'passThrough',
				alpha,
				hardwareAcceleration: trackOptions.hardwareAcceleration,
				transform: {},
			};
			assert(encodingConfig.transform);

			let needsRerender = width !== originalWidth
				|| height !== originalHeight
				|| (totalRotation !== 0 && (!canUseRotationMetadata || trackOptions.process !== undefined))
				|| !!crop
				// Don't expect encoders to reliably handle non-square pixels:
				|| squarePixelWidth !== await track.getCodedWidth()
				|| squarePixelHeight !== await track.getCodedHeight();

			if (!needsRerender) {
				// If we're directly passing decoded samples back to the encoder, sometimes the encoder may error due
				// to lack of support of certain video frame formats, like when HDR is at play. To check for this, we
				// first try to pass a single frame to the encoder to see how it behaves. If it throws, we then fall
				// back to the rerender path.
				//
				// Creating a new temporary Output is sort of hacky, but due to a lack of an isolated encoder API right
				// now, this is the simplest way. Will refactor in the future! TODO

				const tempOutput = new Output({
					format: new Mp4OutputFormat(), // Supports all video codecs
					target: new NullTarget(),
				});

				const tempSource = new VideoSampleSource(encodingConfig);
				tempOutput.addVideoTrack(tempSource);

				await tempOutput.start();

				const sink = new VideoSampleSink(track);
				const firstSample = await sink.getSample(firstTimestamp); // Let's just use the first sample

				if (firstSample) {
					try {
						try {
							await tempSource.add(firstSample);
						} finally {
							// Release the sample even if the encoder rejected it, so that it doesn't leak
							firstSample.close();
						}

						await tempOutput.finalize();
					} catch (error) {
						console.info('Error when probing encoder support. Falling back to rerender path.', error);
						needsRerender = true;

						void tempOutput.cancel();
					}
				} else {
					await tempOutput.cancel();
				}
			}

			if (trackOptions.frameRate) {
				encodingConfig.transform.frameRate = trackOptions.frameRate;
			}

			if (needsRerender) {
				outputTrackRotation = 0; // Since the rotation is baked into the output

				encodingConfig.transform.width = width;
				encodingConfig.transform.height = height;
				encodingConfig.transform.fit = trackOptions.fit ?? 'fill';
				encodingConfig.transform.rotate = normalizeRotation(totalRotation - innateRotation);
				encodingConfig.transform.crop = crop;
				encodingConfig.transform.alpha = alpha;
			}

			const source = new VideoSampleSource(encodingConfig);
			videoSource = source;

			this._trackPromises.push((async () => {
				await this._started;

				const sink = new VideoSampleSink(track);

				for await (const sample of sink.samples(this._startTimestamp, this._endTimestamp)) {
					if (this._canceled) {
						sample.close();
						return;
					}

					// Shift onto the output timeline, clamping so that no sample ends up before zero
					const adjustedSampleTimestamp = Math.max(sample.timestamp - this._startTimestamp, 0);
					sample.setTimestamp(adjustedSampleTimestamp);

					try {
						await this._registerVideoSample(trackOptions, outputTrackId, source, sample);
					} finally {
						// Also close the sample when registering it fails, so that it doesn't leak
						sample.close();
					}
				}

				source.close();
				this._synchronizer.closeTrack(outputTrackId);
			})());
		}

		// Without a user-specified group, the track gets a group of its own
		let ownGroup: OutputTrackGroup | null = null;
		if (!trackOptions.group) {
			ownGroup = new OutputTrackGroup();
		}

		const videoTrackLanguageCode = await track.getLanguageCode();

		this.output.addVideoTrack(videoSource, {
			frameRate: trackOptions.frameRate,
			// TODO: This condition can be removed when all demuxers properly homogenize to BCP47 in v2
			languageCode: isIso639Dash2LanguageCode(videoTrackLanguageCode) ? videoTrackLanguageCode : undefined,
			name: await track.getName() ?? undefined,
			disposition: await track.getDisposition(),
			rotation: outputTrackRotation,
			group: ownGroup ?? trackOptions.group,
		});

		this._addedCounts.video++;
		this._totalTrackCount++;

		// These three lists are pushed to together, one entry per output track
		this.utilizedTracks.push(track);
		this._outputTrackIds.push(outputTrackId);
		this._outputOwnTrackGroups.push(ownGroup);
	}

	/** @internal */
	async _registerVideoSample(
		trackOptions: ConversionVideoOptions,
		outputTrackId: number,
		source: VideoSampleSource,
		sample: VideoSample,
	) {
		if (this._canceled) {
			return;
		}

		this._reportProgress(outputTrackId, sample.timestamp + sample.duration);

		let finalSamples: VideoSample[];

		if (!trackOptions.process) {
			finalSamples = [sample];
		} else {
			let processed = trackOptions.process(sample);
			if (processed instanceof Promise) processed = await processed;

			if (!Array.isArray(processed)) {
				processed = processed === null ? []