mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,427 lines (1,272 loc) • 62.8 kB
text/typescript
/*!
* Copyright (c) 2026-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import {
AUDIO_CODECS,
AudioCodec,
NON_PCM_AUDIO_CODECS,
VIDEO_CODECS,
VideoCodec,
} from './codec';
import {
getEncodableAudioCodecs,
getFirstEncodableVideoCodec,
Quality,
QUALITY_HIGH,
VideoEncodingConfig,
} from './encode';
import { Input } from './input';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
AudioSampleSink,
EncodedPacketSink,
VideoSampleSink,
} from './media-sink';
import {
AudioSource,
EncodedVideoPacketSource,
EncodedAudioPacketSource,
VideoSource,
VideoSampleSource,
AudioSampleSource,
} from './media-source';
import {
assert,
assertNever,
ceilToMultipleOfTwo,
clamp,
isIso639Dash2LanguageCode,
MaybePromise,
normalizeRotation,
promiseWithResolvers,
Rotation,
} from './misc';
import { Output, OutputTrackGroup, TrackType } from './output';
import { Mp4OutputFormat } from './output-format';
import {
AudioSample,
audioSampleToInterleavedFormat,
clampCropRectangle,
CropRectangle,
toInterleavedAudioFormat,
validateCropRectangle,
VideoSample,
VideoSampleResource,
} from './sample';
import { MetadataTags, validateMetadataTags } from './metadata';
import { NullTarget } from './target';
import { AudioResampler } from './resample';
/**
* The options for media file conversion.
* @group Conversion
* @public
*/
export type ConversionOptions = {
/** The input file. */
input: Input;
/**
* The output file. Must be fresh: no tracks or metadata tags may have been added to it, and it must not have been
* started yet.
*/
output: Output;
/**
* Defines which input tracks are used for conversion. Defaults to `'all'` unless the input is an HLS input, in
* which case it defaults to `'primary'`.
*
* - `'all'`: All input tracks are eligible for conversion.
* - `'primary'`: Only the primary video and audio track from the input are eligible for conversion.
*/
tracks?: 'all' | 'primary';
/**
* Video-specific options. When passing an object, the same options are applied to all video tracks. When passing a
* function, it will be invoked for each video track and is expected to return or resolve to the options
* for that specific track. The function is passed an instance of {@link InputVideoTrack} as well as a number `n`,
* which is the 1-based index of the track in the list of all video tracks. Using `n` is deprecated, prefer the
* identical `track.number` instead. If the function returns or resolves to `undefined`, empty options (`{}`) are
* used for that track.
*
* When passing an array or a function that returns an array, one output track per array element will be created,
* allowing for "fan-out". Useful for creating multiple variants from a single track, for example with different
* resolutions.
*/
video?: ConversionVideoOptions
| ConversionVideoOptions[]
| ((track: InputVideoTrack, n: number) => MaybePromise<
ConversionVideoOptions | ConversionVideoOptions[] | undefined
>);
/**
* Audio-specific options. When passing an object, the same options are applied to all audio tracks. When passing a
* function, it will be invoked for each audio track and is expected to return or resolve to the options
* for that specific track. The function is passed an instance of {@link InputAudioTrack} as well as a number `n`,
* which is the 1-based index of the track in the list of all audio tracks. Using `n` is deprecated, prefer the
* identical `track.number` instead. If the function returns or resolves to `undefined`, empty options (`{}`) are
* used for that track.
*
* When passing an array or a function that returns an array, one output track per array element will be created,
* allowing for "fan-out". Useful for creating multiple variants from a single track, for example with different
* bitrates.
*/
audio?: ConversionAudioOptions
| ConversionAudioOptions[]
| ((track: InputAudioTrack, n: number) => MaybePromise<
ConversionAudioOptions | ConversionAudioOptions[] | undefined
>);
/** Options to trim the input file. */
trim?: {
/**
* The time in the input file in seconds at which the output file should start. Must be a finite number and
* less than `end`. When omitted, defaults to the earliest start timestamp of the non-discarded tracks, or to
* 0, whichever is higher.
*/
start?: number;
/**
* The time in the input file in seconds at which the output file should end. Must be a finite number and
* greater than `start`. Defaults to the duration of the input when omitted.
*/
end?: number;
};
/**
* An object or a callback that returns or resolves to an object containing the descriptive metadata tags that
* should be written to the output file. If a function is passed, it will be passed the tags of the input file as
* its first argument, allowing you to modify, augment or extend them.
*
* If this option is not set, the input's metadata tags will be copied to the output.
*/
tags?: MetadataTags | ((inputTags: MetadataTags) => MaybePromise<MetadataTags>);
/**
* Whether to show potential console warnings about discarded tracks after calling `Conversion.init()`, defaults to
* `true`. Set this to `false` if you're properly handling the `discardedTracks` and `isValid` fields already and
* want to keep the console output clean.
*/
showWarnings?: boolean;
};
/**
* Video-specific options.
* @group Conversion
* @public
*/
export type ConversionVideoOptions = {
/**
* If `true`, the video track these options apply to will be discarded and will not be present in the output. When
* a single options object is used for all video tracks, this discards all of them.
*/
discard?: boolean;
/**
* The desired width of the output video in pixels, defaulting to the video's natural display width. If height
* is not set, it will be deduced automatically based on aspect ratio. Must be a positive integer.
*/
width?: number;
/**
* The desired height of the output video in pixels, defaulting to the video's natural display height. If width
* is not set, it will be deduced automatically based on aspect ratio. Must be a positive integer.
*/
height?: number;
/**
* The fitting algorithm in case both width and height are set, or if the input video changes its size over time.
* This field is required when both `width` and `height` are provided.
*
* - `'fill'` will stretch the image to fill the entire box, potentially altering aspect ratio.
* - `'contain'` will contain the entire image within the box while preserving aspect ratio. This may lead to
* letterboxing.
* - `'cover'` will scale the image until the entire box is filled, while preserving aspect ratio.
*/
fit?: 'fill' | 'contain' | 'cover';
/**
* The angle in degrees to rotate the input video by, clockwise. Must be 0, 90, 180 or 270. Rotation is applied
* before cropping and resizing. This rotation is _in addition to_ the natural rotation of the input video as
* specified in the input file's metadata.
*/
rotate?: Rotation;
/**
* Defaults to `true`. When enabled, Mediabunny will use the rotation metadata in the output file to perform video
* rotation whenever possible. Set this field to `false` if you want to ensure the output file does not make use of
* rotation metadata and that any rotation is baked into the video frames directly.
*/
allowRotationMetadata?: boolean;
/**
* Specifies the rectangular region of the input video to crop to. The crop region will automatically be clamped to
* the dimensions of the input video track. Cropping is performed after rotation but before resizing.
*/
crop?: CropRectangle;
/**
* The desired frame rate of the output video, in hertz. Must be a finite positive number. If not specified, the
* original input frame rate will be used (which may be variable).
*/
frameRate?: number;
/** The desired output video codec. */
codec?: VideoCodec;
/** The desired bitrate of the output video, either as a positive integer or as a {@link Quality}. */
bitrate?: number | Quality;
/**
* Whether to discard or keep the transparency information of the input video. The default is `'discard'`. Note that
* for `'keep'` to produce a transparent video, you must use an output config that supports it, such as WebM with
* VP9.
*/
alpha?: 'discard' | 'keep';
/**
* The interval, in seconds, of how often frames are encoded as a key frame. Must be a finite, non-negative number.
* The default is 5 seconds. Frequent key frames improve seeking behavior but increase file size. When using
* multiple video tracks, you should give them all the same key frame interval.
*
* Setting this field forces a transcode.
*/
keyFrameInterval?: number;
/**
* A hint that configures the hardware acceleration method used when transcoding. This is best left on
* `'no-preference'`, the default.
*/
hardwareAcceleration?: 'no-preference' | 'prefer-hardware' | 'prefer-software';
/** When `true`, video will always be re-encoded instead of directly copying over the encoded samples. */
forceTranscode?: boolean;
/**
* Allows for custom user-defined processing of video frames, e.g. for applying overlays, color transformations, or
* timestamp modifications. Will be called for each input video sample after transformations and frame rate
* corrections.
*
* Must return a {@link VideoSample}, a {@link VideoSampleResource} or a `CanvasImageSource`, an array of them, or
* `null` for dropping the frame. When non-timestamped data is returned, the timestamp and duration from the source
* sample will be used. Rotation metadata of the returned sample will be ignored.
*
* This function can also be used to manually resize frames. When doing so, you should signal the post-process
* dimensions using the `processedWidth` and `processedHeight` fields, which enables the encoder to better know what
* to expect. If these fields aren't set, Mediabunny will assume you won't perform any resizing.
*/
process?: (sample: VideoSample) => MaybePromise<
CanvasImageSource | VideoSample | VideoSampleResource
| (CanvasImageSource | VideoSample | VideoSampleResource)[] | null
>;
/**
* An optional hint specifying the width of video samples returned by the `process` function, for better
* encoder configuration. Must be a positive integer.
*/
processedWidth?: number;
/**
* An optional hint specifying the height of video samples returned by the `process` function, for better
* encoder configuration. Must be a positive integer.
*/
processedHeight?: number;
/**
* Defines the group(s) the output track is a part of. For more, see {@link BaseTrackMetadata.group}.
*
* If left blank, tracks will internally be assigned to groups such that the output track pairability graph exactly
* matches the input track pairability graph.
*/
group?: OutputTrackGroup | OutputTrackGroup[];
};
/**
* Audio-specific options.
* @group Conversion
* @public
*/
export type ConversionAudioOptions = {
/**
* If `true`, the audio track these options apply to will be discarded and will not be present in the output. When
* a single options object is used for all audio tracks, this discards all of them.
*/
discard?: boolean;
/** The desired channel count of the output audio. Must be a positive integer. */
numberOfChannels?: number;
/** The desired sample rate of the output audio, in hertz. Must be a positive integer. */
sampleRate?: number;
/**
* The desired sample format (and therefore bit depth) of the audio samples before they are passed to the encoder.
* Can be used to control bit depth with certain output codecs such as FLAC.
*
* Setting this field forces audio transcoding.
*/
sampleFormat?: 'u8' | 's16' | 's32' | 'f32';
/** The desired output audio codec. */
codec?: AudioCodec;
/** The desired bitrate of the output audio, either as a positive integer or as a {@link Quality}. */
bitrate?: number | Quality;
/** When `true`, audio will always be re-encoded instead of directly copying over the encoded samples. */
forceTranscode?: boolean;
/**
* Allows for custom user-defined processing of audio samples, e.g. for applying audio effects, transformations, or
* timestamp modifications. Will be called for each input audio sample after remixing and resampling.
*
* Must return an {@link AudioSample}, an array of them, or `null` for dropping the sample.
*
* This function can also be used to manually perform remixing or resampling. When doing so, you should signal the
* post-process parameters using the `processedNumberOfChannels` and `processedSampleRate` fields, which enables the
* encoder to better know what to expect. If these fields aren't set, Mediabunny will assume you won't perform
* remixing or resampling.
*/
process?: (sample: AudioSample) => MaybePromise<
AudioSample | AudioSample[] | null
>;
/**
* An optional hint specifying the channel count of audio samples returned by the `process` function, for better
* encoder configuration. Must be a positive integer.
*/
processedNumberOfChannels?: number;
/**
* An optional hint specifying the sample rate of audio samples returned by the `process` function, for better
* encoder configuration. Must be a positive integer.
*/
processedSampleRate?: number;
/**
* Defines the group(s) the output track is a part of. For more, see {@link BaseTrackMetadata.group}.
*
* If left blank, tracks will internally be assigned to groups such that the output track pairability graph exactly
* matches the input track pairability graph.
*/
group?: OutputTrackGroup | OutputTrackGroup[];
};
/**
 * Validates a single video options object. Every field is optional; a field that is `undefined` is skipped, and a
 * field that is present must satisfy its constraint. Checks run in a fixed order and the first violation is reported.
 *
 * Throws a `TypeError` if `videoOptions` is not an object or if any provided field is invalid. The crop rectangle is
 * checked by `validateCropRectangle`, which may throw on its own.
 */
const validateVideoOptions = (videoOptions: ConversionVideoOptions) => {
    // Local predicates shared by several of the checks below
    const isPositiveInteger = (value: unknown) =>
        typeof value === 'number' && Number.isInteger(value) && value > 0;
    const isTrackGroup = (value: unknown) => value instanceof OutputTrackGroup;

    if (typeof videoOptions !== 'object' || !videoOptions) {
        throw new TypeError('options.video, when provided, must be an object.');
    }

    const discard = videoOptions.discard;
    if (!(discard === undefined || typeof discard === 'boolean')) {
        throw new TypeError('options.video.discard, when provided, must be a boolean.');
    }

    const forceTranscode = videoOptions.forceTranscode;
    if (!(forceTranscode === undefined || typeof forceTranscode === 'boolean')) {
        throw new TypeError('options.video.forceTranscode, when provided, must be a boolean.');
    }

    const codec = videoOptions.codec;
    if (codec !== undefined && !VIDEO_CODECS.includes(codec)) {
        throw new TypeError(
            `options.video.codec, when provided, must be one of: ${VIDEO_CODECS.join(', ')}.`,
        );
    }

    // A bitrate is either a Quality instance or a plain positive integer
    const bitrate = videoOptions.bitrate;
    if (bitrate !== undefined && !(bitrate instanceof Quality) && !isPositiveInteger(bitrate)) {
        throw new TypeError('options.video.bitrate, when provided, must be a positive integer or a quality.');
    }

    const width = videoOptions.width;
    if (width !== undefined && !isPositiveInteger(width)) {
        throw new TypeError('options.video.width, when provided, must be a positive integer.');
    }

    const height = videoOptions.height;
    if (height !== undefined && !isPositiveInteger(height)) {
        throw new TypeError('options.video.height, when provided, must be a positive integer.');
    }

    const fit = videoOptions.fit;
    if (fit !== undefined && !['fill', 'contain', 'cover'].includes(fit)) {
        throw new TypeError('options.video.fit, when provided, must be one of \'fill\', \'contain\', or \'cover\'.');
    }

    // With both dimensions fixed, the aspect ratio may change, so the caller must pick a fitting algorithm
    if (width !== undefined && height !== undefined && fit === undefined) {
        throw new TypeError(
            'When both options.video.width and options.video.height are provided, options.video.fit must also be'
            + ' provided.',
        );
    }

    const rotate = videoOptions.rotate;
    if (rotate !== undefined && ![0, 90, 180, 270].includes(rotate)) {
        throw new TypeError('options.video.rotate, when provided, must be 0, 90, 180 or 270.');
    }

    const allowRotationMetadata = videoOptions.allowRotationMetadata;
    if (!(allowRotationMetadata === undefined || typeof allowRotationMetadata === 'boolean')) {
        throw new TypeError('options.video.allowRotationMetadata, when provided, must be a boolean.');
    }

    const crop = videoOptions.crop;
    if (crop !== undefined) {
        validateCropRectangle(crop, 'options.video.');
    }

    const frameRate = videoOptions.frameRate;
    if (frameRate !== undefined && !(Number.isFinite(frameRate) && frameRate > 0)) {
        throw new TypeError('options.video.frameRate, when provided, must be a finite positive number.');
    }

    const alpha = videoOptions.alpha;
    if (alpha !== undefined && !['discard', 'keep'].includes(alpha)) {
        throw new TypeError('options.video.alpha, when provided, must be either \'discard\' or \'keep\'.');
    }

    // Unlike the frame rate, a key frame interval of zero is accepted
    const keyFrameInterval = videoOptions.keyFrameInterval;
    if (keyFrameInterval !== undefined && !(Number.isFinite(keyFrameInterval) && keyFrameInterval >= 0)) {
        throw new TypeError('options.video.keyFrameInterval, when provided, must be a non-negative number.');
    }

    const process = videoOptions.process;
    if (!(process === undefined || typeof process === 'function')) {
        throw new TypeError('options.video.process, when provided, must be a function.');
    }

    const processedWidth = videoOptions.processedWidth;
    if (processedWidth !== undefined && !isPositiveInteger(processedWidth)) {
        throw new TypeError('options.video.processedWidth, when provided, must be a positive integer.');
    }

    const processedHeight = videoOptions.processedHeight;
    if (processedHeight !== undefined && !isPositiveInteger(processedHeight)) {
        throw new TypeError('options.video.processedHeight, when provided, must be a positive integer.');
    }

    const hardwareAcceleration = videoOptions.hardwareAcceleration;
    if (
        hardwareAcceleration !== undefined
        && !['no-preference', 'prefer-hardware', 'prefer-software'].includes(hardwareAcceleration)
    ) {
        throw new TypeError(
            'options.video.hardwareAcceleration, when provided, must be \'no-preference\', \'prefer-hardware\' or'
            + ' \'prefer-software\'.',
        );
    }

    // Either a single group or an array consisting solely of groups
    const group = videoOptions.group;
    if (group !== undefined) {
        const groupIsValid = isTrackGroup(group) || (Array.isArray(group) && group.every(isTrackGroup));
        if (!groupIsValid) {
            throw new TypeError(
                'options.video.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.',
            );
        }
    }
};
/**
 * Validates a single audio options object. Every field is optional; a field that is `undefined` is skipped, and a
 * field that is present must satisfy its constraint. Checks run in a fixed order and the first violation is reported.
 *
 * Throws a `TypeError` if `audioOptions` is not an object or if any provided field is invalid.
 */
const validateAudioOptions = (audioOptions: ConversionAudioOptions) => {
    // Local predicates shared by several of the checks below
    const isPositiveInteger = (value: unknown) =>
        typeof value === 'number' && Number.isInteger(value) && value > 0;
    const isTrackGroup = (value: unknown) => value instanceof OutputTrackGroup;

    if (typeof audioOptions !== 'object' || !audioOptions) {
        throw new TypeError('options.audio, when provided, must be an object.');
    }

    const discard = audioOptions.discard;
    if (!(discard === undefined || typeof discard === 'boolean')) {
        throw new TypeError('options.audio.discard, when provided, must be a boolean.');
    }

    const forceTranscode = audioOptions.forceTranscode;
    if (!(forceTranscode === undefined || typeof forceTranscode === 'boolean')) {
        throw new TypeError('options.audio.forceTranscode, when provided, must be a boolean.');
    }

    const codec = audioOptions.codec;
    if (codec !== undefined && !AUDIO_CODECS.includes(codec)) {
        throw new TypeError(
            `options.audio.codec, when provided, must be one of: ${AUDIO_CODECS.join(', ')}.`,
        );
    }

    // A bitrate is either a Quality instance or a plain positive integer
    const bitrate = audioOptions.bitrate;
    if (bitrate !== undefined && !(bitrate instanceof Quality) && !isPositiveInteger(bitrate)) {
        throw new TypeError('options.audio.bitrate, when provided, must be a positive integer or a quality.');
    }

    const numberOfChannels = audioOptions.numberOfChannels;
    if (numberOfChannels !== undefined && !isPositiveInteger(numberOfChannels)) {
        throw new TypeError('options.audio.numberOfChannels, when provided, must be a positive integer.');
    }

    const sampleRate = audioOptions.sampleRate;
    if (sampleRate !== undefined && !isPositiveInteger(sampleRate)) {
        throw new TypeError('options.audio.sampleRate, when provided, must be a positive integer.');
    }

    const sampleFormat = audioOptions.sampleFormat;
    if (sampleFormat !== undefined && !['u8', 's16', 's32', 'f32'].includes(sampleFormat)) {
        throw new TypeError('options.audio.sampleFormat, when provided, must be one of: u8, s16, s32, f32.');
    }

    const process = audioOptions.process;
    if (!(process === undefined || typeof process === 'function')) {
        throw new TypeError('options.audio.process, when provided, must be a function.');
    }

    const processedNumberOfChannels = audioOptions.processedNumberOfChannels;
    if (processedNumberOfChannels !== undefined && !isPositiveInteger(processedNumberOfChannels)) {
        throw new TypeError('options.audio.processedNumberOfChannels, when provided, must be a positive integer.');
    }

    const processedSampleRate = audioOptions.processedSampleRate;
    if (processedSampleRate !== undefined && !isPositiveInteger(processedSampleRate)) {
        throw new TypeError('options.audio.processedSampleRate, when provided, must be a positive integer.');
    }

    // Either a single group or an array consisting solely of groups
    const group = audioOptions.group;
    if (group !== undefined) {
        const groupIsValid = isTrackGroup(group) || (Array.isArray(group) && group.every(isTrackGroup));
        if (!groupIsValid) {
            throw new TypeError(
                'options.audio.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.',
            );
        }
    }
};
// Fallback audio parameters.
// NOTE(review): the code using these lies outside this excerpt; presumably they are applied when an audio track's
// channel count or sample rate cannot be determined. Confirm against the audio track processing code.
/** Fallback channel count (two channels). */
const FALLBACK_NUMBER_OF_CHANNELS = 2;
/** Fallback sample rate; presumably in hertz, like `ConversionAudioOptions.sampleRate`. */
const FALLBACK_SAMPLE_RATE = 48000;
/**
* An input track that was discarded (excluded) from a {@link Conversion} alongside the discard reason.
*
* One entry is recorded per discarded set of track options, so when fan-out is used (multiple options for the same
* input track), the same track can appear in multiple entries.
* @group Conversion
* @public
*/
export type DiscardedTrack = {
/** The track that was discarded. */
track: InputTrack;
/**
* The reason for discarding the track.
*
* - `'discarded_by_user'`: You discarded this track by setting `discard: true`, or by providing an empty array of
* options for it.
* - `'max_track_count_reached'`: The output had no more room for another track.
* - `'max_track_count_of_type_reached'`: The output had no more room for another track of this type, or the output
* doesn't support this track type at all.
* - `'unknown_source_codec'`: We don't know the codec of the input track and therefore don't know what to do
* with it.
* - `'undecodable_source_codec'`: The input track's codec is known, but we are unable to decode it.
* - `'no_encodable_target_codec'`: We can't find a codec that we are able to encode and that can be contained
* within the output format. This reason can be hit if the environment doesn't support the necessary encoders, or if
* you requested a codec that cannot be contained within the output format.
*/
reason:
| 'discarded_by_user'
| 'max_track_count_reached'
| 'max_track_count_of_type_reached'
| 'unknown_source_codec'
| 'undecodable_source_codec'
| 'no_encodable_target_codec';
/** The options that were provided for this track, or `{}` if none were provided. */
trackOptions: ConversionVideoOptions | ConversionAudioOptions;
};
/**
* Represents a media file conversion process, used to convert one media file into another. In addition to conversion,
* this class can be used to resize and rotate video, resample audio, drop tracks, or trim to a specific time range.
* @group Conversion
* @public
*/
export class Conversion {
/** The input file. */
readonly input: Input;
/** The output file. */
readonly output: Output;
/** @internal */
_options: ConversionOptions;
/** @internal */
_startTimestamp!: number;
/** @internal */
_endTimestamp!: number;
/** @internal */
_addedCounts: Record<TrackType, number> = {
video: 0,
audio: 0,
subtitle: 0,
};
/** @internal */
_totalTrackCount = 0;
/** @internal */
_nextOutputTrackId = 0;
/** @internal */
_outputTrackIds: number[] = [];
/** @internal */
_outputOwnTrackGroups: (OutputTrackGroup | null)[] = [];
/** @internal */
_trackPromises: Promise<void>[] = [];
/** @internal */
_started: Promise<void>;
/** @internal */
_start: () => void;
/** @internal */
_executed = false;
/** @internal */
_synchronizer = new TrackSynchronizer();
/** @internal */
_totalDuration: number | null = null;
/** @internal */
_maxTimestamps = new Map<number, number>(); // Track ID -> timestamp
/** @internal */
_canceled = false;
/**
* A callback that is fired whenever the conversion progresses. Gets passed as first argument a number between
* 0 and 1, indicating the completion of the conversion. Note that a progress of 1 doesn't necessarily mean the
* conversion is complete; the conversion is complete once `execute()` resolves.
*
* As second argument, this callback receives the input time in seconds that has been processed.
*
* In order for progress to be computed, this property must be set before `execute` is called.
*/
onProgress?: (progress: number, processedTime: number) => unknown = undefined;
/** @internal */
_computeProgress = false;
/** @internal */
_lastProgress = 0;
/**
* Whether this conversion, as it has been configured, is valid and can be executed. If this field is `false`, check
* the `discardedTracks` field for reasons.
*
* Note: a conversion having discarded tracks does not automatically mean it is invalid; if the remaining, utilized
* tracks make for a valid output file, the conversion is still allowed.
*/
isValid = false;
/**
* The list of tracks that are included in the output file. When fan-out is used, the same track appears in this
* array multiple times.
*/
readonly utilizedTracks: InputTrack[] = [];
/** The list of tracks from the input file that have been discarded, alongside the discard reason. */
readonly discardedTracks: DiscardedTrack[] = [];
/**
 * Initializes a new conversion process without starting the conversion.
 *
 * The options are validated synchronously by the constructor (which throws a `TypeError` on invalid options, causing
 * the returned promise to reject); afterwards, the asynchronous initialization is awaited before the instance is
 * handed out.
 */
static async init(options: ConversionOptions): Promise<Conversion> {
    const instance = new Conversion(options);
    await instance._init();

    return instance;
}
/**
 * Creates a new Conversion instance (duh). Only performs synchronous validation of the passed options and stores
 * them; use {@link Conversion.init} to obtain a fully initialized instance.
 *
 * Throws a `TypeError` describing the first invalid option that is encountered. Function-valued `video` and `audio`
 * options can't be checked here; their return values are validated once they are invoked during initialization.
 */
private constructor(options: ConversionOptions) {
    if (typeof options !== 'object' || !options) {
        throw new TypeError('options must be an object.');
    }

    const source = options.input;
    if (!(source instanceof Input)) {
        throw new TypeError('options.input must be an Input.');
    }

    const destination = options.output;
    if (!(destination instanceof Output)) {
        throw new TypeError('options.output must be an Output.');
    }

    const trackMode = options.tracks;
    if (!(trackMode === undefined || trackMode === 'all' || trackMode === 'primary')) {
        throw new TypeError(
            'options.tracks, when provided, must be either \'all\' or \'primary\'.',
        );
    }

    // The conversion wants full control over the output, so it must not have been touched yet
    const outputAlreadyUsed = destination._tracks.length > 0
        || Object.keys(destination._metadataTags).length > 0
        || destination.state !== 'pending';
    if (outputAlreadyUsed) {
        throw new TypeError('options.output must be fresh: no tracks or metadata tags added and not started.');
    }

    // For functions, we'll validate the return value later
    const videoOptions = options.video;
    if (videoOptions !== undefined && typeof videoOptions !== 'function') {
        const entries = Array.isArray(videoOptions) ? videoOptions : [videoOptions];
        for (const entry of entries) {
            validateVideoOptions(entry);
        }
    }

    // For functions, we'll validate the return value later
    const audioOptions = options.audio;
    if (audioOptions !== undefined && typeof audioOptions !== 'function') {
        const entries = Array.isArray(audioOptions) ? audioOptions : [audioOptions];
        for (const entry of entries) {
            validateAudioOptions(entry);
        }
    }

    const trim = options.trim;
    if (trim !== undefined) {
        if (!trim || typeof trim !== 'object') {
            throw new TypeError('options.trim, when provided, must be an object.');
        }

        const trimStart = trim.start;
        if (trimStart !== undefined && !Number.isFinite(trimStart)) {
            throw new TypeError('options.trim.start, when provided, must be a finite number.');
        }

        const trimEnd = trim.end;
        if (trimEnd !== undefined && !Number.isFinite(trimEnd)) {
            throw new TypeError('options.trim.end, when provided, must be a finite number.');
        }

        if (trimStart !== undefined && trimEnd !== undefined && trimStart >= trimEnd) {
            throw new TypeError('options.trim.start must be less than options.trim.end.');
        }
    }

    const tags = options.tags;
    if (tags !== undefined) {
        const isNonNullObject = typeof tags === 'object' && !!tags;
        if (!isNonNullObject && typeof tags !== 'function') {
            throw new TypeError('options.tags, when provided, must be an object or a function.');
        }

        // Tags returned by a function are validated once the function has been called
        if (typeof tags === 'object') {
            validateMetadataTags(tags);
        }
    }

    const showWarnings = options.showWarnings;
    if (!(showWarnings === undefined || typeof showWarnings === 'boolean')) {
        throw new TypeError('options.showWarnings, when provided, must be a boolean.');
    }

    this._options = options;
    this.input = source;
    this.output = destination;

    // Promise/resolver pair stored as `_started`/`_start`
    const startSignal = promiseWithResolvers();
    this._started = startSignal.promise;
    this._start = startSignal.resolve;
}
/**
 * Performs the asynchronous part of the initialization. In order, this method:
 *
 * 1. Determines the eligible input tracks based on the track mode (`'all'` or `'primary'`).
 * 2. Resolves (and, for function-provided options, validates) the options for each track and records tracks the user
 *    chose to discard.
 * 3. Fixes the start and end timestamps of the conversion.
 * 4. Processes the remaining tracks one by one, respecting the track count limits of the output format.
 * 5. Pairs the tracks' own output track groups so that output pairability mirrors input pairability.
 * 6. Resolves the metadata tags and sets them on the output.
 * 7. Computes `isValid` and, unless disabled, logs warnings about discarded tracks and invalidity.
 *
 * Throws a `TypeError` if a function-provided `video`, `audio` or `tags` option yields an invalid value.
 * @internal
 */
async _init() {
    const inputFormat = await this.input.getFormat();

    let tracks: InputTrack[];
    let trackMode = this._options.tracks;
    if (trackMode === undefined) {
        // HACK to keep bundle size low, temp for now
        const defaultTrackMode = inputFormat.name.includes('(HLS)')
            ? 'primary'
            : 'all';
        trackMode = defaultTrackMode;
    }

    if (trackMode === 'all') {
        tracks = await this.input.getTracks();
    } else if (trackMode === 'primary') {
        const primaryVideoTrack = await this.input.getPrimaryVideoTrack();
        const primaryAudioTrack = await this.input.getPrimaryAudioTrack();
        tracks = [primaryVideoTrack, primaryAudioTrack].filter(x => x !== null);
    } else {
        assertNever(trackMode);
        assert(false);
    }

    const outputTrackCounts = this.output.format.getSupportedTrackCounts();

    // Input track counters
    let nVideo = 1;
    let nAudio = 1;

    // All tracks that aren't discarded by the user
    const filteredTracks: InputTrack[] = [];
    const filteredTrackOptions: (ConversionVideoOptions | ConversionAudioOptions)[][] = [];

    for (const track of tracks) {
        // Options are always normalized to an array; more than one element means fan-out
        let trackOptions: (ConversionVideoOptions | ConversionAudioOptions)[];

        if (track.isVideoTrack()) {
            if (this._options.video) {
                if (typeof this._options.video === 'function') {
                    const returnedTrackOptions = await this._options.video(track, nVideo) ?? {};
                    if (Array.isArray(returnedTrackOptions)) {
                        for (const obj of returnedTrackOptions) {
                            validateVideoOptions(obj);
                        }
                    } else {
                        validateVideoOptions(returnedTrackOptions);
                    }

                    trackOptions = Array.isArray(returnedTrackOptions)
                        ? returnedTrackOptions
                        : [returnedTrackOptions];
                    nVideo++;
                } else {
                    // Already validated
                    trackOptions = Array.isArray(this._options.video)
                        ? this._options.video
                        : [this._options.video];
                }
            } else {
                trackOptions = [{}];
            }
        } else if (track.isAudioTrack()) {
            if (this._options.audio) {
                if (typeof this._options.audio === 'function') {
                    const returnedTrackOptions = await this._options.audio(track, nAudio) ?? {};
                    if (Array.isArray(returnedTrackOptions)) {
                        for (const obj of returnedTrackOptions) {
                            validateAudioOptions(obj);
                        }
                    } else {
                        validateAudioOptions(returnedTrackOptions);
                    }

                    trackOptions = Array.isArray(returnedTrackOptions)
                        ? returnedTrackOptions
                        : [returnedTrackOptions];
                    nAudio++;
                } else {
                    // Already validated
                    trackOptions = Array.isArray(this._options.audio)
                        ? this._options.audio
                        : [this._options.audio];
                }
            } else {
                trackOptions = [{}];
            }
        } else {
            assert(false);
        }

        // Each discarding option gets its own entry in the list of discarded tracks
        const discardOptions = trackOptions.filter(x => x.discard);
        for (const discardOption of discardOptions) {
            this.discardedTracks.push({
                track,
                reason: 'discarded_by_user',
                trackOptions: discardOption,
            });
        }

        if (trackOptions.length === discardOptions.length) {
            if (trackOptions.length === 0) {
                // An empty options array means "no outputs for this track", which we treat as a user discard
                this.discardedTracks.push({
                    track,
                    reason: 'discarded_by_user',
                    trackOptions: {},
                });
            }

            continue;
        }

        const nonDiscardOptions = trackOptions.filter(x => !x.discard);
        filteredTracks.push(track);
        filteredTrackOptions.push(nonDiscardOptions);
    }

    if (this._options.trim?.start !== undefined) {
        this._startTimestamp = this._options.trim.start;
    } else {
        // Compute the start timestamp from the set of filtered tracks. Technically these can still be narrowed
        // down later due to discarded tracks, but we need to fix the start timestamp now due to track processing
        // depending on it.
        this._startTimestamp = Math.max(
            await this.input.getFirstTimestamp(filteredTracks),
            // Samples can also have negative timestamps, but the meaning typically is "don't present me", so let's
            // cut those out by default.
            0,
        );
    }

    // The end timestamp never lies before the start timestamp
    this._endTimestamp = Math.max(this._options.trim?.end ?? Infinity, this._startTimestamp);

    // Run these sequentially so that output tracks have a deterministic order
    for (let i = 0; i < filteredTracks.length; i++) {
        const track = filteredTracks[i]!;
        const options = filteredTrackOptions[i]!;

        for (const option of options) {
            if (this._totalTrackCount === outputTrackCounts.total.max) {
                this.discardedTracks.push({
                    track,
                    reason: 'max_track_count_reached',
                    trackOptions: option,
                });
                continue;
            }

            if (this._addedCounts[track.type] === outputTrackCounts[track.type].max) {
                this.discardedTracks.push({
                    track,
                    reason: 'max_track_count_of_type_reached',
                    trackOptions: option,
                });
                continue;
            }

            const outputTrackId = this._nextOutputTrackId++;

            if (track.isVideoTrack()) {
                await this._processVideoTrack(track, option as ConversionVideoOptions, outputTrackId);
            } else if (track.isAudioTrack()) {
                await this._processAudioTrack(track, option as ConversionAudioOptions, outputTrackId);
            } else {
                assert(false);
            }
        }
    }

    // When no track groups are set by the user, then the output track pairability should be *identical* to the
    // input's. We do the naive algorithm to achieve this: assign each track to its own group, and pair groups with
    // each other based on input track pairability.
    for (let i = 0; i < this.utilizedTracks.length - 1; i++) {
        for (let j = i + 1; j < this.utilizedTracks.length; j++) {
            const trackA = this.utilizedTracks[i]!;
            const trackB = this.utilizedTracks[j]!;
            const ownGroupA = this._outputOwnTrackGroups[i];
            const ownGroupB = this._outputOwnTrackGroups[j];
            assert(ownGroupA !== undefined);
            assert(ownGroupB !== undefined);

            // A `null` own group is skipped here; presumably it marks a track whose groups were set by the user
            if (ownGroupA && ownGroupB && trackA.canBePairedWith(trackB)) {
                ownGroupA.pairWith(ownGroupB);
            }
        }
    }

    // Now, let's deal with metadata tags

    const inputTags = await this.input.getMetadataTags();
    let outputTags: MetadataTags;

    if (this._options.tags) {
        const result = typeof this._options.tags === 'function'
            ? await this._options.tags(inputTags)
            : this._options.tags;
        validateMetadataTags(result);

        outputTags = result;
    } else {
        outputTags = inputTags;
    }

    // Somewhat dirty but pragmatic
    const inputAndOutputFormatMatch = inputFormat.mimeType === this.output.format.mimeType;
    const rawTagsAreUnchanged = inputTags.raw === outputTags.raw;

    if (inputTags.raw && rawTagsAreUnchanged && !inputAndOutputFormatMatch) {
        // If the input and output formats aren't the same, copying over raw metadata tags makes no sense and only
        // results in junk tags, so let's cut them out. We do this on a shallow copy: `outputTags` may be the very
        // object returned by the input or an object owned by the user, neither of which we should mutate.
        outputTags = { ...outputTags };
        delete outputTags.raw;
    }

    this.output.setMetadataTags(outputTags);

    // Let's check if the conversion can actually be executed
    this.isValid = this._totalTrackCount >= outputTrackCounts.total.min
        && this._addedCounts.video >= outputTrackCounts.video.min
        && this._addedCounts.audio >= outputTrackCounts.audio.min
        && this._addedCounts.subtitle >= outputTrackCounts.subtitle.min;

    if (this._options.showWarnings ?? true) {
        const warnElements: unknown[] = [];

        const unintentionallyDiscardedTracks = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user');
        if (unintentionallyDiscardedTracks.length > 0) {
            // Let's give the user a notice/warning about discarded tracks so they aren't confused
            warnElements.push(
                'Some tracks had to be discarded from the conversion:', unintentionallyDiscardedTracks,
            );
        }

        if (!this.isValid) {
            if (warnElements.length > 0) {
                warnElements.push('\n\n');
            }

            warnElements.push(this._getInvalidityExplanation().join(''));
        }

        if (warnElements.length > 0) {
            console.warn(...warnElements);
        }
    }
}
/**
 * Assembles the text fragments that explain why this conversion is not valid. The fragments are meant to be
 * concatenated by the caller (e.g. via `join('')`).
 *
 * @internal
 */
_getInvalidityExplanation() {
	// Nothing was discarded, so the output simply doesn't have the tracks it needs
	if (this.discardedTracks.length === 0) {
		return ['Due to missing tracks, this conversion cannot be executed.'];
	}

	const parts: string[] = ['Due to discarded tracks, this conversion cannot be executed.'];

	// Encodability is only blamed when at least one track was discarded for lack of an encodable codec and every
	// other discard was requested by the user.
	const hasUnencodableTrack = this.discardedTracks.some(
		entry => entry.reason === 'no_encodable_target_codec',
	);
	const hasUnrelatedReason = this.discardedTracks.some(
		entry => entry.reason !== 'discarded_by_user' && entry.reason !== 'no_encodable_target_codec',
	);

	if (hasUnrelatedReason || !hasUnencodableTrack) {
		parts.push('\nCheck the discardedTracks field for more info.');
		return parts;
	}

	// Collect, in first-seen order, every codec the output format supports for the types of the affected tracks
	const format = this.output.format;
	const codecNames = new Set<string>();

	for (const entry of this.discardedTracks) {
		if (entry.reason === 'discarded_by_user') {
			continue;
		}

		let supported: readonly string[];
		switch (entry.track.type) {
			case 'video':
				supported = format.getSupportedVideoCodecs();
				break;
			case 'audio':
				supported = format.getSupportedAudioCodecs();
				break;
			default:
				supported = format.getSupportedSubtitleCodecs();
				break;
		}

		for (const codec of supported) {
			codecNames.add(codec);
		}
	}

	const distinctCodecs = [...codecNames];

	if (distinctCodecs.length === 1) {
		parts.push(
			`\nTracks were discarded because your environment is not able to encode '${distinctCodecs[0]}'.`,
		);
	} else {
		const quotedList = distinctCodecs.map(codec => `'${codec}'`).join(', ');
		parts.push(
			'\nTracks were discarded because your environment is not able to encode any of the following'
			+ ` codecs: ${quotedList}.`,
		);
	}

	// Point the user to the extension packages that can provide the missing encoders
	const extensionHints: [string[], string][] = [
		[
			['mp3'],
			'\nThe @mediabunny/mp3-encoder extension package provides support for encoding MP3.',
		],
		[
			['aac'],
			'\nThe @mediabunny/aac-encoder extension package provides support for encoding AAC.',
		],
		[
			['ac3', 'eac3'],
			'\nThe @mediabunny/ac3 extension package provides support'
			+ ' for encoding and decoding AC-3/E-AC-3.',
		],
		[
			['flac'],
			'\nThe @mediabunny/flac-encoder extension package provides support for encoding FLAC.',
		],
	];

	for (const [triggerCodecs, hint] of extensionHints) {
		if (triggerCodecs.some(codec => codecNames.has(codec))) {
			parts.push(hint);
		}
	}

	return parts;
}
/**
 * Runs the conversion. The returned promise resolves once all track pipelines have finished and the output has
 * been finalized.
 *
 * Throws if `isValid` is `false`, if the conversion has already been executed once, or (with a
 * `ConversionCanceledError`) if the conversion was canceled while running.
 */
async execute() {
	if (!this.isValid) {
		const explanation = this._getInvalidityExplanation().join('');
		throw new Error(
			'Cannot execute this conversion because its output configuration is invalid. Make sure to always check'
			+ ' the isValid field before executing a conversion.\n'
			+ explanation,
		);
	}

	if (this._executed) {
		throw new Error('Conversion cannot be executed twice.');
	}
	this._executed = true;

	this._outputTrackIds.forEach((trackId) => {
		this._synchronizer.declareTrack(trackId);
	});

	if (this.onProgress) {
		// Only tracks that actually ended up in the output contribute to the total duration
		const trackDurations = await Promise.all(
			Array.from(new Set(this.utilizedTracks), async (utilizedTrack) => {
				const live = await utilizedTrack.isLive();
				if (live) {
					return Infinity; // Upper bound (assuming no universe heat death)
				}

				const metadataDuration = await utilizedTrack.getDurationFromMetadata();
				return metadataDuration ?? (await utilizedTrack.computeDuration());
			}),
		);
		const longestDuration = Math.max(0, ...trackDurations);

		this._computeProgress = true;

		// The amount of media to process is bounded by both the media's end and the trim end
		const untilMediaEnd = longestDuration - this._startTimestamp;
		const untilTrimEnd = this._endTimestamp - this._startTimestamp;
		this._totalDuration = Math.min(untilMediaEnd, untilTrimEnd);

		this._outputTrackIds.forEach((trackId) => {
			this._maxTimestamps.set(trackId, 0);
		});

		this.onProgress?.(0, 0);
	}

	await this.output.start();
	this._start();

	try {
		await Promise.all(this._trackPromises);
	} catch (failure) {
		// Make sure to cancel to stop other encoding processes and clean up resources
		if (!this._canceled) {
			void this.cancel();
		}

		throw failure;
	}

	if (this._canceled) {
		throw new ConversionCanceledError();
	}

	await this.output.finalize();

	if (this._computeProgress) {
		const reachedTimestamps = this._maxTimestamps.values();
		const slowestTrackTimestamp = Math.min(...reachedTimestamps);
		this.onProgress?.(1, slowestTrackTimestamp);
	}
}
/**
 * Cancels the conversion process, causing any ongoing `execute` call to throw a `ConversionCanceledError`.
 * Does nothing if the output is already finalizing or finalized; logs a warning if the conversion has
 * already been canceled before.
 */
async cancel() {
	const { state } = this.output;
	const outputIsDone = state === 'finalizing' || state === 'finalized';
	if (outputIsDone) {
		return;
	}

	if (!this._canceled) {
		this._canceled = true;
		await this.output.cancel();
		return;
	}

	console.warn('Conversion already canceled.');
}
/**
 * Sets up the conversion pipeline for a single input video track and registers the resulting track on the
 * output.
 *
 * Depending on the track options and on what the output format supports, the track is either copied over packet
 * by packet (fast path) or decoded and re-encoded. The media work itself is not awaited here: it is pushed onto
 * `_trackPromises` and only begins once `_started` resolves.
 *
 * If the source codec is unknown, the track cannot be decoded, or no encodable target codec is found, the track
 * is recorded in `discardedTracks` and nothing is added to the output.
 *
 * @param track - The input video track to convert.
 * @param trackOptions - The conversion options that apply to this track.
 * @param outputTrackId - The ID under which this track reports progress and is known to the synchronizer.
 * @internal
 */
async _processVideoTrack(track: InputVideoTrack, trackOptions: ConversionVideoOptions, outputTrackId: number) {
	const sourceCodec = await track.getCodec();
	if (!sourceCodec) {
		this.discardedTracks.push({
			track,
			reason: 'unknown_source_codec',
			trackOptions,
		});
		return;
	}

	let videoSource: VideoSource;

	// The rotation already carried by the input track plus the rotation requested in the options
	const innateRotation = await track.getRotation();
	const totalRotation = normalizeRotation(innateRotation + (trackOptions.rotate ?? 0));
	let outputTrackRotation = totalRotation;
	const canUseRotationMetadata = this.output.format.supportsVideoRotationMetadata
		&& (trackOptions.allowRotationMetadata ?? true);

	const squarePixelWidth = await track.getSquarePixelWidth();
	const squarePixelHeight = await track.getSquarePixelHeight();

	// When the total rotation is not a multiple of 180 degrees, width and height trade places
	const [rotatedWidth, rotatedHeight] = totalRotation % 180 === 0
		? [squarePixelWidth, squarePixelHeight]
		: [squarePixelHeight, squarePixelWidth];

	let crop = trackOptions.crop;
	if (crop) {
		crop = clampCropRectangle(crop, rotatedWidth, rotatedHeight);
	}

	const [originalWidth, originalHeight] = crop
		? [crop.width, crop.height]
		: [rotatedWidth, rotatedHeight];

	let width = originalWidth;
	let height = originalHeight;
	const aspectRatio = width / height;

	// A lot of video encoders require that the dimensions be multiples of 2
	if (trackOptions.width !== undefined && trackOptions.height === undefined) {
		// Only the width is given: derive the height from the aspect ratio
		width = ceilToMultipleOfTwo(trackOptions.width);
		height = ceilToMultipleOfTwo(Math.round(width / aspectRatio));
	} else if (trackOptions.width === undefined && trackOptions.height !== undefined) {
		// Only the height is given: derive the width from the aspect ratio
		height = ceilToMultipleOfTwo(trackOptions.height);
		width = ceilToMultipleOfTwo(Math.round(height * aspectRatio));
	} else if (trackOptions.width !== undefined && trackOptions.height !== undefined) {
		width = ceilToMultipleOfTwo(trackOptions.width);
		height = ceilToMultipleOfTwo(trackOptions.height);
	}

	const firstTimestamp = await track.getFirstTimestamp();
	let videoCodecs = this.output.format.getSupportedVideoCodecs();

	// Any single one of these conditions rules out the packet-copy fast path
	const needsTranscode = !!trackOptions.forceTranscode
		|| firstTimestamp < this._startTimestamp
		|| !!trackOptions.frameRate
		|| trackOptions.keyFrameInterval !== undefined
		|| trackOptions.process !== undefined
		|| trackOptions.bitrate !== undefined
		|| !videoCodecs.includes(sourceCodec)
		|| (trackOptions.codec && trackOptions.codec !== sourceCodec)
		|| width !== originalWidth
		|| height !== originalHeight
		// TODO This is suboptimal: Forcing a rerender when both rotation and process are set is not
		// performance-optimal, but right now there's no other way because we can't change the track rotation
		// metadata after the output has already started. Should be possible with API changes in v2, though!
		|| (totalRotation !== 0 && !canUseRotationMetadata)
		|| !!crop;

	const alpha = trackOptions.alpha ?? 'discard';

	if (!needsTranscode) {
		// Fast path, we can simply copy over the encoded packets
		const source = new EncodedVideoPacketSource(sourceCodec);
		videoSource = source;

		this._trackPromises.push((async () => {
			await this._started;

			const sink = new EncodedPacketSink(track);
			const decoderConfig = await track.getDecoderConfig();
			const meta: EncodedVideoChunkMetadata = { decoderConfig: decoderConfig ?? undefined };

			for await (const packet of sink.packets(undefined, undefined, { verifyKeyPackets: true })) {
				if (this._canceled) {
					return;
				}

				if (packet.timestamp >= this._endTimestamp) {
					break;
				}

				// Shift the packet so that the output timeline begins at the start timestamp
				const modifiedPacket = packet.clone({
					timestamp: packet.timestamp - this._startTimestamp,
					sideData: alpha === 'discard'
						? {} // Remove alpha side data
						: packet.sideData,
				});

				// Expected to hold because the fast path is only taken when the track's first timestamp is not
				// before the start timestamp
				assert(modifiedPacket.timestamp >= 0);

				this._reportProgress(outputTrackId, modifiedPacket.timestamp + modifiedPacket.duration);
				await source.add(modifiedPacket, meta);

				if (this._synchronizer.shouldWait(outputTrackId, modifiedPacket.timestamp)) {
					await this._synchronizer.wait(modifiedPacket.timestamp);
				}
			}

			source.close();
			this._synchronizer.closeTrack(outputTrackId);
		})());
	} else {
		// We need to decode & reencode the video
		const canDecode = await track.canDecode();
		if (!canDecode) {
			this.discardedTracks.push({
				track,
				reason: 'undecodable_source_codec',
				trackOptions,
			});
			return;
		}

		if (trackOptions.codec) {
			videoCodecs = videoCodecs.filter(codec => codec === trackOptions.codec);
		}

		const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;

		// If a process function is set together with explicit processed dimensions, those are the dimensions
		// checked for encodability
		const encodableCodec = await getFirstEncodableVideoCodec(videoCodecs, {
			width: trackOptions.process && trackOptions.processedWidth
				? trackOptions.processedWidth
				: width,
			height: trackOptions.process && trackOptions.processedHeight
				? trackOptions.processedHeight
				: height,
			bitrate,
		});
		if (!encodableCodec) {
			this.discardedTracks.push({
				track,
				reason: 'no_encodable_target_codec',
				trackOptions,
			});
			return;
		}

		const encodingConfig: VideoEncodingConfig = {
			codec: encodableCodec,
			bitrate,
			keyFrameInterval: trackOptions.keyFrameInterval,
			sizeChangeBehavior: trackOptions.fit ?? 'passThrough',
			alpha,
			hardwareAcceleration: trackOptions.hardwareAcceleration,
			transform: {},
		};
		assert(encodingConfig.transform);

		let needsRerender = width !== originalWidth
			|| height !== originalHeight
			|| (totalRotation !== 0 && (!canUseRotationMetadata || trackOptions.process !== undefined))
			|| !!crop
			// Don't expect encoders to reliably handle non-square pixels:
			|| squarePixelWidth !== await track.getCodedWidth()
			|| squarePixelHeight !== await track.getCodedHeight();

		if (!needsRerender) {
			// If we're directly passing decoded samples back to the encoder, sometimes the encoder may error due
			// to lack of support of certain video frame formats, like when HDR is at play. To check for this, we
			// first try to pass a single frame to the encoder to see how it behaves. If it throws, we then fall
			// back to the rerender path.
			//
			// Creating a new temporary Output is sort of hacky, but due to a lack of an isolated encoder API right
			// now, this is the simplest way. Will refactor in the future! TODO
			const tempOutput = new Output({
				format: new Mp4OutputFormat(), // Supports all video codecs
				target: new NullTarget(),
			});

			const tempSource = new VideoSampleSource(encodingConfig);
			tempOutput.addVideoTrack(tempSource);

			await tempOutput.start();

			const sink = new VideoSampleSink(track);
			const firstSample = await sink.getSample(firstTimestamp); // Let's just use the first sample

			if (firstSample) {
				try {
					try {
						await tempSource.add(firstSample);
					} finally {
						// Release the probe sample even when the encoder rejects it; a failing probe is the very
						// case this check exists for, so the sample must not leak on that path.
						firstSample.close();
					}

					await tempOutput.finalize();
				} catch (error) {
					console.info('Error when probing encoder support. Falling back to rerender path.', error);
					needsRerender = true;

					void tempOutput.cancel();
				}
			} else {
				await tempOutput.cancel();
			}
		}

		if (trackOptions.frameRate) {
			encodingConfig.transform.frameRate = trackOptions.frameRate;
		}

		if (needsRerender) {
			outputTrackRotation = 0; // Since the rotation is baked into the output

			encodingConfig.transform.width = width;
			encodingConfig.transform.height = height;
			encodingConfig.transform.fit = trackOptions.fit ?? 'fill';
			// Only the rotation on top of the track's innate rotation is passed to the transform
			encodingConfig.transform.rotate = normalizeRotation(totalRotation - innateRotation);
			encodingConfig.transform.crop = crop;
			encodingConfig.transform.alpha = alpha;
		}

		const source = new VideoSampleSource(encodingConfig);
		videoSource = source;

		this._trackPromises.push((async () => {
			await this._started;

			const sink = new VideoSampleSink(track);

			for await (const sample of sink.samples(this._startTimestamp, this._endTimestamp)) {
				try {
					if (this._canceled) {
						return;
					}

					// Samples that begin before the start timestamp are pinned to the start of the output
					const adjustedSampleTimestamp = Math.max(sample.timestamp - this._startTimestamp, 0);
					sample.setTimestamp(adjustedSampleTimestamp);

					await this._registerVideoSample(trackOptions, outputTrackId, source, sample);
				} finally {
					// Always release the decoded sample, including on cancellation and when registering throws
					sample.close();
				}
			}

			source.close();
			this._synchronizer.closeTrack(outputTrackId);
		})());
	}

	// Without a user-provided group, the track gets a group of its own
	let ownGroup: OutputTrackGroup | null = null;
	if (!trackOptions.group) {
		ownGroup = new OutputTrackGroup();
	}

	const videoTrackLanguageCode = await track.getLanguageCode();

	this.output.addVideoTrack(videoSource, {
		frameRate: trackOptions.frameRate,
		// TODO: This condition can be removed when all demuxers properly homogenize to BCP47 in v2
		languageCode: isIso639Dash2LanguageCode(videoTrackLanguageCode) ? videoTrackLanguageCode : undefined,
		name: await track.getName() ?? undefined,
		disposition: await track.getDisposition(),
		rotation: outputTrackRotation,
		group: ownGroup ?? trackOptions.group,
	});

	// Bookkeeping; these three arrays are pushed to together so their entries stay index-aligned
	this._addedCounts.video++;
	this._totalTrackCount++;
	this.utilizedTracks.push(track);
	this._outputTrackIds.push(outputTrackId);
	this._outputOwnTrackGroups.push(ownGroup);
}
/** @internal */
async _registerVideoSample(
trackOptions: ConversionVideoOptions,
outputTrackId: number,
source: VideoSampleSource,
sample: VideoSample,
) {
if (this._canceled) {
return;
}
this._reportProgress(outputTrackId, sample.timestamp + sample.duration);
let finalSamples: VideoSample[];
if (!trackOptions.process) {
finalSamples = [sample];
} else {
let processed = trackOptions.process(sample);
if (processed instanceof Promise) processed = await processed;
if (!Array.isArray(processed)) {
processed = processed === null ? []