UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

1,017 lines 58.8 kB
/*! * Copyright (c) 2026-present, Vanilagy and contributors * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { AUDIO_CODECS, NON_PCM_AUDIO_CODECS, VIDEO_CODECS, } from './codec.js'; import { getEncodableAudioCodecs, getFirstEncodableVideoCodec, Quality, QUALITY_HIGH, } from './encode.js'; import { Input } from './input.js'; import { AudioSampleSink, EncodedPacketSink, VideoSampleSink, } from './media-sink.js'; import { EncodedVideoPacketSource, EncodedAudioPacketSource, VideoSampleSource, AudioSampleSource, } from './media-source.js'; import { assert, assertNever, ceilToMultipleOfTwo, clamp, isIso639Dash2LanguageCode, normalizeRotation, promiseWithResolvers, } from './misc.js'; import { Output, OutputTrackGroup } from './output.js'; import { Mp4OutputFormat } from './output-format.js'; import { AudioSample, audioSampleToInterleavedFormat, clampCropRectangle, toInterleavedAudioFormat, validateCropRectangle, VideoSample, } from './sample.js'; import { validateMetadataTags } from './metadata.js'; import { NullTarget } from './target.js'; import { AudioResampler } from './resample.js'; const validateVideoOptions = (videoOptions) => { if (!videoOptions || typeof videoOptions !== 'object') { throw new TypeError('options.video, when provided, must be an object.'); } if (videoOptions?.discard !== undefined && typeof videoOptions.discard !== 'boolean') { throw new TypeError('options.video.discard, when provided, must be a boolean.'); } if (videoOptions?.forceTranscode !== undefined && typeof videoOptions.forceTranscode !== 'boolean') { throw new TypeError('options.video.forceTranscode, when provided, must be a boolean.'); } if (videoOptions?.codec !== undefined && !VIDEO_CODECS.includes(videoOptions.codec)) { throw new TypeError(`options.video.codec, when provided, must be one of: ${VIDEO_CODECS.join(', ')}.`); } if (videoOptions?.bitrate !== undefined && !(videoOptions.bitrate instanceof Quality) && (!Number.isInteger(videoOptions.bitrate) || videoOptions.bitrate <= 0)) { throw new TypeError('options.video.bitrate, when provided, must be a positive integer or a quality.'); } if (videoOptions?.width !== undefined && (!Number.isInteger(videoOptions.width) || videoOptions.width <= 0)) { throw new TypeError('options.video.width, when provided, must be a positive integer.'); } if (videoOptions?.height !== undefined && (!Number.isInteger(videoOptions.height) || videoOptions.height <= 0)) { throw new TypeError('options.video.height, when provided, must be a positive integer.'); } if (videoOptions?.fit !== undefined && !['fill', 'contain', 'cover'].includes(videoOptions.fit)) { throw new TypeError('options.video.fit, when provided, must be one of \'fill\', \'contain\', or \'cover\'.'); } if (videoOptions?.width !== undefined && videoOptions.height !== undefined && videoOptions.fit === undefined) { throw new TypeError('When both options.video.width and options.video.height are provided, options.video.fit must also be' + ' provided.'); } if (videoOptions?.rotate !== undefined && ![0, 90, 180, 270].includes(videoOptions.rotate)) { throw new TypeError('options.video.rotate, when provided, must be 0, 90, 180 or 270.'); } if (videoOptions?.allowRotationMetadata !== undefined && typeof videoOptions.allowRotationMetadata !== 'boolean') { throw new TypeError('options.video.allowRotationMetadata, when provided, must be a boolean.'); } if (videoOptions?.crop !== undefined) { validateCropRectangle(videoOptions.crop, 'options.video.'); } if (videoOptions?.frameRate !== undefined && (!Number.isFinite(videoOptions.frameRate) || videoOptions.frameRate <= 0)) { throw new TypeError('options.video.frameRate, when provided, must be a finite positive number.'); } if (videoOptions?.alpha !== undefined && !['discard', 'keep'].includes(videoOptions.alpha)) { throw new TypeError('options.video.alpha, when provided, must be either \'discard\' or \'keep\'.'); } if (videoOptions?.keyFrameInterval !== undefined && (!Number.isFinite(videoOptions.keyFrameInterval) || videoOptions.keyFrameInterval < 0)) { throw new TypeError('options.video.keyFrameInterval, when provided, must be a non-negative number.'); } if (videoOptions?.process !== undefined && typeof videoOptions.process !== 'function') { throw new TypeError('options.video.process, when provided, must be a function.'); } if (videoOptions?.processedWidth !== undefined && (!Number.isInteger(videoOptions.processedWidth) || videoOptions.processedWidth <= 0)) { throw new TypeError('options.video.processedWidth, when provided, must be a positive integer.'); } if (videoOptions?.processedHeight !== undefined && (!Number.isInteger(videoOptions.processedHeight) || videoOptions.processedHeight <= 0)) { throw new TypeError('options.video.processedHeight, when provided, must be a positive integer.'); } if (videoOptions?.hardwareAcceleration !== undefined && !['no-preference', 'prefer-hardware', 'prefer-software'].includes(videoOptions.hardwareAcceleration)) { throw new TypeError('options.video.hardwareAcceleration, when provided, must be \'no-preference\', \'prefer-hardware\' or' + ' \'prefer-software\'.'); } if (videoOptions?.group !== undefined && !(videoOptions.group instanceof OutputTrackGroup || (Array.isArray(videoOptions.group) && videoOptions.group.every(x => x instanceof OutputTrackGroup)))) { throw new TypeError('options.video.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.'); } }; const validateAudioOptions = (audioOptions) => { if (!audioOptions || typeof audioOptions !== 'object') { throw new TypeError('options.audio, when provided, must be an object.'); } if (audioOptions?.discard !== undefined && typeof audioOptions.discard !== 'boolean') { throw new TypeError('options.audio.discard, when provided, must be a boolean.'); } if (audioOptions?.forceTranscode !== undefined && typeof audioOptions.forceTranscode !== 'boolean') { throw new TypeError('options.audio.forceTranscode, when provided, must be a boolean.'); } if (audioOptions?.codec !== undefined && !AUDIO_CODECS.includes(audioOptions.codec)) { throw new TypeError(`options.audio.codec, when provided, must be one of: ${AUDIO_CODECS.join(', ')}.`); } if (audioOptions?.bitrate !== undefined && !(audioOptions.bitrate instanceof Quality) && (!Number.isInteger(audioOptions.bitrate) || audioOptions.bitrate <= 0)) { throw new TypeError('options.audio.bitrate, when provided, must be a positive integer or a quality.'); } if (audioOptions?.numberOfChannels !== undefined && (!Number.isInteger(audioOptions.numberOfChannels) || audioOptions.numberOfChannels <= 0)) { throw new TypeError('options.audio.numberOfChannels, when provided, must be a positive integer.'); } if (audioOptions?.sampleRate !== undefined && (!Number.isInteger(audioOptions.sampleRate) || audioOptions.sampleRate <= 0)) { throw new TypeError('options.audio.sampleRate, when provided, must be a positive integer.'); } if (audioOptions?.sampleFormat !== undefined && !['u8', 's16', 's32', 'f32'].includes(audioOptions.sampleFormat)) { throw new TypeError('options.audio.sampleFormat, when provided, must be one of: u8, s16, s32, f32.'); } if (audioOptions?.process !== undefined && typeof audioOptions.process !== 'function') { throw new TypeError('options.audio.process, when provided, must be a function.'); } if (audioOptions?.processedNumberOfChannels !== undefined && (!Number.isInteger(audioOptions.processedNumberOfChannels) || audioOptions.processedNumberOfChannels <= 0)) { throw new TypeError('options.audio.processedNumberOfChannels, when provided, must be a positive integer.'); } if (audioOptions?.processedSampleRate !== undefined && (!Number.isInteger(audioOptions.processedSampleRate) || audioOptions.processedSampleRate <= 0)) { throw new TypeError('options.audio.processedSampleRate, when provided, must be a positive integer.'); } if (audioOptions?.group !== undefined && !(audioOptions.group instanceof OutputTrackGroup || (Array.isArray(audioOptions.group) && audioOptions.group.every(x => x instanceof OutputTrackGroup)))) { throw new TypeError('options.audio.group, when provided, must be an OutputTrackGroup or an array of OutputTrackGroups.'); } }; const FALLBACK_NUMBER_OF_CHANNELS = 2; const FALLBACK_SAMPLE_RATE = 48000; /** * Represents a media file conversion process, used to convert one media file into another. In addition to conversion, * this class can be used to resize and rotate video, resample audio, drop tracks, or trim to a specific time range. * @group Conversion * @public */ export class Conversion { /** Initializes a new conversion process without starting the conversion. */ static async init(options) { const conversion = new Conversion(options); await conversion._init(); return conversion; } /** Creates a new Conversion instance (duh). */ constructor(options) { /** @internal */ this._addedCounts = { video: 0, audio: 0, subtitle: 0, }; /** @internal */ this._totalTrackCount = 0; /** @internal */ this._nextOutputTrackId = 0; /** @internal */ this._outputTrackIds = []; /** @internal */ this._outputOwnTrackGroups = []; /** @internal */ this._trackPromises = []; /** @internal */ this._executed = false; /** @internal */ this._synchronizer = new TrackSynchronizer(); /** @internal */ this._totalDuration = null; /** @internal */ this._maxTimestamps = new Map(); // Track ID -> timestamp /** @internal */ this._canceled = false; /** * A callback that is fired whenever the conversion progresses. Gets passed as first argument a number between * 0 and 1, indicating the completion of the conversion. Note that a progress of 1 doesn't necessarily mean the * conversion is complete; the conversion is complete once `execute()` resolves. * * As second argument, this callback receives the input time in seconds that has been processed. * * In order for progress to be computed, this property must be set before `execute` is called. */ this.onProgress = undefined; /** @internal */ this._computeProgress = false; /** @internal */ this._lastProgress = 0; /** * Whether this conversion, as it has been configured, is valid and can be executed. If this field is `false`, check * the `discardedTracks` field for reasons. * * Note: a conversion having discarded tracks does not automatically mean it is invalid; if the remaining, utilized * tracks make for a valid output file, the conversion is still allowed. */ this.isValid = false; /** * The list of tracks that are included in the output file. When fan-out is used, the same track appears in this * array multiple times. */ this.utilizedTracks = []; /** The list of tracks from the input file that have been discarded, alongside the discard reason. */ this.discardedTracks = []; if (!options || typeof options !== 'object') { throw new TypeError('options must be an object.'); } if (!(options.input instanceof Input)) { throw new TypeError('options.input must be an Input.'); } if (!(options.output instanceof Output)) { throw new TypeError('options.output must be an Output.'); } if (options.tracks !== undefined && options.tracks !== 'all' && options.tracks !== 'primary') { throw new TypeError('options.tracks, when provided, must be either \'all\' or \'primary\'.'); } if (options.output._tracks.length > 0 || Object.keys(options.output._metadataTags).length > 0 || options.output.state !== 'pending') { throw new TypeError('options.output must be fresh: no tracks or metadata tags added and not started.'); } if (options.video !== undefined && typeof options.video !== 'function') { if (Array.isArray(options.video)) { for (const obj of options.video) { validateVideoOptions(obj); } } else { validateVideoOptions(options.video); } } else { // We'll validate the return value later } if (options.audio !== undefined && typeof options.audio !== 'function') { if (Array.isArray(options.audio)) { for (const obj of options.audio) { validateAudioOptions(obj); } } else { validateAudioOptions(options.audio); } } else { // We'll validate the return value later } if (options.trim !== undefined && (!options.trim || typeof options.trim !== 'object')) { throw new TypeError('options.trim, when provided, must be an object.'); } if (options.trim?.start !== undefined && (!Number.isFinite(options.trim.start))) { throw new TypeError('options.trim.start, when provided, must be a finite number.'); } if (options.trim?.end !== undefined && (!Number.isFinite(options.trim.end))) { throw new TypeError('options.trim.end, when provided, must be a finite number.'); } if (options.trim?.start !== undefined && options.trim.end !== undefined && options.trim.start >= options.trim.end) { throw new TypeError('options.trim.start must be less than options.trim.end.'); } if (options.tags !== undefined && (typeof options.tags !== 'object' || !options.tags) && typeof options.tags !== 'function') { throw new TypeError('options.tags, when provided, must be an object or a function.'); } if (typeof options.tags === 'object') { validateMetadataTags(options.tags); } if (options.showWarnings !== undefined && typeof options.showWarnings !== 'boolean') { throw new TypeError('options.showWarnings, when provided, must be a boolean.'); } this._options = options; this.input = options.input; this.output = options.output; const { promise: started, resolve: start } = promiseWithResolvers(); this._started = started; this._start = start; } /** @internal */ async _init() { const inputFormat = await this.input.getFormat(); let tracks; let trackMode = this._options.tracks; if (trackMode === undefined) { // HACK to keep bundle size low, temp for now const defaultTrackMode = inputFormat.name.includes('(HLS)') ? 'primary' : 'all'; trackMode = defaultTrackMode; } if (trackMode === 'all') { tracks = await this.input.getTracks(); } else if (trackMode === 'primary') { const primaryVideoTrack = await this.input.getPrimaryVideoTrack(); const primaryAudioTrack = await this.input.getPrimaryAudioTrack(); tracks = [primaryVideoTrack, primaryAudioTrack].filter(x => x !== null); } else { assertNever(trackMode); assert(false); } const outputTrackCounts = this.output.format.getSupportedTrackCounts(); // Input track counters let nVideo = 1; let nAudio = 1; // All tracks that aren't discarded by the user const filteredTracks = []; const filteredTrackOptions = []; for (const track of tracks) { let trackOptions; if (track.isVideoTrack()) { if (this._options.video) { if (typeof this._options.video === 'function') { const returnedTrackOptions = await this._options.video(track, nVideo) ?? {}; if (Array.isArray(returnedTrackOptions)) { for (const obj of returnedTrackOptions) { validateVideoOptions(obj); } } else { validateVideoOptions(returnedTrackOptions); } trackOptions = Array.isArray(returnedTrackOptions) ? returnedTrackOptions : [returnedTrackOptions]; nVideo++; } else { // Already validated trackOptions = Array.isArray(this._options.video) ? this._options.video : [this._options.video]; } } else { trackOptions = [{}]; } } else if (track.isAudioTrack()) { if (this._options.audio) { if (typeof this._options.audio === 'function') { const returnedTrackOptions = await this._options.audio(track, nAudio) ?? {}; if (Array.isArray(returnedTrackOptions)) { for (const obj of returnedTrackOptions) { validateAudioOptions(obj); } } else { validateAudioOptions(returnedTrackOptions); } trackOptions = Array.isArray(returnedTrackOptions) ? returnedTrackOptions : [returnedTrackOptions]; nAudio++; } else { // Already validated trackOptions = Array.isArray(this._options.audio) ? this._options.audio : [this._options.audio]; } } else { trackOptions = [{}]; } } else { assert(false); } const discardOptions = trackOptions.filter(x => x.discard); for (const discardOption of discardOptions) { this.discardedTracks.push({ track, reason: 'discarded_by_user', trackOptions: discardOption, }); } if (trackOptions.length === discardOptions.length) { if (trackOptions.length === 0) { this.discardedTracks.push({ track, reason: 'discarded_by_user', trackOptions: {}, }); } continue; } const nonDiscardOptions = trackOptions.filter(x => !x.discard); filteredTracks.push(track); filteredTrackOptions.push(nonDiscardOptions); } if (this._options.trim?.start !== undefined) { this._startTimestamp = this._options.trim.start; } else { // Compute the start timestamp from the set of filtered tracks. Techncially these can still be narrowed // down later due to discarded tracks, but we need to fix the start timestamp now due to track processing // depending on it. this._startTimestamp = Math.max(await this.input.getFirstTimestamp(filteredTracks), // Samples can also have negative timestamps, but the meaning typically is "don't present me", so let's // cut those out by default. 0); } this._endTimestamp = Math.max(this._options.trim?.end ?? Infinity, this._startTimestamp); // Run these sequentially so that output tracks have a deterministic order for (let i = 0; i < filteredTracks.length; i++) { const track = filteredTracks[i]; const options = filteredTrackOptions[i]; for (const option of options) { if (this._totalTrackCount === outputTrackCounts.total.max) { this.discardedTracks.push({ track, reason: 'max_track_count_reached', trackOptions: option, }); continue; } if (this._addedCounts[track.type] === outputTrackCounts[track.type].max) { this.discardedTracks.push({ track, reason: 'max_track_count_of_type_reached', trackOptions: option, }); continue; } const outputTrackId = this._nextOutputTrackId++; if (track.isVideoTrack()) { await this._processVideoTrack(track, option, outputTrackId); } else if (track.isAudioTrack()) { await this._processAudioTrack(track, option, outputTrackId); } else { assert(false); } } } // When no track groups are set by the user, then the output track pairability should be *identical* to the // input's. We do the naive algorithm to achieve this: assign each track to its own group, and pair groups with // each other based on input track pairability. for (let i = 0; i < this.utilizedTracks.length - 1; i++) { for (let j = i + 1; j < this.utilizedTracks.length; j++) { const trackA = this.utilizedTracks[i]; const trackB = this.utilizedTracks[j]; const ownGroupA = this._outputOwnTrackGroups[i]; const ownGroupB = this._outputOwnTrackGroups[j]; assert(ownGroupA !== undefined); assert(ownGroupB !== undefined); if (ownGroupA && ownGroupB && trackA.canBePairedWith(trackB)) { ownGroupA.pairWith(ownGroupB); } } } // Now, let's deal with metadata tags const inputTags = await this.input.getMetadataTags(); let outputTags; if (this._options.tags) { const result = typeof this._options.tags === 'function' ? await this._options.tags(inputTags) : this._options.tags; validateMetadataTags(result); outputTags = result; } else { outputTags = inputTags; } // Somewhat dirty but pragmatic const inputAndOutputFormatMatch = inputFormat.mimeType === this.output.format.mimeType; const rawTagsAreUnchanged = inputTags.raw === outputTags.raw; if (inputTags.raw && rawTagsAreUnchanged && !inputAndOutputFormatMatch) { // If the input and output formats aren't the same, copying over raw metadata tags makes no sense and only // results in junk tags, so let's cut them out. delete outputTags.raw; } this.output.setMetadataTags(outputTags); // Let's check if the conversion can actually be executed this.isValid = this._totalTrackCount >= outputTrackCounts.total.min && this._addedCounts.video >= outputTrackCounts.video.min && this._addedCounts.audio >= outputTrackCounts.audio.min && this._addedCounts.subtitle >= outputTrackCounts.subtitle.min; if (this._options.showWarnings ?? true) { const warnElements = []; const unintentionallyDiscardedTracks = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user'); if (unintentionallyDiscardedTracks.length > 0) { // Let's give the user a notice/warning about discarded tracks so they aren't confused warnElements.push('Some tracks had to be discarded from the conversion:', unintentionallyDiscardedTracks); } if (!this.isValid) { if (warnElements.length > 0) { warnElements.push('\n\n'); } warnElements.push(this._getInvalidityExplanation().join('')); } if (warnElements.length > 0) { console.warn(...warnElements); } } } /** @internal */ _getInvalidityExplanation() { const elements = []; if (this.discardedTracks.length === 0) { elements.push('Due to missing tracks, this conversion cannot be executed.'); } else { const encodabilityIsTheProblem = this.discardedTracks.every(x => x.reason === 'discarded_by_user' || x.reason === 'no_encodable_target_codec') && this.discardedTracks.some(x => x.reason === 'no_encodable_target_codec'); elements.push('Due to discarded tracks, this conversion cannot be executed.'); if (encodabilityIsTheProblem) { const codecs = this.discardedTracks.flatMap((x) => { if (x.reason === 'discarded_by_user') return []; if (x.track.type === 'video') { return this.output.format.getSupportedVideoCodecs(); } else if (x.track.type === 'audio') { return this.output.format.getSupportedAudioCodecs(); } else { return this.output.format.getSupportedSubtitleCodecs(); } }); const uniqueCodecs = [...new Set(codecs)]; if (uniqueCodecs.length === 1) { elements.push(`\nTracks were discarded because your environment is not able to encode '${uniqueCodecs[0]}'.`); } else { elements.push('\nTracks were discarded because your environment is not able to encode any of the following' + ` codecs: ${uniqueCodecs.map(x => `'${x}'`).join(', ')}.`); } if (uniqueCodecs.includes('mp3')) { elements.push(`\nThe @mediabunny/mp3-encoder extension package provides support for encoding MP3.`); } if (uniqueCodecs.includes('aac')) { elements.push('\nThe @mediabunny/aac-encoder extension package provides support for encoding AAC.'); } if (uniqueCodecs.includes('ac3') || uniqueCodecs.includes('eac3')) { elements.push('\nThe @mediabunny/ac3 extension package provides support' + ' for encoding and decoding AC-3/E-AC-3.'); } if (uniqueCodecs.includes('flac')) { elements.push('\nThe @mediabunny/flac-encoder extension package provides support for encoding FLAC.'); } } else { elements.push('\nCheck the discardedTracks field for more info.'); } } return elements; } /** * Executes the conversion process. Resolves once conversion is complete. * * Will throw if `isValid` is `false`. */ async execute() { if (!this.isValid) { throw new Error('Cannot execute this conversion because its output configuration is invalid. Make sure to always check' + ' the isValid field before executing a conversion.\n' + this._getInvalidityExplanation().join('')); } if (this._executed) { throw new Error('Conversion cannot be executed twice.'); } this._executed = true; for (const id of this._outputTrackIds) { this._synchronizer.declareTrack(id); } if (this.onProgress) { // Compute duration using only the utilized tracks const uniqueUtilizedTracks = new Set(this.utilizedTracks); const durationPromises = [...uniqueUtilizedTracks].map(async (track) => { if (await track.isLive()) { return Infinity; // Upper bound (assuming no universe heat death) } return (await track.getDurationFromMetadata()) ?? (await track.computeDuration()); }); const duration = Math.max(0, ...await Promise.all(durationPromises)); this._computeProgress = true; this._totalDuration = Math.min(duration - this._startTimestamp, this._endTimestamp - this._startTimestamp); for (const id of this._outputTrackIds) { this._maxTimestamps.set(id, 0); } this.onProgress?.(0, 0); } await this.output.start(); this._start(); try { await Promise.all(this._trackPromises); } catch (error) { if (!this._canceled) { // Make sure to cancel to stop other encoding processes and clean up resources void this.cancel(); } throw error; } if (this._canceled) { throw new ConversionCanceledError(); } await this.output.finalize(); if (this._computeProgress) { const minTimestamp = Math.min(...this._maxTimestamps.values()); this.onProgress?.(1, minTimestamp); } } /** * Cancels the conversion process, causing any ongoing `execute` call to throw a `ConversionCanceledError`. * Does nothing if the conversion is already complete. */ async cancel() { if (this.output.state === 'finalizing' || this.output.state === 'finalized') { return; } if (this._canceled) { console.warn('Conversion already canceled.'); return; } this._canceled = true; await this.output.cancel(); } /** @internal */ async _processVideoTrack(track, trackOptions, outputTrackId) { const sourceCodec = await track.getCodec(); if (!sourceCodec) { this.discardedTracks.push({ track, reason: 'unknown_source_codec', trackOptions, }); return; } let videoSource; const innateRotation = await track.getRotation(); const totalRotation = normalizeRotation(innateRotation + (trackOptions.rotate ?? 0)); let outputTrackRotation = totalRotation; const canUseRotationMetadata = this.output.format.supportsVideoRotationMetadata && (trackOptions.allowRotationMetadata ?? true); const squarePixelWidth = await track.getSquarePixelWidth(); const squarePixelHeight = await track.getSquarePixelHeight(); const [rotatedWidth, rotatedHeight] = totalRotation % 180 === 0 ? [squarePixelWidth, squarePixelHeight] : [squarePixelHeight, squarePixelWidth]; let crop = trackOptions.crop; if (crop) { crop = clampCropRectangle(crop, rotatedWidth, rotatedHeight); } const [originalWidth, originalHeight] = crop ? [crop.width, crop.height] : [rotatedWidth, rotatedHeight]; let width = originalWidth; let height = originalHeight; const aspectRatio = width / height; // A lot of video encoders require that the dimensions be multiples of 2 if (trackOptions.width !== undefined && trackOptions.height === undefined) { width = ceilToMultipleOfTwo(trackOptions.width); height = ceilToMultipleOfTwo(Math.round(width / aspectRatio)); } else if (trackOptions.width === undefined && trackOptions.height !== undefined) { height = ceilToMultipleOfTwo(trackOptions.height); width = ceilToMultipleOfTwo(Math.round(height * aspectRatio)); } else if (trackOptions.width !== undefined && trackOptions.height !== undefined) { width = ceilToMultipleOfTwo(trackOptions.width); height = ceilToMultipleOfTwo(trackOptions.height); } const firstTimestamp = await track.getFirstTimestamp(); let videoCodecs = this.output.format.getSupportedVideoCodecs(); const needsTranscode = !!trackOptions.forceTranscode || firstTimestamp < this._startTimestamp || !!trackOptions.frameRate || trackOptions.keyFrameInterval !== undefined || trackOptions.process !== undefined || trackOptions.bitrate !== undefined || !videoCodecs.includes(sourceCodec) || (trackOptions.codec && trackOptions.codec !== sourceCodec) || width !== originalWidth || height !== originalHeight // TODO This is suboptimal: Forcing a rerender when both rotation and process are set is not // performance-optimal, but right now there's no other way because we can't change the track rotation // metadata after the output has already started. Should be possible with API changes in v2, though! || (totalRotation !== 0 && !canUseRotationMetadata) || !!crop; const alpha = trackOptions.alpha ?? 'discard'; if (!needsTranscode) { // Fast path, we can simply copy over the encoded packets const source = new EncodedVideoPacketSource(sourceCodec); videoSource = source; this._trackPromises.push((async () => { await this._started; const sink = new EncodedPacketSink(track); const decoderConfig = await track.getDecoderConfig(); const meta = { decoderConfig: decoderConfig ?? undefined }; for await (const packet of sink.packets(undefined, undefined, { verifyKeyPackets: true })) { if (this._canceled) { return; } if (packet.timestamp >= this._endTimestamp) { break; } const modifiedPacket = packet.clone({ timestamp: packet.timestamp - this._startTimestamp, sideData: alpha === 'discard' ? {} // Remove alpha side data : packet.sideData, }); assert(modifiedPacket.timestamp >= 0); this._reportProgress(outputTrackId, modifiedPacket.timestamp + modifiedPacket.duration); await source.add(modifiedPacket, meta); if (this._synchronizer.shouldWait(outputTrackId, modifiedPacket.timestamp)) { await this._synchronizer.wait(modifiedPacket.timestamp); } } source.close(); this._synchronizer.closeTrack(outputTrackId); })()); } else { // We need to decode & reencode the video const canDecode = await track.canDecode(); if (!canDecode) { this.discardedTracks.push({ track, reason: 'undecodable_source_codec', trackOptions, }); return; } if (trackOptions.codec) { videoCodecs = videoCodecs.filter(codec => codec === trackOptions.codec); } const bitrate = trackOptions.bitrate ?? QUALITY_HIGH; const encodableCodec = await getFirstEncodableVideoCodec(videoCodecs, { width: trackOptions.process && trackOptions.processedWidth ? trackOptions.processedWidth : width, height: trackOptions.process && trackOptions.processedHeight ? trackOptions.processedHeight : height, bitrate, }); if (!encodableCodec) { this.discardedTracks.push({ track, reason: 'no_encodable_target_codec', trackOptions, }); return; } const encodingConfig = { codec: encodableCodec, bitrate, keyFrameInterval: trackOptions.keyFrameInterval, sizeChangeBehavior: trackOptions.fit ?? 'passThrough', alpha, hardwareAcceleration: trackOptions.hardwareAcceleration, transform: {}, }; assert(encodingConfig.transform); let needsRerender = width !== originalWidth || height !== originalHeight || (totalRotation !== 0 && (!canUseRotationMetadata || trackOptions.process !== undefined)) || !!crop // Don't expect encoders to reliably handle non-square pixels: || squarePixelWidth !== await track.getCodedWidth() || squarePixelHeight !== await track.getCodedHeight(); if (!needsRerender) { // If we're directly passing decoded samples back to the encoder, sometimes the encoder may error due // to lack of support of certain video frame formats, like when HDR is at play. To check for this, we // first try to pass a single frame to the encoder to see how it behaves. If it throws, we then fall // back to the rerender path. // // Creating a new temporary Output is sort of hacky, but due to a lack of an isolated encoder API right // now, this is the simplest way. Will refactor in the future! TODO const tempOutput = new Output({ format: new Mp4OutputFormat(), // Supports all video codecs target: new NullTarget(), }); const tempSource = new VideoSampleSource(encodingConfig); tempOutput.addVideoTrack(tempSource); await tempOutput.start(); const sink = new VideoSampleSink(track); const firstSample = await sink.getSample(firstTimestamp); // Let's just use the first sample if (firstSample) { try { await tempSource.add(firstSample); firstSample.close(); await tempOutput.finalize(); } catch (error) { console.info('Error when probing encoder support. Falling back to rerender path.', error); needsRerender = true; void tempOutput.cancel(); } } else { await tempOutput.cancel(); } } if (trackOptions.frameRate) { encodingConfig.transform.frameRate = trackOptions.frameRate; } if (needsRerender) { outputTrackRotation = 0; // Since the rotation is baked into the output encodingConfig.transform.width = width; encodingConfig.transform.height = height; encodingConfig.transform.fit = trackOptions.fit ?? 'fill'; encodingConfig.transform.rotate = normalizeRotation(totalRotation - innateRotation); encodingConfig.transform.crop = crop; encodingConfig.transform.alpha = alpha; } const source = new VideoSampleSource(encodingConfig); videoSource = source; this._trackPromises.push((async () => { await this._started; const sink = new VideoSampleSink(track); for await (const sample of sink.samples(this._startTimestamp, this._endTimestamp)) { if (this._canceled) { sample.close(); return; } const adjustedSampleTimestamp = Math.max(sample.timestamp - this._startTimestamp, 0); sample.setTimestamp(adjustedSampleTimestamp); await this._registerVideoSample(trackOptions, outputTrackId, source, sample); sample.close(); } source.close(); this._synchronizer.closeTrack(outputTrackId); })()); } let ownGroup = null; if (!trackOptions.group) { ownGroup = new OutputTrackGroup(); } const videoTrackLanguageCode = await track.getLanguageCode(); this.output.addVideoTrack(videoSource, { frameRate: trackOptions.frameRate, // TODO: This condition can be removed when all demuxers properly homogenize to BCP47 in v2 languageCode: isIso639Dash2LanguageCode(videoTrackLanguageCode) ? videoTrackLanguageCode : undefined, name: await track.getName() ?? undefined, disposition: await track.getDisposition(), rotation: outputTrackRotation, group: ownGroup ?? trackOptions.group, }); this._addedCounts.video++; this._totalTrackCount++; this.utilizedTracks.push(track); this._outputTrackIds.push(outputTrackId); this._outputOwnTrackGroups.push(ownGroup); } /** @internal */ async _registerVideoSample(trackOptions, outputTrackId, source, sample) { if (this._canceled) { return; } this._reportProgress(outputTrackId, sample.timestamp + sample.duration); let finalSamples; if (!trackOptions.process) { finalSamples = [sample]; } else { let processed = trackOptions.process(sample); if (processed instanceof Promise) processed = await processed; if (!Array.isArray(processed)) { processed = processed === null ? [] : [processed]; } finalSamples = processed.map((x) => { if (x instanceof VideoSample) { return x; } if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) { return new VideoSample(x); } // Calling the VideoSample constructor here will automatically handle input validation for us // (it throws for any non-legal argument). return new VideoSample(x, { timestamp: sample.timestamp, duration: sample.duration, }); }); } try { for (const finalSample of finalSamples) { if (this._canceled) { break; } await source.add(finalSample); if (this._synchronizer.shouldWait(outputTrackId, finalSample.timestamp)) { await this._synchronizer.wait(finalSample.timestamp); } } } finally { for (const finalSample of finalSamples) { if (finalSample !== sample) { finalSample.close(); } } } } /** @internal */ async _processAudioTrack(track, trackOptions, outputTrackId) { const sourceCodec = await track.getCodec(); if (!sourceCodec) { this.discardedTracks.push({ track, reason: 'unknown_source_codec', trackOptions, }); return; } let audioSource; const originalNumberOfChannels = await track.getNumberOfChannels(); const originalSampleRate = await track.getSampleRate(); const firstTimestamp = await track.getFirstTimestamp(); let numberOfChannels = trackOptions.numberOfChannels ?? originalNumberOfChannels; let sampleRate = trackOptions.sampleRate ?? originalSampleRate; let needsResample = numberOfChannels !== originalNumberOfChannels || sampleRate !== originalSampleRate || firstTimestamp < this._startTimestamp || (firstTimestamp > this._startTimestamp && !this.output.format.supportsTimestampedMediaData); let audioCodecs = this.output.format.getSupportedAudioCodecs(); if (!trackOptions.forceTranscode && !trackOptions.bitrate && !needsResample && audioCodecs.includes(sourceCodec) && (!trackOptions.codec || trackOptions.codec === sourceCodec) && !trackOptions.process && trackOptions.sampleFormat === undefined) { // Fast path, we can simply copy over the encoded packets const source = new EncodedAudioPacketSource(sourceCodec); audioSource = source; this._trackPromises.push((async () => { await this._started; const sink = new EncodedPacketSink(track); const decoderConfig = await track.getDecoderConfig(); const meta = { decoderConfig: decoderConfig ?? undefined }; for await (const packet of sink.packets()) { if (this._canceled) { return; } if (packet.timestamp >= this._endTimestamp) { break; } const modifiedPacket = packet.clone({ timestamp: packet.timestamp - this._startTimestamp, }); assert(modifiedPacket.timestamp >= 0); this._reportProgress(outputTrackId, modifiedPacket.timestamp + modifiedPacket.duration); await source.add(modifiedPacket, meta); if (this._synchronizer.shouldWait(outputTrackId, modifiedPacket.timestamp)) { await this._synchronizer.wait(modifiedPacket.timestamp); } } source.close(); this._synchronizer.closeTrack(outputTrackId); })()); } else { // We need to decode & reencode the audio const canDecode = await track.canDecode(); if (!canDecode) { this.discardedTracks.push({ track, reason: 'undecodable_source_codec', trackOptions, }); return; } let codecOfChoice = null; if (trackOptions.codec) { audioCodecs = audioCodecs.filter(codec => codec === trackOptions.codec); } const bitrate = trackOptions.bitrate ?? QUALITY_HIGH; const encodableCodecs = await getEncodableAudioCodecs(audioCodecs, { numberOfChannels: trackOptions.process && trackOptions.processedNumberOfChannels ? trackOptions.processedNumberOfChannels : numberOfChannels, sampleRate: trackOptions.process && trackOptions.processedSampleRate ? trackOptions.processedSampleRate : sampleRate, bitrate, }); if (!encodableCodecs.some(codec => NON_PCM_AUDIO_CODECS.includes(codec)) && audioCodecs.some(codec => NON_PCM_AUDIO_CODECS.includes(codec)) && (numberOfChannels !== FALLBACK_NUMBER_OF_CHANNELS || sampleRate !== FALLBACK_SAMPLE_RATE)) { // We could not find a compatible non-PCM codec despite the container supporting them. This can be // caused by strange channel count or sample rate configurations. Therefore, let's try again but with // fallback parameters. const encodableCodecsWithDefaultParams = await getEncodableAudioCodecs(audioCodecs, { numberOfChannels: FALLBACK_NUMBER_OF_CHANNELS, sampleRate: FALLBACK_SAMPLE_RATE, bitrate, }); const nonPcmCodec = encodableCodecsWithDefaultParams .find(codec => NON_PCM_AUDIO_CODECS.includes(codec)); if (nonPcmCodec) { // We are able to encode using a non-PCM codec, but it'll require resampling needsResample = true; codecOfChoice = nonPcmCodec; numberOfChannels = FALLBACK_NUMBER_OF_CHANNELS; sampleRa