UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

555 lines (447 loc) 16.6 kB
/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { TrackType } from './output';
import { MediaCodec } from './codec';
import { DurationMetadataRequestOptions } from './demuxer';
import { Input } from './input';
import {
	InputAudioTrack,
	InputAudioTrackBacking,
	InputTrack,
	InputTrackBacking,
	InputVideoTrack,
	InputVideoTrackBacking,
} from './input-track';
import { PacketRetrievalOptions } from './media-sink';
import { arrayCount, assert, MaybePromise, roundToDivisor } from './misc';
import { EncodedPacket } from './packet';

/** Descriptive metadata attached to a segmented input. Every field may be unknown (`null`) except the lists. */
export type SegmentedInputMetadata = {
	name: string | null;
	/** This refers to the _peak_ bitrate. */
	bitrate: number | null;
	averageBitrate: number | null;
	codecs: MediaCodec[];
	codecStrings: string[];
	resolution: { width: number; height: number } | null;
	frameRate: number | null;
	isKeyFrameOnly: boolean;
};

/** Identifies a group that a segmented input is associated with, together with the kind of media in it. */
export type AssociatedGroup = {
	id: string;
	type: 'video' | 'audio' | 'subtitles' | 'closed-captions';
};

/** A single segment of a segmented input. */
export type Segment = {
	timestamp: number;
	duration: number;
	relativeToUnixEpoch: boolean;
	/**
	 * The segment used as the timing reference for this one. When `null`, the segment acts as its own reference
	 * (see `SegmentedInput.getMediaOffset`).
	 */
	firstSegment: Segment | null;
};

/** Options forwarded to the segment lookup methods of `SegmentedInput`. */
export type SegmentRetrievalOptions = {
	skipLiveWait?: boolean;
};

/** Declares a track up front so that its backing can be created without reading any media data. */
export type SegmentedInputTrackDeclaration = {
	id: number;
	type: TrackType;
};

/**
 * Base class for media that is split into a sequence of segments, each of which is readable through its own `Input`.
 * Subclasses supply the segment lookup logic; this class derives track backings and timestamp offsets from it.
 */
export abstract class SegmentedInput {
	input: Input;
	path: string;
	trackDeclarations: SegmentedInputTrackDeclaration[] | null;

	nextInputCacheAge = 0;
	inputCache: {
		segment: Segment;
		input: Input;
		age: number;
	}[] = [];

	/** Memoized result of `getTrackBackings`. Reset to `null` if the computation fails so that it can be retried. */
	trackBackingsPromise: Promise<InputTrackBacking[]> | null = null;
	firstSegment: Segment | null = null;
	/** First media timestamp of the input belonging to a reference ("first") segment, keyed by that segment. */
	firstSegmentFirstTimestamps = new WeakMap<Segment, number>();
	/** First media timestamp per input, see `getFirstTimestampForInput`. */
	firstTimestampCache = new WeakMap<Input, number>();

	constructor(input: Input, path: string, trackDeclarations: SegmentedInputTrackDeclaration[] | null) {
		this.input = input;
		this.path = path;
		this.trackDeclarations = trackDeclarations;
	}

	abstract getFirstSegment(options: SegmentRetrievalOptions): Promise<Segment | null>;
	abstract getSegmentAt(timestamp: number, options: SegmentRetrievalOptions): Promise<Segment | null>;
	abstract getNextSegment(segment: Segment, options: SegmentRetrievalOptions): Promise<Segment | null>;
	abstract getPreviousSegment(segment: Segment, options: SegmentRetrievalOptions): Promise<Segment | null>;
	abstract getInputForSegment(segment: Segment): Input;
	abstract getLiveRefreshInterval(): Promise<number | null>;

	/**
	 * Computes the duration as the end time (`timestamp + duration`) of the segment found at timestamp `Infinity`.
	 *
	 * @returns The end time of that segment, or `null` if no segment was found.
	 */
	async getDurationFromMetadata(options: DurationMetadataRequestOptions) {
		const lastSegment = await this.getSegmentAt(Infinity, {
			skipLiveWait: options.skipLiveWait,
		});
		if (!lastSegment) {
			return null;
		}

		return lastSegment.timestamp + lastSegment.duration;
	}

	/**
	 * Returns the track backings of this segmented input, creating them on first use.
	 *
	 * If track declarations were provided, one backing is created per declared video or audio track. Otherwise, the
	 * tracks are discovered by reading the input of the first segment; if there is no first segment, the result is
	 * an empty list.
	 *
	 * The result is memoized. A failed attempt is not memoized, so a later call starts over.
	 */
	async getTrackBackings(): Promise<InputTrackBacking[]> {
		if (!this.trackBackingsPromise) {
			const promise = (async () => {
				const backings: InputTrackBacking[] = [];

				// Track numbers are 1-based and counted separately per track type
				const addBacking = (decl: SegmentedInputTrackDeclaration) => {
					const number = arrayCount(backings, x => x.getType() === decl.type) + 1;

					if (decl.type === 'video') {
						backings.push(new SegmentedInputInputVideoTrackBacking(this, decl, number));
					} else if (decl.type === 'audio') {
						backings.push(new SegmentedInputInputAudioTrackBacking(this, decl, number));
					}
				};

				if (this.trackDeclarations) {
					for (const decl of this.trackDeclarations) {
						addBacking(decl);
					}
				} else {
					// There are no declarations, we must determine the tracks from the first segment
					this.firstSegment = await this.getFirstSegment({});
					if (!this.firstSegment) {
						return [];
					}

					const input = this.getInputForSegment(this.firstSegment);
					const inputTracks = await input.getTracks();

					for (const track of inputTracks) {
						if (track.type === 'video' || track.type === 'audio') {
							// IDs are assigned sequentially over the backings that are actually created
							addBacking({ id: backings.length + 1, type: track.type });
						}
					}
				}

				return backings;
			})();

			this.trackBackingsPromise = promise;

			// Don't hold on to a rejected promise, otherwise one transient failure would poison all future calls
			void promise.catch(() => {
				if (this.trackBackingsPromise === promise) {
					this.trackBackingsPromise = null;
				}
			});
		}

		return this.trackBackingsPromise;
	}

	/**
	 * Returns the first timestamp of the given input.
	 *
	 * This operation is done a lot and can be semi-expensive, so it's good to have a cache for it.
	 */
	async getFirstTimestampForInput(input: Input) {
		const existing = this.firstTimestampCache.get(input);
		if (existing !== undefined) {
			return existing;
		}

		const firstTimestamp = await input.getFirstTimestamp();
		this.firstTimestampCache.set(input, firstTimestamp);

		return firstTimestamp;
	}

	/**
	 * Computes the offset that must be added to timestamps read from `input` to place them on the timeline of the
	 * segmented input.
	 *
	 * @param segment - The segment being read.
	 * @param input - The input that belongs to `segment`.
	 */
	async getMediaOffset(segment: Segment, input: Input) {
		const firstSegment = segment.firstSegment ?? segment;

		let firstSegmentFirstTimestamp = this.firstSegmentFirstTimestamps.get(firstSegment);
		if (firstSegmentFirstTimestamp === undefined) {
			const firstInput = this.getInputForSegment(firstSegment);
			firstSegmentFirstTimestamp = await this.getFirstTimestampForInput(firstInput);
			this.firstSegmentFirstTimestamps.set(firstSegment, firstSegmentFirstTimestamp);
		}

		// The offset that maps the first segment's media timestamps onto its segment timestamp
		const firstSegmentOffset = firstSegment.timestamp - firstSegmentFirstTimestamp;

		if (firstSegment === segment) {
			return firstSegmentOffset;
		}

		const segmentFirstTimestamp = await this.getFirstTimestampForInput(input);

		// How far we've advanced since the first segment, measured once by the segment timestamps and once by the
		// timestamps found in the media data itself
		const segmentElapsed = segment.timestamp - firstSegment.timestamp;
		const inputElapsed = segmentFirstTimestamp - firstSegmentFirstTimestamp;
		const difference = inputElapsed - segmentElapsed;

		if (Math.abs(difference) <= Math.min(0.25, segmentElapsed)) { // Heuristic
			// We're close enough
			return firstSegmentOffset;
		} else {
			// Ideally, each segment has absolute timestamps that are relative to some outside clock which is
			// consistent across segments. This is often the case, but not always. Either the container format used
			// is not timestamped at all (like ADTS), or the segments' timestamps are simply inconsistent. In this
			// case, use the segment's relative timestamp to determine where we are, and completely offset out the
			// segment's input start timestamp.
			return segment.timestamp - segmentFirstTimestamp;
		}
	}

	/** Disposes every input in the input cache and empties the cache. */
	dispose() {
		for (const entry of this.inputCache) {
			entry.input.dispose();
		}
		this.inputCache.length = 0;
	}
}

/** Bookkeeping that links an adjusted packet back to where it came from. */
type PacketInfo = {
	segment: Segment;
	track: InputTrack;
	sourcePacket: EncodedPacket;
};

/**
 * Track backing that presents one track of a `SegmentedInput` as a single continuous track. Track properties are
 * read from the matching track in the first segment; packets are read segment by segment and shifted onto a common
 * timeline.
 */
class SegmentedInputInputTrackBacking implements InputTrackBacking {
	segmentedInput: SegmentedInput;
	decl: SegmentedInputTrackDeclaration;
	number: number;

	/** Maps each packet handed out by this backing to its origin, needed to continue iteration from it. */
	packetInfos = new WeakMap<EncodedPacket, PacketInfo>();
	/** Memoized result of `hydrate`. Reset to `null` if hydration fails so that it can be retried. */
	hydrationPromise: Promise<void> | null = null;
	/** The matching track in the first segment's input; `null` until `hydrate` has completed. */
	firstInputTrack: InputTrack | null = null;

	constructor(segmentedInput: SegmentedInput, decl: SegmentedInputTrackDeclaration, number: number) {
		this.segmentedInput = segmentedInput;
		this.decl = decl;
		this.number = number;
	}

	/**
	 * Resolves `firstInputTrack` by looking up the track with this backing's type and number in the input of the
	 * first segment. Also makes sure `segmentedInput.firstSegment` is set.
	 *
	 * The work is only done once; concurrent and later calls share the same promise. A failed attempt is not
	 * memoized, so a later call starts over.
	 *
	 * @throws If there is no first segment, or if the first segment's input has no matching track.
	 */
	hydrate() {
		if (!this.hydrationPromise) {
			const promise = (async () => {
				this.segmentedInput.firstSegment ??= await this.segmentedInput.getFirstSegment({});
				if (!this.segmentedInput.firstSegment) {
					throw new Error('Missing first segment, can\'t retrieve track.');
				}

				const input = this.segmentedInput.getInputForSegment(this.segmentedInput.firstSegment);
				const inputTracks = await input.getTracks();

				const track = inputTracks.find(x => x.type === this.decl.type && x.number === this.number);
				if (!track) {
					throw new Error('No matching track found in underlying media data.');
				}

				this.firstInputTrack = track;
			})();

			this.hydrationPromise = promise;

			// Don't hold on to a rejected promise, otherwise one transient failure would poison all future calls
			void promise.catch(() => {
				if (this.hydrationPromise === promise) {
					this.hydrationPromise = null;
				}
			});
		}

		return this.hydrationPromise;
	}

	getId(): number {
		return this.decl.id;
	}

	getType(): TrackType {
		return this.decl.type;
	}

	getNumber(): number {
		return this.number;
	}

	/** If the backing track is already present, delegate synchronously; otherwise, hydrate first. */
	delegate<T>(fn: () => MaybePromise<T>): MaybePromise<T> {
		if (this.firstInputTrack) {
			return fn();
		}

		return this.hydrate().then(fn);
	}

	async getDecoderConfig() {
		return this.delegate(() => this.firstInputTrack!._backing.getDecoderConfig());
	}

	getHasOnlyKeyPackets() {
		// The underlying backing may not implement this method, in which case the answer is unknown
		return this.delegate(() => this.firstInputTrack!._backing.getHasOnlyKeyPackets?.() ?? null);
	}

	getPairingMask() {
		return 1n;
	}

	getCodec() {
		return this.delegate(() => this.firstInputTrack!._backing.getCodec());
	}

	getInternalCodecId() {
		return this.delegate(() => this.firstInputTrack!._backing.getInternalCodecId());
	}

	getDisposition() {
		return this.delegate(() => this.firstInputTrack!._backing.getDisposition());
	}

	getLanguageCode() {
		return this.delegate(() => this.firstInputTrack!._backing.getLanguageCode());
	}

	getName() {
		return this.delegate(() => this.firstInputTrack!._backing.getName());
	}

	getTimeResolution() {
		return this.delegate(() => this.firstInputTrack!._backing.getTimeResolution());
	}

	/** Reports the `relativeToUnixEpoch` flag of the first segment. */
	async isRelativeToUnixEpoch() {
		await this.hydrate();
		assert(this.segmentedInput.firstSegment);

		return this.segmentedInput.firstSegment.relativeToUnixEpoch;
	}

	getBitrate() {
		return this.delegate(() => this.firstInputTrack!._backing.getBitrate());
	}

	getAverageBitrate() {
		return this.delegate(() => this.firstInputTrack!._backing.getAverageBitrate());
	}

	getDurationFromMetadata(options: DurationMetadataRequestOptions): Promise<number | null> {
		return this.segmentedInput.getDurationFromMetadata(options);
	}

	getLiveRefreshInterval(): Promise<number | null> {
		return this.segmentedInput.getLiveRefreshInterval();
	}

	/**
	 * Clones a packet read from a segment's input, shifting its timestamp by the segment's media offset and
	 * rewriting its sequence number so that it is ordered across segments. The clone is registered in `packetInfos`.
	 *
	 * Requires `segmentedInput.firstSegment` to be set and the packet to have a non-negative sequence number.
	 *
	 * @param packet - The packet as returned by the segment's own track.
	 * @param segment - The segment the packet was read from.
	 * @param track - The track (of the segment's input) the packet was read from.
	 */
	async createAdjustedPacket(packet: EncodedPacket, segment: Segment, track: InputTrack) {
		assert(packet.sequenceNumber >= 0);
		assert(this.segmentedInput.firstSegment);

		const mediaOffset = await this.segmentedInput.getMediaOffset(segment, track.input);

		// If we didn't do this then sequence numbers would exceed Number.MAX_SAFE_INTEGER for Unix-timestamped
		// segments
		const segmentTimestampRelativeToFirst = segment.timestamp - this.segmentedInput.firstSegment.timestamp;

		const modified = packet.clone({
			timestamp: roundToDivisor(
				packet.timestamp + mediaOffset,
				await track.getTimeResolution(),
			),
			// The 1e8 assumes a max of 100 MB per second, highly unlikely to be hit, so this should guarantee
			// monotonically increasing sequence numbers across segments.
			sequenceNumber: Math.floor(1e8 * segmentTimestampRelativeToFirst) + packet.sequenceNumber,
		});

		this.packetInfos.set(modified, {
			segment,
			track,
			sourcePacket: packet,
		});

		return modified;
	}

	/** Returns the adjusted first packet of the first segment's track, or `null` if that track has no packets. */
	async getFirstPacket(options: PacketRetrievalOptions): Promise<EncodedPacket | null> {
		await this.hydrate();
		assert(this.segmentedInput.firstSegment);
		assert(this.firstInputTrack);

		const packet = await this.firstInputTrack._backing.getFirstPacket(options);
		if (!packet) {
			return null;
		}

		return this.createAdjustedPacket(packet, this.segmentedInput.firstSegment, this.firstInputTrack);
	}

	getNextPacket(packet: EncodedPacket, options: PacketRetrievalOptions): Promise<EncodedPacket | null> {
		return this._getNextInternal(packet, options, false);
	}

	getNextKeyPacket(packet: EncodedPacket, options: PacketRetrievalOptions): Promise<EncodedPacket | null> {
		return this._getNextInternal(packet, options, true);
	}

	/**
	 * Shared implementation of `getNextPacket` and `getNextKeyPacket`. First tries to advance within the segment the
	 * given packet came from; if that segment is exhausted, moves on to the first packet of the next segment that
	 * contains a matching track.
	 *
	 * @param packet - A packet previously returned by this backing.
	 * @throws If `packet` was not returned by this backing.
	 */
	async _getNextInternal(
		packet: EncodedPacket,
		options: PacketRetrievalOptions,
		keyframesOnly: boolean,
	): Promise<EncodedPacket | null> {
		const info = this.packetInfos.get(packet);
		if (!info) {
			throw new Error('Packet was not created from this track.');
		}

		const nextPacket = keyframesOnly
			? await info.track._backing.getNextKeyPacket(info.sourcePacket, options)
			: await info.track._backing.getNextPacket(info.sourcePacket, options);
		if (nextPacket) {
			return this.createAdjustedPacket(nextPacket, info.segment, info.track);
		}

		let currentSegment = info.segment;

		while (true) {
			const nextSegment = await this.segmentedInput.getNextSegment(currentSegment, {
				skipLiveWait: options.skipLiveWait,
			});
			if (!nextSegment) {
				return null;
			}

			const nextInput = this.segmentedInput.getInputForSegment(nextSegment);
			const nextTracks = await nextInput.getTracks();
			const nextTrack = nextTracks.find(t => t.type === info.track.type && t.number === info.track.number);
			if (!nextTrack) {
				// This segment doesn't contain our track, skip over it
				currentSegment = nextSegment;
				continue;
			}

			// NOTE(review): The first packet of the next segment is returned even when `keyframesOnly` is set; this
			// presumably relies on segments starting with a key packet - confirm.
			const firstPacket = await nextTrack._backing.getFirstPacket(options);
			if (!firstPacket) {
				// NOTE(review): A segment whose track has no packets ends iteration instead of being skipped - confirm
				// this is intended.
				return null;
			}

			return this.createAdjustedPacket(firstPacket, nextSegment, nextTrack);
		}
	}

	getPacket(timestamp: number, options: PacketRetrievalOptions): Promise<EncodedPacket | null> {
		return this._getPacketInternal(timestamp, options, false);
	}

	getKeyPacket(timestamp: number, options: PacketRetrievalOptions): Promise<EncodedPacket | null> {
		return this._getPacketInternal(timestamp, options, true);
	}

	/**
	 * Shared implementation of `getPacket` and `getKeyPacket`. Starts at the segment found for `timestamp` and walks
	 * backwards through the previous segments until one yields a packet.
	 *
	 * @returns The adjusted packet, or `null` if no segment was found for `timestamp` or none of the searched
	 * segments yielded a packet.
	 */
	async _getPacketInternal(
		timestamp: number,
		options: PacketRetrievalOptions,
		keyframesOnly: boolean,
	): Promise<EncodedPacket | null> {
		let currentSegment = await this.segmentedInput.getSegmentAt(timestamp, {
			skipLiveWait: options.skipLiveWait,
		});
		if (!currentSegment) {
			return null;
		}

		await this.hydrate();

		while (currentSegment) {
			const input = this.segmentedInput.getInputForSegment(currentSegment);
			const tracks = await input.getTracks();
			const track = tracks.find(t => (
				t.type === this.firstInputTrack!.type
				&& t.number === this.firstInputTrack!.number
			));

			if (!track) {
				// Search the previous segment
				currentSegment = await this.segmentedInput.getPreviousSegment(currentSegment, {
					skipLiveWait: options.skipLiveWait,
				});
				continue;
			}

			// Translate the requested timestamp into the timestamp space of the segment's own input
			const mediaOffset = await this.segmentedInput.getMediaOffset(currentSegment, input);
			const offsetTimestamp = timestamp - mediaOffset;

			const packet = keyframesOnly
				? await track._backing.getKeyPacket(offsetTimestamp, options)
				: await track._backing.getPacket(offsetTimestamp, options);
			if (!packet) {
				// Search the previous segment
				currentSegment = await this.segmentedInput.getPreviousSegment(currentSegment, {
					skipLiveWait: options.skipLiveWait,
				});
				continue;
			}

			return this.createAdjustedPacket(packet, currentSegment, track);
		}

		return null;
	}
}

/** Video flavor of the segmented track backing; video properties are read from the first segment's video track. */
class SegmentedInputInputVideoTrackBacking
	extends SegmentedInputInputTrackBacking
	implements InputVideoTrackBacking {
	override firstInputTrack!: InputVideoTrack | null;

	override getType() {
		return 'video' as const;
	}

	override getCodec() {
		return this.delegate(() => this.firstInputTrack!._backing.getCodec());
	}

	getCodedWidth() {
		return this.delegate(() => this.firstInputTrack!._backing.getCodedWidth());
	}

	getCodedHeight() {
		return this.delegate(() => this.firstInputTrack!._backing.getCodedHeight());
	}

	getSquarePixelWidth() {
		return this.delegate(() => this.firstInputTrack!._backing.getSquarePixelWidth());
	}

	getSquarePixelHeight() {
		return this.delegate(() => this.firstInputTrack!._backing.getSquarePixelHeight());
	}

	getRotation() {
		return this.delegate(() => this.firstInputTrack!._backing.getRotation());
	}

	async getColorSpace(): Promise<VideoColorSpaceInit> {
		return this.delegate(() => this.firstInputTrack!._backing.getColorSpace());
	}

	async canBeTransparent(): Promise<boolean> {
		return this.delegate(() => this.firstInputTrack!._backing.canBeTransparent());
	}

	override async getDecoderConfig(): Promise<VideoDecoderConfig | null> {
		return this.delegate(() => this.firstInputTrack!._backing.getDecoderConfig());
	}
}

/** Audio flavor of the segmented track backing; audio properties are read from the first segment's audio track. */
class SegmentedInputInputAudioTrackBacking
	extends SegmentedInputInputTrackBacking
	implements InputAudioTrackBacking {
	// Nullable like in the base class: the track is only known once hydration has completed
	override firstInputTrack!: InputAudioTrack | null;

	override getType() {
		return 'audio' as const;
	}

	override getCodec() {
		return this.delegate(() => this.firstInputTrack!._backing.getCodec());
	}

	getNumberOfChannels() {
		return this.delegate(() => this.firstInputTrack!._backing.getNumberOfChannels());
	}

	getSampleRate() {
		return this.delegate(() => this.firstInputTrack!._backing.getSampleRate());
	}

	override async getDecoderConfig(): Promise<AudioDecoderConfig | null> {
		return this.delegate(() => this.firstInputTrack!._backing.getDecoderConfig());
	}
}