mediabunny
Version:
Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
1,580 lines (1,313 loc) • 88 kB
text/typescript
/*!
* Copyright (c) 2025-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import {
AacCodecInfo,
AudioCodec,
extractAudioCodecString,
extractVideoCodecString,
MediaCodec,
parseAacAudioSpecificConfig,
parsePcmCodec,
PCM_AUDIO_CODECS,
PcmAudioCodec,
VideoCodec,
} from '../codec';
import {
AvcDecoderConfigurationRecord,
HevcDecoderConfigurationRecord,
Vp9CodecInfo,
Av1CodecInfo,
extractVp9CodecInfoFromPacket,
extractAv1CodecInfoFromPacket,
} from '../codec-data';
import { Demuxer } from '../demuxer';
import { Input } from '../input';
import {
InputAudioTrack,
InputAudioTrackBacking,
InputTrack,
InputTrackBacking,
InputVideoTrack,
InputVideoTrackBacking,
} from '../input-track';
import { PacketRetrievalOptions } from '../media-sink';
import {
assert,
COLOR_PRIMARIES_MAP_INVERSE,
MATRIX_COEFFICIENTS_MAP_INVERSE,
TRANSFER_CHARACTERISTICS_MAP_INVERSE,
binarySearchLessOrEqual,
binarySearchExact,
Rotation,
last,
AsyncMutex,
findLastIndex,
UNDETERMINED_LANGUAGE,
TransformationMatrix,
roundToPrecision,
isIso639Dash2LanguageCode,
roundToMultiple,
normalizeRotation,
Bitstream,
insertSorted,
} from '../misc';
import { EncodedPacket, PLACEHOLDER_DATA } from '../packet';
import { Reader } from '../reader';
import { buildIsobmffMimeType } from './isobmff-misc';
import { IsobmffReader, MAX_BOX_HEADER_SIZE, MIN_BOX_HEADER_SIZE } from './isobmff-reader';
/**
 * Demuxer-internal state for a single track. A fresh record is created for every `trak` box and is then filled in by
 * the handlers of the boxes nested inside it. The `info` member stays `null` until the `hdlr` box has identified the
 * track as video or audio.
 */
type InternalTrack = {
/** Track ID read from the `tkhd` box; -1 until then. */
id: number;
/** The demuxer that created this record. */
demuxer: IsobmffDemuxer;
/** The public track object wrapping this record; assigned once the `trak` box has been read and found usable. */
inputTrack: InputTrack | null;
/** Media timescale read from the `mdhd` box; -1 until then. */
timescale: number;
/** Track duration read from the `tkhd` box; -1 until then. */
durationInMovieTimescale: number;
/** Media duration read from the `mdhd` box; -1 until then. */
durationInMediaTimescale: number;
/** Rotation derived from the `tkhd` transformation matrix, rounded to a multiple of 90 degrees. */
rotation: Rotation;
/** Language code decoded from the `mdhd` box; UNDETERMINED_LANGUAGE if absent or not a valid code. */
languageCode: string;
/** Position of the `stbl` box header, remembered so the sample table can be parsed later on demand. */
sampleTableByteOffset: number;
/** Built and cached on first use by `getSampleTableForTrack`. */
sampleTable: SampleTable | null;
/** NOTE(review): presumably populated from the `mfra` box of fragmented files - confirm. */
fragmentLookupTable: FragmentLookupTableEntry[] | null;
/** NOTE(review): presumably the parsing state of the track fragment currently being read - confirm. */
currentFragmentState: FragmentTrackState | null;
/** Known fragments for this track; `readFragment` binary-searches this list by `moofOffset`. */
fragments: Fragment[];
/** NOTE(review): presumably the subset of `fragments` containing a key frame for this track - confirm. */
fragmentsWithKeyFrame: Fragment[];
/** The segment durations of all edit list entries leading up to the main one (from which the offset is taken.) */
editListPreviousSegmentDurations: number;
/** The media time offset of the main edit list entry (with media time !== -1) */
editListOffset: number;
} & ({
// Handler type not yet known, or neither video nor audio
info: null;
} | {
info: {
type: 'video';
/** Width from the sample entry in `stsd`; -1 until then. */
width: number;
/** Height from the sample entry in `stsd`; -1 until then. */
height: number;
/** `null` if the sample entry type is not one of the recognized video codecs. */
codec: VideoCodec | null;
/** Raw contents of the codec configuration box (`avcC` / `hvcC`), if one was found. */
codecDescription: Uint8Array | null;
/** Filled in from an `nclx`-type `colr` box, if present. */
colorSpace: VideoColorSpaceInit | null;
avcCodecInfo: AvcDecoderConfigurationRecord | null;
hevcCodecInfo: HevcDecoderConfigurationRecord | null;
/** Filled in from the `vpcC` box, if present. */
vp9CodecInfo: Vp9CodecInfo | null;
/** Filled in from the `av1C` box, if present. */
av1CodecInfo: Av1CodecInfo | null;
};
} | {
info: {
type: 'audio';
/** Channel count from the sample entry; -1 until then. May be refined from the AAC AudioSpecificConfig. */
numberOfChannels: number;
/** Sample rate from the sample entry; -1 until then. May be refined from the AAC AudioSpecificConfig. */
sampleRate: number;
/** `null` if the codec is unknown or unsupported. */
codec: AudioCodec | null;
/** The DecoderSpecificInfo payload from the `esds` box, if present. */
codecDescription: Uint8Array | null;
/** Set when the `esds` box identifies the codec as AAC. */
aacCodecInfo: AacCodecInfo | null;
};
});
/** An InternalTrack whose `info` is known to describe a video track. */
type InternalVideoTrack = InternalTrack & { info: { type: 'video' } };
/** An InternalTrack whose `info` is known to describe an audio track. */
type InternalAudioTrack = InternalTrack & { info: { type: 'audio' } };
/**
 * The parsed sample table of a non-fragmented track, as built by `getSampleTableForTrack`. For PCM audio without
 * composition time offsets, that method rewrites the table so that every chunk is treated as a single sample.
 */
type SampleTable = {
/** Runs of consecutive samples that share the same decode-time delta. */
sampleTimingEntries: SampleTimingEntry[];
/** Runs of consecutive samples that share the same composition time offset; empty if none are defined. */
sampleCompositionTimeOffsets: SampleCompositionTimeOffsetEntry[];
/** Byte size per sample. */
sampleSizes: number[];
keySampleIndices: number[] | null; // Samples that are keyframes
/** One entry per chunk. NOTE(review): presumably the chunk's byte offset in the file - confirm. */
chunkOffsets: number[];
/** Runs of consecutive chunks that contain the same number of samples. */
sampleToChunk: SampleToChunkEntry[];
/**
 * All samples sorted by presentation timestamp. Only built when composition time offsets are defined; otherwise
 * `null`, in which case decode timestamps double as presentation timestamps.
 */
presentationTimestamps: {
presentationTimestamp: number;
sampleIndex: number;
}[] | null;
/**
 * Provides a fast map from sample index to index in the sorted presentation timestamps array - so, a fast map from
 * decode order to presentation order.
 */
presentationTimestampIndexMap: number[] | null;
};
/**
 * A run of consecutive samples with a constant decode-time delta. The decode timestamp of sample `i` in the run is
 * `startDecodeTimestamp + (i - startIndex) * delta`.
 */
type SampleTimingEntry = {
/** Index of the first sample in the run. */
startIndex: number;
/** Decode timestamp of the first sample in the run. */
startDecodeTimestamp: number;
/** Number of samples in the run. */
count: number;
/** Decode-time difference between two consecutive samples of the run. */
delta: number;
};
/** A run of consecutive samples that share the same composition time offset. */
type SampleCompositionTimeOffsetEntry = {
/** Index of the first sample in the run. */
startIndex: number;
/** Number of samples in the run. */
count: number;
/** Value added to a sample's decode timestamp to obtain its presentation timestamp. */
offset: number;
};
/**
 * A run of consecutive chunks that all hold the same number of samples. The run extends up to the
 * `startChunkIndex` of the next entry, or to the last chunk if there is no next entry.
 */
type SampleToChunkEntry = {
/** Index of the first sample in the first chunk of the run. */
startSampleIndex: number;
/** Index of the first chunk of the run. */
startChunkIndex: number;
/** Number of samples in each chunk of the run. */
samplesPerChunk: number;
/** NOTE(review): presumably the index of the sample description used by these chunks - confirm. */
sampleDescriptionIndex: number;
};
/**
 * Per-track default values for samples in fragments.
 * NOTE(review): presumably read from the movie's track-extends boxes; the code populating this is not in view -
 * confirm.
 */
type FragmentTrackDefaults = {
trackId: number;
defaultSampleDescriptionIndex: number;
defaultSampleDuration: number;
defaultSampleSize: number;
defaultSampleFlags: number;
};
/**
 * Associates a timestamp with the file offset of a `moof` box.
 * NOTE(review): presumably read from the `mfra` box to enable random access into fragmented files - confirm.
 */
type FragmentLookupTableEntry = {
timestamp: number;
moofOffset: number;
};
/**
 * State kept for a track while one of its fragments is being parsed.
 * NOTE(review): the `null` values presumably mean "not specified by this fragment, use the track defaults"; the
 * code using this type is not in view - confirm.
 */
type FragmentTrackState = {
baseDataOffset: number;
sampleDescriptionIndex: number | null;
defaultSampleDuration: number | null;
defaultSampleSize: number | null;
defaultSampleFlags: number | null;
startTimestamp: number | null;
};
/** The data a single fragment holds for a single track. */
type FragmentTrackData = {
startTimestamp: number;
/** `readFragment` uses the preceding fragment's end timestamp to offset fragments lacking a final start time. */
endTimestamp: number;
firstKeyFrameTimestamp: number | null;
samples: FragmentTrackSample[];
presentationTimestamps: {
presentationTimestamp: number;
sampleIndex: number;
}[];
/**
 * Whether the timestamps are final. If `false`, `readFragment` still has to offset them by the end timestamp of
 * the preceding fragment of the same track; it sets this to `true` afterwards.
 */
startTimestampIsFinal: boolean;
};
/** A single sample within a fragment. */
type FragmentTrackSample = {
presentationTimestamp: number;
duration: number;
/** NOTE(review): presumably an absolute offset into the file - confirm. */
byteOffset: number;
byteSize: number;
isKeyFrame: boolean;
};
/** A movie fragment, identified by the position of its `moof` box. */
type Fragment = {
/** File offset of the `moof` box header; fragment lists are binary-searched by this value. */
moofOffset: number;
/** Size of the `moof` box; `moofOffset + moofSize` is where the scan for the next box continues. */
moofSize: number;
implicitBaseDataOffset: number;
/** The fragment's data per track, keyed by track ID. */
trackData: Map<InternalTrack['id'], FragmentTrackData>;
/** NOTE(review): presumably the start of the byte range covered by the fragment's samples - confirm. */
dataStart: number;
/** NOTE(review): presumably the end of the byte range covered by the fragment's samples - confirm. */
dataEnd: number;
/** The fragment directly following this one in the file, once known. */
nextFragment: Fragment | null;
/** Set when a scan starting at the beginning of the file encountered this fragment first. */
isKnownToBeFirstFragment: boolean;
};
export class IsobmffDemuxer extends Demuxer {
/** Reader used to parse the box structure; wraps the input's main reader. */
metadataReader: IsobmffReader;
/** The track whose boxes are currently being traversed, so nested box handlers know which track to fill in. */
currentTrack: InternalTrack | null = null;
/** All usable tracks found in the `moov` box. */
tracks: InternalTrack[] = [];
/** Memoizes `readMetadata` so that the metadata is only read once. */
metadataPromise: Promise<void> | null = null;
/** Timescale from the `mvhd` box; -1 until then. */
movieTimescale = -1;
/** Duration from the `mvhd` box, in the movie timescale; -1 until then. */
movieDurationInTimescale = -1;
/** Set from the major brand of the `ftyp` box. */
isQuickTime = false;
/** NOTE(review): set by code outside this view, presumably when fragment-related boxes are found - confirm. */
isFragmented = false;
fragmentTrackDefaults: FragmentTrackDefaults[] = [];
/** All fragments read so far; binary-searched by `moofOffset`. */
fragments: Fragment[] = [];
/** NOTE(review): presumably the fragment whose `moof` box is currently being traversed - confirm. */
currentFragment: Fragment | null = null;
/** NOTE(review): presumably serializes fragment lookups, which share the metadata reader's position - confirm. */
fragmentLookupMutex = new AsyncMutex();
/** Separate reader with its own size-limited storage, created in the constructor. */
chunkReader: IsobmffReader;
/**
 * Creates the demuxer's two readers: the metadata reader sits on top of the input's main reader, while chunk data
 * goes through a dedicated reader with its own storage limit.
 */
constructor(input: Input) {
    super(input);

    this.metadataReader = new IsobmffReader(input._mainReader);

    // Max 64 MiB of stored chunks
    const maxStoredChunkBytes = 64 * 2 ** 20;
    const chunkSourceReader = new Reader(input.source, maxStoredChunkBytes);
    this.chunkReader = new IsobmffReader(chunkSourceReader);
}
/** Resolves to the largest duration among all tracks; never less than 0, and 0 if there are no tracks. */
override async computeDuration() {
    const inputTracks = await this.getTracks();
    const durations = await Promise.all(inputTracks.map(inputTrack => inputTrack.computeDuration()));

    let longest = 0;
    for (const duration of durations) {
        longest = Math.max(longest, duration);
    }

    return longest;
}
/** Reads the metadata if necessary, then resolves to the public track objects of all internal tracks. */
override async getTracks() {
    await this.readMetadata();

    const inputTracks: InputTrack[] = [];
    for (const internalTrack of this.tracks) {
        inputTracks.push(internalTrack.inputTrack!);
    }

    return inputTracks;
}
/**
 * Builds the file's MIME type from the QuickTime flag, the kinds of tracks present, and the codec parameter
 * strings of all tracks. Tracks without a codec parameter string are left out of the codec list.
 */
override async getMimeType() {
    await this.readMetadata();

    const pendingCodecStrings = this.tracks.map(track => track.inputTrack!.getCodecParameterString());
    const resolvedCodecStrings = await Promise.all(pendingCodecStrings);

    const containsTrackOfType = (type: 'video' | 'audio') => {
        return this.tracks.some(track => track.info?.type === type);
    };

    return buildIsobmffMimeType({
        isQuickTime: this.isQuickTime,
        hasVideo: containsTrackOfType('video'),
        hasAudio: containsTrackOfType('audio'),
        codecStrings: resolvedCodecStrings.filter(Boolean) as string[],
    });
}
/**
 * Reads the file's metadata. The work is only done once: the promise is stored in `metadataPromise` and handed out
 * to every subsequent caller.
 *
 * Top-level boxes are walked from the metadata reader's current position until the `moov` box is found. The `ftyp`
 * box determines whether the file is a QuickTime file; the `moov` box is loaded in full and parsed, after which
 * each track's edit list offset is adjusted. For fragmented files, the `mfra` box at the end of the file is
 * additionally located and parsed, if present.
 */
readMetadata() {
    return this.metadataPromise ??= (async () => {
        const sourceSize = await this.metadataReader.reader.source.getSize();

        while (this.metadataReader.pos < sourceSize) {
            await this.metadataReader.reader.loadRange(
                this.metadataReader.pos,
                this.metadataReader.pos + MAX_BOX_HEADER_SIZE,
            );
            const startPos = this.metadataReader.pos;
            const boxInfo = this.metadataReader.readBoxHeader();

            if (boxInfo.name === 'ftyp') {
                // Brands are always four characters long. QuickTime's brand is "qt" followed by two spaces, so
                // the comparison value must be padded to four characters or it can never match.
                const majorBrand = this.metadataReader.readAscii(4);
                this.isQuickTime = majorBrand === 'qt'.padEnd(4, ' ');
            } else if (boxInfo.name === 'moov') {
                // Found moov, load it
                await this.metadataReader.reader.loadRange(
                    this.metadataReader.pos,
                    this.metadataReader.pos + boxInfo.contentSize,
                );
                this.readContiguousBoxes(boxInfo.contentSize);

                for (const track of this.tracks) {
                    // Modify the edit list offset based on the previous segment durations. They are in different
                    // timescales, so we first convert to seconds and then into the track timescale.
                    const previousSegmentDurationsInSeconds
                        = track.editListPreviousSegmentDurations / this.movieTimescale;
                    track.editListOffset -= Math.round(previousSegmentDurationsInSeconds * track.timescale);
                }

                break;
            }

            // Skip to the next top-level box
            this.metadataReader.pos = startPos + boxInfo.totalSize;
        }

        if (this.isFragmented) {
            // The last 4 bytes may contain the size of the mfra box at the end of the file
            await this.metadataReader.reader.loadRange(sourceSize - 4, sourceSize);
            this.metadataReader.pos = sourceSize - 4;
            const lastWord = this.metadataReader.readU32();
            const potentialMfraPos = sourceSize - lastWord;

            // The last word is arbitrary data if the file has no mfra box, so only follow it if the position it
            // points to leaves enough room for a full box header before the end of the file. A real mfra box
            // always satisfies this, as it contains at least the box that stores its size.
            if (potentialMfraPos >= 0 && potentialMfraPos <= sourceSize - MAX_BOX_HEADER_SIZE) {
                await this.metadataReader.reader.loadRange(potentialMfraPos, sourceSize);
                this.metadataReader.pos = potentialMfraPos;
                const boxInfo = this.metadataReader.readBoxHeader();

                if (boxInfo.name === 'mfra') {
                    // We found the mfra box, allowing for much better random access. Let's parse it:
                    this.readContiguousBoxes(boxInfo.contentSize);
                }
            }
        }
    })();
}
/**
 * Returns the sample table of the given track. It is built the first time it's requested, by traversing the box at
 * the track's `sampleTableByteOffset`, and cached on the track afterwards.
 *
 * Two post-processing steps are applied after parsing:
 * - For PCM audio without composition time offsets, the table is rewritten so that every chunk becomes one sample.
 * - If composition time offsets are defined, a list of all samples sorted by presentation timestamp is built, along
 *   with a map from sample index to position in that list.
 */
getSampleTableForTrack(internalTrack: InternalTrack) {
if (internalTrack.sampleTable) {
return internalTrack.sampleTable;
}
const sampleTable: SampleTable = {
sampleTimingEntries: [],
sampleCompositionTimeOffsets: [],
sampleSizes: [],
keySampleIndices: null,
chunkOffsets: [],
sampleToChunk: [],
presentationTimestamps: null,
presentationTimestampIndexMap: null,
};
// Assigned before the traversal. NOTE(review): presumably the sample table box handlers (outside this view)
// fill the object in via `this.currentTrack`; the `stsd` handler skips itself when the sample table is set.
internalTrack.sampleTable = sampleTable;
this.metadataReader.pos = internalTrack.sampleTableByteOffset;
this.currentTrack = internalTrack;
this.traverseBox();
this.currentTrack = null;
const isPcmCodec = internalTrack.info?.type === 'audio'
&& internalTrack.info.codec
&& (PCM_AUDIO_CODECS as readonly string[]).includes(internalTrack.info.codec);
if (isPcmCodec && sampleTable.sampleCompositionTimeOffsets.length === 0) {
// If the audio has PCM samples, the way the samples are defined in the sample table is somewhat
// suboptimal: Each individual audio sample is its own sample, meaning we can have 48000 samples per second.
// Because we treat each sample as its own atomic unit that can be decoded, this would lead to a huge
// amount of very short samples for PCM audio. So instead, we make a transformation: If the audio is in PCM,
// we say that each chunk (that normally holds many samples) now is one big sample. We can do this because
// the samples in the chunk are contiguous and the format is PCM, so the entire chunk as one thing still
// encodes valid audio information.
assert(internalTrack.info?.type === 'audio');
const pcmInfo = parsePcmCodec(internalTrack.info.codec as PcmAudioCodec);
const newSampleTimingEntries: SampleTimingEntry[] = [];
const newSampleSizes: number[] = [];
for (let i = 0; i < sampleTable.sampleToChunk.length; i++) {
const chunkEntry = sampleTable.sampleToChunk[i]!;
const nextEntry = sampleTable.sampleToChunk[i + 1];
// The entry covers all chunks up to the start of the next entry, or up to the last chunk
const chunkCount = (nextEntry ? nextEntry.startChunkIndex : sampleTable.chunkOffsets.length)
- chunkEntry.startChunkIndex;
for (let j = 0; j < chunkCount; j++) {
const startSampleIndex = chunkEntry.startSampleIndex + j * chunkEntry.samplesPerChunk;
const endSampleIndex = startSampleIndex + chunkEntry.samplesPerChunk; // Exclusive, outside of chunk
const startTimingEntryIndex = binarySearchLessOrEqual(
sampleTable.sampleTimingEntries,
startSampleIndex,
x => x.startIndex,
);
const startTimingEntry = sampleTable.sampleTimingEntries[startTimingEntryIndex]!;
const endTimingEntryIndex = binarySearchLessOrEqual(
sampleTable.sampleTimingEntries,
endSampleIndex,
x => x.startIndex,
);
const endTimingEntry = sampleTable.sampleTimingEntries[endTimingEntryIndex]!;
const firstSampleTimestamp = startTimingEntry.startDecodeTimestamp
+ (startSampleIndex - startTimingEntry.startIndex) * startTimingEntry.delta;
const lastSampleTimestamp = endTimingEntry.startDecodeTimestamp
+ (endSampleIndex - endTimingEntry.startIndex) * endTimingEntry.delta;
// The duration of the whole chunk becomes the delta of the new, chunk-sized sample
const delta = lastSampleTimestamp - firstSampleTimestamp;
const lastSampleTimingEntry = last(newSampleTimingEntries);
if (lastSampleTimingEntry && lastSampleTimingEntry.delta === delta) {
// Same duration as the previous chunk, so simply extend the current run
lastSampleTimingEntry.count++;
} else {
// One sample for the entire chunk
newSampleTimingEntries.push({
startIndex: chunkEntry.startChunkIndex + j,
startDecodeTimestamp: firstSampleTimestamp,
count: 1,
delta,
});
}
// Instead of determining the chunk's size by looping over the samples sizes in the sample table, we
// can directly compute it as we know how many PCM frames are in this chunk, and the size of each
// PCM frame. This also improves compatibility with some files which fail to write proper sample
// size values into their sample tables in the PCM case.
const chunkSize = chunkEntry.samplesPerChunk
* pcmInfo.sampleSize
* internalTrack.info.numberOfChannels;
newSampleSizes.push(chunkSize);
}
// Only now rewrite the entry in place, since the loop above still needed its original values: from here
// on, sample indices and chunk indices coincide
chunkEntry.startSampleIndex = chunkEntry.startChunkIndex;
chunkEntry.samplesPerChunk = 1;
}
sampleTable.sampleTimingEntries = newSampleTimingEntries;
sampleTable.sampleSizes = newSampleSizes;
}
if (sampleTable.sampleCompositionTimeOffsets.length > 0) {
// If composition time offsets are defined, we must build a list of all presentation timestamps and then
// sort them
sampleTable.presentationTimestamps = [];
// Start out with the decode timestamps, in decode order...
for (const entry of sampleTable.sampleTimingEntries) {
for (let i = 0; i < entry.count; i++) {
sampleTable.presentationTimestamps.push({
presentationTimestamp: entry.startDecodeTimestamp + i * entry.delta,
sampleIndex: entry.startIndex + i,
});
}
}
// ...then add the composition time offsets. The list is still in decode order here, so it can be indexed by
// sample index.
for (const entry of sampleTable.sampleCompositionTimeOffsets) {
for (let i = 0; i < entry.count; i++) {
const sampleIndex = entry.startIndex + i;
const sample = sampleTable.presentationTimestamps[sampleIndex];
if (!sample) {
// The offset refers to a sample that has no timing information; ignore it
continue;
}
sample.presentationTimestamp += entry.offset;
}
}
sampleTable.presentationTimestamps.sort((a, b) => a.presentationTimestamp - b.presentationTimestamp);
// Build the inverse mapping: sample index (decode order) -> index in the sorted list (presentation order)
sampleTable.presentationTimestampIndexMap = Array(sampleTable.presentationTimestamps.length).fill(-1);
for (let i = 0; i < sampleTable.presentationTimestamps.length; i++) {
sampleTable.presentationTimestampIndexMap[sampleTable.presentationTimestamps[i]!.sampleIndex] = i;
}
} else {
// If they're not defined, we can simply use the decode timestamps as presentation timestamps
}
return sampleTable;
}
/**
 * Reads the `moof` box located at the metadata reader's current position and returns the resulting fragment.
 *
 * For every track in the fragment whose start timestamp is not yet final, the timestamps are made final by
 * offsetting them by the end timestamp of the last preceding fragment that contains data for the same track. To
 * find that fragment, the file is scanned box by box up to this fragment, starting from the closest preceding
 * fragment already known for the track, or from the start of the file. Unknown fragments encountered during the
 * scan are read recursively.
 *
 * Asserts that the box at the current position is a `moof` box.
 */
async readFragment(): Promise<Fragment> {
const startPos = this.metadataReader.pos;
await this.metadataReader.reader.loadRange(
this.metadataReader.pos,
this.metadataReader.pos + MAX_BOX_HEADER_SIZE,
);
const moofBoxInfo = this.metadataReader.readBoxHeader();
assert(moofBoxInfo.name === 'moof');
const contentStart = this.metadataReader.pos;
await this.metadataReader.reader.loadRange(contentStart, contentStart + moofBoxInfo.contentSize);
// Rewind to the box header, then parse the entire box now that its contents are loaded
this.metadataReader.pos = startPos;
this.traverseBox();
// The traversal is expected to have registered the fragment in `this.fragments`
const index = binarySearchExact(this.fragments, startPos, x => x.moofOffset);
assert(index !== -1);
const fragment = this.fragments[index]!;
assert(fragment.moofOffset === startPos);
// We have read everything in the moof box, there's no need to keep the data around anymore
// (keep the header tho)
this.metadataReader.reader.forgetRange(contentStart, contentStart + moofBoxInfo.contentSize);
// It may be that some tracks don't define the base decode time, i.e. when the fragment begins. This means the
// only other option is to sum up the duration of all previous fragments.
for (const [trackId, trackData] of fragment.trackData) {
if (trackData.startTimestampIsFinal) {
continue;
}
const internalTrack = this.tracks.find(x => x.id === trackId)!;
this.metadataReader.pos = 0;
let currentFragment: Fragment | null = null;
let lastFragment: Fragment | null = null;
// Searching for `startPos - 1` restricts the result to fragments strictly before this one
const index = binarySearchLessOrEqual(
internalTrack.fragments,
startPos - 1,
x => x.moofOffset,
);
if (index !== -1) {
// Instead of starting at the start of the file, let's start at the previous fragment instead (which
// already has final timestamps).
currentFragment = internalTrack.fragments[index]!;
lastFragment = currentFragment;
this.metadataReader.pos = currentFragment.moofOffset + currentFragment.moofSize;
}
// If the scan begins at the very start of the file, the first moof box it encounters is the first fragment
let nextFragmentIsFirstFragment = this.metadataReader.pos === 0;
while (this.metadataReader.pos < startPos) {
if (currentFragment?.nextFragment) {
// The successor is already known, so jump straight to it without reading anything
currentFragment = currentFragment.nextFragment;
this.metadataReader.pos = currentFragment.moofOffset + currentFragment.moofSize;
} else {
await this.metadataReader.reader.loadRange(
this.metadataReader.pos,
this.metadataReader.pos + MAX_BOX_HEADER_SIZE,
);
// Note: this `startPos` (and `index` below) shadow the outer variables of the same name and refer to the
// box currently being scanned, not to the fragment this method was called for
const startPos = this.metadataReader.pos;
const boxInfo = this.metadataReader.readBoxHeader();
if (boxInfo.name === 'moof') {
const index = binarySearchExact(this.fragments, startPos, x => x.moofOffset);
let fragment: Fragment;
if (index === -1) {
this.metadataReader.pos = startPos;
fragment = await this.readFragment(); // Recursive call
} else {
// We already know this fragment
fragment = this.fragments[index]!;
}
// Even if we already know the fragment, we might not yet know its predecessor; always do this
if (currentFragment) currentFragment.nextFragment = fragment;
currentFragment = fragment;
if (nextFragmentIsFirstFragment) {
fragment.isKnownToBeFirstFragment = true;
nextFragmentIsFirstFragment = false;
}
}
// Continue with the next top-level box
this.metadataReader.pos = startPos + boxInfo.totalSize;
}
// Remember the most recent fragment that contains data for this track
if (currentFragment && currentFragment.trackData.has(trackId)) {
lastFragment = currentFragment;
}
}
if (lastFragment) {
const otherTrackData = lastFragment.trackData.get(trackId)!;
assert(otherTrackData.startTimestampIsFinal);
offsetFragmentTrackDataByTimestamp(trackData, otherTrackData.endTimestamp);
}
// Without a preceding fragment for this track, the timestamps are kept as they are
trackData.startTimestampIsFinal = true;
}
return fragment;
}
/**
 * Traverses consecutive boxes, beginning at the metadata reader's current position, for as long as at least a
 * minimal box header still fits into the `totalSize` bytes following that starting position.
 */
readContiguousBoxes(totalSize: number) {
    const regionEnd = this.metadataReader.pos + totalSize;

    while (this.metadataReader.pos + MIN_BOX_HEADER_SIZE <= regionEnd) {
        this.traverseBox();
    }
}
traverseBox() {
const startPos = this.metadataReader.pos;
const boxInfo = this.metadataReader.readBoxHeader();
const boxEndPos = startPos + boxInfo.totalSize;
switch (boxInfo.name) {
case 'mdia':
case 'minf':
case 'dinf':
case 'mfra':
case 'edts': {
this.readContiguousBoxes(boxInfo.contentSize);
}; break;
case 'mvhd': {
const version = this.metadataReader.readU8();
this.metadataReader.pos += 3; // Flags
if (version === 1) {
this.metadataReader.pos += 8 + 8;
this.movieTimescale = this.metadataReader.readU32();
this.movieDurationInTimescale = this.metadataReader.readU64();
} else {
this.metadataReader.pos += 4 + 4;
this.movieTimescale = this.metadataReader.readU32();
this.movieDurationInTimescale = this.metadataReader.readU32();
}
}; break;
case 'trak': {
const track = {
id: -1,
demuxer: this,
inputTrack: null,
info: null,
timescale: -1,
durationInMovieTimescale: -1,
durationInMediaTimescale: -1,
rotation: 0,
languageCode: UNDETERMINED_LANGUAGE,
sampleTableByteOffset: -1,
sampleTable: null,
fragmentLookupTable: null,
currentFragmentState: null,
fragments: [],
fragmentsWithKeyFrame: [],
editListPreviousSegmentDurations: 0,
editListOffset: 0,
} satisfies InternalTrack as InternalTrack;
this.currentTrack = track;
this.readContiguousBoxes(boxInfo.contentSize);
if (track.id !== -1 && track.timescale !== -1 && track.info !== null) {
if (track.info.type === 'video' && track.info.width !== -1) {
const videoTrack = track as InternalVideoTrack;
track.inputTrack = new InputVideoTrack(new IsobmffVideoTrackBacking(videoTrack));
this.tracks.push(track);
} else if (track.info.type === 'audio' && track.info.numberOfChannels !== -1) {
const audioTrack = track as InternalAudioTrack;
track.inputTrack = new InputAudioTrack(new IsobmffAudioTrackBacking(audioTrack));
this.tracks.push(track);
}
}
this.currentTrack = null;
}; break;
case 'tkhd': {
const track = this.currentTrack;
assert(track);
const version = this.metadataReader.readU8();
const flags = this.metadataReader.readU24();
const trackEnabled = (flags & 0x1) !== 0;
if (!trackEnabled) {
break;
}
// Skip over creation & modification time to reach the track ID
if (version === 0) {
this.metadataReader.pos += 8;
track.id = this.metadataReader.readU32();
this.metadataReader.pos += 4;
track.durationInMovieTimescale = this.metadataReader.readU32();
} else if (version === 1) {
this.metadataReader.pos += 16;
track.id = this.metadataReader.readU32();
this.metadataReader.pos += 4;
track.durationInMovieTimescale = this.metadataReader.readU64();
} else {
throw new Error(`Incorrect track header version ${version}.`);
}
this.metadataReader.pos += 2 * 4 + 2 + 2 + 2 + 2;
const matrix: TransformationMatrix = [
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_2_30(),
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_2_30(),
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_16_16(),
this.metadataReader.readFixed_2_30(),
];
const rotation = normalizeRotation(roundToMultiple(extractRotationFromMatrix(matrix), 90));
assert(rotation === 0 || rotation === 90 || rotation === 180 || rotation === 270);
track.rotation = rotation;
}; break;
case 'elst': {
const track = this.currentTrack;
assert(track);
const version = this.metadataReader.readU8();
this.metadataReader.pos += 3; // Flags
let relevantEntryFound = false;
let previousSegmentDurations = 0;
const entryCount = this.metadataReader.readU32();
for (let i = 0; i < entryCount; i++) {
const segmentDuration = version === 1
? this.metadataReader.readU64()
: this.metadataReader.readU32();
const mediaTime = version === 1
? this.metadataReader.readI64()
: this.metadataReader.readI32();
const mediaRate = this.metadataReader.readFixed_16_16();
if (segmentDuration === 0) {
// Don't care
continue;
}
if (relevantEntryFound) {
console.warn(
'Unsupported edit list: multiple edits are not currently supported. Only using first edit.',
);
break;
}
if (mediaTime === -1) {
previousSegmentDurations += segmentDuration;
continue;
}
if (mediaRate !== 1) {
console.warn('Unsupported edit list entry: media rate must be 1.');
break;
}
track.editListPreviousSegmentDurations = previousSegmentDurations;
track.editListOffset = mediaTime;
relevantEntryFound = true;
}
}; break;
case 'mdhd': {
const track = this.currentTrack;
assert(track);
const version = this.metadataReader.readU8();
this.metadataReader.pos += 3; // Flags
if (version === 0) {
this.metadataReader.pos += 8;
track.timescale = this.metadataReader.readU32();
track.durationInMediaTimescale = this.metadataReader.readU32();
} else if (version === 1) {
this.metadataReader.pos += 16;
track.timescale = this.metadataReader.readU32();
track.durationInMediaTimescale = this.metadataReader.readU64();
}
let language = this.metadataReader.readU16();
if (language > 0) {
track.languageCode = '';
for (let i = 0; i < 3; i++) {
track.languageCode = String.fromCharCode(0x60 + (language & 0b11111)) + track.languageCode;
language >>= 5;
}
if (!isIso639Dash2LanguageCode(track.languageCode)) {
// Sometimes the bytes are garbage
track.languageCode = UNDETERMINED_LANGUAGE;
}
}
}; break;
case 'hdlr': {
const track = this.currentTrack;
assert(track);
this.metadataReader.pos += 8; // Version + flags + pre-defined
const handlerType = this.metadataReader.readAscii(4);
if (handlerType === 'vide') {
track.info = {
type: 'video',
width: -1,
height: -1,
codec: null,
codecDescription: null,
colorSpace: null,
avcCodecInfo: null,
hevcCodecInfo: null,
vp9CodecInfo: null,
av1CodecInfo: null,
};
} else if (handlerType === 'soun') {
track.info = {
type: 'audio',
numberOfChannels: -1,
sampleRate: -1,
codec: null,
codecDescription: null,
aacCodecInfo: null,
};
}
}; break;
case 'stbl': {
const track = this.currentTrack;
assert(track);
track.sampleTableByteOffset = startPos;
this.readContiguousBoxes(boxInfo.contentSize);
}; break;
case 'stsd': {
const track = this.currentTrack;
assert(track);
if (track.info === null || track.sampleTable) {
break;
}
const stsdVersion = this.metadataReader.readU8();
this.metadataReader.pos += 3; // Flags
const entries = this.metadataReader.readU32();
for (let i = 0; i < entries; i++) {
const startPos = this.metadataReader.pos;
const sampleBoxInfo = this.metadataReader.readBoxHeader();
const lowercaseBoxName = sampleBoxInfo.name.toLowerCase();
if (track.info.type === 'video') {
if (lowercaseBoxName === 'avc1') {
track.info.codec = 'avc';
} else if (lowercaseBoxName === 'hvc1' || lowercaseBoxName === 'hev1') {
track.info.codec = 'hevc';
} else if (lowercaseBoxName === 'vp08') {
track.info.codec = 'vp8';
} else if (lowercaseBoxName === 'vp09') {
track.info.codec = 'vp9';
} else if (lowercaseBoxName === 'av01') {
track.info.codec = 'av1';
} else {
console.warn(`Unsupported video codec (sample entry type '${sampleBoxInfo.name}').`);
}
this.metadataReader.pos += 6 * 1 + 2 + 2 + 2 + 3 * 4;
track.info.width = this.metadataReader.readU16();
track.info.height = this.metadataReader.readU16();
this.metadataReader.pos += 4 + 4 + 4 + 2 + 32 + 2 + 2;
this.readContiguousBoxes((startPos + sampleBoxInfo.totalSize) - this.metadataReader.pos);
} else {
if (lowercaseBoxName === 'mp4a') {
// We don't know the codec yet (might be AAC, might be MP3), need to read the esds box
} else if (lowercaseBoxName === 'opus') {
track.info.codec = 'opus';
} else if (lowercaseBoxName === 'flac') {
track.info.codec = 'flac';
} else if (
lowercaseBoxName === 'twos'
|| lowercaseBoxName === 'sowt'
|| lowercaseBoxName === 'raw '
|| lowercaseBoxName === 'in24'
|| lowercaseBoxName === 'in32'
|| lowercaseBoxName === 'fl32'
|| lowercaseBoxName === 'fl64'
|| lowercaseBoxName === 'lpcm'
|| lowercaseBoxName === 'ipcm' // ISO/IEC 23003-5
|| lowercaseBoxName === 'fpcm' // "
) {
// It's PCM
// developer.apple.com/documentation/quicktime-file-format/sound_sample_descriptions/
} else if (lowercaseBoxName === 'ulaw') {
track.info.codec = 'ulaw';
} else if (lowercaseBoxName === 'alaw') {
track.info.codec = 'alaw';
} else {
console.warn(`Unsupported audio codec (sample entry type '${sampleBoxInfo.name}').`);
}
this.metadataReader.pos += 6 * 1 + 2;
const version = this.metadataReader.readU16();
this.metadataReader.pos += 3 * 2;
let channelCount = this.metadataReader.readU16();
let sampleSize = this.metadataReader.readU16();
this.metadataReader.pos += 2 * 2;
// Can't use fixed16_16 as that's signed
let sampleRate = this.metadataReader.readU32() / 0x10000;
if (stsdVersion === 0 && version > 0) {
// Additional QuickTime fields
if (version === 1) {
this.metadataReader.pos += 4;
sampleSize = 8 * this.metadataReader.readU32();
this.metadataReader.pos += 2 * 4;
} else if (version === 2) {
this.metadataReader.pos += 4;
sampleRate = this.metadataReader.readF64();
channelCount = this.metadataReader.readU32();
this.metadataReader.pos += 4; // Always 0x7f000000
sampleSize = this.metadataReader.readU32();
const flags = this.metadataReader.readU32();
this.metadataReader.pos += 2 * 4;
if (lowercaseBoxName === 'lpcm') {
const bytesPerSample = (sampleSize + 7) >> 3;
const isFloat = Boolean(flags & 1);
const isBigEndian = Boolean(flags & 2);
const sFlags = flags & 4 ? -1 : 0; // I guess it means "signed flags" or something?
if (sampleSize > 0 && sampleSize <= 64) {
if (isFloat) {
if (sampleSize === 32) {
track.info.codec = isBigEndian ? 'pcm-f32be' : 'pcm-f32';
}
} else {
if (sFlags & (1 << (bytesPerSample - 1))) {
if (bytesPerSample === 1) {
track.info.codec = 'pcm-s8';
} else if (bytesPerSample === 2) {
track.info.codec = isBigEndian ? 'pcm-s16be' : 'pcm-s16';
} else if (bytesPerSample === 3) {
track.info.codec = isBigEndian ? 'pcm-s24be' : 'pcm-s24';
} else if (bytesPerSample === 4) {
track.info.codec = isBigEndian ? 'pcm-s32be' : 'pcm-s32';
}
} else {
if (bytesPerSample === 1) {
track.info.codec = 'pcm-u8';
}
}
}
}
if (track.info.codec === null) {
console.warn('Unsupported PCM format.');
}
}
}
}
track.info.numberOfChannels = channelCount;
track.info.sampleRate = sampleRate;
// PCM codec assignments
if (lowercaseBoxName === 'twos') {
if (sampleSize === 8) {
track.info.codec = 'pcm-s8';
} else if (sampleSize === 16) {
track.info.codec = 'pcm-s16be';
} else {
console.warn(`Unsupported sample size ${sampleSize} for codec 'twos'.`);
track.info.codec = null;
}
} else if (lowercaseBoxName === 'sowt') {
if (sampleSize === 8) {
track.info.codec = 'pcm-s8';
} else if (sampleSize === 16) {
track.info.codec = 'pcm-s16';
} else {
console.warn(`Unsupported sample size ${sampleSize} for codec 'sowt'.`);
track.info.codec = null;
}
} else if (lowercaseBoxName === 'raw ') {
track.info.codec = 'pcm-u8';
} else if (lowercaseBoxName === 'in24') {
track.info.codec = 'pcm-s24be';
} else if (lowercaseBoxName === 'in32') {
track.info.codec = 'pcm-s32be';
} else if (lowercaseBoxName === 'fl32') {
track.info.codec = 'pcm-f32be';
} else if (lowercaseBoxName === 'fl64') {
track.info.codec = 'pcm-f64be';
} else if (lowercaseBoxName === 'ipcm') {
track.info.codec = 'pcm-s16be'; // Placeholder, will be adjusted by the pcmC box
} else if (lowercaseBoxName === 'fpcm') {
track.info.codec = 'pcm-f32be'; // Placeholder, will be adjusted by the pcmC box
}
this.readContiguousBoxes((startPos + sampleBoxInfo.totalSize) - this.metadataReader.pos);
}
}
}; break;
case 'avcC': {
const track = this.currentTrack;
assert(track && track.info);
track.info.codecDescription = this.metadataReader.readBytes(boxInfo.contentSize);
}; break;
case 'hvcC': {
const track = this.currentTrack;
assert(track && track.info);
track.info.codecDescription = this.metadataReader.readBytes(boxInfo.contentSize);
}; break;
case 'vpcC': {
const track = this.currentTrack;
assert(track && track.info?.type === 'video');
this.metadataReader.pos += 4; // Version + flags
const profile = this.metadataReader.readU8();
const level = this.metadataReader.readU8();
const thirdByte = this.metadataReader.readU8();
const bitDepth = thirdByte >> 4;
const chromaSubsampling = (thirdByte >> 1) & 0b111;
const videoFullRangeFlag = thirdByte & 1;
const colourPrimaries = this.metadataReader.readU8();
const transferCharacteristics = this.metadataReader.readU8();
const matrixCoefficients = this.metadataReader.readU8();
track.info.vp9CodecInfo = {
profile,
level,
bitDepth,
chromaSubsampling,
videoFullRangeFlag,
colourPrimaries,
transferCharacteristics,
matrixCoefficients,
};
}; break;
case 'av1C': {
const track = this.currentTrack;
assert(track && track.info?.type === 'video');
this.metadataReader.pos += 1; // Marker + version
const secondByte = this.metadataReader.readU8();
const profile = secondByte >> 5;
const level = secondByte & 0b11111;
const thirdByte = this.metadataReader.readU8();
const tier = thirdByte >> 7;
const highBitDepth = (thirdByte >> 6) & 1;
const twelveBit = (thirdByte >> 5) & 1;
const monochrome = (thirdByte >> 4) & 1;
const chromaSubsamplingX = (thirdByte >> 3) & 1;
const chromaSubsamplingY = (thirdByte >> 2) & 1;
const chromaSamplePosition = thirdByte & 0b11;
// Logic from https://aomediacodec.github.io/av1-spec/av1-spec.pdf
const bitDepth = profile == 2 && highBitDepth ? (twelveBit ? 12 : 10) : (highBitDepth ? 10 : 8);
track.info.av1CodecInfo = {
profile,
level,
tier,
bitDepth,
monochrome,
chromaSubsamplingX,
chromaSubsamplingY,
chromaSamplePosition,
};
}; break;
case 'colr': {
const track = this.currentTrack;
assert(track && track.info?.type === 'video');
const colourType = this.metadataReader.readAscii(4);
if (colourType !== 'nclx') {
break;
}
const colourPrimaries = this.metadataReader.readU16();
const transferCharacteristics = this.metadataReader.readU16();
const matrixCoefficients = this.metadataReader.readU16();
const fullRangeFlag = Boolean(this.metadataReader.readU8() & 0x80);
track.info.colorSpace = {
primaries: COLOR_PRIMARIES_MAP_INVERSE[colourPrimaries],
transfer: TRANSFER_CHARACTERISTICS_MAP_INVERSE[transferCharacteristics],
matrix: MATRIX_COEFFICIENTS_MAP_INVERSE[matrixCoefficients],
fullRange: fullRangeFlag,
} as VideoColorSpaceInit;
}; break;
case 'wave': {
this.readContiguousBoxes(boxInfo.contentSize);
}; break;
case 'esds': {
// Elementary stream descriptor of an audio track: picks the codec based on the objectTypeIndication and, if
// a DecoderSpecificInfo follows, stores its payload as the track's codec description.
const track = this.currentTrack;
assert(track && track.info?.type === 'audio');
this.metadataReader.pos += 4; // Version + flags
const tag = this.metadataReader.readU8();
assert(tag === 0x03); // ES Descriptor
this.metadataReader.readIsomVariableInteger(); // Length
this.metadataReader.pos += 2; // ES ID
// A single byte whose top three bits announce optional fields that follow and need to be skipped
const mixed = this.metadataReader.readU8();
const streamDependenceFlag = (mixed & 0x80) !== 0;
const urlFlag = (mixed & 0x40) !== 0;
const ocrStreamFlag = (mixed & 0x20) !== 0;
if (streamDependenceFlag) {
// Skip a 2-byte field (presumably dependsOn_ES_ID - confirm against ISO/IEC 14496-1)
this.metadataReader.pos += 2;
}
if (urlFlag) {
// Length-prefixed URL: one length byte, then that many bytes, all skipped
const urlLength = this.metadataReader.readU8();
this.metadataReader.pos += urlLength;
}
if (ocrStreamFlag) {
// Skip a 2-byte field (presumably OCR_ES_Id - confirm against ISO/IEC 14496-1)
this.metadataReader.pos += 2;
}
const decoderConfigTag = this.metadataReader.readU8();
assert(decoderConfigTag === 0x04); // DecoderConfigDescriptor
const decoderConfigDescriptorLength = this.metadataReader.readIsomVariableInteger(); // Length
// Remember where the descriptor payload starts so the number of consumed bytes can be computed later
const payloadStart = this.metadataReader.pos;
const objectTypeIndication = this.metadataReader.readU8();
if (objectTypeIndication === 0x40 || objectTypeIndication === 0x67) {
track.info.codec = 'aac';
track.info.aacCodecInfo = { isMpeg2: objectTypeIndication === 0x67 };
} else if (objectTypeIndication === 0x69 || objectTypeIndication === 0x6b) {
track.info.codec = 'mp3';
} else if (objectTypeIndication === 0xdd) {
track.info.codec = 'vorbis'; // "nonstandard, gpac uses it" - FFmpeg
} else {
// Nothing is assigned here: the codec keeps whatever value it had before this box was read
console.warn(
`Unsupported audio codec (objectTypeIndication ${objectTypeIndication}) - discarding track.`,
);
}
// Skip 12 bytes of fixed-size fields (presumably stream type/flags (1), buffer size (3), max bitrate (4) and
// average bitrate (4) - confirm against ISO/IEC 14496-1)
this.metadataReader.pos += 1 + 3 + 4 + 4;
// The descriptor declares more bytes than have been consumed since payloadStart
if (decoderConfigDescriptorLength > this.metadataReader.pos - payloadStart) {
// There's a DecoderSpecificInfo at the end, let's read it
const decoderSpecificInfoTag = this.metadataReader.readU8();
assert(decoderSpecificInfoTag === 0x05); // DecoderSpecificInfo
const decoderSpecificInfoLength = this.metadataReader.readIsomVariableInteger();
track.info.codecDescription = this.metadataReader.readBytes(decoderSpecificInfoLength);
if (track.info.codec === 'aac') {
// Let's try to deduce more accurate values directly from the AudioSpecificConfig:
const audioSpecificConfig = parseAacAudioSpecificConfig(track.info.codecDescription);
// Values the parser reports as null leave the track's existing values in place
if (audioSpecificConfig.numberOfChannels !== null) {
track.info.numberOfChannels = audioSpecificConfig.numberOfChannels;
}
if (audioSpecificConfig.sampleRate !== null) {
track.info.sampleRate = audioSpecificConfig.sampleRate;
}
}
}
}; break;
case 'enda': {
	// Endianness marker for PCM audio: when the low byte of the 16-bit value is non-zero, every big-endian
	// PCM codec is swapped for its little-endian counterpart. All other codecs are left as they are.
	const audioTrack = this.currentTrack;
	assert(audioTrack && audioTrack.info?.type === 'audio');

	const lowByte = this.metadataReader.readU16() & 0xff; // 0xff is from FFmpeg
	if (lowByte !== 0) {
		switch (audioTrack.info.codec) {
			case 'pcm-s16be':
				audioTrack.info.codec = 'pcm-s16';
				break;
			case 'pcm-s24be':
				audioTrack.info.codec = 'pcm-s24';
				break;
			case 'pcm-s32be':
				audioTrack.info.codec = 'pcm-s32';
				break;
			case 'pcm-f32be':
				audioTrack.info.codec = 'pcm-f32';
				break;
			case 'pcm-f64be':
				audioTrack.info.codec = 'pcm-f64';
				break;
			default:
				break;
		}
	}
}; break;
case 'pcmC': {
	// PCM configuration: refines the placeholder codec ('pcm-s16be' for integer PCM, 'pcm-f32be' for float
	// PCM) into the concrete codec using the endianness bit and the sample size. An unexpected sample size
	// logs a warning and resets the codec to null. Any other current codec is left untouched.
	const audioTrack = this.currentTrack;
	assert(audioTrack && audioTrack.info?.type === 'audio');

	const reader = this.metadataReader;
	reader.pos += 1 + 3; // Version + flags

	// ISO/IEC 23003-5
	const littleEndian = (reader.readU8() & 0x01) !== 0; // Lowest bit of the format flags
	const pcmSampleSize = reader.readU8();

	if (audioTrack.info.codec === 'pcm-s16be') {
		// ipcm
		switch (pcmSampleSize) {
			case 16:
				audioTrack.info.codec = littleEndian ? 'pcm-s16' : 'pcm-s16be';
				break;
			case 24:
				audioTrack.info.codec = littleEndian ? 'pcm-s24' : 'pcm-s24be';
				break;
			case 32:
				audioTrack.info.codec = littleEndian ? 'pcm-s32' : 'pcm-s32be';
				break;
			default:
				console.warn(`Invalid ipcm sample size ${pcmSampleSize}.`);
				audioTrack.info.codec = null;
				break;
		}
	} else if (audioTrack.info.codec === 'pcm-f32be') {
		// fpcm
		switch (pcmSampleSize) {
			case 32:
				audioTrack.info.codec = littleEndian ? 'pcm-f32' : 'pcm-f32be';
				break;
			case 64:
				audioTrack.info.codec = littleEndian ? 'pcm-f64' : 'pcm-f64be';
				break;
			default:
				console.warn(`Invalid fpcm sample size ${pcmSampleSize}.`);
				audioTrack.info.codec = null;
				break;
		}
	}
}; break;
case 'dOps': { // Used for Opus audio
	// Reads the Opus-specific fields and re-packs them into an 'OpusHead' header, which becomes the track's
	// codec description. Channel count and sample rate of the track are taken from this box as well.
	const audioTrack = this.currentTrack;
	assert(audioTrack && audioTrack.info?.type === 'audio');

	const reader = this.metadataReader;
	reader.pos += 1; // Version

	// https://www.opus-codec.org/docs/opus_in_isobmff.html
	const channelCount = reader.readU8();
	const preSkipSamples = reader.readU16();
	const originalSampleRate = reader.readU32();
	const gain = reader.readI16();
	const mappingFamily = reader.readU8();
	// A mapping table (2 + channelCount bytes) is only read when the mapping family is non-zero
	const mappingTable: Uint8Array = mappingFamily !== 0
		? reader.readBytes(2 + channelCount)
		: new Uint8Array(0);

	// https://datatracker.ietf.org/doc/html/draft-ietf-codec-oggopus-06
	const FIXED_HEADER_SIZE = 8 + 1 + 1 + 2 + 4 + 2 + 1; // Everything up to the mapping table
	const header = new Uint8Array(FIXED_HEADER_SIZE + mappingTable.byteLength);
	const headerView = new DataView(header.buffer);

	// Magic signature, written as two big-endian words
	headerView.setUint32(0, 0x4f707573, false); // 'Opus'
	headerView.setUint32(4, 0x48656164, false); // 'Head'
	headerView.setUint8(8, 1); // Version
	headerView.setUint8(9, channelCount);
	// The multi-byte fields below are written little-endian
	headerView.setUint16(10, preSkipSamples, true);
	headerView.setUint32(12, originalSampleRate, true);
	headerView.setInt16(16, gain, true);
	headerView.setUint8(18, mappingFamily);
	header.set(mappingTable, FIXED_HEADER_SIZE);

	audioTrack.info.codecDescription = header;
	audioTrack.info.numberOfChannels = channelCount;
	audioTrack.info.sampleRate = originalSampleRate;
}; break;
case 'dfLa': { // Used for FLAC audio
// Walks the FLAC metadata blocks stored in this box, pulls sample rate and channel count out of the
// STREAMINFO block, and finally stores 'fLaC' followed by all visited metadata blocks as the codec description.
const track = this.currentTrack;
assert(track && track.info?.type === 'audio');
this.metadataReader.pos += 4; // Version + flags
// https://datatracker.ietf.org/doc/rfc9639/
const BLOCK_TYPE_MASK = 0x7f;
const LAST_METADATA_BLOCK_FLAG_MASK = 0x80;
// Remember where the metadata blocks begin so they can be re-read as one byte range afterwards
const startPos = this.metadataReader.pos;
while (this.metadataReader.pos < boxEndPos) {
// Block header: one byte holding the "last block" flag (top bit) and the block type (low 7 bits), followed
// by a 24-bit block length
const flagAndType = this.metadataReader.readU8();
const metadataBlockLength = this.metadataReader.readU24();
const type = flagAndType & BLOCK_TYPE_MASK;
// It's a STREAMINFO block; let's extract the actual sample rate and channel count
if (type === 0) {
// Skip 10 bytes preceding the sample rate (presumably the min/max block and frame sizes - confirm
// against RFC 9639)
this.metadataReader.pos += 10;
// Extract sample rate
// The sample rate sits in the top 20 bits of this word; the next 3 bits hold the channel count minus one
const word = this.metadataReader.readU32();
const sampleRate = word >>> 12;
const numberOfChannels = ((word >> 9) & 0b111) + 1;
track.info.sampleRate = sampleRate;
track.info.numberOfChannels = numberOfChannels;
// Skip the remaining 20 bytes; 10 + 4 + 20 = 34 bytes are consumed here regardless of metadataBlockLength
this.metadataReader.pos += 20;
} else {
// Simply skip ahead to the next block
this.metadataReader.pos += metadataBlockLength;
}
// Stop after the block that is flagged as the last one
if (flagAndType & LAST_METADATA_BLOCK_FLAG_MASK) {
break;
}
}
// Rewind and read every visited block (headers included) as a single byte range
const endPos = this.metadataReader.pos;
this.metadataReader.pos = startPos;
const bytes = this.metadataReader.readBytes(endPos - startPos);
const description = new Uint8Array(4 + bytes.byteLength);
const view = new DataView(description.buffer);
view.setUint32(0, 0x664c6143, false); // 'fLaC'
description.set(bytes, 4);
// Set the codec description to be 'fLaC' + all metadata blocks
track.info.codecDescription = description;
}; break;
case 'stts': {
	// Decoding time-to-sample table: each entry covers a run of `count` samples sharing the same `delta`.
	// Alongside the raw values, the index and decode timestamp at which each run begins are stored.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	const runCount = reader.readU32();
	let nextSampleIndex = 0;
	let nextDecodeTimestamp = 0;

	for (let run = 0; run < runCount; run++) {
		const count = reader.readU32();
		const delta = reader.readU32();

		table.sampleTimingEntries.push({
			startIndex: nextSampleIndex,
			startDecodeTimestamp: nextDecodeTimestamp,
			count,
			delta,
		});

		// Advance the running totals past this run
		nextSampleIndex += count;
		nextDecodeTimestamp += count * delta;
	}
}; break;
case 'ctts': {
	// Composition time offsets: each entry applies one signed offset to a run of `count` samples. The index
	// of the first sample of each run is stored with it.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 1 + 3; // Version + flags

	const runCount = reader.readU32();
	let firstSampleOfRun = 0;

	for (let run = 0; run < runCount; run++) {
		const count = reader.readU32();
		const offset = reader.readI32(); // Always read as a signed value

		table.sampleCompositionTimeOffsets.push({
			startIndex: firstSampleOfRun,
			count,
			offset,
		});

		firstSampleOfRun += count;
	}
}; break;
case 'stsz': {
	// Sample sizes: a non-zero leading size applies to every sample and is stored as a single entry;
	// otherwise one size per sample follows and all of them are stored.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	const uniformSize = reader.readU32();
	const sampleCount = reader.readU32();

	if (uniformSize !== 0) {
		table.sampleSizes.push(uniformSize);
	} else {
		for (let sample = 0; sample < sampleCount; sample++) {
			table.sampleSizes.push(reader.readU32());
		}
	}
}; break;
case 'stz2': {
	// Compact sample sizes: every size occupies `fieldSize` bits, so the sizes are unpacked through a
	// bitstream instead of being read as whole bytes.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4 + 3; // Version + flags, followed by 3 reserved bytes

	const fieldSize = reader.readU8(); // in bits
	const sampleCount = reader.readU32();

	// Round up to whole bytes so that a trailing partial byte is still covered
	const packedByteLength = Math.ceil((sampleCount * fieldSize) / 8);
	const packedSizes = new Bitstream(reader.readBytes(packedByteLength));

	for (let sample = 0; sample < sampleCount; sample++) {
		table.sampleSizes.push(packedSizes.readBits(fieldSize));
	}
}; break;
case 'stss': {
	// Sync sample table: replaces the track's list of key sample indices with the (0-indexed) entries of
	// this box.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	// The fresh list is attached to the table right away and then filled in place
	const keyIndices: number[] = [];
	table.keySampleIndices = keyIndices;

	const entryCount = reader.readU32();
	for (let entry = 0; entry < entryCount; entry++) {
		keyIndices.push(reader.readU32() - 1); // Convert to 0-indexed
	}

	// Some files don't mark the first sample a key sample, which is basically almost always incorrect.
	// Here, we correct for that mistake:
	if (keyIndices[0] !== 0) {
		keyIndices.unshift(0);
	}
}; break;
case 'stsc': {
	// Sample-to-chunk table: entries are appended first with a placeholder start sample index, then a second
	// pass over the whole list fills in the index of the first sample each entry covers.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	const entries = table.sampleToChunk;

	reader.pos += 4;
	const entryCount = reader.readU32();

	for (let entry = 0; entry < entryCount; entry++) {
		// Object properties are evaluated top to bottom, which matches the order of the fields in the box
		entries.push({
			startSampleIndex: -1,
			startChunkIndex: reader.readU32() - 1, // Convert to 0-indexed
			samplesPerChunk: reader.readU32(),
			sampleDescriptionIndex: reader.readU32(),
		});
	}

	let firstSampleIndex = 0;
	for (let index = 0; index < entries.length; index++) {
		const current = entries[index]!;
		current.startSampleIndex = firstSampleIndex;

		// All but the last entry span the chunks up to where the following entry begins
		const following = entries[index + 1];
		if (following) {
			const chunkSpan = following.startChunkIndex - current.startChunkIndex;
			firstSampleIndex += chunkSpan * current.samplesPerChunk;
		}
	}
}; break;
case 'stco': {
	// Chunk offset table with 32-bit offsets; every offset is appended to the track's chunk offsets.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	for (let remaining = reader.readU32(); remaining > 0; remaining--) {
		table.chunkOffsets.push(reader.readU32());
	}
}; break;
case 'co64': {
	// Chunk offset table with 64-bit offsets; every offset is appended to the track's chunk offsets.
	const track = this.currentTrack;
	assert(track);

	const table = track.sampleTable;
	if (!table) {
		break;
	}

	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	for (let remaining = reader.readU32(); remaining > 0; remaining--) {
		table.chunkOffsets.push(reader.readU64());
	}
}; break;
case 'mvex': {
// Encountering this box marks the file as fragmented. Its content range is then handed to
// readContiguousBoxes (presumably so child boxes such as 'mehd' and 'trex' are handled by this same
// switch - confirm against readContiguousBoxes).
this.isFragmented = true;
this.readContiguousBoxes(boxInfo.contentSize);
}; break;
case 'mehd': {
	// Overwrites the movie duration (in the movie timescale) with the fragment duration stored in this box.
	const reader = this.metadataReader;

	const version = reader.readU8();
	reader.pos += 3; // Flags

	// Version 1 stores the duration as a 64-bit value, every other version as a 32-bit value
	if (version === 1) {
		this.movieDurationInTimescale = reader.readU64();
	} else {
		this.movieDurationInTimescale = reader.readU32();
	}
}; break;
case 'trex': {
	// Per-track defaults for fragments: five consecutive 32-bit values, recorded under the track ID.
	const reader = this.metadataReader;
	reader.pos += 4; // Version + flags

	// We store these separately rather than in the tracks since the tracks may not exist yet.
	// Object properties are evaluated top to bottom, which matches the order of the fields in the box.
	this.fragmentTrackDefaults.push({
		trackId: reader.readU32(),
		defaultSampleDescriptionIndex: reader.readU32(),
		defaultSampleDuration: reader.readU32(),
		defaultSampleSize: reader.readU32(),
		defaultSampleFlags: reader.readU32(),
	});
}; break;
case 'tfra': {
const version = this.metadataReader.readU8();
this.metadataReader.pos += 3; // Flags
const tra