UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

1,114 lines 107 kB
/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */
import { AVC_LEVEL_TABLE, VP9_LEVEL_TABLE } from './codec.js';
import { assert, assertNever, base64ToBytes, bytesToBase64, keyValueIterator, getUint24, last, readExpGolomb, readSignedExpGolomb, textDecoder, textEncoder, toDataView, toUint8Array, getChromiumVersion, isChromium, setUint24, } from './misc.js';
import { AC3_SAMPLE_RATES, EAC3_REDUCED_SAMPLE_RATES } from '../shared/ac3-misc.js';
import { Bitstream } from '../shared/bitstream.js';

// References for AVC/HEVC code:
// ISO 14496-15
// Rec. ITU-T H.264
// Rec. ITU-T H.265
// https://stackoverflow.com/questions/24884827

/**
 * Two-way lookup for the AVC NAL unit types used in this file:
 * `AvcNalUnitType.SPS === 7` and `AvcNalUnitType[7] === 'SPS'`.
 */
export const AvcNalUnitType = {};
for (const [name, value] of [
    ['NON_IDR_SLICE', 1],
    ['SLICE_DPA', 2],
    ['SLICE_DPB', 3],
    ['SLICE_DPC', 4],
    ['IDR', 5],
    ['SEI', 6],
    ['SPS', 7],
    ['PPS', 8],
    ['AUD', 9],
    ['SPS_EXT', 13],
]) {
    // Forward (name -> code) and reverse (code -> name) entries.
    AvcNalUnitType[name] = value;
    AvcNalUnitType[value] = name;
}

/**
 * Two-way lookup for the HEVC NAL unit types used in this file:
 * `HevcNalUnitType.SPS_NUT === 33` and `HevcNalUnitType[33] === 'SPS_NUT'`.
 */
export const HevcNalUnitType = {};
for (const [name, value] of [
    ['RASL_N', 8],
    ['RASL_R', 9],
    ['BLA_W_LP', 16],
    ['RSV_IRAP_VCL23', 23],
    ['VPS_NUT', 32],
    ['SPS_NUT', 33],
    ['PPS_NUT', 34],
    ['AUD_NUT', 35],
    ['PREFIX_SEI_NUT', 39],
    ['SUFFIX_SEI_NUT', 40],
]) {
    // Forward (name -> code) and reverse (code -> name) entries.
    HevcNalUnitType[name] = value;
    HevcNalUnitType[value] = name;
}
/**
 * Iterates over the NAL units of an Annex B byte stream.
 *
 * NAL units are delimited by 3-byte (0x000001) or 4-byte (0x00000001) start codes. Bytes before the first
 * start code are ignored.
 *
 * @param {Uint8Array} packetData - Packet data in Annex B format.
 * @yields {{ offset: number, length: number }} Location of each NAL unit's payload (start code excluded)
 * within `packetData`.
 */
export const iterateNalUnitsInAnnexB = function* (packetData) {
    let i = 0;
    let nalStart = -1;

    while (i < packetData.length - 2) {
        // Every start code begins with a zero byte, so jump straight to the next candidate
        const zeroIndex = packetData.indexOf(0, i);
        if (zeroIndex === -1 || zeroIndex >= packetData.length - 2) {
            break;
        }
        i = zeroIndex;

        let startCodeLength = 0;
        if (
            i + 3 < packetData.length
            && packetData[i + 1] === 0
            && packetData[i + 2] === 0
            && packetData[i + 3] === 1
        ) {
            // 4-byte start code (0x00000001)
            startCodeLength = 4;
        } else if (packetData[i + 1] === 0 && packetData[i + 2] === 1) {
            // 3-byte start code (0x000001)
            startCodeLength = 3;
        }

        if (startCodeLength === 0) {
            i++;
            continue;
        }

        // This start code terminates the previous NAL unit, if there was a non-empty one
        if (nalStart !== -1 && i > nalStart) {
            yield {
                offset: nalStart,
                length: i - nalStart,
            };
        }

        nalStart = i + startCodeLength;
        i = nalStart;
    }

    // The last NAL unit runs until the end of the data
    if (nalStart !== -1 && nalStart < packetData.length) {
        yield {
            offset: nalStart,
            length: packetData.length - nalStart,
        };
    }
};

/**
 * Iterates over the NAL units of a length-prefixed byte stream, where each NAL unit is preceded by its
 * big-endian byte length.
 *
 * @param {Uint8Array} packetData - Packet data in length-prefixed format.
 * @param {1 | 2 | 3 | 4} lengthSize - Size of each length prefix in bytes.
 * @yields {{ offset: number, length: number }} Location of each NAL unit's payload (prefix excluded) within
 * `packetData`. The yielded range never extends past the end of `packetData`: if a prefix announces more
 * bytes than remain (corrupt or truncated data), the length is clamped to what is available and iteration
 * ends afterwards.
 */
export const iterateNalUnitsInLengthPrefixed = function* (packetData, lengthSize) {
    let offset = 0;
    const dataView = new DataView(packetData.buffer, packetData.byteOffset, packetData.byteLength);

    while (offset + lengthSize <= packetData.length) {
        let nalUnitLength;
        if (lengthSize === 1) {
            nalUnitLength = dataView.getUint8(offset);
        } else if (lengthSize === 2) {
            nalUnitLength = dataView.getUint16(offset, false);
        } else if (lengthSize === 3) {
            nalUnitLength = getUint24(dataView, offset, false);
        } else {
            assert(lengthSize === 4);
            nalUnitLength = dataView.getUint32(offset, false);
        }

        offset += lengthSize;

        yield {
            offset,
            // Never report more bytes than the packet actually holds
            length: Math.min(nalUnitLength, packetData.length - offset),
        };

        offset += nalUnitLength;
    }
};
/**
 * Iterates over the NAL units of an AVC packet. When the decoder config has a `description`, the packet is
 * treated as length-prefixed, with the prefix size taken from the low two bits of byte 4 of the description
 * (lengthSizeMinusOne); otherwise it is treated as Annex B.
 *
 * @param {Uint8Array} packetData - The packet to scan.
 * @param {{ description?: BufferSource }} decoderConfig - Decoder configuration of the track.
 * @returns {Generator<{ offset: number, length: number }>} NAL unit locations within `packetData`.
 */
export const iterateAvcNalUnits = (packetData, decoderConfig) => {
    if (decoderConfig.description) {
        const bytes = toUint8Array(decoderConfig.description);
        const lengthSizeMinusOne = bytes[4] & 0b11;
        const lengthSize = (lengthSizeMinusOne + 1);
        return iterateNalUnitsInLengthPrefixed(packetData, lengthSize);
    } else {
        return iterateNalUnitsInAnnexB(packetData);
    }
};

/** Extracts nal_unit_type (the low 5 bits) from the first byte of an AVC NAL unit. */
export const extractNalUnitTypeForAvc = (byte) => {
    return byte & 0x1F;
};

/**
 * Returns a copy of `data` in which every 0x000003 sequence has been replaced by 0x0000, i.e. with the
 * emulation prevention bytes removed.
 *
 * @param {Uint8Array} data - NAL unit bytes.
 * @returns {Uint8Array} A new array; `data` is not modified.
 */
const removeEmulationPreventionBytes = (data) => {
    const len = data.length;
    // The output can only be shorter than the input, so the input length is a safe upper bound
    const buffer = new Uint8Array(len);
    let written = 0;

    for (let i = 0; i < len; i++) {
        if (i + 2 < len && data[i] === 0x00 && data[i + 1] === 0x00 && data[i + 2] === 0x03) {
            // Keep the two zero bytes, drop the 0x03
            buffer[written++] = 0x00;
            buffer[written++] = 0x00;
            i += 2;
        } else {
            buffer[written++] = data[i];
        }
    }

    return buffer.slice(0, written);
};

const ANNEX_B_START_CODE = new Uint8Array([0, 0, 0, 1]);

/**
 * Concatenates NAL units into a single Annex B byte stream, each preceded by a 4-byte start code.
 *
 * @param {Uint8Array[]} nalUnits - NAL unit payloads.
 * @returns {Uint8Array} The Annex B stream.
 */
export const concatNalUnitsInAnnexB = (nalUnits) => {
    const totalLength = nalUnits.reduce((a, b) => a + ANNEX_B_START_CODE.byteLength + b.byteLength, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;

    for (const nalUnit of nalUnits) {
        result.set(ANNEX_B_START_CODE, offset);
        offset += ANNEX_B_START_CODE.byteLength;
        result.set(nalUnit, offset);
        offset += nalUnit.byteLength;
    }

    return result;
};

/**
 * Concatenates NAL units into a single length-prefixed byte stream, each preceded by its big-endian byte
 * length.
 *
 * @param {Uint8Array[]} nalUnits - NAL unit payloads.
 * @param {1 | 2 | 3 | 4} lengthSize - Size of each length prefix in bytes.
 * @returns {Uint8Array} The length-prefixed stream.
 * @throws {TypeError} If `lengthSize` is not 1, 2, 3 or 4.
 * @throws {RangeError} If a NAL unit is too large for its length to fit into `lengthSize` bytes.
 */
export const concatNalUnitsInLengthPrefixed = (nalUnits, lengthSize) => {
    if (lengthSize !== 1 && lengthSize !== 2 && lengthSize !== 3 && lengthSize !== 4) {
        throw new TypeError(`Unsupported NAL unit length prefix size: ${lengthSize}`);
    }

    // Largest length that can be represented in the prefix; anything larger would silently wrap around
    const maxNalUnitLength = 2 ** (8 * lengthSize) - 1;
    let totalLength = 0;
    for (const nalUnit of nalUnits) {
        if (nalUnit.byteLength > maxNalUnitLength) {
            throw new RangeError(
                `NAL unit of ${nalUnit.byteLength} bytes does not fit a ${lengthSize}-byte length prefix`,
            );
        }
        totalLength += lengthSize + nalUnit.byteLength;
    }

    const result = new Uint8Array(totalLength);
    const dataView = new DataView(result.buffer, result.byteOffset, result.byteLength);
    let offset = 0;

    for (const nalUnit of nalUnits) {
        switch (lengthSize) {
            case 1:
                dataView.setUint8(offset, nalUnit.byteLength);
                break;
            case 2:
                dataView.setUint16(offset, nalUnit.byteLength, false);
                break;
            case 3:
                setUint24(dataView, offset, nalUnit.byteLength, false);
                break;
            case 4:
                dataView.setUint32(offset, nalUnit.byteLength, false);
                break;
        }

        offset += lengthSize;
        result.set(nalUnit, offset);
        offset += nalUnit.byteLength;
    }

    return result;
};
/**
 * Concatenates AVC NAL units in the framing that matches the decoder config: length-prefixed when a
 * `description` is present, Annex B otherwise.
 */
export const concatAvcNalUnits = (nalUnits, decoderConfig) => {
    if (!decoderConfig.description) {
        // No description: stream is in Annex B format
        return concatNalUnitsInAnnexB(nalUnits);
    }

    // Stream is length-prefixed; the low two bits of byte 4 of the description hold lengthSizeMinusOne
    const description = toUint8Array(decoderConfig.description);
    const prefixSize = (description[4] & 0b11) + 1;
    return concatNalUnitsInLengthPrefixed(nalUnits, prefixSize);
};

/**
 * Builds an AvcDecoderConfigurationRecord from an AVC packet in Annex B format.
 *
 * Returns `null` when the packet contains no SPS or no PPS, or when anything throws while building the
 * record (the error is logged to the console).
 */
export const extractAvcDecoderConfigurationRecord = (packetData) => {
    try {
        // Buckets for the parameter-set NAL units we care about; all other types are ignored
        const unitsByType = new Map([
            [AvcNalUnitType.SPS, []],
            [AvcNalUnitType.PPS, []],
            [AvcNalUnitType.SPS_EXT, []],
        ]);

        for (const { offset, length } of iterateNalUnitsInAnnexB(packetData)) {
            const unit = packetData.subarray(offset, offset + length);
            unitsByType.get(extractNalUnitTypeForAvc(unit[0]))?.push(unit);
        }

        const sequenceParameterSets = unitsByType.get(AvcNalUnitType.SPS);
        const pictureParameterSets = unitsByType.get(AvcNalUnitType.PPS);
        if (sequenceParameterSets.length === 0 || pictureParameterSets.length === 0) {
            return null;
        }

        // Profile and level information comes from the first SPS
        const spsInfo = parseAvcSps(sequenceParameterSets[0]);
        assert(spsInfo !== null);

        // Only these profiles carry the extended fields at the end of the record
        const hasExtendedData = [100, 110, 122, 144].includes(spsInfo.profileIdc);
        const extendedOrNull = (value) => (hasExtendedData ? value : null);

        return {
            configurationVersion: 1,
            avcProfileIndication: spsInfo.profileIdc,
            profileCompatibility: spsInfo.constraintFlags,
            avcLevelIndication: spsInfo.levelIdc,
            lengthSizeMinusOne: 3, // Typically 4 bytes for length field
            sequenceParameterSets,
            pictureParameterSets,
            chromaFormat: extendedOrNull(spsInfo.chromaFormatIdc),
            bitDepthLumaMinus8: extendedOrNull(spsInfo.bitDepthLumaMinus8),
            bitDepthChromaMinus8: extendedOrNull(spsInfo.bitDepthChromaMinus8),
            sequenceParameterSetExt: extendedOrNull(unitsByType.get(AvcNalUnitType.SPS_EXT)),
        };
    } catch (error) {
        console.error('Error building AVC Decoder Configuration Record:', error);
        return null;
    }
};
/**
 * Serializes an AvcDecoderConfigurationRecord into the format specified in Section 5.3.3.1 of ISO 14496-15.
 *
 * Layout written: 4 header bytes, lengthSizeMinusOne, the SPS list (5-bit count), the PPS list (8-bit
 * count) and, for profiles 100/110/122/144, chroma format, bit depths and the SPS extension list. Each
 * parameter set is preceded by its 16-bit big-endian length.
 *
 * Asserts that all counts and parameter-set sizes fit into their fields; previously, oversized values were
 * silently truncated, which produced a corrupt record.
 *
 * @param record - The record to serialize. Parameter sets are expected to be `Uint8Array`s.
 * @returns {Uint8Array} The serialized record.
 */
export const serializeAvcDecoderConfigurationRecord = (record) => {
    const { sequenceParameterSets, pictureParameterSets } = record;
    const hasExtendedFields = record.avcProfileIndication === 100
        || record.avcProfileIndication === 110
        || record.avcProfileIndication === 122
        || record.avcProfileIndication === 144;

    assert(sequenceParameterSets.length <= 0x1F); // numOfSequenceParameterSets is 5 bits wide
    assert(pictureParameterSets.length <= 0xFF); // numOfPictureParameterSets is 8 bits wide

    let sequenceParameterSetExt = [];
    if (hasExtendedFields) {
        assert(record.chromaFormat !== null);
        assert(record.bitDepthLumaMinus8 !== null);
        assert(record.bitDepthChromaMinus8 !== null);
        assert(record.sequenceParameterSetExt !== null);
        sequenceParameterSetExt = record.sequenceParameterSetExt;
        assert(sequenceParameterSetExt.length <= 0xFF); // numOfSequenceParameterSetExt is 8 bits wide
    }

    // Compute the exact output size up front so that we can write into a single preallocated buffer
    let totalLength = 6 + 1 + (hasExtendedFields ? 4 : 0);
    for (const parameterSet of [...sequenceParameterSets, ...pictureParameterSets, ...sequenceParameterSetExt]) {
        assert(parameterSet.byteLength <= 0xFFFF); // Each length field is 16 bits wide
        totalLength += 2 + parameterSet.byteLength;
    }

    const bytes = new Uint8Array(totalLength);
    let offset = 0;

    const writeParameterSets = (parameterSets) => {
        for (const parameterSet of parameterSets) {
            bytes[offset++] = parameterSet.byteLength >> 8; // High byte
            bytes[offset++] = parameterSet.byteLength & 0xFF; // Low byte
            bytes.set(parameterSet, offset);
            offset += parameterSet.byteLength;
        }
    };

    // Write header
    bytes[offset++] = record.configurationVersion;
    bytes[offset++] = record.avcProfileIndication;
    bytes[offset++] = record.profileCompatibility;
    bytes[offset++] = record.avcLevelIndication;
    bytes[offset++] = 0xFC | (record.lengthSizeMinusOne & 0x03); // Reserved bits (6) + lengthSizeMinusOne (2)

    // Reserved bits (3) + numOfSequenceParameterSets (5)
    bytes[offset++] = 0xE0 | (sequenceParameterSets.length & 0x1F);
    writeParameterSets(sequenceParameterSets);

    bytes[offset++] = pictureParameterSets.length;
    writeParameterSets(pictureParameterSets);

    if (hasExtendedFields) {
        bytes[offset++] = 0xFC | (record.chromaFormat & 0x03); // Reserved bits + chroma_format
        bytes[offset++] = 0xF8 | (record.bitDepthLumaMinus8 & 0x07); // Reserved bits + bit_depth_luma_minus8
        bytes[offset++] = 0xF8 | (record.bitDepthChromaMinus8 & 0x07); // Reserved bits + bit_depth_chroma_minus8
        bytes[offset++] = sequenceParameterSetExt.length;
        writeParameterSets(sequenceParameterSetExt);
    }

    return bytes;
};
/**
 * Deserializes an AvcDecoderConfigurationRecord from the format specified in Section 5.3.3.1 of ISO 14496-15.
 *
 * The returned parameter sets are `subarray` views into `data`, not copies. Returns `null` (after logging to
 * the console) if reading throws, for example because the data ends early.
 */
export const deserializeAvcDecoderConfigurationRecord = (data) => {
    try {
        const view = toDataView(data);
        let cursor = 0;

        const readByte = () => view.getUint8(cursor++);

        // Reads `count` parameter sets, each stored as a 16-bit big-endian length followed by its bytes
        const readParameterSets = (count) => {
            const parameterSets = [];
            while (parameterSets.length < count) {
                const size = view.getUint16(cursor, false);
                cursor += 2;
                parameterSets.push(data.subarray(cursor, cursor + size));
                cursor += size;
            }
            return parameterSets;
        };

        // Header
        const configurationVersion = readByte();
        const avcProfileIndication = readByte();
        const profileCompatibility = readByte();
        const avcLevelIndication = readByte();
        const lengthSizeMinusOne = readByte() & 0x03;

        // SPS count lives in the low 5 bits, PPS count is a full byte
        const sequenceParameterSets = readParameterSets(readByte() & 0x1F);
        const pictureParameterSets = readParameterSets(readByte());

        const record = {
            configurationVersion,
            avcProfileIndication,
            profileCompatibility,
            avcLevelIndication,
            lengthSizeMinusOne,
            sequenceParameterSets,
            pictureParameterSets,
            chromaFormat: null,
            bitDepthLumaMinus8: null,
            bitDepthChromaMinus8: null,
            sequenceParameterSetExt: null,
        };

        // The extended fields only exist for these profiles, and only if at least 4 more bytes are present
        const isExtendedProfile = [100, 110, 122, 144].includes(avcProfileIndication);
        if (isExtendedProfile && cursor + 4 <= data.length) {
            record.chromaFormat = readByte() & 0x03;
            record.bitDepthLumaMinus8 = readByte() & 0x07;
            record.bitDepthChromaMinus8 = readByte() & 0x07;
            record.sequenceParameterSetExt = readParameterSets(readByte());
        }

        return record;
    } catch (error) {
        console.error('Error deserializing AVC Decoder Configuration Record:', error);
        return null;
    }
};
/** Maps aspect_ratio_idc values (shared by the AVC and HEVC VUI) to sample aspect ratios. */
const AVC_HEVC_ASPECT_RATIO_IDC_TABLE = {
    1: { num: 1, den: 1 },
    2: { num: 12, den: 11 },
    3: { num: 10, den: 11 },
    4: { num: 16, den: 11 },
    5: { num: 40, den: 33 },
    6: { num: 24, den: 11 },
    7: { num: 20, den: 11 },
    8: { num: 32, den: 11 },
    9: { num: 80, den: 33 },
    10: { num: 18, den: 11 },
    11: { num: 15, den: 11 },
    12: { num: 64, den: 33 },
    13: { num: 160, den: 99 },
    14: { num: 4, den: 3 },
    15: { num: 3, den: 2 },
    16: { num: 2, den: 1 },
};

/** profile_idc values for which the SPS carries chroma_format_idc, bit depths and scaling matrices. */
const AVC_PROFILES_WITH_CHROMA_INFO = [100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135];

/** Reads past the seq_scaling_list entries of an AVC SPS (called when seq_scaling_matrix_present_flag is 1). */
const skipAvcScalingMatrix = (bitstream, chromaFormatIdc) => {
    const listCount = chromaFormatIdc !== 3 ? 8 : 12;
    for (let i = 0; i < listCount; i++) {
        const seqScalingListPresentFlag = bitstream.readBits(1);
        if (!seqScalingListPresentFlag) {
            continue;
        }

        const sizeOfScalingList = i < 6 ? 16 : 64;
        let lastScale = 8;
        let nextScale = 8;
        for (let j = 0; j < sizeOfScalingList; j++) {
            // Once nextScale hits 0, no further delta_scale values are coded for this list
            if (nextScale !== 0) {
                const deltaScale = readSignedExpGolomb(bitstream);
                nextScale = (lastScale + deltaScale + 256) % 256;
            }
            lastScale = nextScale === 0 ? lastScale : nextScale;
        }
    }
};

/**
 * Parses an AVC SPS (Sequence Parameter Set) to extract basic information.
 *
 * @param {Uint8Array} sps - The SPS NAL unit, including its one-byte NAL header and any emulation prevention
 * bytes.
 * @returns An object with profile/level, chroma format, bit depths, coded and display (cropped) dimensions,
 * pixel aspect ratio, color information and frame reordering limits; or `null` if the NAL unit is not an SPS
 * or parsing throws (the error is logged to the console).
 *
 * `codedHeight` and `displayHeight` always describe a full frame: for streams with frame_mbs_only_flag = 0,
 * pic_height_in_map_units counts field map units, so the frame height is twice that.
 */
export const parseAvcSps = (sps) => {
    try {
        const bitstream = new Bitstream(removeEmulationPreventionBytes(sps));

        bitstream.skipBits(1); // forbidden_zero_bit
        bitstream.skipBits(2); // nal_ref_idc
        const nalUnitType = bitstream.readBits(5);
        if (nalUnitType !== 7) { // SPS NAL unit type is 7
            return null;
        }

        const profileIdc = bitstream.readAlignedByte();
        const constraintFlags = bitstream.readAlignedByte();
        const levelIdc = bitstream.readAlignedByte();
        readExpGolomb(bitstream); // seq_parameter_set_id

        // "When chroma_format_idc is not present, it shall be inferred to be equal to 1 (4:2:0 chroma format)."
        let chromaFormatIdc = 1;
        // "When bit_depth_luma_minus8 is not present, it shall be inferred to be equal to 0."
        let bitDepthLumaMinus8 = 0;
        // "When bit_depth_chroma_minus8 is not present, it shall be inferred to be equal to 0."
        let bitDepthChromaMinus8 = 0;
        // "When separate_colour_plane_flag is not present, it shall be inferred to be equal to 0."
        let separateColourPlaneFlag = 0;

        if (AVC_PROFILES_WITH_CHROMA_INFO.includes(profileIdc)) {
            chromaFormatIdc = readExpGolomb(bitstream);
            if (chromaFormatIdc === 3) {
                separateColourPlaneFlag = bitstream.readBits(1);
            }
            bitDepthLumaMinus8 = readExpGolomb(bitstream);
            bitDepthChromaMinus8 = readExpGolomb(bitstream);
            bitstream.skipBits(1); // qpprime_y_zero_transform_bypass_flag

            const seqScalingMatrixPresentFlag = bitstream.readBits(1);
            if (seqScalingMatrixPresentFlag) {
                skipAvcScalingMatrix(bitstream, chromaFormatIdc);
            }
        }

        readExpGolomb(bitstream); // log2_max_frame_num_minus4

        const picOrderCntType = readExpGolomb(bitstream);
        if (picOrderCntType === 0) {
            readExpGolomb(bitstream); // log2_max_pic_order_cnt_lsb_minus4
        } else if (picOrderCntType === 1) {
            bitstream.skipBits(1); // delta_pic_order_always_zero_flag
            readSignedExpGolomb(bitstream); // offset_for_non_ref_pic
            readSignedExpGolomb(bitstream); // offset_for_top_to_bottom_field
            const numRefFramesInPicOrderCntCycle = readExpGolomb(bitstream);
            for (let i = 0; i < numRefFramesInPicOrderCntCycle; i++) {
                readSignedExpGolomb(bitstream); // offset_for_ref_frame[i]
            }
        }

        readExpGolomb(bitstream); // max_num_ref_frames
        bitstream.skipBits(1); // gaps_in_frame_num_value_allowed_flag

        const picWidthInMbsMinus1 = readExpGolomb(bitstream);
        const picHeightInMapUnitsMinus1 = readExpGolomb(bitstream);

        const frameMbsOnlyFlag = bitstream.readBits(1);
        if (!frameMbsOnlyFlag) {
            bitstream.skipBits(1); // mb_adaptive_frame_field_flag
        }

        // "FrameHeightInMbs = ( 2 − frame_mbs_only_flag ) * PicHeightInMapUnits": when fields are allowed, a
        // map unit covers two macroblock rows of the frame.
        const frameHeightInMbs = (2 - frameMbsOnlyFlag) * (picHeightInMapUnitsMinus1 + 1);
        const codedWidth = 16 * (picWidthInMbsMinus1 + 1);
        const codedHeight = 16 * frameHeightInMbs;
        let displayWidth = codedWidth;
        let displayHeight = codedHeight;

        bitstream.skipBits(1); // direct_8x8_inference_flag

        const frameCroppingFlag = bitstream.readBits(1);
        if (frameCroppingFlag) {
            const frameCropLeftOffset = readExpGolomb(bitstream);
            const frameCropRightOffset = readExpGolomb(bitstream);
            const frameCropTopOffset = readExpGolomb(bitstream);
            const frameCropBottomOffset = readExpGolomb(bitstream);

            let cropUnitX;
            let cropUnitY;
            const chromaArrayType = separateColourPlaneFlag === 0 ? chromaFormatIdc : 0;
            if (chromaArrayType === 0) {
                // "If ChromaArrayType is equal to 0, CropUnitX and CropUnitY are derived as:"
                cropUnitX = 1;
                cropUnitY = 2 - frameMbsOnlyFlag;
            } else {
                // "Otherwise (ChromaArrayType is equal to 1, 2, or 3), CropUnitX and CropUnitY are derived as:"
                const subWidthC = chromaFormatIdc === 3 ? 1 : 2;
                const subHeightC = chromaFormatIdc === 1 ? 2 : 1;
                cropUnitX = subWidthC;
                cropUnitY = subHeightC * (2 - frameMbsOnlyFlag);
            }

            displayWidth -= (cropUnitX * (frameCropLeftOffset + frameCropRightOffset));
            displayHeight -= (cropUnitY * (frameCropTopOffset + frameCropBottomOffset));
        }

        // 2 = unspecified
        let colourPrimaries = 2;
        let transferCharacteristics = 2;
        let matrixCoefficients = 2;
        let fullRangeFlag = 0;
        let pixelAspectRatio = { num: 1, den: 1 };
        let numReorderFrames = null;
        let maxDecFrameBuffering = null;

        const vuiParametersPresentFlag = bitstream.readBits(1);
        if (vuiParametersPresentFlag) {
            const aspectRatioInfoPresentFlag = bitstream.readBits(1);
            if (aspectRatioInfoPresentFlag) {
                const aspectRatioIdc = bitstream.readBits(8);
                if (aspectRatioIdc === 255) {
                    // Extended_SAR
                    pixelAspectRatio = {
                        num: bitstream.readBits(16),
                        den: bitstream.readBits(16),
                    };
                } else {
                    const aspectRatio = AVC_HEVC_ASPECT_RATIO_IDC_TABLE[aspectRatioIdc];
                    if (aspectRatio) {
                        pixelAspectRatio = aspectRatio;
                    }
                }
            }

            const overscanInfoPresentFlag = bitstream.readBits(1);
            if (overscanInfoPresentFlag) {
                bitstream.skipBits(1); // overscan_appropriate_flag
            }

            const videoSignalTypePresentFlag = bitstream.readBits(1);
            if (videoSignalTypePresentFlag) {
                bitstream.skipBits(3); // video_format
                fullRangeFlag = bitstream.readBits(1);
                const colourDescriptionPresentFlag = bitstream.readBits(1);
                if (colourDescriptionPresentFlag) {
                    colourPrimaries = bitstream.readBits(8);
                    transferCharacteristics = bitstream.readBits(8);
                    matrixCoefficients = bitstream.readBits(8);
                }
            }

            const chromaLocInfoPresentFlag = bitstream.readBits(1);
            if (chromaLocInfoPresentFlag) {
                readExpGolomb(bitstream); // chroma_sample_loc_type_top_field
                readExpGolomb(bitstream); // chroma_sample_loc_type_bottom_field
            }

            const timingInfoPresentFlag = bitstream.readBits(1);
            if (timingInfoPresentFlag) {
                bitstream.skipBits(32); // num_units_in_tick
                bitstream.skipBits(32); // time_scale
                bitstream.skipBits(1); // fixed_frame_rate_flag
            }

            const nalHrdParametersPresentFlag = bitstream.readBits(1);
            if (nalHrdParametersPresentFlag) {
                skipAvcHrdParameters(bitstream);
            }

            const vclHrdParametersPresentFlag = bitstream.readBits(1);
            if (vclHrdParametersPresentFlag) {
                skipAvcHrdParameters(bitstream);
            }

            if (nalHrdParametersPresentFlag || vclHrdParametersPresentFlag) {
                bitstream.skipBits(1); // low_delay_hrd_flag
            }

            bitstream.skipBits(1); // pic_struct_present_flag

            const bitstreamRestrictionFlag = bitstream.readBits(1);
            if (bitstreamRestrictionFlag) {
                bitstream.skipBits(1); // motion_vectors_over_pic_boundaries_flag
                readExpGolomb(bitstream); // max_bytes_per_pic_denom
                readExpGolomb(bitstream); // max_bits_per_mb_denom
                readExpGolomb(bitstream); // log2_max_mv_length_horizontal
                readExpGolomb(bitstream); // log2_max_mv_length_vertical
                numReorderFrames = readExpGolomb(bitstream);
                maxDecFrameBuffering = readExpGolomb(bitstream);
            }
        }

        if (numReorderFrames === null) {
            // Not signalled in the bitstream, so infer the values as the spec prescribes
            assert(maxDecFrameBuffering === null);

            const constraintSet3Flag = constraintFlags & 0b00010000;
            if (
                (profileIdc === 44 || profileIdc === 86 || profileIdc === 100
                    || profileIdc === 110 || profileIdc === 122 || profileIdc === 244)
                && constraintSet3Flag
            ) {
                // "If profile_idc is equal to 44, 86, 100, 110, 122, or 244 and constraint_set3_flag is equal to 1, the
                // value of num_reorder_frames shall be inferred to be equal to 0."
                numReorderFrames = 0;
                maxDecFrameBuffering = 0;
            } else {
                const picWidthInMbs = picWidthInMbsMinus1 + 1;
                const levelInfo = AVC_LEVEL_TABLE.find(x => x.level >= levelIdc) ?? last(AVC_LEVEL_TABLE);

                // "MaxDpbFrames is equal to
                // Min( MaxDpbMbs / ( picWidthInMbs * frameHeightInMbs ), 16 ) and MaxDpbMbs is given in Table A-1."
                const maxDpbFrames = Math.min(
                    Math.floor(levelInfo.maxDpbMbs / (picWidthInMbs * frameHeightInMbs)),
                    16,
                );

                // "Otherwise, [...] the value of num_reorder_frames shall be inferred to be equal to MaxDpbFrames."
                numReorderFrames = maxDpbFrames;
                maxDecFrameBuffering = maxDpbFrames;
            }
        }

        assert(maxDecFrameBuffering !== null);

        return {
            profileIdc,
            constraintFlags,
            levelIdc,
            frameMbsOnlyFlag,
            chromaFormatIdc,
            bitDepthLumaMinus8,
            bitDepthChromaMinus8,
            codedWidth,
            codedHeight,
            displayWidth,
            displayHeight,
            pixelAspectRatio,
            colourPrimaries,
            matrixCoefficients,
            transferCharacteristics,
            fullRangeFlag,
            numReorderFrames,
            maxDecFrameBuffering,
        };
    } catch (error) {
        console.error('Error parsing AVC SPS:', error);
        return null;
    }
};

/** Reads past an hrd_parameters() structure in the AVC VUI. */
const skipAvcHrdParameters = (bitstream) => {
    const cpb_cnt_minus1 = readExpGolomb(bitstream);
    bitstream.skipBits(4); // bit_rate_scale
    bitstream.skipBits(4); // cpb_size_scale

    for (let i = 0; i <= cpb_cnt_minus1; i++) {
        readExpGolomb(bitstream); // bit_rate_value_minus1[i]
        readExpGolomb(bitstream); // cpb_size_value_minus1[i]
        bitstream.skipBits(1); // cbr_flag[i]
    }

    bitstream.skipBits(5); // initial_cpb_removal_delay_length_minus1
    bitstream.skipBits(5); // cpb_removal_delay_length_minus1
    bitstream.skipBits(5); // dpb_output_delay_length_minus1
    bitstream.skipBits(5); // time_offset_length
};

/**
 * Concatenates HEVC NAL units in the framing that matches the decoder config: length-prefixed when a
 * `description` is present (prefix size from the low two bits of byte 21), Annex B otherwise.
 */
export const concatHevcNalUnits = (nalUnits, decoderConfig) => {
    if (decoderConfig.description) {
        // Stream is length-prefixed. Let's extract the size of the length prefix from the decoder config
        const bytes = toUint8Array(decoderConfig.description);
        const lengthSizeMinusOne = bytes[21] & 0b11;
        const lengthSize = (lengthSizeMinusOne + 1);
        return concatNalUnitsInLengthPrefixed(nalUnits, lengthSize);
    } else {
        // Stream is in Annex B format
        return concatNalUnitsInAnnexB(nalUnits);
    }
};
/**
 * Iterates over the NAL units of an HEVC packet. When the decoder config has a `description`, the packet is
 * treated as length-prefixed, with the prefix size taken from the low two bits of byte 21 of the description
 * (lengthSizeMinusOne); otherwise it is treated as Annex B.
 */
export const iterateHevcNalUnits = (packetData, decoderConfig) => {
    if (decoderConfig.description) {
        const bytes = toUint8Array(decoderConfig.description);
        const lengthSizeMinusOne = bytes[21] & 0b11;
        const lengthSize = (lengthSizeMinusOne + 1);
        return iterateNalUnitsInLengthPrefixed(packetData, lengthSize);
    } else {
        return iterateNalUnitsInAnnexB(packetData);
    }
};

/** Extracts nal_unit_type (bits 1-6) from the first byte of an HEVC NAL unit header. */
export const extractNalUnitTypeForHevc = (byte) => {
    return (byte >> 1) & 0x3F;
};

/**
 * Parses an HEVC SPS (Sequence Parameter Set) to extract video information.
 *
 * @param {Uint8Array} sps - The SPS NAL unit, including its two-byte NAL header and any emulation prevention
 * bytes.
 * @returns An object with display dimensions (after applying the conformance window), pixel aspect ratio,
 * color information, profile/tier/level fields, chroma format and bit depths; or `null` if parsing throws
 * (the error is logged to the console).
 */
export const parseHevcSps = (sps) => {
    try {
        const bitstream = new Bitstream(removeEmulationPreventionBytes(sps));

        bitstream.skipBits(16); // NAL header
        bitstream.readBits(4); // sps_video_parameter_set_id
        const spsMaxSubLayersMinus1 = bitstream.readBits(3);
        const spsTemporalIdNestingFlag = bitstream.readBits(1);

        const {
            general_profile_space,
            general_tier_flag,
            general_profile_idc,
            general_profile_compatibility_flags,
            general_constraint_indicator_flags,
            general_level_idc,
        } = parseProfileTierLevel(bitstream, spsMaxSubLayersMinus1);

        readExpGolomb(bitstream); // sps_seq_parameter_set_id

        const chromaFormatIdc = readExpGolomb(bitstream);
        let separateColourPlaneFlag = 0;
        if (chromaFormatIdc === 3) {
            separateColourPlaneFlag = bitstream.readBits(1);
        }

        const picWidthInLumaSamples = readExpGolomb(bitstream);
        const picHeightInLumaSamples = readExpGolomb(bitstream);
        let displayWidth = picWidthInLumaSamples;
        let displayHeight = picHeightInLumaSamples;

        if (bitstream.readBits(1)) { // conformance_window_flag
            const confWinLeftOffset = readExpGolomb(bitstream);
            const confWinRightOffset = readExpGolomb(bitstream);
            const confWinTopOffset = readExpGolomb(bitstream);
            const confWinBottomOffset = readExpGolomb(bitstream);

            // SubWidthC and SubHeightC depend on chroma_format_idc and separate_colour_plane_flag
            let subWidthC = 1;
            let subHeightC = 1;
            const chromaArrayType = separateColourPlaneFlag === 0 ? chromaFormatIdc : 0;
            if (chromaArrayType === 1) {
                subWidthC = 2;
                subHeightC = 2;
            } else if (chromaArrayType === 2) {
                subWidthC = 2;
                subHeightC = 1;
            }

            displayWidth -= (confWinLeftOffset + confWinRightOffset) * subWidthC;
            displayHeight -= (confWinTopOffset + confWinBottomOffset) * subHeightC;
        }

        const bitDepthLumaMinus8 = readExpGolomb(bitstream);
        const bitDepthChromaMinus8 = readExpGolomb(bitstream);
        // Needed further down: it determines the bit width of lt_ref_pic_poc_lsb_sps
        const log2MaxPicOrderCntLsbMinus4 = readExpGolomb(bitstream);

        const spsSubLayerOrderingInfoPresentFlag = bitstream.readBits(1);
        const startI = spsSubLayerOrderingInfoPresentFlag ? 0 : spsMaxSubLayersMinus1;
        let spsMaxNumReorderPics = 0;
        for (let i = startI; i <= spsMaxSubLayersMinus1; i++) {
            readExpGolomb(bitstream); // sps_max_dec_pic_buffering_minus1[i]
            spsMaxNumReorderPics = readExpGolomb(bitstream); // sps_max_num_reorder_pics[i]
            readExpGolomb(bitstream); // sps_max_latency_increase_plus1[i]
        }

        readExpGolomb(bitstream); // log2_min_luma_coding_block_size_minus3
        readExpGolomb(bitstream); // log2_diff_max_min_luma_coding_block_size
        readExpGolomb(bitstream); // log2_min_luma_transform_block_size_minus2
        readExpGolomb(bitstream); // log2_diff_max_min_luma_transform_block_size
        readExpGolomb(bitstream); // max_transform_hierarchy_depth_inter
        readExpGolomb(bitstream); // max_transform_hierarchy_depth_intra

        if (bitstream.readBits(1)) { // scaling_list_enabled_flag
            if (bitstream.readBits(1)) { // sps_scaling_list_data_present_flag
                skipScalingListData(bitstream);
            }
        }

        bitstream.skipBits(1); // amp_enabled_flag
        bitstream.skipBits(1); // sample_adaptive_offset_enabled_flag

        if (bitstream.readBits(1)) { // pcm_enabled_flag
            bitstream.skipBits(4); // pcm_sample_bit_depth_luma_minus1
            bitstream.skipBits(4); // pcm_sample_bit_depth_chroma_minus1
            readExpGolomb(bitstream); // log2_min_pcm_luma_coding_block_size_minus3
            readExpGolomb(bitstream); // log2_diff_max_min_pcm_luma_coding_block_size
            bitstream.skipBits(1); // pcm_loop_filter_disabled_flag
        }

        const numShortTermRefPicSets = readExpGolomb(bitstream);
        skipAllStRefPicSets(bitstream, numShortTermRefPicSets);

        if (bitstream.readBits(1)) { // long_term_ref_pics_present_flag
            const numLongTermRefPicsSps = readExpGolomb(bitstream);
            for (let i = 0; i < numLongTermRefPicsSps; i++) {
                // lt_ref_pic_poc_lsb_sps[i] is a fixed-width u(v) field, not Exp-Golomb: "The number of bits
                // used to represent lt_ref_pic_poc_lsb_sps[ i ] is equal to
                // log2_max_pic_order_cnt_lsb_minus4 + 4."
                bitstream.skipBits(log2MaxPicOrderCntLsbMinus4 + 4);
                bitstream.skipBits(1); // used_by_curr_pic_lt_sps_flag[i]
            }
        }

        bitstream.skipBits(1); // sps_temporal_mvp_enabled_flag
        bitstream.skipBits(1); // strong_intra_smoothing_enabled_flag

        // 2 = unspecified
        let colourPrimaries = 2;
        let transferCharacteristics = 2;
        let matrixCoefficients = 2;
        let fullRangeFlag = 0;
        let minSpatialSegmentationIdc = 0;
        let pixelAspectRatio = { num: 1, den: 1 };

        if (bitstream.readBits(1)) { // vui_parameters_present_flag
            const vui = parseHevcVui(bitstream, spsMaxSubLayersMinus1);
            pixelAspectRatio = vui.pixelAspectRatio;
            colourPrimaries = vui.colourPrimaries;
            transferCharacteristics = vui.transferCharacteristics;
            matrixCoefficients = vui.matrixCoefficients;
            fullRangeFlag = vui.fullRangeFlag;
            minSpatialSegmentationIdc = vui.minSpatialSegmentationIdc;
        }

        return {
            displayWidth,
            displayHeight,
            pixelAspectRatio,
            colourPrimaries,
            transferCharacteristics,
            matrixCoefficients,
            fullRangeFlag,
            maxDecFrameBuffering: spsMaxNumReorderPics + 1,
            spsMaxSubLayersMinus1,
            spsTemporalIdNestingFlag,
            generalProfileSpace: general_profile_space,
            generalTierFlag: general_tier_flag,
            generalProfileIdc: general_profile_idc,
            generalProfileCompatibilityFlags: general_profile_compatibility_flags,
            generalConstraintIndicatorFlags: general_constraint_indicator_flags,
            generalLevelIdc: general_level_idc,
            chromaFormatIdc,
            bitDepthLumaMinus8,
            bitDepthChromaMinus8,
            minSpatialSegmentationIdc,
        };
    } catch (error) {
        console.error('Error parsing HEVC SPS:', error);
        return null;
    }
};
/**
 * Derives the parallelismType value for a HevcDecoderConfigurationRecord from a PPS NAL unit by reading
 * tiles_enabled_flag and entropy_coding_sync_enabled_flag.
 *
 * @returns {0 | 2 | 3} 2 if only tiles are enabled, 3 if only entropy coding sync is enabled, 0 otherwise.
 */
const parseHevcParallelismType = (pps) => {
    const ppsBitstream = new Bitstream(removeEmulationPreventionBytes(pps));

    ppsBitstream.skipBits(16); // NAL header
    readExpGolomb(ppsBitstream); // pps_pic_parameter_set_id
    readExpGolomb(ppsBitstream); // pps_seq_parameter_set_id
    ppsBitstream.skipBits(1); // dependent_slice_segments_enabled_flag
    ppsBitstream.skipBits(1); // output_flag_present_flag
    ppsBitstream.skipBits(3); // num_extra_slice_header_bits
    ppsBitstream.skipBits(1); // sign_data_hiding_enabled_flag
    ppsBitstream.skipBits(1); // cabac_init_present_flag
    readExpGolomb(ppsBitstream); // num_ref_idx_l0_default_active_minus1
    readExpGolomb(ppsBitstream); // num_ref_idx_l1_default_active_minus1
    readSignedExpGolomb(ppsBitstream); // init_qp_minus26
    ppsBitstream.skipBits(1); // constrained_intra_pred_flag
    ppsBitstream.skipBits(1); // transform_skip_enabled_flag
    if (ppsBitstream.readBits(1)) { // cu_qp_delta_enabled_flag
        readExpGolomb(ppsBitstream); // diff_cu_qp_delta_depth
    }
    readSignedExpGolomb(ppsBitstream); // pps_cb_qp_offset
    readSignedExpGolomb(ppsBitstream); // pps_cr_qp_offset
    ppsBitstream.skipBits(1); // pps_slice_chroma_qp_offsets_present_flag
    ppsBitstream.skipBits(1); // weighted_pred_flag
    ppsBitstream.skipBits(1); // weighted_bipred_flag
    ppsBitstream.skipBits(1); // transquant_bypass_enabled_flag

    const tiles_enabled_flag = ppsBitstream.readBits(1);
    const entropy_coding_sync_enabled_flag = ppsBitstream.readBits(1);

    if (tiles_enabled_flag && !entropy_coding_sync_enabled_flag) {
        return 2;
    }
    if (!tiles_enabled_flag && entropy_coding_sync_enabled_flag) {
        return 3;
    }
    // Neither or both tools enabled
    return 0;
};

/**
 * Builds a HevcDecoderConfigurationRecord from an HEVC packet in Annex B format.
 *
 * VPS, SPS, PPS, prefix SEI and suffix SEI NAL units found in the packet are collected into one array per
 * NAL unit type (empty arrays are omitted). Profile, level and format fields come from the first SPS,
 * parallelismType from the first PPS.
 *
 * Returns `null` when the packet contains no SPS or no PPS, when the SPS cannot be parsed, or when anything
 * throws while building the record (the error is logged to the console).
 */
export const extractHevcDecoderConfigurationRecord = (packetData) => {
    try {
        const vpsUnits = [];
        const spsUnits = [];
        const ppsUnits = [];
        const prefixSeiUnits = [];
        const suffixSeiUnits = [];

        for (const loc of iterateNalUnitsInAnnexB(packetData)) {
            const nalUnit = packetData.subarray(loc.offset, loc.offset + loc.length);
            const type = extractNalUnitTypeForHevc(nalUnit[0]);

            if (type === HevcNalUnitType.VPS_NUT) {
                vpsUnits.push(nalUnit);
            } else if (type === HevcNalUnitType.SPS_NUT) {
                spsUnits.push(nalUnit);
            } else if (type === HevcNalUnitType.PPS_NUT) {
                ppsUnits.push(nalUnit);
            } else if (type === HevcNalUnitType.PREFIX_SEI_NUT) {
                prefixSeiUnits.push(nalUnit);
            } else if (type === HevcNalUnitType.SUFFIX_SEI_NUT) {
                suffixSeiUnits.push(nalUnit);
            }
        }

        if (spsUnits.length === 0 || ppsUnits.length === 0) {
            return null;
        }

        const spsInfo = parseHevcSps(spsUnits[0]);
        if (!spsInfo) {
            return null;
        }

        const parallelismType = parseHevcParallelismType(ppsUnits[0]);

        // Every array carries exactly one NAL unit type, so prefix and suffix SEI must not share an array
        const arrays = [
            [HevcNalUnitType.VPS_NUT, vpsUnits],
            [HevcNalUnitType.SPS_NUT, spsUnits],
            [HevcNalUnitType.PPS_NUT, ppsUnits],
            [HevcNalUnitType.PREFIX_SEI_NUT, prefixSeiUnits],
            [HevcNalUnitType.SUFFIX_SEI_NUT, suffixSeiUnits],
        ]
            .filter(([, nalUnits]) => nalUnits.length > 0)
            .map(([nalUnitType, nalUnits]) => ({
                arrayCompleteness: 1,
                nalUnitType,
                nalUnits,
            }));

        const record = {
            configurationVersion: 1,
            generalProfileSpace: spsInfo.generalProfileSpace,
            generalTierFlag: spsInfo.generalTierFlag,
            generalProfileIdc: spsInfo.generalProfileIdc,
            generalProfileCompatibilityFlags: spsInfo.generalProfileCompatibilityFlags,
            generalConstraintIndicatorFlags: spsInfo.generalConstraintIndicatorFlags,
            generalLevelIdc: spsInfo.generalLevelIdc,
            minSpatialSegmentationIdc: spsInfo.minSpatialSegmentationIdc,
            parallelismType,
            chromaFormatIdc: spsInfo.chromaFormatIdc,
            bitDepthLumaMinus8: spsInfo.bitDepthLumaMinus8,
            bitDepthChromaMinus8: spsInfo.bitDepthChromaMinus8,
            avgFrameRate: 0,
            constantFrameRate: 0,
            numTemporalLayers: spsInfo.spsMaxSubLayersMinus1 + 1,
            temporalIdNested: spsInfo.spsTemporalIdNestingFlag,
            lengthSizeMinusOne: 3,
            arrays,
        };

        return record;
    } catch (error) {
        console.error('Error building HEVC Decoder Configuration Record:', error);
        return null;
    }
};
/**
 * Parses the profile_tier_level() structure of an HEVC SPS, returning the general_* fields and reading past
 * all sub-layer information.
 *
 * @param bitstream - Bit reader positioned at the start of profile_tier_level().
 * @param {number} maxNumSubLayersMinus1 - sps_max_sub_layers_minus1 of the enclosing SPS.
 * @returns The general profile space, tier flag, profile idc, the 32 compatibility flags packed into an
 * unsigned 32-bit integer (first flag in the most significant bit), the 6 constraint indicator bytes and the
 * level idc.
 */
const parseProfileTierLevel = (bitstream, maxNumSubLayersMinus1) => {
    const general_profile_space = bitstream.readBits(2);
    const general_tier_flag = bitstream.readBits(1);
    const general_profile_idc = bitstream.readBits(5);

    let general_profile_compatibility_flags = 0;
    for (let i = 0; i < 32; i++) {
        general_profile_compatibility_flags = (general_profile_compatibility_flags << 1) | bitstream.readBits(1);
    }
    // Bitwise operators work on signed 32-bit integers, so a set top flag would leave us with a negative
    // number. Normalize to the unsigned value.
    general_profile_compatibility_flags >>>= 0;

    const general_constraint_indicator_flags = new Uint8Array(6);
    for (let i = 0; i < 6; i++) {
        general_constraint_indicator_flags[i] = bitstream.readBits(8);
    }

    const general_level_idc = bitstream.readBits(8);

    const sub_layer_profile_present_flag = [];
    const sub_layer_level_present_flag = [];
    for (let i = 0; i < maxNumSubLayersMinus1; i++) {
        sub_layer_profile_present_flag.push(bitstream.readBits(1));
        sub_layer_level_present_flag.push(bitstream.readBits(1));
    }

    if (maxNumSubLayersMinus1 > 0) {
        for (let i = maxNumSubLayersMinus1; i < 8; i++) {
            bitstream.skipBits(2); // reserved_zero_2bits
        }
    }

    for (let i = 0; i < maxNumSubLayersMinus1; i++) {
        // Sub-layer profile info has the same 88-bit layout as the general profile info (without the level)
        if (sub_layer_profile_present_flag[i]) {
            bitstream.skipBits(88);
        }
        if (sub_layer_level_present_flag[i]) {
            bitstream.skipBits(8); // sub_layer_level_idc[i]
        }
    }

    return {
        general_profile_space,
        general_tier_flag,
        general_profile_idc,
        general_profile_compatibility_flags,
        general_constraint_indicator_flags,
        general_level_idc,
    };
};
reserved_zero_2bits } } for (let i = 0; i < maxNumSubLayersMinus1; i++) { if (sub_layer_profile_present_flag[i]) bitstream.skipBits(88); if (sub_layer_level_present_flag[i]) bitstream.skipBits(8); } return { general_profile_space, general_tier_flag, general_profile_idc, general_profile_compatibility_flags, general_constraint_indicator_flags, general_level_idc, }; }; const skipScalingListData = (bitstream) => { for (let sizeId = 0; sizeId < 4; sizeId++) { for (let matrixId = 0; matrixId < (sizeId === 3 ? 2 : 6); matrixId++) { const scaling_list_pred_mode_flag = bitstream.readBits(1); if (!scaling_list_pred_mode_flag) { readExpGolomb(bitstream); // scaling_list_pred_matrix_id_delta } else { const coefNum = Math.min(64, 1 << (4 + (sizeId << 1))); if (sizeId > 1) { readSignedExpGolomb(bitstream); // scaling_list_dc_coef_minus8 } for (let i = 0; i < coefNum; i++) { readSignedExpGolomb(bitstream); // scaling_list_delta_coef } } } } }; const skipAllStRefPicSets = (bitstream, num_short_term_ref_pic_sets) => { const NumDeltaPocs = []; for (let stRpsIdx = 0; stRpsIdx < num_short_term_ref_pic_sets; stRpsIdx++) { NumDeltaPocs[stRpsIdx] = skipStRefPicSet(bitstream, stRpsIdx, num_short_term_ref_pic_sets, NumDeltaPocs); } }; const skipStRefPicSet = (bitstream, stRpsIdx, num_short_term_ref_pic_sets, NumDeltaPocs) => { let NumDeltaPocsThis = 0; let inter_ref_pic_set_prediction_flag = 0; let RefRpsIdx = 0; if (stRpsIdx !== 0) { inter_ref_pic_set_prediction_flag = bitstream.readBits(1); } if (inter_ref_pic_set_prediction_flag) { if (stRpsIdx === num_short_term_ref_pic_sets) { const delta_idx_minus1 = readExpGolomb(bitstream); RefRpsIdx = stRpsIdx - (delta_idx_minus1 + 1); } else { RefRpsIdx = stRpsIdx - 1; } bitstream.readBits(1); // delta_rps_sign readExpGolomb(bitstream); // abs_delta_rps_minus1 // The number of iterations is NumDeltaPocs[RefRpsIdx] + 1 const numDelta = NumDeltaPocs[RefRpsIdx] ?? 
0; for (let j = 0; j <= numDelta; j++) { const used_by_curr_pic_flag = bitstream.readBits(1); if (!used_by_curr_pic_flag) { bitstream.readBits(1); // use_delta_flag } } NumDeltaPocsThis = NumDeltaPocs[RefRpsIdx]; } else { const num_negative_pics = readExpGolomb(bitstream); const num_positive_pics = readExpGolomb(bitstream); for (let i = 0; i < num_negative_pics; i++) { readExpGolomb(bitstream); // delta_poc_s0_minus1[i] bitstream.readBits(1); // used_by_curr_pic_s0_flag[i] } for (let i = 0; i < num_positive_pics; i++) { readExpGolomb(bitstream); // delta_poc_s1_minus1[i] bitstream.readBits(1); // used_by_curr_pic_s1_flag[i] } NumDeltaPocsThis = num_negative_pics + num_positive_pics; } return NumDeltaPocsThis; }; const parseHevcVui = (bitstream, sps_max_sub_layers_minus1) => { // Defaults: 2 = unspecified let colourPrimaries = 2; let transferCharacteristics = 2; let matrixCoefficients = 2; let fullRangeFlag = 0; let minSpatialSegmentationIdc = 0; let pixelAspectRatio = { num: 1, den: 1 }; if (bitstream.readBits(1)) { // aspect_ratio_info_present_flag const aspect_ratio_idc = bitstream.readBits(8); if (aspect_ratio_idc === 255) { pixelAspectRatio = { num: bitstream.readBits(16), den: bitstream.readBits(16), }; } else { const aspectRatio = AVC_HEVC_ASPECT_RATIO_IDC_TABLE[aspect_ratio_idc]; if (aspectRatio) { pixelAspectRatio = aspectRatio; } } } if (bitstream.readBits(1)) { // overscan_info_present_flag bitstream.readBits(1); // overscan_appropriate_flag } if (bitstream.readBits(1)) { // video_signal_type_present_flag bitstream.readBits(3); // video_format fullRangeFlag = bitstream.readBits(1); if (bitstream.readBits(1)) { // colour_description_present_flag colourPrimaries = bitstream.readBits(8); transferCharacteristics = bitstream.readBits(8); matrixCoefficients = bitstream.readBits(8); } } if (bitstream.readBits(1)) { // chroma_loc_info_present_flag readExpGolomb(bitstream); // chroma_sample_loc_type_top_field readExpGolomb(bitstream); // 
chroma_sample_loc_type_bottom_field } bitstream.readBits(1); // neutral_chroma_indication_flag bitstream.readBits(1); // field_seq_flag bitstream.readBits(1); // frame_field_info_present_flag if (bitstream.readBits(1)) { // default_display_window_flag readExpGolomb(bitstream); // def_disp_win_left_offset readExpGolomb(bitstream); // def_disp_win_right_offset readExpGolomb(bitstream); // def_disp_win_top_offset readExpGolomb(bitstream); // def_disp_win_bottom_offset } if (bitstream.readBits(1)) { // vui_timing_info_present_flag bitstream.readBits(32); // vui_num_units_in_tick bitstream.readBits(32); // vui_time_scale if (bitstream.readBits(1)) { // vui_poc_proportional_to_timing_flag readExpGolomb(bitstream); // vui_num_ticks_poc_diff_one_minus1 } if (bitstream.readBits(1)) { skipHevcHrdParameters(bitstream, true, sps_max_sub_layers_minus1); } } if (bitstream.readBits(1)) { // bitstream_restriction_flag bitstream.readBits(1); // tiles_fixed_structure_flag bitstream.readBits(1); // motion_vectors_over_pic_boundaries_flag bitstream.readBits(1); // restricted_ref_pic_lists_flag minSpatialSegmentationIdc = readExpGolomb(bitstream); readExpGolomb(bitstream); // max_bytes_per_pic_denom readExpGolomb(bitstream); // max_bits_per_min_cu_denom readExpGolomb(bitstream); // log2_max_mv_length_horizontal readExpGolomb(bitstream); // log2_max_mv_length_vertical } return { pixelAspectRatio, colourPrimaries, transferCharacteristics, matrixCoefficients, fullRangeFlag, minSpatialSegmentationIdc, }; }; const skipHevcHrdParameters = (bitstream, commonInfPresentFlag, maxNumSubLayersMinus1) => { let nal_hrd_parameters_present_flag = false; let vcl_hrd_parameters_present_flag = false; let sub_pic_hrd_params_present_flag = false; if (commonInfPresentFlag) { nal_hrd_parameters_present_flag = bitstream.readBits(1) === 1; vcl_hrd_parameters_present_flag = bitstream.readBits(1) === 1; if (nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag) { sub_pic_hrd_params_present_flag = 
bitstream.readBits(1) === 1; if (sub_pic_hrd_params_present_flag) { bitstream.readBits(8); // tick_divisor_minus2 bitstream.readBits(5); // du_cpb_removal_delay_increment_length_minus1 bitstream.readBits(1); // sub_pic_cpb_params_in_pic