UNPKG

@jxstjh/jhvideo

Version:

HTML5 jhvideo based on MPEG2-TS Stream Player

613 lines (518 loc) 21.6 kB
/**
 * Minimal ISO-BMFF (fragmented MP4) reader used to detect HEVC tracks in an
 * init segment and to split media-segment samples back into HEVC NAL units.
 */

/**
 * Number of fixed-field bytes a VisualSampleEntry carries between its 8-byte
 * box header and its first child box (ISO/IEC 14496-12).
 */
const VISUAL_SAMPLE_ENTRY_FIELDS_SIZE = 78;

/**
 * Throws when `width` bytes starting at `offset` do not fit inside `view`.
 *
 * @param {DataView} view
 * @param {number} offset
 * @param {number} width number of bytes about to be read
 * @param {string} reader name of the calling reader, used in the error message
 * @throws {RangeError}
 */
function assertReadable(view, offset, width, reader) {
  if (offset + width > view.byteLength) {
    throw new RangeError(`${reader} out of range: offset=${offset}, length=${view.byteLength}`);
  }
}

/**
 * Reads a big-endian unsigned 64-bit integer as a JS number.
 * Values above 2^53 lose precision because the result is a double.
 *
 * @throws {RangeError} when the read would pass the end of the view
 */
function readUint64(view, offset) {
  assertReadable(view, offset, 8, 'readUint64');
  const high = view.getUint32(offset);
  const low = view.getUint32(offset + 4);
  return high * 4294967296 + low;
}

/** Reads a big-endian signed 32-bit integer. @throws {RangeError} */
function readInt32(view, offset) {
  assertReadable(view, offset, 4, 'readInt32');
  return view.getInt32(offset);
}

/** Reads an unsigned 8-bit integer. @throws {RangeError} */
function readUint8(view, offset) {
  assertReadable(view, offset, 1, 'readUint8');
  return view.getUint8(offset);
}

/** Reads a big-endian unsigned 16-bit integer. @throws {RangeError} */
function readUint16(view, offset) {
  assertReadable(view, offset, 2, 'readUint16');
  return view.getUint16(offset);
}

/** Reads a big-endian unsigned 32-bit integer. @throws {RangeError} */
function readUint32(view, offset) {
  assertReadable(view, offset, 4, 'readUint32');
  return view.getUint32(offset);
}

/**
 * Reads a four-character box type code as a string.
 *
 * @throws {RangeError} when fewer than 4 bytes remain
 */
function readType(view, offset) {
  assertReadable(view, offset, 4, 'readType');
  return String.fromCharCode(
    readUint8(view, offset),
    readUint8(view, offset + 1),
    readUint8(view, offset + 2),
    readUint8(view, offset + 3)
  );
}

/**
 * Lists the boxes laid out back to back in `[start, end)` of `arrayBuffer`.
 * Scanning stops silently at the first box that is truncated or whose
 * declared size is invalid.
 *
 * @param {ArrayBuffer} arrayBuffer
 * @param {number} [start=0] absolute offset of the first box header
 * @param {number} [end=arrayBuffer.byteLength] absolute end of the scan range
 * @returns {Array<{type: string, start: number, size: number, headerSize: number, dataStart: number, end: number}>}
 */
function getBoxes(arrayBuffer, start = 0, end = arrayBuffer.byteLength) {
  const boxes = [];
  const view = new DataView(arrayBuffer);
  let offset = start;

  while (offset + 8 <= end) {
    let size = readUint32(view, offset);
    const type = readType(view, offset + 4);
    let headerSize = 8;

    if (size === 1) {
      // size == 1 means a 64-bit "largesize" follows the type field.
      if (offset + 16 > end) {
        break;
      }
      size = readUint64(view, offset + 8);
      headerSize = 16;
    } else if (size === 0) {
      // size == 0 means the box extends to the end of the enclosing range.
      size = end - offset;
    }

    // A box can never be smaller than its own header; this also rejects size 0
    // coming from a largesize field, which would otherwise stall the scan.
    if (size < headerSize || offset + size > end) {
      break;
    }

    boxes.push({
      type,
      start: offset,
      size,
      headerSize,
      dataStart: offset + headerSize,
      end: offset + size
    });
    offset += size;
  }

  return boxes;
}

/**
 * Returns the first direct child of `parentBox` with the given type, or null.
 * Only `parentBox.dataStart` and `parentBox.end` are used.
 */
function findChildBox(arrayBuffer, parentBox, type) {
  return getBoxes(arrayBuffer, parentBox.dataStart, parentBox.end).find((box) => box.type === type) || null;
}

/** Returns every direct child of `parentBox` with the given type. */
function findBoxesByType(arrayBuffer, parentBox, type) {
  return getBoxes(arrayBuffer, parentBox.dataStart, parentBox.end).filter((box) => box.type === type);
}

/** Creates a DataView restricted to the payload of `box`. */
function payloadView(arrayBuffer, box) {
  return new DataView(arrayBuffer, box.dataStart, box.end - box.dataStart);
}

/**
 * Reads timescale and duration from an `mdhd` box (version 0 or 1).
 *
 * @returns {{timescale: number, duration: number}}
 * @throws {RangeError} when the box is too short
 */
function parseMdhd(arrayBuffer, mdhdBox) {
  const view = payloadView(arrayBuffer, mdhdBox);
  const version = readUint8(view, 0);
  if (version === 1) {
    // Version 1 stores 64-bit creation/modification times and duration.
    return {
      timescale: readUint32(view, 20),
      duration: readUint64(view, 24)
    };
  }
  return {
    timescale: readUint32(view, 12),
    duration: readUint32(view, 16)
  };
}

/**
 * Reads the handler type (e.g. 'vide') from an `hdlr` box.
 *
 * @returns {string}
 * @throws {RangeError} when the box is too short
 */
function parseHdlr(arrayBuffer, hdlrBox) {
  return readType(payloadView(arrayBuffer, hdlrBox), 8);
}

/**
 * Reads track id and presentation size from a `tkhd` box (version 0 or 1).
 * Width and height are stored as 16.16 fixed point and returned as numbers.
 *
 * @returns {{trackId: number, width: number, height: number}}
 * @throws {RangeError} when the box is too short to contain width/height
 */
function parseTkhd(arrayBuffer, tkhdBox) {
  const view = payloadView(arrayBuffer, tkhdBox);
  const isVersion1 = readUint8(view, 0) === 1;
  const trackIdOffset = isVersion1 ? 20 : 12;
  const widthOffset = isVersion1 ? 88 : 76;
  const heightOffset = widthOffset + 4;

  if (heightOffset + 4 > view.byteLength) {
    throw new RangeError(`tkhd box too short: length=${view.byteLength}`);
  }

  return {
    trackId: readUint32(view, trackIdOffset),
    width: readUint32(view, widthOffset) / 65536,
    height: readUint32(view, heightOffset) / 65536
  };
}

/**
 * Collects the NAL units stored in the arrays of an `hvcC` payload.
 * Parsing stops at the first truncated array or NAL unit and whatever was
 * read so far is returned.
 *
 * @param {ArrayBuffer} arrayBuffer
 * @param {DataView} view view over the hvcC payload
 * @param {number} payloadStart absolute offset of the hvcC payload
 * @param {number} numOfArrays array count declared in the record
 * @returns {Uint8Array[]}
 */
function readHvccParameterSets(arrayBuffer, view, payloadStart, numOfArrays) {
  const parameterSets = [];
  let offset = 23;

  for (let arrayIndex = 0; arrayIndex < numOfArrays; arrayIndex++) {
    if (offset + 3 > view.byteLength) {
      return parameterSets;
    }
    // Byte 0 of each array holds completeness + NAL type; bytes 1-2 the count.
    const numNalus = readUint16(view, offset + 1);
    offset += 3;

    for (let naluIndex = 0; naluIndex < numNalus; naluIndex++) {
      if (offset + 2 > view.byteLength) {
        return parameterSets;
      }
      const naluLength = readUint16(view, offset);
      offset += 2;
      if (offset + naluLength > view.byteLength) {
        return parameterSets;
      }
      parameterSets.push(new Uint8Array(arrayBuffer.slice(
        payloadStart + offset,
        payloadStart + offset + naluLength
      )));
      offset += naluLength;
    }
  }

  return parameterSets;
}

/**
 * Parses an `hvcC` box (HEVCDecoderConfigurationRecord).
 *
 * @returns {{lengthSize: number, parameterSets: Uint8Array[]}} NAL length
 *   prefix size in bytes (1-4) and copies of the stored parameter-set NAL units
 * @throws {RangeError} when the box is shorter than the fixed 23-byte header
 */
function parseHvcc(arrayBuffer, hvccBox) {
  const view = payloadView(arrayBuffer, hvccBox);
  if (view.byteLength < 23) {
    throw new RangeError(`hvcC box too short: length=${view.byteLength}`);
  }

  const lengthSize = (readUint8(view, 21) & 0x03) + 1;
  const numOfArrays = readUint8(view, 22);

  return {
    lengthSize,
    parameterSets: readHvccParameterSets(arrayBuffer, view, hvccBox.dataStart, numOfArrays)
  };
}

/**
 * Parses the first sample entry of an `stsd` box belonging to a video track.
 *
 * @returns {{codecType: string, hvcc: ({lengthSize: number, parameterSets: Uint8Array[]}|null)}|null}
 *   null when the box declares no entries
 * @throws {RangeError} when the box or its first entry is malformed
 */
function parseStsd(arrayBuffer, stsdBox) {
  const view = payloadView(arrayBuffer, stsdBox);
  if (view.byteLength < 8) {
    throw new RangeError(`stsd box too short: length=${view.byteLength}`);
  }

  const entryCount = readUint32(view, 4);
  if (entryCount === 0) {
    return null;
  }

  const offset = 8;
  if (offset + 8 > view.byteLength) {
    throw new RangeError(`stsd entry out of range: offset=${offset}, length=${view.byteLength}`);
  }

  const size = readUint32(view, offset);
  const type = readType(view, offset + 4);
  const sampleEntryStart = stsdBox.dataStart + offset;
  if (!size || sampleEntryStart + size > stsdBox.end) {
    throw new RangeError(`invalid stsd entry size: size=${size}, entryStart=${sampleEntryStart}, stsdEnd=${stsdBox.end}`);
  }

  // Child boxes (hvcC, ...) only start after the fixed VisualSampleEntry
  // fields. Scanning from right behind the box header would interpret the
  // reserved zero bytes as a size-0 box and never reach hvcC.
  const sampleEntry = {
    type,
    start: sampleEntryStart,
    size,
    dataStart: sampleEntryStart + 8 + VISUAL_SAMPLE_ENTRY_FIELDS_SIZE,
    end: sampleEntryStart + size
  };
  const hvcc = findChildBox(arrayBuffer, sampleEntry, 'hvcC');

  return {
    codecType: type,
    hvcc: hvcc ? parseHvcc(arrayBuffer, hvcc) : null
  };
}

/**
 * Reads the per-track fragment defaults from a `trex` box.
 *
 * @returns {{trackId: number, defaultSampleDuration: number, defaultSampleSize: number, defaultSampleFlags: number}}
 * @throws {RangeError} when the box is too short
 */
function parseTrex(arrayBuffer, trexBox) {
  const view = payloadView(arrayBuffer, trexBox);
  if (view.byteLength < 24) {
    throw new RangeError(`trex box too short: length=${view.byteLength}`);
  }
  return {
    trackId: readUint32(view, 4),
    defaultSampleDuration: readUint32(view, 12),
    defaultSampleSize: readUint32(view, 16),
    defaultSampleFlags: readUint32(view, 20)
  };
}

/**
 * Inspects an fMP4 init segment and describes its first HEVC video track.
 *
 * @param {ArrayBuffer} arrayBuffer the complete init segment
 * @returns {{codecType: string, codecId: number, timescale: number, duration: number,
 *   trackId: (number|null), width: number, height: number, lengthSize: number,
 *   parameterSets: Uint8Array[], trex: (Object|null)}|null}
 *   null when there is no `moov` box or no video track whose sample entry is
 *   `hvc1`/`hev1`, so the caller can fall back to the regular HLS playback path
 * @throws {RangeError} when a box that is needed is truncated or malformed
 */
export function parseHevcInitSegment(arrayBuffer) {
  const moov = getBoxes(arrayBuffer).find((box) => box.type === 'moov');
  if (!moov) {
    return null;
  }

  const trexMap = new Map();
  const mvex = findChildBox(arrayBuffer, moov, 'mvex');
  if (mvex) {
    findBoxesByType(arrayBuffer, mvex, 'trex').forEach((trexBox) => {
      const trex = parseTrex(arrayBuffer, trexBox);
      trexMap.set(trex.trackId, trex);
    });
  }

  for (const trak of findBoxesByType(arrayBuffer, moov, 'trak')) {
    const mdia = findChildBox(arrayBuffer, trak, 'mdia');
    const tkhd = findChildBox(arrayBuffer, trak, 'tkhd');
    if (!mdia) {
      continue;
    }

    const hdlr = findChildBox(arrayBuffer, mdia, 'hdlr');
    if (!hdlr || parseHdlr(arrayBuffer, hdlr) !== 'vide') {
      continue;
    }

    const mdhd = findChildBox(arrayBuffer, mdia, 'mdhd');
    const minf = findChildBox(arrayBuffer, mdia, 'minf');
    const stbl = minf ? findChildBox(arrayBuffer, minf, 'stbl') : null;
    const stsd = stbl ? findChildBox(arrayBuffer, stbl, 'stsd') : null;

    const mdhdInfo = mdhd ? parseMdhd(arrayBuffer, mdhd) : { timescale: 1000, duration: 0 };
    const stsdInfo = stsd ? parseStsd(arrayBuffer, stsd) : null;
    const tkhdInfo = tkhd ? parseTkhd(arrayBuffer, tkhd) : { trackId: null, width: 0, height: 0 };

    // Core of the HLS/fMP4 H265 detection: an stsd sample entry of type
    // hvc1 / hev1 marks this video track as HEVC (H265). Any other type is
    // skipped so the caller ends up on the regular HLS playback path.
    if (!stsdInfo || (stsdInfo.codecType !== 'hvc1' && stsdInfo.codecType !== 'hev1')) {
      continue;
    }

    const trackId = tkhdInfo.trackId;

    return {
      codecType: stsdInfo.codecType,
      // Kept consistent with the existing ws/flv pipeline: 12 means H265, and
      // WasmPlayer later opens its decoder according to this codecId.
      codecId: 12,
      timescale: mdhdInfo.timescale || 1000,
      duration: mdhdInfo.duration || 0,
      trackId,
      width: Math.round(tkhdInfo.width || 0),
      height: Math.round(tkhdInfo.height || 0),
      // hvcC carries the HEVC parameter sets and the length-prefix size; both
      // are used later to turn m4s samples into Annex B access units for wasm.
      lengthSize: stsdInfo.hvcc ? stsdInfo.hvcc.lengthSize : 4,
      parameterSets: stsdInfo.hvcc ? stsdInfo.hvcc.parameterSets : [],
      trex: trexMap.get(trackId) || null
    };
  }

  return null;
}

/** Combines the three flag bytes of a full-box header into one integer. */
function readFullBoxFlags(view) {
  return (readUint8(view, 1) << 16) | (readUint8(view, 2) << 8) | readUint8(view, 3);
}

/**
 * Parses a `tfhd` box. Optional fields that are absent are returned as null.
 *
 * @returns {{flags: number, trackId: number, baseDataOffset: (number|null),
 *   defaultSampleDuration: (number|null), defaultSampleSize: (number|null),
 *   defaultSampleFlags: (number|null)}}
 * @throws {RangeError} when the box is shorter than its flags require
 */
function parseTfhd(arrayBuffer, tfhdBox) {
  const view = payloadView(arrayBuffer, tfhdBox);
  if (view.byteLength < 8) {
    throw new RangeError(`tfhd box too short: length=${view.byteLength}`);
  }

  const flags = readFullBoxFlags(view);
  const trackId = readUint32(view, 4);
  let offset = 8;

  let baseDataOffset = null;
  let defaultSampleDuration = null;
  let defaultSampleSize = null;
  let defaultSampleFlags = null;

  if (flags & 0x000001) {
    baseDataOffset = readUint64(view, offset);
    offset += 8;
  }
  if (flags & 0x000002) {
    // sample_description_index is present but not needed here.
    offset += 4;
  }
  if (flags & 0x000008) {
    defaultSampleDuration = readUint32(view, offset);
    offset += 4;
  }
  if (flags & 0x000010) {
    defaultSampleSize = readUint32(view, offset);
    offset += 4;
  }
  if (flags & 0x000020) {
    defaultSampleFlags = readUint32(view, offset);
    offset += 4;
  }

  return {
    flags,
    trackId,
    baseDataOffset,
    defaultSampleDuration,
    defaultSampleSize,
    defaultSampleFlags
  };
}

/**
 * Reads baseMediaDecodeTime from a `tfdt` box (32-bit for version 0, 64-bit
 * for version 1).
 *
 * @returns {number}
 * @throws {RangeError} when the box is too short
 */
function parseTfdt(arrayBuffer, tfdtBox) {
  const view = payloadView(arrayBuffer, tfdtBox);
  if (view.byteLength < 8) {
    throw new RangeError(`tfdt box too short: length=${view.byteLength}`);
  }
  return readUint8(view, 0) === 1 ? readUint64(view, 4) : readUint32(view, 4);
}

/**
 * Parses a `trun` box into its per-sample table.
 * `duration`, `size` and `flags` are only set on a sample when the box
 * carries them; `cts` defaults to 0.
 *
 * @returns {{flags: number, dataOffset: (number|null), samples: Array<{duration?: number, size?: number, flags?: number, cts: number}>}}
 * @throws {RangeError} when the box is too short for the declared sample table
 */
function parseTrun(arrayBuffer, trunBox) {
  const view = payloadView(arrayBuffer, trunBox);
  if (view.byteLength < 8) {
    throw new RangeError(`trun box too short: length=${view.byteLength}`);
  }

  const version = readUint8(view, 0);
  const flags = readFullBoxFlags(view);
  const sampleCount = readUint32(view, 4);
  let offset = 8;
  let dataOffset = null;
  let firstSampleFlags = null;

  if (flags & 0x000001) {
    dataOffset = readInt32(view, offset);
    offset += 4;
  }
  if (flags & 0x000004) {
    firstSampleFlags = readUint32(view, offset);
    offset += 4;
  }

  const hasDuration = (flags & 0x000100) !== 0;
  const hasSize = (flags & 0x000200) !== 0;
  const hasFlags = (flags & 0x000400) !== 0;
  const hasCts = (flags & 0x000800) !== 0;

  // Validate the whole table before reading it, so a corrupt sampleCount is
  // reported with full context instead of failing somewhere mid-table.
  const bytesPerSample = 4 * (Number(hasDuration) + Number(hasSize) + Number(hasFlags) + Number(hasCts));
  if (offset + sampleCount * bytesPerSample > view.byteLength) {
    throw new RangeError(`trun sample table out of range: offset=${offset}, length=${view.byteLength}, sampleCount=${sampleCount}, flags=${flags}`);
  }

  const samples = [];
  for (let i = 0; i < sampleCount; i++) {
    const sample = {};
    if (hasDuration) {
      sample.duration = readUint32(view, offset);
      offset += 4;
    }
    if (hasSize) {
      sample.size = readUint32(view, offset);
      offset += 4;
    }
    if (hasFlags) {
      sample.flags = readUint32(view, offset);
      offset += 4;
    } else if (i === 0 && firstSampleFlags != null) {
      sample.flags = firstSampleFlags;
    }
    if (hasCts) {
      // Composition offsets are signed only in version 1 boxes.
      sample.cts = version === 1 ? readInt32(view, offset) : readUint32(view, offset);
      offset += 4;
    } else {
      sample.cts = 0;
    }
    samples.push(sample);
  }

  return { flags, dataOffset, samples };
}

/**
 * Splits a length-prefixed sample into NAL units.
 * Stops at the first zero-length or overlong unit.
 *
 * @param {Uint8Array} sampleData
 * @param {number} lengthSize size of each big-endian length prefix in bytes
 * @returns {{units: Uint8Array[], consumedBytes: number}} subarray views into
 *   `sampleData` and the offset at which splitting stopped
 */
function splitNalus(sampleData, lengthSize) {
  const units = [];
  const view = new DataView(sampleData.buffer, sampleData.byteOffset, sampleData.byteLength);
  let offset = 0;

  while (offset + lengthSize <= sampleData.byteLength) {
    let naluSize = 0;
    for (let i = 0; i < lengthSize; i++) {
      // Arithmetic instead of `<<`: bit shifts work on signed 32-bit ints, so
      // a 4-byte length with the top bit set would turn negative and move the
      // offset backwards (potentially forever).
      naluSize = naluSize * 256 + readUint8(view, offset + i);
    }
    offset += lengthSize;

    if (!naluSize || offset + naluSize > sampleData.byteLength) {
      break;
    }

    units.push(sampleData.subarray(offset, offset + naluSize));
    offset += naluSize;
  }

  return { units, consumedBytes: offset };
}

/**
 * Splits a sample into NAL units, trying the preferred length-prefix size
 * first and then the remaining sizes, and keeps the best-scoring attempt.
 *
 * @param {Uint8Array} sampleData
 * @param {number} preferredLengthSize
 * @returns {{lengthSize: number, units: Uint8Array[], consumedBytes: number, trailingBytes: number, score: number}|null}
 *   null when no candidate produced any NAL unit
 */
function splitNalusWithFallback(sampleData, preferredLengthSize) {
  // Most streams agree with the lengthSize from init.mp4. The fallback is kept
  // for real-world streams whose fragments carry inconsistent length prefixes.
  const candidates = [preferredLengthSize, 4, 2, 1, 3].filter((value, index, list) => (
    value >= 1 && value <= 4 && list.indexOf(value) === index
  ));

  let bestResult = null;
  for (const lengthSize of candidates) {
    const result = splitNalus(sampleData, lengthSize);
    if (!result.units.length) {
      continue;
    }

    const trailingBytes = sampleData.byteLength - result.consumedBytes;
    // More units win; among equal counts, fewer unparsed trailing bytes win.
    const score = result.units.length * 100000 - trailingBytes;
    if (!bestResult || score > bestResult.score) {
      bestResult = {
        lengthSize,
        units: result.units,
        consumedBytes: result.consumedBytes,
        trailingBytes,
        score
      };
    }

    if (trailingBytes === 0) {
      break;
    }
  }

  return bestResult;
}

/** Returns the HEVC nal_unit_type of `unit`, or -1 for an empty unit. */
function getHevcNaluType(unit) {
  if (!unit || !unit.byteLength) {
    return -1;
  }
  return (unit[0] >> 1) & 0x3f;
}

/** True when any unit has a NAL type in the range 16..21. */
function isHevcKeyframe(units) {
  return units.some((unit) => {
    const type = getHevcNaluType(unit);
    return type >= 16 && type <= 21;
  });
}

/** Formats the first `maxBytes` bytes of `data` as space-separated hex. */
function formatBytesPreview(data, maxBytes = 16) {
  const limit = Math.min(maxBytes, data.byteLength);
  const values = [];
  for (let i = 0; i < limit; i++) {
    values.push(data[i].toString(16).padStart(2, '0'));
  }
  return values.join(' ');
}

/**
 * Returns the first value that is neither null nor undefined, else `fallback`.
 * Unlike an `||` chain this keeps an explicit 0.
 */
function firstDefined(values, fallback) {
  const found = values.find((value) => value != null);
  return found != null ? found : fallback;
}

/**
 * Extracts the HEVC samples of one fMP4 media segment (`moof` + `mdat`).
 *
 * @param {ArrayBuffer} arrayBuffer the complete media segment
 * @param {Object} initInfo result of `parseHevcInitSegment`; `trackId`,
 *   `timescale`, `lengthSize` and `trex` are read
 * @returns {Array<{pts: number, ptsMs: number, frameType: number, keyframe: boolean,
 *   lengthSize: number, units: Uint8Array[], data: Uint8Array}>}
 *   empty when the segment has no `moof` or no `mdat`
 * @throws {RangeError} when a box or sample is out of range; the message is
 *   extended with the box ranges that were being processed
 */
export function parseHevcMediaSegment(arrayBuffer, initInfo) {
  const debugContext = {
    topLevelTypes: [],
    moofRange: null,
    mdatRange: null,
    trafIndex: -1,
    trafRange: null,
    trunIndex: -1,
    trunRange: null
  };

  try {
    const topBoxes = getBoxes(arrayBuffer);
    debugContext.topLevelTypes = topBoxes.map((box) => box.type);
    const moof = topBoxes.find((box) => box.type === 'moof');
    const mdat = topBoxes.find((box) => box.type === 'mdat');
    if (!moof || !mdat) {
      return [];
    }
    debugContext.moofRange = [moof.start, moof.end];
    debugContext.mdatRange = [mdat.dataStart, mdat.end];

    const trafs = findBoxesByType(arrayBuffer, moof, 'traf');
    const samples = [];

    for (let trafIndex = 0; trafIndex < trafs.length; trafIndex++) {
      const traf = trafs[trafIndex];
      debugContext.trafIndex = trafIndex;
      debugContext.trafRange = [traf.start, traf.end];

      const tfhd = findChildBox(arrayBuffer, traf, 'tfhd');
      const tfdt = findChildBox(arrayBuffer, traf, 'tfdt');
      const truns = findBoxesByType(arrayBuffer, traf, 'trun');
      if (!tfhd || !tfdt || !truns.length) {
        continue;
      }

      const tfhdInfo = parseTfhd(arrayBuffer, tfhd);
      if (initInfo.trackId && tfhdInfo.trackId !== initInfo.trackId) {
        continue;
      }

      const defaultInfo = initInfo.trex || {};
      let decodeTime = parseTfdt(arrayBuffer, tfdt);
      let nextSampleOffset = null;

      // The base offset for this traf's sample data prefers
      // tfhd.baseDataOffset; without it, the start of the current moof is used.
      const trafDataBaseOffset = tfhdInfo.baseDataOffset != null ? tfhdInfo.baseDataOffset : moof.start;

      for (let trunIndex = 0; trunIndex < truns.length; trunIndex++) {
        const trunBox = truns[trunIndex];
        debugContext.trunIndex = trunIndex;
        debugContext.trunRange = [trunBox.start, trunBox.end];

        const trunInfo = parseTrun(arrayBuffer, trunBox);

        // With trun.dataOffset the sample area is located relative to the traf
        // base offset; otherwise it continues where the previous sample ended,
        // which supports several trun boxes inside one traf.
        let sampleBaseOffset;
        if (trunInfo.dataOffset != null) {
          sampleBaseOffset = trafDataBaseOffset + trunInfo.dataOffset;
        } else {
          sampleBaseOffset = nextSampleOffset != null ? nextSampleOffset : mdat.dataStart;
        }

        if (sampleBaseOffset < mdat.dataStart || sampleBaseOffset > mdat.end) {
          throw new RangeError(`sampleBaseOffset out of mdat range: sampleBaseOffset=${sampleBaseOffset}, trafDataBaseOffset=${trafDataBaseOffset}, trunDataOffset=${trunInfo.dataOffset}, mdat=[${mdat.dataStart}, ${mdat.end}]`);
        }

        let sampleOffset = sampleBaseOffset;
        for (const rawSample of trunInfo.samples) {
          // Explicit per-sample values win even when they are 0; defaults only
          // apply when the field is absent.
          const duration = firstDefined(
            [rawSample.duration, tfhdInfo.defaultSampleDuration, defaultInfo.defaultSampleDuration],
            0
          );
          const size = firstDefined(
            [rawSample.size, tfhdInfo.defaultSampleSize, defaultInfo.defaultSampleSize],
            0
          );
          const pts = decodeTime + (rawSample.cts || 0);

          if (!size) {
            throw new RangeError(`sample size is empty: sampleOffset=${sampleOffset}`);
          }
          if (sampleOffset < mdat.dataStart || sampleOffset + size > mdat.end) {
            throw new RangeError(`sample out of mdat range: sampleOffset=${sampleOffset}, size=${size}, mdat=[${mdat.dataStart}, ${mdat.end}]`);
          }

          const sampleData = new Uint8Array(arrayBuffer, sampleOffset, size);
          const splitResult = splitNalusWithFallback(sampleData, initInfo.lengthSize || 4);
          if (!splitResult || !splitResult.units.length) {
            throw new RangeError(`HEVC sample contains no NAL units: sampleOffset=${sampleOffset}, size=${size}, lengthSize=${initInfo.lengthSize || 4}, trafDataBaseOffset=${trafDataBaseOffset}, trunDataOffset=${trunInfo.dataOffset}, bytes=${formatBytesPreview(sampleData)}`);
          }

          // The wasm decoding pipeline consumes HEVC NAL units, so each fMP4
          // sample is split back into NAL units here.
          const units = splitResult.units;
          const keyframe = isHevcKeyframe(units);

          samples.push({
            pts,
            ptsMs: initInfo.timescale ? Math.floor((pts / initInfo.timescale) * 1000) : pts,
            frameType: keyframe ? 1 : 2,
            keyframe,
            lengthSize: splitResult.lengthSize,
            units,
            data: sampleData
          });

          sampleOffset += size;
          nextSampleOffset = sampleOffset;
          decodeTime += duration;
        }
      }
    }

    return samples;
  } catch (error) {
    // Enrich range errors once with the box ranges that were being processed;
    // the original error stays attached as `cause` where the engine supports it.
    if (error instanceof RangeError && !String(error.message).includes('topLevelTypes=')) {
      throw new RangeError(
        `${error.message}, topLevelTypes=${debugContext.topLevelTypes.join('/')}, moofRange=${debugContext.moofRange}, mdatRange=${debugContext.mdatRange}, trafIndex=${debugContext.trafIndex}, trafRange=${debugContext.trafRange}, trunIndex=${debugContext.trunIndex}, trunRange=${debugContext.trunRange}`,
        { cause: error }
      );
    }
    throw error;
  }
}