@jxstjh/jhvideo
Version:
HTML5 jhvideo based on MPEG2-TS Stream Player
613 lines (518 loc) • 21.6 kB
JavaScript
/**
 * Reads an unsigned 64-bit integer (DataView default byte order, i.e.
 * big-endian) and returns it as a plain Number.
 *
 * The value is assembled from two 32-bit words, so results above
 * Number.MAX_SAFE_INTEGER are not exact.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the first of the eight bytes.
 * @returns {number} The decoded value.
 * @throws {RangeError} When fewer than eight bytes are available at `offset`.
 */
function readUint64(view, offset) {
  const requiredEnd = offset + 8;
  if (view.byteLength < requiredEnd) {
    throw new RangeError(`readUint64 out of range: offset=${offset}, length=${view.byteLength}`);
  }
  const upperWord = view.getUint32(offset);
  const lowerWord = view.getUint32(offset + 4);
  // 4294967296 === 2 ** 32; multiplying avoids 32-bit truncation of bitwise shifts.
  return upperWord * 4294967296 + lowerWord;
}
/**
 * Reads a signed 32-bit integer (DataView default byte order) after checking
 * that the four bytes lie inside the view.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the value.
 * @returns {number} The decoded signed integer.
 * @throws {RangeError} When fewer than four bytes are available at `offset`.
 */
function readInt32(view, offset) {
  const fitsInView = !(view.byteLength < offset + 4);
  if (fitsInView) {
    return view.getInt32(offset);
  }
  throw new RangeError(`readInt32 out of range: offset=${offset}, length=${view.byteLength}`);
}
/**
 * Reads a single unsigned byte after checking that it lies inside the view.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the value.
 * @returns {number} The byte value (0-255).
 * @throws {RangeError} When `offset` is at or beyond the end of the view.
 */
function readUint8(view, offset) {
  const viewLength = view.byteLength;
  if (viewLength < offset + 1) {
    throw new RangeError(`readUint8 out of range: offset=${offset}, length=${view.byteLength}`);
  }
  return view.getUint8(offset);
}
/**
 * Reads an unsigned 16-bit integer (DataView default byte order) after
 * checking that both bytes lie inside the view.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the value.
 * @returns {number} The decoded value (0-65535).
 * @throws {RangeError} When fewer than two bytes are available at `offset`.
 */
function readUint16(view, offset) {
  const bytesNeeded = 2;
  if (view.byteLength < offset + bytesNeeded) {
    throw new RangeError(`readUint16 out of range: offset=${offset}, length=${view.byteLength}`);
  }
  const value = view.getUint16(offset);
  return value;
}
/**
 * Reads an unsigned 32-bit integer (DataView default byte order) after
 * checking that the four bytes lie inside the view.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the value.
 * @returns {number} The decoded value.
 * @throws {RangeError} When fewer than four bytes are available at `offset`.
 */
function readUint32(view, offset) {
  const lastByteExclusive = offset + 4;
  if (lastByteExclusive > view.byteLength) {
    throw new RangeError(`readUint32 out of range: offset=${offset}, length=${view.byteLength}`);
  }
  return view.getUint32(offset);
}
/**
 * Reads four consecutive bytes and returns them as a four-character string
 * (one character per byte), as used for box type codes.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the first character.
 * @returns {string} The four-character code.
 * @throws {RangeError} When fewer than four bytes are available at `offset`.
 */
function readType(view, offset) {
  if (view.byteLength < offset + 4) {
    throw new RangeError(`readType out of range: offset=${offset}, length=${view.byteLength}`);
  }
  const charCodes = [];
  for (let index = 0; index < 4; index += 1) {
    charCodes.push(readUint8(view, offset + index));
  }
  return String.fromCharCode(...charCodes);
}
/**
 * Lists the boxes laid out back-to-back inside a byte range of a buffer.
 *
 * Each box starts with a 32-bit size and a four-character type. A size of 1
 * means the real size follows the type as a 64-bit value (16-byte header);
 * a size of 0 means the box runs to the end of the scanned range.
 *
 * Scanning stops silently (returning what was found so far) at the first box
 * that is truncated, overruns `end`, or declares a size smaller than its own
 * header. The last case matters: such a box would otherwise be reported with
 * `dataStart > end` and the scan would continue from a misaligned position.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the boxes.
 * @param {number} [start=0] - Absolute offset at which scanning begins.
 * @param {number} [end=arrayBuffer.byteLength] - Absolute offset at which scanning ends (exclusive).
 * @returns {Array<{type: string, start: number, size: number, headerSize: number, dataStart: number, end: number}>}
 *   Box descriptors with absolute offsets into `arrayBuffer`.
 * @throws {RangeError} If a header read falls outside `arrayBuffer` (e.g. `end` exceeds the buffer).
 */
function getBoxes(arrayBuffer, start = 0, end = arrayBuffer.byteLength) {
  const boxes = [];
  const view = new DataView(arrayBuffer);
  let offset = start;
  while (offset + 8 <= end) {
    let size = readUint32(view, offset);
    const type = readType(view, offset + 4);
    let headerSize = 8;
    if (size === 1) {
      // Extended header: the 64-bit size sits right after the type field.
      if (offset + 16 > end) {
        break;
      }
      size = readUint64(view, offset + 8);
      headerSize = 16;
    } else if (size === 0) {
      // Open-ended box: it occupies the rest of the scanned range.
      size = end - offset;
    }
    // A box cannot be smaller than its header, nor extend past the range.
    if (size < headerSize || offset + size > end) {
      break;
    }
    boxes.push({
      type,
      start: offset,
      size,
      headerSize,
      dataStart: offset + headerSize,
      end: offset + size
    });
    offset += size;
  }
  return boxes;
}
/**
 * Returns the first box of the given type found directly inside a parent
 * box's payload range.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the boxes.
 * @param {{dataStart: number, end: number}} parentBox - Parent box descriptor.
 * @param {string} type - Four-character type to look for.
 * @returns {object|null} The matching box descriptor, or null when none matches.
 */
function findChildBox(arrayBuffer, parentBox, type) {
  const children = getBoxes(arrayBuffer, parentBox.dataStart, parentBox.end);
  const match = children.find((child) => child.type === type);
  if (match) {
    return match;
  }
  return null;
}
/**
 * Returns every box of the given type found directly inside a parent box's
 * payload range, in file order.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the boxes.
 * @param {{dataStart: number, end: number}} parentBox - Parent box descriptor.
 * @param {string} type - Four-character type to look for.
 * @returns {object[]} Matching box descriptors (possibly empty).
 */
function findBoxesByType(arrayBuffer, parentBox, type) {
  const matches = [];
  for (const child of getBoxes(arrayBuffer, parentBox.dataStart, parentBox.end)) {
    if (child.type === type) {
      matches.push(child);
    }
  }
  return matches;
}
/**
 * Extracts timescale and duration from an mdhd box payload.
 *
 * The first payload byte is the version: version 1 reads the timescale at
 * payload offset 20 and a 64-bit duration at 24; any other version reads the
 * timescale at 12 and a 32-bit duration at 16.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} mdhdBox - mdhd box descriptor.
 * @returns {{timescale: number, duration: number}} Parsed header fields.
 * @throws {RangeError} When the payload is too short for the fields read.
 */
function parseMdhd(arrayBuffer, mdhdBox) {
  const payloadLength = mdhdBox.end - mdhdBox.dataStart;
  const view = new DataView(arrayBuffer, mdhdBox.dataStart, payloadLength);
  const isVersion1 = readUint8(view, 0) === 1;
  const timescale = readUint32(view, isVersion1 ? 20 : 12);
  const duration = isVersion1 ? readUint64(view, 24) : readUint32(view, 16);
  return { timescale, duration };
}
/**
 * Returns the four-character handler type stored at payload offset 8 of an
 * hdlr box.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} hdlrBox - hdlr box descriptor.
 * @returns {string} The handler type code.
 * @throws {RangeError} When the payload is shorter than 12 bytes.
 */
function parseHdlr(arrayBuffer, hdlrBox) {
  const payloadStart = hdlrBox.dataStart;
  const payloadLength = hdlrBox.end - hdlrBox.dataStart;
  const payload = new DataView(arrayBuffer, payloadStart, payloadLength);
  const handlerType = readType(payload, 8);
  return handlerType;
}
/**
 * Extracts the track width and height from a tkhd box payload.
 *
 * Both values are read as 32-bit integers and divided by 65536. Their
 * payload offsets depend on the version byte: 88/92 for version 1, 76/80
 * otherwise.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} tkhdBox - tkhd box descriptor.
 * @returns {{width: number, height: number}} Dimensions after scaling.
 * @throws {RangeError} When the payload is too short to contain the height field.
 */
function parseTkhd(arrayBuffer, tkhdBox) {
  const view = new DataView(arrayBuffer, tkhdBox.dataStart, tkhdBox.end - tkhdBox.dataStart);
  const isVersion1 = readUint8(view, 0) === 1;
  const [widthOffset, heightOffset] = isVersion1 ? [88, 92] : [76, 80];
  if (view.byteLength < heightOffset + 4) {
    throw new RangeError(`tkhd box too short: length=${view.byteLength}`);
  }
  const scaleDown = (raw) => raw / 65536;
  const width = scaleDown(readUint32(view, widthOffset));
  const height = scaleDown(readUint32(view, heightOffset));
  return { width, height };
}
/**
 * Parses an hvcC box payload: the NAL length-prefix size and the parameter
 * set NAL units stored in its arrays.
 *
 * Layout as read here: the low two bits of byte 21 hold (length size - 1),
 * byte 22 holds the number of arrays, and arrays start at byte 23. Each array
 * is one header byte, a 16-bit NAL count, then that many entries of a 16-bit
 * length followed by the NAL bytes.
 *
 * If the payload ends in the middle of an array or NAL unit, parsing stops
 * entirely and the parameter sets collected so far are returned. Continuing
 * with the next array after a truncation would resume from a misaligned
 * offset and could report arbitrary bytes as parameter sets.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} hvccBox - hvcC box descriptor.
 * @returns {{lengthSize: number, parameterSets: Uint8Array[]}} Length-prefix
 *   size (1-4) and copies of the parameter set NAL units in file order.
 * @throws {RangeError} When the payload is shorter than 23 bytes.
 */
function parseHvcc(arrayBuffer, hvccBox) {
  const view = new DataView(arrayBuffer, hvccBox.dataStart, hvccBox.end - hvccBox.dataStart);
  if (view.byteLength < 23) {
    throw new RangeError(`hvcC box too short: length=${view.byteLength}`);
  }
  const lengthSize = (readUint8(view, 21) & 0x03) + 1;
  const numOfArrays = readUint8(view, 22);
  const parameterSets = [];
  let offset = 23;
  arrays: for (let i = 0; i < numOfArrays; i++) {
    if (offset + 3 > view.byteLength) {
      break;
    }
    // Skip the array header byte; the NAL count follows it.
    const numNalus = readUint16(view, offset + 1);
    offset += 3;
    for (let j = 0; j < numNalus; j++) {
      if (offset + 2 > view.byteLength) {
        // Truncated length field: nothing after this point is trustworthy.
        break arrays;
      }
      const naluLength = readUint16(view, offset);
      offset += 2;
      if (offset + naluLength > view.byteLength) {
        // Truncated NAL body: stop instead of re-syncing on payload bytes.
        break arrays;
      }
      // slice() copies, so the result does not alias the source buffer.
      parameterSets.push(new Uint8Array(arrayBuffer.slice(
        hvccBox.dataStart + offset,
        hvccBox.dataStart + offset + naluLength
      )));
      offset += naluLength;
    }
  }
  return { lengthSize, parameterSets };
}
/**
 * Reads the first sample entry of an stsd box and, if it contains an hvcC
 * child box, the parsed decoder configuration.
 *
 * Only the first entry is inspected (later entries are ignored).
 *
 * Child boxes of a visual sample entry do not start right after the 8-byte
 * box header: 78 bytes of fixed fields come first (6 reserved bytes, a 2-byte
 * data reference index and 70 bytes of visual fields). Scanning from the
 * header end would interpret the zeroed reserved bytes as an open-ended box
 * and never reach hvcC, so the scan starts at entry + 86. For entries too
 * short for that layout, or when nothing is found there, the scan falls back
 * to starting directly after the box header.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} stsdBox - stsd box descriptor.
 * @returns {{codecType: string, hvcc: ({lengthSize: number, parameterSets: Uint8Array[]}|null)}|null}
 *   Entry type and hvcC info, or null when the box declares zero entries.
 * @throws {RangeError} When the box or its first entry is truncated or has an invalid size.
 */
function parseStsd(arrayBuffer, stsdBox) {
  // Box header plus the fixed SampleEntry and VisualSampleEntry fields.
  const VISUAL_CHILDREN_OFFSET = 8 + 78;
  const view = new DataView(arrayBuffer, stsdBox.dataStart, stsdBox.end - stsdBox.dataStart);
  if (view.byteLength < 8) {
    throw new RangeError(`stsd box too short: length=${view.byteLength}`);
  }
  const entryCount = readUint32(view, 4);
  if (entryCount < 1) {
    return null;
  }
  const offset = 8;
  if (offset + 8 > view.byteLength) {
    throw new RangeError(`stsd entry out of range: offset=${offset}, length=${view.byteLength}`);
  }
  const size = readUint32(view, offset);
  const type = readType(view, offset + 4);
  const sampleEntryStart = stsdBox.dataStart + offset;
  if (!size || sampleEntryStart + size > stsdBox.end) {
    throw new RangeError(`invalid stsd entry size: size=${size}, entryStart=${sampleEntryStart}, stsdEnd=${stsdBox.end}`);
  }
  const sampleEntryEnd = sampleEntryStart + size;
  const describeEntry = (dataStart) => ({
    type,
    start: sampleEntryStart,
    size,
    dataStart,
    end: sampleEntryEnd
  });
  let hvcc = null;
  if (size >= VISUAL_CHILDREN_OFFSET) {
    hvcc = findChildBox(arrayBuffer, describeEntry(sampleEntryStart + VISUAL_CHILDREN_OFFSET), 'hvcC');
  }
  if (!hvcc) {
    // Fallback: child boxes placed directly after the entry's box header.
    hvcc = findChildBox(arrayBuffer, describeEntry(sampleEntryStart + 8), 'hvcC');
  }
  return {
    codecType: type,
    hvcc: hvcc ? parseHvcc(arrayBuffer, hvcc) : null
  };
}
/**
 * Extracts the per-track sample defaults from a trex box payload.
 *
 * Fields are read at payload offsets 4 (track id), 12 (default sample
 * duration), 16 (default sample size) and 20 (default sample flags).
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} trexBox - trex box descriptor.
 * @returns {{trackId: number, defaultSampleDuration: number, defaultSampleSize: number, defaultSampleFlags: number}}
 * @throws {RangeError} When the payload is shorter than 24 bytes.
 */
function parseTrex(arrayBuffer, trexBox) {
  const payloadLength = trexBox.end - trexBox.dataStart;
  const view = new DataView(arrayBuffer, trexBox.dataStart, payloadLength);
  if (view.byteLength < 24) {
    throw new RangeError(`trex box too short: length=${view.byteLength}`);
  }
  const trackId = readUint32(view, 4);
  const defaultSampleDuration = readUint32(view, 12);
  const defaultSampleSize = readUint32(view, 16);
  const defaultSampleFlags = readUint32(view, 20);
  return { trackId, defaultSampleDuration, defaultSampleSize, defaultSampleFlags };
}
/**
 * Parses an fMP4 init segment and describes its first HEVC video track.
 *
 * Walks moov -> trak and picks the first track whose handler type is 'vide'
 * and whose first stsd sample entry is 'hvc1' or 'hev1'. Tracks that fail
 * either test are skipped; when no track qualifies (or there is no moov box)
 * the function returns null. The original author's note says the caller then
 * continues with the regular HLS playback path.
 *
 * @param {ArrayBuffer} arrayBuffer - The init segment bytes.
 * @returns {{codecType: string, codecId: number, timescale: number, duration: number,
 *   trackId: (number|null), width: number, height: number, lengthSize: number,
 *   parameterSets: Uint8Array[], trex: (object|null)}|null}
 *   Track description, or null when no HEVC video track is found.
 * @throws {RangeError} When one of the inspected boxes is truncated.
 */
export function parseHevcInitSegment(arrayBuffer) {
  const moov = getBoxes(arrayBuffer).find(({ type }) => type === 'moov');
  if (!moov) {
    return null;
  }

  // Index the trex defaults by track id so the chosen track can look its own up.
  const mvex = findChildBox(arrayBuffer, moov, 'mvex');
  const trexByTrackId = new Map();
  if (mvex) {
    for (const trexBox of findBoxesByType(arrayBuffer, mvex, 'trex')) {
      const trexInfo = parseTrex(arrayBuffer, trexBox);
      trexByTrackId.set(trexInfo.trackId, trexInfo);
    }
  }

  const hevcSampleEntries = ['hvc1', 'hev1'];
  for (const trak of findBoxesByType(arrayBuffer, moov, 'trak')) {
    const mdia = findChildBox(arrayBuffer, trak, 'mdia');
    const tkhd = findChildBox(arrayBuffer, trak, 'tkhd');
    if (!mdia) {
      continue;
    }

    const hdlr = findChildBox(arrayBuffer, mdia, 'hdlr');
    const isVideoTrack = Boolean(hdlr) && parseHdlr(arrayBuffer, hdlr) === 'vide';
    if (!isVideoTrack) {
      continue;
    }

    const mdhd = findChildBox(arrayBuffer, mdia, 'mdhd');
    const minf = findChildBox(arrayBuffer, mdia, 'minf');
    let stbl = null;
    if (minf) {
      stbl = findChildBox(arrayBuffer, minf, 'stbl');
    }
    let stsd = null;
    if (stbl) {
      stsd = findChildBox(arrayBuffer, stbl, 'stsd');
    }

    let mdhdInfo = { timescale: 1000, duration: 0 };
    if (mdhd) {
      mdhdInfo = parseMdhd(arrayBuffer, mdhd);
    }
    let stsdInfo = null;
    if (stsd) {
      stsdInfo = parseStsd(arrayBuffer, stsd);
    }
    let tkhdInfo = { width: 0, height: 0 };
    if (tkhd) {
      tkhdInfo = parseTkhd(arrayBuffer, tkhd);
    }

    // Core of the H265 detection for HLS/fMP4: an stsd sample entry of type
    // hvc1 / hev1 marks this video track as HEVC (H265). Any other entry type
    // means this track is skipped.
    const isHevcTrack = Boolean(stsdInfo) && hevcSampleEntries.includes(stsdInfo.codecType);
    if (!isHevcTrack) {
      continue;
    }

    let trackId = null;
    if (tkhd) {
      const view = new DataView(arrayBuffer, tkhd.dataStart, tkhd.end - tkhd.dataStart);
      const isVersion1 = readUint8(view, 0) === 1;
      // The track id sits at payload offset 20 for version 1 and 12 otherwise.
      const minimumLength = isVersion1 ? 24 : 16;
      if (view.byteLength < minimumLength) {
        throw new RangeError(`tkhd box too short for trackId: length=${view.byteLength}`);
      }
      trackId = readUint32(view, isVersion1 ? 20 : 12);
    }

    const { hvcc } = stsdInfo;
    return {
      codecType: stsdInfo.codecType,
      // Per the original author: kept consistent with the existing ws/flv
      // pipeline, where 12 means H265 and WasmPlayer later opens its decoder
      // by this codecId.
      codecId: 12,
      timescale: mdhdInfo.timescale || 1000,
      duration: mdhdInfo.duration || 0,
      trackId,
      width: Math.round(tkhdInfo.width || 0),
      height: Math.round(tkhdInfo.height || 0),
      // hvcC carries the HEVC parameter sets and the NAL length-field size;
      // per the original author they are used later to turn m4s samples into
      // Annex B access units the wasm decoder can consume.
      lengthSize: hvcc ? hvcc.lengthSize : 4,
      parameterSets: hvcc ? hvcc.parameterSets : [],
      trex: trexByTrackId.get(trackId) || null
    };
  }
  return null;
}
/**
 * Parses a tfhd box payload: flags, track id and whichever optional fields
 * the flags announce.
 *
 * Optional fields follow the track id in this order, each present only when
 * its flag bit is set: 0x01 base data offset (64-bit), 0x02 a 32-bit field
 * that is skipped, 0x08 default sample duration, 0x10 default sample size,
 * 0x20 default sample flags. Absent fields are reported as null.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} tfhdBox - tfhd box descriptor.
 * @returns {{flags: number, trackId: number, baseDataOffset: (number|null),
 *   defaultSampleDuration: (number|null), defaultSampleSize: (number|null),
 *   defaultSampleFlags: (number|null)}}
 * @throws {RangeError} When the payload is shorter than 8 bytes or than the flags require.
 */
function parseTfhd(arrayBuffer, tfhdBox) {
  const view = new DataView(arrayBuffer, tfhdBox.dataStart, tfhdBox.end - tfhdBox.dataStart);
  if (view.byteLength < 8) {
    throw new RangeError(`tfhd box too short: length=${view.byteLength}`);
  }
  // 24-bit flags occupy payload bytes 1..3.
  const flags = (readUint8(view, 1) << 16) | (readUint8(view, 2) << 8) | readUint8(view, 3);
  const header = {
    flags,
    trackId: readUint32(view, 4),
    baseDataOffset: null,
    defaultSampleDuration: null,
    defaultSampleSize: null,
    defaultSampleFlags: null
  };
  let cursor = 8;
  if (flags & 0x000001) {
    header.baseDataOffset = readUint64(view, cursor);
    cursor += 8;
  }
  if (flags & 0x000002) {
    // Present but not needed here: step over it.
    cursor += 4;
  }
  const optionalUint32Fields = [
    [0x000008, 'defaultSampleDuration'],
    [0x000010, 'defaultSampleSize'],
    [0x000020, 'defaultSampleFlags']
  ];
  for (const [bit, key] of optionalUint32Fields) {
    if (flags & bit) {
      header[key] = readUint32(view, cursor);
      cursor += 4;
    }
  }
  return header;
}
/**
 * Returns the decode time stored in a tfdt box payload.
 *
 * The value starts at payload offset 4 and is 64 bits wide when the version
 * byte is 1, 32 bits wide otherwise.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} tfdtBox - tfdt box descriptor.
 * @returns {number} The decode time.
 * @throws {RangeError} When the payload is shorter than 8 bytes (or 12 for version 1).
 */
function parseTfdt(arrayBuffer, tfdtBox) {
  const payloadLength = tfdtBox.end - tfdtBox.dataStart;
  const view = new DataView(arrayBuffer, tfdtBox.dataStart, payloadLength);
  if (view.byteLength < 8) {
    throw new RangeError(`tfdt box too short: length=${view.byteLength}`);
  }
  if (readUint8(view, 0) === 1) {
    return readUint64(view, 4);
  }
  return readUint32(view, 4);
}
/**
 * Parses a trun box payload into its flags, optional data offset and the
 * per-sample table.
 *
 * Header flags: 0x001 a signed data offset follows the sample count, 0x004
 * first-sample flags follow. Per-sample flags: 0x100 duration, 0x200 size,
 * 0x400 flags, 0x800 composition offset (signed when the version byte is 1,
 * unsigned otherwise). Per-sample fields that are absent are left off the
 * sample object, except `cts`, which defaults to 0, and `flags` on the first
 * sample, which takes the first-sample flags when those are present.
 *
 * @param {ArrayBuffer} arrayBuffer - Buffer holding the box.
 * @param {{dataStart: number, end: number}} trunBox - trun box descriptor.
 * @returns {{flags: number, dataOffset: (number|null),
 *   samples: Array<{duration?: number, size?: number, flags?: number, cts: number}>}}
 * @throws {RangeError} When the payload is shorter than 8 bytes or than the sample table requires.
 */
function parseTrun(arrayBuffer, trunBox) {
  const view = new DataView(arrayBuffer, trunBox.dataStart, trunBox.end - trunBox.dataStart);
  if (view.byteLength < 8) {
    throw new RangeError(`trun box too short: length=${view.byteLength}`);
  }
  const version = readUint8(view, 0);
  const flags = (readUint8(view, 1) << 16) | (readUint8(view, 2) << 8) | readUint8(view, 3);
  const sampleCount = readUint32(view, 4);

  // Sequential reader over the fields that follow the sample count.
  let cursor = 8;
  const nextUint32 = () => {
    const value = readUint32(view, cursor);
    cursor += 4;
    return value;
  };
  const nextInt32 = () => {
    const value = readInt32(view, cursor);
    cursor += 4;
    return value;
  };

  const dataOffset = (flags & 0x000001) ? nextInt32() : null;
  const firstSampleFlags = (flags & 0x000004) ? nextUint32() : null;

  const hasDuration = (flags & 0x000100) !== 0;
  const hasSize = (flags & 0x000200) !== 0;
  const hasFlags = (flags & 0x000400) !== 0;
  const hasCts = (flags & 0x000800) !== 0;
  const signedCts = version === 1;

  const samples = [];
  for (let index = 0; index < sampleCount; index += 1) {
    const sample = {};
    if (hasDuration) {
      sample.duration = nextUint32();
    }
    if (hasSize) {
      sample.size = nextUint32();
    }
    if (hasFlags) {
      sample.flags = nextUint32();
    } else if (index === 0 && firstSampleFlags != null) {
      sample.flags = firstSampleFlags;
    }
    if (hasCts) {
      sample.cts = signedCts ? nextInt32() : nextUint32();
    } else {
      sample.cts = 0;
    }
    samples.push(sample);
  }
  if (cursor > view.byteLength) {
    throw new RangeError(`trun sample table out of range: offset=${cursor}, length=${view.byteLength}, sampleCount=${sampleCount}, flags=${flags}`);
  }
  return { flags, dataOffset, samples };
}
/**
 * Splits a length-prefixed sample into its NAL units.
 *
 * Each unit is preceded by a big-endian length of `lengthSize` bytes.
 * Splitting stops at the first zero length or at the first unit that would
 * run past the end of the sample; whatever was split before that is kept.
 *
 * The length is accumulated arithmetically rather than with `<<`/`|`: the
 * bitwise form yields a negative number for 4-byte lengths of 0x80000000 and
 * above, which slips past the bounds check and moves the offset backwards
 * (an endless loop for input such as FF FF FF FC).
 *
 * @param {Uint8Array} sampleData - Bytes of one sample.
 * @param {number} lengthSize - Width of each length prefix in bytes.
 * @returns {{units: Uint8Array[], consumedBytes: number}} The units (views
 *   into `sampleData`, not copies) and the offset at which splitting stopped.
 */
function splitNalus(sampleData, lengthSize) {
  const units = [];
  const view = new DataView(sampleData.buffer, sampleData.byteOffset, sampleData.byteLength);
  let offset = 0;
  while (offset + lengthSize <= sampleData.byteLength) {
    let naluSize = 0;
    for (let i = 0; i < lengthSize; i++) {
      // Multiply instead of shifting so the value stays non-negative.
      naluSize = naluSize * 256 + readUint8(view, offset + i);
    }
    offset += lengthSize;
    if (!naluSize || offset + naluSize > sampleData.byteLength) {
      break;
    }
    units.push(sampleData.subarray(offset, offset + naluSize));
    offset += naluSize;
  }
  return {
    units,
    consumedBytes: offset
  };
}
/**
 * Splits a sample into NAL units, trying the preferred length-prefix size
 * first and then the other sizes (4, 2, 1, 3) as fallbacks.
 *
 * Most streams match the lengthSize announced in init.mp4; per the original
 * author, the fallback exists for real-world streams whose fragments carry
 * inconsistent length headers.
 *
 * Candidates that produce no units are ignored. The rest are scored as
 * `units * 100000 - trailingBytes` and the highest score wins; the search
 * stops early at the first candidate that consumes the sample exactly.
 *
 * @param {Uint8Array} sampleData - Bytes of one sample.
 * @param {number} preferredLengthSize - Length-prefix size to try first.
 * @returns {{lengthSize: number, units: Uint8Array[], consumedBytes: number,
 *   trailingBytes: number, score: number}|null} Best split, or null when no
 *   candidate yields any unit.
 */
function splitNalusWithFallback(sampleData, preferredLengthSize) {
  // De-duplicate while keeping first-seen order, then drop out-of-range sizes.
  const orderedCandidates = [...new Set([preferredLengthSize, 4, 2, 1, 3])]
    .filter((size) => size >= 1 && size <= 4);
  let best = null;
  for (const lengthSize of orderedCandidates) {
    const { units, consumedBytes } = splitNalus(sampleData, lengthSize);
    if (units.length === 0) {
      continue;
    }
    const trailingBytes = sampleData.byteLength - consumedBytes;
    const score = units.length * 100000 - trailingBytes;
    const isImprovement = best === null || score > best.score;
    if (isImprovement) {
      best = { lengthSize, units, consumedBytes, trailingBytes, score };
    }
    if (trailingBytes === 0) {
      break;
    }
  }
  return best;
}
/**
 * Returns the 6-bit NAL unit type taken from bits 1..6 of the first byte.
 *
 * @param {Uint8Array|null|undefined} unit - NAL unit bytes.
 * @returns {number} The type (0-63), or -1 when the unit is missing or empty.
 */
function getHevcNaluType(unit) {
  const hasBytes = Boolean(unit) && Boolean(unit.byteLength);
  if (!hasBytes) {
    return -1;
  }
  const headerByte = unit[0];
  return (headerByte >> 1) & 0x3f;
}
/**
 * Reports whether any of the given NAL units has a type in the range 16-21
 * (inclusive), which this parser treats as marking a keyframe.
 *
 * @param {Uint8Array[]} units - NAL units of one sample.
 * @returns {boolean} True when at least one unit's type is within 16..21.
 */
function isHevcKeyframe(units) {
  const isKeyframeUnit = (unit) => {
    const naluType = getHevcNaluType(unit);
    return !(naluType < 16 || naluType > 21);
  };
  return units.some(isKeyframeUnit);
}
/**
 * Formats the leading bytes of a buffer as space-separated, zero-padded,
 * two-digit lowercase hex values.
 *
 * @param {Uint8Array} data - Bytes to preview.
 * @param {number} [maxBytes=16] - Maximum number of bytes to include.
 * @returns {string} The hex preview ('' for empty input).
 */
function formatBytesPreview(data, maxBytes = 16) {
  const previewLength = Math.min(maxBytes, data.byteLength);
  const toHexPair = (_, index) => data[index].toString(16).padStart(2, '0');
  return Array.from({ length: previewLength }, toHexPair).join(' ');
}
/**
 * Parses an fMP4 media segment and returns its HEVC samples split into NAL
 * units.
 *
 * Only the first top-level moof box and the first top-level mdat box are
 * used. Inside the moof, every traf that has a tfhd, a tfdt and at least one
 * trun is processed; a traf is skipped when `initInfo.trackId` is truthy and
 * differs from the traf's track id.
 *
 * Fields read from `initInfo`: `trackId`, `trex` (default sample duration and
 * size), `lengthSize` (falls back to 4) and `timescale`.
 *
 * @param {ArrayBuffer} arrayBuffer - The media segment bytes.
 * @param {object} initInfo - Track description; presumably the object returned
 *   by parseHevcInitSegment — confirm against the caller.
 * @returns {Array<{pts: number, ptsMs: number, frameType: number, keyframe: boolean,
 *   lengthSize: number, units: Uint8Array[], data: Uint8Array}>}
 *   One entry per sample, in file order; an empty array when the segment has
 *   no moof or no mdat box. `data` and `units` are views into `arrayBuffer`.
 * @throws {RangeError} When a box is truncated, a sample lies outside the mdat
 *   payload, a sample has no size, or a sample yields no NAL units. The
 *   message is extended with the box ranges and traf/trun indexes reached.
 */
export function parseHevcMediaSegment(arrayBuffer, initInfo) {
// Progress markers that are appended to RangeError messages in the catch below.
let debugContext = {
topLevelTypes: [],
moofRange: null,
mdatRange: null,
trafIndex: -1,
trafRange: null,
trunIndex: -1,
trunRange: null
};
try {
const topBoxes = getBoxes(arrayBuffer);
debugContext.topLevelTypes = topBoxes.map((box) => box.type);
const moof = topBoxes.find((box) => box.type === 'moof');
const mdat = topBoxes.find((box) => box.type === 'mdat');
if (!moof || !mdat) {
return [];
}
debugContext.moofRange = [moof.start, moof.end];
debugContext.mdatRange = [mdat.dataStart, mdat.end];
const trafs = findBoxesByType(arrayBuffer, moof, 'traf');
const samples = [];
for (let trafIndex = 0; trafIndex < trafs.length; trafIndex++) {
const traf = trafs[trafIndex];
debugContext.trafIndex = trafIndex;
debugContext.trafRange = [traf.start, traf.end];
const tfhd = findChildBox(arrayBuffer, traf, 'tfhd');
const tfdt = findChildBox(arrayBuffer, traf, 'tfdt');
const truns = findBoxesByType(arrayBuffer, traf, 'trun');
if (!tfhd || !tfdt || !truns.length) {
continue;
}
const tfhdInfo = parseTfhd(arrayBuffer, tfhd);
// Skip fragments that belong to a different track than the selected one.
if (initInfo.trackId && tfhdInfo.trackId !== initInfo.trackId) {
continue;
}
const defaultInfo = initInfo.trex || {};
// Running decode timestamp; advanced by each sample's duration.
let decodeTime = parseTfdt(arrayBuffer, tfdt);
let nextSampleOffset = null;
// The traf's sample-data base offset prefers tfhd.baseDataOffset;
// when that field is absent, it is derived from the start of the current moof.
const trafDataBaseOffset = tfhdInfo.baseDataOffset != null ? tfhdInfo.baseDataOffset : moof.start;
for (let trunIndex = 0; trunIndex < truns.length; trunIndex++) {
const trunBox = truns[trunIndex];
debugContext.trunIndex = trunIndex;
debugContext.trunRange = [trunBox.start, trunBox.end];
const trunInfo = parseTrun(arrayBuffer, trunBox);
// When trun.dataOffset is present, relocate the sample region from the traf base offset;
// otherwise continue from where the previous sample ended, to support several truns in one traf.
const sampleBaseOffset = trunInfo.dataOffset != null
? trafDataBaseOffset + trunInfo.dataOffset
: (nextSampleOffset != null ? nextSampleOffset : mdat.dataStart);
if (sampleBaseOffset < mdat.dataStart || sampleBaseOffset > mdat.end) {
throw new RangeError(`sampleBaseOffset out of mdat range: sampleBaseOffset=${sampleBaseOffset}, trafDataBaseOffset=${trafDataBaseOffset}, trunDataOffset=${trunInfo.dataOffset}, mdat=[${mdat.dataStart}, ${mdat.end}]`);
}
let sampleOffset = sampleBaseOffset;
for (const rawSample of trunInfo.samples) {
// Per-sample value first, then the tfhd default, then the trex default.
// Note: `||` also falls through when the per-sample value is 0.
const duration = rawSample.duration || tfhdInfo.defaultSampleDuration || defaultInfo.defaultSampleDuration || 0;
const size = rawSample.size || tfhdInfo.defaultSampleSize || defaultInfo.defaultSampleSize || 0;
const pts = decodeTime + (rawSample.cts || 0);
if (!size) {
throw new RangeError(`sample size is empty: sampleOffset=${sampleOffset}`);
}
if (sampleOffset < mdat.dataStart || sampleOffset + size > mdat.end) {
throw new RangeError(`sample out of mdat range: sampleOffset=${sampleOffset}, size=${size}, mdat=[${mdat.dataStart}, ${mdat.end}]`);
}
// A view over the sample bytes inside arrayBuffer (no copy).
const sampleData = new Uint8Array(arrayBuffer, sampleOffset, size);
const splitResult = splitNalusWithFallback(sampleData, initInfo.lengthSize || 4);
if (!splitResult || !splitResult.units.length) {
throw new RangeError(`HEVC sample contains no NAL units: sampleOffset=${sampleOffset}, size=${size}, lengthSize=${initInfo.lengthSize || 4}, trafDataBaseOffset=${trafDataBaseOffset}, trunDataOffset=${trunInfo.dataOffset}, bytes=${formatBytesPreview(sampleData)}`);
}
// The wasm decoding pipeline ultimately consumes HEVC NAL units, so the fMP4 sample is split back into NALs here.
const units = splitResult.units;
const keyframe = isHevcKeyframe(units);
samples.push({
pts,
// Converted to milliseconds when a timescale is known; otherwise the raw pts is passed through.
ptsMs: initInfo.timescale ? Math.floor((pts / initInfo.timescale) * 1000) : pts,
frameType: keyframe ? 1 : 2,
keyframe,
lengthSize: splitResult.lengthSize,
units,
data: sampleData
});
sampleOffset += size;
nextSampleOffset = sampleOffset;
decodeTime += duration;
}
}
}
return samples;
} catch (error) {
// Enrich RangeErrors once with the parsing context; the marker check avoids appending it twice.
if (error instanceof RangeError && !String(error.message).includes('topLevelTypes=')) {
throw new RangeError(`${error.message}, topLevelTypes=${debugContext.topLevelTypes.join('/')}, moofRange=${debugContext.moofRange}, mdatRange=${debugContext.mdatRange}, trafIndex=${debugContext.trafIndex}, trafRange=${debugContext.trafRange}, trunIndex=${debugContext.trunIndex}, trunRange=${debugContext.trunRange}`);
}
throw error;
}
}