jessibuca
Version:
a h5 live stream player
592 lines (547 loc) • 20.1 kB
text/typescript
interface MP4Types {
[key: string]: number[];
}
interface MP4Constants {
FTYP: Uint8Array;
STSD_PREFIX: Uint8Array;
STTS: Uint8Array;
STSC: Uint8Array;
STCO: Uint8Array;
STSZ: Uint8Array;
HDLR_VIDEO: Uint8Array;
HDLR_AUDIO: Uint8Array;
DREF: Uint8Array;
SMHD: Uint8Array;
VMHD: Uint8Array;
}
export interface MP4Meta {
id: number;
type: 'audio' | 'video';
timescale: number;
duration: number;
codecWidth: number;
codecHeight: number;
presentWidth: number;
presentHeight: number;
channelCount: number;
audioSampleRate: number;
config?: number[];
avcc: Uint8Array;
videoType?: string;
}
interface MP4Track {
id: number;
sequenceNumber: number;
duration: number;
size: number;
flags: {
isLeading: number;
dependsOn: number;
isDependedOn: number;
hasRedundancy: number;
isNonSync: number;
};
cts: number;
}
class MP4 {
static types: MP4Types = {
avc1: [], avcC: [], hvc1: [], hvcC: [], btrt: [],
dinf: [], dref: [], esds: [], ftyp: [], hdlr: [],
mdat: [], mdhd: [], mdia: [], mfhd: [], minf: [],
moof: [], moov: [], mp4a: [], mvex: [], mvhd: [],
sdtp: [], stbl: [], stco: [], stsc: [], stsd: [],
stsz: [], stts: [], tfdt: [], tfhd: [], traf: [],
trak: [], trun: [], trex: [], tkhd: [], vmhd: [],
smhd: []
}
static readonly constants: MP4Constants = {
FTYP: new Uint8Array([
0x69, 0x73, 0x6F, 0x6D, // major_brand: isom
0x0, 0x0, 0x0, 0x1, // minor_version: 0x01
0x69, 0x73, 0x6F, 0x6D, // isom
0x61, 0x76, 0x63, 0x31 // avc1
]),
STSD_PREFIX: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x01 // entry_count
]),
STTS: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00 // entry_count
]),
STSC: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00 // entry_count
]),
STCO: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00 // entry_count
]),
STSZ: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // sample_size
0x00, 0x00, 0x00, 0x00 // sample_count
]),
HDLR_VIDEO: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // pre_defined
0x76, 0x69, 0x64, 0x65, // handler_type: 'vide'
0x00, 0x00, 0x00, 0x00, // reserved: 3 * 4 bytes
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x56, 0x69, 0x64, 0x65,
0x6F, 0x48, 0x61, 0x6E,
0x64, 0x6C, 0x65, 0x72, 0x00 // name: VideoHandler
]),
HDLR_AUDIO: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // pre_defined
0x73, 0x6F, 0x75, 0x6E, // handler_type: 'soun'
0x00, 0x00, 0x00, 0x00, // reserved: 3 * 4 bytes
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x53, 0x6F, 0x75, 0x6E,
0x64, 0x48, 0x61, 0x6E,
0x64, 0x6C, 0x65, 0x72, 0x00 // name: SoundHandler
]),
DREF: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x01, // entry_count
0x00, 0x00, 0x00, 0x0C, // entry_size
0x75, 0x72, 0x6C, 0x20, // type 'url '
0x00, 0x00, 0x00, 0x01 // version(0) + flags
]),
SMHD: new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00 // balance(2) + reserved(2)
]),
VMHD: new Uint8Array([
0x00, 0x00, 0x00, 0x01, // version(0) + flags
0x00, 0x00, // graphicsmode: 2 bytes
0x00, 0x00, 0x00, 0x00, // opcolor: 3 * 2 bytes
0x00, 0x00
]),
} as MP4Constants;
// Generate a box
static box(type: number[], ...datas: Uint8Array[]): Uint8Array {
let size = 8;
let arrayCount = datas.length;
for (let i = 0; i < arrayCount; i++) {
size += datas[i].byteLength;
}
let result = new Uint8Array(size);
result[0] = (size >>> 24) & 0xFF; // size
result[1] = (size >>> 16) & 0xFF;
result[2] = (size >>> 8) & 0xFF;
result[3] = (size) & 0xFF;
result.set(type, 4); // type
let offset = 8;
for (let i = 0; i < arrayCount; i++) { // data body
result.set(datas[i], offset);
offset += datas[i].byteLength;
}
return result;
}
// emit ftyp & moov
static generateInitSegment(meta: MP4Meta): Uint8Array {
let ftyp = MP4.box(MP4.types.ftyp, MP4.constants.FTYP);
let moov = MP4.moov(meta);
let result = new Uint8Array(ftyp.byteLength + moov.byteLength);
result.set(ftyp, 0);
result.set(moov, ftyp.byteLength);
return result;
}
// Movie metadata box
static moov(meta: MP4Meta): Uint8Array {
let mvhd = MP4.mvhd(meta.timescale, meta.duration);
let trak = MP4.trak(meta);
let mvex = MP4.mvex(meta);
return MP4.box(MP4.types.moov, mvhd, trak, mvex);
}
// Movie header box
static mvhd(timescale: number, duration: number): Uint8Array {
return MP4.box(MP4.types.mvhd, new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // creation_time
0x00, 0x00, 0x00, 0x00, // modification_time
(timescale >>> 24) & 0xFF, // timescale: 4 bytes
(timescale >>> 16) & 0xFF,
(timescale >>> 8) & 0xFF,
(timescale) & 0xFF,
(duration >>> 24) & 0xFF, // duration: 4 bytes
(duration >>> 16) & 0xFF,
(duration >>> 8) & 0xFF,
(duration) & 0xFF,
0x00, 0x01, 0x00, 0x00, // Preferred rate: 1.0
0x01, 0x00, 0x00, 0x00, // PreferredVolume(1.0, 2bytes) + reserved(2bytes)
0x00, 0x00, 0x00, 0x00, // reserved: 4 + 4 bytes
0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x00, 0x00, // ----begin composition matrix----
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0x00, // ----end composition matrix----
0x00, 0x00, 0x00, 0x00, // ----begin pre_defined 6 * 4 bytes----
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // ----end pre_defined 6 * 4 bytes----
0xFF, 0xFF, 0xFF, 0xFF // next_track_ID
]));
}
// Track box
static trak(meta: MP4Meta): Uint8Array {
return MP4.box(MP4.types.trak, MP4.tkhd(meta), MP4.mdia(meta));
}
// Track header box
static tkhd(meta: MP4Meta): Uint8Array {
let trackId = meta.id, duration = meta.duration;
let width = meta.presentWidth, height = meta.presentHeight;
return MP4.box(MP4.types.tkhd, new Uint8Array([
0x00, 0x00, 0x00, 0x07, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // creation_time
0x00, 0x00, 0x00, 0x00, // modification_time
(trackId >>> 24) & 0xFF, // track_ID: 4 bytes
(trackId >>> 16) & 0xFF,
(trackId >>> 8) & 0xFF,
(trackId) & 0xFF,
0x00, 0x00, 0x00, 0x00, // reserved: 4 bytes
(duration >>> 24) & 0xFF, // duration: 4 bytes
(duration >>> 16) & 0xFF,
(duration >>> 8) & 0xFF,
(duration) & 0xFF,
0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // layer(2bytes) + alternate_group(2bytes)
0x00, 0x00, 0x00, 0x00, // volume(2bytes) + reserved(2bytes)
0x00, 0x01, 0x00, 0x00, // ----begin composition matrix----
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0x00, // ----end composition matrix----
(width >>> 8) & 0xFF, // width and height
(width) & 0xFF,
0x00, 0x00,
(height >>> 8) & 0xFF,
(height) & 0xFF,
0x00, 0x00
]));
}
static mdia(meta: MP4Meta): Uint8Array {
return MP4.box(MP4.types.mdia, MP4.mdhd(meta), MP4.hdlr(meta), MP4.minf(meta))
}
// Media header box
static mdhd(meta: MP4Meta): Uint8Array {
let timescale = meta.timescale;
let duration = meta.duration;
return MP4.box(MP4.types.mdhd, new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
0x00, 0x00, 0x00, 0x00, // creation_time
0x00, 0x00, 0x00, 0x00, // modification_time
(timescale >>> 24) & 0xFF, // timescale: 4 bytes
(timescale >>> 16) & 0xFF,
(timescale >>> 8) & 0xFF,
(timescale) & 0xFF,
(duration >>> 24) & 0xFF, // duration: 4 bytes
(duration >>> 16) & 0xFF,
(duration >>> 8) & 0xFF,
(duration) & 0xFF,
0x55, 0xC4, // language: und (undetermined)
0x00, 0x00 // pre_defined = 0
]));
}
// Media handler reference box
static hdlr(meta: MP4Meta): Uint8Array {
let data = null;
if (meta.type === 'audio') {
data = MP4.constants.HDLR_AUDIO;
} else {
data = MP4.constants.HDLR_VIDEO;
}
return MP4.box(MP4.types.hdlr, data);
}
// Media infomation box
static minf(meta: MP4Meta): Uint8Array {
let xmhd = null;
if (meta.type === 'audio') {
xmhd = MP4.box(MP4.types.smhd, MP4.constants.SMHD);
} else {
xmhd = MP4.box(MP4.types.vmhd, MP4.constants.VMHD);
}
return MP4.box(MP4.types.minf, xmhd, MP4.dinf(), MP4.stbl(meta));
}
// Data infomation box
static dinf(): Uint8Array {
let result = MP4.box(MP4.types.dinf,
MP4.box(MP4.types.dref, MP4.constants.DREF)
);
return result;
}
// Sample table box
static stbl(meta: MP4Meta): Uint8Array {
let result = MP4.box(MP4.types.stbl, // type: stbl
MP4.stsd(meta), // Sample Description Table
MP4.box(MP4.types.stts, MP4.constants.STTS), // Time-To-Sample
MP4.box(MP4.types.stsc, MP4.constants.STSC), // Sample-To-Chunk
MP4.box(MP4.types.stsz, MP4.constants.STSZ), // Sample size
MP4.box(MP4.types.stco, MP4.constants.STCO) // Chunk offset
);
return result;
}
// Sample description box
static stsd(meta: MP4Meta): Uint8Array {
if (meta.type === 'audio') {
// else: aac -> mp4a
return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.mp4a(meta));
} else {
if (meta.videoType === 'avc') {
//
return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.avc1(meta));
} else {
//
return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.hvc1(meta))
}
}
}
static mp4a(meta: MP4Meta): Uint8Array {
let channelCount = meta.channelCount;
let sampleRate = meta.audioSampleRate;
let data = new Uint8Array([
0x00, 0x00, 0x00, 0x00, // reserved(4)
0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2)
0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes
0x00, 0x00, 0x00, 0x00,
0x00, channelCount, // channelCount(2)
0x00, 0x10, // sampleSize(2)
0x00, 0x00, 0x00, 0x00, // reserved(4)
(sampleRate >>> 8) & 0xFF, // Audio sample rate
(sampleRate) & 0xFF,
0x00, 0x00
]);
return MP4.box(MP4.types.mp4a, data, MP4.esds(meta));
}
static esds(meta: MP4Meta): Uint8Array {
let config = meta.config || [];
let configSize = config.length;
let data = new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version 0 + flags
0x03, // descriptor_type
0x17 + configSize, // length3
0x00, 0x01, // es_id
0x00, // stream_priority
0x04, // descriptor_type
0x0F + configSize, // length
0x40, // codec: mpeg4_audio
0x15, // stream_type: Audio
0x00, 0x00, 0x00, // buffer_size
0x00, 0x00, 0x00, 0x00, // maxBitrate
0x00, 0x00, 0x00, 0x00, // avgBitrate
0x05 // descriptor_type
].concat([
configSize
]).concat(
config
).concat([
0x06, 0x01, 0x02 // GASpecificConfig
]));
return MP4.box(MP4.types.esds, data);
}
// avc
static avc1(meta: MP4Meta): Uint8Array {
let avcc = meta.avcc;
const width = meta.codecWidth;
const height = meta.codecHeight;
let data = new Uint8Array([
0, 0, 0, 0,
0, 0, 0, 1,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
width >>> 8 & 255,
width & 255,
height >>> 8 & 255,
height & 255,
0, 72, 0, 0,
0, 72, 0, 0,
0, 0, 0, 0,
0, 1,
0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0,
0, 24,
255, 255]
);
return MP4.box(MP4.types.avc1, data, MP4.box(MP4.types.avcC, avcc))
}
// hvc
static hvc1(meta: MP4Meta): Uint8Array {
let avcc = meta.avcc;
const width = meta.codecWidth;
const height = meta.codecHeight;
let data = new Uint8Array([
0, 0, 0, 0,
0, 0, 0, 1,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
width >>> 8 & 255,
width & 255,
height >>> 8 & 255,
height & 255,
0, 72, 0, 0,
0, 72, 0, 0,
0, 0, 0, 0,
0, 1,
0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0,
0, 24,
255, 255
]);
return MP4.box(MP4.types.hvc1, data, MP4.box(MP4.types.hvcC, avcc))
}
// Movie Extends box
static mvex(meta: MP4Meta): Uint8Array {
return MP4.box(MP4.types.mvex, MP4.trex(meta))
}
// Track Extends box
static trex(meta: MP4Meta): Uint8Array {
let trackId = meta.id;
let data = new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) + flags
(trackId >>> 24) & 0xFF, // track_ID
(trackId >>> 16) & 0xFF,
(trackId >>> 8) & 0xFF,
(trackId) & 0xFF,
0x00, 0x00, 0x00, 0x01, // default_sample_description_index
0x00, 0x00, 0x00, 0x00, // default_sample_duration
0x00, 0x00, 0x00, 0x00, // default_sample_size
0x00, 0x01, 0x00, 0x01 // default_sample_flags
]);
return MP4.box(MP4.types.trex, data);
}
// Movie fragment box
static moof(track: MP4Track, baseMediaDecodeTime: number): Uint8Array {
return MP4.box(MP4.types.moof, MP4.mfhd(track.sequenceNumber), MP4.traf(track, baseMediaDecodeTime))
}
//
static mfhd(sequenceNumber: number): Uint8Array {
let data = new Uint8Array([
0x00, 0x00, 0x00, 0x00,
(sequenceNumber >>> 24) & 0xFF, // sequence_number: int32
(sequenceNumber >>> 16) & 0xFF,
(sequenceNumber >>> 8) & 0xFF,
(sequenceNumber) & 0xFF
]);
return MP4.box(MP4.types.mfhd, data);
}
// Track fragment box
static traf(track: MP4Track, baseMediaDecodeTime: number): Uint8Array {
let trackId = track.id;
// Track fragment header box
let tfhd = MP4.box(MP4.types.tfhd, new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) & flags
(trackId >>> 24) & 0xFF, // track_ID
(trackId >>> 16) & 0xFF,
(trackId >>> 8) & 0xFF,
(trackId) & 0xFF
]));
// Track Fragment Decode Time
let tfdt = MP4.box(MP4.types.tfdt, new Uint8Array([
0x00, 0x00, 0x00, 0x00, // version(0) & flags
(baseMediaDecodeTime >>> 24) & 0xFF, // baseMediaDecodeTime: int32
(baseMediaDecodeTime >>> 16) & 0xFF,
(baseMediaDecodeTime >>> 8) & 0xFF,
(baseMediaDecodeTime) & 0xFF
]));
let sdtp = MP4.sdtp(track);
let trun = MP4.trun(track, sdtp.byteLength + 16 + 16 + 8 + 16 + 8 + 8);
return MP4.box(MP4.types.traf, tfhd, tfdt, trun, sdtp);
}
// Sample Dependency Type box
static sdtp(track: MP4Track): Uint8Array {
let data = new Uint8Array(4 + 1);
let flags = track.flags;
data[4] = flags.isLeading << 6
| flags.dependsOn << 4
| flags.isDependedOn << 2
| flags.hasRedundancy;
return MP4.box(MP4.types.sdtp, data);
}
// trun
static trun(track: MP4Track, offset: number): Uint8Array {
let dataSize = 12 + 16;
let data = new Uint8Array(dataSize);
offset += 8 + dataSize;
data.set([
0x00, 0x00, 0x0F, 0x01, // version(0) & flags
0x00, 0x00, 0x00, 0x01, // sample_count
(offset >>> 24) & 0xFF, // data_offset
(offset >>> 16) & 0xFF,
(offset >>> 8) & 0xFF,
(offset) & 0xFF
], 0);
let duration = track.duration;
let size = track.size;
let flags = track.flags;
let cts = track.cts;
data.set([
(duration >>> 24) & 0xFF, // sample_duration
(duration >>> 16) & 0xFF,
(duration >>> 8) & 0xFF,
(duration) & 0xFF,
(size >>> 24) & 0xFF, // sample_size
(size >>> 16) & 0xFF,
(size >>> 8) & 0xFF,
(size) & 0xFF,
(flags.isLeading << 2) | flags.dependsOn, // sample_flags
(flags.isDependedOn << 6) | (flags.hasRedundancy << 4) | flags.isNonSync,
0x00, 0x00, // sample_degradation_priority
(cts >>> 24) & 0xFF, // sample_composition_time_offset
(cts >>> 16) & 0xFF,
(cts >>> 8) & 0xFF,
(cts) & 0xFF
], 12);
return MP4.box(MP4.types.trun, data);
}
// mdat
static mdat(data: Uint8Array): Uint8Array {
return MP4.box(MP4.types.mdat, data)
}
}
for (let name in MP4.types) {
if (MP4.types.hasOwnProperty(name)) {
MP4.types[name] = [
name.charCodeAt(0),
name.charCodeAt(1),
name.charCodeAt(2),
name.charCodeAt(3)
];
}
}
MP4.constants.STSC = MP4.constants.STTS;
MP4.constants.STCO = MP4.constants.STTS;
export default MP4;