UNPKG

jessibuca

Version:
592 lines (547 loc) 20.1 kB
/** Maps a four-character box name (e.g. `moov`) to its four ASCII byte codes. */
interface MP4Types {
  [key: string]: number[];
}

/** Fixed box payloads that never depend on the stream being muxed. */
interface MP4Constants {
  FTYP: Uint8Array;
  STSD_PREFIX: Uint8Array;
  STTS: Uint8Array;
  STSC: Uint8Array;
  STCO: Uint8Array;
  STSZ: Uint8Array;
  HDLR_VIDEO: Uint8Array;
  HDLR_AUDIO: Uint8Array;
  DREF: Uint8Array;
  SMHD: Uint8Array;
  VMHD: Uint8Array;
}

/** Per-track description consumed when building the init segment (ftyp + moov). */
export interface MP4Meta {
  /** Written as track_ID into `tkhd` and `trex`. */
  id: number;
  /** Selects the handler (`hdlr`), media header (`smhd`/`vmhd`) and sample entry. */
  type: 'audio' | 'video';
  /** Written into `mvhd` and `mdhd`. */
  timescale: number;
  /** Written into `mvhd`, `tkhd` and `mdhd`. */
  duration: number;
  /** Width written into the `avc1`/`hvc1` sample entry (low 16 bits). */
  codecWidth: number;
  /** Height written into the `avc1`/`hvc1` sample entry (low 16 bits). */
  codecHeight: number;
  /** Width written into `tkhd` as the integer part of a 16.16 value. */
  presentWidth: number;
  /** Height written into `tkhd` as the integer part of a 16.16 value. */
  presentHeight: number;
  /** Written as the low byte of channelCount in `mp4a`. */
  channelCount: number;
  /** Written as the integer part of the 16.16 sample rate in `mp4a`. */
  audioSampleRate: number;
  /** Decoder-specific bytes embedded in `esds`; treated as empty when absent. */
  config?: number[];
  /** Payload of the `avcC` (or `hvcC`) box. */
  avcc: Uint8Array;
  /** `'avc'` selects an `avc1` sample entry; any other value selects `hvc1`. */
  videoType?: string;
}

/** Single-sample fragment description consumed by `moof` and its children. */
interface MP4Track {
  /** Written as track_ID into `tfhd`. */
  id: number;
  /** Written into `mfhd`. */
  sequenceNumber: number;
  /** sample_duration in `trun`. */
  duration: number;
  /** sample_size in `trun`. */
  size: number;
  /** Bit fields packed into `sdtp` and into the `trun` sample_flags. */
  flags: {
    isLeading: number;
    dependsOn: number;
    isDependedOn: number;
    hasRedundancy: number;
    isNonSync: number;
  };
  /** sample_composition_time_offset in `trun`. */
  cts: number;
}

/**
 * Static helpers that serialise ISO BMFF (fragmented MP4) boxes into
 * `Uint8Array`s: an init segment (`ftyp` + `moov`) and per-sample media
 * fragments (`moof` + `mdat`).
 */
class MP4 {
  /**
   * Box fourcc table. The entries are declared empty here and filled with the
   * character codes of their own key by the loop that follows the class body.
   */
  static types: MP4Types = {
    avc1: [],
    avcC: [],
    hvc1: [],
    hvcC: [],
    btrt: [],
    dinf: [],
    dref: [],
    esds: [],
    ftyp: [],
    hdlr: [],
    mdat: [],
    mdhd: [],
    mdia: [],
    mfhd: [],
    minf: [],
    moof: [],
    moov: [],
    mp4a: [],
    mvex: [],
    mvhd: [],
    sdtp: [],
    stbl: [],
    stco: [],
    stsc: [],
    stsd: [],
    stsz: [],
    stts: [],
    tfdt: [],
    tfhd: [],
    traf: [],
    trak: [],
    trun: [],
    trex: [],
    tkhd: [],
    vmhd: [],
    smhd: [],
  };

  static readonly constants: MP4Constants = {
    FTYP: new Uint8Array([
      0x69, 0x73, 0x6F, 0x6D, // major_brand: isom
      0x0, 0x0, 0x0, 0x1, // minor_version: 0x01
      0x69, 0x73, 0x6F, 0x6D, // isom
      0x61, 0x76, 0x63, 0x31, // avc1
    ]),
    STSD_PREFIX: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x01, // entry_count
    ]),
    STTS: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // entry_count
    ]),
    STSC: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // entry_count
    ]),
    STCO: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // entry_count
    ]),
    STSZ: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // sample_size
      0x00, 0x00, 0x00, 0x00, // sample_count
    ]),
    HDLR_VIDEO: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // pre_defined
      0x76, 0x69, 0x64, 0x65, // handler_type: 'vide'
      0x00, 0x00, 0x00, 0x00, // reserved: 3 * 4 bytes
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x56, 0x69, 0x64, 0x65,
      0x6F, 0x48, 0x61, 0x6E,
      0x64, 0x6C, 0x65, 0x72, 0x00, // name: VideoHandler
    ]),
    HDLR_AUDIO: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // pre_defined
      0x73, 0x6F, 0x75, 0x6E, // handler_type: 'soun'
      0x00, 0x00, 0x00, 0x00, // reserved: 3 * 4 bytes
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x53, 0x6F, 0x75, 0x6E,
      0x64, 0x48, 0x61, 0x6E,
      0x64, 0x6C, 0x65, 0x72, 0x00, // name: SoundHandler
    ]),
    DREF: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x01, // entry_count
      0x00, 0x00, 0x00, 0x0C, // entry_size
      0x75, 0x72, 0x6C, 0x20, // type 'url '
      0x00, 0x00, 0x00, 0x01, // version(0) + flags
    ]),
    SMHD: new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // balance(2) + reserved(2)
    ]),
    VMHD: new Uint8Array([
      0x00, 0x00, 0x00, 0x01, // version(0) + flags
      0x00, 0x00, // graphicsmode: 2 bytes
      0x00, 0x00, 0x00, 0x00, // opcolor: 3 * 2 bytes
      0x00, 0x00,
    ]),
  };

  /** 36-byte unity transformation matrix shared by `mvhd` and `tkhd`. */
  private static readonly UNITY_MATRIX: readonly number[] = [
    0x00, 0x01, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x01, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x40, 0x00, 0x00, 0x00,
  ];

  /** Splits `value` into four big-endian bytes (value is truncated to 32 bits). */
  private static u32(value: number): number[] {
    return [
      (value >>> 24) & 0xFF,
      (value >>> 16) & 0xFF,
      (value >>> 8) & 0xFF,
      value & 0xFF,
    ];
  }

  /** Splits the low 16 bits of `value` into two big-endian bytes. */
  private static u16(value: number): number[] {
    return [(value >>> 8) & 0xFF, value & 0xFF];
  }

  /**
   * Builds the 78-byte payload that `avc1` and `hvc1` place in front of their
   * codec configuration box. Field names follow the ISO/IEC 14496-12
   * VisualSampleEntry layout.
   */
  private static visualSampleEntry(width: number, height: number): Uint8Array {
    const entry = new Uint8Array(78); // every field not set below stays zero
    entry[7] = 1; // data_reference_index = 1
    entry.set(MP4.u16(width), 24); // width
    entry.set(MP4.u16(height), 26); // height
    entry[29] = 72; // horizresolution = 72 (16.16)
    entry[33] = 72; // vertresolution = 72 (16.16)
    entry[41] = 1; // frame_count = 1
    // bytes 42..73: compressorname, left empty
    entry[75] = 24; // depth = 0x0018
    entry[76] = 255; // pre_defined = -1
    entry[77] = 255;
    return entry;
  }

  /**
   * Generates a box: 4-byte big-endian size, 4-byte type, then every payload
   * in `datas` concatenated in order.
   *
   * @param type - four byte codes of the box type (see {@link MP4.types})
   * @param datas - payload chunks, typically fixed fields and/or child boxes
   * @returns a newly allocated array holding the complete box
   */
  static box(type: number[], ...datas: Uint8Array[]): Uint8Array {
    let size = 8; // header: size(4) + type(4)
    for (const chunk of datas) {
      size += chunk.byteLength;
    }

    const result = new Uint8Array(size);
    result.set(MP4.u32(size), 0); // size
    result.set(type, 4); // type

    let offset = 8;
    for (const chunk of datas) {
      // data body
      result.set(chunk, offset);
      offset += chunk.byteLength;
    }
    return result;
  }

  /** Emits `ftyp` followed by `moov` for the single track described by `meta`. */
  static generateInitSegment(meta: MP4Meta): Uint8Array {
    const ftyp = MP4.box(MP4.types.ftyp, MP4.constants.FTYP);
    const moov = MP4.moov(meta);

    const result = new Uint8Array(ftyp.byteLength + moov.byteLength);
    result.set(ftyp, 0);
    result.set(moov, ftyp.byteLength);
    return result;
  }

  /** Movie metadata box: `mvhd` + `trak` + `mvex`. */
  static moov(meta: MP4Meta): Uint8Array {
    const mvhd = MP4.mvhd(meta.timescale, meta.duration);
    const trak = MP4.trak(meta);
    const mvex = MP4.mvex(meta);
    return MP4.box(MP4.types.moov, mvhd, trak, mvex);
  }

  /** Movie header box (version 0, so timescale and duration are 32-bit). */
  static mvhd(timescale: number, duration: number): Uint8Array {
    return MP4.box(MP4.types.mvhd, new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // creation_time
      0x00, 0x00, 0x00, 0x00, // modification_time
      ...MP4.u32(timescale), // timescale: 4 bytes
      ...MP4.u32(duration), // duration: 4 bytes
      0x00, 0x01, 0x00, 0x00, // Preferred rate: 1.0
      0x01, 0x00, 0x00, 0x00, // PreferredVolume(1.0, 2bytes) + reserved(2bytes)
      0x00, 0x00, 0x00, 0x00, // reserved: 4 + 4 bytes
      0x00, 0x00, 0x00, 0x00,
      ...MP4.UNITY_MATRIX, // composition matrix
      0x00, 0x00, 0x00, 0x00, // ----begin pre_defined 6 * 4 bytes----
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, // ----end pre_defined 6 * 4 bytes----
      0xFF, 0xFF, 0xFF, 0xFF, // next_track_ID
    ]));
  }

  /** Track box: `tkhd` + `mdia`. */
  static trak(meta: MP4Meta): Uint8Array {
    return MP4.box(MP4.types.trak, MP4.tkhd(meta), MP4.mdia(meta));
  }

  /** Track header box (version 0, flags 0x000007). */
  static tkhd(meta: MP4Meta): Uint8Array {
    return MP4.box(MP4.types.tkhd, new Uint8Array([
      0x00, 0x00, 0x00, 0x07, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // creation_time
      0x00, 0x00, 0x00, 0x00, // modification_time
      ...MP4.u32(meta.id), // track_ID: 4 bytes
      0x00, 0x00, 0x00, 0x00, // reserved: 4 bytes
      ...MP4.u32(meta.duration), // duration: 4 bytes
      0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, // layer(2bytes) + alternate_group(2bytes)
      0x00, 0x00, 0x00, 0x00, // volume(2bytes) + reserved(2bytes)
      ...MP4.UNITY_MATRIX, // composition matrix
      ...MP4.u16(meta.presentWidth), // width: integer part of 16.16
      0x00, 0x00,
      ...MP4.u16(meta.presentHeight), // height: integer part of 16.16
      0x00, 0x00,
    ]));
  }

  /** Media box: `mdhd` + `hdlr` + `minf`. */
  static mdia(meta: MP4Meta): Uint8Array {
    return MP4.box(MP4.types.mdia, MP4.mdhd(meta), MP4.hdlr(meta), MP4.minf(meta));
  }

  /** Media header box (version 0). */
  static mdhd(meta: MP4Meta): Uint8Array {
    return MP4.box(MP4.types.mdhd, new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      0x00, 0x00, 0x00, 0x00, // creation_time
      0x00, 0x00, 0x00, 0x00, // modification_time
      ...MP4.u32(meta.timescale), // timescale: 4 bytes
      ...MP4.u32(meta.duration), // duration: 4 bytes
      0x55, 0xC4, // language: und (undetermined)
      0x00, 0x00, // pre_defined = 0
    ]));
  }

  /** Media handler reference box; audio tracks get 'soun', everything else 'vide'. */
  static hdlr(meta: MP4Meta): Uint8Array {
    const data = meta.type === 'audio'
      ? MP4.constants.HDLR_AUDIO
      : MP4.constants.HDLR_VIDEO;
    return MP4.box(MP4.types.hdlr, data);
  }

  /** Media information box: `smhd` or `vmhd`, then `dinf` and `stbl`. */
  static minf(meta: MP4Meta): Uint8Array {
    const xmhd = meta.type === 'audio'
      ? MP4.box(MP4.types.smhd, MP4.constants.SMHD)
      : MP4.box(MP4.types.vmhd, MP4.constants.VMHD);
    return MP4.box(MP4.types.minf, xmhd, MP4.dinf(), MP4.stbl(meta));
  }

  /** Data information box wrapping a single `dref`. */
  static dinf(): Uint8Array {
    return MP4.box(MP4.types.dinf, MP4.box(MP4.types.dref, MP4.constants.DREF));
  }

  /** Sample table box; every table except `stsd` is the empty constant. */
  static stbl(meta: MP4Meta): Uint8Array {
    return MP4.box(
      MP4.types.stbl, // type: stbl
      MP4.stsd(meta), // Sample Description Table
      MP4.box(MP4.types.stts, MP4.constants.STTS), // Time-To-Sample
      MP4.box(MP4.types.stsc, MP4.constants.STSC), // Sample-To-Chunk
      MP4.box(MP4.types.stsz, MP4.constants.STSZ), // Sample size
      MP4.box(MP4.types.stco, MP4.constants.STCO), // Chunk offset
    );
  }

  /**
   * Sample description box. Audio tracks get an `mp4a` entry; video tracks get
   * `avc1` when `meta.videoType === 'avc'` and `hvc1` for any other value
   * (including an absent `videoType`).
   */
  static stsd(meta: MP4Meta): Uint8Array {
    let sampleEntry: Uint8Array;
    if (meta.type === 'audio') {
      sampleEntry = MP4.mp4a(meta);
    } else if (meta.videoType === 'avc') {
      sampleEntry = MP4.avc1(meta);
    } else {
      sampleEntry = MP4.hvc1(meta);
    }
    return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, sampleEntry);
  }

  /** AAC audio sample entry followed by its `esds` box. */
  static mp4a(meta: MP4Meta): Uint8Array {
    const data = new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // reserved(4)
      0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2)
      0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes
      0x00, 0x00, 0x00, 0x00,
      0x00, meta.channelCount, // channelCount(2)
      0x00, 0x10, // sampleSize(2)
      0x00, 0x00, 0x00, 0x00, // reserved(4)
      ...MP4.u16(meta.audioSampleRate), // Audio sample rate: integer part of 16.16
      0x00, 0x00,
    ]);
    return MP4.box(MP4.types.mp4a, data, MP4.esds(meta));
  }

  /**
   * Elementary stream descriptor box. `meta.config` is embedded as the
   * decoder-specific info; both enclosing descriptor lengths grow with it.
   */
  static esds(meta: MP4Meta): Uint8Array {
    const config = meta.config || [];
    const configSize = config.length;
    const data = new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version 0 + flags

      0x03, // descriptor_type
      0x17 + configSize, // length
      0x00, 0x01, // es_id
      0x00, // stream_priority

      0x04, // descriptor_type
      0x0F + configSize, // length
      0x40, // codec: mpeg4_audio
      0x15, // stream_type: Audio
      0x00, 0x00, 0x00, // buffer_size
      0x00, 0x00, 0x00, 0x00, // maxBitrate
      0x00, 0x00, 0x00, 0x00, // avgBitrate

      0x05, // descriptor_type
      configSize, // length
      ...config,

      0x06, 0x01, 0x02, // SLConfigDescriptor: tag, length 1, predefined = 2
    ]);
    return MP4.box(MP4.types.esds, data);
  }

  /** AVC sample entry followed by an `avcC` box carrying `meta.avcc`. */
  static avc1(meta: MP4Meta): Uint8Array {
    const data = MP4.visualSampleEntry(meta.codecWidth, meta.codecHeight);
    return MP4.box(MP4.types.avc1, data, MP4.box(MP4.types.avcC, meta.avcc));
  }

  /** HEVC sample entry followed by an `hvcC` box carrying `meta.avcc`. */
  static hvc1(meta: MP4Meta): Uint8Array {
    const data = MP4.visualSampleEntry(meta.codecWidth, meta.codecHeight);
    return MP4.box(MP4.types.hvc1, data, MP4.box(MP4.types.hvcC, meta.avcc));
  }

  /** Movie Extends box wrapping a single `trex`. */
  static mvex(meta: MP4Meta): Uint8Array {
    return MP4.box(MP4.types.mvex, MP4.trex(meta));
  }

  /** Track Extends box. */
  static trex(meta: MP4Meta): Uint8Array {
    const data = new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      ...MP4.u32(meta.id), // track_ID
      0x00, 0x00, 0x00, 0x01, // default_sample_description_index
      0x00, 0x00, 0x00, 0x00, // default_sample_duration
      0x00, 0x00, 0x00, 0x00, // default_sample_size
      0x00, 0x01, 0x00, 0x01, // default_sample_flags
    ]);
    return MP4.box(MP4.types.trex, data);
  }

  /** Movie fragment box: `mfhd` + `traf` for one sample. */
  static moof(track: MP4Track, baseMediaDecodeTime: number): Uint8Array {
    return MP4.box(
      MP4.types.moof,
      MP4.mfhd(track.sequenceNumber),
      MP4.traf(track, baseMediaDecodeTime),
    );
  }

  /** Movie fragment header box. */
  static mfhd(sequenceNumber: number): Uint8Array {
    const data = new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) + flags
      ...MP4.u32(sequenceNumber), // sequence_number: int32
    ]);
    return MP4.box(MP4.types.mfhd, data);
  }

  /**
   * Track fragment box: `tfhd` + `tfdt` + `trun` + `sdtp`.
   *
   * `baseMediaDecodeTime` is written as a 32-bit field (tfdt version 0), so
   * values of 2^32 or more wrap around.
   */
  static traf(track: MP4Track, baseMediaDecodeTime: number): Uint8Array {
    // Track fragment header box
    const tfhd = MP4.box(MP4.types.tfhd, new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) & flags
      ...MP4.u32(track.id), // track_ID
    ]));

    // Track Fragment Decode Time
    const tfdt = MP4.box(MP4.types.tfdt, new Uint8Array([
      0x00, 0x00, 0x00, 0x00, // version(0) & flags
      ...MP4.u32(baseMediaDecodeTime), // baseMediaDecodeTime: int32
    ]));

    const sdtp = MP4.sdtp(track);
    // Bytes surrounding trun between the start of moof and the mdat payload:
    // tfhd(16) + tfdt(16) + traf header(8) + mfhd(16) + moof header(8) + mdat header(8)
    const trun = MP4.trun(track, sdtp.byteLength + 16 + 16 + 8 + 16 + 8 + 8);

    return MP4.box(MP4.types.traf, tfhd, tfdt, trun, sdtp);
  }

  /** Sample Dependency Type box holding one flags byte for the single sample. */
  static sdtp(track: MP4Track): Uint8Array {
    const data = new Uint8Array(4 + 1); // version + flags (zero), then one entry
    const flags = track.flags;
    data[4] = (flags.isLeading << 6)
      | (flags.dependsOn << 4)
      | (flags.isDependedOn << 2)
      | flags.hasRedundancy;
    return MP4.box(MP4.types.sdtp, data);
  }

  /**
   * Track fragment run box describing exactly one sample.
   *
   * @param offset - bytes, other than this box, that lie between the start of
   *   the enclosing `moof` and the sample data; this box's own size is added
   *   here to form data_offset
   */
  static trun(track: MP4Track, offset: number): Uint8Array {
    const dataSize = 12 + 16; // fixed fields + one sample record
    const dataOffset = offset + 8 + dataSize; // include this box's header and payload
    const { duration, size, flags, cts } = track;

    const data = new Uint8Array([
      0x00, 0x00, 0x0F, 0x01, // version(0) & flags
      0x00, 0x00, 0x00, 0x01, // sample_count
      ...MP4.u32(dataOffset), // data_offset
      ...MP4.u32(duration), // sample_duration
      ...MP4.u32(size), // sample_size
      (flags.isLeading << 2) | flags.dependsOn, // sample_flags
      (flags.isDependedOn << 6) | (flags.hasRedundancy << 4) | flags.isNonSync,
      0x00, 0x00, // sample_degradation_priority
      ...MP4.u32(cts), // sample_composition_time_offset
    ]);
    return MP4.box(MP4.types.trun, data);
  }

  /** Media data box wrapping the raw sample bytes. */
  static mdat(data: Uint8Array): Uint8Array {
    return MP4.box(MP4.types.mdat, data);
  }
}

// Replace each placeholder in MP4.types with the char codes of its own name.
for (const name of Object.keys(MP4.types)) {
  MP4.types[name] = [
    name.charCodeAt(0),
    name.charCodeAt(1),
    name.charCodeAt(2),
    name.charCodeAt(3),
  ];
}

// stsc and stco share the (identical, empty) stts payload instance.
MP4.constants.STSC = MP4.constants.STTS;
MP4.constants.STCO = MP4.constants.STTS;

export default MP4;