@videojs/http-streaming

Play back HLS and DASH with Video.js, even where it's not natively supported
/*! @name @videojs/http-streaming @version 2.1.0 @license Apache-2.0 */
var transmuxerWorker = (function () {
  'use strict';

  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   *
   * A lightweight readable stream implementation that handles event dispatching.
   * Objects that inherit from streams should call init in their constructors.
   */
  var Stream = function() {
    this.init = function() {
      var listeners = {};

      /**
       * Add a listener for a specified event type.
       * @param type {string} the event name
       * @param listener {function} the callback to be invoked when an event of
       * the specified type occurs
       */
      this.on = function(type, listener) {
        if (!listeners[type]) {
          listeners[type] = [];
        }
        listeners[type] = listeners[type].concat(listener);
      };

      /**
       * Remove a listener for a specified event type.
       * @param type {string} the event name
       * @param listener {function} a function previously registered for this
       * type of event through `on`
       */
      this.off = function(type, listener) {
        var index;
        if (!listeners[type]) {
          return false;
        }
        index = listeners[type].indexOf(listener);
        listeners[type] = listeners[type].slice();
        listeners[type].splice(index, 1);
        return index > -1;
      };

      /**
       * Trigger an event of the specified type on this stream. Any additional
       * arguments to this function are passed as parameters to event listeners.
       * @param type {string} the event name
       */
      this.trigger = function(type) {
        var callbacks, i, length, args;

        callbacks = listeners[type];
        if (!callbacks) {
          return;
        }

        // Slicing the arguments on every invocation of this method
        // can add a significant amount of overhead. Avoid the
        // intermediate object creation for the common case of a
        // single callback argument
        if (arguments.length === 2) {
          length = callbacks.length;
          for (i = 0; i < length; ++i) {
            callbacks[i].call(this, arguments[1]);
          }
        } else {
          args = [];
          for (i = 1; i < arguments.length; ++i) {
            args.push(arguments[i]);
          }
          length = callbacks.length;
          for (i = 0; i < length; ++i) {
            callbacks[i].apply(this, args);
          }
        }
      };

      /**
       * Destroys the stream and cleans up.
       */
      this.dispose = function() {
        listeners = {};
      };
    };
  };

  /**
   * Forwards all `data` events on this stream to the destination stream. The
   * destination stream should provide a method `push` to receive the data
   * events as they arrive.
   * @param destination {stream} the stream that will receive all `data` events
   * @param autoFlush {boolean} if false, we will not call `flush` on the destination
   * when the current stream emits a 'done' event
   * @see http://nodejs.org/api/stream.html#stream_readable_pipe_destination_options
   */
  Stream.prototype.pipe = function(destination) {
    this.on('data', function(data) {
      destination.push(data);
    });
    this.on('done', function(flushSource) {
      destination.flush(flushSource);
    });
    this.on('partialdone', function(flushSource) {
      destination.partialFlush(flushSource);
    });
    this.on('endedtimeline', function(flushSource) {
      destination.endTimeline(flushSource);
    });
    this.on('reset', function(flushSource) {
      destination.reset(flushSource);
    });
    return destination;
  };

  // Default stream functions that are expected to be overridden to perform
  // actual work. These are provided by the prototype as a sort of no-op
  // implementation so that we don't have to check for their existence in the
  // `pipe` function above.
  Stream.prototype.push = function(data) {
    this.trigger('data', data);
  };
  Stream.prototype.flush = function(flushSource) {
    this.trigger('done', flushSource);
  };
  Stream.prototype.partialFlush = function(flushSource) {
    this.trigger('partialdone', flushSource);
  };
  Stream.prototype.endTimeline = function(flushSource) {
    this.trigger('endedtimeline', flushSource);
  };
  Stream.prototype.reset = function(flushSource) {
    this.trigger('reset', flushSource);
  };

  var stream = Stream;
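  // --- Editor's note: the sketch below is not part of the published bundle.
  // It is a minimal, hypothetical illustration of how the Stream primitive
  // above is used throughout this file; `Uppercase` and the logged values are
  // invented for the example.
  //
  //   var Uppercase = function() {
  //     Uppercase.prototype.init.call(this);
  //     // override the no-op prototype `push` to do work on each chunk
  //     this.push = function(chunk) {
  //       this.trigger('data', chunk.toUpperCase());
  //     };
  //   };
  //   Uppercase.prototype = new Stream();
  //
  //   var source = new Uppercase();
  //   source.pipe(new Uppercase()).on('data', function(result) {
  //     console.log(result); // transformed chunks arrive here
  //   });
  //   source.push('abc'); // logs 'ABC'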
  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   *
   * Functions that generate fragmented MP4s suitable for use with Media
   * Source Extensions.
   */
  var UINT32_MAX = Math.pow(2, 32) - 1;

  var box, dinf, esds, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd,
      trak, tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, traf, trex, trun,
      types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, AUDIO_HDLR,
      HDLR_TYPES, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS;

  // pre-calculate constants
  (function() {
    var i;
    types = {
      avc1: [], // codingname
      avcC: [],
      btrt: [],
      dinf: [],
      dref: [],
      esds: [],
      ftyp: [],
      hdlr: [],
      mdat: [],
      mdhd: [],
      mdia: [],
      mfhd: [],
      minf: [],
      moof: [],
      moov: [],
      mp4a: [], // codingname
      mvex: [],
      mvhd: [],
      pasp: [],
      sdtp: [],
      smhd: [],
      stbl: [],
      stco: [],
      stsc: [],
      stsd: [],
      stsz: [],
      stts: [],
      styp: [],
      tfdt: [],
      tfhd: [],
      traf: [],
      trak: [],
      trun: [],
      trex: [],
      tkhd: [],
      vmhd: []
    };

    // In environments where Uint8Array is undefined (e.g., IE8), skip set up
    // so that we don't throw an error
    if (typeof Uint8Array === 'undefined') {
      return;
    }

    for (i in types) {
      if (types.hasOwnProperty(i)) {
        types[i] = [
          i.charCodeAt(0),
          i.charCodeAt(1),
          i.charCodeAt(2),
          i.charCodeAt(3)
        ];
      }
    }

    MAJOR_BRAND = new Uint8Array(['i'.charCodeAt(0), 's'.charCodeAt(0), 'o'.charCodeAt(0), 'm'.charCodeAt(0)]);
    AVC1_BRAND = new Uint8Array(['a'.charCodeAt(0), 'v'.charCodeAt(0), 'c'.charCodeAt(0), '1'.charCodeAt(0)]);
    MINOR_VERSION = new Uint8Array([0, 0, 0, 1]);

    VIDEO_HDLR = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x00, // pre_defined
      0x76, 0x69, 0x64, 0x65, // handler_type: 'vide'
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x56, 0x69, 0x64, 0x65, 0x6f, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 'VideoHandler'
    ]);

    AUDIO_HDLR = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x00, // pre_defined
      0x73, 0x6f, 0x75, 0x6e, // handler_type: 'soun'
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x53, 0x6f, 0x75, 0x6e, 0x64, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 'SoundHandler'
    ]);

    HDLR_TYPES = {
      video: VIDEO_HDLR,
      audio: AUDIO_HDLR
    };

    DREF = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x01, // entry_count
      0x00, 0x00, 0x00, 0x0c, // entry_size
      0x75, 0x72, 0x6c, 0x20, // 'url' type
      0x00, // version 0
      0x00, 0x00, 0x01 // entry_flags
    ]);

    SMHD = new Uint8Array([
      0x00, // version
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, // balance, 0 means centered
      0x00, 0x00 // reserved
    ]);

    STCO = new Uint8Array([
      0x00, // version
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x00 // entry_count
    ]);
    STSC = STCO;

    STSZ = new Uint8Array([
      0x00, // version
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x00, // sample_size
      0x00, 0x00, 0x00, 0x00 // sample_count
    ]);
    STTS = STCO;

    VMHD = new Uint8Array([
      0x00, // version
      0x00, 0x00, 0x01, // flags
      0x00, 0x00, // graphicsmode
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // opcolor
    ]);
  }());

  box = function(type) {
    var payload = [],
        size = 0,
        i, result, view;

    for (i = 1; i < arguments.length; i++) {
      payload.push(arguments[i]);
    }

    i = payload.length;

    // calculate the total size we need to allocate
    while (i--) {
      size += payload[i].byteLength;
    }
    result = new Uint8Array(size + 8);
    view = new DataView(result.buffer, result.byteOffset, result.byteLength);
    view.setUint32(0, result.byteLength);
    result.set(type, 4);

    // copy the payload into the result
    for (i = 0, size = 8; i < payload.length; i++) {
      result.set(payload[i], size);
      size += payload[i].byteLength;
    }
    return result;
  };

  dinf = function() {
    return box(types.dinf, box(types.dref, DREF));
  };

  esds = function(track) {
    return box(types.esds, new Uint8Array([
      0x00, // version
      0x00, 0x00, 0x00, // flags

      // ES_Descriptor
      0x03, // tag, ES_DescrTag
      0x19, // length
      0x00, 0x00, // ES_ID
      0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority

      // DecoderConfigDescriptor
      0x04, // tag, DecoderConfigDescrTag
      0x11, // length
      0x40, // object type
      0x15, // streamType
      0x00, 0x06, 0x00, // bufferSizeDB
      0x00, 0x00, 0xda, 0xc0, // maxBitrate
      0x00, 0x00, 0xda, 0xc0, // avgBitrate

      // DecoderSpecificInfo
      0x05, // tag, DecoderSpecificInfoTag
      0x02, // length
      // ISO/IEC 14496-3, AudioSpecificConfig
      // for samplingFrequencyIndex see ISO/IEC 13818-7:2006, 8.1.3.2.2, Table 35
      (track.audioobjecttype << 3) | (track.samplingfrequencyindex >>> 1),
      (track.samplingfrequencyindex << 7) | (track.channelcount << 3),
      0x06, 0x01, 0x02 // GASpecificConfig
    ]));
  };

  ftyp = function() {
    return box(types.ftyp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND, AVC1_BRAND);
  };

  hdlr = function(type) {
    return box(types.hdlr, HDLR_TYPES[type]);
  };

  mdat = function(data) {
    return box(types.mdat, data);
  };

  mdhd = function(track) {
    var result = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x02, // creation_time
      0x00, 0x00, 0x00, 0x03, // modification_time
      0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second
      (track.duration >>> 24) & 0xFF,
      (track.duration >>> 16) & 0xFF,
      (track.duration >>> 8) & 0xFF,
      track.duration & 0xFF, // duration
      0x55, 0xc4, // 'und' language (undetermined)
      0x00, 0x00
    ]);

    // Use the sample rate from the track metadata, when it is
    // defined. The sample rate can be parsed out of an ADTS header, for
    // instance.
    if (track.samplerate) {
      result[12] = (track.samplerate >>> 24) & 0xFF;
      result[13] = (track.samplerate >>> 16) & 0xFF;
      result[14] = (track.samplerate >>> 8) & 0xFF;
      result[15] = (track.samplerate) & 0xFF;
    }

    return box(types.mdhd, result);
  };

  mdia = function(track) {
    return box(types.mdia, mdhd(track), hdlr(track.type), minf(track));
  };

  mfhd = function(sequenceNumber) {
    return box(types.mfhd, new Uint8Array([
      0x00,
      0x00, 0x00, 0x00, // flags
      (sequenceNumber & 0xFF000000) >> 24,
      (sequenceNumber & 0xFF0000) >> 16,
      (sequenceNumber & 0xFF00) >> 8,
      sequenceNumber & 0xFF // sequence_number
    ]));
  };

  minf = function(track) {
    return box(types.minf,
               track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD),
               dinf(),
               stbl(track));
  };

  moof = function(sequenceNumber, tracks) {
    var trackFragments = [],
        i = tracks.length;

    // build traf boxes for each track fragment
    while (i--) {
      trackFragments[i] = traf(tracks[i]);
    }
    return box.apply(null, [types.moof, mfhd(sequenceNumber)].concat(trackFragments));
  };

  /**
   * Returns a movie box.
   * @param tracks {array} the tracks associated with this movie
   * @see ISO/IEC 14496-12:2012(E), section 8.2.1
   */
  moov = function(tracks) {
    var i = tracks.length,
        boxes = [];

    while (i--) {
      boxes[i] = trak(tracks[i]);
    }

    return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex(tracks)));
  };

  mvex = function(tracks) {
    var i = tracks.length,
        boxes = [];

    while (i--) {
      boxes[i] = trex(tracks[i]);
    }

    return box.apply(null, [types.mvex].concat(boxes));
  };

  mvhd = function(duration) {
    var bytes = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      0x00, 0x00, 0x00, 0x01, // creation_time
      0x00, 0x00, 0x00, 0x02, // modification_time
      0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second
      (duration & 0xFF000000) >> 24,
      (duration & 0xFF0000) >> 16,
      (duration & 0xFF00) >> 8,
      duration & 0xFF, // duration
      0x00, 0x01, 0x00, 0x00, // 1.0 rate
      0x01, 0x00, // 1.0 volume
      0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x01, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x01, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x40, 0x00, 0x00, 0x00, // transformation: unity matrix
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, // pre_defined
      0xff, 0xff, 0xff, 0xff // next_track_ID
    ]);
    return box(types.mvhd, bytes);
  };

  sdtp = function(track) {
    var samples = track.samples || [],
        bytes = new Uint8Array(4 + samples.length),
        flags,
        i;

    // leave the full box header (4 bytes) all zero

    // write the sample table
    for (i = 0; i < samples.length; i++) {
      flags = samples[i].flags;
      bytes[i + 4] = (flags.dependsOn << 4) |
        (flags.isDependedOn << 2) |
        (flags.hasRedundancy);
    }

    return box(types.sdtp, bytes);
  };

  stbl = function(track) {
    return box(types.stbl,
               stsd(track),
               box(types.stts, STTS),
               box(types.stsc, STSC),
               box(types.stsz, STSZ),
               box(types.stco, STCO));
  };
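  // --- Editor's note: an illustrative sketch, not part of the bundle. `box`
  // above produces a standard ISO BMFF box: a 4-byte big-endian size (which
  // includes the 8-byte header), the 4-byte type, then the concatenated
  // payloads. For example:
  //
  //   var b = box(types.ftyp, MINOR_VERSION); // one 4-byte payload
  //   // b.byteLength === 12
  //   // b[0..3]  -> 0x00 0x00 0x00 0x0c   (size, 12 bytes)
  //   // b[4..7]  -> 'f' 't' 'y' 'p'       (type)
  //   // b[8..11] -> 0 0 0 1               (payload: MINOR_VERSION)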
  (function() {
    var videoSample, audioSample;

    stsd = function(track) {
      return box(types.stsd, new Uint8Array([
        0x00, // version 0
        0x00, 0x00, 0x00, // flags
        0x00, 0x00, 0x00, 0x01
      ]), track.type === 'video' ? videoSample(track) : audioSample(track));
    };

    videoSample = function(track) {
      var sps = track.sps || [],
          pps = track.pps || [],
          sequenceParameterSets = [],
          pictureParameterSets = [],
          i,
          avc1Box;

      // assemble the SPSs
      for (i = 0; i < sps.length; i++) {
        sequenceParameterSets.push((sps[i].byteLength & 0xFF00) >>> 8);
        sequenceParameterSets.push((sps[i].byteLength & 0xFF)); // sequenceParameterSetLength
        sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(sps[i])); // SPS
      }

      // assemble the PPSs
      for (i = 0; i < pps.length; i++) {
        pictureParameterSets.push((pps[i].byteLength & 0xFF00) >>> 8);
        pictureParameterSets.push((pps[i].byteLength & 0xFF));
        pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(pps[i]));
      }

      avc1Box = [
        types.avc1, new Uint8Array([
          0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, // reserved
          0x00, 0x01, // data_reference_index
          0x00, 0x00, // pre_defined
          0x00, 0x00, // reserved
          0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00, // pre_defined
          (track.width & 0xff00) >> 8,
          track.width & 0xff, // width
          (track.height & 0xff00) >> 8,
          track.height & 0xff, // height
          0x00, 0x48, 0x00, 0x00, // horizresolution
          0x00, 0x48, 0x00, 0x00, // vertresolution
          0x00, 0x00, 0x00, 0x00, // reserved
          0x00, 0x01, // frame_count
          0x13, 0x76, 0x69, 0x64, 0x65, 0x6f, 0x6a, 0x73,
          0x2d, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x69, 0x62,
          0x2d, 0x68, 0x6c, 0x73, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compressorname
          0x00, 0x18, // depth = 24
          0x11, 0x11 // pre_defined = -1
        ]),
        box(types.avcC, new Uint8Array([
          0x01, // configurationVersion
          track.profileIdc, // AVCProfileIndication
          track.profileCompatibility, // profile_compatibility
          track.levelIdc, // AVCLevelIndication
          0xff // lengthSizeMinusOne, hard-coded to 4 bytes
        ].concat(
          [sps.length], // numOfSequenceParameterSets
          sequenceParameterSets, // "SPS"
          [pps.length], // numOfPictureParameterSets
          pictureParameterSets // "PPS"
        ))),
        box(types.btrt, new Uint8Array([
          0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
          0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
          0x00, 0x2d, 0xc6, 0xc0 // avgBitrate
        ]))
      ];

      if (track.sarRatio) {
        var hSpacing = track.sarRatio[0],
            vSpacing = track.sarRatio[1];

        avc1Box.push(
          box(types.pasp, new Uint8Array([
            (hSpacing & 0xFF000000) >> 24,
            (hSpacing & 0xFF0000) >> 16,
            (hSpacing & 0xFF00) >> 8,
            hSpacing & 0xFF,
            (vSpacing & 0xFF000000) >> 24,
            (vSpacing & 0xFF0000) >> 16,
            (vSpacing & 0xFF00) >> 8,
            vSpacing & 0xFF
          ]))
        );
      }

      return box.apply(null, avc1Box);
    };

    audioSample = function(track) {
      return box(types.mp4a, new Uint8Array([
        // SampleEntry, ISO/IEC 14496-12
        0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, // reserved
        0x00, 0x01, // data_reference_index

        // AudioSampleEntry, ISO/IEC 14496-12
        0x00, 0x00, 0x00, 0x00, // reserved
        0x00, 0x00, 0x00, 0x00, // reserved
        (track.channelcount & 0xff00) >> 8,
        (track.channelcount & 0xff), // channelcount
        (track.samplesize & 0xff00) >> 8,
        (track.samplesize & 0xff), // samplesize
        0x00, 0x00, // pre_defined
        0x00, 0x00, // reserved
        (track.samplerate & 0xff00) >> 8,
        (track.samplerate & 0xff),
        0x00, 0x00 // samplerate, 16.16

        // MP4AudioSampleEntry, ISO/IEC 14496-14
      ]), esds(track));
    };
  }());

  tkhd = function(track) {
    var result = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x07, // flags
      0x00, 0x00, 0x00, 0x00, // creation_time
      0x00, 0x00, 0x00, 0x00, // modification_time
      (track.id & 0xFF000000) >> 24,
      (track.id & 0xFF0000) >> 16,
      (track.id & 0xFF00) >> 8,
      track.id & 0xFF, // track_ID
      0x00, 0x00, 0x00, 0x00, // reserved
      (track.duration & 0xFF000000) >> 24,
      (track.duration & 0xFF0000) >> 16,
      (track.duration & 0xFF00) >> 8,
      track.duration & 0xFF, // duration
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x00, // layer
      0x00, 0x00, // alternate_group
      0x01, 0x00, // non-audio track volume
      0x00, 0x00, // reserved
      0x00, 0x01, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x01, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x40, 0x00, 0x00, 0x00, // transformation: unity matrix
      (track.width & 0xFF00) >> 8,
      track.width & 0xFF,
      0x00, 0x00, // width
      (track.height & 0xFF00) >> 8,
      track.height & 0xFF,
      0x00, 0x00 // height
    ]);

    return box(types.tkhd, result);
  };

  /**
   * Generate a track fragment (traf) box. A traf box collects metadata
   * about tracks in a movie fragment (moof) box.
   */
  traf = function(track) {
    var trackFragmentHeader, trackFragmentDecodeTime, trackFragmentRun,
        sampleDependencyTable, dataOffset,
        upperWordBaseMediaDecodeTime, lowerWordBaseMediaDecodeTime;

    trackFragmentHeader = box(types.tfhd, new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x3a, // flags
      (track.id & 0xFF000000) >> 24,
      (track.id & 0xFF0000) >> 16,
      (track.id & 0xFF00) >> 8,
      (track.id & 0xFF), // track_ID
      0x00, 0x00, 0x00, 0x01, // sample_description_index
      0x00, 0x00, 0x00, 0x00, // default_sample_duration
      0x00, 0x00, 0x00, 0x00, // default_sample_size
      0x00, 0x00, 0x00, 0x00 // default_sample_flags
    ]));

    upperWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime / (UINT32_MAX + 1));
    lowerWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime % (UINT32_MAX + 1));

    trackFragmentDecodeTime = box(types.tfdt, new Uint8Array([
      0x01, // version 1
      0x00, 0x00, 0x00, // flags
      // baseMediaDecodeTime
      (upperWordBaseMediaDecodeTime >>> 24) & 0xFF,
      (upperWordBaseMediaDecodeTime >>> 16) & 0xFF,
      (upperWordBaseMediaDecodeTime >>> 8) & 0xFF,
      upperWordBaseMediaDecodeTime & 0xFF,
      (lowerWordBaseMediaDecodeTime >>> 24) & 0xFF,
      (lowerWordBaseMediaDecodeTime >>> 16) & 0xFF,
      (lowerWordBaseMediaDecodeTime >>> 8) & 0xFF,
      lowerWordBaseMediaDecodeTime & 0xFF
    ]));

    // the data offset specifies the number of bytes from the start of
    // the containing moof to the first payload byte of the associated
    // mdat
    dataOffset = (32 + // tfhd
                  20 + // tfdt
                  8 +  // traf header
                  16 + // mfhd
                  8 +  // moof header
                  8);  // mdat header

    // audio tracks require less metadata
    if (track.type === 'audio') {
      trackFragmentRun = trun(track, dataOffset);
      return box(types.traf,
                 trackFragmentHeader,
                 trackFragmentDecodeTime,
                 trackFragmentRun);
    }

    // video tracks should contain an independent and disposable samples
    // box (sdtp)
    // generate one and adjust offsets to match
    sampleDependencyTable = sdtp(track);
    trackFragmentRun = trun(track, sampleDependencyTable.length + dataOffset);
    return box(types.traf,
               trackFragmentHeader,
               trackFragmentDecodeTime,
               trackFragmentRun,
               sampleDependencyTable);
  };
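  // --- Editor's note: an illustrative sketch, not part of the bundle.
  // Because JavaScript bitwise operators truncate to 32 bits, `traf` above
  // splits a possibly-larger-than-32-bit baseMediaDecodeTime into two 32-bit
  // words with division and modulo before serializing the 64-bit tfdt field:
  //
  //   var bmdt = 5000000000; // ticks; larger than 2^32 - 1 (4294967295)
  //   var upper = Math.floor(bmdt / (UINT32_MAX + 1)); // 1
  //   var lower = Math.floor(bmdt % (UINT32_MAX + 1)); // 705032704
  //   // upper * Math.pow(2, 32) + lower === 5000000000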
  /**
   * Generate a track box.
   * @param track {object} a track definition
   * @return {Uint8Array} the track box
   */
  trak = function(track) {
    track.duration = track.duration || 0xffffffff;
    return box(types.trak,
               tkhd(track),
               mdia(track));
  };

  trex = function(track) {
    var result = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      (track.id & 0xFF000000) >> 24,
      (track.id & 0xFF0000) >> 16,
      (track.id & 0xFF00) >> 8,
      (track.id & 0xFF), // track_ID
      0x00, 0x00, 0x00, 0x01, // default_sample_description_index
      0x00, 0x00, 0x00, 0x00, // default_sample_duration
      0x00, 0x00, 0x00, 0x00, // default_sample_size
      0x00, 0x01, 0x00, 0x01 // default_sample_flags
    ]);

    // the last two bytes of default_sample_flags is the sample
    // degradation priority, a hint about the importance of this sample
    // relative to others. Lower the degradation priority for all sample
    // types other than video.
    if (track.type !== 'video') {
      result[result.length - 1] = 0x00;
    }

    return box(types.trex, result);
  };

  (function() {
    var audioTrun, videoTrun, trunHeader;

    // This method assumes all samples are uniform. That is, if a
    // duration is present for the first sample, it will be present for
    // all subsequent samples.
    // see ISO/IEC 14496-12:2012, Section 8.8.8.1
    trunHeader = function(samples, offset) {
      var durationPresent = 0, sizePresent = 0,
          flagsPresent = 0, compositionTimeOffset = 0;

      // trun flag constants
      if (samples.length) {
        if (samples[0].duration !== undefined) {
          durationPresent = 0x1;
        }
        if (samples[0].size !== undefined) {
          sizePresent = 0x2;
        }
        if (samples[0].flags !== undefined) {
          flagsPresent = 0x4;
        }
        if (samples[0].compositionTimeOffset !== undefined) {
          compositionTimeOffset = 0x8;
        }
      }

      return [
        0x00, // version 0
        0x00,
        durationPresent | sizePresent | flagsPresent | compositionTimeOffset,
        0x01, // flags
        (samples.length & 0xFF000000) >>> 24,
        (samples.length & 0xFF0000) >>> 16,
        (samples.length & 0xFF00) >>> 8,
        samples.length & 0xFF, // sample_count
        (offset & 0xFF000000) >>> 24,
        (offset & 0xFF0000) >>> 16,
        (offset & 0xFF00) >>> 8,
        offset & 0xFF // data_offset
      ];
    };

    videoTrun = function(track, offset) {
      var bytesOffset, bytes, header, samples, sample, i;

      samples = track.samples || [];
      offset += 8 + 12 + (16 * samples.length);
      header = trunHeader(samples, offset);
      bytes = new Uint8Array(header.length + samples.length * 16);
      bytes.set(header);
      bytesOffset = header.length;

      for (i = 0; i < samples.length; i++) {
        sample = samples[i];

        bytes[bytesOffset++] = (sample.duration & 0xFF000000) >>> 24;
        bytes[bytesOffset++] = (sample.duration & 0xFF0000) >>> 16;
        bytes[bytesOffset++] = (sample.duration & 0xFF00) >>> 8;
        bytes[bytesOffset++] = sample.duration & 0xFF; // sample_duration
        bytes[bytesOffset++] = (sample.size & 0xFF000000) >>> 24;
        bytes[bytesOffset++] = (sample.size & 0xFF0000) >>> 16;
        bytes[bytesOffset++] = (sample.size & 0xFF00) >>> 8;
        bytes[bytesOffset++] = sample.size & 0xFF; // sample_size
        bytes[bytesOffset++] = (sample.flags.isLeading << 2) | sample.flags.dependsOn;
        bytes[bytesOffset++] = (sample.flags.isDependedOn << 6) |
          (sample.flags.hasRedundancy << 4) |
          (sample.flags.paddingValue << 1) |
          sample.flags.isNonSyncSample;
        bytes[bytesOffset++] = sample.flags.degradationPriority & 0xF0 << 8;
        bytes[bytesOffset++] = sample.flags.degradationPriority & 0x0F; // sample_flags
        bytes[bytesOffset++] = (sample.compositionTimeOffset & 0xFF000000) >>> 24;
        bytes[bytesOffset++] = (sample.compositionTimeOffset & 0xFF0000) >>> 16;
        bytes[bytesOffset++] = (sample.compositionTimeOffset & 0xFF00) >>> 8;
        bytes[bytesOffset++] = sample.compositionTimeOffset & 0xFF; // sample_composition_time_offset
      }
      return box(types.trun, bytes);
    };

    audioTrun = function(track, offset) {
      var bytes, bytesOffset, header, samples, sample, i;

      samples = track.samples || [];
      offset += 8 + 12 + (8 * samples.length);

      header = trunHeader(samples, offset);
      bytes = new Uint8Array(header.length + samples.length * 8);
      bytes.set(header);
      bytesOffset = header.length;

      for (i = 0; i < samples.length; i++) {
        sample = samples[i];
        bytes[bytesOffset++] = (sample.duration & 0xFF000000) >>> 24;
        bytes[bytesOffset++] = (sample.duration & 0xFF0000) >>> 16;
        bytes[bytesOffset++] = (sample.duration & 0xFF00) >>> 8;
        bytes[bytesOffset++] = sample.duration & 0xFF; // sample_duration
        bytes[bytesOffset++] = (sample.size & 0xFF000000) >>> 24;
        bytes[bytesOffset++] = (sample.size & 0xFF0000) >>> 16;
        bytes[bytesOffset++] = (sample.size & 0xFF00) >>> 8;
        bytes[bytesOffset++] = sample.size & 0xFF; // sample_size
      }

      return box(types.trun, bytes);
    };

    trun = function(track, offset) {
      if (track.type === 'audio') {
        return audioTrun(track, offset);
      }

      return videoTrun(track, offset);
    };
  }());

  var mp4Generator = {
    ftyp: ftyp,
    mdat: mdat,
    moof: moof,
    moov: moov,
    initSegment: function(tracks) {
      var fileType = ftyp(),
          movie = moov(tracks),
          result;

      result = new Uint8Array(fileType.byteLength + movie.byteLength);
      result.set(fileType);
      result.set(movie, fileType.byteLength);
      return result;
    }
  };
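  // --- Editor's note: an illustrative sketch, not part of the bundle.
  // `initSegment` concatenates an ftyp box and a moov box, which is the shape
  // of initialization segment a Media Source Extensions fmp4 SourceBuffer
  // expects. The track fields shown are the ones the generator reads above;
  // the specific values are invented for the example:
  //
  //   var audioTrack = {
  //     id: 1, type: 'audio',
  //     channelcount: 2, samplerate: 44100, samplesize: 16,
  //     audioobjecttype: 2, samplingfrequencyindex: 4, // AAC-LC @ 44.1kHz
  //     duration: 0, baseMediaDecodeTime: 0
  //   };
  //   var init = mp4Generator.initSegment([audioTrack]); // Uint8Array: ftyp + moov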
  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   */

  // Convert an array of nal units into an array of frames with each frame being
  // composed of the nal units that make up that frame
  // Also keep track of cumulative data about the frame from the nal units such
  // as the frame duration, starting pts, etc.
  var groupNalsIntoFrames = function(nalUnits) {
    var i, currentNal,
        currentFrame = [],
        frames = [];

    // TODO added for LHLS, make sure this is OK
    frames.byteLength = 0;
    frames.nalCount = 0;
    frames.duration = 0;

    currentFrame.byteLength = 0;

    for (i = 0; i < nalUnits.length; i++) {
      currentNal = nalUnits[i];

      // Split on 'aud'-type nal units
      if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
        // Since the very first nal unit is expected to be an AUD
        // only push to the frames array when currentFrame is not empty
        if (currentFrame.length) {
          currentFrame.duration = currentNal.dts - currentFrame.dts;
          // TODO added for LHLS, make sure this is OK
          frames.byteLength += currentFrame.byteLength;
          frames.nalCount += currentFrame.length;
          frames.duration += currentFrame.duration;
          frames.push(currentFrame);
        }
        currentFrame = [currentNal];
        currentFrame.byteLength = currentNal.data.byteLength;
        currentFrame.pts = currentNal.pts;
        currentFrame.dts = currentNal.dts;
      } else {
        // Specifically flag key frames for ease of use later
        if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') {
          currentFrame.keyFrame = true;
        }
        currentFrame.duration = currentNal.dts - currentFrame.dts;
        currentFrame.byteLength += currentNal.data.byteLength;
        currentFrame.push(currentNal);
      }
    }

    // For the last frame, use the duration of the previous frame if we
    // have nothing better to go on
    if (frames.length &&
        (!currentFrame.duration || currentFrame.duration <= 0)) {
      currentFrame.duration = frames[frames.length - 1].duration;
    }

    // Push the final frame
    // TODO added for LHLS, make sure this is OK
    frames.byteLength += currentFrame.byteLength;
    frames.nalCount += currentFrame.length;
    frames.duration += currentFrame.duration;
    frames.push(currentFrame);
    return frames;
  };

  // Convert an array of frames into an array of Gop with each Gop being composed
  // of the frames that make up that Gop
  // Also keep track of cumulative data about the Gop from the frames such as the
  // Gop duration, starting pts, etc.
  var groupFramesIntoGops = function(frames) {
    var i, currentFrame,
        currentGop = [],
        gops = [];

    // We must pre-set some of the values on the Gop since we
    // keep running totals of these values
    currentGop.byteLength = 0;
    currentGop.nalCount = 0;
    currentGop.duration = 0;
    currentGop.pts = frames[0].pts;
    currentGop.dts = frames[0].dts;

    // store some metadata about all the Gops
    gops.byteLength = 0;
    gops.nalCount = 0;
    gops.duration = 0;
    gops.pts = frames[0].pts;
    gops.dts = frames[0].dts;

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];

      if (currentFrame.keyFrame) {
        // Since the very first frame is expected to be a keyframe
        // only push to the gops array when currentGop is not empty
        if (currentGop.length) {
          gops.push(currentGop);
          gops.byteLength += currentGop.byteLength;
          gops.nalCount += currentGop.nalCount;
          gops.duration += currentGop.duration;
        }

        currentGop = [currentFrame];
        currentGop.nalCount = currentFrame.length;
        currentGop.byteLength = currentFrame.byteLength;
        currentGop.pts = currentFrame.pts;
        currentGop.dts = currentFrame.dts;
        currentGop.duration = currentFrame.duration;
      } else {
        currentGop.duration += currentFrame.duration;
        currentGop.nalCount += currentFrame.length;
        currentGop.byteLength += currentFrame.byteLength;
        currentGop.push(currentFrame);
      }
    }

    if (gops.length && currentGop.duration <= 0) {
      currentGop.duration = gops[gops.length - 1].duration;
    }
    gops.byteLength += currentGop.byteLength;
    gops.nalCount += currentGop.nalCount;
    gops.duration += currentGop.duration;

    // push the final Gop
    gops.push(currentGop);
    return gops;
  };

  /*
   * Search for the first keyframe in the GOPs and throw away all frames
   * until that keyframe. Then extend the duration of the pulled keyframe
   * and pull the PTS and DTS of the keyframe so that it covers the time
   * range of the frames that were disposed.
   *
   * @param {Array} gops video GOPs
   * @returns {Array} modified video GOPs
   */
  var extendFirstKeyFrame = function(gops) {
    var currentGop;

    if (!gops[0][0].keyFrame && gops.length > 1) {
      // Remove the first GOP
      currentGop = gops.shift();

      gops.byteLength -= currentGop.byteLength;
      gops.nalCount -= currentGop.nalCount;

      // Extend the first frame of what is now the
      // first gop to cover the time period of the
      // frames we just removed
      gops[0][0].dts = currentGop.dts;
      gops[0][0].pts = currentGop.pts;
      gops[0][0].duration += currentGop.duration;
    }

    return gops;
  };

  /**
   * Default sample object
   * see ISO/IEC 14496-12:2012, section 8.6.4.3
   */
  var createDefaultSample = function() {
    return {
      size: 0,
      flags: {
        isLeading: 0,
        dependsOn: 1,
        isDependedOn: 0,
        hasRedundancy: 0,
        degradationPriority: 0,
        isNonSyncSample: 1
      }
    };
  };

  /*
   * Collates information from a video frame into an object for eventual
   * entry into an MP4 sample table.
   *
   * @param {Object} frame the video frame
   * @param {Number} dataOffset the byte offset to position the sample
   * @return {Object} object containing sample table info for a frame
   */
  var sampleForFrame = function(frame, dataOffset) {
    var sample = createDefaultSample();

    sample.dataOffset = dataOffset;
    sample.compositionTimeOffset = frame.pts - frame.dts;
    sample.duration = frame.duration;
    sample.size = 4 * frame.length; // Space for nal unit size
    sample.size += frame.byteLength;

    if (frame.keyFrame) {
      sample.flags.dependsOn = 2;
      sample.flags.isNonSyncSample = 0;
    }

    return sample;
  };

  // generate the track's sample table from an array of gops
  var generateSampleTable = function(gops, baseDataOffset) {
    var h, i,
        sample,
        currentGop,
        currentFrame,
        dataOffset = baseDataOffset || 0,
        samples = [];

    for (h = 0; h < gops.length; h++) {
      currentGop = gops[h];

      for (i = 0; i < currentGop.length; i++) {
        currentFrame = currentGop[i];

        sample = sampleForFrame(currentFrame, dataOffset);
        dataOffset += sample.size;
        samples.push(sample);
      }
    }
    return samples;
  };

  // generate the track's raw mdat data from an array of gops
  var concatenateNalData = function(gops) {
    var h, i, j,
        currentGop,
        currentFrame,
        currentNal,
        dataOffset = 0,
        nalsByteLength = gops.byteLength,
        numberOfNals = gops.nalCount,
        totalByteLength = nalsByteLength + 4 * numberOfNals,
        data = new Uint8Array(totalByteLength),
        view = new DataView(data.buffer);

    // For each Gop..
    for (h = 0; h < gops.length; h++) {
      currentGop = gops[h];

      // For each Frame..
      for (i = 0; i < currentGop.length; i++) {
        currentFrame = currentGop[i];

        // For each NAL..
        for (j = 0; j < currentFrame.length; j++) {
          currentNal = currentFrame[j];

          view.setUint32(dataOffset, currentNal.data.byteLength);
          dataOffset += 4;
          data.set(currentNal.data, dataOffset);
          dataOffset += currentNal.data.byteLength;
        }
      }
    }
    return data;
  };
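  // --- Editor's note: an illustrative sketch, not part of the bundle. The
  // mdat payload produced by `concatenateNalData` uses AVCC-style framing:
  // each NAL unit is prefixed with its 4-byte big-endian length (matching the
  // 4-byte lengthSizeMinusOne written in the avcC box), which is why
  // `sampleForFrame` budgets `4 * frame.length` bytes on top of the raw NAL
  // bytes. For a frame holding two NALs of 3 and 5 bytes:
  //
  //   // sample.size === 4 * 2 + (3 + 5) === 16
  //   // mdat payload: [0,0,0,3][nal1 bytes][0,0,0,5][nal2 bytes]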
  // generate the track's sample table from a frame
  var generateSampleTableForFrame = function(frame, baseDataOffset) {
    var sample,
        dataOffset = baseDataOffset || 0,
        samples = [];

    sample = sampleForFrame(frame, dataOffset);
    samples.push(sample);
    return samples;
  };

  // generate the track's raw mdat data from a frame
  var concatenateNalDataForFrame = function(frame) {
    var i,
        currentNal,
        dataOffset = 0,
        nalsByteLength = frame.byteLength,
        numberOfNals = frame.length,
        totalByteLength = nalsByteLength + 4 * numberOfNals,
        data = new Uint8Array(totalByteLength),
        view = new DataView(data.buffer);

    // For each NAL..
    for (i = 0; i < frame.length; i++) {
      currentNal = frame[i];

      view.setUint32(dataOffset, currentNal.data.byteLength);
      dataOffset += 4;
      data.set(currentNal.data, dataOffset);
      dataOffset += currentNal.data.byteLength;
    }

    return data;
  };

  var frameUtils = {
    groupNalsIntoFrames: groupNalsIntoFrames,
    groupFramesIntoGops: groupFramesIntoGops,
    extendFirstKeyFrame: extendFirstKeyFrame,
    generateSampleTable: generateSampleTable,
    concatenateNalData: concatenateNalData,
    generateSampleTableForFrame: generateSampleTableForFrame,
    concatenateNalDataForFrame: concatenateNalDataForFrame
  };

  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   */
  var highPrefix = [33, 16, 5, 32, 164, 27];
  var lowPrefix = [33, 65, 108, 84, 1, 2, 4, 8, 168, 2, 4, 8, 17, 191, 252];
  var zeroFill = function(count) {
    var a = [];
    while (count--) {
      a.push(0);
    }
    return a;
  };

  var makeTable = function(metaTable) {
    return Object.keys(metaTable).reduce(function(obj, key) {
      obj[key] = new Uint8Array(metaTable[key].reduce(function(arr, part) {
        return arr.concat(part);
      }, []));
      return obj;
    }, {});
  };

  var silence;
  var silence_1 = function() {
    if (!silence) {
      // Frames-of-silence to use for filling in missing AAC frames
      var coneOfSilence = {
        96000: [highPrefix, [227, 64], zeroFill(154), [56]],
        88200: [highPrefix, [231], zeroFill(170), [56]],
        64000: [highPrefix, [248, 192], zeroFill(240), [56]],
        48000: [highPrefix, [255, 192], zeroFill(268), [55, 148, 128], zeroFill(54), [112]],
        44100: [highPrefix, [255, 192], zeroFill(268), [55, 163, 128], zeroFill(84), [112]],
        32000: [highPrefix, [255, 192], zeroFill(268), [55, 234], zeroFill(226), [112]],
        24000: [highPrefix, [255, 192], zeroFill(268), [55, 255, 128], zeroFill(268), [111, 112], zeroFill(126), [224]],
        16000: [highPrefix, [255, 192], zeroFill(268), [55, 255, 128], zeroFill(268), [111, 255], zeroFill(269), [223, 108], zeroFill(195), [1, 192]],
        12000: [lowPrefix, zeroFill(268), [3, 127, 248], zeroFill(268), [6, 255, 240], zeroFill(268), [13, 255, 224], zeroFill(268), [27, 253, 128], zeroFill(259), [56]],
        11025: [lowPrefix, zeroFill(268), [3, 127, 248], zeroFill(268), [6, 255, 240], zeroFill(268), [13, 255, 224], zeroFill(268), [27, 255, 192], zeroFill(268), [55, 175, 128], zeroFill(108), [112]],
        8000: [lowPrefix, zeroFill(268), [3, 121, 16], zeroFill(47), [7]]
      };
      silence = makeTable(coneOfSilence);
    }
    return silence;
  };

  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   */
  var ONE_SECOND_IN_TS = 90000, // 90kHz clock
      secondsToVideoTs,
      secondsToAudioTs,
      videoTsToSeconds,
      audioTsToSeconds,
      audioTsToVideoTs,
      videoTsToAudioTs,
      metadataTsToSeconds;

  secondsToVideoTs = function(seconds) {
    return seconds * ONE_SECOND_IN_TS;
  };

  secondsToAudioTs = function(seconds, sampleRate) {
    return seconds * sampleRate;
  };

  videoTsToSeconds = function(timestamp) {
    return timestamp / ONE_SECOND_IN_TS;
  };

  audioTsToSeconds = function(timestamp, sampleRate) {
    return timestamp / sampleRate;
  };

  audioTsToVideoTs = function(timestamp, sampleRate) {
    return secondsToVideoTs(audioTsToSeconds(timestamp, sampleRate));
  };

  videoTsToAudioTs = function(timestamp, sampleRate) {
    return secondsToAudioTs(videoTsToSeconds(timestamp), sampleRate);
  };

  /**
   * Adjust ID3 tag or caption timing information by the timeline pts values
   * (if keepOriginalTimestamps is false) and convert to seconds
   */
  metadataTsToSeconds = function(timestamp, timelineStartPts, keepOriginalTimestamps) {
    return videoTsToSeconds(keepOriginalTimestamps ? timestamp : timestamp - timelineStartPts);
  };

  var clock = {
    ONE_SECOND_IN_TS: ONE_SECOND_IN_TS,
    secondsToVideoTs: secondsToVideoTs,
    secondsToAudioTs: secondsToAudioTs,
    videoTsToSeconds: videoTsToSeconds,
    audioTsToSeconds: audioTsToSeconds,
    audioTsToVideoTs: audioTsToVideoTs,
    videoTsToAudioTs: videoTsToAudioTs,
    metadataTsToSeconds: metadataTsToSeconds
  };
  var clock_2 = clock.secondsToVideoTs;
  var clock_4 = clock.videoTsToSeconds;
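  // --- Editor's note: an illustrative sketch, not part of the bundle. The
  // clock helpers above convert between three time bases: seconds, the
  // MPEG-TS 90kHz "video" clock, and a sample-rate-based "audio" clock.
  // For example, at a 48000Hz sample rate:
  //
  //   clock.secondsToVideoTs(2);            // 180000 (2s * 90000 ticks/s)
  //   clock.audioTsToSeconds(96000, 48000); // 2      (96000 samples / 48000 per s)
  //   clock.audioTsToVideoTs(96000, 48000); // 180000 (samples -> seconds -> 90kHz)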
  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   */

  /**
   * Sum the `byteLength` properties of the data in each AAC frame
   */
  var sumFrameByteLengths = function(array) {
    var i, currentObj,
        sum = 0;

    // sum the byteLength of each nal unit in the frame
    for (i = 0; i < array.length; i++) {
      currentObj = array[i];
      sum += currentObj.data.byteLength;
    }

    return sum;
  };

  // Possibly pad (prefix) the audio track with silence if appending this track
  // would lead to the introduction of a gap in the audio buffer
  var prefixWithSilence = function(track, frames, audioAppendStartTs, videoBaseMediaDecodeTime) {
    var baseMediaDecodeTimeTs,
        frameDuration = 0,
        audioGapDuration = 0,
        audioFillFrameCount = 0,
        audioFillDuration = 0,
        silentFrame,
        i,
        firstFrame;

    if (!frames.length) {
      return;
    }

    baseMediaDecodeTimeTs = clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate);
    // determine frame clock duration based on sample rate, round up to avoid overfills
    frameDuration = Math.ceil(clock.ONE_SECOND_IN_TS / (track.samplerate / 1024));

    if (audioAppendStartTs && videoBaseMediaDecodeTime) {
      // insert the shortest possible amount (audio gap or audio to video gap)
      audioGapDuration = baseMediaDecodeTimeTs - Math.max(audioAppendStartTs, videoBaseMediaDecodeTime);
      // number of full frames in the audio gap
      audioFillFrameCount = Math.floor(audioGapDuration / frameDuration);
      audioFillDuration = audioFillFrameCount * frameDuration;
    }

    // don't attempt to fill gaps smaller than a single frame or larger
    // than a half second
    if (audioFillFrameCount < 1 || audioFillDuration > clock.ONE_SECOND_IN_TS / 2) {
      return;
    }

    silentFrame = silence_1()[track.samplerate];

    if (!silentFrame) {
      // we don't have a silent frame pregenerated for the sample rate, so use a frame
      // from the content instead
      silentFrame = frames[0].data;
    }

    for (i = 0; i < audioFillFrameCount; i++) {
      firstFrame = frames[0];

      frames.splice(0, 0, {
        data: silentFrame,
        dts: firstFrame.dts - frameDuration,
        pts: firstFrame.pts - frameDuration
      });
    }

    track.baseMediaDecodeTime -= Math.floor(clock.videoTsToAudioTs(audioFillDuration, track.samplerate));
  };

  // If the audio segment extends before the earliest allowed dts
  // value, remove AAC frames until it starts at or after the earliest
  // allowed DTS so that we don't end up with a negative baseMedia-
  // DecodeTime for the audio track
  var trimAdtsFramesByEarliestDts = function(adtsFrames, track, earliestAllowedDts) {
    if (track.minSegmentDts >= earliestAllowedDts) {
      return adtsFrames;
    }

    // We will need to recalculate the earliest segment Dts
    track.minSegmentDts = Infinity;

    return adtsFrames.filter(function(currentFrame) {
      // If this is an allowed frame, keep it and record its Dts
      if (currentFrame.dts >= earliestAllowedDts) {
        track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts);
        track.minSegmentPts = track.minSegmentDts;
        return true;
      }
      // Otherwise, discard it
      return false;
    });
  };

  // generate the track's sample table from an array of frames
  var generateSampleTable$1 = function(frames) {
    var i, currentFrame,
        samples = [];

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];
      samples.push({
        size: currentFrame.data.byteLength,
        duration: 1024 // For AAC audio, all samples contain 1024 samples
      });
    }
    return samples;
  };

  // generate the track's raw mdat data from an array of frames
  var concatenateFrameData = function(frames) {
    var i, currentFrame,
        dataOffset = 0,
        data = new Uint8Array(sumFrameByteLengths(frames));

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];
      data.set(currentFrame.data, dataOffset);
      dataOffset += currentFrame.data.byteLength;
    }
    return data;
  };

  var audioFrameUtils = {
    prefixWithSilence: prefixWithSilence,
    trimAdtsFramesByEarliestDts: trimAdtsFramesByEarliestDts,
    generateSampleTable: generateSampleTable$1,
    concatenateFrameData: concatenateFrameData
  };

  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   */
  var ONE_SECOND_IN_TS$1 = clock.ONE_SECOND_IN_TS;

  /**
   * Store information about the start and end of the track and the
   * duration for each frame/sample we process in order to calculate
   * the baseMediaDecodeTime
   */
  var collectDtsInfo = function(track, data) {
    if (typeof data.pts === 'number') {
      if (track.timelineStartInfo.pts === undefined) {
        track.timelineStartInfo.pts = data.pts;
      }

      if (track.minSegmentPts === undefined) {
        track.minSegmentPts = data.pts;
      } else {
        track.minSegmentPts = Math.min(track.minSegmentPts, data.pts);
      }

      if (track.maxSegmentPts === undefined) {
        track.maxSegmentPts = data.pts;
      } else {
        track.maxSegmentPts = Math.max(track.maxSegmentPts, data.pts);
      }
    }

    if (typeof data.dts === 'number') {
      if (track.timelineStartInfo.dts === undefined) {
        track.timelineStartInfo.dts = data.dts;
      }

      if (track.minSegmentDts === undefined) {
        track.minSegmentDts = data.dts;
      } else {
        track.minSegmentDts = Math.min(track.minSegmentDts, data.dts);
      }

      if (track.maxSegmentDts === undefined) {
        track.maxSegmentDts = data.dts;
      } else {
        track.maxSegmentDts = Math.max(track.maxSegmentDts, data.dts);
      }
    }
  };

  /**
   * Clear values used to calculate the baseMediaDecodeTime between
   * tracks
   */
  var clearDtsInfo = function(track) {
    delete track.minSegmentDts;
    delete track.maxSegmentDts;
    delete track.minSegmentPts;
    delete track.maxSegmentPts;
  };

  /**
   * Calculate the track's baseMediaDecodeTime based on the earliest
   * DTS the transmuxer has ever seen and the minimum DTS for the
   * current track
   * @param track {object} track metadata configuration
   * @param keepOriginalTimestamps {boolean} If true, keep the timestamps
   * in the source; false to adjust the first segment to start at 0.
   */
  var calculateTrackBaseMediaDecodeTime = function(track, keepOriginalTimestamps) {
    var baseMediaDecodeTime,
        scale,
        minSegmentDts = track.minSegmentDts;

    // Optionally adjust the time so the first segment starts at zero.
    if (!keepOriginalTimestamps) {
      minSegmentDts -= track.timelineStartInfo.dts;
    }

    // track.timelineStartInfo.baseMediaDecodeTime is the location, in time, where
    // we want the start of the first segment to be placed