// @l5i/dashjs
// A reference client implementation for the playback of MPEG-DASH via JavaScript and compliant browsers.
// Package listing metadata: version unspecified · 623 lines (570 loc) · 26.4 kB · JavaScript
/**
* The copyright in this software is being made available under the BSD License,
* included below. This software may be subject to other third party and contributor
* rights, including patent rights, and no such rights are granted under this license.
*
* Copyright (c) 2013, Dash Industry Forum.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* * Neither the name of Dash Industry Forum nor the names of its
* contributors may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
import Constants from '../constants/Constants';
import {HTTPRequest} from '../vo/metrics/HTTPRequest';
import TextTrackInfo from '../vo/TextTrackInfo';
import FragmentedTextBoxParser from '../../dash/utils/FragmentedTextBoxParser';
import BoxParser from '../utils/BoxParser';
import CustomTimeRanges from '../utils/CustomTimeRanges';
import FactoryMaker from '../../core/FactoryMaker';
import Debug from '../../core/Debug';
import TextTracks from './TextTracks';
import EmbeddedTextHtmlRender from './EmbeddedTextHtmlRender';
import ISOBoxer from 'codem-isoboxer';
import cea608parser from '../../../externals/cea608-parser';
import EventBus from '../../core/EventBus';
import Events from '../../core/events/Events';
import DashJSError from '../vo/DashJSError';
import Errors from '../../core/errors/Errors';
function TextSourceBuffer() {
const context = this.context;
const eventBus = EventBus(context).getInstance();
let embeddedInitialized = false;
let instance,
logger,
boxParser,
errHandler,
dashManifestModel,
manifestModel,
mediaController,
parser,
vttParser,
ttmlParser,
fragmentedTextBoxParser,
mediaInfos,
textTracks,
fragmentedFragmentModel,
initializationSegmentReceived,
timescale,
fragmentedTracks,
videoModel,
streamController,
firstFragmentedSubtitleStart,
currFragmentedTrackIdx,
embeddedTracks,
embeddedInitializationSegmentReceived,
embeddedTimescale,
embeddedLastSequenceNumber,
embeddedSequenceNumbers,
embeddedCea608FieldParsers,
embeddedTextHtmlRender,
mseTimeOffset;
function setup() {
    // One-time construction: bind a logger to this instance and start from a clean state.
    logger = Debug(context).getInstance().getLogger(instance);
    resetInitialSettings();
}
function resetFragmented() {
    // Clear all state tied to the fragmented-text pipeline.
    initializationSegmentReceived = false;
    timescale = NaN;
    fragmentedTracks = [];
    firstFragmentedSubtitleStart = null;
    fragmentedFragmentModel = null;
}
function resetInitialSettings() {
    // Full reset: fragmented-text state plus the media-info list and lazy parser.
    parser = null;
    mediaInfos = [];
    resetFragmented();
}
function initialize(mimeType, streamProcessor) {
    // Per-stream entry point: ensure shared helpers exist, (re)configure the
    // text-track handler, then register this stream's media infos as tracks.
    if (!embeddedInitialized) {
        initEmbedded();
    }
    textTracks.setConfig({
        videoModel: videoModel
    });
    textTracks.initialize();
    // Box parsers are created lazily and shared across streams.
    if (!boxParser) {
        boxParser = BoxParser(context).getInstance();
        fragmentedTextBoxParser = FragmentedTextBoxParser(context).getInstance();
        fragmentedTextBoxParser.setConfig({
            boxParser: boxParser
        });
    }
    addMediaInfos(mimeType, streamProcessor);
}
function addMediaInfos(mimeType, streamProcessor) {
    // Registers the stream processor's media infos as text tracks; for
    // fragmented text it also wires up the fragment model and buffered ranges.
    const isFragmented = !dashManifestModel.getIsTextTrack(mimeType);
    if (!streamProcessor) {
        return;
    }
    mediaInfos = mediaInfos.concat(streamProcessor.getMediaInfoArr());
    if (isFragmented) {
        fragmentedFragmentModel = streamProcessor.getFragmentModel();
        instance.buffered = CustomTimeRanges(context).create();
        const activeStreamInfo = streamController.getActiveStreamInfo();
        fragmentedTracks = mediaController.getTracksFor(Constants.FRAGMENTED_TEXT, activeStreamInfo);
        const currentTrack = mediaController.getCurrentTrackFor(Constants.FRAGMENTED_TEXT, activeStreamInfo);
        const currentIdx = fragmentedTracks.indexOf(currentTrack);
        if (currentIdx !== -1) {
            setCurrentFragmentedTrackIdx(currentIdx);
        }
    }
    mediaInfos.forEach(function (info) {
        createTextTrackFromMediaInfo(null, info);
    });
}
function abort() {
    // Tear down all text tracks and drop parser/fragment state.
    textTracks.deleteAllTextTracks();
    boxParser = null;
    fragmentedTextBoxParser = null;
    fragmentedFragmentModel = null;
    fragmentedTracks = [];
    initializationSegmentReceived = false;
    mediaInfos = [];
}
function reset() {
    // Return to the post-construction state and release external references.
    resetInitialSettings();
    textTracks = null;
    videoModel = null;
    streamController = null;
}
function onVideoChunkReceived(e) {
    // Only video chunks carrying embedded captions are appended here.
    const chunk = e.chunk;
    if (!chunk.mediaInfo.embeddedCaptions) {
        return;
    }
    append(chunk.bytes, chunk);
}
function initEmbedded() {
    // One-time setup for embedded (CEA-608) caption support: text-track
    // bookkeeping, shared box parsers, CEA-608 state, and the video-chunk hook.
    embeddedTracks = [];
    textTracks = TextTracks(context).getInstance();
    textTracks.setConfig({
        videoModel: videoModel
    });
    textTracks.initialize();
    boxParser = BoxParser(context).getInstance();
    fragmentedTextBoxParser = FragmentedTextBoxParser(context).getInstance();
    fragmentedTextBoxParser.setConfig({
        boxParser: boxParser
    });
    currFragmentedTrackIdx = null;
    embeddedInitializationSegmentReceived = false;
    embeddedTimescale = 0;
    embeddedCea608FieldParsers = [];
    embeddedSequenceNumbers = [];
    embeddedLastSequenceNumber = null;
    embeddedInitialized = true;
    embeddedTextHtmlRender = EmbeddedTextHtmlRender(context).getInstance();
    // Take the MSE time offset from the video stream processor so embedded
    // caption timestamps line up with the video timeline.
    // (Was a for...in loop over the array; for...in iterates string keys and
    // inherited enumerable properties, so find() is the correct idiom.)
    const videoProcessor = streamController.getActiveStreamProcessors().find(function (processor) {
        return processor.getType() === 'video';
    });
    if (videoProcessor) {
        mseTimeOffset = videoProcessor.getRepresentationInfo().MSETimeOffset;
    }
    eventBus.on(Events.VIDEO_CHUNK_RECEIVED, onVideoChunkReceived, this);
}
function resetEmbedded() {
    // Undo initEmbedded(): unhook the event, drop tracks and CEA-608 state.
    eventBus.off(Events.VIDEO_CHUNK_RECEIVED, onVideoChunkReceived, this);
    if (textTracks) {
        textTracks.deleteAllTextTracks();
    }
    embeddedTracks = [];
    embeddedSequenceNumbers = [];
    embeddedLastSequenceNumber = null;
    embeddedCea608FieldParsers = [null, null];
    embeddedInitialized = false;
}
function addEmbeddedTrack(mediaInfo) {
    // Accepts only CEA-608 channels CC1/CC3 and ignores duplicates.
    if (!embeddedInitialized) {
        initEmbedded();
    }
    if (!mediaInfo) {
        return;
    }
    if (mediaInfo.id !== Constants.CC1 && mediaInfo.id !== Constants.CC3) {
        logger.warn('Embedded track ' + mediaInfo.id + ' not supported!');
        return;
    }
    const alreadyAdded = embeddedTracks.some(function (track) {
        return track.id === mediaInfo.id;
    });
    if (!alreadyAdded) {
        embeddedTracks.push(mediaInfo);
    }
}
function setConfig(config) {
    // Dependency injection: only overwrite what the caller actually provided.
    if (!config) {
        return;
    }
    if (config.errHandler) errHandler = config.errHandler;
    if (config.dashManifestModel) dashManifestModel = config.dashManifestModel;
    if (config.manifestModel) manifestModel = config.manifestModel;
    if (config.mediaController) mediaController = config.mediaController;
    if (config.videoModel) videoModel = config.videoModel;
    if (config.streamController) streamController = config.streamController;
    if (config.textTracks) textTracks = config.textTracks;
    if (config.vttParser) vttParser = config.vttParser;
    if (config.ttmlParser) ttmlParser = config.ttmlParser;
}
function getConfig() {
    // Expose the pieces other components need to interact with this buffer.
    return {
        fragmentModel: fragmentedFragmentModel,
        fragmentedTracks: fragmentedTracks,
        videoModel: videoModel
    };
}
function setCurrentFragmentedTrackIdx(idx) {
    // Index into fragmentedTracks of the track captions are currently appended to.
    currFragmentedTrackIdx = idx;
}
function createTextTrackFromMediaInfo(captionData, mediaInfo) {
    // DASH roles are singular ('subtitle'/'caption') but the HTML kind attribute is plural.
    const trackKindMap = { subtitle: 'subtitles', caption: 'captions' };

    // Map the first DASH role to an HTML track kind, defaulting to 'captions'.
    const resolveKind = function () {
        const mapped = (mediaInfo.roles.length > 0) ? trackKindMap[mediaInfo.roles[0]] : trackKindMap.caption;
        return (mapped === trackKindMap.caption || mapped === trackKindMap.subtitle) ? mapped : trackKindMap.caption;
    };

    // TTML if either the codec mentions stpp or the mime type mentions ttml.
    const isTtmlTrack = function () {
        const codecSaysTtml = !!(mediaInfo.codec && mediaInfo.codec.search(Constants.STPP) >= 0);
        const mimeSaysTtml = !!(mediaInfo.mimeType && mediaInfo.mimeType.search(Constants.TTML) >= 0);
        return codecSaysTtml || mimeSaysTtml;
    };

    const textTrackInfo = new TextTrackInfo();
    textTrackInfo.captionData = captionData;
    textTrackInfo.lang = mediaInfo.lang;
    // AdaptationSet id (an unsigned int) is an optional parameter; fall back to mediaInfo.index.
    textTrackInfo.label = mediaInfo.id ? mediaInfo.id : mediaInfo.index;
    textTrackInfo.index = mediaInfo.index; // AdaptationSet index in manifest
    textTrackInfo.isTTML = isTtmlTrack();
    textTrackInfo.defaultTrack = getIsDefault(mediaInfo);
    textTrackInfo.isFragmented = !dashManifestModel.getIsTextTrack(mediaInfo.mimeType);
    textTrackInfo.isEmbedded = !!mediaInfo.isEmbedded;
    textTrackInfo.kind = resolveKind();
    textTrackInfo.roles = mediaInfo.roles;
    textTrackInfo.accessibility = mediaInfo.accessibility;
    const totalNrTracks = (mediaInfos ? mediaInfos.length : 0) + embeddedTracks.length;
    textTracks.addTextTrack(textTrackInfo, totalNrTracks);
}
function append(bytes, chunk) {
let result,
sampleList,
i, j, k,
samplesInfo,
ccContent;
const mediaInfo = chunk.mediaInfo;
const mediaType = mediaInfo.type;
const mimeType = mediaInfo.mimeType;
const codecType = mediaInfo.codec || mimeType;
if (!codecType) {
logger.error('No text type defined');
return;
}
if (mediaType === Constants.FRAGMENTED_TEXT) {
if (!initializationSegmentReceived) {
initializationSegmentReceived = true;
timescale = fragmentedTextBoxParser.getMediaTimescaleFromMoov(bytes);
} else {
samplesInfo = fragmentedTextBoxParser.getSamplesInfo(bytes);
sampleList = samplesInfo.sampleList;
if (firstFragmentedSubtitleStart === null && sampleList.length > 0) {
firstFragmentedSubtitleStart = sampleList[0].cts - chunk.start * timescale;
}
if (codecType.search(Constants.STPP) >= 0) {
parser = parser !== null ? parser : getParser(codecType);
for (i = 0; i < sampleList.length; i++) {
const sample = sampleList[i];
const sampleStart = sample.cts;
const sampleRelStart = sampleStart - firstFragmentedSubtitleStart;
this.buffered.add(sampleRelStart / timescale, (sampleRelStart + sample.duration) / timescale);
const dataView = new DataView(bytes, sample.offset, sample.subSizes[0]);
ccContent = ISOBoxer.Utils.dataViewToString(dataView, Constants.UTF8);
const images = [];
let subOffset = sample.offset + sample.subSizes[0];
for (j = 1; j < sample.subSizes.length; j++) {
const inData = new Uint8Array(bytes, subOffset, sample.subSizes[j]);
const raw = String.fromCharCode.apply(null, inData);
images.push(raw);
subOffset += sample.subSizes[j];
}
try {
// Only used for Miscrosoft Smooth Streaming support - caption time is relative to sample time. In this case, we apply an offset.
const manifest = manifestModel.getValue();
const offsetTime = manifest.ttmlTimeIsRelative ? sampleStart / timescale : 0;
result = parser.parse(ccContent, offsetTime, sampleStart / timescale, (sampleStart + sample.duration) / timescale, images);
textTracks.addCaptions(currFragmentedTrackIdx, firstFragmentedSubtitleStart / timescale, result);
} catch (e) {
fragmentedFragmentModel.removeExecutedRequestsBeforeTime();
this.remove();
logger.error('TTML parser error: ' + e.message);
}
}
} else {
// WebVTT case
const captionArray = [];
for (i = 0 ; i < sampleList.length; i++) {
const sample = sampleList[i];
sample.cts -= firstFragmentedSubtitleStart;
this.buffered.add(sample.cts / timescale, (sample.cts + sample.duration) / timescale);
const sampleData = bytes.slice(sample.offset, sample.offset + sample.size);
// There are boxes inside the sampleData, so we need a ISOBoxer to get at it.
const sampleBoxes = ISOBoxer.parseBuffer(sampleData);
for (j = 0 ; j < sampleBoxes.boxes.length; j++) {
const box1 = sampleBoxes.boxes[j];
logger.debug('VTT box1: ' + box1.type);
if (box1.type === 'vtte') {
continue; //Empty box
}
if (box1.type === 'vttc') {
logger.debug('VTT vttc boxes.length = ' + box1.boxes.length);
for (k = 0 ; k < box1.boxes.length; k++) {
const box2 = box1.boxes[k];
logger.debug('VTT box2: ' + box2.type);
if (box2.type === 'payl') {
const cue_text = box2.cue_text;
logger.debug('VTT cue_text = ' + cue_text);
const start_time = sample.cts / timescale;
const end_time = (sample.cts + sample.duration) / timescale;
captionArray.push({
start: start_time,
end: end_time,
data: cue_text,
styles: {}
});
logger.debug('VTT ' + start_time + '-' + end_time + ' : ' + cue_text);
}
}
}
}
}
if (captionArray.length > 0) {
textTracks.addCaptions(currFragmentedTrackIdx, 0, captionArray);
}
}
}
} else if (mediaType === Constants.TEXT) {
const dataView = new DataView(bytes, 0, bytes.byteLength);
ccContent = ISOBoxer.Utils.dataViewToString(dataView, Constants.UTF8);
try {
result = getParser(codecType).parse(ccContent, 0);
textTracks.addCaptions(textTracks.getCurrentTrackIdx(), 0, result);
} catch (e) {
errHandler.timedTextError(e, 'parse', ccContent);
errHandler.error(new DashJSError(Errors.TIMED_TEXT_ERROR_ID_PARSE_CODE, Errors.TIMED_TEXT_ERROR_MESSAGE_PARSE + e.message, ccContent));
}
} else if (mediaType === Constants.VIDEO) { //embedded text
if (chunk.segmentType === HTTPRequest.INIT_SEGMENT_TYPE) {
if (embeddedTimescale === 0) {
embeddedTimescale = fragmentedTextBoxParser.getMediaTimescaleFromMoov(bytes);
for (i = 0; i < embeddedTracks.length; i++) {
createTextTrackFromMediaInfo(null, embeddedTracks[i]);
}
}
} else { // MediaSegment
if (embeddedTimescale === 0) {
logger.warn('CEA-608: No timescale for embeddedTextTrack yet');
return;
}
const makeCueAdderForIndex = function (self, trackIndex) {
function newCue(startTime, endTime, captionScreen) {
let captionsArray = null;
if (videoModel.getTTMLRenderingDiv()) {
captionsArray = embeddedTextHtmlRender.createHTMLCaptionsFromScreen(videoModel.getElement(), startTime, endTime, captionScreen);
} else {
const text = captionScreen.getDisplayText();
captionsArray = [{
start: startTime,
end: endTime,
data: text,
styles: {}
}];
}
if (captionsArray) {
textTracks.addCaptions(trackIndex, 0, captionsArray);
}
}
return newCue;
};
samplesInfo = fragmentedTextBoxParser.getSamplesInfo(bytes);
const sequenceNumber = samplesInfo.lastSequenceNumber;
if (!embeddedCea608FieldParsers[0] && !embeddedCea608FieldParsers[1]) {
// Time to setup the CEA-608 parsing
let field, handler, trackIdx;
for (i = 0; i < embeddedTracks.length; i++) {
if (embeddedTracks[i].id === Constants.CC1) {
field = 0;
trackIdx = textTracks.getTrackIdxForId(Constants.CC1);
} else if (embeddedTracks[i].id === Constants.CC3) {
field = 1;
trackIdx = textTracks.getTrackIdxForId(Constants.CC3);
}
if (trackIdx === -1) {
logger.warn('CEA-608: data before track is ready.');
return;
}
handler = makeCueAdderForIndex(this, trackIdx);
embeddedCea608FieldParsers[i] = new cea608parser.Cea608Parser(i + 1, {
'newCue': handler
}, null);
}
}
if (embeddedTimescale && embeddedSequenceNumbers.indexOf(sequenceNumber) == -1) {
if (embeddedLastSequenceNumber !== null && sequenceNumber !== embeddedLastSequenceNumber + samplesInfo.numSequences) {
for (i = 0; i < embeddedCea608FieldParsers.length; i++) {
if (embeddedCea608FieldParsers[i]) {
embeddedCea608FieldParsers[i].reset();
}
}
}
const allCcData = extractCea608Data(bytes, samplesInfo.sampleList);
for (let fieldNr = 0; fieldNr < embeddedCea608FieldParsers.length; fieldNr++) {
const ccData = allCcData.fields[fieldNr];
const fieldParser = embeddedCea608FieldParsers[fieldNr];
if (fieldParser) {
for (i = 0; i < ccData.length; i++) {
fieldParser.addData(ccData[i][0] / embeddedTimescale, ccData[i][1]);
}
}
}
embeddedLastSequenceNumber = sequenceNumber;
embeddedSequenceNumbers.push(sequenceNumber);
}
}
}
}
/**
 * Extract CEA-608 caption data from an ISOBMFF media segment.
 * @param {ArrayBuffer} data segment bytes
 * @param {Array} samples sample/cue information for the segment
 * @returns {Object|null} per-field cc data for the segment, or null when there are no samples
 */
function extractCea608Data(data, samples) {
    if (samples.length === 0) {
        return null;
    }
    const allCcData = {
        splits: [],
        fields: [[], []]
    };
    const raw = new DataView(data);
    for (const sample of samples) {
        const nalRanges = cea608parser.findCea608Nalus(raw, sample.offset, sample.size);
        let previousSampleTime = null;
        let packetIdx = 0;
        for (const range of nalRanges) {
            const ccData = cea608parser.extractCea608DataFromRange(raw, range);
            for (let field = 0; field < 2; field++) {
                if (ccData[field].length > 0) {
                    // Packets sharing a sample time keep their arrival order via packetIdx.
                    packetIdx = (sample.cts !== previousSampleTime) ? 0 : packetIdx + 1;
                    allCcData.fields[field].push([sample.cts + (mseTimeOffset * embeddedTimescale), ccData[field], packetIdx]);
                    previousSampleTime = sample.cts;
                }
            }
        }
    }
    // Sort by sample time ascending; equal times fall back to reception order.
    allCcData.fields.forEach(function (field) {
        field.sort(function (a, b) {
            return (a[0] === b[0]) ? a[2] - b[2] : a[0] - b[0];
        });
    });
    return allCcData;
}
function getIsDefault(mediaInfo) {
    //TODO How to tag default. currently same order as listed in manifest.
    // Is there a way to mark a text adaptation set as the default one? DASHIF meeting talk about using role which is being used for track KIND
    // Eg subtitles etc. You can have multiple role tags per adaptation Not defined in the spec yet.
    let isDefault = false;
    if (embeddedTracks.length > 1 && mediaInfo.isEmbedded) {
        // CC1 is the default if both CC1 and CC3 exist.
        // (strict comparison so isDefault stays a boolean; the original
        // `id && id === CC1` could leak undefined/'' into defaultTrack)
        isDefault = (mediaInfo.id === Constants.CC1);
    } else if (embeddedTracks.length === 1) {
        if (mediaInfo.id && mediaInfo.id.substring(0, 2) === 'CC') { // Either CC1 or CC3
            isDefault = true;
        }
    } else if (embeddedTracks.length === 0) {
        // No embedded tracks: the first listed media info is the default.
        isDefault = (mediaInfo.index === mediaInfos[0].index);
    }
    return isDefault;
}
function getParser(codecType) {
    // Pick the caption parser matching the codec/mime string;
    // undefined when neither VTT nor TTML/STPP matches.
    if (codecType.search(Constants.VTT) >= 0) {
        return vttParser;
    }
    if (codecType.search(Constants.TTML) >= 0 || codecType.search(Constants.STPP) >= 0) {
        return ttmlParser;
    }
    return undefined;
}
function remove(start, end) {
    // With no arguments, clear the entire buffered range.
    if (start === undefined && end === undefined) {
        start = this.buffered.start(0);
        end = this.buffered.end(this.buffered.length - 1);
    }
    this.buffered.remove(start, end);
}
instance = {
initialize: initialize,
append: append,
abort: abort,
addEmbeddedTrack: addEmbeddedTrack,
resetEmbedded: resetEmbedded,
setConfig: setConfig,
getConfig: getConfig,
setCurrentFragmentedTrackIdx: setCurrentFragmentedTrackIdx,
remove: remove,
reset: reset
};
setup();
return instance;
}
TextSourceBuffer.__dashjs_factory_name = 'TextSourceBuffer';
// Singleton factory: one TextSourceBuffer is shared per dash.js context.
export default FactoryMaker.getSingletonFactory(TextSourceBuffer);