ytdl-core
Version:
Youtube video downloader in pure javascript.
463 lines (403 loc) • 10.8 kB
JavaScript
;
const qs = require('querystring');
const url = require('url');
const Entities = require('html-entities').AllHtmlEntities;
const FORMATS = require('./formats');
const VIDEO_URL = 'https://www.youtube.com/watch?v=';
/**
* Parses a string representation of amount of milliseconds.
*
* @param {String} time
* @return {Number}
*/
var timeRegexp = /(?:(\d+)h)?(?:(\d+)m(?!s))?(?:(\d+)s)?(?:(\d+)(?:ms)?)?/;
exports.parseTime = (time) => {
var result = timeRegexp.exec(time.toString());
var hours = result[1] || 0;
var mins = result[2] || 0;
var secs = result[3] || 0;
var ms = result[4] || 0;
return hours * 3600000 + mins * 60000 + secs * 1000 + parseInt(ms, 10);
};
// Use these to help sort formats, higher is better.
var audioEncodingRanks = {
mp3: 1,
vorbis: 2,
aac: 3,
opus: 4,
flac: 5,
};
var videoEncodingRanks = {
'Sorenson H.283': 1,
'MPEG-4 Visual': 2,
'VP8': 3,
'VP9': 4,
'H.264': 5,
};
/**
* Sort formats from highest quality to lowest.
* By resolution, then video bitrate, then audio bitrate.
*
* @param {Object} a
* @param {Object} b
*/
exports.sortFormats = (a, b) => {
var ares = a.resolution ? parseInt(a.resolution.slice(0, -1), 10) : 0;
var bres = b.resolution ? parseInt(b.resolution.slice(0, -1), 10) : 0;
var afeats = ~~!!ares * 2 + ~~!!a.audioBitrate;
var bfeats = ~~!!bres * 2 + ~~!!b.audioBitrate;
function getBitrate(c) {
if (c.bitrate) {
let s = c.bitrate.split('-');
return parseFloat(s[s.length - 1], 10);
} else {
return 0;
}
}
function audioScore(c) {
var abitrate = c.audioBitrate || 0;
var aenc = audioEncodingRanks[c.audioEncoding] || 0;
return abitrate + aenc / 10;
}
if (afeats === bfeats) {
if (ares === bres) {
let avbitrate = getBitrate(a);
let bvbitrate = getBitrate(b);
if (avbitrate === bvbitrate) {
let aascore = audioScore(a);
let bascore = audioScore(b);
if (aascore === bascore) {
let avenc = videoEncodingRanks[a.encoding] || 0;
let bvenc = videoEncodingRanks[b.encoding] || 0;
return bvenc - avenc;
} else {
return bascore - aascore;
}
} else {
return bvbitrate - avbitrate;
}
} else {
return bres - ares;
}
} else {
return bfeats - afeats;
}
};
/**
* Choose a format depending on the given options.
*
* @param {Array.<Object>} formats
* @param {Object} options
* @return {Object|Error}
*/
exports.chooseFormat = (formats, options) => {
if (typeof options.format === 'object') {
return options.format;
}
if (options.filter) {
formats = exports.filterFormats(formats, options.filter);
if (formats.length === 0) {
return new Error('No formats found with custom filter');
}
}
var format;
var quality = options.quality || 'highest';
switch (quality) {
case 'highest':
format = formats[0];
break;
case 'lowest':
format = formats[formats.length - 1];
break;
default: {
let getFormat = (itag) => {
return formats.find((format) => format.itag === '' + itag);
};
if (Array.isArray(quality)) {
quality.find((q) => format = getFormat(q));
} else {
format = getFormat(quality);
}
}
}
if (!format) {
return new Error('No such format found: ' + quality);
}
return format;
};
/**
* @param {Array.<Object>} formats
* @param {Function} filter
* @return {Array.<Object>}
*/
exports.filterFormats = (formats, filter) => {
var fn;
switch (filter) {
case 'audioandvideo':
fn = (format) => format.bitrate && format.audioBitrate;
break;
case 'video':
fn = (format) => format.bitrate;
break;
case 'videoonly':
fn = (format) => format.bitrate && !format.audioBitrate;
break;
case 'audio':
fn = (format) => format.audioBitrate;
break;
case 'audioonly':
fn = (format) => !format.bitrate && format.audioBitrate;
break;
default:
if (typeof filter === 'function') {
fn = filter;
} else {
throw new TypeError(`Given filter (${filter}) is not supported`);
}
}
return formats.filter(fn);
};
/**
* Extract string inbetween another.
*
* @param {String} haystack
* @param {String} left
* @param {String} right
* @return {String}
*/
exports.between = (haystack, left, right) => {
var pos;
pos = haystack.indexOf(left);
if (pos === -1) { return ''; }
haystack = haystack.slice(pos + left.length);
pos = haystack.indexOf(right);
if (pos === -1) { return ''; }
haystack = haystack.slice(0, pos);
return haystack;
};
/**
 * Get video ID.
 *
 * There are a few type of video URL formats.
 *  - http://www.youtube.com/watch?v=VIDEO_ID
 *  - http://m.youtube.com/watch?v=VIDEO_ID
 *  - http://youtu.be/VIDEO_ID
 *  - http://www.youtube.com/v/VIDEO_ID
 *  - http://www.youtube.com/embed/VIDEO_ID
 *
 * The id is read from the `v` query parameter, or from the last path
 * segment for `youtu.be` links and for `youtube.com` links without `v`.
 * Only the first 11 characters are kept. Problems are returned as error
 * objects, not thrown.
 *
 * @param {String} link
 * @return {String|Error}
 */
exports.getURLVideoID = function(link) {
  const parsed = url.parse(link, true);
  let id = parsed.query.v;
  // A repeated `v` parameter is parsed into an array; use the first value
  // instead of failing later on `substring`.
  if (Array.isArray(id)) {
    id = id[0];
  }

  const isShortHost = parsed.hostname === 'youtu.be';
  const isMainHost = parsed.hostname === 'youtube.com' ||
    parsed.hostname === 'www.youtube.com';
  if (isShortHost || (isMainHost && !id)) {
    const segments = parsed.pathname.split('/');
    id = segments[segments.length - 1];
  }

  if (!id) {
    return new Error('No video id found: ' + link);
  }
  id = id.substring(0, 11);
  if (!exports.validateID(id)) {
    return new TypeError(`Video id (${id}) does not match expected ` +
      `format (${idRegex.toString()})`);
  }
  return id;
};
/**
* Gets video ID either from a url or by checking if the given string
* matches the video ID format.
*
* @param {String} str
* @return {String|Error}
*/
exports.getVideoID = (str) => {
if (exports.validateID(str)) {
return str;
} else {
return exports.getURLVideoID(str);
}
};
/**
* Returns true if given id satifies YouTube's id format.
*
* @param {String} id
* @return {Boolean}
*/
var idRegex = /^[a-zA-Z0-9-_]{11}$/;
exports.validateID = (id) => {
return idRegex.test(id);
};
/**
* Checks wether the input string includes a valid id.
*
* @param {String} string
* @return {Boolean}
*/
exports.validateURL = (string) => {
return !(exports.getURLVideoID(string) instanceof Error);
};
/**
* @param {Object} info
* @return {Array.<Object>}
*/
exports.parseFormats = (info) => {
var formats = [];
if (info.url_encoded_fmt_stream_map) {
formats = formats
.concat(info.url_encoded_fmt_stream_map.split(','));
}
if (info.adaptive_fmts) {
formats = formats.concat(info.adaptive_fmts.split(','));
}
formats = formats.map((format) => qs.parse(format));
delete info.url_encoded_fmt_stream_map;
delete info.adaptive_fmts;
return formats;
};
/**
* @param {Object} format
*/
exports.addFormatMeta = (format) => {
var meta = FORMATS[format.itag];
for (let key in meta) {
format[key] = meta[key];
}
if (/\/live\/1\//.test(format.url)) {
format.live = true;
}
};
/**
* Get video description from html
*
* @param {String} html
* @return {String}
*/
exports.getVideoDescription = (html) => {
var regex = /<p.*?id="eow-description".*?>(.+?)<\/p>[\n\r\s]*?<\/div>/im;
var description = html.match(regex);
return description ? new Entities().decode(description[1]
.replace(/\n/g, ' ')
.replace(/\s*<\s*br\s*\/?\s*>\s*/gi, '\n')
.replace(/<\s*\/\s*p\s*>\s*<\s*p[^>]*>/gi, '\n')
.replace(/<.*?>/gi, '')).trim() : '';
};
/**
* Get video Owner from html.
*
* @param {String} body
* @return {Object}
*/
var authorRegexp = /<a href="\/channel\/([\w-]+)"[^>]+>(.+?(?=<\/a>))/;
var aliasRegExp = /<a href="\/user\/([^"]+)/;
exports.getAuthor = (body) => {
var ownerinfo = exports.between(body,
'<div id="watch7-user-header" class=" spf-link ">',
'<div id="watch8-action-buttons" class="watch-action-buttons clearfix">');
if (ownerinfo === '') {
return {};
}
ownerinfo = new Entities().decode(ownerinfo);
var channelMatch = ownerinfo.match(authorRegexp);
var userMatch = ownerinfo.match(aliasRegExp);
return {
id: channelMatch[1],
name: channelMatch[2],
avatar: url.resolve(VIDEO_URL, exports.between(ownerinfo,
'data-thumb="', '"')),
user: userMatch ? userMatch[1] : null,
channel_url: 'https://www.youtube.com/channel/' + channelMatch[1],
user_url: userMatch ? 'https://www.youtube.com/user/' + userMatch[1] : null,
};
};
/**
* Get video published at from html.
*
* @param {String} body
* @return {String}
*/
exports.getPublished = (body) => {
return Date.parse(exports.between(body,
'<meta itemprop="datePublished" content="', '">'));
};
/**
* Get video published at from html.
* Credits to https://github.com/paixaop.
*
* @param {String} body
* @return {Array.<Object>}
*/
exports.getRelatedVideos = (body) => {
var jsonStr = exports.between(body, '\'RELATED_PLAYER_ARGS\': {"rvs":', '},');
try {
jsonStr = JSON.parse(jsonStr);
} catch (err) {
return [];
}
return jsonStr.split(',').map((link) => qs.parse(link));
};
/**
* @param {Array.<Function>} funcs
* @param {Function(!Error, Array.<Object>)} callback
*/
exports.parallel = (funcs, callback) => {
var funcsDone = 0;
var len = funcs.length;
var errGiven = false;
var results = [];
function checkDone(index, err, result) {
if (errGiven) { return; }
if (err) {
errGiven = true;
callback(err);
return;
}
results[index] = result;
if (++funcsDone === len) {
callback(null, results);
}
}
if (len > 0) {
funcs.forEach((f, i) => { f(checkDone.bind(null, i)); });
} else {
callback(null, results);
}
};
/**
* Converts human friendly time to milliseconds. Supports the format
* 00:00:00.000 for hours, minutes, seconds, and milliseconds respectively.
* And 0ms, 0s, 0m, 0h, and together 1m1s.
*
* @param {String|Number} time
* @return {Number}
*/
var numberFormat = /^\d+$/;
var timeFormat = /^(?:(?:(\d+):)?(\d{1,2}):)?(\d{1,2})(?:\.(\d{3}))?$/;
var timeUnits = {
ms: 1,
s: 1000,
m: 60000,
h: 3600000,
};
exports.fromHumanTime = (time) => {
if (typeof time === 'number') { return time; }
if (numberFormat.test(time)) { return +time; }
var firstFormat = timeFormat.exec(time);
if (firstFormat) {
return +(firstFormat[1] || 0) * timeUnits.h +
+(firstFormat[2] || 0) * timeUnits.m +
+firstFormat[3] * timeUnits.s +
+(firstFormat[4] || 0);
} else {
var total = 0;
var r = /(\d+)(ms|s|m|h)/g;
var rs;
while ((rs = r.exec(time)) != null) {
total += +rs[1] * timeUnits[rs[2]];
}
return total;
}
};