/*
 * ytdl-core
 * Version:
 * YouTube video downloader in pure JavaScript.
 * 323 lines (278 loc) • 9.13 kB
 * JavaScript
 */
;
const urllib = require('url');
const querystring = require('querystring');
const sax = require('sax');
const request = require('miniget');
const util = require('./util');
const sig = require('./sig');
const FORMATS = require('./formats');
// Watch-page URL prefix; the video id is appended directly after `v=`.
const VIDEO_URL = 'https://www.youtube.com/watch?v=';

// Embed-page URL prefix, requested when the watch page has no player config.
const EMBED_URL = 'https://www.youtube.com/embed/';

// Prefix of the `eurl` query value sent along with the info request.
const VIDEO_EURL = 'https://youtube.googleapis.com/v/';

// Host and path of the video info endpoint.
const INFO_HOST = 'www.youtube.com';
const INFO_PATH = '/get_video_info';

// Info keys whose comma-separated string value is split into an array.
const KEYS_TO_SPLIT = ['keywords', 'fmt_list', 'fexp', 'watermark'];
/**
* Gets info from a video.
*
* @param {String} link
* @param {Object} options
* @param {Function(Error, Object)} callback
*/
module.exports = function getInfo(link, options, callback) {
if (typeof options === 'function') {
callback = options;
options = {};
} else if (!options) {
options = {};
}
if (!callback) {
return new Promise((resolve, reject) => {
getInfo(link, options, (err, info) => {
if (err) return reject(err);
resolve(info);
});
});
}
var id = util.getVideoID(link);
if (id instanceof Error) return callback(id);
// Try getting config from the video page first.
var params = 'hl=' + (options.lang || 'en');
var url = VIDEO_URL + id + '&' + params;
request(url, options.requestOptions, (err, res, body) => {
if (err) return callback(err);
// Check if there are any errors with this video page.
var unavailableMsg = util.between(body, '<div id="player-unavailable"', '>');
if (unavailableMsg &&
!/\bhid\b/.test(util.between(unavailableMsg, 'class="', '"'))) {
// Ignore error about age restriction.
if (!body.includes('<div id="watch7-player-age-gate-content"')) {
return callback(new Error(util.between(body,
'<h1 id="unavailable-message" class="message">', '</h1>').trim()));
}
}
// Parse out additional metadata from this page.
var additional = {
// Get the author/uploader.
author: util.getAuthor(body),
// Get the day the vid was published.
published: util.getPublished(body),
// Get description.
description: util.getVideoDescription(body),
// Get related videos.
related_videos: util.getRelatedVideos(body),
// Give the canonical link to the video.
video_url: url,
};
var jsonStr = util.between(body, 'ytplayer.config = ', '</script>');
var config;
if (jsonStr) {
config = jsonStr.slice(0, jsonStr.lastIndexOf(';ytplayer.load'));
gotConfig(id, options, additional, config, false, callback);
} else {
// If the video page doesn't work, maybe because it has mature content.
// and requires an account logged in to view, try the embed page.
url = EMBED_URL + id + '?' + params;
request(url, options.requestOptions, (err, res, body) => {
if (err) return callback(err);
config = util.between(body, 't.setConfig({\'PLAYER_CONFIG\': ', '},\'');
gotConfig(id, options, additional, config, true, callback);
});
}
});
};
/**
 * Parses the player config, requests the video info endpoint and builds the
 * info object (formats included) that is passed to `callback`.
 *
 * Every failure is reported through `callback`; a config without `args` or
 * without `assets.js` no longer throws from inside the request callback.
 *
 * @param {String} id Video id.
 * @param {Object} options Reads `lang`, `requestOptions` and `debug`.
 * @param {Object} additional Extra properties copied onto the info object.
 * @param {String} config Player config as a JSON string.
 * @param {Boolean} fromEmbed Whether `config` was taken from the embed page.
 * @param {Function(Error, Object)} callback
 */
function gotConfig(id, options, additional, config, fromEmbed, callback) {
  if (!config) {
    return callback(new Error('Could not find player config'));
  }
  try {
    // The embed page extraction stops before the closing brace; restore it.
    config = JSON.parse(config + (fromEmbed ? '}' : ''));
  } catch (err) {
    return callback(new Error('Error parsing config: ' + err.message));
  }

  // `config.args` is optional, so read its properties through a safe
  // fallback instead of dereferencing it directly.
  const args = config.args || {};

  const infoUrl = urllib.format({
    protocol: 'https',
    host: INFO_HOST,
    pathname: INFO_PATH,
    query: {
      video_id: id,
      eurl: VIDEO_EURL + id,
      ps: 'default',
      gl: 'US',
      hl: (options.lang || 'en'),
      sts: config.sts,
    },
  });

  request(infoUrl, options.requestOptions, (err, res, body) => {
    if (err) return callback(err);

    let info = querystring.parse(body);
    if (info.requires_purchase === '1') {
      return callback(new Error('Video requires purchase'));
    } else if (info.status === 'fail') {
      if (info.errorcode === '150' && config.args) {
        // For error code 150, use the args from the page config instead.
        info = config.args;
      } else {
        return callback(new Error(`Code ${info.errorcode}: ${info.reason}`));
      }
    }

    // Split some keys by commas.
    KEYS_TO_SPLIT.forEach((key) => {
      if (!info[key]) return;
      info[key] = info[key]
        .split(',')
        .filter((v) => v !== '');
    });

    if (args.player_response) {
      try {
        info.player_response = JSON.parse(args.player_response);
      } catch (err) {
        return callback(
          new Error('Error parsing `player_response`: ' + err.message));
      }
    }

    info.fmt_list = info.fmt_list ?
      info.fmt_list.map((format) => format.split('/')) : [];
    info.formats = util.parseFormats(info);

    // Add additional properties to info.
    Object.assign(info, additional);

    if (!info.formats.length &&
        !args.dashmpd && !info.dashmpd && !info.hlsvp) {
      return callback(new Error('This video is unavailable'));
    }
    if (!config.assets || !config.assets.js) {
      return callback(new Error('Unable to find html5player file'));
    }

    const html5playerfile = urllib.resolve(VIDEO_URL, config.assets.js);
    sig.getTokens(html5playerfile, options, (err, tokens) => {
      if (err) return callback(err);

      sig.decipherFormats(info.formats, tokens, options.debug);

      // Collect the manifest requests that can contribute more formats.
      const funcs = [];
      if (args.dashmpd) {
        const dashmpd = decipherURL(args.dashmpd, tokens);
        funcs.push(getDashManifest.bind(null, dashmpd, options));
      }
      if (info.dashmpd && info.dashmpd !== args.dashmpd) {
        const dashmpd = decipherURL(info.dashmpd, tokens);
        funcs.push(getDashManifest.bind(null, dashmpd, options));
      }
      if (info.hlsvp) {
        info.hlsvp = decipherURL(info.hlsvp, tokens);
        funcs.push(getM3U8.bind(null, info.hlsvp, options));
      }

      util.parallel(funcs, (err, results) => {
        if (err) return callback(err);

        // Merge the result of every manifest request that returned formats.
        for (let i = 0; i < funcs.length; i++) {
          if (results[i]) { mergeFormats(info, results[i]); }
        }
        if (!info.formats.length) {
          callback(new Error('No formats found'));
          return;
        }

        if (options.debug) {
          info.formats.forEach((format) => {
            const itag = format.itag;
            if (!FORMATS[itag]) {
              console.warn(`No format metadata for itag ${itag} found`);
            }
          });
        }

        info.formats.forEach(util.addFormatMeta);
        info.formats.sort(util.sortFormats);
        callback(null, info);
      });
    });
  });
}
/**
 * Replaces the first `/s/<ciphered>` path segment of a URL with
 * `/signature/<deciphered>`, using `sig.decipher` with the given tokens.
 * A URL without such a segment is returned unchanged.
 *
 * @param {String} url
 * @param {Array.<String>} tokens
 * @return {String}
 */
function decipherURL(url, tokens) {
  const cipheredSegment = /\/s\/([a-fA-F0-9.]+)/;
  const toSignatureSegment = (segment, ciphered) => {
    const deciphered = sig.decipher(tokens, ciphered);
    return '/signature/' + deciphered;
  };
  return url.replace(cipheredSegment, toSignatureSegment);
}
/**
 * Merges formats from DASH or M3U8 with formats from video info page.
 *
 * For every format already in `info.formats`: when `formatsMap` has an entry
 * with the same itag, the format's properties are copied onto that entry;
 * otherwise the format itself is stored in `formatsMap`. Afterwards
 * `info.formats` is replaced by the values of `formatsMap`. Both arguments
 * are modified.
 *
 * @param {Object} info
 * @param {Object} formatsMap Formats keyed by itag.
 */
function mergeFormats(info, formatsMap) {
  for (const format of info.formats) {
    const manifestFormat = formatsMap[format.itag];
    if (!manifestFormat) {
      formatsMap[format.itag] = format;
      continue;
    }
    // Values from the info page take precedence over the manifest's.
    Object.assign(manifestFormat, format);
  }
  info.formats = Object.keys(formatsMap).map((itag) => formatsMap[itag]);
}
/**
 * Gets additional DASH formats.
 *
 * Streams the manifest through a sax parser and collects one format per
 * `Representation` element, keyed by its `id` attribute, with `url` taken
 * from the text of a `BaseURL` element that follows it.
 *
 * `callback` is invoked exactly once: with the first request or parser
 * error, or with the collected formats when the document ends.
 *
 * @param {String} url
 * @param {Object} options Reads `requestOptions`.
 * @param {Function(!Error, Object.<String, Object>)} callback
 */
function getDashManifest(url, options, callback) {
  const formats = {};
  let currentFormat = null;
  let expectUrl = false;
  let finished = false;

  // Report only the first outcome; later events are ignored.
  const finish = (err, result) => {
    if (finished) return;
    finished = true;
    callback(err, result);
  };

  // Non-strict parser: element and attribute names are compared upper-cased.
  const parser = sax.parser(false);
  parser.onerror = (err) => { finish(err); };
  parser.onopentag = (node) => {
    if (node.name === 'REPRESENTATION') {
      const itag = node.attributes.ID;
      currentFormat = { itag: itag };
      formats[itag] = currentFormat;
    }
    expectUrl = node.name === 'BASEURL';
  };
  parser.onclosetag = () => {
    // Text after a closing tag (e.g. whitespace) is not part of a BaseURL.
    expectUrl = false;
  };
  parser.ontext = (text) => {
    // A BaseURL outside of any Representation has no format to attach to.
    if (expectUrl && currentFormat) {
      currentFormat.url = text;
    }
  };
  parser.onend = () => { finish(null, formats); };

  const req = request(urllib.resolve(VIDEO_URL, url), options.requestOptions);
  req.setEncoding('utf8');
  req.on('error', finish);
  // After a parser error the parser keeps the error and throws on further
  // writes, so stop feeding it once an outcome has been reported.
  req.on('data', (chunk) => {
    if (!finished) { parser.write(chunk); }
  });
  req.on('end', () => {
    if (!finished) { parser.close(); }
  });
}
/**
 * Gets additional formats.
 *
 * Requests the playlist at `url` and collects one format per line that is
 * itself an http(s) URL containing an `/itag/<digits>/` segment, keyed by
 * that itag. Tag lines that merely embed a URL and URL lines without an itag
 * are skipped.
 *
 * @param {String} url
 * @param {Object} options Reads `requestOptions`.
 * @param {Function(!Error, Object.<String, Object>)} callback
 */
function getM3U8(url, options, callback) {
  const playlistUrl = urllib.resolve(VIDEO_URL, url);
  request(playlistUrl, options.requestOptions, (err, res, body) => {
    if (err) return callback(err);

    const formats = {};
    body
      .split('\n')
      // Drop surrounding whitespace, including the `\r` of CRLF playlists.
      .map((line) => line.trim())
      // Only lines that start with a URL; `#EXT-...` tags may embed one too.
      .filter((line) => /^https?:\/\//.test(line))
      .forEach((line) => {
        const match = /\/itag\/(\d+)\//.exec(line);
        if (!match) return;
        const itag = match[1];
        formats[itag] = { itag: itag, url: line };
      });
    callback(null, formats);
  });
}