@distube/ytdl-core
Version:
DisTube fork of ytdl-core. YouTube video downloader in pure javascript.
279 lines (254 loc) • 11 kB
JavaScript
const querystring = require("querystring");
const Cache = require("./cache");
const utils = require("./utils");
const vm = require("vm");
// Shared cache of the functions extracted from html5player scripts.
exports.cache = new Cache(1);

/**
 * Fetch an html5player script and extract its signature-decipher and
 * n-parameter transform scripts, going through `exports.cache`.
 *
 * URLs pointing at the "tce" player variant are rewritten to the non-tce
 * variant before the download. The result is additionally stored in the
 * cache under the URL that was actually requested.
 *
 * @param {string} html5playerfile URL of the player script.
 * @param {Object} options Forwarded to `utils.request`.
 * @returns {Promise<Array.<vm.Script|null>>} `[decipherScript, nTransformScript]`;
 *   an entry is `null` when the corresponding function could not be extracted.
 */
exports.getFunctions = (html5playerfile, options) =>
  exports.cache.getOrSet(html5playerfile, async () => {
    const TCE_SEGMENT = "/player_ias_tce.vflset/";
    let playerUrl = html5playerfile;
    if (playerUrl.includes(TCE_SEGMENT)) {
      console.debug("jsUrl URL points to tce-variant player script, rewriting to non-tce.");
      playerUrl = playerUrl.replace(TCE_SEGMENT, "/player_ias.vflset/");
    }
    const playerSource = await utils.request(playerUrl, options);
    const extracted = exports.extractFunctions(playerSource);
    exports.cache.set(playerUrl, extracted);
    return extracted;
  });
// Pattern for a single JavaScript identifier (updated to follow the Java code
// the LavaPlayer-derived regexps in this file were ported from).
const VARIABLE_PART = "[a-zA-Z_\\$][a-zA-Z_0-9\\$]*";

/**
 * Candidate patterns for locating the name of the signature decipher function.
 *
 * Keys are regular-expression SOURCES (they are compiled with `new RegExp`),
 * values are the index of the capture group that holds the function name.
 * Patterns are tried in insertion order; the first one that matches wins.
 *
 * NOTE: because these are RegExp sources and not regex literals, they must not
 * be wrapped in `/.../` delimiters - a leading or trailing slash would be
 * matched literally against the player script.
 */
const DECIPHER_NAME_REGEXPS = {
  "\\b([a-zA-Z0-9_$]+)&&\\(\\1=([a-zA-Z0-9_$]{2,})\\(decodeURIComponent\\(\\1\\)\\)": 2,
  '([a-zA-Z0-9_$]+)\\s*=\\s*function\\(\\s*([a-zA-Z0-9_$]+)\\s*\\)\\s*{\\s*\\2\\s*=\\s*\\2\\.split\\(\\s*""\\s*\\)\\s*;\\s*[^}]+;\\s*return\\s+\\2\\.join\\(\\s*""\\s*\\)': 1,
  '(?:\\b|[^a-zA-Z0-9_$])([a-zA-Z0-9_$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*""\\s*\\)(?:;[a-zA-Z0-9_$]{2}\\.[a-zA-Z0-9_$]{2}\\(a,\\d+\\))?': 1,
  "\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)": 1,
  "\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)": 1,
  '(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*""\\s*\\)': 1,
  '([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;': 1,
};
// LavaPlayer-derived pattern fragments, built on top of VARIABLE_PART.

// An identifier used as an object-literal key, optionally wrapped in double quotes.
const VARIABLE_PART_DEFINE = '\\"?' + VARIABLE_PART + '\\"?';
// Start of a member access: either `["` or `.`.
const BEFORE_ACCESS = `(?:\\[\\"|\\.)`;
// End of a member access: either `"]` or nothing.
const AFTER_ACCESS = `(?:\\"\\]|)`;
// A complete member access such as `.name` or `["name"]`.
const VARIABLE_PART_ACCESS = `${BEFORE_ACCESS}${VARIABLE_PART}${AFTER_ACCESS}`;
// Regex sources for the bodies of the helper-object methods. Each fragment
// starts at the `:` that follows the property name in the object literal.

// `:function(a){a.reverse()}` (optionally with `return`).
const REVERSE_PART = ":function\\(\\w\\)\\{(?:return )?\\w\\.reverse\\(\\)\\}";
// `:function(a,b){return a.slice(b)}`
const SLICE_PART = ":function\\(\\w,\\w\\)\\{return \\w\\.slice\\(\\w\\)\\}";
// `:function(a,b){a.splice(0,b)}`
const SPLICE_PART = ":function\\(\\w,\\w\\)\\{\\w\\.splice\\(0,\\w\\)\\}";
// `:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b(%a.length)?]=c(;return a)?}`
// The dot in the optional second `%x.length` is escaped so that it only matches
// a literal `.`, consistent with the first occurrence.
const SWAP_PART =
  ":function\\(\\w,\\w\\)\\{var \\w=\\w\\[0\\];\\w\\[0\\]=\\w\\[\\w%\\w\\.length\\];\\w\\[\\w(?:%\\w\\.length|)\\]=\\w(?:;return \\w)?\\}";
// Regex source for the whole decipher function: a one-letter parameter that is
// split into characters, passed through one or more helper-object calls
// (captured as group 2) and joined back together.
const DECIPHER_REGEXP = [
  `function(?: ${VARIABLE_PART})?\\(([a-zA-Z])\\)\\{`,
  '\\1=\\1\\.split\\(""\\);\\s*',
  `((?:(?:\\1=)?${VARIABLE_PART}${VARIABLE_PART_ACCESS}\\(\\1,\\d+\\);)+)`,
  'return \\1\\.join\\(""\\)',
  "\\}",
].join("");

// Every helper method is "<key><method body>"; the helper object may contain
// any mix of the four known method shapes.
const HELPER_METHOD_ALTERNATIVES = [REVERSE_PART, SLICE_PART, SPLICE_PART, SWAP_PART]
  .map(methodBody => VARIABLE_PART_DEFINE + methodBody)
  .join("|");

// Regex source for the helper object declaration: group 1 is the object name,
// group 2 the list of its methods.
const HELPER_REGEXP = `var (${VARIABLE_PART})=\\{((?:(?:${HELPER_METHOD_ALTERNATIVES}),?\\n?)+)\\};`;
// Building blocks for the n-transform name patterns.
const SCVR = "[a-zA-Z0-9$_]"; // a single identifier character
const MCR = SCVR + "+"; // one or more identifier characters
const AAR = "\\[(\\d+)]"; // numeric index access, capturing the index

// Fragments shared by several of the patterns below.
const N_VIA_MEMBER = `${SCVR}="nn"\\[\\+${MCR}\\.${MCR}],${MCR}\\(${MCR}\\),${MCR}=${MCR}\\.${MCR}\\[${MCR}]\\|\\|null\\)`;
const N_VIA_GET = `${SCVR}="nn"\\[\\+${MCR}\\.${MCR}],${MCR}=${MCR}\\.get\\(${MCR}\\)\\)`;
const N_FALLBACK_CALL = `.+\\|\\|(${MCR})\\(""\\)`;
const N_INDEXED_ASSIGN = `&&\\(${MCR}=(${MCR})${AAR}`;
const N_DIRECT_CALL = `&&\\(${SCVR}=(${MCR})(?:${AAR})?\\(${SCVR}\\)`;

/**
 * Candidate patterns for locating the name of the n-parameter transform
 * function. Keys are RegExp sources, values are the index of the capture group
 * holding the name. Patterns are tried in insertion order.
 */
const N_TRANSFORM_NAME_REGEXPS = {
  [N_VIA_MEMBER + N_FALLBACK_CALL]: 1,
  [N_VIA_MEMBER + N_INDEXED_ASSIGN]: 1,
  [N_VIA_GET + N_FALLBACK_CALL]: 1,
  [N_VIA_GET + N_INDEXED_ASSIGN]: 1,
  [`\\(${SCVR}=String\\.fromCharCode\\(110\\),${SCVR}=${SCVR}\\.get\\(${SCVR}\\)\\)` + N_DIRECT_CALL]: 1,
  ['\\.get\\("n"\\)\\)' + N_DIRECT_CALL]: 1,
};
// LavaPlayer regexps
//
// Regex source for the whole n-transform function expression. It expects:
//   - a single parameter (group 1) that is split into an array (group 2),
//   - an array literal (group 4) assigned to a third variable (group 3),
//   - a try block (group 6) whose catch handler returns a string literal
//     concatenated with the original parameter,
//   - a final `return` that joins the split array back together (group 9).
// The string-literal suffix (group 8) uses an explicit `[A-Za-z0-9_-]` class;
// an `A-z` range would also admit the punctuation that sits between `Z` and
// `a` in ASCII (`[`, `\`, `]`, `^` and the backtick).
const N_TRANSFORM_REGEXP =
  "function\\(\\s*(\\w+)\\s*\\)\\s*\\{" +
  "var\\s*(\\w+)=(?:\\1\\.split\\(.*?\\)|String\\.prototype\\.split\\.call\\(\\1,.*?\\))," +
  "\\s*(\\w+)=(\\[.*?]);\\s*\\3\\[\\d+]" +
  "(.*?try)(\\{.*?})catch\\(\\s*(\\w+)\\s*\\)\\s*\\{" +
  '\\s*return"[\\w-]+([A-Za-z0-9_-]+)"\\s*\\+\\s*\\1\\s*}' +
  '\\s*return\\s*(\\2\\.join\\(""\\)|Array\\.prototype\\.join\\.call\\(\\2,.*?\\))};';
// Name of the sandbox variable that carries the signature into the decipher script.
const DECIPHER_ARGUMENT = "sig";
// Name of the sandbox variable that carries the `n` value into the n-transform script.
const N_ARGUMENT = "ncode";
/**
 * Compile `regex` (a RegExp source string) with the dotAll flag and match it
 * against `str`.
 *
 * @param {string} regex
 * @param {string} str
 * @returns {RegExpMatchArray} The match result.
 * @throws {Error} When the pattern does not match.
 */
const matchRegex = (regex, str) => {
  const found = str.match(new RegExp(regex, "s"));
  if (found) return found;
  throw new Error(`Could not match ${regex}`);
};

/**
 * Match `regex` against `str` and return a single capture group.
 *
 * @param {string} regex
 * @param {string} str
 * @param {number} [idx=0] Capture group index; 0 is the whole match.
 * @returns {string|undefined}
 * @throws {Error} When the pattern does not match.
 */
const matchGroup = (regex, str, idx = 0) => {
  const groups = matchRegex(regex, str);
  return groups[idx];
};
/**
 * Find a function name in the player script using a table of candidate
 * patterns.
 *
 * Patterns are tried in insertion order and the first one that matches wins.
 * If the matched name is itself assigned an array literal (`name=[...]`), the
 * content of that array is used as the name instead.
 *
 * @param {string} body Player script source.
 * @param {Object.<string, number>} regexps Map of RegExp source to the index of
 *   the capture group that holds the name.
 * @returns {string}
 * @throws {Error} When no pattern matches or the resolved name still contains `[`.
 */
const getFuncName = (body, regexps) => {
  let name;
  for (const pattern of Object.keys(regexps)) {
    try {
      name = matchGroup(pattern, body, regexps[pattern]);
    } catch (err) {
      // This pattern does not apply; try the next one.
      continue;
    }
    try {
      const arrayPattern = `${name.replace(/\$/g, "\\$")}=\\[([a-zA-Z0-9$\\[\\]]{2,})\\]`;
      name = matchGroup(arrayPattern, body, 1);
    } catch (err) {
      // Function name is not inside an array
    }
    break;
  }
  if (!name || name.includes("[")) throw Error("Could not match");
  return name;
};
// Variable name the extracted decipher function is bound to in the generated script.
const DECIPHER_FUNC_NAME = "DisTubeDecipherFunc";

/**
 * Build the decipher script by matching the helper object and the decipher
 * function directly with HELPER_REGEXP and DECIPHER_REGEXP.
 *
 * The generated code is: helper object declaration, the decipher function
 * bound to DECIPHER_FUNC_NAME, and a call passing DECIPHER_ARGUMENT.
 *
 * @param {string} body Player script source.
 * @returns {string|null} Script source, or `null` when either pattern fails.
 */
const extractDecipherFunc = (exports.d1 = body => {
  try {
    const helperSource = matchGroup(HELPER_REGEXP, body, 0);
    const funcSource = matchGroup(DECIPHER_REGEXP, body, 0);
    return [
      helperSource,
      `var ${DECIPHER_FUNC_NAME}=${funcSource};`,
      `${DECIPHER_FUNC_NAME}(${DECIPHER_ARGUMENT});`,
    ].join("");
  } catch (e) {
    return null;
  }
});
/**
 * Build the decipher script by first locating the decipher function's name
 * (via DECIPHER_NAME_REGEXPS), then pulling out its definition and the helper
 * object it calls into.
 *
 * @param {string} body Player script source.
 * @returns {string|null} Script source, or `null` when any lookup fails.
 */
const extractDecipherWithName = (exports.d2 = body => {
  try {
    const name = getFuncName(body, DECIPHER_NAME_REGEXPS);
    // `$` is the only character in a name that needs escaping in a RegExp source here.
    const escapedName = name.replace(/\$/g, "\\$");
    const funcSource = matchGroup("(" + escapedName + "=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})", body, 1);
    const funcDecl = "var " + funcSource + ";";
    // The helper object is the receiver of the first `;obj.method(` call in the function.
    const helperName = matchGroup(";([A-Za-z0-9_\\$]{2,})\\.\\w+\\(", funcDecl, 1);
    const escapedHelper = helperName.replace(/\$/g, "\\$");
    const helperDecl = matchGroup("(var " + escapedHelper + "=\\{[\\s\\S]+?\\}\\};)", body, 1);
    return helperDecl + funcDecl + name + "(" + DECIPHER_ARGUMENT + ");";
  } catch (e) {
    return null;
  }
});
/**
 * Run a list of extractors against the player script and compile the first
 * usable result.
 *
 * An extractor is skipped when it returns a falsy value, when it throws, or
 * when its (optionally post-processed) output fails to compile.
 *
 * @param {Array.<function(string): (string|null)>} extractFunctions
 * @param {string} body Player script source.
 * @param {?function(string): string} [postProcess=null] Applied to the
 *   extracted code before compilation.
 * @returns {vm.Script|null} Compiled script, or `null` if every extractor failed.
 */
const getExtractFunctions = (extractFunctions, body, postProcess = null) => {
  for (const extract of extractFunctions) {
    let compiled = null;
    try {
      const code = extract(body);
      if (code) {
        compiled = new vm.Script(postProcess ? postProcess(code) : code);
      }
    } catch (err) {
      compiled = null;
    }
    if (compiled) return compiled;
  }
  return null;
};
// Set once the "could not parse decipher function" warning has been printed.
let decipherWarning = false;

/**
 * Extract and compile the signature decipher script.
 *
 * The stream URL cannot be produced without it, but callers that do not need
 * stream URLs can continue, so a failure only logs a warning (once) after
 * saving the player script through `utils.saveDebugFile`.
 *
 * @param {string} body Player script source.
 * @returns {vm.Script|null}
 */
const extractDecipher = body => {
  // extractDecipherFunc is the faster strategy, so it goes first.
  const script = getExtractFunctions([extractDecipherFunc, extractDecipherWithName], body);
  if (script || decipherWarning) return script;

  const debugFile = utils.saveDebugFile("base.js", body);
  console.warn(
    [
      "\x1b[33mWARNING:\x1B[0m Could not parse decipher function.\n",
      "Stream URLs will be missing.\n",
      `Please report this issue by uploading the "${debugFile}" file on https://github.com/distubejs/ytdl-core/issues/144.`,
    ].join(""),
  );
  decipherWarning = true;
  return script;
};
// Variable name the extracted n-transform function is bound to in the generated script.
const N_TRANSFORM_FUNC_NAME = "DisTubeNTransformFunc";

/**
 * Build the n-transform script by matching the whole function with
 * N_TRANSFORM_REGEXP.
 *
 * No separator is inserted after the function source: the pattern's match
 * already ends with `};`.
 *
 * @param {string} body Player script source.
 * @returns {string|null} Script source, or `null` when the pattern fails.
 */
const extractNTransformFunc = (exports.n1 = body => {
  try {
    const funcSource = matchGroup(N_TRANSFORM_REGEXP, body, 0);
    const declaration = "var " + N_TRANSFORM_FUNC_NAME + "=" + funcSource;
    const invocation = N_TRANSFORM_FUNC_NAME + "(" + N_ARGUMENT + ");";
    return declaration + invocation;
  } catch (e) {
    return null;
  }
});
/**
 * Build the n-transform script by first locating the function's name (via
 * N_TRANSFORM_NAME_REGEXPS) and then pulling out its definition.
 *
 * @param {string} body Player script source.
 * @returns {string|null} Script source, or `null` when any lookup fails.
 */
const extractNTransformWithName = (exports.n2 = body => {
  try {
    const name = getFuncName(body, N_TRANSFORM_NAME_REGEXPS);
    // `$` is the only character in a name that needs escaping in a RegExp source here.
    const escapedName = name.replace(/\$/g, "\\$");
    const funcSource = matchGroup("(" + escapedName + "=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})", body, 1);
    return "var " + funcSource + ";" + name + "(" + N_ARGUMENT + ");";
  } catch (e) {
    return null;
  }
});
// Set once the "could not parse n transform function" warning has been printed.
let nTransformWarning = false;

/**
 * Extract and compile the n-parameter transform script.
 *
 * Before compilation, the first `if (typeof x === ...) return y;` statement in
 * the extracted code is removed. The transform is optional: without it
 * downloads still work but are bottlenecked, so a failure only logs a warning
 * (once) after saving the player script through `utils.saveDebugFile`.
 *
 * @param {string} body Player script source.
 * @returns {vm.Script|null}
 */
const extractNTransform = body => {
  const stripTypeofGuard = code =>
    code.replace(/if\s*\(\s*typeof\s*[\w$]+\s*===?.*?\)\s*return\s+[\w$]+\s*;?/, "");
  // extractNTransformFunc is the faster strategy, so it goes first.
  const script = getExtractFunctions(
    [extractNTransformFunc, extractNTransformWithName],
    body,
    stripTypeofGuard,
  );
  if (script || nTransformWarning) return script;

  const debugFile = utils.saveDebugFile("base.js", body);
  console.warn(
    [
      "\x1b[33mWARNING:\x1B[0m Could not parse n transform function.\n",
      `Please report this issue by uploading the "${debugFile}" file on https://github.com/distubejs/ytdl-core/issues/144.`,
    ].join(""),
  );
  nTransformWarning = true;
  return script;
};
/**
* Extracts the actions that should be taken to decipher a signature
* and transform the n parameter
*
* @param {string} body
* @returns {Array.<string>}
*/
exports.extractFunctions = body => [extractDecipher(body), extractNTransform(body)];
/**
* Apply decipher and n-transform to individual format
*
* @param {Object} format
* @param {vm.Script} decipherScript
* @param {vm.Script} nTransformScript
*/
exports.setDownloadURL = (format, decipherScript, nTransformScript) => {
if (!decipherScript) return;
const decipher = url => {
const args = querystring.parse(url);
if (!args.s) return args.url;
const components = new URL(decodeURIComponent(args.url));
const context = {};
context[DECIPHER_ARGUMENT] = decodeURIComponent(args.s);
components.searchParams.set(args.sp || "sig", decipherScript.runInNewContext(context));
return components.toString();
};
const nTransform = url => {
const components = new URL(decodeURIComponent(url));
const n = components.searchParams.get("n");
if (!n || !nTransformScript) return url;
const context = {};
context[N_ARGUMENT] = n;
components.searchParams.set("n", nTransformScript.runInNewContext(context));
return components.toString();
};
const cipher = !format.url;
const url = format.url || format.signatureCipher || format.cipher;
format.url = nTransform(cipher ? decipher(url) : url);
delete format.signatureCipher;
delete format.cipher;
};
/**
* Applies decipher and n parameter transforms to all format URL's.
*
* @param {Array.<Object>} formats
* @param {string} html5player
* @param {Object} options
*/
exports.decipherFormats = async (formats, html5player, options) => {
const decipheredFormats = {};
const [decipherScript, nTransformScript] = await exports.getFunctions(html5player, options);
formats.forEach(format => {
exports.setDownloadURL(format, decipherScript, nTransformScript);
decipheredFormats[format.url] = format;
});
return decipheredFormats;
};