@lzwme/m3u8-dl
Version:
A free, open-source, and powerful m3u8 video batch downloader with multi-threaded downloading, play-while-downloading, WebUI management, video parsing, and more.
126 lines (125 loc) • 5.94 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.getM3u8Urls = getM3u8Urls;
const fe_utils_1 = require("@lzwme/fe-utils");
const utils_js_1 = require("./utils.js");
/**
 * Normalise a candidate title into a short episode label.
 *
 * @param {unknown} text - Candidate title text.
 * @returns {string} `text` itself when it consists only of digits, the first
 *   `第<digits>集` / `第<digits>期` fragment when one is present, otherwise ''.
 *   Non-string and empty inputs also yield ''.
 */
function getFormatTitle(text) {
    const isUsable = typeof text === 'string' && text.length > 0;
    if (!isUsable) {
        return '';
    }
    // A purely numeric label is already an episode number.
    const digitsOnly = /^\d+$/;
    if (digitsOnly.test(text)) {
        return text;
    }
    const found = text.match(/第(\d+)(集|期)/);
    return found === null ? '' : found[0];
}
/**
 * Extract m3u8/mp4 playback URLs from the page at `opts.url`.
 *
 * The page HTML is scanned with a regular expression first. When nothing is
 * found and `deep > 0`, same-origin `<a>` links are followed recursively
 * (with `deep - 1`) and their results are merged into the returned map.
 *
 * @param {object} opts
 * @param {string} opts.url - Page to inspect (must be an absolute URL).
 * @param {object} [opts.headers={}] - Extra request headers, passed through `formatHeaders`.
 * @param {number} [opts.deep=1] - How many levels of sub-pages may be followed.
 * @param {Set<string>} [opts.visited] - URLs already handled; shared with recursive calls.
 * @param {number} [opts.maxUrls=3000] - Stop visiting sub-pages once this many URLs were collected.
 * @param {number} [opts.maxHtmlSize=5242880] - Pages whose HTML is longer than this are skipped.
 * @param {string|RegExp} [opts.subUrlRegex] - Filter for sub-page URLs; in a string, each run of `*` is
 *   replaced by `.+` before compiling. Without it, links are kept only when their text looks like an
 *   episode/quality label.
 * @param {number} [opts.concurrency=3] - Number of sub-pages processed in parallel.
 * @returns {Promise<Map<string, string>>} Map of media URL to title.
 * @throws {TypeError} When `opts.url` is not a valid absolute URL.
 */
async function getM3u8Urls(opts) {
    const options = {
        headers: {},
        deep: 1,
        visited: new Set(),
        maxUrls: 3000,
        maxHtmlSize: 5 * 1024 * 1024, // 5MB
        ...opts,
    };
    const baseUrl = new URL(options.url).origin;
    const req = new fe_utils_1.Request({
        headers: { 'content-type': 'text/html; charset=UTF-8', referer: baseUrl, ...(0, utils_js_1.formatHeaders)(options.headers) },
        // NOTE(review): TLS certificate verification is disabled for these requests.
        reqOptions: { rejectUnauthorized: false },
    });
    const { data, response } = await req.get(options.url);
    const m3u8Urls = new Map();
    if (!response.statusCode || response.statusCode >= 400) {
        utils_js_1.logger.error('获取页面失败:', fe_utils_1.color.red(options.url), response.statusCode, response.statusMessage, data);
        return m3u8Urls;
    }
    // Guard against non-string bodies so the length check and regex scans below cannot throw.
    const html = typeof data === 'string' ? data : String(data ?? '');
    // Skip oversized pages.
    if (options.maxHtmlSize && html.length > options.maxHtmlSize) {
        utils_js_1.logger.warn('HTML 内容过大,跳过处理:', fe_utils_1.color.yellow(options.url), `${(html.length / 1024 / 1024).toFixed(2)}MB`, `(限制: ${(options.maxHtmlSize / 1024 / 1024).toFixed(2)}MB)`);
        return m3u8Urls;
    }
    const title = (/<title>([^<]+)</.exec(html)?.[1].split('-')[0] || '').replace(/在线播放|在线观看|详情|介绍|《|》/g, '').trim();
    // 1. Collect media URLs that appear literally in the HTML.
    const m3u8Regex = /https?:[^\s'":]+\.(m3u8|mp4)(\?[^\s'"]*)?/gi;
    const pageTitle = getFormatTitle(title) || title;
    for (const [rawUrl] of html.matchAll(m3u8Regex)) {
        // URLs embedded in JSON/JS strings often carry escaped slashes.
        const href = rawUrl.replaceAll('\\/', '/');
        if (!m3u8Urls.has(href))
            m3u8Urls.set(href, pageTitle);
    }
    // Several links on one page without an episode marker in the title: number them from 01.
    if (m3u8Urls.size > 3 && !/第\d+(集|期)/.test(title)) {
        let idx = 1;
        for (const [key] of m3u8Urls) {
            m3u8Urls.set(key, `${title}第${String(idx++).padStart(2, '0')}集`);
        }
    }
    // 2. Nothing found and depth left: follow <a> links and search the sub-pages.
    if (m3u8Urls.size === 0 && options.deep > 0) {
        utils_js_1.logger.debug('未获取到 m3u8 地址', fe_utils_1.color.gray(options.url), html.length);
        options.visited.add(options.url);
        // `options` is a local copy, so compiling the filter once here also serves recursive calls.
        if (typeof options.subUrlRegex === 'string') {
            options.subUrlRegex = new RegExp(options.subUrlRegex.replaceAll(/\*+/g, '.+'));
        }
        const subUrlRegex = options.subUrlRegex;
        const aTagRegex = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>(.*?)<\/a>/gi;
        const subPageUrls = new Map();
        for (const [, rawHref, innerHtml] of html.matchAll(aTagRegex)) {
            let target;
            try {
                // Relative links are relative to the current page, not to the site root.
                target = new URL(rawHref, options.url);
            }
            catch {
                // A malformed href must not abort the whole extraction.
                continue;
            }
            // Compare origins exactly; a prefix test would accept e.g. "https://a.com.evil.net".
            if (target.origin !== baseUrl)
                continue;
            const href = target.toString();
            if (options.visited.has(href))
                continue;
            const text = innerHtml.replace(/<[^>]+>/g, '');
            if (subUrlRegex) {
                // Reset state so a caller-supplied /g or /y regex is tested from the start each time.
                subUrlRegex.lastIndex = 0;
                if (!subUrlRegex.test(href))
                    continue;
            }
            else if (!/集|期|HD|高清|抢先|BD/.test(text))
                continue;
            subPageUrls.set(href, text);
            utils_js_1.logger.debug(' > 提取到子页面: ', fe_utils_1.color.gray(href), text);
        }
        let failedSubPages = 0;
        const taskList = Array.from(subPageUrls, ([href, text]) => async () => {
            // Reached the URL limit: stop visiting further sub-pages.
            if (options.maxUrls && m3u8Urls.size >= options.maxUrls)
                return;
            // Too many sub-pages yielded nothing and there is still no result: give up on the rest.
            if (failedSubPages > 3 && m3u8Urls.size === 0)
                return;
            try {
                options.visited.add(href);
                const subUrls = await getM3u8Urls({ ...options, url: href, deep: options.deep - 1 });
                utils_js_1.logger.debug(' > 从子页面提取: ', fe_utils_1.color.gray(href), text, subUrls);
                if (subUrls.size === 0 && m3u8Urls.size === 0) {
                    failedSubPages++;
                    if (failedSubPages > 3) {
                        utils_js_1.logger.warn(`连续查找 ${failedSubPages} 个子页面均未获取到,不再继续`, options.url, href);
                    }
                    return;
                }
                for (const [u, t] of subUrls) {
                    // Prefer an episode label from the link text, then the sub-page title,
                    // then any title already stored for this URL; otherwise keep the sub-page title.
                    let stitle = t;
                    for (const s of [text, t, m3u8Urls.get(u) || '']) {
                        const ft = getFormatTitle(s);
                        if (ft) {
                            stitle = ft;
                            break;
                        }
                    }
                    utils_js_1.logger.debug(' > m3u8地址: ', fe_utils_1.color.gray(u), fe_utils_1.color.green(stitle));
                    m3u8Urls.set(u, stitle.trim());
                }
            }
            catch (err) {
                // Best effort: one failing sub-page is logged and does not fail the whole run.
                utils_js_1.logger.warn(' > 尝试访问子页面异常: ', fe_utils_1.color.red(href), err.message);
            }
        });
        await (0, fe_utils_1.concurrency)(taskList, Math.max(1, +options.concurrency || 3));
    }
    return m3u8Urls;
}
// logger.updateOptions({ levelType: 'debug' });
// getM3u8Urls({ url: process.argv.slice(2)[0] }).then(d => console.log(d));