@lzwme/m3u8-dl
Version:
Batch download of m3u8 files and convert to mp4
108 lines (107 loc) • 5.04 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.getM3u8Urls = getM3u8Urls;
const fe_utils_1 = require("@lzwme/fe-utils");
const utils_js_1 = require("./utils.js");
/**
 * Reduce a raw title to a short episode label.
 *
 * - A non-string or empty value yields `''`.
 * - A string made only of digits is returned unchanged.
 * - Otherwise the first `第<digits>集` / `第<digits>期` fragment is returned,
 *   or `''` when the text contains no such fragment.
 *
 * @param {unknown} text - Candidate title text.
 * @returns {string} The episode label, or an empty string.
 */
function getFormatTitle(text) {
    const isUsable = typeof text === 'string' && text.length > 0;
    if (!isUsable) {
        return '';
    }
    // Purely numeric titles are already a usable episode number.
    if (/^\d+$/.test(text)) {
        return text;
    }
    const episode = text.match(/第(\d+)(集|期)/);
    return episode ? episode[0] : '';
}
/**
 * Extract m3u8/mp4 playback URLs from the page at `opts.url`.
 *
 * The page body is first scanned with a regular expression for absolute
 * `http(s)` links ending in `.m3u8` or `.mp4`. When nothing is found and
 * `deep` is greater than 0, same-origin `<a href>` links are collected and
 * each one is searched recursively with `deep - 1`.
 *
 * @param {object} opts
 * @param {string} opts.url - Page to inspect. Must be an absolute URL (`new URL` throws otherwise).
 * @param {object} [opts.headers] - Extra request headers, passed through `formatHeaders`.
 * @param {number} [opts.deep=1] - How many levels of sub pages may be followed.
 * @param {Set<string>} [opts.visited] - URLs already processed; shared with recursive calls.
 * @param {string|RegExp} [opts.subUrlRegex] - Filter for sub page URLs. In a string,
 *   each run of `*` is replaced by `.+` before it is compiled into a RegExp. When
 *   omitted, a link is followed only if its text contains one of
 *   `集`, `期`, `HD`, `高清`, `抢先`, `BD`.
 * @returns {Promise<Map<string, string>>} Map of media URL to title. Empty when the
 *   request fails (missing status code or status >= 400) or nothing is found.
 */
async function getM3u8Urls(opts) {
    const options = { headers: {}, deep: 1, visited: new Set(), ...opts };
    const baseUrl = new URL(options.url).origin;
    const req = new fe_utils_1.Request({
        headers: { 'content-type': 'text/html; charset=UTF-8', referer: baseUrl, ...(0, utils_js_1.formatHeaders)(options.headers) },
        // NOTE(review): presumably disables TLS certificate verification for the request - confirm this is intended.
        reqOptions: { rejectUnauthorized: false },
    });
    const { data, response } = await req.get(options.url);
    const m3u8Urls = new Map();
    if (!response.statusCode || response.statusCode >= 400) {
        utils_js_1.logger.error('获取页面失败:', fe_utils_1.color.red(options.url), response.statusCode, response.statusMessage, data);
        return m3u8Urls;
    }
    // Work on a string so the regex scans and `html.length` behave consistently
    // even if the request helper returned a non-string body.
    const html = typeof data === 'string' ? data : String(data ?? '');
    // Page title: text before the first '-', with common "watch online" style words removed.
    const title = (/<title>([^<]+)</.exec(html)?.[1].split('-')[0] || '').replace(/在线播放|在线观看|详情|介绍|《|》/g, '').trim();
    const pageLabel = getFormatTitle(title) || title;
    // 1. Match media URLs directly in the page source.
    for (const found of html.matchAll(/https?:[^\s'":]+\.(m3u8|mp4)(\?[^\s'"]*)?/gi)) {
        // Links embedded in JSON/JS strings have their slashes escaped as "\/".
        const href = found[0].replaceAll('\\/', '/');
        if (!m3u8Urls.has(href)) {
            m3u8Urls.set(href, pageLabel);
        }
    }
    // Several links on one page whose title carries no episode marker: number them 01, 02, ...
    if (m3u8Urls.size > 3 && !/第\d+(集|期)/.test(title)) {
        let idx = 0;
        for (const key of [...m3u8Urls.keys()]) {
            idx += 1;
            m3u8Urls.set(key, `${title}第${String(idx).padStart(2, '0')}集`);
        }
    }
    // 2. Nothing found and depth remains: collect <a href> links and search them recursively.
    if (m3u8Urls.size === 0 && options.deep > 0) {
        utils_js_1.logger.debug('未获取到 m3u8 地址', fe_utils_1.color.gray(options.url), html.length);
        options.visited.add(options.url);
        // Compile a string pattern once; recursive calls receive the compiled RegExp via `options`.
        if (options.subUrlRegex && typeof options.subUrlRegex === 'string') {
            options.subUrlRegex = new RegExp(options.subUrlRegex.replaceAll(/\*+/g, '.+'));
        }
        const subPageUrls = new Map();
        let failedSubPages = 0;
        for (const aMatch of html.matchAll(/<a\s+[^>]*href=["']([^"']+)["'][^>]*>(.*?)<\/a>/gi)) {
            let target;
            try {
                // Relative links are relative to the current page, not to the site root.
                target = new URL(aMatch[1], options.url);
            }
            catch {
                // A malformed href must not abort the whole scan; skip this anchor.
                continue;
            }
            const href = target.toString();
            const text = aMatch[2].replace(/<[^>]+>/g, '');
            // Compare origins exactly: a prefix test would also accept hosts such as "<origin>.evil.test".
            if (target.origin !== baseUrl || options.visited.has(href)) {
                continue;
            }
            if (options.subUrlRegex) {
                // A caller-supplied RegExp with the g/y flag keeps state between test() calls.
                options.subUrlRegex.lastIndex = 0;
                if (!options.subUrlRegex.test(href)) {
                    continue;
                }
            }
            else if (!/集|期|HD|高清|抢先|BD/.test(text)) {
                continue;
            }
            subPageUrls.set(href, text);
            utils_js_1.logger.debug(' > 提取到子页面: ', fe_utils_1.color.gray(href), text);
        }
        for (const [href, text] of subPageUrls) {
            try {
                options.visited.add(href);
                const subUrls = await getM3u8Urls({ ...options, url: href, deep: options.deep - 1 });
                utils_js_1.logger.debug(' > 从子页面提取: ', fe_utils_1.color.gray(href), text, subUrls);
                if (subUrls.size === 0 && m3u8Urls.size === 0) {
                    failedSubPages++;
                    // Give up once more than three sub pages yielded nothing and no result exists yet.
                    if (failedSubPages > 3) {
                        utils_js_1.logger.warn(`连续查找 ${failedSubPages} 个子页面均未获取到,不再继续`, options.url, href);
                        return m3u8Urls;
                    }
                }
                for (const [u, t] of subUrls) {
                    // Prefer an episode label from the link text, then the sub page title,
                    // then any title already stored for this URL; otherwise keep the sub page title.
                    const stitle = [text, t, m3u8Urls.get(u) || ''].map((s) => getFormatTitle(s)).find(Boolean) || t;
                    utils_js_1.logger.debug(' > m3u8地址: ', fe_utils_1.color.gray(u), fe_utils_1.color.green(stitle));
                    m3u8Urls.set(u, stitle.trim());
                }
            }
            catch (err) {
                // Best effort: a failing sub page is logged and the remaining ones are still tried.
                utils_js_1.logger.warn(' > 尝试访问子页面异常: ', fe_utils_1.color.red(href), err.message);
            }
        }
    }
    return m3u8Urls;
}
// logger.updateOptions({ levelType: 'debug' });
// getM3u8Urls(process.argv.slice(2)[0]).then(d => console.log(d));