// rsshub
// Version:
// Make RSS Great Again!
// 94 lines (92 loc) • 3.56 kB
// JavaScript
import { t as logger_default } from "./logger-Czu8UMNd.mjs";
import { t as ofetch_default } from "./ofetch-BIyrKU3Y.mjs";
import { t as parseDate } from "./parse-date-BrP7mxXf.mjs";
import { t as cache_default } from "./cache-Bo__VnGm.mjs";
import { n as setCookies, t as getCookies } from "./puppeteer-utils-BK3JC9qW.mjs";
import { load } from "cheerio";
//#region lib/routes/cw/utils.ts
// Module-level memo of the site cookie; it starts undefined and is assigned by getCookie.
let cookie;
// Origin that every page path and the cookie endpoint are appended to.
const baseUrl = "https://www.cw.com.tw";
/**
 * Route table keyed by listing type.
 *
 * Each entry provides:
 *   - pageUrl(channel): the site-relative path of the listing page;
 *   - limit: the default number of entries used when the request gives none.
 *
 * `channel` comes from the request, so it is percent-encoded before being
 * placed in the URL. Without that, reserved characters such as `&`, `?`, `#`
 * or `/` would change which path or query parameters are actually requested.
 * Ordinary alphanumeric identifiers are unaffected by the encoding.
 */
const pathMap = {
  today: {
    pageUrl: () => "/today",
    limit: 30
  },
  master: {
    pageUrl: (channel) => `/masterChannel.action?idMasterChannel=${encodeURIComponent(channel)}`,
    limit: 12
  },
  sub: {
    pageUrl: (channel) => `/subchannel.action?idSubChannel=${encodeURIComponent(channel)}`,
    limit: 12
  },
  author: {
    pageUrl: (channel) => `/author/${encodeURIComponent(channel)}`,
    limit: 10
  }
};
/**
 * Resolve the site cookie used for subsequent requests.
 *
 * The value is memoised in the module-level `cookie` variable. When that is
 * still empty it is obtained through `tryGet` under the key "cw:cookie"; the
 * producer opens a temporary browser page on the cookie-bar endpoint and reads
 * the cookies from it with `getCookies`.
 *
 * @param browser Browser object providing `newPage()`.
 * @param tryGet  Cache accessor invoked as `tryGet(key, producer)`.
 * @returns The memoised cookie value.
 */
const getCookie = async (browser, tryGet) => {
  if (!cookie) cookie = await tryGet("cw:cookie", async () => {
    const page = await browser.newPage();
    try {
      await page.setRequestInterception(true);
      page.on("request", (request) => {
        // Let only documents and scripts through; every other resource type is aborted.
        request.resourceType() === "document" || request.resourceType() === "script" ? request.continue() : request.abort();
      });
      logger_default.http(`Requesting ${baseUrl}/user/get/cookie-bar`);
      await page.goto(`${baseUrl}/user/get/cookie-bar`, { waitUntil: "domcontentloaded" });
      cookie = await getCookies(page);
      return cookie;
    } finally {
      // Close the page on failure too, so a failed navigation does not leave it open.
      await page.close();
    }
  });
  return cookie;
};
/**
 * Render a listing page in the browser and turn it into feed items.
 *
 * @param path    Key of `pathMap` selecting the listing type.
 * @param browser Browser object providing `newPage()`; also passed on to item parsing.
 * @param ctx     Request context; reads the `channel` route param and the
 *                optional `limit` query parameter.
 * @returns `{ $, items }`: the loaded listing document and the parsed articles.
 */
const parsePage = async (path, browser, ctx) => {
  const route = pathMap[path];
  const pageUrl = `${baseUrl}${route.pageUrl(ctx.req.param("channel"))}`;

  // Read the query once. A non-numeric value would become NaN and slice the
  // list down to nothing, so anything that is not a positive finite number
  // falls back to the route default.
  const limitQuery = ctx.req.query("limit");
  const requestedLimit = limitQuery ? Number(limitQuery) : route.limit;
  const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? requestedLimit : route.limit;

  const siteCookie = await getCookie(browser, cache_default.tryGet);
  const page = await browser.newPage();
  let response;
  try {
    await page.setRequestInterception(true);
    page.on("request", (request) => {
      // Let only documents and scripts through; every other resource type is aborted.
      request.resourceType() === "document" || request.resourceType() === "script" ? request.continue() : request.abort();
    });
    await setCookies(page, siteCookie, "cw.com.tw");
    logger_default.http(`Requesting ${pageUrl}`);
    await page.goto(pageUrl, { waitUntil: "domcontentloaded" });
    // The listing entries are selected by ".caption"; wait until one is present.
    await page.waitForSelector(".caption");
    response = await page.evaluate(() => document.documentElement.innerHTML);
  } finally {
    // Close the page on failure too (navigation error, selector timeout).
    await page.close();
  }

  const $ = load(response);
  return {
    $,
    items: await parseItems(parseList($, limit), browser, cache_default.tryGet)
  };
};
/**
 * Extract the first `limit` entries from a loaded listing document.
 *
 * The element list is truncated before mapping, so entries past the limit are
 * never parsed; the returned array is the same as mapping first and slicing
 * afterwards.
 *
 * @param $     Loaded document, queried with the ".caption" selector.
 * @param limit Maximum number of entries to return.
 * @returns Array of `{ title, link, pubDate }` objects in document order.
 */
const parseList = ($, limit) => $(".caption").toArray().slice(0, limit).map((element) => {
  const caption = $(element);
  return {
    title: caption.find("h3").text(),
    link: caption.find("h3 a").attr("href"),
    pubDate: parseDate(caption.find("time").text())
  };
});
/**
 * Fetch every article in `list` and enrich its entry with full details.
 *
 * Each article is fetched inside `tryGet(item.link, producer)`. The producer
 * mutates and returns the same `item` object, filling in `title`, `category`,
 * `pubDate`, `author` and `description`.
 *
 * @param list    Entries carrying at least a `link`.
 * @param browser Browser object; supplies the User-Agent and is passed to `getCookie`.
 * @param tryGet  Cache accessor invoked as `tryGet(key, producer)`.
 * @returns Promise resolving to the enriched entries, in input order.
 */
const parseItems = (list, browser, tryGet) => Promise.all(list.map((item) => tryGet(item.link, async () => {
  const $ = load(await ofetch_default(item.link, { headers: {
    Cookie: await getCookie(browser, tryGet),
    // Awaited so the header receives the resolved string; Puppeteer's
    // Browser.userAgent() returns a Promise. Awaiting is harmless if it is sync.
    "User-Agent": await browser.userAgent()
  } }));

  // Left unguarded on purpose: a page without parseable JSON-LD still throws
  // here, exactly as before, so nothing is returned for such a page.
  const meta = JSON.parse($("head script[type=\"application/ld+json\"]").eq(0).text());

  $(".article__head .breadcrumb, .article__head h1, .article__provideViews, .ad").remove();
  $("img.lazyload").each((_, img) => {
    // Copy the lazy-load URL from data-src into src.
    if (img.attribs["data-src"]) {
      img.attribs.src = img.attribs["data-src"];
      delete img.attribs["data-src"];
    }
  });

  item.title = $("head title").text();

  // A missing or empty keywords meta gives an empty category list instead of a TypeError.
  const keywords = $("meta[name=keywords]").attr("content");
  item.category = keywords ? keywords.split(",") : [];

  item.pubDate = parseDate(meta.datePublished);

  // Replace every comma, not only the first one, and tolerate JSON-LD that has
  // no author name or no publisher.
  const authorName = meta.author && typeof meta.author.name === "string" ? meta.author.name.replace(/,/g, " ") : "";
  item.author = authorName || (meta.publisher ? meta.publisher.name : undefined);

  // .html() returns null when nothing matches; avoid concatenating the text "null".
  item.description = ($(".article__head .container").html() || "") + ($(".article__content").html() || "");
  return item;
})));
//#endregion
// Exported under shortened aliases: parsePage as `n`, baseUrl as `t`.
export { parsePage as n, baseUrl as t };