UNPKG

rsshub

Version:
94 lines (92 loc) 3.56 kB
import { t as logger_default } from "./logger-Czu8UMNd.mjs";
import { t as ofetch_default } from "./ofetch-BIyrKU3Y.mjs";
import { t as parseDate } from "./parse-date-BrP7mxXf.mjs";
import { t as cache_default } from "./cache-Bo__VnGm.mjs";
import { n as setCookies, t as getCookies } from "./puppeteer-utils-BK3JC9qW.mjs";
import { load } from "cheerio";

//#region lib/routes/cw/utils.ts

// Module-level memo of the site cookie so it is resolved at most once per process.
let cookie;

const baseUrl = "https://www.cw.com.tw";

// Per-route configuration: how to build the listing path for a channel and
// how many list entries to keep when the request carries no `limit` query.
const pathMap = {
  today: {
    pageUrl: () => "/today",
    limit: 30,
  },
  master: {
    pageUrl: (channel) => `/masterChannel.action?idMasterChannel=${channel}`,
    limit: 12,
  },
  sub: {
    pageUrl: (channel) => `/subchannel.action?idSubChannel=${channel}`,
    limit: 12,
  },
  author: {
    pageUrl: (channel) => `/author/${channel}`,
    limit: 10,
  },
};

/**
 * Enables request interception on a page and aborts every request whose
 * resource type is neither "document" nor "script".
 *
 * @param {object} page - Page object returned by `browser.newPage()`.
 * @returns {Promise<void>}
 */
const allowOnlyDocumentsAndScripts = async (page) => {
  await page.setRequestInterception(true);
  page.on("request", (request) => {
    const type = request.resourceType();
    if (type === "document" || type === "script") {
      request.continue();
    } else {
      request.abort();
    }
  });
};

/**
 * Picks the number of list entries to keep.
 *
 * @param {string|undefined} rawLimit - Value of the `limit` query parameter.
 * @param {number} fallback - Route default used when `rawLimit` is absent or unusable.
 * @returns {number} The requested limit, or `fallback` when the query value is
 *   missing, not a number, or negative.
 */
const resolveLimit = (rawLimit, fallback) => {
  if (!rawLimit) {
    return fallback;
  }
  const parsed = Number(rawLimit);
  // A NaN limit would make `slice(0, NaN)` silently return an empty feed.
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
};

/**
 * Returns the site cookie, loading it through `tryGet` under the key
 * "cw:cookie" on first use and memoising it in the module-level `cookie`.
 *
 * @param {object} browser - Browser handle exposing `newPage()` (presumably a Puppeteer Browser).
 * @param {Function} tryGet - Cache accessor called as `tryGet(key, producer)`.
 * @returns {Promise<*>} Whatever `getCookies(page)` produced (later sent as the `Cookie` header).
 */
const getCookie = async (browser, tryGet) => {
  if (!cookie) {
    cookie = await tryGet("cw:cookie", async () => {
      const page = await browser.newPage();
      try {
        await allowOnlyDocumentsAndScripts(page);
        logger_default.http(`Requesting ${baseUrl}/user/get/cookie-bar`);
        await page.goto(`${baseUrl}/user/get/cookie-bar`, { waitUntil: "domcontentloaded" });
        return await getCookies(page);
      } finally {
        // Close the page even when navigation fails so it is not leaked.
        await page.close();
      }
    });
  }
  return cookie;
};

/**
 * Loads a listing page in the browser, extracts its entries and resolves
 * every entry to a full article item.
 *
 * @param {string} path - Key of `pathMap` ("today", "master", "sub" or "author").
 * @param {object} browser - Browser handle exposing `newPage()` and `userAgent()`.
 * @param {object} ctx - Request context; `ctx.req.param("channel")` and
 *   `ctx.req.query("limit")` are read.
 * @returns {Promise<{$: Function, items: Array<object>}>} The cheerio root of
 *   the listing page and the resolved items.
 */
const parsePage = async (path, browser, ctx) => {
  const route = pathMap[path];
  const pageUrl = `${baseUrl}${route.pageUrl(ctx.req.param("channel"))}`;
  const siteCookie = await getCookie(browser, cache_default.tryGet);

  const page = await browser.newPage();
  let html;
  try {
    await allowOnlyDocumentsAndScripts(page);
    await setCookies(page, siteCookie, "cw.com.tw");
    logger_default.http(`Requesting ${pageUrl}`);
    await page.goto(pageUrl, { waitUntil: "domcontentloaded" });
    await page.waitForSelector(".caption");
    html = await page.evaluate(() => document.documentElement.innerHTML);
  } finally {
    // Close the page even when navigation or the selector wait fails.
    await page.close();
  }

  const $ = load(html);
  const limit = resolveLimit(ctx.req.query("limit"), route.limit);
  return {
    $,
    items: await parseItems(parseList($, limit), browser, cache_default.tryGet),
  };
};

/**
 * Extracts up to `limit` entries from the `.caption` elements of a listing page.
 *
 * @param {Function} $ - Cheerio root of the listing page.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<{title: string, link: (string|undefined), pubDate: *}>}
 */
const parseList = ($, limit) =>
  $(".caption")
    .toArray()
    // Slice first so entries beyond the limit are never parsed.
    .slice(0, limit)
    .map((element) => {
      const caption = $(element);
      return {
        title: caption.find("h3").text(),
        link: caption.find("h3 a").attr("href"),
        pubDate: parseDate(caption.find("time").text()),
      };
    });

/**
 * Fetches every article in `list` (through `tryGet`, keyed by the article
 * link) and fills in title, category, pubDate, author and description.
 *
 * @param {Array<object>} list - Entries produced by `parseList`; they are updated in place.
 * @param {object} browser - Browser handle exposing `userAgent()`.
 * @param {Function} tryGet - Cache accessor called as `tryGet(key, producer)`.
 * @returns {Promise<Array<object>>} Resolves to the values returned by `tryGet`.
 *   Rejects if any article fails to load or lacks parseable ld+json metadata.
 */
const parseItems = (list, browser, tryGet) =>
  Promise.all(
    list.map((item) =>
      tryGet(item.link, async () => {
        const html = await ofetch_default(item.link, {
          headers: {
            Cookie: await getCookie(browser, tryGet),
            // Puppeteer's userAgent() returns a Promise; awaiting is harmless
            // if an implementation returns a plain string.
            "User-Agent": await browser.userAgent(),
          },
        });
        const $ = load(html);

        // Kept strict on purpose: a page without valid ld+json throws here,
        // so the producer never returns a malformed item to `tryGet`.
        const meta = JSON.parse($("head script[type=\"application/ld+json\"]").eq(0).text());

        $(".article__head .breadcrumb, .article__head h1, .article__provideViews, .ad").remove();

        // Promote the lazy-load source to a real `src`.
        $("img.lazyload").each((_, img) => {
          if (img.attribs["data-src"]) {
            img.attribs.src = img.attribs["data-src"];
            delete img.attribs["data-src"];
          }
        });

        const keywords = $("meta[name=keywords]").attr("content");
        // `.html()` yields null for an empty selection; avoid emitting "null".
        const headHtml = $(".article__head .container").html() || "";
        const bodyHtml = $(".article__content").html() || "";

        item.title = $("head title").text();
        item.category = keywords ? keywords.split(",") : [];
        item.pubDate = parseDate(meta.datePublished);
        // Replace every comma, not just the first one.
        item.author = meta.author.name.replace(/,/g, " ") || meta.publisher.name;
        item.description = headHtml + bodyHtml;
        return item;
      })
    )
  );

//#endregion
export { parsePage as n, baseUrl as t };