// UNPKG file listing — package: rsshub — Version: (not captured) — 98 lines (96 loc) 3.96 kB
import { n as init_esm_shims, t as __dirname } from "./esm-shims-CzJ_djXG.mjs";
import { t as parseDate } from "./parse-date-BrP7mxXf.mjs";
import { t as art } from "./render-BQo6B4tL.mjs";
import { t as got_default } from "./got-KxxWdaxq.mjs";
import { t as timezone } from "./timezone-D8cuwzTY.mjs";
import path from "node:path";
import { load } from "cheerio";

//#region lib/routes/whu/util.ts
init_esm_shims();

/** Host name an item link must contain before its detail page is fetched. */
const domain = "whu.edu.cn";

/**
 * Render the shared description template with the given data.
 *
 * The template path is resolved on every call (not hoisted to a module-level
 * constant) so that `__dirname` is only read after `init_esm_shims()` has run.
 *
 * @param {Object} data - Values handed to the template (`image`, `video`, or `description` + `attachments`).
 * @returns {string} The rendered markup.
 */
const renderDescription = (data) => art(path.join(__dirname, "templates/description-9e3ccd8d.art"), data);

/**
 * Collect `<meta name="…" content="…">` pairs from raw HTML text.
 *
 * Matching is done with a regular expression on the raw string, so only tags
 * written exactly as `name="…"` followed by a single space and `content="…"`
 * are picked up. When a name occurs more than once, the last occurrence wins.
 *
 * @param {string} text - The HTML content.
 * @returns {Object} A map from meta name to meta content.
 */
const processMeta = (text) => {
  const meta = {};
  for (const [, key, value] of text.matchAll(/<meta name="(.*?)" content="(.*?)"/gi)) {
    meta[key] = value;
  }
  return meta;
};

/**
 * Get a specific meta value from the meta object.
 *
 * Only own properties are considered, so inherited keys such as `toString`
 * yield `undefined`.
 *
 * @param {Object} metaObject - The meta object.
 * @param {string} key - The key of the meta value to retrieve.
 * @returns {string|undefined} The value of the specified meta key, or undefined if not found.
 */
const getMeta = (metaObject, key) => (Object.hasOwn(metaObject, key) ? metaObject[key] : void 0);

/**
 * Fetch an item's detail page and enrich the item in place.
 *
 * Image paragraphs (`p.vsbcontent_img`) and video script tags
 * (`script[name="_videourl"]`) are replaced with rendered template output,
 * the body is read from `div.v_news_content`, and attachments are read from
 * the `_newscontent_fromname` form. Title, author, category, guid and pubDate
 * are taken from the page's `<meta>` tags where present.
 *
 * This is best-effort: any error raised while fetching or parsing is swallowed
 * and the item is returned with whatever fields had been set up to that point.
 *
 * @param {Object} item - The item object to be updated (mutated in place).
 * @param {string} rootUrl - Base URL used to resolve relative image, video and attachment links.
 * @returns {Promise<Object>} The same item object, possibly enriched.
 */
const getItemDetail = async (item, rootUrl) => {
  try {
    const { data: detailResponse } = await got_default(item.link);
    const content = load(detailResponse);

    content("p.vsbcontent_img").each((_, element) => {
      const paragraph = content(element);
      const image = paragraph.find("img");
      paragraph.replaceWith(
        renderDescription({
          image: {
            src: new URL(image.prop("orisrc"), rootUrl).href,
            width: image.prop("width"),
          },
        })
      );
    });

    content("script[name=\"_videourl\"]").each((_, element) => {
      const video = content(element);
      video.replaceWith(
        renderDescription({
          video: {
            // Drop the query string from the video URL before resolving it.
            src: new URL(video.prop("vurl").split("?")[0], rootUrl).href,
            width: video.prop("vwidth"),
            height: video.prop("vheight"),
          },
        })
      );
    });

    // Read the body after the image/video substitutions above have been applied.
    const description = content("div.v_news_content").html();

    // Remove the form's table first, then treat the remaining list entries as attachments.
    content("form[name=\"_newscontent_fromname\"] table").remove();
    const attachments = content("form[name=\"_newscontent_fromname\"] ul li")
      .toArray()
      .map((entry) => {
        const anchor = content(entry).find("a");
        return {
          title: anchor.text(),
          link: new URL(anchor.prop("href"), rootUrl).href,
        };
      });

    const meta = processMeta(detailResponse);
    const pubDate = getMeta(meta, "PubDate");

    item.title = getMeta(meta, "ArticleTitle") ?? item.title;
    item.description = renderDescription({ description, attachments });
    item.author = getMeta(meta, "ContentSource");
    item.category = getMeta(meta, "Keywords")?.split(" ").filter(Boolean) ?? [];
    item.guid = getMeta(meta, "Url") ?? item.link;
    // NOTE(review): the literal 8 is presumably a UTC+8 offset — confirm against the timezone helper.
    item.pubDate = pubDate ? timezone(parseDate(pubDate), 8) : item.pubDate;

    if (attachments.length > 0) {
      const [firstAttachment] = attachments;
      item.enclosure_url = firstAttachment.link;
      // The type is derived from the text after the last "." in the attachment title.
      item.enclosure_type = `application/${firstAttachment.title.split(".").pop()}`;
    }
  } catch {
    // Deliberately ignored: a failed detail fetch/parse must not drop the item.
  }
  return item;
};

/**
 * Process items concurrently.
 *
 * Items whose link does not contain `domain` are passed through untouched;
 * every other item is resolved through `tryGet`, keyed by its link, with
 * `getItemDetail` as the producer callback.
 *
 * @param {Array<Object>} items - The array of items to process.
 * @param {Function} tryGet - Called as `tryGet(link, producer)`; presumably a cache lookup that runs `producer` on a miss — verify against caller.
 * @param {string} rootUrl - The root URL.
 * @returns {Promise<Array<Object>>} A promise that resolves to the processed items, in input order.
 */
const processItems = async (items, tryGet, rootUrl) =>
  Promise.all(
    items.map((item) => {
      if (!item.link.includes(domain)) {
        return item;
      }
      return tryGet(item.link, () => getItemDetail(item, rootUrl));
    })
  );

//#endregion
export { processMeta as i, getMeta as n, processItems as r, domain as t };