rsshub

Version:

Make RSS Great Again!

51 lines (45 loc) • 1.64 kB

text/typescript

import got from '@/utils/got'; // get web content
import { load } from 'cheerio'; // html parser
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Fetch one Solidot story page and turn it into a feed item.
 *
 * @param url Absolute story URL, or a site-relative path starting with `/`
 *            (relative paths are prefixed with the Solidot origin).
 * @returns An object with `title`, `pubDate`, `author`, `link`, `description`
 *          and `category`. `pubDate` is `undefined` when no timestamp could be
 *          found in the byline; `description` is an empty string when the
 *          story container is missing from the page.
 */
export default async function get_article(url) {
    const domain = 'https://www.solidot.org';

    // Resolve site-relative paths without reassigning the parameter.
    const link = url.startsWith('/') ? domain + url : url;

    const response = await got({
        method: 'get',
        url: link,
    });
    const $ = load(response.data);

    // Work on a clone with its child elements removed so that only the
    // byline's own text remains; the live DOM is left untouched here.
    const bylineText = $('div.talk_time').clone().children().remove().end().text();

    // The timestamp follows "发表于" and ends at "分", e.g.
    // "2024年01月15日 19时24分" is rewritten to "2024-01-15 19:24".
    // If the marker is absent, dateStr stays undefined instead of feeding
    // the whole byline to the date parser.
    const dateMatch = bylineText.match(/发表于(.*分)/);
    const dateStr = dateMatch?.[1]
        .replaceAll(/[年月]/g, '-')
        .replaceAll('时', ':')
        .replaceAll(/[分日]/g, '');

    const title = $('div.block_m > div.ct_tittle > div.bg_htit > h2').text();
    const category = $('div.icon_float > a').attr('title');
    // Strip the "来自…部门" wrapper, keeping only the text in between.
    const author = $('div.talk_time > b')
        .text()
        .replaceAll(/^来自(.*)部门$/g, '$1');

    // Title and byline are reported as separate fields, so drop them from
    // the container before serialising it as the description.
    $('div.ct_tittle').remove();
    $('div.talk_time').remove();

    // .html() yields null when the selector matches nothing; fall back to an
    // empty string so the replacements below cannot throw.
    const description = ($('div.block_m').html() ?? '')
        // Unwrap <u>…</u> that follows an href, keeping the inner text.
        .replaceAll(/(href.*?)<u>(.*?)<\/u>/g, '$1$2')
        // Make root-relative links absolute.
        .replaceAll('href="/', `href="${domain}/`)
        // Preserve the not extremely disturbing donation ad
        // to support the site.
        .replaceAll(/(<img.*liiLIZF8Uh6yM.*?>)/g, '<br><br>$1');

    return {
        title,
        // NOTE(review): the +8 offset presumably marks the page's timestamps
        // as UTC+8 — confirm against the timezone helper.
        pubDate: dateStr ? timezone(parseDate(dateStr), +8) : undefined,
        author,
        link,
        description,
        category,
    };
}