UNPKG

rsshub

Version:
494 lines (492 loc) • 18.7 kB
import { n as init_esm_shims, t as __dirname } from "./esm-shims-CzJ_djXG.mjs";
import { t as ofetch_default } from "./ofetch-BIyrKU3Y.mjs";
import { t as parseDate } from "./parse-date-BrP7mxXf.mjs";
import { t as cache_default } from "./cache-Bo__VnGm.mjs";
import { t as art } from "./render-BQo6B4tL.mjs";
import { t as got_default } from "./got-KxxWdaxq.mjs";
import path from "node:path";
import { load } from "cheerio";
import { destr } from "destr";

//#region lib/routes/bloomberg/utils.ts
init_esm_shims();

/** Base URL of the Bloomberg feeds; exported as `r`. */
const rootUrl = "https://www.bloomberg.com/feeds";

/** Selector for the inline JSON `<script>` that carries the story id on "features/" pages. */
const idSel = "script[id^=\"article-info\"][type=\"application/json\"], script[class^=\"article-info\"][type=\"application/json\"], script#dvz-config";

/** Story-by-id API prefix; the story id is appended. */
const idUrl = "https://www.bloomberg.com/article/api/story/id/";

/** Request headers sent with every `redirectGot` call. */
const headers = {
  accept: "application/json",
  "cache-control": "no-cache",
  referer: "https://www.bloomberg.com"
};

/**
 * Per-page-type endpoint description, keyed by the `page` group captured by `regex`.
 * `url` is the prefix the captured `link` is appended to, `sel` is the selector of the
 * embedded JSON `<script>` (HTML pages only) and `prop` is the property holding the story id.
 */
const apiEndpoints = {
  articles: { url: "https://www.bloomberg.com/article/api/story/slug/" },
  features: { url: "https://www.bloomberg.com/article/api/story/slug/" },
  audio: {
    url: "https://www.bloomberg.com/news/audio/",
    sel: "script#__NEXT_DATA__"
  },
  videos: {
    url: "https://www.bloomberg.com/news/videos/",
    sel: "script"
  },
  newsletters: { url: "https://www.bloomberg.com/article/api/story/slug/" },
  "photo-essays": {
    url: "https://www.bloomberg.com/javelin/api/photo-essay_transporter/",
    sel: "script[type = \"application/json\"][data-component-props]"
  },
  "features/": {
    url: "https://www.bloomberg.com/features/",
    sel: idSel,
    prop: "id"
  }
};

/** Patterns that split an article URL into the named groups `page` and `link`. */
const regex = [/\/(?<page>[\w-]*?)\/(?<link>\d{4}-\d{2}-\d{2}\/.*)/, /(?<!news|politics)\/(?<page>features\/|graphics\/)(?<link>.*)/];

/** Matches `<p>` / `</p>` tags so they can be stripped from captions and credits. */
const capRegex = /<p>|<\/p>/g;

/** Matches paragraphs that contain only `&nbsp;` or a single whitespace character. */
const emptyRegex = /<p\b[^>]*>(&nbsp;|\s)<\/p>/g;

/** Error names that are treated as a recoverable network/HTTP failure. */
const fetchErrorNames = ["HTTPError", "RequestError", "FetchError"];

/** Maps the cell `class` of tabular data to the `data-coltype` attribute value. */
const cellColTypes = {
  "news-rsf-table-number": "number",
  "news-rsf-table-string": "text"
};

/** Returns true when `error` carries one of the known fetch-failure names. */
const isFetchFailure = (error) => Boolean(error?.name) && fetchErrorNames.includes(error.name);

/** Minimal feed item (title, link, pubDate) returned when the full article cannot be fetched. */
const toFallbackItem = (item) => ({
  title: item.title,
  link: item.link,
  pubDate: item.pubDate
});

/** True when the response was redirected to `/tosv2.html` or has status 404. */
const isBlockedOrMissing = (res) => res.redirected && new URL(res.url).pathname === "/tosv2.html" || res.status === 404;

/** Renders the art template at `template` (relative to this module's directory) with `data`. */
const renderTemplate = (template, data) => art(path.join(__dirname, template), data);

/**
 * Fetches `url` and returns the raw response. The parsed payload (`_data`) is the
 * wrapper `{ data, body }`: `data` is the `destr`-parsed text, `body` the raw text.
 */
const redirectGot = (url) => ofetch_default.raw(url, {
  headers,
  parseResponse: (responseText) => ({
    data: destr(responseText),
    body: responseText
  })
});

/**
 * Loads the news sitemap at `url` and maps each `<url>` entry to `{ title, link, pubDate }`.
 *
 * @param {string} url - Sitemap URL.
 * @param {object} ctx - Request context; `ctx.req.query("limit")` caps the number of entries.
 * @returns {Promise<object[]>} At most `limit` entries (50 when the limit is absent or not a number).
 */
const parseNewsList = async (url, ctx) => {
  const $ = load((await got_default(url)).data, { xml: { xmlMode: true } });
  // Explicit radix; a missing or non-numeric limit falls back to the default instead of
  // becoming NaN (which would make slice() return an empty list).
  const requested = Number.parseInt(ctx.req.query("limit"), 10);
  const limit = Number.isNaN(requested) ? 50 : requested;
  return $("urlset url").toArray().slice(0, limit).map((entry) => {
    const u = $(entry);
    return {
      title: u.find(String.raw`news\:title`).text(),
      link: u.find("loc").text(),
      pubDate: parseDate(u.find(String.raw`news\:publication_date`).text())
    };
  });
};

/**
 * Resolves the full content of a feed item, cached by `item.link`.
 *
 * The link is matched against `regex`; when the captured page type has an entry in
 * `apiEndpoints` the endpoint is fetched and handed to the matching page parser.
 * Links that match nothing are returned unchanged.
 *
 * @param {object} item - Feed item with at least `title`, `link` and `pubDate`.
 * @returns {Promise<object>} The enriched item, the fallback item, or `item` itself.
 * @throws Rethrows any error of the first request that is not a known fetch failure.
 */
const parseArticle = (item) => cache_default.tryGet(item.link, async () => {
  let group;
  for (const pattern of regex) {
    const match = pattern.exec(item.link);
    if (match?.groups) {
      group = match.groups;
      break;
    }
  }
  if (!group) return item;

  const { page, link } = group;
  // Own-property check so that path segments such as "constructor" do not
  // resolve to members inherited from Object.prototype.
  if (!Object.hasOwn(apiEndpoints, page)) return item;

  const api = { ...apiEndpoints[page] };
  let res;
  try {
    res = await redirectGot(`${api.url}${link}`);
  } catch (error) {
    // Previously an unrecognised error left `res` undefined and surfaced as an
    // unrelated TypeError below; propagate the real cause instead.
    if (!isFetchFailure(error)) throw error;
    try {
      // Retry against the public article URL.
      res = await redirectGot(item.link);
    } catch {
      return toFallbackItem(item);
    }
  }
  if (isBlockedOrMissing(res)) return toFallbackItem(item);

  switch (page) {
    case "audio":
      return parseAudioPage(res._data, api, item);
    case "videos":
      return parseVideoPage(res._data, api, item);
    case "photo-essays":
      return parsePhotoEssaysPage(res._data, api, item);
    case "features/":
      return parseReactRendererPage(res._data, api, item);
    default:
      return parseStoryJson(res._data.data, item);
  }
});

/**
 * Builds a feed item from an audio page: reads the JSON inside `api.sel`
 * (`props.pageProps`) and uses its `episode`, `pageInfo` and `hero` entries.
 */
const parseAudioPage = async (res, api, item) => {
  const audio_json = JSON.parse(load(res.data)(api.sel).html()).props.pageProps;
  const episode = audio_json.episode;
  const description = (await processBody(episode.articleBody, audio_json)).replaceAll(emptyRegex, "");
  return {
    title: episode.title || item.title,
    link: audio_json.pageInfo.canonicalUrl || item.link,
    guid: `bloomberg:${episode.id}`,
    description,
    pubDate: parseDate(episode.publishedAt) || item.pubDate,
    author: audio_json.hero.showTitle,
    media: {
      content: { url: episode.image },
      thumbnails: { url: episode.image }
    },
    enclosure_type: "audio/mpeg",
    enclosure_url: episode.url,
    itunes_item_image: episode.image || audio_json.pageInfo.image?.url
  };
};

/**
 * Builds a feed item from a video page by extracting the `window.__PRELOADED_STATE__`
 * assignment from the page scripts. Returns `item` unchanged when no video story is found.
 */
const parseVideoPage = async (res, api, item) => {
  const $ = load(res.data);
  const stateScript = $(api.sel).filter((i, el) => $(el).text().includes("__PRELOADED_STATE__")).text().trim();
  // NOTE(review): the lazy match stops at the first ";", so a state object that
  // itself contains ";" would be truncated and fail to parse.
  const json = stateScript.match(/window\.__PRELOADED_STATE__ = (.*?);/)?.[1];
  const article_json = JSON.parse(json || "{}");
  const video_story = article_json.video?.videoStory ?? article_json.quicktakeVideo?.videoStory;
  if (!video_story) return item;

  const desc = await processVideo(video_story.video.bmmrId, video_story.summary.html.replaceAll(emptyRegex, ""));
  const thumbnail = video_story.video?.thumbnail?.url || "";
  return {
    title: video_story.headline.text || item.title,
    link: video_story.url || item.link,
    guid: `bloomberg:${video_story.id}`,
    description: renderTemplate("templates/video_media-86c5e7ff.art", desc),
    pubDate: parseDate(video_story.publishedAt) || item.pubDate,
    media: {
      content: { url: thumbnail },
      thumbnails: { url: thumbnail }
    },
    category: desc.keywords ?? []
  };
};

/**
 * Builds a feed item from a photo-essay response: merges every JSON blob matched by
 * `api.sel` inside `res.data.html` into one object and renders its body.
 */
const parsePhotoEssaysPage = async (res, api, item) => {
  const $ = load(res.data.html);
  const article_json = {};
  for (const e of $(api.sel).toArray()) Object.assign(article_json, JSON.parse($(e).html()));
  return {
    title: article_json.headline || item.title,
    link: article_json.canonical || item.link,
    guid: `bloomberg:${article_json.id}`,
    description: (await processBody(article_json.body, article_json)).replaceAll(emptyRegex, ""),
    pubDate: item.pubDate,
    author: article_json.authors?.map((a) => a.name).join(", ") ?? []
  };
};

/**
 * Handles "features/" pages: reads the story id from the embedded JSON selected by
 * `api.sel`, fetches the story by id and delegates to `parseStoryJson`.
 *
 * @returns {Promise<object>} The parsed story, or the fallback item on a fetch failure.
 * @throws Rethrows errors that are not known fetch failures (they used to be swallowed,
 *   making the function resolve to `undefined`).
 */
const parseReactRendererPage = async (res, api, item) => {
  const json = load(res.data)(api.sel).text().trim();
  const story_id = JSON.parse(json)[api.prop];
  try {
    const storyRes = await redirectGot(`${idUrl}${story_id}`);
    // redirectGot wraps the payload as { data, body }; parseStoryJson expects the story
    // object itself (parseArticle passes `_data.data`). Fall back to the wrapper so the
    // previous input is still used if `data` is absent.
    const story_json = storyRes._data?.data ?? storyRes._data;
    return await parseStoryJson(story_json, item);
  } catch (error) {
    if (isFetchFailure(error)) return toFallbackItem(item);
    throw error;
  }
};

/**
 * Converts a story JSON object into a feed item. The description is the headline
 * block, followed by the lede media, followed by the rendered body document.
 */
const parseStoryJson = async (story_json, item) => {
  const media_img = story_json.ledeImageUrl || Object.values(story_json.imageAttachments ?? {})[0]?.url;
  const headline = processHeadline(story_json);
  const ledeMedia = await processLedeMedia(story_json);
  const body = await documentToHtmlString(story_json.body || "");
  return {
    title: story_json.headline || item.title,
    link: story_json.url || item.link,
    guid: `bloomberg:${story_json.id}`,
    description: headline + ledeMedia + body,
    pubDate: parseDate(story_json.publishedAt) || item.pubDate,
    author: story_json.authors?.map((a) => a.name).join(", ") ?? [],
    category: story_json.mostRelevantTags ?? [],
    media: {
      content: { url: media_img },
      thumbnails: { url: media_img }
    }
  };
};

/**
 * Returns the story's dek (falling back to summary, then headline), followed by the
 * `abstract` entries as a `<ul>` when there are any.
 */
const processHeadline = (story_json) => {
  const dek = story_json.dek || story_json.summary || story_json.headline || "";
  const abs = story_json.abstract?.map((a) => `<li>${a}</li>`).join("");
  return abs ? `${dek}<ul>${abs}</ul>` : dek;
};

/**
 * Renders the lede media of a story. Sources are tried in order: `ledeMediaKind`,
 * `lede`, the first entry of `imageAttachments`, then a node of `type === "Lede"`.
 *
 * @returns {Promise<string>} Rendered HTML, or "" when the story has no lede media.
 */
const processLedeMedia = async (story_json) => {
  const strip = (html) => html?.replaceAll(capRegex, "") ?? "";

  if (story_json.ledeMediaKind) {
    const kind = story_json.ledeMediaKind;
    const media = {
      kind,
      caption: strip(story_json.ledeCaption),
      description: strip(story_json.ledeDescription),
      credit: strip(story_json.ledeCredit),
      src: story_json.ledeImageUrl,
      video: kind === "video" && await processVideo(story_json.ledeAttachment.bmmrId)
    };
    return renderTemplate("templates/lede_media-39081938.art", { media });
  }

  if (story_json.lede) {
    const lede = story_json.lede;
    return renderTemplate("templates/image_figure-1ba8d11c.art", {
      src: lede.url,
      alt: lede.alt || lede.title,
      caption: strip(lede.caption),
      credit: strip(lede.credit)
    });
  }

  if (story_json.imageAttachments) {
    const attachment = Object.values(story_json.imageAttachments)[0];
    if (!attachment) return "";
    return renderTemplate("templates/image_figure-1ba8d11c.art", {
      src: attachment.baseUrl || attachment.url,
      alt: attachment.alt || attachment.title,
      caption: strip(attachment.caption),
      credit: strip(attachment.credit)
    });
  }

  if (story_json.type === "Lede") {
    const props = story_json.props;
    const media = {
      kind: props.media,
      caption: strip(props.caption),
      description: strip(props.dek),
      credit: strip(props.credit),
      src: props.url
    };
    return renderTemplate("templates/lede_media-39081938.art", { media });
  }

  // The result is string-concatenated into the item description, so an implicit
  // `undefined` here would show up as the literal text "undefined".
  return "";
};

/**
 * Cleans an HTML body and replaces every `<figure>` with a rendered template.
 *
 * Removes meta/script/ads/recirculation nodes, drops the `paywall` class, and swaps each
 * figure for an audio, video or image template chosen from its `data-image-type` /
 * `data-type`. Figures of any other type, or whose attachment cannot be found, are removed.
 *
 * @param {string} body_html - Raw body HTML.
 * @param {object} story_json - Story data used to look up attachments.
 * @returns {Promise<string>} The serialised, cleaned document.
 */
const processBody = async (body_html, story_json) => {
  const removeSel = [
    "meta",
    "script",
    "*[class$=\"-footnotes\"]",
    "*[class$=\"for-you\"]",
    "*[class$=\"-newsletter\"]",
    "*[class$=\"page-ad\"]",
    "*[class$=\"-recirc\"]",
    "*[data-ad-placeholder=\"Advertisement\"]"
  ];
  const $ = load(body_html);
  for (const sel of removeSel) $(sel).remove();
  $(".paywall").removeAttr("class");

  // Figures are processed one at a time; processVideo is awaited per figure.
  for (const e of $("figure").toArray()) {
    const figure = $(e);
    const imageType = figure.data("image-type");
    const type = figure.data("type");
    const htmlOf = (sel) => figure.find(sel).html()?.trim() ?? "";
    let new_figure = "";

    if (imageType === "audio") {
      let audio = {};
      if (story_json.audios) {
        const figureId = String(figure.data("id"));
        // A figure without a matching attachment falls back to the inline markup.
        const attachment = story_json.audios.find((a) => String(a.id) === figureId);
        audio = {
          img: attachment?.image?.url || figure.find("img").attr("src"),
          src: attachment?.url || figure.find("audio source").attr("src"),
          caption: htmlOf("[class$=\"text\"]"),
          credit: htmlOf("[class$=\"credit\"]")
        };
      }
      if (story_json.episode) {
        const episode = story_json.episode;
        audio = {
          src: episode.url,
          img: episode.image || story_json.pageInfo?.image?.url,
          caption: episode.description || htmlOf("[class$=\"text\"]"),
          credit: episode.credits?.map((c) => c.name).join(", ") || htmlOf("[class$=\"credit\"]")
        };
      }
      new_figure = renderTemplate("templates/audio_media-302de55e.art", audio);
    } else if (imageType === "video") {
      const attachment = story_json.videoAttachments?.[figure.data("id")];
      if (attachment) {
        const video = await processVideo(attachment.bmmrId);
        new_figure = renderTemplate("templates/video_media-86c5e7ff.art", video);
      }
    } else if (imageType === "photo" || imageType === "image" || type === "image") {
      const img = figure.find("img");
      let src;
      let alt;
      if (story_json.imageAttachments) {
        const attachment = story_json.imageAttachments[figure.data("id")];
        alt = attachment?.alt || img.attr("alt")?.trim();
        src = attachment?.baseUrl;
      } else {
        alt = img.attr("alt")?.trim();
        src = img.data("native-src");
      }
      const caption = htmlOf("[class$=\"text\"], .caption, .photo-essay__text");
      const credit = htmlOf("[class$=\"credit\"], .credit, .photo-essay__source");
      new_figure = renderTemplate("templates/image_figure-1ba8d11c.art", { src, alt, caption, credit });
    }

    $(new_figure).insertAfter(e);
    figure.remove();
  }
  return $.html();
};

/**
 * Fetches embed metadata for a video.
 *
 * @param {string} bmmrId - Video id passed to the embed API.
 * @param {string} [summary] - Caption used when the API provides no description or title.
 * @returns {Promise<{stream: string, mp4: string, coverUrl: string, caption: string}>}
 *   Empty URLs when the request is blocked, missing, or returns no data.
 */
const processVideo = async (bmmrId, summary) => {
  const emptyVideo = {
    stream: "",
    mp4: "",
    coverUrl: "",
    caption: summary
  };
  const res = await redirectGot(`https://www.bloomberg.com/multimedia/api/embed?id=${bmmrId}`);
  if (isBlockedOrMissing(res)) return emptyVideo;

  const video_json = res._data?.data;
  if (!video_json) return emptyVideo;
  return {
    stream: video_json.streams ? video_json.streams[0]?.url : "",
    mp4: video_json.downloadURLs ? video_json.downloadURLs["600"] : "",
    coverUrl: video_json.thumbnail?.baseUrl ?? "",
    caption: video_json.description || video_json.title || summary
  };
};

/**
 * Renderers for the structured body document, keyed by node `type`. Each one receives
 * the node and a `renderChildren(nodes)` callback that serialises child nodes.
 * A renderer that returns `undefined` contributes nothing to the output, because
 * `nodeListToHtmlString` joins the results with `join("")`.
 */
const nodeRenderers = {
  paragraph: async (node, renderChildren) => `<p>${await renderChildren(node.content)}</p>`,
  text: (node) => {
    const { attributes: attr, value: val } = node;
    if (attr?.emphasis && attr?.strong) return `<strong><em>${val}</em></strong>`;
    if (attr?.emphasis) return `<em>${val}</em>`;
    if (attr?.strong) return `<strong>${val}</strong>`;
    return val;
  },
  "inline-newsletter": async (node, renderChildren) => `<div>${await renderChildren(node.content)}</div>`,
  "inline-recirc": async (node, renderChildren) => `<div>${await renderChildren(node.content)}</div>`,
  // Only level 2 and 3 headings are rendered (both as <h3>); other levels are dropped.
  heading: async (node, renderChildren) => {
    const nodeData = node.data;
    if (nodeData.level === 2 || nodeData.level === 3) return `<h3>${await renderChildren(node.content)}</h3>`;
  },
  link: async (node, renderChildren) => {
    const dest = node.data.destination;
    const web = dest.web;
    const bbg = dest.bbg;
    // Avoid emitting title="undefined" for links without a title.
    const title = node.data.title ?? "";
    if (web) return `<a href="${web}" title="${title}" target="_blank">${await renderChildren(node.content)}</a>`;
    if (bbg && bbg.startsWith("bbg://news/stories")) {
      const storyId = bbg.split("bbg://news/stories/").pop();
      return `<a href="https://www.bloomberg.com/news/terminal/${storyId}" title="${title}" target="_blank">${await renderChildren(node.content)}</a>`;
    }
    return String(await renderChildren(node.content));
  },
  entity: async (node, renderChildren) => {
    const subType = node.subType;
    if (subType === "person") return renderChildren(node.content);
    if (subType === "story") {
      // Optional chaining: entities without link data fall through to the suid URL.
      const web = node.data?.link?.destination?.web;
      if (web) return `<a href="${web}" target="_blank">${await renderChildren(node.content)}</a>`;
      const suid = node.data.story.identifiers.suid;
      return `<a href="https://www.bloomberg.com/news/terminal/${suid}" target="_blank">${await renderChildren(node.content)}</a>`;
    }
    if (subType === "security") {
      const parsekey = node.data.security.identifiers.parsekey;
      if (parsekey) {
        const [ticker, market] = parsekey.split(" ");
        // Was `[...url.concat(ticker, ":"), market]`, which spread the URL into single
        // characters and rendered it comma-separated inside the href.
        return `<a href="https://www.bloomberg.com/quote/${ticker}:${market}" target="_blank">${await renderChildren(node.content)}</a>`;
      }
    }
    return renderChildren(node.content);
  },
  br: () => `<br/>`,
  hr: () => `<br/>`,
  ad: () => {},
  blockquote: async (node, renderChildren) => `<blockquote>${await renderChildren(node.content)}</blockquote>`,
  quote: async (node, renderChildren) => `<blockquote>${await renderChildren(node.content)}</blockquote>`,
  aside: async (node, renderChildren) => `<aside>${await renderChildren(node.content)}</aside>`,
  list: async (node, renderChildren) => {
    const subType = node.subType;
    if (subType === "unordered") return `<ul>${await renderChildren(node.content)}</ul>`;
    if (subType === "ordered") return `<ol>${await renderChildren(node.content)}</ol>`;
  },
  listItem: async (node, renderChildren) => `<li>${await renderChildren(node.content)}</li>`,
  media: async (node) => {
    const subType = node.subType;
    const data = node.data;

    if (subType === "chart" && data.attachment) {
      const attachment = data.attachment;
      const chartData = data.chart;
      if (attachment.creator === "TOASTER") {
        const chart = {
          source: attachment.source,
          footnote: attachment.footnote,
          url: attachment.url,
          title: attachment.title,
          subtitle: attachment.subtitle,
          chartId: `toaster-chart-${chartData?.id || ""}`,
          chartAlt: chartData?.alt || "",
          fallback: chartData?.fallback || ""
        };
        return renderTemplate("templates/chart_media-e91db148.art", { chart });
      }
      const image = {
        alt: attachment.footnote || "",
        // Default each part separately so a missing title/subtitle does not
        // produce the text "undefined" in the caption.
        caption: `${attachment.title ?? ""}${attachment.subtitle ?? ""}`,
        credit: attachment.source || "",
        src: chartData?.fallback || ""
      };
      return renderTemplate("templates/image_figure-1ba8d11c.art", image);
    }

    if (subType === "photo") {
      let img = "";
      if (data.attachment) {
        img = renderTemplate("templates/image_figure-1ba8d11c.art", {
          src: data.photo?.src,
          alt: data.photo?.alt,
          caption: data.photo?.caption,
          credit: data.photo?.credit
        });
      }
      const web = data.link?.destination?.web;
      return web ? `<a href="${web}" target="_blank">${img}</a>` : img;
    }

    if (subType === "video") {
      const id = data.attachment?.id;
      if (id) {
        const desc = await processVideo(id, data.attachment?.title);
        return renderTemplate("templates/video_media-86c5e7ff.art", desc);
      }
    }

    if (subType === "audio" && data.attachment) {
      const { title, url, image } = data.attachment;
      if (title && url) {
        return renderTemplate("templates/audio_media-302de55e.art", {
          src: url,
          img: image?.url,
          caption: title,
          credit: ""
        });
      }
    }

    return "";
  },
  tabularData: async (node, renderChildren) => `<table>${await renderChildren(node.content)}</table>`,
  columns: (node) => {
    // join("") is required: interpolating the array directly would put commas
    // between the header cells.
    const cells = node.data.definitions.map((def) => `<th colspan=${def.colSpan || 1}>${def.title}</th>`).join("");
    return `<tr>${cells}</tr>`;
  },
  row: async (node, renderChildren) => `<tr>${await renderChildren(node.content)}</tr>`,
  cell: async (node, renderChildren) => {
    const colType = cellColTypes[node.data.class] || "text";
    // Default to a single column, as the header renderer does, instead of `colspan=undefined`.
    const colspan = node.data.colspan ?? 1;
    return `<td data-coltype=${colType} colspan=${colspan}>${await renderChildren(node.content)}</td>`;
  }
};

/** Child-rendering callback handed to every node renderer. */
const nextNode = async (nodes) => nodeListToHtmlString(nodes);

/**
 * Renders one node. Nodes without a known renderer are emitted as `<node>TYPE</node>`.
 * `obj` ({ index, prev, next }) is passed through as the renderer's third argument.
 */
const nodeToHtmlString = async (node, obj) => {
  if (!node.type || !nodeRenderers[node.type]) return `<node>${node.type}</node>`;
  return await nodeRenderers[node.type](node, nextNode, obj);
};

/** Renders all nodes concurrently and concatenates the results in document order. */
const nodeListToHtmlString = async (nodes) => {
  const rendered = await Promise.all(nodes.map((node, index) => nodeToHtmlString(node, {
    index,
    prev: nodes[index - 1]?.type,
    next: nodes[index + 1]?.type
  })));
  return rendered.join("");
};

/** Renders a body document (`{ content: [...] }`) to HTML; "" when there is no content. */
const documentToHtmlString = async (document) => {
  if (!document || !document.content) return "";
  return await nodeListToHtmlString(document.content);
};

//#endregion
export { parseNewsList as n, rootUrl as r, parseArticle as t };