rsshub
Version:
Make RSS Great Again!
127 lines (115 loc) • 4.82 kB
text/typescript
import { Route } from '@/types';
import cache from '@/utils/cache';
import ofetch from '@/utils/ofetch';
import { load } from 'cheerio';
import { parseDate } from '@/utils/parse-date';
// Origin of the Uber website; the handler builds the RSS feed URL and the channel link from it.
const rootURL = 'https://www.uber.com';
/**
 * Route definition for the Uber Engineering blog feed.
 *
 * Declares the route path, its example, the radar rule, the maintainers and
 * the human-readable descriptions, and binds the route to `handler`.
 */
export const route: Route = {
// `compat` is a never used parameter
// just for backward compatibility with the deprecated `:maxPage` parameter
path: '/blog/:compat?',
categories: ['blog'],
example: '/uber/blog',
// Every feature flag is switched off for this route.
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
// Radar rule pairing the engineering blog URL pattern (any `:language` segment) with the `/blog` target.
radar: [
{
source: ['www.uber.com/:language/blog/engineering'],
target: '/blog',
},
],
name: 'Engineering',
maintainers: ['hulb'],
handler,
url: 'www.uber.com/en-HK/blog/engineering',
// English description, followed by its Chinese counterpart under `zh`.
description:
"The English blog on any of Uber's regional sites (e.g., www.uber.com/en-JP/blog) is the same engineering blog provided by this route, so language selection is not supported. This route is not for the public news blog on specific regional sites (e.g., www.uber.com/ja-JP/blog).",
zh: {
description: 'uber的任何区域站点的英文blog(例如www.uber.com/en-JP/blog)都是相同的内容,正是本路由提供的engineering blog,因此本路由不提供语言选择;本路由不是uber在特定区域站点的公开新闻blog(例如www.uber.com/ja-JP/blog)',
},
};
/**
 * Builds the Uber Engineering blog feed.
 *
 * Fetches the blog's RSS document, then, for every `<item>`, fetches the
 * article page and extracts the full article HTML from the Redux state
 * embedded in `script#__REDUX_STATE__`. Each item is built inside
 * `cache.tryGet`, keyed by the article link.
 *
 * When an article page carries no embedded state, or the state holds no
 * string content for the `BlogArticleContent` node, the item falls back to the
 * `<description>` text of the RSS entry instead of failing the whole feed.
 *
 * @returns The feed object: channel `title`, `link`, `description` and the
 * resolved `item` list.
 * @throws Errors from the network requests, and decoding/parsing errors of a
 * present-but-malformed embedded state, propagate to the caller.
 */
async function handler() {
    const response = await ofetch(`${rootURL}/en-HK/blog/engineering/rss/`, {
        // The source site is misconfigured or intentionally blocking requests without a specific accept header
        // Without this header, it will return an HTTP 406 error
        // Note that the accept type must be 'text/html'; 'application/xml' or similar will get HTTP 404 error
        headers: {
            accept: 'text/html',
        },
        // Without this, ofetch will parse the response as a blob instead of text, which cannot be loaded by cheerio
        parseResponse: (txt) => txt,
    });
    const $ = load(response, { xmlMode: true });
    const result = await Promise.all(
        $('item')
            .toArray()
            .map((el) => {
                const entry = $(el);
                // The article link doubles as the cache key and the detail page URL.
                const link = entry.find('link').text();
                return cache.tryGet(link, async () => {
                    const detailResponse = await ofetch(link, {
                        headers: {
                            accept: 'text/html',
                        },
                    });
                    const detail = load(detailResponse);
                    const scriptText = detail('script#__REDUX_STATE__').text().trim();
                    let content: unknown;
                    // A page without the state script yields an empty string, which JSON.parse would reject.
                    if (scriptText) {
                        // The json in the script element is over-encoded
                        // It needs to be decoded this way before it can be parsed by JSON.parse
                        const jsonText = decodeURIComponent(JSON.parse(`"${scriptText}"`));
                        // Traverse the JSON to find the content node, which is more robust against format changes.
                        content = findNode(JSON.parse(jsonText), { idKey: 'id', idValue: 'BlogArticleContent', siblingKey: 'props', childKey: 'content' });
                    }
                    // findNode returns undefined when nothing matches; only a string can be cleaned up,
                    // so anything else falls back to the summary shipped in the RSS entry.
                    const description = typeof content === 'string' ? content.replaceAll(String.raw`\n`, '') : entry.find('description').text();
                    return {
                        link,
                        title: entry.find('title').text(),
                        description,
                        pubDate: parseDate(entry.find('pubDate').text()),
                        category: entry
                            .find('category')
                            .toArray()
                            .map((item) => $(item).text()),
                    };
                });
            })
    );
    return {
        title: `Uber Engineering Blog`,
        link: rootURL + '/blog/engineering',
        description: 'The technology behind Uber Engineering',
        item: result,
    };
}
/**
 * Depth-first search of a parsed JSON value for a tagged node's nested value.
 *
 * A node matches when it is a non-array object whose `idKey` property equals
 * `idValue`; the value returned is `node[siblingKey][childKey]`. Arrays are
 * only traversed, never matched themselves.
 *
 * A matching node whose `siblingKey.childKey` value is `undefined` does not
 * end the search: traversal continues into that node's own children and on to
 * the remaining nodes, so a later or nested usable match is still found.
 *
 * @param json - Any parsed JSON value; primitives and `null` yield `undefined`.
 * @param options - `idKey` (defaults to `'id'`) and `idValue` identify the
 * node; `siblingKey` and `childKey` locate the value inside it.
 * @returns The first defined value found in traversal order, or `undefined`
 * when no node provides one.
 */
function findNode(
    json: unknown,
    options: {
        idKey?: string;
        idValue: string;
        siblingKey: string;
        childKey: string;
    }
    // The `any` return type is kept so existing callers compile unchanged.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
): any {
    const { idKey = 'id', idValue, siblingKey, childKey } = options;
    if (json === null || typeof json !== 'object') {
        return undefined;
    }
    if (!Array.isArray(json)) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const node = json as Record<string, any>;
        if (node[idKey] === idValue) {
            const value = node[siblingKey]?.[childKey];
            // Only a defined value ends the search; otherwise keep looking below.
            if (value !== undefined) {
                return value;
            }
        }
    }
    // Object.values yields array elements in order and an object's own enumerable values.
    for (const child of Object.values(json)) {
        const found = findNode(child, options);
        if (found !== undefined) {
            return found;
        }
    }
    return undefined;
}