rsshub
Version:
Make RSS Great Again!
511 lines (463 loc) • 26.3 kB
text/typescript
import cache from '@/utils/cache';
import querystring from 'node:querystring';
import got from '@/utils/got';
import { load } from 'cheerio';
import { fallback, queryToBoolean, queryToInteger } from '@/utils/readable-social';
const weiboUtils = {
formatTitle: (html) =>
html
.replaceAll(/<span class=["']url-icon["']><img\s[^>]*?alt=["']?([^>]+?)["']?\s[^>]*?\/?><\/span>/g, '$1') // 表情转换
.replaceAll(/<span class=["']url-icon["']>(<img\s[^>]*>)<\/span>/g, '') // 去掉所有图标
.replaceAll(/<img\s[^<]*>/g, '[图片]')
// impossible to have inline script in weibo posts, but CodeQL complains about it
// Dismiss it through the UI: https://github.com/github/codeql/issues/11427
.replaceAll(/<[^<]*>/g, '')
.replaceAll('\n', ' ')
.trim(),
formatExtended: (ctx, status, uid, params = {}, picsPrefixes = []) => {
// `uid = undefined` to explicitly mark it as optional, avoiding IDEs prompting warnings
// undefined and strings like "1" is also safely parsed, so no if branch is needed
const routeParams = querystring.parse(ctx.req.param('routeParams'));
const mergedParams = {
readable: fallback(params.readable, queryToBoolean(routeParams.readable), false),
authorNameBold: fallback(params.authorNameBold, queryToBoolean(routeParams.authorNameBold), false),
showAuthorInTitle: fallback(params.showAuthorInTitle, queryToBoolean(routeParams.showAuthorInTitle), false),
showAuthorInDesc: fallback(params.showAuthorInDesc, queryToBoolean(routeParams.showAuthorInDesc), false),
showAuthorAvatarInDesc: fallback(params.showAuthorAvatarInDesc, queryToBoolean(routeParams.showAuthorAvatarInDesc), false),
showAtBeforeAuthor: fallback(params.showAtBeforeAuthor, null, false),
showEmojiForRetweet: fallback(params.showEmojiForRetweet, queryToBoolean(routeParams.showEmojiForRetweet), false),
showRetweetTextInTitle: fallback(params.showRetweetTextInTitle, queryToBoolean(routeParams.showRetweetTextInTitle), true),
addLinkForPics: fallback(params.addLinkForPics, queryToBoolean(routeParams.addLinkForPics), false),
showTimestampInDescription: fallback(params.showTimestampInDescription, queryToBoolean(routeParams.showTimestampInDescription), false),
widthOfPics: fallback(params.widthOfPics, queryToInteger(routeParams.widthOfPics), -1),
heightOfPics: fallback(params.heightOfPics, queryToInteger(routeParams.heightOfPics), -1),
sizeOfAuthorAvatar: fallback(params.sizeOfAuthorAvatar, queryToInteger(routeParams.sizeOfAuthorAvatar), 48),
showEmojiInDescription: fallback(params.showEmojiInDescription, queryToInteger(routeParams.showEmojiInDescription), false),
showLinkIconInDescription: fallback(params.showLinkIconInDescription, queryToInteger(routeParams.showLinkIconInDescription), true),
preferMobileLink: fallback(params.preferMobileLink, queryToBoolean(routeParams.preferMobileLink), false),
};
params = mergedParams;
const {
readable,
authorNameBold,
showAuthorInTitle,
showAuthorInDesc,
showAuthorAvatarInDesc,
showAtBeforeAuthor,
showEmojiForRetweet,
showRetweetTextInTitle,
addLinkForPics,
showTimestampInDescription,
widthOfPics,
heightOfPics,
sizeOfAuthorAvatar,
showEmojiInDescription,
showLinkIconInDescription,
preferMobileLink,
} = params;
let retweeted = '';
// 长文章的处理
let htmlNewLineUnreplaced = (status.longText && status.longText.longTextContent) || status.text || '';
// 表情图标转换为文字
if (!showEmojiInDescription) {
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll(/<span class=["']?url-icon["']?><img\s[^>]*?alt=["']?([^>]+?)["']?\s[^>]*?\/><\/span>/g, '$1');
}
// 去掉链接的图标,保留 a 标签链接
if (!showLinkIconInDescription) {
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll(/(<a\s[^>]*>)<span class=["']?url-icon["']?><img\s[^>]*><\/span>[^<>]*?<span class=["']?surl-text["']?>([^<>]*?)<\/span><\/a>/g, '$1$2</a>');
}
// 提取 话题作为 category
const category: string[] = htmlNewLineUnreplaced.match(/<span class=["']?surl-text["']?>#([^<>]*?)#<\/span>/g)?.map((e) => e?.match(/#([^#]+)#/)?.[1]);
// 去掉乱七八糟的图标 // 不需要,上述的替换应该已经把所有的图标都替换掉了,且这条 regex 会破坏上述替换不发生时的输出
// htmlNewLineUnreplaced = htmlNewLineUnreplaced.replace(/<span class=["']?url-icon["']?>(<img\s[^>]*?>)<\/span>/g, '');
// 将行内图标的高度设置为一行,改善阅读体验。但有些阅读器删除了 style 属性,无法生效 // 不需要,微博已经作此设置
// htmlNewLineUnreplaced = htmlNewLineUnreplaced.replace(/(?<=<span class=["']?url-icon["']?>)<img/g, '<img style="height: 1em"');
// 去掉全文
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll('全文<br>', '<br>');
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll(/<a href="(.*?)">全文<\/a>/g, '');
// 处理外部链接
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll(/"https:\/\/weibo\.cn\/sinaurl.*?[&?]u=(http.*?)"/g, (match, p1) => `"${decodeURIComponent(p1)}"`);
// 处理图片的链接
htmlNewLineUnreplaced = htmlNewLineUnreplaced.replaceAll(/<a\s+href="https?:\/\/[^"]+\.(jpg|png|gif)"/g, (match) => `${match} data-rsshub-image="href"`);
let html = htmlNewLineUnreplaced.replaceAll('\n', '<br>');
// 添加用户名和头像
if (showAuthorInDesc) {
let usernameAndAvatar = `<a href="https://weibo.com/${status.user.id}" target="_blank">`;
if (showAuthorAvatarInDesc) {
usernameAndAvatar += `<img width="${sizeOfAuthorAvatar}" height="${sizeOfAuthorAvatar}" src="${status.user.profile_image_url}" ${readable ? 'hspace="8" vspace="8" align="left"' : ''} />`;
}
let name = status.user.screen_name;
if (showAtBeforeAuthor) {
name = '@' + name;
}
usernameAndAvatar += authorNameBold ? `<strong>${name}</strong></a>: ` : `${name}</a>: `;
html = usernameAndAvatar + html;
}
// status.pics can be either an array or an object:
// array: [ object, object, ... ]
// object: { '0': object, '1': object, ... } // REALLY AMAZING data structure
if (status.pics && !Array.isArray(status.pics) && typeof status.pics === 'object') {
status.pics = Object.values(status.pics);
}
// 添加文章头图,此处不需要回落到被转发的微博,后续处理被转发的微博时,还会执行到这里
if (status.page_info && status.page_info.type === 'article' && status.page_info.page_pic && status.page_info.page_pic.url) {
// 如果以后后续流程会用到其他字段,记得修改这里
const pagePic = {
large: {
url: status.page_info.page_pic.url,
},
};
// 文章微博一般不会有配图,但也有可能有:https://weibo.com/6882481489/Lh85BkS3m
if (status.pics) {
status.pics.push(pagePic);
} else {
status.pics = [pagePic];
}
}
// drop live photo
const livePhotoCount = status.pics ? status.pics.filter((pic) => pic.type === 'livephoto').length : 0;
const pics = status.pics && status.pics.filter((pic) => pic.type !== 'livephoto');
// 添加微博配图
if (pics) {
if (readable) {
html += '<br clear="both" /><div style="clear: both"></div>';
}
// 一些RSS Reader会识别所有<img>标签作为内含图片显示,我们不想要头像也作为内含图片之一
// 让所有配图在description的最前面再次出现一次,但宽高设为0
let picsPrefix = '';
for (const item of pics) {
picsPrefix += `<img width="0" height="0" hidden="true" src="${item.large.url}">`;
}
picsPrefixes.push(picsPrefix);
for (const item of pics) {
if (addLinkForPics) {
html += '<a href="' + item.large.url + '">';
}
let style = '';
html += '<img ';
html += readable ? 'vspace="8" hspace="4"' : '';
if (item.large) {
const { geo, url } = item.large;
if (geo?.width || widthOfPics >= 0) {
const width = geo?.width || widthOfPics;
html += ` width="${width}"`;
style += `width: ${width}px;`;
}
if (geo?.height || heightOfPics >= 0) {
const height = geo?.height || heightOfPics;
html += ` height="${height}"`;
style += `height: ${height}px;`;
}
html += ` style="${style}" src="${url}">`;
}
if (addLinkForPics) {
html += '</a>';
}
htmlNewLineUnreplaced += '<img src="" />';
}
}
// 处理转发的微博
if (status.retweeted_status) {
html += readable
? `<br clear="both" /><div style="clear: both"></div><blockquote style="background: #80808010;border-top:1px solid #80808030;border-bottom:1px solid #80808030;margin:0;padding:5px 20px;">`
: `<br><blockquote> - 转发 `;
if (!status.retweeted_status.user) {
// 当转发的微博被删除时user为null
status.retweeted_status.user = {
profile_image_url: '',
screen_name: '[原微博不可访问]',
id: 'sorry',
};
}
// 插入转发的微博
const retweetedParams = Object.assign({}, params);
retweetedParams.showAuthorInDesc = true;
retweetedParams.showAuthorAvatarInDesc = false;
retweetedParams.showAtBeforeAuthor = true;
retweeted += weiboUtils.formatExtended(ctx, status.retweeted_status, undefined, retweetedParams, picsPrefixes).description;
html += retweeted;
if (readable) {
html += `<br><small>原博:<a href="https://weibo.com/${status.retweeted_status.user.id}/${status.retweeted_status.bid}" target="_blank" rel="noopener noreferrer">https://weibo.com/${status.retweeted_status.user.id}/${status.retweeted_status.bid}</a></small>`;
}
if (showTimestampInDescription) {
html += `<br><small>` + new Date(status.retweeted_status.created_at).toLocaleString() + `</small>`;
}
if (readable) {
html += `<br clear="both" /><div style="clear: both"></div>`;
}
html += '</blockquote>';
}
if (showAuthorInDesc && showAuthorAvatarInDesc) {
html = picsPrefixes.join('') + html;
}
let title = '';
if (showAuthorInTitle) {
title += status.user.screen_name + ': ';
}
if (!status.retweeted_status || showRetweetTextInTitle) {
title += weiboUtils.formatTitle(htmlNewLineUnreplaced);
}
if (status.retweeted_status) {
title += showEmojiForRetweet ? '🔁 ' : ' - 转发 ';
title += weiboUtils.formatTitle(retweeted);
}
if (livePhotoCount > 0) {
title += ' ';
title += Array.from({ length: livePhotoCount + 1 }).join('[Live Photo]');
}
if (status.page_info && status.page_info === 'video') {
title += ' [视频]';
}
uid = uid || status.user?.id;
const bid = status.bid || status.id;
const guid = uid ? `https://weibo.com/${uid}/${bid}` : `https://m.weibo.cn/status/${bid}`;
const link = preferMobileLink ? `https://m.weibo.cn/status/${bid}` : guid;
const author = [
{
name: status.user?.screen_name,
url: `https://weibo.com/${uid}`,
avatar: status.user?.avatar_hd,
},
];
const pubDate = status.created_at;
return { description: html, title, link, guid, author, pubDate, category };
},
getShowData: async (uid, bid) => {
const link = `https://m.weibo.cn/statuses/show?id=${bid}`;
const itemResponse = await got.get(link, {
headers: {
Referer: `https://m.weibo.cn/u/${uid}`,
'MWeibo-Pwa': 1,
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
},
});
return itemResponse.data.data;
},
formatVideo: (itemDesc, status) => {
const pageInfo = status.page_info;
const livePhotos = status.pics && status.pics.filter((pic) => pic.type === 'livephoto' && pic.videoSrc);
let video = '<br clear="both" /><div style="clear: both"></div>';
let anyVideo = false;
if (livePhotos) {
for (const livePhoto of livePhotos) {
video += `<video controls="controls" poster="${(livePhoto.large && livePhoto.large.url) || livePhoto.url}" src="${livePhoto.videoSrc}" style="width: 100%"></video>`;
anyVideo = true;
}
}
if (pageInfo && pageInfo.type === 'video') {
const pagePic = pageInfo.page_pic;
const posterUrl = pagePic ? pagePic.url : '';
const pageUrl = pageInfo.page_url; // video page url
const mediaInfo = pageInfo.media_info || {}; // stream_url, stream_url_hd; deprecated: mp4_720p_mp4, mp4_hd_url, mp4_sd_url
const urls = pageInfo.urls || {}; // mp4_720p_mp4, mp4_hd_mp4, hevc_mp4_hd, mp4_ld_mp4
const video720p = urls.mp4_720p_mp4 || mediaInfo.mp4_720p_mp4 || '';
const videoHd = urls.mp4_hd_mp4 || mediaInfo.mp4_hd_url || mediaInfo.stream_url_hd || '';
const videoHdHevc = urls.hevc_mp4_hd || '';
const videoLd = urls.mp4_ld_mp4 || mediaInfo.mp4_sd_url || mediaInfo.stream_url || '';
const hasVideo = video720p || videoHd || videoHdHevc || videoLd;
if (hasVideo) {
video += `<video controls="controls" poster="${posterUrl}" style="width: 100%">`;
if (video720p) {
video += `<source src="${video720p}">`;
}
if (videoHd) {
video += `<source src="${videoHd}">`;
}
if (videoHdHevc) {
video += `<source src="${videoHdHevc}">`;
}
if (videoLd) {
video += `<source src="${videoLd}">`;
}
if (pageUrl) {
video += `<p>视频无法显示,请前往<a href="${pageUrl}" target="_blank" rel="noopener noreferrer">微博视频</a>观看。</p>`;
}
video += '</video>';
anyVideo = true;
}
}
if (anyVideo) {
itemDesc += video;
}
return itemDesc;
},
formatArticle: async (ctx, itemDesc, status) => {
const pageInfo = status.page_info;
if (pageInfo && pageInfo.type === 'article' && pageInfo.page_url) {
const pageUrl = pageInfo.page_url;
const articleIdMatch = pageUrl.match(/id=(\d+)/);
if (!articleIdMatch) {
return itemDesc;
}
const articleId = articleIdMatch[1];
const link = `https://card.weibo.com/article/m/aj/detail?id=${articleId}`;
const response = await cache.tryGet(link, async () => {
const _response = await got.get(link, {
headers: {
Referer: `https://card.weibo.com/article/m/show/id/${articleId}`,
'MWeibo-Pwa': 1,
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
},
});
return _response.data;
}); // cache it!
const article = response.data;
if (article && article.title && article.content) {
const title = article.title;
const content = article.content;
const summary = article.summary;
const createAt = article.create_at;
const readCount = article.read_count;
const isOriginal = article.is_original;
const isArticleNonFree = article.is_article_free; // 微博起错了字段名,它为 1 时才是收费文章
// 许多微博文章都给文字设置了白色背景,这里也只好使用白色背景了
let html = '<br clear="both" /><br clear="both" />';
html += '<div style="clear: both"></div><div style="background: #fff;border:5px solid #80808030;margin:0;padding:3% 5%;overflow-wrap: break-word">';
html += `<h1 style="font-size: 1.5rem;line-height: 1.25;color: #333;">${title}</h1>`; // 加入标题
// 加入文章信息
const iconStyle =
'display: inline-block;margin-inline: 0.25rem;width: 2.25rem; height: 1.125rem; background: #eee; border-radius: 2px; box-sizing: border-box; text-align: center; line-height: 1.0625rem; font-size: 0.75rem; color: #aaa;';
let articleMeta = '<p style="line-height: 1.66; color: #999;margin: 0 0 0.75rem;font-size: 0.75rem;padding: 0">';
if (isArticleNonFree) {
articleMeta += `<span style="${iconStyle}">试读</span> `;
}
if (isOriginal) {
articleMeta += `<span style="${iconStyle}">原创</span> `;
}
articleMeta += `<span style="margin-inline: 0.25rem;">发布时间: ${createAt}</span> `; // 发布时间
articleMeta += `<span style="margin-inline: 0.25rem;">阅读量: ${readCount}</span> `; // 阅读量
articleMeta += '</p>';
html += articleMeta;
if (summary) {
html += `<p style="color: #999;line-height: 1.5rem;padding: 0.0625rem 0 0.875rem;margin: 0">${summary}</p>`; // 摘要
}
html += '<div style="height: 0;border-bottom: 1px dashed #999;margin-bottom: 0.75rem;"></div>'; // 分割线
// 正文处理,加入一些在微博文章页的 CSS 中定义的不可或缺的样式
const $ = load(content);
$('p').each((_, elem) => {
elem = $(elem);
let style = elem.attr('style') || '';
style = 'margin: 0;padding: 0;border: 0;' + style;
elem.attr('style', style);
});
$('.image').each((_, elem) => {
elem = $(elem);
let style = elem.attr('style') || '';
style = 'display: table;text-align: center;margin-left: auto;margin-right: auto;clear: both;min-width: 50px;' + style;
elem.attr('style', style);
});
$('img').each((_, elem) => {
elem = $(elem);
let style = elem.attr('style') || '';
style = 'display: block;max-width: 100%;margin-left: auto;margin-right: auto;min-width: 50px;' + style;
elem.attr('style', style);
});
const contentHtml = $.html();
html += `<div style="line-height: 1.59;text-align: justify;font-size: 1.0625rem;color: #333;">${contentHtml}</div>`; // 正文
html += '</div>';
itemDesc += html;
}
}
return itemDesc;
},
formatComments: async (ctx, itemDesc, status, showBloggerIcons) => {
if (status && status.comments_count && status.id && status.mid) {
const id = status.id;
const mid = status.mid;
const link = `https://m.weibo.cn/comments/hotflow?id=${id}&mid=${mid}&max_id_type=0`;
const response = await cache.tryGet(link, async () => {
const _response = await got.get(link, {
headers: {
Referer: `https://m.weibo.cn/detail/${id}`,
'MWeibo-Pwa': 1,
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
},
});
return _response.data;
});
if (response.data && response.data.data) {
const comments = response.data.data;
itemDesc += `<br clear="both" /><div style="clear: both"></div><div style="background: #80808010;border-top:1px solid #80808030;border-bottom:1px solid #80808030;margin:0;padding:5px 20px;">`;
itemDesc += '<h3>热门评论</h3>';
for (const comment of comments) {
itemDesc += '<p style="margin-bottom: 0.5em;margin-top: 0.5em">';
let name = comment.user.screen_name;
if (showBloggerIcons === '1' && comment.blogger_icons) {
name += comment.blogger_icons[0].name;
}
itemDesc += `<a href="https://weibo.com/${comment.user.id}" target="_blank">${name}</a>: ${comment.text}`;
// 带有图片的评论直接输出图片
if ('pic' in comment) {
itemDesc += `<br><img src="${comment.pic.url}">`;
}
if (comment.comments) {
itemDesc += '<blockquote style="border-left:0.2em solid #80808080; margin-left: 0.3em; padding-left: 0.5em; margin-bottom: 0.5em; margin-top: 0.25em">';
for (const com of comment.comments) {
// 评论的带有图片的评论直接输出图片
const pattern = /<a\s+href="https:\/\/weibo\.cn\/sinaurl\?u=([^"]+)"[^>]*><span class='url-icon'><img[^>]*><\/span><span class="surl-text">(查看图片|评论配图|查看动图)<\/span><\/a>/g;
const matches = com.text.match(pattern);
if (matches) {
for (const match of matches) {
const hrefMatch = match.match(/href="https:\/\/weibo\.cn\/sinaurl\?u=([^"]+)"/);
if (hrefMatch) {
// 获取并解码 href 中的图片 URL
const imgSrc = decodeURIComponent(hrefMatch[1]);
const imgTag = `<img src="${imgSrc}" style="width: 1rem; height: 1rem;">`;
// 用替换后的 img 标签替换原来的 <a> 标签部分
com.text = com.text.replaceAll(match, imgTag);
}
}
}
itemDesc += '<div style="font-size: 0.9em">';
let name = com.user.screen_name;
if (showBloggerIcons === '1' && com.blogger_icons) {
name += com.blogger_icons[0].name;
}
itemDesc += `<a href="https://weibo.com/${com.user.id}" target="_blank">${name}</a>: ${com.text}`;
itemDesc += '</div>';
}
itemDesc += '</blockquote>';
}
itemDesc += '</p>';
}
itemDesc += '</div>';
}
}
return itemDesc;
},
sinaimgTvax: (() => {
// https://datatracker.ietf.org/doc/html/rfc1808#section-2.4.3
const regex = /(?<=\/\/)wx(?=[1-4]\.sinaimg\.cn\/)/gi;
// const prefixes = ['tva', 'tvax'];
// let cnt = 0;
// const replace = (html) => {
// cnt = (cnt + 1) % 2;
// return html.replace(regex, prefixes[cnt]);
// };
const replace = (html) => html.replaceAll(regex, 'tvax'); // enforce `tvax` as `tva` has a strict WAF
const replaceKV = (obj, keys) => {
for (const key of keys) {
if (obj[key]) {
obj[key] = replace(obj[key]);
}
}
};
const dataKeys = ['description', 'image'];
const itemKeys = ['description'];
return (data) => {
if (data) {
replaceKV(data, dataKeys);
if (data.item) {
for (const item of data.item) {
replaceKV(item, itemKeys);
}
}
}
return data;
};
})(),
};
export default weiboUtils;