koishi-plugin-nitter-rss
Version:
订阅 X (Twitter) 内容,使用 nitter.cz,支持ChatGPT与Gradio Chatbot翻译
205 lines (204 loc) • 9.2 kB
JavaScript
;
// TypeScript interop helper: exposes `source[key]` on `target` under the name
// `alias` (falling back to `key`). An already-installed `this.__createBinding`
// takes precedence over the implementation chosen below.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Fallback branch used when Object.create is unavailable: plain value copy.
        return function (target, source, key, alias) {
            if (alias === undefined) alias = key;
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor must be swapped for a forwarding getter.
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
// TypeScript interop helper: stores `value` as the `default` member of `target`.
// An already-installed `this.__setModuleDefault` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        // Defined via a descriptor: enumerable, and (by defineProperty defaults)
        // neither writable nor configurable.
        return function (target, value) {
            Object.defineProperty(target, "default", { value: value, enumerable: true });
        };
    }
    // Fallback branch used when Object.create is unavailable: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
// TypeScript interop helper behind `import * as ns from "..."`.
// Modules flagged with `__esModule` are returned untouched; anything else is
// wrapped in a fresh object that re-exposes every own property except
// "default" and carries the original module as its `default` member.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.capturehtml = void 0;
const koishi_1 = require("koishi");
const fs = __importStar(require("fs"));
const cheerio = __importStar(require("cheerio"));
const downloader_1 = require("./downloader");
const utils_1 = require("./utils");
const logger = new koishi_1.Logger('nitter-rss-puppeteer');
/**
 * Read the author display name and the publish time from a parsed status page.
 *
 * @param {Function} $ - Document returned by `cheerio.load`.
 * @returns {{fullname: string, timestamp: *, timeText: *}} The display name text,
 *   the result of `parseTimestamp` on the page's time text, and the result of
 *   `formatLocalTime` on that timestamp.
 */
function extractStatusMeta($) {
    const fullname = $('#m > div > div > div:nth-child(1) > div > div > div > a.fullname').text();
    const rawTime = $('#m > div > div > p').text();
    const timestamp = (0, utils_1.parseTimestamp)(rawTime);
    const timeText = (0, utils_1.formatLocalTime)(timestamp);
    return { fullname, timestamp, timeText };
}
/**
 * Fetch a single status page from a nitter instance (or reuse the on-disk cache)
 * and return its text, metadata, optional screenshot and optional images.
 *
 * Cache files live under `./data/cache/nitter-rss/<account>/status/` and are named
 * `<id>_screenshot.png`, `<id>_webpage.html` and `<id>_content.txt`.
 *
 * @param {string} nitterUrl - Host of the nitter instance (used as `https://<nitterUrl>/...`).
 * @param {object} ctx - Koishi context; `ctx.puppeteer.page()` must yield a Puppeteer page.
 * @param {string} account - Account name, used in the URL and the cache path.
 * @param {string} id - Status id, used in the URL and the cache file names.
 * @param {boolean} getScreenshot - Whether a screenshot of the main thread is required.
 * @param {boolean} sendImage - Whether images referenced by the page should be collected.
 * @param {number} [width] - Optional viewport width; the height is fixed at 4000.
 * @returns {Promise<{extractedContent: *, fullname: string, timestamp: *, timeText: *, screenshot: (Buffer|undefined), images: Array}>}
 * @throws {Error} When the page ends up on a different URL than requested, or when a
 *   screenshot is requested and the main-thread element is missing.
 */
async function capturehtml(nitterUrl, ctx, account, id, getScreenshot, sendImage, width) {
    const cacheDir = `./data/cache/nitter-rss/${account}/status/`;
    const screenshotPath = `${cacheDir}${id}_screenshot.png`;
    const htmlPath = `${cacheDir}${id}_webpage.html`;
    const contentPath = `${cacheDir}${id}_content.txt`;
    const mainThreadSelector = 'body > div > div > div.main-thread';
    // Make sure the cache directory exists.
    (0, downloader_1.createDirIfNonExist)(cacheDir);
    // Cache hit. The screenshot file is only mandatory when a screenshot was
    // requested; otherwise the cache could never be used with screenshots disabled,
    // because the screenshot file is never written in that mode.
    const hasScreenshot = fs.existsSync(screenshotPath);
    if ((hasScreenshot || !getScreenshot) && fs.existsSync(htmlPath) && fs.existsSync(contentPath)) {
        const screenshot = hasScreenshot ? fs.readFileSync(screenshotPath) : undefined;
        const $ = cheerio.load(fs.readFileSync(htmlPath).toString());
        const { fullname, timestamp, timeText } = extractStatusMeta($);
        const extractedContent = fs.readFileSync(contentPath).toString();
        const images = sendImage ? await getImageFromHtml(ctx, $, account, id, nitterUrl) : [];
        return { extractedContent: (0, utils_1.cleanText)(extractedContent), fullname, timestamp, timeText, screenshot, images };
    }
    // Cache miss: render the page with Puppeteer.
    const url = `https://${nitterUrl}/${account}/status/${id}`;
    let html;
    let screenshotData;
    const page = await ctx.puppeteer.page();
    try {
        if (width) {
            await page.setViewport({ width, height: 4000 });
        }
        // Up to three best-effort attempts; the reload further down is what
        // finally surfaces a persistent navigation failure.
        for (let attempt = 0; attempt < 3; attempt++) {
            try {
                await page.goto(url);
                break;
            }
            catch (e) {
                logger.error(e);
                logger.error(`Failed to load page: ${url}, retrying...`);
            }
        }
        // Detect the interstitial form that has to be submitted before the page is shown.
        // A boolean is returned because DOM nodes do not serialize across evaluate().
        const isSkip = await page.evaluate(() => {
            const form = document.querySelector('form#reqform');
            return Boolean(form && form.querySelector('input[type="submit"]'));
        });
        if (isSkip) {
            try {
                await page.click('form#reqform input[type="submit"]');
            }
            catch (e) {
                logger.error(e);
            }
        }
        // Reload and wait for the DOM to be ready.
        await page.goto(url, { 'waitUntil': 'domcontentloaded' });
        if (page.url() !== url) {
            throw new Error(`Failed to load page: ${url}, instead loaded: ${page.url()}`);
        }
        // Strip elements that should not appear in the screenshot or saved HTML.
        const removeSelectors = [
            'body > nav',
            '#r',
            '#m > div > div > div:nth-child(1) > div > div > span'
        ];
        await page.evaluate((selectors) => {
            for (const selector of selectors) {
                for (const element of document.querySelectorAll(selector)) {
                    element.remove();
                }
            }
        }, removeSelectors);
        if (getScreenshot) {
            const elementHandle = await page.$(mainThreadSelector);
            if (!elementHandle) {
                throw new Error(`Element "${mainThreadSelector}" not found.`);
            }
            screenshotData = await elementHandle.screenshot();
            // Saving the screenshot stays best-effort: a failed write is logged, not thrown.
            try {
                fs.writeFileSync(screenshotPath, screenshotData);
                logger.success("webpage screenshot saved.");
            }
            catch (err) {
                logger.error(err);
            }
        }
        html = await page.content();
    }
    finally {
        // Always release the browser page, even when navigation or capture failed.
        try {
            await page.close();
        }
        catch (e) {
            logger.error(e);
        }
    }
    fs.writeFileSync(htmlPath, html);
    logger.success("webpage html saved.");
    // Parse the saved HTML with cheerio.
    const $ = cheerio.load(html);
    const { fullname, timestamp, timeText } = extractStatusMeta($);
    // Drop everything that is not the status body before extracting its text.
    const noiseSelectors = [
        '.tweet-header', // header block
        '.tweet-name-row', // header of a retweeted status
        '.tweet-published', // publish time
        '.tweet-stats', // retweet/like counters
        '.inner-nav', // navigation bar
        '.replies' // replies
    ];
    for (const selector of noiseSelectors) {
        $(selector).remove();
    }
    const extractedContent = (0, utils_1.removeHTMLTags)($(mainThreadSelector).toString());
    fs.writeFileSync(contentPath, extractedContent);
    const images = sendImage ? await getImageFromHtml(ctx, $, account, id, nitterUrl) : [];
    return { extractedContent: (0, utils_1.cleanText)(extractedContent), fullname, timestamp, timeText, screenshot: screenshotData, images };
}
exports.capturehtml = capturehtml;
/**
 * Collect the images referenced by a parsed status page, using cached files
 * when present and downloading the rest.
 *
 * Links with class `still-image` are preferred. When there are none, header,
 * stats, navigation and reply elements are removed from `$` (the caller's
 * document is mutated) and every remaining `<img src>` is used instead.
 *
 * Each image is cached as `<id>_images_<n>.png`, where `n` is the image's
 * position in the collected URL list. Keying by position (rather than by the
 * number of successful downloads) keeps a cached file tied to the same URL
 * even when an earlier download failed.
 *
 * @param {object} ctx - Koishi context, forwarded to `download`.
 * @param {Function} $ - Document returned by `cheerio.load`.
 * @param {string} account - Account name, used in the cache path.
 * @param {string} id - Status id, used in the cache file names.
 * @param {string} [nitterUrl='nitter.cz'] - Host used to resolve relative image URLs.
 * @returns {Promise<Array>} Image data for every image that could be read or
 *   downloaded; failures are logged and skipped.
 */
async function getImageFromHtml(ctx, $, account, id, nitterUrl = 'nitter.cz') {
    const imageUrls = [];
    // Prefer links to the original images.
    $('.still-image').each((index, element) => {
        const linkUrl = $(element).attr('href');
        if (linkUrl) {
            imageUrls.push(linkUrl);
        }
    });
    // No originals: fall back to every image left after removing page chrome.
    if (imageUrls.length === 0) {
        const noiseSelectors = [
            '.tweet-header', // header block
            '.tweet-name-row', // header of a retweeted status
            '.tweet-published', // publish time
            '.tweet-stats', // retweet/like counters
            '.inner-nav', // navigation bar
            '.replies' // replies
        ];
        for (const selector of noiseSelectors) {
            $(selector).remove();
        }
        $('img').each((index, element) => {
            const imageUrl = $(element).attr('src');
            if (imageUrl) {
                imageUrls.push(imageUrl);
            }
        });
    }
    const cacheDir = `./data/cache/nitter-rss/${account}/status/`;
    const baseUrl = `https://${nitterUrl}`;
    const images = [];
    for (const [position, imageUrl] of imageUrls.entries()) {
        const fileName = `${id}_images_${position}.png`;
        try {
            let imageBuffer;
            if (fs.existsSync(`${cacheDir}${fileName}`)) {
                imageBuffer = fs.readFileSync(`${cacheDir}${fileName}`);
            }
            else {
                // new URL() resolves relative paths against the instance and
                // leaves already-absolute URLs intact.
                const absoluteUrl = new URL(imageUrl, baseUrl).href;
                imageBuffer = await (0, downloader_1.download)(ctx, absoluteUrl, cacheDir, fileName);
            }
            images.push(imageBuffer);
        }
        catch (e) {
            logger.error(e);
        }
    }
    return images;
}