UNPKG

koishi-plugin-nitter-rss

Version:

订阅 X (Twitter) 内容,使用 nitter.cz,支持ChatGPT与Gradio Chatbot翻译

205 lines (204 loc) 9.2 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.capturehtml = void 0; const koishi_1 = require("koishi"); const fs = __importStar(require("fs")); const cheerio = __importStar(require("cheerio")); const downloader_1 = require("./downloader"); const utils_1 = require("./utils"); const logger = new koishi_1.Logger('nitter-rss-puppeteer'); async function capturehtml(nitterUrl, ctx, account, id, getScreenshot, sendImage, width) { //创建目录 (0, downloader_1.createDirIfNonExist)(`./data/cache/nitter-rss/${account}/status/`); //如果文件已经存在,使用缓存 if (fs.existsSync(`./data/cache/nitter-rss/${account}/status/${id}_screenshot.png`) && fs.existsSync(`./data/cache/nitter-rss/${account}/status/${id}_webpage.html`) && fs.existsSync(`./data/cache/nitter-rss/${account}/status/${id}_content.txt`)) { const screenshotData = fs.readFileSync(`./data/cache/nitter-rss/${account}/status/${id}_screenshot.png`); const html = fs.readFileSync(`./data/cache/nitter-rss/${account}/status/${id}_webpage.html`).toString(); const $ = cheerio.load(html); const fullname = $('#m > div > div > div:nth-child(1) > div > div > div > a.fullname').text(); //获取用户名 const time_org = $('#m > div > div > p').text(); //获取时间 const timestamp = (0, utils_1.parseTimestamp)(time_org); //获取时间戳 const timeText = (0, utils_1.formatLocalTime)(timestamp); //获取本地时间 const extractedContent = fs.readFileSync(`./data/cache/nitter-rss/${account}/status/${id}_content.txt`).toString(); //获取内容 //尝试获取图片 let images = []; let imageId = 0; if (sendImage) { images = await getImageFromHtml(ctx, $, account, id); } return { extractedContent: (0, utils_1.cleanText)(extractedContent), fullname, timestamp, timeText, screenshot: screenshotData, images }; } else { //如果文件不存在,获取网页 const url = `https://${nitterUrl}/${account}/status/${id}`; //网页地址 const page = await ctx.puppeteer.page(); if (width) { await page.setViewport({ width, height: 4000 }); } //await page.goto(url); 重试3次 let retry = 0; while (retry < 3) { try { await page.goto(url); break; } catch (e) { logger.error(e); logger.error(`Failed to load page: ${url}, retrying...`); retry++; } } // 检测是否需要跳过检测 const isSkip = await page.evaluate(() => { const form = document.querySelector('form#reqform'); return form && form.querySelector('input[type="submit"]'); }); // 模拟点击跳过检测 if (isSkip) { // 模拟点击跳过检测 try { await page.click('form#reqform input[type="submit"]'); } catch (e) { logger.error(e); } } // 刷新页面,等待页面加载完成 await page.goto(url, { 'waitUntil': 'domcontentloaded' }); //检测是否跳goto成功 if (page.url() != url) { throw new Error(`Failed to load page: ${url}, instead loaded: ${page.url()}`); } // 删除网页内容函数 async function removeSelectorContent(selector) { await page.evaluate((selector) => { const elements = document.querySelectorAll(selector); for (const element of elements) { element.remove(); } }, selector); } // 删除网页内容 const removeSelectors = [ 'body > nav', '#r', '#m > div > div > div:nth-child(1) > div > div > span' ]; for (const selector of removeSelectors) { await removeSelectorContent(selector); } // 获取网页截图 let screenshotData; if (getScreenshot) { let elementSelector = 'body > div > div > div.main-thread'; const elementHandle = await page.$(elementSelector); if (elementHandle) { screenshotData = await elementHandle.screenshot(); } else { throw new Error(`Element "${elementSelector}" not found.`); } fs.writeFile(`./data/cache/nitter-rss/${account}/status/${id}_screenshot.png`, screenshotData, function (err) { if (err) { return console.error(err); } logger.success("webpage screenshot saved."); }); } //保存网页 const html = await page.content(); // 获取网页的HTML内容 fs.writeFileSync(`./data/cache/nitter-rss/${account}/status/${id}_webpage.html`, html); //保存网页 logger.success("webpage html saved."); // 使用cheerio解析HTML const $ = cheerio.load(html); const fullname = $('#m > div > div > div:nth-child(1) > div > div > div > a.fullname').text(); //获取用户名 const time_org = $('#m > div > div > p').text(); //获取时间 const timestamp = (0, utils_1.parseTimestamp)(time_org); //获取时间戳 const timeText = (0, utils_1.formatLocalTime)(timestamp); //获取本地时间 //移除多余的内容 $('.tweet-header').remove(); //顶部信息 $('.tweet-name-row').remove(); //转发顶部信息 $('.tweet-published').remove(); //发布时间 $('.tweet-stats').remove(); //转发数等信息 $('.inner-nav').remove(); //导航栏 $('.replies').remove(); //回复 // 提取指定元素的内容 const extractedContent = (0, utils_1.removeHTMLTags)($('body > div > div > div.main-thread').toString()); fs.writeFileSync(`./data/cache/nitter-rss/${account}/status/${id}_content.txt`, extractedContent); //保存内容 let images = []; if (sendImage) { images = await getImageFromHtml(ctx, $, account, id); } return { extractedContent: (0, utils_1.cleanText)(extractedContent), fullname, timestamp, timeText, screenshot: screenshotData, images }; } } exports.capturehtml = capturehtml; async function getImageFromHtml(ctx, $, account, id) { //移除多余的内容 // 保存网页的所有图片 let imageUrls = []; // 保存所有原图 $('.still-image').each((index, element) => { const linkUrl = $(element).attr('href'); if (linkUrl) { imageUrls.push(linkUrl); } }); // 如果没有原图,尝试获取所有图片素材 if (imageUrls.length == 0) { $('.tweet-header').remove(); //顶部信息 $('.tweet-name-row').remove(); //转发顶部信息 $('.tweet-published').remove(); //发布时间 $('.tweet-stats').remove(); //转发数等信息 $('.inner-nav').remove(); //导航栏 $('.replies').remove(); //回复 $('img').each((index, element) => { const imageUrl = $(element).attr('src'); if (imageUrl) { imageUrls.push(imageUrl); } }); } let imageId = 0; let images = []; for (const imageUrl of imageUrls) { try { let imageBuffer; if (fs.existsSync(`./data/cache/nitter-rss/${account}/status/${id}_images_${imageId}.png`)) { imageBuffer = fs.readFileSync(`./data/cache/nitter-rss/${account}/status/${id}_images_${imageId}.png`); } else { imageBuffer = await (0, downloader_1.download)(ctx, `https://nitter.cz${imageUrl}`, `./data/cache/nitter-rss/${account}/status/`, `${id}_images_${imageId}.png`); } // 下载图片并保存到文件 images.push(imageBuffer); imageId++; } catch (e) { logger.error(e); } } return images; }