UNPKG

koishi-plugin-tieba-parse

Version:

一个用于解析百度贴吧链接,并生成帖子截图、提取内容的 Koishi 插件。

461 lines (459 loc) 21 kB
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers.
// These are kept (including `__name`) because the rest of the file calls them.
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines a configurable `name` property on `target`.
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

// Exposes every entry of `all` on `target` as an enumerable getter.
const __export = (target, all) => {
  for (const name2 in all) {
    __defProp(target, name2, { get: all[name2], enumerable: true });
  }
};

// Copies own properties of `from` onto `to` as getters, skipping keys that
// `to` already owns and the optional `except` key.
const __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (const key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
      }
    }
  }
  return to;
};

const __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
// NOTE: this marker comment must stay on a line of its own. When it shared a
// line with the statements below, it commented out the export table, the
// `require("koishi")` call and the start of `Config`.
const src_exports = {};
__export(src_exports, {
  Config: () => Config,
  apply: () => apply,
  name: () => name,
  using: () => using
});
module.exports = __toCommonJS(src_exports);

const import_koishi = require("koishi");

/** Plugin name exported to the host. */
const name = "tieba-parser-final";

/** Services listed in the exported `using` array. */
const using = ["puppeteer"];

/** Plugin configuration schema: parsing options, login cookie, debug switch. */
const Config = import_koishi.Schema.intersect([
  import_koishi.Schema.object({
    width: import_koishi.Schema.number().default(800).description("截图的默认宽度(像素)。"),
    screenshotHeight: import_koishi.Schema.number().default(0).description("设置截图的最大高度(像素)。设置为 0 则代表不限制高度,截取帖子第一页的所有内容。"),
    showTitle: import_koishi.Schema.boolean().default(true).description("是否在截图上方显示帖子标题。"),
    extractFirstPostText: import_koishi.Schema.boolean().default(true).description("是否在截图前提取并发送1楼的纯文本内容。"),
    extractFirstPostImages: import_koishi.Schema.boolean().default(true).description("是否在截图下方提取并发送1楼的全部图片。"),
    extractFirstPostVideo: import_koishi.Schema.boolean().default(true).description("是否提取并发送1楼的视频(若存在)。")
  }).description("解析设置"),
  import_koishi.Schema.object({
    cookie: import_koishi.Schema.string().role("textarea").description("请通过 /tieba.login 指令获取。")
  }).description("登录信息"),
  import_koishi.Schema.object({
    debugMode: import_koishi.Schema.boolean().default(false).description("启用调试模式。开启后,将在后台控制台输出详细的操作日志。")
  }).description("调试")
]);

/** Matches tieba post links; the numeric post id is capture group 2 or 4. */
const TIEBA_REG = /(tieba\.baidu\.com\/p\/(\d+))|(jump\.bdimg\.com\/p\/(\d+))/;

/**
 * Parsing strategy for the legacy tieba page layout: which element to
 * screenshot, which selectors signal readiness, which CSS hides clutter, and
 * how to extract first-post data.
 */
const OLD_FRONTEND = {
  mode: "old",
  screenshotSelector: "#j_p_postlist",
  readySelectors: ["#j_p_postlist", ".l_post"],
  cleanupCss: `#com_userbar, .tb-header, .right_section, .core_reply_wrapper, .app_download_wrap, .see-more-wrap, .tb_rich_poster_container, .footer, .j_user_sign, .quick_reply_button, .share_btn_wrapper, .celebrity, .post-client-promotion, .lottery-exp-wrap, .simple-card, .vip-red-name-honour-wrap, .bawu-button-wrapper, .video_header_wrap, .fix_bar_wrap { display: none !important; } .pb_content { width: auto !important; }`,

  /**
   * Reads title, first-post text, image URLs and video URL from the page.
   *
   * @param {object} page - Puppeteer page already showing the legacy layout.
   * @param {object} cfg - Plugin config; only the four feature flags are read.
   * @returns {Promise<{postTitle: string, firstPostText: string, imageUrls: string[], videoUrl: string}>}
   */
  async extract(page, cfg) {
    // Hand the page context only the flags it needs, so unrelated settings
    // (notably the login cookie string) are never serialized into the page.
    const flags = {
      showTitle: cfg.showTitle,
      extractFirstPostText: cfg.extractFirstPostText,
      extractFirstPostImages: cfg.extractFirstPostImages,
      extractFirstPostVideo: cfg.extractFirstPostVideo
    };
    // The callback runs inside the browser; it must not close over any
    // Node-side variables.
    const data = await page.evaluate((cfg2) => {
      const result = { postTitle: "", firstPostText: "", imageUrls: [], videoUrl: "" };
      if (cfg2.showTitle) {
        result.postTitle = document.title.replace(/_百度贴吧$/, "").trim();
      }
      const firstPost = document.querySelector(".l_post");
      if (!firstPost) return result;
      const contentElement = firstPost.querySelector(".d_post_content_main .p_content");
      if (contentElement) {
        if (cfg2.extractFirstPostText) {
          result.firstPostText = contentElement.innerText.trim();
        }
        if (cfg2.extractFirstPostImages) {
          const imageElements = contentElement.querySelectorAll("img.BDE_Image");
          result.imageUrls = Array.from(imageElements).map((img) => {
            return img.currentSrc || img.src || img.dataset.src || img.getAttribute("data-src") || "";
          }).filter(Boolean);
        }
      }
      if (cfg2.extractFirstPostVideo) {
        const videoElement = firstPost.querySelector(".d_post_content_main video");
        if (videoElement) {
          result.videoUrl = videoElement.currentSrc || videoElement.src || "";
        }
      }
      return result;
    }, flags);
    return normalizeExtractedData(data);
  }
};

/** Selectors whose joint presence identifies the new tieba layout. */
const NEW_FRONTEND_READY_SELECTORS = [".center-content", ".pb-title"];

/**
 * Serializes cookie objects into a `name=value; name=value` string.
 *
 * @param {Array<{name: string, value: string}>} cookies
 * @returns {string}
 */
function formatCookie(cookies) {
  // The expression must start on the same line as `return`; a line break
  // there triggers automatic semicolon insertion and yields `undefined`.
  return cookies.map((c) => `${c.name}=${c.value}`).join("; ");
}
__name(formatCookie, "formatCookie");

/**
 * Parses a `name=value; ...` cookie string into cookie objects scoped to
 * `.baidu.com`. Pairs with an empty name are dropped; `=` characters inside
 * the value are preserved.
 *
 * @param {string} cookieString
 * @returns {Array<{name: string, value: string, domain: string}>}
 */
function parseCookie(cookieString) {
  if (!cookieString) return [];
  return cookieString
    .split(";")
    .map((pair) => {
      const [rawName, ...valueParts] = pair.split("=");
      return {
        name: rawName.trim(),
        value: valueParts.join("=").trim(),
        domain: ".baidu.com"
      };
    })
    .filter((cookie) => cookie.name);
}
__name(parseCookie, "parseCookie");

/**
 * Trims every line, drops empty lines and the "click to expand" placeholder
 * lines, and rejoins the rest with `\n`.
 *
 * @param {string} text
 * @returns {string}
 */
function cleanExtractedText(text) {
  return text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line && !/^点击展开,查看完整(?:图片|视频)$/.test(line))
    .join("\n")
    .trim();
}
__name(cleanExtractedText, "cleanExtractedText");

/**
 * Normalizes raw extraction output: trims strings, cleans the post text and
 * removes empty or duplicate image URLs. Missing fields become empty values.
 *
 * @param {object} data - Raw result produced inside the page context.
 * @returns {{postTitle: string, firstPostText: string, imageUrls: string[], videoUrl: string}}
 */
function normalizeExtractedData(data) {
  const trimmedUrls = (data.imageUrls || []).map((url) => url.trim()).filter(Boolean);
  return {
    postTitle: (data.postTitle || "").trim(),
    firstPostText: cleanExtractedText(data.firstPostText || ""),
    imageUrls: [...new Set(trimmedUrls)],
    videoUrl: (data.videoUrl || "").trim()
  };
}
__name(normalizeExtractedData, "normalizeExtractedData");

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
__name(wait, "wait");

/**
 * Forwards `message` and `args` to `logger.info` when `enabled` is truthy.
 *
 * @param {object} logger
 * @param {boolean} enabled
 * @param {string} message
 * @param {...*} args
 */
function logDebug(logger, enabled, message, ...args) {
  if (enabled) logger.info(message, ...args);
}
__name(logDebug, "logDebug");

/**
 * Checks whether every selector currently matches an element in the page.
 *
 * @param {object} page - Puppeteer page.
 * @param {string[]} selectors
 * @returns {Promise<boolean>}
 */
async function hasSelectors(page, selectors) {
  return page.evaluate((selectors2) => {
    return selectors2.every((selector) => !!document.querySelector(selector));
  }, selectors);
}
__name(hasSelectors, "hasSelectors");

/**
 * Polls every 300 ms until all selectors match or `timeout` ms elapse.
 *
 * @param {object} page - Puppeteer page.
 * @param {string[]} selectors
 * @param {number} [timeout=10000] - Maximum wait in milliseconds.
 * @returns {Promise<boolean>} `true` once all selectors match, `false` on timeout.
 */
async function waitForSelectors(page, selectors, timeout = 1e4) {
  const start = Date.now();
  while (Date.now() - start < timeout) {
    try {
      if (await hasSelectors(page, selectors)) return true;
    } catch {
      // Deliberate best-effort: a failed probe is treated as "not ready yet"
      // and retried on the next tick.
    }
    await wait(300);
  }
  return false;
}
__name(waitForSelectors, "waitForSelectors");

/**
 * Identifies which tieba layout the page currently shows.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<"old"|"new"|null>} `null` when neither layout is detected.
 */
async function detectFrontend(page) {
  if (await hasSelectors(page, OLD_FRONTEND.readySelectors)) {
    return "old";
  }
  if (await hasSelectors(page, NEW_FRONTEND_READY_SELECTORS)) {
    return "new";
  }
  return null;
}
__name(detectFrontend, "detectFrontend");
/** Maximum time (ms) to wait for the legacy layout after a switch attempt. */
const OLD_FRONTEND_SWITCH_TIMEOUT_MS = 15e3;

/** Returns a short, human-readable message for any thrown value. */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/** Returns the stack (or message) for any thrown value, for error logs. */
function describeErrorWithStack(error) {
  return error instanceof Error ? error.stack || error.message : String(error);
}

/**
 * Closes a Puppeteer page without letting a close failure escape.
 * A throw inside a `finally` block would otherwise replace the handler's
 * return value and skip the remaining cleanup.
 *
 * @returns {Promise<boolean>} `true` when the page was closed.
 */
async function closePage(page, logger) {
  try {
    await page.close();
    return true;
  } catch (error) {
    logger.warn("关闭 Puppeteer 页面失败: %s", describeError(error));
    return false;
  }
}

/**
 * Finds the first visible, interactive element associated with the SVG
 * `<use>` icon `#iconId`, then clicks its centre with the mouse.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} iconId - Icon id without the leading `#`.
 * @param {object} logger
 * @param {boolean} debugMode
 * @returns {Promise<object|null>} Description of the clicked target
 *   (`x`, `y`, `href`, `tag`, `text`, `className`), or `null` if none found.
 */
async function clickIconTarget(page, iconId, logger, debugMode) {
  // Runs inside the browser; must not reference Node-side variables.
  const target = await page.evaluate((iconId2) => {
    const wanted = `#${iconId2}`;
    const uses = Array.from(document.querySelectorAll("use")).filter((node) => {
      return node.getAttribute("xlink:href") === wanted || node.getAttribute("href") === wanted;
    });
    for (const use of uses) {
      // Candidate order: the closest semantic clickable ancestor first, then
      // the icon itself and its ancestors, capped at 10 candidates in total.
      const preferred = [
        use.closest('button, a, [role="button"], [role="menuitem"], [role="link"], .menu-item, .menu-item-content, .more-btn, .operate-btn, li')
      ].filter(Boolean);
      let current = use;
      while (current && preferred.length < 10) {
        preferred.push(current);
        current = current.parentElement;
      }
      const seen = new Set();
      for (const candidate of preferred) {
        if (seen.has(candidate)) continue;
        seen.add(candidate);
        const rect = candidate.getBoundingClientRect();
        const style = window.getComputedStyle(candidate);
        if (rect.width <= 0 || rect.height <= 0) continue;
        if (style.visibility === "hidden" || style.display === "none") continue;
        const rawHref = candidate instanceof HTMLAnchorElement
          ? candidate.href
          : candidate.getAttribute("href") || candidate.getAttribute("data-href") || candidate.getAttribute("data-url");
        const role = candidate.getAttribute("role");
        const interactive = candidate instanceof HTMLButtonElement
          || candidate instanceof HTMLAnchorElement
          || role === "button"
          || role === "menuitem"
          || role === "link"
          || candidate.hasAttribute("onclick")
          || candidate.getAttribute("tabindex") !== null
          || style.cursor === "pointer";
        if (!interactive && !rawHref) continue;
        let href = "";
        if (rawHref) {
          try {
            href = new URL(rawHref, location.href).href;
          } catch {
            // Not a parseable URL; report the raw attribute value instead.
            href = rawHref;
          }
        }
        return {
          x: Math.round(rect.left + rect.width / 2),
          y: Math.round(rect.top + rect.height / 2),
          href,
          tag: candidate.tagName.toLowerCase(),
          text: (candidate.textContent || "").trim().slice(0, 80),
          className: candidate.getAttribute("class") || ""
        };
      }
    }
    return null;
  }, iconId);
  if (!target) {
    logDebug(logger, debugMode, "未定位到 %s 图标对应的可点击目标。", iconId);
    return null;
  }
  logDebug(logger, debugMode, "定位到 %s 图标目标: %o", iconId, target);
  await page.mouse.move(target.x, target.y);
  await page.mouse.down();
  await page.mouse.up();
  return target;
}
__name(clickIconTarget, "clickIconTarget");

/**
 * Waits for the legacy layout to appear after the "back to old" click. If it
 * does not appear in time and a fallback URL is known, navigates there and
 * waits again.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} initialUrl - URL before the click; used for change logging.
 * @param {object} logger
 * @param {boolean} debugMode
 * @param {string} [fallbackUrl=""] - Link target of the clicked menu entry.
 * @returns {Promise<boolean>} `true` when the legacy layout is ready.
 */
async function waitForOldFrontendSwitch(page, initialUrl, logger, debugMode, fallbackUrl = "") {
  const start = Date.now();
  let lastUrl = initialUrl;
  while (Date.now() - start < OLD_FRONTEND_SWITCH_TIMEOUT_MS) {
    try {
      if (await hasSelectors(page, OLD_FRONTEND.readySelectors)) return true;
      const currentUrl = page.url();
      if (currentUrl !== lastUrl) {
        logDebug(logger, debugMode, "切页过程中 URL 变化: %s -> %s", lastUrl, currentUrl);
        lastUrl = currentUrl;
      }
    } catch {
      // Deliberate best-effort: a failed probe is retried on the next tick.
    }
    await wait(300);
  }
  if (fallbackUrl && fallbackUrl !== page.url()) {
    logDebug(logger, debugMode, "点击未直接切页,尝试使用菜单目标地址进入旧版: %s", fallbackUrl);
    try {
      await page.goto(fallbackUrl, { waitUntil: "networkidle2" });
      const ready = await waitForSelectors(page, OLD_FRONTEND.readySelectors, OLD_FRONTEND_SWITCH_TIMEOUT_MS);
      logDebug(logger, debugMode, "通过菜单目标地址进入旧版结果: %s", ready);
      if (ready) return true;
    } catch (error) {
      logDebug(logger, debugMode, "通过菜单目标地址进入旧版失败: %s", describeError(error));
    }
  }
  // The page snapshot exists only for diagnostics: skip it when debugging is
  // off, and never let a failed snapshot turn "switch failed" into a throw.
  if (debugMode) {
    try {
      const finalState = await page.evaluate(() => ({
        href: location.href,
        title: document.title,
        oldRoot: !!document.querySelector("#j_p_postlist"),
        oldPost: !!document.querySelector(".l_post"),
        centerContent: !!document.querySelector(".center-content"),
        pbTitle: !!document.querySelector(".pb-title")
      }));
      logDebug(logger, debugMode, "旧版切换后页面状态: %o", finalState);
    } catch (error) {
      logDebug(logger, debugMode, "读取旧版切换后页面状态失败: %s", describeError(error));
    }
  }
  return false;
}
__name(waitForOldFrontendSwitch, "waitForOldFrontendSwitch");

/**
 * Switches from the new layout to the legacy one by clicking the "ellipsis"
 * icon and then the "back_old" icon.
 *
 * @param {object} page - Puppeteer page.
 * @param {object} logger
 * @param {boolean} debugMode
 * @returns {Promise<boolean>} `true` when the legacy layout became ready.
 */
async function switchToOldFrontend(page, logger, debugMode) {
  logDebug(logger, debugMode, "尝试切换到旧版贴吧前端。");
  const beforeUrl = page.url();
  const ellipsisTarget = await clickIconTarget(page, "ellipsis", logger, debugMode);
  const openedMenu = !!ellipsisTarget;
  // Short pause after clicking the ellipsis before looking for the entry.
  if (openedMenu) await wait(500);
  const backOldTarget = await clickIconTarget(page, "back_old", logger, debugMode);
  const clickedBackOld = !!backOldTarget;
  const backOldHref = backOldTarget?.href || "";
  logDebug(logger, debugMode, "旧版切换点击结果: %o", { openedMenu, clickedBackOld, backOldHref });
  if (!clickedBackOld) {
    logDebug(logger, debugMode, "未找到“回旧版”按钮。");
    return false;
  }
  const switched = await waitForOldFrontendSwitch(page, beforeUrl, logger, debugMode, backOldHref);
  logDebug(logger, debugMode, "旧版前端切换结果: %s", switched);
  return switched;
}
__name(switchToOldFrontend, "switchToOldFrontend");

/**
 * Scrolls to the bottom repeatedly (at most 15 rounds, 500 ms apart) while the
 * document keeps growing, then scrolls back to the top.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function scrollPageForLazyContent(page) {
  await page.evaluate(async () => {
    let lastHeight = -1;
    let currentHeight = 0;
    let tries = 0;
    while (lastHeight < currentHeight && tries < 15) {
      window.scrollTo(0, document.body.scrollHeight);
      lastHeight = currentHeight;
      await new Promise((resolve) => setTimeout(resolve, 500));
      currentHeight = document.body.scrollHeight;
      tries++;
    }
  });
  await page.evaluate(() => window.scrollTo(0, 0));
  await wait(100);
}
__name(scrollPageForLazyContent, "scrollPageForLazyContent");

/**
 * Injects the strategy's cleanup CSS and screenshots the content container.
 *
 * @param {object} page - Puppeteer page.
 * @param {object} strategy - Provides `cleanupCss`, `screenshotSelector`, `mode`.
 * @param {number} screenshotHeight - Maximum height in px; `0` or less means unlimited.
 * @param {object} logger
 * @param {boolean} debugMode
 * @returns {Promise<*>} Result of `page.screenshot`.
 * @throws {Error} When the container or its bounding box is unavailable.
 */
async function captureContentScreenshot(page, strategy, screenshotHeight, logger, debugMode) {
  await page.addStyleTag({ content: strategy.cleanupCss });
  const contentArea = await page.$(strategy.screenshotSelector);
  if (!contentArea) {
    throw new Error(`无法找到截图区域 ${strategy.screenshotSelector}。`);
  }
  const boundingBox = await contentArea.boundingBox();
  if (!boundingBox) {
    throw new Error(`无法获取截图区域 ${strategy.screenshotSelector} 的边界框。`);
  }
  const wantedHeight = screenshotHeight > 0
    ? Math.min(screenshotHeight, boundingBox.height)
    : boundingBox.height;
  const clip = {
    x: boundingBox.x,
    y: boundingBox.y,
    width: boundingBox.width,
    // Keep the clip at least one pixel tall.
    height: Math.max(wantedHeight, 1)
  };
  logDebug(logger, debugMode, "截图边界: mode=%s anchor=%s contentHeight=%d finalHeight=%d", strategy.mode, "container", Math.round(boundingBox.height), Math.round(clip.height));
  return page.screenshot({ clip });
}
__name(captureContentScreenshot, "captureContentScreenshot");

/**
 * Waits for the strategy's ready selectors, scrolls the page to trigger lazy
 * content, extracts the post data and takes the screenshot.
 *
 * @param {object} page - Puppeteer page.
 * @param {object} strategy - Layout strategy such as `OLD_FRONTEND`.
 * @param {object} config - Plugin config.
 * @param {object} logger
 * @returns {Promise<{mode: string, data: object, imageBuffer: *}>}
 * @throws {Error} When the ready selectors never appear.
 */
async function parseWithStrategy(page, strategy, config, logger) {
  const ready = await waitForSelectors(page, strategy.readySelectors, 1e4);
  if (!ready) {
    throw new Error(`未检测到 ${strategy.mode} 版页面的关键节点。`);
  }
  await scrollPageForLazyContent(page);
  const data = await strategy.extract(page, config);
  logDebug(logger, config.debugMode, "使用 %s 版前端解析成功:标题=%s, 文本长度=%d, 图片数=%d, 视频=%s", strategy.mode, !!data.postTitle, data.firstPostText.length, data.imageUrls.length, !!data.videoUrl);
  const imageBuffer = await captureContentScreenshot(page, strategy, config.screenshotHeight, logger, config.debugMode);
  return { mode: strategy.mode, data, imageBuffer };
}
__name(parseWithStrategy, "parseWithStrategy");

/**
 * Switches the page to the legacy layout and parses it.
 *
 * @throws {Error} When the switch entry is missing or the switch had no effect.
 */
async function switchThenParse(page, config, logger) {
  const switched = await switchToOldFrontend(page, logger, config.debugMode);
  if (!switched) {
    throw new Error("未找到切换到旧版前端的入口,或切换未生效。");
  }
  return parseWithStrategy(page, OLD_FRONTEND, config, logger);
}

/**
 * Detects the page layout and parses the post, switching from the new layout
 * to the legacy one when necessary. If neither layout is detected at once,
 * waits up to 4 s for the new one and then up to 4 s for the legacy one.
 *
 * @param {object} page - Puppeteer page already navigated to the post.
 * @param {object} config - Plugin config.
 * @param {object} logger
 * @returns {Promise<{mode: string, data: object, imageBuffer: *}>}
 * @throws {Error} The underlying parse error, or a generic error when the
 *   page structure cannot be recognized.
 */
async function parseTiebaPage(page, config, logger) {
  const resolved = await detectFrontend(page);
  logDebug(logger, config.debugMode, "检测到当前贴吧前端: %s", resolved || "unknown");
  let lastError;
  try {
    // `return await` (not a bare `return`) keeps rejections inside this
    // `try`, so the debug warning in the `catch` below actually fires.
    if (resolved === "old") {
      logDebug(logger, config.debugMode, "识别为旧版前端,直接解析。");
      return await parseWithStrategy(page, OLD_FRONTEND, config, logger);
    }
    if (resolved === "new") {
      logDebug(logger, config.debugMode, "识别为新版前端,准备切回旧版。");
      return await switchThenParse(page, config, logger);
    }
    if (await waitForSelectors(page, NEW_FRONTEND_READY_SELECTORS, 4e3)) {
      logDebug(logger, config.debugMode, "延迟识别为新版前端,准备切回旧版。");
      return await switchThenParse(page, config, logger);
    }
    if (await waitForSelectors(page, OLD_FRONTEND.readySelectors, 4e3)) {
      logDebug(logger, config.debugMode, "延迟识别为旧版前端。");
      return await parseWithStrategy(page, OLD_FRONTEND, config, logger);
    }
  } catch (error) {
    lastError = error;
    if (config.debugMode) {
      logger.warn("贴吧页面解析失败:%s", describeError(error));
    }
  }
  throw lastError || new Error("未能识别当前贴吧页面结构。");
}
__name(parseTiebaPage, "parseTiebaPage");

/**
 * Plugin entry point. Registers the `tieba.login` command (QR-code login that
 * returns the cookie string) and a middleware that reacts to tieba post links
 * with extracted text, a screenshot, images and video.
 *
 * @param {object} ctx - Koishi context; `ctx.puppeteer` is used to open pages.
 * @param {object} config - Plugin config validated by `Config`.
 */
function apply(ctx, config) {
  const logger = ctx.logger("tieba-parser");

  ctx.command("tieba.login", "获取贴吧 Cookie").action(async ({ session }) => {
    let page;
    try {
      await session.send("正在获取登录二维码,请稍候...");
      page = await ctx.puppeteer.page();
      const loginUrl = "https://passport.baidu.com/v2/?login&tpl=tb&u=https%3A%2F%2Ftieba.baidu.com";
      await page.goto(loginUrl);
      const qrCodeElement = await page.waitForSelector(".tang-pass-qrcode-img");
      const qrCodeImage = await qrCodeElement.screenshot({ type: "png" });
      await session.send([
        import_koishi.h.image(qrCodeImage, "image/png"),
        (0, import_koishi.h)("p", "请在2分钟内使用【百度贴吧】App扫描二维码登录。")
      ]);
      await page.waitForNavigation({ timeout: 12e4 });
      const cookies = await page.cookies("https://baidu.com", "https://tieba.baidu.com");
      const cookieString = formatCookie(cookies);
      if (!cookieString || !cookieString.includes("BDUSS")) {
        return "登录失败:未能获取到关键的登录凭证 (BDUSS),请重试。";
      }
      // NOTE(review): the reply below contains the full cookie string and is
      // sent into the chat where the command was issued. Confirm that is the
      // intended delivery channel for a login credential.
      return `登录成功!\n请将以下 Cookie 完整复制并粘贴到插件的【登录信息】配置项中:\n${cookieString}`;
    } catch (error) {
      logger.error("扫码登录失败!\n" + describeErrorWithStack(error));
      return "登录失败或超时,请重试。";
    } finally {
      if (page) await closePage(page, logger);
    }
  });

  ctx.middleware(async (session, next) => {
    const content = session.content || "";

    // Messages that start with a command prefix are left to the command
    // system. Optional chaining guards hosts where `ctx.options` is absent.
    const rawPrefix = ctx.options?.prefix;
    const prefixes = Array.isArray(rawPrefix) ? rawPrefix : [rawPrefix];
    const commandPrefixes = prefixes.filter((p) => p && typeof p === "string");
    if (commandPrefixes.some((p) => content.startsWith(p))) {
      return next();
    }

    const match = TIEBA_REG.exec(content);
    if (!match) return next();
    const postId = match[2] || match[4];
    const targetUrl = `https://tieba.baidu.com/p/${postId}`;
    logDebug(logger, config.debugMode, "匹配到贴吧链接,ID: %s", postId);

    const pinger = await session.send([
      (0, import_koishi.h)("quote", { id: session.messageId }),
      "识别到贴吧链接,正在为您生成内容..."
    ]);
    const pingerId = pinger?.[0];

    let page;
    try {
      logDebug(logger, config.debugMode, "准备启动 Puppeteer 页面...");
      page = await ctx.puppeteer.page();
      if (config.cookie) {
        await page.setCookie(...parseCookie(config.cookie));
        logDebug(logger, config.debugMode, "已设置全局 Cookie。");
      }
      await page.setViewport({ width: config.width, height: 1080 });
      await page.goto(targetUrl, { waitUntil: "networkidle2" });
      logDebug(logger, config.debugMode, "页面已导航至: %s", targetUrl);

      const { data, imageBuffer, mode } = await parseTiebaPage(page, config, logger);
      const { postTitle, firstPostText, imageUrls, videoUrl } = data;
      logDebug(logger, config.debugMode, "最终采用前端模式: %s", mode);

      const textBlocks = [];
      if (postTitle) textBlocks.push(`标题: ${postTitle}`);
      if (firstPostText) textBlocks.push(`正文: ${firstPostText}`);

      const mainMessage = [];
      if (textBlocks.length > 0) mainMessage.push(textBlocks.join("\n\n"));
      mainMessage.push(import_koishi.h.image(imageBuffer, "image/png"));
      mainMessage.push(...imageUrls.map((url) => import_koishi.h.image(url)));
      await session.send(mainMessage);

      if (videoUrl) {
        await session.send(import_koishi.h.video(videoUrl));
      }
      return;
    } catch (error) {
      logger.error("贴吧解析过程中发生严重错误!\n" + describeErrorWithStack(error));
      return "解析失败,可能是帖子不存在、前端结构变化或网络问题。请管理员检查后台日志以获取详细错误信息。";
    } finally {
      if (page && await closePage(page, logger)) {
        logDebug(logger, config.debugMode, "Puppeteer 页面已关闭。");
      }
      if (pingerId) {
        try {
          await session.bot.deleteMessage(session.channelId, pingerId);
          logDebug(logger, config.debugMode, "已撤回“正在生成”的提示消息。");
        } catch (error) {
          // Retraction is best-effort; report it only in debug mode.
          if (config.debugMode) logger.warn("撤回提示消息失败,可能缺少权限。", error);
        }
      }
    }
  });
}
__name(apply, "apply");

// Annotate the CommonJS export names for ESM import in node:
// (The statement below must stay on its own line, outside the comment, so
// static export detection can see it.)
0 && (module.exports = { Config, apply, name, using });