koishi-plugin-tieba-parse
Version:
一个用于解析百度贴吧链接,并生成帖子截图、提取内容的 Koishi 插件。
461 lines (459 loc) • 21 kB
JavaScript
// Module interop helpers (these appear to be bundler-generated; edit with care).
// Short aliases for the Object reflection APIs used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines a configurable `name` property with the given value on `target`
// and returns `target` (the return value of Object.defineProperty).
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
// Defines every key of `all` on `target` as an enumerable getter, using the
// function stored under that key in `all` as the getter.
var __export = (target, all) => {
for (var name2 in all)
__defProp(target, name2, { get: all[name2], enumerable: true });
};
// Copies the own property names of `from` (an object or function) onto `to`
// as getters that read through to `from`. Keys already present on `to` and
// the key equal to `except` are skipped. Enumerability follows the source
// descriptor, defaulting to enumerable when no descriptor is found.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds a fresh object flagged with `__esModule: true` and copies the
// properties of `mod` onto it via __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
// Export table for this module. Members are registered as getters, so they
// can be listed here before the corresponding `var`s below are assigned.
var src_exports = {};
__export(src_exports, {
Config: () => Config,
apply: () => apply,
name: () => name,
using: () => using
});
module.exports = __toCommonJS(src_exports);
var import_koishi = require("koishi");
// Exported plugin name. Note that the logger created in `apply` uses the
// different label "tieba-parser".
var name = "tieba-parser-final";
// Exported service list; `apply` relies on `ctx.puppeteer`.
// NOTE(review): presumably this is Koishi's service-dependency declaration — confirm.
var using = ["puppeteer"];
// Plugin configuration schema: the intersection of three option groups
// (parsing options, login cookie, debug switch). The description strings are
// user-facing text and must not be altered casually.
var Config = import_koishi.Schema.intersect([
// Parsing options: viewport width, screenshot height cap (0 = no cap), and
// toggles for what is extracted from the first post.
import_koishi.Schema.object({
width: import_koishi.Schema.number().default(800).description("截图的默认宽度(像素)。"),
screenshotHeight: import_koishi.Schema.number().default(0).description("设置截图的最大高度(像素)。设置为 0 则代表不限制高度,截取帖子第一页的所有内容。"),
showTitle: import_koishi.Schema.boolean().default(true).description("是否在截图上方显示帖子标题。"),
extractFirstPostText: import_koishi.Schema.boolean().default(true).description("是否在截图前提取并发送1楼的纯文本内容。"),
extractFirstPostImages: import_koishi.Schema.boolean().default(true).description("是否在截图下方提取并发送1楼的全部图片。"),
extractFirstPostVideo: import_koishi.Schema.boolean().default(true).description("是否提取并发送1楼的视频(若存在)。")
}).description("解析设置"),
// Login cookie string. No default is declared here; the middleware in
// `apply` only calls `page.setCookie` when this value is truthy.
import_koishi.Schema.object({
cookie: import_koishi.Schema.string().role("textarea").description("请通过 /tieba.login 指令获取。")
}).description("登录信息"),
// Debug switch: gates the `logDebug` output used throughout this file.
import_koishi.Schema.object({
debugMode: import_koishi.Schema.boolean().default(false).description("启用调试模式。开启后,将在后台控制台输出详细的操作日志。")
}).description("调试")
]);
// Matches a thread link of the form `tieba.baidu.com/p/<digits>` or
// `jump.bdimg.com/p/<digits>`. The numeric thread id is captured in group 2
// (first form) or group 4 (second form).
var TIEBA_REG = /(tieba\.baidu\.com\/p\/(\d+))|(jump\.bdimg\.com\/p\/(\d+))/;
/**
 * Parsing strategy for the legacy ("old") Tieba thread page.
 *
 * - `mode`: label used in log and error messages.
 * - `screenshotSelector`: element whose bounding box is screenshotted.
 * - `readySelectors`: selectors that must all be present before parsing.
 * - `cleanupCss`: stylesheet injected before the screenshot to hide page chrome.
 * - `extract(page, cfg)`: reads title, first-post text, image URLs and video URL
 *   from the page and returns them normalized.
 */
var OLD_FRONTEND = {
  mode: "old",
  screenshotSelector: "#j_p_postlist",
  readySelectors: ["#j_p_postlist", ".l_post"],
  cleanupCss: `#com_userbar, .tb-header, .right_section, .core_reply_wrapper, .app_download_wrap, .see-more-wrap, .tb_rich_poster_container, .footer, .j_user_sign, .quick_reply_button, .share_btn_wrapper, .celebrity, .post-client-promotion, .lottery-exp-wrap, .simple-card, .vip-red-name-honour-wrap, .bawu-button-wrapper, .video_header_wrap, .fix_bar_wrap { display: none !important; } .pb_content { width: auto !important; }`,
  /**
   * @param page Puppeteer page showing the thread.
   * @param cfg Plugin config; `showTitle` and the `extractFirstPost*` flags are read.
   * @returns Normalized `{ postTitle, firstPostText, imageUrls, videoUrl }`.
   */
  async extract(page, cfg) {
    // The callback runs inside the browser page, so it must be self-contained.
    const raw = await page.evaluate((options) => {
      const pickImageUrl = (img) => img.currentSrc || img.src || img.dataset.src || img.getAttribute("data-src") || "";
      const extracted = { postTitle: "", firstPostText: "", imageUrls: [], videoUrl: "" };
      if (options.showTitle) {
        extracted.postTitle = document.title.replace(/_百度贴吧$/, "").trim();
      }
      // The first `.l_post` element is treated as the opening post.
      const floorOne = document.querySelector(".l_post");
      if (!floorOne) return extracted;
      const body = floorOne.querySelector(".d_post_content_main .p_content");
      if (body && options.extractFirstPostText) {
        extracted.firstPostText = body.innerText.trim();
      }
      if (body && options.extractFirstPostImages) {
        const urls = [];
        for (const img of Array.from(body.querySelectorAll("img.BDE_Image"))) {
          const url = pickImageUrl(img);
          if (url) urls.push(url);
        }
        extracted.imageUrls = urls;
      }
      const video = options.extractFirstPostVideo ? floorOne.querySelector(".d_post_content_main video") : null;
      if (video) {
        extracted.videoUrl = video.currentSrc || video.src || "";
      }
      return extracted;
    }, cfg);
    return normalizeExtractedData(raw);
  }
};
// Selectors that must all be present for the page to be classified as the
// "new" frontend (see `detectFrontend` and `parseTiebaPage`).
var NEW_FRONTEND_READY_SELECTORS = [".center-content", ".pb-title"];
/**
 * Serializes cookie records into a single `name=value; name=value` string.
 *
 * @param {Array<{name: string, value: string}>} cookies Cookie records.
 * @returns {string} The pairs joined by "; " (empty string for an empty list).
 */
function formatCookie(cookies) {
  const pairs = [];
  for (const { name: cookieName, value } of cookies) {
    pairs.push(`${cookieName}=${value}`);
  }
  return pairs.join("; ");
}
__name(formatCookie, "formatCookie");
/**
 * Parses a `name=value; name=value` cookie string into cookie records.
 *
 * Each segment is split at its first "="; name and value are trimmed, and
 * segments whose name is empty are dropped. Every record is given the
 * domain ".baidu.com".
 *
 * @param {string} cookieString Raw cookie string; a falsy value yields `[]`.
 * @returns {Array<{name: string, value: string, domain: string}>}
 */
function parseCookie(cookieString) {
  if (!cookieString) return [];
  const cookies = [];
  for (const segment of cookieString.split(";")) {
    const eq = segment.indexOf("=");
    const rawName = eq === -1 ? segment : segment.slice(0, eq);
    const rawValue = eq === -1 ? "" : segment.slice(eq + 1);
    const cookieName = rawName.trim();
    if (!cookieName) continue;
    cookies.push({ name: cookieName, value: rawValue.trim(), domain: ".baidu.com" });
  }
  return cookies;
}
__name(parseCookie, "parseCookie");
/**
 * Tidies text extracted from a post: trims every line, drops empty lines and
 * the "click to expand" placeholder lines, and joins the rest with "\n".
 *
 * @param {string} text Raw extracted text.
 * @returns {string} Cleaned text.
 */
function cleanExtractedText(text) {
  const placeholder = /^点击展开,查看完整(?:图片|视频)$/;
  const kept = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "" || placeholder.test(line)) continue;
    kept.push(line);
  }
  return kept.join("\n").trim();
}
__name(cleanExtractedText, "cleanExtractedText");
/**
 * Normalizes raw data extracted from the page.
 *
 * Missing fields fall back to empty values; the title and video URL are
 * trimmed, the text goes through `cleanExtractedText`, and image URLs are
 * trimmed, emptied entries removed, and de-duplicated in first-seen order.
 *
 * @param data Raw `{ postTitle, firstPostText, imageUrls, videoUrl }` object.
 * @returns A new object with the same four fields, normalized.
 */
function normalizeExtractedData(data) {
  const postTitle = (data.postTitle || "").trim();
  const firstPostText = cleanExtractedText(data.firstPostText || "");
  const uniqueUrls = new Set();
  for (const url of data.imageUrls || []) {
    const trimmed = url.trim();
    if (trimmed) uniqueUrls.add(trimmed);
  }
  const videoUrl = (data.videoUrl || "").trim();
  return { postTitle, firstPostText, imageUrls: [...uniqueUrls], videoUrl };
}
__name(normalizeExtractedData, "normalizeExtractedData");
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
__name(wait, "wait");
/**
 * Forwards a message to `logger.info` only when `enabled` is truthy.
 *
 * @param logger Object exposing an `info(message, ...args)` method.
 * @param enabled Whether debug output is switched on.
 * @param message Log message / format string.
 * @param args Extra arguments passed through to `logger.info`.
 */
function logDebug(logger, enabled, message, ...args) {
  if (!enabled) return;
  logger.info(message, ...args);
}
__name(logDebug, "logDebug");
/**
 * Checks, inside the page, whether every selector matches at least one element.
 *
 * @param page Puppeteer page (only `page.evaluate` is used).
 * @param {string[]} selectors CSS selectors that must all be present.
 * @returns {Promise<boolean>} True when all selectors match; true for an empty list.
 */
async function hasSelectors(page, selectors) {
  const allPresent = await page.evaluate((list) => {
    for (const selector of list) {
      if (!document.querySelector(selector)) return false;
    }
    return true;
  }, selectors);
  return allPresent;
}
__name(hasSelectors, "hasSelectors");
/**
 * Polls the page every 300 ms until all selectors are present or the
 * timeout elapses.
 *
 * Errors from a single check are ignored and treated as "not ready yet",
 * so polling continues until the deadline.
 *
 * @param page Puppeteer page.
 * @param {string[]} selectors CSS selectors that must all be present.
 * @param {number} [timeout=1e4] Maximum time to poll, in milliseconds.
 * @returns {Promise<boolean>} True once all selectors are found, false on timeout.
 */
async function waitForSelectors(page, selectors, timeout = 1e4) {
  const deadline = Date.now() + timeout;
  while (Date.now() < deadline) {
    const found = await hasSelectors(page, selectors).catch(() => false);
    if (found) return true;
    await wait(300);
  }
  return false;
}
__name(waitForSelectors, "waitForSelectors");
/**
 * Classifies the currently loaded page by probing for known selectors.
 * The old frontend is checked first.
 *
 * @param page Puppeteer page.
 * @returns {Promise<"old" | "new" | null>} `null` when neither selector set matches.
 */
async function detectFrontend(page) {
  const oldReady = await hasSelectors(page, OLD_FRONTEND.readySelectors);
  if (oldReady) return "old";
  const newReady = await hasSelectors(page, NEW_FRONTEND_READY_SELECTORS);
  return newReady ? "new" : null;
}
__name(detectFrontend, "detectFrontend");
// Finds a clickable element associated with the SVG icon `#<iconId>` and
// clicks its centre with the Puppeteer mouse.
//
// Parameters:
//   page      - Puppeteer page.
//   iconId    - icon id, matched against `xlink:href` / `href` of <use> elements.
//   logger    - logger passed to logDebug.
//   debugMode - enables the debug log lines.
// Returns the descriptor of the clicked element ({ x, y, href, tag, text,
// className }), or null when no suitable target was found (nothing is
// clicked in that case).
async function clickIconTarget(page, iconId, logger, debugMode) {
// Everything inside this callback runs in the browser page.
const target = await page.evaluate((iconId2) => {
// All <use> elements that reference the requested icon.
const uses = Array.from(document.querySelectorAll("use")).filter((node) => {
return node.getAttribute("xlink:href") === `#${iconId2}` || node.getAttribute("href") === `#${iconId2}`;
});
for (const use of uses) {
// Candidate order: the closest button/link/menu-like ancestor first, then
// the <use> element itself and its ancestors, until the list holds 10 entries.
const preferred = [
use.closest('button, a, [role="button"], [role="menuitem"], [role="link"], .menu-item, .menu-item-content, .more-btn, .operate-btn, li')
].filter(Boolean);
let current = use;
while (current && preferred.length < 10) {
preferred.push(current);
current = current.parentElement;
}
// The same element can appear twice in the list; check each only once.
const seen = /* @__PURE__ */ new Set();
for (const candidate of preferred) {
if (seen.has(candidate)) continue;
seen.add(candidate);
const rect = candidate.getBoundingClientRect();
const style = window.getComputedStyle(candidate);
// Skip elements with no rendered area or that are hidden via CSS.
if (rect.width <= 0 || rect.height <= 0) continue;
if (style.visibility === "hidden" || style.display === "none") continue;
const rawHref = candidate instanceof HTMLAnchorElement ? candidate.href : candidate.getAttribute("href") || candidate.getAttribute("data-href") || candidate.getAttribute("data-url");
const role = candidate.getAttribute("role");
// An element counts as interactive if it is a button/anchor, has an
// interactive role, an onclick or tabindex attribute, or a pointer cursor.
const interactive = candidate instanceof HTMLButtonElement || candidate instanceof HTMLAnchorElement || role === "button" || role === "menuitem" || role === "link" || candidate.hasAttribute("onclick") || candidate.getAttribute("tabindex") !== null || style.cursor === "pointer";
if (!interactive && !rawHref) continue;
let href = "";
if (rawHref) {
// Resolve to an absolute URL; fall back to the raw value if it cannot be parsed.
try {
href = new URL(rawHref, location.href).href;
} catch {
href = rawHref;
}
}
// First acceptable candidate wins: report its centre point (from
// getBoundingClientRect) plus some descriptive fields for logging.
return {
x: Math.round(rect.left + rect.width / 2),
y: Math.round(rect.top + rect.height / 2),
href,
tag: candidate.tagName.toLowerCase(),
text: (candidate.textContent || "").trim().slice(0, 80),
className: candidate.getAttribute("class") || ""
};
}
}
return null;
}, iconId);
if (!target) {
logDebug(logger, debugMode, "未定位到 %s 图标对应的可点击目标。", iconId);
return null;
}
logDebug(logger, debugMode, "定位到 %s 图标目标: %o", iconId, target);
// Click by moving the mouse to the reported point and pressing/releasing.
await page.mouse.move(target.x, target.y);
await page.mouse.down();
await page.mouse.up();
return target;
}
__name(clickIconTarget, "clickIconTarget");
/**
 * Waits for the page to show the old frontend after the "back to old
 * version" entry was clicked.
 *
 * Polls for up to 15 s for the old-frontend selectors, logging URL changes
 * along the way. If that fails and `fallbackUrl` differs from the current
 * URL, navigates there directly and waits up to another 15 s.
 *
 * @param page Puppeteer page.
 * @param {string} initialUrl URL before the click, used only for change logging.
 * @param logger Logger passed to logDebug.
 * @param {boolean} debugMode Enables debug logging and the final state dump.
 * @param {string} [fallbackUrl=""] Target address taken from the clicked menu item.
 * @returns {Promise<boolean>} True when the old frontend is ready, false otherwise.
 */
async function waitForOldFrontendSwitch(page, initialUrl, logger, debugMode, fallbackUrl = "") {
  const start = Date.now();
  let lastUrl = initialUrl;
  while (Date.now() - start < 15e3) {
    try {
      if (await hasSelectors(page, OLD_FRONTEND.readySelectors)) return true;
      const currentUrl = page.url();
      if (currentUrl !== lastUrl) {
        logDebug(logger, debugMode, "切页过程中 URL 变化: %s -> %s", lastUrl, currentUrl);
        lastUrl = currentUrl;
      }
    } catch {
      // A check can fail while the page is navigating; keep polling until the deadline.
    }
    await wait(300);
  }
  if (fallbackUrl && fallbackUrl !== page.url()) {
    logDebug(logger, debugMode, "点击未直接切页,尝试使用菜单目标地址进入旧版: %s", fallbackUrl);
    try {
      await page.goto(fallbackUrl, { waitUntil: "networkidle2" });
      const ready = await waitForSelectors(page, OLD_FRONTEND.readySelectors, 15e3);
      logDebug(logger, debugMode, "通过菜单目标地址进入旧版结果: %s", ready);
      if (ready) return true;
    } catch (error) {
      logDebug(logger, debugMode, "通过菜单目标地址进入旧版失败: %s", error instanceof Error ? error.message : String(error));
    }
  }
  // The state dump exists only for debug logging, so skip the page round-trip
  // when debugging is off, and never let a failure here turn the documented
  // `false` result into a thrown error.
  if (debugMode) {
    try {
      const finalState = await page.evaluate(() => ({
        href: location.href,
        title: document.title,
        oldRoot: !!document.querySelector("#j_p_postlist"),
        oldPost: !!document.querySelector(".l_post"),
        centerContent: !!document.querySelector(".center-content"),
        pbTitle: !!document.querySelector(".pb-title")
      }));
      logDebug(logger, debugMode, "旧版切换后页面状态: %o", finalState);
    } catch (error) {
      logDebug(logger, debugMode, "读取旧版切换后页面状态失败: %s", error instanceof Error ? error.message : String(error));
    }
  }
  return false;
}
__name(waitForOldFrontendSwitch, "waitForOldFrontendSwitch");
/**
 * Attempts to switch a new-frontend page to the old frontend by clicking
 * the "ellipsis" menu icon and then the "back_old" icon.
 *
 * The "back_old" click is attempted even when the ellipsis menu could not
 * be opened. If no "back_old" target is found the function gives up.
 *
 * @param page Puppeteer page.
 * @param logger Logger passed to logDebug.
 * @param {boolean} debugMode Enables debug logging.
 * @returns {Promise<boolean>} True when the old frontend became ready.
 */
async function switchToOldFrontend(page, logger, debugMode) {
  logDebug(logger, debugMode, "尝试切换到旧版贴吧前端。");
  const urlBeforeSwitch = page.url();
  const menuTarget = await clickIconTarget(page, "ellipsis", logger, debugMode);
  if (menuTarget) {
    // Pause briefly after the menu click before looking for the next icon.
    await wait(500);
  }
  const legacyTarget = await clickIconTarget(page, "back_old", logger, debugMode);
  const legacyHref = legacyTarget?.href || "";
  logDebug(logger, debugMode, "旧版切换点击结果: %o", {
    openedMenu: Boolean(menuTarget),
    clickedBackOld: Boolean(legacyTarget),
    backOldHref: legacyHref
  });
  if (!legacyTarget) {
    logDebug(logger, debugMode, "未找到“回旧版”按钮。");
    return false;
  }
  const switched = await waitForOldFrontendSwitch(page, urlBeforeSwitch, logger, debugMode, legacyHref);
  logDebug(logger, debugMode, "旧版前端切换结果: %s", switched);
  return switched;
}
__name(switchToOldFrontend, "switchToOldFrontend");
/**
 * Scrolls the page to the bottom repeatedly, then back to the top.
 *
 * Each round scrolls to the current bottom and waits 500 ms; scrolling
 * stops when the document height did not grow or after 15 rounds. After
 * returning to the top the function waits another 100 ms.
 *
 * @param page Puppeteer page.
 * @returns {Promise<void>}
 */
async function scrollPageForLazyContent(page) {
  // Runs inside the browser page, so it carries its own sleep helper.
  await page.evaluate(async () => {
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    let previous = -1;
    let height = 0;
    for (let attempt = 0; attempt < 15 && previous < height; attempt++) {
      window.scrollTo(0, document.body.scrollHeight);
      previous = height;
      await pause(500);
      height = document.body.scrollHeight;
    }
  });
  await page.evaluate(() => {
    window.scrollTo(0, 0);
  });
  await wait(100);
}
__name(scrollPageForLazyContent, "scrollPageForLazyContent");
/**
 * Injects the strategy's cleanup CSS and screenshots the content container.
 *
 * The clip rectangle is the container's bounding box; its height is capped
 * at `screenshotHeight` when that value is positive, and is never below 1.
 *
 * @param page Puppeteer page.
 * @param strategy Strategy object (`cleanupCss`, `screenshotSelector`, `mode` are read).
 * @param {number} screenshotHeight Maximum height in pixels; 0 or less means no cap.
 * @param logger Logger passed to logDebug.
 * @param {boolean} debugMode Enables debug logging.
 * @returns The result of `page.screenshot({ clip })`.
 * @throws {Error} When the container element or its bounding box is unavailable.
 */
async function captureContentScreenshot(page, strategy, screenshotHeight, logger, debugMode) {
  const { cleanupCss, screenshotSelector, mode } = strategy;
  await page.addStyleTag({ content: cleanupCss });
  const region = await page.$(screenshotSelector);
  if (!region) {
    throw new Error(`无法找到截图区域 ${screenshotSelector}。`);
  }
  const box = await region.boundingBox();
  if (!box) {
    throw new Error(`无法获取截图区域 ${screenshotSelector} 的边界框。`);
  }
  const cappedHeight = screenshotHeight > 0 ? Math.min(screenshotHeight, box.height) : box.height;
  const clip = {
    x: box.x,
    y: box.y,
    width: box.width,
    height: Math.max(cappedHeight, 1)
  };
  logDebug(logger, debugMode, "截图边界: mode=%s anchor=%s contentHeight=%d finalHeight=%d", mode, "container", Math.round(box.height), Math.round(clip.height));
  return page.screenshot({ clip });
}
__name(captureContentScreenshot, "captureContentScreenshot");
/**
 * Runs one parsing strategy end to end: waits for the page to be ready,
 * scrolls to trigger lazy content, extracts the data, then takes the screenshot.
 *
 * @param page Puppeteer page.
 * @param strategy Strategy object such as `OLD_FRONTEND`.
 * @param config Plugin config (`debugMode`, `screenshotHeight` and the extraction flags).
 * @param logger Logger passed to logDebug.
 * @returns {Promise<{mode: string, data: object, imageBuffer: *}>}
 * @throws {Error} When the ready selectors do not appear within 10 s.
 */
async function parseWithStrategy(page, strategy, config, logger) {
  const pageReady = await waitForSelectors(page, strategy.readySelectors, 1e4);
  if (!pageReady) {
    throw new Error(`未检测到 ${strategy.mode} 版页面的关键节点。`);
  }
  await scrollPageForLazyContent(page);
  const data = await strategy.extract(page, config);
  logDebug(
    logger,
    config.debugMode,
    "使用 %s 版前端解析成功:标题=%s, 文本长度=%d, 图片数=%d, 视频=%s",
    strategy.mode,
    !!data.postTitle,
    data.firstPostText.length,
    data.imageUrls.length,
    !!data.videoUrl
  );
  const imageBuffer = await captureContentScreenshot(page, strategy, config.screenshotHeight, logger, config.debugMode);
  return { mode: strategy.mode, data, imageBuffer };
}
__name(parseWithStrategy, "parseWithStrategy");
/**
 * Detects which Tieba frontend the page is showing and parses it.
 *
 * Old frontend pages are parsed directly. New frontend pages are first
 * switched back to the old frontend. When the first detection is
 * inconclusive, the function waits up to 4 s for the new-frontend
 * selectors and then up to 4 s for the old-frontend selectors.
 *
 * @param page Puppeteer page already navigated to the thread.
 * @param config Plugin config (`debugMode` plus the fields used by parsing).
 * @param logger Logger used for debug and warning output.
 * @returns The `{ mode, data, imageBuffer }` result of `parseWithStrategy`.
 * @throws {Error} The parsing/switching error, or a generic error when the
 *   page structure could not be recognised.
 */
async function parseTiebaPage(page, config, logger) {
  const parseOld = () => parseWithStrategy(page, OLD_FRONTEND, config, logger);
  // Switches a new-frontend page to the old one, then parses it.
  const switchThenParse = async () => {
    const switched = await switchToOldFrontend(page, logger, config.debugMode);
    if (!switched) {
      throw new Error("未找到切换到旧版前端的入口,或切换未生效。");
    }
    return parseOld();
  };
  const resolved = await detectFrontend(page);
  logDebug(logger, config.debugMode, "检测到当前贴吧前端: %s", resolved || "unknown");
  let lastError;
  try {
    // Each branch uses `return await` so that a rejection from parsing is
    // caught by the `catch` below; a bare `return promise` would bypass it
    // and the failure would never be logged here.
    if (resolved === "old") {
      logDebug(logger, config.debugMode, "识别为旧版前端,直接解析。");
      return await parseOld();
    }
    if (resolved === "new") {
      logDebug(logger, config.debugMode, "识别为新版前端,准备切回旧版。");
      return await switchThenParse();
    }
    const readyNew = await waitForSelectors(page, NEW_FRONTEND_READY_SELECTORS, 4e3);
    if (readyNew) {
      logDebug(logger, config.debugMode, "延迟识别为新版前端,准备切回旧版。");
      return await switchThenParse();
    }
    const readyOld = await waitForSelectors(page, OLD_FRONTEND.readySelectors, 4e3);
    if (readyOld) {
      logDebug(logger, config.debugMode, "延迟识别为旧版前端。");
      return await parseOld();
    }
  } catch (error) {
    lastError = error;
    if (config.debugMode) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn("贴吧页面解析失败:%s", reason);
    }
  }
  throw lastError || new Error("未能识别当前贴吧页面结构。");
}
__name(parseTiebaPage, "parseTiebaPage");
/**
 * Plugin entry point. Registers:
 *  - the `tieba.login` command, which shows a Baidu QR-code login page and
 *    replies with the resulting cookie string;
 *  - a middleware that detects Tieba thread links in messages and replies
 *    with the thread title/text, a screenshot, and first-post images/video.
 *
 * @param ctx Plugin context; `ctx.logger`, `ctx.command`, `ctx.middleware`,
 *   `ctx.puppeteer` and `ctx.options.prefix` are used.
 * @param config Plugin configuration matching `Config`.
 */
function apply(ctx, config) {
  const logger = ctx.logger("tieba-parser");

  // Closes a Puppeteer page without letting a close failure escape from a
  // `finally` block: a rejection there would replace the handler's reply and
  // skip any cleanup that follows. Resolves to true only if the page closed.
  const closePageSafely = async (page) => {
    if (!page) return false;
    try {
      await page.close();
      return true;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn("关闭 Puppeteer 页面失败: %s", reason);
      return false;
    }
  };

  ctx.command("tieba.login", "获取贴吧 Cookie").action(async ({ session }) => {
    let page;
    try {
      await session.send("正在获取登录二维码,请稍候...");
      page = await ctx.puppeteer.page();
      const loginUrl = "https://passport.baidu.com/v2/?login&tpl=tb&u=https%3A%2F%2Ftieba.baidu.com";
      await page.goto(loginUrl);
      const qrCodeElement = await page.waitForSelector(".tang-pass-qrcode-img");
      const qrCodeImage = await qrCodeElement.screenshot({ type: "png" });
      await session.send([
        import_koishi.h.image(qrCodeImage, "image/png"),
        (0, import_koishi.h)("p", "请在2分钟内使用【百度贴吧】App扫描二维码登录。")
      ]);
      // A navigation within 2 minutes is taken as the sign that the scan finished.
      await page.waitForNavigation({ timeout: 12e4 });
      const cookies = await page.cookies("https://baidu.com", "https://tieba.baidu.com");
      const cookieString = formatCookie(cookies);
      if (!cookieString || !cookieString.includes("BDUSS")) {
        return "登录失败:未能获取到关键的登录凭证 (BDUSS),请重试。";
      }
      // NOTE(review): the reply contains the full login cookie and is sent to
      // whatever channel invoked the command — consider restricting this
      // command to private chats or trusted users.
      return "登录成功!\n请将以下 Cookie 完整复制并粘贴到插件的【登录信息】配置项中:\n" + cookieString;
    } catch (error) {
      const detail = error instanceof Error ? error.stack || error.message : String(error);
      logger.error("扫码登录失败!\n" + detail);
      return "登录失败或超时,请重试。";
    } finally {
      await closePageSafely(page);
    }
  });

  ctx.middleware(async (session, next) => {
    const content = session.content || "";
    // Messages that start with a command prefix are left to the command system.
    // NOTE(review): relies on `ctx.options.prefix` being available on this
    // Koishi version — confirm.
    const prefixes = Array.isArray(ctx.options.prefix) ? ctx.options.prefix : [ctx.options.prefix];
    const commandPrefixes = prefixes.filter((p) => p && typeof p === "string");
    if (commandPrefixes.some((p) => content.startsWith(p))) {
      return next();
    }
    const match = TIEBA_REG.exec(content);
    if (!match) return next();
    // Group 2 holds the id for tieba.baidu.com links, group 4 for jump.bdimg.com links.
    const postId = match[2] || match[4];
    const targetUrl = `https://tieba.baidu.com/p/${postId}`;
    logDebug(logger, config.debugMode, "匹配到贴吧链接,ID: %s", postId);
    // Send a "working on it" notice; its id is kept so it can be retracted afterwards.
    const pinger = await session.send([
      (0, import_koishi.h)("quote", { id: session.messageId }),
      "识别到贴吧链接,正在为您生成内容..."
    ]);
    const pingerId = pinger?.[0];
    let page;
    try {
      logDebug(logger, config.debugMode, "准备启动 Puppeteer 页面...");
      page = await ctx.puppeteer.page();
      if (config.cookie) {
        await page.setCookie(...parseCookie(config.cookie));
        logDebug(logger, config.debugMode, "已设置全局 Cookie。");
      }
      await page.setViewport({ width: config.width, height: 1080 });
      await page.goto(targetUrl, { waitUntil: "networkidle2" });
      logDebug(logger, config.debugMode, "页面已导航至: %s", targetUrl);
      const { data, imageBuffer, mode } = await parseTiebaPage(page, config, logger);
      const { postTitle, firstPostText, imageUrls, videoUrl } = data;
      logDebug(logger, config.debugMode, "最终采用前端模式: %s", mode);
      // Message layout: optional text blocks, then the screenshot, then the images.
      const mainMessage = [];
      const textBlocks = [];
      if (postTitle) textBlocks.push(`标题:\n${postTitle}`);
      if (firstPostText) textBlocks.push(`正文:\n${firstPostText}`);
      if (textBlocks.length > 0) mainMessage.push(textBlocks.join("\n\n"));
      mainMessage.push(import_koishi.h.image(imageBuffer, "image/png"));
      if (imageUrls.length > 0) mainMessage.push(...imageUrls.map((url) => import_koishi.h.image(url)));
      await session.send(mainMessage);
      // The video is sent as a separate message.
      if (videoUrl) {
        await session.send(import_koishi.h.video(videoUrl));
      }
      return;
    } catch (error) {
      const detail = error instanceof Error ? error.stack || error.message : String(error);
      logger.error("贴吧解析过程中发生严重错误!\n" + detail);
      return "解析失败,可能是帖子不存在、前端结构变化或网络问题。请管理员检查后台日志以获取详细错误信息。";
    } finally {
      if (await closePageSafely(page)) {
        logDebug(logger, config.debugMode, "Puppeteer 页面已关闭。");
      }
      if (pingerId) {
        // Retracting the notice is best-effort.
        try {
          await session.bot.deleteMessage(session.channelId, pingerId);
          logDebug(logger, config.debugMode, "已撤回“正在生成”的提示消息。");
        } catch (error) {
          if (config.debugMode) logger.warn("撤回提示消息失败,可能缺少权限。", error);
        }
      }
    }
  });
}
__name(apply, "apply");
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment is never executed at
// runtime; the real exports are assigned earlier via __toCommonJS. The
// object literal only spells out the export names in the source text.
0 && (module.exports = {
Config,
apply,
name,
using
});