UNPKG

mcp-prd-server

Version:
1,154 lines (1,153 loc) 54.7 kB
// handlers.ts // 所有异步处理函数,原本在index.ts import axios from "axios"; import puppeteer from "puppeteer"; import fs from "fs"; import path from "path"; import { projectNameMap, config } from "./config.js"; import { projectList, isValidProject, getAllVersionsOfProject, isValidVersion, htmlReduce, getCreatorResult, } from "./utils.js"; // 浏览器实例管理器 class BrowserManager { static instance; browser = null; isInitializing = false; initPromise = null; constructor() { } static getInstance() { if (!BrowserManager.instance) { BrowserManager.instance = new BrowserManager(); } return BrowserManager.instance; } async getBrowser() { if (this.browser) { return this.browser; } if (this.isInitializing) { return this.initPromise; } this.isInitializing = true; this.initPromise = puppeteer.launch({ headless: true, args: [ "--no-sandbox", "--disable-setuid-sandbox", "--disable-features=HttpsFirstBalancedModeAutoEnable", "--disable-dev-shm-usage", // 减少内存使用 "--disable-gpu", // 禁用GPU加速 "--no-first-run", // 跳过首次运行设置 "--no-default-browser-check", // 跳过默认浏览器检查 ], }); try { this.browser = await this.initPromise; console.log("浏览器实例已启动"); return this.browser; } catch (error) { this.isInitializing = false; this.initPromise = null; console.error("浏览器启动失败:", error); throw error; } } async closeBrowser() { if (this.browser) { try { await this.browser.close(); console.log("浏览器实例已关闭"); } catch (error) { console.error("关闭浏览器时出错:", error); } finally { this.browser = null; this.isInitializing = false; this.initPromise = null; } } } async createPage() { const browser = await this.getBrowser(); let page = null; try { page = await browser.newPage(); // 设置页面性能优化 await page.setViewport({ width: 1920, height: 1080 }); await page.setRequestInterception(true); // 减少资源拦截的严格程度,只拦截不必要的资源 page.on("request", (req) => { const resourceType = req.resourceType(); const url = req.url(); // 只拦截一些不必要的资源,保留样式表以确保页面正确渲染 if (["font", "media"].includes(resourceType)) { // 对于图片、字体和媒体文件,只拦截外部资源,保留本地资源 if (url.startsWith("http") && !url.includes("192.168.1.244")) { req.abort(); } else { req.continue(); } } else if (resourceType === "stylesheet") { // 保留样式表以确保页面正确渲染 req.continue(); } else { req.continue(); } }); // 修改 navigator.webdriver await page.evaluateOnNewDocument(() => { delete Object.getPrototypeOf(navigator).webdriver; }); return page; } catch (error) { // 如果页面创建失败,确保清理资源 if (page) { try { await page.close(); } catch (closeError) { console.error("关闭页面时出错:", closeError); } } throw error; } } } // 检查URL是否为有效的项目和版本 async function isProjectVersions(url) { let project = ""; let version = ""; try { const urlObj = new URL(url); const parts = urlObj.pathname.split("/").filter(Boolean); project = parts[0] || ""; version = parts[1] || ""; } catch (e) { return { valid: false, html: `URL 解析失败: ${e.message}`, screenshot: "", }; } if (!project || !isValidProject(project)) { return { html: `没有这个项目:${project}。可用项目有:${projectList.join("、")}`, screenshot: "", }; } if (!version || !isValidVersion(project, version)) { return { html: `项目 ${project} 没有这个版本:${version}。可用版本有:${getAllVersionsOfProject(project).join("、")}`, screenshot: "", }; } // 仅日志输出 console.log("project", project); console.log("version", version); } // 获取当前页面内容 async function fetchPrd(url) { let processedUrl = url; let pageName = ""; if (url.includes("#")) { const baseUrl = url.split("#")[0]; const params = new URLSearchParams(url.split("#")[1]); pageName = params.get("p") || ""; if (pageName) { processedUrl = `${baseUrl}${pageName}.html`; } } console.log("processedUrl", processedUrl); try { const response = await axios.get(processedUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); const htmlStr = htmlReduce(response.data); console.log("htmlStr", htmlStr); // 获取页面截图 const browserManager = BrowserManager.getInstance(); const page = await browserManager.createPage(); try { await page.goto(processedUrl, { waitUntil: "networkidle0", // 等待网络空闲,确保页面完全加载 timeout: 10000, // 增加超时时间到30秒 }); // 直接获取base64截图数据 const screenshot = await page.screenshot({ encoding: "base64", fullPage: true, type: "png", // 添加截图质量优化选项 omitBackground: true, // 如果页面背景是透明的,则保持透明 }); // 如果开启了保存截图功能,保存图片到本地 if (config.saveScreenshot) { const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); const urlHash = Buffer.from(url).toString("base64").substring(0, 10); const filename = `screenshot-${timestamp}-${urlHash}.png`; const filepath = path.join(config.screenshotDir, filename); await fs.promises.writeFile(filepath, Buffer.from(screenshot, "base64")); console.log(`Screenshot saved to: ${filepath}`); } return { html: htmlStr, screenshot: typeof screenshot === "string" ? screenshot : screenshot.toString("base64"), }; } finally { await page.close(); } } catch (error) { console.error("获取PRD内容失败:", error); return { html: "获取PRD内容失败:" + (error.message || error), screenshot: "", }; } } // 1. 获取全部页面内容,并返回树形结构 async function fetchHtmlWithContentImpl(url) { // 处理URL格式 let processedUrl = url; if (url.includes("#")) { const baseUrl = url.split("#")[0]; const params = new URLSearchParams(url.split("#")[1]); const pageName = params.get("p"); if (pageName) { processedUrl = `${baseUrl}${pageName}.html`; } } try { // 1. 获取 document.js const jsUrl = new URL("data/document.js", processedUrl).href; const jsResp = await axios.get(jsUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); const jsContent = jsResp.data; const rootNodes = getCreatorResult(jsContent).sitemap.rootNodes; // 递归抓取内容 async function fetchTree(nodes) { return Promise.all(nodes.map(async (node) => { if (node.type === "Folder" && node.children) { return { ...node, children: await fetchTree(node.children), }; } else if (node.type === "Wireframe" && node.url) { // 拼接页面url const htmlUrl = new URL(node.url, processedUrl).href; let htmlContent = ""; try { const htmlResp = await axios.get(htmlUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); htmlContent = htmlReduce(htmlResp.data); } catch (e) { htmlContent = `获取失败: ${e}`; } return { ...node, content: htmlContent, }; } else { return node; } })); } const treeWithContent = await fetchTree(rootNodes); return { success: true, tree: treeWithContent }; } catch (e) { return { success: false, error: `获取document.js或解析失败:${e}` }; } } // 新增:爬取 http://192.168.1.244:7777/{project}/ 下全部版本页面内容(只返回 html,不递归) async function fetchProjectVersions(project) { if (!isValidProject(project)) { return { html: `没有这个项目:${project}。可用项目有:${projectList.join("、")}`, }; } const url = `http://192.168.1.244:7777/${project}/`; try { const response = await axios.get(url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); return { html: htmlReduce(response.data) }; } catch (error) { return { html: `获取${project}项目全部版本页面失败:` + error.message, }; } } // 新增:爬取 http://192.168.1.244:7777/ 首页内容 async function fetchAllProjects() { const url = "http://192.168.1.244:7777/"; try { const response = await axios.get(url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); // 处理响应内容 let html = htmlReduce(response.data); // 添加项目映射信息到响应中 const projectMappingScript = ` <script type="application/json" id="project-name-mapping"> ${JSON.stringify(projectNameMap)} </script> `; html = html.replace("</body>", `${projectMappingScript}</body>`); return { html }; } catch (error) { return { html: "获取首页内容失败:" + error.message }; } } // 定时爬取所有项目和版本并保存为JSON async function fetchAndSaveAllPrd(options) { const { monthsToLoad = 1, // 默认加载最近1个月 } = options || {}; // 1. 获取项目列表页HTML const url = "http://192.168.1.244:7777/"; let html = ""; try { const res = await axios.get(url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); html = res.data; } catch (e) { console.error("获取项目列表页失败:", e); return; } // 2. 提取所有项目文件夹名(过滤掉 ..) const projectMatches = [...html.matchAll(/<a href="([^\/?#]+)\//g)]; let projectNames = projectMatches .map((m) => m[1]) .filter((name) => name !== ".."); // 获取所有项目,不进行筛选 console.log(`获取所有项目:${projectNames.join(", ")}`); // 3. 保存为 JSON 文件 const dataDir = path.join(process.cwd(), "data"); if (!fs.existsSync(dataDir)) { fs.mkdirSync(dataDir, { recursive: true }); } const savePath = path.join(dataDir, "project_list.json"); fs.writeFileSync(savePath, JSON.stringify(projectNames, null, 2), "utf-8"); console.log("已保存项目列表到", savePath); // 4. 加载现有数据(如果存在) const versionSavePath = path.join(dataDir, "project_versions.json"); let existingVersions = {}; if (fs.existsSync(versionSavePath)) { try { existingVersions = JSON.parse(fs.readFileSync(versionSavePath, "utf-8")); console.log(`加载现有数据,包含 ${Object.keys(existingVersions).length} 个项目`); } catch (e) { console.error("加载现有数据失败,将创建新文件:", e); } } // 5. 递归抓取每个项目下的所有版本目录(过滤掉 ..) const allVersions = { ...existingVersions }; for (const project of projectNames) { try { const projectUrl = `http://192.168.1.244:7777/${project}/`; const res = await axios.get(projectUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); const projectHtml = res.data; const versionMatches = [ ...projectHtml.matchAll(/<a href="([^\/?#]+)\//g), ]; let versionNames = versionMatches .map((m) => m[1]) .filter((v) => v !== ".."); // 提取版本时间信息 - 使用更简单的匹配 const versionTimeMap = new Map(); const versionTimeMatches = [ ...projectHtml.matchAll(/<a href="([^\/?#]+)\/">[^<]+<\/a>\s*(\d{1,2}-[A-Za-z]{3}-\d{4}\s+\d{1,2}:\d{2})/g), ]; versionTimeMatches.forEach((match) => { const versionName = match[1]; const timeStr = match[2]; if (versionName !== "..") { versionTimeMap.set(versionName, timeStr); } }); // 获取所有版本,不进行筛选 console.log(`项目 ${project}: 获取所有版本:${versionNames.join(", ")}`); // 为每个版本获取url和首页内容 - 使用浏览器管理器和并发限制 const versionsWithContent = []; const browserManager = BrowserManager.getInstance(); // 限制并发数量,避免系统负载过高 const concurrencyLimit = 3; const timeoutMs = 30000; // 30秒超时 for (let i = 0; i < versionNames.length; i += concurrencyLimit) { const batch = versionNames.slice(i, i + concurrencyLimit); const batchResults = await Promise.allSettled(batch.map(async (version) => { const versionUrl = `http://192.168.1.244:7777/${project}/${version}/`; let versionContent = ""; // 检查是否需要获取内容(根据时间筛选条件) let shouldGetContent = true; if (monthsToLoad > 0) { // 获取版本时间信息 const versionTime = versionTimeMap.get(version); if (versionTime) { shouldGetContent = isWithinLastMonths(versionTime, monthsToLoad); if (shouldGetContent) { console.log(`✅ 版本 ${version} 在最近 ${monthsToLoad} 个月内 (${versionTime})`); } } else { console.log(`⚠️ 版本 ${version} 没有时间信息,跳过`); shouldGetContent = false; } } else { // 当monthsToLoad为0时,获取所有内容 shouldGetContent = true; console.log(`📋 获取所有版本内容,包括 ${version}`); } if (shouldGetContent) { try { // 添加超时控制 const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`获取版本 ${version} 超时`)), timeoutMs); }); const contentPromise = (async () => { // 参考 fetchHtmlWithContentImpl 方法,获取 document.js 并解析页面结构 const jsUrl = new URL("data/document.js", versionUrl).href; const jsResp = await axios.get(jsUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); const jsContent = jsResp.data; const rootNodes = getCreatorResult(jsContent).sitemap.rootNodes; // 递归获取所有页面URL(不获取页面内容) async function fetchAllPages(nodes) { return Promise.all(nodes.map(async (node) => { if (node.type === "Folder" && node.children) { return { ...node, children: await fetchAllPages(node.children), }; } else if (node.type === "Wireframe" && node.url) { // 拼接页面url const htmlUrl = new URL(node.url, versionUrl).href; return { ...node, fullUrl: htmlUrl, }; } else { return node; } })); } const pagesWithContent = await fetchAllPages(rootNodes); // 递归提取所有 Wireframe 页面 function extractWireframePages(nodes) { const pages = []; for (const node of nodes) { if (node.type === "Wireframe" && node.fullUrl) { pages.push({ name: node.pageName || node.name || "未命名页面", url: node.fullUrl, }); } else if (node.type === "Folder" && node.children) { pages.push(...extractWireframePages(node.children)); } } return pages; } const wireframePages = extractWireframePages(pagesWithContent); // 构建精简的版本内容信息 const versionInfo = { project: project, version: version, totalPages: wireframePages.length, pages: wireframePages, lastModified: versionTimeMap.get(version) || null, }; return JSON.stringify(versionInfo, null, 2); })(); versionContent = (await Promise.race([ contentPromise, timeoutPromise, ])); } catch (e) { console.error(`获取项目 ${project} 版本 ${version} document.js 失败:`); // 如果获取 document.js 失败,回退到原来的页面内容获取方式 try { const page = await browserManager.createPage(); try { await page.goto(versionUrl, { waitUntil: "networkidle0", timeout: 15000, }); await new Promise((resolve) => setTimeout(resolve, 1000)); versionContent = await page.evaluate(() => { const scripts = document.querySelectorAll("script, style"); scripts.forEach((script) => script.remove()); let title = document.title || ""; if (!title || title === "Untitled Document" || title === "Document") { const h1 = document.querySelector("h1"); if (h1 && h1.textContent) { title = h1.textContent.trim(); } else { const possibleTitles = document.querySelectorAll('h1, h2, h3, .title, .header, [class*="title"], [class*="header"]'); for (const element of possibleTitles) { const text = element.textContent?.trim(); if (text && text.length > 0 && text.length < 100) { title = text; break; } } } } let content = ""; if (document.body) { const mainContent = document.querySelector("main, .main, .content, .container, #content, #main") || document.body; const walker = document.createTreeWalker(mainContent, NodeFilter.SHOW_TEXT, { acceptNode: function (node) { const parent = node.parentElement; if (!parent) return NodeFilter.FILTER_REJECT; const style = window.getComputedStyle(parent); if (style.display === "none" || style.visibility === "hidden") { return NodeFilter.FILTER_REJECT; } if (parent.tagName === "SCRIPT" || parent.tagName === "STYLE") { return NodeFilter.FILTER_REJECT; } if (parent.closest("nav, .nav, .navigation, .toolbar, .header, .footer")) { return NodeFilter.FILTER_REJECT; } return NodeFilter.FILTER_ACCEPT; }, }); const textNodes = []; let node; while ((node = walker.nextNode())) { const text = node.textContent?.trim(); if (text && text.length > 0) { const uselessPatterns = [ /^(CLOSE|Local Preview|Share Prototype|Show Note Markers|Show Hotspots|Default Scale|Scale to Width|Scale to Fit|Use|and|keys|to move between pages|No notes for this page|Notes added in Axure RP will appear here)$/, /^\(\d+ of \d+\)$/, /^\(\d+ x \w+\)$/, /^\(\w+ x \w+\)$/, /^(Pages|Adaptive)$/, /^[A-Z\s]+$/, /^\d+$/, /^[^\u4e00-\u9fa5a-zA-Z0-9]+$/, ]; const isUseless = uselessPatterns.some((pattern) => pattern.test(text)); if (!isUseless && text.length > 1) { textNodes.push(text); } } } content = textNodes.join("\n"); } if (!content) { content = document.body ? document.body.innerText : document.documentElement.innerText; } let result = ""; if (title) { result += `标题: ${title}\n\n`; } if (content) { const sections = content .split("\n") .filter((line) => line.trim().length > 0) .filter((line) => { const trimmed = line.trim(); return (trimmed.length > 2 && !trimmed.match(/^[^\u4e00-\u9fa5a-zA-Z0-9]+$/) && !trimmed.match(/^[A-Z\s]+$/) && !trimmed.match(/^\d+$/) && !trimmed.match(/^\(\d+ of \d+\)$/) && !trimmed.match(/^\(\d+ x \w+\)$/) && !trimmed.match(/^(Pages|Adaptive|CLOSE)$/)); }); if (sections.length > 0) { result += `页面内容:\n`; sections.forEach((section, index) => { result += `${index + 1}. ${section}\n`; }); } } return result || "无内容"; }); } finally { await page.close(); } } catch (pageError) { console.error(`获取项目 ${project} 版本 ${version} 页面内容也失败:`, pageError); versionContent = `获取失败: ${e.message}`; } } } else { // 如果不需要获取内容,只保存基本信息 versionContent = ""; } return { name: version, url: versionUrl, content: versionContent || version, lastModified: versionTimeMap.get(version) || null, pages: versionContent && versionContent.startsWith("{") ? (() => { try { const parsed = JSON.parse(versionContent); return parsed.pages || []; } catch { return []; } })() : [], }; })); // 处理Promise.allSettled的结果 const processedBatchResults = batchResults.map((result, index) => { if (result.status === "fulfilled") { return result.value; } else { console.error(`版本 ${versionNames[i + index]} 处理失败:`, result.reason); return { name: versionNames[i + index], url: `http://192.168.1.244:7777/${project}/${versionNames[i + index]}/`, content: `处理失败: ${result.reason?.message || "未知错误"}`, lastModified: versionTimeMap.get(versionNames[i + index]) || null, pages: [], }; } }); versionsWithContent.push(...processedBatchResults); // 添加进度日志 console.log(`项目 ${project}: 已完成 ${Math.min(i + concurrencyLimit, versionNames.length)}/${versionNames.length} 个版本`); } // 增量更新:合并新获取的版本和现有版本 const existingProjectVersions = allVersions[project] || []; const existingVersionMap = new Map(); // 创建现有版本的映射 existingProjectVersions.forEach((version) => { existingVersionMap.set(version.name, version); }); // 更新或添加新版本 versionsWithContent.forEach((version) => { existingVersionMap.set(version.name, version); }); // 转换回数组 allVersions[project] = Array.from(existingVersionMap.values()); console.log(`项目 ${project}: 更新后共有 ${allVersions[project].length} 个版本`); } catch (e) { console.error(`获取项目 ${project} 版本目录失败:`); allVersions[project] = []; } } // 统计更新结果 const totalProjects = Object.keys(allVersions).length; const totalVersions = Object.values(allVersions).reduce((sum, versions) => sum + versions.length, 0); const originalVersions = Object.values(existingVersions).reduce((sum, versions) => sum + versions.length, 0); const newVersions = totalVersions - originalVersions; fs.writeFileSync(versionSavePath, JSON.stringify(allVersions, null, 2), "utf-8"); console.log(`已保存所有项目版本到 ${versionSavePath}`); console.log(`更新统计: 项目 ${totalProjects} 个, 版本 ${totalVersions} 个 (新增 ${newVersions} 个)`); await buildDocumentIndex(); } // 清理函数,用于应用退出时关闭浏览器实例 async function cleanupBrowser() { const browserManager = BrowserManager.getInstance(); await browserManager.closeBrowser(); } // 时间解析函数 function parseTimeString(timeStr) { // 解析格式如 "19-Apr-2022 16:14" const months = { Jan: 0, Feb: 1, Mar: 2, Apr: 3, May: 4, Jun: 5, Jul: 6, Aug: 7, Sep: 8, Oct: 9, Nov: 10, Dec: 11, }; const match = timeStr.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})\s+(\d{1,2}):(\d{2})/); if (!match) { throw new Error(`无法解析时间格式: ${timeStr}`); } const [, day, month, year, hour, minute] = match; return new Date(parseInt(year), months[month], parseInt(day), parseInt(hour), parseInt(minute)); } // 检查时间是否在最近N个月内 function isWithinLastMonths(timeStr, months) { try { const versionDate = parseTimeString(timeStr); const monthsAgo = new Date(); monthsAgo.setMonth(monthsAgo.getMonth() - months); return versionDate >= monthsAgo; } catch (error) { console.warn(`时间解析失败: ${timeStr}`, error); return false; } } // 文档索引管理 class DocumentIndexManager { static instance; indexes = new Map(); indexFilePath; constructor() { this.indexFilePath = path.join(process.cwd(), "data", "document_index.json"); } static getInstance() { if (!DocumentIndexManager.instance) { DocumentIndexManager.instance = new DocumentIndexManager(); } return DocumentIndexManager.instance; } // 从现有数据构建索引 async buildIndexFromExistingData() { console.log("开始从现有数据构建文档索引..."); try { // 读取现有的项目版本数据 const projectVersionsPath = path.join(process.cwd(), "data", "project_versions.json"); if (!fs.existsSync(projectVersionsPath)) { console.error("项目版本数据文件不存在"); return; } const projectVersions = JSON.parse(fs.readFileSync(projectVersionsPath, "utf-8")); const indexes = []; let processedCount = 0; const totalCount = Object.values(projectVersions).flat().length; // 内存管理:限制同时处理的文档数量 const maxConcurrentDocs = 100; let currentBatch = []; // 计算一个月前的时间戳 const oneMonthAgo = new Date(); oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1); for (const [project, versions] of Object.entries(projectVersions)) { for (const version of versions) { try { // 解析最后修改时间 const lastModified = new Date(version.lastModified); const isRecent = lastModified > oneMonthAgo; // 生成文档标题 const title = `${project} ${version.name}`; // 提取关键词 const keywords = this.extractKeywords(project, version.name, version.content, version.pages); // 生成摘要 const summary = this.generateSummary(version.content, version.pages); const index = { project, version: version.name, url: version.url, title, keywords, summary, lastModified: version.lastModified, pages: version.pages, }; currentBatch.push(index); this.indexes.set(`${project}-${version.name}`, index); processedCount++; // 当批次达到最大数量时,保存并清空 if (currentBatch.length >= maxConcurrentDocs) { indexes.push(...currentBatch); currentBatch = []; // 强制垃圾回收(如果可用) if (global.gc) { global.gc(); } } if (processedCount % 100 === 0) { console.log(`已处理 ${processedCount}/${totalCount} 个文档`); } } catch (error) { console.error(`处理文档 ${project}/${version.name} 时出错:`, error); } } } // 保存剩余的批次 if (currentBatch.length > 0) { indexes.push(...currentBatch); } // 保存索引到文件 await this.saveIndexes(indexes); console.log(`文档索引构建完成,共处理 ${indexes.length} 个文档`); } catch (error) { console.error("构建文档索引失败:", error); } } // 提取关键词 extractKeywords(project, version, content, pages) { const keywords = new Set(); // 添加项目名和版本号 keywords.add(project.toLowerCase()); keywords.add(version.toLowerCase()); // 从内容中提取关键词 if (content && content.trim()) { try { // 尝试解析JSON内容 const contentObj = JSON.parse(content); if (contentObj.pages && Array.isArray(contentObj.pages)) { // 从页面名称中提取关键词 contentObj.pages.forEach((page) => { if (typeof page === "string") { // 页面名称是字符串 const pageName = page.toLowerCase(); keywords.add(pageName); // 提取中文词组 this.extractChinesePhrases(pageName).forEach((phrase) => { keywords.add(phrase); }); } else if (page.name) { // 页面名称是对象 const pageName = page.name.toLowerCase(); keywords.add(pageName); // 提取中文词组 this.extractChinesePhrases(pageName).forEach((phrase) => { keywords.add(phrase); }); } }); } } catch (e) { // 如果解析失败,按原来的方式处理 const techKeywords = [ "api", "ui", "ux", "prd", "需求", "功能", "页面", "按钮", "表单", "列表", "搜索", "筛选", "排序", "分页", "弹窗", "模态", "导航", "菜单", "用户", "登录", "注册", "权限", "角色", "数据", "数据库", "缓存", "性能", "优化", ]; const lowerContent = content.toLowerCase(); techKeywords.forEach((keyword) => { if (lowerContent.includes(keyword)) { keywords.add(keyword); } }); } // 提取项目名称映射中的中文名 Object.entries(projectNameMap).forEach(([pinyin, chinese]) => { if (project.toLowerCase() === pinyin.toLowerCase()) { keywords.add(chinese); } }); } // 从pages字段提取关键词 if (pages && Array.isArray(pages)) { pages.forEach((page) => { if (page.name) { const pageName = page.name.toLowerCase(); keywords.add(pageName); // 提取中文词组 this.extractChinesePhrases(pageName).forEach((phrase) => { keywords.add(phrase); }); } }); } return Array.from(keywords); } // 提取中文词组 extractChinesePhrases(text) { const phrases = []; const chineseWords = text.match(/[\u4e00-\u9fa5]+/g) || []; for (const word of chineseWords) { if (word.length >= 2) { phrases.push(word); // 对于较长的中文词组,提取子词组 if (word.length > 3) { for (let i = 0; i <= word.length - 2; i++) { for (let j = i + 2; j <= word.length; j++) { const subPhrase = word.substring(i, j); if (subPhrase.length >= 2) { phrases.push(subPhrase); } } } } } } return phrases; } // 生成摘要 generateSummary(content, pages) { if (!content || content.trim() === "") { return "暂无内容"; } try { // 尝试解析JSON内容 const contentObj = JSON.parse(content); if (contentObj.pages && Array.isArray(contentObj.pages)) { // 从页面名称生成摘要 const pageNames = contentObj.pages .map((page) => { if (typeof page === "string") { return page; } else if (page.name) { return page.name; } return ""; }) .filter((name) => name); if (pageNames.length > 0) { return `包含 ${pageNames.length} 个页面: ${pageNames .slice(0, 5) .join(", ")}${pageNames.length > 5 ? "..." : ""}`; } } } catch (e) { // 如果解析失败,按原来的方式处理 } // 从pages字段生成摘要 if (pages && Array.isArray(pages)) { const pageNames = pages .map((page) => page.name) .filter((name) => name); if (pageNames.length > 0) { return `包含 ${pageNames.length} 个页面: ${pageNames .slice(0, 5) .join(", ")}${pageNames.length > 5 ? "..." : ""}`; } } // 简单的摘要生成:取前200个字符 const summary = content.replace(/\s+/g, " ").trim(); return summary.length > 200 ? summary.substring(0, 200) + "..." : summary; } // 保存索引到文件 async saveIndexes(indexes) { const dataDir = path.join(process.cwd(), "data"); if (!fs.existsSync(dataDir)) { fs.mkdirSync(dataDir, { recursive: true }); } fs.writeFileSync(this.indexFilePath, JSON.stringify(indexes, null, 2), "utf-8"); console.log(`索引已保存到: ${this.indexFilePath}`); } // 加载索引 async loadIndexes() { try { if (fs.existsSync(this.indexFilePath)) { const indexes = JSON.parse(fs.readFileSync(this.indexFilePath, "utf-8")); this.indexes.clear(); indexes.forEach((index) => { this.indexes.set(`${index.project}-${index.version}`, index); }); console.log(`已加载 ${this.indexes.size} 个文档索引`); } } catch (error) { console.error("加载文档索引失败:", error); } } // 获取所有索引 getAllIndexes() { return Array.from(this.indexes.values()); } // 根据关键词搜索 searchByKeywords(query) { const results = []; const lowerQuery = query.toLowerCase(); const queryWords = lowerQuery .split(/\s+/) .filter((word) => word.length > 0); for (const index of this.indexes.values()) { let relevance = 0; const matchedKeywords = []; let matchType = "fuzzy"; // 精确匹配标题、项目名、版本号 if (index.title.toLowerCase().includes(lowerQuery) || index.project.toLowerCase().includes(lowerQuery) || index.version.toLowerCase().includes(lowerQuery)) { relevance += 15; matchType = "exact"; matchedKeywords.push(query); } // 页面名称匹配(高权重) if (index.pages && Array.isArray(index.pages)) { for (const page of index.pages) { if (page.name) { const pageName = page.name.toLowerCase(); // 完整页面名称匹配 if (pageName.includes(lowerQuery) || lowerQuery.includes(pageName)) { relevance += 12; if (matchType === "fuzzy") matchType = "keyword"; if (!matchedKeywords.includes(pageName)) { matchedKeywords.push(pageName); } } // 页面名称中的关键词匹配 for (const queryWord of queryWords) { if (pageName.includes(queryWord) || queryWord.includes(pageName)) { relevance += 8; if (matchType === "fuzzy") matchType = "keyword"; if (!matchedKeywords.includes(queryWord)) { matchedKeywords.push(queryWord); } } } } } } // 关键词匹配 for (const keyword of index.keywords) { // 完整关键词匹配 if (keyword.includes(lowerQuery) || lowerQuery.includes(keyword)) { relevance += 10; if (matchType === "fuzzy") matchType = "keyword"; if (!matchedKeywords.includes(keyword)) { matchedKeywords.push(keyword); } } // 关键词中的单词匹配 for (const queryWord of queryWords) { if (keyword.includes(queryWord) || queryWord.includes(keyword)) { relevance += 6; if (matchType === "fuzzy") matchType = "keyword"; if (!matchedKeywords.includes(keyword)) { matchedKeywords.push(keyword); } } } } // 摘要匹配 if (index.summary.toLowerCase().includes(lowerQuery)) { relevance += 5; if (matchType === "fuzzy") matchType = "keyword"; } // 模糊匹配标题和摘要 if (relevance === 0) { for (const queryWord of queryWords) { if (index.title.toLowerCase().includes(queryWord) || index.summary.toLowerCase().includes(queryWord)) { relevance += 2; } } } // 中文词组匹配优化 if (relevance === 0 && /[\u4e00-\u9fa5]/.test(lowerQuery)) { // 对于中文查询,尝试更宽松的匹配 const chineseWords = lowerQuery.match(/[\u4e00-\u9fa5]+/g) || []; for (const chineseWord of chineseWords) { if (chineseWord.length >= 2) { // 至少2个中文字符 // 在标题中查找 if (index.title.toLowerCase().includes(chineseWord)) { relevance += 4; if (!matchedKeywords.includes(chineseWord)) { matchedKeywords.push(chineseWord); } } // 在页面名称中查找 if (index.pages && Array.isArray(index.pages)) { for (const page of index.pages) { if (page.name && page.name.toLowerCase().includes(chineseWord)) { relevance += 6; if (!matchedKeywords.includes(chineseWord)) { matchedKeywords.push(chineseWord); } } } } // 在关键词中查找 for (const keyword of index.keywords) { if (keyword.includes(chineseWord)) { relevance += 5; if (!matchedKeywords.includes(chineseWord)) { matchedKeywords.push(chineseWord); } } } } } } if (relevance > 0) { results.push({ project: index.project, version: index.version,