// mcp-prd-server
// Version:
// MCP Server for PRD content management
// 1,154 lines (1,153 loc) • 54.7 kB
// JavaScript
// handlers.ts
// All asynchronous handler functions; these originally lived in index.ts.
import axios from "axios";
import puppeteer from "puppeteer";
import fs from "fs";
import path from "path";
import { projectNameMap, config } from "./config.js";
import { projectList, isValidProject, getAllVersionsOfProject, isValidVersion, htmlReduce, getCreatorResult, } from "./utils.js";
/**
 * Singleton owner of the one shared Puppeteer browser used by this module.
 *
 * The browser is launched lazily on the first `getBrowser()` call; concurrent
 * callers share the same in-flight launch promise. The cached handle is
 * dropped when the browser disconnects so that the next call relaunches it.
 */
class BrowserManager {
    static instance;
    browser = null;
    isInitializing = false;
    initPromise = null;
    constructor() { }
    /**
     * @returns {BrowserManager} the lazily created singleton.
     */
    static getInstance() {
        if (!BrowserManager.instance) {
            BrowserManager.instance = new BrowserManager();
        }
        return BrowserManager.instance;
    }
    /**
     * Returns the shared browser, launching it on first use.
     *
     * @returns {Promise<object>} the Puppeteer browser.
     * @throws rethrows the launch error after resetting the launch state, so a
     *   later call can retry.
     */
    async getBrowser() {
        if (this.browser) {
            return this.browser;
        }
        if (this.isInitializing) {
            // A launch is already in flight: share it instead of starting another.
            return this.initPromise;
        }
        this.isInitializing = true;
        this.initPromise = puppeteer.launch({
            headless: true,
            args: [
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-features=HttpsFirstBalancedModeAutoEnable",
                "--disable-dev-shm-usage", // reduce memory usage
                "--disable-gpu", // disable GPU acceleration
                "--no-first-run", // skip first-run setup
                "--no-default-browser-check", // skip the default-browser check
            ],
        });
        try {
            const browser = await this.initPromise;
            // If the browser crashes or is closed elsewhere, forget the stale
            // handle so the next getBrowser() launches a fresh instance.
            browser.once("disconnected", () => {
                if (this.browser === browser) {
                    this.browser = null;
                    this.isInitializing = false;
                    this.initPromise = null;
                }
            });
            this.browser = browser;
            this.isInitializing = false;
            console.log("浏览器实例已启动");
            return browser;
        }
        catch (error) {
            this.isInitializing = false;
            this.initPromise = null;
            console.error("浏览器启动失败:", error);
            throw error;
        }
    }
    /**
     * Closes the shared browser, if any, and resets all launch state.
     *
     * A launch that is still in flight is awaited first so that the browser it
     * produces is closed too instead of being leaked. Close errors are logged,
     * not thrown.
     *
     * @returns {Promise<void>}
     */
    async closeBrowser() {
        let browser = this.browser;
        if (!browser && this.initPromise) {
            try {
                browser = await this.initPromise;
            }
            catch {
                // The launch failure is reported by getBrowser(); nothing to close.
                browser = null;
            }
        }
        if (!browser) {
            return;
        }
        try {
            await browser.close();
            console.log("浏览器实例已关闭");
        }
        catch (error) {
            console.error("关闭浏览器时出错:", error);
        }
        finally {
            this.browser = null;
            this.isInitializing = false;
            this.initPromise = null;
        }
    }
    /**
     * Opens a new page on the shared browser with a 1920x1080 viewport,
     * request interception and the `navigator.webdriver` flag removed.
     *
     * @returns {Promise<object>} the configured page; the caller must close it.
     * @throws rethrows any setup error after closing the half-configured page.
     */
    async createPage() {
        const browser = await this.getBrowser();
        let page = null;
        try {
            page = await browser.newPage();
            await page.setViewport({ width: 1920, height: 1080 });
            await page.setRequestInterception(true);
            // Only fonts and media fetched over http(s) from hosts other than the
            // PRD server are blocked; everything else (style sheets included) is
            // let through so pages still render correctly.
            page.on("request", (req) => {
                const isHeavyAsset = ["font", "media"].includes(req.resourceType());
                const url = req.url();
                const isExternal = url.startsWith("http") && !url.includes("192.168.1.244");
                if (isHeavyAsset && isExternal) {
                    req.abort();
                }
                else {
                    req.continue();
                }
            });
            // Remove navigator.webdriver before any page script runs.
            await page.evaluateOnNewDocument(() => {
                delete Object.getPrototypeOf(navigator).webdriver;
            });
            return page;
        }
        catch (error) {
            // Setup failed: make sure the page does not stay open.
            if (page) {
                try {
                    await page.close();
                }
                catch (closeError) {
                    console.error("关闭页面时出错:", closeError);
                }
            }
            throw error;
        }
    }
}
/**
 * Checks that a PRD URL points at a known project and version.
 *
 * The first two non-empty path segments of the URL are read as
 * `/{project}/{version}/...` and validated with `isValidProject` and
 * `isValidVersion`.
 *
 * @param {string} url - URL to validate.
 * @returns {Promise<{valid: false, html: string, screenshot: string} | undefined>}
 *   a failure descriptor (always carrying `valid: false`) when the URL cannot
 *   be parsed or names an unknown project/version; `undefined` when the URL is
 *   valid.
 */
async function isProjectVersions(url) {
    let project = "";
    let version = "";
    try {
        const segments = new URL(url).pathname.split("/").filter(Boolean);
        project = segments[0] || "";
        version = segments[1] || "";
    }
    catch (e) {
        return {
            valid: false,
            html: `URL 解析失败: ${e.message}`,
            screenshot: "",
        };
    }
    if (!project || !isValidProject(project)) {
        return {
            valid: false,
            html: `没有这个项目:${project}。可用项目有:${projectList.join("、")}`,
            screenshot: "",
        };
    }
    if (!version || !isValidVersion(project, version)) {
        return {
            valid: false,
            html: `项目 ${project} 没有这个版本:${version}。可用版本有:${getAllVersionsOfProject(project).join("、")}`,
            screenshot: "",
        };
    }
    // Log output only; a valid URL yields no return value.
    console.log("project", project);
    console.log("version", version);
}
/**
 * Fetches one PRD page: its reduced HTML plus a full-page PNG screenshot.
 *
 * When the URL has a `#...` fragment containing a `p` parameter, the page
 * actually requested is `{part before #}{p}.html`.
 *
 * @param {string} url - page URL, optionally with a `#p=<pageName>` fragment.
 * @returns {Promise<{html: string, screenshot: string}>} `html` is the output
 *   of `htmlReduce` and `screenshot` is base64-encoded PNG data. On any
 *   failure `html` carries the error text and `screenshot` is "".
 */
async function fetchPrd(url) {
    let processedUrl = url;
    if (url.includes("#")) {
        const baseUrl = url.split("#")[0];
        const params = new URLSearchParams(url.split("#")[1]);
        const pageName = params.get("p") || "";
        if (pageName) {
            processedUrl = `${baseUrl}${pageName}.html`;
        }
    }
    console.log("processedUrl", processedUrl);
    try {
        const response = await axios.get(processedUrl, {
            headers: {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            },
        });
        const htmlStr = htmlReduce(response.data);
        console.log("htmlStr", htmlStr);
        // Render the same page in the shared browser to take the screenshot.
        const browserManager = BrowserManager.getInstance();
        const page = await browserManager.createPage();
        try {
            await page.goto(processedUrl, {
                waitUntil: "networkidle0", // wait until the network is idle so the page is fully loaded
                timeout: 10000, // 10 s navigation timeout
            });
            const screenshot = await page.screenshot({
                encoding: "base64",
                fullPage: true,
                type: "png",
                omitBackground: true, // keep a transparent page background transparent
            });
            const screenshotBase64 = typeof screenshot === "string"
                ? screenshot
                : Buffer.from(screenshot).toString("base64");
            // Optionally keep a copy of the screenshot on disk.
            if (config.saveScreenshot) {
                const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
                // Use the tail of the base64url form: the head is identical for
                // every "http://..." URL, and plain base64 may contain "/", which
                // is not safe inside a file name.
                const urlHash = Buffer.from(url).toString("base64url").slice(-10);
                const filename = `screenshot-${timestamp}-${urlHash}.png`;
                const filepath = path.join(config.screenshotDir, filename);
                // The target directory may not exist yet.
                await fs.promises.mkdir(config.screenshotDir, { recursive: true });
                await fs.promises.writeFile(filepath, Buffer.from(screenshotBase64, "base64"));
                console.log(`Screenshot saved to: ${filepath}`);
            }
            return {
                html: htmlStr,
                screenshot: screenshotBase64,
            };
        }
        finally {
            await page.close();
        }
    }
    catch (error) {
        console.error("获取PRD内容失败:", error);
        return {
            html: "获取PRD内容失败:" + (error.message || error),
            screenshot: "",
        };
    }
}
/**
 * Downloads the prototype's `data/document.js`, reads its sitemap and returns
 * the sitemap tree with the reduced HTML of every wireframe page attached.
 *
 * @param {string} url - prototype URL; a `#...p=<page>` fragment is rewritten
 *   to `{part before #}{page}.html` before relative URLs are resolved.
 * @returns {Promise<{success: true, tree: object[]} | {success: false, error: string}>}
 *   In the tree, folders keep their (processed) children, wireframe nodes
 *   with a `url` gain a `content` string (the reduced HTML or a failure
 *   message), and every other node is returned untouched.
 */
async function fetchHtmlWithContentImpl(url) {
    // Normalise the URL: "#...p=name" selects the page "name.html".
    let processedUrl = url;
    if (url.includes("#")) {
        const pieces = url.split("#");
        const pageName = new URLSearchParams(pieces[1]).get("p");
        if (pageName) {
            processedUrl = `${pieces[0]}${pageName}.html`;
        }
    }
    const requestOptions = () => ({
        headers: {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        },
    });
    // Processes one sitemap node; folders recurse, wireframes get their HTML.
    const attachContent = async (node) => {
        if (node.type === "Folder" && node.children) {
            const children = await fetchTree(node.children);
            return { ...node, children };
        }
        if (!(node.type === "Wireframe" && node.url)) {
            return node;
        }
        // Resolve the page URL relative to the prototype URL.
        const htmlUrl = new URL(node.url, processedUrl).href;
        let htmlContent = "";
        try {
            const { data } = await axios.get(htmlUrl, requestOptions());
            htmlContent = htmlReduce(data);
        }
        catch (e) {
            // A single failing page must not abort the whole tree.
            htmlContent = `获取失败: ${e}`;
        }
        return { ...node, content: htmlContent };
    };
    // All siblings are fetched concurrently.
    const fetchTree = async (nodes) => Promise.all(nodes.map(attachContent));
    try {
        const documentUrl = new URL("data/document.js", processedUrl).href;
        const { data: jsContent } = await axios.get(documentUrl, requestOptions());
        const { rootNodes } = getCreatorResult(jsContent).sitemap;
        const tree = await fetchTree(rootNodes);
        return { success: true, tree };
    }
    catch (e) {
        return { success: false, error: `获取document.js或解析失败:${e}` };
    }
}
/**
 * Fetches the listing page http://192.168.1.244:7777/{project}/ that shows
 * every version of one project (HTML only, no recursion).
 *
 * @param {string} project - project directory name.
 * @returns {Promise<{html: string}>} the reduced listing HTML, or a message
 *   explaining that the project is unknown or that the request failed.
 */
async function fetchProjectVersions(project) {
    const isKnown = isValidProject(project);
    if (!isKnown) {
        const available = projectList.join("、");
        return { html: `没有这个项目:${project}。可用项目有:${available}` };
    }
    const requestConfig = {
        headers: {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        },
    };
    try {
        const { data } = await axios.get(`http://192.168.1.244:7777/${project}/`, requestConfig);
        return { html: htmlReduce(data) };
    }
    catch (error) {
        return { html: `获取${project}项目全部版本页面失败:${error.message}` };
    }
}
/**
 * Fetches the home page http://192.168.1.244:7777/ and returns its reduced
 * HTML with the `projectNameMap` embedded as a JSON `<script>` element
 * (id `project-name-mapping`).
 *
 * The script is inserted before the first `</body>`; when the reduced HTML
 * has no `</body>` it is appended at the end so the mapping is never lost.
 *
 * @returns {Promise<{html: string}>} the augmented HTML, or an error message
 *   when the request fails.
 */
async function fetchAllProjects() {
    const url = "http://192.168.1.244:7777/";
    try {
        const response = await axios.get(url, {
            headers: {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            },
        });
        const reducedHtml = htmlReduce(response.data);
        const projectMappingScript = `\n<script type="application/json" id="project-name-mapping">\n${JSON.stringify(projectNameMap)}\n</script>\n`;
        // A replacer function is used on purpose: with a string replacement,
        // "$&", "$1", "$$" ... inside the JSON would be expanded as patterns.
        const html = reducedHtml.includes("</body>")
            ? reducedHtml.replace("</body>", () => `${projectMappingScript}</body>`)
            : `${reducedHtml}${projectMappingScript}`;
        return { html };
    }
    catch (error) {
        return { html: "获取首页内容失败:" + error.message };
    }
}
/** Extracts the directory names linked from a listing page, skipping "..". */
function listPrdDirectoryNames(listingHtml) {
    return [...listingHtml.matchAll(/<a href="([^\/?#]+)\//g)]
        .map((m) => m[1])
        .filter((name) => name !== "..");
}
/** Maps each listed directory name to the timestamp text that follows its link. */
function mapPrdVersionTimes(projectHtml) {
    const versionTimeMap = new Map();
    const matches = projectHtml.matchAll(/<a href="([^\/?#]+)\/">[^<]+<\/a>\s*(\d{1,2}-[A-Za-z]{3}-\d{4}\s+\d{1,2}:\d{2})/g);
    for (const [, versionName, timeStr] of matches) {
        if (versionName !== "..") {
            versionTimeMap.set(versionName, timeStr);
        }
    }
    return versionTimeMap;
}
/** Flattens a sitemap into `{name, url}` records for every wireframe page. */
function collectPrdWireframePages(nodes, versionUrl) {
    const pages = [];
    for (const node of nodes) {
        if (node.type === "Wireframe" && node.url) {
            pages.push({
                name: node.pageName || node.name || "未命名页面",
                url: new URL(node.url, versionUrl).href,
            });
        }
        else if (node.type === "Folder" && node.children) {
            pages.push(...collectPrdWireframePages(node.children, versionUrl));
        }
    }
    return pages;
}
/** Rejects with `message` if `task` has not settled within `timeoutMs`; always clears the timer. */
function racePrdTimeout(task, timeoutMs, message) {
    let timer;
    const timeout = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(message)), timeoutMs);
    });
    return Promise.race([task, timeout]).finally(() => clearTimeout(timer));
}
/** Builds the compact version summary from the version's `data/document.js` sitemap. */
async function loadPrdVersionInfo(project, version, versionUrl, lastModified) {
    const jsUrl = new URL("data/document.js", versionUrl).href;
    const jsResp = await axios.get(jsUrl, {
        headers: {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        },
    });
    const rootNodes = getCreatorResult(jsResp.data).sitemap.rootNodes;
    const pages = collectPrdWireframePages(rootNodes, versionUrl);
    return {
        project,
        version,
        totalPages: pages.length,
        pages,
        lastModified,
    };
}
/** Fallback: renders the version's landing page in the browser and returns its visible text. */
async function readPrdVersionTextViaBrowser(browserManager, versionUrl) {
    const page = await browserManager.createPage();
    try {
        await page.goto(versionUrl, {
            waitUntil: "networkidle0",
            timeout: 15000,
        });
        // Give late scripts one more second to finish rendering.
        await new Promise((resolve) => setTimeout(resolve, 1000));
        // NOTE: this callback runs inside the page, so it must stay self-contained.
        return await page.evaluate(() => {
            document
                .querySelectorAll("script, style")
                .forEach((element) => element.remove());
            // Title: document.title, else the first <h1>, else the first short heading-like text.
            let title = document.title || "";
            if (!title ||
                title === "Untitled Document" ||
                title === "Document") {
                const h1 = document.querySelector("h1");
                if (h1 && h1.textContent) {
                    title = h1.textContent.trim();
                }
                else {
                    const possibleTitles = document.querySelectorAll('h1, h2, h3, .title, .header, [class*="title"], [class*="header"]');
                    for (const element of possibleTitles) {
                        const text = element.textContent?.trim();
                        if (text && text.length > 0 && text.length < 100) {
                            title = text;
                            break;
                        }
                    }
                }
            }
            let content = "";
            if (document.body) {
                const mainContent = document.querySelector("main, .main, .content, .container, #content, #main") || document.body;
                // Visit visible text nodes only, skipping navigation/toolbar chrome.
                const walker = document.createTreeWalker(mainContent, NodeFilter.SHOW_TEXT, {
                    acceptNode: function (node) {
                        const parent = node.parentElement;
                        if (!parent)
                            return NodeFilter.FILTER_REJECT;
                        const style = window.getComputedStyle(parent);
                        if (style.display === "none" ||
                            style.visibility === "hidden") {
                            return NodeFilter.FILTER_REJECT;
                        }
                        if (parent.tagName === "SCRIPT" ||
                            parent.tagName === "STYLE") {
                            return NodeFilter.FILTER_REJECT;
                        }
                        if (parent.closest("nav, .nav, .navigation, .toolbar, .header, .footer")) {
                            return NodeFilter.FILTER_REJECT;
                        }
                        return NodeFilter.FILTER_ACCEPT;
                    },
                });
                // Viewer UI labels and punctuation/number-only fragments carry no PRD content.
                const uselessPatterns = [
                    /^(CLOSE|Local Preview|Share Prototype|Show Note Markers|Show Hotspots|Default Scale|Scale to Width|Scale to Fit|Use|and|keys|to move between pages|No notes for this page|Notes added in Axure RP will appear here)$/,
                    /^\(\d+ of \d+\)$/,
                    /^\(\d+ x \w+\)$/,
                    /^\(\w+ x \w+\)$/,
                    /^(Pages|Adaptive)$/,
                    /^[A-Z\s]+$/,
                    /^\d+$/,
                    /^[^\u4e00-\u9fa5a-zA-Z0-9]+$/,
                ];
                const textNodes = [];
                let node;
                while ((node = walker.nextNode())) {
                    const text = node.textContent?.trim();
                    if (text && text.length > 0) {
                        const isUseless = uselessPatterns.some((pattern) => pattern.test(text));
                        if (!isUseless && text.length > 1) {
                            textNodes.push(text);
                        }
                    }
                }
                content = textNodes.join("\n");
            }
            if (!content) {
                content = document.body
                    ? document.body.innerText
                    : document.documentElement.innerText;
            }
            let result = "";
            if (title) {
                result += `标题: ${title}\n\n`;
            }
            if (content) {
                const sections = content
                    .split("\n")
                    .filter((line) => line.trim().length > 0)
                    .filter((line) => {
                    const trimmed = line.trim();
                    return (trimmed.length > 2 &&
                        !trimmed.match(/^[^\u4e00-\u9fa5a-zA-Z0-9]+$/) &&
                        !trimmed.match(/^[A-Z\s]+$/) &&
                        !trimmed.match(/^\d+$/) &&
                        !trimmed.match(/^\(\d+ of \d+\)$/) &&
                        !trimmed.match(/^\(\d+ x \w+\)$/) &&
                        !trimmed.match(/^(Pages|Adaptive|CLOSE)$/));
                });
                if (sections.length > 0) {
                    result += `页面内容:\n`;
                    sections.forEach((section, index) => {
                        result += `${index + 1}. ${section}\n`;
                    });
                }
            }
            return result || "无内容";
        });
    }
    finally {
        await page.close();
    }
}
/**
 * Crawls one version and returns `{ fetched, entry }`.
 * `entry` is the record to persist; `fetched` is true only when real content
 * was retrieved (sitemap or browser fallback), false for skipped/failed versions.
 */
async function crawlPrdVersion({ project, version, versionTimeMap, monthsToLoad, browserManager, timeoutMs }) {
    const versionUrl = `http://192.168.1.244:7777/${project}/${version}/`;
    const versionTime = versionTimeMap.get(version);
    // Decide whether this version's content has to be downloaded.
    let shouldGetContent = true;
    if (monthsToLoad > 0) {
        if (versionTime) {
            shouldGetContent = isWithinLastMonths(versionTime, monthsToLoad);
            if (shouldGetContent) {
                console.log(`✅ 版本 ${version} 在最近 ${monthsToLoad} 个月内 (${versionTime})`);
            }
        }
        else {
            console.log(`⚠️ 版本 ${version} 没有时间信息,跳过`);
            shouldGetContent = false;
        }
    }
    else {
        // monthsToLoad <= 0 means: load every version.
        console.log(`📋 获取所有版本内容,包括 ${version}`);
    }
    let versionContent = "";
    let pages = [];
    let fetched = false;
    if (shouldGetContent) {
        try {
            const versionInfo = await racePrdTimeout(loadPrdVersionInfo(project, version, versionUrl, versionTime || null), timeoutMs, `获取版本 ${version} 超时`);
            versionContent = JSON.stringify(versionInfo, null, 2);
            pages = versionInfo.pages;
            fetched = true;
        }
        catch (e) {
            console.error(`获取项目 ${project} 版本 ${version} document.js 失败:`, e);
            // document.js is unavailable: fall back to scraping the rendered page.
            try {
                versionContent = await readPrdVersionTextViaBrowser(browserManager, versionUrl);
                fetched = true;
            }
            catch (pageError) {
                console.error(`获取项目 ${project} 版本 ${version} 页面内容也失败:`, pageError);
                versionContent = `获取失败: ${e.message}`;
            }
        }
    }
    return {
        fetched,
        entry: {
            name: version,
            url: versionUrl,
            content: versionContent || version,
            lastModified: versionTime || null,
            pages,
        },
    };
}
/**
 * Scheduled crawl: discovers every project and version on the PRD server and
 * stores the result as JSON under `<cwd>/data`.
 *
 * Writes `project_list.json` (project names) and `project_versions.json`
 * (project -> version records), then calls `buildDocumentIndex()`.
 *
 * The update is incremental: a stored version record is replaced only when
 * fresh content was actually fetched for it. Versions that are skipped by the
 * `monthsToLoad` filter, or whose fetch failed, keep their stored record, and
 * a project whose listing cannot be fetched keeps all of its stored versions.
 *
 * @param {{monthsToLoad?: number}} [options] - `monthsToLoad` (default 1):
 *   only versions modified within that many months are downloaded; 0 or less
 *   downloads every version.
 * @returns {Promise<void>} resolves without writing anything when the project
 *   list page cannot be fetched.
 */
async function fetchAndSaveAllPrd(options) {
    const { monthsToLoad = 1 } = options || {};
    // 1. Fetch the project list page.
    const url = "http://192.168.1.244:7777/";
    let html = "";
    try {
        const res = await axios.get(url, {
            headers: {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            },
        });
        html = res.data;
    }
    catch (e) {
        console.error("获取项目列表页失败:", e);
        return;
    }
    // 2. Extract every project directory name (no filtering besides "..").
    const projectNames = listPrdDirectoryNames(html);
    console.log(`获取所有项目:${projectNames.join(", ")}`);
    // 3. Persist the project list.
    const dataDir = path.join(process.cwd(), "data");
    if (!fs.existsSync(dataDir)) {
        fs.mkdirSync(dataDir, { recursive: true });
    }
    const savePath = path.join(dataDir, "project_list.json");
    fs.writeFileSync(savePath, JSON.stringify(projectNames, null, 2), "utf-8");
    console.log("已保存项目列表到", savePath);
    // 4. Load previously stored version data, if any.
    const versionSavePath = path.join(dataDir, "project_versions.json");
    let existingVersions = {};
    if (fs.existsSync(versionSavePath)) {
        try {
            existingVersions = JSON.parse(fs.readFileSync(versionSavePath, "utf-8"));
            console.log(`加载现有数据,包含 ${Object.keys(existingVersions).length} 个项目`);
        }
        catch (e) {
            console.error("加载现有数据失败,将创建新文件:", e);
        }
    }
    // 5. Crawl the version directories of every project.
    const allVersions = { ...existingVersions };
    const browserManager = BrowserManager.getInstance();
    const concurrencyLimit = 3; // versions crawled in parallel, to keep the load low
    const timeoutMs = 30000; // per-version limit for the document.js path
    for (const project of projectNames) {
        try {
            const projectUrl = `http://192.168.1.244:7777/${project}/`;
            const res = await axios.get(projectUrl, {
                headers: {
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
                },
            });
            const projectHtml = res.data;
            const versionNames = listPrdDirectoryNames(projectHtml);
            const versionTimeMap = mapPrdVersionTimes(projectHtml);
            console.log(`项目 ${project}: 获取所有版本:${versionNames.join(", ")}`);
            const crawled = [];
            for (let i = 0; i < versionNames.length; i += concurrencyLimit) {
                const batch = versionNames.slice(i, i + concurrencyLimit);
                const batchResults = await Promise.allSettled(batch.map((version) => crawlPrdVersion({
                    project,
                    version,
                    versionTimeMap,
                    monthsToLoad,
                    browserManager,
                    timeoutMs,
                })));
                batchResults.forEach((result, index) => {
                    if (result.status === "fulfilled") {
                        crawled.push(result.value);
                        return;
                    }
                    const failedName = batch[index];
                    console.error(`版本 ${failedName} 处理失败:`, result.reason);
                    crawled.push({
                        fetched: false,
                        entry: {
                            name: failedName,
                            url: `http://192.168.1.244:7777/${project}/${failedName}/`,
                            content: `处理失败: ${result.reason?.message || "未知错误"}`,
                            lastModified: versionTimeMap.get(failedName) || null,
                            pages: [],
                        },
                    });
                });
                console.log(`项目 ${project}: 已完成 ${Math.min(i + concurrencyLimit, versionNames.length)}/${versionNames.length} 个版本`);
            }
            // Incremental merge: start from the stored records and overwrite one
            // only with freshly fetched content; placeholders for skipped or
            // failed versions are added only when nothing is stored yet.
            const mergedByName = new Map();
            const storedVersions = Array.isArray(allVersions[project])
                ? allVersions[project]
                : [];
            storedVersions.forEach((stored) => {
                mergedByName.set(stored.name, stored);
            });
            crawled.forEach(({ fetched, entry }) => {
                if (fetched || !mergedByName.has(entry.name)) {
                    mergedByName.set(entry.name, entry);
                }
            });
            allVersions[project] = Array.from(mergedByName.values());
            console.log(`项目 ${project}: 更新后共有 ${allVersions[project].length} 个版本`);
        }
        catch (e) {
            console.error(`获取项目 ${project} 版本目录失败:`, e);
            // Keep whatever was stored for this project; a transient listing
            // failure must not erase earlier crawl results.
            if (!Array.isArray(allVersions[project])) {
                allVersions[project] = [];
            }
        }
    }
    // Statistics for the log.
    const totalProjects = Object.keys(allVersions).length;
    const totalVersions = Object.values(allVersions).reduce((sum, versions) => sum + versions.length, 0);
    const originalVersions = Object.values(existingVersions).reduce((sum, versions) => sum + versions.length, 0);
    const newVersions = totalVersions - originalVersions;
    fs.writeFileSync(versionSavePath, JSON.stringify(allVersions, null, 2), "utf-8");
    console.log(`已保存所有项目版本到 ${versionSavePath}`);
    console.log(`更新统计: 项目 ${totalProjects} 个, 版本 ${totalVersions} 个 (新增 ${newVersions} 个)`);
    await buildDocumentIndex();
}
/**
 * Shutdown hook: closes the shared browser instance when the application exits.
 *
 * @returns {Promise<void>}
 */
async function cleanupBrowser() {
    await BrowserManager.getInstance().closeBrowser();
}
/**
 * Parses a directory-listing timestamp such as "19-Apr-2022 16:14" into a
 * local-time Date.
 *
 * The month abbreviation is matched case-insensitively.
 *
 * @param {string} timeStr - text containing `D-Mon-YYYY H:MM`.
 * @returns {Date} the parsed local date and time.
 * @throws {Error} when the text does not contain the expected pattern or the
 *   month abbreviation is unknown.
 */
function parseTimeString(timeStr) {
    const monthIndexByName = new Map([
        ["jan", 0],
        ["feb", 1],
        ["mar", 2],
        ["apr", 3],
        ["may", 4],
        ["jun", 5],
        ["jul", 6],
        ["aug", 7],
        ["sep", 8],
        ["oct", 9],
        ["nov", 10],
        ["dec", 11],
    ]);
    const match = /(\d{1,2})-([A-Za-z]{3})-(\d{4})\s+(\d{1,2}):(\d{2})/.exec(timeStr);
    if (!match) {
        throw new Error(`无法解析时间格式: ${timeStr}`);
    }
    const [, day, month, year, hour, minute] = match;
    const monthIndex = monthIndexByName.get(month.toLowerCase());
    if (monthIndex === undefined) {
        // Three letters that are not a month would otherwise yield an Invalid Date.
        throw new Error(`无法解析时间格式: ${timeStr}`);
    }
    return new Date(Number.parseInt(year, 10), monthIndex, Number.parseInt(day, 10), Number.parseInt(hour, 10), Number.parseInt(minute, 10));
}
/**
 * Tells whether a listing timestamp lies within the last `months` months.
 *
 * The cut-off is `now` moved back by `months` calendar months. When the
 * target month is shorter than the current day of month, the cut-off is
 * clamped to that month's last day (Mar 31 minus one month is Feb 28/29)
 * instead of overflowing into the following month.
 *
 * @param {string} timeStr - timestamp understood by `parseTimeString`.
 * @param {number} months - size of the window in months.
 * @param {Date} [now] - reference instant; defaults to the current time.
 * @returns {boolean} true when the timestamp is on or after the cut-off;
 *   false when it is older or cannot be parsed (a warning is logged).
 */
function isWithinLastMonths(timeStr, months, now = new Date()) {
    try {
        const versionDate = parseTimeString(timeStr);
        const cutoff = new Date(now.getTime());
        const dayOfMonth = cutoff.getDate();
        // Move to day 1 first so that changing the month can never overflow.
        cutoff.setDate(1);
        cutoff.setMonth(cutoff.getMonth() - months);
        const lastDayOfTargetMonth = new Date(cutoff.getFullYear(), cutoff.getMonth() + 1, 0).getDate();
        cutoff.setDate(Math.min(dayOfMonth, lastDayOfTargetMonth));
        return versionDate >= cutoff;
    }
    catch (error) {
        console.warn(`时间解析失败: ${timeStr}`, error);
        return false;
    }
}
// 文档索引管理
class DocumentIndexManager {
static instance;
indexes = new Map();
indexFilePath;
constructor() {
this.indexFilePath = path.join(process.cwd(), "data", "document_index.json");
}
static getInstance() {
if (!DocumentIndexManager.instance) {
DocumentIndexManager.instance = new DocumentIndexManager();
}
return DocumentIndexManager.instance;
}
// 从现有数据构建索引
async buildIndexFromExistingData() {
console.log("开始从现有数据构建文档索引...");
try {
// 读取现有的项目版本数据
const projectVersionsPath = path.join(process.cwd(), "data", "project_versions.json");
if (!fs.existsSync(projectVersionsPath)) {
console.error("项目版本数据文件不存在");
return;
}
const projectVersions = JSON.parse(fs.readFileSync(projectVersionsPath, "utf-8"));
const indexes = [];
let processedCount = 0;
const totalCount = Object.values(projectVersions).flat().length;
// 内存管理:限制同时处理的文档数量
const maxConcurrentDocs = 100;
let currentBatch = [];
// 计算一个月前的时间戳
const oneMonthAgo = new Date();
oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1);
for (const [project, versions] of Object.entries(projectVersions)) {
for (const version of versions) {
try {
// 解析最后修改时间
const lastModified = new Date(version.lastModified);
const isRecent = lastModified > oneMonthAgo;
// 生成文档标题
const title = `${project} ${version.name}`;
// 提取关键词
const keywords = this.extractKeywords(project, version.name, version.content, version.pages);
// 生成摘要
const summary = this.generateSummary(version.content, version.pages);
const index = {
project,
version: version.name,
url: version.url,
title,
keywords,
summary,
lastModified: version.lastModified,
pages: version.pages,
};
currentBatch.push(index);
this.indexes.set(`${project}-${version.name}`, index);
processedCount++;
// 当批次达到最大数量时,保存并清空
if (currentBatch.length >= maxConcurrentDocs) {
indexes.push(...currentBatch);
currentBatch = [];
// 强制垃圾回收(如果可用)
if (global.gc) {
global.gc();
}
}
if (processedCount % 100 === 0) {
console.log(`已处理 ${processedCount}/${totalCount} 个文档`);
}
}
catch (error) {
console.error(`处理文档 ${project}/${version.name} 时出错:`, error);
}
}
}
// 保存剩余的批次
if (currentBatch.length > 0) {
indexes.push(...currentBatch);
}
// 保存索引到文件
await this.saveIndexes(indexes);
console.log(`文档索引构建完成,共处理 ${indexes.length} 个文档`);
}
catch (error) {
console.error("构建文档索引失败:", error);
}
}
// 提取关键词
extractKeywords(project, version, content, pages) {
const keywords = new Set();
// 添加项目名和版本号
keywords.add(project.toLowerCase());
keywords.add(version.toLowerCase());
// 从内容中提取关键词
if (content && content.trim()) {
try {
// 尝试解析JSON内容
const contentObj = JSON.parse(content);
if (contentObj.pages && Array.isArray(contentObj.pages)) {
// 从页面名称中提取关键词
contentObj.pages.forEach((page) => {
if (typeof page === "string") {
// 页面名称是字符串
const pageName = page.toLowerCase();
keywords.add(pageName);
// 提取中文词组
this.extractChinesePhrases(pageName).forEach((phrase) => {
keywords.add(phrase);
});
}
else if (page.name) {
// 页面名称是对象
const pageName = page.name.toLowerCase();
keywords.add(pageName);
// 提取中文词组
this.extractChinesePhrases(pageName).forEach((phrase) => {
keywords.add(phrase);
});
}
});
}
}
catch (e) {
// 如果解析失败,按原来的方式处理
const techKeywords = [
"api",
"ui",
"ux",
"prd",
"需求",
"功能",
"页面",
"按钮",
"表单",
"列表",
"搜索",
"筛选",
"排序",
"分页",
"弹窗",
"模态",
"导航",
"菜单",
"用户",
"登录",
"注册",
"权限",
"角色",
"数据",
"数据库",
"缓存",
"性能",
"优化",
];
const lowerContent = content.toLowerCase();
techKeywords.forEach((keyword) => {
if (lowerContent.includes(keyword)) {
keywords.add(keyword);
}
});
}
// 提取项目名称映射中的中文名
Object.entries(projectNameMap).forEach(([pinyin, chinese]) => {
if (project.toLowerCase() === pinyin.toLowerCase()) {
keywords.add(chinese);
}
});
}
// 从pages字段提取关键词
if (pages && Array.isArray(pages)) {
pages.forEach((page) => {
if (page.name) {
const pageName = page.name.toLowerCase();
keywords.add(pageName);
// 提取中文词组
this.extractChinesePhrases(pageName).forEach((phrase) => {
keywords.add(phrase);
});
}
});
}
return Array.from(keywords);
}
// 提取中文词组
extractChinesePhrases(text) {
const phrases = [];
const chineseWords = text.match(/[\u4e00-\u9fa5]+/g) || [];
for (const word of chineseWords) {
if (word.length >= 2) {
phrases.push(word);
// 对于较长的中文词组,提取子词组
if (word.length > 3) {
for (let i = 0; i <= word.length - 2; i++) {
for (let j = i + 2; j <= word.length; j++) {
const subPhrase = word.substring(i, j);
if (subPhrase.length >= 2) {
phrases.push(subPhrase);
}
}
}
}
}
}
return phrases;
}
// 生成摘要
generateSummary(content, pages) {
if (!content || content.trim() === "") {
return "暂无内容";
}
try {
// 尝试解析JSON内容
const contentObj = JSON.parse(content);
if (contentObj.pages && Array.isArray(contentObj.pages)) {
// 从页面名称生成摘要
const pageNames = contentObj.pages
.map((page) => {
if (typeof page === "string") {
return page;
}
else if (page.name) {
return page.name;
}
return "";
})
.filter((name) => name);
if (pageNames.length > 0) {
return `包含 ${pageNames.length} 个页面: ${pageNames
.slice(0, 5)
.join(", ")}${pageNames.length > 5 ? "..." : ""}`;
}
}
}
catch (e) {
// 如果解析失败,按原来的方式处理
}
// 从pages字段生成摘要
if (pages && Array.isArray(pages)) {
const pageNames = pages
.map((page) => page.name)
.filter((name) => name);
if (pageNames.length > 0) {
return `包含 ${pageNames.length} 个页面: ${pageNames
.slice(0, 5)
.join(", ")}${pageNames.length > 5 ? "..." : ""}`;
}
}
// 简单的摘要生成:取前200个字符
const summary = content.replace(/\s+/g, " ").trim();
return summary.length > 200 ? summary.substring(0, 200) + "..." : summary;
}
// 保存索引到文件
async saveIndexes(indexes) {
const dataDir = path.join(process.cwd(), "data");
if (!fs.existsSync(dataDir)) {
fs.mkdirSync(dataDir, { recursive: true });
}
fs.writeFileSync(this.indexFilePath, JSON.stringify(indexes, null, 2), "utf-8");
console.log(`索引已保存到: ${this.indexFilePath}`);
}
// 加载索引
async loadIndexes() {
try {
if (fs.existsSync(this.indexFilePath)) {
const indexes = JSON.parse(fs.readFileSync(this.indexFilePath, "utf-8"));
this.indexes.clear();
indexes.forEach((index) => {
this.indexes.set(`${index.project}-${index.version}`, index);
});
console.log(`已加载 ${this.indexes.size} 个文档索引`);
}
}
catch (error) {
console.error("加载文档索引失败:", error);
}
}
// 获取所有索引
getAllIndexes() {
return Array.from(this.indexes.values());
}
// 根据关键词搜索
searchByKeywords(query) {
const results = [];
const lowerQuery = query.toLowerCase();
const queryWords = lowerQuery
.split(/\s+/)
.filter((word) => word.length > 0);
for (const index of this.indexes.values()) {
let relevance = 0;
const matchedKeywords = [];
let matchType = "fuzzy";
// 精确匹配标题、项目名、版本号
if (index.title.toLowerCase().includes(lowerQuery) ||
index.project.toLowerCase().includes(lowerQuery) ||
index.version.toLowerCase().includes(lowerQuery)) {
relevance += 15;
matchType = "exact";
matchedKeywords.push(query);
}
// 页面名称匹配(高权重)
if (index.pages && Array.isArray(index.pages)) {
for (const page of index.pages) {
if (page.name) {
const pageName = page.name.toLowerCase();
// 完整页面名称匹配
if (pageName.includes(lowerQuery) ||
lowerQuery.includes(pageName)) {
relevance += 12;
if (matchType === "fuzzy")
matchType = "keyword";
if (!matchedKeywords.includes(pageName)) {
matchedKeywords.push(pageName);
}
}
// 页面名称中的关键词匹配
for (const queryWord of queryWords) {
if (pageName.includes(queryWord) ||
queryWord.includes(pageName)) {
relevance += 8;
if (matchType === "fuzzy")
matchType = "keyword";
if (!matchedKeywords.includes(queryWord)) {
matchedKeywords.push(queryWord);
}
}
}
}
}
}
// 关键词匹配
for (const keyword of index.keywords) {
// 完整关键词匹配
if (keyword.includes(lowerQuery) || lowerQuery.includes(keyword)) {
relevance += 10;
if (matchType === "fuzzy")
matchType = "keyword";
if (!matchedKeywords.includes(keyword)) {
matchedKeywords.push(keyword);
}
}
// 关键词中的单词匹配
for (const queryWord of queryWords) {
if (keyword.includes(queryWord) || queryWord.includes(keyword)) {
relevance += 6;
if (matchType === "fuzzy")
matchType = "keyword";
if (!matchedKeywords.includes(keyword)) {
matchedKeywords.push(keyword);
}
}
}
}
// 摘要匹配
if (index.summary.toLowerCase().includes(lowerQuery)) {
relevance += 5;
if (matchType === "fuzzy")
matchType = "keyword";
}
// 模糊匹配标题和摘要
if (relevance === 0) {
for (const queryWord of queryWords) {
if (index.title.toLowerCase().includes(queryWord) ||
index.summary.toLowerCase().includes(queryWord)) {
relevance += 2;
}
}
}
// 中文词组匹配优化
if (relevance === 0 && /[\u4e00-\u9fa5]/.test(lowerQuery)) {
// 对于中文查询,尝试更宽松的匹配
const chineseWords = lowerQuery.match(/[\u4e00-\u9fa5]+/g) || [];
for (const chineseWord of chineseWords) {
if (chineseWord.length >= 2) {
// 至少2个中文字符
// 在标题中查找
if (index.title.toLowerCase().includes(chineseWord)) {
relevance += 4;
if (!matchedKeywords.includes(chineseWord)) {
matchedKeywords.push(chineseWord);
}
}
// 在页面名称中查找
if (index.pages && Array.isArray(index.pages)) {
for (const page of index.pages) {
if (page.name &&
page.name.toLowerCase().includes(chineseWord)) {
relevance += 6;
if (!matchedKeywords.includes(chineseWord)) {
matchedKeywords.push(chineseWord);
}
}
}
}
// 在关键词中查找
for (const keyword of index.keywords) {
if (keyword.includes(chineseWord)) {
relevance += 5;
if (!matchedKeywords.includes(chineseWord)) {
matchedKeywords.push(chineseWord);
}
}
}
}
}
}
if (relevance > 0) {
results.push({
project: index.project,
version: index.version,