UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

936 lines (935 loc) 34.1 kB
/** * WikiUtils.js — Wiki 生成器工具函数 * * 从 WikiGenerator.js 中提取的纯工具/辅助函数,无 class 依赖。 * * @module WikiUtils */ import fs from 'node:fs'; import path from 'node:path'; import Logger from '../../infrastructure/logging/Logger.js'; import { LanguageService } from '../../shared/LanguageService.js'; import { DEFAULT_KNOWLEDGE_BASE_DIR } from '../../shared/ProjectMarkers.js'; const logger = Logger.getInstance(); // ─── 工具函数 ──────────────────────────────────────────────── /** 文本 slug 化 */ export function slug(name) { return name.replace(/[^a-zA-Z0-9_-]/g, '_').toLowerCase(); } /** Mermaid 安全 ID */ export function mermaidId(name) { return name.replace(/[^a-zA-Z0-9]/g, '_'); } /** 遍历目录(排除 build/Pods/DerivedData 等) */ export function walkDir(dir, callback, maxFiles = 500) { const excludeNames = new Set([ 'Pods', 'Carthage', 'node_modules', '.build', 'build', 'DerivedData', 'vendor', '.git', '__tests__', 'Tests', DEFAULT_KNOWLEDGE_BASE_DIR, '.cursor', ]); let count = 0; const walk = (d) => { if (count >= maxFiles) { return; } let entries; try { entries = fs.readdirSync(d, { withFileTypes: true }); } catch { return; } for (const entry of entries) { if (count >= maxFiles) { return; } if (excludeNames.has(entry.name)) { continue; } if (entry.name.startsWith('.')) { continue; } const fullPath = path.join(d, entry.name); if (entry.isDirectory()) { walk(fullPath); } else if (entry.isFile()) { callback(fullPath); count++; } } }; walk(dir); } /** * 从文件相对路径推断所属模块名 * 支持多种项目结构约定: * SPM: Sources/{ModuleName}/... * Node.js: packages/{name}/... | src/{name}/... | lib/{name}/... * Go: pkg/{name}/... | internal/{name}/... | cmd/{name}/... * Rust: crates/{name}/... | src/ (单 crate) * Python: src/{name}/... | {name}/ (顶层包) * Java/Kt: src/main/java/{pkg}/... (取第一个包段) * Dart: lib/{name}/... * * 兜底: 取第一级目录名 */ export function inferModuleFromPath(filePath) { const parts = filePath.split('/'); // SPM: Sources/{Module}/... const sourcesIdx = parts.indexOf('Sources'); if (sourcesIdx >= 0 && sourcesIdx + 1 < parts.length) { return parts[sourcesIdx + 1]; } // Node.js monorepo: packages/{name}/... | apps/{name}/... for (const dir of ['packages', 'apps', 'modules']) { const idx = parts.indexOf(dir); if (idx >= 0 && idx + 1 < parts.length) { return parts[idx + 1]; } } // Go: pkg/{name}/... | internal/{name}/... | cmd/{name}/... for (const dir of ['pkg', 'internal', 'cmd']) { const idx = parts.indexOf(dir); if (idx >= 0 && idx + 1 < parts.length) { return parts[idx + 1]; } } // Rust: crates/{name}/... const cratesIdx = parts.indexOf('crates'); if (cratesIdx >= 0 && cratesIdx + 1 < parts.length) { return parts[cratesIdx + 1]; } // Java/Kotlin: src/main/java/{pkg}/... → 跳过域名前缀,取最后一个有意义的包目录 // 例: src/main/java/org/springframework/samples/petclinic/vet/Vet.java → "vet" // 例: src/main/java/com/example/demo/DemoApp.java → "demo" for (const langDir of ['java', 'kotlin']) { const langIdx = parts.indexOf(langDir); if (langIdx >= 0 && langIdx + 1 < parts.length) { // 文件名所在目录(倒数第二个 part)才是 "模块" const pkgParts = parts.slice(langIdx + 1, parts.length - 1); // 包路径(不含文件名) if (pkgParts.length >= 2) { // 从尾部取: 最后一个包段即为功能模块 return pkgParts[pkgParts.length - 1]; } if (pkgParts.length === 1) { return pkgParts[0]; } // 只有文件直接在 java/ 下 return parts[langIdx + 1]; } } // Generic: src/{name}/... | lib/{name}/... (至少 3 层深时) for (const dir of ['src', 'lib']) { const idx = parts.indexOf(dir); if (idx >= 0 && idx + 1 < parts.length && parts.length > idx + 2) { return parts[idx + 1]; } } // 兜底: 取第一级目录名 return parts.length > 1 ? parts[0] : null; } /** * 获取某个 Target 对应的源文件列表 * 按优先级匹配: target.path → target.info.path → sourceFilesByModule[name] */ export function getModuleSourceFiles(target, projectInfo) { const sfm = projectInfo.sourceFilesByModule || {}; const name = target.name; // 1. 按模块名直接匹配(最常见: Sources/{name}/ 解析出的 key) if (sfm[name]?.length > 0) { return sfm[name]; } // 2. 通过 target.path 或 target.info.path 匹配 const targetPath = target.path || target.info?.path; if (targetPath) { const matched = (projectInfo.sourceFiles || []).filter((f) => f.startsWith(`${targetPath}/`) || f.startsWith(targetPath + path.sep)); if (matched.length > 0) { return matched; } } // 3. 大小写不敏感模糊匹配 const lower = name.toLowerCase(); for (const [key, files] of Object.entries(sfm)) { if (key.toLowerCase() === lower) { return files; } } return []; } /** * 基于模块名称和内容推断模块功能 * 对常见命名模式做智能推断 */ export function inferModulePurpose(name, classes, protocols, files) { const lower = name.toLowerCase(); const _fileNames = files.map((f) => path.basename(f).toLowerCase()); // 常见模块功能推断规则 const rules = [ { match: /network|http|api|client|request|fetch/i, zh: '负责网络通信和 API 调用', en: 'handles network communication and API calls', }, { match: /ui|view|component|widget|screen|page/i, zh: '提供用户界面组件', en: 'provides user interface components', }, { match: /model|entity|domain|data/i, zh: '定义数据模型和领域实体', en: 'defines data models and domain entities', }, { match: /storage|database|cache|persist|core\s*data|realm/i, zh: '负责数据持久化和存储', en: 'manages data persistence and storage', }, { match: /auth|login|session|token|credential/i, zh: '处理认证授权和会话管理', en: 'handles authentication and session management', }, { match: /util|helper|extension|common|shared|foundation/i, zh: '提供公共工具类和扩展方法', en: 'provides common utilities and extensions', }, { match: /test|spec|mock/i, zh: '包含单元测试和 Mock', en: 'contains unit tests and mocks' }, { match: /router|navigation|coordinator|flow/i, zh: '管理页面路由和导航流', en: 'manages page routing and navigation flow', }, { match: /config|setting|preference|env/i, zh: '管理应用配置和环境设置', en: 'manages app configuration and environment settings', }, { match: /log|analytics|track|monitor/i, zh: '提供日志记录和数据分析能力', en: 'provides logging and analytics capabilities', }, { match: /media|image|video|audio|player/i, zh: '处理多媒体资源', en: 'handles multimedia resources', }, { match: /service|manager|provider/i, zh: '提供核心业务服务', en: 'provides core business services', }, ]; // 先按模块名匹配 for (const rule of rules) { if (rule.match.test(lower)) { return rule; } } // 再按类名匹配 const classStr = classes.join(' '); for (const rule of rules) { if (rule.match.test(classStr)) { return rule; } } return null; } /** * 从 CodeEntityGraph 提取继承根节点 * @returns >} */ export function getInheritanceRoots(codeEntityGraph) { if (!codeEntityGraph) { return []; } try { // 尝试查询继承关系 const entities = codeEntityGraph.queryEntities?.({ entityType: 'class', limit: 50 }) || []; const roots = []; for (const e of entities) { const _parents = codeEntityGraph.queryEdges?.({ toId: e.entityId, relation: 'inherits' }) || []; const children = codeEntityGraph.queryEdges?.({ fromId: e.entityId, relation: 'inherits' }) || []; if (children.length > 0) { roots.push({ name: e.name, children: children.map((c) => c.toId || c.to_id || ''), }); } } return roots.sort((a, b) => (b.children?.length || 0) - (a.children?.length || 0)); } catch { return []; } } /** * 两层去重 * * Layer 1: Title slug 碰撞 — 同名文件不同目录 → hash 相同则删除副本 * Layer 2: Content hash — 跨文件内容完全相同 → 仅保留第一个 * * @returns } */ export function dedup(files, wikiDir, emit) { const removed = []; // Layer 1: slug 碰撞(同名文件跨目录) const slugMap = new Map(); // slug → first file for (const file of files) { const s = path.basename(file.path, path.extname(file.path)).toLowerCase(); if (slugMap.has(s)) { const existing = slugMap.get(s); // 完全相同 hash → 移除后来的 if (existing.hash === file.hash) { const fullPath = path.join(wikiDir, file.path); if (!fullPath.startsWith(path.resolve(wikiDir) + path.sep)) { logger.warn(`[WikiGenerator] Dedup: path escape blocked — ${file.path}`); continue; } try { fs.unlinkSync(fullPath); } catch { /* skip */ } removed.push(file.path); logger.info(`[WikiGenerator] Dedup: removed ${file.path} (same hash as ${existing.path})`); } // hash 不同 → 保留两个(不同目录允许同名) } else { slugMap.set(s, file); } } // Layer 2: content hash 碰撞(不同文件名但内容相同) const hashMap = new Map(); // hash → first file path for (const file of files) { if (removed.includes(file.path)) { continue; } if (hashMap.has(file.hash)) { const firstPath = hashMap.get(file.hash); // 优先保留代码生成的(非 synced) const isFirstSynced = firstPath.startsWith('documents/') || firstPath.startsWith('skills/'); const isCurrentSynced = file.path.startsWith('documents/') || file.path.startsWith('skills/'); if (isCurrentSynced && !isFirstSynced) { // 当前是 synced,first 是 codegen → 删除 synced const fullPath = path.join(wikiDir, file.path); if (!fullPath.startsWith(path.resolve(wikiDir) + path.sep)) { logger.warn(`[WikiGenerator] Dedup: path escape blocked — ${file.path}`); continue; } try { fs.unlinkSync(fullPath); } catch { /* skip */ } removed.push(file.path); logger.info(`[WikiGenerator] Dedup: removed synced ${file.path} (same content as ${firstPath})`); } // 其他情况保留两个 } else { hashMap.set(file.hash, file.path); } } // 从 files 数组中移除已删除的 for (let i = files.length - 1; i >= 0; i--) { if (removed.includes(files[i].path)) { files.splice(i, 1); } } if (removed.length > 0) { emit('dedup', 93, `去重: 移除 ${removed.length} 个重复文件`); } else { emit('dedup', 93, '无重复文件'); } return { removed, kept: files.length }; } // ─── 多语言支持 ────────────────────────────────────────────── /** * 按主语言返回 AST 术语(中英文) * * 不同语言对"类"和"接口"有不同称谓,Wiki 文档应使用合适的措辞。 * * @param langId LanguageService langId,如 'swift', 'python', 'go' * @returns , interfaceLabel: {zh: string, en: string}, moduleMetric: {zh: string, en: string} }} */ export function getLangTerms(langId) { const TERMS = { swift: { typeLabel: { zh: '类/结构体', en: 'Classes/Structs' }, interfaceLabel: { zh: '协议', en: 'Protocols' }, moduleMetric: { zh: 'SPM Targets', en: 'SPM Targets' }, }, objectivec: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '协议', en: 'Protocols' }, moduleMetric: { zh: 'Targets', en: 'Targets' }, }, typescript: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Packages', en: 'Packages' }, }, javascript: { typeLabel: { zh: '类/模块', en: 'Classes/Modules' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Packages', en: 'Packages' }, }, python: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '抽象基类', en: 'Abstract Base' }, moduleMetric: { zh: 'Packages', en: 'Packages' }, }, go: { typeLabel: { zh: '结构体', en: 'Structs' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Go Modules', en: 'Go Modules' }, }, rust: { typeLabel: { zh: '结构体/枚举', en: 'Structs/Enums' }, interfaceLabel: { zh: 'Trait', en: 'Traits' }, moduleMetric: { zh: 'Crates', en: 'Crates' }, }, java: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Modules', en: 'Modules' }, }, kotlin: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Modules', en: 'Modules' }, }, dart: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '抽象类', en: 'Abstract Classes' }, moduleMetric: { zh: 'Packages', en: 'Packages' }, }, csharp: { typeLabel: { zh: '类', en: 'Classes' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Projects', en: 'Projects' }, }, }; return (TERMS[langId] || { typeLabel: { zh: '类型', en: 'Types' }, interfaceLabel: { zh: '接口', en: 'Interfaces' }, moduleMetric: { zh: 'Modules', en: 'Modules' }, }); } /** * 已知的构建系统标志文件 → 生态类型映射 * * @deprecated 请使用 LanguageService.buildSystemMarkers。此处保留为只读引用以保持向后兼容。 */ export const BUILD_SYSTEM_MARKERS = LanguageService.buildSystemMarkers; /** * 检测项目根目录中存在的构建系统标志 * * 两级检测: * 1. 先检查根目录的一级文件 * 2. 如果根目录未找到,检查一级子目录(支持 monorepo 如 AppFlowy/frontend/...) * * @param rootEntryNames 项目根目录一级文件/目录名列表 * @param [projectRoot] 可选的项目根路径,用于二级检测 * @returns >} 匹配到的构建系统 */ export function detectBuildSystems(rootEntryNames, projectRoot) { // 委托给 LanguageService 做一级匹配 const results = LanguageService.matchBuildMarkers(rootEntryNames); const seenEco = new Set(results.map((r) => r.eco)); // 二级检测: monorepo / 嵌套项目 — 检查一级子目录 if (projectRoot && results.length === 0) { const skipDirs = LanguageService.scanSkipDirs; try { const entries = fs.readdirSync(projectRoot, { withFileTypes: true }); for (const dir of entries) { if (!dir.isDirectory() || dir.name.startsWith('.') || skipDirs.has(dir.name)) { continue; } try { const subEntries = fs .readdirSync(path.join(projectRoot, dir.name)) .filter((n) => !n.startsWith('.')); const subResults = LanguageService.matchBuildMarkers(subEntries); for (const r of subResults) { if (!seenEco.has(r.eco)) { results.push(r); seenEco.add(r.eco); } } } catch { /* skip unreadable subdirs */ } } } catch { /* skip */ } } return results; } // ─── Folder Profile 分析 (AST 不可用时的降级策略) ───────── /** 入口文件名模式 */ const ENTRY_POINT_NAMES = new Set([ 'index.js', 'index.ts', 'index.tsx', 'index.jsx', 'index.mjs', 'main.js', 'main.ts', 'main.go', 'main.py', 'main.rs', 'main.dart', 'main.c', 'main.cpp', 'mod.rs', 'lib.rs', '__init__.py', 'app.js', 'app.ts', 'app.py', 'app.rb', 'server.js', 'server.ts', 'server.py', ]); /** 多语言 import/require 正则 (轻量级, 不依赖 AST) */ const IMPORT_PATTERNS = [ // JS/TS: import ... from '...' or require('...') /(?:import\s+.*?\s+from\s+['"]([^'"]+)['"]|require\s*\(\s*['"]([^'"]+)['"]\s*\))/, // Python: import xxx / from xxx import yyy /(?:^from\s+([.\w]+)\s+import|^import\s+([.\w]+))/, // Go: import "path/to/pkg" /import\s+(?:\w+\s+)?["']([^"']+)["']/, // Rust: use crate::xxx / use super::xxx / mod xxx /(?:use\s+(?:crate|super)::(\w+)|mod\s+(\w+)\s*;)/, // Java/Kotlin: import com.xxx.yyy /import\s+([\w.]+)/, // Ruby: require 'xxx' / require_relative 'xxx' /require(?:_relative)?\s+['"]([^'"]+)['"]/, // C/C++: #include "xxx" /#include\s+"([^"]+)"/, // Dart: import 'package:xxx/yyy.dart' /import\s+['"](?:package:)?([^'"]+)['"]/, ]; /** * 分析项目中重要文件夹,生成 FolderProfile 列表 * * 适用场景: AST 无法提取 target(类/函数/协议)的语言, * 通过文件夹结构、文件命名、轻量 import 分析来产出有意义的 wiki 内容。 * * @param projectInfo WikiGenerator._scanProject() 的输出 * @param [options.minFiles=3] 文件夹最少文件数阈值 * @param [options.maxFolders=20] 最多分析的文件夹数 * @param [options.sampleLines=40] 每个文件采样行数 (用于 import 提取) */ export function profileFolders(projectInfo, options = {}) { const { minFiles = 3, maxFolders = 20, sampleLines = 40 } = options; const root = projectInfo.root; const sourceFiles = projectInfo.sourceFiles || []; // ── 1. 按文件夹分组源文件 ── /** relDir → [relFilePath, ...] */ const folderFiles = new Map(); for (const relFile of sourceFiles) { const dir = path.dirname(relFile); if (!folderFiles.has(dir)) { folderFiles.set(dir, []); } folderFiles.get(dir).push(relFile); } // ── 2. 聚合: 将子文件夹的文件计入父文件夹 (递归) ── /** relDir → 所有递归子文件 */ const folderRecursive = new Map(); for (const [dir, files] of folderFiles) { // 把文件计入 dir 本身及所有祖先 const parts = dir.split('/'); for (let depth = 1; depth <= parts.length; depth++) { const ancestor = parts.slice(0, depth).join('/'); if (!folderRecursive.has(ancestor)) { folderRecursive.set(ancestor, []); } folderRecursive.get(ancestor).push(...files); } } // ── 3. 筛选重要文件夹 ── const candidates = []; for (const [dir, files] of folderRecursive) { if (files.length < minFiles) { continue; } // 排除根目录 '.' if (dir === '.') { continue; } // 排除太深的目录 (depth > 4), 这些通常是叶子目录, 信息量低 const depth = dir.split('/').length; if (depth > 4) { continue; } candidates.push({ dir, files, depth }); } // 按文件数降序, 优先保留文件多的大模块 candidates.sort((a, b) => b.files.length - a.files.length); // 去除被父级包含且文件完全是父级子集的冗余候选 // (保留层次分明的目录: 如果父子文件数差异不大, 去掉子) const selected = _pruneRedundantFolders(candidates.slice(0, maxFolders * 2), maxFolders); // ── 4. 为每个选中的文件夹生成 Profile ── const profiles = []; for (const { dir, files, depth } of selected) { const profile = _buildFolderProfile(dir, files, depth, root, sampleLines); if (profile) { profiles.push(profile); } } // 按 fileCount 降序 + depth 升序 排序 profiles.sort((a, b) => { if (b.fileCount !== a.fileCount) { return b.fileCount - a.fileCount; } return a.depth - b.depth; }); return profiles.slice(0, maxFolders); } /** * 修剪冗余文件夹: 如果子目录文件数与父目录接近 (>80%), 仅保留父目录 */ function _pruneRedundantFolders(candidates, maxFolders) { const kept = []; const removedDirs = new Set(); for (const c of candidates) { if (removedDirs.has(c.dir)) { continue; } // 检查是否有已 kept 的父目录, 且文件比率 > 80% let isRedundant = false; for (const k of kept) { if (c.dir.startsWith(`${k.dir}/`)) { // c 是 k 的子目录 if (c.files.length / k.files.length > 0.8) { isRedundant = true; break; } } else if (k.dir.startsWith(`${c.dir}/`)) { // c 是 k 的父目录, k 覆盖了 c 大部分 → 保留 c (更高层), 移除 k if (k.files.length / c.files.length > 0.8) { removedDirs.add(k.dir); } } } if (!isRedundant) { kept.push(c); } if (kept.length >= maxFolders) { break; } } return kept.filter((c) => !removedDirs.has(c.dir)); } /** * 为单个文件夹构建 FolderProfile */ function _buildFolderProfile(relDir, files, depth, projectRoot, sampleLines) { const fullDir = path.join(projectRoot, relDir); const folderName = path.basename(relDir); // ── 语言分布 ── const langBreakdown = {}; let totalSize = 0; for (const f of files) { const ext = path.extname(f); const lang = LanguageService.displayNameFromExt(ext) || ext; langBreakdown[lang] = (langBreakdown[lang] || 0) + 1; try { const stat = fs.statSync(path.join(projectRoot, f)); totalSize += stat.size; } catch { /* skip */ } } // ── 文件名列表 ── const fileNames = files.map((f) => path.basename(f)).sort(); // ── 入口点检测 ── const entryPoints = files.filter((f) => ENTRY_POINT_NAMES.has(path.basename(f).toLowerCase())); // ── 重要文件 (大文件 top5 + 入口文件) ── const fileSizes = []; for (const f of files) { try { const stat = fs.statSync(path.join(projectRoot, f)); fileSizes.push({ file: f, size: stat.size }); } catch { /* skip */ } } fileSizes.sort((a, b) => b.size - a.size); const keyFiles = [...new Set([...entryPoints, ...fileSizes.slice(0, 5).map((fs) => fs.file)])]; // ── README 检测 ── let readme = null; const readmeNames = ['README.md', 'readme.md', 'README.txt', 'README', 'readme.markdown']; for (const rn of readmeNames) { const rPath = path.join(fullDir, rn); try { if (fs.existsSync(rPath)) { const content = fs.readFileSync(rPath, 'utf-8'); readme = content.slice(0, 1000); // 只取前 1000 字符 break; } } catch { /* skip */ } } // ── 命名模式检测 ── const namingPatterns = _detectNamingPatterns(fileNames); // ── 轻量 Import 分析 ── const imports = _extractImports(keyFiles.slice(0, 10), projectRoot, sampleLines, relDir); // ── 头部注释提取 (从关键文件提取首段注释) ── const headerComments = []; for (const f of keyFiles.slice(0, 3)) { const comment = _extractHeaderComment(path.join(projectRoot, f)); if (comment) { headerComments.push(`${path.basename(f)}: ${comment}`); } } // ── 功能推断 (复用已有 inferModulePurpose + 增强) ── const purpose = inferModulePurpose(folderName, [], [], files); return { name: folderName, relPath: relDir, fileCount: files.length, totalSize, depth, langBreakdown, keyFiles, fileNames, readme, purpose: purpose ? purpose : null, imports, entryPoints: [...new Set(entryPoints.map((f) => path.basename(f)))], namingPatterns, headerComments, }; } /** * 从文件名列表检测命名约定 * @param fileNames basename 列表 */ function _detectNamingPatterns(fileNames) { const patterns = []; const lower = fileNames.map((n) => n.toLowerCase()); // 测试文件 const testFiles = lower.filter((n) => n.startsWith('test_') || n.startsWith('test.') || n.endsWith('_test.go') || n.endsWith('.test.js') || n.endsWith('.test.ts') || n.endsWith('.spec.js') || n.endsWith('.spec.ts') || n.endsWith('_spec.rb') || (n.startsWith('test') && n.includes('.'))); if (testFiles.length > 0) { patterns.push(`test files: ${testFiles.length}`); } // 常见后缀模式 const suffixes = {}; for (const name of fileNames) { const base = path.basename(name, path.extname(name)); // 检测 CamelCase 后缀: UserController → Controller const camelMatch = base.match(/([A-Z][a-z]+)$/); if (camelMatch) { const suffix = camelMatch[1]; suffixes[suffix] = (suffixes[suffix] || 0) + 1; } // 检测 snake_case 后缀: user_controller → controller const snakeMatch = base.match(/_([a-z]+)$/); if (snakeMatch) { const suffix = snakeMatch[1]; suffixes[suffix] = (suffixes[suffix] || 0) + 1; } } // 出现 ≥2 次的后缀视为命名约定 for (const [suffix, count] of Object.entries(suffixes).sort((a, b) => b[1] - a[1])) { if (count >= 2) { patterns.push(`*${suffix}: ${count}`); } } return patterns.slice(0, 8); } /** * 从文件顶部提取 import/require 语句,推断文件夹级依赖 */ function _extractImports(keyFiles, projectRoot, sampleLines, currentDir) { const importTargets = new Set(); // Node.js / 常见运行时内置模块 — 不应计入项目文件夹依赖 const BUILTIN_MODULES = new Set([ 'fs', 'path', 'os', 'http', 'https', 'url', 'util', 'crypto', 'stream', 'events', 'child_process', 'cluster', 'net', 'dns', 'tls', 'zlib', 'readline', 'assert', 'buffer', 'querystring', 'string_decoder', 'timers', 'tty', 'dgram', 'vm', 'worker_threads', 'perf_hooks', 'async_hooks', 'v8', 'inspector', 'console', 'process', 'module', // node: prefix 会被 firstSeg 拆出 "node" — 直接排除 'node', // 常见第三方包 (非项目目录) 'react', 'vue', 'express', 'lodash', 'axios', 'moment', 'dayjs', 'webpack', 'vite', 'jest', 'mocha', 'chai', ]); for (const relFile of keyFiles) { try { const fullPath = path.join(projectRoot, relFile); const content = fs.readFileSync(fullPath, 'utf-8'); const lines = content.split('\n').slice(0, sampleLines); for (const line of lines) { for (const pattern of IMPORT_PATTERNS) { const match = line.match(pattern); if (match) { // 取第一个非 undefined 捕获组 const target = match[1] || match[2]; if (target) { // 跳过 node: 协议前缀 (Node.js 内置模块) if (target.startsWith('node:')) { continue; } // 解析相对路径 import → 文件夹名 if (target.startsWith('.') || target.startsWith('/')) { const resolved = path.normalize(path.join(currentDir, target)); const topDir = resolved.split('/')[0]; if (topDir && topDir !== '.' && topDir !== '..' && topDir !== currentDir.split('/')[0]) { importTargets.add(topDir); } } else { // 绝对 import → 取第一段作为模块名 const firstSeg = target.split(/[/.]/)[0]; if (firstSeg && firstSeg.length > 1 && !BUILTIN_MODULES.has(firstSeg)) { importTargets.add(firstSeg); } } } } } } } catch { /* skip unreadable files */ } } return [...importTargets].slice(0, 20); } /** * 提取文件头部注释 (第一个注释块) */ function _extractHeaderComment(fullPath) { try { const content = fs.readFileSync(fullPath, 'utf-8'); const lines = content.split('\n').slice(0, 30); // 尝试匹配多行注释 /** ... */ 或 /* ... */ const joined = lines.join('\n'); const blockMatch = joined.match(/\/\*\*?([\s\S]*?)\*\//); if (blockMatch) { const comment = blockMatch[1] .split('\n') .map((l) => l.replace(/^\s*\*\s?/, '').trim()) .filter((l) => l && !l.startsWith('@')) .join(' ') .slice(0, 200); if (comment.length > 10) { return comment; } } // 尝试匹配 # 或 // 开头的连续行注释 const lineComments = []; for (const line of lines) { const stripped = line.trim(); if (stripped.startsWith('#') && !stripped.startsWith('#!') && !stripped.startsWith('#include')) { lineComments.push(stripped.replace(/^#+\s*/, '')); } else if (stripped.startsWith('//')) { lineComments.push(stripped.replace(/^\/\/\s*/, '')); } else if (stripped.startsWith('"""') || stripped.startsWith("'''")) { // Python docstring const docMatch = joined.match(/(?:"""|''')([\s\S]*?)(?:"""|''')/); if (docMatch) { return docMatch[1].trim().slice(0, 200); } } else if (lineComments.length > 0) { break; // 注释块结束 } } if (lineComments.length > 0) { const comment = lineComments.join(' ').slice(0, 200); if (comment.length > 10) { return comment; } } return null; } catch { return null; } }