UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

359 lines (358 loc) 16.1 kB
/** * EpisodicConsolidator — Episodic → Semantic 固化引擎 * * Bootstrap 完成后,将 SessionStore (Tier 2) 中的维度分析结果 * 提炼为结构化记忆,固化到 PersistentMemory (Tier 3)。 * * 固化策略 (规则化,无需额外 AI 调用): * 1. 从每个维度的 findings 提取 fact 记忆 * 2. 从 Tier Reflections 的 crossDimensionPatterns 提取 insight 记忆 * 3. 从 analysisText 中提取项目级别事实 (正则匹配) * 4. 使用 PersistentMemory.consolidate() 进行去重和合并 * * @module EpisodicConsolidator */ import Logger from '#infra/logging/Logger.js'; // ────────────────────────────────────────────────────────────── // 正则: 从分析文本中提取陈述性知识 // ────────────────────────────────────────────────────────────── /** * 匹配常见的项目事实陈述模式: * - "项目使用 XX 模式" * - "发现 XX 个 YY" * - "主要语言是 XX" * - "XX 是唯一的 YY" * - "XX 采用了 YY" */ const FACT_PATTERNS = [ // Chinese /(?:项目|工程|代码库)(?:使用|采用|基于|遵循)了?\s*([^,。,.\n]{5,60})/g, /(?:主要|核心|主|主力)\s*(\S+)\s*(?:是|为|使用)\s*([^,。,.\n]{3,40})/g, /(?:发现|找到|扫描到|识别|共有|包含)\s*了?\s*(\d+)\s*个?\s*([^,。,.\n]{2,30})/g, /(\S{2,20})\s*是\s*(?:唯一的?|主要的?|核心的?|全局的?)\s*([^,。,.\n]{3,30})/g, /(?:使用|采用|遵循)了?\s*(\S{1,10})\s*(?:前缀|后缀|命名|约定|规范)/g, // English /(?:the\s+)?project\s+(?:uses?|adopts?|relies\s+on|follows?)\s+([^.,\n]{5,60})/gi, /(?:found|discovered|identified|detected)\s+(\d+)\s+([^.,\n]{3,40})/gi, /(?:the\s+)?(?:primary|main|core)\s+(\S+)\s+(?:is|are)\s+([^.,\n]{3,40})/gi, /(?:all|every)\s+([^.,\n]{3,30})\s+(?:use|adopt|follow|implement)\s+([^.,\n]{3,40})/gi, /(?:there\s+(?:is|are))\s+(\d+)\s+([^.,\n]{3,40})/gi, ]; /** * 匹配洞察性陈述: * - "XXX 暗示/表明/说明 YYY" * - "XXX 与 YYY 耦合/关联" * - "建议/推荐 XXX" */ const INSIGHT_PATTERNS = [ // Chinese /([^,。,.\n]{5,40})(?:暗示|表明|说明|意味着|揭示)\s*([^,。,.\n]{5,60})/g, /([^,。,.\n]{3,20})\s*(?:与|和)\s*([^,。,.\n]{3,20})\s*(?:耦合|关联|存在依赖|有关系)/g, /(?:建议|推荐|应该|需要)\s*([^,。,.\n]{5,60})/g, // English /([^.,\n]{5,40})\s+(?:suggests?|indicates?|implies?|reveals?)\s+(?:that\s+)?([^.,\n]{5,60})/gi, /([^.,\n]{3,20})\s+(?:is|are)\s+(?:tightly\s+)?(?:coupled|linked|related)\s+(?:to|with)\s+([^.,\n]{3,30})/gi, /(?:recommend|should|consider|suggest)\s+([^.,\n]{5,60})/gi, ]; // ────────────────────────────────────────────────────────────── // EpisodicConsolidator 类 // ────────────────────────────────────────────────────────────── export class EpisodicConsolidator { #semanticMemory; #logger; constructor(semanticMemory, { logger } = {}) { this.#semanticMemory = semanticMemory; this.#logger = logger || Logger.getInstance(); } /** * 执行固化: SessionStore → PersistentMemory * * @param [opts.bootstrapSession] Bootstrap session ID * @param [opts.clearPrevious=false] 是否先清除旧的 bootstrap 记忆 * @returns } */ consolidate(sessionStore, { bootstrapSession, clearPrevious = false, } = {}) { const t0 = Date.now(); // 可选: 清除旧的 bootstrap 记忆 (全量重跑场景) if (clearPrevious) { const cleared = this.#semanticMemory.clearBootstrapMemories(); this.#logger.info(`[Consolidator] Cleared ${cleared} previous bootstrap memories`); } // 1. 先执行维护 (过期清理) this.#semanticMemory.compact(); // 2. 从 findings 提取 fact 记忆 const findingMemories = this.#extractFromFindings(sessionStore); // 3. 从 Tier Reflections 提取 insight 记忆 const insightMemories = this.#extractFromReflections(sessionStore); // 4. 从 analysisText 提取文本中的事实 const textFactMemories = this.#extractFromAnalysisText(sessionStore); // 5. 合并所有候选, 使用 consolidate 去重 const allCandidates = [...findingMemories, ...insightMemories, ...textFactMemories]; // ── 结构化统计日志 ── const dimStats = this.#computeDimStats(allCandidates); const importanceDist = this.#computeImportanceDistribution(allCandidates); const entityCount = allCandidates.reduce((sum, c) => sum + (c.relatedEntities?.length || 0), 0); this.#logger.info(`[Consolidator] Extracted ${allCandidates.length} candidate memories: ` + `${findingMemories.length} findings, ${insightMemories.length} insights, ` + `${textFactMemories.length} text facts`); this.#logger.info(`[Consolidator] Per-dimension: ${dimStats.map((d) => `${d.dim}=${d.count}`).join(', ')}`); this.#logger.info(`[Consolidator] Importance distribution: ${importanceDist} | Entities extracted: ${entityCount}`); const result = this.#semanticMemory.consolidate(allCandidates, { bootstrapSession }); const durationMs = Date.now() - t0; this.#logger.info(`[Consolidator] Consolidation complete in ${durationMs}ms: ` + `+${result.added} ADD, ~${result.updated} UPDATE, ⊕${result.merged} MERGE, ` + `=${result.skipped} SKIP`); return { findings: { extracted: findingMemories.length }, insights: { extracted: insightMemories.length }, textFacts: { extracted: textFactMemories.length }, total: result, durationMs, perDimension: Object.fromEntries(dimStats.map((d) => [d.dim, d.count])), importanceDistribution: this.#importanceHistogram(allCandidates), entityCount, }; } // ─── 提取器 ─────────────────────────────────────────── /** * 从维度 findings 提取 fact 记忆 * * 每个 finding 映射为一条 fact,importance 直接继承。 */ #extractFromFindings(sessionStore) { const memories = []; const completedDims = sessionStore.getCompletedDimensions(); for (const dimId of completedDims) { const report = sessionStore.getDimensionReport(dimId); if (!report?.findings) { continue; } for (const f of report.findings) { // 跳过低重要性的发现 if ((f.importance || 5) < 4) { continue; } // 跳过过短的发现 const content = typeof f === 'string' ? f : f.finding || ''; if (content.length < 10) { continue; } // 提取关联实体 (从 evidence 中提取文件名/类名) const entities = this.#extractEntities(content, f.evidence); memories.push({ type: 'fact', content: content.substring(0, 500), source: 'bootstrap', importance: typeof f === 'string' ? 5 : f.importance || 5, sourceDimension: dimId, sourceEvidence: typeof f === 'string' ? '' : f.evidence || '', relatedEntities: entities, tags: [dimId], }); } } return memories; } /** * 从 Tier Reflections 提取 insight 记忆 * * crossDimensionPatterns → insight (跨维度观察) * suggestionsForNextTier → insight (分析建议) * topFindings 中重要性 ≥ 7 的 → fact (高优先级重复确认) */ #extractFromReflections(sessionStore) { const memories = []; const json = sessionStore.toJSON(); const reflections = json.tierReflections || []; for (const ref of reflections) { // 跨维度模式 → insight for (const pattern of ref.crossDimensionPatterns || []) { if (pattern.length < 10) { continue; } memories.push({ type: 'insight', content: pattern.substring(0, 500), source: 'bootstrap', importance: 7, // 跨维度发现通常较重要 sourceDimension: `tier-${ref.tierIndex + 1}-reflection`, relatedEntities: this.#extractEntities(pattern), tags: ref.completedDimensions || [], }); } // 建议 → insight (较低优先级) for (const suggestion of ref.suggestionsForNextTier || []) { if (suggestion.length < 10) { continue; } memories.push({ type: 'insight', content: suggestion.substring(0, 500), source: 'bootstrap', importance: 5, sourceDimension: `tier-${ref.tierIndex + 1}-reflection`, tags: ['suggestion'], }); } // 高重要性 topFindings → fact (≥ 7 分的重要发现) for (const f of ref.topFindings || []) { if ((f.importance || 5) < 7) { continue; } const content = typeof f === 'string' ? f : f.finding || ''; if (content.length < 10) { continue; } memories.push({ type: 'fact', content: content.substring(0, 500), source: 'bootstrap', importance: f.importance || 7, sourceDimension: f.dimId || `tier-${ref.tierIndex + 1}`, sourceEvidence: f.evidence || '', relatedEntities: this.#extractEntities(content), tags: [f.dimId, 'tier-reflection'].filter(Boolean), }); } } return memories; } /** * 从分析文本中正则提取项目级事实和洞察 * * 仅提取高置信度的简短陈述 (≤100 字), 避免噪音。 */ #extractFromAnalysisText(sessionStore) { const memories = []; const seen = new Set(); // 去重 const completedDims = sessionStore.getCompletedDimensions(); for (const dimId of completedDims) { const report = sessionStore.getDimensionReport(dimId); if (!report?.analysisText) { continue; } const text = report.analysisText; // 提取事实 for (const pattern of FACT_PATTERNS) { // 重置 lastIndex (全局正则) pattern.lastIndex = 0; let match; let matchCount = 0; while ((match = pattern.exec(text)) !== null && matchCount < 5) { const fullMatch = match[0].trim(); if (fullMatch.length < 10 || fullMatch.length > 120) { continue; } if (seen.has(fullMatch)) { continue; } seen.add(fullMatch); matchCount++; memories.push({ type: 'fact', content: fullMatch, source: 'bootstrap', importance: 4, // 正则提取的置信度偏低 sourceDimension: dimId, relatedEntities: this.#extractEntities(fullMatch), tags: [dimId, 'text-extracted'], }); } } // 提取洞察 for (const pattern of INSIGHT_PATTERNS) { pattern.lastIndex = 0; let match; let matchCount = 0; while ((match = pattern.exec(text)) !== null && matchCount < 3) { const fullMatch = match[0].trim(); if (fullMatch.length < 10 || fullMatch.length > 120) { continue; } if (seen.has(fullMatch)) { continue; } seen.add(fullMatch); matchCount++; memories.push({ type: 'insight', content: fullMatch, source: 'bootstrap', importance: 4, sourceDimension: dimId, relatedEntities: this.#extractEntities(fullMatch), tags: [dimId, 'text-extracted'], }); } } } return memories; } // ─── 辅助方法 ───────────────────────────────────────── /** 按维度聚合候选数量 */ #computeDimStats(candidates) { const counts = new Map(); for (const c of candidates) { const dim = c.sourceDimension || 'unknown'; counts.set(dim, (counts.get(dim) || 0) + 1); } return [...counts.entries()] .map(([dim, count]) => ({ dim, count })) .sort((a, b) => b.count - a.count); } /** 生成重要性分布字符串: "[1-3]=N [4-6]=N [7-10]=N" */ #computeImportanceDistribution(candidates) { let low = 0; let mid = 0; let high = 0; for (const c of candidates) { const imp = c.importance || 5; if (imp <= 3) { low++; } else if (imp <= 6) { mid++; } else { high++; } } return `[1-3]=${low} [4-6]=${mid} [7-10]=${high}`; } /** 构建重要性直方图对象 (供返回值使用) */ #importanceHistogram(candidates) { const hist = {}; for (const c of candidates) { const imp = c.importance || 5; hist[imp] = (hist[imp] || 0) + 1; } return hist; } /** * 从文本中提取实体名 (类名/文件名/模块名) * * 简单规则: * - 大驼峰式: BDNetworkManager, UIViewController * - 文件路径: Classes/Network/BDRequest.m * - 冒号分隔的 evidence: "BDRequest.m:42" */ #extractEntities(text, evidence = undefined) { const entities = new Set(); // 大驼峰类名 (至少 2 个大写字母) const classNames = (text || '').match(/\b[A-Z][a-zA-Z]*[A-Z][a-zA-Z]*\b/g) || []; for (const name of classNames) { if (name.length >= 4 && name.length <= 40) { entities.add(name); } } // 从 evidence 提取文件名 if (evidence) { const fileName = evidence.split(':')[0].split('/').pop(); if (fileName && fileName.length >= 3) { entities.add(fileName); } } return [...entities].slice(0, 5); // 最多 5 个实体 } } export default EpisodicConsolidator;