UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

454 lines (453 loc) 18.6 kB
/**
 * BootstrapSnapshot — bootstrap snapshot management.
 *
 * Responsibilities:
 *   1. Save the file fingerprints (path → hash) after every completed bootstrap
 *   2. Record which files each dimension referenced
 *   3. Persist the EpisodicMemory summary
 *   4. Compute incremental diffs
 *
 * Storage: SQLite tables bootstrap_snapshots + bootstrap_dim_files.
 * All operations go through the Drizzle type-safe API.
 *
 * @module pipeline/BootstrapSnapshot
 */
import { createHash, randomUUID } from 'node:crypto';
import { readFileSync } from 'node:fs';
import { isAbsolute, relative } from 'node:path';
import { and, desc, eq, sql } from 'drizzle-orm';
import { getDrizzle } from '../../../../../infrastructure/database/drizzle/index.js';
import { bootstrapDimFiles, bootstrapSnapshots, } from '../../../../../infrastructure/database/drizzle/schema.js';

// ──────────────────────────────────────────────────────────────
// Constants
// ──────────────────────────────────────────────────────────────

/** Snapshot retention: at most this many snapshots are kept per project root. */
const MAX_SNAPSHOTS = 5;

/** Full vs. incremental cut-off: a change ratio above this value triggers a full rebuild. */
const FULL_REBUILD_THRESHOLD = 0.5;

// ──────────────────────────────────────────────────────────────
// BootstrapSnapshot class
// ──────────────────────────────────────────────────────────────

export class BootstrapSnapshot {
    #drizzle;
    #logger;

    /**
     * @param db DatabaseConnection or better-sqlite3 instance. When it exposes a
     *   `getDrizzle()` method that handle is used; otherwise the module-level
     *   `getDrizzle()` is called.
     * @param [options.logger] Optional logger; methods are looked up by level name
     *   (`info`, `warn`, ...). Logging is skipped when absent.
     * @throws {Error} When `db` is falsy.
     */
    constructor(db, { logger } = {}) {
        if (!db) {
            throw new Error('BootstrapSnapshot requires a database instance');
        }
        this.#drizzle = typeof db?.getDrizzle === 'function' ? db.getDrizzle() : getDrizzle();
        this.#logger = logger || null;
    }

    // ─── Saving snapshots ─────────────────────────────────

    /**
     * Save a snapshot after a bootstrap run has completed.
     *
     * The main row, the dimension→file links and the capacity clean-up are all
     * executed inside one transaction.
     *
     * @param params.sessionId Bootstrap session ID
     * @param params.projectRoot Project root directory
     * @param params.allFiles Scanned files; each entry provides `relativePath` or `path`,
     *   and optionally `content` (read from `path` when content is missing/empty)
     * @param params.dimensionStats `{ dimId: stat }`. From each stat the numeric fields
     *   `candidateCount`, `analysisChars`, `referencedFiles`, `durationMs` are copied into
     *   the stored dimension meta. The list of referenced paths is read from
     *   `referencedFilesList`; if that is absent and `referencedFiles` is an array
     *   (the shape the original documentation described), that array is used instead.
     * @param [params.episodicData] EpisodicMemory.toJSON()
     * @param [params.meta] { durationMs, candidateCount, primaryLang }
     * @param [params.isIncremental] Whether this was an incremental bootstrap
     * @param [params.parentId] Parent snapshot ID for incremental runs
     * @param [params.changedFiles] Changed files for incremental runs
     * @param [params.affectedDims] Affected dimensions for incremental runs
     * @returns {string} The new snapshot ID (`snap_` + 12 hex characters)
     */
    save(params) {
        const {
            sessionId,
            projectRoot,
            allFiles,
            dimensionStats,
            episodicData,
            meta = {},
            isIncremental = false,
            parentId = null,
            changedFiles = [],
            affectedDims = [],
        } = params;
        const id = `snap_${randomUUID().replace(/-/g, '').substring(0, 12)}`;
        const now = new Date().toISOString();
        const dimensionEntries = Object.entries(dimensionStats || {});

        // File fingerprints: relative path → truncated content hash.
        const fileHashes = {};
        for (const f of allFiles) {
            const rel = f.relativePath || relative(projectRoot, f.path);
            fileHashes[rel] = this.#hashFile(f);
        }

        // Per-dimension statistics stored as JSON on the main row.
        const dimensionMeta = {};
        for (const [dimId, stat] of dimensionEntries) {
            dimensionMeta[dimId] = {
                candidateCount: stat.candidateCount || 0,
                analysisChars: stat.analysisChars || 0,
                referencedFiles: stat.referencedFiles || 0,
                durationMs: stat.durationMs || 0,
            };
        }

        this.#drizzle.transaction((tx) => {
            // Main record
            tx.insert(bootstrapSnapshots)
                .values({
                    id,
                    sessionId: sessionId || null,
                    projectRoot,
                    createdAt: now,
                    durationMs: meta.durationMs || 0,
                    fileCount: allFiles.length,
                    dimensionCount: dimensionEntries.length,
                    candidateCount: meta.candidateCount || 0,
                    primaryLang: meta.primaryLang || null,
                    fileHashes: JSON.stringify(fileHashes),
                    dimensionMeta: JSON.stringify(dimensionMeta),
                    episodicData: episodicData ? JSON.stringify(episodicData) : null,
                    isIncremental: isIncremental ? 1 : 0,
                    parentId: parentId,
                    changedFiles: JSON.stringify(changedFiles),
                    affectedDims: JSON.stringify(affectedDims),
                    status: 'complete',
                })
                .run();

            // Dimension → file links
            for (const [dimId, stat] of dimensionEntries) {
                for (const filePath of this.#referencedFileList(stat)) {
                    // Absolute paths are stored relative to the project root so they
                    // line up with the keys used in fileHashes. isAbsolute (rather than
                    // a leading-'/' check) also recognises Windows drive/UNC paths.
                    const rel = typeof filePath === 'string' && isAbsolute(filePath)
                        ? relative(projectRoot, filePath)
                        : filePath;
                    tx.insert(bootstrapDimFiles)
                        .values({
                            snapshotId: id,
                            dimId,
                            filePath: rel,
                            role: 'referenced',
                        })
                        .onConflictDoNothing()
                        .run();
                }
            }

            // Capacity control: keep only the newest MAX_SNAPSHOTS rows.
            this.#enforceCapacity(projectRoot, tx);
        });

        this.#log(`Snapshot saved: ${id} (${allFiles.length} files, ${dimensionEntries.length} dims)`);
        return id;
    }

    // ─── Loading snapshots ────────────────────────────────

    /**
     * Delete every snapshot row of a project — used to force a full run when a
     * cold start is re-triggered manually. Failures are logged at `warn` level
     * and not rethrown.
     */
    clearProject(projectRoot) {
        try {
            const rows = this.#drizzle
                .select({ id: bootstrapSnapshots.id })
                .from(bootstrapSnapshots)
                .where(eq(bootstrapSnapshots.projectRoot, projectRoot))
                .all();
            for (const row of rows) {
                this.#drizzle.delete(bootstrapSnapshots).where(eq(bootstrapSnapshots.id, row.id)).run();
            }
            this.#log(`Cleared ${rows.length} snapshots for project`);
        } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            this.#log(`clearProject failed: ${msg}`, 'warn');
        }
    }

    /**
     * Load the most recent snapshot with status `complete` for a project.
     *
     * @returns The deserialized snapshot, or `null` when none exists
     */
    getLatest(projectRoot) {
        const row = this.#drizzle
            .select()
            .from(bootstrapSnapshots)
            .where(and(eq(bootstrapSnapshots.projectRoot, projectRoot), eq(bootstrapSnapshots.status, 'complete')))
            .orderBy(desc(bootstrapSnapshots.createdAt))
            .limit(1)
            .get();
        if (!row) {
            return null;
        }
        return this.#deserialize(row);
    }

    /**
     * Load a snapshot by ID.
     *
     * @returns The deserialized snapshot, or `null` when the ID is unknown
     */
    getById(id) {
        const row = this.#drizzle
            .select()
            .from(bootstrapSnapshots)
            .where(eq(bootstrapSnapshots.id, id))
            .get();
        if (!row) {
            return null;
        }
        return this.#deserialize(row);
    }

    /**
     * List a project's snapshots, newest first (any status).
     *
     * @param projectRoot Project root directory
     * @param [limit=10] Maximum number of snapshots returned
     */
    list(projectRoot, limit = 10) {
        return this.#drizzle
            .select()
            .from(bootstrapSnapshots)
            .where(eq(bootstrapSnapshots.projectRoot, projectRoot))
            .orderBy(desc(bootstrapSnapshots.createdAt))
            .limit(limit)
            .all()
            .map((r) => this.#deserialize(r));
    }

    // ─── Incremental diff ─────────────────────────────────

    /**
     * Compute the diff between the current files and a snapshot.
     *
     * Files are hashed exactly as in save(): in-memory `content` when present,
     * otherwise the content read from `path`. Hashing the two sides differently
     * would report every content-less file as modified.
     *
     * @param snapshot Snapshot as returned by getLatest()
     * @param currentFiles Current file list (same entry shape as `save`'s `allFiles`)
     * @param projectRoot Project root used to relativize entries lacking `relativePath`
     * @returns `{ added, modified, deleted, unchanged, changeRatio }` — arrays of relative
     *   paths plus `changeRatio = (added + modified + deleted) / max(current file count, 1)`.
     *   Because deletions are counted against the current file count only, the ratio
     *   can exceed 1.
     */
    computeDiff(snapshot, currentFiles, projectRoot) {
        const oldHashes = snapshot.fileHashes || {};

        // Hash the current files
        const newHashes = {};
        for (const f of currentFiles) {
            const rel = f.relativePath || relative(projectRoot, f.path);
            newHashes[rel] = this.#hashFile(f);
        }

        const added = [];
        const modified = [];
        const unchanged = [];
        for (const [relPath, hash] of Object.entries(newHashes)) {
            if (!(relPath in oldHashes)) {
                added.push(relPath);
            } else if (oldHashes[relPath] !== hash) {
                modified.push(relPath);
            } else {
                unchanged.push(relPath);
            }
        }

        // Files present in the snapshot but missing now
        const deleted = Object.keys(oldHashes).filter((p) => !(p in newHashes));

        const totalFiles = Object.keys(newHashes).length || 1;
        const changedCount = added.length + modified.length + deleted.length;
        const changeRatio = changedCount / totalFiles;
        return { added, modified, deleted, unchanged, changeRatio };
    }

    // ─── Affected-dimension inference ─────────────────────

    /**
     * Infer which dimensions are affected by a set of file changes.
     *
     * Strategy:
     *   1. Dimensions whose stored file links contain a changed file are affected
     *   2. Added files contribute dimensions inferred from their file type/name
     *   3. A change ratio above FULL_REBUILD_THRESHOLD recommends a full run
     *
     * The ratio used here is recomputed from the diff arrays as
     * changed / (changed + unchanged); the `changeRatio` field of `diff` is not read.
     *
     * @param snapshot Previous snapshot (its `id` is used to look up file links)
     * @param diff Result of computeDiff()
     * @param allDimIds All available dimension IDs
     * @returns `{ mode: 'full' | 'incremental', dimensions, skippedDimensions, reason }`
     */
    inferAffectedDimensions(snapshot, diff, allDimIds) {
        const changedCount = diff.added.length + diff.modified.length + diff.deleted.length;
        const changeRatio = changedCount / (changedCount + (diff.unchanged?.length || 0) || 1);

        // Too much changed → full rebuild
        if (changeRatio > FULL_REBUILD_THRESHOLD) {
            return {
                mode: 'full',
                dimensions: allDimIds,
                skippedDimensions: [],
                reason: `变更比例 ${(changeRatio * 100).toFixed(0)}% 超过阈值 (${(FULL_REBUILD_THRESHOLD * 100).toFixed(0)}%),建议全量冷启动`,
            };
        }

        // Nothing changed → skip every dimension
        if (changedCount === 0) {
            return {
                mode: 'incremental',
                dimensions: [],
                skippedDimensions: allDimIds,
                reason: '无文件变更,所有维度使用历史结果',
            };
        }

        const affected = new Set();
        const changedFiles = [...diff.added, ...diff.modified, ...diff.deleted];

        // 1. Dimensions that referenced any changed file (this also covers deleted
        //    files, since they are part of changedFiles).
        const dimFileMap = this.#getDimFileMap(snapshot.id);
        for (const [dimId, files] of Object.entries(dimFileMap)) {
            if (changedFiles.some((changedFile) => files.has(changedFile))) {
                affected.add(dimId);
            }
        }

        // 2. Added files: infer dimensions from file type
        for (const addedFile of diff.added) {
            for (const dim of this.#inferDimsByFileType(addedFile)) {
                affected.add(dim);
            }
        }

        // 3. project-profile is the global overview, so any change includes it
        if (changedFiles.length > 0) {
            affected.add('project-profile');
        }

        // Only IDs present in allDimIds are reported, in allDimIds order.
        const dimensions = allDimIds.filter((d) => affected.has(d));
        const skippedDimensions = allDimIds.filter((d) => !affected.has(d));
        return {
            mode: 'incremental',
            dimensions,
            skippedDimensions,
            reason: `${changedFiles.length} 个文件变更影响 ${dimensions.length}/${allDimIds.length} 个维度`,
        };
    }

    // ─── Dimension → file lookup ──────────────────────────

    /**
     * Read the files each dimension referenced in a given snapshot.
     *
     * @returns Plain object mapping dimId → Set of stored file paths
     */
    #getDimFileMap(snapshotId) {
        const rows = this.#drizzle
            .select({
                dimId: bootstrapDimFiles.dimId,
                filePath: bootstrapDimFiles.filePath,
            })
            .from(bootstrapDimFiles)
            .where(eq(bootstrapDimFiles.snapshotId, snapshotId))
            .all();
        const map = {};
        for (const row of rows) {
            if (!map[row.dimId]) {
                map[row.dimId] = new Set();
            }
            map[row.dimId].add(row.filePath);
        }
        return map;
    }

    /**
     * Infer possibly related dimensions from a file's extension and base name.
     *
     * The extension is taken from the base name only (text after its last dot), so
     * dots in directory names and extension-less files do not produce a bogus
     * extension; both '/' and '\' are treated as path separators.
     *
     * @param filePath Relative or absolute file path
     * @returns De-duplicated list of dimension IDs
     */
    #inferDimsByFileType(filePath) {
        const name = filePath.split(/[\\/]/).pop()?.toLowerCase() || '';
        const dotIndex = name.lastIndexOf('.');
        const ext = dotIndex >= 0 ? name.slice(dotIndex + 1) : '';
        const dims = [];

        // ObjC files → objc-deep-scan
        if (['m', 'mm', 'h'].includes(ext)) {
            dims.push('objc-deep-scan');
        }
        // Category files
        if (name.includes('+') || name.includes('category')) {
            dims.push('category-scan');
        }
        // Swift
        if (ext === 'swift') {
            dims.push('code-standard', 'architecture');
        }
        // TS/JS
        if (['ts', 'tsx', 'js', 'jsx', 'mjs', 'cjs', 'vue', 'svelte'].includes(ext)) {
            dims.push('module-export-scan', 'code-standard', 'architecture');
        }
        // Python
        if (ext === 'py') {
            dims.push('python-package-scan', 'code-standard', 'architecture');
        }
        // Java/Kotlin
        if (['java', 'kt', 'kts'].includes(ext)) {
            dims.push('jvm-annotation-scan', 'code-standard', 'architecture');
        }
        // Configuration files
        if (['json', 'yaml', 'yml', 'plist', 'xcconfig', 'toml', 'properties', 'gradle'].includes(ext)) {
            dims.push('project-profile');
        }
        // Generic: any code file may affect code-pattern and best-practice
        if ([
            'm', 'mm', 'h', 'swift', 'js', 'jsx', 'ts', 'tsx', 'mjs', 'cjs',
            'py', 'java', 'kt', 'kts', 'go', 'rs', 'rb',
        ].includes(ext)) {
            dims.push('code-pattern', 'best-practice');
        }
        // Data-flow related names
        if (name.includes('manager') ||
            name.includes('service') ||
            name.includes('event') ||
            name.includes('notification') ||
            name.includes('delegate')) {
            dims.push('event-and-data-flow');
        }
        return [...new Set(dims)];
    }

    // ─── Internal helpers ─────────────────────────────────

    /**
     * Fingerprint one file entry: hash its in-memory `content`, falling back to
     * the content read from `path` when `content` is missing or empty.
     * Shared by save() and computeDiff() so both sides hash identically.
     */
    #hashFile(file) {
        return this.#computeContentHash(file.content || this.#readFileContent(file.path));
    }

    /**
     * Paths a dimension referenced: `referencedFilesList` when provided, else
     * `referencedFiles` if it is an array, else an empty list.
     */
    #referencedFileList(stat) {
        if (stat.referencedFilesList) {
            return stat.referencedFilesList;
        }
        return Array.isArray(stat.referencedFiles) ? stat.referencedFiles : [];
    }

    /** First 16 hex characters of the SHA-256 digest of `content` (empty string when falsy). */
    #computeContentHash(content) {
        return createHash('sha256')
            .update(content || '')
            .digest('hex')
            .substring(0, 16);
    }

    /** Read a file as UTF-8; any read error yields an empty string. */
    #readFileContent(filePath) {
        try {
            return readFileSync(filePath, 'utf-8');
        } catch {
            return '';
        }
    }

    /**
     * Delete all snapshots of a project except the MAX_SNAPSHOTS most recent ones
     * (ordered by createdAt). Best-effort: failures are logged at `warn` level.
     *
     * @param projectRoot Project root directory
     * @param [db] Drizzle handle or transaction to run the delete on
     */
    #enforceCapacity(projectRoot, db = this.#drizzle) {
        try {
            db.delete(bootstrapSnapshots)
                .where(sql `${bootstrapSnapshots.projectRoot} = ${projectRoot}
          AND ${bootstrapSnapshots.id} NOT IN (
            SELECT ${bootstrapSnapshots.id} FROM ${bootstrapSnapshots}
            WHERE ${bootstrapSnapshots.projectRoot} = ${projectRoot}
            ORDER BY ${bootstrapSnapshots.createdAt} DESC
            LIMIT ${MAX_SNAPSHOTS}
          )`)
                .run();
        } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            this.#log(`Capacity enforcement failed: ${msg}`, 'warn');
        }
    }

    /** Convert a database row into the snapshot object returned to callers. */
    #deserialize(row) {
        return {
            id: row.id,
            sessionId: row.sessionId ?? null,
            projectRoot: row.projectRoot,
            createdAt: row.createdAt,
            durationMs: row.durationMs ?? 0,
            fileCount: row.fileCount ?? 0,
            dimensionCount: row.dimensionCount ?? 0,
            candidateCount: row.candidateCount ?? 0,
            primaryLang: row.primaryLang ?? null,
            fileHashes: this.#safeParseJSON(row.fileHashes, {}),
            dimensionMeta: this.#safeParseJSON(row.dimensionMeta, {}),
            episodicData: this.#safeParseJSON(row.episodicData, null),
            isIncremental: !!row.isIncremental,
            parentId: row.parentId ?? null,
            changedFiles: this.#safeParseJSON(row.changedFiles, []),
            affectedDims: this.#safeParseJSON(row.affectedDims, []),
            status: row.status ?? 'complete',
        };
    }

    /** Parse a JSON column; returns `fallback` for empty values or invalid JSON. */
    #safeParseJSON(str, fallback) {
        try {
            return str ? JSON.parse(str) : fallback;
        } catch {
            return fallback;
        }
    }

    /** Forward a prefixed message to the logger, if it has a method for `level`. */
    #log(msg, level = 'info') {
        if (this.#logger && this.#logger[level]) {
            this.#logger[level](`[BootstrapSnapshot] ${msg}`);
        }
    }
}

export default BootstrapSnapshot;