// autosnippet
// Version:
// Extract code patterns into a knowledge base for AI coding assistants
// 454 lines (453 loc) • 18.6 kB
// JavaScript
/**
* BootstrapSnapshot — Bootstrap 快照管理
*
* 负责:
* 1. 保存每次 bootstrap 完成后的文件指纹 (path → hash)
* 2. 记录每个维度引用了哪些文件
* 3. 持久化 EpisodicMemory 摘要
* 4. 提供增量 diff 计算
*
* 存储: SQLite bootstrap_snapshots + bootstrap_dim_files 表
* 所有操作使用 Drizzle 类型安全 API。
*
* @module pipeline/BootstrapSnapshot
*/
import { createHash, randomUUID } from 'node:crypto';
import { readFileSync } from 'node:fs';
import { isAbsolute, relative } from 'node:path';
import { and, desc, eq, sql } from 'drizzle-orm';
import { getDrizzle } from '../../../../../infrastructure/database/drizzle/index.js';
import { bootstrapDimFiles, bootstrapSnapshots, } from '../../../../../infrastructure/database/drizzle/schema.js';
// ──────────────────────────────────────────────────────────────
// Constants
// ──────────────────────────────────────────────────────────────
/** Snapshot retention count: at most this many snapshots are kept per project root. */
const MAX_SNAPSHOTS = 5;
/** Full vs. incremental threshold: a changed-file ratio above this value triggers a full rebuild. */
const FULL_REBUILD_THRESHOLD = 0.5;
// ──────────────────────────────────────────────────────────────
// BootstrapSnapshot 类
// ──────────────────────────────────────────────────────────────
export class BootstrapSnapshot {
  /**
   * Extension → dimension rules used when a newly added file has no history.
   * Every rule whose `exts` contains the file extension contributes its `dims`.
   */
  static #EXTENSION_RULES = [
    // Objective-C sources and headers
    { exts: ['m', 'mm', 'h'], dims: ['objc-deep-scan'] },
    // Swift
    { exts: ['swift'], dims: ['code-standard', 'architecture'] },
    // TypeScript / JavaScript and component formats
    {
      exts: ['ts', 'tsx', 'js', 'jsx', 'mjs', 'cjs', 'vue', 'svelte'],
      dims: ['module-export-scan', 'code-standard', 'architecture'],
    },
    // Python
    { exts: ['py'], dims: ['python-package-scan', 'code-standard', 'architecture'] },
    // Java / Kotlin
    { exts: ['java', 'kt', 'kts'], dims: ['jvm-annotation-scan', 'code-standard', 'architecture'] },
    // Configuration files
    {
      exts: ['json', 'yaml', 'yml', 'plist', 'xcconfig', 'toml', 'properties', 'gradle'],
      dims: ['project-profile'],
    },
    // Generic: any code file may affect code-pattern and best-practice
    {
      exts: [
        'm', 'mm', 'h', 'swift', 'js', 'jsx', 'ts', 'tsx', 'mjs', 'cjs',
        'py', 'java', 'kt', 'kts', 'go', 'rs', 'rb',
      ],
      dims: ['code-pattern', 'best-practice'],
    },
  ];

  /** File-name fragments that map a new file to the event-and-data-flow dimension. */
  static #DATA_FLOW_NAME_HINTS = ['manager', 'service', 'event', 'notification', 'delegate'];

  /** Drizzle handle used for every query issued by this class. */
  #drizzle;
  /** Optional logger; `null` disables logging. */
  #logger;

  /**
   * @param db Database wrapper. When it exposes a `getDrizzle()` method that
   *   method supplies the Drizzle handle; for any other truthy value the
   *   module-level `getDrizzle()` is used instead.
   * @param [options.logger] Object with level-named methods (`info`, `warn`, ...)
   * @throws {Error} when `db` is falsy
   */
  constructor(db, { logger } = {}) {
    if (!db) {
      throw new Error('BootstrapSnapshot requires a database instance');
    }
    this.#drizzle = typeof db.getDrizzle === 'function' ? db.getDrizzle() : getDrizzle();
    this.#logger = logger || null;
  }

  // ─── Saving snapshots ─────────────────────────────────

  /**
   * Persist a snapshot after a bootstrap run has completed.
   *
   * Writes the main snapshot row, one row per (dimension, referenced file)
   * pair, and then prunes old snapshots — all inside a single transaction.
   *
   * @param params.sessionId Bootstrap session ID
   * @param params.projectRoot Project root directory
   * @param params.allFiles Scanned files: `{ path, relativePath?, content? }`.
   *   Files without in-memory content are read from disk for hashing.
   * @param params.dimensionStats `{ dimId: { candidateCount, analysisChars,
   *   referencedFiles, durationMs, referencedFilesList } }`. The file list is
   *   taken from `referencedFilesList`; when that is absent and
   *   `referencedFiles` is an array, that array is used instead.
   * @param [params.episodicData] EpisodicMemory.toJSON()
   * @param [params.meta] { durationMs, candidateCount, primaryLang }
   * @param [params.isIncremental] Whether this was an incremental bootstrap
   * @param [params.parentId] Parent snapshot ID for an incremental run
   * @param [params.changedFiles] Changed files for an incremental run
   * @param [params.affectedDims] Affected dimensions for an incremental run
   * @returns The generated snapshot ID (`snap_` + 12 hex characters)
   */
  save(params) {
    const {
      sessionId,
      projectRoot,
      allFiles,
      dimensionStats,
      episodicData,
      meta,
      isIncremental = false,
      parentId = null,
      changedFiles = [],
      affectedDims = [],
    } = params;
    // Tolerate an explicit `meta: null` as well as an omitted meta.
    const runMeta = meta ?? {};
    const id = `snap_${randomUUID().replace(/-/g, '').substring(0, 12)}`;
    const now = new Date().toISOString();
    const dimEntries = Object.entries(dimensionStats || {});

    // File fingerprints: relative path → truncated content hash
    const fileHashes = {};
    for (const file of allFiles) {
      fileHashes[this.#relativePathOf(file, projectRoot)] = this.#hashFile(file);
    }

    // Per-dimension summary stored as JSON on the snapshot row
    const dimensionMeta = {};
    for (const [dimId, stat] of dimEntries) {
      dimensionMeta[dimId] = {
        candidateCount: stat.candidateCount || 0,
        analysisChars: stat.analysisChars || 0,
        referencedFiles: stat.referencedFiles || 0,
        durationMs: stat.durationMs || 0,
      };
    }

    this.#drizzle.transaction((tx) => {
      // Main record
      tx.insert(bootstrapSnapshots)
        .values({
          id,
          sessionId: sessionId || null,
          projectRoot,
          createdAt: now,
          durationMs: runMeta.durationMs || 0,
          fileCount: allFiles.length,
          dimensionCount: dimEntries.length,
          candidateCount: runMeta.candidateCount || 0,
          primaryLang: runMeta.primaryLang || null,
          fileHashes: JSON.stringify(fileHashes),
          dimensionMeta: JSON.stringify(dimensionMeta),
          episodicData: episodicData ? JSON.stringify(episodicData) : null,
          isIncremental: isIncremental ? 1 : 0,
          parentId,
          changedFiles: JSON.stringify(changedFiles),
          affectedDims: JSON.stringify(affectedDims),
          status: 'complete',
        })
        .run();

      // Dimension → file associations
      for (const [dimId, stat] of dimEntries) {
        for (const filePath of this.#referencedFileList(stat)) {
          // Store paths relative to the project root. isAbsolute() also
          // recognizes Windows absolute paths, unlike a leading-'/' check.
          const rel =
            typeof filePath === 'string' && isAbsolute(filePath)
              ? relative(projectRoot, filePath)
              : filePath;
          tx.insert(bootstrapDimFiles)
            .values({
              snapshotId: id,
              dimId,
              filePath: rel,
              role: 'referenced',
            })
            .onConflictDoNothing()
            .run();
        }
      }

      // Capacity control: keep only the newest MAX_SNAPSHOTS snapshots
      this.#enforceCapacity(projectRoot, tx);
    });

    this.#log(`Snapshot saved: ${id} (${allFiles.length} files, ${dimEntries.length} dims)`);
    return id;
  }

  // ─── Loading snapshots ────────────────────────────────

  /**
   * Delete every snapshot of a project, e.g. to force a full run on a manual
   * cold start. Failures are logged at `warn` level and not rethrown.
   */
  clearProject(projectRoot) {
    try {
      const rows = this.#drizzle
        .select({ id: bootstrapSnapshots.id })
        .from(bootstrapSnapshots)
        .where(eq(bootstrapSnapshots.projectRoot, projectRoot))
        .all();
      if (rows.length > 0) {
        // One scoped DELETE instead of one statement per row.
        this.#drizzle
          .delete(bootstrapSnapshots)
          .where(eq(bootstrapSnapshots.projectRoot, projectRoot))
          .run();
      }
      this.#log(`Cleared ${rows.length} snapshots for project`);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      this.#log(`clearProject failed: ${msg}`, 'warn');
    }
  }

  /**
   * Load the most recent snapshot with status `complete` for a project.
   *
   * @returns The deserialized snapshot, or `null` when none exists
   */
  getLatest(projectRoot) {
    const row = this.#drizzle
      .select()
      .from(bootstrapSnapshots)
      .where(
        and(
          eq(bootstrapSnapshots.projectRoot, projectRoot),
          eq(bootstrapSnapshots.status, 'complete'),
        ),
      )
      .orderBy(desc(bootstrapSnapshots.createdAt))
      .limit(1)
      .get();
    return row ? this.#deserialize(row) : null;
  }

  /**
   * Load a snapshot by ID.
   *
   * @returns The deserialized snapshot, or `null` when the ID is unknown
   */
  getById(id) {
    const row = this.#drizzle
      .select()
      .from(bootstrapSnapshots)
      .where(eq(bootstrapSnapshots.id, id))
      .get();
    return row ? this.#deserialize(row) : null;
  }

  /**
   * List a project's snapshots, newest first.
   *
   * @param projectRoot Project root directory
   * @param [limit=10] Maximum number of snapshots to return
   */
  list(projectRoot, limit = 10) {
    return this.#drizzle
      .select()
      .from(bootstrapSnapshots)
      .where(eq(bootstrapSnapshots.projectRoot, projectRoot))
      .orderBy(desc(bootstrapSnapshots.createdAt))
      .limit(limit)
      .all()
      .map((row) => this.#deserialize(row));
  }

  // ─── Incremental diff ─────────────────────────────────

  /**
   * Compute the diff between the current files and a snapshot.
   *
   * Files are hashed exactly as in save(): in-memory `content` when present,
   * otherwise the content read from `path` on disk. Hashing them differently
   * would report every content-less file as modified.
   *
   * @param snapshot Snapshot as returned by getLatest(); a nullish snapshot is
   *   treated as empty, so every current file is reported as added
   * @param currentFiles Current files: `{ path, relativePath?, content? }`
   * @param projectRoot Root used to relativize `path` when `relativePath` is missing
   * @returns `{ added, modified, deleted, unchanged, changeRatio }`. The path
   *   arrays hold relative paths. `changeRatio` is
   *   (added + modified + deleted) / current file count, so it can exceed 1
   *   when many files were deleted; inferAffectedDimensions() computes its own
   *   ratio and does not read this field.
   */
  computeDiff(snapshot, currentFiles, projectRoot) {
    const oldHashes = snapshot?.fileHashes || {};

    // A Map keeps arbitrary file names (e.g. "__proto__") safe as keys.
    const newHashes = new Map();
    for (const file of currentFiles) {
      newHashes.set(this.#relativePathOf(file, projectRoot), this.#hashFile(file));
    }

    const added = [];
    const modified = [];
    const unchanged = [];
    for (const [relPath, hash] of newHashes) {
      // Own-property check: `in` would also match inherited keys such as
      // "toString" and misreport such a file as modified.
      if (!Object.hasOwn(oldHashes, relPath)) {
        added.push(relPath);
      } else if (oldHashes[relPath] !== hash) {
        modified.push(relPath);
      } else {
        unchanged.push(relPath);
      }
    }

    // Files present in the snapshot but no longer on the current list
    const deleted = Object.keys(oldHashes).filter((relPath) => !newHashes.has(relPath));

    const totalFiles = newHashes.size || 1;
    const changedCount = added.length + modified.length + deleted.length;
    return { added, modified, deleted, unchanged, changeRatio: changedCount / totalFiles };
  }

  // ─── Affected-dimension inference ─────────────────────

  /**
   * Infer which dimensions must be re-run for a given file diff.
   *
   * Strategy:
   * 1. If the change ratio exceeds FULL_REBUILD_THRESHOLD → recommend a full run.
   * 2. If nothing changed → skip every dimension.
   * 3. Otherwise mark dimensions that referenced a changed file (this covers
   *    deleted files too), dimensions inferred from the type of each added
   *    file, and always `project-profile`.
   *
   * @param snapshot Previous snapshot (only `id` is read)
   * @param diff Result of computeDiff()
   * @param allDimIds All available dimension IDs
   * @returns `{ mode: 'full' | 'incremental', dimensions, skippedDimensions, reason }`
   */
  inferAffectedDimensions(snapshot, diff, allDimIds) {
    const changedFiles = [...diff.added, ...diff.modified, ...diff.deleted];
    const totalFiles = changedFiles.length + (diff.unchanged?.length || 0);
    const changeRatio = changedFiles.length / (totalFiles || 1);

    // Too much changed → full rebuild
    if (changeRatio > FULL_REBUILD_THRESHOLD) {
      return {
        mode: 'full',
        dimensions: allDimIds,
        skippedDimensions: [],
        reason: `变更比例 ${(changeRatio * 100).toFixed(0)}% 超过阈值 (${(FULL_REBUILD_THRESHOLD * 100).toFixed(0)}%),建议全量冷启动`,
      };
    }

    // Nothing changed → reuse every historical result
    if (changedFiles.length === 0) {
      return {
        mode: 'incremental',
        dimensions: [],
        skippedDimensions: allDimIds,
        reason: '无文件变更,所有维度使用历史结果',
      };
    }

    const affected = new Set();

    // 1. Dimensions that referenced any changed (or deleted) file
    for (const [dimId, files] of this.#getDimFileMap(snapshot.id)) {
      if (changedFiles.some((file) => files.has(file))) {
        affected.add(dimId);
      }
    }

    // 2. Added files have no history: infer dimensions from the file type
    for (const addedFile of diff.added) {
      for (const dim of this.#inferDimsByFileType(addedFile)) {
        affected.add(dim);
      }
    }

    // 3. project-profile is the global overview; at this point at least one
    //    file has changed, so it is always included.
    affected.add('project-profile');

    const dimensions = allDimIds.filter((dimId) => affected.has(dimId));
    const skippedDimensions = allDimIds.filter((dimId) => !affected.has(dimId));
    return {
      mode: 'incremental',
      dimensions,
      skippedDimensions,
      reason: `${changedFiles.length} 个文件变更影响 ${dimensions.length}/${allDimIds.length} 个维度`,
    };
  }

  // ─── Dimension-file mapping ───────────────────────────

  /**
   * Load the files each dimension referenced in a snapshot.
   *
   * @returns Map of dimension ID → Set of stored file paths
   */
  #getDimFileMap(snapshotId) {
    const rows = this.#drizzle
      .select({
        dimId: bootstrapDimFiles.dimId,
        filePath: bootstrapDimFiles.filePath,
      })
      .from(bootstrapDimFiles)
      .where(eq(bootstrapDimFiles.snapshotId, snapshotId))
      .all();
    const byDim = new Map();
    for (const { dimId, filePath } of rows) {
      if (!byDim.has(dimId)) {
        byDim.set(dimId, new Set());
      }
      byDim.get(dimId).add(filePath);
    }
    return byDim;
  }

  /**
   * Guess which dimensions a file may be relevant to from its name.
   *
   * The extension is taken from the base name only, so extension-less files
   * (e.g. a file literally named `m`) and dotted directory names no longer
   * produce a bogus extension; both `/` and `\` are accepted as separators.
   *
   * @returns De-duplicated list of dimension IDs
   */
  #inferDimsByFileType(filePath) {
    const name = (filePath.split(/[\\/]/).pop() || '').toLowerCase();
    const dot = name.lastIndexOf('.');
    const ext = dot >= 0 ? name.slice(dot + 1) : '';
    const dims = new Set();

    for (const rule of BootstrapSnapshot.#EXTENSION_RULES) {
      if (rule.exts.includes(ext)) {
        for (const dim of rule.dims) {
          dims.add(dim);
        }
      }
    }
    // Category files (e.g. `NSString+Util.m`)
    if (name.includes('+') || name.includes('category')) {
      dims.add('category-scan');
    }
    // Data-flow related names
    if (BootstrapSnapshot.#DATA_FLOW_NAME_HINTS.some((hint) => name.includes(hint))) {
      dims.add('event-and-data-flow');
    }
    return [...dims];
  }

  // ─── Internal helpers ─────────────────────────────────

  /** Relative path of a scanned file: explicit `relativePath`, else `path` relative to the root. */
  #relativePathOf(file, projectRoot) {
    return file.relativePath || relative(projectRoot, file.path);
  }

  /** Hash a scanned file from its in-memory content, falling back to the file on disk. */
  #hashFile(file) {
    return this.#computeContentHash(file.content || this.#readFileContent(file.path));
  }

  /**
   * Return the value of `stat.referencedFilesList`, or — when that is missing —
   * an array-valued `stat.referencedFiles`, or an empty list.
   */
  #referencedFileList(stat) {
    return stat.referencedFilesList || (Array.isArray(stat.referencedFiles) ? stat.referencedFiles : []);
  }

  /** First 16 hex characters of the SHA-256 of `content` (empty string when falsy). */
  #computeContentHash(content) {
    return createHash('sha256')
      .update(content || '')
      .digest('hex')
      .substring(0, 16);
  }

  /** Read a file as UTF-8; any read error yields an empty string (best effort). */
  #readFileContent(filePath) {
    try {
      return readFileSync(filePath, 'utf-8');
    } catch {
      return '';
    }
  }

  /**
   * Delete all but the newest MAX_SNAPSHOTS snapshots of a project.
   * Failures are logged at `warn` level and not rethrown.
   *
   * @param projectRoot Project whose snapshots are pruned
   * @param [db] Drizzle handle or transaction to run the statement on
   */
  #enforceCapacity(projectRoot, db = this.#drizzle) {
    try {
      db.delete(bootstrapSnapshots)
        .where(sql`${bootstrapSnapshots.projectRoot} = ${projectRoot}
          AND ${bootstrapSnapshots.id} NOT IN (
            SELECT ${bootstrapSnapshots.id} FROM ${bootstrapSnapshots}
            WHERE ${bootstrapSnapshots.projectRoot} = ${projectRoot}
            ORDER BY ${bootstrapSnapshots.createdAt} DESC
            LIMIT ${MAX_SNAPSHOTS}
          )`)
        .run();
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      this.#log(`Capacity enforcement failed: ${msg}`, 'warn');
    }
  }

  /** Convert a database row into a snapshot object, parsing JSON columns and applying defaults. */
  #deserialize(row) {
    return {
      id: row.id,
      sessionId: row.sessionId ?? null,
      projectRoot: row.projectRoot,
      createdAt: row.createdAt,
      durationMs: row.durationMs ?? 0,
      fileCount: row.fileCount ?? 0,
      dimensionCount: row.dimensionCount ?? 0,
      candidateCount: row.candidateCount ?? 0,
      primaryLang: row.primaryLang ?? null,
      fileHashes: this.#safeParseJSON(row.fileHashes, {}),
      dimensionMeta: this.#safeParseJSON(row.dimensionMeta, {}),
      episodicData: this.#safeParseJSON(row.episodicData, null),
      isIncremental: !!row.isIncremental,
      parentId: row.parentId ?? null,
      changedFiles: this.#safeParseJSON(row.changedFiles, []),
      affectedDims: this.#safeParseJSON(row.affectedDims, []),
      status: row.status ?? 'complete',
    };
  }

  /** Parse a JSON string; return `fallback` for empty input or invalid JSON. */
  #safeParseJSON(str, fallback) {
    try {
      return str ? JSON.parse(str) : fallback;
    } catch {
      return fallback;
    }
  }

  /** Forward a prefixed message to the logger when it has a callable method for `level`. */
  #log(msg, level = 'info') {
    const sink = this.#logger?.[level];
    if (typeof sink === 'function') {
      sink.call(this.#logger, `[BootstrapSnapshot] ${msg}`);
    }
  }
}
export default BootstrapSnapshot;