UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

337 lines (336 loc) 13.8 kB
/** * KnowledgeSyncService — 将 .md 文件增量同步到 SQLite DB(knowledge_entries 表) * * 统一替代 SyncService (Recipe) + CandidateSyncService。 * * 设计原则: * - .md 文件 = 完整唯一数据源(Source of Truth),DB = 索引缓存 * - 通过 contentHash 检测手写/手改 .md → 进入违规统计(audit_logs) * - 孤儿 Entry(DB 有但 .md 不存在)→ 自动标记 deprecated * - 同时扫描 AutoSnippet/candidates/ 和 AutoSnippet/recipes/ 两个目录 * * 使用方式: * - CLI: `asd sync` 委托调用 * - 内部: SetupService.stepDatabase() 委托调用(skipViolations = true) */ import { randomUUID } from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { CANDIDATES_DIR, RECIPES_DIR } from '../infrastructure/config/Defaults.js'; import Logger from '../infrastructure/logging/Logger.js'; import { unwrapRawDb } from '../repository/search/SearchRepoAdapter.js'; import { RawDbSyncAdapter } from '../repository/sync/SyncRepoAdapter.js'; import { computeKnowledgeHash, parseKnowledgeMarkdown, } from '../service/knowledge/KnowledgeFileWriter.js'; export class KnowledgeSyncService { candidatesDir; logger; projectRoot; recipesDir; #sourceRefReconciler; constructor(projectRoot, options) { this.projectRoot = projectRoot; this.recipesDir = path.join(projectRoot, RECIPES_DIR); this.candidatesDir = path.join(projectRoot, CANDIDATES_DIR); this.logger = Logger.getInstance(); this.#sourceRefReconciler = options?.sourceRefReconciler ?? null; } /** * 完整同步入口 — sync + reconcile + repair * * asd sync CLI 和 asd ui 启动都调用此方法。 * * @param db better-sqlite3 原始句柄 * @param opts 同步选项 * @returns 包含 sync + reconcile + repair 报告的综合结果 */ async syncAll(db, opts = {}) { // 1. .md → DB 同步 const syncReport = this.sync(db, opts); const report = { ...syncReport }; // 2. 填充/验证 recipe_source_refs 桥接表 try { const reconciler = this.#sourceRefReconciler; if (!reconciler) { this.logger.warn('KnowledgeSyncService: sourceRefReconciler not available, skipping reconcile'); return report; } report.reconcileReport = await reconciler.reconcile({ force: opts.force }); // 3. git rename 修复 report.repairReport = await reconciler.repairRenames(); // 4. 写回修复 if (report.repairReport.renamed > 0) { report.applyReport = await reconciler.applyRepairs(); } } catch (err) { this.logger.warn('KnowledgeSyncService: sourceRef reconcile failed (non-blocking)', { error: err.message, }); } return report; } /** * 执行增量同步:.md → DB(knowledge_entries 表) * * 同时扫描 candidates/ 和 recipes/ 两个目录。 * * @param db better-sqlite3 原始句柄或 DatabaseConnection * @param [opts.dryRun=false] 只报告不写入 * @param [opts.force=false] 忽略 hash,强制覆盖 * @param [opts.skipViolations=false] 跳过违规记录(setup 场景) * @returns } */ sync(db, opts = {}) { const { dryRun = false, force = false, skipViolations = false } = opts; const report = { synced: 0, created: 0, updated: 0, violations: [], // 手动编辑的文件列表 orphaned: [], // DB 有但 .md 不存在 skipped: 0, }; // ── 1. 收集 .md 文件(两个目录) ── const mdFiles = [ ...this._collectMdFiles(this.candidatesDir, CANDIDATES_DIR), ...this._collectMdFiles(this.recipesDir, RECIPES_DIR), ]; if (mdFiles.length === 0) { this.logger.info('KnowledgeSyncService: no .md files found'); return report; } // ── 2. 创建仓储适配器 ── const rawDb = unwrapRawDb(db); const repo = new RawDbSyncAdapter(rawDb); const upsertStmt = dryRun ? null : repo.createUpsertStmt(this._upsertCols()); const auditStmt = dryRun || skipViolations ? null : repo.createAuditInsertStmt(); // ── 3. 逐文件同步 ── const syncedIds = new Set(); for (const { absPath, relPath } of mdFiles) { try { const content = fs.readFileSync(absPath, 'utf8'); const parsed = parseKnowledgeMarkdown(content, relPath); if (!parsed.id) { this.logger.warn(`KnowledgeSyncService: skip file without id — ${relPath}`); report.skipped++; continue; } syncedIds.add(parsed.id); // ── 检测手动编辑 ── const actualHash = computeKnowledgeHash(content); const storedHash = parsed.contentHash; const isManualEdit = storedHash && storedHash !== actualHash && !force; if (isManualEdit) { report.violations.push(relPath); if (auditStmt) { this._logViolation(auditStmt, parsed.id, relPath, storedHash, actualHash); } } // ── upsert ── if (!dryRun) { const existed = repo.entryExists(parsed.id); const row = this._buildDbRow(parsed, relPath, content); upsertStmt?.run(...Object.values(row)); if (existed) { report.updated++; } else { report.created++; } } report.synced++; } catch (err) { this.logger.error(`KnowledgeSyncService: failed to sync ${relPath}`, { error: err.message, }); report.skipped++; } } // ── 4. 检测孤儿 ── report.orphaned = this._detectOrphans(repo, syncedIds, dryRun); this.logger.info('KnowledgeSyncService: sync complete', { synced: report.synced, created: report.created, updated: report.updated, violations: report.violations.length, orphaned: report.orphaned.length, skipped: report.skipped, }); return report; } /* ═══ 文件收集 ═══════════════════════════════════════════ */ /** * 递归收集指定目录下所有 .md 文件(跳过 _ 前缀模板) * @param dir 绝对目录路径 * @param prefix 相对路径前缀 (e.g. 'AutoSnippet/candidates') * @returns []} */ _collectMdFiles(dir, prefix) { if (!fs.existsSync(dir)) { return []; } const results = []; const walk = (curDir, base) => { for (const entry of fs.readdirSync(curDir, { withFileTypes: true })) { const full = path.join(curDir, entry.name); const rel = base ? `${base}/${entry.name}` : entry.name; if (entry.isDirectory()) { walk(full, rel); } else if (entry.name.endsWith('.md') && !entry.name.startsWith('_')) { results.push({ absPath: full, relPath: `${prefix}/${rel}`, }); } } }; walk(dir, ''); return results; } /* ═══ DB 操作 ═══════════════════════════════════════════ */ /** * 从 parseKnowledgeMarkdown 的结果构建 DB row * wire format → DB 列映射(与 KnowledgeRepository.impl 对齐) */ _buildDbRow(parsed, relPath, rawContent) { const now = Math.floor(Date.now() / 1000); // 内容 hash const contentHash = computeKnowledgeHash(rawContent); return { id: parsed.id, title: parsed.title || '', trigger: parsed.trigger || '', description: parsed.description || '', lifecycle: parsed.lifecycle || 'pending', lifecycleHistory: JSON.stringify(parsed.lifecycleHistory || []), autoApprovable: parsed.autoApprovable ? 1 : 0, language: parsed.language || 'unknown', category: parsed.category || 'general', kind: parsed.kind || 'pattern', knowledgeType: parsed.knowledgeType || 'code-pattern', complexity: parsed.complexity || 'intermediate', scope: parsed.scope || 'universal', difficulty: parsed.difficulty || null, tags: JSON.stringify(parsed.tags || []), content: JSON.stringify(parsed.content || {}), relations: JSON.stringify(parsed.relations || {}), constraints: JSON.stringify(parsed.constraints || {}), reasoning: JSON.stringify(parsed.reasoning || {}), quality: JSON.stringify(parsed.quality || {}), stats: JSON.stringify(parsed.stats || {}), headers: JSON.stringify(parsed.headers || []), headerPaths: JSON.stringify(parsed.headerPaths || []), moduleName: parsed.moduleName || '', includeHeaders: parsed.includeHeaders ? 1 : 0, topicHint: parsed.topicHint || null, whenClause: parsed.whenClause || null, doClause: parsed.doClause || null, dontClause: parsed.dontClause || null, coreCode: parsed.coreCode || null, agentNotes: parsed.agentNotes ? JSON.stringify(parsed.agentNotes) : null, aiInsight: parsed.aiInsight || null, reviewedBy: parsed.reviewedBy || null, reviewedAt: parsed.reviewedAt || null, rejectionReason: parsed.rejectionReason || null, source: parsed.source || 'file-sync', sourceFile: relPath, sourceCandidateId: parsed.sourceCandidateId || null, createdBy: parsed.createdBy || 'file-sync', createdAt: parsed.createdAt || now, updatedAt: parsed.updatedAt || now, publishedAt: parsed.publishedAt || null, publishedBy: parsed.publishedBy || null, contentHash: contentHash, }; } /** UPSERT 使用的列名列表 */ _upsertCols() { return [ 'id', 'title', 'trigger', 'description', 'lifecycle', 'lifecycleHistory', 'autoApprovable', 'language', 'category', 'kind', 'knowledgeType', 'complexity', 'scope', 'difficulty', 'tags', 'content', 'relations', 'constraints', 'reasoning', 'quality', 'stats', 'headers', 'headerPaths', 'moduleName', 'includeHeaders', 'topicHint', 'whenClause', 'doClause', 'dontClause', 'coreCode', 'agentNotes', 'aiInsight', 'reviewedBy', 'reviewedAt', 'rejectionReason', 'source', 'sourceFile', 'sourceCandidateId', 'createdBy', 'createdAt', 'updatedAt', 'publishedAt', 'publishedBy', 'contentHash', ]; } /* ═══ 违规记录 ═══════════════════════════════════════════ */ _logViolation(stmt, entryId, filePath, expectedHash, actualHash) { try { stmt.run(randomUUID(), Math.floor(Date.now() / 1000), 'sync', JSON.stringify({ source: 'cli' }), 'manual_knowledge_edit', entryId, JSON.stringify({ file: filePath, expectedHash, actualHash }), 'violation_detected', null, 0); } catch (err) { this.logger.warn('KnowledgeSyncService: failed to log violation', { entryId, error: err.message, }); } } /* ═══ 孤儿检测 ═══════════════════════════════════════════ */ /** * 检测 DB 中存在但 .md 已删除的 Entry → 标记 deprecated * @returns 孤儿 entry id 列表 */ _detectOrphans(repo, syncedIds, dryRun) { const orphanIds = []; try { const rows = repo.findActiveEntriesWithSourceFile(); for (const row of rows) { if (!syncedIds.has(row.id)) { orphanIds.push(row.id); if (!dryRun) { const now = Math.floor(Date.now() / 1000); repo.deprecateEntry(row.id, '源文件已删除(孤儿条目)', now); } } } } catch (err) { this.logger.warn('KnowledgeSyncService: orphan detection failed', { error: err.message, }); } return orphanIds; } } export default KnowledgeSyncService;