UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

306 lines (305 loc) 11.5 kB
/**
 * SyncCoordinator — event-driven synchronisation from knowledge CRUD to the vector index.
 *
 * Listens for `knowledge:changed` / `knowledge:deleted` events on the EventBus,
 * merges them inside a debounce window and then applies them in batches
 * (extract text → embed → upsert, or remove).
 *
 * Design:
 *   - CRUD events arriving inside the debounce window (2s by default) are merged
 *   - reaching maxBatchSize (20 by default) triggers processing immediately
 *   - a DB ↔ vector-index reconciliation can be run once at startup
 *
 * @module service/vector/SyncCoordinator
 */
import { ne } from 'drizzle-orm';
import { knowledgeEntries } from '../../infrastructure/database/drizzle/schema.js';
import Logger from '../../infrastructure/logging/Logger.js';
import { queryNonDeprecatedEntries } from '../../repository/search/SearchRepoAdapter.js';

// ── Coordinator ──
export class SyncCoordinator {
  #vectorStore;
  #embedProvider;
  #contextualEnricher;
  #debounceMs;
  #maxBatchSize;
  #drizzle;
  /** Pending changes keyed by entryId; a later change for the same id replaces the earlier one. */
  #pendingChanges = new Map();
  #debounceTimer = null;
  /** Promise of the batch currently being applied, or null when idle. */
  #inFlight = null;
  #logger = Logger.getInstance();
  #eventBus = null;
  #boundHandler = null;
  #boundDeleteHandler = null;

  /**
   * @param {object} config
   * @param {object} config.vectorStore - must provide `listIds()`, `remove(id)` and `batchUpsert(items)`
   * @param {object} config.embedProvider - must provide `embed(texts)`
   * @param {object} [config.contextualEnricher] - stored but not used by any code in this class
   * @param {number} [config.debounceMs=2000] - debounce window in milliseconds
   * @param {number} [config.maxBatchSize=20] - pending-change count that triggers an immediate batch
   * @param {object} [config.drizzle] - Drizzle instance used by `reconcile()`; optional
   */
  constructor(config) {
    this.#vectorStore = config.vectorStore;
    this.#embedProvider = config.embedProvider;
    this.#contextualEnricher = config.contextualEnricher;
    // Without a default, `setTimeout(fn, undefined)` fires after ~1ms and the
    // documented 2s merge window would silently not exist.
    this.#debounceMs = config.debounceMs ?? 2000;
    this.#maxBatchSize = config.maxBatchSize ?? 20;
    this.#drizzle = config.drizzle ?? null;
  }

  /**
   * Bind to an EventBus and start listening for knowledge change events.
   * Any previous binding is released first so listeners never accumulate.
   *
   * @param {object} eventBus - must provide `on(event, handler)` and `off(event, handler)`
   */
  bindEventBus(eventBus) {
    this.#unbindEventBus();
    this.#eventBus = eventBus;
    this.#boundHandler = (data) => {
      this.#onKnowledgeChanged(data);
    };
    // Kept as a field (not an anonymous closure) so destroy() can unregister it.
    this.#boundDeleteHandler = (data) => {
      const entryId = data?.entryId || data?.id;
      if (entryId) {
        this.#enqueue({
          type: 'remove',
          entryId,
          timestamp: Date.now(),
        });
      }
    };
    eventBus.on('knowledge:changed', this.#boundHandler);
    eventBus.on('knowledge:deleted', this.#boundDeleteHandler);
    this.#logger.info('[SyncCoordinator] Bound to EventBus');
  }

  /**
   * Flush immediately (for tests, or before shutdown to make sure data is persisted).
   *
   * Resolves only once every pending change has been applied, including a batch
   * that was already in flight when flush() was called and any changes that
   * were enqueued while it ran.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    this.#clearDebounceTimer();
    while (this.#inFlight || this.#pendingChanges.size > 0) {
      if (this.#inFlight) {
        try {
          await this.#inFlight;
        } catch {
          // The caller that started this batch reports its failure; here we only wait.
        }
      } else {
        await this.#processBatch();
      }
    }
    // A finished batch may have re-armed the timer for work we just drained.
    this.#clearDebounceTimer();
  }

  /**
   * Startup reconciliation: compare DB knowledge entries with the vector index
   * and repair inconsistencies.
   *   - orphan vectors (`entry_*` ids in the index without a DB row) → removed
   *   - missing vectors (DB rows without an index entry) → enqueued and flushed
   *
   * @param db - database connection (better-sqlite3 style); only used when no
   *   Drizzle instance was configured
   * @returns {Promise<{orphansRemoved: number, missingSynced: number, errors: string[]}>}
   *   reconciliation result; `missingSynced` counts entries that were enqueued
   */
  async reconcile(db) {
    const result = { orphansRemoved: 0, missingSynced: 0, errors: [] };
    try {
      // 1. All ids currently in the vector index.
      const vectorIds = new Set(await this.#vectorStore.listIds());

      // 2. All non-deprecated knowledge entries in the DB.
      let dbEntries = [];
      try {
        if (this.#drizzle) {
          dbEntries = this.#drizzle
            .select({
              id: knowledgeEntries.id,
              title: knowledgeEntries.title,
              content: knowledgeEntries.content,
              kind: knowledgeEntries.kind,
            })
            .from(knowledgeEntries)
            .where(ne(knowledgeEntries.lifecycle, 'deprecated'))
            .all();
        } else if (db) {
          // Backward compatibility: tests may pass a mock db.
          dbEntries = queryNonDeprecatedEntries(db);
        } else {
          return result;
        }
      } catch {
        // The table may not exist yet; nothing to reconcile in that case.
        return result;
      }

      const dbIdSet = new Set(dbEntries.map((e) => `entry_${e.id}`));

      // 3. Orphans: `entry_`-prefixed vectors with no matching DB row.
      for (const vectorId of vectorIds) {
        if (vectorId.startsWith('entry_') && !dbIdSet.has(vectorId)) {
          try {
            await this.#vectorStore.remove(vectorId);
            result.orphansRemoved++;
          } catch (err) {
            // Best effort: a failed removal must not block the rest of the reconciliation.
            this.#logger.warn('[SyncCoordinator] orphan removal failed', {
              vectorId,
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
      }

      // 4. Missing: DB rows with no matching vector.
      for (const entry of dbEntries) {
        const expectedId = `entry_${entry.id}`;
        if (!vectorIds.has(expectedId)) {
          this.#enqueue({
            type: 'upsert',
            entryId: entry.id,
            title: entry.title,
            content: entry.content,
            kind: entry.kind,
            timestamp: Date.now(),
          });
          result.missingSynced++;
        }
      }

      // Apply the missing entries right away instead of waiting for the debounce.
      if (result.missingSynced > 0) {
        await this.flush();
      }

      this.#logger.info('[SyncCoordinator] Reconciliation complete', {
        orphansRemoved: result.orphansRemoved,
        missingSynced: result.missingSynced,
      });
    } catch (err) {
      result.errors.push(err instanceof Error ? err.message : String(err));
    }
    return result;
  }

  /** Tear down: cancel the timer, unregister both event listeners, drop pending changes. */
  destroy() {
    this.#clearDebounceTimer();
    this.#unbindEventBus();
    this.#pendingChanges.clear();
    this.#logger.info('[SyncCoordinator] Destroyed');
  }

  // ═══ Private ═══

  /** Cancel the debounce timer if one is armed. */
  #clearDebounceTimer() {
    if (this.#debounceTimer) {
      clearTimeout(this.#debounceTimer);
      this.#debounceTimer = null;
    }
  }

  /** Unregister every listener this instance attached to the current EventBus. */
  #unbindEventBus() {
    const bus = this.#eventBus;
    if (bus) {
      if (this.#boundHandler) {
        bus.off('knowledge:changed', this.#boundHandler);
      }
      if (this.#boundDeleteHandler) {
        bus.off('knowledge:deleted', this.#boundDeleteHandler);
      }
    }
    this.#boundHandler = null;
    this.#boundDeleteHandler = null;
    this.#eventBus = null;
  }

  /** Translate a `knowledge:changed` payload into a pending remove or upsert. */
  #onKnowledgeChanged(data) {
    if (!data) {
      return;
    }
    const entryId = data.entryId || data.id || data.entry?.id;
    if (!entryId) {
      return;
    }
    if (data.action === 'delete') {
      this.#enqueue({ type: 'remove', entryId, timestamp: Date.now() });
      return;
    }
    this.#enqueue({
      type: 'upsert',
      entryId,
      title: data.entry?.title,
      content: data.entry?.content,
      kind: data.entry?.kind,
      timestamp: Date.now(),
    });
  }

  /** Record a change (last write per entryId wins) and schedule processing. */
  #enqueue(change) {
    this.#pendingChanges.set(change.entryId, change);
    this.#schedule();
  }

  /**
   * Start a batch immediately when the pending set reached maxBatchSize,
   * otherwise (re)start the debounce timer.
   */
  #schedule() {
    this.#clearDebounceTimer();
    if (this.#pendingChanges.size >= this.#maxBatchSize) {
      this.#processBatch().catch((err) => this.#logBatchError(err));
      return;
    }
    this.#debounceTimer = setTimeout(() => {
      this.#debounceTimer = null;
      this.#processBatch().catch((err) => this.#logBatchError(err));
    }, this.#debounceMs);
  }

  /** Report an unexpected failure of a background batch. */
  #logBatchError(err) {
    this.#logger.warn('[SyncCoordinator] processBatch error', {
      error: err instanceof Error ? err.message : String(err),
    });
  }

  /**
   * Snapshot the pending changes and apply them. A no-op when a batch is
   * already in flight or nothing is pending. Changes that arrive while the
   * batch runs stay queued and are rescheduled afterwards.
   */
  async #processBatch() {
    if (this.#inFlight || this.#pendingChanges.size === 0) {
      return;
    }
    const batch = new Map(this.#pendingChanges);
    this.#pendingChanges.clear();
    this.#inFlight = this.#runBatch(batch);
    try {
      await this.#inFlight;
    } finally {
      this.#inFlight = null;
      if (this.#pendingChanges.size > 0) {
        this.#schedule();
      }
    }
  }

  /**
   * Apply one batch: removals first, then embed + upsert for entries with text.
   * The final log reports the number of entries actually removed / upserted.
   */
  async #runBatch(batch) {
    const upserts = [];
    const removes = [];
    for (const change of batch.values()) {
      if (change.type === 'remove') {
        removes.push(change.entryId);
      } else {
        upserts.push(change);
      }
    }

    let removed = 0;
    for (const entryId of removes) {
      try {
        await this.#vectorStore.remove(`entry_${entryId}`);
        removed++;
      } catch (err) {
        // Best effort: one failed removal must not block the rest of the batch.
        this.#logger.warn('[SyncCoordinator] remove failed', {
          entryId,
          error: err instanceof Error ? err.message : String(err),
        });
      }
    }

    let upserted = 0;
    // Upserts carrying neither title nor content have nothing to embed and are skipped.
    const validUpserts = upserts.filter((u) => u.title || u.content);
    if (validUpserts.length > 0) {
      const texts = validUpserts.map((u) => this.#extractText(u));
      try {
        const embedResult = await this.#embedProvider.embed(texts);
        // A provider may return one flat vector for a single text; normalise to a list of vectors.
        const vectors = Array.isArray(embedResult[0]) ? embedResult : [embedResult];
        const items = validUpserts.map((u, i) => ({
          id: `entry_${u.entryId}`,
          content: texts[i],
          vector: vectors[i] || [],
          metadata: {
            entryId: u.entryId,
            title: u.title || '',
            kind: u.kind || 'unknown',
            source: 'event_sync',
            updatedAt: Date.now(),
          },
        }));
        await this.#vectorStore.batchUpsert(items);
        upserted = items.length;
      } catch (err) {
        this.#logger.warn('[SyncCoordinator] batch embed/upsert failed', {
          count: validUpserts.length,
          error: err instanceof Error ? err.message : String(err),
        });
      }
    }

    this.#logger.info('[SyncCoordinator] Batch processed', {
      upserted,
      removed,
    });
  }

  /**
   * Build the text to embed: title, then string content or the `body` / `code`
   * string fields of object content, joined by blank lines. Falls back to the
   * entryId when no text is available.
   */
  #extractText(change) {
    const parts = [];
    if (change.title) {
      parts.push(change.title);
    }
    if (typeof change.content === 'string') {
      parts.push(change.content);
    } else if (change.content && typeof change.content === 'object') {
      const c = change.content;
      if (typeof c.body === 'string') {
        parts.push(c.body);
      }
      if (typeof c.code === 'string') {
        parts.push(c.code);
      }
    }
    return parts.join('\n\n') || change.entryId;
  }
}