UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

160 lines (159 loc) 5.07 kB
/** * MemoryEmbeddingStore — 向量嵌入的 JSON sidecar 存储 * * 将 Agent Memory 的向量嵌入从 SQLite BLOB 迁移到独立 JSON 文件, * 与 Knowledge 侧 HNSW `.asvec` 的设计理念对齐: * **结构化数据存 SQLite,向量存独立文件。** * * 设计: * - 内存 Map<id, number[]> 缓存,启动时一次性加载 * - 写入时更新内存 + debounced flush 到 JSON * - 崩溃丢失可通过 embedAllMemories() backfill 恢复 * * 文件位置: .autosnippet/context/memory_embeddings.json * * @module MemoryEmbeddingStore */ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; /** debounce flush 延迟 (ms) */ const FLUSH_DELAY_MS = 2000; export class MemoryEmbeddingStore { /** 内存缓存: id → embedding vector */ #cache = new Map(); /** JSON 文件路径 */ #filePath; /** debounce timer */ #flushTimer = null; /** dirty flag */ #dirty = false; /** * @param projectRoot 项目根目录 * @param opts.filePath 覆盖默认文件路径 (测试用) */ constructor(projectRoot, opts) { this.#filePath = opts?.filePath ?? join(projectRoot, '.autosnippet', 'context', 'memory_embeddings.json'); this.#load(); } /** 获取单条 embedding */ get(id) { return this.#cache.get(id) ?? null; } /** 设置单条 embedding */ set(id, embedding) { this.#cache.set(id, embedding); this.#scheduleDirtyFlush(); } /** 批量设置 embeddings */ batchSet(entries) { let count = 0; for (const { id, embedding } of entries) { this.#cache.set(id, embedding); count++; } if (count > 0) { this.#scheduleDirtyFlush(); } return count; } /** 删除单条 embedding */ delete(id) { const existed = this.#cache.delete(id); if (existed) { this.#scheduleDirtyFlush(); } return existed; } /** 检查是否有 embedding */ has(id) { return this.#cache.has(id); } /** 返回所有缺少 embedding 的 ID (给定候选 ID 列表) */ getMissingIds(candidateIds) { return candidateIds.filter((id) => !this.#cache.has(id)); } /** 缓存大小 */ get size() { return this.#cache.size; } /** 清除所有 embeddings (用于重建) */ clear() { this.#cache.clear(); this.#scheduleDirtyFlush(); } /** 立即刷写到磁盘 (shutdown / 测试用) */ flushSync() { if (this.#flushTimer) { clearTimeout(this.#flushTimer); this.#flushTimer = null; } if (!this.#dirty) { return; } this.#writeFile(); this.#dirty = false; } /** GC: 移除不在给定 ID 集合中的 embeddings */ gc(activeIds) { let removed = 0; for (const id of this.#cache.keys()) { if (!activeIds.has(id)) { this.#cache.delete(id); removed++; } } if (removed > 0) { this.#scheduleDirtyFlush(); } return removed; } // ═══════════════════════════════════════════════════════════ // Private // ═══════════════════════════════════════════════════════════ #load() { try { if (existsSync(this.#filePath)) { const raw = readFileSync(this.#filePath, 'utf-8'); const data = JSON.parse(raw); for (const [id, vec] of Object.entries(data)) { if (Array.isArray(vec)) { this.#cache.set(id, vec); } } } } catch { // 文件不存在或解析失败 → 空缓存,后续 backfill 会重建 } } #writeFile() { try { const dir = dirname(this.#filePath); if (!existsSync(dir)) { mkdirSync(dir, { recursive: true }); } const obj = {}; for (const [id, vec] of this.#cache) { obj[id] = vec; } writeFileSync(this.#filePath, JSON.stringify(obj), 'utf-8'); } catch { // 写入失败不阻塞运行时;下次 flush 或 backfill 会重试 } } #scheduleDirtyFlush() { this.#dirty = true; if (this.#flushTimer) { return; // 已有 pending timer } this.#flushTimer = setTimeout(() => { this.#flushTimer = null; if (this.#dirty) { this.#writeFile(); this.#dirty = false; } }, FLUSH_DELAY_MS); } }