UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

276 lines (275 loc) 8.44 kB
/**
 * AsyncPersistence — write-ahead log (WAL) plus deferred full-index writes.
 *
 * Design:
 * - Every operation passed to appendWal() is queued in memory and appended to
 *   a WAL file (NDJSON + CRC32).
 * - A flush runs after a delay (default 2 s) or as soon as the queue reaches
 *   the batch size (default 100): it invokes the persist callback, which is
 *   expected to write the complete .asvec file, and then removes the WAL.
 * - On startup the owner loads the main .asvec file and then calls recover()
 *   to replay the operations still present in the WAL.
 * - Every WAL entry carries a CRC32 checksum; corrupted entries are skipped.
 *
 * WAL format (NDJSON), one entry per line:
 *   JSON\tCRC32_HEX\n
 *   JSON: { "t": 1, "id": "doc_1", "c": "content", "v": [...], "m": {...} }
 *   t=1: upsert, t=2: remove, t=3: clear
 *
 * @module infrastructure/vector/AsyncPersistence
 */
import { appendFileSync, existsSync, mkdirSync, readFileSync, unlinkSync } from 'node:fs';
import { dirname } from 'node:path';

/** WAL operation type codes (the `t` field of a WAL entry). */
export const WAL_OP = Object.freeze({
  UPSERT: 1,
  REMOVE: 2,
  CLEAR: 3,
});

/**
 * Lookup table for the reflected CRC32 polynomial 0xEDB88320
 * (ISO 3309 / ITU-T V.42). Pure JS, no dependencies.
 */
const CRC32_TABLE = (() => {
  const table = new Uint32Array(256);
  for (let i = 0; i < 256; i++) {
    let crc = i;
    for (let j = 0; j < 8; j++) {
      crc = crc & 1 ? (crc >>> 1) ^ 0xedb88320 : crc >>> 1;
    }
    table[i] = crc;
  }
  return table;
})();

/**
 * Computes the CRC32 checksum of a string's UTF-8 encoding.
 *
 * @param {string} str text to checksum
 * @returns {string} checksum as 8 lowercase hexadecimal digits
 */
function crc32(str) {
  const bytes = Buffer.from(str, 'utf-8');
  let crc = 0xffffffff;
  for (let i = 0; i < bytes.length; i++) {
    crc = (crc >>> 8) ^ CRC32_TABLE[(crc ^ bytes[i]) & 0xff];
  }
  // `>>> 0` converts the signed 32-bit XOR result back to an unsigned value.
  return ((crc ^ 0xffffffff) >>> 0).toString(16).padStart(8, '0');
}

export class AsyncPersistence {
  /** Path of the main index file (.asvec). */
  #indexPath;

  /** Path of the WAL file (.wal). */
  #walPath;

  /** Operations appended since the last successful flush. */
  #pendingOps = [];

  /** Handle of the armed flush timer, or null when none is armed. */
  #flushTimer = null;

  /** True while a flush is awaiting the persist callback. */
  #flushing = false;

  /** Delay before a scheduled flush runs (ms). */
  #flushIntervalMs;

  /** Queue length that triggers an immediate flush. */
  #flushBatchSize;

  /** Persist callback supplied by the owner: () => Promise<void>. */
  #onPersist;

  /** Replay callback supplied by the owner: (op) => void. */
  #onReplay;

  /** Whether the WAL is enabled. */
  #enabled;

  /**
   * @param {object} options
   * @param {string} options.indexPath path of the main index file (.asvec)
   * @param {Function} options.onPersist async () => void; writes the complete .asvec
   * @param {Function} options.onReplay (op) => void; re-applies one recovered operation
   * @param {boolean} [options.enabled=true] set to false to disable the WAL
   * @param {number} [options.flushIntervalMs=2000] delay before a scheduled flush;
   *   falsy values (including 0) fall back to the default
   * @param {number} [options.flushBatchSize=100] queue length that triggers an
   *   immediate flush; falsy values (including 0) fall back to the default
   */
  constructor(options) {
    const { indexPath } = options;
    this.#indexPath = indexPath;
    // A path without the .asvec suffix would otherwise map onto itself, so WAL
    // entries would be appended to (and later deleted along with) the index
    // file. Append ".wal" in that case to keep the two files distinct.
    this.#walPath = /\.asvec$/.test(indexPath)
      ? indexPath.replace(/\.asvec$/, '.wal')
      : `${indexPath}.wal`;
    this.#onPersist = options.onPersist;
    this.#onReplay = options.onReplay;
    this.#enabled = options.enabled !== false;
    this.#flushIntervalMs = options.flushIntervalMs || 2000;
    this.#flushBatchSize = options.flushBatchSize || 100;

    // Make sure the directory that will hold the WAL exists.
    const dir = dirname(this.#walPath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  }

  /** WAL file path (exposed for tests and debugging). */
  get walPath() {
    return this.#walPath;
  }

  /** Number of operations waiting to be flushed. */
  get pendingCount() {
    return this.#pendingOps.length;
  }

  /** Whether a flush is currently in progress. */
  get isFlushing() {
    return this.#flushing;
  }

  /**
   * Records an operation: queues it in memory, appends it to the WAL file and
   * schedules a flush. Does nothing when the WAL is disabled.
   *
   * @param {object} op WAL operation
   * @param {number} op.t operation type: 1=upsert, 2=remove, 3=clear
   * @param {string} [op.id] document id
   * @param {string} [op.c] content (upsert)
   * @param {number[]} [op.v] vector (upsert)
   * @param {object} [op.m] metadata (upsert)
   */
  appendWal(op) {
    if (!this.#enabled) {
      return;
    }
    this.#pendingOps.push(op);
    this.#writeWalEntry(op);
    this.#scheduleFlush();
  }

  /**
   * Appends one entry to the WAL file on disk.
   * Entry format: JSON\tCRC32_HEX\n
   */
  #writeWalEntry(op) {
    try {
      const json = JSON.stringify(op);
      const checksum = crc32(json);
      appendFileSync(this.#walPath, `${json}\t${checksum}\n`, 'utf-8');
    } catch {
      // Deliberately best-effort: the operation is already in the in-memory
      // queue, so the next flush still invokes the persist callback.
    }
  }

  /** Flushes immediately once the batch size is reached, otherwise arms the timer. */
  #scheduleFlush() {
    if (this.#flushing) {
      // The running flush re-arms the timer itself when it finishes.
      return;
    }
    if (this.#pendingOps.length >= this.#flushBatchSize) {
      // #doFlush handles its own errors, so the promise cannot reject.
      void this.#doFlush();
      return;
    }
    this.#armFlushTimer();
  }

  /** Arms the flush timer unless one is already armed. */
  #armFlushTimer() {
    if (this.#flushTimer) {
      return;
    }
    this.#flushTimer = setTimeout(() => {
      this.#flushTimer = null;
      void this.#doFlush();
    }, this.#flushIntervalMs);
    // Do not keep the process alive just for a pending flush.
    this.#flushTimer.unref?.();
  }

  /** Runs one flush: invokes the persist callback, then removes the WAL. */
  async #doFlush() {
    if (this.#flushing || this.#pendingOps.length === 0) {
      return;
    }
    this.#flushing = true;
    const ops = this.#pendingOps.splice(0);
    try {
      await this.#onPersist();
      // Operations appended while the callback was running are logged in the
      // WAL but may be missing from the file it just wrote. Keep the WAL until
      // a flush completes with nothing newly queued.
      if (this.#pendingOps.length === 0) {
        this.#clearWal();
      }
    } catch {
      // Persist failed: the WAL stays on disk for recover(), and the batch goes
      // back to the front of the queue. concat avoids spreading a potentially
      // huge array into call arguments.
      this.#pendingOps = ops.concat(this.#pendingOps);
    } finally {
      this.#flushing = false;
      // #scheduleFlush ignores appends made during a flush, so anything still
      // queued would otherwise wait for an unrelated future append. Retry via
      // the timer only, to avoid a tight loop when persist keeps failing.
      if (this.#pendingOps.length > 0) {
        this.#armFlushTimer();
      }
    }
  }

  /**
   * Cancels any scheduled flush and flushes now (for shutdown and tests).
   * If a flush is already in progress this returns without waiting for it.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    if (this.#flushTimer) {
      clearTimeout(this.#flushTimer);
      this.#flushTimer = null;
    }
    await this.#doFlush();
  }

  /**
   * Startup recovery: reads the WAL file and passes every valid entry to the
   * replay callback. Entries without a tab separator, with a CRC32 mismatch,
   * with unparsable JSON, or whose replay callback throws are counted as
   * skipped. The WAL file is deleted once at least one entry was processed.
   *
   * NOTE(review): replayed operations are not queued for a flush here, so the
   * caller presumably persists the recovered state itself — confirm.
   *
   * @returns {{ replayed: number, skipped: number }} number of entries replayed
   *   and skipped; both are 0 when the WAL is disabled, missing or unreadable
   */
  recover() {
    if (!this.#enabled || !existsSync(this.#walPath)) {
      return { replayed: 0, skipped: 0 };
    }

    let replayed = 0;
    let skipped = 0;

    try {
      const content = readFileSync(this.#walPath, 'utf-8');
      const lines = content.split('\n').filter((line) => line.length > 0);

      for (const line of lines) {
        // The checksum follows the last tab; tabs inside JSON strings are
        // escaped by JSON.stringify, so this split is unambiguous.
        const tabIdx = line.lastIndexOf('\t');
        if (tabIdx === -1) {
          skipped++;
          continue;
        }

        const json = line.slice(0, tabIdx);
        const expectedCrc = line.slice(tabIdx + 1);
        if (crc32(json) !== expectedCrc) {
          skipped++;
          continue;
        }

        try {
          this.#onReplay(JSON.parse(json));
          replayed++;
        } catch {
          skipped++;
        }
      }

      if (replayed > 0 || skipped > 0) {
        this.#clearWal();
      }
    } catch {
      // The WAL could not be read: skip recovery.
    }

    return { replayed, skipped };
  }

  /** Deletes the WAL file if it exists. */
  #clearWal() {
    try {
      if (existsSync(this.#walPath)) {
        unlinkSync(this.#walPath);
      }
    } catch {
      // Failing to delete the WAL is not fatal.
    }
  }

  /** Tears down the instance by cancelling the flush timer. */
  destroy() {
    if (this.#flushTimer) {
      clearTimeout(this.#flushTimer);
      this.#flushTimer = null;
    }
  }

  /**
   * Synchronous teardown for process exit.
   * Only cancels the flush timer; it does not persist anything, which is left
   * to the caller.
   */
  destroySync() {
    this.destroy();
  }
}

// Exported for tests.
export { crc32 };