UNPKG

@agentkai/node

Version:

AgentKai的Node.js环境特定实现

471 lines (470 loc) 15.8 kB
import { Logger, } from '@agentkai/core'; import { platform } from '../../platform'; import { HierarchicalNSW } from 'hnswlib-node'; /** * HNSW搜索提供者 * * 这个类使用HNSW算法提供高效的向量搜索功能。与传统实现不同,为避免与底层C++库 * 的内存管理问题,本实现不保留索引实例,而是将记忆和其嵌入向量保存在内存中, * 仅在需要搜索时创建临时索引。此方法虽然每次搜索时有额外开销,但完全避免了 * 内存泄漏和双重释放问题。 */ export class HnswSearchProvider { /** * 创建HNSW搜索提供者 * @param storage 存储提供者,用于加载记忆 * @param embeddingProvider 嵌入向量提供者 * @param dataPath 数据目录路径 * @param indexName 索引名称 */ constructor(storage, embeddingProvider, dataPath, indexName = 'memory') { // 核心依赖 Object.defineProperty(this, "embeddingProvider", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "logger", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "storage", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "fs", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "pathUtils", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "metadataPath", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "dimensions", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "spacetype", { enumerable: true, configurable: true, writable: true, value: 'cosine' }); // 内部状态 Object.defineProperty(this, "initialized", { enumerable: true, configurable: true, writable: true, value: false }); Object.defineProperty(this, "idToIndex", { enumerable: true, configurable: true, writable: true, value: new Map() }); Object.defineProperty(this, "memories", { enumerable: true, configurable: true, writable: true, value: new Map() }); Object.defineProperty(this, "currentCount", { enumerable: true, configurable: true, writable: true, value: 0 }); // 搜索参数 Object.defineProperty(this, "efConstruction", { enumerable: true, configurable: true, writable: true, value: 200 }); Object.defineProperty(this, "efSearch", { enumerable: true, configurable: true, writable: true, value: 50 }); Object.defineProperty(this, "M", { enumerable: true, configurable: true, writable: true, value: 16 }); this.logger = new Logger('HnswSearchProvider'); this.storage = storage; this.embeddingProvider = embeddingProvider; this.fs = platform.fs; this.pathUtils = platform.path; this.dimensions = embeddingProvider.getDimensions(); this.metadataPath = this.pathUtils.join(dataPath, `${indexName}.meta.json`); } /** * 删除记忆 * 实现ISearchProvider接口 * @param id 记忆ID */ async deleteMemory(id) { await this.ensureInitialized(); if (this.idToIndex.has(id)) { const indexId = this.idToIndex.get(id); this.idToIndex.delete(id); this.memories.delete(indexId); await this.saveMetadata(); this.logger.debug(`记忆已从索引中移除, ID: ${id}`); } } /** * 按内容搜索记忆 * @param query 搜索查询文本 * @param options 搜索选项 * @returns 搜索结果 */ async searchByContent(query, options) { try { const vector = await this.embeddingProvider.getEmbedding(query); return await this.searchByVector(vector, options); } catch (error) { this.logger.error('内容搜索失败', error); return { results: [] }; } } /** * 按向量搜索记忆 * @param vector 搜索向量 * @param options 搜索选项 * @returns 搜索结果 */ async searchByVector(vector, options) { await this.ensureInitialized(); const limit = options?.limit || 10; if (this.memories.size === 0) { this.logger.debug('索引为空,无法搜索'); return { results: [] }; } // 验证向量维度 if (vector.length !== this.dimensions) { this.logger.warn(`查询向量维度 ${vector.length} 与索引维度 ${this.dimensions} 不匹配`); return { results: [] }; } try { const results = await this.searchWithTempIndex(vector, limit); return { results, totalCount: results.length, }; } catch (error) { this.logger.error('向量搜索失败', error); return { results: [] }; } } /** * 创建临时索引并执行搜索 * @param vector 查询向量 * @param limit 结果数量限制 * @returns 匹配的记忆数组 */ async searchWithTempIndex(vector, limit) { const tempIndex = new HierarchicalNSW(this.spacetype, this.dimensions); const maxElements = Math.max(this.memories.size + 10, 100); try { // 初始化临时索引 tempIndex.initIndex(maxElements, this.M, this.efConstruction); tempIndex.setEf(this.efSearch); // 将记忆添加到临时索引 for (const [indexId, memory] of this.memories.entries()) { if (memory.embedding?.length === this.dimensions) { tempIndex.addPoint(memory.embedding, indexId); } } // 限制结果数量 const numNeighbors = Math.min(limit, this.memories.size); if (numNeighbors === 0) return []; // 执行KNN搜索 const result = tempIndex.searchKnn(vector, numNeighbors); // 处理搜索结果 return this.processSearchResults(result.neighbors, result.distances); } finally { // 安全释放临时索引资源 this.releaseIndexResource(tempIndex); } } /** * 处理搜索结果 * @param neighbors 邻居索引 * @param distances 距离值 * @returns 处理后的记忆数组 */ processSearchResults(neighbors, distances) { const results = []; for (let i = 0; i < neighbors.length; i++) { const indexId = neighbors[i]; const distance = distances[i]; if (this.memories.has(indexId)) { const memory = this.memories.get(indexId); const similarity = this.distanceToSimilarity(distance); // 添加相似度信息 results.push({ ...memory, metadata: { ...memory.metadata, similarity, }, }); } } return results; } /** * 将距离转换为相似度 * @param distance 距离值 * @returns 相似度值 (0-1) */ distanceToSimilarity(distance) { if (this.spacetype === 'cosine') { return 1 - distance; // 余弦距离转相似度 } else if (this.spacetype === 'ip') { return distance; // 内积就是相似度 } else { // L2空间(欧氏距离)使用高斯核转换 return Math.exp(-distance / 2); } } /** * 安全释放索引资源 * @param index 索引实例 */ releaseIndexResource(index) { try { // 断开原型链引用协助垃圾回收 // @ts-expect-error - 动态操作对象协助垃圾回收 index.__proto__ = null; } catch { // 忽略可能的错误 } } /** * 确保已初始化 */ async ensureInitialized() { if (!this.initialized) { await this.initialize(); } } /** * 初始化搜索提供者 * 加载记忆元数据 */ async initialize() { if (this.initialized) return; this.logger.info(`初始化HNSW搜索提供者, 元数据路径: ${this.metadataPath}`); try { // 加载元数据 await this.loadMetadata(); // 如果没有加载到记忆,则尝试重建索引 if (this.memories.size === 0) { this.logger.info('未找到有效记忆,将重建索引'); await this.rebuildIndex(); } else { this.logger.info(`成功加载元数据,包含 ${this.memories.size} 条记忆`); } this.initialized = true; } catch (error) { this.logger.error('初始化搜索提供者失败', error); throw error; } } /** * 添加记忆到搜索索引 * @param memory 要添加的记忆 */ async addMemory(memory) { if (!memory.embedding || memory.embedding.length === 0) { this.logger.warn(`记忆 ${memory.id} 没有嵌入向量,无法添加`); return; } await this.ensureInitialized(); try { // 如果记忆已存在,先删除 if (this.idToIndex.has(memory.id)) { await this.deleteMemory(memory.id); } // 添加到映射 const indexId = this.currentCount++; this.idToIndex.set(memory.id, indexId); this.memories.set(indexId, memory); // 保存元数据 await this.saveMetadata(); this.logger.debug(`记忆已添加, ID: ${memory.id}`); } catch (error) { this.logger.error(`添加记忆失败, ID: ${memory.id}:`, error); } } /** * 更新记忆 * @param memory 更新后的记忆 */ async updateMemory(memory) { await this.addMemory(memory); } /** * 移除记忆 * @param id 记忆ID */ async removeMemory(id) { await this.deleteMemory(id); } /** * 清空索引 */ async clear() { this.logger.info('清空索引'); try { // 重置所有内存数据 this.idToIndex.clear(); this.memories.clear(); this.currentCount = 0; // 保存空元数据 await this.saveMetadata(); this.logger.info('索引已清空'); } catch (error) { this.logger.error('清空索引失败', error); } } /** * 保存元数据到文件 */ async saveMetadata() { try { // 确保目录存在 const dir = this.pathUtils.dirname(this.metadataPath); if (!(await this.fs.exists(dir))) { await this.fs.mkdir(dir, { recursive: true }); } // 构造元数据对象 const metadata = { dimensions: this.dimensions, count: this.currentCount, spacetype: this.spacetype, idToIndex: Array.from(this.idToIndex.entries()), memories: Array.from(this.memories.entries()).map(([id, memory]) => [ String(id), memory, ]), }; // 保存到文件 await this.fs.writeFile(this.metadataPath, JSON.stringify(metadata)); this.logger.debug(`元数据已保存到: ${this.metadataPath}`); } catch (error) { this.logger.error(`保存元数据失败:`, error); throw error; } } /** * 从文件加载元数据 */ async loadMetadata() { // 重置数据 this.idToIndex.clear(); this.memories.clear(); this.currentCount = 0; // 检查文件是否存在 if (!(await this.fs.exists(this.metadataPath))) { this.logger.info(`元数据文件不存在: ${this.metadataPath}`); return; } try { // 读取并解析元数据 const metadataStr = await this.fs.readFile(this.metadataPath); const metadata = JSON.parse(metadataStr); // 恢复基本属性 this.dimensions = metadata.dimensions || this.dimensions; this.currentCount = metadata.count || 0; this.spacetype = metadata.spacetype || 'cosine'; // 恢复ID映射 if (Array.isArray(metadata.idToIndex)) { this.idToIndex = new Map(metadata.idToIndex); } // 恢复记忆数据 if (Array.isArray(metadata.memories)) { for (const [indexId, memory] of metadata.memories) { this.memories.set(parseInt(indexId), memory); } } this.logger.info(`加载了 ${this.memories.size} 条记忆的元数据`); } catch (error) { this.logger.error(`加载元数据失败:`, error); // 确保数据一致性 this.idToIndex.clear(); this.memories.clear(); this.currentCount = 0; } } /** * 重建索引 * 加载所有记忆并重建元数据 */ async rebuildIndex() { this.logger.info('开始重建索引'); try { // 清空内存数据 this.idToIndex.clear(); this.memories.clear(); this.currentCount = 0; // 获取所有记忆 const memories = await this.storage.list(); // 筛选有嵌入向量的记忆 const validMemories = memories.filter((memory) => memory.embedding?.length === this.dimensions); this.logger.info(`找到 ${validMemories.length} 条有效记忆用于索引构建`); // 添加到内存映射 for (let i = 0; i < validMemories.length; i++) { const memory = validMemories[i]; const indexId = i; this.idToIndex.set(memory.id, indexId); this.memories.set(indexId, memory); } this.currentCount = validMemories.length; // 保存元数据 await this.saveMetadata(); this.logger.info('索引重建完成'); } catch (error) { this.logger.error('重建索引失败', error); throw error; } } }