UNPKG

@n2flowjs/nbase

Version:

Neural Vector Database for efficient similarity search

1,062 lines 46.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const fs_1 = require("fs"); const profiling_1 = require("../utils/profiling"); const log_1 = require("../utils/log"); /** * Hierarchical Navigable Small World (HNSW) graph for approximate nearest neighbor search * Optimized for performance */ /** * Hierarchical Navigable Small World (HNSW) index implementation for approximate nearest neighbor search. * * HNSW is an algorithm for efficient approximate nearest neighbor search in high-dimensional spaces. * It creates a multi-layered graph structure that allows for faster search by navigating through * a hierarchy of increasingly dense graphs. * * Key features: * - Dimension-aware mode: Optimizes searches for vectors of the same dimension * - Efficient incremental updates: Add/remove vectors without rebuilding the entire index * - Configurable precision via efConstruction and efSearch parameters * - Soft deletion support: Vectors can be marked for deletion without rebuilding * - Serialization/deserialization for persistent storage * * The implementation is optimized for both memory efficiency and search performance, * with specialized handling for different vector dimensions when in dimension-aware mode. * * @example * ```typescript * // Create a new HNSW index * const hnsw = new HNSW(vectorDatabase, { * M: 16, // Max connections per node (default: 16) * efConstruction: 200, // Size of dynamic candidate list during construction (default: 200) * efSearch: 50, // Size of dynamic candidate list during search (default: 50) * dimensionAware: true // Whether to optimize for vectors of the same dimension (default: true) * }); * * // Build the index with all vectors in the database * await hnsw.buildIndex({ * progressCallback: (progress) => console.log(`Indexing: ${progress * 100}%`) * }); * * // Search for nearest neighbors * const results = hnsw.findNearest(queryVector, 10); * ``` */ class HNSW { constructor(db, options = {}) { this.db = db; // Set HNSW parameters this.M = options.M || 16; this.efConstruction = options.efConstruction || 200; this.efSearch = options.efSearch || 50; this.maxLevel = options.maxLevel || 16; this.levelProbability = options.levelProbability || 0.5; this.entryPointId = options.entryPointId || null; this.dimensionAware = options.dimensionAware !== false; // Customizable distance function - Inlined Euclidean Distance for performance this.distanceFunc = (a, b) => { let sum = 0; const len = Math.min(a.length, b.length); // Inlined loop for performance for (let i = 0; i < len; i++) { const diff = a[i] - b[i]; sum += diff * diff; } // Dimension penalty - only if dimensionAware if (this.dimensionAware) { const dimDiff = Math.abs(a.length - b.length); if (dimDiff > 0) { sum += dimDiff * 0.01; } } return Math.sqrt(sum); }; // Initialize data structures this.nodes = new Map(); this.nodeToLevel = new Map(); this.nodeDimensions = new Map(); this.dimensionGroups = new Map(); this.dimensionEntryPoints = new Map(); this.timer = (0, profiling_1.createTimer)(); this.initialized = false; this.deletedNodes = new Set(); // Initialize deletedNodes set } /** * Add a vector to the HNSW graph * @param id - Vector identifier * @param vector - Vector to add * @returns Added vector ID */ addVector(id, vector) { // Verify the vector exists in the database (optimized get) const dbVector = this.db.getVector(id); // No need for || vector, assume vector is in DB if (!dbVector) { throw new Error(`Vector with id ${id} not found in database. Length vector input ${vector.length}`); } // Store vector dimension const dimension = dbVector.length; this.nodeDimensions.set(id, dimension); // Add to dimension group (optimized Set operations) let dimensionSet = this.dimensionGroups.get(dimension); if (!dimensionSet) { dimensionSet = new Set(); this.dimensionGroups.set(dimension, dimensionSet); } dimensionSet.add(id); // If this is the first vector in this dimension group, make it the entry point for this dimension if (dimensionSet.size === 1) { this.dimensionEntryPoints.set(dimension, id); } // If this is the first vector overall, make it the global entry point if (!this.entryPointId) { this.entryPointId = id; const level = this._randomLevel(); this._createNode(id, level, dimension); this.initialized = true; return id; } // Random level for new node const randLevel = this._randomLevel(); this._createNode(id, randLevel, dimension); // Connect the new node into the graph // For dimension-aware mode, use entry point from the same dimension group if available let entryPointId = this.dimensionAware ? this.dimensionEntryPoints.get(dimension) || this.entryPointId : this.entryPointId; // Only proceed with graph building if we have an entry point with the same dimension // or if we're not in dimension-aware mode if (entryPointId) { let currObj = entryPointId; let currDist = this._distance(id, entryPointId); // Get max level in the graph (optimized level retrieval) const entryLevel = this.nodeToLevel.get(entryPointId) || 0; // Work down from the entry level to the level of the new node for (let level = Math.min(entryLevel, randLevel); level >= 0; level--) { // Find closest neighbors at the current level let changed = true; // Greedy search for the closest element while (changed) { changed = false; // Get node connections at level (optimized retrieval) const neighbors = this._getConnections(currObj, level); for (const neighborId of neighbors) { // Skip neighbors with different dimensions in dimension-aware mode (optimized dimension check) if (this.dimensionAware && this.nodeDimensions.get(neighborId) !== dimension) { continue; } const dist = this._distance(id, neighborId); if (dist < currDist) { currDist = dist; currObj = neighborId; changed = true; } } } if (level <= randLevel) { // Add edges at this level this._addConnectionsForNode(id, level, currObj); } } // Update dimension entry point if new node is at a higher level const dimEntryPoint = this.dimensionEntryPoints.get(dimension); if (dimEntryPoint && randLevel > (this.nodeToLevel.get(dimEntryPoint) || 0)) { this.dimensionEntryPoints.set(dimension, id); } // Update global entry point if new node is at a higher level if (randLevel > (this.nodeToLevel.get(this.entryPointId) || 0)) { this.entryPointId = id; } } return id; } /** * Mark a vector as deleted in the HNSW graph * This method marks nodes for deletion without immediately removing them from the graph * @param id - Vector identifier to mark as deleted * @returns True if the vector was marked for deletion, false if not found */ markDelete(id) { if (!this.nodes.has(id)) { console.warn(`[HNSW] Vector with id ${id} not found to delete`); return false; } // Mark the node as deleted this.deletedNodes.add(id); // If the deleted node is the entry point, find a new entry point if (this.entryPointId === id) { console.log(`[HNSW] Entry point ${id} was deleted, finding new entry point...`); this._updateEntryPointAfterDeletion(); } // If the deleted node is a dimension entry point, update that too const nodeDimension = this.nodeDimensions.get(id); if (nodeDimension !== undefined && this.dimensionEntryPoints.get(nodeDimension) === id) { console.log(`[HNSW] Dimension entry point for dimension ${nodeDimension} was deleted, finding new entry point...`); this._updateDimensionEntryPointAfterDeletion(nodeDimension); } return true; } /** * Update entry point after the current entry point was deleted * @private */ _updateEntryPointAfterDeletion() { // Reset entry point this.entryPointId = null; // Find the node with the highest level that isn't deleted let maxLevel = -1; let newEntryPoint = null; for (const [nodeId, level] of this.nodeToLevel.entries()) { if (this.deletedNodes.has(nodeId)) continue; if (level > maxLevel) { maxLevel = level; newEntryPoint = nodeId; } } // Set new entry point if found if (newEntryPoint !== null) { this.entryPointId = newEntryPoint; console.log(`[HNSW] New entry point set to ${newEntryPoint} with level ${maxLevel}`); } else { console.warn('[HNSW] No valid entry point found after deletion.'); } } /** * Update dimension entry point after the current entry point for that dimension was deleted * @private */ _updateDimensionEntryPointAfterDeletion(dimension) { // Get the set of nodes for this dimension const dimensionNodes = this.dimensionGroups.get(dimension); if (!dimensionNodes || dimensionNodes.size === 0) { // No more nodes in this dimension this.dimensionEntryPoints.delete(dimension); console.log(`[HNSW] No more nodes in dimension ${dimension}, removing entry point.`); return; } // Find a new entry point for this dimension (highest level node) let maxLevel = -1; let newEntryPoint = null; for (const nodeId of dimensionNodes) { if (this.deletedNodes.has(nodeId)) continue; const level = this.nodeToLevel.get(nodeId) || 0; if (level > maxLevel) { maxLevel = level; newEntryPoint = nodeId; } } // Set new dimension entry point if found if (newEntryPoint !== null) { this.dimensionEntryPoints.set(dimension, newEntryPoint); console.log(`[HNSW] New dimension ${dimension} entry point set to ${newEntryPoint} with level ${maxLevel}`); } else { this.dimensionEntryPoints.delete(dimension); console.warn(`[HNSW] No valid entry point found for dimension ${dimension} after deletion.`); } } /** * Search with a specific entry point * @private */ _searchWithEntryPoint(entryPoint, query, k, options = {}) { const timer = this.timer; const queryDimension = query.length; const exactDimensions = options.exactDimensions || false; const filter = options.filter || (() => true); // Get entry level (optimized level retrieval) const entryLevel = this.nodeToLevel.get(entryPoint) || 0; let currObj = entryPoint; let currDist = this._distanceToQuery(query, entryPoint); // Search from top level down for (let i = entryLevel; i > 0; i--) { // Greedy search at this level let changed = true; while (changed) { changed = false; // Get node connections at level (optimized retrieval) const neighbors = this._getConnections(currObj, i); for (const neighborId of neighbors) { // Skip deleted nodes if (this.deletedNodes.has(neighborId)) continue; // Skip neighbors with different dimensions if exactDimensions is true (optimized dimension check) if (exactDimensions) { const neighborDim = this.nodeDimensions.get(neighborId); if (neighborDim !== queryDimension) continue; } // Skip if filter excludes this ID if (!filter(neighborId)) continue; const dist = this._distanceToQuery(query, neighborId); if (dist < currDist) { currDist = dist; currObj = neighborId; changed = true; } } } } // Beam search at the bottom level const ef = Math.max(k, this.efSearch); const visited = new Set(); const candidates = new Map(); // id -> distance const results = new Map(); // id -> distance // Initialize with entry point candidates.set(currObj, currDist); results.set(currObj, currDist); visited.add(currObj); // Main loop while (candidates.size > 0) { // Find closest candidate let closest = null; let minDist = Infinity; for (const [id, dist] of candidates.entries()) { if (dist < minDist) { minDist = dist; closest = id; } } if (closest === null) break; // If furthest result is closer than closest candidate, we're done if (results.size >= ef) { let furthestResultDist = -Infinity; for (const dist of results.values()) { if (dist > furthestResultDist) { furthestResultDist = dist; } } if (minDist > furthestResultDist) { break; } } // Remove from candidates candidates.delete(closest); // Get nearest neighbors for this candidate const connections = this._getConnections(closest, 0); for (const neighborId of connections) { // Skip deleted nodes if (this.deletedNodes.has(neighborId)) continue; // Skip neighbors with different dimensions if exactDimensions is true if (exactDimensions) { const neighborDim = this.nodeDimensions.get(neighborId); if (neighborDim !== queryDimension) continue; } // Skip if filter excludes this ID if (!filter(neighborId)) continue; if (!visited.has(neighborId)) { visited.add(neighborId); const dist = this._distanceToQuery(query, neighborId); // Add to results if results is not full or if it's closer than furthest result let furthestResultDist = -Infinity; let furthestId = null; if (results.size >= ef) { // Find furthest result for (const [resultId, resultDist] of results.entries()) { if (resultDist > furthestResultDist) { furthestResultDist = resultDist; furthestId = resultId; } } if (dist < furthestResultDist) { // Replace furthest result if (furthestId !== null) { results.delete(furthestId); } results.set(neighborId, dist); candidates.set(neighborId, dist); } } else { // Results not full yet results.set(neighborId, dist); candidates.set(neighborId, dist); } } } } // Convert results to array and sort by distance let resultsList = Array.from(results.entries()) .sort((a, b) => a[1] - b[1]) .map(([id, dist]) => ({ id, dist })); // Take top k results resultsList = resultsList.slice(0, k); timer.stop('hnsw_search'); return resultsList; } /** * Add a single point to the HNSW index * For incremental updates to an existing index * @param vector - Vector to add * @param id - Vector identifier * @returns Added vector ID */ addPoint(vector, id) { // Skip if the node is already in the index if (this.nodes.has(id)) { console.warn(`[HNSW] Vector with id ${id} already exists in the index`); return id; } // Remove from deleted nodes if it was marked as deleted if (this.deletedNodes.has(id)) { this.deletedNodes.delete(id); console.log(`[HNSW] Vector with id ${id} was previously deleted, undeleting it`); } // Store vector dimension const dimension = vector.length; this.nodeDimensions.set(id, dimension); // Add to dimension group let dimensionSet = this.dimensionGroups.get(dimension); if (!dimensionSet) { dimensionSet = new Set(); this.dimensionGroups.set(dimension, dimensionSet); } dimensionSet.add(id); // If this is the first vector in this dimension group, make it the entry point for this dimension if (dimensionSet.size === 1) { this.dimensionEntryPoints.set(dimension, id); } // If this is the first vector overall, make it the global entry point if (!this.entryPointId) { this.entryPointId = id; const level = this._randomLevel(); this._createNode(id, level, dimension); this.initialized = true; return id; } // Random level for new node const randLevel = this._randomLevel(); this._createNode(id, randLevel, dimension); // Connect the new node into the graph // For dimension-aware mode, use entry point from the same dimension group if available let entryPointId = this.dimensionAware ? this.dimensionEntryPoints.get(dimension) || this.entryPointId : this.entryPointId; // Only proceed with graph building if we have an entry point with the same dimension // or if we're not in dimension-aware mode if (entryPointId) { let currObj = entryPointId; let currDist = this.distanceFunc(vector, this.db.getVector(entryPointId) || vector); // Get max level in the graph const entryLevel = this.nodeToLevel.get(entryPointId) || 0; // Work down from the entry level to the level of the new node for (let level = Math.min(entryLevel, randLevel); level >= 0; level--) { // Find closest neighbors at the current level let changed = true; // Greedy search for the closest element while (changed) { changed = false; // Get node connections at level const neighbors = this._getConnections(currObj, level); for (const neighborId of neighbors) { // Skip neighbors with different dimensions in dimension-aware mode if (this.dimensionAware && this.nodeDimensions.get(neighborId) !== dimension) { continue; } const neighborVector = this.db.getVector(neighborId); if (!neighborVector) continue; const dist = this.distanceFunc(vector, neighborVector); if (dist < currDist) { currDist = dist; currObj = neighborId; changed = true; } } } if (level <= randLevel) { // Add edges at this level based on closest elements this._addConnectionsForNode(id, level, currObj); } } // Update dimension entry point if new node is at a higher level const dimEntryPoint = this.dimensionEntryPoints.get(dimension); if (dimEntryPoint && randLevel > (this.nodeToLevel.get(dimEntryPoint) || 0)) { this.dimensionEntryPoints.set(dimension, id); } // Update global entry point if new node is at a higher level if (randLevel > (this.nodeToLevel.get(this.entryPointId) || 0)) { this.entryPointId = id; } } return id; } /** * Get the number of nodes in the HNSW graph * @returns Number of nodes */ getNodeCount() { return this.nodes.size; } /** * Find k nearest neighbors to the query vector * @param query - Query vector * @param k - Number of neighbors to find * @param options - Search options * @returns Array of nearest neighbors */ findNearest(query, k = 10, options = {}) { console.log(`[HNSW] Searching for ${k} nearest neighbors`); if (!this.entryPointId || !this.initialized) { // Fall back to linear search (optimized linear search call) return this._linearSearch(query, k, options); } const timer = this.timer; timer.start('hnsw_search'); const queryDimension = query.length; const exactDimensions = options.exactDimensions || false; // Modify search methods to filter out deleted nodes const originalFilter = options.filter; options.filter = (id) => { // Skip deleted nodes and apply the original filter if it exists return !this.deletedNodes.has(id) && (originalFilter ? originalFilter(id) : true); }; // For dimension-aware search with exact dimension matching if (this.dimensionAware && exactDimensions) { // Get entry point for this dimension (optimized map get) const dimensionEntryPoint = this.dimensionEntryPoints.get(queryDimension); // If we don't have an entry point for this dimension, return empty results if (!dimensionEntryPoint) { timer.stop('hnsw_search'); return []; } // Perform search using the dimension-specific entry point (optimized search call) return this._searchWithEntryPoint(dimensionEntryPoint, query, k, options); } // Standard search using global entry point (optimized search call) return this._searchWithEntryPoint(this.entryPointId, query, k, options); } /** * Fallback linear search implementation * @private */ _linearSearch(query, k, options = {}) { const filter = options.filter || (() => true); const queryDimension = query.length; const results = []; // Optimized linear scan using for...of and direct Map iteration for (const [id, vector] of this.db.memoryStorage.entries()) { // Skip if filter excludes this ID (optimized filter call) if (!filter(id)) continue; if (vector.length !== queryDimension) continue; const dist = this.distanceFunc(query, vector); results.push({ id, dist }); } return results.sort((a, b) => a.dist - b.dist).slice(0, k); } /** * Build the HNSW index for all vectors in the database * @param options - Build options */ async buildIndex(options = {}) { const progressCallback = options.progressCallback || (() => { }); const dimensionAware = options.dimensionAware !== false; // Reset the index (optimized clear operations) this.nodes.clear(); this.nodeToLevel.clear(); this.nodeDimensions.clear(); this.dimensionGroups.clear(); this.dimensionEntryPoints.clear(); this.entryPointId = null; this.initialized = false; this.dimensionAware = dimensionAware; this.deletedNodes.clear(); // Clear deleted nodes // Get all vector IDs from the database (optimized key retrieval) const ids = Array.from(this.db.memoryStorage.keys()); const totalVectors = ids.length; if (totalVectors === 0) { console.log('No vectors to index'); return; } // First, scan all vectors to collect dimensions (optimized loop) for (let i = 0; i < ids.length; i++) { const id = ids[i]; const vector = this.db.getVector(id); // Optimized vector retrieval if (vector) { const dimension = vector.length; this.nodeDimensions.set(id, dimension); // Group vectors by dimension (optimized Set operations) let dimensionSet = this.dimensionGroups.get(dimension); if (!dimensionSet) { dimensionSet = new Set(); this.dimensionGroups.set(dimension, dimensionSet); } dimensionSet.add(id); } // Report progress (optimized modulo operation) if (i % 1000 === 0) { progressCallback((i / totalVectors) * 0.1); // First 10% for dimension scanning } } // Log dimension stats console.log(`Building HNSW index with ${this.dimensionGroups.size} different dimensions`); for (const [dimension, ids] of this.dimensionGroups.entries()) { console.log(`Dimension ${dimension}: ${ids.size} vectors`); } // Now build the index let processedCount = 0; // If dimension-aware, process each dimension group separately to avoid cross-dimension connections if (dimensionAware) { for (const [dimension, ids] of this.dimensionGroups.entries()) { (0, log_1.log)('info', `${dimension} in ids`); // Optimized Map iteration const dimensionIds = Array.from(ids); // Process vectors in this dimension (optimized loop) for (let i = 0; i < dimensionIds.length; i++) { const id = dimensionIds[i]; const vector = this.db.getVector(id); // Optimized vector retrieval if (vector) { this.addVector(id, vector); } processedCount++; // Report progress (optimized modulo operation) if (processedCount % 100 === 0) { const progress = 0.1 + (processedCount / totalVectors) * 0.9; // 10-100% progressCallback(progress); } } } } else { // Process all vectors regardless of dimension (optimized loop) for (let i = 0; i < ids.length; i++) { const id = ids[i]; const vector = this.db.getVector(id); // Optimized vector retrieval if (vector) { this.addVector(id, vector); } // Report progress (optimized modulo operation) if (i % 100 === 0) { const progress = 0.1 + (i / totalVectors) * 0.9; // 10-100% progressCallback(progress); } } } this.initialized = true; progressCallback(1.0); // 100% complete } /** * Create a new node in the graph * @private */ _createNode(id, level, dimension) { // Initialize connections for each level (optimized Map and Set creation) const node = { id, connections: new Map(), dimension, }; // Create empty connection sets for each level (optimized loop) for (let i = 0; i <= level; i++) { node.connections.set(i, new Set()); } this.nodes.set(id, node); this.nodeToLevel.set(id, level); if (dimension !== undefined) { this.nodeDimensions.set(id, dimension); } } /** * Get connections for a node at a specific level * @private */ _getConnections(id, level) { const node = this.nodes.get(id); // Optimized Map get if (!node) return new Set(); const connections = node.connections.get(level); // Optimized Map get return connections || new Set(); } /** * Add bidirectional connections between nodes * @private */ _addConnectionsForNode(newId, level, currId) { // Get the dimension of the new node (optimized dimension retrieval) const newNodeDimension = this.nodeDimensions.get(newId); // Find nearest neighbors for the new node at this level const closestIds = this._selectNeighbors(newId, level, currId); const newNode = this.nodes.get(newId); // Optimized Map get if (!newNode) return; // Get connections at this level for the new node (optimized retrieval and creation) let newConnections = newNode.connections.get(level); if (!newConnections) { newConnections = new Set(); newNode.connections.set(level, newConnections); } // Add connections from new node to its neighbors (optimized loop) for (const closestId of closestIds) { // Skip connections between different dimensions in dimension-aware mode (optimized dimension check) if (this.dimensionAware) { const closestDimension = this.nodeDimensions.get(closestId); if (closestDimension !== newNodeDimension) { continue; } } newConnections.add(closestId); // Optimized Set add // Add backlink from neighbor to new node const neighborNode = this.nodes.get(closestId); // Optimized Map get if (neighborNode) { let neighborConnections = neighborNode.connections.get(level); // Optimized retrieval and creation if (!neighborConnections) { neighborConnections = new Set(); neighborNode.connections.set(level, neighborConnections); } neighborConnections.add(newId); // Optimized Set add // Prune connections if needed this._pruneConnections(neighborNode, level); } } // Prune connections for the new node if needed this._pruneConnections(newNode, level); } /** * Select up to M nearest neighbors for a node * @private */ _selectNeighbors(id, level, entryPointId) { // Get the dimension of the node (optimized dimension retrieval) const nodeDimension = this.nodeDimensions.get(id); // Find ef_construction nearest neighbors for the node const candidates = new Map(); // id -> distance (optimized Map usage) const visited = new Set(); // Optimized Set usage // Start with the entry point const entryDist = this._distance(id, entryPointId); candidates.set(entryPointId, entryDist); // Optimized Map set visited.add(entryPointId); // Optimized Set add // Beam search const results = new Map(); // id -> distance (optimized Map usage) results.set(entryPointId, entryDist); // Optimized Map set // Main loop (optimized loop and candidate selection) while (candidates.size > 0) { // Find closest candidate (optimized candidate selection) let closest = null; let minDist = Infinity; for (const [candidateId, dist] of candidates.entries()) { // Optimized Map iteration if (dist < minDist) { minDist = dist; closest = candidateId; } } if (closest === null) break; // Check if we should stop search (optimized result comparison) let furthestResultDist = -Infinity; if (results.size >= this.efConstruction) { // Only calculate if needed for (const dist of results.values()) { // Optimized Map value iteration if (dist > furthestResultDist) { furthestResultDist = dist; } } if (minDist > furthestResultDist && results.size >= this.efConstruction) { break; } } // Remove from candidates (optimized Map delete) candidates.delete(closest); // Get nearest neighbors for this candidate (optimized retrieval) const connections = this._getConnections(closest, level); for (const neighborId of connections) { // Skip connections to nodes with different dimensions in dimension-aware mode (optimized dimension check) if (this.dimensionAware) { const neighborDimension = this.nodeDimensions.get(neighborId); if (neighborDimension !== nodeDimension) { continue; } } if (!visited.has(neighborId)) { // Optimized Set has check visited.add(neighborId); // Optimized Set add const dist = this._distance(id, neighborId); // Add to results if results is not full or if it's closer than furthest result (optimized result management) let furthestResultDist = -Infinity; if (results.size >= this.efConstruction) { // Only calculate if needed for (const dist of results.values()) { // Optimized Map value iteration if (dist > furthestResultDist) { furthestResultDist = dist; } } } if (results.size < this.efConstruction || dist < furthestResultDist) { if (results.size >= this.efConstruction) { // Optimized check, no need to find ID if not replacing // Find and remove furthest result (less frequent operation, optimized search for furthest) let furthestId = null; let maxDist = -Infinity; for (const [resultId, resultDist] of results.entries()) { // Optimized Map iteration if (resultDist > maxDist) { maxDist = resultDist; furthestId = resultId; } } if (furthestId !== null) { results.delete(furthestId); // Optimized Map delete } } results.set(neighborId, dist); // Optimized Map set candidates.set(neighborId, dist); // Optimized Map set } } } } // Keep only M closest neighbors (optimized sorting and mapping) return Array.from(results.entries()) .sort((a, b) => a[1] - b[1]) // Sorting is still O(N log N), might be bottleneck for large ef .slice(0, this.M) .map(([id]) => id); } /** * Prune connections to maintain at most M connections per node * @private */ _pruneConnections(node, level) { const connections = node.connections.get(level); // Optimized retrieval if (!connections || connections.size <= this.M) return; // Get node's dimension (optimized dimension retrieval) const nodeDimension = this.nodeDimensions.get(node.id); // Calculate distances from this node to all its neighbors const distances = []; // Optimized loop for distance calculation for (const neighborId of connections) { // Skip neighbors with different dimensions in dimension-aware mode (optimized dimension check) if (this.dimensionAware) { const neighborDimension = this.nodeDimensions.get(neighborId); if (neighborDimension !== nodeDimension) { continue; } } const dist = this._distance(node.id, neighborId); distances.push({ id: neighborId, dist }); } // Sort by distance and keep only the M closest (optimized sorting and connection update) distances.sort((a, b) => a.dist - b.dist); // Create new connection set with only M closest (optimized Set creation and population) const newConnections = new Set(); for (let i = 0; i < Math.min(this.M, distances.length); i++) { newConnections.add(distances[i].id); // Optimized Set add } // Replace old connections with pruned set (optimized Map set) node.connections.set(level, newConnections); } /** * Calculate random level for a new node * @private */ _randomLevel() { // Exponential distribution with base 1/levelProbability (no changes needed, already efficient) let level = 0; while (Math.random() < this.levelProbability && level < this.maxLevel) { level++; } return level; } /** * Calculate distance between two nodes * @private */ _distance(id1, id2) { const vec1 = this.db.getVector(id1); // Optimized vector retrieval const vec2 = this.db.getVector(id2); // Optimized vector retrieval if (!vec1 || !vec2) { throw new Error(`Vector not found: ${!vec1 ? id1 : id2}`); } return this.distanceFunc(vec1, vec2); } /** * Calculate distance from query to a node * @private */ _distanceToQuery(query, id) { const vec = this.db.getVector(id); // Optimized vector retrieval if (!vec) { throw new Error(`Vector not found: ${id}`); } return this.distanceFunc(query, vec); } /** * Get HNSW statistics * @returns Stats object with graph information */ getStats() { const levels = Array.from(this.nodeToLevel.values()); // Optimized value retrieval const maxLevel = levels.length > 0 ? Math.max(...levels) : 0; // Count nodes per level const nodesPerLevel = new Array(maxLevel + 1).fill(0); for (const level of levels) { // Optimized loop nodesPerLevel[level]++; } // Calculate average connections per node per level const avgConnectionsPerLevel = []; for (let level = 0; level <= maxLevel; level++) { // Optimized loop let totalConnections = 0; let nodesWithLevel = 0; for (const node of this.nodes.values()) { // Optimized value iteration const connections = node.connections.get(level); // Optimized retrieval if (connections) { totalConnections += connections.size; nodesWithLevel++; } } avgConnectionsPerLevel.push(nodesWithLevel > 0 ? totalConnections / nodesWithLevel : 0); } // Count vectors by dimension const nodesByDimension = {}; for (const dimension of this.nodeDimensions.values()) { // Optimized value iteration nodesByDimension[dimension] = (nodesByDimension[dimension] || 0) + 1; } return { totalNodes: this.nodes.size, maxM: this.M, efConstruction: this.efConstruction, efSearch: this.efSearch, levels: maxLevel + 1, nodesPerLevel, avgConnectionsPerLevel, entryPoint: this.entryPointId, dimensionAware: this.dimensionAware, dimensionGroups: this.dimensionGroups.size, dimensions: { counts: nodesByDimension, entryPoints: Object.fromEntries(this.dimensionEntryPoints), }, deletedNodesCount: this.deletedNodes.size, // Add deleted nodes count to stats }; } /** * Serialize HNSW graph to JSON * @returns JSON string representation of the graph */ serialize() { const data = { M: this.M, efConstruction: this.efConstruction, efSearch: this.efSearch, maxLevel: this.maxLevel, levelProbability: this.levelProbability, entryPointId: this.entryPointId, dimensionAware: this.dimensionAware, dimensionEntryPoints: Array.from(this.dimensionEntryPoints.entries()) || [], // Optimized Map to Array conversion nodes: Array.from(this.nodes.entries()).map(([id, node]) => ({ // Optimized Map iteration and mapping id, level: this.nodeToLevel.get(id), // Optimized retrieval dimension: this.nodeDimensions.get(id), // Optimized retrieval connections: Array.from(node.connections.entries()).map( // Optimized Map iteration and mapping ([level, connections]) => ({ level, connections: Array.from(connections), // Optimized Set to Array conversion })), })) || [], }; return JSON.stringify(data); } /** * Deserialize HNSW graph from JSON * @param json - JSON string representation of the graph * @returns HNSW instance */ static deserialize(json, db) { // Parse JSON string (optimized error handling) const data = JSON.parse(json); const hnsw = new HNSW(db, { M: data.M, efConstruction: data.efConstruction, efSearch: data.efSearch, maxLevel: data.maxLevel, levelProbability: data.levelProbability, dimensionAware: data.dimensionAware !== false, }); hnsw.entryPointId = data.entryPointId; // Restore dimension entry points (optimized Map set population) if (data.dimensionEntryPoints) { for (const [dimension, entryPoint] of data.dimensionEntryPoints) { hnsw.dimensionEntryPoints.set(Number(dimension), entryPoint); } } // Rebuild nodes and their connections (optimized loop and node creation) for (const nodeData of data.nodes) { const { id, level, dimension } = nodeData; // Create node hnsw._createNode(id, level, dimension); // Store dimension information if (dimension !== undefined) { hnsw.nodeDimensions.set(id, dimension); // Add to dimension group (optimized Set operations) let dimensionSet = hnsw.dimensionGroups.get(dimension); if (!dimensionSet) { dimensionSet = new Set(); hnsw.dimensionGroups.set(dimension, dimensionSet); } dimensionSet.add(id); } // Add connections at each level (optimized loop and connection population) for (const connData of nodeData.connections) { // Use type assertion to inform TypeScript of the correct type const levelConnections = new Set(connData.connections); const node = hnsw.nodes.get(id); // Optimized retrieval if (node) { node.connections.set(connData.level, levelConnections); } } } hnsw.initialized = true; return hnsw; } /** * Save HNSW index to disk * @param filePath - Path to save the index */ async saveIndex(filePath) { const data = this.serialize(); await fs_1.promises.writeFile(filePath, data, 'utf8'); } /** * Load HNSW index from disk * @param filePath - Path to load the index from * @param db - Vector database */ static async loadIndex(filePath, db, options = {}) { const data = await fs_1.promises.readFile(filePath, 'utf8'); const hnsw = HNSW.deserialize(data, db); // Set dimension-aware mode from options if provided if (options.dimensionAware !== undefined) { hnsw.dimensionAware = options.dimensionAware; } return hnsw; } /** * Clean up resources */ close() { // Clear internal data structures to free memory (optimized clear operations) this.nodes.clear(); this.nodeToLevel.clear(); this.nodeDimensions.clear(); this.dimensionGroups.clear(); this.dimensionEntryPoints.clear(); this.entryPointId = null; this.initialized = false; this.deletedNodes.clear(); // Clear deleted nodes } } exports.default = HNSW; //# sourceMappingURL=hnsw.js.map