UNPKG

hnsw-lite

Version:

A lightweight HNSW implementation for nearest neighbor search.

141 lines 6.97 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.HNSW = void 0;
const layer_1 = require("./layer");
const node_1 = require("./node");
const calculateEuclideanDistance_1 = require("./utils/calculateEuclideanDistance");
const calculateCosineSimilarity_1 = require("./utils/calculateCosineSimilarity");

/**
 * Map a distance-function name found in serialized JSON back to a built-in
 * distance function.
 *
 * `HNSW#toJSON` stores `Function#name` of the active distance function, which
 * is not the same string the constructor accepts (`'euclideanDistance'` /
 * `'cosineSimilarity'`). This helper accepts both spellings so a serialized
 * index can be rebuilt.
 *
 * @param {*} name - Value of the `distanceFunction` field of the JSON object.
 * @returns {Function|undefined} The matching built-in, or `undefined` when the
 *   name does not identify one.
 */
function resolveSerializedDistanceFunction(name) {
    if (typeof name !== 'string' || name === '') {
        return undefined;
    }
    const euclidean = calculateEuclideanDistance_1.calculateEuclideanDistance;
    const cosine = calculateCosineSimilarity_1.calculateCosineSimilarity;
    if (name === 'euclideanDistance') {
        return euclidean;
    }
    if (name === 'cosineSimilarity') {
        return cosine;
    }
    // Fall back to matching the runtime function name written by toJSON().
    return [euclidean, cosine].find((fn) => typeof fn === 'function' && fn.name === name);
}

/**
 * Layered nearest-neighbour index. Each layer is a `Layer` instance; a node is
 * always stored in layer 0 and, with decreasing probability, in higher layers.
 */
class HNSW {
    /**
     * @param {number} maxLayers - Number of layers to create (positive integer).
     * @param {number} maxEdges - Passed to every `Layer` and to rebuilt `Node`s.
     * @param {string|Function} [distanceFunction='euclideanDistance'] - Either
     *   `'euclideanDistance'`, `'cosineSimilarity'`, or a custom function that
     *   is called as `fn(vectorA, vectorB)`.
     * @throws {Error} If `distanceFunction` is an unknown name or neither a
     *   string nor a function, or if `maxLayers` is not a positive integer.
     */
    constructor(maxLayers, maxEdges, distanceFunction = 'euclideanDistance') {
        this.layers = [];
        this.maxLayers = maxLayers;
        this.maxEdges = maxEdges;
        // Determine the distance function to use
        if (typeof distanceFunction === 'string') {
            if (distanceFunction === 'euclideanDistance') {
                this.distanceFunction = calculateEuclideanDistance_1.calculateEuclideanDistance;
            }
            else if (distanceFunction === 'cosineSimilarity') {
                this.distanceFunction = calculateCosineSimilarity_1.calculateCosineSimilarity;
            }
            else {
                throw new Error(`Invalid distance function name: ${distanceFunction}. Valid options are 'euclideanDistance' or 'cosineSimilarity'.`);
            }
        }
        else if (typeof distanceFunction === 'function') {
            this.distanceFunction = distanceFunction;
        }
        else {
            throw new Error(`Invalid distanceFunction type: expected a string or function, but received ${typeof distanceFunction}.`);
        }
        // A fractional or non-positive layer count would create an index whose
        // add()/query() index into missing layers, so reject it up front.
        if (!Number.isInteger(maxLayers) || maxLayers < 1) {
            throw new Error(`Invalid maxLayers: expected a positive integer, but received ${String(maxLayers)}.`);
        }
        // Create one Layer per level; the level index is handed to the Layer.
        for (let i = 0; i < maxLayers; i++) {
            this.layers.push(new layer_1.Layer(this.maxEdges, i));
        }
    }

    /**
     * Add a node to the index.
     *
     * The node is inserted into layer 0 and into every layer up to a randomly
     * drawn level (each additional level has probability 0.5, capped at the
     * top layer).
     *
     * @param {*} id - Identifier handed to `Layer#addNode`.
     * @param {number[]} vector - Components must be finite numbers in [0, 1].
     * @throws {Error} If a component is out of range or not a finite number,
     *   or if the length differs from the vectors already stored.
     */
    add(id, vector) {
        // Validate that the vector contains only numbers between 0 and 1 inclusive
        const isValidVector = vector.every((value) => typeof value === "number" && value >= 0 && value <= 1 && Number.isFinite(value));
        if (!isValidVector) {
            throw new Error(`Invalid vector: All values must be floating-point numbers between 0 and 1 inclusive. Received: ${JSON.stringify(vector)}`);
        }
        if (this.layers[0].nodes.length === 0) {
            // First vector: remember its length to validate future vectors
            this._vectorLength = vector.length;
        }
        else {
            const expectedLength = this._vectorLength;
            if (vector.length !== expectedLength) {
                throw new Error(`Vector length mismatch: Expected vectors of length ${expectedLength}, but received a vector of length ${vector.length}.`);
            }
        }
        // Draw the node's top level: keep climbing with probability 0.5
        let level = 0;
        while (Math.random() < 0.5 && level < this.maxLayers - 1) {
            level++;
        }
        // Insert into every layer from 0 up to the drawn level
        for (let currentLayer = 0; currentLayer <= level; currentLayer++) {
            this.layers[currentLayer].addNode(vector, id, currentLayer);
        }
    }

    /**
     * Add several nodes by calling `add` for each entry.
     *
     * @param {Iterable<Array>} bulkData - Iterable of `[id, vector]` pairs
     *   (id first, vector second).
     * @throws {Error} Whatever `add` throws for an invalid entry; entries
     *   before the failing one remain inserted.
     */
    addBulk(bulkData) {
        for (const [id, vector] of bulkData) {
            this.add(id, vector);
        }
    }

    /**
     * Remove a node from every layer.
     *
     * @param {*} nodeId - Identifier passed to `Layer#removeNode` on each layer.
     */
    remove(nodeId) {
        for (const layer of this.layers) {
            layer.removeNode(nodeId);
        }
    }

    /**
     * Search the index from the top layer down to layer 0.
     *
     * Each upper layer is searched for a single result, whose id is used as
     * the entry point for the layer below. Layer 0 is searched for `nClosest`
     * results.
     *
     * @param {number[]} queryVector - Vector to search for.
     * @param {number} [nClosest=1] - Number of results requested from layer 0.
     * @returns {*} Whatever `Layer#searchLayer` returns for layer 0 (used here
     *   as an array of node ids).
     */
    query(queryVector, nClosest = 1) {
        let currentNodeId = null;
        for (let level = this.maxLayers - 1; level > 0; level--) {
            const closestNodeIds = this.layers[level].searchLayer(currentNodeId, queryVector, 1);
            // Keep `null` as the "no entry point" sentinel when a layer yields
            // no result, instead of leaking `undefined` to the next layer.
            currentNodeId = closestNodeIds.length > 0 ? closestNodeIds[0] : null;
        }
        // Final query at layer 0 - return the top n closest nodes
        return this.layers[0].searchLayer(currentNodeId, queryVector, nClosest);
    }

    /**
     * Produce a plain-object snapshot of the index.
     *
     * @returns {{layers: Array, maxLayers: number, maxEdges: number, distanceFunction: string}}
     *   `layers` holds each layer's own `toJSON()` output; `distanceFunction`
     *   is the `name` property of the active distance function.
     */
    toJSON() {
        return {
            layers: this.layers.map((layer) => layer.toJSON()),
            maxLayers: this.maxLayers,
            maxEdges: this.maxEdges,
            distanceFunction: this.distanceFunction.name
        };
    }

    /**
     * Rebuild an index from an object shaped like the output of `toJSON`.
     *
     * @param {Object} json - Object with `maxLayers`, `maxEdges`, `layers`
     *   and `distanceFunction`. Each layer entry is read as
     *   `{ nodes: [{ id, vector, neighbors: [neighborId, ...] }] }`.
     * @param {string|Function} [distanceFunction] - Optional override, needed
     *   when the index was built with a custom distance function (only a
     *   function's name is serialized, never its body).
     * @returns {HNSW} The reconstructed index.
     * @throws {Error} If the distance function cannot be resolved (same
     *   errors as the constructor).
     */
    static rebuildFromJSON(json, distanceFunction) {
        const { maxLayers, maxEdges, layers } = json;
        let resolvedDistance = distanceFunction;
        if (resolvedDistance === undefined) {
            const builtIn = resolveSerializedDistanceFunction(json.distanceFunction);
            // Unknown names are passed through so the constructor applies its
            // default (for `undefined`) or raises its usual error.
            resolvedDistance = builtIn !== undefined ? builtIn : json.distanceFunction;
        }
        const hnsw = new HNSW(maxLayers, maxEdges, resolvedDistance);
        layers.forEach((layerData, layerIndex) => {
            const layer = hnsw.layers[layerIndex];
            // Pass 1: create every node of this layer so that neighbour ids
            // referring to nodes listed later can be resolved in pass 2.
            const rebuiltNodes = layerData.nodes.map((nodeData) => {
                const node = new node_1.Node(nodeData.id, nodeData.vector, maxEdges, layerIndex);
                layer.nodes.push(node);
                layer.nodeMap.set(node.id, node);
                return node;
            });
            // Pass 2: re-link neighbours within the same layer. Distances are
            // recomputed because only neighbour ids are read from the JSON.
            layerData.nodes.forEach((nodeData, nodeIndex) => {
                const node = rebuiltNodes[nodeIndex];
                nodeData.neighbors.forEach((neighborId) => {
                    const neighborNode = layer.nodeMap.get(neighborId);
                    if (neighborNode) {
                        const distance = hnsw.distanceFunction(node.vector, neighborNode.vector);
                        node.addNeighbor(neighborNode, distance);
                    }
                });
            });
        });
        // Restore the length check used by add(); without it every add() on a
        // rebuilt, non-empty index would fail the length comparison.
        const bottomNodes = hnsw.layers[0].nodes;
        if (bottomNodes.length > 0) {
            hnsw._vectorLength = bottomNodes[0].vector.length;
        }
        return hnsw;
    }

    /**
     * Look a node up by id, scanning layers from layer 0 upward.
     *
     * @param {*} id - Node identifier.
     * @returns {*} The first node found in any layer's `nodeMap`, or
     *   `undefined` when no layer contains the id.
     */
    getNodeById(id) {
        for (const layer of this.layers) {
            const node = layer.nodeMap.get(id);
            if (node) {
                return node;
            }
        }
        return undefined;
    }
}
exports.HNSW = HNSW;
//# sourceMappingURL=hnsw.js.map