
@n2flowjs/nbase

Neural Vector Database for efficient similarity search

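The compiled module below (vector_db.js) exposes the VectorDB class. As a quick orientation before the source, here is a minimal usage sketch in TypeScript; it assumes the package is installed from npm and that VectorDB is re-exported from the package root (if it is not, import it from this compiled module's path instead), and all IDs, values, and the ./vector_data path are illustrative:

```typescript
import { VectorDB } from '@n2flowjs/nbase'; // assumption: re-exported from the package root

// Create a database of 3-dimensional vectors, persisted (gzip-compressed) under ./vector_data
const db = new VectorDB(3, './vector_data', { useCompression: true });

// Add vectors; IDs may be supplied or auto-assigned, metadata is optional
const a = db.addVector(undefined, new Float32Array([0.1, 0.2, 0.3]), { label: 'a' });
db.bulkAdd([
    { id: 'b', vector: [0.1, 0.25, 0.35], metadata: { label: 'b' } },
    { id: 'c', vector: [0.9, 0.8, 0.7], metadata: { label: 'c' } },
]);

// Linear-scan nearest-neighbour search (cosine distance = 1 - cosine similarity)
const nearest = db.findNearest([0.1, 0.2, 0.3], 2, { metric: 'cosine' });
console.log(nearest); // [{ id: ..., dist: ... }, ...]

// Persist meta.json(.gz) and vec.bin(.gz) to ./vector_data
await db.save();
```
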
"use strict"; // --- START OF FILE vector_db.ts --- var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.VectorDB = void 0; const events_1 = require("events"); const path_1 = __importDefault(require("path")); const fs_1 = require("fs"); const zlib_1 = __importDefault(require("zlib")); const util_1 = require("util"); const log_1 = require("../utils/log"); const gzip = (0, util_1.promisify)(zlib_1.default.gzip); const gunzip = (0, util_1.promisify)(zlib_1.default.gunzip); const config_1 = __importDefault(require("../config")); // Assuming config exists and has defaults // Function to serialize Float32Array to Buffer function serializeVector(vector) { // Create a Buffer from the underlying ArrayBuffer of the Float32Array return Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength); } // Function to deserialize Buffer to Float32Array function deserializeVector(buffer, dimension) { if (buffer.length !== dimension * 4) { // Basic sanity check (4 bytes per float32) throw new Error(`Buffer length ${buffer.length} does not match expected size for dimension ${dimension}`); } // Create a Float32Array viewing the same memory as the Buffer // Ensure proper alignment and byte offset handling if the buffer comes from a larger allocation return new Float32Array(buffer.buffer, buffer.byteOffset, dimension); } // --- VectorDB Base Class (Optimized Persistence) --- /** * Vector database for storing and retrieving high-dimensional vectors with associated metadata. * Provides efficient in-memory storage with optional persistence to disk. * * Key features: * - Store vectors with numeric or string IDs * - Associate metadata with vectors * - Find nearest neighbors using different distance metrics * - Persist database to disk with optional compression * - Event-based architecture for operation monitoring * * @example * ```typescript * // Create an in-memory database with default vector size of 384 * const db = new VectorDB(384); * * // Add vectors with metadata * const id = db.addVector(undefined, new Float32Array([0.1, 0.2, 0.3]), { source: 'example' }); * * // Find similar vectors * const results = db.findNearest([0.1, 0.15, 0.25], 5, { metric: 'cosine' }); * * // Save to disk * db.setPath('./vector_data'); * await db.save(); * ``` * * @fires vector:add - When a vector is added * @fires vectors:bulkAdd - When multiple vectors are added * @fires vector:delete - When a vector is deleted * @fires metadata:add - When metadata is added to a vector * @fires metadata:update - When vector metadata is updated * @fires db:save - When the database is saved to disk * @fires db:load - When the database is loaded from disk * @fires db:close - When the database is closed * * @extends EventEmitter */ class VectorDB extends events_1.EventEmitter { constructor(suggestedVectorSize = null, dbPath = null, options = {}) { super(); this.defaultVectorSize = null; this.savePromise = null; this.isClosed = false; this.isReady = false; // Flag to indicate if the database is ready for operations this.defaultVectorSize = suggestedVectorSize; this.memoryStorage = new Map(); this.metadata = new Map(); this.vectorDimensions = new Map(); this.idCounter = 1; this.dbPath = dbPath; this.useCompression = options.useCompression ?? 
false; // Default to no compression if (dbPath) { (0, log_1.log)('info', `[VectorDB] Constructor Loading database from ${dbPath}...`); this.load() .catch((err) => { // Only log error if file likely existed but failed to load if (err.code !== 'ENOENT') { (0, log_1.log)('error', `Error loading database from ${dbPath}:`, err); } else { (0, log_1.log)('info', `Database files not found at ${dbPath}, starting fresh.`); } }) .finally(() => { this.isReady = true; // Set ready flag after load attempt }); } } getIdCounter() { return this.idCounter; } // --- Core Methods (Mostly Unchanged, check ID handling) --- setPath(dbPath) { this.dbPath = dbPath; } vectorSize() { // (Keep existing logic for determining most common size) if (this.defaultVectorSize !== null) { return this.defaultVectorSize; } if (this.memoryStorage.size === 0) { return config_1.default.defaults.vectorSize || 0; } const dimensionCounts = new Map(); let maxCount = 0; let mostCommonSize = config_1.default.defaults.vectorSize || 0; for (const dim of this.vectorDimensions.values()) { const count = (dimensionCounts.get(dim) || 0) + 1; dimensionCounts.set(dim, count); if (count > maxCount) { maxCount = count; mostCommonSize = dim; } } return mostCommonSize; } addVector(id, vector, metadata // Allow adding metadata directly ) { let vectorId = id !== undefined ? id : this.idCounter++; // Optional: Standardize ID to string for internal consistency? // vectorId = String(vectorId); if (this.memoryStorage.has(vectorId)) { (0, log_1.log)('warn', `Vector with ID ${vectorId} already exists. Overwriting.`); // Decide if overwrite is desired or should throw error } const typedVector = vector instanceof Float32Array ? vector : new Float32Array(vector); const dimension = typedVector.length; if (this.memoryStorage.size === 0 && this.defaultVectorSize === null) { this.defaultVectorSize = dimension; } this.memoryStorage.set(vectorId, typedVector); this.vectorDimensions.set(vectorId, dimension); // Store dimension if (metadata) { this.metadata.set(vectorId, metadata); } // Update idCounter if a numeric ID is provided explicitly if (typeof vectorId === 'number' && vectorId >= this.idCounter) { this.idCounter = vectorId + 1; } this.emit('vector:add', { id: vectorId, dimensions: dimension }); return vectorId; } bulkAdd(vectors) { (0, log_1.log)('info', `[VectorDB] Bulk adding ${vectors.length} vectors...`); let addedCount = 0; const addedIds = []; for (const item of vectors) { try { // Pass metadata if available in VectorData type const id = this.addVector(item.id, item.vector, item.metadata); addedCount++; addedIds.push(id); } catch (error) { (0, log_1.log)('error', `Error adding vector ${item.id}:`, error); } } // Verify vectors were actually added to memory storage if (addedCount > 0 && this.memoryStorage.size === 0) { (0, log_1.log)('warn', '[VectorDB] Warning: bulkAdd reported success but memoryStorage is empty'); } else { (0, log_1.log)('info', `[VectorDB] Successfully added ${addedCount} vectors to memory storage. Storage size: ${this.memoryStorage.size}`); } this.emit('vectors:bulkAdd', { count: addedCount, ids: addedIds }); return addedCount; } getVector(id) { // Keep existing logic (try exact, then conversions) return this.memoryStorage.get(id) ?? 
null; // Simpler check with nullish coalescing // Consider removing automatic type conversion for stricter behavior if desired } hasVector(id) { // Keep existing logic return this.memoryStorage.has(id); // Consider removing automatic type conversion } deleteVector(id) { const deleted = this.memoryStorage.delete(id); if (deleted) { this.metadata.delete(id); this.vectorDimensions.delete(id); // Remove dimension info this.emit('vector:delete', { id }); } return deleted; } updateVector(id, vector) { if (!this.memoryStorage.has(id)) { (0, log_1.log)('warn', `Attempted to update non-existent vector ID: ${id}`); return false; } const typedVector = vector instanceof Float32Array ? vector : new Float32Array(vector); const dimension = typedVector.length; this.memoryStorage.set(id, typedVector); this.vectorDimensions.set(id, dimension); // Update dimension this.emit('vector:update', { id, dimensions: dimension }); return true; } addMetadata(id, data) { if (!this.memoryStorage.has(id)) { // Use hasVector for consistency? throw new Error(`Vector with ID ${id} not found`); } this.metadata.set(id, data); this.emit('metadata:add', { id, metadata: data }); } getMetadata(id) { // Keep existing logic return this.metadata.get(id) ?? null; // Consider removing automatic type conversion } updateMetadata(id, data) { // Keep existing logic if (!this.memoryStorage.has(id)) { (0, log_1.log)('warn', `Attempted to update metadata for non-existent vector ID: ${id}`); return false; // Or throw error } const current = this.metadata.get(id) || null; let updated; if (typeof data === 'function') { updated = data(current); } else { updated = { ...(current || {}), ...data }; // Ensure current is not null } this.metadata.set(id, updated); this.emit('metadata:update', { id, metadata: updated }); return true; } getVectorDimension(id) { // Direct lookup is now primary source return this.vectorDimensions.get(id) ?? null; // Consider removing automatic type conversion } // --- Distance Calculations --- _calculateNorm(vector) { let sum = 0; for (let i = 0; i < vector.length; i++) { sum += vector[i] * vector[i]; } return Math.sqrt(sum); } _dotProduct(a, b) { const len = Math.min(a.length, b.length); // Handle dimension mismatch let dot = 0; for (let i = 0; i < len; i++) { dot += a[i] * b[i]; } return dot; } _euclideanDistance(a, b) { const len = Math.min(a.length, b.length); let sum = 0; for (let i = 0; i < len; i++) { const d = a[i] - b[i]; sum += d * d; } // Optional: Penalty for dimension mismatch (consider if really needed) const dimDiff = Math.abs(a.length - b.length); if (dimDiff > 0) { // Simple penalty, maybe make this configurable or remove sum += dimDiff * (config_1.default.defaults.dimensionMismatchPenalty ?? 0.01); } return Math.sqrt(sum); } // Cosine Similarity returns similarity (higher is better). // Often 1 - similarity is used as a distance metric (lower is better). _cosineDistance(a, b) { const normA = this._calculateNorm(a); const normB = this._calculateNorm(b); if (normA === 0 || normB === 0) { return 1.0; // Handle zero vectors - maximally distant } // Ensure dimensions match for a meaningful cosine similarity if (a.length !== b.length) { // Or handle as per _euclideanDistance mismatch logic? // Returning max distance is safer if dimensions must match. (0, log_1.log)('warn', `Cosine distance called on vectors with different dimensions (${a.length} vs ${b.length}). 
Returning max distance.`); return 1.0; } const dot = this._dotProduct(a, b); // Clamp the result to [-1, 1] due to potential floating point inaccuracies const similarity = Math.max(-1.0, Math.min(1.0, dot / (normA * normB))); return 1.0 - similarity; // Convert similarity to distance } _calculateDistance(a, b, metric) { switch (metric) { case 'cosine': return this._cosineDistance(a, b); case 'euclidean': default: // Default to Euclidean return this._euclideanDistance(a, b); } } // --- Search (Linear Scan - Base Implementation) --- findNearest(query, k = 10, options = {}) { const typedQuery = query instanceof Float32Array ? query : new Float32Array(query); const metric = options.metric ?? 'euclidean'; // Default metric return this._linearSearch(typedQuery, k, metric, options.filter); } _linearSearch(query, k, metric, filter) { const results = []; const queryDim = query.length; for (const [id, vector] of this.memoryStorage.entries()) { // Filter first (if provided) to potentially skip distance calculation if (filter) { const meta = this.metadata.get(id); if (!filter(id, meta)) { continue; } } // Important: Ensure dimension compatibility based on metric if (metric === 'cosine' && vector.length !== queryDim) { // Cosine requires same dimensions continue; } // Euclidean can handle different dimensions (with penalty) const dist = this._calculateDistance(query, vector, metric); results.push({ id, dist }); } // Sort by distance and limit to k results // Note: For cosine distance (1-similarity), lower is better, so sort ascending still works return results.sort((a, b) => a.dist - b.dist).slice(0, k); } // --- Optimized Persistence --- _getMetaFilePath() { if (!this.dbPath) throw new Error('DB path not set'); return path_1.default.join(this.dbPath, 'meta.json') + (this.useCompression ? '.gz' : ''); } _getVectorFilePath() { if (!this.dbPath) throw new Error('DB path not set'); return path_1.default.join(this.dbPath, 'vec.bin' + (this.useCompression ? '.gz' : '')); } async save() { (0, log_1.log)('info', '[VectorDB] Saving database...'); if (!this.dbPath) { (0, log_1.log)('warn', '[VectorDB] No dbPath specified, skipping save.'); return; } if (this.isClosed) { (0, log_1.log)('warn', '[VectorDB] Attempted to save a closed database.'); return; } (0, log_1.log)('info', `[VectorDB] Saving to ${this.dbPath}`); // Only log and return existing promise if a save is already in progress if (this.savePromise) { (0, log_1.log)('info', `[VectorDB] Save already in progress, waiting...`); return this.savePromise; } this.savePromise = (async () => { const metaFilePath = this._getMetaFilePath(); const vectorFilePath = this._getVectorFilePath(); (0, log_1.log)('info', '[VectorDB] Meta file path:', metaFilePath); (0, log_1.log)('info', '[VectorDB] Vector file path:', vectorFilePath); try { // Ensure directory exists await fs_1.promises.mkdir(path_1.default.dirname(metaFilePath), { recursive: true }); (0, log_1.log)('info', '[VectorDB] Meta file path:', metaFilePath); (0, log_1.log)('info', '[VectorDB] Vector file path:', vectorFilePath); const metaData = {}; this.metadata.forEach((value, key) => { // Ensure keys are strings for JSON compatibility metaData[String(key)] = value; }); const vectorInfo = []; const vectorBuffers = []; let currentOffset = 0; // 1. 
Prepare vector data and metadata structure (0, log_1.log)('info', `[VectorDB] Preparing vector data for saving with ${this.memoryStorage.size} vectors...`); for (const [id, vector] of this.memoryStorage.entries()) { const vectorBuffer = serializeVector(vector); vectorBuffers.push(vectorBuffer); vectorInfo.push({ id: id, offset: currentOffset, length: vectorBuffer.length, // Store byte length dim: vector.length, // Store dimension }); currentOffset += vectorBuffer.length; } const saveData = { version: 1, // Add a version number for future format changes defaultVectorSize: this.defaultVectorSize, idCounter: this.idCounter, vectors: vectorInfo, metadata: metaData, }; (0, log_1.log)('info', `[VectorDB] Vector data prepared for saving: ${vectorInfo.length} vectors`); // 2. Write metadata file (0, log_1.log)('info', `[VectorDB] Writing metadata file to: ${metaFilePath} with ${vectorInfo.length} vectors`); // Ensure metadata is JSON-serializable let metaContent = JSON.stringify(saveData); if (this.useCompression) { metaContent = await gzip(metaContent); } (0, log_1.log)('info', '[VectorDB] Writing meta file to:', metaFilePath); await fs_1.promises.writeFile(metaFilePath, metaContent); (0, log_1.log)('info', '[VectorDB] Meta file written successfully.'); // 3. Write vector data file let vectorContent = Buffer.concat(vectorBuffers); if (this.useCompression) { vectorContent = await gzip(vectorContent); } (0, log_1.log)('info', `[VectorDB] Writing vector file to: ${vectorFilePath} (${vectorBuffers.length} vectors, ${vectorContent.length} bytes)`); await fs_1.promises.writeFile(vectorFilePath, vectorContent); (0, log_1.log)('info', '[VectorDB] Vector file written successfully.'); // 4. Emit save event this.emit('db:save', { path: this.dbPath || 'DB path not set', count: this.memoryStorage.size, }); (0, log_1.log)('info', '[VectorDB] Save event emitted successfully.'); } catch (error) { (0, log_1.log)('error', `Error saving database to ${this.dbPath}:`, error); throw error; // Re-throw to indicate failure } finally { this.savePromise = null; // Release lock } })(); (0, log_1.log)('info', '[VectorDB] Save promise created.'); return this.savePromise; } async load() { if (!this.dbPath) { throw new Error('Database path not specified for loading.'); } if (this.isClosed) { throw new Error('Cannot load into a closed database.'); } const metaFilePath = this._getMetaFilePath(); const vectorFilePath = this._getVectorFilePath(); // Check if files exist first to avoid unnecessary error logging for new databases const metaExists = (0, fs_1.existsSync)(metaFilePath); const vecExists = (0, fs_1.existsSync)(vectorFilePath); // If both files don't exist, this is likely a new database if (!metaExists && !vecExists) { (0, log_1.log)('info', `[VectorDB] Database files not found at ${this.dbPath}. Starting new database.`); return; // Exit early for a new database initialization } try { // 1. Read and parse metadata file let metaContentBuffer = await fs_1.promises.readFile(metaFilePath); if (this.useCompression) { metaContentBuffer = await gunzip(metaContentBuffer); } const saveData = JSON.parse(metaContentBuffer.toString('utf8')); if (saveData.version !== 1) { throw new Error(`Unsupported database format version: ${saveData.version}`); } // 2. Read vector data file let vectorDataBuffer = await fs_1.promises.readFile(vectorFilePath); if (this.useCompression) { vectorDataBuffer = await gunzip(vectorDataBuffer); } // 3. Clear existing data this.memoryStorage.clear(); this.metadata.clear(); this.vectorDimensions.clear(); // 4. 
Load data into memory this.defaultVectorSize = saveData.defaultVectorSize; this.idCounter = saveData.idCounter ?? 1; // Use saved counter or default for (const vecInfo of saveData.vectors) { const { id, offset, length, dim } = vecInfo; if (offset + length > vectorDataBuffer.length) { (0, log_1.log)('error', `Invalid offset/length for vector ${id}. Offset: ${offset}, Length: ${length}, Buffer Size: ${vectorDataBuffer.length}`); continue; // Skip corrupted entry } const vectorSlice = vectorDataBuffer.slice(offset, offset + length); const vector = deserializeVector(vectorSlice, dim); this.memoryStorage.set(id, vector); this.vectorDimensions.set(id, dim); // Load metadata (handle string keys from JSON) const meta = saveData.metadata[String(id)]; if (meta) { this.metadata.set(id, meta); } } this.emit('db:load', { path: this.dbPath, count: this.memoryStorage.size, }); (0, log_1.log)('info', `[VectorDB] Loaded ${this.memoryStorage.size} vectors from ${this.dbPath}`); } catch (error) { if (error.code === 'ENOENT') { // Files not found is expected for a new DB, don't throw (0, log_1.log)('info', `Database files not found at ${this.dbPath}. Starting new database.`); return; // Don't re-throw ENOENT } (0, log_1.log)('error', `Error loading database from ${this.dbPath}:`, error); throw error; // Re-throw other errors } } // --- Stats and Lifecycle --- getStats() { // (Keep existing logic, ensure it uses this.vectorDimensions) const dimensionCounts = {}; for (const dim of this.vectorDimensions.values()) { dimensionCounts[dim] = (dimensionCounts[dim] || 0) + 1; } // Recalculate memory usage based on actual stored data let vectorMemory = 0; this.memoryStorage.forEach((vec) => (vectorMemory += vec.byteLength)); let metadataMemory = 0; try { // Estimate metadata size (crude) this.metadata.forEach((meta) => (metadataMemory += JSON.stringify(meta).length * 2)); } catch (e) { (0, log_1.log)('warn', 'Could not estimate metadata size:', e); } const baseStats = { vectorCount: this.memoryStorage.size, vectorSize: this.vectorSize(), defaultVectorSize: this.defaultVectorSize ?? 0, // Use 0 if null metadataCount: this.metadata.size, dimensions: { counts: dimensionCounts, unique: Object.keys(dimensionCounts).length, }, // More accurate memory usage estimate memoryUsage: vectorMemory + metadataMemory + this.memoryStorage.size * 16 + // Estimate Map overhead for vectors this.metadata.size * 16 + // Estimate Map overhead for metadata this.vectorDimensions.size * 8, // Estimate Map overhead for dimensions // Placeholder for cluster stats (filled by subclass) clusters: { count: 0, avgSize: 0, distribution: [], dimensions: {} }, }; // Compatibility check for older DBStats type if needed if (!baseStats.clusters.dimensions) { baseStats.clusters.dimensions = {}; } return baseStats; } _estimateMemoryUsage() { // This method is now effectively replaced by the calculation within getStats() // Kept for potential internal use or backward compatibility if needed elsewhere let vectorMemory = 0; this.memoryStorage.forEach((vec) => (vectorMemory += vec.byteLength)); let metadataMemory = 0; try { this.metadata.forEach((meta) => (metadataMemory += JSON.stringify(meta).length * 2)); } catch (e) { /* ignore */ } const dimensionOverhead = this.vectorDimensions.size * 8; return vectorMemory + metadataMemory + dimensionOverhead + (this.memoryStorage.size + this.metadata.size) * 16; // Rough map overhead } /** * Gets a list of metadata entries that match specified criteria. 
* * @param criteria Can be: * - A string: field name to check for existence * - An array of strings: multiple field names to check for existence * - An object: key-value pairs where each key must exist and match the specified value * @param values Optional value(s) to match against the field(s) when using string/array input * @returns Array of {id, metadata} objects for entries that match the criteria * * @example * ```typescript * // Get all metadata entries that have a 'source' field * const allWithSource = db.getMetadataWithField('source'); * * // Get metadata entries where 'category' equals 'article' * const articles = db.getMetadataWithField('category', 'article'); * * // Get entries that have both 'author' and 'title' fields * const authoredContent = db.getMetadataWithField(['author', 'title']); * * // Get entries where 'type' is 'book' AND 'published' is true * const publishedBooks = db.getMetadataWithField(['type', 'published'], ['book', true]); * * // Using object syntax (recommended): type='book' AND published=true * const publishedBooks = db.getMetadataWithField({ type: 'book', published: true }); * ``` */ getMetadataWithField(criteria, values, options // Optional limit for results ) { const results = []; // Handle object criteria format (new format) if (criteria !== null && typeof criteria === 'object' && !Array.isArray(criteria)) { const criteriaObj = criteria; const fields = Object.keys(criteriaObj); this.metadata.forEach((meta, id) => { let match = true; // Check if all fields exist and match their values for (const field of fields) { if (!(field in meta) || meta[field] !== criteriaObj[field]) { match = false; break; } } if (match) { results.push({ id, metadata: { ...meta } }); // Return a copy of metadata } }); if (options?.limit) { return results.slice(0, options.limit); // Limit results if specified } return results; } // Handle legacy string/array format const fieldArray = Array.isArray(criteria) ? criteria : [criteria]; const valueArray = values !== undefined ? (Array.isArray(values) ? values : [values]) : undefined; // If values are provided, ensure the length matches fields if (valueArray !== undefined && valueArray.length !== fieldArray.length) { (0, log_1.log)('warn', 'Values array length does not match fields array length. Some value checks will be ignored.'); } this.metadata.forEach((meta, id) => { let match = true; // Check all fields exist and match values if provided for (let i = 0; i < fieldArray.length; i++) { const field = fieldArray[i]; if (!(field in meta)) { match = false; break; } // If values are provided, check if the field value matches if (valueArray !== undefined && i < valueArray.length && meta[field] !== valueArray[i]) { match = false; break; } } if (match) { results.push({ id, metadata: { ...meta } }); // Return a copy of metadata } }); return results; } getVectorCount() { return this.memoryStorage.size; } /** * Extract relationships between vectors based on distance or custom criteria. * * @param threshold - The maximum distance between vectors to consider them related. * @param metric - Distance metric to use (e.g., 'cosine', 'euclidean'). * @returns An array of relationships, where each relationship links two vector IDs, their distance, and optional metadata. 
*/ extractRelationships(threshold, metric = 'euclidean') { const relationships = []; // Iterate over all vectors const vectorEntries = Array.from(this.memoryStorage.entries()); for (let i = 0; i < vectorEntries.length; i++) { const [id1, vector1] = vectorEntries[i]; for (let j = i + 1; j < vectorEntries.length; j++) { const [id2, vector2] = vectorEntries[j]; // Ensure dimension compatibility if (vector1.length !== vector2.length) { (0, log_1.log)('warn', `Dimension mismatch between vector ${id1} and ${id2}, skipping.`); continue; } // Calculate distance const distance = this._calculateDistance(vector1, vector2, metric); // Check if the distance is within the threshold if (distance <= threshold) { // Get metadata for both vectors if available const metadata1 = this.metadata.get(id1); const metadata2 = this.metadata.get(id2); relationships.push({ vector1: id1, vector2: id2, distance, metadata1: metadata1 ? { ...metadata1 } : undefined, metadata2: metadata2 ? { ...metadata2 } : undefined }); } } } (0, log_1.log)('info', `[VectorDB] Extracted ${relationships.length} relationships.`); return relationships; } /** * Extract communities of related vectors based on distance threshold. * A community is a group of vectors where each vector is related to at least one other vector in the group. * * @param threshold - The maximum distance between vectors to consider them related * @param metric - Distance metric to use (e.g., 'cosine', 'euclidean') * @returns Array of communities, where each community is an array of related vector information */ extractCommunities(threshold, metric = 'euclidean') { (0, log_1.log)('info', `[VectorDB] Extracting vector communities with threshold ${threshold}...`); // First build a graph representation where each vector is a node // and edges exist between vectors with distance <= threshold const graph = new Map(); const vectorEntries = Array.from(this.memoryStorage.entries()); // Initialize the graph with empty adjacency lists for (const [id] of vectorEntries) { graph.set(id, new Set()); } // Build edges for (let i = 0; i < vectorEntries.length; i++) { const [id1, vector1] = vectorEntries[i]; for (let j = i + 1; j < vectorEntries.length; j++) { const [id2, vector2] = vectorEntries[j]; // Ensure dimension compatibility if (vector1.length !== vector2.length) { continue; } // Calculate distance const distance = this._calculateDistance(vector1, vector2, metric); // Add edge if distance is within threshold if (distance <= threshold) { graph.get(id1)?.add(id2); graph.get(id2)?.add(id1); } } } // Use depth-first search to find connected components (communities) const visited = new Set(); const communities = []; for (const [id] of graph.entries()) { if (!visited.has(id)) { const community = []; // DFS to find all connected vectors const dfs = (nodeId) => { visited.add(nodeId); const metadata = this.metadata.get(nodeId); community.push({ id: nodeId, metadata: metadata ? 
{ ...metadata } : undefined }); // Visit all neighbors const neighbors = graph.get(nodeId) || new Set(); for (const neighbor of neighbors) { if (!visited.has(neighbor)) { dfs(neighbor); } } }; dfs(id); // Only include communities with at least 2 vectors if (community.length > 1) { communities.push(community); } } } (0, log_1.log)('info', `[VectorDB] Found ${communities.length} communities`); return communities; } async close() { if (this.isClosed) return; this.isClosed = true; // Mark as closed immediately try { if (this.dbPath) { await this.save(); // Attempt to save on close } } catch (error) { (0, log_1.log)('error', 'Error saving database during close:', error); } finally { // Clear memory regardless of save success this.memoryStorage.clear(); this.metadata.clear(); this.vectorDimensions.clear(); this.emit('db:close', {}); (0, log_1.log)('info', 'Database closed.'); } } } exports.VectorDB = VectorDB; // --- END OF FILE vector_db.ts --- //# sourceMappingURL=vector_db.js.map
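
Beyond basic add/search/persist calls, the class above also exposes metadata queries and simple graph-style analyses over the stored vectors. The following sketch exercises those methods, continuing from the hypothetical db instance in the earlier example; the 0.2 thresholds and the 'label' field are illustrative values, not defaults:

```typescript
// Metadata queries: field existence, legacy field/value pairs, or object criteria with an optional limit
const withLabel = db.getMetadataWithField('label');
const labelledB = db.getMetadataWithField({ label: 'b' }, undefined, { limit: 10 });

// Pairwise relationships: every pair of same-dimension vectors whose distance is <= the threshold
const related = db.extractRelationships(0.2, 'cosine');
// -> [{ vector1, vector2, distance, metadata1?, metadata2? }, ...]

// Communities: connected components of that threshold graph, keeping only groups of 2+ vectors
const communities = db.extractCommunities(0.2, 'cosine');

// Stats and lifecycle
const stats = db.getStats();
console.log(stats.vectorCount, stats.memoryUsage);
await db.close(); // saves to dbPath if one is set, then clears the in-memory maps
```

Note that extractRelationships and extractCommunities both compare every pair of stored vectors, so their cost grows quadratically with the number of vectors; they are best suited to modest collections or offline analysis.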