UNPKG

@codai/cbd

Version:

Codai Better Database - High-Performance Vector Memory System with HPKV-inspired architecture and MCP server

384 lines 13.9 kB
/**
 * Vector Storage Engine - AI embeddings and similarity search
 * Part of CBD Universal Database Phase 2
 */
import { EventEmitter } from 'events';

/**
 * In-memory vector store with named indexes and brute-force similarity search.
 *
 * Every index has a fixed dimensionality and a similarity algorithm
 * ('cosine', 'euclidean' or 'dot_product'; anything else falls back to cosine).
 * Progress and failures are reported through EventEmitter events:
 * 'engine:initialized', 'index:created', 'index:dropped', 'index:error',
 * 'vector:stored', 'vector:batchStored', 'vector:searched', 'vector:updated',
 * 'vector:deleted', 'vector:bulkDeleted' and 'vector:error'.
 */
export class VectorStorageEngine extends EventEmitter {
    // Index name -> { name, dimensions, algorithm, vectors: Map<id, record> }
    indexes = new Map();
    // Index used whenever a method is called without an explicit index name
    defaultIndex = 'default';
    // Index name -> array of recent search durations in ms (for latency tracking)
    stats = new Map();

    constructor() {
        super();
    }

    /**
     * Initialize method for CBD service compatibility.
     *
     * No index is created here: the default index is created lazily by the
     * first stored vector so that its dimensionality is not fixed up front.
     */
    async initialize() {
        this.emit('engine:initialized', { type: 'vector' });
    }

    /**
     * Search method for compatibility with CBD service.
     *
     * @param {string} collection - Name of the index to search.
     * @param {number[]} vector - Query vector.
     * @param {object} [options] - Forwarded unchanged to findSimilar().
     * @returns {Promise<object[]>} Same result list as findSimilar().
     */
    async search(collection, vector, options = {}) {
        return this.findSimilar(vector, options, collection);
    }

    /**
     * Insert method for CBD service compatibility. Stores into the default index.
     *
     * @param {string} [id] - Vector id; a `vector_<timestamp>_<random>` id is generated when falsy.
     * @param {number[]} vector - Vector to store.
     * @param {object} [metadata] - Metadata to attach; falsy values become `{}`.
     * @returns {Promise<object>} `{ id, vector, metadata, timestamp, similarity: 1.0 }`.
     */
    async insert(id, vector, metadata) {
        // slice(2, 11) yields the same 9 characters the deprecated substr(2, 9) did.
        const vectorId = id || `vector_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
        await this.storeVector(vectorId, vector, metadata || {});
        return {
            id: vectorId,
            vector: vector,
            metadata: metadata || {},
            timestamp: new Date(),
            similarity: 1.0
        };
    }

    /**
     * Store a vector with metadata, creating the index on first use.
     *
     * @param {string} id - Key of the record; an existing record with the same id is replaced.
     * @param {number[]} vector - Non-empty array (or numeric typed array) of components.
     * @param {object} [metadata] - Arbitrary metadata; `null`/`undefined` are stored as `{}`.
     * @param {string} [indexName] - Target index, defaults to the default index.
     * @throws {TypeError} When `vector` is not an array or numeric typed array.
     * @throws {Error} When `vector` is empty or its length differs from the index dimensions.
     *   Every failure is also emitted as 'vector:error' before being rethrown.
     */
    async storeVector(id, vector, metadata = {}, indexName = this.defaultIndex) {
        try {
            // Validate BEFORE touching the index: the first stored vector fixes the
            // index dimensionality, so a bad first vector must not create the index.
            const isArrayLike = Array.isArray(vector)
                || (ArrayBuffer.isView(vector) && !(vector instanceof DataView));
            if (!isArrayLike) {
                throw new TypeError('Vector must be an array of numbers');
            }
            if (vector.length === 0) {
                throw new Error('Vector must contain at least one dimension');
            }
            const index = this.getOrCreateIndex(indexName, vector.length);
            // Validate vector dimensions
            if (vector.length !== index.dimensions) {
                throw new Error(`Vector dimensions ${vector.length} don't match index dimensions ${index.dimensions}`);
            }
            // Cache the norm so cosine searches do not recompute it per query
            const norm = this.calculateNorm(vector);
            const record = {
                id,
                vector,
                // An explicit null bypasses the parameter default; normalise it so
                // filter matching and hybrid search can always read properties.
                metadata: metadata ?? {},
                norm,
                timestamp: new Date()
            };
            // Store in index
            index.vectors.set(id, record);
            this.emit('vector:stored', { indexName, id, dimensions: vector.length });
        }
        catch (error) {
            this.emit('vector:error', { operation: 'store', id, indexName, error });
            throw error;
        }
    }

    /**
     * Store multiple vectors in batch, one after another.
     *
     * Emits 'vector:batchStored' with the count, the elapsed milliseconds and a
     * throughput figure in vectors per second.
     *
     * @param {Array<{id: string, vector: number[], metadata?: object}>} vectors - Records to store.
     * @param {string} [indexName] - Target index, defaults to the default index.
     */
    async storeVectorsBatch(vectors, indexName = this.defaultIndex) {
        const startTime = Date.now();
        for (const { id, vector, metadata = {} } of vectors) {
            await this.storeVector(id, vector, metadata, indexName);
        }
        const duration = Date.now() - startTime;
        // Date.now() has millisecond resolution, so small batches measure 0 ms.
        // Clamp to 1 ms so throughput is never Infinity (or NaN for an empty batch).
        const throughput = vectors.length / (Math.max(duration, 1) / 1000);
        this.emit('vector:batchStored', {
            indexName,
            count: vectors.length,
            duration,
            throughput
        });
    }

    /**
     * Find similar vectors by scanning every record of an index.
     *
     * @param {number[]} queryVector - Query vector; its length must equal the index dimensions.
     * @param {object} [options]
     * @param {number} [options.limit=10] - Maximum number of results.
     * @param {number} [options.threshold=0.0] - Minimum similarity score to include.
     * @param {boolean} [options.includeMetadata=true] - When false, results carry `{}` as metadata.
     * @param {object} [options.filters={}] - Metadata key/value pairs that must all match strictly.
     * @param {string} [indexName] - Index to search, defaults to the default index.
     * @returns {Promise<object[]>} `{ id, score, vector, metadata, distance }` sorted by score descending,
     *   where `distance` is `1 - score`.
     * @throws {Error} When the index does not exist or the query dimensions mismatch.
     *   Failures are also emitted as 'vector:error'.
     */
    async findSimilar(queryVector, options = {}, indexName = this.defaultIndex) {
        const startTime = Date.now();
        try {
            const index = this.indexes.get(indexName);
            if (!index) {
                throw new Error(`Index ${indexName} not found`);
            }
            if (queryVector.length !== index.dimensions) {
                throw new Error(`Query vector dimensions ${queryVector.length} don't match index dimensions ${index.dimensions}`);
            }
            const { limit = 10, threshold = 0.0, includeMetadata = true, filters = {} } = options;
            const results = [];
            const queryNorm = this.calculateNorm(queryVector);
            // Calculate similarity for each vector
            for (const [id, record] of index.vectors) {
                // Apply metadata filters
                if (!this.matchesFilters(record.metadata, filters)) {
                    continue;
                }
                const similarity = this.calculateSimilarity(queryVector, record.vector, index.algorithm, queryNorm, record.norm);
                if (similarity >= threshold) {
                    results.push({
                        id,
                        score: similarity,
                        vector: record.vector,
                        metadata: includeMetadata ? record.metadata : {},
                        distance: 1 - similarity
                    });
                }
            }
            // Sort by similarity (descending) and limit results
            results.sort((a, b) => b.score - a.score);
            const limitedResults = results.slice(0, limit);
            const duration = Date.now() - startTime;
            this.trackLatency(indexName, duration);
            this.emit('vector:searched', {
                indexName,
                resultCount: limitedResults.length,
                duration,
                threshold
            });
            return limitedResults;
        }
        catch (error) {
            this.emit('vector:error', { operation: 'search', indexName, error });
            throw error;
        }
    }

    /**
     * Hybrid search combining vector similarity with a word-overlap text score.
     *
     * The text score compares `textQuery` with each result's `metadata.text`
     * (so it is always 0 when `options.includeMetadata` is false).
     *
     * @param {number[]} queryVector - Query vector.
     * @param {string} textQuery - Free-text query; skipped when falsy.
     * @param {object} [options] - findSimilar() options plus the weights below.
     * @param {number} [options.textWeight=0.3] - Weight of the text score.
     * @param {number} [options.vectorWeight=0.7] - Weight of the vector score.
     * @param {number} [options.limit=10] - Maximum number of results returned.
     * @param {string} [indexName] - Index to search, defaults to the default index.
     * @returns {Promise<object[]>} findSimilar() results extended with `textScore`
     *   and `combinedScore`, sorted by `combinedScore` descending.
     */
    async hybridSearch(queryVector, textQuery, options = {}, indexName = this.defaultIndex) {
        const { textWeight = 0.3, vectorWeight = 0.7, limit = 10 } = options;
        // Fetch every candidate that passes threshold/filters. Truncating to
        // `limit` here would drop records whose text score lifts them into the
        // top results, turning the re-ranking below into a no-op.
        const vectorResults = await this.findSimilar(queryVector, { ...options, limit: Number.POSITIVE_INFINITY }, indexName);
        const hybridResults = vectorResults.map(result => {
            let textScore = 0;
            // Simple text similarity if text query provided; non-string text is ignored
            if (textQuery && typeof result.metadata.text === 'string' && result.metadata.text) {
                textScore = this.calculateTextSimilarity(textQuery, result.metadata.text);
            }
            const combinedScore = (result.score * vectorWeight) + (textScore * textWeight);
            return {
                ...result,
                textScore,
                combinedScore
            };
        });
        // Re-sort by combined score
        hybridResults.sort((a, b) => b.combinedScore - a.combinedScore);
        return hybridResults.slice(0, limit);
    }

    /**
     * Update a vector and its metadata. The record is replaced as a whole.
     *
     * @param {string} id - Id of an existing record.
     * @param {number[]} vector - New vector.
     * @param {object} [metadata] - New metadata (replaces the old metadata).
     * @param {string} [indexName] - Index holding the record.
     * @throws {Error} When the index or the record does not exist, or storing fails.
     *   Failures are also emitted as 'vector:error'.
     */
    async updateVector(id, vector, metadata = {}, indexName = this.defaultIndex) {
        try {
            const index = this.indexes.get(indexName);
            if (!index) {
                throw new Error(`Index ${indexName} not found`);
            }
            if (!index.vectors.has(id)) {
                throw new Error(`Vector ${id} not found in index ${indexName}`);
            }
            await this.storeVector(id, vector, metadata, indexName);
            this.emit('vector:updated', { indexName, id });
        }
        catch (error) {
            this.emit('vector:error', { operation: 'update', id, indexName, error });
            throw error;
        }
    }

    /**
     * Delete a vector from the index.
     *
     * @param {string} id - Id of the record to remove.
     * @param {string} [indexName] - Index holding the record.
     * @returns {Promise<boolean>} True when a record was removed; false when the
     *   index or the record does not exist.
     */
    async deleteVector(id, indexName = this.defaultIndex) {
        try {
            const index = this.indexes.get(indexName);
            if (!index) {
                return false;
            }
            const deleted = index.vectors.delete(id);
            if (deleted) {
                this.emit('vector:deleted', { indexName, id });
            }
            return deleted;
        }
        catch (error) {
            this.emit('vector:error', { operation: 'delete', id, indexName, error });
            throw error;
        }
    }

    /**
     * Create a new, empty vector index.
     *
     * @param {string} name - Unique index name.
     * @param {number} dimensions - Required length of every vector in the index.
     * @param {string} [algorithm='cosine'] - 'cosine', 'euclidean' or 'dot_product'.
     * @throws {Error} When an index with that name already exists (also emitted as 'index:error').
     */
    async createVectorIndex(name, dimensions, algorithm = 'cosine') {
        try {
            if (this.indexes.has(name)) {
                throw new Error(`Index ${name} already exists`);
            }
            const index = {
                name,
                dimensions,
                algorithm,
                vectors: new Map()
            };
            this.indexes.set(name, index);
            this.emit('index:created', { name, dimensions, algorithm });
        }
        catch (error) {
            this.emit('index:error', { operation: 'create', name, error });
            throw error;
        }
    }

    /**
     * Drop a vector index together with its recorded search latencies.
     *
     * @param {string} name - Index to drop; dropping an unknown index is a no-op.
     * @throws {Error} When `name` is the default index.
     */
    async dropVectorIndex(name) {
        if (name === this.defaultIndex) {
            throw new Error('Cannot drop default index');
        }
        const deleted = this.indexes.delete(name);
        // Remove latency samples too, otherwise a later index with the same
        // name would report percentiles measured on the dropped one.
        this.stats.delete(name);
        if (deleted) {
            this.emit('index:dropped', { name });
        }
    }

    /**
     * Get vector by ID.
     *
     * @param {string} id - Record id.
     * @param {string} [indexName] - Index to read from.
     * @returns {Promise<object|null>} The stored record `{ id, vector, metadata, norm, timestamp }`,
     *   or null when the index or the record does not exist.
     */
    async getVector(id, indexName = this.defaultIndex) {
        const index = this.indexes.get(indexName);
        if (!index) {
            return null;
        }
        return index.vectors.get(id) || null;
    }

    /**
     * Get index statistics.
     *
     * @param {string} [indexName] - Index to inspect.
     * @returns {Promise<object|null>} `{ totalVectors, dimensions, indexSize, averageNorm,
     *   searchLatency: { p50, p95, p99 } }`, or null when the index does not exist.
     */
    async getIndexStats(indexName = this.defaultIndex) {
        const index = this.indexes.get(indexName);
        if (!index) {
            return null;
        }
        const vectors = Array.from(index.vectors.values());
        const totalVectors = vectors.length;
        const averageNorm = totalVectors > 0
            ? vectors.reduce((sum, v) => sum + (v.norm || 0), 0) / totalVectors
            : 0;
        const latencies = this.stats.get(indexName) || [];
        // Sort a copy so the recorded insertion order stays intact
        const sortedLatencies = latencies.slice().sort((a, b) => a - b);
        return {
            totalVectors,
            dimensions: index.dimensions,
            indexSize: totalVectors * index.dimensions * 8, // Estimated bytes
            averageNorm,
            searchLatency: {
                p50: this.percentile(sortedLatencies, 0.5),
                p95: this.percentile(sortedLatencies, 0.95),
                p99: this.percentile(sortedLatencies, 0.99)
            }
        };
    }

    /**
     * List all vector indexes.
     *
     * @returns {Promise<string[]>} Names of the existing indexes.
     */
    async listIndexes() {
        return Array.from(this.indexes.keys());
    }

    /**
     * Bulk delete vectors by metadata filter.
     *
     * @param {object} filters - Metadata key/value pairs that must all match strictly.
     * @param {string} [indexName] - Index to delete from.
     * @returns {Promise<number>} Number of records removed (0 when the index does not exist).
     */
    async deleteByFilter(filters, indexName = this.defaultIndex) {
        const index = this.indexes.get(indexName);
        if (!index) {
            return 0;
        }
        let deletedCount = 0;
        // Collect ids first so the map is not modified while it is being iterated
        const toDelete = [];
        for (const [id, record] of index.vectors) {
            if (this.matchesFilters(record.metadata, filters)) {
                toDelete.push(id);
            }
        }
        for (const id of toDelete) {
            if (index.vectors.delete(id)) {
                deletedCount++;
            }
        }
        this.emit('vector:bulkDeleted', { indexName, deletedCount, filters });
        return deletedCount;
    }

    // Private helper methods

    /**
     * Return the named index, creating a cosine index with the given
     * dimensions (and emitting 'index:created') when it does not exist yet.
     */
    getOrCreateIndex(name, dimensions) {
        if (!this.indexes.has(name)) {
            const algorithm = 'cosine';
            this.indexes.set(name, {
                name,
                dimensions,
                algorithm,
                vectors: new Map()
            });
            // Same payload shape as the event emitted by createVectorIndex()
            this.emit('index:created', { name, dimensions, algorithm });
        }
        return this.indexes.get(name);
    }

    /** Euclidean (L2) norm of a vector. */
    calculateNorm(vector) {
        return Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    }

    /**
     * Dispatch to the similarity function selected by `algorithm`;
     * unknown algorithms fall back to cosine similarity.
     */
    calculateSimilarity(vector1, vector2, algorithm, norm1, norm2) {
        switch (algorithm) {
            case 'cosine':
                return this.cosineSimilarity(vector1, vector2, norm1, norm2);
            case 'euclidean':
                return this.euclideanSimilarity(vector1, vector2);
            case 'dot_product':
                return this.dotProduct(vector1, vector2);
            default:
                return this.cosineSimilarity(vector1, vector2, norm1, norm2);
        }
    }

    /**
     * Cosine similarity. Precomputed norms are used when truthy, otherwise
     * they are recalculated; a zero-length vector yields 0.
     */
    cosineSimilarity(vector1, vector2, norm1, norm2) {
        const dot = this.dotProduct(vector1, vector2);
        const normA = norm1 || this.calculateNorm(vector1);
        const normB = norm2 || this.calculateNorm(vector2);
        if (normA === 0 || normB === 0)
            return 0;
        return dot / (normA * normB);
    }

    /** Euclidean distance mapped to a similarity in (0, 1]: `1 / (1 + distance)`. */
    euclideanSimilarity(vector1, vector2) {
        let sum = 0;
        for (let i = 0; i < vector1.length; i++) {
            const diff = (vector1[i] || 0) - (vector2[i] || 0);
            sum += diff * diff;
        }
        // Convert distance to similarity (closer = higher similarity)
        return 1 / (1 + Math.sqrt(sum));
    }

    /** Dot product over the length of `vector1`; missing components count as 0. */
    dotProduct(vector1, vector2) {
        let sum = 0;
        for (let i = 0; i < vector1.length; i++) {
            sum += (vector1[i] || 0) * (vector2[i] || 0);
        }
        return sum;
    }

    /**
     * True when every filter entry strictly equals the metadata value under
     * the same key. An empty filter object matches everything.
     */
    matchesFilters(metadata, filters) {
        for (const [key, value] of Object.entries(filters)) {
            // Optional chaining keeps records without metadata from throwing
            if (metadata?.[key] !== value) {
                return false;
            }
        }
        return true;
    }

    /** Jaccard similarity of the lower-cased, whitespace-separated word sets. */
    calculateTextSimilarity(query, text) {
        // Simple text similarity using word overlap
        const queryWords = new Set(query.toLowerCase().split(/\s+/));
        const textWords = new Set(text.toLowerCase().split(/\s+/));
        const intersection = new Set([...queryWords].filter(x => textWords.has(x)));
        const union = new Set([...queryWords, ...textWords]);
        return intersection.size / union.size; // Jaccard similarity
    }

    /** Record a search duration for an index, keeping the newest 1000 samples. */
    trackLatency(indexName, duration) {
        if (!this.stats.has(indexName)) {
            this.stats.set(indexName, []);
        }
        const latencies = this.stats.get(indexName);
        latencies.push(duration);
        // Keep only last 1000 measurements
        if (latencies.length > 1000) {
            latencies.splice(0, latencies.length - 1000);
        }
    }

    /**
     * Pick the p-th percentile (p in 0..1) from an ascending array;
     * returns 0 for an empty array.
     */
    percentile(sortedArray, p) {
        if (sortedArray.length === 0)
            return 0;
        const index = Math.ceil(sortedArray.length * p) - 1;
        return sortedArray[Math.max(0, index)] || 0;
    }
}
//# sourceMappingURL=VectorStorageEngine.js.map