@codai/memorai-core

Simplified advanced memory engine - no tiers, just powerful semantic search with persistence

345 lines (344 loc) 13.3 kB
/**
 * Optimized Qdrant Configuration for Enterprise Performance
 * Addresses the 45GB memory issue and slow performance
 */
import { QdrantClient } from '@qdrant/js-client-rest';
import { VectorStoreError } from '../types/index.js';
import { logger } from '../utils/logger.js';

export class OptimizedQdrantVectorStore {
  constructor(config) {
    this.connectionPool = [];
    this.currentConnectionIndex = 0;
    this.config = {
      ...config,
      batchSize: config.batchSize ?? 1000,
      maxRetries: config.maxRetries ?? 3,
      requestTimeout: config.requestTimeout ?? 30000,
      connectionPoolSize: config.connectionPoolSize ?? 5,
      enableCompression: config.enableCompression ?? true,
      quantizationEnabled: config.quantizationEnabled ?? true,
      segmentThreshold: config.segmentThreshold ?? 20000,
      hnswM: config.hnswM ?? 32, // Increased from 16 for better recall
      hnswEfConstruct: config.hnswEfConstruct ?? 200, // Increased from 100 for better quality
      fullScanThreshold: config.fullScanThreshold ?? 20000, // Increased threshold
    };
    this.collection = this.config.collection;
    this.dimension = this.config.dimension;
    // Initialize connection pool
    this.initializeConnectionPool();
  }

  initializeConnectionPool() {
    for (let i = 0; i < this.config.connectionPoolSize; i++) {
      const clientConfig = {
        url: this.config.url,
        timeout: this.config.requestTimeout,
      };
      if (this.config.apiKey) {
        clientConfig.apiKey = this.config.apiKey;
      }
      const client = new QdrantClient(clientConfig);
      this.connectionPool.push(client);
    }
    this.client = this.connectionPool[0];
  }

  getConnection() {
    const connection = this.connectionPool[this.currentConnectionIndex];
    this.currentConnectionIndex =
      (this.currentConnectionIndex + 1) % this.config.connectionPoolSize;
    return connection;
  }

  async initialize() {
    try {
      // Check if collection exists
      const collections = await this.client.getCollections();
      const exists = collections.collections.some((c) => c.name === this.collection);
      if (!exists) {
        await this.createOptimizedCollection();
      } else {
        // Update existing collection configuration for optimization
        await this.updateCollectionConfig();
      }
      // Create optimized indexes
      await this.createOptimizedIndexes();
      logger.info('Optimized Qdrant vector store initialized successfully');
    } catch (error) {
      logger.error('Failed to initialize optimized Qdrant store:', error);
      throw new VectorStoreError('Failed to initialize vector store');
    }
  }

  async createOptimizedCollection() {
    const createConfig = {
      vectors: {
        size: this.dimension,
        distance: 'Cosine',
        hnsw_config: {
          m: this.config.hnswM,
          ef_construct: this.config.hnswEfConstruct,
          full_scan_threshold: this.config.fullScanThreshold,
          max_indexing_threads: 4,
          on_disk: true, // Store vectors on disk to save RAM
        },
      },
      optimizers_config: {
        deleted_threshold: 0.2,
        vacuum_min_vector_number: 1000,
        default_segment_number: 8, // Increased segments for better parallelism
        max_segment_size: this.config.segmentThreshold,
        memmap_threshold: this.config.segmentThreshold,
        indexing_threshold: 10000,
        flush_interval_sec: 10,
        max_optimization_threads: 4,
      },
      shard_number: 2, // Distribute across shards for better performance
      replication_factor: 1,
      write_consistency_factor: 1,
    };
    // Add quantization for memory efficiency
    if (this.config.quantizationEnabled) {
      createConfig.quantization_config = {
        scalar: {
          type: 'int8',
          quantile: 0.99,
          always_ram: false,
        },
      };
    }
    await this.client.createCollection(this.collection, createConfig);
    logger.info('Created optimized Qdrant collection with performance enhancements');
  }

  async updateCollectionConfig() {
    try {
      // Update collection parameters for better performance
      await this.client.updateCollection(this.collection, {
        optimizers_config: {
          deleted_threshold: 0.2,
          vacuum_min_vector_number: 1000,
          default_segment_number: 8,
          max_segment_size: this.config.segmentThreshold,
          memmap_threshold: this.config.segmentThreshold,
          indexing_threshold: 10000,
          flush_interval_sec: 10,
          max_optimization_threads: 4,
        },
      });
      logger.info('Updated Qdrant collection configuration for optimization');
    } catch (error) {
      logger.warn('Failed to update collection config, continuing:', error);
    }
  }

  async createOptimizedIndexes() {
    const indexes = [
      { field_name: 'tenant_id', field_schema: 'keyword' },
      { field_name: 'agent_id', field_schema: 'keyword' },
      { field_name: 'type', field_schema: 'keyword' },
      { field_name: 'created_at', field_schema: 'keyword' }, // Changed from datetime to keyword
      { field_name: 'importance', field_schema: 'float' },
      { field_name: 'access_count', field_schema: 'integer' },
      { field_name: 'content_hash', field_schema: 'keyword' },
    ];
    for (const index of indexes) {
      try {
        await this.client.createPayloadIndex(this.collection, index);
      } catch (error) {
        logger.warn(`Collection ${this.config.collection} might already exist:`, error);
      }
    }
  }

  async upsert(points) {
    const batches = this.chunkArray(points, this.config.batchSize);
    const promises = batches.map(batch => this.upsertBatch(batch));
    await Promise.all(promises);
  }

  async upsertBatch(points) {
    const connection = this.getConnection();
    await this.retryOperation(async () => {
      await connection.upsert(this.collection, {
        wait: false, // Don't wait for indexing to complete
        points: points.map(p => ({
          id: p.id,
          vector: p.vector,
          payload: p.payload,
        })),
      });
    });
  }

  async search(vector, query) {
    const connection = this.getConnection();
    const searchParams = {
      vector,
      limit: Math.min(query.limit || 10, 100), // Cap at 100 for performance
      with_payload: true,
      with_vector: false, // Don't return vectors unless needed
      score_threshold: query.threshold || 0.7,
      filter: this.buildFilter(query),
      search_params: {
        hnsw_ef: Math.min((query.limit || 10) * 4, 400), // Dynamic ef based on limit
        exact: false, // Use approximate search for speed
      },
    };
    const searchResult = await this.retryOperation(async () => {
      return await connection.search(this.collection, searchParams);
    });
    return searchResult.map((point) => ({
      id: String(point.id),
      score: point.score,
      payload: point.payload || {},
    }));
  }

  async delete(ids) {
    if (ids.length === 0) return;
    const batches = this.chunkArray(ids, this.config.batchSize);
    const promises = batches.map(batch => this.deleteBatch(batch));
    await Promise.all(promises);
  }

  async deleteBatch(ids) {
    const connection = this.getConnection();
    await this.retryOperation(async () => {
      await connection.delete(this.collection, {
        wait: false,
        points: ids,
      });
    });
  }

  async count(tenantId) {
    const connection = this.getConnection();
    const result = await this.retryOperation(async () => {
      return await connection.count(this.collection, {
        filter: {
          must: [{ key: 'tenant_id', match: { value: tenantId } }],
        },
      });
    });
    return result.count;
  }

  async healthCheck() {
    try {
      await this.client.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  async checkDuplicateByHash(contentHash, tenantId, agentId) {
    const connection = this.getConnection();
    const filter = {
      must: [
        { key: 'content_hash', match: { value: contentHash } },
        { key: 'tenant_id', match: { value: tenantId } },
      ],
    };
    if (agentId) {
      filter.must.push({ key: 'agent_id', match: { value: agentId } });
    }
    const searchResult = await connection.scroll(this.collection, {
      filter,
      limit: 1,
      with_payload: false,
      with_vector: false,
    });
    return searchResult.points.length > 0 ? searchResult.points[0].id : null;
  }

  async updateMemoryAccess(memoryId) {
    const connection = this.getConnection();
    // Get current memory
    const points = await connection.retrieve(this.collection, {
      ids: [memoryId],
      with_payload: true,
    });
    if (points.length === 0) return;
    const point = points[0];
    const currentAccess = point.payload?.access_count || 0;
    // Update access count and last access time
    await connection.setPayload(this.collection, {
      wait: false,
      points: [memoryId],
      payload: {
        access_count: currentAccess + 1,
        last_accessed_at: new Date().toISOString(),
      },
    });
  }

  async getMemoryStats(tenantId) {
    // Implement efficient stats calculation
    const total = await this.count(tenantId);
    // For now return basic stats - implement more detailed stats as needed
    return {
      totalMemories: total,
      averageAccessCount: 0,
      oldestMemory: null,
      newestMemory: null,
      duplicateCount: 0,
    };
  }

  /**
   * Optimize collection for better performance
   */
  async optimizeCollection() {
    try {
      // Force optimization
      await this.client.updateCollection(this.collection, {
        optimizers_config: {
          deleted_threshold: 0.1, // More aggressive cleanup
          vacuum_min_vector_number: 100,
        },
      });
      logger.info('Collection optimization completed');
    } catch (error) {
      logger.error('Collection optimization failed:', error);
    }
  }

  // Helper methods
  buildFilter(query) {
    const filter = { must: [] };
    if (query.tenant_id) {
      filter.must.push({ key: 'tenant_id', match: { value: query.tenant_id } });
    }
    if (query.agent_id) {
      filter.must.push({ key: 'agent_id', match: { value: query.agent_id } });
    }
    if (query.type) {
      filter.must.push({ key: 'type', match: { value: query.type } });
    }
    // Extended filter options could be added here if MemoryQuery interface is extended
    // For now, we'll use basic filtering
    return filter.must.length > 0 ? filter : undefined;
  }

  chunkArray(array, chunkSize) {
    const chunks = [];
    for (let i = 0; i < array.length; i += chunkSize) {
      chunks.push(array.slice(i, i + chunkSize));
    }
    return chunks;
  }

  async retryOperation(operation) {
    let lastError;
    for (let attempt = 1; attempt <= this.config.maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = error;
        if (attempt < this.config.maxRetries) {
          const delay = Math.min(1000 * Math.pow(2, attempt - 1), 5000); // Exponential backoff
          await new Promise(resolve => setTimeout(resolve, delay));
          logger.warn(`Retrying operation (attempt ${attempt + 1}/${this.config.maxRetries}):`, error);
        }
      }
    }
    throw lastError;
  }

  async close() {
    // Connection cleanup if needed
    this.connectionPool.length = 0;
  }
}
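For orientation, a minimal usage sketch of the class above follows. The import path, Qdrant URL, collection name, tenant id, and 1536-dimension embedding are illustrative assumptions, not values taken from the package; the config fields and method calls mirror those defined in the source.

// Usage sketch (assumed import path and placeholder config values).
import { OptimizedQdrantVectorStore } from '@codai/memorai-core'; // hypothetical export path

const store = new OptimizedQdrantVectorStore({
  url: 'http://localhost:6333', // assumed local Qdrant instance
  collection: 'memories',       // illustrative collection name
  dimension: 1536,              // must match your embedding model's output size
});

await store.initialize();

// Upsert one point; id/vector/payload match the fields used by upsertBatch().
await store.upsert([
  {
    id: 'mem-1',
    vector: new Array(1536).fill(0), // replace with a real embedding
    payload: { tenant_id: 'tenant-a', type: 'fact', content_hash: 'abc123' },
  },
]);

// Search scoped to a tenant; limit is capped at 100 and the score threshold defaults to 0.7.
const results = await store.search(new Array(1536).fill(0), {
  tenant_id: 'tenant-a',
  limit: 5,
});

console.log(results, await store.count('tenant-a'));
await store.close();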