// @codai/memorai-core
// Version: (not captured)
// Simplified advanced memory engine - no tiers, just powerful semantic search with persistence
// 345 lines (344 loc) • 13.3 kB
// JavaScript
/**
* Optimized Qdrant Configuration for Enterprise Performance
* Addresses the 45GB memory issue and slow performance
*/
import { QdrantClient } from '@qdrant/js-client-rest';
import { VectorStoreError } from '../types/index.js';
import { logger } from '../utils/logger.js';
/**
 * Vector store backed by a single Qdrant collection.
 *
 * Requests are spread round-robin over a small pool of REST clients, bulk
 * writes are split into batches, and most data operations are retried with
 * exponential backoff.
 */
export class OptimizedQdrantVectorStore {
  /**
   * Builds the effective configuration (caller values win, missing tunables get
   * defaults) and creates the client pool. No network call is made here.
   *
   * @param {object} config - Store settings. Fields read by this class:
   *   `url`, `apiKey` (optional), `collection`, `dimension`, and the optional
   *   tunables listed below with their defaults.
   */
  constructor(config) {
    this.connectionPool = [];
    this.currentConnectionIndex = 0;
    this.config = {
      ...config,
      batchSize: config.batchSize ?? 1000,
      maxRetries: config.maxRetries ?? 3,
      requestTimeout: config.requestTimeout ?? 30000,
      connectionPoolSize: config.connectionPoolSize ?? 5,
      enableCompression: config.enableCompression ?? true,
      quantizationEnabled: config.quantizationEnabled ?? true,
      segmentThreshold: config.segmentThreshold ?? 20000,
      hnswM: config.hnswM ?? 32, // Increased from 16 for better recall
      hnswEfConstruct: config.hnswEfConstruct ?? 200, // Increased from 100 for better quality
      fullScanThreshold: config.fullScanThreshold ?? 20000, // Increased threshold
      // Backoff tunables used by retryOperation (previously hard-coded there).
      retryBaseDelayMs: config.retryBaseDelayMs ?? 1000,
      retryMaxDelayMs: config.retryMaxDelayMs ?? 5000,
    };
    this.collection = this.config.collection;
    this.dimension = this.config.dimension;
    // Initialize connection pool
    this.initializeConnectionPool();
  }

  /**
   * Creates the pooled Qdrant clients and sets `this.client` to the first one.
   * At least one client is always created, even if `connectionPoolSize` is
   * zero, negative or not a number, so `this.client` is never left undefined.
   */
  initializeConnectionPool() {
    const requestedSize = Number(this.config.connectionPoolSize);
    const poolSize = requestedSize >= 1 ? Math.floor(requestedSize) : 1;
    for (let i = 0; i < poolSize; i++) {
      const clientConfig = {
        url: this.config.url,
        timeout: this.config.requestTimeout,
      };
      if (this.config.apiKey) {
        clientConfig.apiKey = this.config.apiKey;
      }
      this.connectionPool.push(new QdrantClient(clientConfig));
    }
    this.client = this.connectionPool[0];
  }

  /**
   * Returns the next pooled client in round-robin order.
   *
   * The rotation is based on the actual pool length rather than the configured
   * size, so it stays correct if the two ever differ.
   *
   * @returns {QdrantClient} A client from the pool.
   * @throws {VectorStoreError} If the pool is empty (e.g. after `close()`).
   */
  getConnection() {
    const poolSize = this.connectionPool.length;
    if (poolSize === 0) {
      throw new VectorStoreError('Connection pool is empty (store closed or not initialized)');
    }
    const index = this.currentConnectionIndex % poolSize;
    this.currentConnectionIndex = (index + 1) % poolSize;
    return this.connectionPool[index];
  }

  /**
   * Ensures the collection exists (creating it, or updating its optimizer
   * settings when it already exists) and then creates the payload indexes.
   *
   * @returns {Promise<void>}
   * @throws {VectorStoreError} If any step fails; the underlying error is logged.
   */
  async initialize() {
    try {
      // Check if collection exists
      const collections = await this.client.getCollections();
      const exists = collections.collections.some((c) => c.name === this.collection);
      if (!exists) {
        await this.createOptimizedCollection();
      } else {
        // Update existing collection configuration for optimization
        await this.updateCollectionConfig();
      }
      // Create optimized indexes
      await this.createOptimizedIndexes();
      logger.info('Optimized Qdrant vector store initialized successfully');
    } catch (error) {
      logger.error('Failed to initialize optimized Qdrant store:', error);
      throw new VectorStoreError('Failed to initialize vector store');
    }
  }

  /**
   * Creates the collection with cosine distance, the configured HNSW and
   * optimizer settings and, when `quantizationEnabled` is set, int8 scalar
   * quantization.
   *
   * @returns {Promise<void>}
   */
  async createOptimizedCollection() {
    const createConfig = {
      vectors: {
        size: this.dimension,
        distance: 'Cosine',
        hnsw_config: {
          m: this.config.hnswM,
          ef_construct: this.config.hnswEfConstruct,
          full_scan_threshold: this.config.fullScanThreshold,
          max_indexing_threads: 4,
          // Keeps the HNSW index on disk to save RAM. This flag governs the
          // index, not the raw vectors.
          on_disk: true,
        },
      },
      optimizers_config: {
        deleted_threshold: 0.2,
        vacuum_min_vector_number: 1000,
        default_segment_number: 8, // Increased segments for better parallelism
        max_segment_size: this.config.segmentThreshold,
        memmap_threshold: this.config.segmentThreshold,
        indexing_threshold: 10000,
        flush_interval_sec: 10,
        max_optimization_threads: 4,
      },
      shard_number: 2, // Distribute across shards for better performance
      replication_factor: 1,
      write_consistency_factor: 1,
    };
    // Add quantization for memory efficiency
    if (this.config.quantizationEnabled) {
      createConfig.quantization_config = {
        scalar: {
          type: 'int8',
          quantile: 0.99,
          always_ram: false,
        },
      };
    }
    await this.client.createCollection(this.collection, createConfig);
    logger.info('Created optimized Qdrant collection with performance enhancements');
  }

  /**
   * Applies the optimizer settings to an already existing collection.
   * Best-effort: a failure is logged as a warning and not rethrown.
   *
   * @returns {Promise<void>}
   */
  async updateCollectionConfig() {
    try {
      // Update collection parameters for better performance
      await this.client.updateCollection(this.collection, {
        optimizers_config: {
          deleted_threshold: 0.2,
          vacuum_min_vector_number: 1000,
          default_segment_number: 8,
          max_segment_size: this.config.segmentThreshold,
          memmap_threshold: this.config.segmentThreshold,
          indexing_threshold: 10000,
          flush_interval_sec: 10,
          max_optimization_threads: 4,
        },
      });
      logger.info('Updated Qdrant collection configuration for optimization');
    } catch (error) {
      logger.warn('Failed to update collection config, continuing:', error);
    }
  }

  /**
   * Creates payload indexes for the fields used in filters and stats.
   * Each index is attempted independently; a failure (typically because the
   * index already exists) is logged as a warning and the loop continues.
   *
   * @returns {Promise<void>}
   */
  async createOptimizedIndexes() {
    const indexes = [
      { field_name: 'tenant_id', field_schema: 'keyword' },
      { field_name: 'agent_id', field_schema: 'keyword' },
      { field_name: 'type', field_schema: 'keyword' },
      { field_name: 'created_at', field_schema: 'keyword' }, // Changed from datetime to keyword
      { field_name: 'importance', field_schema: 'float' },
      { field_name: 'access_count', field_schema: 'integer' },
      { field_name: 'content_hash', field_schema: 'keyword' },
    ];
    for (const index of indexes) {
      try {
        await this.client.createPayloadIndex(this.collection, index);
      } catch (error) {
        logger.warn(
          `Payload index '${index.field_name}' on collection ${this.collection} might already exist:`,
          error,
        );
      }
    }
  }

  /**
   * Upserts points in batches of `config.batchSize`; all batches are sent
   * concurrently.
   *
   * @param {Array<{id: (string|number), vector: number[], payload: object}>} points
   * @returns {Promise<void>} Rejects as soon as any batch exhausts its retries.
   */
  async upsert(points) {
    if (points.length === 0) {
      return;
    }
    const batches = this.chunkArray(points, this.config.batchSize);
    await Promise.all(batches.map((batch) => this.upsertBatch(batch)));
  }

  /**
   * Upserts one batch through a pooled client, with retries.
   *
   * @param {Array<{id: (string|number), vector: number[], payload: object}>} points
   * @returns {Promise<void>}
   */
  async upsertBatch(points) {
    const connection = this.getConnection();
    await this.retryOperation(async () => {
      await connection.upsert(this.collection, {
        wait: false, // Don't wait for indexing to complete
        points: points.map((p) => ({
          id: p.id,
          vector: p.vector,
          payload: p.payload,
        })),
      });
    });
  }

  /**
   * Runs an approximate nearest-neighbour search.
   *
   * @param {number[]} vector - Query vector.
   * @param {object} query - Search options: `limit` (default 10, capped at 100),
   *   `threshold` (minimum score, default 0.7; an explicit 0 is honoured), and
   *   the optional filters `tenant_id`, `agent_id`, `type`.
   * @returns {Promise<Array<{id: string, score: number, payload: object}>>}
   */
  async search(vector, query) {
    const connection = this.getConnection();
    const requestedLimit = query.limit || 10;
    const searchRequest = {
      vector,
      limit: Math.min(requestedLimit, 100), // Cap at 100 for performance
      with_payload: true,
      with_vector: false, // Don't return vectors unless needed
      // `??` rather than `||` so a caller can ask for threshold 0.
      score_threshold: query.threshold ?? 0.7,
      filter: this.buildFilter(query),
      // The search request field is `params`; under any other key the HNSW
      // tuning below would not reach the server.
      params: {
        hnsw_ef: Math.min(requestedLimit * 4, 400), // Dynamic ef based on limit
        exact: false, // Use approximate search for speed
      },
    };
    const searchResult = await this.retryOperation(
      () => connection.search(this.collection, searchRequest),
    );
    return searchResult.map((point) => ({
      id: String(point.id),
      score: point.score,
      payload: point.payload || {},
    }));
  }

  /**
   * Deletes points by id in batches of `config.batchSize`, sent concurrently.
   *
   * @param {Array<string|number>} ids
   * @returns {Promise<void>}
   */
  async delete(ids) {
    if (ids.length === 0) {
      return;
    }
    const batches = this.chunkArray(ids, this.config.batchSize);
    await Promise.all(batches.map((batch) => this.deleteBatch(batch)));
  }

  /**
   * Deletes one batch of ids through a pooled client, with retries.
   *
   * @param {Array<string|number>} ids
   * @returns {Promise<void>}
   */
  async deleteBatch(ids) {
    const connection = this.getConnection();
    await this.retryOperation(async () => {
      await connection.delete(this.collection, {
        wait: false,
        points: ids,
      });
    });
  }

  /**
   * Counts the points whose `tenant_id` payload field equals `tenantId`.
   *
   * @param {string} tenantId
   * @returns {Promise<number>}
   */
  async count(tenantId) {
    const connection = this.getConnection();
    const result = await this.retryOperation(() => connection.count(this.collection, {
      filter: {
        must: [{ key: 'tenant_id', match: { value: tenantId } }],
      },
    }));
    return result.count;
  }

  /**
   * Reports whether the server answers a collection listing.
   *
   * @returns {Promise<boolean>} `true` on success, `false` on any error.
   */
  async healthCheck() {
    try {
      await this.client.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Looks for an existing point with the given content hash for a tenant
   * (and agent, when provided). This call is not retried.
   *
   * @param {string} contentHash
   * @param {string} tenantId
   * @param {string} [agentId]
   * @returns {Promise<string|number|null>} Id of the first match, or `null`.
   */
  async checkDuplicateByHash(contentHash, tenantId, agentId) {
    const connection = this.getConnection();
    const filter = {
      must: [
        { key: 'content_hash', match: { value: contentHash } },
        { key: 'tenant_id', match: { value: tenantId } },
      ],
    };
    if (agentId) {
      filter.must.push({ key: 'agent_id', match: { value: agentId } });
    }
    const searchResult = await connection.scroll(this.collection, {
      filter,
      limit: 1,
      with_payload: false,
      with_vector: false,
    });
    return searchResult.points.length > 0
      ? searchResult.points[0].id
      : null;
  }

  /**
   * Increments `access_count` and refreshes `last_accessed_at` for one point.
   * Does nothing when the point is not found. The count is read and then
   * written back in two separate requests, and neither request is retried.
   *
   * @param {string|number} memoryId
   * @returns {Promise<void>}
   */
  async updateMemoryAccess(memoryId) {
    const connection = this.getConnection();
    // Get current memory
    const points = await connection.retrieve(this.collection, {
      ids: [memoryId],
      with_payload: true,
    });
    if (points.length === 0) {
      return;
    }
    const [point] = points;
    const currentAccess = point.payload?.access_count || 0;
    // Update access count and last access time
    await connection.setPayload(this.collection, {
      wait: false,
      points: [memoryId],
      payload: {
        access_count: currentAccess + 1,
        last_accessed_at: new Date().toISOString(),
      },
    });
  }

  /**
   * Returns basic statistics for a tenant. Only `totalMemories` is computed;
   * the remaining fields are fixed placeholder values.
   *
   * @param {string} tenantId
   * @returns {Promise<{totalMemories: number, averageAccessCount: number,
   *   oldestMemory: null, newestMemory: null, duplicateCount: number}>}
   */
  async getMemoryStats(tenantId) {
    const total = await this.count(tenantId);
    // For now return basic stats - implement more detailed stats as needed
    return {
      totalMemories: total,
      averageAccessCount: 0,
      oldestMemory: null,
      newestMemory: null,
      duplicateCount: 0,
    };
  }

  /**
   * Optimize collection for better performance by applying more aggressive
   * cleanup thresholds. Best-effort: a failure is logged and not rethrown.
   *
   * @returns {Promise<void>}
   */
  async optimizeCollection() {
    try {
      await this.client.updateCollection(this.collection, {
        optimizers_config: {
          deleted_threshold: 0.1, // More aggressive cleanup
          vacuum_min_vector_number: 100,
        },
      });
      logger.info('Collection optimization completed');
    } catch (error) {
      logger.error('Collection optimization failed:', error);
    }
  }

  // Helper methods

  /**
   * Builds a Qdrant filter from the truthy `tenant_id`, `agent_id` and `type`
   * fields of the query.
   *
   * @param {object} query
   * @returns {{must: object[]}|undefined} `undefined` when no field is set.
   */
  buildFilter(query) {
    const filter = { must: [] };
    for (const key of ['tenant_id', 'agent_id', 'type']) {
      if (query[key]) {
        filter.must.push({ key, match: { value: query[key] } });
      }
    }
    // Extended filter options could be added here if MemoryQuery interface is extended
    return filter.must.length > 0 ? filter : undefined;
  }

  /**
   * Splits an array into consecutive chunks of at most `chunkSize` items.
   *
   * @param {Array} array
   * @param {number} chunkSize - Must be greater than zero.
   * @returns {Array<Array>}
   * @throws {RangeError} If `chunkSize` is not a positive number; without this
   *   guard a zero or negative size would loop forever and NaN would drop data.
   */
  chunkArray(array, chunkSize) {
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
    }
    const chunks = [];
    for (let i = 0; i < array.length; i += chunkSize) {
      chunks.push(array.slice(i, i + chunkSize));
    }
    return chunks;
  }

  /**
   * Runs `operation`, retrying on failure with exponential backoff.
   *
   * `config.maxRetries` is the total number of attempts. The operation is
   * always attempted at least once, even when that value is zero or invalid.
   * The delay before retry k is `retryBaseDelayMs * 2^(k-1)`, capped at
   * `retryMaxDelayMs`.
   *
   * @template T
   * @param {() => Promise<T>} operation
   * @returns {Promise<T>} The first successful result.
   * @throws The error from the last failed attempt.
   */
  async retryOperation(operation) {
    const configuredAttempts = Number(this.config.maxRetries);
    const maxAttempts = configuredAttempts >= 1 ? Math.floor(configuredAttempts) : 1;
    const baseDelayMs = this.config.retryBaseDelayMs ?? 1000;
    const maxDelayMs = this.config.retryMaxDelayMs ?? 5000;
    let lastError;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = error;
        if (attempt < maxAttempts) {
          const delay = Math.min(baseDelayMs * 2 ** (attempt - 1), maxDelayMs); // Exponential backoff
          await new Promise((resolve) => setTimeout(resolve, delay));
          logger.warn(`Retrying operation (attempt ${attempt + 1}/${maxAttempts}):`, error);
        }
      }
    }
    throw lastError;
  }

  /**
   * Empties the connection pool. Afterwards `getConnection()` throws until
   * `initializeConnectionPool()` is called again.
   *
   * @returns {Promise<void>}
   */
  async close() {
    this.connectionPool.length = 0;
    this.currentConnectionIndex = 0;
  }
}