@n2flowjs/nbase
Version:
Neural Vector Database for efficient similarity search
1,002 lines • 47.9 kB
JavaScript
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Database = void 0;
const events_1 = require("events");
const fs_1 = require("fs");
const lru_cache_1 = require("lru-cache");
const path_1 = __importDefault(require("path")); // Import path for directory handling
const config_1 = __importDefault(require("../config")); // Import the default config object
const unified_search_1 = require("../search/unified_search");
const profiling_1 = require("../utils/profiling");
const vector_monitoring_1 = require("../utils/vector_monitoring");
const partitioned_vector_db_1 = require("../vector/partitioned_vector_db");
/**
* High-level Database class using PartitionedVectorDB for scalable vector storage and search.
* Provides unified search, caching, auto-save, monitoring, and a simplified API.
*
* NOTE: File system backup of the partitions directory is recommended as an external process.
*/
/**
* The `Database` class provides a high-level interface for managing a partitioned vector database
* with support for vector addition, deletion, metadata management, nearest neighbor search,
* and background tasks such as auto-saving and monitoring. It integrates with `PartitionedVectorDB`
* and `UnifiedSearch` for efficient vector storage and search operations.
*
* ### Features:
* - Asynchronous initialization with event-based readiness notifications.
* - Partitioned vector storage with configurable options for clustering, indexing, and persistence.
* - Unified search engine for nearest neighbor queries with caching and concurrency control.
* - Background tasks for auto-saving and monitoring database performance.
* - Event-driven architecture for tracking database operations and errors.
*
* ### Usage:
* - Instantiate the class with `DatabaseOptions`.
* - Await the `ready()` promise or listen for the `'ready'` event before performing operations.
* - Use public methods like `addVector`, `findNearest`, `deleteVector`, etc., to interact with the database.
* - Call `close()` to gracefully shut down the database and release resources.
*
* ### Events:
* - `initializing`: Emitted when the database starts initializing.
* - `ready`: Emitted when the database is fully initialized and ready for operations.
* - `error`: Emitted when an error occurs during operations or background tasks.
* - `close`: Emitted when the database is closed.
* - Various search and indexing-related events such as `search:start`, `search:complete`, `index:progress`, etc.
*
* ### Example:
* ```typescript
* const db = new Database({
* vectorSize: 128,
* cacheSize: 1000,
* partitioning: { partitionsDir: './data/partitions' },
* indexing: { buildOnStart: true },
* });
*
* db.on('ready', async () => {
* console.log('Database is ready!');
* await db.addVector('id1', [0.1, 0.2, 0.3], { label: 'example' });
* const results = await db.findNearest([0.1, 0.2, 0.3], 5);
* console.log('Search results:', results);
* });
*
* db.on('error', (err) => {
* console.error('Database error:', err);
* });
* ```
*
* ### Notes:
* - Ensure proper error handling for asynchronous operations.
* - Use the `getStats()` method to retrieve detailed information about the database state and performance.
* - The database must be closed using `close()` to ensure all resources are released and state is saved.
*/
class Database extends events_1.EventEmitter {
/**
* Creates a new Database instance. Initialization is asynchronous.
* Listen for the 'ready' event or await the ready() promise before use.
* @param options Configuration options.
*/
constructor(options) {
super();
this.timer = (0, profiling_1.createTimer)(); // General purpose timer
this.isClosed = false;
this.isReady = false;
// Background Tasks
this.autoSaveTimer = null;
// Concurrency Control
this.activeSearchPromises = new Set();
// Performance Metrics (Simplified - detailed metrics come from components)
this.metrics = {
queries: 0,
totalSearchTime: 0,
avgSearchTime: 0,
cacheHits: 0,
cacheMisses: 0,
queryTimes: [], // Consider capping size
avgAddTime: 0, // Currently not tracked here, relies on events if needed
totalAddTime: 0, // Currently not tracked here
};
// --- Merge Options with Defaults ---
this.options = {
vectorSize: options.vectorSize ?? config_1.default.defaults.vectorSize,
clustering: { ...config_1.default.clustering, ...options.clustering },
partitioning: {
...config_1.default.partitioning,
...options.partitioning,
},
indexing: { ...config_1.default.indexing, ...options.indexing },
cacheSize: options.cacheSize ?? config_1.default.defaults.cacheSize,
maxConcurrentSearches: options.maxConcurrentSearches ?? config_1.default.defaults.maxConcurrentSearches,
persistence: { ...config_1.default.persistence, ...options.persistence },
backup: { ...config_1.default.backup, ...options.backup }, // Keep for config, but logic removed
monitoring: { ...config_1.default.monitoring, ...options.monitoring },
};
// --- Initialize Caching and Monitoring ---
this.searchCache = new lru_cache_1.LRUCache({
max: this.options.cacheSize,
});
this.monitor = this.options.monitoring.enable
? new vector_monitoring_1.VectorDBMonitor({
// Pass relevant monitor options from this.options.monitoring
interval: this.options.monitoring.intervalMs,
logToConsole: this.options.monitoring.logToConsole,
enableDatabaseMetrics: this.options.monitoring.enableDatabaseMetrics,
enableSystemMetrics: this.options.monitoring.enableSystemMetrics,
// Add other specific monitor options if needed
})
: null;
// --- Start Asynchronous Initialization ---
this.initializationPromise = this._initialize();
}
/**
* Performs the main asynchronous initialization sequence.
*/
async _initialize() {
try {
console.log('[Database] initialization started...');
this.emit('initializing', undefined);
// 1. Ensure Base Directory Exists
const baseDir = path_1.default.join(process.cwd(), this.options.persistence.dbPath || 'database'); // Default path
this._ensureDirectoryExists(baseDir, 'partitions');
// 2. Initialize PartitionedVectorDB
console.log('[Database] Initializing PartitionedVectorDB...');
this.vectorDB = new partitioned_vector_db_1.PartitionedVectorDB({
// Pass necessary options from this.options
partitionsDir: path_1.default.join(baseDir, "partitions"),
partitionCapacity: this.options.partitioning.partitionCapacity,
maxActivePartitions: this.options.partitioning.maxActivePartitions,
autoCreatePartitions: this.options.partitioning.autoCreatePartitions,
autoLoadPartitions: this.options.partitioning.autoLoadPartitions,
autoLoadHNSW: this.options.indexing.autoLoad, // Link autoLoadHNSW
vectorSize: this.options.vectorSize,
useCompression: this.options.clustering.useCompression,
clusterOptions: this.options.clustering,
});
// 3. Setup Event Listeners (Crucial to do this before waiting/loading)
this._setupEventListeners();
// 4. Wait for PartitionedVectorDB to be ready
console.log('[Database] Waiting for PartitionedVectorDB to become ready...');
// Check if IsReady method is available
if (this.vectorDB.IsReady()) {
// Proceed if the database is ready
console.log('[Database] PartitionedVectorDB is ready.');
}
else {
// Fallback: Wait on its internal initialization promise if accessible (less ideal)
await this.vectorDB.initializationPromise; // Adjust if name differs
}
console.log('[Database] PartitionedVectorDB is ready.');
// 5. Initialize UnifiedSearch (after vectorDB is ready)
console.log('[Database] Initializing UnifiedSearch...');
this.unifiedSearch = new unified_search_1.UnifiedSearch(this.vectorDB, {
// Pass any UnifiedSearch specific config if needed
debug: this.options.monitoring.logToConsole,
});
this._setupUnifiedSearchListeners(); // Setup listeners specific to unified search
// 6. Optional: Load/Build Indices on Start
if (this.options.indexing.buildOnStart) {
console.log('[Database] Loading/Building indices based on buildOnStart option...');
await this._handleInitialIndexing();
}
// 7. Start Monitoring
this.monitor?.start();
// 8. Setup Auto-Save (using vectorDB's save method)
if (!this.isClosed && this.options.persistence.saveIntervalMs && this.options.persistence.saveIntervalMs > 0) {
this._setupAutoSave();
}
// --- Initialization Complete ---
this.isReady = true;
console.log('[Database] initialization successful. Ready for operations.');
try {
const initialStats = await this.getStats(); // Get initial stats
console.log(`[Database] Initial State: Partitions Configured: ${initialStats.database?.partitions?.totalConfigured}, Loaded: ${initialStats.database?.partitions?.loadedCount}`);
}
catch (statsError) {
console.warn('[Database] Could not retrieve initial stats after ready:', statsError);
}
this.emit('ready', undefined);
}
catch (error) {
console.error('[Database] FATAL: Database initialization failed:', error);
this.isClosed = true; // Mark as closed on fatal error
this.monitor?.stop();
this.emit('error', {
message: `[Database] Database initialization failed: ${error.message}`,
error: error,
context: 'initialize',
});
// Propagate the error to reject the initializationPromise
throw error;
}
}
// --- Private Helper Methods ---
/** Ensures a directory exists. */
_ensureDirectoryExists(dirPath, purpose) {
try {
if (!(0, fs_1.existsSync)(dirPath)) {
(0, fs_1.mkdirSync)(dirPath, { recursive: true });
console.log(`[Database] Created ${purpose} directory: ${dirPath}`);
}
}
catch (error) {
throw new Error(`[Database] Failed to create ${purpose} directory at ${dirPath}: ${error.message}`);
}
}
/** Handles loading and/or building indices during startup based on options. */
async _handleInitialIndexing() {
try {
if (this.options.indexing.autoLoad) {
console.log('[Database] Attempting to load existing HNSW indices...');
await this.vectorDB.loadHNSWIndices(); // Load all available
console.log('Finished loading existing HNSW indices.');
}
// Decide if building is needed (e.g., if autoLoad failed or forced build)
// This logic might need refinement based on specific needs.
// For now, buildOnStart implies potentially building *after* loading.
console.log('[Database] Checking if initial index build is required...');
// Simple approach: just build if buildOnStart is true.
// More complex: Check if loaded indices cover all partitions, etc.
const progressCallback = (progress) => {
console.log(`[Database] Progress: ${progress}%`);
this.emit('index:progress', {
message: 'Building initial indices...',
progress: progress, // Placeholder for actual progress
});
};
await this.buildIndexes(undefined, {
force: true, // Skip initial check
dimensionAware: true,
progressCallback,
}); // Build for all loaded partitions if needed
console.log('[Database] Initial index build process completed (if applicable).');
}
catch (err) {
const message = err instanceof Error ? err.message : 'Unknown indexing error';
console.warn(`[Database] Warning during initial index handling: ${message}`);
this.emit('warn', {
message: `Initial index handling issue: ${message}`,
context: 'handleInitialIndexing',
error: err,
});
// Decide if this should be a fatal error
}
}
/** Sets up internal event listeners for PartitionedVectorDB events. */
_setupEventListeners() {
if (!this.vectorDB || typeof this.vectorDB.on !== 'function')
return;
const dbEmitter = this.vectorDB;
// Generic handler to forward events, clear cache, and update monitor
const handleDbEvent = (eventName, data) => {
if (this.isClosed)
return;
// Clear search cache on any data modification or structural change
const shouldClearCache = [
'vector:add',
'vector:delete',
'vectors:bulkAdd', // Assuming bulkAdd event exists
'partition:created',
'partition:loaded', // Maybe not necessary, but safer
'partition:unloaded', // Definitely clear if partition goes away
].includes(eventName);
if (shouldClearCache) {
this.searchCache.clear();
}
// Forward the event
this.emit(eventName, data);
// Update monitor DB metrics on relevant changes
const shouldUpdateMetrics = ['vector:add', 'vector:delete', 'vectors:bulkAdd', 'partition:loaded', 'partition:unloaded'].includes(eventName);
if (shouldUpdateMetrics) {
this._updateMonitorDbMetrics(); // Async update
}
// Record specific events/errors in monitor
if (this.monitor) {
if (['vector:add', 'vector:delete', 'partition:created'].includes(eventName)) {
this.monitor.recordEvent(eventName, data);
}
if (eventName === 'partition:error') {
this.monitor.recordError('partition_error', data);
}
}
};
// Register listeners
dbEmitter.on('vector:add', (data) => handleDbEvent('vector:add', data));
dbEmitter.on('vector:delete', (data) => handleDbEvent('vector:delete', data));
// Assuming PartitionedVectorDB might emit bulk add completion:
dbEmitter.on('vectors:bulkAdd', (data) => handleDbEvent('vectors:bulkAdd', data));
dbEmitter.on('partition:created', (data) => handleDbEvent('partition:created', data));
dbEmitter.on('partition:loaded', (data) => handleDbEvent('partition:loaded', data));
dbEmitter.on('partition:unloaded', (data) => handleDbEvent('partition:unloaded', data));
dbEmitter.on('partition:activated', (data) => handleDbEvent('partition:activated', data));
dbEmitter.on('partition:error', (data) => handleDbEvent('partition:error', data));
dbEmitter.on('config:saved', (data) => handleDbEvent('config:saved', data)); // Forward config save events
dbEmitter.on('db:saved', (data) => handleDbEvent('db:saved', data)); // Forward full save events
dbEmitter.on('db:loaded', (data) => handleDbEvent('db:loaded', data)); // Forward load events
// Forward indexing events
dbEmitter.on('partition:indexProgress', (data) => this.emit('index:progress', data));
dbEmitter.on('partition:indexed', (data) => this.emit('partition:indexed', data));
dbEmitter.on('partition:indexLoaded', (data) => this.emit('partition:indexLoaded', data));
dbEmitter.on('partition:indexSaved', (data) => this.emit('partition:indexSaved', data));
// Handle close event from underlying DB
dbEmitter.on('db:close', () => {
console.log('Underlying PartitionedVectorDB reported close. Closing high-level DB.');
this.close().catch((err) => console.error('Error during close triggered by underlying DB:', err));
});
}
/** Sets up listeners for UnifiedSearch events. */
_setupUnifiedSearchListeners() {
if (!this.unifiedSearch)
return;
this.unifiedSearch.on('search:start', (data) => {
// Potentially useful for detailed logging or request tracking
this.emit('search:start', data);
});
this.unifiedSearch.on('search:complete', (data) => {
if (this.monitor) {
this.monitor.recordSearch({
duration: data.totalTime,
method: data.dbMethodUsed,
results: data.resultCount,
cacheUsed: data.cacheUsed, // Pass cache info if available
});
// Update aggregate metrics
this.metrics.totalSearchTime += data.totalTime;
this.metrics.queries++;
this.metrics.avgSearchTime = this.metrics.totalSearchTime / this.metrics.queries;
this.metrics.queryTimes.push(data.totalTime);
if (this.metrics.queryTimes.length > 100)
this.metrics.queryTimes.shift(); // Keep last 100 times
}
this.emit('search:complete', data); // Forward event
});
this.unifiedSearch.on('search:error', (data) => {
if (this.monitor) {
this.monitor.recordError('search_error', data);
}
this.emit('search:error', data); // Forward error
});
}
/** Pushes current DB stats to the monitor asynchronously. */
async _updateMonitorDbMetrics() {
if (!this.monitor || !this.options.monitoring.enableDatabaseMetrics)
return;
// Prevent updates if DB isn't ready or is closing
if (!this.vectorDB || !this.isReady || this.isClosed)
return;
try {
// Use peek to avoid potentially high cost of getStats if called frequently
const stats = await this.vectorDB.getStats();
this.monitor.updateDatabaseMetrics({
vectorCount: stats.vectors.totalInMemory,
memoryUsageBytes: stats.memory.estimatedUsageBytes,
partitionCount: stats.partitions.loadedCount,
// Add more metrics from PartitionedDBStats if needed
});
}
catch (error) {
console.warn('Failed to update monitor DB metrics:', error);
}
}
/** Sets up the auto-save interval timer. */
_setupAutoSave() {
if (this.autoSaveTimer)
clearInterval(this.autoSaveTimer);
console.log(`Setting up auto-save via vectorDB.save() every ${this.options.persistence.saveIntervalMs}ms`);
this.autoSaveTimer = setInterval(async () => {
if (this.isClosed || !this.isReady)
return;
console.log('Auto-save triggered...');
try {
// Delegate saving entirely to PartitionedVectorDB
await this.vectorDB.save();
console.log('Auto-save completed successfully.');
}
catch (error) {
console.error('Auto-save failed:', error);
this.emit('error', {
message: `Auto-save failed: ${error.message}`,
error: error,
context: 'AutoSave',
});
}
}, this.options.persistence.saveIntervalMs);
this.autoSaveTimer.unref(); // Allow process to exit if this is the only timer
}
/** Generates a cache key for search results. */
_getCacheKey(query, k, options) {
const vectorHash = this._hashVector(query);
// Include all options that influence the search result
const optionsKey = JSON.stringify({
k,
filter: options.filter ? 'present' : 'absent', // Simplify filter representation
distanceMetric: options.distanceMetric ?? 'default', // Use default marker
efSearch: options.efSearch,
useHNSW: options.useHNSW,
rerank: options.rerank,
rerankingMethod: options.rerankingMethod,
partitionIds: options.partitionIds?.sort(), // Sort for consistency
searchMethod: options.searchMethod,
});
return `${vectorHash}::${optionsKey}`;
}
/** Simple, fast vector hash (not cryptographically secure). */
_hashVector(vector) {
let hash = 0;
// Sample fewer points for potentially faster hashing
const samples = Math.min(vector.length, 16);
const step = Math.max(1, Math.floor(vector.length / samples));
for (let i = 0; i < vector.length; i += step) {
// Combine value and index for slightly better distribution
const val = Math.round((vector[i] || 0) * 1000); // Use 1000x scaling
hash = (hash << 5) - hash + val + i;
hash |= 0; // Convert to 32bit integer
}
return hash.toString(36); // Use base 36 for shorter string
}
// --- Public API Methods ---
/** Checks if the database is initialized and ready for operations. */
IsReady() {
console.log('[Database] Checking if database is ready...');
console.log('[Database] isReady:', this.isReady);
console.log('[Database] isClosed:', this.isClosed);
return this.isReady && !this.isClosed;
}
/** Throws an error if the database is not ready or closed. */
async _assertReady(operation = 'Operation') {
console.log('[Database] Asserting readiness for operation:', operation);
if (this.isClosed)
throw new Error(`Database is closed. Cannot perform ${operation}.`);
if (!this.isReady) {
console.log(`[Database] Waiting for database readiness before performing ${operation}...`);
await this.initializationPromise;
// Re-check after waiting
if (this.isClosed)
throw new Error(`Database was closed during initialization wait. Cannot perform ${operation}.`);
if (!this.isReady)
throw new Error(`Database initialization failed. Cannot perform ${operation}.`);
}
}
/**
* Adds a vector to the appropriate partition.
* @returns An object containing the partitionId and the vectorId.
*/
async addVector(id, vector, metadata) {
console.log(`Adding vector with ID ${id ?? 'auto'}...`);
await this._assertReady('addVector');
this.timer.start('addVector');
try {
const result = await this.vectorDB.addVector(id, vector, metadata);
const duration = this.timer.stop('addVector');
this.metrics.totalAddTime += duration.total;
console.log(`Vector added successfully with ID ${result.vectorId}.`);
return result;
}
catch (error) {
this.timer.stop('addVector');
console.error(`Error in addVector (ID: ${id ?? 'auto'}):`, error);
this.emit('error', {
message: `Add vector failed: ${error.message}`,
error: error,
context: 'addVector',
});
throw error; // Re-throw original error
}
}
/**
* Bulk adds vectors, handling partitioning automatically.
*/
async bulkAdd(vectors) {
await this._assertReady('bulkAdd');
this.timer.start('bulkAdd');
try {
const result = await this.vectorDB.bulkAdd(vectors);
this.timer.stop('bulkAdd');
return result;
}
catch (error) {
this.timer.stop('bulkAdd');
console.error(`Error during bulk add:`, error);
this.emit('error', {
message: `Bulk add failed: ${error.message}`,
error: error,
context: 'bulkAdd',
});
throw error;
}
}
/** Deletes a vector from the partition it resides in. */
async deleteVector(id) {
await this._assertReady('deleteVector');
this.timer.start('deleteVector');
try {
const deleted = await this.vectorDB.deleteVector(id);
this.timer.stop('deleteVector');
return deleted;
}
catch (error) {
this.timer.stop('deleteVector');
console.error(`Error deleting vector ${id}:`, error);
this.emit('error', {
message: `Delete vector failed for ${id}: ${error.message}`,
error: error,
context: 'deleteVector',
});
throw error;
}
}
/** Checks if a vector exists in any loaded partition. */
async hasVector(id) {
await this._assertReady('hasVector');
const result = await this.vectorDB.getVector(id);
return result !== null;
}
/** Retrieves a vector by searching loaded partitions. */
async getVector(id) {
await this._assertReady('getVector');
// Directly delegate, no extra timing needed unless specific performance analysis required
return this.vectorDB.getVector(id);
}
/** Adds or updates metadata for a vector. Requires finding the vector first. */
async addMetadata(id, metadata) {
await this._assertReady('addMetadata');
this.timer.start('addMetadata');
try {
// Find the partition containing the vector
const vectorInfo = await this.vectorDB.getVector(id);
if (!vectorInfo) {
console.warn(`addMetadata: Vector ${id} not found.`);
this.timer.stop('addMetadata');
return false;
}
// Get the specific partition DB instance
const partition = await this.vectorDB.getPartition(vectorInfo.partitionId);
if (!partition) {
console.warn(`addMetadata: Partition ${vectorInfo.partitionId} for vector ${id} could not be loaded.`);
this.timer.stop('addMetadata');
return false;
}
// Add metadata using the partition's method
partition.addMetadata(id, metadata); // Assuming this is synchronous or handled internally
this.searchCache.clear(); // Clear cache as metadata might affect filtering
this.timer.stop('addMetadata');
return true;
}
catch (error) {
this.timer.stop('addMetadata');
console.error(`Error adding/updating metadata for vector ${id}:`, error);
this.emit('error', {
message: `Add/Update metadata failed for ${id}: ${error.message}`,
error: error,
context: 'addMetadata',
});
throw error;
}
}
/** Updates metadata using a callback or merging. Requires finding the vector first. */
async updateMetadata(id, metadataUpdate) {
await this._assertReady('updateMetadata');
this.timer.start('updateMetadata');
try {
const vectorInfo = await this.vectorDB.getVector(id);
if (!vectorInfo) {
console.warn(`updateMetadata: Vector ${id} not found.`);
this.timer.stop('updateMetadata');
return false;
}
const partition = await this.vectorDB.getPartition(vectorInfo.partitionId);
if (!partition) {
console.warn(`updateMetadata: Partition ${vectorInfo.partitionId} for vector ${id} could not be loaded.`);
this.timer.stop('updateMetadata');
return false;
}
partition.updateMetadata(id, metadataUpdate);
this.searchCache.clear();
this.timer.stop('updateMetadata');
return true;
}
catch (error) {
this.timer.stop('updateMetadata');
console.error(`Error updating metadata for vector ${id}:`, error);
this.emit('error', {
message: `Update metadata failed for ${id}: ${error.message}`,
error: error,
context: 'updateMetadata',
});
throw error;
}
}
/** Retrieves metadata by searching loaded partitions. */
async getMetadata(id) {
await this._assertReady('getMetadata');
return this.vectorDB.getMetadata(id);
}
/**
* Searches for metadata entries that match specific criteria across all loaded partitions.
*
* @param criteria Can be:
* - A string: field name to check for existence
* - An array of strings: multiple field names to check for existence
* - An object: key-value pairs where each key must exist and match the specified value
* @param values Optional value(s) to match against the field(s) when using string/array input
* @returns Array of objects with partitionId, vectorId, and metadata
*
* @example
* ```typescript
* // Get all entries with 'category' field
* const withCategory = await db.getMetadataWithField('category');
*
* // Get entries where type is 'article'
* const articles = await db.getMetadataWithField('type', 'article');
*
* // Get entries with both 'author' and 'publishDate' fields
* const authorsWithDates = await db.getMetadataWithField(['author', 'publishDate']);
*
* // Get entries where type='book' AND published=true (using arrays)
* const publishedBooks = await db.getMetadataWithField(['type', 'published'], ['book', true]);
*
* // Get entries where type='book' AND published=true (using object)
* const publishedBooks = await db.getMetadataWithField({ type: 'book', published: true });
* ```
*/
async getMetadataWithField(criteria, values, option) {
await this._assertReady('getMetadataWithField');
this.timer.start('getMetadataWithField');
try {
const results = await this.vectorDB.getMetadataWithFieldAcrossPartitions(criteria, values, option);
this.timer.stop('getMetadataWithField');
return results;
}
catch (error) {
this.timer.stop('getMetadataWithField');
console.error(`Error in getMetadataWithField:`, error);
this.emit('error', {
message: `Get metadata with field failed: ${error.message}`,
error: error,
context: 'getMetadataWithField',
});
throw error;
}
}
/**
* Performs a nearest neighbor search using the UnifiedSearch engine.
* Handles caching and concurrency limits.
*/
async findNearest(query, k, options = {}) {
console.log(`[Database] Searching for nearest vectors to query...`);
await this._assertReady('findNearest');
const operationTimer = (0, profiling_1.createTimer)();
operationTimer.start('findNearest_total');
const effectiveK = k ?? options.k ?? 10;
const searchOptions = { ...options, k: effectiveK };
// --- Cache Check ---
let cacheKey = null;
if (!searchOptions.skipCache) {
cacheKey = this._getCacheKey(query, effectiveK, searchOptions);
const cachedResults = this.searchCache.get(cacheKey);
if (cachedResults) {
this.metrics.cacheHits++;
this.monitor?.recordCacheHit();
operationTimer.stop('findNearest_total');
// UnifiedSearch event won't fire for cache hit, emit simple event here if needed
this.emit('search:cacheHit', { options: searchOptions, k: effectiveK });
return [...cachedResults]; // Return copy
}
this.metrics.cacheMisses++;
this.monitor?.recordCacheMiss();
}
// --- Concurrency Management ---
if (this.activeSearchPromises.size >= this.options.maxConcurrentSearches) {
this.timer.start('findNearest_wait');
console.log(`Search concurrency limit (${this.options.maxConcurrentSearches}) reached. Waiting...`);
try {
await Promise.race(this.activeSearchPromises); // Wait for any ongoing search to finish
}
catch {
/* Ignore errors from waiting */
}
this.timer.stop('findNearest_wait');
// Recheck limit after waiting (another search might have started)
if (this.activeSearchPromises.size >= this.options.maxConcurrentSearches) {
// If still full after waiting, throw or queue? Let's throw for now.
operationTimer.stop('findNearest_total');
throw new Error(`Search concurrency limit (${this.options.maxConcurrentSearches}) exceeded after wait.`);
}
}
// --- Execute Search via UnifiedSearch ---
const searchPromise = this.unifiedSearch.search(query, searchOptions);
this.activeSearchPromises.add(searchPromise);
try {
const results = await searchPromise;
// UnifiedSearch emits search:complete which updates metrics
// Cache results if cache was checked and not skipped
if (cacheKey && results.length > 0) {
this.searchCache.set(cacheKey, [...results]); // Store copy
}
operationTimer.stop('findNearest_total');
return results;
}
catch (error) {
operationTimer.stop('findNearest_total');
// Error event emitted by UnifiedSearch listener
console.error('Error during findNearest execution:', error);
throw error; // Re-throw
}
finally {
this.activeSearchPromises.delete(searchPromise); // Clean up promise tracking
}
}
/** Alias for findNearest */
async search(query, options = {}) {
return this.findNearest(query, options.k, options);
}
/**
* Saves the current state of the database (delegated to PartitionedVectorDB).
* This includes partition configurations, loaded partition data, and loaded HNSW indices.
*/
async save() {
console.log('[Database] Manual save requested...');
await this._assertReady('save');
console.log('[Database] Manual save requested. Delegating to PartitionedVectorDB...');
this.timer.start('save_database');
try {
await this.vectorDB.save(); // Delegate the comprehensive save
this.timer.stop('save_database');
console.log('Database save completed successfully.');
// db:saved event is emitted by PartitionedVectorDB listener
}
catch (error) {
this.timer.stop('save_database');
console.error('Manual save failed:', error);
this.emit('error', {
message: `Save failed: ${error.message}`,
error: error,
context: 'save',
});
throw error;
}
}
/**
* Builds HNSW indexes. Delegates to PartitionedVectorDB.
*/
async buildIndexes(partitionId, options) {
console.log(`[Database] Building HNSW index for partition ${partitionId ?? 'all loaded'}...`);
if (options?.force !== true)
await this._assertReady('buildIndexes');
console.log(`[Database] Requesting index build for ${partitionId ?? 'all loaded partitions'}...`);
this.timer.start('buildIndexes');
try {
const buildOptions = {
...this.options.indexing.hnsw, // Global defaults from Database options
...options, // Specific options for this call
};
await this.vectorDB.buildIndexHNSW(partitionId, buildOptions);
const duration = this.timer.stop('buildIndexes');
console.log(`Index build process finished in ${duration.total.toFixed(2)}ms for ${partitionId ?? 'relevant partitions'}.`);
// index:complete event emitted by listener
}
catch (error) {
const duration = this.timer.stop('buildIndexes');
console.error(`Index build failed after ${duration.total.toFixed(2)}ms for ${partitionId ?? 'partitions'}:`, error);
this.emit('error', {
message: `Index build failed: ${error.message}`,
error: error,
context: 'buildIndexes',
});
// index:error emitted by listener
throw error;
}
}
/** Closes the database, saves state, stops background tasks, and releases resources. */
async close() {
if (this.isClosed) {
console.log('Database already closed.');
return;
}
console.log('Closing database...');
this.isClosed = true; // Mark as closing immediately
this.isReady = false;
// Stop background tasks
if (this.autoSaveTimer)
clearInterval(this.autoSaveTimer);
this.autoSaveTimer = null;
// Stop monitoring
this.monitor?.stop();
// Wait for active searches? (Optional, might delay closing)
// console.log(`Waiting for ${this.activeSearchPromises.size} active searches to complete...`);
// await Promise.allSettled(this.activeSearchPromises);
this.activeSearchPromises.clear();
// Close UnifiedSearch
try {
this.unifiedSearch?.close(); // Assuming UnifiedSearch has a close method
}
catch (e) {
console.error('Error closing UnifiedSearch:', e);
}
// Close PartitionedVectorDB (this should handle saving its state)
if (this.vectorDB && typeof this.vectorDB.close === 'function') {
try {
console.log('Closing PartitionedVectorDB (will trigger final save)...');
await this.vectorDB.close();
}
catch (err) {
console.error('Error closing PartitionedVectorDB:', err.message);
// Continue closing process
}
}
// Clear search cache
this.searchCache.clear();
console.log('Database closed successfully.');
this.emit('close', undefined);
this.removeAllListeners(); // Clean up all listeners on this instance
}
// --- Getters and Utility Methods ---
/** Gets combined statistics about the database state, components, and system. */
async getStats() {
// No readiness check here, return best effort stats even if initializing/closing
let dbStats = null;
let searchStats = null;
if (this.vectorDB && typeof this.vectorDB.getStats === 'function') {
try {
dbStats = await this.vectorDB.getStats();
}
catch (e) {
console.warn('Failed to get PartitionedDB stats:', e);
}
}
if (this.unifiedSearch && typeof this.unifiedSearch.getStats === 'function') {
try {
searchStats = await this.unifiedSearch.getStats();
}
catch (e) {
console.warn('Failed to get UnifiedSearch stats:', e);
}
}
const cacheHitRate = this.metrics.cacheHits + this.metrics.cacheMisses > 0 ? this.metrics.cacheHits / (this.metrics.cacheHits + this.metrics.cacheMisses) : 0;
return {
state: {
isClosed: this.isClosed,
isReady: this.isReady,
status: this.isClosed ? 'closed' : this.isReady ? 'ready' : 'initializing',
},
database: dbStats, // Nullable if failed
search: searchStats?.search ?? null, // Access nested search stats, nullable
searchCache: {
size: this.searchCache.size,
capacity: this.options.cacheSize,
hits: this.metrics.cacheHits,
misses: this.metrics.cacheMisses,
hitRate: parseFloat((cacheHitRate * 100).toFixed(2)),
},
performance: {
queries: this.metrics.queries,
avgSearchTimeMs: this.metrics.queries > 0 ? parseFloat((this.metrics.totalSearchTime / this.metrics.queries).toFixed(2)) : 0,
cacheHitRate: parseFloat((cacheHitRate * 100).toFixed(2)),
concurrentSearches: this.activeSearchPromises.size,
// Include timer stats if needed, e.g., from this.timer.getSummary()
},
system: this.monitor?.getSystemMetrics(),
memoryUsage: process.memoryUsage(),
options: {
vectorSize: this.options.vectorSize || 0,
partitionsDir: this.options.partitioning.partitionsDir,
partitionCapacity: this.options.partitioning.partitionCapacity,
maxActivePartitions: this.options.partitioning.maxActivePartitions,
cacheSize: this.options.cacheSize,
maxConcurrentSearches: this.options.maxConcurrentSearches || 0,
autoSaveIntervalMs: this.options.persistence.saveIntervalMs || 0,
monitoringEnabled: this.options.monitoring.enable || false,
},
};
}
/** Gets the underlying PartitionedVectorDB instance. Use with caution. */
getVectorDB() {
if (!this.isReady && !this.isClosed)
console.warn('Accessing VectorDB instance before Database is fully ready.');
if (this.isClosed)
throw new Error('Cannot access VectorDB: Database is closed.');
return this.vectorDB;
}
/** Gets the UnifiedSearch instance. */
getUnifiedSearch() {
if (!this.isReady && !this.isClosed)
console.warn('Accessing UnifiedSearch instance before Database is fully ready.');
if (this.isClosed)
throw new Error('Cannot access UnifiedSearch: Database is closed.');
return this.unifiedSearch;
}
/** Gets the total count of vectors across all configured partitions. */
async getTotalVectorCount() {
await this._assertReady('getTotalVectorCount');
const stats = await this.vectorDB.getStats();
return stats?.vectors?.totalConfigured ?? 0;
}
/** Gets the count of vectors currently loaded in memory. */
async getInMemoryVectorCount() {
await this._assertReady('getInMemoryVectorCount');
const stats = await this.vectorDB.getStats();
return stats?.vectors?.totalInMemory ?? 0;
}
/** Gets the number of partitions currently loaded in memory. */
async getLoadedPartitionCount() {
await this._assertReady('getLoadedPartitionCount');
const stats = await this.vectorDB.getStats();
return stats?.partitions?.loadedCount ?? 0;
}
/** Gets the IDs of the partitions currently loaded in memory. */
async getLoadedPartitionIds() {
await this._assertReady('getLoadedPartitionIds');
const stats = await this.vectorDB.getStats();
return stats?.partitions?.loadedIds ?? [];
}
/**
* Extract relationships between vectors based on distance threshold across all loaded partitions.
*
* @param threshold - The maximum distance between vectors to consider them related
* @param options - Options including distance metric, partition filtering, and metadata inclusion
* @returns An array of relationships with vectorIds, partitionIds, optional metadata, and distances
*/
async extractRelationships(threshold, options = {}) {
await this._assertReady('extractRelationships');
this.timer.start('extractRelationships');
try {
console.log(`[Database] Extracting relationships with threshold ${threshold}...`);
const relationships = await this.vectorDB.extractRelationships(threshold, options);
const duration = this.timer.stop('extractRelationships');
console.log(`[Database] Extracted ${relationships.length} relationships in ${duration.total.toFixed(2)}ms`);
return relationships;
}
catch (error) {
this.timer.stop('extractRelationships');
console.error(`[Database] Error extracting relationships:`, error);
this.emit('error', {
message: `Extract relationships failed: ${error.message}`,
error,
context: 'extractRelationships',
});
throw error;
}
}
/**
* Extract communities of related vectors based on distance threshold across all loaded partitions.
* A community is a group of vectors where each vector is related to at least one other vector in the group.
*
* @param threshold - The maximum distance between vectors to consider them related
* @param options - Options including distance metric and partition filtering
* @returns An array of communities, where each community is an array of related vector information
*/
async extractCommunities(threshold, options = {}) {
await this._assertReady('extractCommunities');
this.timer.start('extractCommunities');
try {
// Determine which partitions to process
let partitionIds = options.partitionIds;
if (!partitionIds || partitionIds.length === 0) {
const stats = await this.vectorDB.getStats();
partitionIds = stats.partitions.loadedIds;
}
console.log(`[Database] Extracting vector communities across ${partitionIds.length} partitions with threshold ${threshold}...`);
// Delegate to the vectorDB implementation
const communities = await this.vectorDB.extractCommunities(threshold, {
metric: options.metric,
partitionIds,
includeMetadata: options.includeMetadata ?? true,
});
const duration = this.timer.stop('extractCommunities');
console.log(`[Database] Extracted ${communities.length} communities with ${communities.reduce((sum, c) => sum + c.length, 0)} total vectors in ${duration.total.toFixed(2)}ms`);
return communities;
}
catch (error) {
this.timer.stop('extractCommunities');
console.error(`[Database] Error extracting communities:`, error);
this.emit('error', {
message: `Extract communities failed: ${error.message}`,
error,
context: 'extractCommunities',
});
throw error;
}
}
}
exports.Database = Database;
//# sourceMappingURL=database.js.map