// Package: @n2flowjs/nbase
// Version: (unspecified)
// Description: Neural Vector Database for efficient similarity search
// Size: 369 lines • 17.3 kB
// Language: JavaScript
;
// --- START OF FILE unified_search.ts ---
Object.defineProperty(exports, "__esModule", { value: true });
exports.UnifiedSearch = void 0;
const events_1 = require("events");
const reranking_1 = require("./reranking");
const profiling_1 = require("../utils/profiling");
/**
* UnifiedSearch provides a consistent search interface, now leveraging PartitionedVectorDB
* for scalability with large datasets, using refined type definitions.
*/
/**
* A unified search interface that provides search capabilities across partitioned vector databases.
*
* @class UnifiedSearch
* @extends {EventEmitter}
* @description
* UnifiedSearch wraps a partitioned vector database to provide a unified search API
* with advanced features like search method selection (HNSW/clustered), reranking,
* metadata fetching, and performance tracking.
*
* The class handles:
* - Vector similarity search using partitioned vector databases
* - Automatic method selection between HNSW and clustered search
* - Optional result reranking for diversity or other criteria
* - Metadata fetching and inclusion in results
* - Performance metrics and statistics
*
* @fires UnifiedSearch#search:complete - Emitted when a search completes successfully
* @fires UnifiedSearch#search:error - Emitted when a search encounters an error
* @fires UnifiedSearch#search:closed - Emitted when the search engine is closed
*
* @example
* ```typescript
* // Create a UnifiedSearch instance with a partitioned vector database
* const search = new UnifiedSearch(vectorDb, { debug: true });
*
* // Perform a search with unified options
* const results = await search.search(queryVector, {
* k: 20,
* rerank: true,
* rerankingMethod: 'diversity',
* includeMetadata: true
* });
* ```
*/
class UnifiedSearch extends events_1.EventEmitter {
    /**
     * @param db Already-configured database instance. Its capabilities are probed at call
     *   time (`findNearestHNSW`, `findNearest`, `getVector`, `getMetadata`, `getStats`, `close`),
     *   so any of these may be absent.
     * @param options Optional settings; only `options.debug` is read here.
     */
    constructor(db, options = {}) {
        super();
        this.reranker = null;
        this.debug = false;
        this.db = db;
        this.debug = options.debug || false;
        this.timer = (0, profiling_1.createTimer)();
        // Initialize reranker
        this.reranker = new reranking_1.SearchReranker();
        // Per-instance counters reported through getStats()
        this.searchStats = {
            calls: 0,
            totalTime: 0,
            avgTime: 0,
            methodCounts: {
                'partitioned-hnsw': 0,
                'partitioned-clustered': 0,
            },
            lastSearchTime: 0,
            errors: 0,
        };
        // (Optional) Events emitted by the database (e.g. 'partition:loaded', 'partition:error')
        // could be forwarded from here; no forwarding is currently wired up.
    }

    /**
     * Fetch the stored vectors for a list of result IDs (used as reranker input).
     *
     * Lookups run concurrently. A failed or empty lookup is skipped (and logged in debug
     * mode) rather than failing the whole batch.
     *
     * @param ids Result IDs to look up.
     * @returns Map of id -> vector, containing only the IDs whose lookup yielded a vector.
     *   Empty when `ids` is empty or the database has no `getVector` method.
     */
    async _getVectorsForResults(ids) {
        const vectorsMap = new Map();
        if (ids.length === 0 || typeof this.db.getVector !== 'function') {
            return vectorsMap;
        }
        const lookups = ids.map(async (id) => {
            try {
                // Only the `vector` field of the lookup result is read.
                const result = await this.db.getVector(id);
                if (result?.vector) {
                    vectorsMap.set(id, result.vector);
                }
                else if (this.debug) {
                    console.warn(`Vector not found for ID ${id} during rerank fetch.`);
                }
            }
            catch (error) {
                if (this.debug)
                    console.error(`Failed to get vector for ID ${id}:`, error);
            }
        });
        await Promise.all(lookups);
        return vectorsMap;
    }

    /**
     * Race a pending operation against a timeout.
     *
     * @param promise The pending operation (a plain value is accepted as well).
     * @param timeoutMs Timeout in milliseconds. Anything other than a finite number > 0
     *   disables the timeout and `promise` is returned untouched.
     * @returns A promise that settles like `promise`, or rejects with an `Error` if
     *   `timeoutMs` elapses first. The underlying operation is NOT cancelled.
     * @private
     */
    _withTimeout(promise, timeoutMs) {
        if (typeof timeoutMs !== 'number' || !Number.isFinite(timeoutMs) || timeoutMs <= 0) {
            return promise;
        }
        let handle;
        const deadline = new Promise((_, reject) => {
            handle = setTimeout(() => reject(new Error(`Search timed out after ${timeoutMs}ms`)), timeoutMs);
        });
        // Always clear the timer so a fast search does not keep the event loop alive.
        return Promise.race([promise, deadline]).finally(() => clearTimeout(handle));
    }

    /**
     * Search for nearest neighbors through the wrapped database using unified options.
     *
     * Steps: (1) run `findNearestHNSW` (preferred) or `findNearest`, (2) optionally rerank,
     * (3) optionally attach metadata, (4) update stats and emit `search:complete`.
     *
     * @param query Query vector, passed through to the database and the reranker.
     * @param options Unified search options. Fields read here: `k` (default 10), `filter`,
     *   `includeMetadata` (default false), `distanceMetric`, `partitionIds`, `efSearch`,
     *   `useHNSW` (default true), `rerank` (default false), `rerankingMethod`
     *   (default 'diversity'), `rerankLambda` (default 0.7), `searchTimeoutMs`.
     * @returns The final result list (reranked and/or capped at `k`).
     * @throws Re-throws any failure after recording it and emitting `search:error`;
     *   this includes a timeout when `searchTimeoutMs` is set, and the case where the
     *   database exposes neither search method.
     */
    async search(query, options = {}) {
        const timer = this.timer; // Class-level timer, shared by all searches on this instance
        timer.start('unified_search_total');
        const {
            // Base search options
            k = 10, // TODO: Consider getting default K from DB/config if possible
            filter,
            includeMetadata = false,
            distanceMetric, // Can override the DB's default metric for this query
            // Execution options
            partitionIds,
            efSearch, // Only meaningful for HNSW search
            // UnifiedSearch-specific options
            useHNSW = true,
            rerank = false,
            rerankingMethod = 'diversity',
            searchTimeoutMs, // Optional upper bound for the database search step
        } = options;
        if (this.debug) {
            console.log('UnifiedSearch options received:', options);
        }
        let results = [];
        let methodUsed = 'unknown';
        const searchStartTime = Date.now();
        try {
            // --- 1. Database Search ---
            timer.start('db_search');
            const dbSearchOptions = {
                k,
                filter,
                includeMetadata: false, // Metadata is attached later, only for the final results
                distanceMetric,
                partitionIds,
                efSearch,
            };
            let searchPromise;
            const canUseHNSW = useHNSW && typeof this.db.findNearestHNSW === 'function';
            if (canUseHNSW) {
                methodUsed = 'partitioned-hnsw';
                if (this.debug)
                    console.log(`Using ${methodUsed} search with efSearch=${efSearch}...`);
                searchPromise = this.db.findNearestHNSW(query, k, { ...dbSearchOptions });
            }
            else if (typeof this.db.findNearest === 'function') {
                methodUsed = 'partitioned-clustered';
                if (this.debug)
                    console.log(`Using ${methodUsed} search..`);
                // efSearch does not apply to clustered search, so it is left out entirely.
                const { efSearch: _hnswOnly, ...clusteredOptions } = dbSearchOptions;
                searchPromise = this.db.findNearest(query, k, clusteredOptions);
            }
            else {
                throw new Error('No suitable search method (findNearestHNSW or findNearest) available in the database.');
            }
            // Enforce the optional timeout; without searchTimeoutMs this is a plain await.
            results = await this._withTimeout(searchPromise, searchTimeoutMs);
            const dbSearchTime = timer.stop('db_search').total ?? 0;
            if (this.debug)
                console.log(`${methodUsed} search completed in ${dbSearchTime}ms, found ${results.length} raw results.`);

            // --- 2. Reranking (Optional) ---
            let rerankTime = 0;
            let finalResults = results;
            const needMetadataForRerankOrOutput = includeMetadata || (rerank && rerankingMethod === 'weighted');
            let metadataMap;
            if (rerank && this.reranker && results.length > 1) {
                const candidateIds = results.map((r) => r.id);
                timer.start('fetch_vectors_for_rerank');
                if (this.debug)
                    console.log(`Fetching ${results.length} vectors for ${rerankingMethod} reranking...`);
                const vectorsMap = await this._getVectorsForResults(candidateIds);
                if (this.debug)
                    console.log(`Fetched ${vectorsMap.size} vectors.`);
                timer.stop('fetch_vectors_for_rerank');
                // Fetch metadata BEFORE starting the 'rerank' timer so that rerankTime
                // measures the reranker only, not metadata I/O.
                if (needMetadataForRerankOrOutput) {
                    if (this.debug)
                        console.log('Fetching metadata for reranking/output...');
                    metadataMap = await this._getMetadataForResults(candidateIds);
                    if (this.debug)
                        console.log(`Fetched metadata for ${metadataMap.size} IDs.`);
                }
                timer.start('rerank');
                const rerankOptions = {
                    method: rerankingMethod,
                    k: k,
                    queryVector: query,
                    vectorsMap: vectorsMap,
                    lambda: options.rerankLambda ?? 0.7,
                    distanceMetric: distanceMetric ?? 'euclidean',
                    // NOTE(review): metadataMap is fetched for 'weighted' reranking but is not
                    // passed to the reranker here - confirm whether the reranker expects it.
                };
                finalResults = this.reranker.rerank(results, rerankOptions);
                rerankTime = timer.stop('rerank').total ?? 0;
                if (this.debug)
                    console.log(`Reranking completed in ${rerankTime}ms. Results after rerank: ${finalResults.length}`);
            }
            else {
                // Without reranking, just make sure no more than k results are returned.
                finalResults = results.slice(0, k);
            }

            // --- 3. Add Metadata (if requested) ---
            if (includeMetadata) {
                // Check every result, not just the first one: a partially populated list
                // must still get metadata for the entries that lack it.
                const missingMeta = finalResults.filter((r) => r && !r.metadata);
                if (missingMeta.length > 0) {
                    timer.start('fetch_metadata');
                    if (this.debug)
                        console.log('Fetching metadata for final output...');
                    // Reuse the map fetched for reranking when there is one.
                    const finalMetadataMap = metadataMap ?? (await this._getMetadataForResults(missingMeta.map((r) => r.id)));
                    if (this.debug)
                        console.log(`Fetched metadata for ${finalMetadataMap.size} IDs.`);
                    for (const result of missingMeta) {
                        const meta = finalMetadataMap.get(result.id);
                        if (meta) {
                            result.metadata = meta;
                        }
                    }
                    timer.stop('fetch_metadata');
                }
                else if (finalResults.length > 0) {
                    if (this.debug)
                        console.log('Metadata already present in results (likely from reranking fetch).');
                }
            }

            // --- 4. Finalize Stats and Emit Event ---
            const totalSearchTime = timer.stop('unified_search_total').total ?? 0;
            const stats = this.searchStats;
            stats.calls++;
            stats.methodCounts[methodUsed] = (stats.methodCounts[methodUsed] || 0) + 1;
            stats.totalTime += totalSearchTime;
            stats.avgTime = stats.totalTime / stats.calls;
            stats.lastSearchTime = totalSearchTime;
            stats.lastSearchTimestamp = new Date();
            this.emit('search:complete', {
                method: methodUsed,
                searchOnlyTime: dbSearchTime,
                rerankTime,
                totalTime: totalSearchTime,
                resultCount: finalResults.length,
                kRequested: k,
                optionsUsed: options, // Original options, for context
            });
            if (this.debug) {
                console.log(`UnifiedSearch completed in ${totalSearchTime}ms (DB: ${dbSearchTime}ms, Rerank: ${rerankTime}ms). Method: ${methodUsed}. Returning ${finalResults.length} results.`);
            }
            return finalResults;
        }
        catch (error) {
            // Stop the total timer; fall back to wall-clock time if it reports no total.
            const totalSearchTimeOnError = timer.stop('unified_search_total').total ?? Date.now() - searchStartTime;
            const errorMessage = error instanceof Error ? error.message : String(error);
            console.error(`UnifiedSearch error after ${totalSearchTimeOnError}ms using method ${methodUsed}:`, errorMessage, error);
            this.searchStats.errors++;
            this.searchStats.lastError = error instanceof Error ? error : new Error(errorMessage);
            this.emit('search:error', {
                error: this.searchStats.lastError,
                method: methodUsed,
                options,
                totalTime: totalSearchTimeOnError,
            });
            // Re-throw so the caller can handle it
            throw error;
        }
    }

    /**
     * Fetch metadata for a list of result IDs.
     *
     * Lookups run concurrently through `this.db.getMetadata(id)`; only the `metadata` field
     * of each lookup result is read. Failed or empty lookups are skipped (and logged in
     * debug mode).
     *
     * Timing uses the module-level `operationTimer` declared at the bottom of this file,
     * not `this.timer`.
     *
     * @param ids Result IDs to look up.
     * @returns Map of id -> metadata for the IDs whose lookup returned a defined `metadata`.
     *   Empty when `ids` is empty or the database has no `getMetadata` method.
     * @private
     */
    async _getMetadataForResults(ids) {
        const metadataMap = new Map();
        if (ids.length === 0 || typeof this.db.getMetadata !== 'function') {
            return metadataMap;
        }
        operationTimer.start('fetch_metadata_batch');
        if (this.debug)
            console.log(`Fetching metadata for ${ids.length} IDs...`);
        const lookups = ids.map(async (id) => {
            try {
                const result = await this.db.getMetadata(id);
                if (result?.metadata !== undefined) {
                    metadataMap.set(id, result.metadata);
                }
                else if (this.debug) {
                    console.warn(`Metadata not found for ID ${id}.`);
                }
            }
            catch (error) {
                // Best effort: report in debug mode and keep fetching the other IDs.
                if (this.debug)
                    console.error(`Failed to get metadata for ID ${id}:`, error);
            }
        });
        await Promise.all(lookups);
        operationTimer.stop('fetch_metadata_batch');
        if (this.debug)
            console.log(`Metadata fetch batch completed in ${operationTimer.getElapsed('fetch_metadata_batch')}ms`);
        return metadataMap;
    }

    /**
     * Get search engine statistics, including whatever the database reports.
     *
     * A missing or failing `db.getStats()` is logged and results in an empty `database`
     * section; it never rejects.
     *
     * @returns Object with `search` (a shallow copy of the search counters), `database`
     *   (the stats returned by the database, or `{}`) and `reranker.available`.
     */
    async getStats() {
        let dbStats = {};
        try {
            if (typeof this.db.getStats === 'function') {
                dbStats = await this.db.getStats();
            }
            else {
                console.warn('Database instance does not provide a getStats() method.');
            }
        }
        catch (error) {
            console.error('Failed to get stats from database:', error);
        }
        return {
            search: { ...this.searchStats },
            database: dbStats,
            reranker: {
                available: this.reranker !== null,
            },
        };
    }

    /**
     * Close the underlying database (when it exposes `close()`) and emit `search:closed`.
     */
    async close() {
        if (this.debug)
            console.log('Closing UnifiedSearch...');
        if (typeof this.db.close === 'function') {
            await this.db.close();
        }
        else {
            console.warn('Database instance does not provide a close() method.');
        }
        this.emit('search:closed');
        if (this.debug)
            console.log('UnifiedSearch closed.');
    }
}
// Public CommonJS export of the class defined above.
exports.UnifiedSearch = UnifiedSearch;
// Module-level timer, created once when this module is loaded and therefore shared by every
// UnifiedSearch instance. UnifiedSearch#_getMetadataForResults uses it for its
// 'fetch_metadata_batch' section. It is declared after the class; the class methods only
// reference it when they are called, i.e. after this line has run.
// This is a simple approach; a more robust solution might inject the timer or use a separate instance.
const operationTimer = (0, profiling_1.createTimer)();
// --- END OF FILE unified_search.ts ---
//# sourceMappingURL=unified_search.js.map