@knath2000/codebase-indexing-mcp
Version: (not captured in this excerpt)
MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage
115 lines (114 loc) • 3.91 kB
TypeScript
import { EmbeddingVector, SearchQuery, SearchResult } from '../types.js';
/**
 * Ambient declaration for the Qdrant-backed vector store client.
 *
 * Wraps a Qdrant collection used to persist and query code-chunk embeddings:
 * collection lifecycle (create/recreate, payload indexes), point CRUD
 * (store/delete/fetch), dense similarity search, a keyword-based fallback
 * search, and simple request-latency bookkeeping.
 *
 * NOTE(review): this is a `.d.ts` file — member bodies live elsewhere; all
 * behavioral comments below describe the declared contract only.
 */
export declare class QdrantVectorClient {
// Underlying Qdrant SDK client instance (type erased in this declaration).
private client;
// Qdrant server URL, as passed to the constructor.
private url;
// Optional API key; undefined for unauthenticated deployments.
private apiKey;
// Name of the collection all operations target.
private collectionName;
// Expected dimensionality of stored embedding vectors.
private embeddingDimension;
// Time budget for keywordSearch, in milliseconds (constructor default applies when omitted).
private keywordTimeoutMs;
// Cap on chunks scanned by keywordSearch (constructor default applies when omitted).
private keywordMaxChunks;
// Recorded request durations — presumably fed by addRequestDuration and read by getAverageLatency.
private requestDurations;
// Retention cap for requestDurations, bounding memory use.
private maxDurationsToStore;
/**
 * @param url Qdrant server URL (e.g. http://localhost:6333).
 * @param apiKey Optional API key; pass undefined for servers without auth.
 * @param collectionName Collection to create/use for embeddings.
 * @param embeddingDimension Vector size the collection schema is built for.
 * @param keywordTimeoutMs Optional time budget for keywordSearch (ms).
 * @param keywordMaxChunks Optional cap on chunks considered by keywordSearch.
 */
constructor(url: string, apiKey: string | undefined, collectionName: string, embeddingDimension: number, keywordTimeoutMs?: number, keywordMaxChunks?: number);
/**
 * Initialize the collection with proper schema.
 * Call once before storing or searching embeddings.
 */
initializeCollection(): Promise<void>;
/**
 * Recreate the collection with correct dimensions.
 * WARNING: destructive — deletes all existing data in the collection.
 */
recreateCollection(): Promise<void>;
/**
 * Create payload indexes for filtering capabilities (matching Cursor's @codebase functionality).
 */
private createPayloadIndexes;
/**
 * Create payload indexes on an existing collection (useful for upgrading
 * collections created before payload indexing was introduced).
 */
ensurePayloadIndexes(): Promise<void>;
/**
 * Store a batch of embedding vectors in Qdrant.
 */
storeEmbeddings(embeddings: EmbeddingVector[]): Promise<void>;
/**
 * Store a single embedding vector.
 */
storeEmbedding(embedding: EmbeddingVector): Promise<void>;
/**
 * Search for similar vectors with enhanced error handling and logging.
 *
 * @param query Search parameters (filters, limits, etc. — see SearchQuery).
 * @param queryVector Dense query embedding; presumably must match
 *   embeddingDimension — confirm against the implementation.
 */
searchSimilar(query: SearchQuery, queryVector: number[]): Promise<SearchResult[]>;
/**
 * Perform a simple keyword-based search across all indexed chunks.
 * This provides a lightweight BM25-style sparse retrieval fallback that can be
 * blended with dense semantic search results for higher accuracy – similar to
 * Cursor's hybrid search pipeline.
 *
 * Bounded by keywordTimeoutMs and keywordMaxChunks (see constructor).
 *
 * NOTE: This implementation scrolls the entire collection once and performs
 * in-memory scoring. For typical source-code repositories (a few thousand
 * chunks) this is fast enough and keeps the implementation dependency-free.
 * If the collection grows large, consider replacing this with Qdrant's
 * full-text payload index once it becomes generally available.
 */
keywordSearch(query: SearchQuery): Promise<SearchResult[]>;
/**
 * Delete all embeddings belonging to a given source file path.
 */
deleteByFilePath(filePath: string): Promise<void>;
/**
 * Delete embeddings by their point IDs.
 */
deleteByIds(ids: string[]): Promise<void>;
/**
 * Get collection info and stats.
 * NOTE(review): returns Promise<any> — consider a typed shape for the
 * Qdrant collection-info response in a future revision.
 */
getCollectionInfo(): Promise<any>;
/**
 * Count total points in the collection.
 */
countPoints(): Promise<number>;
/**
 * Clear all data from the collection (points removed; collection retained).
 */
clearCollection(): Promise<void>;
/**
 * Fetch raw points by their IDs.
 * NOTE(review): returns Promise<any[]> — a typed point shape would be safer.
 */
getPointsById(ids: string[]): Promise<any[]>;
/**
 * Get average request latency for the Qdrant client, presumably computed
 * over the retained requestDurations window — confirm in implementation.
 */
getAverageLatency(): number;
/**
 * Test connectivity to the Qdrant server.
 * @returns true when the server is reachable.
 */
testConnection(): Promise<boolean>;
// Record one request duration sample (presumably trimmed to maxDurationsToStore).
private addRequestDuration;
/**
 * Get all embeddings stored for a given file path.
 */
getEmbeddingsByFilePath(filePath: string): Promise<EmbeddingVector[]>;
/**
 * Check whether a file is already indexed and up to date.
 * @param lastModified File mtime used for staleness comparison — presumably
 *   epoch milliseconds; verify against callers.
 */
isFileIndexed(filePath: string, lastModified: number): Promise<boolean>;
/**
 * Convert a stored Qdrant payload back into a CodeChunk.
 */
private payloadToCodeChunk;
/**
 * Create a short display snippet from the payload.
 */
private createSnippet;
/**
 * Create a context description for search results (similar to Cursor's @codebase format).
 */
private createContextDescription;
/**
 * Calculate keyword score for keyword search.
 * This is a very basic TF-IDF-like scoring.
 * For a real-world application, you'd need a proper tokenizer, stopwords,
 * and a more sophisticated scoring mechanism.
 */
private calculateKeywordScore;
}