@knath2000/codebase-indexing-mcp

MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage

import { EmbeddingVector, SearchQuery, SearchResult } from '../types.js';
export declare class QdrantVectorClient {
    private client;
    private url;
    private apiKey;
    private collectionName;
    private embeddingDimension;
    private keywordTimeoutMs;
    private keywordMaxChunks;
    private requestDurations;
    private maxDurationsToStore;
    constructor(url: string, apiKey: string | undefined, collectionName: string, embeddingDimension: number, keywordTimeoutMs?: number, keywordMaxChunks?: number);
    /**
     * Initialize the collection with proper schema
     */
    initializeCollection(): Promise<void>;
    /**
     * Recreate the collection with correct dimensions (deletes all existing data)
     */
    recreateCollection(): Promise<void>;
    /**
     * Create payload indexes for filtering capabilities (matching Cursor's @codebase functionality)
     */
    private createPayloadIndexes;
    /**
     * Create payload indexes on existing collection (useful for upgrading existing collections)
     */
    ensurePayloadIndexes(): Promise<void>;
    /**
     * Store embedding vectors in Qdrant
     */
    storeEmbeddings(embeddings: EmbeddingVector[]): Promise<void>;
    /**
     * Store a single embedding vector
     */
    storeEmbedding(embedding: EmbeddingVector): Promise<void>;
    /**
     * Search for similar vectors with enhanced error handling and logging
     */
    searchSimilar(query: SearchQuery, queryVector: number[]): Promise<SearchResult[]>;
    /**
     * Perform a simple keyword-based search across all indexed chunks.
     * This provides a lightweight BM25-style sparse retrieval fallback that can be
     * blended with dense semantic search results for higher accuracy, similar to
     * Cursor's hybrid search pipeline.
     *
     * NOTE: This implementation scrolls the entire collection once and performs
     * in-memory scoring. For typical source-code repositories (a few thousand
     * chunks) this is fast enough and keeps the implementation dependency-free.
     * If the collection grows large, consider replacing this with Qdrant's
     * full-text payload index once it becomes generally available.
     */
    keywordSearch(query: SearchQuery): Promise<SearchResult[]>;
    /**
     * Delete embeddings by file path
     */
    deleteByFilePath(filePath: string): Promise<void>;
    /**
     * Delete embeddings by IDs
     */
    deleteByIds(ids: string[]): Promise<void>;
    /**
     * Get collection info and stats
     */
    getCollectionInfo(): Promise<any>;
    /**
     * Count total points in collection
     */
    countPoints(): Promise<number>;
    /**
     * Clear all data from collection
     */
    clearCollection(): Promise<void>;
    /**
     * Get points by their IDs
     */
    getPointsById(ids: string[]): Promise<any[]>;
    /**
     * Get average request latency for Qdrant client
     */
    getAverageLatency(): number;
    /**
     * Test connection to Qdrant
     */
    testConnection(): Promise<boolean>;
    private addRequestDuration;
    /**
     * Get embeddings by file path
     */
    getEmbeddingsByFilePath(filePath: string): Promise<EmbeddingVector[]>;
    /**
     * Check if a file is already indexed
     */
    isFileIndexed(filePath: string, lastModified: number): Promise<boolean>;
    /**
     * Convert payload to CodeChunk
     */
    private payloadToCodeChunk;
    /**
     * Create a snippet from the payload
     */
    private createSnippet;
    /**
     * Create a context description for search results (similar to Cursor's @codebase format)
     */
    private createContextDescription;
    /**
     * Calculate the keyword score for keyword search.
     * This is a very basic TF-IDF-like scoring.
     * For a real-world application, you'd need a proper tokenizer, stopwords,
     * and a more sophisticated scoring mechanism.
     */
    private calculateKeywordScore;
}
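
Because this declaration file only exposes method signatures, a minimal usage sketch follows. It is not taken from the package: the import path, Qdrant URL, collection name, and 1024-dimension embedding size are illustrative assumptions, and the SearchQuery object literal is a guess at a shape that is actually defined in ../types.js (not shown here).

// Minimal usage sketch. Import path, URL, collection name, and embedding
// dimension are illustrative assumptions, not values taken from the package.
import { QdrantVectorClient } from './storage/qdrant-client.js';

async function demo(): Promise<void> {
    const client = new QdrantVectorClient(
        'http://localhost:6333',        // url (assumed local Qdrant instance)
        process.env.QDRANT_API_KEY,     // apiKey, optional for local deployments
        'codebase-chunks',              // collectionName (assumed name)
        1024                            // embeddingDimension, must match the embedding model
    );

    // Confirm connectivity, then create the collection and payload indexes if missing.
    if (!(await client.testConnection())) {
        throw new Error('Qdrant is not reachable');
    }
    await client.initializeCollection();
    await client.ensurePayloadIndexes();

    // Dense semantic search: in real use the query vector comes from embedding the
    // query text (e.g. with Voyage AI). A zero vector of the right dimension stands
    // in here. The SearchQuery shape below is an assumption; see ../types.js for the
    // real definition.
    const query = { query: 'where is authentication handled?', limit: 10 } as any;
    const queryVector: number[] = new Array(1024).fill(0);

    const dense = await client.searchSimilar(query, queryVector);
    const sparse = await client.keywordSearch(query);

    // The keywordSearch doc comment suggests blending dense and keyword results
    // (hybrid retrieval); how to merge or re-rank them is left to the caller.
    console.log(`dense=${dense.length} keyword=${sparse.length}`,
        `avg Qdrant latency=${client.getAverageLatency()}ms`);
}

demo().catch(console.error);

Note that these declarations do not prescribe how dense and keyword results are combined; the keywordSearch doc comment only states that the two can be blended, Cursor-style, for higher accuracy.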