@knath2000/codebase-indexing-mcp

MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage

export class LLMRerankerService {
    constructor(config) {
        this.requestDurations = []; // To store last N request durations
        this.errorCount = 0;
        this.totalRequests = 0;
        this.maxDurationsToStore = 100; // Store up to 100 durations
        this.apiKey = config.llmRerankerApiKey || undefined;
        this.model = config.llmRerankerModel;
        this.enabled = config.enableLLMReranking && !!this.apiKey;
        this.timeoutMs = config.llmRerankerTimeoutMs;
        this.baseUrl = config.llmRerankerBaseUrl;
        if (config.enableLLMReranking && !this.apiKey) {
            console.warn('LLM re-ranking is enabled but no API key provided. Re-ranking will be disabled.');
            this.enabled = false;
        }
    }
    /**
     * Check if LLM re-ranking is available and enabled
     */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Test connection to LLM Reranker
     */
    async testConnection() {
        if (!this.enabled) {
            return true; // Consider as connected if disabled by config
        }
        try {
            const startTime = Date.now();
            this.totalRequests++; // Count the probe so error-rate stats stay meaningful
            // Make a dummy request to test connectivity
            await this.callLLMAPI('test', startTime);
            this.addRequestDuration(Date.now() - startTime);
            return true;
        }
        catch (error) {
            console.error('LLM Reranker connection test failed:', error);
            this.errorCount++;
            return false;
        }
    }
    /**
     * Get cache size (dummy for now as no cache is implemented in reranker)
     */
    cacheSize() {
        return 0;
    }
    /**
     * Get memory usage (dummy for now)
     */
    memoryUsage() {
        return 0;
    }
    /**
     * Get average request latency for LLM reranker
     */
    getAverageLatency() {
        if (this.requestDurations.length === 0) {
            return 0;
        }
        const sum = this.requestDurations.reduce((a, b) => a + b, 0);
        return sum / this.requestDurations.length;
    }
    /**
     * Get error rate for LLM reranker
     */
    getErrorRate() {
        return this.totalRequests === 0 ? 0 : (this.errorCount / this.totalRequests) * 100;
    }
    addRequestDuration(duration) {
        this.requestDurations.push(duration);
        if (this.requestDurations.length > this.maxDurationsToStore) {
            this.requestDurations.shift(); // Remove the oldest duration
        }
    }
    /**
     * Re-rank search results using LLM for improved relevance
     */
    async rerank(request, requestStartTime = Date.now()) {
        if (!this.enabled) {
            // Return original results if re-ranking is disabled
            return {
                rerankedResults: request.candidates.slice(0, request.maxResults),
                reasoning: 'LLM re-ranking disabled',
                confidence: 1.0
            };
        }
        try {
            const rerankStartTime = Date.now();
            this.totalRequests++; // Track request volume for getStats()/getErrorRate()
            console.log(`🧠 [LLMReranker] Re-ranking ${request.candidates.length} results for query: "${request.query}"`);
            // Prepare the prompt for LLM re-ranking
            const prompt = this.buildRerankingPrompt(request);
            // Call the LLM API
            const response = await this.callLLMAPI(prompt, requestStartTime);
            // Parse the response and re-order results
            const rerankedResults = this.parseRerankingResponse(response, request.candidates, request.maxResults);
            const rerankDuration = Date.now() - rerankStartTime;
            console.log(`✅ [LLMReranker] Re-ranked to ${rerankedResults.length} results in ${rerankDuration}ms`);
            return {
                rerankedResults,
                reasoning: 'LLM-based relevance scoring',
                confidence: 0.9
            };
        }
        catch (error) {
            console.error(`❌ [LLMReranker] Re-ranking failed:`, error);
            this.errorCount++; // Record the failure before falling back
            // Fallback to original results on error
            return {
                rerankedResults: request.candidates.slice(0, request.maxResults),
                reasoning: `Re-ranking failed: ${error instanceof Error ? error.message : String(error)}`,
                confidence: 0.5
            };
        }
    }
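    /*
     * Note (added for documentation): the package ships no type declarations
     * for `request`, so the shape below is inferred from how rerank() and
     * buildRerankingPrompt() read it:
     *
     *   request = {
     *     query: string,
     *     maxResults: number,
     *     candidates: Array<{
     *       score: number,    // vector-similarity score from the search stage
     *       snippet: string,  // code excerpt shown to the LLM (truncated to 120 chars)
     *       chunk: {
     *         filePath, fileKind, chunkType, language,
     *         functionName, className, startLine, endLine,
     *         metadata: { isTest: boolean }
     *       }
     *     }>
     *   }
     */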
    /**
     * Build the prompt for LLM re-ranking
     */
    buildRerankingPrompt(request) {
        const candidates = request.candidates.map((result, index) => {
            const metadata = result.chunk.metadata;
            const snippet = result.snippet.length > 120
                ? result.snippet.slice(0, 120) + '…'
                : result.snippet;
            // Extract fileKind from payload if available
            const fileKind = result.chunk.fileKind ||
                ((result.chunk.filePath.includes('.md') ||
                    result.chunk.filePath.includes('README') ||
                    result.chunk.filePath.includes('docs/') ||
                    result.chunk.filePath.includes('memory-bank/')) ? 'docs' : 'code');
            return `
CANDIDATE ${index + 1}:
File: ${result.chunk.filePath}
File Kind: ${fileKind} ${fileKind === 'code' ? '🔥 IMPLEMENTATION' : '📝 DOCUMENTATION'}
Type: ${result.chunk.chunkType}
Language: ${result.chunk.language}
Function: ${result.chunk.functionName || 'N/A'}
Class: ${result.chunk.className || 'N/A'}
Lines: ${result.chunk.startLine}-${result.chunk.endLine}
Similarity Score: ${result.score.toFixed(3)}
Is Test File: ${metadata.isTest ? 'Yes' : 'No'}
Code Snippet:
\`\`\`${result.chunk.language}
${snippet}
\`\`\`
`;
        }).join('\n');
        return `You are a code search expert specializing in finding IMPLEMENTATION CODE. Your task is to re-rank code search results based on their relevance to the user's query, with an EXTREME PREFERENCE for implementation code over documentation.

USER QUERY: "${request.query}"

SEARCH CANDIDATES:
${candidates}

CRITICAL RANKING RULES (in order of importance):
1. **🔥 IMPLEMENTATION FIRST**: Candidates marked "🔥 IMPLEMENTATION" (File Kind: code) should ALWAYS rank higher than "📝 DOCUMENTATION" candidates, even if documentation has higher similarity scores
2. **CODE ENTITY PRIORITY**: Candidates with chunkType 'function', 'class', 'method', 'interface' are premium - rank these at the top
3. **ACTUAL CODE RELEVANCE**: Analyze the code snippet for direct relevance to the query - look for matching function names, variable names, logic patterns
4. **IMPLEMENTATION OVER EXPLANATION**: A function that implements the behavior beats documentation that explains the behavior
5. **EXACT MATCHES WIN**: Exact function/class name matches should rank highest
6. **WORKING CODE**: Complete, compilable code snippets rank higher than partial or example code
7. **RECENT/ACTIVE FILES**: Non-test files (.ts, .js, .py) over test files when both are relevant

SCORING GUIDELINES:
- Start with "🔥 IMPLEMENTATION" candidates - these should dominate your ranking
- Only consider "📝 DOCUMENTATION" if no relevant implementation code exists
- A mediocre implementation function is better than perfect documentation
- Boost based on chunkType: function > class > method > interface > generic
- Penalize documentation files (.md, README, docs/) unless explicitly asking for docs

Expected JSON format (use zero-based candidate indices, so CANDIDATE 1 is index 0):
{
  "rankedIndices": [2, 0, 4, 1],
  "explanation": "Ranked implementation code first: function X directly implements the query, class Y provides relevant structure..."
}

JSON Response:`;
    }
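    /*
     * Provider routing note: in callLLMAPI below, a configured base URL always
     * wins and is treated as an OpenAI-compatible gateway; otherwise the model
     * name decides ('claude' -> Anthropic, 'gpt' -> OpenAI), and any other
     * model name is rejected with an error.
     */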
    /**
     * Call the LLM API for re-ranking
     */
    async callLLMAPI(prompt, requestStartTime) {
        if (!this.apiKey) {
            throw new Error('No API key configured for LLM re-ranking');
        }
        // If a custom base URL is provided, treat it as an OpenAI-compatible gateway
        if (this.baseUrl) {
            return this.callOpenAIAPI(prompt, requestStartTime);
        }
        // Support different LLM providers based on model name when no custom base URL
        if (this.model.includes('claude')) {
            return this.callAnthropicAPI(prompt, requestStartTime);
        }
        else if (this.model.includes('gpt')) {
            return this.callOpenAIAPI(prompt, requestStartTime);
        }
        else {
            throw new Error(`Unsupported LLM model for re-ranking: ${this.model}`);
        }
    }
    /**
     * Call Anthropic Claude API with timeout
     */
    async callAnthropicAPI(prompt, requestStartTime) {
        const controller = new AbortController();
        const remainingTime = this.timeoutMs - (Date.now() - requestStartTime);
        const timeoutMs = Math.max(1000, remainingTime);
        const timeout = setTimeout(() => controller.abort(), timeoutMs);
        try {
            console.log(`[LLMReranker] Calling Anthropic API with timeout ${timeoutMs}ms...`);
            const apiCallStartTime = Date.now();
            const response = await fetch('https://api.anthropic.com/v1/messages', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'x-api-key': this.apiKey,
                    'anthropic-version': '2023-06-01'
                },
                body: JSON.stringify({
                    model: this.model,
                    max_tokens: 400,
                    temperature: 0.1,
                    messages: [
                        { role: 'user', content: prompt }
                    ]
                }),
                signal: controller.signal
            });
            if (!response.ok) {
                throw new Error(`Anthropic API error: ${response.status} ${response.statusText}`);
            }
            const data = await response.json();
            const apiCallDuration = Date.now() - apiCallStartTime;
            console.log(`[LLMReranker] Anthropic API call completed in ${apiCallDuration}ms`);
            return data.content[0].text;
        }
        finally {
            clearTimeout(timeout);
        }
    }
    /**
     * Call OpenAI GPT API (or an OpenAI-compatible gateway) with timeout
     */
    async callOpenAIAPI(prompt, requestStartTime) {
        const controller = new AbortController();
        const remainingTime = this.timeoutMs - (Date.now() - requestStartTime);
        const timeoutMs = Math.max(1000, remainingTime);
        const timeout = setTimeout(() => controller.abort(), timeoutMs);
        try {
            console.log(`[LLMReranker] Calling OpenAI API with timeout ${timeoutMs}ms...`);
            const apiCallStartTime = Date.now();
            const endpoint = this.baseUrl
                ? `${this.baseUrl.replace(/\/?$/, '')}/chat/completions`
                : 'https://api.openai.com/v1/chat/completions';
            const response = await fetch(endpoint, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${this.apiKey}`,
                    // Some gateways expect the key in this header instead of (or as well as) the Authorization header
                    'x-api-key': this.apiKey
                },
                body: JSON.stringify({
                    model: this.model,
                    max_tokens: 400,
                    temperature: 0.1,
                    messages: [
                        { role: 'user', content: prompt }
                    ]
                }),
                signal: controller.signal
            });
            if (!response.ok) {
                throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
            }
            const data = await response.json();
            const apiCallDuration = Date.now() - apiCallStartTime;
            console.log(`[LLMReranker] OpenAI API call completed in ${apiCallDuration}ms`);
            return data.choices[0].message.content;
        }
        finally {
            clearTimeout(timeout);
        }
    }
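    /*
     * Timeout note: both API helpers above share one budget. timeoutMs is
     * measured from requestStartTime (the start of the whole rerank() call),
     * so each fetch only gets the time remaining, floored at 1000 ms so a
     * nearly exhausted budget still permits a minimal attempt.
     */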
    /**
     * Parse the LLM response and re-order results
     */
    parseRerankingResponse(response, candidates, maxResults) {
        try {
            // Extract JSON from the response
            const jsonMatch = response.match(/\{[\s\S]*\}/);
            if (!jsonMatch) {
                throw new Error('No JSON found in LLM response');
            }
            const parsed = JSON.parse(jsonMatch[0]);
            const rankedIndices = parsed.rankedIndices;
            if (!Array.isArray(rankedIndices)) {
                throw new Error('Invalid ranked indices format');
            }
            // Re-order results based on LLM ranking
            const rerankedResults = [];
            for (const index of rankedIndices.slice(0, maxResults)) {
                if (index >= 0 && index < candidates.length) {
                    const result = { ...candidates[index] };
                    // Store the original score and add a decreasing re-ranking score
                    result.rerankedScore = 1.0 - (rerankedResults.length * 0.1);
                    rerankedResults.push(result);
                }
            }
            // If we don't have enough results, fill with remaining candidates
            if (rerankedResults.length < maxResults) {
                const usedIndices = new Set(rankedIndices);
                for (let i = 0; i < candidates.length && rerankedResults.length < maxResults; i++) {
                    if (!usedIndices.has(i)) {
                        const result = { ...candidates[i] };
                        result.rerankedScore = 0.5 - (rerankedResults.length * 0.05);
                        rerankedResults.push(result);
                    }
                }
            }
            return rerankedResults;
        }
        catch (error) {
            console.warn(`Failed to parse LLM re-ranking response: ${error}`);
            console.warn(`Response was: ${response}`);
            // Fallback to original order
            return candidates.slice(0, maxResults).map((result) => ({
                ...result,
                rerankedScore: result.score * 0.9 // Slightly lower than original
            }));
        }
    }
    /**
     * Get re-ranking statistics
     */
    getStats() {
        return {
            enabled: this.enabled,
            model: this.model,
            totalRequests: this.totalRequests,
            successRate: this.getErrorRate() === 0 ? 1 : 1 - (this.errorCount / this.totalRequests),
            averageLatency: this.getAverageLatency()
        };
    }
}
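// ---------------------------------------------------------------------------
// Usage sketch (illustrative only; not part of the published file). The
// configuration keys and candidate fields match what the constructor,
// rerank() and buildRerankingPrompt() read above; the concrete values, the
// environment variable name and the candidate data are hypothetical. The
// function is defined but never invoked here.
// ---------------------------------------------------------------------------
async function demoRerank() {
    const reranker = new LLMRerankerService({
        enableLLMReranking: true,
        llmRerankerApiKey: process.env.LLM_RERANKER_API_KEY, // hypothetical env var
        llmRerankerModel: 'gpt-4o-mini', // must contain 'claude' or 'gpt' unless a base URL is set
        llmRerankerTimeoutMs: 10000, // total budget shared by the whole rerank() call
        llmRerankerBaseUrl: undefined // or an OpenAI-compatible gateway URL
    });
    const { rerankedResults, reasoning, confidence } = await reranker.rerank({
        query: 'parse the reranking response',
        maxResults: 5,
        candidates: [{
                score: 0.82,
                snippet: 'parseRerankingResponse(response, candidates, maxResults) { /* ... */ }',
                chunk: {
                    filePath: 'src/llm-reranker.js',
                    fileKind: 'code',
                    chunkType: 'method',
                    language: 'javascript',
                    functionName: 'parseRerankingResponse',
                    className: 'LLMRerankerService',
                    startLine: 1,
                    endLine: 40,
                    metadata: { isTest: false }
                }
            }]
    });
    console.log(reasoning, confidence, reranker.getStats());
    return rerankedResults;
}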