@knath2000/codebase-indexing-mcp
MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage
import { OpenAI } from 'openai';

export class LLMRerankerService {
    constructor(config) {
        this.requestDurations = []; // To store last N request durations
        this.errorCount = 0;
        this.totalRequests = 0;
        this.maxDurationsToStore = 100; // Store up to 100 durations
        this.apiKey = config.llmRerankerApiKey || undefined;
        this.model = config.llmRerankerModel;
        this.enabled = config.enableLLMReranking && !!this.apiKey;
        this.timeoutMs = config.llmRerankerTimeoutMs;
        // Initialize OpenAI client with LangDB configuration
        if (this.enabled && this.apiKey) {
            const baseUrl = config.llmRerankerBaseUrl;
            this.client = new OpenAI({
                baseURL: baseUrl || 'https://api.openai.com/v1',
                apiKey: this.apiKey,
                timeout: this.timeoutMs
            });
            console.log('[LLMRerankerService] Initialized with OpenAI SDK');
            console.log(`[LLMRerankerService] Model: ${this.model}`);
            console.log(`[LLMRerankerService] Base URL: ${baseUrl || 'https://api.openai.com/v1'}`);
            console.log(`[LLMRerankerService] Timeout: ${this.timeoutMs}ms`);
        }
    }
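    /**
     * Re-rank `searchResults` for `query` using the configured LLM.
     * Falls back to the original order (truncated to `limit`) when re-ranking
     * is disabled or unconfigured, when there are too few results to reorder,
     * or when the LLM call fails. Returns { results, reranked }, where
     * `reranked` reports whether the LLM ordering was actually applied.
     */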
    async rerank(query, searchResults, limit = 10) {
        const startTime = Date.now();
        this.totalRequests++;
        if (!this.enabled || !this.client) {
            console.log('[LLMReranker] Re-ranking disabled or not configured, returning original results');
            return { results: searchResults.slice(0, limit), reranked: false };
        }
        if (searchResults.length <= 1) {
            console.log('[LLMReranker] Only 1 or fewer results, skipping re-ranking');
            return { results: searchResults, reranked: false };
        }
        try {
            console.log(`[LLMReranker] Re-ranking ${searchResults.length} results for query: "${query}"`);
            const prompt = this.buildReRankingPrompt(query, searchResults);
            const rankedIndices = await this.callLLMAPI(prompt);
            // Apply the ranking
            const rerankedResults = this.applyRanking(searchResults, rankedIndices, limit);
            const duration = Date.now() - startTime;
            this.recordDuration(duration);
            console.log(`[LLMReranker] Re-ranking completed successfully in ${duration}ms`);
            return { results: rerankedResults, reranked: true };
        }
        catch (error) {
            this.errorCount++;
            const duration = Date.now() - startTime;
            this.recordDuration(duration);
            console.error(`[LLMReranker] Re-ranking failed after ${duration}ms:`, error);
            console.log('[LLMReranker] Falling back to original results');
            return { results: searchResults.slice(0, limit), reranked: false };
        }
    }
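    /**
     * Send the ranking prompt to the chat-completions endpoint and parse the
     * JSON reply. Accepts `rankedIndices`, `ranking`, or `indices` as the key
     * for the returned array; throws if no array can be extracted so that
     * rerank() falls back to the original result order.
     */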
    async callLLMAPI(prompt) {
        if (!this.client) {
            throw new Error('OpenAI client not initialized');
        }
        console.log(`[LLMReranker] Calling ${this.model} via OpenAI SDK...`);
        try {
            const response = await this.client.chat.completions.create({
                model: this.model,
                messages: [
                    {
                        role: 'system',
                        content: 'You are a helpful assistant that ranks search results based on relevance to a query. You must respond with valid JSON only.'
                    },
                    {
                        role: 'user',
                        content: prompt
                    }
                ],
                temperature: 0.1,
                max_tokens: 500
            });
            const content = response.choices[0]?.message?.content;
            if (!content) {
                throw new Error('No content in LLM response');
            }
            console.log(`[LLMReranker] Raw LLM response: ${content}`);
            // Parse the JSON response
            const parsed = JSON.parse(content);
            const rankedIndices = parsed.rankedIndices || parsed.ranking || parsed.indices;
            if (!Array.isArray(rankedIndices)) {
                throw new Error('LLM response does not contain a valid rankedIndices array');
            }
            console.log(`[LLMReranker] Parsed ranking: [${rankedIndices.join(', ')}]`);
            return rankedIndices;
        }
        catch (error) {
            console.error('[LLMReranker] OpenAI SDK error:', error.message);
            throw error;
        }
    }
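    /**
     * Build the ranking prompt: one numbered line per result (file name,
     * chunk type, first 200 characters of content), followed by strict
     * instructions to answer with a JSON `rankedIndices` array that covers
     * every index.
     */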
    buildReRankingPrompt(query, results) {
        const resultSummaries = results.map((result, index) => {
            const fileName = result.filePath.split('/').pop() || result.filePath;
            const preview = result.content.substring(0, 200).replace(/\n/g, ' ');
            return `${index}: ${fileName} (${result.chunkType || 'unknown'}) - ${preview}...`;
        }).join('\n');
        return `Given the search query: "${query}"
Rank these ${results.length} search results by relevance (most relevant first):
${resultSummaries}
Respond with JSON only in this exact format:
{
"rankedIndices": [most_relevant_index, second_most_relevant_index, ...]
}
Include ALL indices from 0 to ${results.length - 1} in your ranking.`;
    }
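    /**
     * Reorder the original results according to the LLM's indices. Invalid
     * or duplicate indices are dropped, results the LLM omitted are appended
     * in their original order, and the list is truncated to `limit`.
     */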
    applyRanking(originalResults, rankedIndices, limit) {
        try {
            // Validate indices: keep only unique, in-range integers so a
            // duplicated or fractional index from the LLM cannot duplicate
            // or corrupt results
            const seen = new Set();
            const validIndices = rankedIndices.filter((idx) => {
                if (!Number.isInteger(idx) || idx < 0 || idx >= originalResults.length || seen.has(idx)) {
                    return false;
                }
                seen.add(idx);
                return true;
            });
            if (validIndices.length === 0) {
                console.warn('[LLMReranker] No valid indices in ranking, using original order');
                return originalResults.slice(0, limit);
            }
            // Apply ranking
            const rerankedResults = validIndices.map((idx) => originalResults[idx]);
            // Add any missing results at the end
            const usedIndices = new Set(validIndices);
            const missingResults = originalResults.filter((_, idx) => !usedIndices.has(idx));
            rerankedResults.push(...missingResults);
            console.log(`[LLMReranker] Applied ranking: ${validIndices.slice(0, 5).join(', ')}${validIndices.length > 5 ? '...' : ''}`);
            return rerankedResults.slice(0, limit);
        }
        catch (error) {
            console.error('[LLMReranker] Error applying ranking:', error);
            return originalResults.slice(0, limit);
        }
    }
    recordDuration(duration) {
        this.requestDurations.push(duration);
        if (this.requestDurations.length > this.maxDurationsToStore) {
            this.requestDurations.shift();
        }
    }
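    /**
     * Summarize service health: error rate as a percentage of all rerank()
     * calls and the mean latency over the most recently stored durations.
     */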
    getStats() {
        const avgDuration = this.requestDurations.length > 0
            ? this.requestDurations.reduce((a, b) => a + b, 0) / this.requestDurations.length
            : 0;
        return {
            enabled: this.enabled,
            model: this.model,
            totalRequests: this.totalRequests,
            errorCount: this.errorCount,
            errorRate: this.totalRequests > 0 ? (this.errorCount / this.totalRequests) * 100 : 0,
            avgDurationMs: Math.round(avgDuration),
            timeoutMs: this.timeoutMs,
            requestCount: this.requestDurations.length
        };
    }
}
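// Example usage (a minimal sketch, not part of this module): the config field
// names match those read in the constructor above; the model name, timeout,
// and environment variable are illustrative assumptions, not values defined
// by this package.
//
//   import { LLMRerankerService } from './llm-reranker.js';
//
//   const reranker = new LLMRerankerService({
//       enableLLMReranking: true,
//       llmRerankerApiKey: process.env.LLM_RERANKER_API_KEY, // assumed env var
//       llmRerankerModel: 'gpt-4o-mini',                     // illustrative model name
//       llmRerankerBaseUrl: undefined,                       // falls back to https://api.openai.com/v1
//       llmRerankerTimeoutMs: 10000
//   });
//
//   // Each search result needs filePath and content (chunkType is optional),
//   // matching the fields read in buildReRankingPrompt().
//   const { results, reranked } = await reranker.rerank('parse config file', searchResults, 10);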
//# sourceMappingURL=llm-reranker.js.map