UNPKG

@alvinveroy/codecompass

Version:

AI-powered MCP server for codebase navigation and LLM prompt optimization

155 lines (154 loc) 8.18 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractKeywords = extractKeywords;
exports.broadenQuery = broadenQuery;
exports.focusQueryBasedOnResults = focusQueryBasedOnResults;
exports.tweakQuery = tweakQuery;
exports.refineQuery = actualRefineQuery;
exports.searchWithRefinement = searchWithRefinement;
const config_service_1 = require("./config-service");
const text_utils_1 = require("../utils/text-utils");
const ollama_1 = require("./ollama");

// --- Helper Functions (exported so they can be tested and injected) ---

/**
 * Extracts a de-duplicated list of candidate keywords from free text.
 *
 * The text is run through `preprocessText`, lower-cased, stripped of common
 * punctuation, and split on whitespace. Words of two characters or fewer,
 * words in a small stop-word list, and purely numeric tokens are dropped.
 *
 * @param {string} text - Raw text to mine for keywords.
 * @returns {string[]} Unique keywords in first-occurrence order.
 */
function extractKeywords(text) {
    const processed = (0, text_utils_1.preprocessText)(text);
    const cleanedForKeywords = processed.toLowerCase().replace(/[.,;:!?(){}[\]"']/g, " ");
    const commonWords = new Set([
        'the', 'and', 'that', 'this', 'with', 'from', 'have', 'for', 'is', 'was',
        'are', 'were', 'be', 'been', 'being', 'it', 'its', 'a', 'an', 'to', 'of',
        'in', 'on', 'at', 'by',
    ]);
    // Strip a single trailing bracket-like character once, then filter on the
    // cleaned form (previously the same strip was done in both filter and map).
    const keywords = cleanedForKeywords
        .split(/\s+/)
        .map(word => word.replace(/[():<>]$/, ''))
        .filter(word => word.length > 2 && !commonWords.has(word) && !/^\d+$/.test(word));
    return [...new Set(keywords)];
}

/**
 * Loosens a query by removing restrictive words, file extensions and
 * quoting/bracket characters.
 *
 * @param {string} query - The query to broaden.
 * @returns {string} The broadened query. Very short results (fewer than 10
 *   characters) get " implementation code" appended; an empty result becomes
 *   "general code context".
 */
function broadenQuery(query) {
    let broadened = query
        .replace(/\b(exact|specific|only|must)\b/gi, '')
        .replace(/\.(ts|js|tsx|jsx|py|java|cpp|rb|go|rs|php)\b/gi, '')
        .replace(/["'{}()[\]]/g, ' ')
        .trim();
    // Collapse the whitespace runs left behind by the removals above.
    broadened = broadened.replace(/\s\s+/g, ' ');
    if (broadened.length === 0) {
        return "general code context";
    }
    if (broadened.length < 10) {
        return `${broadened} implementation code`;
    }
    return broadened;
}

/**
 * Narrows a query by appending up to two keywords taken from the first three
 * search results.
 *
 * The text sampled from each result depends on `payload.dataType`:
 * `file_content_chunk` for 'file_chunk', `diff_content_chunk` for
 * 'diff_chunk', and `commit_message` for 'commit_info'. Only the first 200
 * characters of each sample are used.
 *
 * @param {string} query - The current query.
 * @param {Array<{payload?: object}>} results - Search results for `query`.
 * @returns {string} The query with keywords appended, or the unchanged query
 *   when there are no results or no keywords could be extracted.
 */
function focusQueryBasedOnResults(query, results) {
    // Same tolerance for a missing result list as tweakQuery.
    if (!results || results.length === 0) {
        return query;
    }
    const contentSamples = results.slice(0, 3).map(r => {
        const payload = r?.payload;
        let sampleText;
        switch (payload?.dataType) {
            case 'file_chunk':
                sampleText = payload.file_content_chunk;
                break;
            case 'diff_chunk':
                sampleText = payload.diff_content_chunk;
                break;
            case 'commit_info':
                // For commit_info, commit_message is a good candidate for keywords.
                sampleText = payload.commit_message;
                break;
        }
        // A payload may lack the expected text field; treat anything that is
        // not a string as an empty sample instead of throwing on .substring.
        return typeof sampleText === 'string' ? sampleText.substring(0, 200) : '';
    }).join(' ');
    const keywordsToAdd = extractKeywords(contentSamples).slice(0, 2).join(' ');
    if (keywordsToAdd) {
        return `${query} ${keywordsToAdd}`.trim();
    }
    return query;
}

/**
 * Makes a small adjustment to a query using the file path of the top result.
 *
 * If the top result is a 'file_chunk' or 'diff_chunk' with a `filepath`, the
 * file extension is appended when the query does not already contain it;
 * otherwise " in <first path segment>" is appended when that segment is not a
 * generic directory name and is not already in the query.
 *
 * @param {string} query - The current query.
 * @param {Array<{payload?: object}>} results - Search results for `query`.
 * @returns {string} The tweaked query, or the unchanged query when nothing
 *   applies.
 */
function tweakQuery(query, results) {
    if (!results || results.length === 0) {
        return query;
    }
    const topResult = results[0];
    let filepath = '';
    if (topResult?.payload) {
        // Only file_chunk and diff_chunk payloads are read for a filepath;
        // commit_info payloads are ignored here.
        const { dataType } = topResult.payload;
        if (dataType === 'file_chunk' || dataType === 'diff_chunk') {
            filepath = topResult.payload.filepath;
        }
    }
    if (!filepath) {
        return query;
    }
    const lowerQuery = query.toLowerCase();
    const fileTypeMatch = filepath.match(/\.([a-zA-Z0-9]+)$/);
    const fileType = fileTypeMatch ? fileTypeMatch[1] : '';
    if (fileType && !lowerQuery.includes(fileType.toLowerCase())) {
        return `${query} ${fileType}`;
    }
    // The first path segment is used as the directory hint.
    const pathParts = filepath.split(/[/\\]/);
    const directory = pathParts.length > 1 ? pathParts[0] : '';
    // Heuristic: skip common root-like names to avoid overly broad terms.
    const genericDirectories = ['src', 'lib', 'app', 'test', 'tests', 'doc', 'docs'];
    if (directory &&
        directory.length > 1 &&
        !genericDirectories.includes(directory.toLowerCase()) &&
        !lowerQuery.includes(directory.toLowerCase())) {
        return `${query} in ${directory}`;
    }
    return query;
}

/**
 * Chooses a refinement strategy from the current relevance score.
 *
 * - no results or relevance < 0.3: broaden the query;
 * - relevance < 0.7: focus the query using the results;
 * - otherwise: tweak the query using the top result.
 *
 * @param {string} originalQuery - The query that produced `results`.
 * @param {Array<object>} results - Search results for `originalQuery`.
 * @param {number} currentRelevance - Relevance score of `results`.
 * @param {{broaden: Function, focus: Function, tweak: Function}} [helpers] -
 *   Strategy implementations; defaults to the functions in this module.
 * @returns {string} The refined query.
 */
function actualRefineQuery(originalQuery, results, currentRelevance, helpers = {
    broaden: broadenQuery,
    focus: focusQueryBasedOnResults,
    tweak: tweakQuery
}) {
    // A missing result list is handled like an empty one, matching tweakQuery.
    if (!results || results.length === 0 || currentRelevance < 0.3) {
        config_service_1.logger.debug(`Relevance ${currentRelevance.toFixed(2)} is low or no results. Broadening query: "${originalQuery}"`);
        return helpers.broaden(originalQuery);
    }
    if (currentRelevance < 0.7) {
        config_service_1.logger.debug(`Relevance ${currentRelevance.toFixed(2)} is mediocre. Focusing query: "${originalQuery}"`);
        return helpers.focus(originalQuery, results);
    }
    config_service_1.logger.debug(`Relevance ${currentRelevance.toFixed(2)} is decent. Tweaking query: "${originalQuery}"`);
    return helpers.tweak(originalQuery, results);
}

/**
 * Runs a vector search and iteratively refines the query until the average
 * result score reaches `relevanceThreshold`, the refinement budget is used
 * up, or refinement stops changing the query.
 *
 * @param {object} client - Object exposing `search(collectionName, options)`.
 *   NOTE(review): presumably a Qdrant client - confirm against the caller.
 * @param {string} query - The initial query.
 * @param {string[]} [files=[]] - When non-empty, results are filtered to
 *   these `filepath` values.
 * @param {number} [customLimit] - Result limit; the configured default is
 *   used when this is missing or not positive.
 * @param {number} [maxRefinements] - Maximum number of refinements; defaults
 *   to `configService.MAX_REFINEMENT_ITERATIONS` when undefined.
 * @param {number} [relevanceThreshold=0.7] - Average score at which the
 *   search stops refining.
 * @param {Function} [refineQueryFunc=actualRefineQuery] - Injectable
 *   refinement function, mainly for testing.
 * @returns {Promise<{results: Array<object>, refinedQuery: string, relevanceScore: number}>}
 *   `results` and `relevanceScore` come from the best-scoring iteration;
 *   `refinedQuery` is the last query issued, which is not necessarily the
 *   query that produced `results`.
 */
async function searchWithRefinement(client, query, files = [], customLimit, maxRefinements, relevanceThreshold = 0.7, 
// Injectable refineQuery function for testing
refineQueryFunc = actualRefineQuery) {
    const effectiveMaxRefinements = maxRefinements === undefined
        ? config_service_1.configService.MAX_REFINEMENT_ITERATIONS
        : maxRefinements;
    let currentQuery = query;
    let bestResults = [];
    let bestRelevanceScore = 0;
    let refinementCount = 0;
    config_service_1.logger.info(`Starting iterative search with query: "${currentQuery}", maxRefinements: ${effectiveMaxRefinements}, threshold: ${relevanceThreshold}`);
    // Iteration 0 is the initial search; each later iteration is a refinement.
    for (let i = 0; i <= effectiveMaxRefinements; i++) {
        const embedding = await (0, ollama_1.generateEmbedding)(currentQuery);
        const searchLimit = (customLimit && customLimit > 0)
            ? customLimit
            : config_service_1.configService.QDRANT_SEARCH_LIMIT_DEFAULT;
        const searchResults = await client.search(config_service_1.configService.COLLECTION_NAME, {
            vector: embedding,
            limit: searchLimit,
            filter: files.length ? { must: [{ key: "filepath", match: { any: files } }] } : undefined,
        });
        const avgRelevance = searchResults.length > 0
            ? searchResults.reduce((sum, r) => sum + r.score, 0) / searchResults.length
            : 0;
        config_service_1.logger.info(`Refinement iteration ${i}: Query "${currentQuery}" yielded ${searchResults.length} results with avg relevance ${avgRelevance.toFixed(2)}`);
        // Remember the best-scoring result set seen so far.
        if (avgRelevance > bestRelevanceScore) {
            bestResults = searchResults;
            bestRelevanceScore = avgRelevance;
        }
        if (avgRelevance >= relevanceThreshold || i === effectiveMaxRefinements) {
            config_service_1.logger.info(`Stopping refinement: relevance ${bestRelevanceScore.toFixed(2)} >= threshold ${relevanceThreshold} or max iterations ${i}/${effectiveMaxRefinements} reached.`);
            break;
        }
        const refinedQuerySuggestion = refineQueryFunc(currentQuery, searchResults, avgRelevance); // Use injected function
        // An unchanged query would repeat the same search, so stop early.
        if (refinedQuerySuggestion === currentQuery && searchResults.length > 0) {
            config_service_1.logger.info(`Query "${currentQuery}" did not change after refinement with current results. Stopping.`);
            break;
        }
        currentQuery = refinedQuerySuggestion;
        refinementCount++;
    }
    config_service_1.logger.info(`Completed search with ${refinementCount} refinements. Final query: "${currentQuery}", Final relevance: ${bestRelevanceScore.toFixed(2)}`);
    return { results: bestResults, refinedQuery: currentQuery, relevanceScore: bestRelevanceScore };
}