@alvinveroy/codecompass
Version:
AI-powered MCP server for codebase navigation and LLM prompt optimization
155 lines (154 loc) • 8.18 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractKeywords = extractKeywords;
exports.broadenQuery = broadenQuery;
exports.focusQueryBasedOnResults = focusQueryBasedOnResults;
exports.tweakQuery = tweakQuery;
exports.refineQuery = actualRefineQuery;
exports.searchWithRefinement = searchWithRefinement;
const config_service_1 = require("./config-service");
const text_utils_1 = require("../utils/text-utils");
const ollama_1 = require("./ollama");
// --- Helper functions (each one is exported above) ---
/**
 * Pulls a de-duplicated list of candidate keywords out of free text.
 *
 * The text is first run through `preprocessText` (from ../utils/text-utils;
 * its exact behaviour is not visible in this file), lower-cased, and has
 * common punctuation replaced by spaces. Each whitespace-separated token then
 * has at most one trailing `(`, `)`, `:`, `<` or `>` removed. Tokens of two
 * characters or fewer, stop words, and purely numeric tokens are discarded.
 *
 * @param {string} text - Raw text to mine for keywords.
 * @returns {string[]} Unique keywords, in order of first appearance.
 */
function extractKeywords(text) {
    const stopWords = new Set(['the', 'and', 'that', 'this', 'with', 'from', 'have', 'for', 'is', 'was', 'are', 'were', 'be', 'been', 'being', 'it', 'its', 'a', 'an', 'to', 'of', 'in', 'on', 'at', 'by']);
    const { preprocessText } = text_utils_1;
    const normalized = preprocessText(text)
        .toLowerCase()
        .replace(/[.,;:!?(){}[\]"']/g, " ");
    // A Set keeps insertion order, so de-duplication and ordering happen in one pass.
    const seen = new Set();
    for (const token of normalized.split(/\s+/)) {
        const candidate = token.replace(/[():<>]$/, '');
        if (candidate.length <= 2) {
            continue;
        }
        if (stopWords.has(candidate)) {
            continue;
        }
        if (/^\d+$/.test(candidate)) {
            continue;
        }
        seen.add(candidate);
    }
    return [...seen];
}
/**
 * Loosens a search query so that it can match more documents.
 *
 * Removes the restrictive words "exact", "specific", "only" and "must"
 * (case-insensitive), strips a fixed list of source-file extensions
 * (".ts", ".py", ...), replaces quotes and brackets with spaces, then trims
 * and collapses runs of whitespace.
 *
 * @param {string} query - The query to broaden.
 * @returns {string} The cleaned query; if fewer than 10 characters remain,
 *   " implementation code" is appended; if nothing remains, the fixed
 *   fallback "general code context" is returned.
 */
function broadenQuery(query) {
    const withoutRestrictiveWords = query.replace(/\b(exact|specific|only|must)\b/gi, '');
    const withoutExtensions = withoutRestrictiveWords.replace(/\.(ts|js|tsx|jsx|py|java|cpp|rb|go|rs|php)\b/gi, '');
    const withoutBrackets = withoutExtensions.replace(/["'{}()[\]]/g, ' ');
    const normalized = withoutBrackets.trim().replace(/\s\s+/g, ' ');
    if (normalized.length === 0) {
        return "general code context";
    }
    // Very short queries get generic padding so there is something to match on.
    return normalized.length < 10 ? `${normalized} implementation code` : normalized;
}
/**
 * Narrows a query by appending up to two keywords mined from the top results.
 *
 * The first three results are sampled. For each, the text field that matches
 * the payload's `dataType` is used (`file_content_chunk` for 'file_chunk',
 * `diff_content_chunk` for 'diff_chunk', `commit_message` for 'commit_info'),
 * truncated to 200 characters. The samples are joined with spaces and passed
 * to `extractKeywords`; the first two keywords are appended to the query.
 *
 * Results with no payload, an unrecognised `dataType`, or a missing/non-string
 * text field contribute an empty sample instead of raising.
 *
 * @param {string} query - The query to focus.
 * @param {Array<object>|null|undefined} results - Search results; each may
 *   carry a `payload` object with a `dataType` discriminator.
 * @returns {string} The query with up to two keywords appended, or the
 *   original query when there are no results or no keywords were found.
 */
function focusQueryBasedOnResults(query, results) {
    // Tolerate a missing result list, as tweakQuery does.
    if (!results || results.length === 0) {
        return query;
    }
    const contentSamples = results
        .slice(0, 3)
        .map((result) => {
            const payload = result?.payload;
            let sampleText;
            switch (payload?.dataType) {
                case 'file_chunk':
                    sampleText = payload.file_content_chunk;
                    break;
                case 'diff_chunk':
                    sampleText = payload.diff_content_chunk;
                    break;
                case 'commit_info':
                    // The commit message is the field used as keyword material for commits.
                    sampleText = payload.commit_message;
                    break;
                default:
                    sampleText = '';
            }
            // A recognised payload may still lack its text field; calling
            // substring on undefined would throw, so fall back to an empty sample.
            return typeof sampleText === 'string' ? sampleText.substring(0, 200) : '';
        })
        .join(' ');
    const keywordsToAdd = extractKeywords(contentSamples).slice(0, 2).join(' ');
    return keywordsToAdd ? `${query} ${keywordsToAdd}`.trim() : query;
}
/**
 * Makes a small adjustment to a query based on the top search result.
 *
 * Only 'file_chunk' and 'diff_chunk' payloads are considered, because those
 * are the ones this function reads a `filepath` from. The adjustment is, in
 * priority order:
 *   1. append the file extension if the query does not already contain it;
 *   2. append "in <first path segment>" if that segment is longer than one
 *      character, is not a generic root name (src, lib, app, test, tests,
 *      doc, docs) and is not already contained in the query.
 * Containment checks are case-insensitive substring matches.
 *
 * @param {string} query - The query to tweak.
 * @param {Array<object>|null|undefined} results - Search results, best first.
 * @returns {string} The tweaked query, or the original when nothing applies.
 */
function tweakQuery(query, results) {
    if (!results || results.length === 0) {
        return query;
    }
    const payload = results[0]?.payload;
    const carriesFilepath = Boolean(payload) && ['file_chunk', 'diff_chunk'].includes(payload.dataType);
    const filepath = carriesFilepath ? payload.filepath : '';
    if (!filepath) {
        return query;
    }
    // Evaluated lazily so the query is only lower-cased when a check actually runs.
    const queryMentions = (term) => query.toLowerCase().includes(term.toLowerCase());
    const extension = filepath.match(/\.([a-zA-Z0-9]+)$/)?.[1] ?? '';
    if (extension && !queryMentions(extension)) {
        return `${query} ${extension}`;
    }
    // The first path segment stands in for "the directory"; a bare filename has none.
    const segments = filepath.split(/[/\\]/);
    const directory = segments.length > 1 ? segments[0] : '';
    const genericRoots = ['src', 'lib', 'app', 'test', 'tests', 'doc', 'docs'];
    const isSpecificDirectory = directory.length > 1 && !genericRoots.includes(directory.toLowerCase());
    if (isSpecificDirectory && !queryMentions(directory)) {
        return `${query} in ${directory}`;
    }
    return query;
}
// Actual refineQuery implementation
/**
 * Chooses a refinement strategy from the current relevance and applies it.
 *
 *   - no results, or relevance below 0.3  -> `helpers.broaden(query)`
 *   - relevance below 0.7                 -> `helpers.focus(query, results)`
 *   - otherwise                           -> `helpers.tweak(query, results)`
 *
 * The decision is written to the debug log before the helper runs.
 *
 * @param {string} originalQuery - The query to refine.
 * @param {Array<object>} results - Results obtained with `originalQuery`.
 * @param {number} currentRelevance - Relevance score for those results.
 * @param {{broaden: Function, focus: Function, tweak: Function}} [helpers] -
 *   Strategy implementations; defaults to this module's own functions.
 * @returns {string} Whatever the selected helper returns.
 */
function actualRefineQuery(originalQuery, results, currentRelevance, helpers = { broaden: broadenQuery, focus: focusQueryBasedOnResults, tweak: tweakQuery }) {
    const nothingFound = results.length === 0;
    const relevanceLabel = currentRelevance.toFixed(2);
    const { logger } = config_service_1;
    if (nothingFound || currentRelevance < 0.3) {
        logger.debug(`Relevance ${relevanceLabel} is low or no results. Broadening query: "${originalQuery}"`);
        return helpers.broaden(originalQuery);
    }
    const isMediocre = currentRelevance < 0.7;
    if (!isMediocre) {
        logger.debug(`Relevance ${relevanceLabel} is decent. Tweaking query: "${originalQuery}"`);
        return helpers.tweak(originalQuery, results);
    }
    logger.debug(`Relevance ${relevanceLabel} is mediocre. Focusing query: "${originalQuery}"`);
    return helpers.focus(originalQuery, results);
}
/**
 * Runs a vector search and iteratively refines the query until the average
 * result score reaches `relevanceThreshold`, the refinement budget is spent,
 * or refinement stops changing the query.
 *
 * Each iteration embeds the current query via `generateEmbedding`, searches
 * the configured collection through `client.search`, and averages the
 * `score` of the returned results. The best-scoring non-empty result set
 * seen so far is remembered. The first non-empty result set is always kept,
 * even when its average score is 0 or negative, so that hits are never
 * discarded in favour of an empty list.
 *
 * @param {object} client - Object exposing `search(collectionName, params)`
 *   returning a promise of results that each carry a numeric `score`.
 * @param {string} query - Initial query text.
 * @param {string[]} [files=[]] - When non-empty, restricts the search to
 *   points whose "filepath" matches any of these values.
 * @param {number} [customLimit] - Result limit; values that are falsy or
 *   not positive fall back to `configService.QDRANT_SEARCH_LIMIT_DEFAULT`.
 * @param {number} [maxRefinements] - Maximum refinements; `undefined` falls
 *   back to `configService.MAX_REFINEMENT_ITERATIONS`.
 * @param {number} [relevanceThreshold=0.7] - Average score at which to stop.
 * @param {Function} [refineQueryFunc=actualRefineQuery] - Injectable
 *   `(query, results, relevance) => string` refinement function.
 * @returns {Promise<{results: Array<object>, refinedQuery: string, relevanceScore: number}>}
 *   `results` is the best result set seen; `relevanceScore` is its average
 *   score (0 when no search returned anything). `refinedQuery` is the last
 *   query that was produced, which is not necessarily the query that yielded
 *   `results`.
 */
async function searchWithRefinement(client, query, files = [], customLimit, maxRefinements, relevanceThreshold = 0.7,
// Injectable refineQuery function for testing
refineQueryFunc = actualRefineQuery) {
    const effectiveMaxRefinements = maxRefinements === undefined ? config_service_1.configService.MAX_REFINEMENT_ITERATIONS : maxRefinements;
    // Neither the limit nor the filter depends on the query, so build them once.
    const searchLimit = (customLimit && customLimit > 0) ? customLimit : config_service_1.configService.QDRANT_SEARCH_LIMIT_DEFAULT;
    const searchFilter = files.length ? { must: [{ key: "filepath", match: { any: files } }] } : undefined;
    let currentQuery = query;
    let bestResults = [];
    let bestRelevanceScore = 0;
    let refinementCount = 0;
    config_service_1.logger.info(`Starting iterative search with query: "${currentQuery}", maxRefinements: ${effectiveMaxRefinements}, threshold: ${relevanceThreshold}`);
    for (let i = 0; i <= effectiveMaxRefinements; i++) {
        const embedding = await (0, ollama_1.generateEmbedding)(currentQuery);
        const searchResults = await client.search(config_service_1.configService.COLLECTION_NAME, {
            vector: embedding,
            limit: searchLimit,
            filter: searchFilter,
        });
        const avgRelevance = searchResults.length > 0
            ? searchResults.reduce((sum, r) => sum + r.score, 0) / searchResults.length
            : 0;
        config_service_1.logger.info(`Refinement iteration ${i}: Query "${currentQuery}" yielded ${searchResults.length} results with avg relevance ${avgRelevance.toFixed(2)}`);
        // Keep the first non-empty result set unconditionally (its average may
        // be 0 or negative), and afterwards only replace it with a strictly
        // better non-empty set. An empty set never displaces real hits.
        const isImprovement = searchResults.length > 0 &&
            (bestResults.length === 0 || avgRelevance > bestRelevanceScore);
        if (isImprovement) {
            bestResults = searchResults;
            bestRelevanceScore = avgRelevance;
        }
        if (avgRelevance >= relevanceThreshold || i === effectiveMaxRefinements) {
            config_service_1.logger.info(`Stopping refinement: relevance ${bestRelevanceScore.toFixed(2)} >= threshold ${relevanceThreshold} or max iterations ${i}/${effectiveMaxRefinements} reached.`);
            break;
        }
        const refinedQuerySuggestion = refineQueryFunc(currentQuery, searchResults, avgRelevance); // Use injected function
        if (refinedQuerySuggestion === currentQuery && searchResults.length > 0) {
            config_service_1.logger.info(`Query "${currentQuery}" did not change after refinement with current results. Stopping.`);
            break;
        }
        currentQuery = refinedQuerySuggestion;
        refinementCount++;
    }
    config_service_1.logger.info(`Completed search with ${refinementCount} refinements. Final query: "${currentQuery}", Final relevance: ${bestRelevanceScore.toFixed(2)}`);
    return {
        results: bestResults,
        refinedQuery: currentQuery,
        relevanceScore: bestRelevanceScore,
    };
}