/*
 * remcode
 * Version:
 * Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.
 * 330 lines (329 loc) • 12.7 kB
 * JavaScript
 */
// Flag the CommonJS exports object with `__esModule` (the marker transpilers emit for ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder assignment; the real class is attached to `exports` after its definition.
exports.QueryProcessor = void 0;
const logger_1 = require("../utils/logger");
// `(0, fn)(...)` calls getLogger as a plain function, i.e. without `logger_1` bound as `this`.
const logger = (0, logger_1.getLogger)('QueryProcessor');
/** Matches an explicit `regex:` / `pattern:` marker; group 1 is the marker name. */
const PATTERN_MARKER = /\b(regex|pattern):/i;
/**
 * Matches a balanced quoted segment ("..." or '...') that is not glued to a word
 * character on either side, so apostrophes inside words ("doesn't", "user's")
 * are not mistaken for quotes. Group 1 is the quote character, group 2 the content.
 */
const QUOTED_SEGMENT = /(?:^|\W)(["'])(.+?)\1(?!\w)/;
/** Characters that carry a special meaning inside a regular expression. */
const REGEX_METACHARACTERS = /[\\^$.*+?()[\]{}|]/;
/** Explicit result-type markers, checked in order; the first match wins. */
const RESULT_TYPE_MARKERS = [
    [/\b(?:file|in):/, 'file'],
    [/\b(?:class|type):/, 'class'],
    [/\b(?:function|method):/, 'function'],
    [/\bmodule:/, 'module']
];
/**
 * Turns a raw, free-text code-search query into a structured query: a search
 * type (`semantic`, `exact`, `pattern`, `context`), an intent, `key:value`
 * filters, an expected result type and a confidence score.
 */
class QueryProcessor {
    constructor() {
        // Rules are evaluated in order by determineQueryMetadata(); the intent of
        // the LAST matching rule wins.
        this.patternRules = [
            // Implementation search patterns
            {
                regex: /how\s+(does|is|to)\s+|implement(ation)?\s+of\s+|\s+work(s|ing)?\s+/i,
                queryType: 'semantic',
                intent: 'find_implementation',
                resultType: 'function'
            },
            // Definition search patterns
            {
                regex: /define(d|s)?|declaration of|where is|locate\s+/i,
                queryType: 'exact',
                intent: 'find_definition',
                resultType: 'any'
            },
            // Usage search patterns
            {
                regex: /use(d|s)?\s+of|usage|call(s|ed|ing)?\s+to|reference(s|d)?\s+/i,
                queryType: 'context',
                intent: 'find_usage',
                resultType: 'any'
            },
            // Exact match patterns: keywords, or a balanced quoted segment
            // (a lone apostrophe is not enough).
            {
                regex: new RegExp(`exact(?:ly)?|precise(?:ly)?|literal(?:ly)?|${QUOTED_SEGMENT.source}`, 'i'),
                queryType: 'exact',
                intent: 'find_definition',
                resultType: 'any'
            },
            // Pattern match patterns
            {
                regex: /pattern|regex|regexp|regular expression|\s+like\s+|similar to/i,
                queryType: 'pattern',
                intent: 'find_similar',
                resultType: 'pattern'
            },
            // Complexity/bug search patterns
            {
                regex: /complex|complicated|bug(s|gy)?|error|issue|problem/i,
                queryType: 'semantic',
                intent: 'find_complexity',
                resultType: 'function'
            }
        ];
    }
    /**
     * Process a raw search query into a structured form with metadata.
     * @param {string} query The raw search query
     * @returns {Promise<object>} Object with `originalQuery`, `processedQuery`,
     *   `queryType`, `intent`, `filters`, `expectedResultType` and `confidence`
     */
    async processQuery(query) {
        logger.info(`Processing query: "${query}"`);
        // Clean the query while keeping filter tokens and markers intact.
        const normalizedQuery = this.normalizeQuery(query);
        const filters = await this.extractFilters(normalizedQuery);
        const { queryType, intent, expectedResultType, cleanedQuery } = this.determineQueryMetadata(normalizedQuery);
        const processedQuery = await this.optimizeQuery(cleanedQuery, queryType);
        const confidence = this.calculateConfidence(normalizedQuery, queryType, intent);
        return {
            originalQuery: query,
            processedQuery,
            queryType,
            intent,
            filters,
            expectedResultType,
            confidence
        };
    }
    /**
     * Normalize a query: lowercase it, drop sentence punctuation and collapse
     * whitespace, while keeping the characters that the rest of the pipeline
     * relies on.
     *
     * - `key:value` tokens (`language:ts`, `path:src/a.ts`) keep their `:`, `.`
     *   and `/`; only a `.` or `:` that ends a token is treated as punctuation.
     * - Everything after a `regex:` / `pattern:` marker is a user-supplied
     *   pattern and is kept verbatim (case and punctuation), because lowercasing
     *   or stripping it would change what it matches (`\D` vs `\d`, `a?b`).
     * - Quoted text is lowercased like the rest of the query.
     *
     * @param {string} query The raw query (`null`/`undefined` are treated as empty)
     * @returns {string} Normalized query
     */
    normalizeQuery(query) {
        const raw = query == null ? '' : String(query);
        const marker = PATTERN_MARKER.exec(raw);
        const head = marker ? raw.slice(0, marker.index) : raw;
        const normalizedHead = head
            .toLowerCase()
            .replace(/[?!;\\]/g, ' ')
            // Sentence-final '.' / ':' only; keeps "utils.ts" and "language:ts".
            .replace(/[.:]+(?=\s|$)/g, ' ')
            // Collapse last so the replacements above cannot leave double spaces.
            .replace(/\s+/g, ' ')
            .trim();
        if (!marker) {
            return normalizedHead;
        }
        const pattern = raw.slice(marker.index + marker[0].length).trim();
        const tail = `${marker[1].toLowerCase()}:${pattern}`;
        return normalizedHead ? `${normalizedHead} ${tail}` : tail;
    }
    /**
     * Determine the metadata for a query based on explicit markers and the
     * keyword rules in `this.patternRules`.
     * @param {string} query The normalized query
     * @returns {{queryType: string, intent: string, expectedResultType: string, cleanedQuery: string}}
     */
    determineQueryMetadata(query) {
        let queryType = 'semantic';
        let intent = 'find_implementation';
        let expectedResultType = 'any';
        let cleanedQuery = query;
        // A balanced quoted segment requests an exact search for its content.
        const quoted = QUOTED_SEGMENT.exec(query);
        if (quoted) {
            queryType = 'exact';
            cleanedQuery = quoted[2];
        }
        // An explicit regex:/pattern: marker takes precedence over quotes.
        const marker = PATTERN_MARKER.exec(query);
        if (marker) {
            queryType = 'pattern';
            const pattern = query.slice(marker.index + marker[0].length).trim();
            if (pattern) {
                cleanedQuery = pattern;
            }
        }
        // Markers are matched as whole prefixes so that e.g. "filetype:" does
        // not count as "type:" and "plugin:" does not count as "in:".
        const resultMarker = RESULT_TYPE_MARKERS.find(([regex]) => regex.test(query));
        if (resultMarker) {
            expectedResultType = resultMarker[1];
        }
        for (const rule of this.patternRules) {
            if (!rule.regex.test(query)) {
                continue;
            }
            // A type already set by an explicit marker (or an earlier rule) is
            // kept; only the default 'semantic' may be refined.
            if ((queryType === 'semantic' && rule.queryType !== 'semantic') ||
                queryType === rule.queryType) {
                queryType = rule.queryType;
            }
            intent = rule.intent;
            if (expectedResultType === 'any') {
                expectedResultType = rule.resultType;
            }
            // Drop the matched trigger words so they do not dilute a semantic search.
            if (queryType === 'semantic') {
                cleanedQuery = cleanedQuery.replace(rule.regex, ' ').trim();
            }
        }
        return {
            queryType,
            intent,
            expectedResultType,
            cleanedQuery
        };
    }
    /**
     * Extract `key:value` filters from a query string.
     * Recognized keys: `language`, `file`/`filetype`/`fileext`/`fileextension`,
     * `path`, `complexity` (low|medium|high), `has:comments`, `has:tests`,
     * `include` and `exclude` (both repeatable).
     * @param {string} query The normalized query
     * @returns {Promise<object>} Only the filters that were present
     */
    async extractFilters(query) {
        logger.info('Extracting query filters');
        const filters = {};
        const firstValue = (regex) => {
            const match = regex.exec(query);
            return match ? match[1] : undefined;
        };
        const allValues = (key) => {
            const regex = new RegExp(`\\b${key}:([\\w/.-]+)`, 'g');
            const values = [];
            let match = regex.exec(query);
            while (match !== null) {
                values.push(match[1]);
                match = regex.exec(query);
            }
            return values;
        };
        const language = firstValue(/\blanguage:([\w+#]+)/);
        if (language !== undefined) {
            filters.language = language;
        }
        const fileType = firstValue(/\bfile(?:type|ext|extension)?:([\w.]+)/);
        if (fileType !== undefined) {
            filters.fileType = fileType;
        }
        const path = firstValue(/\bpath:([\w/.-]+)/);
        if (path !== undefined) {
            filters.path = path;
        }
        const complexity = firstValue(/\bcomplexity:(low|medium|high)/);
        if (complexity !== undefined) {
            filters.complexity = complexity;
        }
        if (query.includes('has:comments')) {
            filters.hasComments = true;
        }
        if (query.includes('has:tests')) {
            filters.hasTests = true;
        }
        const includePatterns = allValues('include');
        if (includePatterns.length > 0) {
            filters.includePatterns = includePatterns;
        }
        const excludePatterns = allValues('exclude');
        if (excludePatterns.length > 0) {
            filters.excludePatterns = excludePatterns;
        }
        return filters;
    }
    /**
     * Optimize a query based on its determined type.
     * @param {string} query The cleaned query
     * @param {string} queryType One of 'semantic', 'exact', 'pattern', 'context'
     * @returns {Promise<string>} Optimized query; unknown types are returned unchanged
     */
    async optimizeQuery(query, queryType) {
        switch (queryType) {
            case 'semantic':
                return this.optimizeForSemanticSearch(query);
            case 'exact':
                return this.optimizeForExactSearch(query);
            case 'pattern':
                return this.optimizeForPatternSearch(query);
            case 'context':
                return this.optimizeForContextSearch(query);
            default:
                return query;
        }
    }
    /**
     * Optimize a query for semantic search: drop common stop words and append
     * related code terms ("method" for "function", "error exception" for "bug").
     * @param {string} query The cleaned query
     * @returns {Promise<string>} Optimized semantic query
     */
    async optimizeForSemanticSearch(query) {
        logger.info('Optimizing query for semantic search');
        // One pass over a fixed alternation instead of compiling a RegExp per word.
        let optimized = query
            .replace(/\b(?:a|an|the|in|on|at|of|for|to|with)\b/gi, ' ')
            .replace(/\s+/g, ' ')
            .trim();
        if (query.includes('function') && !optimized.includes('method')) {
            optimized += ' method';
        }
        if (query.includes('bug') && !optimized.includes('error')) {
            optimized += ' error exception';
        }
        return optimized;
    }
    /**
     * Optimize a query for exact search by wrapping it in exactly one pair of
     * double quotes, whether the input was unquoted, quoted, or half-quoted.
     * @param {string} query The cleaned query
     * @returns {string} The query surrounded by double quotes
     */
    optimizeForExactSearch(query) {
        // Strip existing edge quotes first so `"foo` becomes `"foo"`, not `""foo"`.
        const unquoted = query.replace(/^"+|"+$/g, '');
        return `"${unquoted}"`;
    }
    /**
     * Optimize a query for pattern search. Plain words get word boundaries;
     * anything that already uses regex syntax is returned untouched, because
     * wrapping e.g. `a|b` or `^foo` in `\b...\b` would change what it matches.
     * @param {string} query The cleaned query
     * @returns {string} Optimized pattern query
     */
    optimizeForPatternSearch(query) {
        const isPlainText = !REGEX_METACHARACTERS.test(query);
        // `\b` only makes sense next to a word character.
        const isWordEdged = /^\w/.test(query) && /\w$/.test(query);
        if (isPlainText && isWordEdged) {
            return `\\b${query}\\b`;
        }
        return query;
    }
    /**
     * Optimize a query for context search by keeping only identifier-like
     * tokens longer than three characters.
     * @param {string} query The cleaned query
     * @returns {string} Space-joined symbols, or the query itself if none qualify
     */
    optimizeForContextSearch(query) {
        const symbols = query.match(/[a-zA-Z][a-zA-Z0-9_]*/g) || [];
        // Longer symbols are more likely to be meaningful names than filler words.
        const meaningful = symbols.filter((symbol) => symbol.length > 3);
        return meaningful.length > 0 ? meaningful.join(' ') : query;
    }
    /**
     * Calculate a confidence score for the query interpretation.
     * @param {string} query The normalized query
     * @param {string} queryType The determined query type
     * @param {string} intent The determined intent (currently not used in the score)
     * @returns {number} Confidence between 0 and 1, rounded to two decimals
     */
    calculateConfidence(query, queryType, intent) {
        let confidence = 0.7;
        // Explicitly quoted exact searches are unambiguous.
        if (queryType === 'exact' && QUOTED_SEGMENT.test(query) && query.length > 5) {
            confidence += 0.2;
        }
        // So are searches with an explicit regex:/pattern: marker.
        if (queryType === 'pattern' && PATTERN_MARKER.test(query) && query.length > 8) {
            confidence += 0.2;
        }
        // Longer queries typically carry more context.
        if (query.length > 15) {
            confidence += 0.05;
        }
        // More filters = more precise intent (capped at +0.1).
        const filterCount = (query.match(/:[a-z0-9]+/g) || []).length;
        confidence += Math.min(0.1, filterCount * 0.02);
        // Round away binary floating-point residue (0.7 + 0.2 is
        // 0.8999999999999999) so threshold comparisons such as `>= 0.9` work.
        return Math.round(Math.min(1.0, confidence) * 100) / 100;
    }
}
// Attach the class to the CommonJS exports object under the name `QueryProcessor`.
exports.QueryProcessor = QueryProcessor;
// Stray empty statement; it has no effect at runtime.
;