mnemos-coder
Version:
CLI-based coding agent with graph-based execution loop and terminal UI
337 lines • 12.4 kB
JavaScript
/**
* Semantic search engine for codebase context
* Combines vector similarity, text search, and intelligent re-ranking
*/
export class CodebaseSearchEngine {
  db;
  embedder;
  defaultOptions;

  /**
   * @param {object} db - Chunk store. This class calls getAllEmbeddings,
   *   getChunkById, textSearch, getChunksByType, getChunksByFile and getStats on it.
   * @param {object} embedder - Embedding provider. This class calls embed,
   *   searchSimilar and getStats on it.
   */
  constructor(db, embedder) {
    this.db = db;
    this.embedder = embedder;
    this.defaultOptions = {
      limit: 10,
      threshold: 0.1,
      includeContent: true,
      fileTypes: [],
      chunkTypes: [],
      rerank: true,
      hybridWeight: 0.7, // 70% vector, 30% text
    };
  }

  /**
   * Main search interface - combines multiple search strategies.
   *
   * Runs vector and text search concurrently, merges the two result sets,
   * optionally boosts by editor context, optionally re-ranks, and truncates
   * to `limit`.
   *
   * @param {string} query - Free-text query.
   * @param {object} [options] - Overrides for `defaultOptions`.
   * @param {object} [context] - Optional `{ filePath, recentFiles, language }`.
   * @returns {Promise<object[]>} Results sorted by descending `combined_score`.
   */
  async search(query, options = {}, context) {
    const opts = { ...this.defaultOptions, ...options };

    // The two strategies are independent, so run them in parallel.
    const [vectorResults, textResults] = await Promise.all([
      this.vectorSearch(query, opts),
      this.textSearch(query, opts),
    ]);

    const combinedResults = this.combineResults(vectorResults, textResults, opts);

    const contextualResults = context
      ? this.applyContextualBoosting(combinedResults, context)
      : combinedResults;

    const finalResults = opts.rerank
      ? await this.rerankResults(contextualResults, query, context)
      : contextualResults;

    return finalResults.slice(0, opts.limit);
  }

  /**
   * Tell whether a chunk passes the `fileTypes` / `chunkTypes` filters.
   * An absent or empty filter list accepts every chunk.
   *
   * @param {object} chunk - Chunk with `language` and `chunk_type` fields.
   * @param {object} options - Search options that may carry the filter lists.
   * @returns {boolean} True when the chunk should be kept.
   */
  matchesFilters(chunk, options) {
    if (options.fileTypes?.length && !options.fileTypes.includes(chunk.language)) {
      return false;
    }
    if (options.chunkTypes?.length && !options.chunkTypes.includes(chunk.chunk_type)) {
      return false;
    }
    return true;
  }

  /**
   * Vector-based semantic search.
   *
   * Never rejects: any failure is logged and reported as an empty list so
   * the text strategy can still contribute to `search`.
   *
   * @param {string} query - Free-text query to embed.
   * @param {object} options - Needs `limit` and `threshold`; honours filters.
   * @returns {Promise<object[]>} Results tagged `relevance_type: 'semantic'`.
   */
  async vectorSearch(query, options) {
    try {
      const embeddingResult = await this.embedder.embed(query);
      const queryEmbedding = embeddingResult.embedding;

      const allEmbeddings = this.db.getAllEmbeddings();
      if (allEmbeddings.length === 0) {
        return [];
      }

      // Over-fetch because thresholding and filtering below discard candidates.
      const similarities = this.embedder.searchSimilar(
        queryEmbedding,
        allEmbeddings,
        options.limit * 3,
      );

      const results = [];
      for (const sim of similarities) {
        if (sim.similarity < options.threshold) {
          continue;
        }
        const chunk = this.db.getChunkById(sim.id);
        if (!chunk) {
          continue;
        }
        if (!this.matchesFilters(chunk, options)) {
          continue;
        }
        results.push({
          chunk,
          similarity_score: sim.similarity,
          combined_score: sim.similarity,
          relevance_type: 'semantic',
        });
      }
      return results;
    } catch (error) {
      console.error('Vector search failed:', error);
      return [];
    }
  }

  /**
   * Full-text search using SQLite FTS.
   *
   * Applies the same `fileTypes` / `chunkTypes` filters as `vectorSearch`, so
   * a filtered hybrid search cannot leak off-filter chunks through the text
   * path. Never rejects: failures are logged and reported as an empty list.
   *
   * @param {string} query - Free-text query.
   * @param {object} options - Needs `limit`; honours the filter lists.
   * @returns {Promise<object[]>} Results tagged `relevance_type: 'text'`.
   */
  async textSearch(query, options) {
    try {
      const processedQuery = this.preprocessTextQuery(query);
      // A query with no usable terms has nothing to match; skip the backend
      // call instead of handing it an empty match expression.
      if (processedQuery === '') {
        return [];
      }

      // Over-fetch because filtering below can discard candidates.
      const searchResults = this.db.textSearch(processedQuery, options.limit * 2);

      // NOTE(review): scores are used as higher-is-better downstream. If the
      // db layer returns raw SQLite FTS5 ranks (more negative = better), they
      // need normalising there - confirm against the db implementation.
      return searchResults
        .filter((result) => this.matchesFilters(result.chunk, options))
        .map((result) => ({
          chunk: result.chunk,
          similarity_score: 0, // No vector similarity for text search
          text_score: result.rank || 0,
          combined_score: result.rank || 0,
          relevance_type: 'text',
        }));
    } catch (error) {
      console.error('Text search failed:', error);
      return [];
    }
  }

  /**
   * Find code by specific patterns (function names, types, etc.).
   *
   * @param {object} pattern - Any of `chunkType`, `functionName`,
   *   `className`, `interfaceName`.
   * @param {object} [options] - Overrides for `defaultOptions`.
   * @returns {Promise<object[]>} De-duplicated results, at most `limit` long.
   */
  async findByPattern(pattern, options = {}) {
    const opts = { ...this.defaultOptions, ...options };
    const results = [];

    if (pattern.chunkType) {
      const chunks = this.db.getChunksByType(pattern.chunkType, opts.limit);
      results.push(...chunks.map((chunk) => ({
        chunk,
        similarity_score: 1.0,
        combined_score: 1.0,
        relevance_type: 'contextual',
      })));
    }

    if (pattern.functionName || pattern.className || pattern.interfaceName) {
      const queryTerms = [
        pattern.functionName,
        pattern.className,
        pattern.interfaceName,
      ].filter(Boolean).join(' ');
      const textResults = await this.textSearch(queryTerms, opts);
      results.push(...textResults);
    }

    return this.deduplicateResults(results).slice(0, opts.limit);
  }

  /**
   * Get context around a specific file: every stored chunk of that file,
   * each with a fixed score of 1.0.
   *
   * @param {string} filePath - Path used as the lookup key in the chunk store.
   * @param {object} [options] - Accepted for interface symmetry; currently unused.
   * @returns {Promise<object[]>} Results tagged `relevance_type: 'contextual'`.
   */
  async getFileContext(filePath, options = {}) {
    const chunks = this.db.getChunksByFile(filePath);
    return chunks.map((chunk) => ({
      chunk,
      similarity_score: 1.0,
      combined_score: 1.0,
      relevance_type: 'contextual',
    }));
  }

  /**
   * Find related code chunks (imports, usages, etc.).
   *
   * Text-searches for every identifier in the source chunk. The source chunk
   * is excluded from the output: it trivially contains all of its own
   * identifiers and would otherwise be reported as related to itself.
   *
   * @param {*} chunkId - Id of the chunk to find relatives for.
   * @param {object} [options] - Passed to `textSearch`; `limit` caps the output
   *   (defaults to 10).
   * @returns {Promise<object[]>} De-duplicated related results.
   */
  async findRelatedChunks(chunkId, options = {}) {
    const chunk = this.db.getChunkById(chunkId);
    if (!chunk) {
      return [];
    }

    const identifiers = this.extractIdentifiers(chunk.content);

    const relatedResults = [];
    for (const identifier of identifiers) {
      const hits = await this.textSearch(identifier, { ...options, limit: 5 });
      for (const hit of hits) {
        // Compare with the stored id rather than the argument so the check
        // uses the same representation as the search hits.
        if (hit.chunk.id === chunk.id) {
          continue;
        }
        relatedResults.push({
          ...hit,
          relevance_type: 'contextual',
          related_chunks: [chunkId],
        });
      }
    }

    return this.deduplicateResults(relatedResults).slice(0, options.limit || 10);
  }

  /**
   * Suggest relevant context for a query.
   *
   * @param {string} query - Free-text query.
   * @param {object} [context] - Optional context forwarded to `search`.
   * @param {object} [options] - Overrides for `defaultOptions`.
   * @returns {Promise<{suggestions: object[], keywords: string[], relatedFiles: string[]}>}
   */
  async suggestContext(query, context, options = {}) {
    const suggestions = await this.search(query, options, context);
    const keywords = this.extractKeywords(suggestions.map((s) => s.chunk.content).join(' '));
    const relatedFiles = [...new Set(suggestions.map((s) => s.chunk.file_path))];
    return {
      suggestions,
      keywords,
      relatedFiles,
    };
  }

  /**
   * Combine vector and text search results into one list keyed by chunk id.
   *
   * Vector scores are weighted by `hybridWeight`, text scores by
   * `1 - hybridWeight`; a chunk found by both strategies gets the sum and is
   * tagged `'hybrid'`.
   *
   * @param {object[]} vectorResults - Output of `vectorSearch`.
   * @param {object[]} textResults - Output of `textSearch`.
   * @param {object} options - Needs `hybridWeight`.
   * @returns {object[]} Merged results sorted by descending `combined_score`.
   */
  combineResults(vectorResults, textResults, options) {
    const combinedMap = new Map();
    const hybridWeight = options.hybridWeight;

    for (const result of vectorResults) {
      combinedMap.set(result.chunk.id, {
        ...result,
        combined_score: result.similarity_score * hybridWeight,
        relevance_type: 'semantic',
      });
    }

    for (const result of textResults) {
      const existing = combinedMap.get(result.chunk.id);
      const textScore = (result.text_score || 0) * (1 - hybridWeight);
      if (existing) {
        const vectorScore = existing.similarity_score * hybridWeight;
        combinedMap.set(result.chunk.id, {
          ...existing,
          text_score: result.text_score,
          combined_score: vectorScore + textScore,
          relevance_type: 'hybrid',
        });
      } else {
        combinedMap.set(result.chunk.id, {
          ...result,
          combined_score: textScore,
          relevance_type: 'text',
        });
      }
    }

    return Array.from(combinedMap.values())
      .sort((a, b) => b.combined_score - a.combined_score);
  }

  /**
   * Apply contextual boosting based on current context.
   *
   * Multipliers stack: x1.5 for the current file, x1.2 for a recent file,
   * x1.1 for the same language.
   *
   * @param {object[]} results - Results to boost (not mutated).
   * @param {object} context - `{ filePath, recentFiles, language }`, all optional.
   * @returns {object[]} New result objects sorted by descending `combined_score`.
   */
  applyContextualBoosting(results, context) {
    return results.map((result) => {
      let boost = 1.0;
      if (context.filePath && result.chunk.file_path === context.filePath) {
        boost *= 1.5;
      }
      if (context.recentFiles?.includes(result.chunk.file_path)) {
        boost *= 1.2;
      }
      if (context.language && result.chunk.language === context.language) {
        boost *= 1.1;
      }
      return {
        ...result,
        combined_score: result.combined_score * boost,
      };
    }).sort((a, b) => b.combined_score - a.combined_score);
  }

  /**
   * Re-rank results using additional heuristics.
   *
   * Multipliers stack: x1.2 for function/class chunks, x1.3 when the query
   * contains the chunk's metadata name (case-insensitive), x1.1 for content
   * shorter than 500 characters, x0.9 for content longer than 2000.
   *
   * @param {object[]} results - Results to re-rank (not mutated).
   * @param {string} query - The original query text.
   * @param {object} [context] - Currently unused.
   * @returns {Promise<object[]>} New result objects sorted by descending `combined_score`.
   */
  async rerankResults(results, query, context) {
    return results.map((result) => {
      let rankBoost = 1.0;

      if (['function', 'class'].includes(result.chunk.chunk_type)) {
        rankBoost *= 1.2;
      }

      if (result.chunk.metadata?.name) {
        const nameMatch = query.toLowerCase().includes(result.chunk.metadata.name.toLowerCase());
        if (nameMatch) {
          rankBoost *= 1.3;
        }
      }

      // Prefer short, focused chunks over long ones.
      const contentLength = result.chunk.content.length;
      if (contentLength < 500) {
        rankBoost *= 1.1;
      } else if (contentLength > 2000) {
        rankBoost *= 0.9;
      }

      return {
        ...result,
        combined_score: result.combined_score * rankBoost,
      };
    }).sort((a, b) => b.combined_score - a.combined_score);
  }

  /**
   * Preprocess text query for FTS.
   *
   * Replaces every non-word, non-space character with a space, drops
   * one-character terms, and appends `*` to each remaining term for prefix
   * matching.
   *
   * @param {string} query - Raw user query.
   * @returns {string} Space-joined prefix terms; empty when no term survives.
   */
  preprocessTextQuery(query) {
    const cleaned = query.replace(/[^\w\s]/g, ' ').trim();
    const terms = cleaned.split(/\s+/).filter((term) => term.length > 1);
    return terms.map((term) => `${term}*`).join(' ');
  }

  /**
   * List every identifier-like token in `code`, in order of appearance and
   * with repeats preserved. Tokens of two characters or fewer and a small set
   * of declaration keywords (matched case-insensitively) are dropped.
   *
   * @param {string} code - Source text to scan.
   * @returns {string[]} Tokens, possibly containing duplicates.
   */
  tokenizeIdentifiers(code) {
    const identifierRegex = /\b[a-zA-Z_][a-zA-Z0-9_]*\b/g;
    const matches = code.match(identifierRegex) || [];
    const keywords = new Set(['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'import', 'export']);
    return matches.filter((match) => match.length > 2 && !keywords.has(match.toLowerCase()));
  }

  /**
   * Extract identifiers from code.
   *
   * @param {string} code - Source text to scan.
   * @returns {string[]} Unique identifiers in order of first appearance.
   */
  extractIdentifiers(code) {
    return [...new Set(this.tokenizeIdentifiers(code))];
  }

  /**
   * Extract keywords from text, most frequent first.
   *
   * Counting runs over the raw token stream: counting an already
   * de-duplicated list would give every word a count of 1 and make the
   * ranking meaningless. Ties keep order of first appearance.
   *
   * @param {string} text - Text to analyse.
   * @param {number} [limit=10] - Maximum number of keywords returned.
   * @returns {string[]} Up to `limit` keywords by descending frequency.
   */
  extractKeywords(text, limit = 10) {
    const wordCounts = new Map();
    for (const word of this.tokenizeIdentifiers(text)) {
      wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
    }
    return Array.from(wordCounts.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map(([word]) => word);
  }

  /**
   * Remove duplicate results, keeping the first occurrence of each chunk id.
   *
   * @param {object[]} results - Results that may repeat chunks.
   * @returns {object[]} Results with unique `chunk.id`, original order kept.
   */
  deduplicateResults(results) {
    const seen = new Set();
    return results.filter((result) => {
      if (seen.has(result.chunk.id)) {
        return false;
      }
      seen.add(result.chunk.id);
      return true;
    });
  }

  /**
   * Get search engine statistics.
   *
   * @returns {{dbStats: *, embedderStats: *}} Whatever the two collaborators report.
   */
  getStats() {
    return {
      dbStats: this.db.getStats(),
      embedderStats: this.embedder.getStats(),
    };
  }
}
//# sourceMappingURL=search.js.map