UNPKG

mnemos-coder

Version:

CLI-based coding agent with graph-based execution loop and terminal UI

337 lines 12.4 kB
/**
 * Semantic search engine for codebase context.
 *
 * Combines vector similarity search (via `embedder`) with text search
 * (via `db`), merges the two result lists with a weighted score, and then
 * optionally applies contextual boosting and heuristic re-ranking.
 *
 * Every search result produced by this class is a plain object of the form
 * `{ chunk, similarity_score, text_score?, combined_score, relevance_type,
 * related_chunks? }`.
 */
export class CodebaseSearchEngine {
    db;
    embedder;
    defaultOptions;

    /**
     * @param {object} db - Storage layer. This class calls `getAllEmbeddings`,
     *   `getChunkById`, `textSearch`, `getChunksByType`, `getChunksByFile`
     *   and `getStats` on it.
     * @param {object} embedder - Embedding provider. This class calls `embed`,
     *   `searchSimilar` and `getStats` on it.
     */
    constructor(db, embedder) {
        this.db = db;
        this.embedder = embedder;
        this.defaultOptions = {
            limit: 10,
            threshold: 0.1,
            includeContent: true,
            fileTypes: [],
            chunkTypes: [],
            rerank: true,
            hybridWeight: 0.7 // 70% vector, 30% text
        };
    }

    /**
     * Main search interface - runs vector and text search in parallel and
     * merges the results.
     *
     * @param {string} query - Free-text query.
     * @param {object} [options] - Overrides for `defaultOptions`.
     * @param {object} [context] - Optional context (`filePath`, `recentFiles`,
     *   `language`) used to boost matching results.
     * @returns {Promise<object[]>} At most `limit` results, best first.
     */
    async search(query, options = {}, context) {
        const opts = { ...this.defaultOptions, ...options };

        // Both strategies catch their own errors, so Promise.all cannot reject here.
        const [vectorResults, textResults] = await Promise.all([
            this.vectorSearch(query, opts),
            this.textSearch(query, opts)
        ]);

        const combinedResults = this.combineResults(vectorResults, textResults, opts);

        const contextualResults = context
            ? this.applyContextualBoosting(combinedResults, context)
            : combinedResults;

        const finalResults = opts.rerank
            ? await this.rerankResults(contextualResults, query, context)
            : contextualResults;

        return finalResults.slice(0, opts.limit);
    }

    /**
     * Vector-based semantic search.
     *
     * Embeds the query, asks the embedder for the `limit * 3` most similar
     * stored embeddings (over-fetching to leave room for filtering), then drops
     * hits below `options.threshold`, hits whose chunk no longer exists, and
     * hits rejected by the `fileTypes` / `chunkTypes` filters.
     *
     * @param {string} query
     * @param {object} options - Fully resolved options (must contain `limit`).
     * @returns {Promise<object[]>} Results with `relevance_type: 'semantic'`;
     *   an empty array if anything throws (the error is logged).
     */
    async vectorSearch(query, options) {
        try {
            const embeddingResult = await this.embedder.embed(query);
            const queryEmbedding = embeddingResult.embedding;

            const allEmbeddings = this.db.getAllEmbeddings();
            if (allEmbeddings.length === 0) {
                return [];
            }

            const similarities = this.embedder.searchSimilar(
                queryEmbedding,
                allEmbeddings,
                options.limit * 3 // Get more for filtering
            );

            const results = [];
            for (const sim of similarities) {
                if (sim.similarity < options.threshold) {
                    continue;
                }
                const chunk = this.db.getChunkById(sim.id);
                if (!chunk || !this.matchesFilters(chunk, options)) {
                    continue;
                }
                results.push({
                    chunk,
                    similarity_score: sim.similarity,
                    combined_score: sim.similarity,
                    relevance_type: 'semantic'
                });
            }
            return results;
        } catch (error) {
            // Best-effort: a failing vector search must not break hybrid search.
            console.error('Vector search failed:', error);
            return [];
        }
    }

    /**
     * Full-text search delegated to `db.textSearch`.
     *
     * The query is normalized with `preprocessTextQuery`; if nothing usable
     * remains (e.g. the query was only punctuation) the database is not
     * queried at all. Results are filtered with the same `fileTypes` /
     * `chunkTypes` rules as `vectorSearch` so that hybrid search honours the
     * filters on both sides.
     *
     * NOTE(review): `rank` is used directly as a "higher is better" score. If
     * the db layer returns a raw SQLite FTS5 bm25 rank (where better matches
     * are more negative), it needs normalizing there - confirm.
     *
     * @param {string} query
     * @param {object} options - Must contain `limit`; `limit * 2` rows are requested.
     * @returns {Promise<object[]>} Results with `relevance_type: 'text'`;
     *   an empty array if anything throws (the error is logged).
     */
    async textSearch(query, options) {
        try {
            const processedQuery = this.preprocessTextQuery(query);
            if (processedQuery === '') {
                return [];
            }

            const searchResults = this.db.textSearch(processedQuery, options.limit * 2);
            return searchResults
                .filter((result) => this.matchesFilters(result.chunk, options))
                .map((result) => ({
                    chunk: result.chunk,
                    similarity_score: 0, // No vector similarity for text search
                    text_score: result.rank || 0,
                    combined_score: result.rank || 0,
                    relevance_type: 'text'
                }));
        } catch (error) {
            // Best-effort: a failing text search must not break hybrid search.
            console.error('Text search failed:', error);
            return [];
        }
    }

    /**
     * Find code by specific patterns (function names, types, etc.).
     *
     * @param {object} pattern - May contain `chunkType`, `functionName`,
     *   `className` and/or `interfaceName`.
     * @param {object} [options] - Overrides for `defaultOptions`.
     * @returns {Promise<object[]>} De-duplicated results, at most `limit`.
     *   Chunk-type matches come first, followed by text-search matches on the names.
     */
    async findByPattern(pattern, options = {}) {
        const opts = { ...this.defaultOptions, ...options };
        const results = [];

        if (pattern.chunkType) {
            const chunks = this.db.getChunksByType(pattern.chunkType, opts.limit);
            results.push(...chunks.map((chunk) => ({
                chunk,
                similarity_score: 1.0,
                combined_score: 1.0,
                relevance_type: 'contextual'
            })));
        }

        if (pattern.functionName || pattern.className || pattern.interfaceName) {
            const queryTerms = [
                pattern.functionName,
                pattern.className,
                pattern.interfaceName
            ].filter(Boolean).join(' ');
            const textResults = await this.textSearch(queryTerms, opts);
            results.push(...textResults);
        }

        return this.deduplicateResults(results).slice(0, opts.limit);
    }

    /**
     * Get every chunk of a specific file as contextual results.
     *
     * @param {string} filePath
     * @param {object} [options] - Currently unused; kept for interface compatibility.
     * @returns {Promise<object[]>} One result per chunk, each with a score of 1.0.
     */
    async getFileContext(filePath, options = {}) {
        const chunks = this.db.getChunksByFile(filePath);
        return chunks.map((chunk) => ({
            chunk,
            similarity_score: 1.0,
            combined_score: 1.0,
            relevance_type: 'contextual'
        }));
    }

    /**
     * Find chunks related to a given chunk (imports, usages, etc.).
     *
     * Extracts the identifiers used in the chunk's content and text-searches
     * for each one (5 hits requested per identifier). The source chunk itself
     * always matches its own identifiers, so it is excluded from the output.
     *
     * @param {*} chunkId - Id understood by `db.getChunkById`.
     * @param {object} [options] - Passed to `textSearch`; `limit` (default 10)
     *   caps the final list.
     * @returns {Promise<object[]>} De-duplicated results tagged with
     *   `related_chunks: [chunkId]`; empty if the chunk does not exist.
     */
    async findRelatedChunks(chunkId, options = {}) {
        const chunk = this.db.getChunkById(chunkId);
        if (!chunk) {
            return [];
        }

        const identifiers = this.extractIdentifiers(chunk.content);

        const relatedResults = [];
        for (const identifier of identifiers) {
            const results = await this.textSearch(identifier, { ...options, limit: 5 });
            for (const result of results) {
                // Compare against the stored id so that a string/number
                // mismatch in the caller's chunkId cannot defeat the check.
                if (result.chunk.id === chunk.id) {
                    continue;
                }
                relatedResults.push({
                    ...result,
                    relevance_type: 'contextual',
                    related_chunks: [chunkId]
                });
            }
        }

        return this.deduplicateResults(relatedResults).slice(0, options.limit || 10);
    }

    /**
     * Suggest relevant context for a query.
     *
     * @param {string} query
     * @param {object} [context] - Forwarded to `search`.
     * @param {object} [options] - Forwarded to `search`.
     * @returns {Promise<{suggestions: object[], keywords: string[], relatedFiles: string[]}>}
     *   The search results, the most frequent identifiers in their contents,
     *   and the distinct file paths they come from.
     */
    async suggestContext(query, context, options = {}) {
        const suggestions = await this.search(query, options, context);
        const keywords = this.extractKeywords(suggestions.map((s) => s.chunk.content).join(' '));
        const relatedFiles = [...new Set(suggestions.map((s) => s.chunk.file_path))];
        return {
            suggestions,
            keywords,
            relatedFiles
        };
    }

    /**
     * Combine vector and text search results into one list keyed by chunk id.
     *
     * Vector scores are weighted by `hybridWeight`, text scores by
     * `1 - hybridWeight`. A chunk found by both strategies gets the sum of
     * both weighted scores and `relevance_type: 'hybrid'`.
     *
     * @param {object[]} vectorResults
     * @param {object[]} textResults
     * @param {object} options - Must contain `hybridWeight`.
     * @returns {object[]} Merged results sorted by `combined_score`, descending.
     */
    combineResults(vectorResults, textResults, options) {
        const combinedMap = new Map();
        const hybridWeight = options.hybridWeight;

        for (const result of vectorResults) {
            combinedMap.set(result.chunk.id, {
                ...result,
                combined_score: result.similarity_score * hybridWeight,
                relevance_type: 'semantic'
            });
        }

        for (const result of textResults) {
            const textScore = (result.text_score || 0) * (1 - hybridWeight);
            const existing = combinedMap.get(result.chunk.id);
            if (existing) {
                const vectorScore = existing.similarity_score * hybridWeight;
                combinedMap.set(result.chunk.id, {
                    ...existing,
                    text_score: result.text_score,
                    combined_score: vectorScore + textScore,
                    relevance_type: 'hybrid'
                });
            } else {
                combinedMap.set(result.chunk.id, {
                    ...result,
                    combined_score: textScore,
                    relevance_type: 'text'
                });
            }
        }

        return Array.from(combinedMap.values())
            .sort((a, b) => b.combined_score - a.combined_score);
    }

    /**
     * Apply contextual boosting based on the current context.
     *
     * Multiplies `combined_score` by 1.5 for the current file, 1.2 for a
     * recent file and 1.1 for a matching language (boosts stack).
     *
     * @param {object[]} results
     * @param {object} context - `filePath`, `recentFiles` and `language` are read.
     * @returns {object[]} New result objects, sorted by boosted score, descending.
     */
    applyContextualBoosting(results, context) {
        return results.map((result) => {
            let boost = 1.0;
            if (context.filePath && result.chunk.file_path === context.filePath) {
                boost *= 1.5;
            }
            if (context.recentFiles?.includes(result.chunk.file_path)) {
                boost *= 1.2;
            }
            if (context.language && result.chunk.language === context.language) {
                boost *= 1.1;
            }
            return {
                ...result,
                combined_score: result.combined_score * boost
            };
        }).sort((a, b) => b.combined_score - a.combined_score);
    }

    /**
     * Re-rank results using simple heuristics on chunk characteristics.
     *
     * Boosts function/class chunks (x1.2), chunks whose `metadata.name`
     * appears in the query, case-insensitively (x1.3), and short chunks under
     * 500 characters (x1.1); penalizes chunks over 2000 characters (x0.9).
     *
     * @param {object[]} results
     * @param {string} query
     * @param {object} [context] - Currently unused.
     * @returns {Promise<object[]>} New result objects, sorted by score, descending.
     */
    async rerankResults(results, query, context) {
        const loweredQuery = query.toLowerCase();

        return results.map((result) => {
            let rankBoost = 1.0;

            if (['function', 'class'].includes(result.chunk.chunk_type)) {
                rankBoost *= 1.2;
            }

            // Only string names can be matched; anything else is ignored
            // rather than throwing on `.toLowerCase()`.
            const name = result.chunk.metadata?.name;
            if (typeof name === 'string' && name !== '' && loweredQuery.includes(name.toLowerCase())) {
                rankBoost *= 1.3;
            }

            const contentLength = result.chunk.content.length;
            if (contentLength < 500) {
                rankBoost *= 1.1;
            } else if (contentLength > 2000) {
                rankBoost *= 0.9;
            }

            return {
                ...result,
                combined_score: result.combined_score * rankBoost
            };
        }).sort((a, b) => b.combined_score - a.combined_score);
    }

    /**
     * Preprocess a text query for the text-search backend.
     *
     * Replaces every character that is not a word character or whitespace
     * with a space, drops terms of length 1, and appends `*` to each
     * remaining term for prefix matching.
     *
     * @param {string} query
     * @returns {string} Space-separated `term*` list; `''` if no term survives.
     */
    preprocessTextQuery(query) {
        const cleaned = query.replace(/[^\w\s]/g, ' ').trim();
        const terms = cleaned.split(/\s+/).filter((term) => term.length > 1);
        return terms.map((term) => `${term}*`).join(' ');
    }

    /**
     * Extract the distinct identifiers from code, in order of first appearance.
     *
     * @param {string} code
     * @returns {string[]} Unique identifiers longer than 2 characters that are
     *   not one of a small set of declaration keywords (compared case-insensitively).
     */
    extractIdentifiers(code) {
        return [...new Set(this.tokenizeIdentifiers(code))];
    }

    /**
     * Extract the most frequent identifiers from text.
     *
     * Counts every occurrence (not just distinct identifiers), so the result
     * really is ordered by frequency. Ties keep first-appearance order because
     * `Array.prototype.sort` is stable.
     *
     * @param {string} text
     * @param {number} [limit=10] - Maximum number of keywords to return.
     * @returns {string[]} Keywords, most frequent first.
     */
    extractKeywords(text, limit = 10) {
        const wordCounts = new Map();
        for (const word of this.tokenizeIdentifiers(text)) {
            wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
        }
        return Array.from(wordCounts.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, limit)
            .map((entry) => entry[0]);
    }

    /**
     * Remove duplicate results, keeping the first result seen for each chunk id.
     *
     * @param {object[]} results
     * @returns {object[]}
     */
    deduplicateResults(results) {
        const seen = new Set();
        return results.filter((result) => {
            if (seen.has(result.chunk.id)) {
                return false;
            }
            seen.add(result.chunk.id);
            return true;
        });
    }

    /**
     * Get search engine statistics.
     *
     * @returns {{dbStats: *, embedderStats: *}} Whatever `db.getStats()` and
     *   `embedder.getStats()` return.
     */
    getStats() {
        return {
            dbStats: this.db.getStats(),
            embedderStats: this.embedder.getStats()
        };
    }

    /**
     * Internal helper: whether a chunk passes the `fileTypes` / `chunkTypes`
     * filters. An empty or missing filter list accepts everything.
     * `fileTypes` is compared against `chunk.language`.
     *
     * @param {object} chunk
     * @param {object} options
     * @returns {boolean}
     */
    matchesFilters(chunk, options) {
        if (options.fileTypes?.length && !options.fileTypes.includes(chunk.language)) {
            return false;
        }
        if (options.chunkTypes?.length && !options.chunkTypes.includes(chunk.chunk_type)) {
            return false;
        }
        return true;
    }

    /**
     * Internal helper: every identifier-like token in `code`, duplicates
     * included, minus short tokens (length <= 2) and declaration keywords.
     *
     * @param {string} code
     * @returns {string[]}
     */
    tokenizeIdentifiers(code) {
        const identifierRegex = /\b[a-zA-Z_][a-zA-Z0-9_]*\b/g;
        const matches = code.match(identifierRegex) || [];
        const keywords = new Set(['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'import', 'export']);
        return matches.filter((match) => match.length > 2 && !keywords.has(match.toLowerCase()));
    }
}
//# sourceMappingURL=search.js.map