UNPKG

mnemos-coder

Version:

CLI-based coding agent with graph-based execution loop and terminal UI

293 lines 11.3 kB
/**
 * New semantic search engine using API embeddings and Vectra.
 * Replaces the old TF-IDF based search.
 *
 * The engine delegates storage and retrieval to the injected `db` object
 * (which must expose `vectorSearch`, `textSearch` and `getStats`) and query
 * embedding to the injected `embedder` (which must expose `embed`).
 */
export class NewSearchEngine {
    db;
    embedder;
    defaultOptions;

    /**
     * @param {object} db - Backend exposing `vectorSearch(embedding, limit, threshold)`,
     *   `textSearch(query, limit)` and `getStats()`.
     * @param {object} embedder - Exposes `embed(query)` resolving to `{ embedding }`.
     */
    constructor(db, embedder) {
        this.db = db;
        this.embedder = embedder;
        this.defaultOptions = {
            limit: 10,
            threshold: 0.1,
            includeContent: true,
            fileTypes: [],
            chunkTypes: [],
            hybridWeight: 0.7 // 70% vector, 30% text
        };
    }

    /**
     * Hybrid search combining vector similarity and text search.
     *
     * @param {string} query - The search query.
     * @param {object} [options] - Overrides merged on top of `defaultOptions`.
     * @param {object} [context] - Optional `{ filePath, language, recentFiles }`
     *   used to boost matching results.
     * @returns {Promise<object[]>} At most `limit` results, best score first.
     */
    async search(query, options = {}, context) {
        const opts = { ...this.defaultOptions, ...options };
        // Analyze query type and choose appropriate search strategy
        const queryAnalysis = this.analyzeQuery(query);
        try {
            let vectorResults = [];
            let textResults = [];
            if (queryAnalysis.useVector && queryAnalysis.useText) {
                // Hybrid search for natural language queries; both lookups run in parallel
                [vectorResults, textResults] = await Promise.all([
                    this.vectorSearch(query, opts),
                    this.safeTextSearch(query, opts) // Safe wrapper for FTS
                ]);
            }
            else if (queryAnalysis.useVector) {
                // Semantic search only for conceptual queries
                vectorResults = await this.vectorSearch(query, opts);
            }
            else if (queryAnalysis.useText) {
                // Text search only for exact matches
                textResults = await this.safeTextSearch(query, opts);
            }
            else {
                // Fallback to vector search
                vectorResults = await this.vectorSearch(query, opts);
            }
            // Combine and rank results
            const combinedResults = this.combineResults(vectorResults, textResults, opts);
            // Apply contextual boosting if context is provided
            const contextualResults = context
                ? this.applyContextualBoosting(combinedResults, context)
                : combinedResults;
            return contextualResults.slice(0, opts.limit);
        }
        catch (error) {
            console.warn('Search error, falling back to text search:', error);
            return this.textSearch(query, opts);
        }
    }

    /**
     * Pure vector similarity search.
     *
     * Embeds the query, asks the database for `limit * 3` candidates (extra
     * head-room because filters may discard some) and returns at most `limit`.
     * Any failure is logged and reported as an empty result list.
     *
     * @param {string} query
     * @param {object} options - Must carry `limit` and `threshold`; may carry
     *   `fileTypes` / `chunkTypes` filters.
     * @returns {Promise<object[]>}
     */
    async vectorSearch(query, options) {
        try {
            // Generate query embedding
            const embeddingResult = await this.embedder.embed(query);
            const queryEmbedding = embeddingResult.embedding;
            // Perform vector search
            const results = await this.db.vectorSearch(queryEmbedding, options.limit * 3, // Get more for filtering
            options.threshold);
            // Convert to enhanced results
            const enhancedResults = [];
            for (const result of results) {
                if (!this.matchesFilters(result.chunk, options)) {
                    continue;
                }
                enhancedResults.push({
                    chunk: result.chunk,
                    similarity_score: result.similarity_score,
                    combined_score: result.similarity_score,
                    relevance_type: 'semantic',
                    file_context: this.getFileContext(result.chunk.file_path),
                    related_chunks: this.getRelatedChunks(result.chunk.id)
                });
            }
            return enhancedResults.slice(0, options.limit);
        }
        catch (error) {
            console.warn('Vector search failed:', error);
            return [];
        }
    }

    /**
     * Check a chunk against the optional `fileTypes` / `chunkTypes` filters.
     *
     * `fileTypes` is compared with `chunk.language` and `chunkTypes` with
     * `chunk.chunk_type`. A missing or empty filter list accepts everything.
     *
     * @param {object} chunk
     * @param {object} options
     * @returns {boolean} `true` when the chunk passes every active filter.
     */
    matchesFilters(chunk, options) {
        const { fileTypes, chunkTypes } = options;
        if (fileTypes && fileTypes.length > 0 && !fileTypes.includes(chunk.language)) {
            return false;
        }
        if (chunkTypes && chunkTypes.length > 0 && !chunkTypes.includes(chunk.chunk_type)) {
            return false;
        }
        return true;
    }

    /**
     * Analyze query to determine optimal search strategy.
     *
     * Precedence: vector-only patterns are checked first, so any query that
     * contains a regex metacharacter (including `.`, `?`, `(`) never reaches
     * the text index, even if it also looks like an exact identifier/call.
     *
     * @param {string} query
     * @returns {{useVector: boolean, useText: boolean, strategy: string}}
     */
    analyzeQuery(query) {
        // Patterns that work better with vector search
        const semanticPatterns = [
            /\b(how to|what is|why does|explain|implement|create|build)\b/i,
            /\b(pattern|algorithm|design|architecture|structure)\b/i,
            /\b(similar|like|related|equivalent)\b/i
        ];
        // Patterns that work better with text search
        const exactPatterns = [
            /^[a-zA-Z_][a-zA-Z0-9_]*$/, // Simple identifiers
            /^".*"$/, // Quoted strings
            /^[a-zA-Z0-9_]+\([^)]*\)$/ // Function calls
        ];
        // Patterns that should avoid FTS (regex/special chars)
        const vectorOnlyPatterns = [
            /[.*+?^${}()|[\]\\]/, // Regex metacharacters
            /@[A-Za-z]+/, // Annotations
            /\.\*/, // Wildcard patterns
            /<.*>/, // Generic types
        ];
        const isSemanticQuery = semanticPatterns.some(pattern => pattern.test(query));
        const isExactQuery = exactPatterns.some(pattern => pattern.test(query));
        const isVectorOnlyQuery = vectorOnlyPatterns.some(pattern => pattern.test(query));
        if (isVectorOnlyQuery) {
            return { useVector: true, useText: false, strategy: 'vector-only' };
        }
        if (isExactQuery && query.length < 50) {
            return { useVector: false, useText: true, strategy: 'text-only' };
        }
        if (isSemanticQuery || query.length > 100) {
            return { useVector: true, useText: true, strategy: 'hybrid-semantic' };
        }
        // Default: hybrid for medium queries
        if (query.length > 20) {
            return { useVector: true, useText: true, strategy: 'hybrid-balanced' };
        }
        // Short queries: text first
        return { useVector: false, useText: true, strategy: 'text-preferred' };
    }

    /**
     * Safe text search wrapper that handles FTS5 errors.
     *
     * `textSearch` already catches its own failures, so this is a second,
     * defensive layer that guarantees a rejected text lookup cannot reject
     * the `Promise.all` used by hybrid search.
     *
     * @param {string} query
     * @param {object} options
     * @returns {Promise<object[]>}
     */
    async safeTextSearch(query, options) {
        try {
            return await this.textSearch(query, options);
        }
        catch (error) {
            console.debug(`FTS5 search failed for query "${query}": ${error}. Falling back to semantic search.`);
            return [];
        }
    }

    /**
     * Text-based search using FTS5.
     *
     * Requests `limit * 2` rows from the database, applies the same
     * `fileTypes` / `chunkTypes` filters as `vectorSearch` (so filtered-out
     * chunks cannot leak into hybrid results through the text path) and
     * returns at most `limit` results. Failures are logged and reported as
     * an empty list.
     *
     * @param {string} query
     * @param {object} options - Must carry `limit`.
     * @returns {Promise<object[]>}
     */
    async textSearch(query, options) {
        try {
            const results = this.db.textSearch(query, options.limit * 2);
            const enhancedResults = results
                .filter(result => this.matchesFilters(result.chunk, options))
                .map(result => ({
                chunk: result.chunk,
                similarity_score: 0, // No vector similarity for text search
                text_score: result.similarity_score,
                combined_score: result.similarity_score,
                relevance_type: 'text',
                file_context: this.getFileContext(result.chunk.file_path),
                related_chunks: this.getRelatedChunks(result.chunk.id)
            }));
            return enhancedResults.slice(0, options.limit);
        }
        catch (error) {
            console.warn('Text search failed:', error);
            return [];
        }
    }

    /**
     * Quick search for exact matches and patterns.
     *
     * Every hit is reported with a score of 1.0 and tagged `'contextual'`.
     *
     * @param {string} pattern
     * @param {object} [options] - Overrides merged on top of `defaultOptions`.
     * @returns {Promise<object[]>}
     */
    async quickSearch(pattern, options = {}) {
        // Merge defaults so `limit` is always a number; a bare `{}` would
        // otherwise turn `limit * 2` into NaN inside textSearch.
        const opts = { ...this.defaultOptions, ...options };
        const results = await this.textSearch(pattern, opts);
        return results.map(result => ({
            ...result,
            similarity_score: 1.0,
            combined_score: 1.0,
            relevance_type: 'contextual'
        }));
    }

    /**
     * Suggest context based on current code location.
     *
     * Issues a text search for `path:<filePath>`; `lineNumber` is accepted
     * for interface compatibility but is not used yet.
     *
     * @param {string} filePath
     * @param {number} lineNumber
     * @param {object} [options] - Overrides merged on top of `defaultOptions`.
     * @returns {Promise<object[]>}
     */
    async suggestContext(filePath, lineNumber, options = {}) {
        // Merge defaults for the same reason as quickSearch.
        const opts = { ...this.defaultOptions, ...options };
        // Find chunks from the same file and related files
        const fileChunks = await this.textSearch(`path:${filePath}`, opts);
        return fileChunks.map(result => ({
            ...result,
            similarity_score: 1.0,
            combined_score: 1.0,
            relevance_type: 'contextual'
        }));
    }

    /**
     * Combine vector and text search results.
     *
     * Results are keyed by `chunk.id`. A chunk found by both searches gets
     * `similarity * hybridWeight + text_score * (1 - hybridWeight)` and is
     * tagged `'hybrid'`; chunks found by only one search get just their own
     * weighted component.
     *
     * @param {object[]} vectorResults
     * @param {object[]} textResults
     * @param {object} options - Must carry `hybridWeight`.
     * @returns {object[]} Merged results sorted by `combined_score`, descending.
     */
    combineResults(vectorResults, textResults, options) {
        const hybridWeight = options.hybridWeight;
        const textWeight = 1 - hybridWeight;
        const combinedMap = new Map();
        // Add vector results
        for (const result of vectorResults) {
            combinedMap.set(result.chunk.id, {
                ...result,
                combined_score: result.similarity_score * hybridWeight,
                relevance_type: 'semantic'
            });
        }
        // Add text results, combining with existing vector results
        for (const result of textResults) {
            const existing = combinedMap.get(result.chunk.id);
            if (existing) {
                // Combine scores
                const vectorScore = existing.similarity_score * hybridWeight;
                const textScore = (result.text_score || 0) * textWeight;
                combinedMap.set(result.chunk.id, {
                    ...existing,
                    text_score: result.text_score,
                    combined_score: vectorScore + textScore,
                    relevance_type: 'hybrid'
                });
            }
            else {
                // Add as text-only result
                combinedMap.set(result.chunk.id, {
                    ...result,
                    combined_score: (result.text_score || 0) * textWeight,
                    relevance_type: 'text'
                });
            }
        }
        // Sort by combined score
        return Array.from(combinedMap.values())
            .sort((a, b) => b.combined_score - a.combined_score);
    }

    /**
     * Apply contextual boosting based on search context.
     *
     * Multipliers stack: x1.5 for the same file, x1.2 for the same language,
     * x1.3 for a recently used file. Boosted results are re-tagged
     * `'contextual'` and the list is re-sorted.
     *
     * @param {object[]} results
     * @param {object} context - `{ filePath?, language?, recentFiles? }`.
     * @returns {object[]} New array sorted by boosted `combined_score`, descending.
     */
    applyContextualBoosting(results, context) {
        return results.map(result => {
            let boost = 1.0;
            // Boost results from the same file
            if (context.filePath && result.chunk.file_path === context.filePath) {
                boost *= 1.5;
            }
            // Boost results from the same language
            if (context.language && result.chunk.language === context.language) {
                boost *= 1.2;
            }
            // Boost results from recent files
            if (context.recentFiles && context.recentFiles.includes(result.chunk.file_path)) {
                boost *= 1.3;
            }
            return {
                ...result,
                combined_score: result.combined_score * boost,
                relevance_type: boost > 1.0 ? 'contextual' : result.relevance_type
            };
        }).sort((a, b) => b.combined_score - a.combined_score);
    }

    /**
     * Get file context for a chunk.
     *
     * @param {string} filePath
     * @returns {string[]} Currently just the file path itself.
     */
    getFileContext(filePath) {
        // This could be enhanced to return actual file context
        return [filePath];
    }

    /**
     * Get related chunks for a chunk.
     *
     * @param {*} chunkId - Currently unused.
     * @returns {Array} Always an empty list for now.
     */
    getRelatedChunks(chunkId) {
        // This could be enhanced to find semantically related chunks
        return [];
    }

    /**
     * Get search statistics.
     *
     * @returns {*} Whatever `db.getStats()` returns.
     */
    getStats() {
        return this.db.getStats();
    }
}
//# sourceMappingURL=NewSearchEngine.js.map