mnemos-coder
Version:
CLI-based coding agent with graph-based execution loop and terminal UI
293 lines • 11.3 kB
JavaScript
/**
 * Semantic search engine using API embeddings and Vectra.
 * Replaces the old TF-IDF based search.
 *
 * Collaborators (injected, not created here):
 *  - `db`: must provide `vectorSearch(embedding, limit, threshold)`,
 *    `textSearch(query, limit)` and `getStats()`. Both search calls are
 *    expected to yield `{ chunk, similarity_score }` records.
 *  - `embedder`: must provide `embed(text)` resolving to `{ embedding }`.
 *
 * Every result produced by this class has the shape
 * `{ chunk, similarity_score, text_score?, combined_score, relevance_type,
 *    file_context, related_chunks }` where `relevance_type` is one of
 * 'semantic' | 'text' | 'hybrid' | 'contextual'.
 */
export class NewSearchEngine {
  db;
  embedder;
  defaultOptions;

  /**
   * @param {object} db - Storage backend exposing vectorSearch/textSearch/getStats.
   * @param {object} embedder - Embedding provider exposing `embed(text)`.
   */
  constructor(db, embedder) {
    this.db = db;
    this.embedder = embedder;
    this.defaultOptions = {
      limit: 10,
      threshold: 0.1,
      includeContent: true,
      fileTypes: [],
      chunkTypes: [],
      hybridWeight: 0.7, // 70% vector, 30% text
    };
  }

  /**
   * Merge caller options over the defaults.
   *
   * Keys whose value is `undefined` are ignored so that e.g.
   * `{ limit: undefined }` cannot wipe out the default and turn
   * `limit * 2` into NaN further down. Calling this on already-resolved
   * options is a no-op, so every public method can resolve defensively.
   *
   * @param {object} [options] - Partial search options (may be null/undefined).
   * @returns {object} Complete options object.
   */
  resolveOptions(options) {
    const resolved = { ...this.defaultOptions };
    for (const [key, value] of Object.entries(options ?? {})) {
      if (value !== undefined) {
        resolved[key] = value;
      }
    }
    return resolved;
  }

  /**
   * Check a chunk against the optional `fileTypes` / `chunkTypes` filters.
   * An empty or missing filter list means "accept everything".
   *
   * @param {object} chunk - Chunk record with `language` and `chunk_type`.
   * @param {object} options - Resolved search options.
   * @returns {boolean} True when the chunk passes both filters.
   */
  matchesFilters(chunk, options) {
    const { fileTypes, chunkTypes } = options;
    if (fileTypes && fileTypes.length > 0 && !fileTypes.includes(chunk.language)) {
      return false;
    }
    if (chunkTypes && chunkTypes.length > 0 && !chunkTypes.includes(chunk.chunk_type)) {
      return false;
    }
    return true;
  }

  /**
   * Hybrid search combining vector similarity and text search.
   *
   * The strategy (vector, text, or both) is chosen by `analyzeQuery`.
   * Results are merged by `combineResults`, optionally re-ranked by
   * `applyContextualBoosting`, and truncated to `limit`.
   *
   * @param {string} query - User query.
   * @param {object} [options] - Partial search options; defaults are filled in.
   * @param {object} [context] - Optional `{ filePath, language, recentFiles }`.
   * @returns {Promise<object[]>} Ranked results (empty for a blank query).
   */
  async search(query, options = {}, context) {
    // A blank query cannot match anything useful; skip the embedding call
    // and the text index entirely.
    if (typeof query === 'string' && query.trim() === '') {
      return [];
    }
    const opts = this.resolveOptions(options);
    // Analyze query type and choose appropriate search strategy
    const queryAnalysis = this.analyzeQuery(query);
    try {
      let vectorResults = [];
      let textResults = [];
      if (queryAnalysis.useVector && queryAnalysis.useText) {
        // Hybrid search for natural language queries
        [vectorResults, textResults] = await Promise.all([
          this.vectorSearch(query, opts),
          this.safeTextSearch(query, opts), // Safe wrapper for FTS
        ]);
      } else if (queryAnalysis.useText) {
        // Text search only for exact matches
        textResults = await this.safeTextSearch(query, opts);
      } else {
        // Vector-only strategy, and the fallback when neither flag is set
        vectorResults = await this.vectorSearch(query, opts);
      }
      // Combine and rank results
      const combinedResults = this.combineResults(vectorResults, textResults, opts);
      // Apply contextual boosting if context is provided
      const contextualResults = context
        ? this.applyContextualBoosting(combinedResults, context)
        : combinedResults;
      return contextualResults.slice(0, opts.limit);
    } catch (error) {
      console.warn('Search error, falling back to text search:', error);
      return this.textSearch(query, opts);
    }
  }

  /**
   * Pure vector similarity search.
   *
   * Embeds the query, asks the db for `limit * 3` candidates (over-fetching
   * so that filtering still leaves enough rows), applies the
   * fileTypes/chunkTypes filters and returns at most `limit` results.
   * Any failure is logged and reported as an empty result list.
   *
   * @param {string} query - Text to embed and search for.
   * @param {object} [options] - Partial search options; defaults are filled in.
   * @returns {Promise<object[]>} Results tagged `relevance_type: 'semantic'`.
   */
  async vectorSearch(query, options) {
    const opts = this.resolveOptions(options);
    try {
      // Generate query embedding
      const { embedding: queryEmbedding } = await this.embedder.embed(query);
      // Perform vector search, fetching extra rows for filtering
      const results = await this.db.vectorSearch(queryEmbedding, opts.limit * 3, opts.threshold);
      return results
        .filter((result) => this.matchesFilters(result.chunk, opts))
        .slice(0, opts.limit)
        .map((result) => ({
          chunk: result.chunk,
          similarity_score: result.similarity_score,
          combined_score: result.similarity_score,
          relevance_type: 'semantic',
          file_context: this.getFileContext(result.chunk.file_path),
          related_chunks: this.getRelatedChunks(result.chunk.id),
        }));
    } catch (error) {
      console.warn('Vector search failed:', error);
      return [];
    }
  }

  /**
   * Analyze query to determine optimal search strategy.
   *
   * Precedence (first match wins):
   *  1. vector-only   - query contains regex metacharacters, annotations,
   *                     wildcards or generic brackets (kept away from FTS).
   *  2. text-only     - query looks like an identifier / quoted string and
   *                     is shorter than 50 characters.
   *  3. hybrid-semantic - natural-language cue words, or longer than 100 chars.
   *  4. hybrid-balanced - longer than 20 characters.
   *  5. text-preferred  - everything else (short queries).
   *
   * @param {string} query - User query.
   * @returns {{useVector: boolean, useText: boolean, strategy: string}}
   */
  analyzeQuery(query) {
    // Patterns that work better with vector search
    const semanticPatterns = [
      /\b(how to|what is|why does|explain|implement|create|build)\b/i,
      /\b(pattern|algorithm|design|architecture|structure)\b/i,
      /\b(similar|like|related|equivalent)\b/i,
    ];
    // Patterns that work better with text search
    const exactPatterns = [
      /^[a-zA-Z_][a-zA-Z0-9_]*$/, // Simple identifiers
      /^".*"$/, // Quoted strings
      // Function calls. NOTE(review): currently unreachable, because any
      // query containing "(" is classified vector-only first (see below).
      /^[a-zA-Z0-9_]+\([^)]*\)$/,
    ];
    // Patterns that should avoid FTS (regex/special chars)
    const vectorOnlyPatterns = [
      /[.*+?^${}()|[\]\\]/, // Regex metacharacters
      /@[A-Za-z]+/, // Annotations
      /\.\*/, // Wildcard patterns
      /<.*>/, // Generic types
    ];
    const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(query));
    const isSemanticQuery = matchesAny(semanticPatterns);
    const isExactQuery = matchesAny(exactPatterns);
    const isVectorOnlyQuery = matchesAny(vectorOnlyPatterns);
    if (isVectorOnlyQuery) {
      return { useVector: true, useText: false, strategy: 'vector-only' };
    }
    if (isExactQuery && query.length < 50) {
      return { useVector: false, useText: true, strategy: 'text-only' };
    }
    if (isSemanticQuery || query.length > 100) {
      return { useVector: true, useText: true, strategy: 'hybrid-semantic' };
    }
    // Default: hybrid for medium queries
    if (query.length > 20) {
      return { useVector: true, useText: true, strategy: 'hybrid-balanced' };
    }
    // Short queries: text first
    return { useVector: false, useText: true, strategy: 'text-preferred' };
  }

  /**
   * Safe text search wrapper that handles FTS5 errors.
   *
   * `textSearch` already logs and swallows its own failures, so this catch
   * is a second line of defence that keeps one failing branch from
   * rejecting the `Promise.all` in `search`.
   *
   * @param {string} query - Text query.
   * @param {object} [options] - Partial search options.
   * @returns {Promise<object[]>} Text results, or [] on failure.
   */
  async safeTextSearch(query, options) {
    try {
      return await this.textSearch(query, options);
    } catch (error) {
      console.debug(`FTS5 search failed for query "${query}": ${error}. Falling back to semantic search.`);
      return [];
    }
  }

  /**
   * Text-based search using FTS5.
   *
   * Requests `limit * 2` rows from the db, applies the same
   * fileTypes/chunkTypes filters as `vectorSearch`, and returns at most
   * `limit` results. Failures are logged and reported as [].
   *
   * @param {string} query - Text query handed to `db.textSearch`.
   * @param {object} [options] - Partial search options; defaults are filled in.
   * @returns {Promise<object[]>} Results tagged `relevance_type: 'text'`.
   */
  async textSearch(query, options) {
    const opts = this.resolveOptions(options);
    try {
      // `await` is a no-op for a synchronous db and makes a
      // promise-returning implementation work as well.
      const results = await this.db.textSearch(query, opts.limit * 2);
      return results
        .filter((result) => this.matchesFilters(result.chunk, opts))
        .slice(0, opts.limit)
        .map((result) => ({
          chunk: result.chunk,
          similarity_score: 0, // No vector similarity for text search
          text_score: result.similarity_score,
          combined_score: result.similarity_score,
          relevance_type: 'text',
          file_context: this.getFileContext(result.chunk.file_path),
          related_chunks: this.getRelatedChunks(result.chunk.id),
        }));
    } catch (error) {
      console.warn('Text search failed:', error);
      return [];
    }
  }

  /**
   * Re-tag text results as full-confidence contextual matches.
   * Shared by `quickSearch` and `suggestContext`.
   *
   * @param {object[]} results - Results from `textSearch`.
   * @returns {object[]} Copies with scores forced to 1.0.
   */
  asContextualMatches(results) {
    return results.map((result) => ({
      ...result,
      similarity_score: 1.0,
      combined_score: 1.0,
      relevance_type: 'contextual',
    }));
  }

  /**
   * Quick search for exact matches and patterns.
   *
   * @param {string} pattern - Text pattern handed to the text search.
   * @param {object} [options] - Partial search options; defaults are filled in.
   * @returns {Promise<object[]>} At most `limit` results, each scored 1.0.
   */
  async quickSearch(pattern, options = {}) {
    const results = await this.textSearch(pattern, options);
    return this.asContextualMatches(results);
  }

  /**
   * Suggest context based on current code location.
   *
   * Issues the text query `path:<filePath>`.
   * NOTE(review): presumably an FTS column filter - confirm against the db
   * implementation. `lineNumber` is accepted for interface compatibility
   * but is not used yet.
   *
   * @param {string} filePath - File the caller is currently working in.
   * @param {number} lineNumber - Currently unused.
   * @param {object} [options] - Partial search options; defaults are filled in.
   * @returns {Promise<object[]>} At most `limit` results, each scored 1.0.
   */
  async suggestContext(filePath, lineNumber, options = {}) {
    // Find chunks from the same file and related files
    const fileChunks = await this.textSearch(`path:${filePath}`, options);
    return this.asContextualMatches(fileChunks);
  }

  /**
   * Combine vector and text search results.
   *
   * Results are keyed by `chunk.id`. A chunk found by both searches gets
   * `similarity * hybridWeight + text_score * (1 - hybridWeight)` and is
   * tagged 'hybrid'; a chunk found by only one search gets that single
   * weighted term.
   *
   * @param {object[]} vectorResults - Output of `vectorSearch`.
   * @param {object[]} textResults - Output of `textSearch`.
   * @param {object} [options] - Options carrying `hybridWeight` (default 0.7).
   * @returns {object[]} Merged results sorted by descending `combined_score`.
   */
  combineResults(vectorResults, textResults, options) {
    const { hybridWeight } = this.resolveOptions(options);
    const textWeight = 1 - hybridWeight;
    const combinedMap = new Map();
    // Add vector results
    for (const result of vectorResults) {
      combinedMap.set(result.chunk.id, {
        ...result,
        combined_score: result.similarity_score * hybridWeight,
        relevance_type: 'semantic',
      });
    }
    // Add text results, combining with existing vector results
    for (const result of textResults) {
      const weightedText = (result.text_score || 0) * textWeight;
      const existing = combinedMap.get(result.chunk.id);
      if (existing) {
        combinedMap.set(result.chunk.id, {
          ...existing,
          text_score: result.text_score,
          combined_score: existing.similarity_score * hybridWeight + weightedText,
          relevance_type: 'hybrid',
        });
      } else {
        // Add as text-only result
        combinedMap.set(result.chunk.id, {
          ...result,
          combined_score: weightedText,
          relevance_type: 'text',
        });
      }
    }
    // Sort by combined score
    return Array.from(combinedMap.values())
      .sort((a, b) => b.combined_score - a.combined_score);
  }

  /**
   * Apply contextual boosting based on search context.
   *
   * Multiplies `combined_score` by 1.5 for the same file, 1.2 for the same
   * language and 1.3 for a recently used file (factors stack). Any boosted
   * result is re-tagged 'contextual'. Input objects are not mutated.
   *
   * @param {object[]} results - Combined results.
   * @param {object} context - `{ filePath?, language?, recentFiles? }`.
   * @returns {object[]} New result objects sorted by descending score.
   */
  applyContextualBoosting(results, context) {
    return results
      .map((result) => {
        let boost = 1.0;
        // Boost results from the same file
        if (context.filePath && result.chunk.file_path === context.filePath) {
          boost *= 1.5;
        }
        // Boost results from the same language
        if (context.language && result.chunk.language === context.language) {
          boost *= 1.2;
        }
        // Boost results from recent files
        if (context.recentFiles && context.recentFiles.includes(result.chunk.file_path)) {
          boost *= 1.3;
        }
        return {
          ...result,
          combined_score: result.combined_score * boost,
          relevance_type: boost > 1.0 ? 'contextual' : result.relevance_type,
        };
      })
      .sort((a, b) => b.combined_score - a.combined_score);
  }

  /**
   * Get file context for a chunk.
   * Placeholder: currently just wraps the path in an array.
   *
   * @param {string} filePath - Path of the chunk's file.
   * @returns {string[]} Single-element array containing `filePath`.
   */
  getFileContext(filePath) {
    // This could be enhanced to return actual file context
    return [filePath];
  }

  /**
   * Get related chunks for a chunk.
   * Placeholder: always returns an empty array.
   *
   * @param {*} chunkId - Identifier of the chunk (currently unused).
   * @returns {Array} Always [].
   */
  getRelatedChunks(chunkId) {
    // This could be enhanced to find semantically related chunks
    return [];
  }

  /**
   * Get search statistics.
   *
   * @returns {*} Whatever `db.getStats()` returns.
   */
  getStats() {
    return this.db.getStats();
  }
}
//# sourceMappingURL=NewSearchEngine.js.map