UNPKG

hikma-engine

Version:

Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents

830 lines (829 loc) 35.9 kB
"use strict"; /** * @file DEPRECATED: Legacy search functionality for the hikma-engine knowledge graph. * This module is deprecated in favor of EnhancedSearchService. * Use EnhancedSearchService from enhanced-search-service.ts instead. * * @deprecated Use EnhancedSearchService for new implementations * Supports both semantic vector search and metadata-based queries using unified SQLite storage. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.SearchService = void 0; const embedding_service_1 = require("./embedding-service"); const db_clients_1 = require("../persistence/db-clients"); const logger_1 = require("../utils/logger"); const error_handling_1 = require("../utils/error-handling"); /** * Search service for semantic and metadata-based queries. * * @deprecated This class is deprecated. Use EnhancedSearchService instead. * The EnhancedSearchService provides better performance and more features. */ class SearchService { constructor(config) { this.logger = (0, logger_1.getLogger)('SearchService'); this.isInitialized = false; this.config = config; const dbConfig = config.getDatabaseConfig(); this.embeddingService = new embedding_service_1.EmbeddingService(config); this.sqliteClient = new db_clients_1.SQLiteClient(dbConfig.sqlite.path); } /** * Initializes the search service by loading models and connecting to database. */ async initialize() { if (this.isInitialized) { this.logger.debug('Search service already initialized'); return; } const operation = this.logger.operation('Initializing search service'); try { this.logger.info('Loading embedding model...'); await this.embeddingService.loadModel(); this.logger.info('Connecting to SQLite database...'); await this.sqliteClient.connect(); // Check if vector search is available const vectorAvailable = await this.sqliteClient.isVectorSearchAvailable(); if (vectorAvailable) { this.logger.info('Vector search capabilities available'); } else { this.logger.warn('Vector search not available, semantic search will be limited'); } this.isInitialized = true; this.logger.info('Search service initialized successfully'); operation(); } catch (error) { this.logger.error('Failed to initialize search service', { error: (0, error_handling_1.getErrorMessage)(error) }); operation(); throw error; } } /** * Performs semantic search using vector embeddings. * @param query - The search query string * @param options - Search options * @returns Array of search results */ async semanticSearch(query, options = {}) { if (!this.isInitialized) { await this.initialize(); } const { limit = 10, nodeTypes, minSimilarity = 0.1, includeMetadata = false } = options; const operation = this.logger.operation(`Semantic search: "${query}"`); try { this.logger.info('Performing semantic search', { query: query.substring(0, 100), limit, nodeTypes }); // Check if vector search is available const vectorAvailable = await this.sqliteClient.isVectorSearchAvailable(); if (!vectorAvailable) { this.logger.warn('Vector search not available, falling back to text-based search'); try { const textResults = await this.searchCodeByText(query, { limit }); return textResults; } catch (error) { this.logger.warn('Text search also failed, returning empty results', { error: (0, error_handling_1.getErrorMessage)(error) }); return []; } } // Generate embedding for the query const queryEmbedding = await this.embeddingService.embedQuery(query); // Search in SQLite using vector similarity for each node type const allResults = []; const nodeTypesToSearch = nodeTypes || [ 'Code', 'File', 'Commit', 'Test', 'PullRequest' ]; for (const nodeType of nodeTypesToSearch) { try { const results = await this.searchNodeTypeWithVectors(nodeType, queryEmbedding, limit, minSimilarity); allResults.push(...results); } catch (error) { this.logger.warn(`Failed to search ${nodeType} nodes`, { error: (0, error_handling_1.getErrorMessage)(error) }); } } // Sort by similarity and limit results const sortedResults = allResults .sort((a, b) => b.similarity - a.similarity) .slice(0, limit); this.logger.info('Semantic search completed', { query: query.substring(0, 50), resultsFound: sortedResults.length, topSimilarity: sortedResults[0]?.similarity || 0 }); operation(); return sortedResults; } catch (error) { this.logger.error('Semantic search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); operation(); throw error; } } /** * Searches a specific node type using vector similarity. */ async searchNodeTypeWithVectors(nodeType, queryEmbedding, limit, minSimilarity) { const results = []; try { switch (nodeType) { case 'File': const fileResults = await this.sqliteClient.vectorSearch('files', 'content_embedding', queryEmbedding, limit, minSimilarity); results.push(...fileResults.map(r => this.convertToSearchResult(r, 'File'))); break; case 'Function': // Search both signature and body embeddings const sigResults = await this.sqliteClient.vectorSearch('functions', 'signature_embedding', queryEmbedding, limit, minSimilarity); const bodyResults = await this.sqliteClient.vectorSearch('functions', 'body_embedding', queryEmbedding, limit, minSimilarity); results.push(...sigResults.map(r => this.convertToSearchResult(r, 'Function'))); results.push(...bodyResults.map(r => this.convertToSearchResult(r, 'Function'))); break; case 'Commit': const commitResults = await this.sqliteClient.vectorSearch('commits', 'message_embedding', queryEmbedding, limit, minSimilarity); results.push(...commitResults.map(r => this.convertToSearchResult(r, 'Commit'))); break; case 'Code': const codeResults = await this.sqliteClient.vectorSearch('code_nodes', 'code_embedding', queryEmbedding, limit, minSimilarity); results.push(...codeResults.map(r => this.convertToSearchResult(r, 'Code'))); break; case 'Test': const testResults = await this.sqliteClient.vectorSearch('test_nodes', 'test_embedding', queryEmbedding, limit, minSimilarity); results.push(...testResults.map(r => this.convertToSearchResult(r, 'Test'))); break; case 'PullRequest': // Search both title and body embeddings const titleResults = await this.sqliteClient.vectorSearch('pull_requests', 'title_embedding', queryEmbedding, limit, minSimilarity); const prBodyResults = await this.sqliteClient.vectorSearch('pull_requests', 'body_embedding', queryEmbedding, limit, minSimilarity); results.push(...titleResults.map(r => this.convertToSearchResult(r, 'PullRequest'))); results.push(...prBodyResults.map(r => this.convertToSearchResult(r, 'PullRequest'))); break; } } catch (error) { this.logger.warn(`Vector search failed for ${nodeType}`, { error: (0, error_handling_1.getErrorMessage)(error) }); } return results; } /** * Converts SQLite vector search result to SearchResult format. */ convertToSearchResult(result, nodeType) { return { node: { id: result.id, type: nodeType, properties: result.data, embedding: [], // Don't return embedding in search results sourceText: '' }, similarity: result.similarity, rank: 0 // Will be set when sorting }; } /** * Performs metadata-based search using SQL queries. * @param filters - Metadata filters * @param options - Search options * @returns Array of node IDs matching the filters */ async metadataSearch(filters, options = {}) { if (!this.isInitialized) { await this.initialize(); } const { limit = 100 } = options; const operation = this.logger.operation('Metadata search'); try { this.logger.info('Performing metadata search', { filters }); const allNodeIds = []; // Search files table if (filters.fileExtension || filters.filePath) { const conditions = []; const params = []; if (filters.fileExtension) { conditions.push('file_extension = ?'); params.push(filters.fileExtension); } if (filters.filePath) { conditions.push('file_path LIKE ?'); params.push(`%${filters.filePath}%`); } const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; const sql = `SELECT id FROM files ${whereClause} LIMIT ?`; const results = this.sqliteClient.all(sql, [...params, limit]); allNodeIds.push(...results.map((row) => row.id)); } // Search code_nodes table if (filters.language || filters.filePath) { const conditions = []; const params = []; if (filters.language) { conditions.push('language = ?'); params.push(filters.language); } if (filters.filePath) { conditions.push('file_path LIKE ?'); params.push(`%${filters.filePath}%`); } const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; const sql = `SELECT id FROM code_nodes ${whereClause} LIMIT ?`; const results = this.sqliteClient.all(sql, [...params, limit]); allNodeIds.push(...results.map((row) => row.id)); } // Search commits table if (filters.author || filters.dateRange) { const conditions = []; const params = []; if (filters.author) { conditions.push('author LIKE ?'); params.push(`%${filters.author}%`); } if (filters.dateRange) { conditions.push('date BETWEEN ? AND ?'); params.push(filters.dateRange.start, filters.dateRange.end); } const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; const sql = `SELECT id FROM commits ${whereClause} LIMIT ?`; const results = this.sqliteClient.all(sql, [...params, limit]); allNodeIds.push(...results.map((row) => row.id)); } // Remove duplicates and limit results const uniqueNodeIds = Array.from(new Set(allNodeIds)).slice(0, limit); this.logger.info('Metadata search completed', { filters, resultsFound: uniqueNodeIds.length }); operation(); return uniqueNodeIds; } catch (error) { this.logger.error('Metadata search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); operation(); throw error; } } /** * Performs hybrid search combining semantic and metadata filtering. * @param query - The search query string * @param filters - Metadata filters * @param options - Search options * @returns Array of enhanced search results */ async hybridSearch(query, filters = {}, options = {}) { const operation = this.logger.operation(`Hybrid search: "${query}"`); try { this.logger.info('Performing hybrid search', { query: query.substring(0, 100), filters }); let semanticResults = []; // Try semantic search first try { semanticResults = await this.semanticSearch(query, { ...options, limit: (options.limit || 10) * 3 // Get more results for filtering }); } catch (error) { this.logger.warn('Semantic search failed in hybrid search, using metadata only', { error: (0, error_handling_1.getErrorMessage)(error) }); } // Then, get metadata-filtered node IDs const metadataNodeIds = Object.keys(filters).length > 0 ? await this.metadataSearch(filters, { limit: 1000 }) : []; // Filter semantic results by metadata if filters are provided let filteredResults = semanticResults; if (metadataNodeIds.length > 0) { const metadataSet = new Set(metadataNodeIds); filteredResults = semanticResults.filter(result => metadataSet.has(result.node.id)); } // If both semantic search failed and no metadata results, try direct text search for code nodes if (filteredResults.length === 0 && options.nodeTypes?.includes('CodeNode')) { this.logger.info('Both semantic and metadata search failed, trying direct text search'); try { const textResults = await this.searchCodeByText(query, { limit: options.limit || 10, language: filters.language }); filteredResults = textResults; } catch (error) { this.logger.warn('Direct text search also failed', { error: (0, error_handling_1.getErrorMessage)(error) }); } } // Enhance results with metadata const enhancedResults = await Promise.all(filteredResults.slice(0, options.limit || 10).map(async (result, index) => { const metadata = await this.getNodeMetadata(result.node.id, result.node.type); return { ...result, rank: index + 1, metadata }; })); this.logger.info('Hybrid search completed', { query: query.substring(0, 50), semanticResults: semanticResults.length, filteredResults: enhancedResults.length }); operation(); return enhancedResults; } catch (error) { this.logger.error('Hybrid search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); operation(); throw error; } } /** * Gets metadata for a specific node. * @param nodeId - The node ID * @param nodeType - The node type * @returns Metadata object */ async getNodeMetadata(nodeId, nodeType) { try { let sql = ''; let result = null; switch (nodeType) { case 'FileNode': sql = 'SELECT file_path, file_name, file_extension, updated_at FROM files WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { return { filePath: result.file_path, fileName: result.file_name, fileExtension: result.file_extension, lastModified: result.updated_at }; } break; case 'CodeNode': sql = 'SELECT file_path, language, updated_at FROM code_nodes WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { return { filePath: result.file_path, language: result.language, lastModified: result.updated_at }; } break; case 'CommitNode': sql = 'SELECT author, date FROM commits WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { return { author: result.author, date: result.date }; } break; case 'TestNode': sql = 'SELECT file_path, framework, updated_at FROM test_nodes WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { return { filePath: result.file_path, framework: result.framework, lastModified: result.updated_at }; } break; case 'PullRequestNode': sql = 'SELECT author, title, url, created_at_pr FROM pull_requests WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { return { author: result.author, title: result.title, url: result.url, createdAt: result.created_at_pr }; } break; default: return {}; } return {}; } catch (error) { this.logger.warn('Failed to get node metadata', { nodeId, error: (0, error_handling_1.getErrorMessage)(error) }); return {}; } } /** * Gets the SQLite table name for a given node type. * @param nodeType - The node type * @returns Table name */ getTableNameForNodeType(nodeType) { return nodeType.toLowerCase() + 's'; } /** * Searches for similar code patterns. * @param codeSnippet - Code snippet to find similar patterns for * @param language - Programming language filter * @param options - Search options * @returns Array of similar code nodes */ async findSimilarCode(codeSnippet, language, options = {}) { const filters = {}; if (language) { filters.language = language; } return this.hybridSearch(codeSnippet, filters, { ...options, nodeTypes: ['CodeNode'] }); } /** * Searches code nodes by text matching in SQLite metadata. * @param searchText - Text to search for * @param options - Search options * @returns Promise<SearchResult[]> */ async searchCodeByText(searchText, options = {}) { const { limit = 10, language } = options; try { const conditions = []; const params = []; // Search in name and signature fields conditions.push('(name LIKE ? OR signature LIKE ?)'); params.push(`%${searchText}%`, `%${searchText}%`); if (language) { conditions.push('language = ?'); params.push(language); } const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; const sql = `SELECT * FROM code_nodes ${whereClause} LIMIT ?`; const results = this.sqliteClient.all(sql, [...params, limit]); return results.map((row, index) => ({ node: { id: row.id, type: 'CodeNode', properties: { name: row.name, signature: row.signature, language: row.language, filePath: row.file_path, startLine: row.start_line, endLine: row.end_line }, embedding: [], // Empty embedding for text search results sourceText: row.source_text || '' }, similarity: 1.0, // Perfect match for text search rank: index + 1 })); } catch (error) { this.logger.error('Code text search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); return []; } } /** * Searches for files by content or metadata. * @param query - Search query * @param fileExtension - File extension filter * @param options - Search options * @returns Array of file nodes */ async searchFiles(query, fileExtension, options = {}) { const filters = {}; if (fileExtension) { filters.fileExtension = fileExtension; } return this.hybridSearch(query, filters, { ...options, nodeTypes: ['FileNode'] }); } /** * Searches commit history. * @param query - Search query * @param author - Author filter * @param dateRange - Date range filter * @param options - Search options * @returns Array of commit nodes */ async searchCommits(query, author, dateRange, options = {}) { const filters = {}; if (author) { filters.author = author; } if (dateRange) { filters.dateRange = dateRange; } return this.hybridSearch(query, filters, { ...options, nodeTypes: ['CommitNode'] }); } /** * Performs comprehensive search across all databases and node types. * This method combines semantic search, metadata search, and direct text search * to provide the most complete search results possible. * @param query - Search query * @param options - Search options * @returns Promise<SearchResult[]> */ async comprehensiveSearch(query, options = {}) { const operation = this.logger.operation(`Comprehensive search: "${query}"`); try { if (!this.isInitialized) { await this.initialize(); } const { limit = 20, minSimilarity = 0.5, metadataFilters = {} } = options; const allNodeTypes = ['CodeNode', 'FileNode', 'CommitNode', 'TestNode', 'PullRequestNode']; const allResults = []; // 1. Try semantic search across all node types try { const semanticResults = await this.semanticSearch(query, { limit: Math.ceil(limit * 0.4), // 40% from semantic search minSimilarity, nodeTypes: allNodeTypes }); allResults.push(...semanticResults.map(r => ({ ...r, searchType: 'semantic' }))); this.logger.info(`Semantic search found ${semanticResults.length} results`); } catch (error) { this.logger.warn('Semantic search failed in comprehensive search', { error: (0, error_handling_1.getErrorMessage)(error) }); } // 2. Metadata search across all node types try { const metadataNodeIds = await this.metadataSearch(metadataFilters, { limit: Math.ceil(limit * 0.3) // 30% from metadata search }); const metadataResults = await this.getNodesByIds(metadataNodeIds); allResults.push(...metadataResults.map(r => ({ ...r, searchType: 'metadata' }))); this.logger.info(`Metadata search found ${metadataResults.length} results`); } catch (error) { this.logger.warn('Metadata search failed in comprehensive search', { error: (0, error_handling_1.getErrorMessage)(error) }); } // 3. Direct text search for code nodes try { const textResults = await this.searchCodeByText(query, { limit: Math.ceil(limit * 0.2), // 20% from text search language: metadataFilters.language }); allResults.push(...textResults.map(r => ({ ...r, searchType: 'text' }))); this.logger.info(`Text search found ${textResults.length} results`); } catch (error) { this.logger.warn('Text search failed in comprehensive search', { error: (0, error_handling_1.getErrorMessage)(error) }); } // 4. File name search try { const fileResults = await this.searchFilesByName(query, { limit: Math.ceil(limit * 0.1) // 10% from file search }); allResults.push(...fileResults.map(r => ({ ...r, searchType: 'file' }))); this.logger.info(`File search found ${fileResults.length} results`); } catch (error) { this.logger.warn('File search failed in comprehensive search', { error: (0, error_handling_1.getErrorMessage)(error) }); } // Deduplicate and rank results const deduplicatedResults = this.deduplicateResults(allResults); const rankedResults = this.rankComprehensiveResults(deduplicatedResults, query); // Limit final results const finalResults = rankedResults.slice(0, limit); this.logger.info(`Comprehensive search completed`, { query, totalResults: finalResults.length, semanticCount: finalResults.filter(r => r.searchType === 'semantic').length, metadataCount: finalResults.filter(r => r.searchType === 'metadata').length, textCount: finalResults.filter(r => r.searchType === 'text').length, fileCount: finalResults.filter(r => r.searchType === 'file').length }); return finalResults; } catch (error) { this.logger.error('Comprehensive search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); throw error; } finally { operation(); } } /** * Converts node IDs to SearchResult objects. * @param nodeIds - Array of node IDs * @returns Promise<SearchResult[]> */ async getNodesByIds(nodeIds) { const results = []; for (const nodeId of nodeIds) { try { // Try to get node from different tables let node = null; // Check code_nodes table let sql = 'SELECT * FROM code_nodes WHERE id = ?'; let result = this.sqliteClient.get(sql, [nodeId]); if (result) { node = { id: result.id, type: 'CodeNode', properties: { name: result.name, signature: result.signature, language: result.language, filePath: result.file_path, startLine: result.start_line, endLine: result.end_line }, embedding: [], sourceText: result.source_text || '' }; } // Check files table if not found if (!node) { sql = 'SELECT * FROM files WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { node = { id: result.id, type: 'FileNode', properties: { filePath: result.file_path, fileName: result.file_name, fileExtension: result.file_extension }, embedding: [], sourceText: '' }; } } // Check commits table if not found if (!node) { sql = 'SELECT * FROM commits WHERE id = ?'; result = this.sqliteClient.get(sql, [nodeId]); if (result) { node = { id: result.id, type: 'CommitNode', properties: { message: result.message, author: result.author, date: result.date, hash: result.hash }, embedding: [], sourceText: result.message || '' }; } } if (node) { results.push({ node, similarity: 0.8, // Default similarity for metadata results rank: results.length + 1 }); } } catch (error) { this.logger.warn('Failed to get node by ID', { nodeId, error: (0, error_handling_1.getErrorMessage)(error) }); } } return results; } /** * Searches files by name pattern. * @param query - Search query * @param options - Search options * @returns Promise<SearchResult[]> */ async searchFilesByName(query, options = {}) { const { limit = 10 } = options; try { const sql = ` SELECT * FROM files WHERE file_path LIKE ? OR file_name LIKE ? ORDER BY CASE WHEN file_name LIKE ? THEN 1 WHEN file_path LIKE ? THEN 2 ELSE 3 END LIMIT ? `; const searchPattern = `%${query}%`; const exactPattern = `%${query}%`; const results = this.sqliteClient.all(sql, [ searchPattern, searchPattern, exactPattern, exactPattern, limit ]); return results.map((row, index) => ({ node: { id: row.id, type: 'FileNode', properties: { filePath: row.file_path, fileName: row.file_name, size: row.size, language: row.language, lastModified: row.last_modified }, embedding: [], // Empty embedding for file search results sourceText: '' }, similarity: 0.9, // High similarity for name matches rank: index + 1 })); } catch (error) { this.logger.error('File name search failed', { error: (0, error_handling_1.getErrorMessage)(error) }); return []; } } /** * Deduplicates search results based on node ID. * @param results - Array of search results * @returns SearchResult[] */ deduplicateResults(results) { const seen = new Set(); const deduplicated = []; for (const result of results) { if (!seen.has(result.node.id)) { seen.add(result.node.id); deduplicated.push(result); } } return deduplicated; } /** * Ranks comprehensive search results based on relevance and search type. * @param results - Array of search results * @param query - Original search query * @returns SearchResult[] */ rankComprehensiveResults(results, query) { return results.sort((a, b) => { // Prioritize by search type (semantic > text > metadata > file) const typeWeights = { semantic: 4, text: 3, metadata: 2, file: 1 }; const aWeight = typeWeights[a.searchType] || 0; const bWeight = typeWeights[b.searchType] || 0; if (aWeight !== bWeight) { return bWeight - aWeight; } // Then by similarity score if (a.similarity !== b.similarity) { return b.similarity - a.similarity; } // Finally by exact name match const aName = a.node.properties.name || a.node.properties.fileName || ''; const bName = b.node.properties.name || b.node.properties.fileName || ''; const aExactMatch = aName.toLowerCase().includes(query.toLowerCase()); const bExactMatch = bName.toLowerCase().includes(query.toLowerCase()); if (aExactMatch && !bExactMatch) return -1; if (!aExactMatch && bExactMatch) return 1; return 0; }); } /** * Disconnects from SQLite database. */ async disconnect() { try { this.sqliteClient.disconnect(); this.isInitialized = false; this.logger.info('Search service disconnected'); } catch (error) { this.logger.error('Failed to disconnect search service', { error: (0, error_handling_1.getErrorMessage)(error) }); throw error; } } /** * Gets search service statistics. * @returns Service statistics */ async getStats() { try { const embeddingStats = await this.embeddingService.getStats(); const sqliteStats = await this.sqliteClient.getIndexingStats(); return { isInitialized: this.isInitialized, embeddingModel: embeddingStats.model, totalIndexedNodes: sqliteStats.totalFiles + sqliteStats.totalCommits }; } catch (error) { this.logger.error('Failed to get search stats', { error: (0, error_handling_1.getErrorMessage)(error) }); throw error; } } } exports.SearchService = SearchService;