UNPKG

@knath2000/codebase-indexing-mcp

Version:

MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage

1,036 lines (1,035 loc) โ€ข 63.1 kB
#!/usr/bin/env node import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; import { ChunkType } from './types.js'; import { IndexingService } from './services/indexing-service.js'; import { SearchService } from './services/search-service.js'; import { loadConfig, validateConfig, printConfigSummary } from './config.js'; import { WorkspaceWatcher } from './services/workspace-watcher.js'; import { WorkspaceManager } from './services/workspace-manager.js'; // Server configuration const SERVER_NAME = 'codebase-indexing-server'; const SERVER_VERSION = '1.0.0'; export const TOOL_DEFINITIONS = [ { name: 'index_directory', description: 'Index all files in a directory recursively', inputSchema: { type: 'object', properties: { directory_path: { type: 'string', description: 'Path to the directory to index' } }, required: ['directory_path'] } }, { name: 'index_file', description: 'Index a single file', inputSchema: { type: 'object', properties: { file_path: { type: 'string', description: 'Path to the file to index' } }, required: ['file_path'] } }, { name: 'search_code', description: 'Search for code using semantic similarity', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query' }, language: { type: 'string', description: 'Programming language to filter by (optional)' }, chunk_type: { type: 'string', description: 'Type of code chunk to search for (function, class, etc.)' }, file_path: { type: 'string', description: 'File path to search within (optional)' }, limit: { type: 'number', description: 'Maximum number of results to return (default: 10)' }, threshold: { type: 'number', description: 'Minimum similarity threshold (default: 0.7)' } }, required: ['query'] } }, { name: 'search_functions', description: 'Search for functions by name or description', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Function name or description to search for' }, language: { type: 'string', description: 'Programming language to filter by (optional)' }, limit: { type: 'number', description: 'Maximum number of results to return (default: 10)' } }, required: ['query'] } }, { name: 'search_classes', description: 'Search for classes by name or description', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Class name or description to search for' }, language: { type: 'string', description: 'Programming language to filter by (optional)' }, limit: { type: 'number', description: 'Maximum number of results to return (default: 10)' } }, required: ['query'] } }, { name: 'find_similar', description: 'Find code chunks similar to a given chunk', inputSchema: { type: 'object', properties: { chunk_id: { type: 'string', description: 'ID of the chunk to find similar chunks for' }, limit: { type: 'number', description: 'Maximum number of results to return (default: 10)' }, threshold: { type: 'number', description: 'Minimum similarity threshold (default: 0.7)' } }, required: ['chunk_id'] } }, { name: 'get_code_context', description: 'Get code context around a specific chunk', inputSchema: { type: 'object', properties: { chunk_id: { type: 'string', description: 'ID of the chunk to get context for' }, context_lines: { type: 'number', description: 'Number of lines of context to include (default: 5)' } }, required: ['chunk_id'] } }, { name: 'get_indexing_stats', description: 'Get statistics about the indexed codebase', inputSchema: { type: 'object', properties: {} } }, { name: 'get_search_stats', description: 'Get statistics about the search index', inputSchema: { type: 'object', properties: {} } }, { name: 'clear_index', description: 'Clear the entire search index', inputSchema: { type: 'object', properties: {} } }, { name: 'remove_file', description: 'Remove a file from the search index', inputSchema: { type: 'object', properties: { file_path: { type: 'string', description: 'Path to the file to remove from index' } }, required: ['file_path'] } }, { name: 'reindex_file', description: 'Re-index a single file', inputSchema: { type: 'object', properties: { file_path: { type: 'string', description: 'Path to the file to re-index' } }, required: ['file_path'] } }, { name: 'create_payload_indexes', description: 'Create payload indexes for filtering (chunkType, language, filePath) on existing collection', inputSchema: { type: 'object', properties: { force: { type: 'boolean', description: 'Force creation even if indexes might exist', default: false } }, required: [] } }, { name: 'codebase_search', description: 'Natural language search for codebase understanding. Handles queries like "How is user authentication handled?", "Database connection setup", "Error handling patterns", "API endpoint definitions", "Component state management". Returns relevant code snippets with file paths, line numbers, similarity scores, and navigation links.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Natural language query describing what you want to find in the codebase (e.g., "How is user authentication handled?", "Database connection setup", "Error handling patterns")' }, language: { type: 'string', description: 'Programming language to filter by (optional)' }, chunk_type: { type: 'string', description: 'Type of code chunk to search for (function, class, etc.)' }, file_path: { type: 'string', description: 'File path to search within (optional)' }, limit: { type: 'number', description: 'Maximum number of results to return (default: 10)' }, max_tokens: { type: 'number', description: 'Maximum tokens for context window (optional)' }, enable_hybrid: { type: 'boolean', description: 'Enable hybrid search (dense + sparse)' }, enable_reranking: { type: 'boolean', description: 'Enable LLM re-ranking of results' }, prefer_implementation: { type: 'boolean', description: 'Prefer implementation code over documentation (default: true)' } }, required: ['query'] } }, { name: 'get_health_status', description: 'Get comprehensive health status of all services', inputSchema: { type: 'object', properties: {} } }, { name: 'get_enhanced_stats', description: 'Get enhanced statistics including search cache, hybrid search, and LLM re-ranking metrics', inputSchema: { type: 'object', properties: {} } }, { name: 'get_workspace_info', description: 'Get information about the current workspace and detected project structure', inputSchema: { type: 'object', properties: {}, required: [] } }, { name: 'list_workspaces', description: 'List all detected workspaces and their metadata', inputSchema: { type: 'object', properties: {}, required: [] } }, { name: 'switch_workspace', description: 'Switch to a different workspace by ID', inputSchema: { type: 'object', properties: { workspace_id: { type: 'string', description: 'The ID of the workspace to switch to' } }, required: ['workspace_id'] } }, { name: 'clear_search_cache', description: 'Clear search cache and reset statistics', inputSchema: { type: 'object', properties: {} } }, { name: 'invalidate_file_cache', description: 'Invalidate cache entries for a specific file (useful when file is modified)', inputSchema: { type: 'object', properties: { file_path: { type: 'string', description: 'Path to the file to invalidate from cache' } }, required: ['file_path'] } } ]; // Export function to setup MCP tools (used by HTTP server) export function setupMcpTools(server, indexingService, searchService) { // List available tools server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools: TOOL_DEFINITIONS }; }); // Handle tool calls server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; if (!args) { return { content: [{ type: 'text', text: 'Error: No arguments provided' }], isError: true }; } try { // Ensure services are initialized before tool execution if ('ensureServicesInitialized' in globalThis) { await globalThis.ensureServicesInitialized(); } switch (name) { case 'index_directory': const dirStats = await indexingService.indexDirectory(args.directory_path); return { content: [{ type: 'text', text: `Successfully indexed directory: ${args.directory_path}\nGenerated ${dirStats.totalChunks} chunks` }] }; case 'index_file': const fileChunks = await indexingService.indexFile(args.file_path); return { content: [{ type: 'text', text: `Successfully indexed file: ${args.file_path}\nGenerated ${fileChunks.length} chunks` }] }; case 'search_code': const codeResults = await searchService.search(searchService.buildSearchQuery({ query: args.query, ...(args.language !== undefined ? { language: args.language } : {}), ...(args.chunk_type !== undefined ? { chunkType: args.chunk_type } : {}), ...(args.file_path !== undefined ? { filePath: args.file_path } : {}), ...(args.limit !== undefined ? { limit: args.limit } : {}), ...(args.threshold !== undefined ? { threshold: args.threshold } : {}), ...(args.enable_hybrid !== undefined ? { enableHybrid: args.enable_hybrid } : {}), ...(args.enable_reranking !== undefined ? { enableReranking: args.enable_reranking } : {}), ...(args.llm_reranker_timeout_ms !== undefined ? { llmRerankerTimeoutMs: args.llm_reranker_timeout_ms } : {}), })); return { content: [{ type: 'text', text: `Search results for "${args.query}":\n\n` + codeResults.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') }] }; case 'search_functions': const functionResults = await searchService.searchFunctions(searchService.buildSearchQuery({ query: args.query, ...(args.language !== undefined ? { language: args.language } : {}), ...(args.limit !== undefined ? { limit: args.limit } : {}), chunkType: ChunkType.FUNCTION, })); return { content: [{ type: 'text', text: `Function search results for "${args.query}":\n\n` + functionResults.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') }] }; case 'search_classes': const classResults = await searchService.searchClasses(searchService.buildSearchQuery({ query: args.query, ...(args.language !== undefined ? { language: args.language } : {}), ...(args.limit !== undefined ? { limit: args.limit } : {}), chunkType: ChunkType.CLASS, })); return { content: [{ type: 'text', text: `Class search results for "${args.query}":\n\n` + classResults.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') }] }; case 'find_similar': const similarResults = await searchService.findSimilar(searchService.buildSearchQuery({ query: args.chunk_id, ...(args.limit !== undefined ? { limit: args.limit } : {}), })); return { content: [{ type: 'text', text: `Similar chunks to "${args.chunk_id}":\n\n` + similarResults.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') }] }; case 'get_code_context': const codeContextResult = await searchService.getCodeContext(args.chunk_id, args.context_lines); if (!codeContextResult) { return { content: [{ type: 'text', text: `Chunk not found: ${args.chunk_id}` }], isError: true }; } return { content: [{ type: 'text', text: `Code context for chunk "${args.chunk_id}":\n\n` + `File: ${codeContextResult.chunk.filePath}\n` + `Lines: ${codeContextResult.chunk.startLine}-${codeContextResult.chunk.endLine}\n` + `Type: ${codeContextResult.chunk.chunkType}\n\n` + `\`\`\`${codeContextResult.chunk.language}\n${codeContextResult.context}\n\`\`\`` }] }; case 'get_indexing_stats': const indexingStats = indexingService.getStats(); return { content: [{ type: 'text', text: `Indexing Statistics:\n\n` + `Total files: ${indexingStats.totalFiles}\n` + `Total chunks: ${indexingStats.totalChunks}\n` + `Total size: ${indexingStats.totalSize} bytes\n` + `Average chunk size: ${Math.round(indexingStats.averageChunkSize)} bytes\n` + `Last indexed: ${indexingStats.lastIndexed.toISOString()}\n` + `Indexing duration: ${indexingStats.indexingDuration}ms\n` + `Errors: ${indexingStats.errors}\n` + `Warnings: ${indexingStats.warnings}\n` + `Largest file: ${indexingStats.largestFile}\n\n` + `Language distribution:\n${Object.entries(indexingStats.languageDistribution) .map(([lang, count]) => ` ${lang}: ${count}`) .join('\n')}\n\n` + `Chunk type distribution:\n${Object.entries(indexingStats.chunkTypeDistribution) .map(([type, count]) => ` ${type}: ${count}`) .join('\n')}` }] }; case 'get_search_stats': const searchStats = await searchService.getSearchStats(); const languageStats = Object.entries(searchStats.topLanguages) .sort(([, a], [, b]) => b - a) .map(([lang, count]) => ` โ€ข ${lang}: ${count} chunks`) .join('\n'); const chunkTypeStats = Object.entries(searchStats.topChunkTypes) .sort(([, a], [, b]) => b - a) .map(([type, count]) => ` โ€ข ${type}: ${count} chunks`) .join('\n'); return { content: [{ type: 'text', text: `Search Statistics:\n\n` + `Total queries: ${searchStats.totalQueries}\n` + `Average latency: ${searchStats.averageLatency.toFixed(2)}ms\n` + `Cache hit rate: ${searchStats.cacheHitRate.toFixed(2)}%\n` + `Hybrid search usage: ${searchStats.hybridSearchUsage.toFixed(2)}%\n` + `LLM reranker usage: ${searchStats.llmRerankerUsage.toFixed(2)}%\n` + `Error rate: ${searchStats.errorRate.toFixed(2)}%\n` + `Last query: ${searchStats.lastQuery?.toISOString() || 'N/A'}\n\n` + `Top Languages:\n${languageStats}\n\n` + `Top Chunk Types:\n${chunkTypeStats}` }] }; case 'clear_index': await indexingService.clearIndex(); return { content: [{ type: 'text', text: 'Successfully cleared index' }] }; case 'remove_file': await indexingService.removeFile(args.file_path); return { content: [{ type: 'text', text: `Successfully removed file: ${args.file_path}` }] }; case 'reindex_file': const reindexedChunks = await indexingService.reindexFile(args.file_path); return { content: [{ type: 'text', text: `Successfully re-indexed file: ${args.file_path}\nGenerated ${reindexedChunks.length} chunks` }] }; case 'create_payload_indexes': await searchService.qdrantClient.ensurePayloadIndexes(); return { content: [{ type: 'text', text: '๐ŸŽ‰ Successfully created payload indexes for filtering!\n\n' + 'โœ… chunkType index - for filtering by code elements (function, class, interface, etc.)\n' + 'โœ… language index - for filtering by programming language (typescript, javascript, etc.)\n' + 'โœ… filePath index - for file-specific searches\n\n' + '๐Ÿ” Your collection is now ready for @codebase-style filtered searches!' }] }; case 'codebase_search': const enhancedSearchArgs = args; // Build enhanced search query with implementation-focused defaults const enhancedQuery = searchService.buildSearchQuery({ query: enhancedSearchArgs.query, ...(enhancedSearchArgs.language !== undefined ? { language: enhancedSearchArgs.language } : {}), ...(enhancedSearchArgs.chunk_type !== undefined ? { chunkType: enhancedSearchArgs.chunk_type } : {}), ...(enhancedSearchArgs.file_path !== undefined ? { filePath: enhancedSearchArgs.file_path } : {}), limit: enhancedSearchArgs.limit || 20, threshold: enhancedSearchArgs.threshold || 0.15, // Lower threshold for broader recall enableHybrid: enhancedSearchArgs.enable_hybrid !== false, // Default to true enableReranking: enhancedSearchArgs.enable_reranking !== false, // Default to true // Implementation-focused enhancements preferFunctions: true, // Boost function chunks preferClasses: true, // Boost class chunks maxFilesPerType: 5, // Allow more results per file preferImplementation: enhancedSearchArgs.prefer_implementation !== false, // Default to true }); const enhancedResults = await searchService.search(enhancedQuery); if (enhancedResults.length === 0) { return { content: [{ type: 'text', text: `No results found for "${enhancedSearchArgs.query}". Try broadening your search terms or checking spelling.` }], isError: false }; } // Create comprehensive output with metadata const searchTime = Date.now() - Date.now(); // This will be set by search service const formatOutput = (results) => { let output = `Codebase search results for "${enhancedSearchArgs.query}":\n\n`; results.forEach((result, index) => { const score = (result.score * 100).toFixed(2); const chunkType = result.chunk.chunkType || 'generic'; const startLine = result.chunk.startLine; const endLine = result.chunk.endLine; const filePath = result.chunk.filePath; // Create clickable file link with line numbers const fileLink = `[${filePath}:${startLine}-${endLine}](cursor://file?filePath=${encodeURIComponent(filePath)}&startLine=${startLine}&endLine=${endLine})`; output += `${index + 1}. **${chunkType} (Score: ${score}%)** - ${fileLink}\n`; output += `\`\`\`${result.chunk.language}\n${result.chunk.content.trim()}\n\`\`\`\n\n`; }); // Add search metadata const searchStats = searchService.getEnhancedSearchStats(); const cacheHit = searchStats.cacheHitRate > 0 ? 'Yes' : 'No'; const hybridUsed = searchStats.hybridSearchUsage > 0 ? 'Yes' : 'No'; const reranked = searchStats.llmRerankerUsage > 0 ? 'Yes' : 'No'; output += `_Search took ${searchTime.toFixed(2)}ms. Total results: ${results.length}. Cache Hit: ${cacheHit}. Hybrid Search Used: ${hybridUsed}. Reranked: ${reranked}_`; return output; }; return { content: [{ type: 'text', text: formatOutput(enhancedResults) }], isError: false }; case 'get_health_status': // TODO: Implement health monitoring service return { content: [{ type: 'text', text: '๐Ÿฅ **System Health Status**\n\n' + 'โœ… Search Service: Operational\n' + 'โœ… Indexing Service: Operational\n' + 'โš ๏ธ Health monitoring service not yet implemented' }] }; case 'get_enhanced_stats': const enhancedStats = searchService.getEnhancedSearchStats(); return { content: [{ type: 'text', text: `๐Ÿ“Š **Enhanced Search Statistics**\n\n` + `**Search Performance:**\n` + `- Total queries: ${enhancedStats.totalQueries}\n` + `- Cache hit rate: ${(enhancedStats.cacheHitRate * 100).toFixed(1)}%\n` + `- Hybrid search usage: ${enhancedStats.hybridSearchUsage} queries\n` + `- LLM re-ranking usage: ${enhancedStats.llmRerankerUsage} queries\n` + `- Last query: ${enhancedStats.lastQuery.toISOString()}\n\n` + `` }] }; case 'get_workspace_info': // Access the workspace manager from the service (if available) const currentWorkspace = indexingService.currentWorkspace || searchService.currentWorkspace; if (!currentWorkspace) { return { content: [{ type: 'text', text: 'โŒ No workspace detected. Please ensure you are running the server from a valid workspace directory.' }], isError: true }; } return { content: [{ type: 'text', text: `๐Ÿ—๏ธ **Current Workspace Information**\n\n` + `**Name:** ${currentWorkspace.name}\n` + `**Type:** ${currentWorkspace.type}\n` + `**Root Path:** ${currentWorkspace.rootPath}\n` + `**Collection:** ${currentWorkspace.collectionName}\n` + `**Folders:** ${currentWorkspace.folders.length}\n` + currentWorkspace.folders.map((folder, i) => ` ${i + 1}. ${folder}`).join('\n') + '\n\n' + (currentWorkspace.gitRemote ? `**Git Remote:** ${currentWorkspace.gitRemote}\n` : '') + (currentWorkspace.packageName ? `**Package Name:** ${currentWorkspace.packageName}\n` : '') + `**Last Accessed:** ${currentWorkspace.lastAccessed.toISOString()}\n` + `**Workspace ID:** ${currentWorkspace.id}` }] }; case 'list_workspaces': // Get workspace manager from services const workspaceManager = indexingService.workspaceManager || searchService.workspaceManager; if (!workspaceManager) { return { content: [{ type: 'text', text: 'โŒ Workspace manager not available.' }], isError: true }; } const allWorkspaces = workspaceManager.getAllWorkspaces(); const currentWorkspaceId = workspaceManager.getCurrentWorkspace()?.id; if (allWorkspaces.length === 0) { return { content: [{ type: 'text', text: '๐Ÿ“‚ No workspaces have been detected yet.' }] }; } return { content: [{ type: 'text', text: `๐Ÿ“‚ **Detected Workspaces**\n\n` + allWorkspaces.map((workspace, index) => { const current = workspace.id === currentWorkspaceId ? ' โ† **Current**' : ''; const lastAccessed = workspace.lastAccessed.toLocaleDateString(); return `${index + 1}. **${workspace.name}** (${workspace.type})${current}\n` + ` - Path: ${workspace.rootPath}\n` + ` - Collection: ${workspace.collectionName}\n` + ` - Last Accessed: ${lastAccessed}\n` + ` - ID: ${workspace.id.substring(0, 12)}...`; }).join('\n\n') }] }; case 'switch_workspace': const targetWorkspaceId = args.workspace_id; const wsManager = indexingService.workspaceManager || searchService.workspaceManager; if (!wsManager) { return { content: [{ type: 'text', text: 'โŒ Workspace manager not available.' }], isError: true }; } const switchedWorkspace = await wsManager.switchToWorkspace(targetWorkspaceId); if (!switchedWorkspace) { return { content: [{ type: 'text', text: `โŒ Workspace not found: ${targetWorkspaceId}` }], isError: true }; } // Update services to use the new workspace if (indexingService && indexingService.updateQdrantClientForWorkspace) { indexingService.updateQdrantClientForWorkspace(switchedWorkspace); } if (searchService && searchService.updateQdrantClientForWorkspace) { searchService.updateQdrantClientForWorkspace(switchedWorkspace); } return { content: [{ type: 'text', text: `๐Ÿ”„ **Successfully switched to workspace:** ${switchedWorkspace.name}\n\n` + `**Type:** ${switchedWorkspace.type}\n` + `**Collection:** ${switchedWorkspace.collectionName}\n` + `**Path:** ${switchedWorkspace.rootPath}\n\n` + `๐Ÿ”ง All services have been updated to use the new workspace collection.` }] }; case 'clear_search_cache': searchService.clearCaches(); return { content: [{ type: 'text', text: '๐Ÿงน Successfully cleared search cache and reset statistics' }] }; case 'invalidate_file_cache': searchService.invalidateFileCache(args.file_path); return { content: [{ type: 'text', text: `๐Ÿ”„ Successfully invalidated cache entries for file: ${args.file_path}` }] }; default: throw new Error(`Unknown tool: ${name}`); } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); return { content: [{ type: 'text', text: `Error: ${errorMessage}` }], isError: true }; } }); } class CodebaseIndexingServer { constructor(config) { this.server = new Server({ name: SERVER_NAME, version: SERVER_VERSION, }, { capabilities: { tools: {} } }); // Create shared workspace manager this.workspaceManager = new WorkspaceManager(); this.indexingService = new IndexingService(config, this.workspaceManager); this.searchService = new SearchService(config, this.workspaceManager); this.workspaceDir = process.cwd(); this.workspaceWatcher = new WorkspaceWatcher(this.workspaceDir, this.indexingService, config.supportedExtensions, config.excludePatterns); this.setupToolHandlers(); } setupToolHandlers() { // List available tools this.server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools: TOOL_DEFINITIONS }; }); // Handle tool calls this.server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; try { switch (name) { case 'index_directory': return await this.handleIndexDirectory(args); case 'index_file': return await this.handleIndexFile(args); case 'search_code': return await this.handleSearchCode(args); case 'search_functions': return await this.handleSearchFunctions(args); case 'search_classes': return await this.handleSearchClasses(args); case 'find_similar': return await this.handleFindSimilar(args); case 'get_code_context': return await this.handleGetCodeContext(args); case 'get_indexing_stats': return await this.handleGetIndexingStats(args); case 'get_search_stats': return await this.handleGetSearchStats(args); case 'clear_index': return await this.handleClearIndex(args); case 'remove_file': return await this.handleRemoveFile(args); case 'reindex_file': return await this.handleReindexFile(args); case 'create_payload_indexes': return await this.handleCreatePayloadIndexes(args); case 'codebase_search': return await this.handleCodebaseSearch(args); case 'get_health_status': return await this.handleGetHealthStatus(args); case 'get_enhanced_stats': return await this.handleGetEnhancedStats(args); case 'clear_search_cache': return await this.handleClearSearchCache(args); case 'invalidate_file_cache': return await this.handleInvalidateFileCache(args); default: throw new Error(`Unknown tool: ${name}`); } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); return { content: [ { type: 'text', text: `Error: ${errorMessage}` } ] }; } }); } async handleIndexDirectory(args) { const { directory_path } = args; const stats = await this.indexingService.indexDirectory(directory_path); return { content: [ { type: 'text', text: `Successfully indexed directory: ${directory_path}\n\n` + `Statistics:\n` + `- Total files: ${stats.totalFiles}\n` + `- Total chunks: ${stats.totalChunks}\n` + `- Total size: ${stats.totalSize} bytes\n` + `- Average chunk size: ${Math.round(stats.averageChunkSize)} bytes\n` + `- Indexing duration: ${stats.indexingDuration}ms\n` + `- Errors: ${stats.errors}\n` + `- Warnings: ${stats.warnings}\n\n` + `Language distribution:\n${Object.entries(stats.languageDistribution) .map(([lang, count]) => ` ${lang}: ${count}`) .join('\n')}\n\n` + `Chunk type distribution:\n${Object.entries(stats.chunkTypeDistribution) .map(([type, count]) => ` ${type}: ${count}`) .join('\n')}` } ] }; } async handleIndexFile(args) { const { file_path } = args; const chunks = await this.indexingService.indexFile(file_path); return { content: [ { type: 'text', text: `Successfully indexed file: ${file_path}\n` + `Generated ${chunks.length} chunks\n\n` + `Chunks:\n${chunks.map(chunk => `- ${chunk.chunkType} (${chunk.startLine}-${chunk.endLine}): ${chunk.functionName || chunk.className || 'unnamed'}`).join('\n')}` } ] }; } async handleSearchCode(args) { const { query, language, chunk_type, file_path, limit, threshold, enable_hybrid, enable_reranking, llm_reranker_timeout_ms } = args; const searchQuery = this.searchService.buildSearchQuery({ query: query, ...(language !== undefined ? { language: language } : {}), ...(chunk_type !== undefined ? { chunkType: chunk_type } : {}), ...(file_path !== undefined ? { filePath: file_path } : {}), ...(limit !== undefined ? { limit: limit } : {}), ...(threshold !== undefined ? { threshold: threshold } : {}), ...(enable_hybrid !== undefined ? { enableHybrid: enable_hybrid } : {}), ...(enable_reranking !== undefined ? { enableReranking: enable_reranking } : {}), ...(llm_reranker_timeout_ms !== undefined ? { llmRerankerTimeoutMs: llm_reranker_timeout_ms } : {}), }); const results = await this.searchService.search(searchQuery); return { content: [ { type: 'text', text: `Search results for "${query}":\n\n` + results.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') } ] }; } async handleSearchFunctions(args) { const { query, language, limit } = args; const searchQuery = this.searchService.buildSearchQuery({ query: query, ...(language !== undefined ? { language: language } : {}), ...(limit !== undefined ? { limit: limit } : {}), chunkType: ChunkType.FUNCTION, }); const results = await this.searchService.searchFunctions(searchQuery); return { content: [ { type: 'text', text: `Function search results for "${query}":\n\n` + results.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.chunk.functionName || 'unnamed'}\n` + ` ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') } ] }; } async handleSearchClasses(args) { const { query, language, limit } = args; const searchQuery = this.searchService.buildSearchQuery({ query: query, ...(language !== undefined ? { language: language } : {}), ...(limit !== undefined ? { limit: limit } : {}), chunkType: ChunkType.CLASS, }); const results = await this.searchService.searchClasses(searchQuery); return { content: [ { type: 'text', text: `Class search results for "${query}":\n\n` + results.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.chunk.className || 'unnamed'}\n` + ` ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') } ] }; } async handleFindSimilar(args) { const { chunk_id, limit } = args; const searchQuery = this.searchService.buildSearchQuery({ query: chunk_id, ...(limit !== undefined ? { limit: limit } : {}), }); const results = await this.searchService.findSimilar(searchQuery); return { content: [ { type: 'text', text: `Similar chunks to "${chunk_id}":\n\n` + results.map((result, index) => `${index + 1}. [Score: ${result.score.toFixed(3)}] ${result.context}\n` + `\`\`\`${result.chunk.language}\n${result.snippet}\n\`\`\``).join('\n\n') } ] }; } async handleGetCodeContext(args) { const { chunk_id, context_lines } = args; const codeContextResult = await this.searchService.getCodeContext(chunk_id, context_lines); if (!codeContextResult) { return { content: [{ type: 'text', text: `Chunk not found: ${chunk_id}` }], isError: true }; } return { content: [{ type: 'text', text: `Code context for chunk "${chunk_id}":\n\n` + `File: ${codeContextResult.chunk.filePath}\n` + `Lines: ${codeContextResult.chunk.startLine}-${codeContextResult.chunk.endLine}\n` + `Type: ${codeContextResult.chunk.chunkType}\n\n` + `\`\`\`${codeContextResult.chunk.language}\n${codeContextResult.context}\n\`\`\`` }] }; } async handleGetIndexingStats(_args) { const indexingStats = this.indexingService.getStats(); return { content: [{ type: 'text', text: `Indexing Statistics:\n\n` + `Total files: ${indexingStats.totalFiles}\n` + `Total chunks: ${indexingStats.totalChunks}\n` + `Total size: ${indexingStats.totalSize} bytes\n` + `Average chunk size: ${Math.round(indexingStats.averageChunkSize)} bytes\n` + `Last indexed: ${indexingStats.lastIndexed.toISOString()}\n` + `Indexing duration: ${indexingStats.indexingDuration}ms\n` + `Errors: ${indexingStats.errors}\n` + `Warnings: ${indexingStats.warnings}\n` + `Largest file: ${indexingStats.largestFile}\n\n` + `Language distribution:\n${Object.entries(indexingStats.languageDistribution) .map(([lang, count]) => ` ${lang}: ${count}`) .join('\n')}\n\n` + `Chunk type distribution:\n${Object.entries(indexingStats.chunkTypeDistribution) .map(([type, count]) => ` ${type}: ${count}`) .join('\n')}` }] }; } async handleGetSearchStats(_args) { const searchStats = await this.searchService.getSearchStats(); const languageStats = Object.entries(searchStats.topLanguages) .sort(([, a], [, b]) => b - a) .map(([lang, count]) => ` โ€ข ${lang}: ${count} chunks`) .join('\n');