@wildcard-ai/deepcontext

Advanced codebase indexing and semantic search MCP server

1,011 lines (988 loc) 54.2 kB
#!/usr/bin/env node
/**
 * Standalone MCP Integration
 * Provides intelligent codebase indexing and search capabilities via Model Context Protocol.
 * Delegates to specialized services for file processing, namespace management, and search coordination.
 */
import * as path from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// MCP Server imports
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
// Core components
import { IndexingOrchestrator } from './core/indexing/IndexingOrchestrator.js';
import { TreeSitterSymbolExtractorFull } from './core/indexing/TreeSitterSymbolExtractor.treesitter-based.js';
import { LanguageDetector } from './utils/LanguageDetector.js';
import { Logger } from './utils/Logger.js';
import { JinaApiService } from './services/JinaApiService.js';
import { TurbopufferService } from './services/TurbopufferService.js';
import { ConfigurationService } from './services/ConfigurationService.js';
import { NamespaceManagerService } from './services/NamespaceManagerService.js';
import { FileProcessingService } from './services/FileProcessingService.js';
import { SearchCoordinationService } from './services/SearchCoordinationService.js';
import { SemanticSubChunker } from './services/SemanticSubChunker.js';
export class StandaloneContextMcp {
    config;
    indexingOrchestrator;
    languageDetector;
    logger;
    jinaApiService;
    turbopufferService;
    configurationService;
    namespaceManagerService;
    fileProcessingService;
    searchCoordinationService;
    symbolExtractor;
    semanticSubChunker;
    constructor(config) {
        // Initialize ConfigurationService with provided config
        this.configurationService = new ConfigurationService(config, { logConfigurationStatus: false });
        this.config = this.configurationService.getConfig();
        this.logger = new Logger('STANDALONE-INTEGRATION', this.config.logLevel);
        this.languageDetector = new LanguageDetector();
        this.jinaApiService = new JinaApiService(this.config.jinaApiKey, this.configurationService);
        this.turbopufferService = new TurbopufferService(this.config.turbopufferApiKey, this.configurationService);
        this.symbolExtractor = new TreeSitterSymbolExtractorFull();
        this.semanticSubChunker = new SemanticSubChunker(this.configurationService);
        // Initialize NamespaceManagerService first (needed for metadata callback)
        this.namespaceManagerService = new NamespaceManagerService(this.turbopufferService);
        // Initialize FileProcessingService with integrated chunk operations
        const chunkOperations = {
            getChunkIdsForFile: async (namespace, filePath) => {
                return await this.turbopufferService.getChunkIdsForFile(namespace, filePath);
            },
            deleteChunksByIds: async (namespace, chunkIds) => {
                return await this.turbopufferService.deleteChunksByIds(namespace, chunkIds);
            },
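            // uploadChunks runs a two-stage pipeline: (1) semantic sub-chunking so
            // oversized chunks are split rather than truncated at embedding time,
            // then (2) batched embedding generation followed by a vector-store upsert.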
            uploadChunks: async (namespace, chunks) => {
                try {
                    if (!chunks.length) {
                        this.logger.debug('No chunks to upload');
                        return;
                    }
                    this.logger.info(`Processing ${chunks.length} chunks for semantic sub-chunking...`);
                    // Step 1: Process chunks through semantic sub-chunker to prevent truncation
                    const processedChunks = [];
                    let totalSubChunks = 0;
                    for (const chunk of chunks) {
                        const subChunks = await this.semanticSubChunker.splitLargeChunk(chunk);
                        processedChunks.push(...subChunks);
                        if (subChunks.length > 1) {
                            totalSubChunks += subChunks.length;
                            this.logger.debug(`Split large chunk ${chunk.id} into ${subChunks.length} sub-chunks`);
                        }
                    }
                    if (totalSubChunks > chunks.length) {
                        this.logger.info(`✂️ Created ${totalSubChunks - chunks.length} additional sub-chunks to prevent content loss`);
                    }
                    this.logger.info(`Uploading ${processedChunks.length} processed chunks to namespace: ${namespace}`);
                    // Step 2: Process chunks in batches for embedding generation
                    const BATCH_SIZE = 10; // Optimal batch size for embedding generation
                    for (let i = 0; i < processedChunks.length; i += BATCH_SIZE) {
                        const batch = processedChunks.slice(i, i + BATCH_SIZE);
                        // Validate chunk sizes before embedding
                        const chunkingConfig = this.configurationService.getChunkingConfig();
                        for (const chunk of batch) {
                            if (chunk.content.length > chunkingConfig.jinaMaxChars) {
                                this.logger.warn(`⚠️ Chunk ${chunk.id} still exceeds ${chunkingConfig.jinaMaxChars} chars (${chunk.content.length}) - may cause embedding errors`);
                            }
                        }
                        // Generate embeddings for the batch
                        const embeddings = await this.jinaApiService.generateEmbeddingBatch(batch.map(chunk => chunk.content));
                        // Prepare data for Turbopuffer upsert
                        const upsertData = batch.map((chunk, idx) => ({
                            id: chunk.id,
                            vector: embeddings[idx],
                            content: chunk.content,
                            filePath: chunk.filePath,
                            startLine: chunk.startLine,
                            endLine: chunk.endLine,
                            language: chunk.language,
                            // Handle both IndexingOrchestrator format and core.ts format
                            symbols: chunk.symbols?.map((s) => typeof s === 'string' ? s : s.name || s).join(', ') || ''
                        }));
                        // Upload to vector store
                        await this.turbopufferService.upsert(namespace, upsertData);
                        this.logger.debug(`Uploaded batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(processedChunks.length / BATCH_SIZE)} (${batch.length} chunks)`);
                    }
                    this.logger.info(`✅ Successfully uploaded ${processedChunks.length} chunks to ${namespace} (${totalSubChunks - chunks.length} additional sub-chunks created)`);
                }
                catch (error) {
                    this.logger.error(`Failed to upload chunks to ${namespace}:`, error);
                    throw error;
                }
            }
        };
        this.fileProcessingService = new FileProcessingService(chunkOperations);
        // Create metadata callback for IndexingOrchestrator - now that NamespaceManagerService is ready
        const metadataCallback = async (codebasePath, indexedData) => {
            await this.namespaceManagerService.registerCodebase(codebasePath, indexedData.totalChunks, new Date(indexedData.indexedAt));
            await this.fileProcessingService.saveLastIndexedTime(codebasePath, new Date());
        };
        // Initialize IndexingOrchestrator with enhanced services
        this.indexingOrchestrator = new IndexingOrchestrator({
            jinaApiService: this.jinaApiService,
            turbopufferService: this.turbopufferService,
            namespaceManagerService: this.namespaceManagerService,
            metadataCallback
        });
        // Initialize SearchCoordinationService with connection context extractor
        const connectionExtractor = async (filePath, content) => {
            return await this.extractConnectionContext(filePath, content);
        };
        this.searchCoordinationService = new SearchCoordinationService(this.jinaApiService, this.turbopufferService, connectionExtractor, this.configurationService, 'SearchCoordinationService');
    }
    /**
     * Index a codebase using the enhanced IndexingOrchestrator
     */
    async indexCodebase(codebasePath, forceReindex = false) {
        const indexingRequest = {
            codebasePath,
            forceReindex: forceReindex,
            enableContentFiltering: true,
            enableDependencyAnalysis: true
        };
        const indexResult = await this.indexingOrchestrator.indexCodebase(indexingRequest);
        return {
            success: indexResult.success,
            namespace: indexResult.metadata?.namespace || '',
            filesProcessed: indexResult.metadata?.totalFiles || 0,
            chunksCreated: indexResult.chunks?.length || 0,
            processingTimeMs: indexResult.metadata?.indexingTime || 0,
            message: indexResult.success
                ? `Successfully indexed ${indexResult.metadata?.totalFiles || 0} files into ${indexResult.chunks?.length || 0} intelligent chunks`
                : `Indexing failed with ${indexResult.errors?.length || 0} errors`,
            errors: indexResult.errors
        };
    }
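    // Rough usage sketch (hypothetical caller; names assumed, not part of this module):
    //   const mcp = new StandaloneContextMcp({ jinaApiKey, turbopufferApiKey });
    //   await mcp.initialize();
    //   await mcp.indexCodebase('/abs/path/to/repo');
    //   const hits = await mcp.searchHybrid('/abs/path/to/repo', 'auth middleware');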
    /**
     * Hybrid search using SearchCoordinationService
     */
    async searchHybrid(codebasePath, query, options = {}) {
        // Ensure index is up-to-date before searching
        await this.ensureUpToDateIndex(codebasePath);
        // Get namespace from registered codebase instead of generating it
        const normalizedPath = path.resolve(codebasePath);
        const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
        if (!indexed) {
            return {
                success: false,
                results: [],
                searchTime: 0,
                strategy: 'hybrid',
                metadata: { vectorResults: 0, bm25Results: 0, totalMatches: 0, reranked: false }
            };
        }
        const namespace = indexed.namespace;
        const searchConfig = this.configurationService.getSearchConfig();
        const searchResult = await this.searchCoordinationService.searchHybrid(namespace, query, {
            limit: options.limit || searchConfig.defaultResultLimit,
            vectorWeight: options.vectorWeight || searchConfig.defaultVectorWeight,
            bm25Weight: options.bm25Weight || searchConfig.defaultBm25Weight
        });
        return {
            success: searchResult.success,
            results: searchResult.results,
            searchTime: searchResult.searchTime,
            strategy: searchResult.strategy,
            metadata: {
                vectorResults: searchResult.metadata?.vectorResults || 0,
                bm25Results: searchResult.metadata?.bm25Results || 0,
                totalMatches: searchResult.metadata?.totalMatches || searchResult.results.length,
                reranked: searchResult.metadata?.reranked || (options.enableReranking !== false)
            }
        };
    }
    /**
     * BM25 search using SearchCoordinationService
     */
    async searchBM25(codebasePath, query, options = {}) {
        // Ensure index is up-to-date before searching
        await this.ensureUpToDateIndex(codebasePath);
        // Get namespace from registered codebase instead of generating it
        const normalizedPath = path.resolve(codebasePath);
        const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
        if (!indexed) {
            return { success: false, results: [], searchTime: 0, strategy: 'bm25' };
        }
        const namespace = indexed.namespace;
        const searchConfig = this.configurationService.getSearchConfig();
        const searchResult = await this.searchCoordinationService.searchBM25(namespace, query, {
            limit: options.limit || searchConfig.defaultResultLimit,
            enableReranking: options.enableReranking !== false
        });
        return {
            success: searchResult.success,
            results: searchResult.results,
            searchTime: searchResult.searchTime,
            strategy: searchResult.strategy
        };
    }
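    // Unlike searchHybrid/searchBM25, codebasePath is optional here: the
    // coordination service receives the full registry of indexed codebases and
    // can route the query across them.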
    /**
     * Intelligent search using SearchCoordinationService
     */
    async searchWithIntelligence(query, codebasePath, maxResults) {
        // Ensure index is up-to-date before searching
        if (codebasePath) {
            await this.ensureUpToDateIndex(codebasePath);
        }
        const searchConfig = this.configurationService.getSearchConfig();
        const indexedCodebases = await this.namespaceManagerService.getAllIndexedCodebases();
        const searchResult = await this.searchCoordinationService.searchWithIntelligence(query, codebasePath, indexedCodebases, maxResults || searchConfig.defaultResultLimit);
        if (searchResult.success && searchResult.results.length > 0) {
            const results = searchResult.results.map((result) => ({
                id: result.id,
                content: result.content,
                filePath: result.filePath,
                relativePath: result.metadata?.relativePath || path.relative(codebasePath || '', result.filePath),
                startLine: result.startLine,
                endLine: result.endLine,
                language: result.language || 'unknown',
                symbols: result.symbols || [],
                score: result.score,
                connections: result.connections
            }));
            return {
                success: true,
                results,
                totalResults: results.length,
                searchTimeMs: searchResult.searchTimeMs,
                message: searchResult.message
            };
        }
        return {
            success: searchResult.success,
            results: [],
            totalResults: 0,
            searchTimeMs: searchResult.searchTimeMs,
            message: searchResult.message
        };
    }
    /**
     * Get indexing status via NamespaceManagerService
     */
    async getIndexingStatus(codebasePath) {
        return await this.namespaceManagerService.getIndexingStatus(codebasePath);
    }
    /**
     * Clear index via NamespaceManagerService
     */
    async clearIndex(codebasePath) {
        // NamespaceManagerService handles both registry clearing and vector store clearing
        return await this.namespaceManagerService.clearIndexedCodebases(codebasePath);
    }
    /**
     * Extract relevant connection context using TreeSitterSymbolExtractorFull
     */
    async extractConnectionContext(filePath, chunkContent) {
        try {
            // Initialize symbol extractor if needed
            await this.symbolExtractor.initialize();
            // Read the full file content to get imports/exports (they're usually at file level)
            const fs = await import('fs/promises');
            const fullFileContent = await fs.readFile(filePath, 'utf-8');
            // Detect language from full file
            const language = this.languageDetector.detectLanguage(filePath, fullFileContent);
            // Use TreeSitterSymbolExtractorFull for accurate import/export extraction on full file
            const symbolResult = await this.symbolExtractor.extractSymbols(fullFileContent, language.language, filePath);
            const result = {
                imports: symbolResult.imports.map(imp => imp.module).filter(Boolean).slice(0, 5),
                exports: symbolResult.exports.slice(0, 5),
                relatedFiles: symbolResult.imports.map(imp => imp.module).filter(Boolean).slice(0, 5)
            };
            this.logger.debug(`🔗 Extracted connections for ${filePath}:`);
            this.logger.debug(`  Full file content length: ${fullFileContent.length} chars`);
            this.logger.debug(`  Raw imports: ${JSON.stringify(symbolResult.imports)}`);
            this.logger.debug(`  Raw exports: ${JSON.stringify(symbolResult.exports)}`);
            this.logger.debug(`  Final result: ${result.imports.length} imports, ${result.exports.length} exports`);
            return result;
        }
        catch (error) {
            this.logger.debug('Failed to extract connection context:', error);
            return { imports: [], exports: [], relatedFiles: [] };
        }
    }
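    // Invoked at the start of every search entry point: only files whose content
    // hash changed since the last run are re-processed, so searches stay fresh
    // without waiting on a full re-index.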
    /**
     * Ensure the index is up-to-date by running hash-based incremental indexing before searches
     */
    async ensureUpToDateIndex(codebasePath) {
        try {
            const normalizedPath = path.resolve(codebasePath);
            const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
            if (!indexed) {
                this.logger.debug(`Codebase not indexed, skipping incremental update: ${codebasePath}`);
                return;
            }
            this.logger.debug(`🔄 Running hash-based incremental indexing before search for: ${codebasePath}`);
            // Run incremental update with hash-based change detection (no time limits)
            const incrementalResult = await this.fileProcessingService.processIncrementalUpdate(normalizedPath, indexed.namespace, {} // No maxAgeHours - relies on hash-based change detection
            );
            if (incrementalResult.success && incrementalResult.filesProcessed > 0) {
                this.logger.info(`✅ Hash-based incremental update: ${incrementalResult.filesProcessed} files with actual changes processed`);
                // Update last indexed time for tracking purposes
                await this.fileProcessingService.saveLastIndexedTime(normalizedPath, new Date());
            }
            else {
                this.logger.debug(`⚡ No files with content changes found for: ${codebasePath}`);
            }
        }
        catch (error) {
            this.logger.warn('Failed to run incremental indexing before search:', error);
            // Don't fail the search if incremental indexing fails
        }
    }
    async initialize() {
        await this.namespaceManagerService.initialize();
        await this.symbolExtractor.initialize();
        const indexedCodebases = await this.namespaceManagerService.getAllIndexedCodebases();
        this.logger.info(`Initialized with ${indexedCodebases.size} indexed codebases`);
    }
}
// MCP Server Implementation
class StandaloneMCPServer {
    server;
    contextMcp;
    constructor() {
        this.contextMcp = new StandaloneContextMcp();
        this.server = new Server({
            name: 'intelligent-context-mcp',
            version: '2.0.0',
        }, {
            capabilities: {
                tools: {},
                resources: {}
            }
        });
        this.setupHandlers();
        // Initialize the registry on startup to ensure it's loaded for new sessions
        this.initializeRegistry();
    }
    async initializeRegistry() {
        try {
            await this.contextMcp.initialize();
            console.error(`🔍 Registry initialized successfully`);
        }
        catch (error) {
            console.error(`⚠️ Failed to initialize registry:`, error);
        }
    }
    setupHandlers() {
        this.server.setRequestHandler(ListToolsRequestSchema, async () => {
            const tools = [
                {
                    name: 'index_codebase',
                    description: `<tool>
<purpose>Prepares a codebase for intelligent search by creating a searchable index</purpose>
<when_to_use>
  <scenario>Call this first before searching any new codebase</scenario>
  <scenario>Required prerequisite for search_codebase</scenario>
</when_to_use>
<parameters>
  <parameter name="codebase_path" required="true">
    <type>string</type>
    <description>ABSOLUTE path to the directory containing source code files</description>
    <examples>
      <valid>/Users/name/project</valid>
      <valid>/home/user/code/repo</valid>
      <invalid>.</invalid>
      <invalid>../project</invalid>
      <invalid>relative/path</invalid>
    </examples>
    <validation>Must be absolute path starting with / (Unix) or C:\\ (Windows)</validation>
  </parameter>
  <parameter name="force_reindex" required="false">
    <type>boolean</type>
    <description>Force complete reindexing even if already indexed</description>
    <default>false</default>
    <when_to_use>Code has changed significantly or search results seem outdated</when_to_use>
  </parameter>
</parameters>
</tool>`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: { type: 'string' },
                            force_reindex: { type: 'boolean', default: false }
                        },
                        required: ['codebase_path']
                    }
                },
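                // Example arguments (hypothetical): { "codebase_path": "/Users/name/project", "force_reindex": false }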
                {
                    name: 'search_codebase',
                    description: `<tool>
<purpose>Finds relevant code in an indexed codebase using natural language or keyword queries</purpose>
<when_to_use>
  <scenario>Find specific functions, classes, or code patterns</scenario>
  <scenario>Get context before making changes to understand dependencies</scenario>
  <scenario>Explore how existing systems work</scenario>
  <scenario>Locate examples of API usage or patterns</scenario>
</when_to_use>
<parameters>
  <parameter name="query" required="true">
    <type>string</type>
    <description>Natural language or keyword search query describing what code to find</description>
  </parameter>
  <parameter name="codebase_path" required="true">
    <type>string</type>
    <description>ABSOLUTE path to the codebase directory to search</description>
    <examples>
      <valid>/Users/name/project</valid>
      <valid>/home/user/code/repo</valid>
      <invalid>.</invalid>
      <invalid>../project</invalid>
      <invalid>relative/path</invalid>
    </examples>
    <validation>Must be absolute path starting with / (Unix) or C:\\ (Windows)</validation>
  </parameter>
  <parameter name="max_results" required="false">
    <type>number</type>
    <description>Maximum number of code chunks to return</description>
    <default>5</default>
    <best_practice>Keep at default 5 for focused results. Use multiple targeted searches rather than increasing this limit</best_practice>
  </parameter>
</parameters>
<strategy>
  <guideline>Use specific technical terms: "authentication middleware", "database connection", "error handler"</guideline>
  <guideline>Focus on implementation: "user login function" rather than "user management system"</guideline>
  <guideline>Include file types when relevant: "SQL migration", "React component", "API endpoint"</guideline>
</strategy>
<workflow>
  <step>Search discovers relevant files and entry points, use imports and exports to find related files</step>
  <step>Use Read tool to explore discovered files in detail for complete implementation</step>
  <step>Use Grep tool for precise pattern matching of specific symbols or exact text</step>
  <step>Follow imports/exports from results to guide next searches</step>
  <step>Prefer multiple focused searches with 5 results over single large searches</step>
  <step>Search provides discovery, not complete solutions</step>
</workflow>
<result_interpretation>
  <point>Results ranked by semantic relevance, not code importance</point>
  <point>Implementation code often appears in results 2-5, not just #1</point>
  <point>Look for actual code files (.ts, .js, .sql) over documentation (.md, .txt)</point>
</result_interpretation>
<limitations>
  <limitation>
    <description>May miss foundational type definitions</description>
    <solution>Use Grep for "interface PluginName"</solution>
  </limitation>
  <limitation>
    <description>Shows implementations, not core contracts</description>
    <solution>Follow up with Read for full context</solution>
  </limitation>
  <limitation>
    <description>Semantic chunks may lack architectural hierarchy</description>
    <solution>Manual file exploration needed</solution>
  </limitation>
  <limitation>
    <description>Excludes filtered content: test files, generated code, config files, minified files, large data files</description>
    <solution>Use Grep tool to search test files (*.test.*, *.spec.*, __tests__, /tests/), config files, or generated content</solution>
  </limitation>
  <limitation>
    <description>For precise symbol search</description>
    <solution>Use Grep tool for exact matches</solution>
  </limitation>
</limitations>
<returns>Code chunks with file paths, line numbers, relevance scores, symbol information, imports, and exports</returns>
<prerequisites>Codebase must be indexed first with index_codebase</prerequisites>
</tool>`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            query: { type: 'string' },
                            codebase_path: { type: 'string' },
                            max_results: { type: 'number', default: 5 }
                        },
                        required: ['query', 'codebase_path']
                    }
                },
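                // Example arguments (hypothetical): { "query": "authentication middleware", "codebase_path": "/Users/name/project", "max_results": 5 }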
                {
                    name: 'get_indexing_status',
                    description: `<tool>
<purpose>Check if codebases are indexed and get their status information</purpose>
<enhanced_features>
  <feature>Shows completion statistics for finished indexing (success rates, processing time, performance metrics)</feature>
  <feature>Displays batch processing details (successful/skipped batches)</feature>
  <feature>References log files for detailed debugging</feature>
</enhanced_features>
<when_to_use>
  <scenario>Before indexing to check if already done</scenario>
  <scenario>After indexing to see completion statistics and success rates</scenario>
  <scenario>Debug why search returned no results</scenario>
  <scenario>Confirm indexing completed successfully</scenario>
  <scenario>Get overview of all indexed codebases</scenario>
</when_to_use>
<parameters>
  <parameter name="codebase_path" required="false">
    <type>string</type>
    <description>ABSOLUTE path to specific codebase to check</description>
    <examples>
      <valid>/Users/name/project</valid>
      <valid>/home/user/code/repo</valid>
      <invalid>.</invalid>
      <invalid>../project</invalid>
      <invalid>relative/path</invalid>
    </examples>
    <validation>Must be absolute path starting with / (Unix) or C:\\ (Windows)</validation>
    <optional_behavior>Omit to get status of all indexed codebases</optional_behavior>
  </parameter>
</parameters>
<returns>Enhanced indexing status with completion statistics when available</returns>
</tool>`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: { type: 'string' }
                        }
                    }
                },
                {
                    name: 'clear_index',
                    description: `<tool>
<purpose>Permanently removes all indexed data for a codebase</purpose>
<when_to_use>
  <scenario>Clear stale data before reindexing after major code changes</scenario>
  <scenario>Remove old indexed codebases no longer needed</scenario>
  <scenario>Fix corrupted index causing search issues</scenario>
</when_to_use>
<parameters>
  <parameter name="codebase_path" required="false">
    <type>string</type>
    <description>ABSOLUTE path to the codebase to clear</description>
    <examples>
      <valid>/Users/name/project</valid>
      <valid>/home/user/code/repo</valid>
      <invalid>.</invalid>
      <invalid>../project</invalid>
      <invalid>relative/path</invalid>
    </examples>
    <validation>Must be absolute path starting with / (Unix) or C:\\ (Windows)</validation>
    <optional_behavior>Omit to clear ALL indexed codebases (use with caution)</optional_behavior>
  </parameter>
</parameters>
<warnings>
  <warning>Destructive operation. All search capabilities lost until reindexing</warning>
</warnings>
</tool>`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: { type: 'string' }
                        }
                    }
                }
            ];
            return { tools };
        });
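        // Tool dispatch: every branch resolves relative paths to absolute ones up
        // front, and failures are returned as text content rather than surfaced as
        // MCP protocol errors.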
        this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
            const { name, arguments: args } = request.params;
            try {
                switch (name) {
                    case 'index_codebase':
                        try {
                            // Resolve relative paths to absolute paths
                            const codebasePath = path.resolve(args.codebase_path);
                            const forceReindex = args.force_reindex || false;
                            // Log to stderr: stdout carries the MCP stdio protocol
                            console.error(`🔍 Starting background indexing: ${args.codebase_path} -> ${codebasePath}`);
                            // Spawn background process for indexing
                            const logFile = `background-indexing-${path.basename(codebasePath)}-${new Date().toISOString().replace(/:/g, '-')}.log`;
                            // Use child process to avoid MCP timeout
                            const { spawn } = await import('child_process');
                            const workerPath = path.resolve(__dirname, '..', 'background-indexing-worker.mjs');
                            const nodeProcess = spawn('node', [workerPath, codebasePath, forceReindex.toString()], {
                                detached: true,
                                stdio: ['ignore', 'pipe', 'pipe'],
                                env: {
                                    ...process.env,
                                    WILDCARD_API_KEY: process.env.WILDCARD_API_KEY,
                                    WILDCARD_API_URL: process.env.WILDCARD_API_URL
                                },
                                cwd: process.cwd()
                            });
                            // Pipe output to log file
                            const fsSync = await import('fs');
                            const logStream = fsSync.default.createWriteStream(logFile);
                            const addTimestamp = (data) => {
                                const lines = data.toString().split('\n');
                                return lines.filter(line => line.trim()).map(line => `[${new Date().toISOString()}] ${line}`).join('\n') + '\n';
                            };
                            nodeProcess.stdout?.on('data', (data) => {
                                const timestampedData = addTimestamp(data);
                                logStream.write(timestampedData);
                                console.error(timestampedData.trim());
                            });
                            nodeProcess.stderr?.on('data', (data) => {
                                const timestampedData = addTimestamp(data);
                                logStream.write(timestampedData);
                                console.error(timestampedData.trim());
                            });
                            nodeProcess.on('close', (code) => {
                                logStream.write(`[${new Date().toISOString()}] Process completed with code: ${code}\n`);
                                logStream.end();
                                console.error(`Background indexing process completed with code: ${code}`);
                            });
                            // Detach the process so it runs independently
                            nodeProcess.unref();
                            return {
                                content: [{
                                        type: 'text',
                                        text: `🚀 **Background Indexing Started** - Log: \`${logFile}\`

**Codebase**: ${codebasePath}
**Process**: Running in detached background mode

**📋 How to Monitor Progress:**
- **Check Status**: Use \`get_indexing_status\` tool to see completion progress
- **View Live Logs**: \`tail -f ${logFile}\`
- **Final Results**: Status tool will show success rate when complete

💡 **Tip**: The log file is saved in your current directory for easy access.

📝 **For AI Assistant**: Please mention that users can monitor progress with: tail -f ${logFile} (include the dash)`
                                    }]
                            };
                        }
                        catch (error) {
                            // Catch any unhandled errors
                            return {
                                content: [{
                                        type: 'text',
                                        text: `❌ Indexing error: ${error instanceof Error ? error.message : String(error)}\n\nStack trace:\n${error instanceof Error ? error.stack : 'No stack trace available'}`
                                    }]
                            };
                        }
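                    // search_codebase runs inline (unlike index_codebase above, which
                    // detaches a worker so long indexing jobs can't hit MCP timeouts).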
                    case 'search_codebase':
                        console.error(`🔍 STANDALONE MCP TOOL CALLED: search_codebase with query "${args.query}"`);
                        // Resolve relative paths to absolute paths (consistent with index_codebase)
                        const resolvedCodebasePath = args.codebase_path ? path.resolve(args.codebase_path) : undefined;
                        // Note: Incremental indexing is automatically triggered before each search
                        const searchResult = await this.contextMcp.searchWithIntelligence(args.query, resolvedCodebasePath, args.max_results || 5);
                        console.error(`🔍 STANDALONE MCP RESULT: ${searchResult.results.length} results, top score: ${searchResult.results[0]?.score}`);
                        if (!searchResult.success) {
                            return {
                                content: [{
                                        type: 'text',
                                        text: `❌ Search failed: ${searchResult.message}`
                                    }]
                            };
                        }
                        const response = {
                            total_results: searchResult.totalResults,
                            search_time_ms: searchResult.searchTimeMs,
                            results: searchResult.results.map(chunk => {
                                const chunkAny = chunk;
                                return {
                                    file_path: chunk.relativePath,
                                    start_line: chunk.startLine,
                                    end_line: chunk.endLine,
                                    language: chunk.language,
                                    content: chunk.content,
                                    score: chunk.score,
                                    symbols: chunk.symbols,
                                    connections: chunk.connections, // Include connection context for Claude
                                    ...(chunkAny.originalScore !== undefined && {
                                        original_score: chunkAny.originalScore,
                                        reranked: chunkAny.reranked ?? true // default to true when the reranker did not annotate the chunk
                                    })
                                };
                            })
                        };
                        return {
                            content: [{
                                    type: 'text',
                                    text: JSON.stringify(response, null, 2)
                                }]
                        };
                    case 'get_indexing_status':
                        // Resolve relative paths to absolute paths (consistent with other tools)
                        const resolvedStatusPath = args.codebase_path ? path.resolve(args.codebase_path) : undefined;
                        const status = await this.contextMcp.getIndexingStatus(resolvedStatusPath);
                        // Use the current codebase path if none was explicitly provided
                        let codebasePathForLogs = resolvedStatusPath || status.currentCodebase?.path;
                        // If we still don't have a path, try to get it from the first indexed codebase
                        if (!codebasePathForLogs && status.indexedCodebases && status.indexedCodebases.length > 0) {
                            codebasePathForLogs = status.indexedCodebases[0].path;
                        }
                        const enhancedStatus = await this.enhanceStatusWithLogData(status, codebasePathForLogs);
                        return {
                            content: [{
                                    type: 'text',
                                    text: this.formatIndexingStatus(enhancedStatus)
                                }]
                        };
                    case 'clear_index':
                        // Resolve relative paths to absolute paths (consistent with other tools)
                        const resolvedClearPath = args.codebase_path ? path.resolve(args.codebase_path) : undefined;
                        const clearResult = await this.contextMcp.clearIndex(resolvedClearPath);
                        return {
                            content: [{
                                    type: 'text',
                                    text: clearResult.success ? '✅ Index cleared successfully' : `❌ Failed to clear index: ${clearResult.message}`
                                }]
                        };
                    default:
                        throw new Error(`Unknown tool: ${name}`);
                }
            }
            catch (error) {
                return {
                    content: [{
                            type: 'text',
                            text: `Error: ${error instanceof Error ? error.message : String(error)}`
                        }]
                };
            }
        });
        // Resource handlers
        this.server.setRequestHandler(ListResourcesRequestSchema, async () => {
            const resources = [
                {
                    uri: 'mcp://codebase-status',
                    name: 'Codebase Status',
                    description: 'Current status of indexed codebases'
                }
            ];
            return { resources };
        });
        this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
            const uri = request.params.uri;
            switch (uri) {
                case 'mcp://codebase-status':
                    const status = await this.contextMcp.getIndexingStatus();
                    return {
                        contents: [{
                                type: 'text',
                                text: JSON.stringify(status, null, 2)
                            }]
                    };
                default:
                    throw new Error(`Unknown resource: ${uri}`);
            }
        });
    }
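    // run() probes the environment for API keys to report capabilities before
    // connecting: reranking and vector search degrade gracefully when keys are
    // missing, while local BM25 is always available.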
    async run() {
        // Show configuration status
        const config = {
            jinaApiKey: process.env.JINA_API_KEY,
            turbopufferApiKey: process.env.TURBOPUFFER_API_KEY
        };
        const capabilities = {
            reranking: !!config.jinaApiKey && config.jinaApiKey !== 'test',
            vectorSearch: !!config.turbopufferApiKey && config.turbopufferApiKey !== 'test',
            localBM25: true
        };
        console.error('🔧 Capabilities:', JSON.stringify(capabilities));
        // Wildcard hosted backend mode indicator
        const wildcardEnabled = !!process.env.WILDCARD_API_KEY;
        const wildcardUrl = process.env.WILDCARD_API_URL || 'https://deepcontext.mcp.wild-card.ai';
        if (wildcardEnabled) {
            console.error(`🌐 Wildcard backend: ENABLED (using hosted Fastify backend)`);
            console.error(`   Base URL: ${wildcardUrl}`);
        }
        else {
            console.error(`🌐 Wildcard backend: disabled (direct provider mode)`);
        }
        if (!config.jinaApiKey || config.jinaApiKey === 'test') {
            console.error('⚠️ Jina API key not provided - result reranking will be disabled');
            console.error('💡 Set JINA_API_KEY environment variable to enable result reranking');
        }
        // Initialize the standalone MCP integration
        await this.contextMcp.initialize();
        const transport = new StdioServerTransport();
        await this.server.connect(transport);
        console.error('🚀 Intelligent Context MCP Server ready!');
        console.error(`🔄 Result Reranking: ${!!(config.jinaApiKey && config.jinaApiKey !== 'test') ? '✅ Enabled' : '❌ Disabled'}`);
        console.error('📝 Local BM25 Search: ✅ Always Available');
        console.error('🔌 Transport: stdio');
    }
    /**
     * Enhance indexing status with completion statistics from log files
     */
    async enhanceStatusWithLogData(status, codebasePath) {
        const enhancedStatus = { ...status };
        try {
            // Find the most recent log file for this codebase
            const logFile = await this.findMostRecentLogFile(codebasePath);
            if (logFile) {
                const logStats = await this.parseLogFileStats(logFile);
                if (logStats) {
                    enhancedStatus.completionStats = logStats;
                }
            }
        }
        catch (error) {
            // Don't fail status check if log parsing fails
            console.warn('Failed to parse log statistics:', error);
        }
        return enhancedStatus;
    }
    /**
     * Find the most recent background indexing log file for a codebase
     */
    async findMostRecentLogFile(codebasePath) {
        const fs = await import('fs');
        const path = await import('path');
        try {
            const files = fs.readdirSync('.');
            const codebaseName = codebasePath ? path.basename(codebasePath) : '';
            // Find log files that match the specific codebase pattern
            const logFiles = files.filter(file => {
                if (!file.startsWith('background-indexing-') || !file.endsWith('.log')) {
                    return false;
                }
                // If no specific codebase requested, don't return any logs
                // (completion stats should only show for specific codebases)
                if (!codebaseName) {
                    return false;
                }
                // Extract the codebase name from the log file pattern:
                // background-indexing-{codebaseName}-{timestamp}.log
                // Note: codebaseName can contain hyphens, so we need to be more careful
                const match = file.match(/^background-indexing-(.+?)-(\d{4}-\d{2}-\d{2}T.+)\.log$/);
                if (!match)
                    return false;
                const logCodebaseName = match[1];
                return logCodebaseName === codebaseName;
            });
            if (logFiles.length === 0)
                return null;
            // Sort by modification time (newest first)
            const sortedFiles = logFiles
                .map(file => ({ name: file, mtime: fs.statSync(file).mtime }))
                .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
            return sortedFiles[0].name;
        }
        catch (error) {
            return null;
        }
    }
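    // parseLogFileStats scrapes the timestamped worker log for completion markers
    // (exit code, uploaded-chunk counts, batch progress). This is heuristic by
    // design: the detached worker has no other channel back to this process.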
    /**
     * Parse completion statistics from log file
     */
    async parseLogFileStats(logFile) {
        const fs = await import('fs');
        try {
            const content = fs.readFileSync(logFile, 'utf-8');
            const lines = content.split('\n');
            let isCompleted = false;
            let totalChunks = 0;
            let successfulChunks = 0;
            let skippedBatches = 0;
            let totalBatches = 0;
            let processingTime = 0;
            let startTime = null;
            let endTime = null;
            // Parse log lines for statistics
            for (const line of lines) {
                // Check if process completed
                if (line.includes('Process completed with code: 0')) {
                    isCompleted = true;
                    const timeMatch = line.match(/\[([^\]]+)\]/);
                    if (timeMatch) {
                        endTime = new Date(timeMatch[1]);
                    }
                }
                // Extract start time
                if (line.includes('Starting indexing for:') && !startTime) {
                    const timeMatch = line.match(/\[([^\]]+)\]/);
                    if (timeMatch) {
                        startTime = new Date(timeMatch[1]);
                    }
                }
                // Extract upload completion stats - look for the actual completion message
                if (line.includes('✅ Uploaded') && line.includes('chunks to namespace')) {
                    // Handle the actual format: "✅ Uploaded 354 chunks to namespace: mcp_xxx"
                    const chunkMatch = line.match(/✅ Uploaded (\d+) chunks to namespace/);
                    if (chunkMatch) {
                        successfulChunks = parseInt(chunkMatch[1]);
                        totalChunks = Math.max(totalChunks, successfulChunks);
                    }
                }
                // Look for failure indicators
                if (line.includes('❌ No chunks generated') ||
                    line.includes('all files filtered out') ||
                    line.includes('parsing failures') ||
                    line.includes('Failed indexing attempt registered')) {
                    successfulChunks = 0;
                }
                // Look for batch completion messages to count total batches
                if (line.includes('✅ Batch') && line.includes('completed successfully')) {
                    const batchMatch = line.match(/✅ Batch (\d+)\/(\d+) completed successfully/);
                    if (batchMatch) {
                        totalBatches = parseInt(batchMatch[2]);
                    }
                }
                // Extract processing time from JSON result
                if (line.includes('processingTimeMs')) {
                    const timeMatch = line.match(/"processingTimeMs":\s*(\d+)/);
                    if (timeMatch) {
                        processingTime = parseInt(timeMatch[1]);
                    }
                }
                // Extract chunks created from JSON result and check for success
                if (line.includes('chunksCreated')) {
                    const chunkMatch = line.match(/"chunksCreated":\s*(\d+)/);
                    if (chunkMatch) {
                        const chunks = parseInt(chunkMatch[1]);
                        totalChunks = Math.max(totalChunks, chunks);
                        // Check if this indicates a successful or failed indexing
                        if (line.includes('"success":true') || line.includes('"success": true')) {
                            successfulChunks = chunks;
                        }
                        else if (line.includes('"success":false') || line.includes('"success": false') || chunks === 0) {
                            successfulChunks = 0; // Failed indexing
                        }
                    }
                }
            }
            // Only return stats if indexing is completed
            if (!isCompleted) {
                return null;
            }
            const successRate = totalChunks > 0 ? (successfulChunks / totalChunks * 100) : 0;
            const skippedChunks = totalChunks - successfulChunks;
            const actualProcessingTime = startTime && endTime ? endTime.getTime() - startTime.getTime() : processingTime;
            return {
                completed: true,
                totalChunks,
                successfulChunks,
                skippedChunks,
                successRate: Math.round(successRate * 100) / 100,
                totalBatches: totalBatches || Math.ceil(totalChunks / 50), // Estimate if not found
                skippedBatches: skippedBatches || 0,
                processingTimeMs: actualProcessingTime,
                processingTimeFormatted: this.formatDuration(actualProcessingTime),
                logFile
            };
        }
        catch (error) {
            return null;