// Package: @wildcard-ai/deepcodex
// Advanced codebase indexing and semantic search MCP server
// (908 lines / 894 loc, 44.6 kB, JavaScript)
/**
* Standalone MCP Integration
* Provides intelligent codebase indexing and search capabilities via Model Context Protocol.
* Delegates to specialized services for file processing, namespace management, and search coordination.
*/
import * as path from 'path';
// MCP Server imports
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
// Core components
import { IndexingOrchestrator } from './core/indexing/IndexingOrchestrator.js';
import { TreeSitterSymbolExtractorFull } from './core/indexing/TreeSitterSymbolExtractor.treesitter-based.js';
import { LanguageDetector } from './utils/LanguageDetector.js';
import { Logger } from './utils/Logger.js';
import { JinaApiService } from './services/JinaApiService.js';
import { TurbopufferService } from './services/TurbopufferService.js';
import { ConfigurationService } from './services/ConfigurationService.js';
import { NamespaceManagerService } from './services/NamespaceManagerService.js';
import { FileProcessingService } from './services/FileProcessingService.js';
import { SearchCoordinationService } from './services/SearchCoordinationService.js';
import { SemanticSubChunker } from './services/SemanticSubChunker.js';
export class StandaloneCodexMcp {
    config;
    indexingOrchestrator;
    languageDetector;
    logger;
    jinaApiService;
    turbopufferService;
    configurationService;
    namespaceManagerService;
    fileProcessingService;
    searchCoordinationService;
    symbolExtractor;
    semanticSubChunker;
    /**
     * Wires configuration, embedding (Jina), vector store (Turbopuffer),
     * namespace registry, file processing, indexing, and search services together.
     * @param {object} [config] - Raw configuration forwarded to ConfigurationService.
     */
    constructor(config) {
        // Initialize ConfigurationService with provided config
        this.configurationService = new ConfigurationService(config, { logConfigurationStatus: false });
        this.config = this.configurationService.getConfig();
        this.logger = new Logger('STANDALONE-INTEGRATION', this.config.logLevel);
        this.languageDetector = new LanguageDetector();
        this.jinaApiService = new JinaApiService(this.config.jinaApiKey);
        this.turbopufferService = new TurbopufferService(this.config.turbopufferApiKey);
        this.symbolExtractor = new TreeSitterSymbolExtractorFull();
        this.semanticSubChunker = new SemanticSubChunker();
        // Initialize NamespaceManagerService first (needed for metadata callback)
        this.namespaceManagerService = new NamespaceManagerService(this.turbopufferService);
        // Chunk operations handed to FileProcessingService. uploadChunks also runs
        // semantic sub-chunking and batched embedding generation before upserting.
        const chunkOperations = {
            getChunkIdsForFile: async (namespace, filePath) => {
                return await this.turbopufferService.getChunkIdsForFile(namespace, filePath);
            },
            deleteChunksByIds: async (namespace, chunkIds) => {
                return await this.turbopufferService.deleteChunksByIds(namespace, chunkIds);
            },
            uploadChunks: async (namespace, chunks) => {
                try {
                    if (!chunks.length) {
                        this.logger.debug('No chunks to upload');
                        return;
                    }
                    this.logger.info(`Processing ${chunks.length} chunks for semantic sub-chunking...`);
                    // Step 1: Process chunks through semantic sub-chunker to prevent truncation
                    const processedChunks = [];
                    for (const chunk of chunks) {
                        const subChunks = await this.semanticSubChunker.splitLargeChunk(chunk);
                        processedChunks.push(...subChunks);
                        if (subChunks.length > 1) {
                            this.logger.debug(`Split large chunk ${chunk.id} into ${subChunks.length} sub-chunks`);
                        }
                    }
                    // FIX: the number of *additional* chunks is the growth of the processed
                    // list. The previous accounting summed only split-chunk counts and then
                    // subtracted the full input size, which could go negative and produced
                    // wrong numbers in the log messages below.
                    const additionalChunks = processedChunks.length - chunks.length;
                    if (additionalChunks > 0) {
                        this.logger.info(`✂️ Created ${additionalChunks} additional sub-chunks to prevent content loss`);
                    }
                    this.logger.info(`Uploading ${processedChunks.length} processed chunks to namespace: ${namespace}`);
                    // Step 2: Process chunks in batches for embedding generation
                    const BATCH_SIZE = 50;
                    for (let i = 0; i < processedChunks.length; i += BATCH_SIZE) {
                        const batch = processedChunks.slice(i, i + BATCH_SIZE);
                        // Validate chunk sizes before embedding
                        for (const chunk of batch) {
                            if (chunk.content.length > 20000) {
                                this.logger.warn(`⚠️ Chunk ${chunk.id} still exceeds 20K chars (${chunk.content.length}) - may cause embedding errors`);
                            }
                        }
                        // Generate embeddings for the batch
                        const embeddings = await this.jinaApiService.generateEmbeddingBatch(batch.map(chunk => chunk.content));
                        // Prepare data for Turbopuffer upsert
                        const upsertData = batch.map((chunk, idx) => ({
                            id: chunk.id,
                            vector: embeddings[idx],
                            content: chunk.content,
                            filePath: chunk.filePath,
                            startLine: chunk.startLine,
                            endLine: chunk.endLine,
                            language: chunk.language,
                            // Handle both IndexingOrchestrator format and core.ts format
                            symbols: chunk.symbols?.map((s) => typeof s === 'string' ? s : s.name || s).join(', ') || ''
                        }));
                        // Upload to vector store
                        await this.turbopufferService.upsert(namespace, upsertData);
                        this.logger.debug(`Uploaded batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(processedChunks.length / BATCH_SIZE)} (${batch.length} chunks)`);
                    }
                    this.logger.info(`✅ Successfully uploaded ${processedChunks.length} chunks to ${namespace} (${additionalChunks} additional sub-chunks created)`);
                }
                catch (error) {
                    this.logger.error(`Failed to upload chunks to ${namespace}:`, error);
                    throw error;
                }
            }
        };
        this.fileProcessingService = new FileProcessingService(chunkOperations);
        // Create metadata callback for IndexingOrchestrator - now that NamespaceManagerService is ready
        const metadataCallback = async (codebasePath, indexedData) => {
            await this.namespaceManagerService.registerCodebase(codebasePath, indexedData.totalChunks, new Date(indexedData.indexedAt));
            await this.fileProcessingService.saveLastIndexedTime(codebasePath, new Date());
        };
        // Initialize IndexingOrchestrator with enhanced services
        this.indexingOrchestrator = new IndexingOrchestrator({
            jinaApiService: this.jinaApiService,
            turbopufferService: this.turbopufferService,
            namespaceManagerService: this.namespaceManagerService,
            metadataCallback
        });
        // Initialize SearchCoordinationService with connection context extractor
        const connectionExtractor = async (filePath, content) => {
            return await this.extractConnectionContext(filePath, content);
        };
        this.searchCoordinationService = new SearchCoordinationService(this.jinaApiService, this.turbopufferService, connectionExtractor, 'SearchCoordinationService');
    }
    /**
     * Index a codebase using the enhanced IndexingOrchestrator.
     * @param {string} codebasePath - Path to the codebase root.
     * @param {boolean} [forceReindex=false] - Reindex even if already indexed.
     * @returns {Promise<object>} Summary: success flag, namespace, counts, timing, message, errors.
     */
    async indexCodebase(codebasePath, forceReindex = false) {
        const indexingRequest = {
            codebasePath,
            forceReindex,
            enableContentFiltering: true,
            enableDependencyAnalysis: true
        };
        const indexResult = await this.indexingOrchestrator.indexCodebase(indexingRequest);
        return {
            success: indexResult.success,
            namespace: indexResult.metadata?.namespace || '',
            filesProcessed: indexResult.metadata?.totalFiles || 0,
            chunksCreated: indexResult.chunks?.length || 0,
            processingTimeMs: indexResult.metadata?.indexingTime || 0,
            message: indexResult.success
                ? `Successfully indexed ${indexResult.metadata?.totalFiles || 0} files into ${indexResult.chunks?.length || 0} intelligent chunks`
                : `Indexing failed with ${indexResult.errors?.length || 0} errors`,
            errors: indexResult.errors
        };
    }
    /**
     * Hybrid (vector + BM25) search via SearchCoordinationService.
     * Refreshes the index incrementally before searching; returns an empty
     * failure result when the codebase was never indexed.
     */
    async searchHybrid(codebasePath, query, options = {}) {
        // Ensure index is up-to-date before searching
        await this.ensureUpToDateIndex(codebasePath);
        // Get namespace from registered codebase instead of generating it
        const normalizedPath = path.resolve(codebasePath);
        const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
        if (!indexed) {
            return {
                success: false,
                results: [],
                searchTime: 0,
                strategy: 'hybrid',
                metadata: {
                    vectorResults: 0,
                    bm25Results: 0,
                    totalMatches: 0,
                    reranked: false
                }
            };
        }
        const namespace = indexed.namespace;
        // FIX: use ?? so an explicit caller-supplied weight of 0 is honored
        // (|| would silently replace 0 with the default).
        const searchResult = await this.searchCoordinationService.searchHybrid(namespace, query, {
            limit: options.limit ?? 10,
            vectorWeight: options.vectorWeight ?? 0.1,
            bm25Weight: options.bm25Weight ?? 0.9
        });
        return {
            success: searchResult.success,
            results: searchResult.results,
            searchTime: searchResult.searchTime,
            strategy: searchResult.strategy,
            metadata: {
                vectorResults: searchResult.metadata?.vectorResults || 0,
                bm25Results: searchResult.metadata?.bm25Results || 0,
                totalMatches: searchResult.metadata?.totalMatches || searchResult.results.length,
                // FIX: ?? preserves an explicit `reranked: false` reported by the service.
                reranked: searchResult.metadata?.reranked ?? (options.enableReranking !== false)
            }
        };
    }
    /**
     * BM25-only search via SearchCoordinationService.
     * Returns an empty failure result when the codebase was never indexed.
     */
    async searchBM25(codebasePath, query, options = {}) {
        // Ensure index is up-to-date before searching
        await this.ensureUpToDateIndex(codebasePath);
        // Get namespace from registered codebase instead of generating it
        const normalizedPath = path.resolve(codebasePath);
        const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
        if (!indexed) {
            return {
                success: false,
                results: [],
                searchTime: 0,
                strategy: 'bm25'
            };
        }
        const namespace = indexed.namespace;
        const searchResult = await this.searchCoordinationService.searchBM25(namespace, query, {
            limit: options.limit ?? 10,
            enableReranking: options.enableReranking !== false
        });
        return {
            success: searchResult.success,
            results: searchResult.results,
            searchTime: searchResult.searchTime,
            strategy: searchResult.strategy
        };
    }
    /**
     * Intelligent search across indexed codebases; maps raw results into a
     * flat shape (file paths, line ranges, symbols, connection context).
     * @param {string} query - Natural-language or keyword query.
     * @param {string} [codebasePath] - Optional codebase to scope the search to.
     * @param {number} [maxResults=10] - Result cap.
     */
    async searchWithIntelligence(query, codebasePath, maxResults = 10) {
        // Ensure index is up-to-date before searching
        if (codebasePath) {
            await this.ensureUpToDateIndex(codebasePath);
        }
        const searchResult = await this.searchCoordinationService.searchWithIntelligence(query, codebasePath, this.namespaceManagerService.getAllIndexedCodebases(), maxResults);
        if (searchResult.success && searchResult.results.length > 0) {
            const results = searchResult.results.map((result) => ({
                id: result.id,
                content: result.content,
                filePath: result.filePath,
                relativePath: result.metadata?.relativePath || path.relative(codebasePath || '', result.filePath),
                startLine: result.startLine,
                endLine: result.endLine,
                language: result.language || 'unknown',
                symbols: result.symbols || [],
                score: result.score,
                connections: result.connections
            }));
            return {
                success: true,
                results,
                totalResults: results.length,
                searchTimeMs: searchResult.searchTimeMs,
                message: searchResult.message
            };
        }
        return {
            success: searchResult.success,
            results: [],
            totalResults: 0,
            searchTimeMs: searchResult.searchTimeMs,
            message: searchResult.message
        };
    }
    /**
     * Get indexing status via NamespaceManagerService.
     */
    async getIndexingStatus(codebasePath) {
        return await this.namespaceManagerService.getIndexingStatus(codebasePath);
    }
    /**
     * Clear index via NamespaceManagerService.
     * Handles both registry clearing and vector store clearing.
     */
    async clearIndex(codebasePath) {
        return await this.namespaceManagerService.clearIndexedCodebases(codebasePath);
    }
    /**
     * Extract import/export connection context for a file using the
     * tree-sitter symbol extractor. Reads the FULL file (imports/exports live
     * at file level, not chunk level). Failures degrade to empty context.
     * @returns {Promise<{imports: string[], exports: any[], relatedFiles: string[]}>}
     */
    async extractConnectionContext(filePath, chunkContent) {
        try {
            // Initialize symbol extractor if needed
            await this.symbolExtractor.initialize();
            // Read the full file content to get imports/exports (they're usually at file level)
            const fs = await import('fs/promises');
            const fullFileContent = await fs.readFile(filePath, 'utf-8');
            // Detect language from full file
            const language = this.languageDetector.detectLanguage(filePath, fullFileContent);
            // Use TreeSitterSymbolExtractorFull for accurate import/export extraction on full file
            const symbolResult = await this.symbolExtractor.extractSymbols(fullFileContent, language.language, filePath);
            const result = {
                imports: symbolResult.imports.map(imp => imp.module).filter(Boolean).slice(0, 5),
                exports: symbolResult.exports.slice(0, 5),
                relatedFiles: symbolResult.imports.map(imp => imp.module).filter(Boolean).slice(0, 5)
            };
            this.logger.debug(`🔗 Extracted connections for ${filePath}:`);
            this.logger.debug(`   Full file content length: ${fullFileContent.length} chars`);
            this.logger.debug(`   Raw imports: ${JSON.stringify(symbolResult.imports)}`);
            this.logger.debug(`   Raw exports: ${JSON.stringify(symbolResult.exports)}`);
            this.logger.debug(`   Final result: ${result.imports.length} imports, ${result.exports.length} exports`);
            return result;
        }
        catch (error) {
            this.logger.debug('Failed to extract connection context:', error);
            return { imports: [], exports: [], relatedFiles: [] };
        }
    }
    /**
     * Ensure the index is up-to-date by running hash-based incremental indexing
     * before searches. Best-effort: a failure here never fails the search.
     */
    async ensureUpToDateIndex(codebasePath) {
        try {
            const normalizedPath = path.resolve(codebasePath);
            const indexed = this.namespaceManagerService.getIndexedCodebase(normalizedPath);
            if (!indexed) {
                this.logger.debug(`Codebase not indexed, skipping incremental update: ${codebasePath}`);
                return;
            }
            this.logger.debug(`🔄 Running hash-based incremental indexing before search for: ${codebasePath}`);
            // Run incremental update with hash-based change detection (no time limits)
            const incrementalResult = await this.fileProcessingService.processIncrementalUpdate(normalizedPath, indexed.namespace, {} // No maxAgeHours - relies on hash-based change detection
            );
            if (incrementalResult.success && incrementalResult.filesProcessed > 0) {
                this.logger.info(`✅ Hash-based incremental update: ${incrementalResult.filesProcessed} files with actual changes processed`);
                // Update last indexed time for tracking purposes
                await this.fileProcessingService.saveLastIndexedTime(normalizedPath, new Date());
            }
            else {
                this.logger.debug(`⚡ No files with content changes found for: ${codebasePath}`);
            }
        }
        catch (error) {
            this.logger.warn('Failed to run incremental indexing before search:', error);
            // Don't fail the search if incremental indexing fails
        }
    }
    /** Load the namespace registry and symbol extractor; call once at startup. */
    async initialize() {
        await this.namespaceManagerService.initialize();
        await this.symbolExtractor.initialize();
        this.logger.info(`Initialized with ${this.namespaceManagerService.getAllIndexedCodebases().size} indexed codebases`);
    }
}
// MCP Server Implementation
class StandaloneMCPServer {
    server;
    codexMcp;
    constructor() {
        this.codexMcp = new StandaloneCodexMcp();
        this.server = new Server({
            name: 'intelligent-context-mcp',
            version: '2.0.0',
        }, {
            capabilities: {
                tools: {},
                resources: {}
            }
        });
        this.setupHandlers();
        // Initialize the registry on startup to ensure it's loaded for new sessions.
        // Fire-and-forget: initializeRegistry handles its own errors.
        void this.initializeRegistry();
    }
    /** Load the indexed-codebase registry; failures are logged but non-fatal. */
    async initializeRegistry() {
        try {
            await this.codexMcp.initialize();
            console.error(`🔍 Registry initialized successfully`);
        }
        catch (error) {
            console.error(`⚠️ Failed to initialize registry:`, error);
        }
    }
    /**
     * Register MCP tool and resource handlers.
     * NOTE: with the stdio transport, stdout carries the JSON-RPC stream, so
     * every diagnostic below uses console.error (stderr) — console.log would
     * corrupt the protocol.
     */
    setupHandlers() {
        this.server.setRequestHandler(ListToolsRequestSchema, async () => {
            const tools = [
                {
                    name: 'index_codebase',
                    description: `Prepares a codebase for intelligent search by creating a searchable index.
**When to use**: Call this first before searching any new codebase. Required prerequisite for search_codebase.
**Use force_reindex=true when**: Code has changed significantly or search results seem outdated.`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: {
                                type: 'string',
                                description: 'Absolute path to the directory containing source code files'
                            },
                            force_reindex: {
                                type: 'boolean',
                                description: 'Force complete reindexing even if already indexed (default: false)',
                                default: false
                            }
                        },
                        required: ['codebase_path']
                    }
                },
                {
                    name: 'search_codebase',
                    description: `Finds relevant code in an indexed codebase using natural language or keyword queries.
**When to use**:
- Find specific functions, classes, or code patterns
- Get context before making changes to understand dependencies
- Explore how existing systems work
- Locate examples of API usage or patterns
**Returns**: Code chunks with file paths, line numbers, and relevance scores.
**Prerequisite**: Codebase must be indexed first with index_codebase.`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            query: {
                                type: 'string',
                                description: 'Natural language or keyword search query describing what code to find'
                            },
                            codebase_path: {
                                type: 'string',
                                description: 'Absolute path to the codebase to search (optional if only one codebase indexed)'
                            },
                            max_results: {
                                type: 'number',
                                description: 'Maximum number of code chunks to return (default: 5)',
                                default: 5
                            }
                        },
                        required: ['query']
                    }
                },
                {
                    name: 'get_indexing_status',
                    description: `Check if codebases are indexed and get their status information.
**Enhanced Features**:
- Shows completion statistics for finished indexing (success rates, processing time, performance metrics)
- Displays batch processing details (successful/skipped batches)
- References log files for detailed debugging
**When to use**:
- Before indexing to check if already done
- After indexing to see completion statistics and success rates
- Debug why search returned no results
- Confirm indexing completed successfully
- Get overview of all indexed codebases
**Returns**: Enhanced indexing status with completion statistics when available.`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: {
                                type: 'string',
                                description: 'Optional: Absolute path to specific codebase to check. Omit to get status of all indexed codebases'
                            }
                        }
                    }
                },
                {
                    name: 'clear_index',
                    description: `Permanently removes all indexed data for a codebase.
**When to use**:
- Clear stale data before reindexing after major code changes
- Remove old indexed codebases no longer needed
- Fix corrupted index causing search issues
**Warning**: Destructive operation. All search capabilities lost until reindexing.`,
                    inputSchema: {
                        type: 'object',
                        properties: {
                            codebase_path: {
                                type: 'string',
                                description: 'Absolute path to the codebase to clear. Omit to clear ALL indexed codebases (use with caution)'
                            }
                        }
                    }
                }
            ];
            return { tools };
        });
        this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
            const { name, arguments: args } = request.params;
            try {
                switch (name) {
                    // Braces on each case keep lexical declarations scoped per-case.
                    case 'index_codebase': {
                        try {
                            // Resolve relative paths to absolute paths
                            const codebasePath = path.resolve(args.codebase_path);
                            const forceReindex = args.force_reindex || false;
                            // FIX: log to stderr — stdout is the MCP transport channel.
                            console.error(`🔍 Starting background indexing: ${args.codebase_path} -> ${codebasePath}`);
                            // Spawn background process for indexing
                            const logFile = `background-indexing-${path.basename(codebasePath)}-${new Date().toISOString().replace(/:/g, '-')}.log`;
                            // Use child process to avoid MCP timeout
                            const { spawn } = await import('child_process');
                            const nodeProcess = spawn('node', ['background-indexing-worker.mjs', codebasePath, forceReindex.toString()], {
                                detached: true,
                                stdio: ['ignore', 'pipe', 'pipe'],
                                env: {
                                    ...process.env,
                                    WILDCARD_API_KEY: process.env.WILDCARD_API_KEY,
                                    WILDCARD_API_URL: process.env.WILDCARD_API_URL
                                },
                                cwd: process.cwd()
                            });
                            // Pipe worker output to a timestamped log file
                            const fsSync = await import('fs');
                            const logStream = fsSync.default.createWriteStream(logFile);
                            const addTimestamp = (data) => {
                                const lines = data.toString().split('\n');
                                return lines.filter(line => line.trim()).map(line => `[${new Date().toISOString()}] ${line}`).join('\n') + '\n';
                            };
                            nodeProcess.stdout?.on('data', (data) => {
                                const timestampedData = addTimestamp(data);
                                logStream.write(timestampedData);
                                console.error(timestampedData.trim()); // stderr, not stdout
                            });
                            nodeProcess.stderr?.on('data', (data) => {
                                const timestampedData = addTimestamp(data);
                                logStream.write(timestampedData);
                                console.error(timestampedData.trim());
                            });
                            nodeProcess.on('close', (code) => {
                                logStream.write(`[${new Date().toISOString()}] Process completed with code: ${code}\n`);
                                logStream.end();
                                console.error(`Background indexing process completed with code: ${code}`);
                            });
                            // Detach the process so it runs independently
                            nodeProcess.unref();
                            return {
                                content: [{
                                        type: 'text',
                                        text: `🚀 **Background Indexing Started** - Log: \`${logFile}\`
**Codebase**: ${codebasePath}
**Process**: Running in detached background mode
**📋 How to Monitor Progress:**
- **Check Status**: Use \`get_indexing_status\` tool to see completion progress
- **View Live Logs**: \`tail -f ${logFile}\`
- **Final Results**: Status tool will show success rate when complete
💡 **Tip**: The log file is saved in your current directory for easy access.
📝 **For AI Assistant**: Please mention that users can monitor progress with: tail -f ${logFile} (include the dash)`
                                    }]
                            };
                        }
                        catch (error) {
                            // Catch any unhandled errors
                            return {
                                content: [{
                                        type: 'text',
                                        text: `❌ Indexing error: ${error instanceof Error ? error.message : String(error)}\n\nStack trace:\n${error instanceof Error ? error.stack : 'No stack trace available'}`
                                    }]
                            };
                        }
                    }
                    case 'search_codebase': {
                        console.error(`🔍 STANDALONE MCP TOOL CALLED: search_codebase with query "${args.query}"`);
                        // Note: Incremental indexing is automatically triggered before each search
                        const searchResult = await this.codexMcp.searchWithIntelligence(args.query, args.codebase_path, args.max_results || 5);
                        console.error(`🔍 STANDALONE MCP RESULT: ${searchResult.results.length} results, top score: ${searchResult.results[0]?.score}`);
                        if (!searchResult.success) {
                            return {
                                content: [{
                                        type: 'text',
                                        text: `❌ Search failed: ${searchResult.message}`
                                    }]
                            };
                        }
                        const response = {
                            total_results: searchResult.totalResults,
                            search_time_ms: searchResult.searchTimeMs,
                            results: searchResult.results.map(chunk => {
                                const chunkAny = chunk;
                                return {
                                    file_path: chunk.relativePath,
                                    start_line: chunk.startLine,
                                    end_line: chunk.endLine,
                                    language: chunk.language,
                                    content: chunk.content,
                                    score: chunk.score,
                                    symbols: chunk.symbols,
                                    connections: chunk.connections, // Include connection context for Claude
                                    ...(chunkAny.originalScore !== undefined && {
                                        original_score: chunkAny.originalScore,
                                        // FIX: `|| true` was always true; ?? defaults only when
                                        // reranked is null/undefined, preserving an explicit false.
                                        reranked: chunkAny.reranked ?? true
                                    })
                                };
                            })
                        };
                        return {
                            content: [{
                                    type: 'text',
                                    text: JSON.stringify(response, null, 2)
                                }]
                        };
                    }
                    case 'get_indexing_status': {
                        const status = await this.codexMcp.getIndexingStatus(args.codebase_path);
                        const enhancedStatus = await this.enhanceStatusWithLogData(status, args.codebase_path);
                        return {
                            content: [{
                                    type: 'text',
                                    text: this.formatIndexingStatus(enhancedStatus)
                                }]
                        };
                    }
                    case 'clear_index': {
                        const clearResult = await this.codexMcp.clearIndex(args.codebase_path);
                        return {
                            content: [{
                                    type: 'text',
                                    text: clearResult.success ?
                                        '✅ Index cleared successfully' :
                                        `❌ Failed to clear index: ${clearResult.message}`
                                }]
                        };
                    }
                    default:
                        throw new Error(`Unknown tool: ${name}`);
                }
            }
            catch (error) {
                return {
                    content: [{
                            type: 'text',
                            text: `Error: ${error instanceof Error ? error.message : String(error)}`
                        }]
                };
            }
        });
        // Resource handlers
        this.server.setRequestHandler(ListResourcesRequestSchema, async () => {
            const resources = [
                {
                    uri: 'mcp://codebase-status',
                    name: 'Codebase Status',
                    description: 'Current status of indexed codebases'
                }
            ];
            return { resources };
        });
        this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
            const uri = request.params.uri;
            switch (uri) {
                case 'mcp://codebase-status': {
                    const status = await this.codexMcp.getIndexingStatus();
                    return {
                        contents: [{
                                type: 'text',
                                text: JSON.stringify(status, null, 2)
                            }]
                    };
                }
                default:
                    throw new Error(`Unknown resource: ${uri}`);
            }
        });
    }
    /** Start the server: report capabilities, initialize, connect stdio transport. */
    async run() {
        // Show configuration status
        const config = {
            jinaApiKey: process.env.JINA_API_KEY,
            turbopufferApiKey: process.env.TURBOPUFFER_API_KEY
        };
        const capabilities = {
            reranking: !!config.jinaApiKey && config.jinaApiKey !== 'test',
            vectorSearch: !!config.turbopufferApiKey && config.turbopufferApiKey !== 'test',
            localBM25: true
        };
        console.error('🔧 Capabilities:', JSON.stringify(capabilities));
        // Wildcard hosted backend mode indicator
        const wildcardEnabled = !!process.env.WILDCARD_API_KEY;
        // FIX: removed unreachable `|| 'http://localhost:4000'` — the hosted URL
        // literal is always truthy, so the third operand could never be used.
        const wildcardUrl = process.env.WILDCARD_API_URL || 'https://intelligent-context-backend.onrender.com';
        if (wildcardEnabled) {
            console.error(`🌐 Wildcard backend: ENABLED (using hosted Fastify backend)`);
            console.error(`   Base URL: ${wildcardUrl}`);
        }
        else {
            console.error(`🌐 Wildcard backend: disabled (direct provider mode)`);
        }
        if (!config.jinaApiKey || config.jinaApiKey === 'test') {
            console.error('⚠️ Jina API key not provided - result reranking will be disabled');
            console.error('💡 Set JINA_API_KEY environment variable to enable result reranking');
        }
        // Initialize the standalone MCP integration
        await this.codexMcp.initialize();
        const transport = new StdioServerTransport();
        await this.server.connect(transport);
        console.error('🚀 Intelligent Context MCP Server ready!');
        console.error(`🔄 Result Reranking: ${!!(config.jinaApiKey && config.jinaApiKey !== 'test') ? '✅ Enabled' : '❌ Disabled'}`);
        console.error('📝 Local BM25 Search: ✅ Always Available');
        console.error('🔌 Transport: stdio');
    }
    /**
     * Enhance indexing status with completion statistics from log files.
     * Log-parsing failures are logged and never fail the status check.
     */
    async enhanceStatusWithLogData(status, codebasePath) {
        const enhancedStatus = { ...status };
        try {
            // Find the most recent log file for this codebase
            const logFile = await this.findMostRecentLogFile(codebasePath);
            if (logFile) {
                const logStats = await this.parseLogFileStats(logFile);
                if (logStats) {
                    enhancedStatus.completionStats = logStats;
                }
            }
        }
        catch (error) {
            // Don't fail status check if log parsing fails
            console.warn('Failed to parse log statistics:', error);
        }
        return enhancedStatus;
    }
    /**
     * Find the most recent background indexing log file for a codebase in the
     * current working directory. Returns null when none match or on any error.
     */
    async findMostRecentLogFile(codebasePath) {
        const fs = await import('fs');
        // FIX: use the module-level `path` import instead of re-importing and
        // shadowing it with a dynamic import.
        try {
            const files = fs.readdirSync('.');
            const codebaseName = codebasePath ? path.basename(codebasePath) : '';
            // Find log files that match the specific codebase pattern
            const logFiles = files.filter(file => {
                if (!file.startsWith('background-indexing-') || !file.endsWith('.log')) {
                    return false;
                }
                // If no specific codebase requested, don't return any logs
                // (completion stats should only show for specific codebases)
                if (!codebaseName) {
                    return false;
                }
                // Extract the codebase name from the log file pattern:
                // background-indexing-{codebaseName}-{timestamp}.log
                const match = file.match(/^background-indexing-(.+?)-\d{4}-\d{2}-\d{2}T/);
                return match && match[1] === codebaseName;
            });
            if (logFiles.length === 0)
                return null;
            // Sort by modification time (newest first)
            const sortedFiles = logFiles
                .map(file => ({
                name: file,
                mtime: fs.statSync(file).mtime
            }))
                .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
            return sortedFiles[0].name;
        }
        catch (error) {
            return null;
        }
    }
    /**
     * Parse completion statistics from a background-indexing log file.
     * Returns null while indexing is still running or on any parse error.
     */
    async parseLogFileStats(logFile) {
        const fs = await import('fs');
        try {
            const content = fs.readFileSync(logFile, 'utf-8');
            const lines = content.split('\n');
            let isCompleted = false;
            let totalChunks = 0;
            let successfulChunks = 0;
            let skippedBatches = 0;
            let totalBatches = 0;
            let processingTime = 0;
            let startTime = null;
            let endTime = null;
            // Parse log lines for statistics
            for (const line of lines) {
                // Check if process completed
                if (line.includes('Process completed with code: 0')) {
                    isCompleted = true;
                    const timeMatch = line.match(/\[([^\]]+)\]/);
                    if (timeMatch) {
                        endTime = new Date(timeMatch[1]);
                    }
                }
                // Extract start time
                if (line.includes('Starting indexing for:') && !startTime) {
                    const timeMatch = line.match(/\[([^\]]+)\]/);
                    if (timeMatch) {
                        startTime = new Date(timeMatch[1]);
                    }
                }
                // Extract upload completion stats
                if (line.includes('Upload complete:') && line.includes('chunks uploaded')) {
                    const chunkMatch = line.match(/(\d+)\/(\d+) chunks uploaded/);
                    if (chunkMatch) {
                        successfulChunks = parseInt(chunkMatch[1], 10);
                        totalChunks = parseInt(chunkMatch[2], 10);
                    }
                    const batchMatch = line.match(/\((\d+)\/(\d+) batches skipped/);
                    if (batchMatch) {
                        skippedBatches = parseInt(batchMatch[1], 10);
                        totalBatches = parseInt(batchMatch[2], 10);
                    }
                }
                else if (line.includes('✅ Uploaded') && line.includes('chunks to namespace')) {
                    // Handle the actual format: "✅ Uploaded 355 chunks to namespace: mcp_xxx"
                    const chunkMatch = line.match(/✅ Uploaded (\d+) chunks/);
                    if (chunkMatch) {
                        successfulChunks = parseInt(chunkMatch[1], 10);
                        totalChunks = successfulChunks;
                    }
                }
                else if (line.includes('Uploaded') && line.includes('chunks to namespace')) {
                    // Fallback for other completion message formats
                    const chunkMatch = line.match(/Uploaded (\d+) chunks/);
                    if (chunkMatch) {
                        successfulChunks = parseInt(chunkMatch[1], 10);
                        totalChunks = successfulChunks;
                    }
                }
                // Extract processing time
                if (line.includes('processingTimeMs')) {
                    const timeMatch = line.match(/"processingTimeMs":\s*(\d+)/);
                    if (timeMatch) {
                        processingTime = parseInt(timeMatch[1], 10);
                    }
                }
            }
            // Only return stats if indexing is completed
            if (!isCompleted) {
                return null;
            }
            const successRate = totalChunks > 0 ? (successfulChunks / totalChunks * 100) : 0;
            const skippedChunks = totalChunks - successfulChunks;
            const actualProcessingTime = startTime && endTime ?
                endTime.getTime() - startTime.getTime() : processingTime;
            return {
                completed: true,
                totalChunks,
                successfulChunks,
                skippedChunks,
                successRate: Math.round(successRate * 100) / 100,
                totalBatches: totalBatches || Math.ceil(totalChunks / 50), // Estimate if not found
                skippedBatches: skippedBatches || 0,
                processingTimeMs: actualProcessingTime,
                processingTimeFormatted: this.formatDuration(actualProcessingTime),
                logFile
            };
        }
        catch (error) {
            return null;
        }
    }
    /**
     * Format duration in milliseconds to human readable format (ms/s/m/h).
     */
    formatDuration(ms) {
        if (ms < 1000)
            return `${ms}ms`;
        if (ms < 60000)
            return `${(ms / 1000).toFixed(1)}s`;
        if (ms < 3600000)
            return `${(ms / 60000).toFixed(1)}m`;
        return `${(ms / 3600000).toFixed(1)}h`;
    }
    /**
     * Format indexing status (plus optional completion stats) as markdown.
     */
    formatIndexingStatus(status) {
        let result = '';
        // Basic status information
        result += `📊 **Indexing Status**\n\n`;
        if (status.currentCodebase) {
            const cb = status.currentCodebase;
            result += `**Current Codebase**: ${cb.path}\n`;
            result += `**Namespace**: ${cb.namespace}\n`;
            result += `**Files**: ${cb.fileCount}\n`;
            result += `**Last Indexed**: ${new Date(cb.lastIndexed).toLocaleString()}\n`;
            result += `**Status**: ${status.indexed ? '✅ Indexed' : '❌ Not Indexed'}\n\n`;
        }
        // Completion statistics (only shown if indexing completed AND there's a current codebase AND stats are for this specific codebase)
        if (status.completionStats && status.currentCodebase && status.completionStats.logFile) {
            const stats = status.completionStats;
            // Verify the log file matches the current codebase name
            const currentCodebaseName = path.basename(status.currentCodebase.path);
            if (stats.logFile.includes(`background-indexing-${currentCodebaseName}-`)) {
                result += `**Success Rate**: ${stats.successRate}% (${stats.successfulChunks}/${stats.totalChunks} chunks)\n`;
                result += `**Log File**: \`${stats.logFile}\`\n\n`;
            }
        }
        // All indexed codebases
        if (status.indexedCodebases && status.indexedCodebases.length > 0) {
            result += `## 📚 **All Indexed Codebases** (${status.indexedCodebases.length})\n\n`;
            status.indexedCodebases.forEach((cb, index) => {
                result += `${index + 1}. **${cb.path}**\n`;
                result += `   - Chunks: ${cb.totalChunks}, Last indexed: ${new Date(cb.indexedAt).toLocaleDateString()}\n`;
            });
        }
        else {
            result += `## 📚 **No Indexed Codebases Found**\n\n`;
            result += `Use the \`index_codebase\` tool to index a codebase first.\n`;
        }
        return result;
    }
}
// Always run when executed as a CLI
// CLI entry point: build the MCP server and run it; any startup failure
// is reported to stderr and exits with a non-zero status.
async function main() {
    const server = new StandaloneMCPServer();
    await server.run();
}
main().catch((error) => {
    console.error(error);
    process.exit(1);
});
//# sourceMappingURL=standalone-mcp-integration.js.map