UNPKG

zrald1

Version:

Advanced Graph RAG MCP Server with file location identification, graph processing, and result summarization capabilities

908 lines 39.6 kB
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ListResourcesRequestSchema,
  ReadResourceRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import { VectorStore } from './core/vector-store.js';
import { FileProcessor } from './core/file-processor.js';

/** Embedding dimension used when VECTOR_DIMENSION is unset or unparseable. */
const DEFAULT_VECTOR_DIMENSION = 384;

/** Vector store capacity used when MAX_VECTOR_ELEMENTS is unset or unparseable. */
const DEFAULT_MAX_VECTOR_ELEMENTS = 10000;

/** Upper bound handed to FileProcessor as `maxFileSize` (10MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;

/** Maximum number of characters returned by getContentPreview before truncation. */
const CONTENT_PREVIEW_LENGTH = 200;

/** Tool descriptors returned verbatim by the ListTools handler. */
const TOOL_DEFINITIONS = [
  // File Location and Processing Tools
  {
    name: 'identify_files',
    description: 'Identify and locate files based on user query and search criteria',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Search query for file identification' },
        search_paths: {
          type: 'array',
          items: { type: 'string' },
          description: 'Paths to search in',
          default: ['.'],
        },
        file_types: {
          type: 'array',
          items: { type: 'string' },
          description: 'File extensions to filter by (e.g., [".js", ".ts"])',
        },
        recursive: { type: 'boolean', default: true },
        max_results: { type: 'number', default: 50 },
        case_sensitive: { type: 'boolean', default: false },
      },
      required: ['query'],
    },
  },
  {
    name: 'process_files_to_graph',
    description: 'Process identified files and add them to the knowledge graph',
    inputSchema: {
      type: 'object',
      properties: {
        file_ids: {
          type: 'array',
          items: { type: 'string' },
          description: 'IDs of files to process into graph',
        },
        create_chunks: { type: 'boolean', default: true },
        chunk_size: { type: 'number', default: 1000 },
        chunk_overlap: { type: 'number', default: 100 },
        extract_entities: { type: 'boolean', default: false },
      },
      required: ['file_ids'],
    },
  },
  {
    name: 'generate_file_summary',
    description: 'Generate comprehensive summary and analysis of processed files',
    inputSchema: {
      type: 'object',
      properties: {
        include_content_analysis: { type: 'boolean', default: true },
        include_relationships: { type: 'boolean', default: true },
        include_statistics: { type: 'boolean', default: true },
        summary_type: {
          type: 'string',
          enum: ['brief', 'detailed', 'comprehensive'],
          default: 'detailed',
        },
      },
    },
  },
  {
    name: 'search_file_content',
    description: 'Search within processed file content using vector similarity',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Search query' },
        query_embedding: {
          type: 'array',
          items: { type: 'number' },
          description: 'Query embedding vector (optional, will generate if not provided)',
        },
        top_k: { type: 'number', default: 10 },
        similarity_threshold: { type: 'number', default: 0.7 },
        file_types: {
          type: 'array',
          items: { type: 'string' },
          description: 'Filter by file types',
        },
      },
      required: ['query'],
    },
  },
  {
    name: 'analyze_file_relationships',
    description: 'Analyze relationships between files based on content similarity and references',
    inputSchema: {
      type: 'object',
      properties: {
        analysis_type: {
          type: 'string',
          enum: ['similarity', 'references', 'dependencies', 'all'],
          default: 'all',
        },
        min_similarity: { type: 'number', default: 0.5 },
        include_content_overlap: { type: 'boolean', default: true },
      },
    },
  },
  {
    name: 'export_graph_data',
    description: 'Export the current graph data including files, nodes, and relationships',
    inputSchema: {
      type: 'object',
      properties: {
        format: { type: 'string', enum: ['json', 'cypher', 'graphml'], default: 'json' },
        include_content: { type: 'boolean', default: false },
        include_embeddings: { type: 'boolean', default: false },
      },
    },
  },
  // Vector Search Tools
  {
    name: 'vdb_search',
    description: 'Vector similarity search across all processed content',
    inputSchema: {
      type: 'object',
      properties: {
        query_embedding: {
          type: 'array',
          items: { type: 'number' },
          description: 'Query embedding vector',
        },
        top_k: { type: 'number', default: 10 },
        similarity_threshold: { type: 'number', default: 0.7 },
        node_types: {
          type: 'array',
          items: { type: 'string' },
          description: 'Filter by node types',
        },
      },
      required: ['query_embedding'],
    },
  },
  // Graph Analytics
  {
    name: 'graph_analytics',
    description: 'Get comprehensive analytics about the file graph',
    inputSchema: {
      type: 'object',
      properties: {
        include_centrality: { type: 'boolean', default: true },
        include_clustering: { type: 'boolean', default: true },
        include_file_stats: { type: 'boolean', default: true },
      },
    },
  },
];

/** Resource descriptors returned verbatim by the ListResources handler. */
const RESOURCE_DEFINITIONS = [
  {
    uri: 'files://processed-files',
    name: 'Processed Files',
    description: 'Access to all processed files and their metadata',
    mimeType: 'application/json',
  },
  {
    uri: 'graph://file-graph',
    name: 'File Knowledge Graph',
    description: 'Knowledge graph representation of processed files',
    mimeType: 'application/json',
  },
  {
    uri: 'analytics://file-analytics',
    name: 'File Analytics',
    description: 'Analytics and statistics about processed files',
    mimeType: 'application/json',
  },
];

/**
 * Parse a positive base-10 integer, falling back when the input is missing
 * or does not yield a positive integer.
 *
 * @param {string | undefined} raw - Raw value (typically from process.env).
 * @param {number} fallback - Value returned when `raw` is unusable.
 * @returns {number}
 */
function parsePositiveInt(raw, fallback) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Reduce any thrown value to a printable message.
 *
 * @param {unknown} error
 * @returns {string}
 */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Wrap a payload as a single pretty-printed JSON text content item.
 *
 * @param {object} payload
 * @returns {{ content: Array<{ type: 'text', text: string }> }}
 */
function jsonResult(payload) {
  return { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }] };
}

/**
 * Build the `{ success: false, error }` tool response flagged with `isError`.
 *
 * @param {unknown} error
 * @returns {{ content: Array<{ type: 'text', text: string }>, isError: true }}
 */
function jsonError(error) {
  return { ...jsonResult({ success: false, error: describeError(error) }), isError: true };
}

/**
 * Count items per key.
 *
 * @template T
 * @param {Iterable<T>} items
 * @param {(item: T) => string} keyOf
 * @returns {Record<string, number>}
 */
function countBy(items, keyOf) {
  const counts = {};
  for (const item of items) {
    const key = keyOf(item);
    counts[key] = (counts[key] || 0) + 1;
  }
  return counts;
}

/**
 * Truncate text to CONTENT_PREVIEW_LENGTH characters, appending an ellipsis
 * only when something was actually cut off.
 *
 * @param {string} text
 * @returns {string}
 */
function previewOf(text) {
  return text.length > CONTENT_PREVIEW_LENGTH
    ? `${text.substring(0, CONTENT_PREVIEW_LENGTH)}...`
    : text;
}

/**
 * MCP server that identifies files, turns them into graph nodes and chunks
 * stored in a VectorStore, and exposes search/summary/export tools.
 *
 * Embeddings and the similarity/dependency analyses in this class are mock
 * implementations (random values), as marked at each call site.
 *
 * All diagnostics go to stderr (`console.error` / `console.warn`): the server
 * is connected over stdio in `start()`, so stdout must carry protocol
 * messages only.
 */
export class GraphRAGMCPServer {
  server;
  vectorStore;
  fileProcessor;
  /** Embedding dimension shared by the vector store and mock embeddings. */
  vectorDimension;
  /** File records returned by `identify_files`, keyed by file id. */
  processedFiles = new Map();
  /** Graph nodes created by `process_files_to_graph`, keyed by node id. */
  fileNodes = new Map();

  /**
   * Create the MCP server, the vector store (sized from VECTOR_DIMENSION and
   * MAX_VECTOR_ELEMENTS, with defaults 384 and 10000) and the file processor,
   * then register all request handlers.
   */
  constructor() {
    this.server = new Server(
      {
        name: 'zrald-1-mcp-server',
        version: '1.0.0',
      },
      {
        capabilities: {
          tools: {},
          resources: {},
        },
      },
    );

    // Initialize components
    this.vectorDimension = parsePositiveInt(
      process.env.VECTOR_DIMENSION,
      DEFAULT_VECTOR_DIMENSION,
    );
    this.vectorStore = new VectorStore(
      this.vectorDimension,
      parsePositiveInt(process.env.MAX_VECTOR_ELEMENTS, DEFAULT_MAX_VECTOR_ELEMENTS),
    );
    this.fileProcessor = new FileProcessor({
      recursive: true,
      maxFileSize: MAX_FILE_SIZE_BYTES,
      includeContent: true,
    });

    this.setupHandlers();
  }

  /**
   * Register the ListTools, ListResources, ReadResource and CallTool handlers
   * on the underlying MCP server.
   */
  setupHandlers() {
    // List available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: TOOL_DEFINITIONS,
    }));

    // List available resources
    this.server.setRequestHandler(ListResourcesRequestSchema, async () => ({
      resources: RESOURCE_DEFINITIONS,
    }));

    // Handle resource reading
    this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
      const { uri } = request.params;
      let payload;
      switch (uri) {
        case 'files://processed-files':
          payload = {
            files: Array.from(this.processedFiles.values()),
            total_count: this.processedFiles.size,
            last_updated: new Date().toISOString(),
          };
          break;
        case 'graph://file-graph':
          payload = {
            nodes: Array.from(this.fileNodes.values()),
            vector_store_stats: this.vectorStore.getStats(),
            last_updated: new Date().toISOString(),
          };
          break;
        case 'analytics://file-analytics':
          payload = await this.generateAnalytics();
          break;
        default:
          throw new Error(`Unknown resource: ${uri}`);
      }
      return {
        contents: [
          {
            uri,
            mimeType: 'application/json',
            text: JSON.stringify(payload, null, 2),
          },
        ],
      };
    });

    // Handle tool calls. Handlers are looked up by name; the arrow wrappers
    // resolve `this.handleX` at call time so overrides are respected.
    const toolHandlers = new Map([
      ['identify_files', (toolArgs) => this.handleIdentifyFiles(toolArgs)],
      ['process_files_to_graph', (toolArgs) => this.handleProcessFilesToGraph(toolArgs)],
      ['generate_file_summary', (toolArgs) => this.handleGenerateFileSummary(toolArgs)],
      ['search_file_content', (toolArgs) => this.handleSearchFileContent(toolArgs)],
      ['analyze_file_relationships', (toolArgs) => this.handleAnalyzeFileRelationships(toolArgs)],
      ['export_graph_data', (toolArgs) => this.handleExportGraphData(toolArgs)],
      ['vdb_search', (toolArgs) => this.handleVDBSearch(toolArgs)],
      ['graph_analytics', (toolArgs) => this.handleGraphAnalytics(toolArgs)],
    ]);

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      const { name, arguments: rawArgs } = request.params;
      // `arguments` is optional in a tool call; tools without required
      // parameters must still work when it is omitted.
      const args = rawArgs ?? {};
      try {
        const handler = toolHandlers.get(name);
        if (!handler) {
          throw new Error(`Unknown tool: ${name}`);
        }
        return await handler(args);
      } catch (error) {
        console.error(`Error executing tool ${name}:`, error);
        return {
          content: [
            {
              type: 'text',
              text: `Error executing ${name}: ${describeError(error)}`,
            },
          ],
          isError: true,
        };
      }
    });
  }

  /**
   * Initialize the vector store. Failures are logged and swallowed on
   * purpose so the server can still start (logged as "mock mode").
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    try {
      console.error('DEBUG: Initializing vector store...');
      await this.vectorStore.initialize();
      console.error('DEBUG: Vector store initialized');
      console.error('DEBUG: Zrald-1 Graph RAG MCP Server ready');
    } catch (error) {
      console.error('DEBUG: Initialization error:', describeError(error));
      console.error('DEBUG: Starting in mock mode');
    }
  }

  /**
   * Connect the MCP server to a stdio transport.
   *
   * @returns {Promise<void>}
   * @throws Rethrows whatever `server.connect` throws, after logging it.
   */
  async start() {
    try {
      console.error('DEBUG: Creating stdio transport...');
      const transport = new StdioServerTransport();
      console.error('DEBUG: Connecting MCP server...');
      await this.server.connect(transport);
      console.error('DEBUG: MCP server connected and ready');
    } catch (error) {
      console.error('DEBUG: MCP server start error:', describeError(error));
      console.error(
        'DEBUG: MCP server start stack:',
        error instanceof Error ? error.stack : String(error),
      );
      throw error;
    }
  }

  /**
   * Log shutdown progress. No resources are released here.
   *
   * Messages go to stderr: stdout is reserved for the stdio transport.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    try {
      console.error('🔄 Zrald-1 Graph RAG MCP Server shutting down...');
      console.error('🛑 Zrald-1 Graph RAG MCP Server shutdown complete');
    } catch (error) {
      console.error('⚠️ Error during shutdown:', error);
    }
  }

  // Tool handler implementations

  /**
   * `identify_files`: delegate the search to FileProcessor.identifyFiles and
   * remember every returned file in `processedFiles` (keyed by `file.id`).
   *
   * @param {object} [args] - Tool arguments (see the `identify_files` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleIdentifyFiles(args = {}) {
    try {
      const searchOptions = {
        query: args.query,
        searchPaths: args.search_paths || ['.'],
        fileTypes: args.file_types,
        recursive: args.recursive !== false,
        maxResults: args.max_results || 50,
        caseSensitive: args.case_sensitive || false,
      };

      const files = await this.fileProcessor.identifyFiles(searchOptions);

      // Store identified files
      for (const file of files) {
        this.processedFiles.set(file.id, file);
      }

      return jsonResult({
        success: true,
        files_found: files.length,
        files: files.map((f) => ({
          id: f.id,
          name: f.name,
          path: f.path,
          extension: f.extension,
          size: f.size,
          metadata: f.metadata,
        })),
        search_summary: {
          query: args.query,
          paths_searched: searchOptions.searchPaths,
          total_results: files.length,
        },
      });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `process_files_to_graph`: turn previously identified files into graph
   * nodes (and, unless `create_chunks` is false, chunks) and add them to the
   * vector store. Unknown ids are skipped and reported in `files_not_found`.
   *
   * @param {object} [args] - Tool arguments (see the `process_files_to_graph` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleProcessFilesToGraph(args = {}) {
    try {
      const fileIds = args.file_ids || [];
      const createChunks = args.create_chunks !== false;
      // `||` is kept for chunk_size: a size of 0 is not a usable chunk size.
      const chunkSize = args.chunk_size || 1000;
      // `??` so an explicit overlap of 0 is honoured.
      const chunkOverlap = args.chunk_overlap ?? 100;

      const processedNodes = [];
      const processedChunks = [];
      const missingFileIds = [];

      for (const fileId of fileIds) {
        const file = this.processedFiles.get(fileId);
        if (!file) {
          console.warn(`File with ID ${fileId} not found`);
          missingFileIds.push(fileId);
          continue;
        }

        // Convert file to node
        const fileNode = {
          id: file.id,
          type: 'file',
          label: file.name,
          properties: {
            path: file.path,
            extension: file.extension,
            size: file.size,
            content: file.content,
          },
          metadata: file.metadata,
          created_at: file.created_at,
          updated_at: file.updated_at,
        };

        // Generate embedding for file (mock implementation)
        if (file.content) {
          fileNode.embedding = this.generateMockEmbedding();
        }

        processedNodes.push(fileNode);
        this.fileNodes.set(fileNode.id, fileNode);

        // Add to vector store
        if (fileNode.embedding) {
          await this.vectorStore.addNode(fileNode);
        }

        // Create chunks if requested
        if (createChunks && file.content) {
          const chunks = this.fileProcessor.fileToChunks(file, chunkSize, chunkOverlap);
          for (const chunk of chunks) {
            // Generate embedding for chunk (mock implementation)
            chunk.embedding = this.generateMockEmbedding();
            await this.vectorStore.addChunk(chunk);
            processedChunks.push(chunk);
          }
        }
      }

      return jsonResult({
        success: true,
        nodes_created: processedNodes.length,
        chunks_created: processedChunks.length,
        // Count only files that were actually found and processed.
        files_processed: processedNodes.length,
        files_requested: fileIds.length,
        files_not_found: missingFileIds,
        processing_summary: {
          total_nodes: this.fileNodes.size,
          vector_store_stats: this.vectorStore.getStats(),
        },
      });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `generate_file_summary`: summarize identified files and graph nodes.
   *
   * NOTE(review): `include_relationships` is declared in the tool schema but
   * nothing in this handler consumes it; `summary_type` is only echoed back.
   *
   * @param {object} [args] - Tool arguments (see the `generate_file_summary` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleGenerateFileSummary(args = {}) {
    try {
      const includeContentAnalysis = args.include_content_analysis !== false;
      const includeStatistics = args.include_statistics !== false;
      const summaryType = args.summary_type || 'detailed';

      const files = Array.from(this.processedFiles.values());
      const nodes = Array.from(this.fileNodes.values());

      // Generate comprehensive summary
      const result = {
        summary_type: summaryType,
        file_processing_summary: this.fileProcessor.generateSummary(files),
        graph_summary: {
          total_nodes: nodes.length,
          node_types: this.getNodeTypeDistribution(nodes),
          vector_store_stats: this.vectorStore.getStats(),
        },
        timestamp: new Date().toISOString(),
      };

      if (includeContentAnalysis) {
        result.content_analysis = this.analyzeContent(files);
      }
      if (includeStatistics) {
        result.detailed_statistics = this.generateDetailedStatistics(files, nodes);
      }

      return jsonResult({ success: true, summary: result });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `search_file_content`: query the vector store and optionally filter file
   * nodes by extension. Non-file results always pass the extension filter.
   *
   * When `query_embedding` is omitted a mock (random) embedding is used, so
   * the ranking is not derived from the `query` text.
   *
   * @param {object} [args] - Tool arguments (see the `search_file_content` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleSearchFileContent(args = {}) {
    try {
      const query = args.query;
      // Generate embedding if not provided (mock implementation)
      const queryEmbedding = args.query_embedding || this.generateMockEmbedding();
      const topK = args.top_k || 10;
      // `??` so an explicit threshold of 0 is honoured.
      const threshold = args.similarity_threshold ?? 0.7;

      const results = await this.vectorStore.searchNodes(queryEmbedding, topK, threshold);

      // Filter by file types if specified
      const fileTypes = args.file_types;
      const filteredResults =
        fileTypes && fileTypes.length > 0
          ? results.filter((hit) => {
              if (hit.node && hit.node.type === 'file') {
                return fileTypes.includes(hit.node.properties?.extension);
              }
              return true;
            })
          : results;

      return jsonResult({
        success: true,
        query,
        results_found: filteredResults.length,
        results: filteredResults.map((hit) => ({
          id: hit.id,
          score: hit.score,
          // Parenthesized: without the parens the conditional swallowed the
          // `||` and every node was labelled 'chunk'.
          type: hit.node?.type || (hit.chunk?.document_id ? 'chunk' : 'unknown'),
          content_preview: this.getContentPreview(hit),
          metadata: hit.node?.metadata || hit.chunk?.metadata,
        })),
      });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `analyze_file_relationships`: collect similarity, reference and/or
   * dependency relationships between identified files.
   *
   * NOTE(review): `include_content_overlap` is declared in the tool schema
   * but nothing in this handler consumes it.
   *
   * @param {object} [args] - Tool arguments (see the `analyze_file_relationships` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleAnalyzeFileRelationships(args = {}) {
    try {
      const analysisType = args.analysis_type || 'all';
      // `??` so an explicit minimum of 0 is honoured.
      const minSimilarity = args.min_similarity ?? 0.5;
      const wants = (kind) => analysisType === kind || analysisType === 'all';

      const files = Array.from(this.processedFiles.values());
      const relationships = [];

      // Analyze relationships based on type
      if (wants('similarity')) {
        relationships.push(...(await this.analyzeSimilarityRelationships(files, minSimilarity)));
      }
      if (wants('references')) {
        relationships.push(...this.analyzeReferenceRelationships(files));
      }
      if (wants('dependencies')) {
        relationships.push(...this.analyzeDependencyRelationships(files));
      }

      return jsonResult({
        success: true,
        analysis_type: analysisType,
        relationships_found: relationships.length,
        relationships,
        analysis_summary: {
          total_files_analyzed: files.length,
          relationship_types: this.getRelationshipTypeDistribution(relationships),
        },
      });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `export_graph_data`: export files and nodes. Only the `json` format
   * carries data; other formats yield metadata plus a `warning`.
   *
   * With `include_content` false, content is removed from both the file
   * records and the nodes' `properties.content`.
   *
   * @param {object} [args] - Tool arguments (see the `export_graph_data` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleExportGraphData(args = {}) {
    try {
      const format = args.format || 'json';
      const includeContent = args.include_content || false;
      const includeEmbeddings = args.include_embeddings || false;

      const files = Array.from(this.processedFiles.values());
      const nodes = Array.from(this.fileNodes.values());

      const exportData = {
        metadata: {
          export_timestamp: new Date().toISOString(),
          format,
          total_files: files.length,
          total_nodes: nodes.length,
        },
      };

      if (format === 'json') {
        // `undefined` values are dropped by JSON.stringify.
        exportData.files = files.map((f) => ({
          ...f,
          content: includeContent ? f.content : undefined,
        }));
        exportData.nodes = nodes.map((node) => {
          const exported = {
            ...node,
            embedding: includeEmbeddings ? node.embedding : undefined,
          };
          if (!includeContent && node.properties) {
            // Nodes carry a copy of the file content; strip it as well.
            exported.properties = { ...node.properties, content: undefined };
          }
          return exported;
        });
        exportData.vector_store_stats = this.vectorStore.getStats();
      } else {
        exportData.warning = `Export format "${format}" is not implemented; only metadata is included`;
      }

      return jsonResult({ success: true, format, export_data: exportData });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `vdb_search`: vector similarity search, optionally restricted to the
   * given node types.
   *
   * @param {object} [args] - Tool arguments (see the `vdb_search` schema).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure
   *   (including a missing or non-array `query_embedding`).
   */
  async handleVDBSearch(args = {}) {
    try {
      const queryEmbedding = args.query_embedding;
      if (!Array.isArray(queryEmbedding)) {
        throw new Error('query_embedding must be an array of numbers');
      }
      const topK = args.top_k || 10;
      // `??` so an explicit threshold of 0 is honoured.
      const threshold = args.similarity_threshold ?? 0.7;
      const nodeTypes = args.node_types;

      const results =
        nodeTypes && nodeTypes.length > 0
          ? await this.vectorStore.searchByNodeTypes(queryEmbedding, nodeTypes, topK, threshold)
          : await this.vectorStore.searchNodes(queryEmbedding, topK, threshold);

      return jsonResult({
        success: true,
        operator: 'VDBOperator',
        results_found: results.length,
        results: results.map((hit) => ({
          id: hit.id,
          score: hit.score,
          type: hit.node?.type || 'chunk',
          label: hit.node?.label || 'Content',
          metadata: hit.node?.metadata || hit.chunk?.metadata,
        })),
      });
    } catch (error) {
      return jsonError(error);
    }
  }

  /**
   * `graph_analytics`: return the output of generateAnalytics().
   *
   * NOTE(review): `include_centrality`, `include_clustering` and
   * `include_file_stats` are declared in the tool schema but have no effect;
   * no centrality or clustering is computed anywhere in this class.
   *
   * @param {object} [args] - Tool arguments (currently unused).
   * @returns {Promise<object>} JSON text tool result; `isError` on failure.
   */
  async handleGraphAnalytics(args = {}) {
    try {
      const analytics = await this.generateAnalytics();
      return jsonResult({ success: true, analytics });
    } catch (error) {
      return jsonError(error);
    }
  }

  // Helper methods

  /**
   * Produce a mock embedding via VectorStore.generateRandomEmbedding, sized
   * to the dimension the vector store was constructed with.
   *
   * @returns {number[]} presumably an array of numbers — verify against VectorStore.
   */
  generateMockEmbedding() {
    return VectorStore.generateRandomEmbedding(this.vectorDimension ?? DEFAULT_VECTOR_DIMENSION);
  }

  /**
   * Return up to 200 characters of a search hit's node or chunk content,
   * with `...` appended only when the content was truncated.
   *
   * @param {{ node?: object, chunk?: object }} result - Search hit.
   * @returns {string} Preview text, or 'No content available'.
   */
  getContentPreview(result) {
    const nodeContent = result.node?.properties?.content;
    if (nodeContent) {
      return previewOf(nodeContent);
    }
    const chunkContent = result.chunk?.content;
    if (chunkContent) {
      return previewOf(chunkContent);
    }
    return 'No content available';
  }

  /**
   * Count nodes per `type`.
   *
   * @param {Array<{ type: string }>} nodes
   * @returns {Record<string, number>}
   */
  getNodeTypeDistribution(nodes) {
    return countBy(nodes, (node) => node.type);
  }

  /**
   * Aggregate content-length statistics over files.
   *
   * @param {Array<{ content?: string }>} files
   * @returns {{ total_characters: number, files_with_content: number,
   *   average_content_length: number, content_distribution: object }}
   */
  analyzeContent(files) {
    const filesWithContent = files.filter((f) => f.content && f.content.length > 0);
    const totalContent = files.reduce((sum, f) => sum + (f.content?.length || 0), 0);
    return {
      total_characters: totalContent,
      files_with_content: filesWithContent.length,
      average_content_length:
        filesWithContent.length > 0 ? totalContent / filesWithContent.length : 0,
      content_distribution: this.getContentLengthDistribution(filesWithContent),
    };
  }

  /**
   * Bucket files by content length: small (<1000), medium (<10000),
   * large (<100000), xlarge (everything else).
   *
   * @param {Array<{ content?: string }>} files
   * @returns {{ small: number, medium: number, large: number, xlarge: number }}
   */
  getContentLengthDistribution(files) {
    const distribution = { small: 0, medium: 0, large: 0, xlarge: 0 };
    for (const file of files) {
      const length = file.content?.length || 0;
      if (length < 1000) {
        distribution.small++;
      } else if (length < 10000) {
        distribution.medium++;
      } else if (length < 100000) {
        distribution.large++;
      } else {
        distribution.xlarge++;
      }
    }
    return distribution;
  }

  /**
   * Build file, graph and vector-store statistics.
   *
   * @param {Array<object>} files
   * @param {Array<object>} nodes
   * @returns {{ file_statistics: object, graph_statistics: object,
   *   vector_store_statistics: object }}
   */
  generateDetailedStatistics(files, nodes) {
    return {
      file_statistics: {
        total_files: files.length,
        // A missing size counts as 0 instead of turning the total into NaN.
        total_size_bytes: files.reduce((sum, f) => sum + (f.size ?? 0), 0),
        file_type_distribution: this.getFileTypeDistribution(files),
        size_distribution: this.getFileSizeDistribution(files),
      },
      graph_statistics: {
        total_nodes: nodes.length,
        node_type_distribution: this.getNodeTypeDistribution(nodes),
        nodes_with_embeddings: nodes.filter((n) => n.embedding).length,
      },
      vector_store_statistics: this.vectorStore.getStats(),
    };
  }

  /**
   * Count files per extension; files without one are counted under
   * 'no_extension'.
   *
   * @param {Array<{ extension?: string }>} files
   * @returns {Record<string, number>}
   */
  getFileTypeDistribution(files) {
    return countBy(files, (file) => file.extension || 'no_extension');
  }

  /**
   * Bucket files by `size`: tiny (<1024), small (<10240), medium (<102400),
   * large (everything else).
   *
   * @param {Array<{ size: number }>} files
   * @returns {{ tiny: number, small: number, medium: number, large: number }}
   */
  getFileSizeDistribution(files) {
    const distribution = { tiny: 0, small: 0, medium: 0, large: 0 };
    for (const file of files) {
      if (file.size < 1024) {
        distribution.tiny++;
      } else if (file.size < 10240) {
        distribution.small++;
      } else if (file.size < 102400) {
        distribution.medium++;
      } else {
        distribution.large++;
      }
    }
    return distribution;
  }

  /**
   * Mock similarity analysis: every unordered file pair gets a random score
   * from Math.random() and is kept when the score reaches `minSimilarity`.
   * Results are therefore non-deterministic.
   *
   * @param {Array<{ id: string }>} files
   * @param {number} minSimilarity
   * @returns {Promise<Array<object>>} SIMILAR_TO relationships.
   */
  async analyzeSimilarityRelationships(files, minSimilarity) {
    const relationships = [];
    // Mock similarity analysis - in real implementation, would use embeddings
    for (let i = 0; i < files.length; i++) {
      for (let j = i + 1; j < files.length; j++) {
        const similarity = Math.random(); // Mock similarity score
        if (similarity >= minSimilarity) {
          relationships.push({
            id: `similarity_${files[i].id}_${files[j].id}`,
            source_id: files[i].id,
            target_id: files[j].id,
            type: 'SIMILAR_TO',
            properties: { similarity_score: similarity },
            weight: similarity,
            confidence: similarity,
          });
        }
      }
    }
    return relationships;
  }

  /**
   * Mock reference analysis: file A references file B when A's content
   * contains B's `name` as a substring.
   *
   * @param {Array<{ id: string, name: string, content?: string }>} files
   * @returns {Array<object>} REFERENCES relationships.
   */
  analyzeReferenceRelationships(files) {
    const relationships = [];
    // Mock reference analysis - would analyze imports, includes, etc.
    for (const file of files) {
      if (!file.content) {
        continue;
      }
      // Simple mock: look for file references in content
      for (const otherFile of files) {
        if (file.id !== otherFile.id && file.content.includes(otherFile.name)) {
          relationships.push({
            id: `reference_${file.id}_${otherFile.id}`,
            source_id: file.id,
            target_id: otherFile.id,
            type: 'REFERENCES',
            properties: { reference_type: 'name_mention' },
            weight: 1.0,
            confidence: 0.8,
          });
        }
      }
    }
    return relationships;
  }

  /**
   * Mock dependency analysis: among files with extension .js/.ts/.py/.java,
   * each unordered pair is linked when Math.random() exceeds 0.8. Results
   * are therefore non-deterministic.
   *
   * @param {Array<{ id: string, extension: string }>} files
   * @returns {Array<object>} DEPENDS_ON relationships.
   */
  analyzeDependencyRelationships(files) {
    const relationships = [];
    // Mock dependency analysis - would analyze actual dependencies
    const codeFiles = files.filter((f) => ['.js', '.ts', '.py', '.java'].includes(f.extension));
    for (let i = 0; i < codeFiles.length; i++) {
      for (let j = i + 1; j < codeFiles.length; j++) {
        if (Math.random() > 0.8) {
          // Mock dependency
          relationships.push({
            id: `dependency_${codeFiles[i].id}_${codeFiles[j].id}`,
            source_id: codeFiles[i].id,
            target_id: codeFiles[j].id,
            type: 'DEPENDS_ON',
            properties: { dependency_type: 'import' },
            weight: 1.0,
            confidence: 0.9,
          });
        }
      }
    }
    return relationships;
  }

  /**
   * Count relationships per `type`.
   *
   * @param {Array<{ type: string }>} relationships
   * @returns {Record<string, number>}
   */
  getRelationshipTypeDistribution(relationships) {
    return countBy(relationships, (rel) => rel.type);
  }

  /**
   * Assemble file, graph, vector-store and processing analytics from the
   * current in-memory state. `last_processing_time` is the time of this
   * call, not of the last processing run.
   *
   * @returns {Promise<object>}
   */
  async generateAnalytics() {
    const files = Array.from(this.processedFiles.values());
    const nodes = Array.from(this.fileNodes.values());
    return {
      file_analytics: this.fileProcessor.generateSummary(files),
      graph_analytics: {
        total_nodes: nodes.length,
        node_type_distribution: this.getNodeTypeDistribution(nodes),
        nodes_with_embeddings: nodes.filter((n) => n.embedding).length,
      },
      vector_store_analytics: this.vectorStore.getStats(),
      processing_analytics: {
        files_processed: files.length,
        nodes_created: nodes.length,
        last_processing_time: new Date().toISOString(),
      },
    };
  }
}
//# sourceMappingURL=mcp-server.js.map