UNPKG

zrald

Version:

Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents

259 lines 12.3 kB
import { BaseOperator } from './base-operator.js';
import { FromRelOperatorConfigSchema, OccurrenceOperatorConfigSchema } from '../types/graph.js';

/**
 * Case-insensitive substring test that never matches an empty or non-string label.
 *
 * @param {string} lowerCasedText - Text that has already been lower-cased.
 * @param {*} label - Candidate label; anything but a non-empty string yields false.
 * @returns {boolean} True when the lower-cased label occurs in the text.
 */
function mentionsLabel(lowerCasedText, label) {
    return typeof label === 'string'
        && label.length > 0
        && lowerCasedText.includes(label.toLowerCase());
}

/**
 * Collects the start index of every non-overlapping occurrence of `needle` in `haystack`.
 *
 * @param {string} haystack - Text to scan.
 * @param {string} needle - Substring to look for.
 * @returns {number[]} Start indices in ascending order; empty when `needle` is empty.
 */
function findAllPositions(haystack, needle) {
    const found = [];
    // An empty needle matches at every index and would never advance the scan.
    if (needle.length === 0) {
        return found;
    }
    let index = haystack.indexOf(needle);
    while (index !== -1) {
        found.push(index);
        index = haystack.indexOf(needle, index + needle.length);
    }
    return found;
}

/**
 * FromRel Operator - Text chunk tracing
 * Traces backward from relationships to find the original text chunks that established those connections
 */
export class FromRelOperator extends BaseOperator {
    /**
     * @param {object} graphDb - Graph store; this class calls `getNode`, `getNodesByType` and `getRelationships` on it.
     * @param {object} vectorStore - Vector store handed to the base class (not used directly here).
     */
    constructor(graphDb, vectorStore) {
        super('FromRelOperator', 'chunk', graphDb, vectorStore);
    }

    /**
     * Resolves each configured relationship ID, gathers its endpoint nodes and the
     * chunks associated with it, and scores every distinct chunk once.
     *
     * @param {object} config - Raw config, validated against `FromRelOperatorConfigSchema`.
     *   Fields read after validation: `relationship_ids`, `include_context`, `context_window`.
     * @returns {Promise<*>} Whatever `createResult` (inherited) builds from the de-duplicated
     *   context nodes, the relationships, the chunks sorted by descending score, the score
     *   map and a metadata object.
     */
    async execute(config) {
        const validatedConfig = await this.validateConfig(config, FromRelOperatorConfigSchema);
        const sourceChunks = [];
        const sourceRelationships = [];
        const scores = {};
        const seenChunkIds = new Set();
        // Keyed by node id so that a node shared by several relationships is reported once.
        const contextNodesById = new Map();

        for (const relationshipId of validatedConfig.relationship_ids) {
            const relationships = await this.findRelationshipById(relationshipId);
            for (const relationship of relationships) {
                sourceRelationships.push(relationship);

                const sourceNode = await this.graphDb.getNode(relationship.source_id);
                const targetNode = await this.graphDb.getNode(relationship.target_id);
                if (sourceNode) {
                    contextNodesById.set(sourceNode.id, sourceNode);
                }
                if (targetNode) {
                    contextNodesById.set(targetNode.id, targetNode);
                }

                const relatedChunks = await this.findChunksForRelationship(
                    relationship,
                    validatedConfig.include_context,
                    validatedConfig.context_window,
                );
                for (const chunk of relatedChunks) {
                    // A chunk keeps the score from the first relationship that surfaced it.
                    if (seenChunkIds.has(chunk.id)) {
                        continue;
                    }
                    seenChunkIds.add(chunk.id);
                    sourceChunks.push(chunk);
                    scores[chunk.id] = this.calculateChunkRelevanceScore(chunk, relationship, sourceNode, targetNode);
                }
            }
        }

        // Highest relevance first.
        sourceChunks.sort((a, b) => (scores[b.id] || 0) - (scores[a.id] || 0));

        return this.createResult([...contextNodesById.values()], sourceRelationships, sourceChunks, scores, {
            tracing_method: 'relationship_to_chunks',
            relationships_processed: validatedConfig.relationship_ids.length,
            context_included: validatedConfig.include_context,
            context_window: validatedConfig.context_window,
            unique_chunks_found: sourceChunks.length,
        });
    }

    /**
     * Looks a relationship up by ID by walking the relationships of every node of
     * the known node types.
     *
     * @param {string} relationshipId - ID to match against each relationship's `id`.
     * @returns {Promise<object[]>} A one-element array with the first match, or an empty array.
     */
    async findRelationshipById(relationshipId) {
        // In a real implementation, this would query the graph database directly.
        // For now, search through all relationships reachable from any node.
        const allNodeTypes = ['entity', 'concept', 'document', 'chunk', 'summary'];
        for (const nodeType of allNodeTypes) {
            const nodes = await this.graphDb.getNodesByType(nodeType);
            for (const node of nodes) {
                const nodeRels = await this.graphDb.getRelationships(node.id, 'both');
                const matchingRel = nodeRels.find((r) => r.id === relationshipId);
                if (matchingRel) {
                    // Only one entry per ID is ever reported, so there is nothing left to scan for.
                    return [matchingRel];
                }
            }
        }
        return [];
    }

    /**
     * Finds chunks for a relationship by searching for the labels of its two
     * endpoint nodes, optionally followed by each hit's neighbouring chunks.
     *
     * @param {object} relationship - Relationship with `source_id` and `target_id`.
     * @param {boolean} includeContext - When truthy, context chunks are appended after each hit.
     * @param {number} contextWindow - Forwarded to `getContextChunks`.
     * @returns {Promise<object[]>} Matching chunks (may contain duplicates); empty when either
     *   endpoint node cannot be loaded.
     */
    async findChunksForRelationship(relationship, includeContext, contextWindow) {
        const chunks = [];
        const sourceNode = await this.graphDb.getNode(relationship.source_id);
        const targetNode = await this.graphDb.getNode(relationship.target_id);
        if (!sourceNode || !targetNode) {
            return chunks;
        }

        // Search for chunks containing both entities.
        const relevantChunks = await this.searchChunksForTerms([sourceNode.label, targetNode.label]);
        for (const chunk of relevantChunks) {
            chunks.push(chunk);
            if (includeContext) {
                const contextChunks = await this.getContextChunks(chunk, contextWindow);
                chunks.push(...contextChunks);
            }
        }
        return chunks;
    }

    /**
     * Placeholder for a text or vector search over chunks.
     *
     * @param {string[]} terms - Search terms.
     * @returns {Promise<object[]>} Currently always an empty array.
     */
    async searchChunksForTerms(terms) {
        // TODO: use a text search or vector search to find relevant chunks.
        return [];
    }

    /**
     * Placeholder for fetching the chunks surrounding `chunk` in its document.
     *
     * @param {object} chunk - Chunk whose `position` anchors the window.
     * @param {number} contextWindow - Number of positions to look at on each side.
     * @returns {Promise<object[]>} Currently always an empty array.
     */
    async getContextChunks(chunk, contextWindow) {
        // TODO: for every offset in [-contextWindow, contextWindow] except 0, look up the
        // chunk at `chunk.position + offset` (when non-negative) in the same document.
        return [];
    }

    /**
     * Scores a chunk for a relationship: base 0.5, plus 0.3 x relationship
     * confidence, plus 0.2 for each endpoint label found in the chunk text, plus
     * 0.1 x `chunk.metadata.importance`; the sum is clamped to [0, 1].
     *
     * Missing or non-numeric confidence/importance contribute nothing, so the
     * result is always a finite number rather than NaN.
     *
     * @param {object} chunk - Chunk with `content` and optional `metadata.importance`.
     * @param {object} relationship - Relationship with an optional numeric `confidence`.
     * @param {object|null|undefined} sourceNode - Source endpoint; its `label` is searched for.
     * @param {object|null|undefined} targetNode - Target endpoint; its `label` is searched for.
     * @returns {number} Relevance score in [0, 1].
     */
    calculateChunkRelevanceScore(chunk, relationship, sourceNode, targetNode) {
        const content = typeof chunk.content === 'string' ? chunk.content.toLowerCase() : '';
        const confidence = Number.isFinite(relationship.confidence) ? relationship.confidence : 0;
        const importance = chunk.metadata?.importance;

        let score = 0.5; // Base score
        score += confidence * 0.3;
        if (mentionsLabel(content, sourceNode?.label)) {
            score += 0.2;
        }
        if (mentionsLabel(content, targetNode?.label)) {
            score += 0.2;
        }
        if (Number.isFinite(importance)) {
            score += importance * 0.1;
        }
        return Math.min(1.0, Math.max(0.0, score));
    }
}

/**
 * Occurrence Operator - Entity co-occurrence analysis
 * Identifies passages where specified entities appear together, capturing contextual co-occurrence patterns
 */
export class OccurrenceOperator extends BaseOperator {
    /**
     * @param {object} graphDb - Graph store; this class calls `getNode` on it.
     * @param {object} vectorStore - Vector store handed to the base class (not used directly here).
     */
    constructor(graphDb, vectorStore) {
        super('OccurrenceOperator', 'chunk', graphDb, vectorStore);
    }

    /**
     * Loads the configured entities and reports the chunks in which they co-occur.
     *
     * @param {object} config - Raw config, validated against `OccurrenceOperatorConfigSchema`.
     *   Fields read after validation: `entities`, `co_occurrence_window`, `min_frequency`,
     *   `include_positions`.
     * @returns {Promise<*>} Whatever `createResult` (inherited) builds. With fewer than two
     *   distinct entities found, the metadata carries an `error` message and no chunks.
     */
    async execute(config) {
        const validatedConfig = await this.validateConfig(config, OccurrenceOperatorConfigSchema);
        const coOccurrenceChunks = [];
        const scores = {};
        const coOccurrenceStats = {};

        // Resolve the requested entities, keeping each node once: pairing an entity
        // with itself would count every mention as a co-occurrence.
        const entityNodes = [];
        const seenNodeIds = new Set();
        for (const entityId of validatedConfig.entities) {
            const node = await this.graphDb.getNode(entityId);
            if (node && !seenNodeIds.has(node.id)) {
                seenNodeIds.add(node.id);
                entityNodes.push(node);
            }
        }

        if (entityNodes.length < 2) {
            return this.createResult(entityNodes, [], [], {}, {
                error: 'Need at least 2 entities for co-occurrence analysis',
                entities_found: entityNodes.length,
            });
        }

        const coOccurrenceResults = await this.findCoOccurrenceChunks(
            entityNodes,
            validatedConfig.co_occurrence_window,
            validatedConfig.min_frequency,
            validatedConfig.include_positions,
        );
        for (const result of coOccurrenceResults) {
            coOccurrenceChunks.push(result.chunk);
            scores[result.chunk.id] = result.score;
            if (validatedConfig.include_positions) {
                coOccurrenceStats[result.chunk.id] = {
                    entity_positions: result.positions,
                    co_occurrence_count: result.occurrenceCount,
                    window_matches: result.windowMatches,
                };
            }
        }

        // Strongest co-occurrence first.
        coOccurrenceChunks.sort((a, b) => (scores[b.id] || 0) - (scores[a.id] || 0));

        return this.createResult(entityNodes, [], coOccurrenceChunks, scores, {
            analysis_type: 'entity_co_occurrence',
            entities_analyzed: validatedConfig.entities,
            co_occurrence_window: validatedConfig.co_occurrence_window,
            min_frequency: validatedConfig.min_frequency,
            total_co_occurrences: coOccurrenceChunks.length,
            position_data_included: validatedConfig.include_positions,
            co_occurrence_stats: coOccurrenceStats,
        });
    }

    /**
     * Analyses every available chunk and keeps those whose co-occurrence count
     * reaches `minFrequency`.
     *
     * @param {object[]} entities - Entity nodes with `id` and `label`.
     * @param {number} window - Maximum character distance between two mentions.
     * @param {number} minFrequency - Minimum co-occurrence count for a chunk to be kept.
     * @param {boolean} includePositions - Whether to attach per-entity positions.
     * @returns {Promise<object[]>} Entries of the form
     *   `{ chunk, score, occurrenceCount, positions, windowMatches }`.
     */
    async findCoOccurrenceChunks(entities, window, minFrequency, includePositions) {
        // Simplified implementation: scans all chunks instead of using a text index.
        const allChunks = await this.getAllChunks();
        const results = [];
        for (const chunk of allChunks) {
            const analysis = this.analyzeChunkCoOccurrence(chunk, entities, window, includePositions);
            if (analysis.occurrenceCount >= minFrequency) {
                results.push({
                    chunk,
                    score: analysis.score,
                    occurrenceCount: analysis.occurrenceCount,
                    positions: includePositions ? analysis.positions : undefined,
                    windowMatches: analysis.windowMatches,
                });
            }
        }
        return results;
    }

    /**
     * Placeholder for retrieving every stored chunk.
     *
     * @returns {Promise<object[]>} Currently always an empty array.
     */
    async getAllChunks() {
        // TODO: implement with proper chunk retrieval.
        return [];
    }

    /**
     * Counts, for every pair of entities, the mention pairs whose start offsets in
     * the lower-cased chunk text lie within `window` characters of each other.
     *
     * Score = (co-occurrences / total mentions) x ln(co-occurrences + 1), capped at 1.
     * Entities with an empty or non-string label are treated as having no mentions.
     *
     * @param {object} chunk - Chunk with `content`.
     * @param {object[]} entities - Entity nodes with `id` and `label`.
     * @param {number} window - Maximum character distance between two mentions.
     * @param {boolean} includePositions - Whether to return the per-entity positions.
     * @returns {{score: number, occurrenceCount: number, positions: (object|undefined), windowMatches: number}}
     *   Analysis result; `windowMatches` equals `occurrenceCount`.
     */
    analyzeChunkCoOccurrence(chunk, entities, window, includePositions) {
        const text = typeof chunk.content === 'string' ? chunk.content.toLowerCase() : '';

        // Start offsets of every mention, per entity id.
        const positions = {};
        for (const entity of entities) {
            const label = typeof entity.label === 'string' ? entity.label.toLowerCase() : '';
            positions[entity.id] = findAllPositions(text, label);
        }

        // Count mention pairs, across each unordered pair of entities, that fall inside the window.
        let totalCoOccurrences = 0;
        const entityIds = entities.map((e) => e.id);
        for (let i = 0; i < entityIds.length; i++) {
            const firstPositions = positions[entityIds[i]] || [];
            for (let j = i + 1; j < entityIds.length; j++) {
                const secondPositions = positions[entityIds[j]] || [];
                for (const pos1 of firstPositions) {
                    for (const pos2 of secondPositions) {
                        if (Math.abs(pos1 - pos2) <= window) {
                            totalCoOccurrences++;
                        }
                    }
                }
            }
        }

        const totalEntityMentions = Object.values(positions)
            .reduce((sum, posList) => sum + posList.length, 0);
        const score = totalEntityMentions > 0
            ? (totalCoOccurrences / totalEntityMentions) * Math.log(totalCoOccurrences + 1)
            : 0;

        return {
            score: Math.min(1.0, score),
            occurrenceCount: totalCoOccurrences,
            positions: includePositions ? positions : undefined,
            windowMatches: totalCoOccurrences,
        };
    }
}
//# sourceMappingURL=chunk-operators.js.map