UNPKG

zrald

Version:

Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents

397 lines 15.5 kB
import { v4 as uuidv4 } from 'uuid';
import { EmbeddingGenerator } from './embedding-generator.js';

/**
 * Builds graph structures (passage, tree, knowledge and DAG graphs) from plain
 * input records. Every builder returns `{ nodes, relationships, type, ... }`;
 * node embeddings are obtained from the injected embedding generator.
 */
export class GraphBuilder {
    embeddingGenerator;

    /**
     * @param {object} embeddingGenerator - Object whose awaited
     *   `generateEmbedding(text)` method supplies the embedding for each node.
     */
    constructor(embeddingGenerator) {
        this.embeddingGenerator = embeddingGenerator;
    }

    // Passage Graph Builder
    /**
     * Builds a passage graph: one 'document' node per input document, one
     * 'chunk' node per text chunk, CONTAINS edges (document -> chunk), NEXT
     * edges (chunk -> following chunk of the same document) and SIMILAR_TO
     * edges between chunk pairs whose embeddings are similar enough.
     *
     * @param {Iterable<object>} documents - Each item is read for `id`,
     *   `content` (string) and optional `metadata` (with optional `title`).
     * @returns {Promise<{nodes: object[], relationships: object[], type: 'passage'}>}
     */
    async buildPassageGraph(documents) {
        const nodes = [];
        const relationships = [];
        // NOTE(review): `chunks` (including the extracted entities) is collected
        // but never returned or read afterwards - confirm whether it was meant to
        // be part of the result.
        const chunks = [];

        for (const doc of documents) {
            // Create document node
            const docNode = {
                id: doc.id,
                type: 'document',
                label: doc.metadata?.title || `Document ${doc.id}`,
                properties: {
                    content_length: doc.content.length,
                    ...doc.metadata
                },
                // Only the first 500 characters are embedded for the document node.
                embedding: await this.embeddingGenerator.generateEmbedding(doc.content.slice(0, 500)),
                metadata: doc.metadata,
                created_at: new Date(),
                updated_at: new Date()
            };
            nodes.push(docNode);

            // Chunk the document (512 / 50 are presumably chunk size and overlap -
            // confirm against EmbeddingGenerator.chunkText).
            const textChunks = EmbeddingGenerator.chunkText(doc.content, 512, 50);

            for (let i = 0; i < textChunks.length; i++) {
                const text = textChunks[i];
                const chunkId = `${doc.id}_chunk_${i}`;

                // Create chunk
                const chunk = {
                    id: chunkId,
                    content: text,
                    document_id: doc.id,
                    position: i,
                    embedding: await this.embeddingGenerator.generateEmbedding(text),
                    entities: this.extractSimpleEntities(text),
                    metadata: {
                        chunk_index: i,
                        total_chunks: textChunks.length
                    }
                };
                chunks.push(chunk);

                // Only mark the preview as truncated when text was actually cut.
                const contentPreview = text.length > 100 ? `${text.slice(0, 100)}...` : text;
                // Count whitespace-separated tokens so newlines, tabs and repeated
                // spaces do not distort the count (an empty chunk has 0 words).
                const wordCount = text.split(/\s+/).filter(Boolean).length;

                // Create chunk node
                const chunkNode = {
                    id: chunkId,
                    type: 'chunk',
                    label: `Chunk ${i + 1}`,
                    properties: {
                        position: i,
                        content_preview: contentPreview,
                        word_count: wordCount
                    },
                    embedding: chunk.embedding,
                    metadata: chunk.metadata,
                    created_at: new Date(),
                    updated_at: new Date()
                };
                nodes.push(chunkNode);

                // Create relationship from document to chunk
                relationships.push({
                    id: uuidv4(),
                    source_id: doc.id,
                    target_id: chunkId,
                    type: 'CONTAINS',
                    properties: { chunk_position: i },
                    weight: 1.0,
                    confidence: 1.0,
                    created_at: new Date()
                });

                // Create relationships between adjacent chunks
                if (i > 0) {
                    relationships.push({
                        id: uuidv4(),
                        source_id: `${doc.id}_chunk_${i - 1}`,
                        target_id: chunkId,
                        type: 'NEXT',
                        properties: { sequence: i },
                        weight: 0.8,
                        confidence: 1.0,
                        created_at: new Date()
                    });
                }
            }
        }

        // Create similarity relationships between chunks (across all documents)
        await this.addSimilarityRelationships(nodes.filter(n => n.type === 'chunk'), relationships);

        return { nodes, relationships, type: 'passage' };
    }

    // Trees Graph Builder
    /**
     * Builds a tree graph from flat hierarchical records: one node per item
     * ('summary' for level 0, 'concept' otherwise), a PARENT_OF edge for every
     * item with a `parent_id`, and a SIBLING edge for every pair of items that
     * share a `parent_id`.
     *
     * @param {Iterable<object>} hierarchicalData - Each item is read for `id`,
     *   `level` (number), `content`, optional `parent_id` and optional `metadata`.
     * @returns {Promise<{nodes: object[], relationships: object[], root_nodes: Array, levels: number, type: 'trees'}>}
     *   `root_nodes` lists the ids of level-0 items; `levels` is the highest
     *   level seen plus one (1 for empty input, because the maximum starts at 0).
     */
    async buildTreesGraph(hierarchicalData) {
        const nodes = [];
        const relationships = [];
        const rootNodes = [];
        let maxLevel = 0;

        // Create nodes
        for (const item of hierarchicalData) {
            const node = {
                id: item.id,
                type: item.level === 0 ? 'summary' : 'concept',
                label: item.metadata?.title || `Node ${item.id}`,
                properties: {
                    level: item.level,
                    content: item.content,
                    ...item.metadata
                },
                embedding: await this.embeddingGenerator.generateEmbedding(item.content),
                metadata: { level: item.level, ...item.metadata },
                created_at: new Date(),
                updated_at: new Date()
            };
            nodes.push(node);

            if (item.level === 0) {
                rootNodes.push(item.id);
            }
            maxLevel = Math.max(maxLevel, item.level);
        }

        // Create hierarchical relationships and, in the same pass, group children
        // by parent so sibling edges can be emitted afterwards.
        const nodesByParent = new Map();
        for (const item of hierarchicalData) {
            if (!item.parent_id) {
                continue;
            }
            relationships.push({
                id: uuidv4(),
                source_id: item.parent_id,
                target_id: item.id,
                type: 'PARENT_OF',
                properties: {
                    // Always recorded as 1; the actual levels are not compared.
                    level_difference: 1,
                    child_level: item.level
                },
                weight: 1.0,
                confidence: 1.0,
                created_at: new Date()
            });

            if (!nodesByParent.has(item.parent_id)) {
                nodesByParent.set(item.parent_id, []);
            }
            nodesByParent.get(item.parent_id).push(item.id);
        }

        // Add sibling relationships (same parent): one edge per unordered pair.
        for (const siblings of nodesByParent.values()) {
            for (let i = 0; i < siblings.length; i++) {
                for (let j = i + 1; j < siblings.length; j++) {
                    relationships.push({
                        id: uuidv4(),
                        source_id: siblings[i],
                        target_id: siblings[j],
                        type: 'SIBLING',
                        properties: { relationship_type: 'sibling' },
                        weight: 0.6,
                        confidence: 0.8,
                        created_at: new Date()
                    });
                }
            }
        }

        return {
            nodes,
            relationships,
            root_nodes: rootNodes,
            levels: maxLevel + 1,
            type: 'trees'
        };
    }

    // Knowledge Graph Builder
    /**
     * Builds a knowledge graph from subject/predicate/object triples. Each
     * distinct subject or object string becomes one node (its id is the string
     * itself); each triple becomes one relationship whose type is the
     * normalized predicate.
     *
     * @param {Iterable<object>} triples - Each item is read for `subject`,
     *   `predicate`, `object` and optional `metadata` (`subject`, `object`,
     *   `relationship`, `weight`, `confidence`).
     * @returns {Promise<{nodes: object[], relationships: object[], entities: string[], concepts: string[], type: 'knowledge'}>}
     */
    async buildKnowledgeGraph(triples) {
        const nodes = [];
        const relationships = [];
        const entities = new Set();
        const concepts = new Set();
        const seenNodes = new Set();

        // Creates the node for `text` unless one already exists for that string.
        const ensureNode = async (text, nodeMetadata) => {
            if (seenNodes.has(text)) {
                return;
            }
            const node = {
                id: text,
                type: this.determineNodeType(text),
                label: this.formatLabel(text),
                properties: {
                    original_text: text,
                    ...nodeMetadata
                },
                embedding: await this.embeddingGenerator.generateEmbedding(text),
                metadata: nodeMetadata,
                created_at: new Date(),
                updated_at: new Date()
            };
            nodes.push(node);
            seenNodes.add(text);
            if (node.type === 'entity') {
                entities.add(text);
            }
            if (node.type === 'concept') {
                concepts.add(text);
            }
        };

        // Process triples
        for (const triple of triples) {
            // Subject first, then object, so node order follows first appearance.
            await ensureNode(triple.subject, triple.metadata?.subject);
            await ensureNode(triple.object, triple.metadata?.object);

            // Create relationship
            relationships.push({
                id: uuidv4(),
                source_id: triple.subject,
                target_id: triple.object,
                type: this.normalizeRelationType(triple.predicate),
                properties: {
                    original_predicate: triple.predicate,
                    ...triple.metadata?.relationship
                },
                // `??` rather than `||`: an explicit weight/confidence of 0 is a
                // legitimate value and must not be replaced by the default.
                weight: triple.metadata?.weight ?? 1.0,
                confidence: triple.metadata?.confidence ?? 0.9,
                metadata: triple.metadata?.relationship,
                created_at: new Date()
            });
        }

        return {
            nodes,
            relationships,
            entities: Array.from(entities),
            concepts: Array.from(concepts),
            type: 'knowledge'
        };
    }

    // DAG Builder for workflow planning
    /**
     * Builds a task graph: one 'concept' node per task and one DEPENDS_ON edge
     * per declared dependency, directed from the dependency (source) to the
     * task that requires it (target). Dependency ids are not validated against
     * the task list, and no cycle detection is performed here.
     *
     * @param {Iterable<object>} tasks - Each item is read for `id`, `name`,
     *   `description`, optional `dependencies` (array of task ids; a missing
     *   value is treated as no dependencies) and optional `metadata`.
     * @returns {Promise<{nodes: object[], relationships: object[], entry_points: Array, exit_points: Array, type: 'dag'}>}
     *   `entry_points` are tasks with no dependencies; `exit_points` are tasks
     *   that no other task lists as a dependency.
     */
    async buildDAG(tasks) {
        const nodes = [];
        const relationships = [];
        const entryPoints = [];
        const exitPoints = [];

        // Create task nodes
        for (const task of tasks) {
            const node = {
                id: task.id,
                type: 'concept', // Tasks are conceptual nodes
                label: task.name,
                properties: {
                    description: task.description,
                    task_type: 'workflow_task',
                    ...task.metadata
                },
                embedding: await this.embeddingGenerator.generateEmbedding(`${task.name} ${task.description}`),
                metadata: task.metadata,
                created_at: new Date(),
                updated_at: new Date()
            };
            nodes.push(node);

            // Identify entry points (no dependencies); a task without a
            // `dependencies` field counts as having none instead of throwing.
            if ((task.dependencies ?? []).length === 0) {
                entryPoints.push(task.id);
            }
        }

        // Create dependency relationships
        for (const task of tasks) {
            for (const depId of task.dependencies ?? []) {
                relationships.push({
                    id: uuidv4(),
                    source_id: depId,
                    target_id: task.id,
                    type: 'DEPENDS_ON',
                    properties: { dependency_type: 'prerequisite' },
                    weight: 1.0,
                    confidence: 1.0,
                    created_at: new Date()
                });
            }
        }

        // Identify exit points (no dependents): every edge source is a task that
        // something else depends on, so any task absent from that set is an exit.
        const tasksWithDependents = new Set(relationships.map(r => r.source_id));
        for (const task of tasks) {
            if (!tasksWithDependents.has(task.id)) {
                exitPoints.push(task.id);
            }
        }

        return {
            nodes,
            relationships,
            entry_points: entryPoints,
            exit_points: exitPoints,
            type: 'dag'
        };
    }

    // Helper methods
    /**
     * Appends a SIMILAR_TO relationship to `relationships` for every unordered
     * pair of nodes whose embedding cosine similarity is at least `threshold`.
     * Compares all pairs, so the work grows quadratically with the node count.
     *
     * @param {object[]} chunkNodes - Nodes with `id` and `embedding`; pairs
     *   where either embedding is missing are skipped.
     * @param {object[]} relationships - Array mutated in place.
     * @param {number} [threshold=0.7] - Minimum similarity (inclusive).
     * @returns {Promise<void>}
     */
    async addSimilarityRelationships(chunkNodes, relationships, threshold = 0.7) {
        for (let i = 0; i < chunkNodes.length; i++) {
            const node1 = chunkNodes[i];
            for (let j = i + 1; j < chunkNodes.length; j++) {
                const node2 = chunkNodes[j];
                if (!node1.embedding || !node2.embedding) {
                    continue;
                }
                const similarity = EmbeddingGenerator.cosineSimilarity(node1.embedding, node2.embedding);
                if (similarity >= threshold) {
                    relationships.push({
                        id: uuidv4(),
                        source_id: node1.id,
                        target_id: node2.id,
                        type: 'SIMILAR_TO',
                        properties: { similarity_score: similarity },
                        weight: similarity,
                        confidence: similarity,
                        created_at: new Date()
                    });
                }
            }
        }
    }

    /**
     * Heuristic entity extraction: collects runs of capitalized words and the
     * contents of double-quoted strings, de-duplicated in first-seen order.
     *
     * @param {string} text
     * @returns {string[]}
     */
    extractSimpleEntities(text) {
        // Simple entity extraction - in practice, use proper NER
        const entities = [];

        // Extract capitalized words/phrases
        const capitalizedMatches = text.match(/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g);
        if (capitalizedMatches) {
            entities.push(...capitalizedMatches);
        }

        // Extract quoted strings (strip the surrounding quote characters)
        const quotedMatches = text.match(/"([^"]+)"/g);
        if (quotedMatches) {
            entities.push(...quotedMatches.map(match => match.slice(1, -1)));
        }

        return [...new Set(entities)];
    }

    /**
     * Classifies a node name as 'concept' or 'entity'. A name is a 'concept'
     * when it contains one of the abstract keywords (case-insensitive) or does
     * not start with an ASCII uppercase letter; otherwise it is an 'entity'.
     *
     * @param {string} text
     * @returns {'concept' | 'entity'}
     */
    determineNodeType(text) {
        // Simple heuristic - in practice, use proper classification
        const conceptKeywords = [
            'concept', 'idea', 'theory', 'principle', 'method', 'approach',
            'strategy', 'technique', 'process', 'system', 'framework'
        ];
        const textLower = text.toLowerCase();
        const isAbstract = conceptKeywords.some(keyword => textLower.includes(keyword));
        const hasCapitalization = /^[A-Z]/.test(text);
        return isAbstract || !hasCapitalization ? 'concept' : 'entity';
    }

    /**
     * Turns an identifier-like string into a title-cased label: underscores
     * become spaces, a space is inserted before every ASCII uppercase letter,
     * whitespace is collapsed, and each word is capitalized with the remainder
     * lower-cased (e.g. "user_name" -> "User Name", "camelCase" -> "Camel Case").
     *
     * NOTE(review): because a space precedes every capital, consecutive
     * capitals are split into single letters ("NASA" -> "N A S A"). Left
     * unchanged so existing labels stay stable - confirm whether that is wanted.
     *
     * @param {string} text
     * @returns {string}
     */
    formatLabel(text) {
        return text
            .replace(/_/g, ' ')
            .replace(/([A-Z])/g, ' $1')
            .trim()
            .replace(/\s+/g, ' ')
            .split(' ')
            .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
            .join(' ');
    }

    /**
     * Normalizes a predicate into a relationship type: upper-cases it, replaces
     * every character outside A-Z / 0-9 with "_", collapses repeated
     * underscores and strips a leading/trailing underscore
     * (e.g. "is a" -> "IS_A").
     *
     * @param {string} predicate
     * @returns {string}
     */
    normalizeRelationType(predicate) {
        // Normalize relationship types
        const normalized = predicate
            .toUpperCase()
            .replace(/[^A-Z0-9]/g, '_')
            .replace(/_+/g, '_')
            .replace(/^_|_$/g, '');

        // Map common predicates to standard types. Every entry currently maps to
        // itself, so the table only documents the recognized standard types.
        const mappings = {
            'IS_A': 'IS_A',
            'HAS': 'HAS',
            'PART_OF': 'PART_OF',
            'RELATED_TO': 'RELATED_TO',
            'CAUSES': 'CAUSES',
            'ENABLES': 'ENABLES',
            'REQUIRES': 'REQUIRES'
        };
        return mappings[normalized] ?? normalized;
    }
}
//# sourceMappingURL=graph-builders.js.map