// zrald
// Version: (not given)
// Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents
// 397 lines • 15.5 kB
// JavaScript
import { v4 as uuidv4 } from 'uuid';
import { EmbeddingGenerator } from './embedding-generator.js';
/**
 * Builds graph structures (passage, tree, knowledge and DAG graphs) from
 * plain input records.
 *
 * Every builder returns a plain object holding `nodes`, `relationships`, a
 * `type` tag and a few builder-specific extras. Node embeddings come from the
 * injected embedding generator; relationship ids come from `uuidv4()`.
 */
export class GraphBuilder {
  /** Embedding provider; only its async `generateEmbedding(text)` method is used here. */
  embeddingGenerator;

  /**
   * @param {object} embeddingGenerator - Object exposing `generateEmbedding(text)`,
   *   whose result is awaited and stored on each node as `embedding`.
   */
  constructor(embeddingGenerator) {
    this.embeddingGenerator = embeddingGenerator;
  }

  /**
   * Passage graph builder: one `document` node per input document, one
   * `chunk` node per text chunk, `CONTAINS` edges (document -> chunk), `NEXT`
   * edges between consecutive chunks of the same document, and `SIMILAR_TO`
   * edges between any two chunks whose embeddings are similar enough.
   *
   * @param {Array<{id: string, content: string, metadata?: object}>} documents
   * @returns {Promise<{nodes: object[], relationships: object[], type: 'passage'}>}
   */
  async buildPassageGraph(documents) {
    const nodes = [];
    const relationships = [];

    for (const doc of documents) {
      // Only the first 500 characters are embedded for the document node.
      nodes.push({
        id: doc.id,
        type: 'document',
        label: doc.metadata?.title || `Document ${doc.id}`,
        properties: {
          content_length: doc.content.length,
          ...doc.metadata
        },
        embedding: await this.embeddingGenerator.generateEmbedding(doc.content.slice(0, 500)),
        metadata: doc.metadata,
        created_at: new Date(),
        updated_at: new Date()
      });

      // NOTE(review): 512 / 50 are presumably chunk size and overlap; confirm
      // against EmbeddingGenerator.chunkText.
      const textChunks = EmbeddingGenerator.chunkText(doc.content, 512, 50);

      for (let i = 0; i < textChunks.length; i++) {
        const text = textChunks[i];
        const chunkId = `${doc.id}_chunk_${i}`;

        nodes.push({
          id: chunkId,
          type: 'chunk',
          label: `Chunk ${i + 1}`,
          properties: {
            position: i,
            // Only mark the preview as truncated when text was actually cut.
            content_preview: text.length > 100 ? `${text.slice(0, 100)}...` : text,
            // Split on any whitespace run so repeated spaces and newlines
            // do not inflate or deflate the count.
            word_count: text.split(/\s+/).filter(Boolean).length
          },
          embedding: await this.embeddingGenerator.generateEmbedding(text),
          metadata: { chunk_index: i, total_chunks: textChunks.length },
          created_at: new Date(),
          updated_at: new Date()
        });

        relationships.push({
          id: uuidv4(),
          source_id: doc.id,
          target_id: chunkId,
          type: 'CONTAINS',
          properties: { chunk_position: i },
          weight: 1.0,
          confidence: 1.0,
          created_at: new Date()
        });

        // Link each chunk to its predecessor within the same document.
        if (i > 0) {
          relationships.push({
            id: uuidv4(),
            source_id: `${doc.id}_chunk_${i - 1}`,
            target_id: chunkId,
            type: 'NEXT',
            properties: { sequence: i },
            weight: 0.8,
            confidence: 1.0,
            created_at: new Date()
          });
        }
      }
    }

    // Similarity edges are computed across chunks of all documents.
    const chunkNodes = nodes.filter(node => node.type === 'chunk');
    await this.addSimilarityRelationships(chunkNodes, relationships);

    return {
      nodes,
      relationships,
      type: 'passage'
    };
  }

  /**
   * Trees graph builder: one node per item (`summary` at level 0, `concept`
   * otherwise), `PARENT_OF` edges from `parent_id` to the item, and `SIBLING`
   * edges between every pair of items sharing the same `parent_id`.
   *
   * @param {Array<{id: string, content: string, level: number, parent_id?: string, metadata?: object}>} hierarchicalData
   * @returns {Promise<{nodes: object[], relationships: object[], root_nodes: string[], levels: number, type: 'trees'}>}
   */
  async buildTreesGraph(hierarchicalData) {
    const nodes = [];
    const relationships = [];
    const rootNodes = [];
    let maxLevel = 0;

    for (const item of hierarchicalData) {
      nodes.push({
        id: item.id,
        type: item.level === 0 ? 'summary' : 'concept',
        label: item.metadata?.title || `Node ${item.id}`,
        properties: {
          level: item.level,
          content: item.content,
          ...item.metadata
        },
        embedding: await this.embeddingGenerator.generateEmbedding(item.content),
        metadata: { level: item.level, ...item.metadata },
        created_at: new Date(),
        updated_at: new Date()
      });
      if (item.level === 0) {
        rootNodes.push(item.id);
      }
      maxLevel = Math.max(maxLevel, item.level);
    }

    // One pass emits the parent -> child edges and groups children by parent.
    // Items with a falsy parent_id are treated as having no parent.
    const childrenByParent = new Map();
    for (const item of hierarchicalData) {
      if (!item.parent_id) {
        continue;
      }
      relationships.push({
        id: uuidv4(),
        source_id: item.parent_id,
        target_id: item.id,
        type: 'PARENT_OF',
        properties: {
          level_difference: 1,
          child_level: item.level
        },
        weight: 1.0,
        confidence: 1.0,
        created_at: new Date()
      });
      const group = childrenByParent.get(item.parent_id);
      if (group) {
        group.push(item.id);
      } else {
        childrenByParent.set(item.parent_id, [item.id]);
      }
    }

    // Sibling edges: one edge per unordered pair, pointing from the earlier
    // item to the later one in input order.
    for (const siblings of childrenByParent.values()) {
      siblings.forEach((sourceId, index) => {
        for (const targetId of siblings.slice(index + 1)) {
          relationships.push({
            id: uuidv4(),
            source_id: sourceId,
            target_id: targetId,
            type: 'SIBLING',
            properties: { relationship_type: 'sibling' },
            weight: 0.6,
            confidence: 0.8,
            created_at: new Date()
          });
        }
      });
    }

    return {
      nodes,
      relationships,
      root_nodes: rootNodes,
      levels: maxLevel + 1,
      type: 'trees'
    };
  }

  /**
   * Knowledge graph builder: one node per distinct subject/object string and
   * one relationship per triple. Nodes are classified as `entity` or
   * `concept` by `determineNodeType`.
   *
   * `metadata.weight` and `metadata.confidence` default to 1.0 and 0.9 only
   * when they are null/undefined, so an explicit 0 is preserved.
   *
   * @param {Array<{subject: string, predicate: string, object: string, metadata?: object}>} triples
   * @returns {Promise<{nodes: object[], relationships: object[], entities: string[], concepts: string[], type: 'knowledge'}>}
   */
  async buildKnowledgeGraph(triples) {
    const nodes = [];
    const relationships = [];
    const entities = new Set();
    const concepts = new Set();
    const seenNodes = new Set();

    // Creates the node for `text` the first time it is encountered; later
    // occurrences reuse the existing node (and its original metadata).
    const ensureNode = async (text, nodeMetadata) => {
      if (seenNodes.has(text)) {
        return;
      }
      const node = {
        id: text,
        type: this.determineNodeType(text),
        label: this.formatLabel(text),
        properties: {
          original_text: text,
          ...nodeMetadata
        },
        embedding: await this.embeddingGenerator.generateEmbedding(text),
        metadata: nodeMetadata,
        created_at: new Date(),
        updated_at: new Date()
      };
      nodes.push(node);
      seenNodes.add(text);
      if (node.type === 'entity') {
        entities.add(text);
      }
      if (node.type === 'concept') {
        concepts.add(text);
      }
    };

    for (const triple of triples) {
      await ensureNode(triple.subject, triple.metadata?.subject);
      await ensureNode(triple.object, triple.metadata?.object);

      relationships.push({
        id: uuidv4(),
        source_id: triple.subject,
        target_id: triple.object,
        type: this.normalizeRelationType(triple.predicate),
        properties: {
          original_predicate: triple.predicate,
          ...triple.metadata?.relationship
        },
        // `??` rather than `||`: a weight or confidence of 0 is a valid value.
        weight: triple.metadata?.weight ?? 1.0,
        confidence: triple.metadata?.confidence ?? 0.9,
        metadata: triple.metadata?.relationship,
        created_at: new Date()
      });
    }

    return {
      nodes,
      relationships,
      entities: Array.from(entities),
      concepts: Array.from(concepts),
      type: 'knowledge'
    };
  }

  /**
   * DAG builder for workflow planning: one `concept` node per task and one
   * `DEPENDS_ON` edge from each dependency to the task that requires it.
   * Entry points are tasks without dependencies; exit points are tasks that
   * no other task depends on. A task without a `dependencies` array is
   * treated as having none.
   *
   * @param {Array<{id: string, name: string, description?: string, dependencies?: string[], metadata?: object}>} tasks
   * @returns {Promise<{nodes: object[], relationships: object[], entry_points: string[], exit_points: string[], type: 'dag'}>}
   */
  async buildDAG(tasks) {
    const nodes = [];
    const relationships = [];
    const entryPoints = [];

    for (const task of tasks) {
      // Skip a missing description instead of embedding the text "undefined".
      const embeddingText = [task.name, task.description].filter(Boolean).join(' ');
      nodes.push({
        id: task.id,
        type: 'concept', // Tasks are conceptual nodes
        label: task.name,
        properties: {
          description: task.description,
          task_type: 'workflow_task',
          ...task.metadata
        },
        embedding: await this.embeddingGenerator.generateEmbedding(embeddingText),
        metadata: task.metadata,
        created_at: new Date(),
        updated_at: new Date()
      });

      const dependencies = task.dependencies ?? [];
      if (dependencies.length === 0) {
        entryPoints.push(task.id);
      }
    }

    for (const task of tasks) {
      for (const depId of task.dependencies ?? []) {
        relationships.push({
          id: uuidv4(),
          source_id: depId,
          target_id: task.id,
          type: 'DEPENDS_ON',
          properties: { dependency_type: 'prerequisite' },
          weight: 1.0,
          confidence: 1.0,
          created_at: new Date()
        });
      }
    }

    // Every edge source is a task that something else depends on; tasks that
    // never appear as a source are the exit points.
    const tasksWithDependents = new Set(relationships.map(rel => rel.source_id));
    const exitPoints = tasks
      .filter(task => !tasksWithDependents.has(task.id))
      .map(task => task.id);

    return {
      nodes,
      relationships,
      entry_points: entryPoints,
      exit_points: exitPoints,
      type: 'dag'
    };
  }

  // Helper methods

  /**
   * Appends a `SIMILAR_TO` relationship to `relationships` for every pair of
   * nodes whose embedding similarity (via `EmbeddingGenerator.cosineSimilarity`)
   * is at least `threshold`. Nodes without an embedding are skipped. Compares
   * every pair, so the work grows quadratically with the number of nodes.
   *
   * @param {object[]} chunkNodes - Nodes to compare pairwise.
   * @param {object[]} relationships - Array mutated in place.
   * @param {number} [threshold=0.7] - Minimum similarity for an edge.
   * @returns {Promise<void>}
   */
  async addSimilarityRelationships(chunkNodes, relationships, threshold = 0.7) {
    for (let i = 0; i < chunkNodes.length; i++) {
      const first = chunkNodes[i];
      if (!first.embedding) {
        continue;
      }
      for (let j = i + 1; j < chunkNodes.length; j++) {
        const second = chunkNodes[j];
        if (!second.embedding) {
          continue;
        }
        const similarity = EmbeddingGenerator.cosineSimilarity(first.embedding, second.embedding);
        if (similarity >= threshold) {
          relationships.push({
            id: uuidv4(),
            source_id: first.id,
            target_id: second.id,
            type: 'SIMILAR_TO',
            properties: { similarity_score: similarity },
            weight: similarity,
            confidence: similarity,
            created_at: new Date()
          });
        }
      }
    }
  }

  /**
   * Simple entity extraction (not a real NER): collects runs of capitalized
   * words and the contents of double-quoted strings, de-duplicated in order
   * of first appearance.
   *
   * @param {string} text
   * @returns {string[]}
   */
  extractSimpleEntities(text) {
    const capitalized = text.match(/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g) ?? [];
    // Matches include the surrounding quotes; strip them.
    const quoted = (text.match(/"([^"]+)"/g) ?? []).map(match => match.slice(1, -1));
    return [...new Set([...capitalized, ...quoted])];
  }

  /**
   * Heuristic node classification: text is a `concept` when it contains one
   * of the abstract keywords (case-insensitive) or does not start with an
   * ASCII capital letter; otherwise it is an `entity`.
   *
   * @param {string} text
   * @returns {'concept' | 'entity'}
   */
  determineNodeType(text) {
    const conceptKeywords = [
      'concept', 'idea', 'theory', 'principle', 'method', 'approach',
      'strategy', 'technique', 'process', 'system', 'framework'
    ];
    const lowered = text.toLowerCase();
    if (conceptKeywords.some(keyword => lowered.includes(keyword))) {
      return 'concept';
    }
    return /^[A-Z]/.test(text) ? 'entity' : 'concept';
  }

  /**
   * Turns an identifier-like string into a title-cased label: underscores
   * become spaces, camelCase boundaries are split, and each word is
   * capitalized with the rest lower-cased.
   *
   * Spaces are inserted only at lower/digit -> upper boundaries and before
   * the last capital of an upper-case run followed by lower case, so
   * acronyms stay together ("XMLParser" -> "Xml Parser", "NASA" -> "Nasa").
   *
   * @param {string} text
   * @returns {string}
   */
  formatLabel(text) {
    return text
      .replace(/_/g, ' ')
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      .trim()
      .split(/\s+/)
      .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
      .join(' ');
  }

  /**
   * Normalizes a predicate into an UPPER_SNAKE_CASE relationship type:
   * upper-cases it, replaces every run of characters outside A-Z/0-9 with a
   * single underscore and strips leading/trailing underscores. Standard
   * predicates such as "is a" or "part of" therefore come out as `IS_A`,
   * `PART_OF`, etc. A predicate with no alphanumeric characters falls back
   * to `RELATED_TO` so relationships never carry an empty type.
   *
   * @param {string} predicate
   * @returns {string}
   */
  normalizeRelationType(predicate) {
    const normalized = predicate
      .toUpperCase()
      .replace(/[^A-Z0-9]/g, '_')
      .replace(/_+/g, '_')
      .replace(/^_|_$/g, '');
    return normalized || 'RELATED_TO';
  }
}
//# sourceMappingURL=graph-builders.js.map