zrald
Version:
Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents
259 lines • 12.3 kB
JavaScript
import { BaseOperator } from './base-operator.js';
import { FromRelOperatorConfigSchema, OccurrenceOperatorConfigSchema } from '../types/graph.js';
/**
* FromRel Operator - Text chunk tracing
* Traces backward from relationships to find the original text chunks that established those connections
*/
export class FromRelOperator extends BaseOperator {
  /**
   * @param {object} graphDb - Graph store; this class calls `getNode`,
   *   `getNodesByType` and `getRelationships` on it.
   * @param {object} vectorStore - Vector store handle, forwarded to the base class.
   */
  constructor(graphDb, vectorStore) {
    super('FromRelOperator', 'chunk', graphDb, vectorStore);
  }

  /**
   * Resolves every requested relationship id, collects its endpoint nodes and
   * the chunks found for it, scores each chunk once (on first sighting) and
   * returns the chunks ordered by descending score.
   *
   * @param {object} config - Raw operator config; passed through the inherited
   *   `validateConfig` with `FromRelOperatorConfigSchema`. The validated object
   *   is read for `relationship_ids`, `include_context` and `context_window`.
   * @returns {Promise<*>} Whatever the inherited `createResult` builds from
   *   (unique context nodes, relationships, chunks, scores, metadata).
   */
  async execute(config) {
    const validatedConfig = await this.validateConfig(config, FromRelOperatorConfigSchema);
    const sourceChunks = [];
    const contextNodes = [];
    const sourceRelationships = [];
    const scores = {};
    const seenChunkIds = new Set();

    for (const relationshipId of validatedConfig.relationship_ids) {
      const relationships = await this.findRelationshipById(relationshipId);
      for (const relationship of relationships) {
        sourceRelationships.push(relationship);

        const sourceNode = await this.graphDb.getNode(relationship.source_id);
        const targetNode = await this.graphDb.getNode(relationship.target_id);
        if (sourceNode) {
          contextNodes.push(sourceNode);
        }
        if (targetNode) {
          contextNodes.push(targetNode);
        }

        const relatedChunks = await this.findChunksForRelationship(
          relationship,
          validatedConfig.include_context,
          validatedConfig.context_window,
        );
        for (const chunk of relatedChunks) {
          // A chunk reached through several relationships keeps the score
          // computed for the first relationship that surfaced it.
          if (seenChunkIds.has(chunk.id)) {
            continue;
          }
          seenChunkIds.add(chunk.id);
          sourceChunks.push(chunk);
          scores[chunk.id] = this.calculateChunkRelevanceScore(chunk, relationship, sourceNode, targetNode);
        }
      }
    }

    // Highest relevance first; a missing/NaN score sorts as 0.
    sourceChunks.sort((a, b) => (scores[b.id] || 0) - (scores[a.id] || 0));

    // Endpoints shared by several relationships are collapsed by node id.
    const uniqueNodes = [...new Map(contextNodes.map((node) => [node.id, node])).values()];

    return this.createResult(uniqueNodes, sourceRelationships, sourceChunks, scores, {
      tracing_method: 'relationship_to_chunks',
      relationships_processed: validatedConfig.relationship_ids.length,
      context_included: validatedConfig.include_context,
      context_window: validatedConfig.context_window,
      unique_chunks_found: sourceChunks.length,
    });
  }

  /**
   * Looks a relationship up by id by scanning the relationships of every node
   * of the known node types. This is a brute-force fallback until the graph
   * store offers a direct lookup.
   *
   * The scan stops at the first match: the result can never hold more than one
   * entry for a given id, so continuing would only cost extra store calls.
   *
   * @param {string} relationshipId - Id to search for.
   * @returns {Promise<object[]>} `[relationship]` when found, otherwise `[]`.
   */
  async findRelationshipById(relationshipId) {
    const allNodeTypes = ['entity', 'concept', 'document', 'chunk', 'summary'];
    for (const nodeType of allNodeTypes) {
      const nodes = await this.graphDb.getNodesByType(nodeType);
      for (const node of nodes) {
        const nodeRels = await this.graphDb.getRelationships(node.id, 'both');
        const match = nodeRels.find((rel) => rel.id === relationshipId);
        if (match) {
          return [match];
        }
      }
    }
    return [];
  }

  /**
   * Collects the chunks associated with a relationship by searching for the
   * labels of its two endpoint nodes, optionally followed by each hit's
   * neighbouring context chunks.
   *
   * NOTE: `searchChunksForTerms` and `getContextChunks` are currently stubs
   * returning `[]`, so this yields no chunks until they are implemented.
   *
   * @param {object} relationship - Read for `source_id` and `target_id`.
   * @param {boolean} includeContext - When truthy, context chunks are appended
   *   after each matching chunk.
   * @param {number} contextWindow - Forwarded to `getContextChunks`.
   * @returns {Promise<object[]>} Matching chunks (may contain duplicates; the
   *   caller de-duplicates by chunk id). Empty when either endpoint is missing.
   */
  async findChunksForRelationship(relationship, includeContext, contextWindow) {
    const sourceNode = await this.graphDb.getNode(relationship.source_id);
    const targetNode = await this.graphDb.getNode(relationship.target_id);
    if (!sourceNode || !targetNode) {
      return [];
    }

    const chunks = [];
    const relevantChunks = await this.searchChunksForTerms([sourceNode.label, targetNode.label]);
    for (const chunk of relevantChunks) {
      chunks.push(chunk);
      if (includeContext) {
        const contextChunks = await this.getContextChunks(chunk, contextWindow);
        chunks.push(...contextChunks);
      }
    }
    return chunks;
  }

  /**
   * Placeholder for a text/vector search over stored chunks.
   *
   * @param {string[]} terms - Search terms (entity labels).
   * @returns {Promise<object[]>} Always `[]` for now.
   */
  async searchChunksForTerms(terms) {
    // TODO: back this with a real text or vector search.
    return [];
  }

  /**
   * Placeholder for fetching the chunks surrounding `chunk` in its document.
   *
   * @param {object} chunk - Chunk whose neighbours are wanted.
   * @param {number} contextWindow - Intended number of positions to look at on
   *   each side of the chunk.
   * @returns {Promise<object[]>} Always `[]` for now.
   */
  async getContextChunks(chunk, contextWindow) {
    // TODO: look up chunks at chunk.position ± 1..contextWindow (positions >= 0)
    // within the same document once chunk retrieval by position exists.
    return [];
  }

  /**
   * Scores how well a chunk supports a relationship, in the range [0, 1].
   *
   * Score = 0.5 base
   *       + 0.3 × relationship.confidence
   *       + 0.2 if the chunk text contains the source label (case-insensitive)
   *       + 0.2 if it contains the target label (case-insensitive)
   *       + 0.1 × chunk.metadata.importance
   * clamped to [0, 1].
   *
   * Missing or non-numeric `confidence` / `importance` contribute nothing
   * instead of turning the score into NaN; missing `content` or `label` simply
   * means "no mention" rather than a thrown TypeError. An empty label never
   * counts as a mention.
   *
   * @param {object} chunk - Read for `content` and `metadata.importance`.
   * @param {object} relationship - Read for `confidence`.
   * @param {object|null|undefined} sourceNode - Read for `label`.
   * @param {object|null|undefined} targetNode - Read for `label`.
   * @returns {number} Relevance score in [0, 1].
   */
  calculateChunkRelevanceScore(chunk, relationship, sourceNode, targetNode) {
    const content = typeof chunk?.content === 'string' ? chunk.content.toLowerCase() : '';
    const mentions = (node) => {
      const label = typeof node?.label === 'string' ? node.label.toLowerCase() : '';
      return label !== '' && content.includes(label);
    };

    let score = 0.5;

    const confidence = Number(relationship?.confidence);
    if (Number.isFinite(confidence)) {
      score += confidence * 0.3;
    }
    if (mentions(sourceNode)) {
      score += 0.2;
    }
    if (mentions(targetNode)) {
      score += 0.2;
    }
    const importance = Number(chunk?.metadata?.importance);
    if (Number.isFinite(importance)) {
      score += importance * 0.1;
    }

    return Math.min(1.0, Math.max(0.0, score));
  }
}
/**
* Occurrence Operator - Entity co-occurrence analysis
* Identifies passages where specified entities appear together, capturing contextual co-occurrence patterns
*/
export class OccurrenceOperator extends BaseOperator {
  /**
   * @param {object} graphDb - Graph store; this class calls `getNode` on it.
   * @param {object} vectorStore - Vector store handle, forwarded to the base class.
   */
  constructor(graphDb, vectorStore) {
    super('OccurrenceOperator', 'chunk', graphDb, vectorStore);
  }

  /**
   * Resolves the configured entity ids to nodes, finds the chunks in which at
   * least two of them appear close together, and returns those chunks ordered
   * by descending co-occurrence score.
   *
   * @param {object} config - Raw operator config; passed through the inherited
   *   `validateConfig` with `OccurrenceOperatorConfigSchema`. The validated
   *   object is read for `entities`, `co_occurrence_window`, `min_frequency`
   *   and `include_positions`.
   * @returns {Promise<*>} Whatever the inherited `createResult` builds. When
   *   fewer than two entity ids resolve to nodes, the result carries an
   *   `error` message in its metadata and no chunks.
   */
  async execute(config) {
    const validatedConfig = await this.validateConfig(config, OccurrenceOperatorConfigSchema);

    // Ids that do not resolve to a node are silently dropped.
    const entityNodes = [];
    for (const entityId of validatedConfig.entities) {
      const node = await this.graphDb.getNode(entityId);
      if (node) {
        entityNodes.push(node);
      }
    }

    if (entityNodes.length < 2) {
      return this.createResult(entityNodes, [], [], {}, {
        error: 'Need at least 2 entities for co-occurrence analysis',
        entities_found: entityNodes.length,
      });
    }

    const coOccurrenceResults = await this.findCoOccurrenceChunks(
      entityNodes,
      validatedConfig.co_occurrence_window,
      validatedConfig.min_frequency,
      validatedConfig.include_positions,
    );

    const coOccurrenceChunks = [];
    const scores = {};
    const coOccurrenceStats = {};
    for (const result of coOccurrenceResults) {
      coOccurrenceChunks.push(result.chunk);
      scores[result.chunk.id] = result.score;
      // Per-chunk detail is only reported when positions were requested.
      if (validatedConfig.include_positions) {
        coOccurrenceStats[result.chunk.id] = {
          entity_positions: result.positions,
          co_occurrence_count: result.occurrenceCount,
          window_matches: result.windowMatches,
        };
      }
    }

    // Strongest co-occurrence first.
    coOccurrenceChunks.sort((a, b) => (scores[b.id] || 0) - (scores[a.id] || 0));

    return this.createResult(entityNodes, [], coOccurrenceChunks, scores, {
      analysis_type: 'entity_co_occurrence',
      entities_analyzed: validatedConfig.entities,
      co_occurrence_window: validatedConfig.co_occurrence_window,
      min_frequency: validatedConfig.min_frequency,
      total_co_occurrences: coOccurrenceChunks.length,
      position_data_included: validatedConfig.include_positions,
      co_occurrence_stats: coOccurrenceStats,
    });
  }

  /**
   * Runs the co-occurrence analysis over every chunk returned by
   * `getAllChunks` and keeps those whose co-occurrence count reaches
   * `minFrequency`.
   *
   * @param {object[]} entities - Entity nodes (read for `id` and `label`).
   * @param {number} window - Maximum distance, in characters, between two
   *   mentions for them to count as co-occurring.
   * @param {number} minFrequency - Minimum co-occurrence count for a chunk to
   *   be kept.
   * @param {boolean} includePositions - Whether to carry per-entity mention
   *   offsets in each result.
   * @returns {Promise<Array<{chunk: object, score: number, occurrenceCount: number, positions: (object|undefined), windowMatches: number}>>}
   */
  async findCoOccurrenceChunks(entities, window, minFrequency, includePositions) {
    const results = [];
    const allChunks = await this.getAllChunks();
    for (const chunk of allChunks) {
      const analysis = this.analyzeChunkCoOccurrence(chunk, entities, window, includePositions);
      if (analysis.occurrenceCount >= minFrequency) {
        results.push({
          chunk,
          score: analysis.score,
          occurrenceCount: analysis.occurrenceCount,
          positions: includePositions ? analysis.positions : undefined,
          windowMatches: analysis.windowMatches,
        });
      }
    }
    return results;
  }

  /**
   * Placeholder for retrieving every stored chunk.
   *
   * @returns {Promise<object[]>} Always `[]` for now, so `execute` currently
   *   reports no co-occurrences.
   */
  async getAllChunks() {
    // TODO: back this with real chunk retrieval.
    return [];
  }

  /**
   * Counts, for one chunk, how many pairs of mentions of two *different*
   * entities start within `window` characters of each other.
   *
   * Matching is case-insensitive and non-overlapping; offsets refer to the
   * lower-cased chunk text. Entities with an empty or non-string label are
   * treated as having no mentions (searching for `''` would otherwise never
   * terminate), and a chunk without string `content` has no mentions at all.
   * An entity id listed more than once is only paired once, so an entity is
   * never counted as co-occurring with itself.
   *
   * Score = (pairs / total mentions) × ln(pairs + 1), capped at 1; 0 when
   * there are no mentions.
   *
   * @param {object} chunk - Read for `content`.
   * @param {object[]} entities - Entity nodes (read for `id` and `label`).
   * @param {number} window - Maximum character distance between mention starts.
   * @param {boolean} includePositions - When truthy, the per-entity offsets are
   *   returned under `positions`; otherwise `positions` is `undefined`.
   * @returns {{score: number, occurrenceCount: number, positions: (object|undefined), windowMatches: number}}
   *   `windowMatches` always equals `occurrenceCount`.
   */
  analyzeChunkCoOccurrence(chunk, entities, window, includePositions) {
    const text = typeof chunk?.content === 'string' ? chunk.content.toLowerCase() : '';

    // Mention offsets per entity id.
    const positions = {};
    for (const entity of entities) {
      const label = typeof entity.label === 'string' ? entity.label.toLowerCase() : '';
      const hits = [];
      if (label !== '') {
        let index = text.indexOf(label);
        while (index !== -1) {
          hits.push(index);
          index = text.indexOf(label, index + label.length);
        }
      }
      positions[entity.id] = hits;
    }

    // Pair up distinct entity ids only.
    const entityIds = [...new Set(entities.map((entity) => entity.id))];
    let totalCoOccurrences = 0;
    for (let i = 0; i < entityIds.length; i++) {
      const firstPositions = positions[entityIds[i]] || [];
      for (let j = i + 1; j < entityIds.length; j++) {
        const secondPositions = positions[entityIds[j]] || [];
        for (const pos1 of firstPositions) {
          for (const pos2 of secondPositions) {
            if (Math.abs(pos1 - pos2) <= window) {
              totalCoOccurrences += 1;
            }
          }
        }
      }
    }

    const totalEntityMentions = Object.values(positions).reduce((sum, posList) => sum + posList.length, 0);
    const score = totalEntityMentions > 0
      ? (totalCoOccurrences / totalEntityMentions) * Math.log(totalCoOccurrences + 1)
      : 0;

    return {
      score: Math.min(1.0, score),
      occurrenceCount: totalCoOccurrences,
      positions: includePositions ? positions : undefined,
      windowMatches: totalCoOccurrences,
    };
  }
}
//# sourceMappingURL=chunk-operators.js.map