@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
402 lines (401 loc) • 13.3 kB
JavaScript
/**
* Graph RAG Implementation
*
* Knowledge graph-based retrieval augmented generation.
* Creates semantic relationships between document chunks and uses
* random walk with restart for context-aware retrieval.
*/
import { randomUUID } from "crypto";
import { logger } from "../../utils/logger.js";
/**
* Graph-based Retrieval Augmented Generation
*
* Creates a knowledge graph from document chunks where nodes represent
* documents and edges represent semantic relationships based on
* embedding similarity.
*/
export class GraphRAG {
nodes = new Map();
edges = new Map();
dimension;
threshold;
constructor(config) {
this.dimension = config?.dimension ?? 1536;
this.threshold = config?.threshold ?? 0.7;
}
/**
* Create a knowledge graph from document chunks and embeddings
*
* @param chunks - Array of document chunks
* @param embeddings - Corresponding embedding vectors
*/
createGraph(chunks, embeddings) {
if (chunks.length !== embeddings.length) {
throw new Error("Chunks and embeddings arrays must have the same length");
}
// Clear existing graph
this.nodes.clear();
this.edges.clear();
// Create nodes
const nodeIds = [];
for (let i = 0; i < chunks.length; i++) {
const id = randomUUID();
nodeIds.push(id);
this.nodes.set(id, {
id,
content: chunks[i].text,
metadata: chunks[i].metadata || {},
embedding: embeddings[i].vector,
});
}
// Create edges based on semantic similarity
for (let i = 0; i < nodeIds.length; i++) {
const edges = [];
const nodeA = this.nodes.get(nodeIds[i]);
if (!nodeA?.embedding) {
continue;
}
for (let j = 0; j < nodeIds.length; j++) {
if (i === j) {
continue;
}
const nodeB = this.nodes.get(nodeIds[j]);
if (!nodeB?.embedding) {
continue;
}
const similarity = this.cosineSimilarity(nodeA.embedding, nodeB.embedding);
if (similarity >= this.threshold) {
edges.push({
source: nodeIds[i],
target: nodeIds[j],
weight: similarity,
type: "semantic",
});
}
}
// Sort edges by weight descending
edges.sort((a, b) => b.weight - a.weight);
this.edges.set(nodeIds[i], edges);
}
logger.info("[GraphRAG] Graph created", {
nodes: this.nodes.size,
totalEdges: Array.from(this.edges.values()).reduce((sum, e) => sum + e.length, 0),
threshold: this.threshold,
});
}
/**
* Query the graph using random walk with restart
*
* @param params - Query parameters including embedding vector
* @returns Ranked nodes by relevance
*/
query(params) {
const { query, topK = 10, randomWalkSteps = 100, restartProb = 0.15, } = params;
if (this.nodes.size === 0) {
return [];
}
// Calculate initial similarities to query
const similarities = new Map();
for (const [id, node] of this.nodes) {
if (node.embedding) {
similarities.set(id, this.cosineSimilarity(query, node.embedding));
}
}
// Find starting nodes (most similar to query)
const sortedNodes = Array.from(similarities.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, Math.min(5, this.nodes.size));
if (sortedNodes.length === 0) {
return [];
}
// Random walk with restart
const visitCounts = new Map();
const startNodeIds = sortedNodes.map(([id]) => id);
const startProbs = this.normalizeProbs(sortedNodes.map(([, sim]) => sim));
for (let step = 0; step < randomWalkSteps; step++) {
// Choose starting node based on query similarity
const startIdx = this.weightedRandomChoice(startProbs);
let currentNode = startNodeIds[startIdx];
// Walk with restart probability
if (Math.random() >= restartProb) {
const edges = this.edges.get(currentNode) || [];
if (edges.length > 0) {
// Choose next node based on edge weights
const edgeWeights = edges.map((e) => e.weight);
const normalizedWeights = this.normalizeProbs(edgeWeights);
const nextIdx = this.weightedRandomChoice(normalizedWeights);
currentNode = edges[nextIdx].target;
}
}
// Update visit count
visitCounts.set(currentNode, (visitCounts.get(currentNode) || 0) + 1);
}
// Combine visit frequency with query similarity for final ranking
const scores = new Map();
const maxVisits = Math.max(...visitCounts.values());
for (const [id] of this.nodes) {
const visitScore = (visitCounts.get(id) || 0) / maxVisits;
const similarityScore = similarities.get(id) || 0;
// Weighted combination
scores.set(id, 0.6 * similarityScore + 0.4 * visitScore);
}
// Sort and return top K
const rankedNodes = Array.from(scores.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, topK)
.flatMap(([id, score]) => {
const node = this.nodes.get(id);
if (!node) {
return [];
}
return [
{
id,
content: node.content,
metadata: node.metadata,
score,
},
];
});
logger.debug("[GraphRAG] Query completed", {
topK,
resultsCount: rankedNodes.length,
topScore: rankedNodes[0]?.score,
});
return rankedNodes;
}
/**
* Add a single node to the graph
*
* @param chunk - Document chunk
* @param embedding - Embedding vector
* @returns Node ID
*/
addNode(chunk, embedding) {
const id = randomUUID();
this.nodes.set(id, {
id,
content: chunk.text,
metadata: chunk.metadata || {},
embedding: embedding.vector,
});
// Create edges to existing nodes
const edges = [];
for (const [existingId, existingNode] of this.nodes) {
if (existingId === id) {
continue;
}
if (!existingNode.embedding) {
continue;
}
const similarity = this.cosineSimilarity(embedding.vector, existingNode.embedding);
if (similarity >= this.threshold) {
edges.push({
source: id,
target: existingId,
weight: similarity,
type: "semantic",
});
// Add reverse edge
const existingEdges = this.edges.get(existingId) || [];
existingEdges.push({
source: existingId,
target: id,
weight: similarity,
type: "semantic",
});
this.edges.set(existingId, existingEdges);
}
}
this.edges.set(id, edges);
return id;
}
/**
* Remove a node and its edges from the graph
*
* @param id - Node ID to remove
* @returns True if node was removed
*/
removeNode(id) {
if (!this.nodes.has(id)) {
return false;
}
// Remove node
this.nodes.delete(id);
this.edges.delete(id);
// Remove edges pointing to this node
for (const [nodeId, edges] of this.edges) {
this.edges.set(nodeId, edges.filter((e) => e.target !== id));
}
return true;
}
/**
* Get graph statistics
*/
getStats() {
const edgeCount = Array.from(this.edges.values()).reduce((sum, e) => sum + e.length, 0);
return {
nodeCount: this.nodes.size,
edgeCount,
avgDegree: this.nodes.size > 0 ? edgeCount / this.nodes.size : 0,
threshold: this.threshold,
};
}
/**
* Get a node by ID
*/
getNode(id) {
return this.nodes.get(id);
}
/**
* Get all nodes
*/
getAllNodes() {
return Array.from(this.nodes.values());
}
/**
* Get edges for a node
*/
getEdges(nodeId) {
return this.edges.get(nodeId) || [];
}
/**
* Find connected components in the graph
*/
findConnectedComponents() {
const visited = new Set();
const components = [];
for (const nodeId of this.nodes.keys()) {
if (visited.has(nodeId)) {
continue;
}
const component = [];
const queue = [nodeId];
while (queue.length > 0) {
const current = queue.shift();
if (current === undefined || visited.has(current)) {
continue;
}
visited.add(current);
component.push(current);
const edges = this.edges.get(current) || [];
for (const edge of edges) {
if (!visited.has(edge.target)) {
queue.push(edge.target);
}
}
}
components.push(component);
}
return components;
}
/**
* Calculate cosine similarity between two vectors
*/
cosineSimilarity(a, b) {
if (a.length !== b.length) {
throw new Error("Vectors must have the same dimension");
}
let dotProduct = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < a.length; i++) {
dotProduct += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
return denominator === 0 ? 0 : dotProduct / denominator;
}
/**
* Normalize probabilities to sum to 1
*/
normalizeProbs(probs) {
const sum = probs.reduce((a, b) => a + b, 0);
return sum === 0
? probs.map(() => 1 / probs.length)
: probs.map((p) => p / sum);
}
/**
* Weighted random choice
*/
weightedRandomChoice(weights) {
const random = Math.random();
let cumulative = 0;
for (let i = 0; i < weights.length; i++) {
cumulative += weights[i];
if (random <= cumulative) {
return i;
}
}
return weights.length - 1;
}
/**
* Update similarity threshold and rebuild edges
*/
updateThreshold(threshold) {
this.threshold = threshold;
// Rebuild edges with new threshold
this.edges.clear();
const nodeIds = Array.from(this.nodes.keys());
for (let i = 0; i < nodeIds.length; i++) {
const edges = [];
const nodeA = this.nodes.get(nodeIds[i]);
if (!nodeA?.embedding) {
continue;
}
for (let j = 0; j < nodeIds.length; j++) {
if (i === j) {
continue;
}
const nodeB = this.nodes.get(nodeIds[j]);
if (!nodeB?.embedding) {
continue;
}
const similarity = this.cosineSimilarity(nodeA.embedding, nodeB.embedding);
if (similarity >= threshold) {
edges.push({
source: nodeIds[i],
target: nodeIds[j],
weight: similarity,
type: "semantic",
});
}
}
edges.sort((a, b) => b.weight - a.weight);
this.edges.set(nodeIds[i], edges);
}
}
/**
* Serialize graph to JSON
*/
toJSON() {
return {
nodes: Array.from(this.nodes.values()),
edges: Array.from(this.edges.entries()).map(([source, edges]) => ({
source,
edges,
})),
config: {
dimension: this.dimension,
threshold: this.threshold,
},
};
}
/**
* Load graph from JSON
*/
static fromJSON(json) {
const graph = new GraphRAG({
dimension: json.config.dimension,
threshold: json.config.threshold,
});
for (const node of json.nodes) {
graph.nodes.set(node.id, node);
}
for (const { source, edges } of json.edges) {
graph.edges.set(source, edges);
}
return graph;
}
}