@graphty/algorithms

Graph algorithms library for browser environments implemented in TypeScript
/**
 * Synergistic Deep Graph Clustering (SynC) Algorithm
 *
 * This algorithm combines representation learning with structure augmentation
 * for improved clustering performance on graphs. It jointly optimizes node
 * embeddings and cluster assignments while preserving graph structure.
 *
 * Based on: "Synergistic Deep Graph Clustering" (arXiv:2406.15797, June 2024)
 *
 * @param graph - Input graph to cluster
 * @param config - Configuration options
 * @returns Clustering result with assignments and embeddings
 */
export function syncClustering(graph, config) {
    const {
        numClusters,
        maxIterations = 100,
        tolerance = 1e-6,
        seed = 42,
        learningRate = 0.01,
        lambda = 0.1,
    } = config;

    const nodes = Array.from(graph.nodes());
    const nodeCount = nodes.length;

    // Validate inputs before touching global state, so an early return or
    // throw cannot leave Math.random replaced
    if (nodeCount === 0) {
        return {
            clusters: new Map(),
            loss: 0,
            iterations: 0,
            embeddings: new Map(),
            converged: true,
        };
    }
    if (numClusters <= 0 || numClusters > nodeCount) {
        throw new Error(`Invalid number of clusters: ${String(numClusters)}. Must be between 1 and ${String(nodeCount)}`);
    }

    // Set random seed for reproducibility (save original)
    const originalRandom = Math.random;
    Math.random = seedRandom(seed);

    // Initialize node embeddings (simplified version using graph features)
    const embeddingDim = Math.min(64, nodeCount);
    const embeddings = new Map();
    for (const node of nodes) {
        const embedding = initializeNodeEmbedding(graph, node.id, embeddingDim);
        embeddings.set(node.id, embedding);
    }

    // Initialize cluster centers
    const clusterCenters = initializeClusterCenters(embeddings, numClusters);

    let previousLoss = Infinity;
    let iterations = 0;
    let converged = false;

    for (iterations = 0; iterations < maxIterations; iterations++) {
        // E-step: Assign nodes to clusters
        const clusters = new Map();
        for (const node of nodes) {
            const nodeEmbedding = embeddings.get(node.id);
            if (!nodeEmbedding) {
                continue;
            }
            let bestCluster = 0;
            let minDistance = Infinity;
            for (let k = 0; k < numClusters; k++) {
                const center = clusterCenters[k];
                if (!center) {
                    continue;
                }
                const distance = euclideanDistance(nodeEmbedding, center);
                if (distance < minDistance) {
                    minDistance = distance;
                    bestCluster = k;
                }
            }
            clusters.set(node.id, bestCluster);
        }

        // M-step: Update embeddings and cluster centers
        updateEmbeddings(graph, embeddings, clusters, learningRate, lambda);
        updateClusterCenters(embeddings, clusters, clusterCenters, numClusters);

        // Calculate current loss and check for convergence; record the loss
        // before breaking so the returned loss is the final one
        const currentLoss = calculateLoss(graph, embeddings, clusters, clusterCenters, lambda);
        const delta = Math.abs(previousLoss - currentLoss);
        previousLoss = currentLoss;
        if (delta < tolerance) {
            converged = true;
            break;
        }
    }

    // Final cluster assignment
    const finalClusters = new Map();
    for (const node of nodes) {
        const nodeEmbedding = embeddings.get(node.id);
        if (!nodeEmbedding) {
            continue;
        }
        let bestCluster = 0;
        let minDistance = Infinity;
        for (let k = 0; k < numClusters; k++) {
            const center = clusterCenters[k];
            if (!center) {
                continue;
            }
            const distance = euclideanDistance(nodeEmbedding, center);
            if (distance < minDistance) {
                minDistance = distance;
                bestCluster = k;
            }
        }
        finalClusters.set(node.id, bestCluster);
    }

    // Restore original random function
    Math.random = originalRandom;

    return {
        clusters: finalClusters,
        loss: previousLoss,
        // On a break the increment for that pass has not run, so add 1;
        // on a normal exit `iterations` already equals the passes completed
        iterations: converged ? iterations + 1 : iterations,
        embeddings,
        converged,
    };
}
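/*
 * Example usage, as a minimal sketch. The construction of `graph` is assumed
 * (it is not shown in this file); syncClustering only requires that it expose
 * nodes() yielding objects with an `id`, neighbors(id), degree(id), and a
 * `nodeCount` property.
 *
 *   const result = syncClustering(graph, {
 *       numClusters: 3,     // required: 1..nodeCount
 *       maxIterations: 200, // optional (default 100)
 *       seed: 7,            // optional (default 42); a fixed seed reproduces the run
 *   });
 *   result.clusters;  // Map<nodeId, clusterIndex>
 *   result.loss;      // final value of the joint objective
 *   result.converged; // true if the loss change fell below `tolerance`
 */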
/**
 * Initialize node embedding based on graph structure and features
 */
function initializeNodeEmbedding(graph, nodeId, dim) {
    const embedding = new Array(dim).fill(0);
    // Use node degree as a base feature
    const degree = graph.degree(nodeId);
    const normalizedDegree = degree / Math.max(1, graph.nodeCount - 1);
    // Initialize with small random values influenced by graph structure
    for (let i = 0; i < dim; i++) {
        embedding[i] = ((Math.random() - 0.5) * 0.1) + (normalizedDegree * 0.1);
    }
    return embedding;
}

/**
 * Initialize cluster centers using k-means++ style initialization
 */
function initializeClusterCenters(embeddings, numClusters) {
    const embeddingArray = Array.from(embeddings.values());
    const centers = [];

    // Choose first center randomly
    const firstCenter = embeddingArray[Math.floor(Math.random() * embeddingArray.length)];
    if (firstCenter) {
        centers.push([...firstCenter]);
    }

    // Choose remaining centers using k-means++ initialization
    for (let k = 1; k < numClusters; k++) {
        const distances = [];
        let totalDistance = 0;
        for (const embedding of embeddingArray) {
            let minDistance = Infinity;
            for (const center of centers) {
                const distance = euclideanDistance(embedding, center);
                minDistance = Math.min(minDistance, distance);
            }
            distances.push(minDistance * minDistance);
            totalDistance += minDistance * minDistance;
        }

        // Choose next center with probability proportional to squared distance
        let randomValue = Math.random() * totalDistance;
        for (let i = 0; i < embeddingArray.length; i++) {
            const distanceValue = distances[i];
            if (distanceValue !== undefined) {
                randomValue -= distanceValue;
            }
            if (randomValue <= 0) {
                const newCenter = embeddingArray[i];
                if (newCenter) {
                    centers.push([...newCenter]);
                }
                break;
            }
        }
    }

    return centers;
}

/**
 * Update node embeddings using gradient descent
 */
function updateEmbeddings(graph, embeddings, clusters, learningRate, lambda) {
    const gradients = new Map();

    // Initialize gradients
    for (const [nodeId, embedding] of embeddings) {
        gradients.set(nodeId, new Array(embedding.length).fill(0));
    }

    // Calculate gradients based on graph structure (simplified)
    for (const node of graph.nodes()) {
        const nodeId = node.id;
        const nodeEmbedding = embeddings.get(nodeId);
        if (!nodeEmbedding) {
            continue;
        }
        const gradient = gradients.get(nodeId);
        if (!gradient) {
            continue;
        }

        // Neighbor reconstruction loss gradient
        for (const neighborId of graph.neighbors(nodeId)) {
            const neighborEmbedding = embeddings.get(neighborId);
            if (!neighborEmbedding) {
                continue;
            }
            const diff = nodeEmbedding.map((val, i) => val - (neighborEmbedding[i] ?? 0));
            for (let i = 0; i < gradient.length; i++) {
                const gradVal = gradient[i];
                const diffVal = diff[i];
                if (gradVal !== undefined && diffVal !== undefined) {
                    gradient[i] = gradVal + (lambda * diffVal);
                }
            }
        }

        // Regularization gradient
        for (let i = 0; i < gradient.length; i++) {
            const gradVal = gradient[i];
            const nodeVal = nodeEmbedding[i];
            if (gradVal !== undefined && nodeVal !== undefined) {
                gradient[i] = gradVal + (lambda * nodeVal);
            }
        }
    }

    // Update embeddings
    for (const [nodeId, embedding] of embeddings) {
        const gradient = gradients.get(nodeId);
        if (!gradient) {
            continue;
        }
        for (let i = 0; i < embedding.length; i++) {
            const embVal = embedding[i];
            const gradVal = gradient[i];
            if (embVal !== undefined && gradVal !== undefined) {
                embedding[i] = embVal - (learningRate * gradVal);
            }
        }
    }
}
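/*
 * For reference, the gradient applied above is (up to constant factors)
 *
 *   dL/dz_i = lambda * sum_{j in N(i)} (z_i - z_j) + lambda * z_i
 *
 * i.e. the reconstruction and regularization terms of calculateLoss below.
 * The `clusters` argument is accepted but unused here: the clustering term
 * is minimized by updateClusterCenters rather than by this gradient step.
 */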
/**
 * Update cluster centers based on current assignments
 */
function updateClusterCenters(embeddings, clusters, clusterCenters, numClusters) {
    const dimensions = clusterCenters[0]?.length ?? 0;
    const clusterSums = Array.from({ length: numClusters }, () => new Array(dimensions).fill(0));
    const clusterCounts = new Array(numClusters).fill(0);

    // Sum embeddings for each cluster
    for (const [nodeId, clusterIdx] of clusters) {
        const embedding = embeddings.get(nodeId);
        if (!embedding) {
            continue;
        }
        for (let i = 0; i < embedding.length; i++) {
            const clusterSum = clusterSums[clusterIdx];
            if (!clusterSum) {
                continue;
            }
            const sumVal = clusterSum[i];
            const embVal = embedding[i];
            if (sumVal !== undefined && embVal !== undefined) {
                clusterSum[i] = sumVal + embVal;
            }
        }
        if (clusterCounts[clusterIdx] !== undefined) {
            clusterCounts[clusterIdx]++;
        }
    }

    // Update cluster centers (average of assigned embeddings)
    for (let k = 0; k < numClusters; k++) {
        const count = clusterCounts[k];
        const center = clusterCenters[k];
        const sum = clusterSums[k];
        if (count !== undefined && count > 0 && center && sum) {
            for (let i = 0; i < center.length; i++) {
                const sumVal = sum[i];
                if (sumVal !== undefined) {
                    center[i] = sumVal / count;
                }
            }
        }
    }
}

/**
 * Calculate the total loss function
 */
function calculateLoss(graph, embeddings, clusters, clusterCenters, lambda) {
    let clusteringLoss = 0;
    let reconstructionLoss = 0;
    let regularizationLoss = 0;

    // Clustering loss (distance to cluster centers)
    for (const [nodeId, clusterIdx] of clusters) {
        const embedding = embeddings.get(nodeId);
        if (!embedding) {
            continue;
        }
        const center = clusterCenters[clusterIdx];
        if (!center) {
            continue;
        }
        clusteringLoss += euclideanDistance(embedding, center) ** 2;
    }

    // Graph reconstruction loss
    for (const node of graph.nodes()) {
        const nodeId = node.id;
        const nodeEmbedding = embeddings.get(nodeId);
        if (!nodeEmbedding) {
            continue;
        }
        for (const neighborId of graph.neighbors(nodeId)) {
            const neighborEmbedding = embeddings.get(neighborId);
            if (!neighborEmbedding) {
                continue;
            }
            const distance = euclideanDistance(nodeEmbedding, neighborEmbedding);
            reconstructionLoss += distance ** 2;
        }
    }

    // Regularization loss
    for (const embedding of embeddings.values()) {
        for (const value of embedding) {
            regularizationLoss += value ** 2;
        }
    }

    return clusteringLoss + (lambda * reconstructionLoss) + (lambda * regularizationLoss);
}

/**
 * Calculate Euclidean distance between two vectors
 */
function euclideanDistance(a, b) {
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
        const aVal = a[i];
        const bVal = b[i];
        if (aVal !== undefined && bVal !== undefined) {
            const diff = aVal - bVal;
            sum += diff * diff;
        }
    }
    return Math.sqrt(sum);
}

/**
 * Simple seeded random number generator for reproducibility
 */
function seedRandom(seed) {
    const m = 0x80000000; // 2**31
    const a = 1103515245;
    const c = 12345;
    seed = seed % m;
    return function () {
        seed = ((a * seed) + c) % m;
        return seed / (m - 1);
    };
}
//# sourceMappingURL=sync.js.map
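/*
 * Note: seedRandom is a linear congruential generator using the classic
 * ANSI C constants (a = 1103515245, c = 12345, m = 2^31), so equal seeds
 * yield identical sequences, which is what makes the `seed` option of
 * syncClustering reproducible:
 *
 *   const r1 = seedRandom(42);
 *   const r2 = seedRandom(42);
 *   r1() === r2(); // true: same seed, same sequence in [0, 1]
 */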