@graphty/algorithms
Graph algorithms library for browser environments implemented in TypeScript
/**
* Synergistic Deep Graph Clustering (SynC) Algorithm
*
* This algorithm combines representation learning with structure augmentation
* for improved clustering performance on graphs. It jointly optimizes node
* embeddings and cluster assignments while preserving graph structure.
*
* Based on: "Synergistic Deep Graph Clustering" (arXiv:2406.15797, June 2024)
*
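 * Note: this implementation is a simplified, self-contained variant that
 * alternates k-means-style hard assignment with gradient updates on the
 * embeddings, rather than the full deep model described in the paper.
 *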
* @param graph - Input graph to cluster
* @param config - Configuration options
* @returns Clustering result with assignments and embeddings
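 *
 * @example
 * // Minimal usage sketch: `graph` is assumed to be a graph instance
 * // exposing nodes(), neighbors(), degree() and nodeCount, as used below.
 * const result = syncClustering(graph, { numClusters: 3 });
 * if (result.converged) {
 *     for (const [nodeId, clusterId] of result.clusters) {
 *         console.log(`${String(nodeId)} -> cluster ${String(clusterId)}`);
 *     }
 * }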
*/
export function syncClustering(graph, config) {
const { numClusters, maxIterations = 100, tolerance = 1e-6, seed = 42, learningRate = 0.01, lambda = 0.1, } = config;
    const nodes = Array.from(graph.nodes());
    const nodeCount = nodes.length;
    if (nodeCount === 0) {
        return {
            clusters: new Map(),
            loss: 0,
            iterations: 0,
            embeddings: new Map(),
            converged: true,
        };
    }
    if (numClusters <= 0 || numClusters > nodeCount) {
        throw new Error(`Invalid number of clusters: ${String(numClusters)}. Must be between 1 and ${String(nodeCount)}`);
    }
    // Temporarily swap in a seeded generator for reproducibility. Validation
    // runs first so the early return and the throw above can never leave the
    // global Math.random patched; the original is restored before returning.
    const originalRandom = Math.random;
    Math.random = seedRandom(seed);
// Initialize node embeddings (simplified version using graph features)
const embeddingDim = Math.min(64, nodeCount);
const embeddings = new Map();
// Initialize embeddings based on node features
for (const node of nodes) {
const embedding = initializeNodeEmbedding(graph, node.id, embeddingDim);
embeddings.set(node.id, embedding);
}
// Initialize cluster centers
const clusterCenters = initializeClusterCenters(embeddings, numClusters);
let previousLoss = Infinity;
let iterations = 0;
let converged = false;
for (iterations = 0; iterations < maxIterations; iterations++) {
        // E-step: Assign each node to its nearest cluster center
        const clusters = assignToNearestCenters(nodes, embeddings, clusterCenters);
        // M-step: Update embeddings and cluster centers
        updateEmbeddings(graph, embeddings, clusters, clusterCenters, learningRate, lambda);
updateClusterCenters(embeddings, clusters, clusterCenters, numClusters);
// Calculate current loss
const currentLoss = calculateLoss(graph, embeddings, clusters, clusterCenters, lambda);
        // Check for convergence; record the latest loss either way so the
        // returned loss reflects the final iteration
        const delta = Math.abs(previousLoss - currentLoss);
        previousLoss = currentLoss;
        if (delta < tolerance) {
            converged = true;
            break;
        }
}
    // Final cluster assignment against the updated embeddings and centers
    const finalClusters = assignToNearestCenters(nodes, embeddings, clusterCenters);
// Restore original random function
Math.random = originalRandom;
    return {
        clusters: finalClusters,
        loss: previousLoss,
        // On early break `iterations` is the 0-based index of the converging
        // pass; after a full run it already equals maxIterations.
        iterations: converged ? iterations + 1 : iterations,
        embeddings,
        converged,
    };
}
/**
 * Assign each node to its nearest cluster center. Shared by the E-step and
 * the final assignment pass in syncClustering.
 */
function assignToNearestCenters(nodes, embeddings, clusterCenters) {
    const clusters = new Map();
    for (const node of nodes) {
        const nodeEmbedding = embeddings.get(node.id);
        if (!nodeEmbedding) {
            continue;
        }
        let bestCluster = 0;
        let minDistance = Infinity;
        for (let k = 0; k < clusterCenters.length; k++) {
            const center = clusterCenters[k];
            if (!center) {
                continue;
            }
            const distance = euclideanDistance(nodeEmbedding, center);
            if (distance < minDistance) {
                minDistance = distance;
                bestCluster = k;
            }
        }
        clusters.set(node.id, bestCluster);
    }
    return clusters;
}
/**
* Initialize node embedding based on graph structure and features
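 * (small random noise plus a normalized-degree offset, so nodes with similar
 * degrees start near each other in the embedding space)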
*/
function initializeNodeEmbedding(graph, nodeId, dim) {
const embedding = new Array(dim).fill(0);
// Use node degree as a base feature
const degree = graph.degree(nodeId);
const normalizedDegree = degree / Math.max(1, graph.nodeCount - 1);
// Initialize with small random values influenced by graph structure
for (let i = 0; i < dim; i++) {
embedding[i] = ((Math.random() - 0.5) * 0.1) + (normalizedDegree * 0.1);
}
return embedding;
}
/**
* Initialize cluster centers using k-means++ style initialization
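 * (first center chosen uniformly at random; each subsequent center sampled
 * with probability proportional to its squared distance from the nearest
 * already-chosen center, i.e. D² weighting)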
*/
function initializeClusterCenters(embeddings, numClusters) {
const embeddingArray = Array.from(embeddings.values());
const centers = [];
// Choose first center randomly
const firstCenter = embeddingArray[Math.floor(Math.random() * embeddingArray.length)];
if (firstCenter) {
centers.push([...firstCenter]);
}
// Choose remaining centers using k-means++ initialization
for (let k = 1; k < numClusters; k++) {
const distances = [];
let totalDistance = 0;
for (const embedding of embeddingArray) {
let minDistance = Infinity;
for (const center of centers) {
const distance = euclideanDistance(embedding, center);
minDistance = Math.min(minDistance, distance);
}
distances.push(minDistance * minDistance);
totalDistance += minDistance * minDistance;
}
// Choose next center with probability proportional to squared distance
let randomValue = Math.random() * totalDistance;
for (let i = 0; i < embeddingArray.length; i++) {
const distanceValue = distances[i];
if (distanceValue !== undefined) {
randomValue -= distanceValue;
}
if (randomValue <= 0) {
const newCenter = embeddingArray[i];
if (newCenter) {
centers.push([...newCenter]);
}
break;
}
}
}
return centers;
}
/**
* Update node embeddings using gradient descent
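 *
 * Per-node gradient, with constant factors folded into the learning rate:
 *   ∇z_i = (z_i − c_k(i)) + λ · Σ_{j ∈ N(i)} (z_i − z_j) + λ · z_i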
*/
function updateEmbeddings(graph, embeddings, clusters, clusterCenters, learningRate, lambda) {
const gradients = new Map();
// Initialize gradients
for (const [nodeId, embedding] of embeddings) {
gradients.set(nodeId, new Array(embedding.length).fill(0));
}
// Calculate gradients based on graph structure (simplified)
for (const node of graph.nodes()) {
const nodeId = node.id;
const nodeEmbedding = embeddings.get(nodeId);
if (!nodeEmbedding) {
continue;
}
const gradient = gradients.get(nodeId);
if (!gradient) {
continue;
}
        // Neighbor reconstruction loss gradient
        for (const neighborId of graph.neighbors(nodeId)) {
            const neighborEmbedding = embeddings.get(neighborId);
            if (!neighborEmbedding) {
                continue;
            }
            const diff = nodeEmbedding.map((val, i) => val - (neighborEmbedding[i] ?? 0));
            for (let i = 0; i < gradient.length; i++) {
                const gradVal = gradient[i];
                const diffVal = diff[i];
                if (gradVal !== undefined && diffVal !== undefined) {
                    gradient[i] = gradVal + (lambda * diffVal);
                }
            }
        }
        // Clustering loss gradient: pull the embedding toward its assigned
        // center, matching the clustering term in calculateLoss
        const clusterIdx = clusters.get(nodeId);
        const center = clusterIdx === undefined ? undefined : clusterCenters[clusterIdx];
        if (center) {
            for (let i = 0; i < gradient.length; i++) {
                const gradVal = gradient[i];
                const nodeVal = nodeEmbedding[i];
                const centerVal = center[i];
                if (gradVal !== undefined && nodeVal !== undefined && centerVal !== undefined) {
                    gradient[i] = gradVal + (nodeVal - centerVal);
                }
            }
        }
        // Regularization gradient
        for (let i = 0; i < gradient.length; i++) {
            const gradVal = gradient[i];
            const nodeVal = nodeEmbedding[i];
            if (gradVal !== undefined && nodeVal !== undefined) {
                gradient[i] = gradVal + (lambda * nodeVal);
            }
        }
}
// Update embeddings
for (const [nodeId, embedding] of embeddings) {
const gradient = gradients.get(nodeId);
if (!gradient) {
continue;
}
for (let i = 0; i < embedding.length; i++) {
const embVal = embedding[i];
const gradVal = gradient[i];
if (embVal !== undefined && gradVal !== undefined) {
embedding[i] = embVal - (learningRate * gradVal);
}
}
}
}
/**
* Update cluster centers based on current assignments
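 * (each center becomes the mean of its assigned embeddings; clusters that
 * received no nodes keep their previous center)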
*/
function updateClusterCenters(embeddings, clusters, clusterCenters, numClusters) {
const dimensions = clusterCenters[0]?.length ?? 0;
const clusterSums = Array.from({ length: numClusters }, () => new Array(dimensions).fill(0));
const clusterCounts = new Array(numClusters).fill(0);
// Sum embeddings for each cluster
    for (const [nodeId, clusterIdx] of clusters) {
        const embedding = embeddings.get(nodeId);
        const clusterSum = clusterSums[clusterIdx];
        if (!embedding || !clusterSum) {
            continue;
        }
        for (let i = 0; i < embedding.length; i++) {
            const sumVal = clusterSum[i];
            const embVal = embedding[i];
            if (sumVal !== undefined && embVal !== undefined) {
                clusterSum[i] = sumVal + embVal;
            }
        }
        if (clusterCounts[clusterIdx] !== undefined) {
            clusterCounts[clusterIdx]++;
        }
    }
// Update cluster centers (average of assigned embeddings)
for (let k = 0; k < numClusters; k++) {
const count = clusterCounts[k];
const center = clusterCenters[k];
const sum = clusterSums[k];
if (count !== undefined && count > 0 && center && sum) {
for (let i = 0; i < center.length; i++) {
const sumVal = sum[i];
if (sumVal !== undefined) {
center[i] = sumVal / count;
}
}
}
}
}
/**
* Calculate the total loss function
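 *
 *   L = Σ_i ||z_i − c_k(i)||² + λ · Σ_{(i,j) ∈ E} ||z_i − z_j||² + λ · Σ_i ||z_i||²
 *
 * For undirected graphs the reconstruction term visits each edge twice (once
 * per endpoint), mirroring the neighbor loop in updateEmbeddings.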
*/
function calculateLoss(graph, embeddings, clusters, clusterCenters, lambda) {
let clusteringLoss = 0;
let reconstructionLoss = 0;
let regularizationLoss = 0;
// Clustering loss (distance to cluster centers)
for (const [nodeId, clusterIdx] of clusters) {
const embedding = embeddings.get(nodeId);
if (!embedding) {
continue;
}
const center = clusterCenters[clusterIdx];
if (!center) {
continue;
}
clusteringLoss += euclideanDistance(embedding, center) ** 2;
}
// Graph reconstruction loss
for (const node of graph.nodes()) {
const nodeId = node.id;
const nodeEmbedding = embeddings.get(nodeId);
if (!nodeEmbedding) {
continue;
}
for (const neighborId of graph.neighbors(nodeId)) {
const neighborEmbedding = embeddings.get(neighborId);
if (!neighborEmbedding) {
continue;
}
const distance = euclideanDistance(nodeEmbedding, neighborEmbedding);
reconstructionLoss += distance ** 2;
}
}
// Regularization loss
for (const embedding of embeddings.values()) {
for (const value of embedding) {
regularizationLoss += value ** 2;
}
}
return clusteringLoss + (lambda * reconstructionLoss) + (lambda * regularizationLoss);
}
/**
* Calculate Euclidean distance between two vectors
*/
function euclideanDistance(a, b) {
let sum = 0;
for (let i = 0; i < a.length; i++) {
const aVal = a[i];
const bVal = b[i];
if (aVal !== undefined && bVal !== undefined) {
const diff = aVal - bVal;
sum += diff * diff;
}
}
return Math.sqrt(sum);
}
/**
* Simple seeded random number generator for reproducibility
*/
function seedRandom(seed) {
    const m = 0x80000000; // 2**31
    const a = 1103515245; // glibc LCG multiplier
    const c = 12345; // glibc LCG increment
    let state = ((seed % m) + m) % m; // normalize into [0, m)
    return function () {
        // Math.imul keeps the product in 32-bit integer range; a plain
        // multiplication of two ~2**31 numbers exceeds Number.MAX_SAFE_INTEGER
        // and silently drops low-order bits.
        state = ((Math.imul(a, state) + c) >>> 0) % m;
        return state / m; // uniform in [0, 1), so floor-based indexing is safe
    };
}
//# sourceMappingURL=sync.js.map