@graphty/algorithms
Version:
Graph algorithms library for browser environments implemented in TypeScript
697 lines (604 loc) • 23.9 kB
text/typescript
import type {Graph} from "../core/graph.js";
import type {NodeId} from "../types/index.js";
import {euclideanDistance} from "../utils/math-utilities.js";
/**
* Spectral Clustering implementation
*
* Uses eigenvalues and eigenvectors of the graph Laplacian matrix to perform clustering.
* Particularly effective for finding non-convex clusters and communities in graphs.
*
* Time complexity: O(V³) for eigendecomposition
* Space complexity: O(V²)
*/
/**
 * Options accepted by spectralClustering.
 */
export interface SpectralClusteringOptions {
/**
 * Number of clusters to find. Must be a positive integer (spectralClustering
 * throws otherwise). When it is greater than or equal to the number of nodes,
 * every node is returned as its own cluster.
 */
k: number;
/** Which graph Laplacian to build. spectralClustering defaults to "normalized". */
laplacianType?: "unnormalized" | "normalized" | "randomWalk";
/** Maximum number of k-means iterations (default: 100). */
maxIterations?: number;
/** Convergence tolerance handed to k-means (default: 1e-4). */
tolerance?: number;
}
/**
 * Result returned by spectralClustering.
 */
export interface SpectralClusteringResult {
/** Node ids grouped by cluster; clusters that received no node are not listed. */
communities: NodeId[][];
/** Cluster index assigned to each node id. */
clusterAssignments: Map<NodeId, number>;
/**
 * Eigenvalues reported by the eigen-solver. Not set when spectralClustering
 * short-circuits because k is at least the number of nodes.
 */
eigenvalues?: number[];
/**
 * Eigenvectors used to embed the nodes, one array per eigenvector.
 * Not set in the same short-circuit case as `eigenvalues`.
 */
eigenvectors?: number[][];
}
/**
 * Perform spectral clustering on a graph.
 *
 * Pipeline: build the adjacency matrix, derive the requested Laplacian, fetch
 * the eigenvectors for the k smallest eigenvalues, embed node i as the i-th
 * row of that eigenvector matrix and run k-means on the rows. For the
 * "normalized" Laplacian the rows are scaled to unit length first.
 *
 * @warning Eigenvectors come from a simplified power iteration and the reported
 * eigenvalues may be approximations. For production use cases requiring
 * precise clustering, consider a proper linear algebra library such as
 * ml-matrix. K-means is seeded with `Math.random`, so results can differ
 * between runs.
 *
 * @param graph - Graph to cluster.
 * @param options - Number of clusters plus optional Laplacian / k-means tuning.
 * @returns The non-empty communities, a map from node id to the index of its
 * community inside `communities`, and (unless k >= node count) the
 * eigenvalues and eigenvectors that were used.
 * @throws Error when `k` is not a positive integer.
 */
export function spectralClustering(
    graph: Graph,
    options: SpectralClusteringOptions,
): SpectralClusteringResult {
    const {
        k,
        laplacianType = "normalized",
        maxIterations = 100,
        tolerance = 1e-4,
    } = options;

    // Input validation
    if (k < 1 || !Number.isInteger(k)) {
        throw new Error("k must be a positive integer");
    }

    const nodes = Array.from(graph.nodes());
    const nodeIds = nodes.map((node) => node.id);
    const n = nodeIds.length;

    if (k >= n) {
        // Not enough nodes to form k groups: every node is its own cluster.
        const singletonAssignments = new Map<NodeId, number>();
        nodeIds.forEach((id, index) => singletonAssignments.set(id, index));
        return {
            communities: nodeIds.map((id) => [id]),
            clusterAssignments: singletonAssignments,
        };
    }

    const adjacencyMatrix = buildAdjacencyMatrix(graph, nodeIds);
    const laplacianMatrix = buildLaplacianMatrix(adjacencyMatrix, laplacianType);
    const eigenResult = findSmallestEigenvectors(laplacianMatrix, k);

    // Transpose the eigenvector matrix: row i holds the i-th component of each
    // of the first k eigenvectors and is the feature vector of node i.
    const embedding = eigenResult.eigenvectors.slice(0, k);
    const dataPoints: number[][] = nodeIds.map(
        (_id, i) => embedding.map((eigenvector) => eigenvector[i] ?? 0),
    );

    // Row normalization belongs to the normalized variant of the algorithm only.
    if (laplacianType === "normalized") {
        normalizeRows(dataPoints);
    }

    const kmeans = kMeansClustering(dataPoints, k, maxIterations, tolerance);

    // Raw k-means label of a node; anything outside [0, k) falls back to cluster 0.
    const rawCluster = (index: number): number => {
        const candidate = kmeans.assignments[index] ?? 0;
        return candidate >= 0 && candidate < k ? candidate : 0;
    };

    // Group node ids by raw label. Iterating with forEach (instead of testing
    // the id for truthiness) keeps ids that are falsy but valid, such as 0 or "".
    const buckets: NodeId[][] = Array.from({length: k}, () => []);
    nodeIds.forEach((nodeId, index) => {
        buckets[rawCluster(index)]?.push(nodeId);
    });

    // Drop empty clusters and remember where each surviving cluster ends up,
    // so that clusterAssignments indexes directly into the returned communities.
    const communities: NodeId[][] = [];
    const compactIndex = new Map<number, number>();
    buckets.forEach((bucket, raw) => {
        if (bucket.length > 0) {
            compactIndex.set(raw, communities.length);
            communities.push(bucket);
        }
    });

    const clusterAssignments = new Map<NodeId, number>();
    nodeIds.forEach((nodeId, index) => {
        clusterAssignments.set(nodeId, compactIndex.get(rawCluster(index)) ?? 0);
    });

    return {
        communities,
        clusterAssignments,
        eigenvalues: eigenResult.eigenvalues,
        eigenvectors: eigenResult.eigenvectors,
    };
}
/**
 * Build a dense adjacency matrix for the given nodes.
 *
 * Row/column i corresponds to `nodeIds[i]`. Entry [i][j] is the weight of the
 * edge from node i to node j as reported by `graph.getEdge`, or 1 when the
 * edge carries no weight. Neighbors that are not listed in `nodeIds` are
 * ignored. When `graph.isDirected` is falsy the entry is mirrored to [j][i].
 *
 * @param graph - Source graph.
 * @param nodeIds - Node ids that define the row/column order.
 * @returns An n×n matrix (n = `nodeIds.length`) with 0 for absent edges.
 */
function buildAdjacencyMatrix(graph: Graph, nodeIds: NodeId[]): number[][] {
    const n = nodeIds.length;
    const matrix: number[][] = Array.from({length: n}, () => Array(n).fill(0) as number[]);

    const nodeToIndex = new Map<NodeId, number>();
    nodeIds.forEach((id, index) => nodeToIndex.set(id, index));

    // forEach hands over each id directly, so no truthiness test is needed to
    // narrow it; ids that are falsy but valid (e.g. 0 or "", if NodeId permits
    // them) are processed like any other node instead of being skipped.
    nodeIds.forEach((nodeId, i) => {
        const row = matrix[i];
        if (!row) {
            return;
        }

        for (const neighbor of graph.neighbors(nodeId)) {
            const j = nodeToIndex.get(neighbor);
            if (j === undefined) {
                continue;
            }

            const edge = graph.getEdge(nodeId, neighbor);
            const weight = edge?.weight ?? 1;
            row[j] = weight;

            if (!graph.isDirected) {
                // Undirected: keep the matrix symmetric.
                const mirrorRow = matrix[j];
                if (mirrorRow) {
                    mirrorRow[i] = weight;
                }
            }
        }
    });

    return matrix;
}
/**
 * Derive a graph Laplacian from an adjacency matrix.
 *
 * The degree of node i is the sum of row i. Supported `type` values:
 * - "unnormalized": L = D - A
 * - "normalized":   L_sym = I - D^(-1/2) * A * D^(-1/2)
 * - "randomWalk":   L_rw = I - D^(-1) * A
 * In the two normalized variants a node of degree 0 gets a 0 on the diagonal,
 * and only strictly positive weights produce off-diagonal entries.
 * Any other `type` yields an all-zero matrix.
 *
 * @param adjacency - Square adjacency matrix.
 * @param type - Laplacian variant to build.
 * @returns A new n×n matrix; `adjacency` is not modified.
 */
function buildLaplacianMatrix(adjacency: number[][], type: string): number[][] {
    const size = adjacency.length;

    // Row sums of the adjacency matrix, i.e. the (weighted) degrees.
    const rowSums = Array.from({length: size}, (_unused, r) => {
        let total = 0;
        for (let c = 0; c < size; c++) {
            const weight = adjacency[r]?.[c];
            if (weight !== undefined) {
                total += weight;
            }
        }
        return total;
    });

    // Value of the Laplacian at (r, c) for the requested variant.
    const entryAt = (r: number, c: number): number => {
        const weight = adjacency[r]?.[c];
        const rowDegree = rowSums[r] ?? 0;

        switch (type) {
            case "unnormalized": {
                if (r === c) {
                    return rowDegree;
                }
                return weight === undefined ? 0 : -weight;
            }
            case "normalized": {
                if (r === c) {
                    return rowDegree > 0 ? 1 : 0;
                }
                const colDegree = rowSums[c] ?? 0;
                if (weight !== undefined && weight > 0 && rowDegree > 0 && colDegree > 0) {
                    return -weight / Math.sqrt(rowDegree * colDegree);
                }
                return 0;
            }
            case "randomWalk": {
                if (r === c) {
                    return rowDegree > 0 ? 1 : 0;
                }
                if (weight !== undefined && weight > 0 && rowDegree > 0) {
                    return -weight / rowDegree;
                }
                return 0;
            }
            default:
                return 0;
        }
    };

    return Array.from(
        {length: size},
        (_row, r) => Array.from({length: size}, (_col, c) => entryAt(r, c)),
    );
}
/**
 * Find the eigenvectors that belong to the k smallest eigenvalues of a
 * symmetric matrix (intended for graph Laplacians).
 *
 * For k <= 3 with n > k the work is delegated to
 * computeSmallestEigenvectorsSimple. Otherwise a deflated power iteration is
 * run on the shifted matrix (shift * I - matrix). Plain power iteration on the
 * matrix itself converges to the LARGEST eigenvalues; with a shift that is an
 * upper bound on the spectrum (Gershgorin row-sum bound) the dominant
 * eigenvectors of the shifted matrix are exactly the smallest ones of the
 * original. Each eigenvalue is reported as the Rayleigh quotient of its vector.
 *
 * This is a simplified implementation: a fixed number of iterations is used,
 * so accuracy degrades when neighbouring eigenvalues are close together.
 *
 * @param matrix - Square n×n matrix.
 * @param k - Number of eigenpairs requested.
 * @returns Up to min(k, n) eigenpairs, smallest eigenvalue first. Empty when
 * the matrix is empty or k is 0.
 */
function findSmallestEigenvectors(matrix: number[][], k: number): {
    eigenvalues: number[];
    eigenvectors: number[][];
} {
    const n = matrix.length;
    if (n === 0 || k === 0) {
        return {eigenvalues: [], eigenvectors: []};
    }

    // Small k values are common in clustering and have a dedicated helper.
    if (k <= 3 && n > k) {
        return computeSmallestEigenvectorsSimple(matrix, k, n);
    }

    const maxIterations = 100;
    const minNorm = 1e-10;
    const eigenvectors: number[][] = [];
    const eigenvalues: number[] = [];

    const dot = (a: number[], b: number[]): number =>
        a.reduce((sum, val, idx) => sum + (val * (b[idx] ?? 0)), 0);

    // matrix * v, treating missing entries as 0.
    const multiply = (v: number[]): number[] => {
        const product = Array(n).fill(0) as number[];
        for (let i = 0; i < n; i++) {
            const row = matrix[i];
            if (!row) {
                continue;
            }
            let acc = 0;
            for (let j = 0; j < n; j++) {
                acc += (row[j] ?? 0) * (v[j] ?? 0);
            }
            product[i] = acc;
        }
        return product;
    };

    // Remove (in place) the components along the eigenvectors found so far.
    const deflate = (v: number[]): void => {
        for (const found of eigenvectors) {
            const projection = dot(v, found);
            for (let i = 0; i < n; i++) {
                v[i] = (v[i] ?? 0) - (projection * (found[i] ?? 0));
            }
        }
    };

    // Gershgorin: every eigenvalue is bounded by the largest absolute row sum.
    let bound = 0;
    for (const row of matrix) {
        const rowSum = row.reduce((sum, val) => sum + Math.abs(val), 0);
        if (rowSum > bound) {
            bound = rowSum;
        }
    }
    // A zero matrix would give a zero shift and annihilate every vector.
    const shift = bound > 0 ? bound : 1;

    // An n×n matrix has at most n independent eigenvectors.
    const count = Math.min(k, n);
    for (let eigIdx = 0; eigIdx < count; eigIdx++) {
        // Random start, made orthogonal to earlier eigenvectors, unit length.
        let vector = Array.from({length: n}, () => Math.random() - 0.5);
        deflate(vector);
        const startNorm = Math.sqrt(dot(vector, vector));
        if (startNorm > 0) {
            vector = vector.map((val) => val / startNorm);
        }

        for (let iter = 0; iter < maxIterations; iter++) {
            // next = (shift * I - matrix) * vector
            const product = multiply(vector);
            const next = vector.map((val, i) => (shift * val) - (product[i] ?? 0));

            // Re-orthogonalize every step so rounding errors cannot pull the
            // iterate back towards an eigenvector that was already found.
            deflate(next);

            const norm = Math.sqrt(dot(next, next));
            if (!(norm > minNorm)) {
                break;
            }
            vector = next.map((val) => val / norm);
        }

        // Rayleigh quotient v^T * matrix * v / v^T * v
        const denominator = dot(vector, vector);
        const eigenvalue = denominator > 0 ? dot(vector, multiply(vector)) / denominator : 0;

        eigenvectors.push(vector);
        eigenvalues.push(eigenvalue);
    }

    return {eigenvalues, eigenvectors};
}
/**
 * Scale each row of `matrix` in place to unit Euclidean length.
 *
 * Rows whose length is not strictly positive (all zeros, empty) are left as
 * they are, so no division by zero can occur.
 *
 * @param matrix - Rows to normalize; mutated in place.
 */
function normalizeRows(matrix: number[][]): void {
    for (const row of matrix) {
        let squaredLength = 0;
        row.forEach((component) => {
            squaredLength += component * component;
        });

        const length = Math.sqrt(squaredLength);
        if (!(length > 0)) {
            continue;
        }

        row.forEach((component, column) => {
            if (component !== undefined) {
                row[column] = component / length;
            }
        });
    }
}
/**
 * Lloyd-style k-means clustering.
 *
 * Centroids are seeded from k distinct, randomly chosen data points. Each
 * round assigns every point to its nearest centroid (Euclidean distance) and
 * then moves each non-empty cluster's centroid to the mean of its points.
 * The loop ends when the assignments stop changing, when no centroid moved by
 * `tolerance` or more, or after `maxIterations` rounds.
 *
 * @param data - One row per point; dimensionality is taken from the first row.
 * @param k - Number of clusters.
 * @param maxIterations - Upper bound on the number of rounds.
 * @param tolerance - Centroid movement below which the run counts as converged.
 * @returns Cluster index per point and the final centroids. Empty arrays when
 * there is no data or k is 0; when k >= number of points every point becomes
 * its own cluster.
 */
function kMeansClustering(
    data: number[][],
    k: number,
    maxIterations: number,
    tolerance = 1e-4,
): {assignments: number[], centroids: number[][]} {
    const pointCount = data.length;
    const dims = data[0]?.length ?? 0;

    // Degenerate inputs
    if (pointCount === 0 || k === 0) {
        return {assignments: [], centroids: []};
    }

    if (k >= pointCount) {
        // One cluster per point
        return {
            assignments: [...Array(pointCount).keys()],
            centroids: data.slice(),
        };
    }

    // Seed the centroids with copies of distinct random points.
    const centroids: number[][] = [];
    const taken = new Set<number>();
    while (centroids.length < k && taken.size < pointCount) {
        const pick = Math.floor(Math.random() * pointCount);
        const seed = data[pick];
        if (taken.has(pick) || !seed) {
            continue;
        }
        taken.add(pick);
        centroids.push(Array.from(seed));
    }

    // Top up with random coordinates should seeding have come up short.
    while (centroids.length < k) {
        centroids.push(Array.from({length: dims}, () => Math.random() - 0.5));
    }

    const assignments = new Array<number>(pointCount).fill(0);
    let previous = new Array<number>(pointCount).fill(-1);

    for (let round = 0; round < maxIterations; round++) {
        // Assignment step: nearest centroid wins, ties go to the lower index.
        for (let p = 0; p < pointCount; p++) {
            const point = data[p];
            let nearest = 0;
            let nearestDistance = Number.POSITIVE_INFINITY;

            if (point) {
                for (let c = 0; c < k; c++) {
                    const center = centroids[c];
                    if (!center) {
                        continue;
                    }
                    const candidate = euclideanDistance(point, center);
                    if (candidate < nearestDistance) {
                        nearestDistance = candidate;
                        nearest = c;
                    }
                }
            }

            assignments[p] = nearest;
        }

        // Stop as soon as a round leaves every assignment untouched.
        const stable = assignments.every((cluster, p) => cluster === previous[p]);
        if (stable) {
            break;
        }
        previous = assignments.slice();

        // Snapshot for the movement-based convergence test below.
        const before = centroids.map((center) => center.slice());

        // Update step: accumulate per-cluster coordinate totals and sizes.
        const sizes = new Array<number>(k).fill(0);
        const totals: number[][] = Array.from({length: k}, () => new Array<number>(dims).fill(0));

        for (let p = 0; p < pointCount; p++) {
            const point = data[p];
            if (!point) {
                continue;
            }
            const cluster = assignments[p] ?? 0;
            const bucket = totals[cluster];
            if (bucket === undefined) {
                continue;
            }

            const size = sizes[cluster];
            if (size !== undefined) {
                sizes[cluster] = size + 1;
            }

            for (let j = 0; j < dims; j++) {
                const coordinate = point[j];
                const running = bucket[j];
                if (coordinate !== undefined && running !== undefined) {
                    bucket[j] = running + coordinate;
                }
            }
        }

        // Move each non-empty cluster's centroid to the mean of its members;
        // empty clusters keep their previous centroid.
        for (let c = 0; c < k; c++) {
            const size = sizes[c];
            const total = totals[c];
            const center = centroids[c];
            if (size === undefined || !(size > 0) || total === undefined || center === undefined) {
                continue;
            }
            for (let j = 0; j < dims; j++) {
                const sum = total[j];
                if (sum !== undefined) {
                    center[j] = sum / size;
                }
            }
        }

        // Converged when even the most mobile centroid barely moved.
        let largestShift = 0;
        for (let c = 0; c < k; c++) {
            const was = before[c];
            const now = centroids[c];
            if (!was || !now) {
                continue;
            }
            const moved = euclideanDistance(was, now);
            if (moved > largestShift) {
                largestShift = moved;
            }
        }

        if (largestShift < tolerance) {
            break;
        }
    }

    return {assignments, centroids};
}
/**
 * Compute the eigenvectors of the smallest eigenvalues of a symmetric matrix
 * (intended for graph Laplacians and small k such as 2 or 3).
 *
 * Method: deflated power iteration on (shift * I - matrix). The shift is the
 * Gershgorin row-sum bound, an upper bound on every eigenvalue, so the shifted
 * matrix has no negative eigenvalues and its dominant eigenvectors are the
 * smallest ones of the input. A fixed shift of 2 is only safe for normalized
 * Laplacians; unnormalized ones can have far larger eigenvalues.
 *
 * The first vector starts from the constant vector. That is already the exact
 * answer for unnormalized and random-walk Laplacians and the iteration leaves
 * it unchanged; for the symmetric normalized Laplacian, whose null vector is
 * not constant in general, the iteration corrects it. Later vectors start
 * from random values.
 *
 * Eigenvalues are Rayleigh quotients of the returned vectors; magnitudes
 * below 1e-12 are reported as exactly 0.
 *
 * @param matrix - Square n×n matrix.
 * @param k - Number of eigenpairs requested.
 * @param n - Dimension of `matrix`.
 * @returns min(k, n) eigenpairs ordered from smallest eigenvalue upwards.
 */
function computeSmallestEigenvectorsSimple(matrix: number[][], k: number, n: number): {
    eigenvalues: number[];
    eigenvectors: number[][];
} {
    const eigenvectors: number[][] = [];
    const eigenvalues: number[] = [];

    const count = Math.min(k, n);
    if (!(count > 0)) {
        return {eigenvalues, eigenvectors};
    }

    const maxIterations = 200;
    const minNorm = 1e-10;
    const convergenceTolerance = 1e-12;

    const dot = (a: number[], b: number[]): number =>
        a.reduce((sum, val, idx) => sum + (val * (b[idx] ?? 0)), 0);

    // matrix * v, treating missing entries as 0.
    const multiply = (v: number[]): number[] => {
        const product = Array(n).fill(0) as number[];
        for (let i = 0; i < n; i++) {
            let acc = 0;
            for (let j = 0; j < n; j++) {
                acc += (matrix[i]?.[j] ?? 0) * (v[j] ?? 0);
            }
            product[i] = acc;
        }
        return product;
    };

    // Remove (in place) the components along the eigenvectors found so far.
    const deflate = (v: number[]): void => {
        for (const found of eigenvectors) {
            const projection = dot(v, found);
            for (let i = 0; i < n; i++) {
                v[i] = (v[i] ?? 0) - (projection * (found[i] ?? 0));
            }
        }
    };

    // Gershgorin: every eigenvalue is bounded by the largest absolute row sum.
    let bound = 0;
    for (let i = 0; i < n; i++) {
        let rowSum = 0;
        for (let j = 0; j < n; j++) {
            rowSum += Math.abs(matrix[i]?.[j] ?? 0);
        }
        if (rowSum > bound) {
            bound = rowSum;
        }
    }
    // A zero matrix would give a zero shift and annihilate every vector.
    const shift = bound > 0 ? bound : 1;

    for (let idx = 0; idx < count; idx++) {
        let vector = idx === 0 ?
            Array.from({length: n}, () => 1 / Math.sqrt(n)) :
            Array.from({length: n}, () => Math.random() - 0.5);

        deflate(vector);
        const startNorm = Math.sqrt(dot(vector, vector));
        if (startNorm > 0) {
            vector = vector.map((val) => val / startNorm);
        }

        for (let iter = 0; iter < maxIterations; iter++) {
            // next = (shift * I - matrix) * vector
            const product = multiply(vector);
            const next = vector.map((val, i) => (shift * val) - (product[i] ?? 0));

            // Keep the iterate orthogonal to the eigenvectors already found.
            deflate(next);

            const norm = Math.sqrt(dot(next, next));
            if (!(norm > minNorm)) {
                break;
            }

            // Normalize and measure how far the iterate moved this round.
            let largestChange = 0;
            for (let i = 0; i < n; i++) {
                const scaled = (next[i] ?? 0) / norm;
                const change = Math.abs(scaled - (vector[i] ?? 0));
                if (change > largestChange) {
                    largestChange = change;
                }
                next[i] = scaled;
            }
            vector = next;

            if (largestChange < convergenceTolerance) {
                break;
            }
        }

        // Rayleigh quotient v^T * matrix * v / v^T * v
        const denominator = dot(vector, vector);
        const quotient = denominator > 0 ? dot(vector, multiply(vector)) / denominator : 0;

        eigenvectors.push(vector);
        // Suppress rounding noise around zero (e.g. -1e-17 for the null vector).
        eigenvalues.push(Math.abs(quotient) < 1e-12 ? 0 : quotient);
    }

    return {eigenvalues, eigenvectors};
}