UNPKG

@sgratzl/science

Version:

Scientific and statistical computing in JavaScript.

125 lines (109 loc) 3.56 kB
import {euclidean} from './distance';

/**
 * Creates an agglomerative hierarchical clustering operator.
 *
 * The returned function takes an array of vectors and repeatedly merges the
 * two closest clusters until a single tree remains. Each tree node has the
 * shape `{left, right, dist, centroid, size, depth}`; leaves have
 * `left === null`, `right === null`, `dist === 0`, `size === 1`, `depth === 0`
 * and the input vector itself as `centroid`.
 *
 * Configuration (chainable getter/setters on the returned function):
 *   - `.distance([fn])` – pairwise distance function `fn(a, b)`; defaults to
 *     the `euclidean` export of `./distance`.
 *   - `.linkage([name])` – `"single"` (default), `"complete"` or `"average"`.
 *     No validation is performed; any other value leaves the distances to a
 *     merged cluster unchanged (they stay those of the first merged cluster).
 *
 * @returns {function(Array): (Object|undefined)} The clustering operator. It
 *   returns the root node, or `undefined` when fewer than two vectors are
 *   given (no merge ever happens, so no root is created).
 */
export default function hcluster() {
  let distance = euclidean;
  let linkage = "single"; // single, complete or average

  function hcluster(vectors) {
    const n = vectors.length;
    const dMin = [];        // dMin[i]: index of the cluster currently closest to cluster i
    const cSize = [];       // cSize[i]: number of leaves in the cluster stored in slot i
    const distMatrix = [];  // symmetric cluster-to-cluster distances; Infinity marks "unavailable"
    const clusters = [];    // clusters[i]: current tree node for slot i
    let root;

    // Initialise the distance matrix and the nearest-neighbour index per row.
    // The diagonal is Infinity so a cluster is never its own nearest neighbour.
    for (let i = 0; i < n; i++) {
      dMin[i] = 0;
      distMatrix[i] = [];
      for (let j = 0; j < n; j++) {
        distMatrix[i][j] = i === j ? Infinity : distance(vectors[i], vectors[j]);
        if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j;
      }
    }

    // Create the leaves of the tree.
    for (let i = 0; i < n; i++) {
      clusters[i] = {
        left: null,
        right: null,
        dist: 0,
        centroid: vectors[i],
        size: 1,
        depth: 0
      };
      cSize[i] = 1;
    }

    // Main loop: n - 1 merges turn n leaves into a single tree.
    for (let p = 0; p < n - 1; p++) {
      // Find the closest pair of clusters (c1, c2).
      let c1 = 0;
      for (let i = 0; i < n; i++) {
        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
      }
      const c2 = dMin[c1];

      // Create the node describing the merge; it takes over slot c1.
      const c1Cluster = clusters[c1];
      const c2Cluster = clusters[c2];
      const newCluster = {
        left: c1Cluster,
        right: c2Cluster,
        dist: distMatrix[c1][c2],
        centroid: calculateCentroid(c1Cluster.size, c1Cluster.centroid,
                                    c2Cluster.size, c2Cluster.centroid),
        size: c1Cluster.size + c2Cluster.size,
        depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)
      };
      clusters[c1] = newCluster;

      // Remember the pre-merge sizes: the average-linkage update must weight
      // the old distances by the sizes of the two clusters being merged.
      const c1Size = cSize[c1];
      const c2Size = cSize[c2];
      cSize[c1] = c1Size + c2Size;

      // Overwrite row/column c1 according to the linkage type.
      for (let j = 0; j < n; j++) {
        switch (linkage) {
          case "single":
            if (distMatrix[c1][j] > distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "complete":
            if (distMatrix[c1][j] < distMatrix[c2][j])
              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
            break;
          case "average":
            distMatrix[j][c1] = distMatrix[c1][j] =
              (c1Size * distMatrix[c1][j] + c2Size * distMatrix[c2][j]) / (c1Size + c2Size);
            break;
        }
      }
      // The loop above may have written a finite value on the diagonal.
      distMatrix[c1][c1] = Infinity;

      // Infinity out old row c2 and column c2 so slot c2 is never picked again.
      for (let i = 0; i < n; i++)
        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;

      // Update dMin: entries that pointed to c2 now point to c1, and row c1's
      // nearest neighbour is recomputed (its old target c2 is now Infinity).
      for (let j = 0; j < n; j++) {
        if (dMin[j] === c2) dMin[j] = c1;
        if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) dMin[c1] = j;
      }

      // With single linkage the distance to the merged cluster never grows,
      // so pointing at c1 is still correct. With the other linkages it can
      // grow, so rows that point at c1 are rescanned for a strictly closer
      // neighbour (ties keep c1).
      if (linkage !== "single") {
        for (let j = 0; j < n; j++) {
          if (j === c1 || dMin[j] !== c1) continue;
          for (let k = 0; k < n; k++) {
            if (distMatrix[j][k] < distMatrix[j][dMin[j]]) dMin[j] = k;
          }
        }
      }

      // Keep track of the last added cluster.
      root = newCluster;
    }

    return root;
  }

  // Getter/setter for the linkage criterion ("single", "complete" or "average").
  hcluster.linkage = function(x) {
    if (!arguments.length) return linkage;
    linkage = x;
    return hcluster;
  };

  // Getter/setter for the pairwise distance function.
  hcluster.distance = function(x) {
    if (!arguments.length) return distance;
    distance = x;
    return hcluster;
  };

  return hcluster;
}

/**
 * Size-weighted mean of two centroids.
 *
 * @param {number} c1Size - Number of points behind the first centroid.
 * @param {Array<number>} c1Centroid - First centroid; its length fixes the
 *   number of coordinates computed.
 * @param {number} c2Size - Number of points behind the second centroid.
 * @param {Array<number>} c2Centroid - Second centroid.
 * @returns {Array<number>} A new array; neither input is modified.
 */
function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
  const newSize = c1Size + c2Size;
  const n = c1Centroid.length;
  const newCentroid = [];
  for (let i = 0; i < n; i++) {
    newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
  }
  return newCentroid;
}