UNPKG

jhcluster

Version:

Hierarchical clustering algorithm

376 lines (311 loc) 10.9 kB
function _typeof(obj) { "@babel/helpers - typeof"; if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; } return _typeof(obj); } /* Author: Corneliu S. (github.com/upphiminn) 2013 Code style is very imperative, I know :) */ (function (root, factory) { if (typeof define === 'function' && define.amd) { // AMD. Register as an anonymous module. define([], factory); } else if ((typeof module === "undefined" ? "undefined" : _typeof(module)) === 'object' && module.exports) { // Node. Does not work with strict CommonJS, but // only CommonJS-like environments that support module.exports, // like Node. module.exports = factory(); } else { // Browser globals (root is window) root.returnExports = factory(); } })(typeof self !== 'undefined' ? self : this, function () { var jHC = function jHC() { //Local lets var point_data; var distance_function; var linkage; var clusters = []; var leaf_nodes = {}; var point_cluster_assignment = []; var point_distance_matrix = []; var cluster_distance_matrix = {}; var link_distance = {}; //Distance Functions function euclidean_distance(a, b) { var d = 0; for (var i = 0; i < a.length; i++) { d += Math.pow(a[i] - b[i], 2); } return Math.sqrt(d); } function mannhattan_distance(a, b) { var d = 0; for (var i = 0; i < a.length; i++) { d += Math.abs(a[i] - b[i]); } return Math.sqrt(d); } function haversine_distance(a, b) { var precision = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 4; var R = 6371; var lat1 = a[1] * Math.PI / 180; var lon1 = a[0] * Math.PI / 180; var lat2 = b[1] * Math.PI / 180; var lon2 = b[0] * Math.PI / 180; var dLat = lat2 - lat1; var dLon = lon2 - lon1; a = Math.sin(dLat / 2) * Math.sin(dLat / 2) + Math.cos(lat1) * Math.cos(lat2) * Math.sin(dLon / 2) * Math.sin(dLon / 2); var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); var d = R * c; return d.toPrecision(precision); } // Core Algorithm Related. function init() { point_data.forEach(function (d, i) { clusters[i] = { name: i, // interal id coordinates: point_data[i], size: 1 }; leaf_nodes[clusters[i].name] = clusters[i]; point_cluster_assignment[i] = i; }); } function update_point_cluster_assignment(new_id, c1_id, c2_id) { for (var i = 0; i < point_cluster_assignment.length; i++) { if (point_cluster_assignment[i] == c1_id || point_cluster_assignment[i] == c2_id) { point_cluster_assignment[i] = new_id; } } } function compute_centroid(cluster_name) { var cluster_coordinates = []; var num_points = 0; for (var i = 0; i < point_cluster_assignment.length; i++) { if (point_cluster_assignment[i] == cluster_name) { for (var j = 0; j < point_data[i].length; j++) { cluster_coordinates[j] = (cluster_coordinates[j] || 0) + point_data[i][j]; } num_points++; } } for (var _j = 0; _j < cluster_coordinates.length; _j++) { cluster_coordinates[_j] /= num_points; } return cluster_coordinates; } function update_next_link_index(c1, c2) { var link_distance_keys = Object.keys(link_distance); link_distance_keys.forEach(function (k) { if (link_distance[k] == c1 || link_distance[k] == c2) { link_distance[k] = undefined; } }); for (var i = 0; i < clusters.length; i++) { if (typeof clusters[i] === 'undefined') { continue; } var cl_1 = clusters[i].name; for (var j = 0; j < clusters.length; j++) { if (typeof clusters[j] === 'undefined') { continue; } var cl_2 = clusters[j].name; if (cl_1 == cl_2) { continue; } if (link_distance[i] == undefined) { link_distance[cl_1] = cl_2; } else if (cluster_distance_matrix[cl_1][link_distance[cl_1]] > cluster_distance_matrix[cl_1][cl_2]) { link_distance[cl_1] = cl_2; } } } } function cluster_link_distance(c1, c2) { var c1_points = []; var c2_points = []; point_cluster_assignment.forEach(function (d, i) { if (d == c1.name) { c1_points.push(i); } else if (d == c2.name) { c2_points.push(i); } }); if (linkage === 'SINGLE') { var min = Infinity; for (var i = 0; i < c1_points.length; i++) { for (var j = 0; j < c2_points.length; j++) { var d = distance(point_data[c1_points[i]], point_data[c2_points[j]]); if (d < min) { min = d; } } } return min; } else if (linkage === 'COMPLETE') { var max = 0; for (var _i = 0; _i < c1_points.length; _i++) { for (var _j2 = 0; _j2 < c2_points.length; _j2++) { var _d = distance(point_data[c1_points[_i]], point_data[c2_points[_j2]]); if (_d > max) { max = _d; } } } return max; } else if (linkage === 'AVERAGE') { var average = 0; for (var _i2 = 0; _i2 < c1_points.length; _i2++) { for (var _j3 = 0; _j3 < c2_points.length; _j3++) { var _d2 = distance(point_data[c1_points[_i2]], point_data[c2_points[_j3]]); average = average + +_d2; } average /= c1_points.length * c2_points.length; } return average; } } function update_distance_matrix(c_id) { if (arguments.length === 0) { // intialization for (var i = 0; i < clusters.length; i++) { for (var j = 0; j <= i; j++) { var d = Infinity; if (i != j) d = distance(clusters[i].coordinates, clusters[j].coordinates); point_distance_matrix[i] = point_distance_matrix[i] || []; point_distance_matrix[j] = point_distance_matrix[j] || []; point_distance_matrix[i][j] = d; point_distance_matrix[j][i] = d; } } point_distance_matrix.forEach(function (line, i) { cluster_distance_matrix[i] = {}; var min_link_distance = Infinity; line.forEach(function (element, j) { cluster_distance_matrix[i][j] = element; if (min_link_distance > element) { link_distance[i] = j; min_link_distance = element; } }); }); } else { // update for (var _i3 = 0; _i3 < clusters.length; _i3++) { var _d3 = Infinity; if (typeof clusters[_i3] === 'undefined') { continue; } if (clusters[_i3].name != c_id) { _d3 = cluster_link_distance(clusters[_i3], clusters[c_id]); } cluster_distance_matrix[c_id] = cluster_distance_matrix[c_id] || {}; cluster_distance_matrix[c_id][clusters[_i3].name] = _d3; cluster_distance_matrix[clusters[_i3].name][c_id] = _d3; } } } function get_closest_clusters() { var min = Infinity; var c1_id; var c2_id; for (var i = 0; i < clusters.length; i++) { if (typeof clusters[i] === 'undefined') { continue; } var cluster_id = clusters[i].name; if (cluster_distance_matrix[cluster_id][link_distance[cluster_id]] < min) { min = cluster_distance_matrix[cluster_id][link_distance[cluster_id]]; c2_id = link_distance[cluster_id]; c1_id = cluster_id; } } return [c1_id, c2_id, min]; } function remove_clusters(c1, c2) { delete clusters[c1]; delete clusters[c2]; delete link_distance[c1]; delete link_distance[c2]; delete cluster_distance_matrix[c1]; delete cluster_distance_matrix[c2]; var keys = Object.keys(cluster_distance_matrix); for (var i = 0; i < keys.length; i++) { if (typeof cluster_distance_matrix[i] !== 'undefined') { delete cluster_distance_matrix[i][c1]; delete cluster_distance_matrix[i][c2]; } } } function merge_clusters() { var to_merge = get_closest_clusters(); var new_cluster = { name: clusters.length, size: clusters[to_merge[0]].size + clusters[to_merge[1]].size, children: [clusters[to_merge[0]], clusters[to_merge[1]]] }; clusters[to_merge[0]].parent = new_cluster; clusters[to_merge[1]].parent = new_cluster; remove_clusters(to_merge[0], to_merge[1]); update_point_cluster_assignment(new_cluster.name, to_merge[0], to_merge[1]); var coordinates = compute_centroid(new_cluster.name); new_cluster.coordinates = coordinates; clusters.push(new_cluster); update_distance_matrix(new_cluster.name); update_next_link_index(to_merge[0], to_merge[1]); return new_cluster; } var core = function core() { // main algorithm loop var num_clusters = point_data.length; init(); update_distance_matrix(); update_next_link_index(); while (num_clusters > 1) { merge_clusters(); num_clusters--; } return clusters; }; core.leafNodes = function () { return leaf_nodes; }; core.distance = function (fn) { if (arguments.length == 1) { if (typeof fn === 'string') { switch (fn) { case 'HAVERSINE': distance = haversine_distance; break; case 'EUCLIDEAN': distance = euclidean_distance; break; case 'MANHATTAN': distance = manhattan_distance; break; default: distance = euclidean_distance; } } else if (typeof fn === 'function') { distance = fn; } } return core; }; core.linkage = function (l) { if (typeof l === 'string') { if (l === 'AVERAGE' || l === 'COMPLETE' || l === 'SINGLE') linkage = l; } return core; }; core.data = function (data) { if (_typeof(data) != undefined) { point_data = data; } return core; }; return core; }; return jHC; }); //# sourceMappingURL=jHC.js.map