UNPKG

@jsmlt/jsmlt

Version:

JavaScript Machine Learning

243 lines (191 loc) 10 kB
'use strict';

Object.defineProperty(exports, "__esModule", {
  value: true
});

// --- Babel runtime helpers (generated code, kept functionally as emitted) ---

// Shallow-merge the own enumerable properties of every source argument into `target`
// (falls back to a manual loop when Object.assign is unavailable).
var _extends = Object.assign || function (target) {
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i];
    for (var key in source) {
      if (Object.prototype.hasOwnProperty.call(source, key)) {
        target[key] = source[key];
      }
    }
  }
  return target;
};

// Define prototype and static members on a constructor from lists of property descriptors.
var _createClass = function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }
  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
}();

// Internal dependencies
var _base = require('../base');

var _base2 = _interopRequireDefault(_base);

var _arrays = require('../../arrays');

var Arrays = _interopRequireWildcard(_arrays);

var _random = require('../../random');

var Random = _interopRequireWildcard(_random);

// Return ES modules untouched; wrap anything else in a namespace-like object whose own
// properties are copied over and whose `default` is the original export.
function _interopRequireWildcard(obj) {
  if (obj && obj.__esModule) {
    return obj;
  } else {
    var newObj = {};
    if (obj != null) {
      for (var key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key];
      }
    }
    newObj.default = obj;
    return newObj;
  }
}

// Return ES modules untouched; wrap anything else as `{ default: obj }`.
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

// Copy an array (or array-like/iterable, via Array.from) into a new dense array.
function _toConsumableArray(arr) {
  if (Array.isArray(arr)) {
    for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) {
      arr2[i] = arr[i];
    }
    return arr2;
  } else {
    return Array.from(arr);
  }
}

// Throw when a class constructor is invoked without `new`.
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}

// Use the super constructor's return value when it is an object or function, otherwise `self`.
function _possibleConstructorReturn(self, call) {
  if (!self) {
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
  }
  return call && (typeof call === "object" || typeof call === "function") ? call : self;
}

// Wire up prototype-chain and static inheritance between subClass and superClass.
function _inherits(subClass, superClass) {
  if (typeof superClass !== "function" && superClass !== null) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }
  subClass.prototype = Object.create(superClass && superClass.prototype, {
    constructor: {
      value: subClass,
      enumerable: false,
      writable: true,
      configurable: true
    }
  });
  if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass;
}

// --- Private helpers ---

// Upper bound on the number of assign/update rounds performed by KMeans#train.
var MAX_EPOCHS = 100;

/**
 * Norm of the difference between a centroid and a sample, computed as
 * norm(centroid + (-1 * sample)) with the project's Arrays helpers.
 *
 * @param {Array.<number>} centroid - Cluster centroid
 * @param {Array.<number>} sample - Features of a single data point
 * @return {number} Distance between the centroid and the sample
 */
function _distance(centroid, sample) {
  return Arrays.norm(Arrays.sum(centroid, Arrays.scale(sample, -1)));
}

/**
 * Build the list of indices [0, ..., n-1].
 *
 * @param {number} n - Number of indices to generate
 * @return {Array.<number>} Indices 0 up to (but not including) n
 */
function _indexRange(n) {
  return [].concat(_toConsumableArray(Array(n))).map(function (x, i) {
    return i;
  });
}

/**
 * k-means clusterer.
 */
var KMeans = function (_Clusterer) {
  _inherits(KMeans, _Clusterer);

  /**
   * Constructor. Initialize class members and store user-defined options.
   *
   * @param {Object} [optionsUser] - User-defined options for k-means
   * @param {number} [optionsUser.numClusters = 2] - Number of clusters to assign in total
   * @param {string} [optionsUser.initialization = 'kmeans++'] - Initialization procedure for
   *   cluster centers. Either 'kmeans++', for initializing cluster centroids with the
   *   [kmeans++ procedure](https://en.wikipedia.org/wiki/K-means%2B%2B), or any other value
   *   (e.g. 'random'), for randomly selecting (without replacement) a datapoint for each
   *   cluster center
   */
  function KMeans() {
    var optionsUser = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};

    _classCallCheck(this, KMeans);

    var _this = _possibleConstructorReturn(this, (KMeans.__proto__ || Object.getPrototypeOf(KMeans)).call(this));

    // Parse options: user-supplied values override the defaults
    var optionsDefault = {
      numClusters: 2,
      initialization: 'kmeans++'
    };

    var options = _extends({}, optionsDefault, optionsUser);

    // Set options
    _this.numClusters = options.numClusters;
    _this.initialization = options.initialization;
    return _this;
  }

  _createClass(KMeans, [{
    key: 'initializeCentroids',

    /**
     * Initialize the centroids of each of the clusters based on the user's settings. Reads
     * this.numSamples, this.numClusters and this.initialization, and stores the result in
     * this.centroids.
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     */
    value: function initializeCentroids(X) {
      var self = this;

      if (this.initialization !== 'kmeans++') {
        // Random initialization. Sample a random index (without replacement) for each cluster,
        // and use the features of that data point as the initial centroid for that cluster
        this.centroids = Random.sample(_indexRange(this.numSamples), this.numClusters).map(function (sampleIndex) {
          return X[sampleIndex];
        });
        return;
      }

      // kmeans++ initialization. Clear list of centroids
      this.centroids = [];

      // Indices of the data points that have not been chosen as a centroid yet
      var remaining = _indexRange(this.numSamples);

      for (var i = 0; i < this.numClusters; i += 1) {
        // The first centroid is always drawn uniformly
        var weights = 'uniform';

        if (this.centroids.length) {
          // Step 1. Compute the distance of each remaining sample to its nearest centroid
          var minDistances = remaining.map(function (sampleIndex) {
            return Math.min.apply(Math, self.centroids.map(function (centroid) {
              return _distance(centroid, X[sampleIndex]);
            }));
          });

          // Step 2. Use the squared distances as sampling weights. If all remaining samples have
          // distance 0 to the nearest cluster centroid, there are (too many) samples with the
          // exact same coordinates. This is a rare case, but it can happen, for example when
          // you have 3 clusters and 3 samples, and 2 of the samples have the same features. The
          // weights then stay uniform
          var hasPositiveDistance = minDistances.some(function (distance) {
            return distance > 0;
          });

          if (hasPositiveDistance) {
            weights = Arrays.power(minDistances, 2);
          }
        }

        // Step 3. Choose a data point from the remaining data points at random, with the
        // computed sample weights. Use it as the new cluster centroid, and remove it from the
        // list of potential cluster centroids
        var chosenIndex = Random.sample(remaining, 1, false, weights)[0];

        this.centroids.push(X[chosenIndex]);

        remaining = remaining.filter(function (sampleIndex) {
          return sampleIndex !== chosenIndex;
        });
      }
    }

    /**
     * Fit the cluster centroids to the data: initialize the centroids, then alternate between
     * assigning samples to clusters and recomputing the centroids until the assignments stop
     * changing or MAX_EPOCHS rounds have been run.
     *
     * @see {@link Clusterer#train}
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     * @throws {Error} If there are fewer samples than clusters
     */

  }, {
    key: 'train',
    value: function train(X) {
      // Number of samples and number of features per sample
      var shape = Arrays.getShape(X);

      this.numSamples = shape[0];
      this.numFeatures = shape[1];

      // Check whether there aren't more clusters than samples
      if (this.numSamples < this.numClusters) {
        throw new Error('Too many clusters (numClusters=' + this.numClusters + ') for the number of samples (numSamples=' + this.numSamples + '). The number of clusters should be less than or equal to the number of samples.');
      }

      // Initialize cluster centroids
      this.initializeCentroids(X);

      // Keep track of current and last cluster assignments for all samples
      var assignments = [];
      var assignmentsPrevious = void 0;
      var epoch = 0;

      do {
        // Recalculate centroids (skipped in the first round, when nothing is assigned yet)
        if (assignments.length > 0) {
          this.centroids = this.centroids.map(function (centroid, clusterId) {
            // All samples currently assigned to this cluster
            var members = X.filter(function (sample, sampleIndex) {
              return assignments[sampleIndex] === clusterId;
            });

            // If there are no samples assigned to this cluster, keep the centroid the same.
            // This is to prevent unstable behaviour from happening
            if (members.length === 0) {
              return centroid;
            }

            // The new cluster centroid is the mean of all samples assigned to this cluster:
            // the sum of the assigned samples divided by the number of assigned samples
            return Arrays.scale(Arrays.sum.apply(Arrays, members), 1 / members.length);
          });
        }

        // Store previous assignments
        assignmentsPrevious = assignments.slice();

        // Assign clusters to samples
        assignments = this.cluster(X);

        epoch += 1;
      } while (!Arrays.equal(assignments, assignmentsPrevious) && epoch < MAX_EPOCHS);
    }

    /**
     * Assign each sample to a cluster based on its distance to the current centroids.
     *
     * @see {@link Clusterer#cluster}
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     * @return {Array} Per sample, the result of Arrays.argMax over the negated distances to all
     *   centroids (presumably the index of the nearest centroid; confirm against Arrays.argMax)
     */

  }, {
    key: 'cluster',
    value: function cluster(X) {
      var self = this;

      return X.map(function (sample) {
        // Minimize the distance to a centroid by maximizing the negative distance
        var negativeDistances = self.centroids.map(function (centroid) {
          return -_distance(centroid, sample);
        });

        return Arrays.argMax(negativeDistances);
      });
    }
  }]);

  return KMeans;
}(_base2.default);

exports.default = KMeans;
module.exports = exports['default'];