@jsmlt/jsmlt
Version:
JavaScript Machine Learning
248 lines (186 loc) • 12.3 kB
JavaScript
;
// Flag this CommonJS module as transpiled from an ES module so interop helpers
// (which test `__esModule`) return it unwrapped.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare the default export up front; it is assigned at the bottom of the file.
exports["default"] = void 0;
// Base class that KMeans extends (read through its `default` property).
var _base = _interopRequireDefault(require("../base"));
// Array helpers used below: getShape, norm, sum, scale, power, equal, argMax.
var Arrays = _interopRequireWildcard(require("../../arrays"));
// Random helpers used below: sample.
var Random = _interopRequireWildcard(require("../../random"));
/**
 * Return the WeakMap that caches namespace wrappers built by
 * `_interopRequireWildcard`, creating it on first use.
 *
 * @returns {WeakMap|null} The shared cache, or null when WeakMap is unavailable
 */
function _getRequireWildcardCache() {
  if (typeof WeakMap !== "function") {
    return null;
  }

  var wrapperCache = new WeakMap();

  // Replace this function with one that always hands back the same instance, so
  // every later call shares a single cache.
  _getRequireWildcardCache = function _getRequireWildcardCache() {
    return wrapperCache;
  };

  return wrapperCache;
}
/**
 * Wrap a required module so it can be used like an ES module namespace object
 * (`import * as ns`).
 *
 * - Modules flagged with `__esModule` are returned unchanged.
 * - `null`, `undefined` and primitives are wrapped as `{ default: value }` without
 *   touching the cache: a WeakMap only accepts objects as keys, so caching them
 *   would throw a TypeError.
 * - Any other object or function gets a wrapper that copies its own enumerable
 *   properties (accessors are copied as accessors) and exposes the original
 *   value as `default`. The wrapper is cached per module when a cache exists.
 *
 * @param {*} obj - Value returned by `require`
 * @returns {Object} The module itself or a namespace-like wrapper around it
 */
function _interopRequireWildcard(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  // Non-objects have no own properties worth copying and cannot be WeakMap keys
  if (obj === null || (typeof obj !== "object" && typeof obj !== "function")) {
    return { "default": obj };
  }

  var cache = _getRequireWildcardCache();

  if (cache && cache.has(obj)) {
    return cache.get(obj);
  }

  var newObj = {};
  var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor;

  for (var key in obj) {
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null;

      if (desc && (desc.get || desc.set)) {
        // Keep getters/setters live instead of snapshotting their current value
        Object.defineProperty(newObj, key, desc);
      } else {
        newObj[key] = obj[key];
      }
    }
  }

  newObj["default"] = obj;

  if (cache) {
    cache.set(obj, newObj);
  }

  return newObj;
}
/**
 * Make a required module usable through a `default` property.
 *
 * @param {*} obj - Value returned by `require`
 * @returns {Object} `obj` itself when it is flagged `__esModule`, otherwise
 *   `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    "default": obj
  };
}
/**
 * `typeof` that also reports "symbol" for polyfilled Symbols.
 *
 * The first call picks an implementation based on the environment and replaces
 * this function with it, so the feature check runs only once.
 *
 * @param {*} obj - Value to inspect
 * @returns {string} The type name
 */
function _typeof(obj) {
  var hasNativeSymbols = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";

  if (hasNativeSymbols) {
    // Native symbols: the built-in operator is already correct
    _typeof = function _typeof(value) {
      return typeof value;
    };
  } else {
    // Polyfilled symbols are objects; recognise them through their constructor
    _typeof = function _typeof(value) {
      return value && typeof Symbol === "function" && value.constructor === Symbol && value !== Symbol.prototype ? "symbol" : typeof value;
    };
  }

  return _typeof(obj);
}
/**
 * Turn a spreadable value into a fresh array (implements `[...arr]`).
 *
 * @param {*} arr - Array or iterable to copy
 * @returns {Array} A new array with the elements of `arr`
 * @throws {TypeError} Raised by `_nonIterableSpread` when `arr` cannot be spread
 */
function _toConsumableArray(arr) {
  // Real arrays are copied element by element
  var copied = _arrayWithoutHoles(arr);

  if (copied) {
    return copied;
  }

  // Anything else must be iterable (or an arguments object)
  var collected = _iterableToArray(arr);

  if (collected) {
    return collected;
  }

  return _nonIterableSpread();
}
/**
 * Fail a spread operation whose operand is neither an array nor iterable.
 *
 * @throws {TypeError} Always
 */
function _nonIterableSpread() {
  var failure = new TypeError("Invalid attempt to spread non-iterable instance");
  throw failure;
}
/**
 * Convert an iterable or an `arguments` object into an array.
 *
 * @param {*} iter - Candidate value
 * @returns {Array|undefined} `Array.from(iter)`, or undefined when `iter` is
 *   neither iterable nor an arguments object
 */
function _iterableToArray(iter) {
  var isIterable = Symbol.iterator in Object(iter);

  if (isIterable || Object.prototype.toString.call(iter) === "[object Arguments]") {
    return Array.from(iter);
  }

  return undefined;
}
/**
 * Copy an array index by index, so holes in the source become explicit
 * `undefined` entries in the copy.
 *
 * @param {*} arr - Candidate value
 * @returns {Array|undefined} The copy, or undefined when `arr` is not an array
 */
function _arrayWithoutHoles(arr) {
  if (!Array.isArray(arr)) {
    return undefined;
  }

  var copy = new Array(arr.length);
  var index = 0;

  while (index < arr.length) {
    copy[index] = arr[index];
    index += 1;
  }

  return copy;
}
/**
 * List an object's own enumerable string keys followed by its own symbol keys.
 *
 * @param {Object} object - Object to inspect
 * @param {boolean} [enumerableOnly] - When truthy, drop non-enumerable symbols
 * @returns {Array.<string|symbol>} The collected keys
 */
function ownKeys(object, enumerableOnly) {
  var keys = Object.keys(object);

  // Environments without symbol support have nothing more to add
  if (!Object.getOwnPropertySymbols) {
    return keys;
  }

  var symbols = Object.getOwnPropertySymbols(object);

  if (enumerableOnly) {
    symbols = symbols.filter(function (sym) {
      return Object.getOwnPropertyDescriptor(object, sym).enumerable;
    });
  }

  keys.push.apply(keys, symbols);
  return keys;
}
/**
 * Copy properties from each source argument onto `target` (implements object
 * spread).
 *
 * Sources at odd argument positions (1, 3, ...) are copied by value through
 * `_defineProperty`. Sources at even positions (2, 4, ...) are copied by property
 * descriptor. `null` and `undefined` sources are treated as empty objects.
 *
 * @param {Object} target - Object that receives the properties
 * @returns {Object} `target`
 */
function _objectSpread(target) {
  var sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (candidate, offset) {
    // Position of this source in the original argument list
    var position = offset + 1;
    var source = candidate != null ? candidate : {};

    if (position % 2) {
      ownKeys(source, true).forEach(function (key) {
        _defineProperty(target, key, source[key]);
      });
      return;
    }

    if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
      return;
    }

    ownKeys(source).forEach(function (key) {
      Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
    });
  });

  return target;
}
/**
 * Set `obj[key]` to `value`.
 *
 * When the key is already reachable on `obj` (own or inherited) the property is
 * defined explicitly as an own, enumerable, configurable, writable data
 * property; otherwise plain assignment is used.
 *
 * @param {Object} obj - Object to modify
 * @param {string|symbol} key - Property key
 * @param {*} value - Property value
 * @returns {Object} `obj`
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }

  Object.defineProperty(obj, key, {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  });
  return obj;
}
/**
 * Guard against invoking a transpiled class constructor without `new`.
 *
 * @param {*} instance - The `this` value inside the constructor
 * @param {Function} Constructor - The class being constructed
 * @throws {TypeError} When `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  var calledWithNew = instance instanceof Constructor;

  if (calledWithNew) {
    return;
  }

  throw new TypeError("Cannot call a class as a function");
}
/**
 * Define a list of class members on `target`.
 *
 * Each descriptor is normalised in place before use: `enumerable` defaults to
 * false, `configurable` is forced to true, and data descriptors (those with a
 * `value`) are made writable.
 *
 * @param {Object} target - Object that receives the members
 * @param {Array.<Object>} props - Property descriptors carrying an extra `key`
 */
function _defineProperties(target, props) {
  var index = 0;

  while (index < props.length) {
    var descriptor = props[index];
    descriptor.enumerable = descriptor.enumerable || false;
    descriptor.configurable = true;

    if ("value" in descriptor) {
      descriptor.writable = true;
    }

    Object.defineProperty(target, descriptor.key, descriptor);
    index += 1;
  }
}
/**
 * Attach instance and static members to a constructor function.
 *
 * @param {Function} Constructor - Class constructor
 * @param {Array.<Object>} [protoProps] - Members for `Constructor.prototype`
 * @param {Array.<Object>} [staticProps] - Members for `Constructor` itself
 * @returns {Function} `Constructor`
 */
function _createClass(Constructor, protoProps, staticProps) {
  // Instance members live on the prototype
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }

  // Static members live on the constructor
  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }

  return Constructor;
}
/**
 * Decide what a derived constructor should use as `this` after calling its
 * parent constructor.
 *
 * @param {Object} self - The derived constructor's own `this`
 * @param {*} call - Whatever the parent constructor returned
 * @returns {Object} `call` when it is an object or function, otherwise `self`
 * @throws {ReferenceError} Raised by `_assertThisInitialized` when `self` is
 *   undefined
 */
function _possibleConstructorReturn(self, call) {
  var parentReturnedObject = call && (_typeof(call) === "object" || typeof call === "function");

  if (parentReturnedObject) {
    return call;
  }

  return _assertThisInitialized(self);
}
/**
 * Ensure `this` exists before a derived constructor uses it.
 *
 * @param {*} self - The constructor's `this`
 * @returns {*} `self`, unchanged
 * @throws {ReferenceError} When `self` is undefined
 */
function _assertThisInitialized(self) {
  if (self !== void 0) {
    return self;
  }

  throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
}
/**
 * Return the prototype of `o`.
 *
 * The first call selects an implementation and replaces this function with it:
 * `Object.getPrototypeOf` when `Object.setPrototypeOf` exists, otherwise a
 * fallback that reads `__proto__` first.
 *
 * @param {Object} o - Object to inspect
 * @returns {Object|null} The prototype of `o`
 */
function _getPrototypeOf(o) {
  if (Object.setPrototypeOf) {
    _getPrototypeOf = Object.getPrototypeOf;
  } else {
    _getPrototypeOf = function _getPrototypeOf(target) {
      return target.__proto__ || Object.getPrototypeOf(target);
    };
  }

  return _getPrototypeOf(o);
}
/**
 * Set up prototype-based inheritance between two constructors (implements
 * `class A extends B`).
 *
 * @param {Function} subClass - Derived constructor
 * @param {Function|null} superClass - Parent constructor, or null
 * @throws {TypeError} When `superClass` is neither a function nor null
 */
function _inherits(subClass, superClass) {
  var isValidParent = typeof superClass === "function" || superClass === null;

  if (!isValidParent) {
    throw new TypeError("Super expression must either be null or a function");
  }

  // Instances inherit from the parent's prototype; `constructor` points back at
  // the subclass and stays non-enumerable.
  var parentPrototype = superClass && superClass.prototype;
  subClass.prototype = Object.create(parentPrototype, {
    constructor: {
      value: subClass,
      writable: true,
      configurable: true
    }
  });

  // Statics are inherited by chaining the constructors themselves
  if (superClass) {
    _setPrototypeOf(subClass, superClass);
  }
}
/**
 * Set the prototype of `o` to `p`.
 *
 * The first call selects an implementation and replaces this function with it:
 * `Object.setPrototypeOf` when available, otherwise a fallback that assigns
 * `__proto__`.
 *
 * @param {Object} o - Object to modify
 * @param {Object|null} p - New prototype
 * @returns {Object} `o`
 */
function _setPrototypeOf(o, p) {
  if (Object.setPrototypeOf) {
    _setPrototypeOf = Object.setPrototypeOf;
  } else {
    _setPrototypeOf = function _setPrototypeOf(target, proto) {
      target.__proto__ = proto;
      return target;
    };
  }

  return _setPrototypeOf(o, p);
}
/**
 * k-means clusterer.
 *
 * Training alternates between assigning every sample to its nearest centroid and
 * moving every centroid to the mean of the samples assigned to it. It stops when
 * the assignments no longer change, or after 100 epochs.
 */
var KMeans =
/*#__PURE__*/
function (_Clusterer) {
  _inherits(KMeans, _Clusterer);

  /**
   * Constructor. Initialize class members and store user-defined options.
   *
   * @param {Object} [optionsUser] - User-defined options for k-means
   * @param {number} [optionsUser.numClusters = 2] - Number of clusters to assign in total
   * @param {string} [optionsUser.initialization = 'kmeans++'] - Initialization procedure for
   *   cluster centers. Either 'kmeans++', for initializing cluster centroids with the
   *   [kmeans++ procedure](https://en.wikipedia.org/wiki/K-means%2B%2B), or 'random', for
   *   randomly selecting (without replacement) a datapoint for each cluster center. Any value
   *   other than 'kmeans++' selects the random procedure
   */
  function KMeans() {
    var _this;

    var optionsUser = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};

    _classCallCheck(this, KMeans);

    _this = _possibleConstructorReturn(this, _getPrototypeOf(KMeans).call(this));

    // Parse options: user-supplied values override the defaults
    var optionsDefault = {
      numClusters: 2,
      initialization: 'kmeans++'
    };

    var options = _objectSpread({}, optionsDefault, {}, optionsUser);

    // Set options
    _this.numClusters = options.numClusters;
    _this.initialization = options.initialization;
    return _this;
  }

  _createClass(KMeans, [{
    /**
     * Initialize the centroids of each of the clusters based on the user's settings. Reads
     * `this.numSamples`, which `train` sets before calling this method, and stores the result
     * in `this.centroids`.
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     */
    key: "initializeCentroids",
    value: function initializeCentroids(X) {
      var self = this;

      // Get indices [0, ..., n-1] for n datapoints
      var indices = _toConsumableArray(Array(this.numSamples)).map(function (x, i) {
        return i;
      });

      if (this.initialization !== 'kmeans++') {
        // Random initialization. Sample a random index (without replacement) for each cluster,
        // and use its features as the initial centroid for that cluster
        this.centroids = Random.sample(indices, this.numClusters).map(function (x) {
          return X[x];
        });
        return;
      }

      // kmeans++ initialization. Start from an empty list of centroids
      this.centroids = [];

      // Distance (as computed by Arrays.norm) from a sample to its nearest chosen centroid
      var distanceToNearestCentroid = function distanceToNearestCentroid(sampleIndex) {
        return Math.min.apply(Math, self.centroids.map(function (centroid) {
          return Arrays.norm(Arrays.sum(centroid, Arrays.scale(X[sampleIndex], -1)));
        }));
      };

      var isPositive = function isPositive(distance) {
        return distance > 0;
      };

      for (var i = 0; i < this.numClusters; i += 1) {
        // The first centroid is drawn uniformly. So is any later one when every remaining
        // sample coincides with an existing centroid (e.g. 3 clusters and 3 samples, 2 of which
        // have the same features), because all weights would be zero
        var weights = 'uniform';

        if (this.centroids.length) {
          // Step 1. Compute the distance of each remaining sample to the nearest centroid
          var minDistances = indices.map(distanceToNearestCentroid);

          if (minDistances.some(isPositive)) {
            // Step 2. Use the squared distances as the weights for sampling a data point for
            // the new cluster centroid
            weights = Arrays.power(minDistances, 2);
          }
        }

        // Step 3. Choose a data point from the remaining data points at random, with the
        // computed sample weights. Use it as the new cluster centroid, and remove it from the
        // list of potential cluster centroids
        var sampleIndex = Random.sample(indices, 1, false, weights)[0];
        this.centroids.push(X[sampleIndex]);
        indices = indices.filter(function (x) {
          return x !== sampleIndex;
        });
      }
    }
    /**
     * Fit the cluster centroids to the data. Sets `this.numSamples`, `this.numFeatures` and
     * `this.centroids`.
     *
     * @see {@link Clusterer#train}
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     * @throws {Error} When there are fewer samples than clusters
     */

  }, {
    key: "train",
    value: function train(X) {
      // Number of samples and number of features per sample
      var shape = Arrays.getShape(X);
      this.numSamples = shape[0];
      this.numFeatures = shape[1];

      // Check whether there aren't more clusters than samples
      if (this.numSamples < this.numClusters) {
        throw new Error("Too many clusters (numClusters=" + this.numClusters + ") for the number of samples (numSamples=" + this.numSamples + "). The number of clusters should be less than or equal to the number of samples.");
      }

      // Initialize cluster centroids
      this.initializeCentroids(X);

      // Keep track of current and last cluster assignments for all samples
      var assignments = [];
      var assignmentsPrevious;
      var epoch = 0;

      // Upper bound on the number of assign/update rounds
      var maxEpochs = 100;

      do {
        // Recalculate clusters (skipped in the first round, when nothing is assigned yet)
        if (assignments.length > 0) {
          // For each cluster, calculate the new centroid as the mean of the features of all
          // samples assigned to that cluster
          this.centroids = this.centroids.map(function (centroid, clusterId) {
            var members = X.filter(function (x, i) {
              return assignments[i] === clusterId;
            });

            // If there are no samples assigned to this cluster, keep the centroid the same.
            // This is to prevent unstable behaviour from happening
            if (members.length === 0) {
              return centroid;
            }

            // Sum of all assigned samples, divided by the number of assignments
            return Arrays.scale(Arrays.sum.apply(Arrays, members), 1 / members.length);
          });
        }

        // Store previous assignments
        assignmentsPrevious = assignments.slice();

        // Assign clusters to samples
        assignments = this.cluster(X);
        epoch += 1;
      } while (!Arrays.equal(assignments, assignmentsPrevious) && epoch < maxEpochs);
    }
    /**
     * Assign each sample to the cluster whose centroid is nearest. Requires `this.centroids`
     * to be set, which `train` does.
     *
     * @see {@link Clusterer#cluster}
     *
     * @param {Array.<Array.<number>>} X - Features per data point
     * @returns {Array.<number>} Index of the assigned centroid for each sample
     */

  }, {
    key: "cluster",
    value: function cluster(X) {
      var self = this;
      return X.map(function (x) {
        // Minimize the distance to a centroid by maximizing the negated distance
        var negatedDistances = self.centroids.map(function (centroid) {
          return -Arrays.norm(Arrays.sum(centroid, Arrays.scale(x, -1)));
        });
        return Arrays.argMax(negatedDistances);
      });
    }
  }]);

  return KMeans;
}(_base["default"]);
// Publish the class as the module's default export.
exports["default"] = KMeans;
// Replace module.exports with the class itself, so a plain `require(...)` of this
// file yields KMeans directly rather than an object with a `default` property.
module.exports = exports.default;