// entropyx
// Version:
// A simple data mining library, written in TypeScript
// 103 lines • 3.54 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.KMeans = void 0;
const distance_1 = require("@/base/distance");
/**
 * K-means clustering over an array of numeric points.
 *
 * Each point is an array of numbers; all points are expected to have the same
 * number of dimensions as the first point. Initial centroids are copies of
 * `k` distinct, randomly chosen data points, so results may differ from run
 * to run.
 */
class KMeans {
    /**
     * @param {object} [options] Clustering options.
     * @param {number} [options.k] Number of clusters; a falsy value
     *   (including 0) is replaced by 3.
     * @param {number} [options.maxIterations] Upper bound on assignment/update
     *   rounds; a falsy value is replaced by 100.
     * @param {function(number[], number[]): number} [options.distance]
     *   Called as `distance(point, centroid)`; smaller means closer. When
     *   omitted, `Distance.euclidean` from the imported distance module is used.
     */
    constructor(options = {}) {
        // Work on a shallow copy so filling in defaults never mutates the
        // object the caller passed in (which may be shared between instances).
        this.options = Object.assign({}, options);
        this.data = [];
        this.centroids = [];
        if (!this.options.k) {
            this.options.k = 3;
        }
        if (!this.options.maxIterations) {
            this.options.maxIterations = 100;
        }
        if (!this.options.distance) {
            this.options.distance = distance_1.Distance.euclidean;
        }
    }

    /**
     * Clusters `data` and stores the data and final centroids on the instance.
     *
     * Iterates assignment and centroid update until the assignments stop
     * changing or `maxIterations` rounds have run.
     *
     * @param {number[][]} data Points to cluster.
     * @returns {{clusters: number[][][], centroids: number[][]}} `clusters[i]`
     *   holds the points assigned to `centroids[i]`.
     * @throws {RangeError} If there are fewer data points than clusters.
     */
    fit(data) {
        this.data = data;
        const { k, maxIterations, distance } = this.options;
        this.centroids = this.initializeCentroids(this.data, k);
        // -1 matches no cluster index, so the first round is never mistaken
        // for convergence.
        let assignments = new Array(this.data.length).fill(-1);
        for (let iter = 0; iter < maxIterations; iter++) {
            const newAssignments = this.assignClusters(this.data, this.centroids, distance);
            if (KMeans.arraysEqual(assignments, newAssignments)) {
                break;
            }
            assignments = newAssignments;
            this.centroids = this.computeCentroids(this.data, assignments, k, this.centroids);
        }
        const clusters = [];
        for (let i = 0; i < k; i++) {
            clusters.push([]);
        }
        for (let i = 0; i < this.data.length; i++) {
            clusters[assignments[i]].push(this.data[i]);
        }
        return {
            clusters,
            centroids: this.centroids,
        };
    }

    /**
     * Picks `k` distinct data points at random and returns copies of them.
     *
     * @param {number[][]} data Candidate points.
     * @param {number} k Number of centroids to pick.
     * @returns {number[][]} Copies of the chosen points.
     * @throws {RangeError} If `k` exceeds the number of data points; without
     *   this check the rejection-sampling loop below could never finish.
     */
    initializeCentroids(data, k) {
        if (k > data.length) {
            throw new RangeError(`Cannot pick ${k} distinct centroids from ${data.length} data point(s)`);
        }
        const centroids = [];
        const usedIndices = new Set();
        while (centroids.length < k) {
            const randomIndex = Math.floor(Math.random() * data.length);
            if (!usedIndices.has(randomIndex)) {
                usedIndices.add(randomIndex);
                // Copy so later centroid updates never alias caller data.
                centroids.push([...data[randomIndex]]);
            }
        }
        return centroids;
    }

    /**
     * Maps every point to the index of its nearest centroid.
     *
     * Ties go to the lowest centroid index. A point whose distance to every
     * centroid fails the `<` comparison (e.g. NaN) is assigned to index 0.
     *
     * @param {number[][]} data Points to assign.
     * @param {number[][]} centroids Current centroids.
     * @param {function(number[], number[]): number} distanceFn Distance measure.
     * @returns {number[]} One centroid index per point, in input order.
     */
    assignClusters(data, centroids, distanceFn) {
        return data.map((point) => {
            let minDist = Number.MAX_VALUE;
            let clusterIndex = 0;
            for (let i = 0; i < centroids.length; i++) {
                const dist = distanceFn(point, centroids[i]);
                if (dist < minDist) {
                    minDist = dist;
                    clusterIndex = i;
                }
            }
            return clusterIndex;
        });
    }

    /**
     * Computes each cluster's centroid as the per-dimension mean of its points.
     *
     * A cluster with no points keeps a copy of its previous centroid instead
     * of collapsing to the zero vector; if no previous centroid is available
     * for that index, the zero vector is used.
     *
     * @param {number[][]} data Points.
     * @param {number[]} assignments Cluster index for each point.
     * @param {number} k Number of clusters.
     * @param {number[][]} [previousCentroids=this.centroids] Fallback
     *   centroids for empty clusters.
     * @returns {number[][]} `k` new centroids.
     */
    computeCentroids(data, assignments, k, previousCentroids = this.centroids) {
        // Dimensionality is taken from the first point; empty data yields 0.
        const dims = data.length > 0 ? data[0].length : 0;
        const newCentroids = [];
        const clusterCounts = new Array(k).fill(0);
        for (let i = 0; i < k; i++) {
            newCentroids.push(new Array(dims).fill(0));
        }
        for (let i = 0; i < data.length; i++) {
            const clusterIndex = assignments[i];
            clusterCounts[clusterIndex]++;
            for (let dim = 0; dim < dims; dim++) {
                newCentroids[clusterIndex][dim] += data[i][dim];
            }
        }
        for (let i = 0; i < k; i++) {
            if (clusterCounts[i] === 0) {
                const previous = previousCentroids ? previousCentroids[i] : undefined;
                if (previous) {
                    newCentroids[i] = [...previous];
                }
                continue;
            }
            for (let dim = 0; dim < dims; dim++) {
                newCentroids[i][dim] /= clusterCounts[i];
            }
        }
        return newCentroids;
    }

    /**
     * Element-wise strict equality of two arrays.
     *
     * @param {Array} arr1 First array.
     * @param {Array} arr2 Second array.
     * @returns {boolean} True when lengths match and every pair is `===`.
     */
    static arraysEqual(arr1, arr2) {
        if (arr1.length !== arr2.length) {
            return false;
        }
        for (let i = 0; i < arr1.length; i++) {
            if (arr1[i] !== arr2[i]) {
                return false;
            }
        }
        return true;
    }
}
exports.KMeans = KMeans;
//# sourceMappingURL=kmeans.js.map