UNPKG

entropyx

Version:

A simple data mining library, written in TypeScript

103 lines 3.54 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.KMeans = void 0; const distance_1 = require("@/base/distance"); class KMeans { constructor(options = {}) { this.options = options; this.data = []; this.centroids = []; if (!this.options.k) this.options.k = 3; if (!this.options.maxIterations) this.options.maxIterations = 100; if (!this.options.distance) { this.options.distance = distance_1.Distance.euclidean; } } fit(data) { this.data = data; const { k, maxIterations, distance } = this.options; this.centroids = this.initializeCentroids(this.data, k); let assignments = new Array(this.data.length).fill(-1); for (let iter = 0; iter < maxIterations; iter++) { const newAssignments = this.assignClusters(this.data, this.centroids, distance); if (KMeans.arraysEqual(assignments, newAssignments)) { break; } assignments = newAssignments; this.centroids = this.computeCentroids(this.data, assignments, k); } const clusters = []; for (let i = 0; i < k; i++) { clusters.push([]); } for (let i = 0; i < this.data.length; i++) { clusters[assignments[i]].push(this.data[i]); } return { clusters, centroids: this.centroids, }; } initializeCentroids(data, k) { const centroids = []; const usedIndices = new Set(); while (centroids.length < k) { const randomIndex = Math.floor(Math.random() * data.length); if (!usedIndices.has(randomIndex)) { usedIndices.add(randomIndex); centroids.push([...data[randomIndex]]); } } return centroids; } assignClusters(data, centroids, distanceFn) { return data.map((point) => { let minDist = Number.MAX_VALUE; let clusterIndex = 0; for (let i = 0; i < centroids.length; i++) { const dist = distanceFn(point, centroids[i]); if (dist < minDist) { minDist = dist; clusterIndex = i; } } return clusterIndex; }); } computeCentroids(data, assignments, k) { const newCentroids = []; const clusterCounts = new Array(k).fill(0); for (let i = 0; i < k; i++) { newCentroids.push(new Array(data[0].length).fill(0)); } for (let i = 0; i < data.length; i++) { const clusterIndex = assignments[i]; clusterCounts[clusterIndex]++; for (let dim = 0; dim < data[0].length; dim++) { newCentroids[clusterIndex][dim] += data[i][dim]; } } for (let i = 0; i < k; i++) { if (clusterCounts[i] === 0) { continue; } for (let dim = 0; dim < data[0].length; dim++) { newCentroids[i][dim] /= clusterCounts[i]; } } return newCentroids; } static arraysEqual(arr1, arr2) { if (arr1.length !== arr2.length) return false; for (let i = 0; i < arr1.length; i++) { if (arr1[i] !== arr2[i]) return false; } return true; } } exports.KMeans = KMeans; //# sourceMappingURL=kmeans.js.map