@thi.ng/k-means
Version:
k-means & k-medians with customizable distance functions and centroid initializations for n-D vectors
81 lines • 2.99 kB
TypeScript
import type { IDistance } from "@thi.ng/distance";
import type { IRandom } from "@thi.ng/random";
import type { ReadonlyVec } from "@thi.ng/vectors";
import type { CentroidStrategy, Cluster, KMeansOpts } from "./api.js";
/**
* Takes an array of n-dimensional `samples` and attempts to assign them to up
* to `k` clusters (might produce fewer), using the behavior defined by the
* optionally given `opts`.
*
* @remarks
* Initial centroids are chosen via {@link kmeansPlusPlus} by default, and new
* centroids are formed using the {@link means} strategy by default. Other
* strategies (e.g. {@link medians} or {@link meansLatLon}) can be supplied via
* `opts`.
*
* References:
*
* - https://en.wikipedia.org/wiki/K-means_clustering
* - https://en.wikipedia.org/wiki/K-medians_clustering
*
* @param k - maximum number of clusters to produce
* @param samples - n-dimensional vectors to be clustered
* @param opts - optional (partial) {@link KMeansOpts} to customize the
* clustering behavior
* @returns array of resulting clusters (possibly fewer than `k`)
*/
export declare const kmeans: <T extends ReadonlyVec>(k: number, samples: T[], opts?: Partial<KMeansOpts>) => Cluster[];
/**
* k-means++ initialization / selection of initial cluster centroids. Default
* centroid initialization method for {@link kmeans}.
*
* @remarks
* Might return fewer than `k` centroids if the requested number cannot be
* fulfilled (e.g. due to lower number of samples and/or distance metric).
* Throws an error if `samples` is empty.
*
* The optional `exponent` (default: 2) is applied to scale the distances to
* the nearest centroid, which are then used to control the weight distribution
* for choosing the next centroid. A higher exponent means that points with
* larger distances will be prioritized more in the random selection.
*
* References:
*
* - https://en.wikipedia.org/wiki/K-means%2B%2B
* - http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf
* - http://vldb.org/pvldb/vol5/p622_bahmanbahmani_vldb2012.pdf (TODO)
*
* @param k - requested number of centroids
* @param samples - n-dimensional vectors to select the centroids from (must
* not be empty)
* @param dist - optional distance metric used to measure the distance of
* samples to their nearest centroid
* @param rnd - optional random number generator used for the weighted random
* selection
* @param exponent - optional exponent for scaling distances (default: 2)
* @returns array of selected centroids (possibly fewer than `k`)
* @throws an error if `samples` is empty
*/
export declare const kmeansPlusPlus: <T extends ReadonlyVec>(k: number, samples: T[], dist?: IDistance<ReadonlyVec>, rnd?: IRandom, exponent?: number) => T[];
/**
* Default centroid strategy, forming new centroids by averaging the positions
* of participating samples.
*
* @remarks
* See {@link medians} and {@link meansLatLon} for alternative strategies.
*
* @param dim - presumably the dimensionality of the sample vectors (the
* {@link CentroidStrategy} type is declared in `./api.js`; confirm there)
*/
export declare const means: CentroidStrategy;
/**
* Centroid strategy forming new centroids via component-wise medians of the
* participating samples (i.e. k-medians clustering), as an alternative to the
* default {@link means} strategy.
*
* @remarks
* https://en.wikipedia.org/wiki/K-medians_clustering
*/
export declare const medians: CentroidStrategy;
/**
* Means centroid strategy for decimal degree lat/lon positions (e.g. WGS84).
* Unlike the default {@link means} strategy, this one treats longitude values
* correctly in terms of the ±180 deg boundary and ensures that samples on
* either side of the Pacific form correct centroids.
*
* @remarks
* When using this strategy, you should also use the
* [`HAVERSINE_LATLON`](https://docs.thi.ng/umbrella/distance/variables/HAVERSINE_LATLON.html)
* distance metric for {@link KMeansOpts.dist}.
*
* Reference: https://en.wikipedia.org/wiki/World_Geodetic_System
*
* @example
* ```ts
* import { kmeans, meansLatLon } from "@thi.ng/k-means";
* import { HAVERSINE_LATLON } from "@thi.ng/distance";
*
* kmeans(3, [...], { strategy: meansLatLon, dist: HAVERSINE_LATLON })
* ```
*/
export declare const meansLatLon: CentroidStrategy;
//# sourceMappingURL=kmeans.d.ts.map