@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
143 lines (129 loc) • 2.93 kB
JavaScript
class KMeans {
constructor(x) {
this._x = x
this._k = 2
const n = this._x.length
const idx = []
for (let i = 0; i < this._k; i++) {
idx.push(Math.floor(Math.random() * (n - i)))
}
if (idx[0] <= idx[1]) {
idx[1]++
}
this._c = idx.map(v => this._x[v])
this._d = (a, b) => Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
}
get c() {
return this._c
}
fit() {
const p = this.predict()
const c = this._c.map(p => Array.from(p, () => 0))
const count = Array(this._k).fill(0)
const n = this._x.length
for (let i = 0; i < n; i++) {
for (let j = 0; j < this._x[i].length; j++) {
c[p[i]][j] += this._x[i][j]
}
count[p[i]]++
}
let d = 0
for (let k = 0; k < this._k; k++) {
const mc = c[k].map(v => v / count[k])
d += this._c[k].reduce((s, v, j) => s + (v - mc[j]) ** 2, 0)
this._c[k] = c[k].map(v => v / count[k])
}
return d
}
predict() {
const p = []
const n = this._x.length
for (let i = 0; i < n; i++) {
let min_d = Infinity
p[i] = -1
for (let k = 0; k < this._k; k++) {
const d = this._d(this._x[i], this._c[k])
if (d < min_d) {
min_d = d
p[i] = k
}
}
}
return p
}
}
/**
* Bisecting k-Means algorithm
*/
export default class BisectingKMeans {
// A Comparison of Document Clustering Technique
// https://www.philippe-fournier-viger.com/spmf/bisectingkmeans.pdf
// https://scikit-learn.org/stable/modules/generated/sklearn.cluster.BisectingKMeans.html
constructor() {
this._c = []
this._d = (a, b) => Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
}
/**
* Centroids
* @type {Array<Array<number>>}
*/
get centroids() {
return this._c
}
/**
* Fit model.
* @param {Array<Array<number>>} datas Training data
*/
fit(datas) {
const n = datas.length
const d = datas[0].length
if (this._c.length === 0) {
this._c[0] = Array(d).fill(0)
for (let i = 0; i < n; i++) {
for (let j = 0; j < d; j++) {
this._c[0][j] += datas[i][j]
}
}
this._c[0] = this._c[0].map(v => v / n)
return
}
const p = this.predict(datas)
const ns = Array(this._c.length).fill(0)
for (let i = 0; i < n; i++) {
ns[p[i]]++
}
let max_k = 0
for (let k = 1; k < this._c.length; k++) {
if (ns[max_k] < ns[k]) {
max_k = k
}
}
if (ns[max_k] <= 1) {
return
}
const xk = datas.filter((_, i) => p[i] === max_k)
const model = new KMeans(xk)
while (model.fit() > 1.0e-12);
this._c.splice(max_k, 1, ...model.c)
}
/**
* Returns predicted categories.
* @param {Array<Array<number>>} datas Sample data
* @returns {number[]} Predicted values
*/
predict(datas) {
const p = []
for (let i = 0; i < datas.length; i++) {
let min_d = Infinity
p[i] = -1
for (let k = 0; k < this._c.length; k++) {
const d = this._d(datas[i], this._c[k])
if (d < min_d) {
min_d = d
p[i] = k
}
}
}
return p
}
}