@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
218 lines (204 loc) • 5.17 kB
JavaScript
import Matrix from '../util/matrix.js'
/**
* arbitrarily ORiented projected CLUSter generation
*/
export default class ORCLUS {
	// Finding Generalized Projected Clusters in High Dimensional Spaces
	// https://dl.acm.org/doi/pdf/10.1145/342009.335383
	/**
	 * @param {number} k Number of clusters
	 * @param {number} k0 Number of beginning seeds
	 * @param {number} l Number of dimensions
	 */
	constructor(k, k0, l) {
		this._k = k
		this._k0 = k0
		this._l = l
		// Factor by which the number of clusters shrinks on every pass.
		this._alpha = 0.5
		// Factor by which the subspace dimensionality shrinks; computed in `fit`.
		this._beta = null
	}

	/**
	 * Euclidean distance between two points after projecting both onto the given basis.
	 *
	 * @private
	 * @param {number[]} a First point
	 * @param {number[]} b Second point
	 * @param {Array<Array<number>>} e Basis vectors, one array per vector. An empty basis yields 0.
	 * @returns {number} Projected distance
	 */
	_pdist(a, b, e) {
		let total = 0
		for (const axis of e) {
			let pa = 0
			let pb = 0
			for (let d = 0; d < axis.length; d++) {
				pa += axis[d] * a[d]
				pb += axis[d] * b[d]
			}
			total += (pa - pb) ** 2
		}
		return Math.sqrt(total)
	}

	/**
	 * Mean of the training rows selected by the given indices.
	 *
	 * @private
	 * @param {number[]} members Row indices into the training data. An empty list yields NaN entries.
	 * @returns {number[]} Centroid
	 */
	_centroid(members) {
		const dim = this._x[0].length
		const sum = Array(dim).fill(0)
		for (const i of members) {
			for (let d = 0; d < dim; d++) {
				sum[d] += this._x[i][d]
			}
		}
		return sum.map(v => v / members.length)
	}

	/**
	 * Index of the seed whose projected distance to the point is smallest (first one wins on ties).
	 *
	 * @private
	 * @param {number[]} x Point
	 * @returns {number} Seed index, or -1 when there is no seed or no distance compares below Infinity
	 */
	_nearest(x) {
		let best = -1
		let bestDist = Infinity
		for (let k = 0; k < this._s.length; k++) {
			const d = this._pdist(x, this._s[k], this._e[k])
			if (d < bestDist) {
				bestDist = d
				best = k
			}
		}
		return best
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		this._x = datas
		const n = this._x.length
		const dim = this._x[0].length

		// Never draw more seeds than there are samples: otherwise the sampling range below
		// becomes non-positive and the seed list ends up with duplicate or undefined rows.
		const seedCount = Math.min(this._k0, n)

		// Sample distinct row indices: draw from a shrinking range, then shift later draws
		// past the earlier ones.
		const idx = []
		for (let i = 0; i < seedCount; i++) {
			idx.push(Math.floor(Math.random() * (n - i)))
		}
		for (let i = idx.length - 1; i >= 0; i--) {
			for (let j = idx.length - 1; j > i; j--) {
				if (idx[i] <= idx[j]) {
					idx[j]++
				}
			}
		}
		this._s = idx.map(v => this._x[v])

		let lc = dim
		let kc = seedCount
		// Every seed starts with the full standard basis.
		this._e = []
		for (let i = 0; i < kc; i++) {
			this._e[i] = []
			for (let d = 0; d < lc; d++) {
				this._e[i][d] = Array(dim).fill(0)
				this._e[i][d][d] = 1
			}
		}
		this._beta = Math.exp((-Math.log(dim / this._l) * Math.log(1 / this._alpha)) / Math.log(kc / this._k))

		while (kc > this._k) {
			const [s, c] = this._assign()

			// Drop clusters that received no point. An empty cluster would get an empty basis,
			// and the projected distance over an empty basis is 0 for every point, so it would
			// absorb the whole data set in the next assignment and in `predict`.
			const live = []
			for (let i = 0; i < c.length; i++) {
				if (c[i].length > 0) {
					live.push(i)
				}
			}
			const members = live.map(i => c[i])
			this._s = live.map(i => s[i])
			this._e = members.map(ci => this._findVectors(ci, lc))

			// The reduction schedule is driven by the cluster count at the start of the pass.
			const knew = Math.max(this._k, kc * this._alpha)
			const lnew = Math.max(this._l, lc * this._beta)
			const [sn, , en] = this._merge(members, knew, lnew)
			this._s = sn
			this._e = en
			lc = lnew
			kc = this._s.length
		}
	}

	/**
	 * Assign every training row to its nearest seed and recompute the centroids.
	 *
	 * @private
	 * @returns {[Array<Array<number>>, Array<Array<number>>]} Centroids and, per cluster, the member row indices
	 */
	_assign() {
		const c = Array.from(this._s, () => [])
		for (let i = 0; i < this._x.length; i++) {
			c[this._nearest(this._x[i])].push(i)
		}
		// A cluster without members keeps (a copy of) its previous seed instead of a 0/0 = NaN centroid.
		const s = c.map((members, k) => (members.length > 0 ? this._centroid(members) : this._s[k].concat()))
		return [s, c]
	}

	/**
	 * Compute the projection basis of a cluster from the covariance of its members.
	 *
	 * NOTE(review): presumably `eigenVectors()` orders columns by decreasing eigenvalue and
	 * `slice(from, to, 1)` selects columns, so the result would be the `ceil(q)` directions of
	 * least spread, one per row - confirm against util/matrix.js.
	 *
	 * @private
	 * @param {number[]} c Member row indices
	 * @param {number} q Requested number of basis vectors (rounded up)
	 * @returns {Array<Array<number>>} Basis vectors, or an empty array for an empty cluster
	 */
	_findVectors(c, q) {
		if (c.length === 0) {
			return []
		}
		const d = this._x[0].length
		const m = Matrix.fromArray(c.map(v => this._x[v])).cov()
		const eigvec = m.eigenVectors()
		return eigvec.slice(d - Math.ceil(q), d, 1).t.toArray()
	}

	/**
	 * Statistics of the union of two clusters: its basis, its centroid and its projected energy
	 * (mean squared projected distance of the members to the centroid).
	 *
	 * @private
	 * @param {number[]} ci Member row indices of the first cluster
	 * @param {number[]} cj Member row indices of the second cluster
	 * @param {number} lnew Number of basis vectors to keep
	 * @returns {[Array<Array<number>>, number[], number]} Basis, centroid and energy
	 */
	_pairStats(ci, cj, lnew) {
		const members = ci.concat(cj)
		const basis = this._findVectors(members, lnew)
		const center = this._centroid(members)
		let energy = 0
		for (const i of members) {
			energy += this._pdist(this._x[i], center, basis) ** 2
		}
		energy /= members.length
		return [basis, center, energy]
	}

	/**
	 * Repeatedly merge the pair of clusters with the smallest projected energy until at most
	 * `knew` clusters remain. `this._s` and `this._e` must be aligned with `c`.
	 *
	 * @private
	 * @param {Array<Array<number>>} c Member row indices per cluster
	 * @param {number} knew Target number of clusters
	 * @param {number} lnew Number of basis vectors per merged cluster
	 * @returns {[Array<Array<number>>, Array<Array<number>>, Array<Array<Array<number>>>]} Centroids, members and bases
	 */
	_merge(c, knew, lnew) {
		// Upper-triangular tables indexed [i][j] with i < j.
		const ed = Array.from(c, () => [])
		const sd = Array.from(c, () => [])
		const r = Array.from(c, () => [])
		for (let i = 0; i < c.length; i++) {
			for (let j = i + 1; j < c.length; j++) {
				const [basis, center, energy] = this._pairStats(c[i], c[j], lnew)
				ed[i][j] = basis
				sd[i][j] = center
				r[i][j] = energy
			}
		}

		// Work on shallow copies so the caller's arrays are left untouched.
		const clusters = c.concat()
		const s = this._s.concat()
		const e = this._e.concat()
		while (s.length > knew) {
			let id = -1
			let jd = -1
			let min_r = Infinity
			for (let i = 0; i < r.length; i++) {
				for (let j = i + 1; j < r.length; j++) {
					if (r[i][j] < min_r) {
						min_r = r[i][j]
						id = i
						jd = j
					}
				}
			}

			// Cluster `id` becomes the union; cluster `jd` (always > id) is removed.
			s[id] = sd[id][jd]
			clusters[id] = clusters[id].concat(clusters[jd])
			e[id] = ed[id][jd]
			for (const list of [s, clusters, e, sd, ed, r]) {
				list.splice(jd, 1)
			}

			for (let i = 0; i < s.length; i++) {
				// Remove column `jd` from the remaining rows.
				sd[i].splice(jd, 1)
				ed[i].splice(jd, 1)
				r[i].splice(jd, 1)
				if (i !== id) {
					// Refresh the entry that pairs the merged cluster with cluster `i`.
					const il = i < id ? i : id
					const ih = i < id ? id : i
					const [basis, center, energy] = this._pairStats(clusters[il], clusters[ih], lnew)
					ed[il][ih] = basis
					sd[il][ih] = center
					r[il][ih] = energy
				}
			}
		}
		return [s, clusters, e]
	}

	/**
	 * Returns predicted categories.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		const p = []
		for (let i = 0; i < datas.length; i++) {
			p[i] = this._nearest(datas[i])
		}
		return p
	}
}