// Package: @ai-on-browser/data-analysis-models
// Version: (not specified)
// Description: Data analysis model package without any dependencies
// Listing: 229 lines (215 loc), 5.03 kB
// Language: JavaScript
import Matrix from '../util/matrix.js'
/**
 * Linear manifold clustering (LMCLUS).
 *
 * While unlabeled points remain, candidate linear manifolds are sampled from them, the candidate whose
 * point-to-manifold distance histogram separates best is selected, and the points lying closer to it
 * than the selected threshold are peeled off as one cluster.
 */
export default class LMCLUS {
	// Linear manifold clustering in high dimensional spaces by stochastic search
	// https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=76925699b88b9e3c599269f214b0b50fb02bb1f6
	/**
	 * @param {number} k Max LM dim
	 * @param {number} s Sampling level
	 * @param {number} gamma Sensitivity threshold
	 */
	constructor(k, s, gamma) {
		this._k = k
		this._s = s
		this._gamma = gamma
		// Initialised here so that `size` and `predict()` work (as "no clusters") before `fit()` is called.
		this._c = []
		this._dims = []
	}

	/**
	 * Number of clusters.
	 * @type {number}
	 */
	get size() {
		return this._c.length
	}

	/**
	 * Fit model
	 *
	 * Clusters are stored as lists of row indices into `datas`; the manifold dimension reached for each
	 * cluster is recorded alongside.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		this._c = []
		this._dims = []
		const points = datas.concat()
		const labeled = Array(points.length).fill(false)
		while (labeled.some(v => !v)) {
			// Start every round from all points that do not belong to a cluster yet.
			let memberIdx = []
			for (let i = 0; i < labeled.length; i++) {
				if (!labeled[i]) {
					memberIdx.push(i)
				}
			}
			let members = memberIdx.map(i => points[i])
			let lmDim = 1
			for (let k = 1; k <= this._k; k++) {
				const [g, tau, phi, beta] = this._findSeparation(members, k, this._s)
				// Stop refining when the best separation is not good enough (or none was found at all).
				if (g <= this._gamma || phi === null) {
					break
				}
				const kept = []
				const keptIdx = []
				for (let j = 0; j < members.length; j++) {
					const xd = new Matrix(
						members[j].length,
						1,
						members[j].map((v, i) => v - phi[i])
					)
					const bxd = beta.dot(xd)
					// Squared norm of the centred point minus squared norm of its image under `beta`.
					const dist = xd.tDot(xd).toScaler() - bxd.tDot(bxd).toScaler()
					if (dist < tau) {
						kept.push(members[j])
						keptIdx.push(memberIdx[j])
					}
				}
				members = kept
				lmDim = k
				memberIdx = keptIdx
			}
			if (members.length === 0) {
				// Nothing survived the thresholds: retry the round with freshly sampled manifolds.
				continue
			}
			this._c.push(memberIdx)
			this._dims.push(lmDim)
			for (let i = 0; i < memberIdx.length; i++) {
				labeled[memberIdx[i]] = true
			}
		}
	}

	/**
	 * Draws `k` distinct indices from `0 .. n - 1` (requires `k <= n`).
	 * @param {number} n Size of the index range
	 * @param {number} k Number of indices to draw
	 * @returns {number[]} Distinct indices in draw order
	 */
	_sampleidx(n, k) {
		// The i-th draw is a position among the n - i values that are still unused.
		const idx = []
		for (let i = 0; i < k; i++) {
			idx.push(Math.floor(Math.random() * (n - i)))
		}
		// Walking backwards, re-insert each earlier pick: every later pick at or above it shifts up by one.
		for (let i = idx.length - 1; i >= 0; i--) {
			for (let j = idx.length - 1; j > i; j--) {
				if (idx[i] <= idx[j]) {
					idx[j]++
				}
			}
		}
		return idx
	}

	/**
	 * Searches randomly sampled `k + 1`-point manifolds for the one with the best separated distance histogram.
	 * @param {Array<Array<number>>} d Candidate points
	 * @param {number} k Manifold dimension
	 * @param {number} s Sampling level
	 * @returns {[number, number, ?Array<number>, ?Matrix]} Best goodness, its distance threshold, the first
	 * sampled point (origin) and the basis matrix. `[-Infinity, -Infinity, null, null]` when nothing was found.
	 */
	_findSeparation(d, k, s) {
		let gamma = -Infinity
		let tau = -Infinity
		let phi = null
		let beta = null
		// k + 1 points are sampled and at least one further point is needed to measure any distance.
		if (d.length <= k + 1) {
			return [gamma, tau, phi, beta]
		}
		const eps = 1.0e-8
		const c = 1
		// Number of trials, capped by the number of points.
		const N = Math.min(Math.log(eps) / Math.log(1 - (1 / s) ** k), c * d.length)
		for (let i = 0; i < N; i++) {
			const idxes = this._sampleidx(d.length, k + 1)
			const sampled = new Set(idxes)
			const m = idxes.map(idx => d[idx])
			const [b] = Matrix.fromArray(m).qrGramSchmidt()
			const distances = []
			for (let j = 0; j < d.length; j++) {
				// The sampled points themselves are not part of the histogram.
				if (sampled.has(j)) {
					continue
				}
				const xd = new Matrix(d[j].length, 1, d[j])
				const bxd = b.dot(xd)
				const dist = xd.tDot(xd).toScaler() - bxd.tDot(bxd).toScaler()
				distances.push(dist)
			}
			const [hist, ranges] = this._makeHistogram(distances)
			const [t, g] = this._findMinimumErrorThreshold(hist, ranges)
			if (g > gamma) {
				gamma = g
				tau = t
				phi = m[0]
				beta = b
			}
		}
		return [gamma, tau, phi, beta]
	}

	/**
	 * Builds an equal-width histogram whose bin width is `std * cbrt(24 * sqrt(PI) / n)` (Scott's rule).
	 * @param {number[]} d Values
	 * @returns {[number[], number[]]} Bin counts and bin edges (`edges.length === counts.length + 1`).
	 * When all values are equal or `d` is empty there are no bins: `[[], [min]]`.
	 */
	_makeHistogram(d) {
		let max = -Infinity
		let min = Infinity
		let sum = 0
		for (let i = 0; i < d.length; i++) {
			max = Math.max(max, d[i])
			min = Math.min(min, d[i])
			sum += d[i]
		}
		const mean = sum / d.length
		let vari = 0
		for (let i = 0; i < d.length; i++) {
			vari += (mean - d[i]) ** 2
		}
		vari /= d.length
		const std = Math.sqrt(vari)
		const step = std * Math.cbrt((24 * Math.sqrt(Math.PI)) / d.length)
		const ranges = [min]
		while (ranges[ranges.length - 1] < max) {
			const last = ranges[ranges.length - 1]
			const next = last + step
			// A step that no longer advances the edge (zero, NaN or absorbed by rounding) would never terminate.
			if (!(next > last)) {
				break
			}
			ranges.push(next)
		}
		const count = ranges.length - 1
		const hist = Array(count).fill(0)
		if (count === 0) {
			// No bins to count into; writing to index -1 would only pollute the array.
			return [hist, ranges]
		}
		for (let i = 0; i < d.length; i++) {
			// Clamp so that the maximum (and rounding drift near it) lands in the last bin.
			const bin = Math.floor((d[i] - min) / step)
			hist[Math.min(count - 1, Math.max(0, bin))]++
		}
		return [hist, ranges]
	}

	/**
	 * Minimum error thresholding of a histogram.
	 *
	 * Each split `t` puts bins `0..t` in the first class and the rest in the second one. Splits for which
	 * the criterion is undefined (an empty class or a class with zero variance, whose logarithm is
	 * `-Infinity`) are skipped; otherwise such a split would always win and mask every real valley.
	 * @param {number[]} h Bin counts
	 * @param {number[]} r Bin edges (`h.length + 1` values)
	 * @returns {[number, number]} Threshold (upper edge of the last bin of the first class) and goodness of
	 * the separation. `[r[0], 0]` when no split could be evaluated.
	 */
	_findMinimumErrorThreshold(h, r) {
		let tau = -1
		let minj = Infinity
		let maxj = -Infinity
		let discriminability = 0
		for (let t = 0; t < h.length - 1; t++) {
			let p1 = 0
			let p2 = 0
			let m1 = 0
			let m2 = 0
			for (let i = 0; i < h.length; i++) {
				if (i <= t) {
					p1 += h[i]
					m1 += i * h[i]
				} else {
					p2 += h[i]
					m2 += i * h[i]
				}
			}
			m1 /= p1
			m2 /= p2
			let s1 = 0
			let s2 = 0
			for (let i = 0; i < h.length; i++) {
				if (i <= t) {
					s1 += (i - m1) ** 2 * h[i]
				} else {
					s2 += (i - m2) ** 2 * h[i]
				}
			}
			s1 /= p1
			s2 /= p2
			const j =
				1 +
				2 * (p1 * Math.log(Math.sqrt(s1)) + p2 * Math.log(Math.sqrt(s2))) -
				2 * (p1 * Math.log(p1) + p2 * Math.log(p2))
			if (!Number.isFinite(j)) {
				// Empty or zero-variance class: the criterion is undefined for this split.
				continue
			}
			if (j < minj) {
				minj = j
				tau = t
				discriminability = (m1 - m2) ** 2 / (s1 + s2)
			}
			maxj = Math.max(maxj, j)
		}
		if (tau < 0) {
			return [r[0], 0]
		}
		// Goodness = class discriminability at the best split times the depth of the criterion.
		const g = discriminability * (maxj - minj)
		return [r[tau + 1], g]
	}

	/**
	 * Returns predicted categories.
	 * @returns {number[]} Predicted values
	 */
	predict() {
		const pred = []
		for (let k = 0; k < this._c.length; k++) {
			for (let i = 0; i < this._c[k].length; i++) {
				pred[this._c[k][i]] = k
			}
		}
		return pred
	}
}