@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
343 lines (315 loc) • 7.51 kB
JavaScript
import Matrix from '../util/matrix.js'
/**
* Gaussian mixture model
*/
export class GMM {
// see https://www.slideshare.net/TakayukiYagi1/em-66114496
// Anomaly detection https://towardsdatascience.com/understanding-anomaly-detection-in-python-using-gaussian-mixture-model-e26e5d06094b
// A Survey of Outlier Detection Methodologies. (2004)
constructor() {
this._k = 0
this._d = null
this._p = []
this._m = []
this._s = []
}
_init(datas) {
if (!this._d) {
this._d = datas[0].length
for (let i = 0; i < this._k; i++) {
this.add()
this._k--
}
}
}
/**
* Add a new cluster.
*/
add() {
this._k++
if (this._d) {
this._p.push(Math.random())
this._m.push(Matrix.random(this._d, 1))
const s = Matrix.randn(this._d, this._d)
this._s.push(s.tDot(s))
}
}
/**
* Clear all clusters.
*/
clear() {
this._k = 0
this._p = []
this._m = []
this._s = []
}
/**
* Returns probabilities.
* @param {Array<Array<number>>} data Sample data
* @returns {Array<Array<number>>} Predicted values
*/
probability(data) {
this._init(data)
return data.map(v => {
const x = new Matrix(this._d, 1, v)
const prob = []
for (let i = 0; i < this._k; i++) {
const v = this._gaussian(x, this._m[i], this._s[i]) * this._p[i]
prob.push(v)
}
return prob
})
}
/**
* Returns predicted categories.
* @param {Array<Array<number>>} data Sample data
* @returns {number[]} Predicted values
*/
predict(data) {
this._init(data)
return data.map(v => {
const x = new Matrix(this._d, 1, v)
let max_p = 0
let max_c = -1
for (let i = 0; i < this._k; i++) {
const v = this._gaussian(x, this._m[i], this._s[i])
if (v > max_p) {
max_p = v
max_c = i
}
}
return max_c
})
}
_gaussian(x, m, s) {
const xs = Matrix.sub(x, m)
return (
Math.exp(-0.5 * xs.tDot(s.inv()).dot(xs).toScaler()) /
(Math.sqrt(2 * Math.PI) ** this._d * Math.sqrt(s.det()))
)
}
/**
* Fit model.
* @param {Array<Array<number>>} datas Training data
*/
fit(datas) {
this._init(datas)
const n = datas.length
const g = []
const N = Array(this._k).fill(0)
const x = []
datas.forEach(data => {
const ns = []
let s = 0
const xi = new Matrix(this._d, 1, data)
for (let j = 0; j < this._k; j++) {
const v = this._gaussian(xi, this._m[j], this._s[j]) * this._p[j]
ns.push(v || 0)
s += v || 0
}
const gi = ns.map(v => v / (s || 1.0))
g.push(gi)
x.push(xi)
gi.forEach((v, j) => {
N[j] += v
})
})
for (let i = 0; i < this._k; i++) {
const new_mi = new Matrix(this._d, 1)
for (let j = 0; j < n; j++) {
new_mi.add(Matrix.mult(x[j], g[j][i]))
}
new_mi.div(N[i])
this._m[i] = new_mi
const new_si = new Matrix(this._d, this._d)
for (let j = 0; j < n; j++) {
let tt = Matrix.sub(x[j], new_mi)
tt = tt.dot(tt.t)
tt.mult(g[j][i])
new_si.add(tt)
}
new_si.div(N[i])
new_si.add(Matrix.eye(this._d, this._d, 1.0e-8))
this._s[i] = new_si
this._p[i] = N[i] / n
}
}
}
/**
* Semi-Supervised gaussian mixture model
*/
export class SemiSupervisedGMM extends GMM {
// http://yamaguchiyuto.hatenablog.com/entry/machine-learning-advent-calendar-2014
/**
* Categories
* @type {*[]}
*/
get categories() {
return this._classes
}
/**
* Initialize model.
* @param {Array<Array<number>>} datas Training data
* @param {(* | null)[]} labels Target values
*/
init(datas, labels) {
this.clear()
this._init(datas)
this._classes = [...new Set(labels.filter(v => v != null))]
for (let k = 0; k < this._classes.length; k++) {
super.add()
}
}
add() {}
/**
* Fit model.
* @param {Array<Array<number>>} datas Training data
* @param {(* | null)[]} y Target values
*/
fit(datas, y) {
this._init(datas)
const n = datas.length
const g = []
const N = Array(this._k).fill(0)
const x = []
datas.forEach((data, i) => {
const ns = []
let s = 0
const xi = new Matrix(this._d, 1, data)
for (let j = 0; j < this._k; j++) {
const cidx = this._classes.indexOf(y[i])
let v = 0
if (cidx < 0) {
v = this._gaussian(xi, this._m[j], this._s[j]) * this._p[j]
} else {
v = cidx === j ? 1 : 0
}
ns.push(v || 0)
s += v || 0
}
const gi = ns.map(v => v / (s || 1.0))
g.push(gi)
x.push(xi)
gi.forEach((v, j) => {
N[j] += v
})
})
for (let i = 0; i < this._k; i++) {
const new_mi = new Matrix(this._d, 1)
for (let j = 0; j < n; j++) {
new_mi.add(Matrix.mult(x[j], g[j][i]))
}
new_mi.div(N[i])
this._m[i] = new_mi
const new_si = new Matrix(this._d, this._d)
for (let j = 0; j < n; j++) {
let tt = Matrix.sub(x[j], new_mi)
tt = tt.dot(tt.t)
tt.mult(g[j][i])
new_si.add(tt)
}
new_si.div(N[i])
new_si.add(Matrix.eye(this._d, this._d, 1.0e-8))
this._s[i] = new_si
this._p[i] = N[i] / n
}
}
/**
* Returns predicted categories.
* @param {Array<Array<number>>} data Sample data
* @returns {*[]} Predicted values
*/
predict(data) {
return super.predict(data).map(v => this._classes[v])
}
}
/**
* Gaussian mixture regression
*/
export class GMR extends GMM {
// https://datachemeng.com/gaussianmixtureregression/
constructor() {
super()
this._input_d = 0
this._mx = []
this._my = []
this._sxx = []
this._sxy = []
}
/**
* Add a new cluster.
*/
add() {
super.add()
if (this._mx.length < this._m.length) {
for (let i = this._mx.length; i < this._m.length; i++) {
this._mx[i] = this._m[i].slice(0, this._input_d)
this._my[i] = this._m[i].slice(this._input_d)
this._sxx[i] = this._s[i].block(0, 0, this._input_d, this._input_d)
this._sxy[i] = this._s[i].block(this._input_d, 0, null, this._input_d)
}
}
}
/**
* Clear all clusters.
*/
clear() {
super.clear()
this._mx = []
this._my = []
this._sxx = []
this._sxy = []
}
/**
* Fit model.
* @param {Array<Array<number>>} x Training data
* @param {Array<Array<number>>} y Target values
*/
fit(x, y) {
this._input_d = x[0].length
const datas = x.map((v, i) => v.concat(y[i]))
super.fit(datas)
this._mx = this._m.map(m => m.slice(0, this._input_d))
this._my = this._m.map(m => m.slice(this._input_d))
this._sxx = this._s.map(m => m.block(0, 0, this._input_d, this._input_d))
this._sxy = this._s.map(m => m.block(0, this._input_d, this._input_d, null))
}
/**
* Returns probabilities.
* @param {Array<Array<number>>} x Sample data
* @param {Array<Array<number>>} y Target values
* @returns {Array<Array<number>>} Predicted values
*/
probability(x, y) {
const datas = x.map((v, i) => v.concat(y[i]))
return super.probability(datas)
}
/**
* Returns predicted values.
* @param {Array<Array<number>>} x Sample data
* @returns {Array<Array<number>>} Predicted values
*/
predict(x) {
if (this._mx.length === 0) {
return []
}
x = Matrix.fromArray(x)
const w = new Matrix(x.rows, this._k)
for (let i = 0; i < x.rows; i++) {
const xi = x.row(i).t
for (let k = 0; k < this._k; k++) {
const v = this._gaussian(xi, this._mx[k], this._sxx[k]) * this._p[k]
w.set(i, k, v)
}
}
w.div(w.sum(1))
const ys = new Matrix(x.rows, this._my[0].cols)
for (let k = 0; k < this._k; k++) {
const c = Matrix.sub(x, this._mx[k].t).dot(this._sxx[k].inv()).dot(this._sxy[k])
c.add(this._my[k])
c.mult(w.col(k))
ys.add(c)
}
return ys.toArray()
}
}