@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
338 lines (312 loc) • 7.72 kB
JavaScript
/**
 * Natural logarithm of the absolute value of the gamma function, log|Γ(z)|.
 *
 * - Positive integers and half-integers are evaluated from exact factorial /
 *   double-factorial sums.
 * - Non-positive integers are poles of Γ and yield `Infinity`.
 * - Other arguments below 0.5 use the reflection formula
 *   Γ(z)Γ(1 - z) = π / sin(πz).
 * - Everything else uses the Lanczos approximation (g = 7, 9 terms).
 *
 * @param {number} z Real argument
 * @returns {number} log|Γ(z)|
 */
const logGamma = z => {
	// https://en.wikipedia.org/wiki/Lanczos_approximation
	// https://slpr.sakura.ne.jp/qp/gamma-function/
	if (Number.isInteger(z)) {
		if (z <= 0) {
			// Γ has poles at 0, -1, -2, ...
			return Infinity
		}
		// log Γ(z) = log (z - 1)! = Σ_{i=2}^{z-1} log i
		let sum = 0
		for (let i = 2; i < z; i++) {
			sum += Math.log(i)
		}
		return sum
	}
	if (Number.isInteger(z - 0.5)) {
		const n = z - 0.5
		// n >= 0: Γ(n + 1/2) = (2n - 1)!! √π / 2^n
		// n <  0: |Γ(1/2 - m)| = 2^m √π / (2m - 1)!!  with m = -n
		let sum = Math.log(Math.sqrt(Math.PI)) - Math.log(2) * n
		for (let i = 2 * n - 1; i > 0; i -= 2) {
			sum += Math.log(i)
		}
		for (let i = -2 * n - 1; i > 0; i -= 2) {
			sum -= Math.log(i)
		}
		return sum
	}
	if (z < 0.5) {
		// Reflection formula; the absolute value keeps the result real when Γ(z) < 0.
		return Math.log(Math.PI) - Math.log(Math.abs(Math.sin(Math.PI * z))) - logGamma(1 - z)
	}
	const p = [
		676.5203681218851, -1259.1392167224028, 771.32342877765313, -176.61502916214059, 12.507343278686905,
		-0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
	]
	const zm = z - 1
	let series = 0.99999999999980993
	for (let i = 0; i < p.length; i++) {
		series += p[i] / (zm + i + 1)
	}
	// t = zm + g + 1/2 with g = 7 (p.length - 0.5 === 7.5)
	const t = zm + p.length - 0.5
	return Math.log(Math.sqrt(2 * Math.PI)) + Math.log(t) * (zm + 0.5) - t + Math.log(series)
}
/**
 * Map keyed by array *contents* instead of array identity.
 *
 * Two keys are the same entry when they have the same length and every
 * element is strictly equal (`===`). Lookups scan the stored keys linearly.
 */
class ArrayKeyMap {
	constructor() {
		this._map = new Map()
	}
	/**
	 * Number of stored entries.
	 * @type {number}
	 */
	get size() {
		return this._map.size
	}
	/**
	 * Find the stored key array that is element-wise equal to `key`.
	 * @param {Array<*>} key Key to look up
	 * @returns {Array<*> | null} The stored key instance, or `null` if there is none
	 */
	_getKey(key) {
		if (this._map.has(key)) {
			return key
		}
		for (const k of this._map.keys()) {
			// The length check stops a short key (or `[]`) from matching a longer stored key by prefix.
			if (k.length === key.length && key.every((v, j) => v === k[j])) {
				return k
			}
		}
		return null
	}
	/**
	 * Iterator over the stored key arrays, in insertion order.
	 * @returns {IterableIterator<Array<*>>} Stored keys
	 */
	keys() {
		return this._map.keys()
	}
	/**
	 * @param {Array<*>} key Key to look up
	 * @returns {boolean} `true` if an element-wise equal key is stored
	 */
	has(key) {
		return this._getKey(key) !== null
	}
	/**
	 * @param {Array<*>} key Key to look up
	 * @returns {*} Stored value, or `undefined` if the key is absent
	 */
	get(key) {
		return this._map.get(this._getKey(key))
	}
	/**
	 * Store `value` under `key`, overwriting the entry of an element-wise equal key if one exists.
	 * @param {Array<*>} key Key
	 * @param {*} value Value
	 * @returns {Map<Array<*>, *>} The underlying map
	 */
	set(key, value) {
		const okey = this._getKey(key)
		if (okey !== null) {
			return this._map.set(okey, value)
		}
		return this._map.set(key, value)
	}
}
/**
 * Bayesian Network
 *
 * Learns a directed acyclic graph over discrete variables with a
 * dynamic-programming search over variable subsets scored by BDeu, then
 * estimates additively smoothed conditional probability tables for that graph.
 */
export default class BayesianNetwork {
	// http://www.ai.lab.uec.ac.jp/wp-content/uploads/2016/03/2c998b492dee21b62c17d77f786482f0.pdf
	// https://www.jstage.jst.go.jp/article/jjsai/25/6/25_803/_pdf
	// http://www.jfssa.jp/taikai/2009/program/docs/00034.pdf
	/**
	 * @param {number} alpha Pseudo-count added to every cell when the conditional probability tables are estimated. The BDeu structure score uses a separate equivalent sample size that is fixed to 1.
	 */
	constructor(alpha) {
		// _th[i]: parent value-index tuple -> probability of each value of variable i
		this._th = null
		// _graph[i]: indices of the parents of variable i
		this._graph = null
		this._alpha = alpha
		// Equivalent sample size used by the BDeu scores
		this._ess = 1
		this._n = 0
		// _cand[i]: distinct values of variable i, in order of first appearance
		this._cand = null
		this._score_method = 'bdeu'
	}
	/**
	 * Fit model.
	 *
	 * The set of values of each variable is collected on the first call only;
	 * later calls reuse it.
	 * @param {Array<Array<*>>} x Training data
	 */
	fit(x) {
		if (!this._cand) {
			this._n = x[0].length
			this._cand = []
			for (let i = 0; i < this._n; i++) {
				this._cand[i] = [...new Set(x.map(v => v[i]))]
			}
		}
		this._fitStructure(x)
		this._fitParameter(x)
	}
	/**
	 * Learn the graph structure and store it in `_graph`.
	 * @param {Array<Array<*>>} x Training data
	 */
	_fitStructure(x) {
		this._fitStructure_dp(x)
	}
	/**
	 * Structure search by dynamic programming over variable subsets.
	 *
	 * 1. For every variable `i` and every subset of the other variables, score
	 *    "`i` with that subset as parents" and record the best entry among the
	 *    subset and its already-processed sub-subsets (`bpss`).
	 * 2. For every variable subset, pick the best sink variable (`sinkScores`).
	 * 3. Peel sinks off the full set to obtain an ordering, then assign each
	 *    variable the recorded best parent set among the variables placed after it.
	 *
	 * Subsets are 0/1 arrays of length `_n`.
	 * @param {Array<Array<*>>} x Training data
	 */
	_fitStructure_dp(x) {
		const bpss = []
		for (let i = 0; i < this._n; i++) {
			bpss[i] = new ArrayKeyMap()
			const key = Array(this._n).fill(0)
			key[i] = 1
			do {
				// Columns currently selected (variable i plus its candidate parents)
				const c = []
				for (let j = 0; j < this._n; j++) {
					if (key[j]) c.push(j)
				}
				const xi = x.map(r => c.map(idx => r[idx]))
				const cand = c.map(idx => this._cand[idx])
				// Graph over the selected columns: i gets every other column as parent,
				// the other columns have no parents.
				const g = []
				const gc = []
				for (let j = 0; j < c.length; j++) {
					if (c[j] === i) {
						g[j] = gc
					} else {
						g[j] = []
						gc.push(j)
					}
				}
				const score = this._score(xi, g, cand)
				let bps = [key.concat(), score]
				for (let d = 0; d < key.length; d++) {
					if (key[d] === 0) {
						continue
					}
					const bkey = key.concat()
					bkey[d] = 0
					// Subsets that leave at most one variable selected are not consulted
					if (bkey.reduce((s, v) => s + v, 0) <= 1) {
						continue
					}
					const bscore = bpss[i].get(bkey)
					if (bscore && bps[1] < bscore[1]) {
						bps = bscore
					}
				}
				bpss[i].set(key.concat(), bps)
				// Advance `key` as a binary counter over every position except i
				for (let j = this._n - 1; j >= 0; j--) {
					if (i === j) continue
					if (!key[j]) {
						key[j] = 1
						break
					}
					key[j] = 0
				}
			} while (key.some((v, j) => j !== i && v === 1))
		}
		const sinkScores = new ArrayKeyMap()
		const key = Array(this._n).fill(0)
		key[this._n - 1] = 1
		do {
			const m = key.reduce((s, v) => s + v, 0)
			let sink = [null, -Infinity]
			for (let i = 0; i < bpss.length; i++) {
				let score = bpss[i].get(key)?.[1]
				// Only a missing entry (variable i not in this subset) is skipped;
				// a score of exactly 0 is a valid score.
				if (score === undefined) {
					continue
				}
				if (m > 1) {
					const ki = key.concat()
					ki[i] = 0
					score += sinkScores.get(ki)[1]
				}
				if (score > sink[1]) {
					sink = [i, score]
				}
			}
			sinkScores.set(key.concat(), sink)
			// Advance `key` as a binary counter over all positions
			for (let j = this._n - 1; j >= 0; j--) {
				if (!key[j]) {
					key[j] = 1
					break
				}
				key[j] = 0
			}
		} while (key.some(v => v === 1))
		// Repeatedly remove the best sink of the remaining variables
		const v = Array(this._n).fill(1)
		const order = []
		for (let i = 0; i < this._n; i++) {
			const k = sinkScores.get(v)[0]
			order[i] = k
			v[k] = 0
		}
		// Re-add variables in reverse removal order and read off their best parent sets
		this._graph = []
		for (let i = order.length - 1; i >= 0; i--) {
			v[order[i]] = 1
			const k = bpss[order[i]].get(v)[0]
			const idx = []
			for (let j = 0; j < k.length; j++) {
				if (k[j] && j !== order[i]) {
					idx.push(j)
				}
			}
			this._graph[order[i]] = idx
		}
	}
	/**
	 * Score a graph with the configured scoring method.
	 * @param {Array<Array<*>>} x Data whose columns match `graph` and `cand`
	 * @param {Array<number[]>} [graph] Parent indices of each column
	 * @param {Array<Array<*>>} [cand] Possible values of each column
	 * @returns {number | undefined} Score, or `undefined` when the scoring method is not 'bdeu'
	 */
	_score(x, graph = this._graph, cand = this._cand) {
		if (this._score_method === 'bdeu') {
			return this._bdeu(x, graph, cand)
		}
	}
	/**
	 * Log BDeu score of a graph.
	 * @param {Array<Array<*>>} x Data whose columns match `graph` and `cand`
	 * @param {Array<number[]>} [graph] Parent indices of each column
	 * @param {Array<Array<*>>} [cand] Possible values of each column
	 * @param {boolean} [exact] Use the log-gamma form instead of the approximation
	 * @returns {number} Log score
	 */
	_bdeu(x, graph = this._graph, cand = this._cand, exact = false) {
		if (exact) {
			return this._logBDeu_exact(x, graph, cand)
		}
		return this._logBDeu_appro(x, graph, cand)
	}
	/**
	 * Log BDeu score computed with log-gamma functions.
	 * @param {Array<Array<*>>} x Data whose columns match `graph` and `cand`
	 * @param {Array<number[]>} [graph] Parent indices of each column
	 * @param {Array<Array<*>>} [cand] Possible values of each column
	 * @returns {number} Log score
	 */
	_logBDeu_exact(x, graph = this._graph, cand = this._cand) {
		const n = this._count(x, graph, cand)
		let logs = 0
		for (let i = 0; i < n.length; i++) {
			for (const k of n[i].keys()) {
				const tl = n[i].get(k)
				// Prior mass per (parent configuration, value) cell
				const a = this._ess / (n[i].size * tl.length)
				for (let j = 0; j < tl.length; j++) {
					logs += logGamma(a + tl[j])
					logs -= logGamma(a)
				}
				const sn = tl.reduce((s, v) => s + v, 0)
				// Prior mass per parent configuration
				const sa = this._ess / n[i].size
				logs += logGamma(sa)
				logs -= logGamma(sa + sn)
			}
		}
		return logs
	}
	/**
	 * Approximated log BDeu score (no gamma functions).
	 * @param {Array<Array<*>>} x Data whose columns match `graph` and `cand`
	 * @param {Array<number[]>} [graph] Parent indices of each column
	 * @param {Array<Array<*>>} [cand] Possible values of each column
	 * @returns {number} Log score
	 */
	_logBDeu_appro(x, graph = this._graph, cand = this._cand) {
		const n = this._count(x, graph, cand)
		let logs = 0
		for (let i = 0; i < n.length; i++) {
			// Use the cardinalities of the columns being scored, i.e. the `cand`
			// argument, which differs from `this._cand` during structure search.
			logs += this._ess * Math.log(cand[i].length)
		}
		for (let i = 0; i < n.length; i++) {
			for (const k of n[i].keys()) {
				const tl = n[i].get(k)
				const sa = this._ess / n[i].size
				const sn = tl.reduce((s, v) => s + v, 0)
				const a = this._ess / (n[i].size * tl.length)
				for (let j = 0; j < tl.length; j++) {
					logs += (a + tl[j]) * Math.log((a + tl[j]) / (sa + sn))
					logs -=
						(((tl.length - 1) / tl.length) * Math.log(1 + (tl.length * n[i].size * tl[j]) / this._ess)) / 2
				}
			}
		}
		return logs
	}
	/**
	 * Estimate the conditional probability tables of the current graph and
	 * store them in `_th`, adding `_alpha` to every count before normalising.
	 * @param {Array<Array<*>>} x Training data
	 */
	_fitParameter(x) {
		this._th = this._count(x)
		for (let i = 0; i < this._th.length; i++) {
			for (const k of this._th[i].keys()) {
				const a = this._th[i].get(k)
				const s = a.reduce((s, v) => s + v, 0)
				this._th[i].set(
					k,
					a.map(v => (this._alpha + v) / (this._alpha * a.length + s))
				)
			}
		}
	}
	/**
	 * Count, for every variable and every combination of its parents' values,
	 * how often each value of the variable occurs in `x`.
	 * @param {Array<Array<*>>} x Data whose columns match `graph` and `cand`
	 * @param {Array<number[]>} [graph] Parent indices of each column
	 * @param {Array<Array<*>>} [cand] Possible values of each column
	 * @returns {ArrayKeyMap[]} For each variable, a map from parent value-index tuple to the per-value counts
	 */
	_count(x, graph = this._graph, cand = this._cand) {
		const n = []
		for (let i = 0; i < graph.length; i++) {
			const cidx = x.map(v => cand[i].indexOf(v[i]))
			n[i] = new ArrayKeyMap()
			// p[k] is the index of the current value of the k-th parent
			const p = Array(graph[i].length).fill(0)
			do {
				const m = Array(cand[i].length).fill(0)
				for (let t = 0; t < x.length; t++) {
					// Skip rows whose parent values differ from the current configuration
					if (graph[i].some((j, k) => cand[j][p[k]] !== x[t][j])) {
						continue
					}
					m[cidx[t]]++
				}
				n[i].set(p.concat(), m)
				// Advance p as a mixed-radix counter; it wraps to all zeros after the last configuration
				for (let k = 0; k < p.length; k++) {
					p[k]++
					if (p[k] < cand[graph[i][k]].length) {
						break
					}
					p[k] = 0
				}
			} while (p.reduce((s, v) => s + v, 0) > 0)
		}
		return n
	}
	/**
	 * Returns probability values.
	 *
	 * Each value is the product over all variables of the estimated probability
	 * of the variable's value given its parents' values. A row that contains a
	 * value not seen when the value sets were collected gets probability 0.
	 * @param {Array<Array<*>>} x Sample data
	 * @returns {number[]} Predicted values
	 */
	probability(x) {
		return x.map(r => {
			const idx = r.map((v, d) => this._cand[d].indexOf(v))
			let p = 1
			for (let i = 0; i < this._graph.length; i++) {
				const key = this._graph[i].map(d => idx[d])
				p *= this._th[i].get(key)?.[idx[i]] || 0
			}
			return p
		})
	}
}