@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
221 lines (213 loc) • 6.12 kB
JavaScript
/**
 * STatistical INformation Grid-based method
 *
 * The bounding box of the training data is recursively halved along every
 * dimension, producing a hierarchy of grid cells. Per-cell statistics are
 * computed on the finest layer and aggregated upwards; dense bottom cells
 * that touch each other are then merged into clusters.
 */
export default class STING {
	// https://en.wikipedia.org/wiki/Cluster_analysis
	// "STING : A Statistical Information Grid Approach to Spatial Data Mining"
	/**
	 * @param {number} c specified density
	 */
	constructor(c) {
		this._c = c
		// Root cell of the grid hierarchy (set by fit).
		this._cells = null
		// List of clusters, each a list of bottom-layer cells (set by fit).
		// Initialised so that predict() before fit() reports every sample as -1.
		this._clusters = []
		// Maximum tolerated ratio of "conflicting" points before a parent's
		// distribution type is downgraded to 'none'.
		this._t = 0.05
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		const x = datas
		const n = x.length
		if (n === 0) {
			// Nothing to build a grid from: keep the model in a valid, empty state.
			this._cells = null
			this._clusters = []
			return
		}
		const dim = x[0].length
		const mins = x.reduce((m, d) => m.map((v, i) => Math.min(v, d[i])), Array(dim).fill(Infinity))
		const maxs = x.reduce((m, d) => m.map((v, i) => Math.max(v, d[i])), Array(dim).fill(-Infinity))
		this._cells = {
			ranges: mins.map((m, i) => [m, maxs[i]]),
			children: [],
		}
		// Depth is chosen so that a bottom cell holds about `average_number` points on average.
		const average_number = 5
		const max_depth = Math.log(n / average_number) / Math.log(2 ** dim)

		const cells = this._buildLayers(this._cells, dim, max_depth)
		this._computeBottomStatistics(cells[cells.length - 1], x, maxs, dim)
		this._aggregateStatistics(cells, dim)
		const relevantCells = this._selectRelevantCells(cells.length)
		this._clusters = this._connectCells(relevantCells, dim)
	}

	/**
	 * Builds the cell hierarchy below `root`, one layer per depth step.
	 * @param {object} root Root cell (`ranges` and empty `children`)
	 * @param {number} dim Number of dimensions
	 * @param {number} max_depth Number of splits to perform (may be fractional or negative)
	 * @returns {Array<Array<object>>} Layers from the root layer down to the bottom layer
	 */
	_buildLayers(root, dim, max_depth) {
		const spl_size = 2 ** dim
		let layer = [root]
		const layers = [layer]
		for (let a = 0; a < max_depth; a++) {
			const nextLayer = []
			for (const parent of layer) {
				for (let i = 0; i < spl_size; i++) {
					// The k-th binary digit of i picks the lower (0) or upper (1) half of dimension k.
					let p = i
					const ranges = parent.ranges.map(([lo, hi]) => {
						const mid = (hi + lo) / 2
						const half = p % 2 === 0 ? [lo, mid] : [mid, hi]
						p = Math.floor(p / 2)
						return half
					})
					const child = { ranges, children: [] }
					nextLayer.push(child)
					parent.children.push(child)
				}
			}
			layer = nextLayer
			layers.push(layer)
		}
		return layers
	}

	/**
	 * Computes count, min, max, mean, standard deviation, distribution type and
	 * area for every cell of the bottom layer directly from the data.
	 * @param {Array<object>} layer Bottom layer cells
	 * @param {Array<Array<number>>} x Training data
	 * @param {number[]} maxs Per-dimension maximum of the training data
	 * @param {number} dim Number of dimensions
	 */
	_computeBottomStatistics(layer, x, maxs, dim) {
		// All bottom cells have the same size, so measure the first one.
		const bottomSpace = layer[0].ranges.reduce((space, [lo, hi]) => space * (hi - lo), 1)
		for (const c of layer) {
			// Cells are half-open [lo, hi), except on the upper data boundary where hi is included.
			const d = x.filter(v =>
				c.ranges.every((r, i) => r[0] <= v[i] && (r[1] === maxs[i] ? v[i] <= r[1] : v[i] < r[1]))
			)
			c.n = d.length
			c.min = Array(dim).fill(Infinity)
			c.max = Array(dim).fill(-Infinity)
			const sum = Array(dim).fill(0)
			for (const v of d) {
				for (let k = 0; k < dim; k++) {
					sum[k] += v[k]
					c.min[k] = Math.min(c.min[k], v[k])
					c.max[k] = Math.max(c.max[k], v[k])
				}
			}
			c.m = sum.map(v => (c.n > 0 ? v / c.n : 0))
			const s = Array(dim).fill(0)
			for (const v of d) {
				for (let k = 0; k < dim; k++) {
					s[k] += (v[k] - c.m[k]) ** 2
				}
			}
			c.s = s.map(v => (c.n > 0 ? Math.sqrt(v / c.n) : 0))
			c.dist = Array(dim).fill('normal')
			c.area = bottomSpace
		}
	}

	/**
	 * Propagates statistics from the bottom layer up to the root.
	 * @param {Array<Array<object>>} cells Layers from root to bottom
	 * @param {number} dim Number of dimensions
	 */
	_aggregateStatistics(cells, dim) {
		// Relative tolerance under which a child's mean / deviation counts as equal to the parent's.
		const eps = 0.1
		for (let k = cells.length - 2; k >= 0; k--) {
			for (const cell of cells[k]) {
				const ccells = cell.children
				let nki = 0
				let aki = 0
				const sum = Array(dim).fill(0)
				const min = Array(dim).fill(Infinity)
				const max = Array(dim).fill(-Infinity)
				const s = Array(dim).fill(0)
				for (const ccell of ccells) {
					nki += ccell.n
					aki += ccell.area
					for (let p = 0; p < dim; p++) {
						sum[p] += ccell.m[p] * ccell.n
						min[p] = Math.min(min[p], ccell.min[p])
						max[p] = Math.max(max[p], ccell.max[p])
						s[p] += (ccell.s[p] ** 2 + ccell.m[p] ** 2) * ccell.n
					}
				}
				cell.min = min
				cell.max = max
				cell.n = nki
				cell.m = sum.map(v => (nki > 0 ? v / nki : 0))
				// E[x^2] - E[x]^2 can come out slightly negative through rounding;
				// clamp it so the deviation never becomes NaN.
				cell.s = s.map((v, p) => (nki > 0 ? Math.sqrt(Math.max(0, v / nki - (sum[p] / nki) ** 2)) : 0))
				cell.dist = Array(dim).fill('normal')
				cell.area = aki

				for (let d = 0; d < dim; d++) {
					let confl = 0
					let dist = 'normal'
					for (const ccell of ccells) {
						let mdiff = 0
						let sdiff = 0
						if (cell.m[d] !== 0) {
							mdiff += Math.abs((cell.m[d] - ccell.m[d]) / cell.m[d])
						} else if (ccell.m[d] !== 0) {
							mdiff += Math.abs((cell.m[d] - ccell.m[d]) / ccell.m[d])
						}
						if (cell.s[d] !== 0) {
							sdiff += Math.abs((cell.s[d] - ccell.s[d]) / cell.s[d])
						} else if (ccell.s[d] !== 0) {
							sdiff += Math.abs((cell.s[d] - ccell.s[d]) / ccell.s[d])
						}
						// `ccell.dist` is a per-dimension array, so compare the entry for
						// dimension d (comparing against the array itself is always unequal).
						if (dist !== ccell.dist[d] && mdiff < eps && sdiff < eps) {
							confl += ccell.n
						} else if (mdiff >= eps || sdiff >= eps) {
							confl = nki
						}
					}
					if (nki > 0 && confl / nki > this._t) {
						dist = 'none'
					}
					cell.dist[d] = dist
				}
			}
		}
	}

	/**
	 * Walks the hierarchy top-down, keeping only cells whose point count reaches
	 * `area * c`, and returns the surviving cells of the bottom layer.
	 * The root itself is never filtered.
	 * @param {number} layerCount Number of layers in the hierarchy (root included)
	 * @returns {Array<object>} Relevant bottom-layer cells
	 */
	_selectRelevantCells(layerCount) {
		let relevantCells = [this._cells]
		for (let k = 1; k < layerCount; k++) {
			const childRelevantCells = []
			for (const cell of relevantCells) {
				for (const child of cell.children) {
					if (child.n >= child.area * this._c) {
						childRelevantCells.push(child)
					}
				}
			}
			relevantCells = childRelevantCells
		}
		return relevantCells
	}

	/**
	 * Groups relevant cells into clusters of face-adjacent cells.
	 * Two cells are adjacent when their ranges are identical in every dimension
	 * but one, and touch in that one.
	 * @param {Array<object>} relevantCells Relevant bottom-layer cells (consumed by this method)
	 * @param {number} dim Number of dimensions
	 * @returns {Array<Array<object>>} Clusters of cells
	 */
	_connectCells(relevantCells, dim) {
		const clusters = []
		while (relevantCells.length > 0) {
			const cluster = []
			const stack = [relevantCells.pop()]
			while (stack.length > 0) {
				const curcell = stack.pop()
				cluster.push(curcell)
				// Iterate backwards so that splice() does not disturb unvisited indices.
				for (let k = relevantCells.length - 1; k >= 0; k--) {
					const c = relevantCells[k]
					let adjointCnt = 0
					for (let d = 0; d < dim && adjointCnt < 2; d++) {
						const [curLo, curHi] = curcell.ranges[d]
						const [lo, hi] = c.ranges[d]
						if (curLo === lo && curHi === hi) {
							continue
						}
						// Touching counts once; anything else rules the pair out.
						adjointCnt = curLo === hi || curHi === lo ? adjointCnt + 1 : Infinity
					}
					if (adjointCnt === 1) {
						stack.push(c)
						relevantCells.splice(k, 1)
					}
				}
			}
			clusters.push(cluster)
		}
		return clusters
	}

	/**
	 * Returns predicted categories.
	 * A sample gets the index of the first cluster owning a cell whose closed
	 * ranges contain it, or -1 when no cluster cell contains it.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		return datas.map(data =>
			this._clusters.findIndex(cluster =>
				cluster.some(cell => data.every((v, d) => cell.ranges[d][0] <= v && v <= cell.ranges[d][1]))
			)
		)
	}
}