UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

230 lines (208 loc) 5.12 kB
/**
 * Sparse symmetric table of link counts between pairs of indices.
 * Each unordered pair is stored once, keyed by (smaller index, larger index).
 */
class Link {
  constructor() {
    this._link = []
  }

  /**
   * Returns the value stored for the unordered pair (i, j).
   * @param {number} i First index
   * @param {number} j Second index
   * @returns {number} Stored value, or 0 when nothing has been stored
   */
  at(i, j) {
    const [lo, hi] = i < j ? [i, j] : [j, i]
    return this._link[lo]?.[hi] || 0
  }

  /**
   * Stores a value for the unordered pair (i, j).
   * @param {number} i First index
   * @param {number} j Second index
   * @param {number} value Value to store
   */
  set(i, j, value) {
    const [lo, hi] = i < j ? [i, j] : [j, i]
    this._link[lo] ||= []
    this._link[lo][hi] = value
  }
}

/**
 * @typedef {object} ROCKNode
 * @property {number[]} [point] Data point of leaf node
 * @property {number} [index] Data index of leaf node
 * @property {number} g Number of leaf nodes
 * @property {number} [distance] Distance between children nodes
 * @property {ROCKNode[]} [children] Children nodes
 * @property {ROCKNode[]} leafs Leaf nodes
 */

/**
 * RObust Clustering using linKs
 */
export default class ROCK {
  // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.103.5187&rep=rep1&type=pdf
  // ROCK: A Robust Clustering Algorithm for Categorical Attributes.
  /**
   * @param {number} th Threshold
   * @param {number} k Number of clusters
   */
  constructor(th, k) {
    this._th = th
    this._k = k
  }

  /**
   * Exponent term f(th) = (1 - th) / (1 + th) used by the goodness measure.
   * @returns {number} Value of f for the configured threshold
   */
  _f() {
    return (1 - this._th) / (1 + this._th)
  }

  /**
   * Goodness of merging two clusters: the number of cross links divided by
   * (n1 + n2)^p - n1^p - n2^p, where p = 1 + 2 f(th) and n1, n2 are the leaf counts.
   * @param {ROCKNode} c1 First cluster
   * @param {ROCKNode} c2 Second cluster
   * @param {Link} links Link counts between leaf indices
   * @returns {number} Goodness value
   */
  _g(c1, c2, links) {
    const v1 = c1.leafs
    const v2 = c2.leafs
    let link = 0
    for (let i = 0; i < v1.length; i++) {
      for (let j = 0; j < v2.length; j++) {
        link += links.at(v1[i].index, v2[j].index)
      }
    }
    const p = 1 + 2 * this._f()
    return link / ((v1.length + v2.length) ** p - v1.length ** p - v2.length ** p)
  }

  /**
   * Similarity of two points, 1 / (1 + Euclidean distance).
   * @param {number[]} a First point
   * @param {number[]} b Second point
   * @returns {number} Similarity; 1 for identical points, approaching 0 as they move apart
   */
  _sim(a, b) {
    return 1 / (1 + Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0)))
  }

  /**
   * Counts, for every pair of points, how many neighbours they have in common.
   * Two distinct points are neighbours when their similarity is at least the threshold.
   * @param {Array<Array<number>>} data Data points
   * @returns {Link} Common-neighbour counts between data indices
   */
  _link(data) {
    const n = data.length
    const nbrlist = []
    for (let i = 0; i < n; i++) {
      nbrlist[i] = []
    }
    for (let i = 0; i < n; i++) {
      for (let j = i + 1; j < n; j++) {
        if (this._sim(data[i], data[j]) >= this._th) {
          nbrlist[i].push(j)
          nbrlist[j].push(i)
        }
      }
    }

    const link = new Link()
    for (let i = 0; i < n; i++) {
      const nbr = nbrlist[i]
      // Point i is a common neighbour of every unordered pair taken from its
      // neighbour list, so the inner loop must reach the last neighbour too.
      for (let j = 0; j < nbr.length; j++) {
        for (let l = j + 1; l < nbr.length; l++) {
          link.set(nbr[j], nbr[l], link.at(nbr[j], nbr[l]) + 1)
        }
      }
    }
    return link
  }

  /**
   * Fit model.
   * @param {Array<Array<number>>} data Training data
   */
  fit(data) {
    const link = this._link(data)
    const n = data.length

    // Every data point starts as its own single-leaf cluster.
    const Q = []
    for (let i = 0; i < n; i++) {
      Q.push({
        point: data[i],
        index: i,
        distance: 0,
        get leafs() {
          return [this]
        },
      })
    }

    // q[i] holds the merge candidates of cluster i, best goodness first.
    const q = []
    for (let i = 0; i < n; i++) {
      q[i] = []
      for (let j = 0; j < n; j++) {
        if (link.at(i, j) > 0) {
          q[i].push({ index: j, g: this._g(Q[i], Q[j], link) })
        }
      }
      q[i].sort((a, b) => b.g - a.g)
      Q[i].g = q[i][0]?.g || 0
    }
    // Q is kept ordered by each cluster's best available goodness.
    Q.sort((a, b) => b.g - a.g)

    let supidx = n
    while (Q.length > 2) {
      const u = Q.shift()
      if (u.g === 0) {
        // The best remaining cluster has no linked partner: nothing left to merge.
        Q.push(u)
        break
      }
      const uidx = u.index
      const vidx = q[uidx][0].index
      let v = null
      for (let i = 0; i < Q.length; i++) {
        if (Q[i].index === vidx) {
          v = Q.splice(i, 1)[0]
          break
        }
      }

      const widx = supidx++
      const w = {
        index: widx,
        children: [u, v],
        get leafs() {
          return [...this.children[0].leafs, ...this.children[1].leafs]
        },
      }

      // Clusters that were linked to u or v become linked to the merged cluster w.
      const xidx = [...new Set([...q[uidx].map(v => v.index), ...q[vidx].map(v => v.index)])]
      q[widx] = []
      for (let i = 0; i < xidx.length; i++) {
        if (xidx[i] === uidx || xidx[i] === vidx) continue
        link.set(widx, xidx[i], link.at(uidx, xidx[i]) + link.at(vidx, xidx[i]))
        const x = Q.find(v => v.index === xidx[i])
        const g = this._g(w, x, link)
        q[widx].push({ index: xidx[i], g: g })
        // Replace x's entries for u and v with a single entry for w.
        q[xidx[i]] = q[xidx[i]].filter(v => v.index !== uidx && v.index !== vidx)
        q[xidx[i]].push({ index: widx, g: g })
        q[xidx[i]].sort((a, b) => b.g - a.g)
        x.g = q[xidx[i]][0].g
      }
      q[uidx] = []
      q[vidx] = []
      q[widx].sort((a, b) => b.g - a.g)
      w.g = q[widx][0]?.g || 0
      Q.push(w)
      Q.sort((a, b) => b.g - a.g)
    }

    this._root = {
      g: 0,
      children: Q,
      // The root may hold any number of children (fewer than two points, or an
      // early stop with several unlinked clusters), so collect leaves from all of them.
      get leafs() {
        return this.children.flatMap(child => child.leafs)
      },
    }
  }

  /**
   * Returns the specified number of clusters.
   * Starting from the top-level clusters, repeatedly splits the merged node with the
   * smallest goodness until the requested count is reached or nothing can be split.
   * The fitted tree itself is left unchanged.
   * @param {number} number Number of clusters
   * @returns {ROCKNode[]} Cluster nodes
   */
  getClusters(number) {
    // Work on a copy so repeated calls do not rewrite the root's children.
    const scanNodes = [...this._root.children]
    while (scanNodes.length < number) {
      let min_goodness = Infinity
      let min_goodness_idx = -1
      for (let i = 0; i < scanNodes.length; i++) {
        const node = scanNodes[i]
        if (node.children && node.g < min_goodness) {
          min_goodness_idx = i
          min_goodness = node.g
        }
      }
      if (min_goodness_idx === -1) {
        // Only leaf nodes remain; cannot split any further.
        break
      }
      const min_goodness_node = scanNodes[min_goodness_idx]
      scanNodes.splice(min_goodness_idx, 1, min_goodness_node.children[0], min_goodness_node.children[1])
    }
    return scanNodes
  }

  /**
   * Returns predicted categories.
   * @returns {number[]} Predicted values
   */
  predict() {
    const p = []
    const clusters = this.getClusters(this._k)
    for (let i = 0; i < clusters.length; i++) {
      const leafs = clusters[i].leafs
      for (let k = 0; k < leafs.length; k++) {
        p[leafs[k].index] = i
      }
    }
    return p
  }
}