UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

358 lines (332 loc) 7.97 kB
/**
 * Factories for the built-in distance functions.
 * Each entry returns a function `(a, b) => number` that compares two vectors element by element.
 */
const metrics = {
	euclid: () => (a, b) => Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0)),
	manhattan: () => (a, b) => a.reduce((s, v, i) => s + Math.abs(v - b[i]), 0),
	chebyshev: () => (a, b) => Math.max(...a.map((v, i) => Math.abs(v - b[i]))),
	// The absolute value is required: with an odd or fractional `p`, signed differences
	// would cancel each other out or produce NaN.
	minkowski:
		({ p = 2 } = {}) =>
		(a, b) =>
			a.reduce((s, v, i) => s + Math.abs(v - b[i]) ** p, 0) ** (1 / p),
}

/**
 * Base class for k-nearest neighbor models
 */
class KNNBase {
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		this._p = []
		this._c = []
		this._k = k

		this._metric = metric
		if (typeof this._metric === 'function') {
			this._d = this._metric
		} else {
			this._d = metrics[this._metric]()
		}
	}

	/**
	 * Returns the stored points closest to `data`, ordered by ascending distance.
	 * At most `k` entries are returned; fewer when fewer points are stored.
	 * @param {number[]} data Query point
	 * @returns {{d: number, category: *, idx: number}[]} Distance, stored target and stored index of each neighbor
	 */
	_near_points(data) {
		const ps = []
		this._p.forEach((p, i) => {
			const d = this._d(data, p)
			if (ps.length < this._k || d < ps[this._k - 1].d) {
				// The list is full: drop the current farthest neighbor to make room.
				if (ps.length >= this._k) ps.pop()
				ps.push({ d, category: this._c[i], idx: i })
				// Insertion step: move the new entry toward the front until it is in order.
				for (let k = ps.length - 1; k > 0; k--) {
					if (!(ps[k - 1].d > ps[k].d)) {
						// Everything in front was already ordered, so nothing else can move.
						break
					}
					const tmp = ps[k]
					ps[k] = ps[k - 1]
					ps[k - 1] = tmp
				}
			}
		})
		return ps
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 * @param {*} [category] Target value
	 */
	_add(point, category) {
		this._p.push(point)
		this._c.push(category)
	}
}

/**
 * k-nearest neighbor
 */
export class KNN extends KNNBase {
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		super(k, metric)
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 * @param {*} category Target value
	 */
	add(point, category) {
		super._add(point, category)
	}

	/**
	 * Add multiple data points.
	 * @param {Array<Array<number>>} datas Training data
	 * @param {*[]} targets Target values
	 */
	fit(datas, targets) {
		for (let i = 0; i < datas.length; i++) {
			this.add(datas[i], targets[i])
		}
	}

	/**
	 * Returns predicted categories.
	 * The category with the most votes among the nearest neighbors wins; on a tie the
	 * category whose closest neighbor is nearer wins. Returns `null` for a sample when
	 * no training data is stored.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {*[]} Predicted values
	 */
	predict(datas) {
		return datas.map(data => {
			const ps = this._near_points(data)
			// Null-prototype table: category names such as "toString" or "__proto__" must not
			// resolve to members inherited from Object.prototype. Keys are string-coerced.
			const clss = Object.create(null)
			for (const p of ps) {
				const cat = p.category
				const entry = clss[cat]
				if (entry === undefined) {
					clss[cat] = { category: cat, count: 1, min_d: p.d }
				} else {
					entry.count += 1
					entry.min_d = Math.min(entry.min_d, p.d)
				}
			}

			let max_count = 0
			let min_dist = -1
			let target_cat = null
			for (const k of Object.keys(clss)) {
				const { category, count, min_d } = clss[k]
				if (max_count < count || (max_count === count && min_d < min_dist)) {
					max_count = count
					min_dist = min_d
					target_cat = category
				}
			}
			return target_cat
		})
	}
}

/**
 * k-nearest neighbor regression
 */
export class KNNRegression extends KNNBase {
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		super(k, metric)
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 * @param {number} category Target value
	 */
	add(point, category) {
		super._add(point, category)
	}

	/**
	 * Add multiple data points.
	 * @param {Array<Array<number>>} datas Training data
	 * @param {number[]} targets Target values
	 */
	fit(datas, targets) {
		for (let i = 0; i < datas.length; i++) {
			this.add(datas[i], targets[i])
		}
	}

	/**
	 * Returns predicted values: the mean target of the nearest neighbors.
	 * The result is NaN for a sample when no training data is stored.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		return datas.map(data => {
			const ps = this._near_points(data)
			return ps.reduce((acc, v) => acc + v.category, 0) / ps.length
		})
	}
}

/**
 * k-nearest neighbor anomaly detection
 */
export class KNNAnomaly extends KNNBase {
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		super(k, metric)
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 */
	add(point) {
		super._add(point)
	}

	/**
	 * Add multiple data points.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		for (let i = 0; i < datas.length; i++) {
			this.add(datas[i])
		}
	}

	/**
	 * Returns anomaly degrees: the distance to the farthest of the nearest neighbors.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		return datas.map(data => {
			const ps = this._near_points(data)
			return ps[ps.length - 1].d
		})
	}
}

/**
 * k-nearest neighbor density estimation
 */
export class KNNDensityEstimation extends KNNBase {
	// https://home.hiroshima-u.ac.jp/tkurita/lecture/prnn/node12.html
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		super(k, metric)
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 */
	add(point) {
		super._add(point)
	}

	/**
	 * Add multiple data points.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		for (let i = 0; i < datas.length; i++) {
			this.add(datas[i])
		}
	}

	/**
	 * Returns the logarithm of the gamma function for a positive integer or half-integer.
	 * @param {number} z Positive integer or half-integer (n + 0.5)
	 * @returns {number} log(Gamma(z))
	 */
	_logGamma(z) {
		// https://ja.wikipedia.org/wiki/%E3%82%AC%E3%83%B3%E3%83%9E%E9%96%A2%E6%95%B0
		let x = 0
		if (Number.isInteger(z)) {
			// Gamma(z) = (z - 1)!
			for (let i = 2; i < z; i++) {
				x += Math.log(i)
			}
		} else {
			// Gamma(n + 1/2) = (2n - 1)!! * sqrt(pi) / 2^n
			const n = z - 0.5
			x = Math.log(Math.sqrt(Math.PI)) - Math.log(2) * n
			for (let i = 2 * n - 1; i > 0; i -= 2) {
				x += Math.log(i)
			}
		}
		return x
	}

	/**
	 * Returns predicted values: `k / (N * V)`, where `N` is the number of stored points and
	 * `V` is the volume of the ball whose radius is the distance to the farthest of the
	 * nearest neighbors.
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		return datas.map(data => {
			const ps = this._near_points(data)
			const r = ps[ps.length - 1].d
			const d = data.length
			// log(1 / V) for a d-dimensional ball of radius r, kept in log space.
			const ilogv = this._logGamma(d / 2 + 1) - (d / 2) * Math.log(Math.PI) - d * Math.log(r)
			return (Math.exp(ilogv) * this._k) / this._p.length
		})
	}
}

/**
 * Semi-supervised k-nearest neighbor
 */
export class SemiSupervisedKNN extends KNNBase {
	// https://products.sint.co.jp/aisia/blog/vol1-20
	/**
	 * @param {number} [k] Number of neighbors
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | 'minkowski' | function (number[], number[]): number} [metric] Metric name or a custom distance function
	 */
	constructor(k = 5, metric = 'euclid') {
		super(k, metric)
		// The neighbor search must return every stored point; the requested `k` is applied
		// while propagating labels instead.
		this._k = Infinity
		this._orgk = k
	}

	/**
	 * Add a data point.
	 * @param {number[]} point Training data
	 * @param {* | null} category Target value, or `null` when the point is unlabeled
	 */
	add(point, category) {
		super._add(point, category)
	}

	/**
	 * Add multiple data points.
	 * @param {Array<Array<number>>} datas Training data
	 * @param {(* | null)[]} targets Target values
	 */
	fit(datas, targets) {
		for (let i = 0; i < datas.length; i++) {
			this.add(datas[i], targets[i])
		}
	}

	/**
	 * Returns predicted values.
	 * Labels are propagated in rounds: each labeled point offers its label to its `k` nearest
	 * unlabeled points, and an unlabeled point takes the offer from the closest labeled point.
	 * Rounds repeat until no offer is made. The stored targets are updated in place and the
	 * internal target array itself is returned.
	 * @returns {*[]} Predicted values
	 */
	predict() {
		while (true) {
			// Best offer received so far in this round, keyed by stored point index.
			const offers = new Map()
			for (let i = 0; i < this._p.length; i++) {
				if (this._c[i] == null) {
					continue
				}
				let cnt = 0
				for (const p of this._near_points(this._p[i])) {
					if (p.category != null) {
						continue
					}
					if (p.d < (offers.get(p.idx)?.d ?? Infinity)) {
						offers.set(p.idx, { d: p.d, category: this._c[i] })
					}
					if (++cnt >= this._orgk) {
						break
					}
				}
			}
			if (offers.size === 0) {
				break
			}
			// Labels are applied only after the round so that every offer in a round is
			// based on the same set of labeled points.
			for (const [idx, offer] of offers) {
				this._c[idx] = offer.category
			}
		}
		return this._c
	}
}