UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

257 lines (240 loc) 5.79 kB
/**
 * Decision tree
 *
 * Base class that grows a binary tree one level per `fit()` call.
 * Subclasses must provide `_calcValue(datas)` (the value stored on a node)
 * and `_calcScore(datas)` (the impurity of a set of samples; lower is better).
 */
class DecisionTree {
	constructor() {
		this._depth = 0
	}

	/**
	 * Depth of the tree
	 *
	 * 0 before `init`, 1 after `init` (root only), then +1 for every `fit`
	 * call that actually split at least one leaf.
	 * @type {number}
	 */
	get depth() {
		return this._depth
	}

	/**
	 * Create a tree node holding the given samples.
	 * @private
	 * @param {Array<{value: number[], target: *}>} datas Samples that belong to the node
	 * @returns {object} Node with `datas`, `value`, `score`, `children` and a `leafs` getter
	 */
	_createNode(datas) {
		return {
			datas,
			value: this._calcValue(datas),
			score: this._calcScore(datas),
			children: [],
			// All leaf nodes below (or including) this node, left to right.
			get leafs() {
				if (this.children.length === 0) {
					return [this]
				}
				return this.children.reduce((acc, child) => acc.concat(child.leafs), [])
			},
		}
	}

	/**
	 * Search the best threshold split for a node.
	 * @private
	 * @param {object} node Leaf node to examine
	 * @returns {?{feature: number, threshold: number, lt: object[], rt: object[]}}
	 *   Best split that strictly lowers the weighted score, or `null` if there is none
	 */
	_findBestSplit(node) {
		const total = node.datas.length
		let bestScore = node.score
		let best = null
		for (let feature = 0; feature < this._features; feature++) {
			const values = node.datas.map(p => p.value[feature])
			values.sort((a, b) => a - b)
			for (let k = 0; k < values.length - 1; k++) {
				// Candidate thresholds are the midpoints of neighbouring sorted values.
				const threshold = (values[k] + values[k + 1]) / 2
				const lt = node.datas.filter(p => p.value[feature] < threshold)
				const rt = node.datas.filter(p => p.value[feature] >= threshold)
				// A split that leaves one side empty separates nothing; skip it so
				// that rounding in the weighted score can never create an empty child.
				if (lt.length === 0 || rt.length === 0) {
					continue
				}
				const score = (this._calcScore(lt) * lt.length + this._calcScore(rt) * rt.length) / total
				if (score < bestScore) {
					bestScore = score
					best = { feature, threshold, lt, rt }
				}
			}
		}
		return best
	}

	/**
	 * Initialize model.
	 *
	 * An empty training set is accepted and yields a root-only tree with no features.
	 * @param {Array<Array<number>>} datas Training data
	 * @param {*[]} targets Target values
	 */
	init(datas, targets) {
		this._datas = datas.map((value, i) => ({ value, target: targets[i] }))
		this._tree = this._createNode(this._datas)
		this._features = datas.length > 0 ? datas[0].length : 0
		this._depth = 1
	}

	/**
	 * Fit model.
	 *
	 * Tries to split every current leaf once. The depth only increases when at
	 * least one leaf was actually split.
	 */
	fit() {
		let grown = false
		for (const node of this._tree.leafs) {
			const split = this._findBestSplit(node)
			if (split === null) {
				continue
			}
			node.feature = split.feature
			node.threshold = split.threshold
			// children[0] holds samples below the threshold, children[1] the rest.
			node.children.push(this._createNode(split.lt), this._createNode(split.rt))
			grown = true
		}
		if (grown) {
			this._depth++
		}
	}

	/**
	 * Returns importances of the features.
	 *
	 * Each split contributes its weighted score decrease to the feature it uses;
	 * the result is normalized to sum to 1 unless the total decrease is 0.
	 * @returns {number[]} Importances
	 */
	importance() {
		const imp = Array(this._features).fill(0)
		let sum = 0
		const stack = [this._tree]
		while (stack.length > 0) {
			const node = stack.pop()
			if (node.children.length === 0) {
				continue
			}
			const parent = node.datas
			const left = node.children[0].datas
			const right = node.children[1].datas
			const gain =
				(this._calcScore(parent) * parent.length -
					this._calcScore(left) * left.length -
					this._calcScore(right) * right.length) /
				this._datas.length
			imp[node.feature] += gain
			sum += gain
			stack.push(...node.children)
		}
		if (sum === 0) {
			return imp
		}
		return imp.map(v => v / sum)
	}

	/**
	 * Returns predicted values.
	 *
	 * Walks from the root to a leaf for each sample and returns that leaf's value.
	 * @param {Array<Array<number>>} data Sample data
	 * @returns {*[]} Leaf values, one per sample
	 */
	predict_value(data) {
		return data.map(d => {
			let node = this._tree
			while (node.children.length > 0) {
				node = d[node.feature] < node.threshold ? node.children[0] : node.children[1]
			}
			return node.value
		})
	}
}

/**
 * Decision tree classifier
 */
export class DecisionTreeClassifier extends DecisionTree {
	/**
	 * @param {'ID3' | 'CART'} method Method name
	 */
	constructor(method) {
		super()
		this._method = method
	}

	/**
	 * Node value: the rate of every class among the samples.
	 * @param {Array<{value: number[], target: *}>} datas Samples
	 * @returns {Map<*, number>} Class rates
	 */
	_calcValue(datas) {
		return this._classesRate(datas)
	}

	/**
	 * Node score: entropy for 'ID3', Gini impurity for any other method.
	 * @param {Array<{value: number[], target: *}>} datas Samples
	 * @returns {number} Impurity
	 */
	_calcScore(datas) {
		return this._method === 'ID3' ? this._id3(datas) : this._gini(datas)
	}

	/**
	 * Rate of each class, keyed in order of first appearance.
	 * @param {Array<{value: number[], target: *}>} datas Samples
	 * @returns {Map<*, number>} Class rates (empty for no samples)
	 */
	_classesRate(datas) {
		const counts = new Map()
		for (const { target } of datas) {
			counts.set(target, (counts.get(target) || 0) + 1)
		}
		const rates = new Map()
		for (const [cls, count] of counts) {
			rates.set(cls, count / datas.length)
		}
		return rates
	}

	/**
	 * Entropy (natural logarithm) of the class distribution.
	 * @param {Array<{value: number[], target: *}>} datas Samples
	 * @returns {number} Entropy
	 */
	_id3(datas) {
		let entropy = 0
		this._classesRate(datas).forEach(rate => {
			entropy -= rate * Math.log(rate)
		})
		return entropy
	}

	/**
	 * Gini impurity of the class distribution.
	 * @param {Array<{value: number[], target: *}>} datas Samples
	 * @returns {number} Gini impurity (1 for no samples)
	 */
	_gini(datas) {
		let impurity = 1
		this._classesRate(datas).forEach(rate => {
			impurity -= rate ** 2
		})
		return impurity
	}

	/**
	 * Returns probability of the datas.
	 * @param {Array<Array<number>>} data Sample data
	 * @returns {Array<Map<*, number>>} Class rates of the reached leaf, one map per sample
	 */
	predict_prob(data) {
		return this.predict_value(data)
	}

	/**
	 * Returns predicted values.
	 *
	 * Picks the class with the highest rate; the first one wins on ties and
	 * -1 is returned when the leaf has no class with a positive rate.
	 * @param {Array<Array<number>>} data Sample data
	 * @returns {*[]} Predicted values
	 */
	predict(data) {
		return this.predict_prob(data).map(rates => {
			let bestRate = 0
			let bestClass = -1
			rates.forEach((rate, cls) => {
				if (rate > bestRate) {
					bestRate = rate
					bestClass = cls
				}
			})
			return bestClass
		})
	}
}

/**
 * Decision tree regression
 */
export class DecisionTreeRegression extends DecisionTree {
	/**
	 * Node value: mean of the targets (element-wise for array targets).
	 * @param {Array<{value: number[], target: number | number[]}>} datas Samples
	 * @returns {number | number[]} Mean target, or 0 for no samples
	 */
	_calcValue(datas) {
		if (datas.length === 0) {
			return 0
		}
		if (!Array.isArray(datas[0].target)) {
			return datas.reduce((acc, d) => acc + d.target, 0) / datas.length
		}
		const dim = datas[0].target.length
		const sums = datas.reduce((acc, d) => acc.map((v, i) => v + d.target[i]), Array(dim).fill(0))
		return sums.map(v => v / datas.length)
	}

	/**
	 * Node score: root of the mean squared distance of the targets to their mean.
	 * @param {Array<{value: number[], target: number | number[]}>} datas Samples
	 * @returns {number} Score, or 0 for no samples
	 */
	_calcScore(datas) {
		if (datas.length === 0) {
			return 0
		}
		const mean = this._calcValue(datas)
		const squaredError = Array.isArray(datas[0].target)
			? datas.reduce((acc, d) => acc + d.target.reduce((s, v, i) => s + (v - mean[i]) ** 2, 0), 0)
			: datas.reduce((acc, d) => acc + (d.target - mean) ** 2, 0)
		return Math.sqrt(squaredError / datas.length)
	}

	/**
	 * Returns predicted values.
	 * @param {Array<Array<number>>} data Sample data
	 * @returns {Array<number | number[]>} Predicted values
	 */
	predict(data) {
		return this.predict_value(data)
	}
}