/*
 * ml-random-forest
 * Random forest for classification and regression
 *
 * Listing header: 156 lines (139 loc), 4.15 kB, JavaScript (version not recorded).
 */
import { Matrix } from 'ml-matrix';
import * as Random from 'random-js';
/**
 * Tests whether a value lies in the half-open interval (0, 1].
 * @param {number} n - value to test.
 * @return {boolean} true when 0 < n <= 1, false otherwise (including NaN).
 */
export function checkFloat(n) {
  // Anything that is not strictly positive (NaN included) is rejected first.
  if (!(n > 0.0)) {
    return false;
  }
  return n <= 1.0;
}
/**
 * Tests whether a value is a number primitive with a non-zero remainder
 * modulo 1, i.e. a number that is not an integer.
 * NaN and non-number values yield false. Note that +/-Infinity yield true,
 * because `Infinity % 1` is NaN, which differs from 0.
 * @param {*} n - value to test.
 * @return {boolean}
 */
export function isFloat(n) {
  // `Number(n) === n` only holds for number primitives other than NaN.
  const isNumberPrimitive = Number(n) === n;
  if (!isNumberPrimitive) {
    return false;
  }
  const fractionalPart = n % 1;
  return fractionalPart !== 0;
}
/**
 * Draws a bootstrap sample from the training set: as many rows as the
 * training set contains are picked uniformly at random, with replacement.
 * Rows that were never picked are returned separately as the out-of-bag
 * (OOB) sample.
 * @ignore
 * @param {Matrix} trainingSet
 * @param {Array} trainingValue
 * @param {number} seed - seed for the random selection, must be a 32-bit integer.
 * @return {object} `X` and `y` hold the sampled rows and values, `Xoob` and
 * `ioob` hold the out-of-bag rows and their original row indexes (ascending),
 * and `seed` is the next value produced by the random engine.
 */
export function examplesBaggingWithReplacement(
  trainingSet,
  trainingValue,
  seed,
) {
  const sampleSize = trainingSet.rows;
  const pickRow = Random.integer(0, sampleSize - 1);

  if (seed !== undefined && !Number.isInteger(seed)) {
    throw new RangeError(
      `Expected seed must be undefined or integer not ${seed}`,
    );
  }
  const engine =
    seed === undefined
      ? Random.MersenneTwister19937.autoSeed()
      : Random.MersenneTwister19937.seed(seed);

  const sampledRows = [];
  const sampledValues = [];
  // timesDrawn[r] counts how often row r ended up in the sample.
  const timesDrawn = new Array(sampleSize).fill(0);
  for (let draw = 0; draw < sampleSize; ++draw) {
    const rowIndex = pickRow(engine);
    sampledRows.push(trainingSet.getRow(rowIndex));
    sampledValues.push(trainingValue[rowIndex]);
    timesDrawn[rowIndex] += 1;
  }

  // Rows that were never drawn form the out-of-bag set, in increasing order.
  const oobRows = [];
  const oobIndexes = [];
  timesDrawn.forEach((count, rowIndex) => {
    if (count === 0) {
      oobRows.push(trainingSet.getRow(rowIndex));
      oobIndexes.push(rowIndex);
    }
  });

  return {
    X: new Matrix(sampledRows),
    y: sampledValues,
    Xoob: new Matrix(oobRows),
    ioob: oobIndexes,
    seed: engine.next(),
  };
}
/**
 * Builds a new training set made of n columns (features) picked at random
 * from the given one, either with or without replacement, and reports which
 * source columns were used.
 * @ignore
 * @param {Matrix} trainingSet
 * @param {number} n - features.
 * @param {boolean} replacement
 * @param {number} seed - seed for the random selection, must be a 32-bit integer.
 * @return {object} `X` is the reduced training set, `usedIndex` lists the
 * source column of each column of `X` (in order), and `seed` is the next
 * value produced by the random engine.
 */
export function featureBagging(trainingSet, n, replacement, seed) {
  const featureCount = trainingSet.columns;
  if (n > featureCount) {
    throw new RangeError(
      'N should be less or equal to the number of columns of X',
    );
  }

  const pickColumn = Random.integer(0, featureCount - 1);

  if (seed !== undefined && !Number.isInteger(seed)) {
    throw new RangeError(
      `Expected seed must be undefined or integer not ${seed}`,
    );
  }
  const engine =
    seed === undefined
      ? Random.MersenneTwister19937.autoSeed()
      : Random.MersenneTwister19937.seed(seed);

  const subset = new Matrix(trainingSet.rows, n);
  const pickedColumns = [];

  if (replacement) {
    for (let target = 0; target < n; ++target) {
      const source = pickColumn(engine);
      pickedColumns.push(source);
      subset.setColumn(target, trainingSet.getColumn(source));
    }
  } else {
    const alreadyPicked = new Set();
    // The first candidate is drawn before the loop, even when n is 0.
    let candidate = pickColumn(engine);
    for (let target = 0; target < n; ++target) {
      // Redraw until a column that has not been used yet comes up.
      while (alreadyPicked.has(candidate)) {
        candidate = pickColumn(engine);
      }
      subset.setColumn(target, trainingSet.getColumn(candidate));
      alreadyPicked.add(candidate);
      pickedColumns.push(candidate);
    }
  }

  return {
    X: subset,
    usedIndex: pickedColumns,
    seed: engine.next(),
  };
}
/**
 * Collects and combines the individual tree predictions made on Out-Of-Bag
 * data, producing one entry per training sample.
 *
 * Each tree's `index` list is consumed front to back: an entry is taken for
 * sample i only when it is the next unconsumed entry of that tree, so the
 * lists are expected to be in increasing order. The input objects are left
 * untouched; a per-tree read cursor is used instead of re-slicing the
 * arrays, which also avoids copying the remaining list on every match.
 * @ignore
 * @param {Array<{index: Array, predicted: Array}>} oob - individual tree
 * predictions: `index[k]` is the training-sample index that `predicted[k]`
 * refers to.
 * @param {Array} y - true labels.
 * @param {function(Array): number} aggregate - aggregation function, called
 * once per sample with the list of predictions gathered for it (possibly empty).
 * @return {Array} one `{ true, all, predicted }` object per element of y.
 */
export const collectOOB = (oob, y, aggregate) => {
  // cursors[j] is the position of the next unconsumed entry of tree j.
  const cursors = new Array(oob.length).fill(0);
  const res = new Array(y.length);
  for (let i = 0; i < y.length; i++) {
    const all = [];
    for (let j = 0; j < oob.length; j++) {
      const { index, predicted } = oob[j];
      const cursor = cursors[j];
      if (index[cursor] === i) {
        all.push(predicted[cursor]);
        cursors[j] = cursor + 1;
      }
    }
    res[i] = { true: y[i], all, predicted: aggregate(all) };
  }
  return res;
};