UNPKG

@jsmlt/jsmlt

Version:

JavaScript Machine Learning

190 lines (145 loc) 7.68 kB
'use strict';

Object.defineProperty(exports, "__esModule", {
  value: true
});

// --- Transpiler-generated helpers -------------------------------------------------------------

// Shallow-merges the own enumerable properties of every argument after the first into the first
// argument. Uses the native Object.assign when it is available.
var _extends = Object.assign || function (target) {
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i];

    for (var key in source) {
      if (Object.prototype.hasOwnProperty.call(source, key)) {
        target[key] = source[key];
      }
    }
  }

  return target;
};

// Defines the given prototype and static property descriptors on a constructor function.
var _createClass = function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
}();

// Internal dependencies
var _base = require('../base');

var _decisionTree = require('./decision-tree');

var _decisionTree2 = _interopRequireDefault(_decisionTree);

var _arrays = require('../../arrays');

var Arrays = _interopRequireWildcard(_arrays);

var _random = require('../../random');

var Random = _interopRequireWildcard(_random);

// Returns ES modules unchanged; otherwise copies the own properties of `obj` onto a new object
// and exposes `obj` itself as its `default` property.
function _interopRequireWildcard(obj) {
  if (obj && obj.__esModule) {
    return obj;
  } else {
    var newObj = {};

    if (obj != null) {
      for (var key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key];
      }
    }

    newObj.default = obj;
    return newObj;
  }
}

// Returns ES modules unchanged; otherwise wraps `obj` as `{ default: obj }`.
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

// Copies an array element by element (so holes become explicit `undefined` entries), or converts
// any other iterable/array-like value with Array.from.
function _toConsumableArray(arr) {
  if (Array.isArray(arr)) {
    for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) {
      arr2[i] = arr[i];
    }

    return arr2;
  } else {
    return Array.from(arr);
  }
}

// Throws when a class constructor is invoked without `new`.
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}

// Resolves the value of `this` after a super() call: the super constructor's return value when it
// is an object or function, otherwise the instance itself.
function _possibleConstructorReturn(self, call) {
  if (!self) {
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
  }

  return call && (typeof call === "object" || typeof call === "function") ? call : self;
}

// Wires up prototype-based inheritance (instance and static side) between two constructors.
function _inherits(subClass, superClass) {
  if (typeof superClass !== "function" && superClass !== null) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }

  subClass.prototype = Object.create(superClass && superClass.prototype, {
    constructor: {
      value: subClass,
      enumerable: false,
      writable: true,
      configurable: true
    }
  });
  if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass;
}

/**
 * Random forest learner. Builds multiple decision trees, each trained on either a bootstrapped
 * sample of the training data or on the full training data, and combines their predictions by
 * majority vote: the label predicted by the largest number of trees is the final prediction.
 */
var RandomForest = function (_Classifier) {
  _inherits(RandomForest, _Classifier);

  /**
   * Constructor. Initialize class members and store user-defined options.
   *
   * @param {Object} [optionsUser] - User-defined options for random forest
   * @param {number} [optionsUser.numTrees = 10] - Number of decision trees to build
   * @param {string} [optionsUser.criterion = 'gini'] - Splitting criterion. Either 'gini', for the
   *   Gini coefficient, or 'entropy' for the Shannon entropy
   * @param {number|string} [optionsUser.numFeatures = 1.0] - Number of features to subsample at
   *   each node. Either a number (float), in which case the input fraction of features is used
   *   (e.g., 1.0 for all features), or a string. If string, 'sqrt' and 'log2' are supported,
   *   causing the algorithm to use sqrt(n) and log2(n) features, respectively (where n is the
   *   total number of features)
   * @param {boolean} [optionsUser.bootstrap = true] - Whether to select samples for each tree by
   *   bootstrapping. If false, all samples are used for each tree. If true, n samples are drawn
   *   with replacement from the full set of samples for each tree (where n is the total number of
   *   samples)
   */
  function RandomForest() {
    var optionsUser = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};

    _classCallCheck(this, RandomForest);

    var _this = _possibleConstructorReturn(this, (RandomForest.__proto__ || Object.getPrototypeOf(RandomForest)).call(this));

    // Parse options: user-supplied values override the defaults
    var optionsDefault = {
      criterion: 'gini',
      numFeatures: 1.0,
      numTrees: 10,
      bootstrap: true
    };

    var options = _extends({}, optionsDefault, optionsUser);

    // Set options. `criterion` and `numFeatures` are stored as given and forwarded to each
    // decision tree when training.
    _this.criterion = options.criterion;
    _this.numFeatures = options.numFeatures;
    _this.numTrees = options.numTrees;
    _this.bootstrap = options.bootstrap;
    return _this;
  }

  _createClass(RandomForest, [{
    key: 'train',

    /**
     * Train `numTrees` decision trees and store them in `this.trees`, replacing any previously
     * trained trees.
     *
     * @see {@link Classifier#train}
     *
     * @param {Array} X - Training samples; X[i] holds the features of the i-th sample
     * @param {Array} y - Training labels; y[i] is the label of X[i]
     * @throws {Error} If X and y do not have the same length
     */
    value: function train(X, y) {
      if (X.length !== y.length) {
        throw new Error('Number of data points should match number of labels.');
      }

      // Construct and train decision trees
      this.trees = [];

      // All sample indices: [0, 1, ..., X.length - 1]
      var sampleIndices = [].concat(_toConsumableArray(Array(X.length))).map(function (x, i) {
        return i;
      });

      for (var i = 0; i < this.numTrees; i += 1) {
        // Construct decision tree
        var tree = new _decisionTree2.default({
          criterion: this.criterion,
          numFeatures: this.numFeatures
        });

        // Select the input samples. If bootstrapping is disabled, use all samples. If it is
        // enabled, use a bootstrapped sample of all samples
        var treeX = void 0;
        var treeY = void 0;

        if (this.bootstrap) {
          // NOTE(review): the third argument is presumably the "with replacement" flag of
          // Random.sample - confirm against the random module.
          var treeSamples = Random.sample(sampleIndices, X.length, true);

          treeX = treeSamples.map(function (sampleIndex) {
            return X[sampleIndex];
          });
          treeY = treeSamples.map(function (sampleIndex) {
            return y[sampleIndex];
          });
        } else {
          treeX = X;
          treeY = y;
        }

        // Train the tree
        tree.train(treeX, treeY);

        // Add the trained tree to the list of trees
        this.trees.push(tree);
      }
    }
  }, {
    key: 'predict',

    /**
     * Predict a label for every sample in X.
     *
     * @see {@link Classifier#predict}
     *
     * @param {Array} X - Samples to predict; X[i] holds the features of the i-th sample
     * @return {Array} Predicted label for each sample, in the same order as X
     * @throws {Error} If the model has not been trained yet
     */
    value: function predict(X) {
      var _this2 = this;

      if (typeof this.trees === 'undefined') {
        throw new Error('Model has to be trained in order to make predictions.');
      }

      // Make prediction for each data point
      var predictions = X.map(function (x) {
        return _this2.predictSample(x);
      });

      return predictions;
    }
  }, {
    key: 'predictSample',

    /**
     * Make a prediction for a single sample by majority vote over all trained trees.
     *
     * @param {Array.<number>} sampleFeatures - Data point features
     * @return {mixed} Prediction. Label predicted by the largest number of trees. On a tie, the
     *   label that appears first in the vote counts wins. Returns -1 if there are no votes (i.e.,
     *   the forest contains no trees)
     * @throws {Error} If the model has not been trained yet
     */
    value: function predictSample(sampleFeatures) {
      // Same guard as predict(), so calling this method directly on an untrained model fails
      // with a descriptive error instead of a TypeError on `this.trees.map`
      if (typeof this.trees === 'undefined') {
        throw new Error('Model has to be trained in order to make predictions.');
      }

      // Gather predictions from all trees
      var predictions = this.trees.map(function (x) {
        return x.predictSample(sampleFeatures);
      });

      // Count the number of votes for each class. The reduction below treats each entry as a
      // [label, count] pair.
      var predictionCounts = Arrays.valueCounts(predictions);

      // Predict the class with the most predictions. The strict comparison keeps the earliest
      // entry on ties; the [-1, -1] seed is the result when there are no votes at all.
      return predictionCounts.reduce(function (r, x) {
        return x[1] > r[1] ? x : r;
      }, [-1, -1])[0];
    }
  }]);

  return RandomForest;
}(_base.Classifier);

exports.default = RandomForest;
module.exports = exports['default'];