UNPKG

ml-naivebayes

Version:
320 lines (280 loc) 9.39 kB
'use strict'; Object.defineProperty(exports, '__esModule', { value: true }); function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; } var Matrix = require('ml-matrix'); var Matrix__default = _interopDefault(Matrix); /** * @private * Function that retuns an array of matrices of the cases that belong to each class. * @param {Matrix} X - dataset * @param {Array} y - predictions * @return {Array} */ function separateClasses(X, y) { var features = X.columns; var classes = 0; var totalPerClasses = new Array(10000); // max upperbound of classes for (var i = 0; i < y.length; i++) { if (totalPerClasses[y[i]] === undefined) { totalPerClasses[y[i]] = 0; classes++; } totalPerClasses[y[i]]++; } var separatedClasses = new Array(classes); var currentIndex = new Array(classes); for (i = 0; i < classes; ++i) { separatedClasses[i] = new Matrix__default(totalPerClasses[i], features); currentIndex[i] = 0; } for (i = 0; i < X.rows; ++i) { separatedClasses[y[i]].setRow(currentIndex[y[i]], X.getRow(i)); currentIndex[y[i]]++; } return separatedClasses; } class GaussianNB { /** * Constructor for the Gaussian Naive Bayes classifier, the parameters here is just for loading purposes. * @constructor * @param {boolean} reload * @param {object} model */ constructor(reload, model) { if (reload) { this.means = model.means; this.calculateProbabilities = model.calculateProbabilities; } } /** * Function that trains the classifier with a matrix that represents the training set and an array that * represents the label of each row in the training set. the labels must be numbers between 0 to n-1 where * n represents the number of classes. * * WARNING: in the case that one class, all the cases in one or more features have the same value, the * Naive Bayes classifier will not work well. * @param {Matrix|Array} trainingSet * @param {Matrix|Array} trainingLabels */ train(trainingSet, trainingLabels) { var C1 = Math.sqrt(2 * Math.PI); // constant to precalculate the squared root trainingSet = Matrix.Matrix.checkMatrix(trainingSet); if (trainingSet.rows !== trainingLabels.length) { throw new RangeError( 'the size of the training set and the training labels must be the same.' ); } var separatedClasses = separateClasses(trainingSet, trainingLabels); var calculateProbabilities = new Array(separatedClasses.length); this.means = new Array(separatedClasses.length); for (var i = 0; i < separatedClasses.length; ++i) { var means = separatedClasses[i].mean('column'); var std = separatedClasses[i].standardDeviation('column', { mean: means }); var logPriorProbability = Math.log( separatedClasses[i].rows / trainingSet.rows ); calculateProbabilities[i] = new Array(means.length + 1); calculateProbabilities[i][0] = logPriorProbability; for (var j = 1; j < means.length + 1; ++j) { var currentStd = std[j - 1]; calculateProbabilities[i][j] = [ 1 / (C1 * currentStd), -2 * currentStd * currentStd ]; } this.means[i] = means; } this.calculateProbabilities = calculateProbabilities; } /** * function that predicts each row of the dataset (must be a matrix). * * @param {Matrix|Array} dataset * @return {Array} */ predict(dataset) { dataset = Matrix.Matrix.checkMatrix(dataset); if (dataset.rows === this.calculateProbabilities[0].length) { throw new RangeError( 'the dataset must have the same features as the training set' ); } var predictions = new Array(dataset.rows); for (var i = 0; i < predictions.length; ++i) { predictions[i] = getCurrentClass( dataset.getRow(i), this.means, this.calculateProbabilities ); } return predictions; } /** * Function that export the NaiveBayes model. * @return {object} */ toJSON() { return { modelName: 'NaiveBayes', means: this.means, calculateProbabilities: this.calculateProbabilities }; } /** * Function that create a GaussianNB classifier with the given model. * @param {object} model * @return {GaussianNB} */ static load(model) { if (model.modelName !== 'NaiveBayes') { throw new RangeError( 'The current model is not a Multinomial Naive Bayes, current model:', model.name ); } return new GaussianNB(true, model); } } /** * @private * Function the retrieves a prediction with one case. * * @param {Array} currentCase * @param {Array} mean - Precalculated means of each class trained * @param {Array} classes - Precalculated value of each class (Prior probability and probability function of each feature) * @return {number} */ function getCurrentClass(currentCase, mean, classes) { var maxProbability = 0; var predictedClass = -1; // going through all precalculated values for the classes for (var i = 0; i < classes.length; ++i) { var currentProbability = classes[i][0]; // initialize with the prior probability for (var j = 1; j < classes[0][1].length + 1; ++j) { currentProbability += calculateLogProbability( currentCase[j - 1], mean[i][j - 1], classes[i][j][0], classes[i][j][1] ); } currentProbability = Math.exp(currentProbability); if (currentProbability > maxProbability) { maxProbability = currentProbability; predictedClass = i; } } return predictedClass; } /** * @private * function that retrieves the probability of the feature given the class. * @param {number} value - value of the feature. * @param {number} mean - mean of the feature for the given class. * @param {number} C1 - precalculated value of (1 / (sqrt(2*pi) * std)). * @param {number} C2 - precalculated value of (2 * std^2) for the denominator of the exponential. * @return {number} */ function calculateLogProbability(value, mean, C1, C2) { value = value - mean; return Math.log(C1 * Math.exp((value * value) / C2)); } class MultinomialNB { /** * Constructor for Multinomial Naive Bayes, the model parameter is for load purposes. * @constructor * @param {object} model - for load purposes. */ constructor(model) { if (model) { this.conditionalProbability = Matrix.Matrix.checkMatrix( model.conditionalProbability ); this.priorProbability = Matrix.Matrix.checkMatrix(model.priorProbability); } } /** * Train the classifier with the current training set and labels, the labels must be numbers between 0 and n. * @param {Matrix|Array} trainingSet * @param {Array} trainingLabels */ train(trainingSet, trainingLabels) { trainingSet = Matrix.Matrix.checkMatrix(trainingSet); if (trainingSet.rows !== trainingLabels.length) { throw new RangeError( 'the size of the training set and the training labels must be the same.' ); } var separateClass = separateClasses(trainingSet, trainingLabels); this.priorProbability = new Matrix.Matrix(separateClass.length, 1); for (var i = 0; i < separateClass.length; ++i) { this.priorProbability.set(i, 0, Math.log( separateClass[i].rows / trainingSet.rows )); } var features = trainingSet.columns; this.conditionalProbability = new Matrix.Matrix(separateClass.length, features); for (i = 0; i < separateClass.length; ++i) { var classValues = Matrix.Matrix.checkMatrix(separateClass[i]); var total = classValues.sum(); var divisor = total + features; this.conditionalProbability.setRow( i, Matrix.Matrix.rowVector(classValues .sum('column')) .add(1) .div(divisor) .apply(matrixLog) ); } } /** * Retrieves the predictions for the dataset with the current model. * @param {Matrix|Array} dataset * @return {Array} - predictions from the dataset. */ predict(dataset) { dataset = Matrix.Matrix.checkMatrix(dataset); var predictions = new Array(dataset.rows); for (var i = 0; i < dataset.rows; ++i) { var currentElement = dataset.getRowVector(i); const v = Matrix.Matrix.columnVector(this.conditionalProbability .clone() .mulRowVector(currentElement) .sum('row')); predictions[i] = v .add(this.priorProbability) .maxIndex()[0]; } return predictions; } /** * Function that saves the current model. * @return {object} - model in JSON format. */ toJSON() { return { name: 'MultinomialNB', priorProbability: this.priorProbability, conditionalProbability: this.conditionalProbability }; } /** * Creates a new MultinomialNB from the given model * @param {object} model * @return {MultinomialNB} */ static load(model) { if (model.name !== 'MultinomialNB') { throw new RangeError(`${model.name} is not a Multinomial Naive Bayes`); } return new MultinomialNB(model); } } function matrixLog(i, j) { this.set(i, j, Math.log(this.get(i, j))); } exports.GaussianNB = GaussianNB; exports.MultinomialNB = MultinomialNB;