lumenize
Version:
Illuminating the forest AND the trees in your data.
688 lines (601 loc) • 29.8 kB
JavaScript
// Generated by CoffeeScript 1.7.1
(function() {
var BayesianClassifier, Classifier, JSON, OLAPCube, functions, utils,
__hasProp = {}.hasOwnProperty,
__extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; };
functions = require('./functions').functions;
utils = require('tztime').utils;
OLAPCube = require('./OLAPCube').OLAPCube;
JSON = require('JSON2');
Classifier = (function() {
function Classifier() {}
/*
@class Classifier
__Base class for all Classifiers__
The constructor takes no arguments and sets no state. The class carries static helpers that build "bucketers"
(Arrays of `{value, startOn, endBelow}` bins) for continuous features, plus the `discreteizeRow()` instance method
which replaces a row's continuous values with the label of the bin they fall into (it reads `this.features`).
See individual subclasses for usage details
*/
Classifier.getBucketCountMinMax = function(values) {
  /*
  @method getBucketCountMinMax
  @static
  Works out how many buckets to aim for (floor(sqrt(n)) + 1 for n values) and looks up the extremes of the values
  via `functions.min()` and `functions.max()`.
  @param {Array} values The values of one feature (presumably Numbers)
  @return {Object} `{targetBucketCount, min, max}`
  Throws an Error("Need more training data") when the computed bucket count is below 3.
  */
  var bucketTarget = 1 + Math.floor(Math.sqrt(values.length));
  if (bucketTarget < 3) {
    throw new Error("Need more training data");
  }
  return {
    targetBucketCount: bucketTarget,
    min: functions.min(values),
    max: functions.max(values)
  };
};
Classifier.generateConstantWidthBucketer = function(values) {
  /*
  @method generateConstantWidthBucketer
  @static
  Builds equal-width bins spanning min..max of the values. The first bin has no lower bound (`startOn: null`) and
  the last bin has no upper bound (`endBelow: null`).
  @param {Array} values The values of one feature (presumably Numbers)
  @return {Object[]} Array of `{value: 'B<i>', startOn, endBelow}` bins
  */
  var stats = Classifier.getBucketCountMinMax(values);
  var bucketCount = stats.targetBucketCount;
  var low = stats.min;
  var width = (stats.max - low) / bucketCount;
  var buckets = [];
  var k;
  // Open-ended first bin.
  buckets.push({
    value: 'B' + 0,
    startOn: null,
    endBelow: low + width
  });
  // Fully bounded interior bins.
  for (k = 1; k <= bucketCount - 2; k++) {
    buckets.push({
      value: 'B' + k,
      startOn: low + width * k,
      endBelow: low + width * (k + 1)
    });
  }
  // Open-ended last bin.
  buckets.push({
    value: 'B' + (bucketCount - 1),
    startOn: low + width * (bucketCount - 1),
    endBelow: null
  });
  return buckets;
};
Classifier.generateConstantQuantityBucketer = function(values) {
  /*
  @method generateConstantQuantityBucketer
  @static
  Builds bins whose boundaries are evenly spaced percentiles of the values (100 / targetBucketCount apart), as
  computed by `functions.percentileCreator(p)(values)`. The first bin has no lower bound and the last bin has no
  upper bound.
  @param {Array} values The values of one feature (presumably Numbers)
  @return {Object[]} Array of `{value: 'B<i>', startOn, endBelow}` bins
  */
  var bucketCount = Classifier.getBucketCountMinMax(values).targetBucketCount;
  var percentStep = 100 / bucketCount;
  var percentileAt = function(percent) {
    return functions.percentileCreator(percent)(values);
  };
  var boundary = percentileAt(percentStep);
  var buckets = [
    {
      value: 'B' + 0,
      startOn: null,
      endBelow: boundary
    }
  ];
  var previous, k;
  for (k = 1; k <= bucketCount - 2; k++) {
    // Each interior bin starts where the previous one ended.
    previous = boundary;
    boundary = percentileAt(percentStep * (k + 1));
    buckets.push({
      value: 'B' + k,
      startOn: previous,
      endBelow: boundary
    });
  }
  buckets.push({
    value: 'B' + (bucketCount - 1),
    startOn: boundary,
    endBelow: null
  });
  return buckets;
};
Classifier.splitAt = function(values, index) {
  /*
  @method splitAt
  @static
  Cuts an Array in two at `index` without modifying it.
  @param {Array} values
  @param {Number} index Position of the first element that goes into `right`
  @return {Object} `{left, right}` where `left` is `values.slice(0, index)` and `right` is `values.slice(index)`
  */
  return {
    left: values.slice(0, index),
    right: values.slice(index)
  };
};
Classifier.optimalSplitFor2Buckets = function(values) {
  /*
  @method optimalSplitFor2Buckets
  @static
  Tries every way of cutting `values` into a non-empty left part and a non-empty right part and keeps the cut with
  the lowest `functions.errorSquared(left) + functions.errorSquared(right)`. On ties the earliest cut wins.
  @param {Number[]} values Values to split; the boundary is only meaningful if they are already sorted ascending
  @return {Object} `{splitAt, left, right}` where `splitAt` is the midpoint between the last value of `left` and the
  first value of `right`
  Throws an Error when fewer than two values are supplied, because no such cut exists.
  */
  var bestIndex, bestLeft, bestRight, bestTotalErrorSquared, halves, i, totalErrorSquared;
  // The CoffeeScript-generated range loop used to count DOWN (1, 0, -1) for short inputs, evaluating nonsensical
  // cut positions and yielding a NaN boundary. Fail loudly instead.
  if (values.length < 2) {
    throw new Error("Need at least two values to find a split");
  }
  bestIndex = 1;
  bestTotalErrorSquared = Number.MAX_VALUE;
  for (i = 1; i <= values.length - 1; i++) {
    halves = Classifier.splitAt(values, i);
    totalErrorSquared = functions.errorSquared(halves.left) + functions.errorSquared(halves.right);
    if (totalErrorSquared < bestTotalErrorSquared) {
      bestTotalErrorSquared = totalErrorSquared;
      bestIndex = i;
      bestLeft = halves.left;
      bestRight = halves.right;
    }
  }
  return {
    splitAt: (values[bestIndex - 1] + values[bestIndex]) / 2,
    left: bestLeft,
    right: bestRight
  };
};
Classifier.areAllSame = function(values) {
  /*
  @method areAllSame
  @static
  @param {Array} values
  @return {Boolean} true when every element is strictly equal (`===`) to the first one; an empty Array yields true
  */
  var reference = values[0];
  var position = values.length;
  // Scan order does not matter for the answer, so walk from the back.
  while (position-- > 0) {
    if (values[position] !== reference) {
      return false;
    }
  }
  return true;
};
Classifier.findBucketSplits = function(currentSplits, values, targetBucketCount) {
  /*
  @method findBucketSplits
  @static
  Recursively collects split points. The best two-way split of `values` is appended to `currentSplits`; while the
  number of collected splits is still below `targetBucketCount`, the left and then the right half are split further.
  Because the limit is only checked before descending, the final number of splits is not precisely controlled.
  @param {Number[]} currentSplits Accumulator; split points are pushed onto it (mutated in place)
  @param {Number[]} values The values to split (handed to `optimalSplitFor2Buckets()`)
  @param {Number} targetBucketCount
  @return {Number[]/null} `currentSplits`, or null when `values` has fewer than 5 elements or all are identical
  */
  var best;
  if (values.length < 5 || Classifier.areAllSame(values)) {
    return null;
  }
  best = Classifier.optimalSplitFor2Buckets(values);
  currentSplits.push(best.splitAt);
  if (!(currentSplits.length < targetBucketCount)) {
    return currentSplits;
  }
  Classifier.findBucketSplits(currentSplits, best.left, targetBucketCount);
  Classifier.findBucketSplits(currentSplits, best.right, targetBucketCount);
  return currentSplits;
};
Classifier.generateVOptimalBucketer = function(values) {
  /*
  @method generateVOptimalBucketer
  @static
  Builds bins whose boundaries are the split points found by `findBucketSplits()` on an ascending-sorted copy of
  the values. With k split points the result has k + 1 bins: the first has `startOn: null`, the last has
  `endBelow: null`, and each interior bin runs from one split point up to (but excluding) the next.
  When no split point is found (fewer than 5 values, or all values identical) a single catch-all bin is returned.
  @param {Number[]} values The values of one continuous feature. The Array is NOT modified.
  @return {Object[]} Array of `{value: 'B<i>', startOn, endBelow}` bins
  */
  var bucketer, i, sortedValues, splits, targetBucketCount;
  var ascending = function(a, b) {
    return a - b;
  };
  targetBucketCount = Classifier.getBucketCountMinMax(values).targetBucketCount;
  // Sort a copy: sorting `values` itself would reorder the caller's Array.
  sortedValues = values.slice(0).sort(ascending);
  splits = [];
  Classifier.findBucketSplits(splits, sortedValues, targetBucketCount);
  splits.sort(ascending);
  if (splits.length === 0) {
    // discreteizeRow() only matches a bin through a non-null bound, so the catch-all bin uses the most negative
    // finite Number as its lower bound. Unlike -Infinity, that value survives a JSON round trip.
    return [
      {
        value: 'B0',
        startOn: -Number.MAX_VALUE,
        endBelow: null
      }
    ];
  }
  bucketer = [
    {
      value: 'B0',
      startOn: null,
      endBelow: splits[0]
    }
  ];
  // Ascending only: the generated range loop used to run backwards for 0 or 1 split points and emitted
  // duplicate/garbage bins.
  for (i = 1; i < splits.length; i++) {
    bucketer.push({
      value: 'B' + i,
      startOn: splits[i - 1],
      endBelow: splits[i]
    });
  }
  bucketer.push({
    value: 'B' + splits.length,
    startOn: splits[splits.length - 1],
    endBelow: null
  });
  return bucketer;
};
Classifier.prototype.discreteizeRow = function(row) {
  /*
  @method discreteizeRow
  Replaces, IN PLACE, the value of every 'continuous' feature field in `row` with the `value` (label) of the first
  bin in `feature.bins` that contains it. A bin contains a value when `startOn <= value < endBelow`; a null/undefined
  `startOn` or `endBelow` leaves that side unchecked. Fields of other feature types are left alone, and a value that
  matches no bin is left unchanged.
  @param {Object} row
  @return {Object} The same `row` object that was passed in
  Throws an Error when a continuous feature's field is null or missing from `row`.
  */
  var features = this.features;
  var featureCount = features.length;
  var f, b, feature, bins, binCount, bin, value, isInBin;
  for (f = 0; f < featureCount; f++) {
    feature = features[f];
    if (feature.type !== 'continuous') {
      continue;
    }
    value = row[feature.field];
    if (value == null) {
      throw new Error("Could not find field " + feature.field + " in " + (JSON.stringify(row)) + ".");
    }
    bins = feature.bins;
    binCount = bins.length;
    for (b = 0; b < binCount; b++) {
      bin = bins[b];
      if (bin.startOn == null) {
        // No lower bound: only the upper bound decides.
        isInBin = value < bin.endBelow;
      } else if (bin.endBelow == null) {
        // No upper bound: only the lower bound decides.
        isInBin = bin.startOn <= value;
      } else {
        isInBin = bin.startOn <= value && value < bin.endBelow;
      }
      if (isInBin) {
        row[feature.field] = bin.value;
        break;
      }
    }
  }
  return row;
};
return Classifier;
})();
BayesianClassifier = (function(_super) {
__extends(BayesianClassifier, _super);
/*
@class BayesianClassifier
__A Bayesian classifier with non-parametric modeling of distributions using v-optimal bucketing.__
If you look for libraries for Bayesian classification, the primary use case is spam filtering and they assume that
the presence or absence of a word is the only feature you are interested in. This is a more general purpose tool.
*# Features ##
* Works even for bi-modal and other non-normal distributions
* No requirement that you identify the distribution
* Uses [non-parametric modeling](http://en.wikipedia.org/wiki/Non-parametric_statistics)
* Uses v-optimal bucketing so it deals well with outliers and sharp cliffs
* Serialize (`getStateForSaving()`) and deserialize (`newFromSavedState()`) to preserve training between sessions
*# Why the assumption of a normal distribution is bad in some cases ##
The [wikipedia example of using Bayes](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Sex_classification) tries
to determine if someone was male or female based upon the height, weight
and shoe size. The assumption is that men are generally larger, heavier, and have larger shoe size than women. In the
example, they use the mean and variance of the male-only and female-only populations to characterize those
distributions. This works because these characteristics are generally normally distributed **and the distribution for
men is generally to the right of the distribution for women**.
However, let's ask a group of folks who work together if they consider themselves a team and let's try to use the size
of the group as a feature to predict what a new group would say. If the group is very small (1-2 people), they are
less likely to consider themselves a team (partnership maybe), but if they are too large (say > 10), they are also
unlikely to refer to themselves as a team. The non-team distribution is bimodal, looking at its mean and variance
completely mis-characterizes it. Also, the distribution is zero bound so it's likely to be asymmetric, which also
poses problems for a normal distribution assumption.
*# So what do we do instead? ##
This classifier uses the actual sampled percentage for buckets of the data. This approach is often referred to
as "building a non-parametric model", although "un-named distribution" strikes me as a better label.
**Pros/Cons**. The use of a non-parametric approach will allow us to deal with non-normal distributions (asymmetric,
bimodal, etc.) without ever having to identify which nominal distribution is the best fit or having to ask the user
(who may not know) what distribution to use. The downside to this approach is that it generally requires a larger
training set. You will need to experiment to determine how small is too small for your situation.
This approach is hinted at in the [wikipedia article on Bayesian classifiers](https://en.wikipedia.org/wiki/Naive_Bayes_classifier)
as "binning to discretize the feature values, to obtain a new set of Bernoulli-distributed features". However, this
classifier does not create new separate Bernoulli features for each bin. Rather, it creates a mapping function from a feature
value to a probability indicating how often the feature value is coincident with a particular outputField value. This mapping
function is different for each bin.
*# V-optimal bucketing ##
There are two common approaches to bucketing:
1. Make each bucket be equal in width along the x-axis (like we would for a histogram) (equi-width)
2. Make each bucket have roughly the same number of data points (equi-depth)
It turns out neither of the above works out well unless the training set is relatively large. Rather, there is an
approach called [v-optimal bucketing](http://en.wikipedia.org/wiki/V-optimal_histograms) which attempts to find the
optimal boundaries in the data. The basic idea is to look for the splits that provide the minimum total error-squared
where the "error" for each point is the distance of that point from the arithmetic mean. This classifier uses v-optimal
bucketing when the training set has fewer than 144 rows. At 144 rows and above it switches to equi-depth bucketing. Note, I only
evaluated a single scenario (Rally RealTeam), but 144 was the point where equi-depth started to provide as-good results as
v-optimal bucketing. Note, in my test, much larger sets had moderately _better_ results with equi-depth bucketing.
The algorithm used here for v-optimal bucketing is slightly inspired by
[this non-recursive code](http://www.mathcs.emory.edu/~cheung/Courses/584-StreamDB/Syllabus/06-Histograms/v-opt3.html).
However, the implementation here is recursive and I've made some different choices about when to terminate the splitting. To
understand the essence of the algorithm used, you need only look at the 9 lines of code in the `findBucketSplits()` function.
The `optimalSplitFor2Buckets()` function will split the values into two buckets. It tries each possible split
starting with only one in the bucket on the left all the way down to a split with only one in the bucket on the right.
One of the design choices I made for this algorithm means that you can't precisely control the number of buckets. It
also seems to have a tendency to create very lopsided bucketing breakdowns. The latter may be the reason that
equi-depth bucketing has better results when there are hundreds of rows in the training
set. We may wish to revisit this algorithm at a later time because my instinct is that there is probably some
definition of "optimal" that is at least as good as equi-depth for large training sets. I suspect the current algorithm
favors splitting the left. A better algorithm wouldn't have a left and a right. It would find the optimal split for
each of the current splits and take the one that gave the entire new splitting regime the lowest overall error.
This new algorithm would be much more computationally intensive but for small training sets, I don't think it will
be a deal breaker and we can always use equi-depth once for larger sets.
*# Simple example ##
First we need to require the classifier.
{BayesianClassifier} = require('../')
Before we start, let's take a look at our training set. The assumption is that we think TeamSize and HasChildProject
will be predictors for RealTeam.
trainingSet = [
{TeamSize: 5, HasChildProject: 0, RealTeam: 1},
{TeamSize: 3, HasChildProject: 1, RealTeam: 0},
{TeamSize: 3, HasChildProject: 1, RealTeam: 1},
{TeamSize: 1, HasChildProject: 0, RealTeam: 0},
{TeamSize: 2, HasChildProject: 1, RealTeam: 0},
{TeamSize: 2, HasChildProject: 0, RealTeam: 0},
{TeamSize: 15, HasChildProject: 1, RealTeam: 0},
{TeamSize: 27, HasChildProject: 1, RealTeam: 0},
{TeamSize: 13, HasChildProject: 1, RealTeam: 1},
{TeamSize: 7, HasChildProject: 0, RealTeam: 1},
{TeamSize: 7, HasChildProject: 0, RealTeam: 0},
{TeamSize: 9, HasChildProject: 1, RealTeam: 1},
{TeamSize: 6, HasChildProject: 0, RealTeam: 1},
{TeamSize: 5, HasChildProject: 0, RealTeam: 1},
{TeamSize: 5, HasChildProject: 0, RealTeam: 0},
]
Now, let's set up a simple config indicating our assumptions. Note how the type for TeamSize is 'continuous'
whereas the type for HasChildProject is 'discrete' even though a number is stored. Continuous types must be numbers
but discrete types can either be numbers or strings.
config =
outputField: "RealTeam"
features: [
{field: 'TeamSize', type: 'continuous'},
{field: 'HasChildProject', type: 'discrete'}
]
We can now instantiate the classifier with that config,
classifier = new BayesianClassifier(config)
and pass in our training set.
percentWins = classifier.train(trainingSet)
The call to `train()` returns the percentage of times that the trained classifier gets the right answer for the training
set. This should usually be pretty high. Anything below say, 70% and you probably don't have the right "features"
in your training set or you don't have enough training set data. Our made up example is a borderline case.
console.log(percentWins)
* 0.7333333333333333
Now, let's see how the trained classifier is used to predict "RealTeam"-ness. We simply pass in an object with
fields for each of our features. A very small team with child projects are definitely not a RealTeam.
console.log(classifier.predict({TeamSize: 1, HasChildProject: 1}))
* 0
However, a mid-sized project with no child projects most certainly is a RealTeam.
console.log(classifier.predict({TeamSize: 7, HasChildProject: 0}))
* 1
Here is a less obvious case, with one indicator going one way (too big) and another going the other way (no child projects).
console.log(classifier.predict({TeamSize: 29, HasChildProject: 0}))
* 0
If you want to know the strength of the prediction, you can pass in `true` as the second parameter to the `predict()` method.
console.log(classifier.predict({TeamSize: 29, HasChildProject: 0}, true))
* { '0': 0.6956521739130435, '1': 0.30434782608695654 }
We're only 69.6% sure this is not a RealTeam. Notice how the keys for the output are strings even though we passed in values
of type Number for the RealTeam field in our training set. We had no choice in this case because keys of JavaScript
Objects must be strings. However, the classifier is smart enough to know that you wanted Numbers: when every RealTeam
value in the training set is a Number, a plain `predict()` call (without the second parameter) returns a Number.
Like the Lumenize calculators, you can save and restore the state of a trained classifier.
savedState = classifier.getStateForSaving('some meta data')
newClassifier = BayesianClassifier.newFromSavedState(savedState)
console.log(newClassifier.meta)
* some meta data
It will make the same predictions.
console.log(newClassifier.predict({TeamSize: 29, HasChildProject: 0}, true))
* { '0': 0.6956521739130435, '1': 0.30434782608695654 }
*/
function BayesianClassifier(userConfig) {
  /*
  @constructor
  @param {Object} userConfig See Config options for details. The object is kept as-is on `userConfig`; the
  classifier works on its own clone (`config`) so that training never touches the caller's object.
  @cfg {String} outputField String indicating which field in the training set is what we are trying to predict
  @cfg {Object[]} features Array of Maps which specifies the fields to use as features. Each row in the array should
  be in the form of `{field: <fieldName>, type: <'continuous' | 'discrete'>}`. Note, that you can even declare Number type
  fields as 'discrete'. It is preferable to do this if you know that it can only be one of a handful of values
  (0 vs 1 for example).
  **WARNING: If you choose 'discrete' for the feature type, then ALL possible values for that feature must appear
  in the training set. If the classifier is asked to make a prediction with a value that it has never seen
  before, it will fail catastrophically.**
  */
  var ownConfig;
  this.userConfig = userConfig;
  ownConfig = utils.clone(userConfig);
  this.config = ownConfig;
  this.outputField = ownConfig.outputField;
  this.features = ownConfig.features;
}
BayesianClassifier.prototype.train = function(userSuppliedTrainingSet) {
/*
@method train
Train the classifier with a training set.
Training stores `outputValues`, `outputFieldTypeIsNumber` and `baseProbabilities` on the classifier and attaches
`bins` (each with a `probabilities` map keyed by output value) to every entry of `this.features`. The supplied
training set itself is only read; all work is done on clones.
@return {Number} The percentage of time that the trained classifier returns the expected outputField for the rows
in the training set. If this is low (say below 70%), you need more predictive fields and/or more data in your
training set.
@param {Object[]} userSuppliedTrainingSet an Array of Maps containing a field for the outputField as well as a field
for each of the features specified in the config.
*/
var bin, bucketGenerator, bucketer, countForThisValue, denominator, denominatorCell, dimensions, feature, featureCube, featureValues, filter, loses, n, numerator, numeratorCell, outputDimension, outputValue, outputValuesCube, percentWins, prediction, row, trainingSet, value, values, wins, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _len5, _len6, _len7, _m, _n, _o, _p, _ref, _ref1, _ref2, _ref3, _ref4, _ref5;
// Work on a clone: the rows are overwritten with bin labels further down.
trainingSet = utils.clone(userSuppliedTrainingSet);
// Cube over the output field alone; used for the list of output values and their row counts.
outputDimension = [
{
field: this.outputField
}
];
outputValuesCube = new OLAPCube({
dimensions: outputDimension
}, trainingSet);
this.outputValues = outputValuesCube.getDimensionValues(this.outputField);
// Remember whether every output value is a Number so predict() can convert the winning key back to a Number.
this.outputFieldTypeIsNumber = true;
_ref = this.outputValues;
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
value = _ref[_i];
if (utils.type(value) !== 'number') {
this.outputFieldTypeIsNumber = false;
}
}
// Base probability of each output value = (rows with that value) / (all rows).
n = trainingSet.length;
filter = {};
this.baseProbabilities = {};
_ref1 = this.outputValues;
for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) {
outputValue = _ref1[_j];
filter[this.outputField] = outputValue;
countForThisValue = outputValuesCube.getCell(filter)._count;
this.baseProbabilities[outputValue] = countForThisValue / n;
}
// Bucketing strategy: constant-quantity bucketing at 144 rows or more, v-optimal bucketing below that.
if (n >= 144) {
bucketGenerator = Classifier.generateConstantQuantityBucketer;
} else {
bucketGenerator = Classifier.generateVOptimalBucketer;
}
// Generate the bins for every continuous feature from that feature's column of values.
_ref2 = this.features;
for (_k = 0, _len2 = _ref2.length; _k < _len2; _k++) {
feature = _ref2[_k];
if (feature.type === 'continuous') {
values = (function() {
var _l, _len3, _results;
_results = [];
for (_l = 0, _len3 = trainingSet.length; _l < _len3; _l++) {
row = trainingSet[_l];
_results.push(row[feature.field]);
}
return _results;
})();
bucketer = bucketGenerator(values);
feature.bins = bucketer;
} else if (feature.type === 'discrete') {
// Nothing to do here: bins for discrete features are derived from the feature cube below.
} else {
throw new Error("Unrecognized feature type: " + feature.type + ".");
}
}
// Replace continuous values in the cloned rows with their bin labels.
for (_l = 0, _len3 = trainingSet.length; _l < _len3; _l++) {
row = trainingSet[_l];
this.discreteizeRow(row);
}
// For every feature, compute per bin the share of that bin's rows that carry each output value.
_ref3 = this.features;
for (_m = 0, _len4 = _ref3.length; _m < _len4; _m++) {
feature = _ref3[_m];
// Two-dimensional cube: output field (with totals kept) x this feature.
dimensions = [
{
field: this.outputField,
keepTotals: true
}
];
dimensions.push({
field: feature.field
});
featureCube = new OLAPCube({
dimensions: dimensions
}, trainingSet);
featureValues = featureCube.getDimensionValues(feature.field);
if (feature.type === 'discrete') {
// One bin per value the cube reports for this feature.
feature.bins = (function() {
var _len5, _n, _results;
_results = [];
for (_n = 0, _len5 = featureValues.length; _n < _len5; _n++) {
value = featureValues[_n];
_results.push({
value: value
});
}
return _results;
})();
}
_ref4 = feature.bins;
for (_n = 0, _len5 = _ref4.length; _n < _len5; _n++) {
bin = _ref4[_n];
bin.probabilities = {};
_ref5 = this.outputValues;
for (_o = 0, _len6 = _ref5.length; _o < _len6; _o++) {
outputValue = _ref5[_o];
// Denominator: cell filtered on the bin only (presumably the total across output values, given keepTotals).
filter = {};
filter[feature.field] = bin.value;
denominatorCell = featureCube.getCell(filter);
if (denominatorCell != null) {
denominator = denominatorCell._count;
} else {
denominator = 0;
}
// Numerator: cell filtered on both the bin and this output value.
filter[this.outputField] = outputValue;
numeratorCell = featureCube.getCell(filter);
// Bitwise OR with 0 turns a missing cell's undefined count into 0.
numerator = (numeratorCell != null ? numeratorCell._count : void 0) | 0;
// NOTE(review): a bin without rows gives 0 / 0, so NaN is stored as its probability.
bin.probabilities[outputValue] = numerator / denominator;
}
}
}
// Score the trained classifier against a fresh clone of the original (non-discretized) training set.
trainingSet = utils.clone(userSuppliedTrainingSet);
wins = 0;
loses = 0;
for (_p = 0, _len7 = trainingSet.length; _p < _len7; _p++) {
row = trainingSet[_p];
prediction = this.predict(row);
if (prediction === row[this.outputField]) {
wins++;
} else {
loses++;
}
}
percentWins = wins / (wins + loses);
return percentWins;
};
BayesianClassifier.prototype.predict = function(row, returnProbabilities) {
  var bin, feature, field, matchingBin, max, outputValue, outputValueForMax, probabilities, probability, workingRow, _i, _j, _len, _len1, _ref, _ref1;
  if (returnProbabilities == null) {
    returnProbabilities = false;
  }
  /*
  @method predict
  Use the trained classifier to make a prediction. The `row` passed in is not modified.
  @return {String|Number|Object} If returnProbabilities is false (the default), then it will return the prediction.
  The prediction is converted to a Number when all output values seen during training were Numbers.
  If returnProbabilities is true, then it will return an Object indicating the probability for each possible
  outputField value.
  @param {Object} row an Object containing a field for each of the features specified by the config.
  @param {Boolean} [returnProbabilities = false] If true, then the output will indicate the probabilities of each
  possible outputField value. Otherwise, the output of a call to `predict()` will return the predicted value with
  the highest probability.
  Throws an Error when a feature value of the row matches none of the bins learned during training.
  */
  // discreteizeRow() overwrites continuous fields with bin labels, so hand it a shallow copy instead of the
  // caller's object. A shallow copy suffices because only top-level fields are written.
  workingRow = {};
  for (field in row) {
    workingRow[field] = row[field];
  }
  workingRow = this.discreteizeRow(workingRow);
  // Start from the base probability of every output value...
  probabilities = {};
  _ref = this.baseProbabilities;
  for (outputValue in _ref) {
    probabilities[outputValue] = _ref[outputValue];
  }
  // ...and fold in the evidence of each feature's matching bin.
  _ref1 = this.features;
  for (_i = 0, _len = _ref1.length; _i < _len; _i++) {
    feature = _ref1[_i];
    matchingBin = null;
    for (_j = 0, _len1 = feature.bins.length; _j < _len1; _j++) {
      bin = feature.bins[_j];
      if (workingRow[feature.field] === bin.value) {
        matchingBin = bin;
        break;
      }
    }
    if (matchingBin == null) {
      throw new Error("No matching bin for " + feature.field + "=" + workingRow[feature.field] + " in the training set.");
    }
    for (outputValue in probabilities) {
      probability = probabilities[outputValue];
      probabilities[outputValue] = probability * matchingBin.probabilities[outputValue] / (probability * matchingBin.probabilities[outputValue] + (1 - probability) * (1 - matchingBin.probabilities[outputValue]));
    }
  }
  if (returnProbabilities) {
    return probabilities;
  }
  // Pick the output value with the highest probability; ties go to the first one encountered.
  max = 0;
  outputValueForMax = null;
  for (outputValue in probabilities) {
    probability = probabilities[outputValue];
    if (probability > max) {
      max = probability;
      outputValueForMax = outputValue;
    }
  }
  // Object keys are always strings; convert back when the training output values were Numbers.
  if (this.outputFieldTypeIsNumber) {
    return Number(outputValueForMax);
  }
  return outputValueForMax;
};
BayesianClassifier.prototype.getStateForSaving = function(meta) {
  /*
  @method getStateForSaving
  Enables saving the state of a Classifier.
  See the bottom of the "Simple example" for example code of using this
  saving and restoring functionality.
  @param {Object} [meta] An optional parameter that will be added to the serialized output and added to the meta field
  within the deserialized Classifier
  @return {Object} Returns an Object representing the state of the Classifier. This Object is suitable for saving to
  an object store. Use the static method `newFromSavedState()` with this Object as the parameter to reconstitute the Classifier.
  The returned Object references the classifier's own state (nothing is copied).
  */
  var savedFields = ['userConfig', 'outputField', 'outputValues', 'outputFieldTypeIsNumber', 'baseProbabilities', 'features'];
  var state = {};
  var k;
  for (k = 0; k < savedFields.length; k++) {
    state[savedFields[k]] = this[savedFields[k]];
  }
  // `meta` is only recorded when it is neither null nor undefined.
  if (meta != null) {
    state.meta = meta;
  }
  return state;
};
BayesianClassifier.newFromSavedState = function(p) {
  /*
  @method newFromSavedState
  Deserializes a previously saved Classifier state and returns a new Classifier.
  See the bottom of the "Simple example" for example code of using this
  saving and restoring functionality.
  @static
  @param {String/Object} p A String (parsed with JSON.parse) or Object from a previously saved Classifier state
  @return {Classifier}
  */
  var restoredFields = ['outputField', 'outputValues', 'outputFieldTypeIsNumber', 'baseProbabilities', 'features'];
  var saved = utils.type(p) === 'string' ? JSON.parse(p) : p;
  var restored = new BayesianClassifier(saved.userConfig);
  var k;
  // Overwrite what the constructor derived from the config with the trained state.
  for (k = 0; k < restoredFields.length; k++) {
    restored[restoredFields[k]] = saved[restoredFields[k]];
  }
  if (saved.meta != null) {
    restored.meta = saved.meta;
  }
  return restored;
};
return BayesianClassifier;
})(Classifier);
exports.Classifier = Classifier;
exports.BayesianClassifier = BayesianClassifier;
}).call(this);