// lumenize: Illuminating the forest AND the trees in your data.
// Generated by CoffeeScript 1.10.0
(function() {
var BayesianClassifier, Classifier, OLAPCube, functions, utils,
extend = function(child, parent) { for (var key in parent) { if (hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; },
hasProp = {}.hasOwnProperty;
functions = require('./functions').functions;
utils = require('tztime').utils;
OLAPCube = require('./OLAPCube').OLAPCube;
Classifier = (function() {
function Classifier() {}
/*
@class Classifier
__Base class for all Classifiers__
See individual subclasses for usage details
*/
Classifier.getBucketCountMinMax = function(values) {
var max, min, targetBucketCount;
targetBucketCount = Math.floor(Math.sqrt(values.length)) + 1;
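// Square-root rule of thumb: e.g., 144 training values yields floor(sqrt(144)) + 1 = 13 target buckets.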
if (targetBucketCount < 3) {
throw new Error("Need more training data");
}
min = functions.min(values);
max = functions.max(values);
return {
targetBucketCount: targetBucketCount,
min: min,
max: max
};
};
Classifier.generateConstantWidthBucketer = function(values) {
var bucketSize, bucketer, i, j, max, min, ref, ref1, targetBucketCount;
ref = Classifier.getBucketCountMinMax(values), targetBucketCount = ref.targetBucketCount, min = ref.min, max = ref.max;
bucketSize = (max - min) / targetBucketCount;
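// Equi-width: every bucket spans the same slice of the x-axis. The first and last buckets are left
// open-ended (startOn/endBelow of null) so values outside the training range still land in a bucket.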
bucketer = [];
bucketer.push({
value: 'B' + 0,
startOn: null,
endBelow: min + bucketSize
});
for (i = j = 1, ref1 = targetBucketCount - 2; 1 <= ref1 ? j <= ref1 : j >= ref1; i = 1 <= ref1 ? ++j : --j) {
bucketer.push({
value: 'B' + i,
startOn: min + bucketSize * i,
endBelow: min + bucketSize * (i + 1)
});
}
bucketer.push({
value: 'B' + (targetBucketCount - 1),
startOn: min + bucketSize * (targetBucketCount - 1),
endBelow: null
});
return bucketer;
};
Classifier.generateConstantQuantityBucketer = function(values) {
var bucketSize, bucketer, currentBoundary, i, j, lastBoundary, max, min, ref, ref1, targetBucketCount;
ref = Classifier.getBucketCountMinMax(values), targetBucketCount = ref.targetBucketCount, min = ref.min, max = ref.max;
bucketSize = 100 / targetBucketCount;
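// Equi-depth: bucket boundaries fall on the k * (100 / targetBucketCount) percentiles of the training
// values, so each bucket holds roughly the same number of data points.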
bucketer = [];
currentBoundary = functions.percentileCreator(bucketSize)(values);
bucketer.push({
value: 'B' + 0,
startOn: null,
endBelow: currentBoundary
});
for (i = j = 1, ref1 = targetBucketCount - 2; 1 <= ref1 ? j <= ref1 : j >= ref1; i = 1 <= ref1 ? ++j : --j) {
lastBoundary = currentBoundary;
currentBoundary = functions.percentileCreator(bucketSize * (i + 1))(values);
bucketer.push({
value: 'B' + i,
startOn: lastBoundary,
endBelow: currentBoundary
});
}
bucketer.push({
value: 'B' + (targetBucketCount - 1),
startOn: currentBoundary,
endBelow: null
});
return bucketer;
};
Classifier.splitAt = function(values, index) {
var left, right;
left = values.slice(0, index);
right = values.slice(index);
return {
left: left,
right: right
};
};
Classifier.splitAtValue = function(values, split) {
var j, left, len, right, value;
left = [];
right = [];
for (j = 0, len = values.length; j < len; j++) {
value = values[j];
if (value < split) {
left.push(value);
} else {
right.push(value);
}
}
return {
left: left,
right: right
};
};
Classifier.optimalSplitFor2Buckets = function(values) {
var bestIndex, bestLeft, bestRight, bestTotalErrorSquared, i, j, left, ref, ref1, right, splitAt, totalErrorSquared;
bestIndex = 1;
bestTotalErrorSquared = Number.MAX_VALUE;
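// Expects `values` to be sorted ascending. Tries every split index and keeps the one that minimizes
// the total error squared (the sum over both sides of squared distances from each side's mean).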
for (i = j = 1, ref = values.length - 1; 1 <= ref ? j <= ref : j >= ref; i = 1 <= ref ? ++j : --j) {
ref1 = Classifier.splitAt(values, i), left = ref1.left, right = ref1.right;
totalErrorSquared = functions.errorSquared(left) + functions.errorSquared(right);
if (totalErrorSquared < bestTotalErrorSquared) {
bestTotalErrorSquared = totalErrorSquared;
bestIndex = i;
bestLeft = left;
bestRight = right;
}
}
splitAt = (values[bestIndex - 1] + values[bestIndex]) / 2;
return {
splitAt: splitAt,
left: bestLeft,
right: bestRight
};
};
Classifier.areAllSame = function(values) {
var firstValue, j, len, value;
firstValue = values[0];
for (j = 0, len = values.length; j < len; j++) {
value = values[j];
if (value !== firstValue) {
return false;
}
}
return true;
};
Classifier.findBucketSplits = function(currentSplits, values, targetBucketCount, originalValues) {
var errorSquared, j, left, len, maxErrorSquared, maxErrorSquaredValues, ref, ref1, right, split, splitAt;
if (originalValues == null) {
originalValues = values.slice(0);
}
if (values.length < 5 || Classifier.areAllSame(values)) {
return null;
}
ref = Classifier.optimalSplitFor2Buckets(values), splitAt = ref.splitAt, left = ref.left, right = ref.right;
currentSplits.push(splitAt);
currentSplits.sort(function(a, b) {
return a - b;
});
while (currentSplits.length < targetBucketCount - 1) {
right = originalValues;
maxErrorSquared = 0;
maxErrorSquaredValues = null;
for (j = 0, len = currentSplits.length; j < len; j++) {
split = currentSplits[j];
ref1 = Classifier.splitAtValue(right, split), left = ref1.left, right = ref1.right;
errorSquared = functions.errorSquared(left);
if (errorSquared > maxErrorSquared) {
maxErrorSquared = errorSquared;
maxErrorSquaredValues = left;
}
}
errorSquared = functions.errorSquared(right);
if (errorSquared > maxErrorSquared) {
maxErrorSquared = errorSquared;
maxErrorSquaredValues = right;
}
splitAt = Classifier.optimalSplitFor2Buckets(maxErrorSquaredValues).splitAt;
currentSplits.push(splitAt);
currentSplits.sort(function(a, b) {
return a - b;
});
}
return currentSplits;
};
Classifier.generateVOptimalBucketer = function(values) {
var bucketer, currentBoundary, i, j, lastBoundary, max, min, ref, ref1, splits, targetBucketCount;
ref = Classifier.getBucketCountMinMax(values), targetBucketCount = ref.targetBucketCount, min = ref.min, max = ref.max;
values.sort(function(a, b) {
return a - b;
});
splits = [];
Classifier.findBucketSplits(splits, values, targetBucketCount);
splits.sort(function(a, b) {
return a - b;
});
bucketer = [];
currentBoundary = splits[0];
bucketer.push({
value: 'B' + 0,
startOn: null,
endBelow: currentBoundary
});
for (i = j = 1, ref1 = splits.length - 1; 1 <= ref1 ? j <= ref1 : j >= ref1; i = 1 <= ref1 ? ++j : --j) {
lastBoundary = currentBoundary;
currentBoundary = splits[i];
bucketer.push({
value: 'B' + i,
startOn: lastBoundary,
endBelow: currentBoundary
});
}
bucketer.push({
value: 'B' + splits.length,
startOn: currentBoundary,
endBelow: null
});
return bucketer;
};
Classifier.prototype.discreteizeRow = function(row) {
var bin, feature, index, j, k, len, len1, ref, ref1, value;
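// Replaces each continuous feature value in `row` (in place) with the label of the bin whose
// [startOn, endBelow) range contains it. Discrete features pass through unchanged.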
ref = this.features;
for (j = 0, len = ref.length; j < len; j++) {
feature = ref[j];
if (feature.type === 'continuous') {
value = row[feature.field];
if (value == null) {
throw new Error("Could not find field " + feature.field + " in " + (JSON.stringify(row)) + ".");
}
ref1 = feature.bins;
for (index = k = 0, len1 = ref1.length; k < len1; index = ++k) {
bin = ref1[index];
if (bin.startOn != null) {
if (bin.endBelow != null) {
if ((bin.startOn <= value && value < bin.endBelow)) {
row[feature.field] = bin.value;
break;
}
} else if (bin.startOn <= value) {
row[feature.field] = bin.value;
break;
}
} else if (value < bin.endBelow) {
row[feature.field] = bin.value;
break;
}
}
}
}
return row;
};
return Classifier;
})();
BayesianClassifier = (function(superClass) {
extend(BayesianClassifier, superClass);
/*
@class BayesianClassifier
__A Bayesian classifier with non-parametric modeling of distributions using v-optimal bucketing.__
Most libraries for Bayesian classification target spam filtering and assume that the presence or absence of a word
is the only feature you are interested in. This is a more general-purpose tool.
## Features ##
* Works even for bi-modal and other non-normal distributions
* No requirement that you identify the distribution
* Uses [non-parametric modeling](http://en.wikipedia.org/wiki/Non-parametric_statistics)
* Uses v-optimal bucketing so it deals well with outliers and sharp cliffs
* Serialize (`getStateForSaving()`) and deserialize (`newFromSavedState()`) to preserve training between sessions
## Why the assumption of a normal distribution is bad in some cases ##
The [wikipedia example of using Bayes](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Sex_classification) tries
to determine whether someone is male or female based upon height, weight,
and shoe size. The assumption is that men are generally larger, heavier, and have larger shoe sizes than women. In the
example, they use the mean and variance of the male-only and female-only populations to characterize those
distributions. This works because these characteristics are generally normally distributed **and the distribution for
men is generally to the right of the distribution for women**.
However, let's ask a group of folks who work together if they consider themselves a team and let's try to use the size
of the group as a feature to predict what a new group would say. If the group is very small (1-2 people), they are
less likely to consider themselves a team (partnership maybe), but if they are too large (say > 10), they are also
unlikely to refer to themselves as a team. The non-team distribution is bimodal; looking at its mean and variance
completely mischaracterizes it. Also, the distribution is bounded at zero, so it is likely to be asymmetric, which also
poses problems for a normal-distribution assumption.
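For illustration (with made-up numbers): if the non-team group sizes were {1, 2, 2, 11, 13, 15}, the mean is
about 7.3, a size where almost none of the actual groups sit, so a normal model centered there describes
essentially none of the data.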
## So what do we do instead? ##
This classifier uses the actual values (in buckets) rather than characterizing the distribution as "normal", "log-normal", etc.
This approach is often referred to as "building a non-parametric model".
**Pros/Cons**. The use of a non-parametric approach will allow us to deal with non-normal distributions (asymmetric,
bimodal, etc.) without ever having to identify which nominal distribution is the best fit or having to ask the user
(who may not know) what distribution to use. The downside to this approach is that it generally requires a larger
training set. You will need to experiment to determine how small is too small for your situation.
This approach is hinted at in the [wikipedia article on Bayesian classifiers](https://en.wikipedia.org/wiki/Naive_Bayes_classifier)
as "binning to discretize the feature values, to obtain a new set of Bernoulli-distributed features". However, this
classifier does not create new separate Bernoulli features for each bin. Rather, it creates a mapping function from a feature
value to a probability indicating how often the feature value is coincident with a particular outputField value. This mapping
function is different for each bin.
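Concretely, after training, each continuous feature carries an array of bins that implements this mapping function.
The shape below is real; the boundary and probability numbers are purely illustrative:
feature.bins = [
{value: 'B0', startOn: null, endBelow: 3, probabilities: {'0': 0.8, '1': 0.2}},
{value: 'B1', startOn: 3, endBelow: 9, probabilities: {'0': 0.3, '1': 0.7}},
{value: 'B2', startOn: 9, endBelow: null, probabilities: {'0': 0.6, '1': 0.4}}
]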
## V-optimal bucketing ##
There are two common approaches to bucketing:
1. Make each bucket equal in width along the x-axis, as we would for a histogram (equi-width)
2. Make each bucket hold roughly the same number of data points (equi-depth)
It turns out neither of the above works out well unless the training set is relatively large. Rather, there is an
approach called [v-optimal bucketing](http://en.wikipedia.org/wiki/V-optimal_histograms) which attempts to find the
optimal boundaries in the data. The basic idea is to look for the splits that provide the minimum total error-squared
where the "error" for each point is the distance of that point from the arithmetic mean. This classifier uses v-optimal
bucketing when the training set has 144 or fewer rows; above that it switches to equi-depth bucketing. Note, I only
evaluated a single scenario (Rally RealTeam), but 144 was the point where equi-depth started to provide results as good
as v-optimal bucketing. In my test, much larger sets had moderately _better_ results with equi-depth bucketing.
That said, the 144 cutoff was determined with an older version of the v-optimal bucketing. I've since fixed that old
algorithm's tendency to produce lopsided distributions, so v-optimal may well be better even for larger numbers of
data points. I need to run a new experiment to see.
The algorithm used here for v-optimal bucketing is slightly inspired by
[this](http://www.mathcs.emory.edu/~cheung/Courses/584-StreamDB/Syllabus/06-Histograms/v-opt3.html).
However, I've made some different choices about when to terminate the splitting and how to decide which portion to split
again. To understand the essence of the algorithm used, you need only look at the 9 lines of code in the
`findBucketSplits()` function. The `optimalSplitFor2Buckets()` function splits the values into two buckets. It tries each
possible split, starting with only one value in the bucket on the left, all the way down to a split with only one value
in the bucket on the right, and keeps the split with the lowest total error squared. `findBucketSplits()` then finds the
bucket with the highest error squared and splits it again, repeating until we have the target number of splits.
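As a sketch of the pieces in isolation (the output shape is real; the boundary values are illustrative):
values = [1, 2, 2, 3, 10, 11, 11, 12, 30, 31, 33, 34, 35, 36]
bucketer = Classifier.generateVOptimalBucketer(values)
// bucketer is an Array of {value: 'B0'...'Bn', startOn, endBelow} rows whose boundaries land
// preferentially in the gaps between the three clusters above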
## Simple example ##
First we need to require the classifier.
{BayesianClassifier} = require('../')
Before we start, let's take a look at our training set. Our assumption is that TeamSize and HasChildProject
will be predictors for RealTeam.
trainingSet = [
{TeamSize: 5, HasChildProject: 0, RealTeam: 1},
{TeamSize: 3, HasChildProject: 1, RealTeam: 0},
{TeamSize: 3, HasChildProject: 1, RealTeam: 1},
{TeamSize: 1, HasChildProject: 0, RealTeam: 0},
{TeamSize: 2, HasChildProject: 1, RealTeam: 0},
{TeamSize: 2, HasChildProject: 0, RealTeam: 0},
{TeamSize: 15, HasChildProject: 1, RealTeam: 0},
{TeamSize: 27, HasChildProject: 1, RealTeam: 0},
{TeamSize: 13, HasChildProject: 1, RealTeam: 1},
{TeamSize: 7, HasChildProject: 0, RealTeam: 1},
{TeamSize: 7, HasChildProject: 0, RealTeam: 0},
{TeamSize: 9, HasChildProject: 1, RealTeam: 1},
{TeamSize: 6, HasChildProject: 0, RealTeam: 1},
{TeamSize: 5, HasChildProject: 0, RealTeam: 1},
{TeamSize: 5, HasChildProject: 0, RealTeam: 0},
]
Now, let's set up a simple config indicating our assumptions. Note how the type for TeamSize is 'continuous'
whereas the type for HasChildProject is 'discrete' even though a number is stored. Continuous types must be numbers,
but discrete types can be either numbers or strings.
config =
outputField: "RealTeam"
features: [
{field: 'TeamSize', type: 'continuous'},
{field: 'HasChildProject', type: 'discrete'}
]
We can now instantiate the classifier with that config,
classifier = new BayesianClassifier(config)
and pass in our training set.
percentWins = classifier.train(trainingSet)
The call to `train()` returns the percentage of times that the trained classifier gets the right answer for the training
set. This should usually be pretty high. Anything below, say, 70% and you probably don't have the right "features"
in your training set, or you don't have enough training data. Our made-up example is a borderline case.
console.log(percentWins)
* 0.7333333333333333
Now, let's see how the trained classifier is used to predict "RealTeam"-ness. We simply pass in an object with
fields for each of our features. A very small team with child projects is definitely not a RealTeam.
console.log(classifier.predict({TeamSize: 1, HasChildProject: 1}))
* 0
However, a mid-sized team with no child projects most certainly is a RealTeam.
console.log(classifier.predict({TeamSize: 7, HasChildProject: 0}))
* 1
Here is a less obvious case, with one indicator going one way (the right size) and another going the other way (has child projects).
console.log(classifier.predict({TeamSize: 5, HasChildProject: 1}))
* 1
If you want to know the strength of the prediction, you can pass in `true` as the second parameter to the `predict()` method.
console.log(classifier.predict({TeamSize: 5, HasChildProject: 1}, true))
* { '0': 0.3786982248520709, '1': 0.6213017751479291 }
We're only 62.1% sure this is a RealTeam. Notice how the keys for the output are strings even though we passed in values
of type Number for the RealTeam field in our training set. We had no choice in this case because keys of JavaScript
Objects must be strings. However, the classifier is smart enough to convert the prediction back to the correct type if
you call it without passing in true for the second parameter.
Like the Lumenize calculators, you can save and restore the state of a trained classifier.
savedState = classifier.getStateForSaving('some meta data')
newClassifier = BayesianClassifier.newFromSavedState(savedState)
console.log(newClassifier.meta)
* some meta data
It will make the same predictions.
console.log(newClassifier.predict({TeamSize: 5, HasChildProject: 1}, true))
* { '0': 0.3786982248520709, '1': 0.6213017751479291 }
*/
function BayesianClassifier(userConfig) {
this.userConfig = userConfig;
/*
@constructor
@param {Object} userConfig See Config options for details.
@cfg {String} outputField String indicating which field in the training set we are trying to predict
@cfg {Object[]} features Array of Maps which specifies the fields to use as features. Each row in the array should
be in the form of `{field: <fieldName>, type: <'continuous' | 'discrete'>}`. Note that you can even declare Number-type
fields as 'discrete'. It is preferable to do this if you know that the field can only take one of a handful of values
(0 vs 1, for example).
**WARNING: If you choose 'discrete' for the feature type, then ALL possible values for that feature must appear
in the training set. If the classifier is asked to make a prediction with a value that it has never seen
before, it will fail catastrophically (`predict()` throws an Error).**
*/
this.config = utils.clone(this.userConfig);
this.outputField = this.config.outputField;
this.features = this.config.features;
}
BayesianClassifier.prototype.train = function(userSuppliedTrainingSet) {
/*
@method train
Train the classifier with a training set.
@return {Number} The percentage of time that the trained classifier returns the expected outputField for the rows
in the training set. If this is low (say below 70%), you need more predictive fields and/or more data in your
training set.
@param {Object[]} userSuppliedTrainingSet an Array of Maps containing a field for the outputField as well as a field
for each of the features specified in the config.
*/
var bin, bucketGenerator, bucketer, countForThisValue, denominator, denominatorCell, dimensions, feature, featureCube, featureValues, filter, j, k, l, len, len1, len2, len3, len4, len5, len6, len7, loses, m, n, numerator, numeratorCell, o, outputDimension, outputValue, outputValuesCube, percentWins, prediction, q, r, ref, ref1, ref2, ref3, ref4, ref5, row, s, trainingSet, value, values, wins;
trainingSet = utils.clone(userSuppliedTrainingSet);
outputDimension = [
{
field: this.outputField
}
];
outputValuesCube = new OLAPCube({
dimensions: outputDimension
}, trainingSet);
this.outputValues = outputValuesCube.getDimensionValues(this.outputField);
this.outputFieldTypeIsNumber = true;
ref = this.outputValues;
for (j = 0, len = ref.length; j < len; j++) {
value = ref[j];
if (utils.type(value) !== 'number') {
this.outputFieldTypeIsNumber = false;
}
}
n = trainingSet.length;
filter = {};
this.baseProbabilities = {};
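// Priors: P(outputValue) = (training rows with that outputValue) / n, read off the 1-D cube above.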
ref1 = this.outputValues;
for (k = 0, len1 = ref1.length; k < len1; k++) {
outputValue = ref1[k];
filter[this.outputField] = outputValue;
countForThisValue = outputValuesCube.getCell(filter)._count;
this.baseProbabilities[outputValue] = countForThisValue / n;
}
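// Cutoff from the experiment described in the class doc: at 144 or more rows, equi-depth
// bucketing performed as well as v-optimal bucketing.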
if (n >= 144) {
bucketGenerator = Classifier.generateConstantQuantityBucketer;
} else {
bucketGenerator = Classifier.generateVOptimalBucketer;
}
ref2 = this.features;
for (l = 0, len2 = ref2.length; l < len2; l++) {
feature = ref2[l];
if (feature.type === 'continuous') {
values = (function() {
var len3, m, results;
results = [];
for (m = 0, len3 = trainingSet.length; m < len3; m++) {
row = trainingSet[m];
results.push(row[feature.field]);
}
return results;
})();
bucketer = bucketGenerator(values);
feature.bins = bucketer;
} else if (feature.type === 'discrete') {
} else {
throw new Error("Unrecognized feature type: " + feature.type + ".");
}
}
for (m = 0, len3 = trainingSet.length; m < len3; m++) {
row = trainingSet[m];
this.discreteizeRow(row);
}
ref3 = this.features;
for (o = 0, len4 = ref3.length; o < len4; o++) {
feature = ref3[o];
dimensions = [
{
field: this.outputField,
keepTotals: true
}
];
dimensions.push({
field: feature.field
});
featureCube = new OLAPCube({
dimensions: dimensions
}, trainingSet);
featureValues = featureCube.getDimensionValues(feature.field);
if (feature.type === 'discrete') {
feature.bins = (function() {
var len5, q, results;
results = [];
for (q = 0, len5 = featureValues.length; q < len5; q++) {
value = featureValues[q];
results.push({
value: value
});
}
return results;
})();
}
ref4 = feature.bins;
for (q = 0, len5 = ref4.length; q < len5; q++) {
bin = ref4[q];
bin.probabilities = {};
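// For each output value, estimate P(outputValue | bin) as
// count(rows in this bin with that outputValue) / count(rows in this bin).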
ref5 = this.outputValues;
for (r = 0, len6 = ref5.length; r < len6; r++) {
outputValue = ref5[r];
filter = {};
filter[feature.field] = bin.value;
denominatorCell = featureCube.getCell(filter);
if (denominatorCell != null) {
denominator = denominatorCell._count;
} else {
denominator = 0;
}
filter[this.outputField] = outputValue;
numeratorCell = featureCube.getCell(filter);
numerator = (numeratorCell != null ? numeratorCell._count : void 0) | 0;
bin.probabilities[outputValue] = numerator / denominator;
}
}
}
trainingSet = utils.clone(userSuppliedTrainingSet);
wins = 0;
loses = 0;
for (s = 0, len7 = trainingSet.length; s < len7; s++) {
row = trainingSet[s];
prediction = this.predict(row);
if (prediction === row[this.outputField]) {
wins++;
} else {
loses++;
}
}
percentWins = wins / (wins + loses);
return percentWins;
};
BayesianClassifier.prototype.predict = function(row, returnProbabilities) {
var bin, feature, j, k, len, len1, matchingBin, max, outputValue, outputValueForMax, probabilities, probability, ref, ref1, ref2;
if (returnProbabilities == null) {
returnProbabilities = false;
}
/*
@method predict
Use the trained classifier to make a prediction.
@return {String|Number|Object} If returnProbabilities is false (the default), then it will return the prediction.
If returnProbabilities is true, then it will return an Object indicating the probability for each possible
outputField value.
@param {Object} row an Object containing a field for each of the features specified by the config.
@param {Boolean} [returnProbabilities = false] If true, then the output will indicate the probabilities of each
possible outputField value. Otherwise, `predict()` will return the predicted value with the highest probability.
*/
row = this.discreteizeRow(row);
probabilities = {};
ref = this.baseProbabilities;
for (outputValue in ref) {
probability = ref[outputValue];
probabilities[outputValue] = probability;
}
ref1 = this.features;
for (j = 0, len = ref1.length; j < len; j++) {
feature = ref1[j];
matchingBin = null;
ref2 = feature.bins;
for (k = 0, len1 = ref2.length; k < len1; k++) {
bin = ref2[k];
if (row[feature.field] === bin.value) {
matchingBin = bin;
break;
}
}
if (matchingBin == null) {
throw new Error("No matching bin for " + feature.field + "=" + row[feature.field] + " in the training set.");
}
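// Two-hypothesis Bayesian update (the same form used by Bayesian spam filters): fold this bin's
// conditional probability m into the running probability p via p*m / (p*m + (1-p)*(1-m)).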
for (outputValue in probabilities) {
probability = probabilities[outputValue];
probabilities[outputValue] = probability * matchingBin.probabilities[outputValue] / (probability * matchingBin.probabilities[outputValue] + (1 - probability) * (1 - matchingBin.probabilities[outputValue]));
}
}
max = 0;
outputValueForMax = null;
for (outputValue in probabilities) {
probability = probabilities[outputValue];
if (probability > max) {
max = probability;
outputValueForMax = outputValue;
}
}
if (returnProbabilities) {
return probabilities;
} else {
if (this.outputFieldTypeIsNumber) {
return Number(outputValueForMax);
} else {
return outputValueForMax;
}
}
};
BayesianClassifier.prototype.getStateForSaving = function(meta) {
/*
@method getStateForSaving
Enables saving the state of a Classifier.
See the bottom of the "Simple example" for example code using this saving and restoring functionality.
@param {Object} [meta] An optional parameter that will be included in the serialized output and attached to the meta
field of the deserialized Classifier
@return {Object} Returns an Object representing the state of the Classifier. This Object is suitable for saving to
an object store. Use the static method `newFromSavedState()` with this Object as the parameter to reconstitute the Classifier.
*/
var out;
out = {
userConfig: this.userConfig,
outputField: this.outputField,
outputValues: this.outputValues,
outputFieldTypeIsNumber: this.outputFieldTypeIsNumber,
baseProbabilities: this.baseProbabilities,
features: this.features
};
if (meta != null) {
out.meta = meta;
}
return out;
};
BayesianClassifier.newFromSavedState = function(p) {
/*
@method newFromSavedState
Deserializes a previously stringified Classifier and returns a new Classifier.
See the bottom of the "Simple example" for example code using this saving and restoring functionality.
@static
@param {String/Object} p A String or Object from a previously saved Classifier state
@return {Classifier}
*/
var classifier;
if (utils.type(p) === 'string') {
p = JSON.parse(p);
}
classifier = new BayesianClassifier(p.userConfig);
classifier.outputField = p.outputField;
classifier.outputValues = p.outputValues;
classifier.outputFieldTypeIsNumber = p.outputFieldTypeIsNumber;
classifier.baseProbabilities = p.baseProbabilities;
classifier.features = p.features;
if (p.meta != null) {
classifier.meta = p.meta;
}
return classifier;
};
return BayesianClassifier;
})(Classifier);
exports.Classifier = Classifier;
exports.BayesianClassifier = BayesianClassifier;
}).call(this);