lumenize
Version:
Illuminating the forest AND the trees in your data.
196 lines (185 loc) • 6.68 kB
JavaScript
// Generated by CoffeeScript 1.7.1
(function() {
var anova, correlate, fDist, functions, normInverseUpper, utils, _ref;
utils = require('tztime').utils;
functions = require('./functions').functions;
_ref = require('./distributions').distributions, fDist = _ref.fDist, normInverseUpper = _ref.normInverseUpper;
correlate = require('./correlate').correlate;
anova = function(rawData, overallPredicate, field, groups, ci) {
var bucket, buckets, data, errorDF, errorMS, errorSS, factorDF, factorF, factorMS, factorP, factorSS, group, histogram, i, index, multiplier, nTimesMeanSquared, overallMean, overallN, overallSum, overallSumSquares, pooledNumerator, pooledStandardDeviation, r, rSquared, rSquaredAdjusted, residual, residualPlot, residuals, row, totalDF, totalSS, value, xStdDev, xValues, y, yStdDev, yValues, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _len5, _len6, _len7, _m, _n, _o, _p, _q, _r, _ref1;
if (ci == null) {
ci = 0.95;
}
/*
@param {Object} groups {label, predicate} This is modified as a side-effect of this function. Many properties are added.
https://onlinecourses.science.psu.edu/stat414/node/218
http://www.calvin.edu/~rpruim/courses/m243/F03/overheads/ANOVAf03.ppt
*/
utils.assert((0 < ci && ci < 1.0), "ci must be between 0.0 and 1.0");
if (overallPredicate != null) {
data = (function() {
var _i, _len, _results;
_results = [];
for (_i = 0, _len = rawData.length; _i < _len; _i++) {
row = rawData[_i];
if (overallPredicate(row) && (row[field] != null)) {
_results.push(row);
}
}
return _results;
})();
} else {
data = rawData;
}
utils.assert(groups.length < data.length, 'After filtering with the overallPredicate, there were fewer rows in the dataset than there were groups');
overallN = 0;
overallSum = 0;
overallSumSquares = 0;
pooledNumerator = 0;
for (_i = 0, _len = groups.length; _i < _len; _i++) {
group = groups[_i];
group.values = (function() {
var _j, _len1, _results;
_results = [];
for (_j = 0, _len1 = data.length; _j < _len1; _j++) {
row = data[_j];
if (group.predicate(row)) {
_results.push(row[field]);
}
}
return _results;
})();
group.sum = functions.sum(group.values);
group.n = group.values.length;
group.sumSquares = functions.sumSquares(group.values);
group.variance = functions.variance(group.values);
group.standardDeviation = Math.sqrt(group.variance);
group.mean = group.sum / group.n;
overallN += group.n;
overallSum += group.sum;
overallSumSquares += group.sumSquares;
pooledNumerator += (group.n - 1) * group.variance;
}
overallMean = overallSum / overallN;
pooledStandardDeviation = Math.sqrt(pooledNumerator / (overallN - groups.length));
multiplier = normInverseUpper((1.0 - ci) / 2);
for (_j = 0, _len1 = groups.length; _j < _len1; _j++) {
group = groups[_j];
group.ciDelta = multiplier * pooledStandardDeviation / Math.sqrt(group.n);
}
residuals = [];
for (_k = 0, _len2 = groups.length; _k < _len2; _k++) {
group = groups[_k];
_ref1 = group.values;
for (_l = 0, _len3 = _ref1.length; _l < _len3; _l++) {
value = _ref1[_l];
residual = group.mean - value;
residuals.push(residual);
}
}
residuals = residuals.sort(function(a, b) {
return a - b;
});
residualPlot = [];
for (index = _m = 0, _len4 = residuals.length; _m < _len4; index = ++_m) {
r = residuals[index];
i = index + 1;
if (i === 1) {
y = 1 - Math.pow(0.5, 1 / residuals.length);
} else if (i === residuals.length) {
y = Math.pow(0.5, 1 / residuals.length);
} else {
y = (i - 0.3175) / (residuals.length + 0.365);
}
y = y - 0.5;
if (y === 0) {
y = 0;
} else {
y = Math.abs(y) * y;
}
residualPlot.push({
x: r,
y: y
});
}
xValues = (function() {
var _len5, _n, _results;
_results = [];
for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) {
r = residualPlot[_n];
_results.push(r.x);
}
return _results;
})();
yValues = (function() {
var _len5, _n, _results;
_results = [];
for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) {
r = residualPlot[_n];
_results.push(r.y);
}
return _results;
})();
xStdDev = functions.standardDeviation(xValues);
yStdDev = functions.standardDeviation(yValues);
for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) {
r = residualPlot[_n];
r.x = r.x / xStdDev;
r.y = r.y / yStdDev;
}
buckets = {};
for (bucket = _o = -2.5; _o <= 2.5; bucket = ++_o) {
buckets[bucket] = 0;
}
for (_p = 0, _len6 = residualPlot.length; _p < _len6; _p++) {
r = residualPlot[_p];
bucket = Math.floor(r.y + 1.0) - 0.5;
buckets[bucket] += 1;
}
histogram = [];
for (bucket = _q = -2.5; _q <= 2.5; bucket = ++_q) {
row = {
label: "" + (-0.5 + bucket) + " to " + (0.5 + bucket),
center: bucket,
count: buckets[bucket]
};
histogram.push(row);
}
factorDF = groups.length - 1;
errorDF = overallN - groups.length;
totalDF = factorDF + errorDF;
factorSS = 0;
for (_r = 0, _len7 = groups.length; _r < _len7; _r++) {
group = groups[_r];
factorSS += group.n * group.mean * group.mean;
}
nTimesMeanSquared = overallN * overallMean * overallMean;
factorSS -= nTimesMeanSquared;
totalSS = overallSumSquares - nTimesMeanSquared;
errorSS = totalSS - factorSS;
factorMS = factorSS / factorDF;
errorMS = errorSS / errorDF;
factorF = factorMS / errorMS;
factorP = fDist(factorDF, errorDF, factorF);
rSquared = factorSS / totalSS;
rSquaredAdjusted = Math.abs(1 - (1 - rSquared) * (overallN - 1) / (overallN - groups.length));
return {
factorDF: factorDF,
factorSS: factorSS,
factorMS: factorMS,
factorF: factorF,
factorP: factorP,
errorDF: errorDF,
errorSS: errorSS,
errorMS: errorMS,
totalDF: totalDF,
totalSS: totalSS,
rSquared: rSquared,
rSquaredAdjusted: rSquaredAdjusted,
residualPlot: residualPlot,
histogram: histogram,
pooledStandardDeviation: pooledStandardDeviation
};
};
exports.anova = anova;
}).call(this);