// lumenize
// Version:
// Illuminating the forest AND the trees in your data.
// 196 lines (185 loc) • 6.67 kB
// JavaScript
// Generated by CoffeeScript 1.10.0
(function() {
var anova, correlate, fDist, functions, normInverseUpper, ref, utils;
utils = require('tztime').utils;
functions = require('./functions').functions;
ref = require('./distributions').distributions, fDist = ref.fDist, normInverseUpper = ref.normInverseUpper;
correlate = require('./correlate').correlate;
/**
 * One-way analysis of variance (ANOVA) of `field` across `groups`.
 *
 * References:
 *   https://onlinecourses.science.psu.edu/stat414/node/218
 *   http://www.calvin.edu/~rpruim/courses/m243/F03/overheads/ANOVAf03.ppt
 *
 * @param {Array} rawData Rows to analyse; each row is read as `row[field]`.
 * @param {Function} [overallPredicate] Optional filter called with each row.
 *   When null/undefined every row is a candidate.
 * @param {String} field Name of the row property that holds the measured value.
 *   Rows whose value is null/undefined are always ignored.
 * @param {Array} groups Array of {label, predicate}. This is modified as a
 *   side effect: `values`, `sum`, `n`, `sumSquares`, `variance`,
 *   `standardDeviation`, `mean` and `ciDelta` are added to every group.
 *   A group whose predicate matches no row ends up with n = 0, so its mean is
 *   computed as a division by zero.
 * @param {Number} [ci=0.95] Confidence level, strictly between 0 and 1.
 * @return {Object} {factorDF, factorSS, factorMS, factorF, factorP, errorDF,
 *   errorSS, errorMS, totalDF, totalSS, rSquared, rSquaredAdjusted,
 *   residualPlot, histogram, pooledStandardDeviation}
 * @throws Whatever `utils.assert` raises when `ci` is out of range or when the
 *   filtered data does not have more rows than there are groups.
 */
anova = function(rawData, overallPredicate, field, groups, ci) {
  var bucketCenters, bucketCounts, data, errorDF, errorMS, errorSS, factorDF, factorF, factorMS, factorP, factorSS, g, group, groupCount, histogram, i, matched, multiplier, nTimesMeanSquared, overallMean, overallN, overallSum, overallSumSquares, pooledNumerator, pooledStandardDeviation, rSquared, rSquaredAdjusted, residualCount, residualPlot, residuals, row, rowCount, totalDF, totalSS, xStdDev, yStdDev;

  if (ci == null) {
    ci = 0.95;
  }
  utils.assert((0 < ci && ci < 1.0), "ci must be between 0.0 and 1.0");

  // Rows without a value for `field` are dropped whether or not an
  // overallPredicate was supplied; otherwise null values would be counted in
  // the group sizes and folded into the sums. The predicate is still
  // evaluated first for every row.
  data = [];
  for (i = 0, rowCount = rawData.length; i < rowCount; i++) {
    row = rawData[i];
    if ((overallPredicate == null || overallPredicate(row)) && (row[field] != null)) {
      data.push(row);
    }
  }
  utils.assert(groups.length < data.length, 'After filtering with the overallPredicate, there were fewer rows in the dataset than there were groups');

  // Per-group statistics plus the running totals for the ANOVA table.
  groupCount = groups.length;
  overallN = 0;
  overallSum = 0;
  overallSumSquares = 0;
  pooledNumerator = 0;
  for (g = 0; g < groupCount; g++) {
    group = groups[g];
    matched = [];
    for (i = 0, rowCount = data.length; i < rowCount; i++) {
      row = data[i];
      if (group.predicate(row)) {
        matched.push(row[field]);
      }
    }
    group.values = matched;
    group.sum = functions.sum(group.values);
    group.n = group.values.length;
    group.sumSquares = functions.sumSquares(group.values);
    group.variance = functions.variance(group.values);
    group.standardDeviation = Math.sqrt(group.variance);
    group.mean = group.sum / group.n;
    overallN += group.n;
    overallSum += group.sum;
    overallSumSquares += group.sumSquares;
    pooledNumerator += (group.n - 1) * group.variance;
  }
  overallMean = overallSum / overallN;
  pooledStandardDeviation = Math.sqrt(pooledNumerator / (overallN - groups.length));

  // Half-width of each group's interval: critical value obtained from
  // normInverseUpper for a tail of (1 - ci) / 2, scaled by the pooled
  // standard deviation over sqrt(n).
  multiplier = normInverseUpper((1.0 - ci) / 2);
  for (g = 0; g < groupCount; g++) {
    group = groups[g];
    group.ciDelta = multiplier * pooledStandardDeviation / Math.sqrt(group.n);
  }

  // Residuals (group mean minus observation), ascending.
  residuals = [];
  for (g = 0; g < groupCount; g++) {
    group = groups[g];
    for (i = 0; i < group.values.length; i++) {
      residuals.push(group.mean - group.values[i]);
    }
  }
  residuals.sort(function(a, b) {
    return a - b;
  });

  // Probability-plot coordinates. The plotting position of each rank uses
  // Filliben's order-statistic-median estimate (constants 0.3175 / 0.365),
  // is centred on zero and then passed through sign(y) * y^2.
  residualCount = residuals.length;
  residualPlot = residuals.map(function(residual, index) {
    var rank, y;
    rank = index + 1;
    if (rank === 1) {
      y = 1 - Math.pow(0.5, 1 / residualCount);
    } else if (rank === residualCount) {
      y = Math.pow(0.5, 1 / residualCount);
    } else {
      y = (rank - 0.3175) / (residualCount + 0.365);
    }
    y = y - 0.5;
    return {
      x: residual,
      y: Math.abs(y) * y
    };
  });

  // Scale both axes by their own standard deviation.
  xStdDev = functions.standardDeviation(residualPlot.map(function(point) {
    return point.x;
  }));
  yStdDev = functions.standardDeviation(residualPlot.map(function(point) {
    return point.y;
  }));
  residualPlot.forEach(function(point) {
    point.x = point.x / xStdDev;
    point.y = point.y / yStdDev;
  });

  // Histogram with six unit-wide buckets spanning -3 to 3.
  // NOTE(review): the bucket is chosen from the plot's y coordinate, not from
  // the standardized residual (x) - confirm this is intended.
  bucketCenters = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5];
  bucketCounts = {};
  bucketCenters.forEach(function(center) {
    bucketCounts[center] = 0;
  });
  residualPlot.forEach(function(point) {
    var center;
    center = Math.floor(point.y + 1.0) - 0.5;
    // Points outside -3..3 (or NaN) belong to no reported bucket.
    if (bucketCounts.hasOwnProperty(center)) {
      bucketCounts[center] += 1;
    }
  });
  histogram = bucketCenters.map(function(center) {
    return {
      label: (-0.5 + center) + " to " + (0.5 + center),
      center: center,
      count: bucketCounts[center]
    };
  });

  // ANOVA table.
  factorDF = groups.length - 1;
  errorDF = overallN - groups.length;
  totalDF = factorDF + errorDF;
  factorSS = 0;
  for (g = 0; g < groupCount; g++) {
    group = groups[g];
    factorSS += group.n * group.mean * group.mean;
  }
  nTimesMeanSquared = overallN * overallMean * overallMean;
  factorSS -= nTimesMeanSquared;
  totalSS = overallSumSquares - nTimesMeanSquared;
  errorSS = totalSS - factorSS;
  factorMS = factorSS / factorDF;
  errorMS = errorSS / errorDF;
  factorF = factorMS / errorMS;
  factorP = fDist(factorDF, errorDF, factorF);
  rSquared = factorSS / totalSS;
  // The absolute value means the adjusted figure is never reported negative.
  rSquaredAdjusted = Math.abs(1 - (1 - rSquared) * (overallN - 1) / (overallN - groups.length));

  return {
    factorDF: factorDF,
    factorSS: factorSS,
    factorMS: factorMS,
    factorF: factorF,
    factorP: factorP,
    errorDF: errorDF,
    errorSS: errorSS,
    errorMS: errorMS,
    totalDF: totalDF,
    totalSS: totalSS,
    rSquared: rSquared,
    rSquaredAdjusted: rSquaredAdjusted,
    residualPlot: residualPlot,
    histogram: histogram,
    pooledStandardDeviation: pooledStandardDeviation
  };
};
exports.anova = anova;
}).call(this);