UNPKG

lumenize

Version:

Illuminating the forest AND the trees in your data.

196 lines (185 loc) 6.67 kB
// Generated by CoffeeScript 1.10.0
(function() {
  var anova, correlate, fDist, functions, normInverseUpper, ref, utils;

  utils = require('tztime').utils;

  functions = require('./functions').functions;

  ref = require('./distributions').distributions, fDist = ref.fDist, normInverseUpper = ref.normInverseUpper;

  correlate = require('./correlate').correlate;

  /*
   * Returns the rows of `rawData` that pass `overallPredicate` (when one is
   * given) and carry a non-null value for `field`.
   *
   * The null check is applied whether or not a predicate is supplied, so a
   * missing measurement can never reach the per-group sums.
   */
  function selectRows(rawData, overallPredicate, field) {
    var selected = [];
    var idx, row;
    for (idx = 0; idx < rawData.length; idx++) {
      row = rawData[idx];
      if ((overallPredicate == null || overallPredicate(row)) && (row[field] != null)) {
        selected.push(row);
      }
    }
    return selected;
  }

  /*
   * Adds values, sum, n, sumSquares, variance, standardDeviation and mean to
   * every group (mutating it) and returns the totals accumulated over all
   * groups.
   */
  function summarizeGroups(groups, data, field) {
    var totals = {
      n: 0,
      sum: 0,
      sumSquares: 0,
      pooledNumerator: 0
    };
    var g, group, k, row, values;
    for (g = 0; g < groups.length; g++) {
      group = groups[g];
      values = [];
      for (k = 0; k < data.length; k++) {
        row = data[k];
        if (group.predicate(row)) {
          values.push(row[field]);
        }
      }
      group.values = values;
      group.sum = functions.sum(values);
      group.n = values.length;
      group.sumSquares = functions.sumSquares(values);
      group.variance = functions.variance(values);
      group.standardDeviation = Math.sqrt(group.variance);
      group.mean = group.sum / group.n;
      totals.n += group.n;
      totals.sum += group.sum;
      totals.sumSquares += group.sumSquares;
      totals.pooledNumerator += (group.n - 1) * group.variance;
    }
    return totals;
  }

  /*
   * Builds the residual plot: one {x, y} point per observation, where x is the
   * sorted residual (group mean minus value) and y is derived from the rank of
   * that residual. Both coordinates are divided by their standard deviation.
   */
  function buildResidualPlot(groups) {
    var residuals = [];
    var plot = [];
    var xValues = [];
    var yValues = [];
    var count, g, idx, rank, v, values, xStdDev, y, yStdDev;

    for (g = 0; g < groups.length; g++) {
      values = groups[g].values;
      for (v = 0; v < values.length; v++) {
        residuals.push(groups[g].mean - values[v]);
      }
    }
    residuals.sort(function(a, b) {
      return a - b;
    });

    count = residuals.length;
    for (idx = 0; idx < count; idx++) {
      rank = idx + 1;
      // The constants below match Filliben's approximation of the uniform
      // order-statistic medians; the first and last ranks have their own form.
      if (rank === 1) {
        y = 1 - Math.pow(0.5, 1 / count);
      } else if (rank === count) {
        y = Math.pow(0.5, 1 / count);
      } else {
        y = (rank - 0.3175) / (count + 0.365);
      }
      // Center on zero, then square while keeping the sign.
      y = y - 0.5;
      y = (y === 0) ? 0 : Math.abs(y) * y;
      plot.push({
        x: residuals[idx],
        y: y
      });
    }

    for (idx = 0; idx < plot.length; idx++) {
      xValues.push(plot[idx].x);
    }
    for (idx = 0; idx < plot.length; idx++) {
      yValues.push(plot[idx].y);
    }
    xStdDev = functions.standardDeviation(xValues);
    yStdDev = functions.standardDeviation(yValues);
    for (idx = 0; idx < plot.length; idx++) {
      plot[idx].x = plot[idx].x / xStdDev;
      plot[idx].y = plot[idx].y / yStdDev;
    }
    return plot;
  }

  /*
   * Counts residual-plot points into six unit-wide buckets centered on
   * -2.5, -1.5, ..., 2.5 and returns one {label, center, count} row per bucket.
   *
   * NOTE(review): the bucket is chosen from the normalized y coordinate, not
   * from the residual itself (x) -- confirm that this is intended.
   */
  function buildHistogram(residualPlot) {
    var counts = {};
    var rows = [];
    var center, idx;

    for (center = -2.5; center <= 2.5; center++) {
      counts[center] = 0;
    }
    for (idx = 0; idx < residualPlot.length; idx++) {
      center = Math.floor(residualPlot[idx].y + 1.0) - 0.5;
      // Points outside the -3..3 range have no bucket and are not counted.
      if (Object.prototype.hasOwnProperty.call(counts, center)) {
        counts[center] += 1;
      }
    }
    for (center = -2.5; center <= 2.5; center++) {
      rows.push({
        label: (-0.5 + center) + " to " + (0.5 + center),
        center: center,
        count: counts[center]
      });
    }
    return rows;
  }

  /**
   * One-way analysis of variance of `field` across `groups`.
   *
   * References:
   *   https://onlinecourses.science.psu.edu/stat414/node/218
   *   http://www.calvin.edu/~rpruim/courses/m243/F03/overheads/ANOVAf03.ppt
   *
   * @param {Object[]} rawData Rows to analyze.
   * @param {Function} [overallPredicate] Optional row filter; only rows for
   *   which it returns a truthy value are used. Rows whose `field` is null or
   *   undefined are always excluded.
   * @param {string} field Name of the row property holding the measurement.
   * @param {Object[]} groups {label, predicate} This is modified as a
   *   side-effect of this function: values, sum, n, sumSquares, variance,
   *   standardDeviation, mean and ciDelta are added to each group.
   * @param {number} [ci=0.95] Confidence level; must lie strictly between 0
   *   and 1 (checked with utils.assert).
   * @returns {Object} factorDF, factorSS, factorMS, factorF, factorP, errorDF,
   *   errorSS, errorMS, totalDF, totalSS, rSquared, rSquaredAdjusted,
   *   residualPlot, histogram and pooledStandardDeviation.
   */
  anova = function(rawData, overallPredicate, field, groups, ci) {
    var data, errorDF, errorMS, errorSS, factorDF, factorF, factorMS, factorP, factorSS, g, histogram, multiplier, nTimesMeanSquared, overallMean, overallN, pooledStandardDeviation, rSquared, rSquaredAdjusted, residualPlot, totalDF, totalSS, totals;
    if (ci == null) {
      ci = 0.95;
    }
    utils.assert((0 < ci && ci < 1.0), "ci must be between 0.0 and 1.0");

    data = selectRows(rawData, overallPredicate, field);
    utils.assert(groups.length < data.length, 'After filtering with the overallPredicate, there were fewer rows in the dataset than there were groups');

    totals = summarizeGroups(groups, data, field);
    overallN = totals.n;
    overallMean = totals.sum / overallN;
    pooledStandardDeviation = Math.sqrt(totals.pooledNumerator / (overallN - groups.length));

    // Half-width of each group's confidence interval, based on the pooled
    // standard deviation.
    multiplier = normInverseUpper((1.0 - ci) / 2);
    for (g = 0; g < groups.length; g++) {
      groups[g].ciDelta = multiplier * pooledStandardDeviation / Math.sqrt(groups[g].n);
    }

    residualPlot = buildResidualPlot(groups);
    histogram = buildHistogram(residualPlot);

    factorDF = groups.length - 1;
    errorDF = overallN - groups.length;
    totalDF = factorDF + errorDF;

    factorSS = 0;
    for (g = 0; g < groups.length; g++) {
      factorSS += groups[g].n * groups[g].mean * groups[g].mean;
    }
    nTimesMeanSquared = overallN * overallMean * overallMean;
    factorSS -= nTimesMeanSquared;
    totalSS = totals.sumSquares - nTimesMeanSquared;
    errorSS = totalSS - factorSS;

    factorMS = factorSS / factorDF;
    errorMS = errorSS / errorDF;
    factorF = factorMS / errorMS;
    factorP = fDist(factorDF, errorDF, factorF);

    rSquared = factorSS / totalSS;
    // NOTE(review): the absolute value hides a negative adjusted R-squared --
    // confirm that this is intended.
    rSquaredAdjusted = Math.abs(1 - (1 - rSquared) * (overallN - 1) / (overallN - groups.length));

    return {
      factorDF: factorDF,
      factorSS: factorSS,
      factorMS: factorMS,
      factorF: factorF,
      factorP: factorP,
      errorDF: errorDF,
      errorSS: errorSS,
      errorMS: errorMS,
      totalDF: totalDF,
      totalSS: totalSS,
      rSquared: rSquared,
      rSquaredAdjusted: rSquaredAdjusted,
      residualPlot: residualPlot,
      histogram: histogram,
      pooledStandardDeviation: pooledStandardDeviation
    };
  };

  exports.anova = anova;

}).call(this);