UNPKG

lumenize

Version:

Illuminating the forest AND the trees in your data.

196 lines (185 loc) 6.68 kB
// Generated by CoffeeScript 1.7.1 (function() { var anova, correlate, fDist, functions, normInverseUpper, utils, _ref; utils = require('tztime').utils; functions = require('./functions').functions; _ref = require('./distributions').distributions, fDist = _ref.fDist, normInverseUpper = _ref.normInverseUpper; correlate = require('./correlate').correlate; anova = function(rawData, overallPredicate, field, groups, ci) { var bucket, buckets, data, errorDF, errorMS, errorSS, factorDF, factorF, factorMS, factorP, factorSS, group, histogram, i, index, multiplier, nTimesMeanSquared, overallMean, overallN, overallSum, overallSumSquares, pooledNumerator, pooledStandardDeviation, r, rSquared, rSquaredAdjusted, residual, residualPlot, residuals, row, totalDF, totalSS, value, xStdDev, xValues, y, yStdDev, yValues, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _len5, _len6, _len7, _m, _n, _o, _p, _q, _r, _ref1; if (ci == null) { ci = 0.95; } /* @param {Object} groups {label, predicate} This is modified as a side-effect of this function. Many properties are added. https://onlinecourses.science.psu.edu/stat414/node/218 http://www.calvin.edu/~rpruim/courses/m243/F03/overheads/ANOVAf03.ppt */ utils.assert((0 < ci && ci < 1.0), "ci must be between 0.0 and 1.0"); if (overallPredicate != null) { data = (function() { var _i, _len, _results; _results = []; for (_i = 0, _len = rawData.length; _i < _len; _i++) { row = rawData[_i]; if (overallPredicate(row) && (row[field] != null)) { _results.push(row); } } return _results; })(); } else { data = rawData; } utils.assert(groups.length < data.length, 'After filtering with the overallPredicate, there were fewer rows in the dataset than there were groups'); overallN = 0; overallSum = 0; overallSumSquares = 0; pooledNumerator = 0; for (_i = 0, _len = groups.length; _i < _len; _i++) { group = groups[_i]; group.values = (function() { var _j, _len1, _results; _results = []; for (_j = 0, _len1 = data.length; _j < _len1; _j++) { row = data[_j]; if (group.predicate(row)) { _results.push(row[field]); } } return _results; })(); group.sum = functions.sum(group.values); group.n = group.values.length; group.sumSquares = functions.sumSquares(group.values); group.variance = functions.variance(group.values); group.standardDeviation = Math.sqrt(group.variance); group.mean = group.sum / group.n; overallN += group.n; overallSum += group.sum; overallSumSquares += group.sumSquares; pooledNumerator += (group.n - 1) * group.variance; } overallMean = overallSum / overallN; pooledStandardDeviation = Math.sqrt(pooledNumerator / (overallN - groups.length)); multiplier = normInverseUpper((1.0 - ci) / 2); for (_j = 0, _len1 = groups.length; _j < _len1; _j++) { group = groups[_j]; group.ciDelta = multiplier * pooledStandardDeviation / Math.sqrt(group.n); } residuals = []; for (_k = 0, _len2 = groups.length; _k < _len2; _k++) { group = groups[_k]; _ref1 = group.values; for (_l = 0, _len3 = _ref1.length; _l < _len3; _l++) { value = _ref1[_l]; residual = group.mean - value; residuals.push(residual); } } residuals = residuals.sort(function(a, b) { return a - b; }); residualPlot = []; for (index = _m = 0, _len4 = residuals.length; _m < _len4; index = ++_m) { r = residuals[index]; i = index + 1; if (i === 1) { y = 1 - Math.pow(0.5, 1 / residuals.length); } else if (i === residuals.length) { y = Math.pow(0.5, 1 / residuals.length); } else { y = (i - 0.3175) / (residuals.length + 0.365); } y = y - 0.5; if (y === 0) { y = 0; } else { y = Math.abs(y) * y; } residualPlot.push({ x: r, y: y }); } xValues = (function() { var _len5, _n, _results; _results = []; for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) { r = residualPlot[_n]; _results.push(r.x); } return _results; })(); yValues = (function() { var _len5, _n, _results; _results = []; for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) { r = residualPlot[_n]; _results.push(r.y); } return _results; })(); xStdDev = functions.standardDeviation(xValues); yStdDev = functions.standardDeviation(yValues); for (_n = 0, _len5 = residualPlot.length; _n < _len5; _n++) { r = residualPlot[_n]; r.x = r.x / xStdDev; r.y = r.y / yStdDev; } buckets = {}; for (bucket = _o = -2.5; _o <= 2.5; bucket = ++_o) { buckets[bucket] = 0; } for (_p = 0, _len6 = residualPlot.length; _p < _len6; _p++) { r = residualPlot[_p]; bucket = Math.floor(r.y + 1.0) - 0.5; buckets[bucket] += 1; } histogram = []; for (bucket = _q = -2.5; _q <= 2.5; bucket = ++_q) { row = { label: "" + (-0.5 + bucket) + " to " + (0.5 + bucket), center: bucket, count: buckets[bucket] }; histogram.push(row); } factorDF = groups.length - 1; errorDF = overallN - groups.length; totalDF = factorDF + errorDF; factorSS = 0; for (_r = 0, _len7 = groups.length; _r < _len7; _r++) { group = groups[_r]; factorSS += group.n * group.mean * group.mean; } nTimesMeanSquared = overallN * overallMean * overallMean; factorSS -= nTimesMeanSquared; totalSS = overallSumSquares - nTimesMeanSquared; errorSS = totalSS - factorSS; factorMS = factorSS / factorDF; errorMS = errorSS / errorDF; factorF = factorMS / errorMS; factorP = fDist(factorDF, errorDF, factorF); rSquared = factorSS / totalSS; rSquaredAdjusted = Math.abs(1 - (1 - rSquared) * (overallN - 1) / (overallN - groups.length)); return { factorDF: factorDF, factorSS: factorSS, factorMS: factorMS, factorF: factorF, factorP: factorP, errorDF: errorDF, errorSS: errorSS, errorMS: errorMS, totalDF: totalDF, totalSS: totalSS, rSquared: rSquared, rSquaredAdjusted: rSquaredAdjusted, residualPlot: residualPlot, histogram: histogram, pooledStandardDeviation: pooledStandardDeviation }; }; exports.anova = anova; }).call(this);