UNPKG

lumenize

Version:

Illuminating the forest AND the trees in your data.

639 lines (564 loc) 20.8 kB
// Generated by CoffeeScript 1.10.0 (function() { var functions, utils; utils = require('tztime').utils; /* @class functions Rules about dependencies: * If a function can be calculated incrementally from an oldResult and newValues, then you do not need to specify dependencies * If a funciton can be calculated from other incrementally calculable results, then you need only specify those dependencies * If a function needs the full list of values to be calculated (like percentile coverage), then you must specify 'values' * To support the direct passing in of OLAP cube cells, you can provide a prefix (field name) so the key in dependentValues can be generated * 'count' is special and does not use a prefix because it is not dependent up a particular field * You should calculate the dependencies before you calculate the thing that is depedent. The OLAP cube does some checking to confirm you've done this. */ functions = {}; functions._populateDependentValues = function(values, dependencies, dependentValues, prefix) { var d, j, key, len, out; if (dependentValues == null) { dependentValues = {}; } if (prefix == null) { prefix = ''; } out = {}; for (j = 0, len = dependencies.length; j < len; j++) { d = dependencies[j]; if (d === 'count') { if (prefix === '') { key = 'count'; } else { key = '_count'; } } else { key = prefix + d; } if (dependentValues[key] == null) { dependentValues[key] = functions[d](values, void 0, void 0, dependentValues, prefix); } out[d] = dependentValues[key]; } return out; }; /* @method sum @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The sum of the values */ functions.sum = function(values, oldResult, newValues) { var j, len, temp, tempValues, v; if (oldResult != null) { temp = oldResult; tempValues = newValues; } else { temp = 0; tempValues = values; } for (j = 0, len = tempValues.length; j < len; j++) { v = tempValues[j]; temp += v; } return temp; }; /* @method product @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The product of the values */ functions.product = function(values, oldResult, newValues) { var j, len, temp, tempValues, v; if (oldResult != null) { temp = oldResult; tempValues = newValues; } else { temp = 1; tempValues = values; } for (j = 0, len = tempValues.length; j < len; j++) { v = tempValues[j]; temp = temp * v; } return temp; }; /* @method sumSquares @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The sum of the squares of the values */ functions.sumSquares = function(values, oldResult, newValues) { var j, len, temp, tempValues, v; if (oldResult != null) { temp = oldResult; tempValues = newValues; } else { temp = 0; tempValues = values; } for (j = 0, len = tempValues.length; j < len; j++) { v = tempValues[j]; temp += v * v; } return temp; }; /* @method sumCubes @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The sum of the cubes of the values */ functions.sumCubes = function(values, oldResult, newValues) { var j, len, temp, tempValues, v; if (oldResult != null) { temp = oldResult; tempValues = newValues; } else { temp = 0; tempValues = values; } for (j = 0, len = tempValues.length; j < len; j++) { v = tempValues[j]; temp += v * v * v; } return temp; }; /* @method lastValue @static @param {Number[]} [values] Must either provide values or newValues @param {Number} [oldResult] Not used. It is included to make the interface consistent. @param {Number[]} [newValues] for incremental calculation @return {Number} The last value */ functions.lastValue = function(values, oldResult, newValues) { if (newValues != null) { return newValues[newValues.length - 1]; } return values[values.length - 1]; }; /* @method firstValue @static @param {Number[]} [values] Must either provide values or oldResult @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] Not used. It is included to make the interface consistent. @return {Number} The first value */ functions.firstValue = function(values, oldResult, newValues) { if (oldResult != null) { return oldResult; } return values[0]; }; /* @method count @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The length of the values Array */ functions.count = function(values, oldResult, newValues) { if (oldResult != null) { return oldResult + newValues.length; } return values.length; }; /* @method min @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The minimum value or null if no values */ functions.min = function(values, oldResult, newValues) { var j, len, temp, v; if (oldResult != null) { return functions.min(newValues.concat([oldResult])); } if (values.length === 0) { return null; } temp = values[0]; for (j = 0, len = values.length; j < len; j++) { v = values[j]; if (v < temp) { temp = v; } } return temp; }; /* @method max @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Number} The maximum value or null if no values */ functions.max = function(values, oldResult, newValues) { var j, len, temp, v; if (oldResult != null) { return functions.max(newValues.concat([oldResult])); } if (values.length === 0) { return null; } temp = values[0]; for (j = 0, len = values.length; j < len; j++) { v = values[j]; if (v > temp) { temp = v; } } return temp; }; /* @method values @static @param {Object[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Array} All values (allows duplicates). Can be used for drill down. */ functions.values = function(values, oldResult, newValues) { if (oldResult != null) { return oldResult.concat(newValues); } return values; }; /* @method uniqueValues @static @param {Object[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] for incremental calculation @param {Number[]} [newValues] for incremental calculation @return {Array} Unique values. This is good for generating an OLAP dimension or drill down. */ functions.uniqueValues = function(values, oldResult, newValues) { var j, key, l, len, len1, r, temp, temp2, tempValues, v, value; temp = {}; if (oldResult != null) { for (j = 0, len = oldResult.length; j < len; j++) { r = oldResult[j]; temp[r] = null; } tempValues = newValues; } else { tempValues = values; } temp2 = []; for (l = 0, len1 = tempValues.length; l < len1; l++) { v = tempValues[l]; temp[v] = null; } for (key in temp) { value = temp[key]; temp2.push(key); } return temp2; }; /* @method average @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] not used by this function but included so all functions have a consistent signature @param {Number[]} [newValues] not used by this function but included so all functions have a consistent signature @param {Object} [dependentValues] If the function can be calculated from the results of other functions, this allows you to provide those pre-calculated values. @return {Number} The arithmetic mean */ functions.average = function(values, oldResult, newValues, dependentValues, prefix) { var count, ref, sum; ref = functions._populateDependentValues(values, functions.average.dependencies, dependentValues, prefix), count = ref.count, sum = ref.sum; if (count === 0) { return null; } else { return sum / count; } }; functions.average.dependencies = ['count', 'sum']; /* @method errorSquared @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] not used by this function but included so all functions have a consistent signature @param {Number[]} [newValues] not used by this function but included so all functions have a consistent signature @param {Object} [dependentValues] If the function can be calculated from the results of other functions, this allows you to provide those pre-calculated values. @return {Number} The error squared */ functions.errorSquared = function(values, oldResult, newValues, dependentValues, prefix) { var count, difference, errorSquared, j, len, mean, ref, sum, v; ref = functions._populateDependentValues(values, functions.errorSquared.dependencies, dependentValues, prefix), count = ref.count, sum = ref.sum; mean = sum / count; errorSquared = 0; for (j = 0, len = values.length; j < len; j++) { v = values[j]; difference = v - mean; errorSquared += difference * difference; } return errorSquared; }; functions.errorSquared.dependencies = ['count', 'sum']; /* @method variance @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] not used by this function but included so all functions have a consistent signature @param {Number[]} [newValues] not used by this function but included so all functions have a consistent signature @param {Object} [dependentValues] If the function can be calculated from the results of other functions, this allows you to provide those pre-calculated values. @return {Number} The variance */ functions.variance = function(values, oldResult, newValues, dependentValues, prefix) { var count, ref, sum, sumSquares; ref = functions._populateDependentValues(values, functions.variance.dependencies, dependentValues, prefix), count = ref.count, sum = ref.sum, sumSquares = ref.sumSquares; if (count === 0) { return null; } else if (count === 1) { return 0; } else { return (count * sumSquares - sum * sum) / (count * (count - 1)); } }; functions.variance.dependencies = ['count', 'sum', 'sumSquares']; /* @method standardDeviation @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] not used by this function but included so all functions have a consistent signature @param {Number[]} [newValues] not used by this function but included so all functions have a consistent signature @param {Object} [dependentValues] If the function can be calculated from the results of other functions, this allows you to provide those pre-calculated values. @return {Number} The standard deviation */ functions.standardDeviation = function(values, oldResult, newValues, dependentValues, prefix) { return Math.sqrt(functions.variance(values, oldResult, newValues, dependentValues, prefix)); }; functions.standardDeviation.dependencies = functions.variance.dependencies; /* @method percentileCreator @static @param {Number} p The percentile for the resulting function (50 = median, 75, 99, etc.) @return {Function} A function to calculate the percentile When the user passes in `p<n>` as an aggregation function, this `percentileCreator` is called to return the appropriate percentile function. The returned function will find the `<n>`th percentile where `<n>` is some number in the form of `##[.##]`. (e.g. `p40`, `p99`, `p99.9`). There is no official definition of percentile. The most popular choices differ in the interpolation algorithm that they use. The function returned by this `percentileCreator` uses the Excel interpolation algorithm which differs from the NIST primary method. However, NIST lists something very similar to the Excel approach as an acceptible alternative. The only difference seems to be for the edge case for when you have only two data points in your data set. Agreement with Excel, NIST's acceptance of it as an alternative (almost), and the fact that it makes the most sense to me is why this approach was chosen. http://en.wikipedia.org/wiki/Percentile#Alternative_methods Note: `median` is an alias for p50. The approach chosen for calculating p50 gives you the exact same result as the definition for median even for edge cases like sets with only one or two data points. */ functions.percentileCreator = function(p) { var f; f = function(values, oldResult, newValues, dependentValues, prefix) { var d, k, n, sortfunc, vLength; if (values == null) { values = functions._populateDependentValues(values, ['values'], dependentValues, prefix).values; } if (values.length === 0) { return null; } sortfunc = function(a, b) { return a - b; }; vLength = values.length; values.sort(sortfunc); n = (p * (vLength - 1) / 100) + 1; k = Math.floor(n); d = n - k; if (n === 1) { return values[1 - 1]; } if (n === vLength) { return values[vLength - 1]; } return values[k - 1] + d * (values[k] - values[k - 1]); }; f.dependencies = ['values']; return f; }; /* @method median @static @param {Number[]} [values] Must either provide values or oldResult and newValues @param {Number} [oldResult] not used by this function but included so all functions have a consistent signature @param {Number[]} [newValues] not used by this function but included so all functions have a consistent signature @param {Object} [dependentValues] If the function can be calculated from the results of other functions, this allows you to provide those pre-calculated values. @return {Number} The median */ functions.median = functions.percentileCreator(50); functions.expandFandAs = function(a) { /* @method expandFandAs @static @param {Object} a Will look like this `{as: 'mySum', f: 'sum', field: 'Points'}` @return {Object} returns the expanded specification Takes specifications for functions and expands them to include the actual function and 'as'. If you do not provide an 'as' property, it will build it from the field name and function with an underscore between. Also, if the 'f' provided is a string, it is copied over to the 'metric' property before the 'f' property is replaced with the actual function. `{field: 'a', f: 'sum'}` would expand to `{as: 'a_sum', field: 'a', metric: 'sum', f: [Function]}`. */ var p; utils.assert(a.f != null, "'f' missing from specification: \n" + (JSON.stringify(a, void 0, 4))); if (utils.type(a.f) === 'function') { utils.assert(a.as != null, 'Must provide "as" field with your aggregation when providing a user defined function'); a.metric = a.f.toString(); } else if (functions[a.f] != null) { a.metric = a.f; a.f = functions[a.f]; } else if (a.f.substr(0, 1) === 'p') { a.metric = a.f; p = /\p(\d+(.\d+)?)/.exec(a.f)[1]; a.f = functions.percentileCreator(Number(p)); } else { throw new Error(a.f + " is not a recognized built-in function"); } if (a.as == null) { if (a.metric === 'count') { a.field = ''; a.metric = 'count'; } a.as = a.field + "_" + a.metric; utils.assert((a.field != null) || a.f === 'count', "'field' missing from specification: \n" + (JSON.stringify(a, void 0, 4))); } return a; }; functions.expandMetrics = function(metrics, addCountIfMissing, addValuesForCustomFunctions) { var assureDependenciesAbove, confirmMetricAbove, countRow, dependencies, hasCount, index, j, l, len, len1, m, metricsRow, valuesRow; if (metrics == null) { metrics = []; } if (addCountIfMissing == null) { addCountIfMissing = false; } if (addValuesForCustomFunctions == null) { addValuesForCustomFunctions = false; } /* @method expandMetrics @static @private This is called internally by several Lumenize Calculators. You should probably not call it. */ confirmMetricAbove = function(m, fieldName, aboveThisIndex) { var currentRow, i, lookingFor, metricsLength; if (m === 'count') { lookingFor = '_' + m; } else { lookingFor = fieldName + '_' + m; } i = 0; while (i < aboveThisIndex) { currentRow = metrics[i]; if (currentRow.as === lookingFor) { return true; } i++; } i = aboveThisIndex + 1; metricsLength = metrics.length; while (i < metricsLength) { currentRow = metrics[i]; if (currentRow.as === lookingFor) { throw new Error("Depdencies must appear before the metric they are dependant upon. " + m + " appears after."); } i++; } return false; }; assureDependenciesAbove = function(dependencies, fieldName, aboveThisIndex) { var d, j, len, newRow; for (j = 0, len = dependencies.length; j < len; j++) { d = dependencies[j]; if (!confirmMetricAbove(d, fieldName, aboveThisIndex)) { if (d === 'count') { newRow = { f: 'count' }; } else { newRow = { f: d, field: fieldName }; } functions.expandFandAs(newRow); metrics.unshift(newRow); return false; } } return true; }; if (addValuesForCustomFunctions) { for (index = j = 0, len = metrics.length; j < len; index = ++j) { m = metrics[index]; if (utils.type(m.f) === 'function') { if (m.f.dependencies == null) { m.f.dependencies = []; } if (m.f.dependencies[0] !== 'values') { m.f.dependencies.push('values'); } if (!confirmMetricAbove('values', m.field, index)) { valuesRow = { f: 'values', field: m.field }; functions.expandFandAs(valuesRow); metrics.unshift(valuesRow); } } } } hasCount = false; for (l = 0, len1 = metrics.length; l < len1; l++) { m = metrics[l]; functions.expandFandAs(m); if (m.metric === 'count') { hasCount = true; } } if (addCountIfMissing && !hasCount) { countRow = { f: 'count' }; functions.expandFandAs(countRow); metrics.unshift(countRow); } index = 0; while (index < metrics.length) { metricsRow = metrics[index]; if (utils.type(metricsRow.f) === 'function') { dependencies = ['values']; } if (metricsRow.f.dependencies != null) { if (!assureDependenciesAbove(metricsRow.f.dependencies, metricsRow.field, index)) { index = -1; } } index++; } return metrics; }; exports.functions = functions; }).call(this);