UNPKG

simple-statistics

Version:
1 lines 242 kB
{"version":3,"file":"simple-statistics.mjs","sources":["../src/add_to_mean.js","../src/chunk.js","../src/make_matrix.js","../src/numeric_sort.js","../src/unique_count_sorted.js","../src/ckmeans.js","../src/sum.js","../src/mean.js","../src/sum_nth_power_deviations.js","../src/sample_variance.js","../src/sample_standard_deviation.js","../src/coefficient_of_variation.js","../src/combinations.js","../src/combinations_replacement.js","../src/combine_means.js","../src/combine_variances.js","../src/max.js","../src/min.js","../src/equal_interval_breaks.js","../src/extent.js","../src/extent_sorted.js","../src/geometric_mean.js","../src/harmonic_mean.js","../src/quantile_sorted.js","../src/quickselect.js","../src/quantile.js","../src/interquartile_range.js","../src/jenks_breaks.js","../src/jenks_matrices.js","../src/jenks.js","../src/linear_regression.js","../src/linear_regression_line.js","../src/log_average.js","../src/max_sorted.js","../src/sum_simple.js","../src/mean_simple.js","../src/median.js","../src/median_absolute_deviation.js","../src/median_sorted.js","../src/min_sorted.js","../src/mode_sorted.js","../src/mode.js","../src/mode_fast.js","../src/permutations_heap.js","../src/product.js","../src/quantile_rank_sorted.js","../src/quantile_rank.js","../src/r_squared.js","../src/root_mean_square.js","../src/shuffle_in_place.js","../src/shuffle.js","../src/sample.js","../src/sample_covariance.js","../src/sample_correlation.js","../src/sample_kurtosis.js","../src/sample_rank_correlation.js","../src/sample_skewness.js","../src/sample_with_replacement.js","../src/variance.js","../src/standard_deviation.js","../src/subtract_from_mean.js","../src/t_test.js","../src/t_test_two_sample.js","../src/wilcoxon_rank_sum.js","../src/epsilon.js","../src/relative_error.js","../src/approx_equal.js","../src/bayesian_classifier.js","../src/bernoulli_distribution.js","../src/binomial_distribution.js","../src/sign.js","../src/bisect.js","../src/chi_squared_distribution_table.js","../src/chi_squared_goodness_of_fit.js","../src/cumulative_std_logistic_probability.js","../src/standard_normal_table.js","../src/cumulative_std_normal_probability.js","../src/error_function.js","../src/factorial.js","../src/gamma.js","../src/gammaln.js","../src/inverse_error_function.js","../src/euclidean_distance.js","../src/k_means_cluster.js","../src/kernel_density_estimation.js","../src/logit.js","../src/perceptron.js","../src/permutation_test.js","../src/poisson_distribution.js","../src/probit.js","../src/silhouette.js","../src/silhouette_metric.js","../src/z_score.js"],"sourcesContent":["/**\n * When adding a new value to a list, one does not have to necessary\n * recompute the mean of the list in linear time. They can instead use\n * this function to compute the new mean by providing the current mean,\n * the number of elements in the list that produced it and the new\n * value to add.\n *\n * @since 2.5.0\n * @param {number} mean current mean\n * @param {number} n number of items in the list\n * @param {number} newValue the added value\n * @returns {number} the new mean\n *\n * @example\n * addToMean(14, 5, 53); // => 20.5\n */\nfunction addToMean(mean, n, newValue) {\n return mean + (newValue - mean) / (n + 1);\n}\n\nexport default addToMean;\n","/**\n * Split an array into chunks of a specified size. This function\n * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)\n * function, and thus will insert smaller-sized chunks at the end if\n * the input size is not divisible by the chunk size.\n *\n * `x` is expected to be an array, and `chunkSize` a number.\n * The `x` array can contain any kind of data.\n *\n * @param {Array} x a sample\n * @param {number} chunkSize size of each output array. must be a positive integer\n * @returns {Array<Array>} a chunked array\n * @throws {Error} if chunk size is less than 1 or not an integer\n * @example\n * chunk([1, 2, 3, 4, 5, 6], 2);\n * // => [[1, 2], [3, 4], [5, 6]]\n */\nfunction chunk(x, chunkSize) {\n // a list of result chunks, as arrays in an array\n const output = [];\n\n // `chunkSize` must be zero or higher - otherwise the loop below,\n // in which we call `start += chunkSize`, will loop infinitely.\n // So, we'll detect and throw in that case to indicate\n // invalid input.\n if (chunkSize < 1) {\n throw new Error(\"chunk size must be a positive number\");\n }\n\n if (Math.floor(chunkSize) !== chunkSize) {\n throw new Error(\"chunk size must be an integer\");\n }\n\n // `start` is the index at which `.slice` will start selecting\n // new array elements\n for (let start = 0; start < x.length; start += chunkSize) {\n // for each chunk, slice that part of the array and add it\n // to the output. The `.slice` function does not change\n // the original array.\n output.push(x.slice(start, start + chunkSize));\n }\n return output;\n}\n\nexport default chunk;\n","/**\n * Create a new column x row matrix.\n *\n * @private\n * @param {number} columns\n * @param {number} rows\n * @return {Array<Array<number>>} matrix\n * @example\n * makeMatrix(10, 10);\n */\nfunction makeMatrix(columns, rows) {\n const matrix = [];\n for (let i = 0; i < columns; i++) {\n const column = [];\n for (let j = 0; j < rows; j++) {\n column.push(0);\n }\n matrix.push(column);\n }\n return matrix;\n}\n\nexport default makeMatrix;\n","/**\n * Sort an array of numbers by their numeric value, ensuring that the\n * array is not changed in place.\n *\n * This is necessary because the default behavior of .sort\n * in JavaScript is to sort arrays as string values\n *\n * [1, 10, 12, 102, 20].sort()\n * // output\n * [1, 10, 102, 12, 20]\n *\n * @param {Array<number>} x input array\n * @return {Array<number>} sorted array\n * @private\n * @example\n * numericSort([3, 2, 1]) // => [1, 2, 3]\n */\nfunction numericSort(x) {\n return (\n x\n // ensure the array is not changed in-place\n .slice()\n // comparator function that treats input as numeric\n .sort(function (a, b) {\n return a - b;\n })\n );\n}\n\nexport default numericSort;\n","/**\n * For a sorted input, counting the number of unique values\n * is possible in constant time and constant memory. This is\n * a simple implementation of the algorithm.\n *\n * Values are compared with `===`, so objects and non-primitive objects\n * are not handled in any special way.\n *\n * @param {Array<*>} x an array of any kind of value\n * @returns {number} count of unique values\n * @example\n * uniqueCountSorted([1, 2, 3]); // => 3\n * uniqueCountSorted([1, 1, 1]); // => 1\n */\nfunction uniqueCountSorted(x) {\n let uniqueValueCount = 0;\n let lastSeenValue;\n for (let i = 0; i < x.length; i++) {\n if (i === 0 || x[i] !== lastSeenValue) {\n lastSeenValue = x[i];\n uniqueValueCount++;\n }\n }\n return uniqueValueCount;\n}\n\nexport default uniqueCountSorted;\n","import makeMatrix from \"./make_matrix.js\";\nimport numericSort from \"./numeric_sort.js\";\nimport uniqueCountSorted from \"./unique_count_sorted.js\";\n\n/**\n * Generates incrementally computed values based on the sums and sums of\n * squares for the data array\n *\n * @private\n * @param {number} j\n * @param {number} i\n * @param {Array<number>} sums\n * @param {Array<number>} sumsOfSquares\n * @return {number}\n * @example\n * ssq(0, 1, [-1, 0, 2], [1, 1, 5]);\n */\nfunction ssq(j, i, sums, sumsOfSquares) {\n let sji; // s(j, i)\n if (j > 0) {\n const muji = (sums[i] - sums[j - 1]) / (i - j + 1); // mu(j, i)\n sji =\n sumsOfSquares[i] - sumsOfSquares[j - 1] - (i - j + 1) * muji * muji;\n } else {\n sji = sumsOfSquares[i] - (sums[i] * sums[i]) / (i + 1);\n }\n if (sji < 0) {\n return 0;\n }\n return sji;\n}\n\n/**\n * Function that recursively divides and conquers computations\n * for cluster j\n *\n * @private\n * @param {number} iMin Minimum index in cluster to be computed\n * @param {number} iMax Maximum index in cluster to be computed\n * @param {number} cluster Index of the cluster currently being computed\n * @param {Array<Array<number>>} matrix\n * @param {Array<Array<number>>} backtrackMatrix\n * @param {Array<number>} sums\n * @param {Array<number>} sumsOfSquares\n */\nfunction fillMatrixColumn(\n iMin,\n iMax,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n) {\n if (iMin > iMax) {\n return;\n }\n\n // Start at midpoint between iMin and iMax\n const i = Math.floor((iMin + iMax) / 2);\n\n matrix[cluster][i] = matrix[cluster - 1][i - 1];\n backtrackMatrix[cluster][i] = i;\n\n let jlow = cluster; // the lower end for j\n\n if (iMin > cluster) {\n jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0);\n }\n jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0);\n\n let jhigh = i - 1; // the upper end for j\n if (iMax < matrix[0].length - 1) {\n /* c8 ignore start */\n jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0);\n /* c8 ignore end */\n }\n\n let sji;\n let sjlowi;\n let ssqjlow;\n let ssqj;\n for (let j = jhigh; j >= jlow; --j) {\n sji = ssq(j, i, sums, sumsOfSquares);\n\n if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) {\n break;\n }\n\n // Examine the lower bound of the cluster border\n sjlowi = ssq(jlow, i, sums, sumsOfSquares);\n\n ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1];\n\n if (ssqjlow < matrix[cluster][i]) {\n // Shrink the lower bound\n matrix[cluster][i] = ssqjlow;\n backtrackMatrix[cluster][i] = jlow;\n }\n jlow++;\n\n ssqj = sji + matrix[cluster - 1][j - 1];\n if (ssqj < matrix[cluster][i]) {\n matrix[cluster][i] = ssqj;\n backtrackMatrix[cluster][i] = j;\n }\n }\n\n fillMatrixColumn(\n iMin,\n i - 1,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n );\n fillMatrixColumn(\n i + 1,\n iMax,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n );\n}\n\n/**\n * Initializes the main matrices used in Ckmeans and kicks\n * off the divide and conquer cluster computation strategy\n *\n * @private\n * @param {Array<number>} data sorted array of values\n * @param {Array<Array<number>>} matrix\n * @param {Array<Array<number>>} backtrackMatrix\n */\nfunction fillMatrices(data, matrix, backtrackMatrix) {\n const nValues = matrix[0].length;\n\n // Shift values by the median to improve numeric stability\n const shift = data[Math.floor(nValues / 2)];\n\n // Cumulative sum and cumulative sum of squares for all values in data array\n const sums = [];\n const sumsOfSquares = [];\n\n // Initialize first column in matrix & backtrackMatrix\n for (let i = 0, shiftedValue; i < nValues; ++i) {\n shiftedValue = data[i] - shift;\n if (i === 0) {\n sums.push(shiftedValue);\n sumsOfSquares.push(shiftedValue * shiftedValue);\n } else {\n sums.push(sums[i - 1] + shiftedValue);\n sumsOfSquares.push(\n sumsOfSquares[i - 1] + shiftedValue * shiftedValue\n );\n }\n\n // Initialize for cluster = 0\n matrix[0][i] = ssq(0, i, sums, sumsOfSquares);\n backtrackMatrix[0][i] = 0;\n }\n\n // Initialize the rest of the columns\n let iMin;\n for (let cluster = 1; cluster < matrix.length; ++cluster) {\n if (cluster < matrix.length - 1) {\n iMin = cluster;\n } else {\n // No need to compute matrix[K-1][0] ... matrix[K-1][N-2]\n iMin = nValues - 1;\n }\n\n fillMatrixColumn(\n iMin,\n nValues - 1,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n );\n }\n}\n\n/**\n * Ckmeans clustering is an improvement on heuristic-based clustering\n * approaches like Jenks. The algorithm was developed in\n * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf)\n * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach\n * to the problem of clustering numeric data into groups with the least\n * within-group sum-of-squared-deviations.\n *\n * Minimizing the difference within groups - what Wang & Song refer to as\n * `withinss`, or within sum-of-squares, means that groups are optimally\n * homogenous within and the data is split into representative groups.\n * This is very useful for visualization, where you may want to represent\n * a continuous variable in discrete color or style groups. This function\n * can provide groups that emphasize differences between data.\n *\n * Being a dynamic approach, this algorithm is based on two matrices that\n * store incrementally-computed values for squared deviations and backtracking\n * indexes.\n *\n * This implementation is based on Ckmeans 3.4.6, which introduced a new divide\n * and conquer approach that improved runtime from O(kn^2) to O(kn log(n)).\n *\n * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html),\n * this implementation does not include any code to automatically determine\n * the optimal number of clusters: this information needs to be explicitly\n * provided.\n *\n * ### References\n * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic\n * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859\n *\n * from The R Journal Vol. 3/2, December 2011\n * @param {Array<number>} x input data, as an array of number values\n * @param {number} nClusters number of desired classes. This cannot be\n * greater than the number of values in the data array.\n * @returns {Array<Array<number>>} clustered input\n * @throws {Error} if the number of requested clusters is higher than the size of the data\n * @example\n * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);\n * // The input, clustered into groups of similar numbers.\n * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]);\n */\nfunction ckmeans(x, nClusters) {\n if (nClusters > x.length) {\n throw new Error(\n \"cannot generate more classes than there are data values\"\n );\n }\n\n const sorted = numericSort(x);\n // we'll use this as the maximum number of clusters\n const uniqueCount = uniqueCountSorted(sorted);\n\n // if all of the input values are identical, there's one cluster\n // with all of the input in it.\n if (uniqueCount === 1) {\n return [sorted];\n }\n\n // named 'S' originally\n const matrix = makeMatrix(nClusters, sorted.length);\n // named 'J' originally\n const backtrackMatrix = makeMatrix(nClusters, sorted.length);\n\n // This is a dynamic programming way to solve the problem of minimizing\n // within-cluster sum of squares. It's similar to linear regression\n // in this way, and this calculation incrementally computes the\n // sum of squares that are later read.\n fillMatrices(sorted, matrix, backtrackMatrix);\n\n // The real work of Ckmeans clustering happens in the matrix generation:\n // the generated matrices encode all possible clustering combinations, and\n // once they're generated we can solve for the best clustering groups\n // very quickly.\n const clusters = [];\n let clusterRight = backtrackMatrix[0].length - 1;\n\n // Backtrack the clusters from the dynamic programming matrix. This\n // starts at the bottom-right corner of the matrix (if the top-left is 0, 0),\n // and moves the cluster target with the loop.\n for (let cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {\n const clusterLeft = backtrackMatrix[cluster][clusterRight];\n\n // fill the cluster from the sorted input by taking a slice of the\n // array. the backtrack matrix makes this easy - it stores the\n // indexes where the cluster should start and end.\n clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1);\n\n if (cluster > 0) {\n clusterRight = clusterLeft - 1;\n }\n }\n\n return clusters;\n}\n\nexport default ckmeans;\n","/**\n * Our default sum is the [Kahan-Babuska algorithm](https://pdfs.semanticscholar.org/1760/7d467cda1d0277ad272deb2113533131dc09.pdf).\n * This method is an improvement over the classical\n * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm).\n * It aims at computing the sum of a list of numbers while correcting for\n * floating-point errors. Traditionally, sums are calculated as many\n * successive additions, each one with its own floating-point roundoff. These\n * losses in precision add up as the number of numbers increases. This alternative\n * algorithm is more accurate than the simple way of calculating sums by simple\n * addition.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x input\n * @return {number} sum of all input numbers\n * @example\n * sum([1, 2, 3]); // => 6\n */\nfunction sum(x) {\n // If the array is empty, we needn't bother computing its sum\n if (x.length === 0) {\n return 0;\n }\n\n // Initializing the sum as the first number in the array\n let sum = x[0];\n\n // Keeping track of the floating-point error correction\n let correction = 0;\n\n let transition;\n\n if (typeof sum !== \"number\") {\n return Number.NaN;\n }\n\n for (let i = 1; i < x.length; i++) {\n if (typeof x[i] !== \"number\") {\n return Number.NaN;\n }\n transition = sum + x[i];\n\n // Here we need to update the correction in a different fashion\n // if the new absolute value is greater than the absolute sum\n if (Math.abs(sum) >= Math.abs(x[i])) {\n correction += sum - transition + x[i];\n } else {\n correction += x[i] - transition + sum;\n }\n\n sum = transition;\n }\n\n // Returning the corrected sum\n return sum + correction;\n}\n\nexport default sum;\n","import sum from \"./sum.js\";\n\n/**\n * The mean, _also known as average_,\n * is the sum of all values over the number of values.\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @throws {Error} if the length of x is less than one\n * @returns {number} mean\n * @example\n * mean([0, 10]); // => 5\n */\nfunction mean(x) {\n if (x.length === 0) {\n throw new Error(\"mean requires at least one data point\");\n }\n\n return sum(x) / x.length;\n}\n\nexport default mean;\n","import mean from \"./mean.js\";\n\n/**\n * The sum of deviations to the Nth power.\n * When n=2 it's the sum of squared deviations.\n * When n=3 it's the sum of cubed deviations.\n *\n * @param {Array<number>} x\n * @param {number} n power\n * @returns {number} sum of nth power deviations\n *\n * @example\n * var input = [1, 2, 3];\n * // since the variance of a set is the mean squared\n * // deviations, we can calculate that with sumNthPowerDeviations:\n * sumNthPowerDeviations(input, 2) / input.length;\n */\nfunction sumNthPowerDeviations(x, n) {\n const meanValue = mean(x);\n let sum = 0;\n let tempValue;\n let i;\n\n // This is an optimization: when n is 2 (we're computing a number squared),\n // multiplying the number by itself is significantly faster than using\n // the Math.pow method.\n if (n === 2) {\n for (i = 0; i < x.length; i++) {\n tempValue = x[i] - meanValue;\n sum += tempValue * tempValue;\n }\n } else {\n for (i = 0; i < x.length; i++) {\n sum += Math.pow(x[i] - meanValue, n);\n }\n }\n\n return sum;\n}\n\nexport default sumNthPowerDeviations;\n","import sumNthPowerDeviations from \"./sum_nth_power_deviations.js\";\n\n/**\n * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance)\n * is the sum of squared deviations from the mean. The sample variance\n * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction):\n * instead of dividing the sum of squared deviations by the length of the input,\n * it is divided by the length minus one. This corrects the bias in estimating\n * a value from a set that you don't know if full.\n *\n * References:\n * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html)\n *\n * @param {Array<number>} x a sample of two or more data points\n * @throws {Error} if the length of x is less than 2\n * @return {number} sample variance\n * @example\n * sampleVariance([1, 2, 3, 4, 5]); // => 2.5\n */\nfunction sampleVariance(x) {\n if (x.length < 2) {\n throw new Error(\"sampleVariance requires at least two data points\");\n }\n\n const sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2);\n\n // this is Bessels' Correction: an adjustment made to sample statistics\n // that allows for the reduced degree of freedom entailed in calculating\n // values from samples rather than complete populations.\n const besselsCorrection = x.length - 1;\n\n // Find the mean value of that list\n return sumSquaredDeviationsValue / besselsCorrection;\n}\n\nexport default sampleVariance;\n","import sampleVariance from \"./sample_variance.js\";\n\n/**\n * The [sample standard deviation](http://en.wikipedia.org/wiki/Standard_deviation#Sample_standard_deviation)\n * is the square root of the sample variance.\n *\n * @param {Array<number>} x input array\n * @returns {number} sample standard deviation\n * @example\n * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2);\n * // => '2.14'\n */\nfunction sampleStandardDeviation(x) {\n const sampleVarianceX = sampleVariance(x);\n return Math.sqrt(sampleVarianceX);\n}\n\nexport default sampleStandardDeviation;\n","import mean from \"./mean.js\";\nimport sampleStandardDeviation from \"./sample_standard_deviation.js\";\n\n/**\n * The`coefficient of variation`_ is the ratio of the standard deviation to the mean.\n * .._`coefficient of variation`: https://en.wikipedia.org/wiki/Coefficient_of_variation\n *\n *\n * @param {Array} x input\n * @returns {number} coefficient of variation\n * @example\n * coefficientOfVariation([1, 2, 3, 4]).toFixed(3); // => 0.516\n * coefficientOfVariation([1, 2, 3, 4, 5]).toFixed(3); // => 0.527\n * coefficientOfVariation([-1, 0, 1, 2, 3, 4]).toFixed(3); // => 1.247\n */\nfunction coefficientOfVariation(x) {\n return sampleStandardDeviation(x) / mean(x);\n}\n\nexport default coefficientOfVariation;\n","/**\n * Implementation of Combinations\n * Combinations are unique subsets of a collection - in this case, k x from a collection at a time.\n * https://en.wikipedia.org/wiki/Combination\n * @param {Array} x any type of data\n * @param {int} k the number of objects in each group (without replacement)\n * @returns {Array<Array>} array of permutations\n * @example\n * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]]\n */\n\nfunction combinations(x, k) {\n let i;\n let subI;\n const combinationList = [];\n let subsetCombinations;\n let next;\n\n for (i = 0; i < x.length; i++) {\n if (k === 1) {\n combinationList.push([x[i]]);\n } else {\n subsetCombinations = combinations(x.slice(i + 1, x.length), k - 1);\n for (subI = 0; subI < subsetCombinations.length; subI++) {\n next = subsetCombinations[subI];\n next.unshift(x[i]);\n combinationList.push(next);\n }\n }\n }\n return combinationList;\n}\n\nexport default combinations;\n","/**\n * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement\n * Combinations are unique subsets of a collection - in this case, k x from a collection at a time.\n * 'With replacement' means that a given element can be chosen multiple times.\n * Unlike permutation, order doesn't matter for combinations.\n *\n * @param {Array} x any type of data\n * @param {int} k the number of objects in each group (without replacement)\n * @returns {Array<Array>} array of permutations\n * @example\n * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]]\n */\nfunction combinationsReplacement(x, k) {\n const combinationList = [];\n\n for (let i = 0; i < x.length; i++) {\n if (k === 1) {\n // If we're requested to find only one element, we don't need\n // to recurse: just push `x[i]` onto the list of combinations.\n combinationList.push([x[i]]);\n } else {\n // Otherwise, recursively find combinations, given `k - 1`. Note that\n // we request `k - 1`, so if you were looking for k=3 combinations, we're\n // requesting k=2. This -1 gets reversed in the for loop right after this\n // code, since we concatenate `x[i]` onto the selected combinations,\n // bringing `k` back up to your requested level.\n // This recursion may go many levels deep, since it only stops once\n // k=1.\n const subsetCombinations = combinationsReplacement(\n x.slice(i, x.length),\n k - 1\n );\n\n for (let j = 0; j < subsetCombinations.length; j++) {\n combinationList.push([x[i]].concat(subsetCombinations[j]));\n }\n }\n }\n\n return combinationList;\n}\n\nexport default combinationsReplacement;\n","/**\n * When combining two lists of values for which one already knows the means,\n * one does not have to necessary recompute the mean of the combined lists in\n * linear time. They can instead use this function to compute the combined\n * mean by providing the mean & number of values of the first list and the mean\n * & number of values of the second list.\n *\n * @since 3.0.0\n * @param {number} mean1 mean of the first list\n * @param {number} n1 number of items in the first list\n * @param {number} mean2 mean of the second list\n * @param {number} n2 number of items in the second list\n * @returns {number} the combined mean\n *\n * @example\n * combineMeans(5, 3, 4, 3); // => 4.5\n */\nfunction combineMeans(mean1, n1, mean2, n2) {\n return (mean1 * n1 + mean2 * n2) / (n1 + n2);\n}\n\nexport default combineMeans;\n","import combineMeans from \"./combine_means.js\";\n\n/**\n * When combining two lists of values for which one already knows the variances,\n * one does not have to necessary recompute the variance of the combined lists\n * in linear time. They can instead use this function to compute the combined\n * variance by providing the variance, mean & number of values of the first list\n * and the variance, mean & number of values of the second list.\n *\n * @since 3.0.0\n * @param {number} variance1 variance of the first list\n * @param {number} mean1 mean of the first list\n * @param {number} n1 number of items in the first list\n * @param {number} variance2 variance of the second list\n * @param {number} mean2 mean of the second list\n * @param {number} n2 number of items in the second list\n * @returns {number} the combined mean\n *\n * @example\n * combineVariances(14 / 3, 5, 3, 8 / 3, 4, 3); // => 47 / 12\n */\nfunction combineVariances(variance1, mean1, n1, variance2, mean2, n2) {\n const newMean = combineMeans(mean1, n1, mean2, n2);\n\n return (\n (n1 * (variance1 + Math.pow(mean1 - newMean, 2)) +\n n2 * (variance2 + Math.pow(mean2 - newMean, 2))) /\n (n1 + n2)\n );\n}\n\nexport default combineVariances;\n","/**\n * This computes the maximum number in an array.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {number} maximum value\n * @throws {Error} if the length of x is less than one\n * @example\n * max([1, 2, 3, 4]);\n * // => 4\n */\nfunction max(x) {\n if (x.length === 0) {\n throw new Error(\"max requires at least one data point\");\n }\n\n let value = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] > value) {\n value = x[i];\n }\n }\n return value;\n}\n\nexport default max;\n","/**\n * The min is the lowest number in the array.\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @throws {Error} if the length of x is less than one\n * @returns {number} minimum value\n * @example\n * min([1, 5, -10, 100, 2]); // => -10\n */\nfunction min(x) {\n if (x.length === 0) {\n throw new Error(\"min requires at least one data point\");\n }\n\n let value = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] < value) {\n value = x[i];\n }\n }\n return value;\n}\n\nexport default min;\n","import max from \"./max.js\";\nimport min from \"./min.js\";\n\n/**\n * Given an array of x, this will find the extent of the\n * x and return an array of breaks that can be used\n * to categorize the x into a number of classes. The\n * returned array will always be 1 longer than the number of\n * classes because it includes the minimum value.\n *\n * @param {Array<number>} x an array of number values\n * @param {number} nClasses number of desired classes\n * @returns {Array<number>} array of class break positions\n * @example\n * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); // => [1, 2.25, 3.5, 4.75, 6]\n */\nfunction equalIntervalBreaks(x, nClasses) {\n if (x.length < 2) {\n return x;\n }\n\n const theMin = min(x);\n const theMax = max(x);\n\n // the first break will always be the minimum value\n // in the xset\n const breaks = [theMin];\n\n // The size of each break is the full range of the x\n // divided by the number of classes requested\n const breakSize = (theMax - theMin) / nClasses;\n\n // In the case of nClasses = 1, this loop won't run\n // and the returned breaks will be [min, max]\n for (let i = 1; i < nClasses; i++) {\n breaks.push(breaks[0] + breakSize * i);\n }\n\n // the last break will always be the\n // maximum.\n breaks.push(theMax);\n\n return breaks;\n}\n\nexport default equalIntervalBreaks;\n","/**\n * This computes the minimum & maximum number in an array.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {Array<number>} minimum & maximum value\n * @throws {Error} if the length of x is less than one\n * @example\n * extent([1, 2, 3, 4]);\n * // => [1, 4]\n */\nfunction extent(x) {\n if (x.length === 0) {\n throw new Error(\"extent requires at least one data point\");\n }\n\n let min = x[0];\n let max = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] > max) {\n max = x[i];\n }\n if (x[i] < min) {\n min = x[i];\n }\n }\n return [min, max];\n}\n\nexport default extent;\n","/**\n * The extent is the lowest & highest number in the array. With a sorted array,\n * the first element in the array is always the lowest while the last element is always the largest, so this calculation\n * can be done in one step, or constant time.\n *\n * @param {Array<number>} x input\n * @returns {Array<number>} minimum & maximum value\n * @example\n * extentSorted([-100, -10, 1, 2, 5]); // => [-100, 5]\n */\nfunction extentSorted(x) {\n return [x[0], x[x.length - 1]];\n}\n\nexport default extentSorted;\n","/**\n * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is\n * a mean function that is more useful for numbers in different\n * ranges.\n *\n * This is the nth root of the input numbers multiplied by each other.\n *\n * The geometric mean is often useful for\n * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given\n * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple\n * mean will incorrectly estimate an average growth rate, whereas a geometric\n * mean will correctly estimate a growth rate that, over those years,\n * will yield the same end value.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {number} geometric mean\n * @throws {Error} if x is empty\n * @throws {Error} if x contains a negative number\n * @example\n * var growthRates = [1.80, 1.166666, 1.428571];\n * var averageGrowth = ss.geometricMean(growthRates);\n * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth];\n * var startingValue = 10;\n * var startingValueMean = 10;\n * growthRates.forEach(function(rate) {\n * startingValue *= rate;\n * });\n * averageGrowthRates.forEach(function(rate) {\n * startingValueMean *= rate;\n * });\n * startingValueMean === startingValue;\n */\nfunction geometricMean(x) {\n if (x.length === 0) {\n throw new Error(\"geometricMean requires at least one data point\");\n }\n\n // the starting value.\n let value = 1;\n\n for (let i = 0; i < x.length; i++) {\n // the geometric mean is only valid for positive numbers\n if (x[i] < 0) {\n throw new Error(\n \"geometricMean requires only non-negative numbers as input\"\n );\n }\n\n // repeatedly multiply the value by each number\n value *= x[i];\n }\n\n return Math.pow(value, 1 / x.length);\n}\n\nexport default geometricMean;\n","/**\n * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is\n * a mean function typically used to find the average of rates.\n * This mean is calculated by taking the reciprocal of the arithmetic mean\n * of the reciprocals of the input numbers.\n *\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {number} harmonic mean\n * @throws {Error} if x is empty\n * @throws {Error} if x contains a negative number\n * @example\n * harmonicMean([2, 3]).toFixed(2) // => '2.40'\n */\nfunction harmonicMean(x) {\n if (x.length === 0) {\n throw new Error(\"harmonicMean requires at least one data point\");\n }\n\n let reciprocalSum = 0;\n\n for (let i = 0; i < x.length; i++) {\n // the harmonic mean is only valid for positive numbers\n if (x[i] <= 0) {\n throw new Error(\n \"harmonicMean requires only positive numbers as input\"\n );\n }\n\n reciprocalSum += 1 / x[i];\n }\n\n // divide n by the reciprocal sum\n return x.length / reciprocalSum;\n}\n\nexport default harmonicMean;\n","/**\n * This is the internal implementation of quantiles: when you know\n * that the order is sorted, you don't need to re-sort it, and the computations\n * are faster.\n *\n * This implements the linear interpolation method (type=7 in R/numpy),\n * which is the default in numpy.percentile and R's quantile.\n *\n * @param {Array<number>} x sample of one or more data points\n * @param {number} p desired quantile: a number between 0 to 1, inclusive\n * @returns {number} quantile value\n * @throws {Error} if p ix outside of the range from 0 to 1\n * @throws {Error} if x is empty\n * @example\n * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9\n */\nfunction quantileSorted(x, p) {\n // Use (n-1) * p for index, matching numpy's linear method (type=7)\n const idx = (x.length - 1) * p;\n if (x.length === 0) {\n throw new Error(\"quantile requires at least one data point.\");\n } else if (p < 0 || p > 1) {\n throw new Error(\"quantiles must be between 0 and 1\");\n } else if (p === 1) {\n // If p is 1, directly return the last element\n return x[x.length - 1];\n } else if (p === 0) {\n // If p is 0, directly return the first element\n return x[0];\n } else if (idx % 1 !== 0) {\n // If idx is not integer, interpolate linearly between floor and ceil\n const lower = Math.floor(idx);\n const upper = Math.ceil(idx);\n const fraction = idx - lower;\n return x[lower] + fraction * (x[upper] - x[lower]);\n } else {\n // If idx is integer, type=7 returns the value at that index\n return x[idx];\n }\n}\n\nexport default quantileSorted;\n","/**\n * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.\n * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.\n *\n * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm\n *\n * @param {Array<number>} arr input array\n * @param {number} k pivot index\n * @param {number} [left] left index\n * @param {number} [right] right index\n * @returns {void} mutates input array\n * @example\n * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];\n * quickselect(arr, 8);\n * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]\n */\nfunction quickselect(arr, k, left, right) {\n left = left || 0;\n right = right || arr.length - 1;\n\n while (right > left) {\n // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time\n if (right - left > 600) {\n const n = right - left + 1;\n const m = k - left + 1;\n const z = Math.log(n);\n const s = 0.5 * Math.exp((2 * z) / 3);\n let sd = 0.5 * Math.sqrt((z * s * (n - s)) / n);\n if (m - n / 2 < 0) sd *= -1;\n const newLeft = Math.max(left, Math.floor(k - (m * s) / n + sd));\n const newRight = Math.min(\n right,\n Math.floor(k + ((n - m) * s) / n + sd)\n );\n quickselect(arr, k, newLeft, newRight);\n }\n\n const t = arr[k];\n let i = left;\n let j = right;\n\n swap(arr, left, k);\n if (arr[right] > t) swap(arr, left, right);\n\n while (i < j) {\n swap(arr, i, j);\n i++;\n j--;\n while (arr[i] < t) i++;\n while (arr[j] > t) j--;\n }\n\n if (arr[left] === t) swap(arr, left, j);\n else {\n j++;\n swap(arr, j, right);\n }\n\n if (j <= k) left = j + 1;\n if (k <= j) right = j - 1;\n }\n}\n\nfunction swap(arr, i, j) {\n const tmp = arr[i];\n arr[i] = arr[j];\n arr[j] = tmp;\n}\n\nexport default quickselect;\n","import quantileSorted from \"./quantile_sorted.js\";\nimport quickselect from \"./quickselect.js\";\n\n/**\n * The [quantile](https://en.wikipedia.org/wiki/Quantile):\n * this is a population quantile, since we assume to know the entire\n * dataset in this library. This implementation uses linear interpolation,\n * equivalent to R's type=7 and numpy's default percentile method.\n *\n * Sample is a one-dimensional array of numbers,\n * and p is either a decimal number from 0 to 1 or an array of decimal\n * numbers from 0 to 1.\n * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing\n * with decimal values.\n * When p is an array, the result of the function is also an array containing the appropriate\n * quantiles in input order\n *\n * @param {Array<number>} x sample of one or more numbers\n * @param {Array<number> | number} p the desired quantile, as a number between 0 and 1\n * @returns {number} quantile\n * @example\n * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9\n */\nfunction quantile(x, p) {\n const copy = x.slice();\n\n if (Array.isArray(p)) {\n // rearrange elements so that each element corresponding to a requested\n // quantile is on a place it would be if the array was fully sorted\n multiQuantileSelect(copy, p);\n // Initialize the result array\n const results = [];\n // For each requested quantile\n for (let i = 0; i < p.length; i++) {\n results[i] = quantileSorted(copy, p[i]);\n }\n return results;\n } else {\n const idx = quantileIndex(copy.length, p);\n quantileSelect(copy, idx, 0, copy.length - 1);\n return quantileSorted(copy, p);\n }\n}\n\nfunction quantileSelect(arr, k, left, right) {\n if (k % 1 === 0) {\n quickselect(arr, k, left, right);\n } else {\n k = Math.floor(k);\n quickselect(arr, k, left, right);\n quickselect(arr, k + 1, k + 1, right);\n }\n}\n\nfunction multiQuantileSelect(arr, p) {\n const indices = [0];\n for (let i = 0; i < p.length; i++) {\n indices.push(quantileIndex(arr.length, p[i]));\n }\n indices.push(arr.length - 1);\n indices.sort(compare);\n\n const stack = [0, indices.length - 1];\n\n while (stack.length) {\n const r = Math.ceil(stack.pop());\n const l = Math.floor(stack.pop());\n if (r - l <= 1) continue;\n\n const m = Math.floor((l + r) / 2);\n quantileSelect(\n arr,\n indices[m],\n Math.floor(indices[l]),\n Math.ceil(indices[r])\n );\n\n stack.push(l, m, m, r);\n }\n}\n\nfunction compare(a, b) {\n return a - b;\n}\n\nfunction quantileIndex(len, p) {\n // Use (n-1) * p to match numpy's linear method (type=7)\n const idx = (len - 1) * p;\n if (p === 1) {\n // If p is 1, directly return the last index\n return len - 1;\n } else if (p === 0) {\n // If p is 0, directly return the first index\n return 0;\n } else if (idx % 1 !== 0) {\n // If index is not integer, keep the fractional position so we can\n // select both surrounding order statistics for interpolation.\n return idx;\n } else {\n // If index is integer, return that exact position.\n return idx;\n }\n}\n\nexport default quantile;\n","import quantile from \"./quantile.js\";\n\n/**\n * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is\n * a measure of statistical dispersion, or how scattered, spread, or\n * concentrated a distribution is. It's computed as the difference between\n * the third quartile and first quartile.\n *\n * @param {Array<number>} x sample of one or more numbers\n * @returns {number} interquartile range: the span between lower and upper quartile,\n * 0.25 and 0.75\n * @example\n * interquartileRange([0, 1, 2, 3]); // => 2\n */\nfunction interquartileRange(x) {\n // Interquartile range is the span between the upper quartile,\n // at `0.75`, and lower quartile, `0.25`\n const q1 = quantile(x, 0.75);\n const q2 = quantile(x, 0.25);\n\n if (typeof q1 === \"number\" && typeof q2 === \"number\") {\n return q1 - q2;\n }\n}\n\nexport default interquartileRange;\n","/*\n * Pull Breaks Values for Jenks\n *\n * the second part of the jenks recipe: take the calculated matrices\n * and derive an array of n breaks.\n *\n * @private\n */\nfunction jenksBreaks(data, lowerClassLimits, nClasses) {\n let k = data.length;\n const kclass = [];\n let countNum = nClasses;\n\n // the calculation of classes will never include the upper\n // bound, so we need to explicitly set it\n kclass[nClasses] = data[data.length - 1];\n\n // the lowerClassLimits matrix is used as indices into itself\n // here: the `k` variable is reused in each iteration.\n while (countNum > 0) {\n kclass[countNum - 1] = data[lowerClassLimits[k][countNum] - 1];\n k = lowerClassLimits[k][countNum] - 1;\n countNum--;\n }\n\n return kclass;\n}\n\nexport default jenksBreaks;\n","/*\n * Compute Matrices for Jenks\n *\n * Compute the matrices required for Jenks breaks. These matrices\n * can be used for any classing of data with `classes <= nClasses`\n *\n * @private\n */\nfunction jenksMatrices(data, nClasses) {\n // in the original implementation, these matrices are referred to\n // as `LC` and `OP`\n //\n // * lowerClassLimits (LC): optimal lower class limits\n // * varianceCombinations (OP): optimal variance combinations for all classes\n const lowerClassLimits = [];\n const varianceCombinations = [];\n // loop counters\n let i;\n let j;\n // the variance, as computed at each step in the calculation\n let variance = 0;\n\n // Initialize and fill each matrix with zeroes\n for (i = 0; i < data.length + 1; i++) {\n const tmp1 = [];\n const tmp2 = [];\n // despite these arrays having the same values, we need\n // to keep them separate so that changing one does not change\n // the other\n for (j = 0; j < nClasses + 1; j++) {\n tmp1.push(0);\n tmp2.push(0);\n }\n lowerClassLimits.push(tmp1);\n varianceCombinations.push(tmp2);\n }\n\n for (i = 1; i < nClasses + 1; i++) {\n lowerClassLimits[1][i] = 1;\n varianceCombinations[1][i] = 0;\n // in the original implementation, 9999999 is used but\n // since Javascript has `Infinity`, we use that.\n for (j = 2; j < data.length + 1; j++) {\n varianceCombinations[j][i] = Number.POSITIVE_INFINITY;\n }\n }\n\n for (let l = 2; l < data.length + 1; l++) {\n // `SZ` originally. this is the sum of the values seen thus\n // far when calculating variance.\n let sum = 0;\n // `ZSQ` originally. the sum of squares of values seen\n // thus far\n let sumSquares = 0;\n // `WT` originally. This is the number of\n let w = 0;\n // `IV` originally\n let i4 = 0;\n\n // in several instances, you could say `Math.pow(x, 2)`\n // instead of `x * x`, but this is slower in some browsers\n // introduces an unnecessary concept.\n for (let m = 1; m < l + 1; m++) {\n // `III` originally\n const lowerClassLimit = l - m + 1;\n const val = data[lowerClassLimit - 1];\n\n // here we're estimating variance for each potential classing\n // of the data, for each potential number of classes. `w`\n // is the number of data points considered so far.\n w++;\n\n // increase the current sum and sum-of-squares\n sum += val;\n sumSquares += val * val;\n\n // the variance at this point in the sequence is the difference\n // between the sum of squares and the total x 2, over the number\n // of samples.\n variance = sumSquares - (sum * sum) / w;\n\n i4 = lowerClassLimit - 1;\n\n if (i4 !== 0) {\n for (j = 2; j < nClasses + 1; j++) {\n // if adding this element to an existing class\n // will increase its variance beyond the limit, break\n // the class at this point, setting the `lowerClassLimit`\n // at this point.\n if (\n varianceCombinations[l][j] >=\n variance + varianceCombinations[i4][j - 1]\n ) {\n lowerClassLimits[l][j] = lowerClassLimit;\n varianceCombinations[l][j] =\n variance + varianceCombinations[i4][j - 1];\n }\n }\n }\n }\n\n lowerClassLimits[l][1] = 1;\n varianceCombinations[l][1] = variance;\n }\n\n // return the two matrices. for just providing breaks, only\n // `lowerClassLimits` is needed, but variances can be useful to\n // evaluate goodness of fit.\n return {\n lowerClassLimits: lowerClassLimits,\n varianceCombinations: varianceCombinations\n };\n}\n\nexport default jenksMatrices;\n","import jenksBreaks from \"./jenks_breaks.js\";\nimport jenksMatrices from \"./jenks_matrices.js\";\n\n/**\n * The **[jenks natural breaks optimization](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization)**\n * is an algorithm commonly used in cartography and visualization to decide\n * upon groupings of data values that minimize variance within themselves\n * and maximize variation between themselves.\n *\n * For instance, cartographers often use jenks in order to choose which\n * values are assigned to which colors in a [choropleth](https://en.wikipedia.org/wiki/Choropleth_map)\n * map.\n *\n * @param {Array<number>} data input data, as an array of number values\n * @param {number} nClasses number of desired classes\n * @returns {Array<number>} array of class break positions\n * // split data into 3 break points\n * jenks([1, 2, 4, 5, 7, 9, 10, 20], 3) // = [1, 7, 20, 20]\n */\nfunction jenks(data, nClasses) {\