simple-statistics

Version:

Simple Statistics

github.com/simple-statistics/simple-statistics

1 lines • 241 kB

Source Map (JSON)

{"version":3,"file":"simple-statistics.mjs","sources":["../src/linear_regression.js","../src/linear_regression_line.js","../src/sum.js","../src/mean.js","../src/sum_nth_power_deviations.js","../src/variance.js","../src/standard_deviation.js","../src/r_squared.js","../src/mode_sorted.js","../src/numeric_sort.js","../src/mode.js","../src/mode_fast.js","../src/min.js","../src/max.js","../src/extent.js","../src/min_sorted.js","../src/max_sorted.js","../src/extent_sorted.js","../src/sum_simple.js","../src/product.js","../src/quantile_sorted.js","../src/quickselect.js","../src/quantile.js","../src/quantile_rank_sorted.js","../src/quantile_rank.js","../src/interquartile_range.js","../src/median.js","../src/median_absolute_deviation.js","../src/chunk.js","../src/sample_with_replacement.js","../src/shuffle_in_place.js","../src/shuffle.js","../src/sample.js","../src/make_matrix.js","../src/unique_count_sorted.js","../src/ckmeans.js","../src/jenks_breaks.js","../src/jenks_matrices.js","../src/jenks.js","../src/equal_interval_breaks.js","../src/sample_covariance.js","../src/sample_variance.js","../src/sample_standard_deviation.js","../src/sample_correlation.js","../src/sample_rank_correlation.js","../src/sample_skewness.js","../src/sample_kurtosis.js","../src/permutations_heap.js","../src/combinations.js","../src/combinations_replacement.js","../src/add_to_mean.js","../src/combine_means.js","../src/combine_variances.js","../src/geometric_mean.js","../src/log_average.js","../src/harmonic_mean.js","../src/mean_simple.js","../src/median_sorted.js","../src/subtract_from_mean.js","../src/root_mean_square.js","../src/coefficient_of_variation.js","../src/t_test.js","../src/t_test_two_sample.js","../src/wilcoxon_rank_sum.js","../src/bayesian_classifier.js","../src/perceptron.js","../src/epsilon.js","../src/factorial.js","../src/gamma.js","../src/gammaln.js","../src/bernoulli_distribution.js","../src/binomial_distribution.js","../src/poisson_distribution.js","../src/chi_squared_distribution_table.js","../src/chi_squared_goodness_of_fit.js","../src/kernel_density_estimation.js","../src/z_score.js","../src/standard_normal_table.js","../src/cumulative_std_normal_probability.js","../src/cumulative_std_logistic_probability.js","../src/error_function.js","../src/inverse_error_function.js","../src/probit.js","../src/logit.js","../src/permutation_test.js","../src/sign.js","../src/bisect.js","../src/euclidean_distance.js","../src/k_means_cluster.js","../src/silhouette.js","../src/silhouette_metric.js","../src/relative_error.js","../src/approx_equal.js"],"sourcesContent":["/**\n * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)\n * is a simple way to find a fitted line\n * between a set of coordinates. This algorithm finds the slope and y-intercept of a regression line\n * using the least sum of squares.\n *\n * @param {Array<Array<number>>} data an array of two-element of arrays,\n * like `[[0, 1], [2, 3]]`\n * @returns {Object} object containing slope and intersect of regression line\n * @example\n * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 }\n */\nfunction linearRegression(data) {\n let m;\n let b;\n\n // Store data length in a local variable to reduce\n // repeated object property lookups\n const dataLength = data.length;\n\n //if there's only one point, arbitrarily choose a slope of 0\n //and a y-intercept of whatever the y of the initial point is\n if (dataLength === 1) {\n m = 0;\n b = data[0][1];\n } else {\n // Initialize our sums and scope the `m` and `b`\n // variables that define the line.\n let sumX = 0;\n let sumY = 0;\n let sumXX = 0;\n let sumXY = 0;\n\n // Use local variables to grab point values\n // with minimal object property lookups\n let point;\n let x;\n let y;\n\n // Gather the sum of all x values, the sum of all\n // y values, and the sum of x^2 and (x*y) for each\n // value.\n //\n // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy\n for (let i = 0; i < dataLength; i++) {\n point = data[i];\n x = point[0];\n y = point[1];\n\n sumX += x;\n sumY += y;\n\n sumXX += x * x;\n sumXY += x * y;\n }\n\n // `m` is the slope of the regression line\n m =\n (dataLength * sumXY - sumX * sumY) /\n (dataLength * sumXX - sumX * sumX);\n\n // `b` is the y-intercept of the line.\n b = sumY / dataLength - (m * sumX) / dataLength;\n }\n\n // Return both values as an object.\n return {\n m: m,\n b: b\n };\n}\n\nexport default linearRegression;\n","/**\n * Given the output of `linearRegression`: an object\n * with `m` and `b` values indicating slope and intercept,\n * respectively, generate a line function that translates\n * x values into y values.\n *\n * @param {Object} mb object with `m` and `b` members, representing\n * slope and intersect of desired line\n * @returns {Function} method that computes y-value at any given\n * x-value on the line.\n * @example\n * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));\n * l(0) // = 0\n * l(2) // = 2\n * linearRegressionLine({ b: 0, m: 1 })(1); // => 1\n * linearRegressionLine({ b: 1, m: 1 })(1); // => 2\n */\nfunction linearRegressionLine(mb /*: { b: number, m: number }*/) {\n // Return a function that computes a `y` value for each\n // x value it is given, based on the values of `b` and `a`\n // that we just computed.\n return function (x) {\n return mb.b + mb.m * x;\n };\n}\n\nexport default linearRegressionLine;\n","/**\n * Our default sum is the [Kahan-Babuska algorithm](https://pdfs.semanticscholar.org/1760/7d467cda1d0277ad272deb2113533131dc09.pdf).\n * This method is an improvement over the classical\n * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm).\n * It aims at computing the sum of a list of numbers while correcting for\n * floating-point errors. Traditionally, sums are calculated as many\n * successive additions, each one with its own floating-point roundoff. These\n * losses in precision add up as the number of numbers increases. This alternative\n * algorithm is more accurate than the simple way of calculating sums by simple\n * addition.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x input\n * @return {number} sum of all input numbers\n * @example\n * sum([1, 2, 3]); // => 6\n */\nfunction sum(x) {\n // If the array is empty, we needn't bother computing its sum\n if (x.length === 0) {\n return 0;\n }\n\n // Initializing the sum as the first number in the array\n let sum = x[0];\n\n // Keeping track of the floating-point error correction\n let correction = 0;\n\n let transition;\n\n if (typeof sum !== \"number\") {\n return Number.NaN;\n }\n\n for (let i = 1; i < x.length; i++) {\n if (typeof x[i] !== \"number\") {\n return Number.NaN;\n }\n transition = sum + x[i];\n\n // Here we need to update the correction in a different fashion\n // if the new absolute value is greater than the absolute sum\n if (Math.abs(sum) >= Math.abs(x[i])) {\n correction += sum - transition + x[i];\n } else {\n correction += x[i] - transition + sum;\n }\n\n sum = transition;\n }\n\n // Returning the corrected sum\n return sum + correction;\n}\n\nexport default sum;\n","import sum from \"./sum.js\";\n\n/**\n * The mean, _also known as average_,\n * is the sum of all values over the number of values.\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @throws {Error} if the length of x is less than one\n * @returns {number} mean\n * @example\n * mean([0, 10]); // => 5\n */\nfunction mean(x) {\n if (x.length === 0) {\n throw new Error(\"mean requires at least one data point\");\n }\n\n return sum(x) / x.length;\n}\n\nexport default mean;\n","import mean from \"./mean.js\";\n\n/**\n * The sum of deviations to the Nth power.\n * When n=2 it's the sum of squared deviations.\n * When n=3 it's the sum of cubed deviations.\n *\n * @param {Array<number>} x\n * @param {number} n power\n * @returns {number} sum of nth power deviations\n *\n * @example\n * var input = [1, 2, 3];\n * // since the variance of a set is the mean squared\n * // deviations, we can calculate that with sumNthPowerDeviations:\n * sumNthPowerDeviations(input, 2) / input.length;\n */\nfunction sumNthPowerDeviations(x, n) {\n const meanValue = mean(x);\n let sum = 0;\n let tempValue;\n let i;\n\n // This is an optimization: when n is 2 (we're computing a number squared),\n // multiplying the number by itself is significantly faster than using\n // the Math.pow method.\n if (n === 2) {\n for (i = 0; i < x.length; i++) {\n tempValue = x[i] - meanValue;\n sum += tempValue * tempValue;\n }\n } else {\n for (i = 0; i < x.length; i++) {\n sum += Math.pow(x[i] - meanValue, n);\n }\n }\n\n return sum;\n}\n\nexport default sumNthPowerDeviations;\n","import sumNthPowerDeviations from \"./sum_nth_power_deviations.js\";\n\n/**\n * The [variance](http://en.wikipedia.org/wiki/Variance)\n * is the sum of squared deviations from the mean.\n *\n * This is an implementation of variance, not sample variance:\n * see the `sampleVariance` method if you want a sample measure.\n *\n * @param {Array<number>} x a population of one or more data points\n * @returns {number} variance: a value greater than or equal to zero.\n * zero indicates that all values are identical.\n * @throws {Error} if x's length is 0\n * @example\n * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665\n */\nfunction variance(x) {\n if (x.length === 0) {\n throw new Error(\"variance requires at least one data point\");\n }\n\n // Find the mean of squared deviations between the\n // mean value and each value.\n return sumNthPowerDeviations(x, 2) / x.length;\n}\n\nexport default variance;\n","import variance from \"./variance.js\";\n\n/**\n * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)\n * is the square root of the variance. This is also known as the population\n * standard deviation. It's useful for measuring the amount\n * of variation or dispersion in a set of values.\n *\n * Standard deviation is only appropriate for full-population knowledge: for\n * samples of a population, {@link sampleStandardDeviation} is\n * more appropriate.\n *\n * @param {Array<number>} x input\n * @returns {number} standard deviation\n * @example\n * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4\n * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2\n */\nfunction standardDeviation(x) {\n if (x.length === 1) {\n return 0;\n }\n const v = variance(x);\n return Math.sqrt(v);\n}\n\nexport default standardDeviation;\n","/**\n * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)\n * value of data compared with a function `f`\n * is the sum of the squared differences between the prediction\n * and the actual value.\n *\n * @param {Array<Array<number>>} x input data: this should be doubly-nested\n * @param {Function} func function called on `[i][0]` values within the dataset\n * @returns {number} r-squared value\n * @example\n * var samples = [[0, 0], [1, 1]];\n * var regressionLine = linearRegressionLine(linearRegression(samples));\n * rSquared(samples, regressionLine); // = 1 this line is a perfect fit\n */\nfunction rSquared(x, func) {\n if (x.length < 2) {\n return 1;\n }\n\n // Compute the average y value for the actual\n // data set in order to compute the\n // _total sum of squares_\n let sum = 0;\n for (let i = 0; i < x.length; i++) {\n sum += x[i][1];\n }\n const average = sum / x.length;\n\n // Compute the total sum of squares - the\n // squared difference between each point\n // and the average of all points.\n let sumOfSquares = 0;\n for (let j = 0; j < x.length; j++) {\n sumOfSquares += Math.pow(average - x[j][1], 2);\n }\n\n // Finally estimate the error: the squared\n // difference between the estimate and the actual data\n // value at each point.\n let err = 0;\n for (let k = 0; k < x.length; k++) {\n err += Math.pow(x[k][1] - func(x[k][0]), 2);\n }\n\n // As the error grows larger, its ratio to the\n // sum of squares increases and the r squared\n // value grows lower.\n return 1 - err / sumOfSquares;\n}\n\nexport default rSquared;\n","/**\n * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number\n * that appears in a list the highest number of times.\n * There can be multiple modes in a list: in the event of a tie, this\n * algorithm will return the most recently seen mode.\n *\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * This runs in `O(n)` because the input is sorted.\n *\n * @param {Array<number>} sorted a sample of one or more data points\n * @returns {number} mode\n * @throws {Error} if sorted is empty\n * @example\n * modeSorted([0, 0, 1]); // => 0\n */\nfunction modeSorted(sorted) {\n // Handle edge cases:\n // The mode of an empty list is undefined\n if (sorted.length === 0) {\n throw new Error(\"mode requires at least one data point\");\n }\n if (sorted.length === 1) {\n return sorted[0];\n }\n\n // This assumes it is dealing with an array of size > 1, since size\n // 0 and 1 are handled immediately. Hence it starts at index 1 in the\n // array.\n let last = sorted[0];\n // store the mode as we find new modes\n let value = Number.NaN;\n // store how many times we've seen the mode\n let maxSeen = 0;\n // how many times the current candidate for the mode\n // has been seen\n let seenThis = 1;\n\n // end at sorted.length + 1 to fix the case in which the mode is\n // the highest number that occurs in the sequence. the last iteration\n // compares sorted[i], which is undefined, to the highest number\n // in the series\n for (let i = 1; i < sorted.length + 1; i++) {\n // we're seeing a new number pass by\n if (sorted[i] !== last) {\n // the last number is the new mode since we saw it more\n // often than the old one\n if (seenThis > maxSeen) {\n maxSeen = seenThis;\n value = last;\n }\n seenThis = 1;\n last = sorted[i];\n // if this isn't a new number, it's one more occurrence of\n // the potential mode\n } else {\n seenThis++;\n }\n }\n return value;\n}\n\nexport default modeSorted;\n","/**\n * Sort an array of numbers by their numeric value, ensuring that the\n * array is not changed in place.\n *\n * This is necessary because the default behavior of .sort\n * in JavaScript is to sort arrays as string values\n *\n * [1, 10, 12, 102, 20].sort()\n * // output\n * [1, 10, 102, 12, 20]\n *\n * @param {Array<number>} x input array\n * @return {Array<number>} sorted array\n * @private\n * @example\n * numericSort([3, 2, 1]) // => [1, 2, 3]\n */\nfunction numericSort(x) {\n return (\n x\n // ensure the array is not changed in-place\n .slice()\n // comparator function that treats input as numeric\n .sort(function (a, b) {\n return a - b;\n })\n );\n}\n\nexport default numericSort;\n","import modeSorted from \"./mode_sorted.js\";\nimport numericSort from \"./numeric_sort.js\";\n\n/**\n * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number\n * that appears in a list the highest number of times.\n * There can be multiple modes in a list: in the event of a tie, this\n * algorithm will return the most recently seen mode.\n *\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * This runs in `O(n log(n))` because it needs to sort the array internally\n * before running an `O(n)` search to find the mode.\n *\n * @param {Array<number>} x input\n * @returns {number} mode\n * @example\n * mode([0, 0, 1]); // => 0\n */\nfunction mode(x) {\n // Sorting the array lets us iterate through it below and be sure\n // that every time we see a new number it's new and we'll never\n // see the same number twice\n return modeSorted(numericSort(x));\n}\n\nexport default mode;\n","/* globals Map: false */\n\n/**\n * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number\n * that appears in a list the highest number of times.\n * There can be multiple modes in a list: in the event of a tie, this\n * algorithm will return the most recently seen mode.\n *\n * modeFast uses a Map object to keep track of the mode, instead of the approach\n * used with `mode`, a sorted array. As a result, it is faster\n * than `mode` and supports any data type that can be compared with `==`.\n * It also requires a\n * [JavaScript environment with support for Map](https://kangax.github.io/compat-table/es6/#test-Map),\n * and will throw an error if Map is not available.\n *\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * @param {Array<*>} x a sample of one or more data points\n * @returns {?*} mode\n * @throws {ReferenceError} if the JavaScript environment doesn't support Map\n * @throws {Error} if x is empty\n * @example\n * modeFast(['rabbits', 'rabbits', 'squirrels']); // => 'rabbits'\n */\nfunction modeFast(x) {\n // This index will reflect the incidence of different values, indexing\n // them like\n // { value: count }\n const index = new Map();\n\n // A running `mode` and the number of times it has been encountered.\n let mode;\n let modeCount = 0;\n\n for (let i = 0; i < x.length; i++) {\n let newCount = index.get(x[i]);\n if (newCount === undefined) {\n newCount = 1;\n } else {\n newCount++;\n }\n if (newCount > modeCount) {\n mode = x[i];\n modeCount = newCount;\n }\n index.set(x[i], newCount);\n }\n\n if (modeCount === 0) {\n throw new Error(\"mode requires at last one data point\");\n }\n\n return mode;\n}\n\nexport default modeFast;\n","/**\n * The min is the lowest number in the array.\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @throws {Error} if the length of x is less than one\n * @returns {number} minimum value\n * @example\n * min([1, 5, -10, 100, 2]); // => -10\n */\nfunction min(x) {\n if (x.length === 0) {\n throw new Error(\"min requires at least one data point\");\n }\n\n let value = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] < value) {\n value = x[i];\n }\n }\n return value;\n}\n\nexport default min;\n","/**\n * This computes the maximum number in an array.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {number} maximum value\n * @throws {Error} if the length of x is less than one\n * @example\n * max([1, 2, 3, 4]);\n * // => 4\n */\nfunction max(x) {\n if (x.length === 0) {\n throw new Error(\"max requires at least one data point\");\n }\n\n let value = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] > value) {\n value = x[i];\n }\n }\n return value;\n}\n\nexport default max;\n","/**\n * This computes the minimum & maximum number in an array.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x sample of one or more data points\n * @returns {Array<number>} minimum & maximum value\n * @throws {Error} if the length of x is less than one\n * @example\n * extent([1, 2, 3, 4]);\n * // => [1, 4]\n */\nfunction extent(x) {\n if (x.length === 0) {\n throw new Error(\"extent requires at least one data point\");\n }\n\n let min = x[0];\n let max = x[0];\n for (let i = 1; i < x.length; i++) {\n if (x[i] > max) {\n max = x[i];\n }\n if (x[i] < min) {\n min = x[i];\n }\n }\n return [min, max];\n}\n\nexport default extent;\n","/**\n * The minimum is the lowest number in the array. With a sorted array,\n * the first element in the array is always the smallest, so this calculation\n * can be done in one step, or constant time.\n *\n * @param {Array<number>} x input\n * @returns {number} minimum value\n * @example\n * minSorted([-100, -10, 1, 2, 5]); // => -100\n */\nfunction minSorted(x) {\n return x[0];\n}\n\nexport default minSorted;\n","/**\n * The maximum is the highest number in the array. With a sorted array,\n * the last element in the array is always the largest, so this calculation\n * can be done in one step, or constant time.\n *\n * @param {Array<number>} x input\n * @returns {number} maximum value\n * @example\n * maxSorted([-100, -10, 1, 2, 5]); // => 5\n */\nfunction maxSorted(x) {\n return x[x.length - 1];\n}\n\nexport default maxSorted;\n","/**\n * The extent is the lowest & highest number in the array. With a sorted array,\n * the first element in the array is always the lowest while the last element is always the largest, so this calculation\n * can be done in one step, or constant time.\n *\n * @param {Array<number>} x input\n * @returns {Array<number>} minimum & maximum value\n * @example\n * extentSorted([-100, -10, 1, 2, 5]); // => [-100, 5]\n */\nfunction extentSorted(x) {\n return [x[0], x[x.length - 1]];\n}\n\nexport default extentSorted;\n","/**\n * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array\n * is the result of adding all numbers together, starting from zero.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x input\n * @return {number} sum of all input numbers\n * @example\n * sumSimple([1, 2, 3]); // => 6\n */\nfunction sumSimple(x) {\n let value = 0;\n for (let i = 0; i < x.length; i++) {\n if (typeof x[i] !== \"number\") {\n return Number.NaN;\n }\n value += x[i];\n }\n return value;\n}\n\nexport default sumSimple;\n","/**\n * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array\n * is the result of multiplying all numbers together, starting using one as the multiplicative identity.\n *\n * This runs in `O(n)`, linear time, with respect to the length of the array.\n *\n * @param {Array<number>} x input\n * @return {number} product of all input numbers\n * @example\n * product([1, 2, 3, 4]); // => 24\n */\nfunction product(x) {\n let value = 1;\n for (let i = 0; i < x.length; i++) {\n value *= x[i];\n }\n return value;\n}\n\nexport default product;\n","/**\n * This is the internal implementation of quantiles: when you know\n * that the order is sorted, you don't need to re-sort it, and the computations\n * are faster.\n *\n * @param {Array<number>} x sample of one or more data points\n * @param {number} p desired quantile: a number between 0 to 1, inclusive\n * @returns {number} quantile value\n * @throws {Error} if p ix outside of the range from 0 to 1\n * @throws {Error} if x is empty\n * @example\n * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9\n */\nfunction quantileSorted(x, p) {\n const idx = x.length * p;\n if (x.length === 0) {\n throw new Error(\"quantile requires at least one data point.\");\n } else if (p < 0 || p > 1) {\n throw new Error(\"quantiles must be between 0 and 1\");\n } else if (p === 1) {\n // If p is 1, directly return the last element\n return x[x.length - 1];\n } else if (p === 0) {\n // If p is 0, directly return the first element\n return x[0];\n } else if (idx % 1 !== 0) {\n // If p is not integer, return the next element in array\n return x[Math.ceil(idx) - 1];\n } else if (x.length % 2 === 0) {\n // If the list has even-length, we'll take the average of this number\n // and the next value, if there is one\n return (x[idx - 1] + x[idx]) / 2;\n } else {\n // Finally, in the simple case of an integer value\n // with an odd-length list, return the x value at the index.\n return x[idx];\n }\n}\n\nexport default quantileSorted;\n","/**\n * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.\n * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.\n *\n * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm\n *\n * @param {Array<number>} arr input array\n * @param {number} k pivot index\n * @param {number} [left] left index\n * @param {number} [right] right index\n * @returns {void} mutates input array\n * @example\n * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];\n * quickselect(arr, 8);\n * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]\n */\nfunction quickselect(arr, k, left, right) {\n left = left || 0;\n right = right || arr.length - 1;\n\n while (right > left) {\n // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time\n if (right - left > 600) {\n const n = right - left + 1;\n const m = k - left + 1;\n const z = Math.log(n);\n const s = 0.5 * Math.exp((2 * z) / 3);\n let sd = 0.5 * Math.sqrt((z * s * (n - s)) / n);\n if (m - n / 2 < 0) sd *= -1;\n const newLeft = Math.max(left, Math.floor(k - (m * s) / n + sd));\n const newRight = Math.min(\n right,\n Math.floor(k + ((n - m) * s) / n + sd)\n );\n quickselect(arr, k, newLeft, newRight);\n }\n\n const t = arr[k];\n let i = left;\n let j = right;\n\n swap(arr, left, k);\n if (arr[right] > t) swap(arr, left, right);\n\n while (i < j) {\n swap(arr, i, j);\n i++;\n j--;\n while (arr[i] < t) i++;\n while (arr[j] > t) j--;\n }\n\n if (arr[left] === t) swap(arr, left, j);\n else {\n j++;\n swap(arr, j, right);\n }\n\n if (j <= k) left = j + 1;\n if (k <= j) right = j - 1;\n }\n}\n\nfunction swap(arr, i, j) {\n const tmp = arr[i];\n arr[i] = arr[j];\n arr[j] = tmp;\n}\n\nexport default quickselect;\n","import quantileSorted from \"./quantile_sorted.js\";\nimport quickselect from \"./quickselect.js\";\n\n/**\n * The [quantile](https://en.wikipedia.org/wiki/Quantile):\n * this is a population quantile, since we assume to know the entire\n * dataset in this library. This is an implementation of the\n * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)\n * algorithm from wikipedia.\n *\n * Sample is a one-dimensional array of numbers,\n * and p is either a decimal number from 0 to 1 or an array of decimal\n * numbers from 0 to 1.\n * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing\n * with decimal values.\n * When p is an array, the result of the function is also an array containing the appropriate\n * quantiles in input order\n *\n * @param {Array<number>} x sample of one or more numbers\n * @param {Array<number> | number} p the desired quantile, as a number between 0 and 1\n * @returns {number} quantile\n * @example\n * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9\n */\nfunction quantile(x, p) {\n const copy = x.slice();\n\n if (Array.isArray(p)) {\n // rearrange elements so that each element corresponding to a requested\n // quantile is on a place it would be if the array was fully sorted\n multiQuantileSelect(copy, p);\n // Initialize the result array\n const results = [];\n // For each requested quantile\n for (let i = 0; i < p.length; i++) {\n results[i] = quantileSorted(copy, p[i]);\n }\n return results;\n } else {\n const idx = quantileIndex(copy.length, p);\n quantileSelect(copy, idx, 0, copy.length - 1);\n return quantileSorted(copy, p);\n }\n}\n\nfunction quantileSelect(arr, k, left, right) {\n if (k % 1 === 0) {\n quickselect(arr, k, left, right);\n } else {\n k = Math.floor(k);\n quickselect(arr, k, left, right);\n quickselect(arr, k + 1, k + 1, right);\n }\n}\n\nfunction multiQuantileSelect(arr, p) {\n const indices = [0];\n for (let i = 0; i < p.length; i++) {\n indices.push(quantileIndex(arr.length, p[i]));\n }\n indices.push(arr.length - 1);\n indices.sort(compare);\n\n const stack = [0, indices.length - 1];\n\n while (stack.length) {\n const r = Math.ceil(stack.pop());\n const l = Math.floor(stack.pop());\n if (r - l <= 1) continue;\n\n const m = Math.floor((l + r) / 2);\n quantileSelect(\n arr,\n indices[m],\n Math.floor(indices[l]),\n Math.ceil(indices[r])\n );\n\n stack.push(l, m, m, r);\n }\n}\n\nfunction compare(a, b) {\n return a - b;\n}\n\nfunction quantileIndex(len, p) {\n const idx = len * p;\n if (p === 1) {\n // If p is 1, directly return the last index\n return len - 1;\n } else if (p === 0) {\n // If p is 0, directly return the first index\n return 0;\n } else if (idx % 1 !== 0) {\n // If index is not integer, return the next index in array\n return Math.ceil(idx) - 1;\n } else if (len % 2 === 0) {\n // If the list has even-length, we'll return the middle of two indices\n // around quantile to indicate that we need an average value of the two\n return idx - 0.5;\n } else {\n // Finally, in the simple case of an integer index\n // with an odd-length list, return the index\n return idx;\n }\n}\n\nexport default quantile;\n","/* eslint no-bitwise: 0 */\n\n/**\n * This function returns the quantile in which one would find the given value in\n * the given array. With a sorted array, leveraging binary search, we can find\n * this information in logarithmic time.\n *\n * @param {Array<number>} x input\n * @returns {number} value value\n * @example\n * quantileRankSorted([1, 2, 3, 4], 3); // => 0.75\n * quantileRankSorted([1, 2, 3, 3, 4], 3); // => 0.7\n * quantileRankSorted([1, 2, 3, 4], 6); // => 1\n * quantileRankSorted([1, 2, 3, 3, 5], 4); // => 0.8\n */\nfunction quantileRankSorted(x, value) {\n // Value is lesser than any value in the array\n if (value < x[0]) {\n return 0;\n }\n\n // Value is greater than any value in the array\n if (value > x[x.length - 1]) {\n return 1;\n }\n\n let l = lowerBound(x, value);\n\n // Value is not in the array\n if (x[l] !== value) {\n return l / x.length;\n }\n\n l++;\n\n const u = upperBound(x, value);\n\n // The value exists only once in the array\n if (u === l) {\n return l / x.length;\n }\n\n // Here, we are basically computing the mean of the range of indices\n // containing our searched value. But, instead, of initializing an\n // array and looping over it, there is a dedicated math formula that\n // we apply below to get the result.\n const r = u - l + 1;\n const sum = (r * (u + l)) / 2;\n const mean = sum / r;\n\n return mean / x.length;\n}\n\nfunction lowerBound(x, value) {\n let mid = 0;\n let lo = 0;\n let hi = x.length;\n\n while (lo < hi) {\n mid = (lo + hi) >>> 1;\n\n if (value <= x[mid]) {\n hi = mid;\n } else {\n lo = -~mid;\n }\n }\n\n return lo;\n}\n\nfunction upperBound(x, value) {\n let mid = 0;\n let lo = 0;\n let hi = x.length;\n\n while (lo < hi) {\n mid = (lo + hi) >>> 1;\n\n if (value >= x[mid]) {\n lo = -~mid;\n } else {\n hi = mid;\n }\n }\n\n return lo;\n}\n\nexport default quantileRankSorted;\n","import numericSort from \"./numeric_sort.js\";\nimport quantileRankSorted from \"./quantile_rank_sorted.js\";\n\n/**\n * This function returns the quantile in which one would find the given value in\n * the given array. It will copy and sort your array before each run, so\n * if you know your array is already sorted, you should use `quantileRankSorted`\n * instead.\n *\n * @param {Array<number>} x input\n * @returns {number} value value\n * @example\n * quantileRank([4, 3, 1, 2], 3); // => 0.75\n * quantileRank([4, 3, 2, 3, 1], 3); // => 0.7\n * quantileRank([2, 4, 1, 3], 6); // => 1\n * quantileRank([5, 3, 1, 2, 3], 4); // => 0.8\n */\nfunction quantileRank(x, value) {\n // Cloning and sorting the array\n const sortedCopy = numericSort(x);\n\n return quantileRankSorted(sortedCopy, value);\n}\n\nexport default quantileRank;\n","import quantile from \"./quantile.js\";\n\n/**\n * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is\n * a measure of statistical dispersion, or how scattered, spread, or\n * concentrated a distribution is. It's computed as the difference between\n * the third quartile and first quartile.\n *\n * @param {Array<number>} x sample of one or more numbers\n * @returns {number} interquartile range: the span between lower and upper quartile,\n * 0.25 and 0.75\n * @example\n * interquartileRange([0, 1, 2, 3]); // => 2\n */\nfunction interquartileRange(x) {\n // Interquartile range is the span between the upper quartile,\n // at `0.75`, and lower quartile, `0.25`\n const q1 = quantile(x, 0.75);\n const q2 = quantile(x, 0.25);\n\n if (typeof q1 === \"number\" && typeof q2 === \"number\") {\n return q1 - q2;\n }\n}\n\nexport default interquartileRange;\n","import quantile from \"./quantile.js\";\n\n/**\n * The [median](http://en.wikipedia.org/wiki/Median) is\n * the middle number of a list. This is often a good indicator of 'the middle'\n * when there are outliers that skew the `mean()` value.\n * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):\n * a method of finding a typical or central value of a set of numbers.\n *\n * The median isn't necessarily one of the elements in the list: the value\n * can be the average of two elements if the list has an even length\n * and the two central values are different.\n *\n * @param {Array<number>} x input\n * @returns {number} median value\n * @example\n * median([10, 2, 5, 100, 2, 1]); // => 3.5\n */\nfunction median(x) {\n return +quantile(x, 0.5);\n}\n\nexport default median;\n","import median from \"./median.js\";\n\n/**\n * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is\n * a robust measure of statistical\n * dispersion. It is more resilient to outliers than the standard deviation.\n *\n * @param {Array<number>} x input array\n * @returns {number} median absolute deviation\n * @example\n * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1\n */\nfunction medianAbsoluteDeviation(x) {\n const medianValue = median(x);\n const medianAbsoluteDeviations = [];\n\n // Make a list of absolute deviations from the median\n for (let i = 0; i < x.length; i++) {\n medianAbsoluteDeviations.push(Math.abs(x[i] - medianValue));\n }\n\n // Find the median value of that list\n return median(medianAbsoluteDeviations);\n}\n\nexport default medianAbsoluteDeviation;\n","/**\n * Split an array into chunks of a specified size. This function\n * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)\n * function, and thus will insert smaller-sized chunks at the end if\n * the input size is not divisible by the chunk size.\n *\n * `x` is expected to be an array, and `chunkSize` a number.\n * The `x` array can contain any kind of data.\n *\n * @param {Array} x a sample\n * @param {number} chunkSize size of each output array. must be a positive integer\n * @returns {Array<Array>} a chunked array\n * @throws {Error} if chunk size is less than 1 or not an integer\n * @example\n * chunk([1, 2, 3, 4, 5, 6], 2);\n * // => [[1, 2], [3, 4], [5, 6]]\n */\nfunction chunk(x, chunkSize) {\n // a list of result chunks, as arrays in an array\n const output = [];\n\n // `chunkSize` must be zero or higher - otherwise the loop below,\n // in which we call `start += chunkSize`, will loop infinitely.\n // So, we'll detect and throw in that case to indicate\n // invalid input.\n if (chunkSize < 1) {\n throw new Error(\"chunk size must be a positive number\");\n }\n\n if (Math.floor(chunkSize) !== chunkSize) {\n throw new Error(\"chunk size must be an integer\");\n }\n\n // `start` is the index at which `.slice` will start selecting\n // new array elements\n for (let start = 0; start < x.length; start += chunkSize) {\n // for each chunk, slice that part of the array and add it\n // to the output. The `.slice` function does not change\n // the original array.\n output.push(x.slice(start, start + chunkSize));\n }\n return output;\n}\n\nexport default chunk;\n","/**\n * Sampling with replacement is a type of sampling that allows the same\n * item to be picked out of a population more than once.\n *\n * @param {Array<*>} x an array of any kind of value\n * @param {number} n count of how many elements to take\n * @param {Function} [randomSource=Math.random] an optional entropy source that\n * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)\n * @return {Array} n sampled items from the population\n * @example\n * var values = [1, 2, 3, 4];\n * sampleWithReplacement(values, 2); // returns 2 random values, like [2, 4];\n */\nfunction sampleWithReplacement(x, n, randomSource) {\n if (x.length === 0) {\n return [];\n }\n\n // a custom random number source can be provided if you want to use\n // a fixed seed or another random number generator, like\n // [random-js](https://www.npmjs.org/package/random-js)\n randomSource = randomSource || Math.random;\n\n const length = x.length;\n const sample = [];\n\n for (let i = 0; i < n; i++) {\n const index = Math.floor(randomSource() * length);\n\n sample.push(x[index]);\n }\n\n return sample;\n}\n\nexport default sampleWithReplacement;\n","/**\n * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)\n * in-place - which means that it **will change the order of the original\n * array by reference**.\n *\n * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)\n * of a set.\n *\n * @param {Array} x sample of one or more numbers\n * @param {Function} [randomSource=Math.random] an optional entropy source that\n * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)\n * @returns {Array} x\n * @example\n * var x = [1, 2, 3, 4];\n * shuffleInPlace(x);\n * // x is shuffled to a value like [2, 1, 4, 3]\n */\nfunction shuffleInPlace(x, randomSource) {\n // a custom random number source can be provided if you want to use\n // a fixed seed or another random number generator, like\n // [random-js](https://www.npmjs.org/package/random-js)\n randomSource = randomSource || Math.random;\n\n // store the current length of the x to determine\n // when no elements remain to shuffle.\n let length = x.length;\n\n // temporary is used to hold an item when it is being\n // swapped between indices.\n let temporary;\n\n // The index to swap at each stage.\n let index;\n\n // While there are still items to shuffle\n while (length > 0) {\n // choose a random index within the subset of the array\n // that is not yet shuffled\n index = Math.floor(randomSource() * length--);\n\n // store the value that we'll move temporarily\n temporary = x[length];\n\n // swap the value at `x[length]` with `x[index]`\n x[length] = x[index];\n x[index] = temporary;\n }\n\n return x;\n}\n\nexport default shuffleInPlace;\n","import shuffleInPlace from \"./shuffle_in_place.js\";\n\n/**\n * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)\n * is a fast way to create a random permutation of a finite set. This is\n * a function around `shuffle_in_place` that adds the guarantee that\n * it will not modify its input.\n *\n * @param {Array} x sample of 0 or more numbers\n * @param {Function} [randomSource=Math.random] an optional entropy source that\n * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)\n * @return {Array} shuffled version of input\n * @example\n * var shuffled = shuffle([1, 2, 3, 4]);\n * shuffled; // = [2, 3, 1, 4] or any other random permutation\n */\nfunction shuffle(x, randomSource) {\n // slice the original array so that it is not modified\n const sample = x.slice();\n\n // and then shuffle that shallow-copied array, in place\n return shuffleInPlace(sample, randomSource);\n}\n\nexport default shuffle;\n","import shuffle from \"./shuffle.js\";\n\n/**\n * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)\n * from a given array of `n` elements.\n *\n * The sampled values will be in any order, not necessarily the order\n * they appear in the input.\n *\n * @param {Array<any>} x input array. can contain any type\n * @param {number} n count of how many elements to take\n * @param {Function} [randomSource=Math.random] an optional entropy source that\n * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)\n * @return {Array} subset of n elements in original array\n *\n * @example\n * var values = [1, 2, 4, 5, 6, 7, 8, 9];\n * sample(values, 3); // returns 3 random values, like [2, 5, 8];\n */\nfunction sample(x, n, randomSource) {\n // shuffle the original array using a fisher-yates shuffle\n const shuffled = shuffle(x, randomSource);\n\n // and then return a subset of it - the first `n` elements.\n return shuffled.slice(0, n);\n}\n\nexport default sample;\n","/**\n * Create a new column x row matrix.\n *\n * @private\n * @param {number} columns\n * @param {number} rows\n * @return {Array<Array<number>>} matrix\n * @example\n * makeMatrix(10, 10);\n */\nfunction makeMatrix(columns, rows) {\n const matrix = [];\n for (let i = 0; i < columns; i++) {\n const column = [];\n for (let j = 0; j < rows; j++) {\n column.push(0);\n }\n matrix.push(column);\n }\n return matrix;\n}\n\nexport default makeMatrix;\n","/**\n * For a sorted input, counting the number of unique values\n * is possible in constant time and constant memory. This is\n * a simple implementation of the algorithm.\n *\n * Values are compared with `===`, so objects and non-primitive objects\n * are not handled in any special way.\n *\n * @param {Array<*>} x an array of any kind of value\n * @returns {number} count of unique values\n * @example\n * uniqueCountSorted([1, 2, 3]); // => 3\n * uniqueCountSorted([1, 1, 1]); // => 1\n */\nfunction uniqueCountSorted(x) {\n let uniqueValueCount = 0;\n let lastSeenValue;\n for (let i = 0; i < x.length; i++) {\n if (i === 0 || x[i] !== lastSeenValue) {\n lastSeenValue = x[i];\n uniqueValueCount++;\n }\n }\n return uniqueValueCount;\n}\n\nexport default uniqueCountSorted;\n","import makeMatrix from \"./make_matrix.js\";\nimport numericSort from \"./numeric_sort.js\";\nimport uniqueCountSorted from \"./unique_count_sorted.js\";\n\n/**\n * Generates incrementally computed values based on the sums and sums of\n * squares for the data array\n *\n * @private\n * @param {number} j\n * @param {number} i\n * @param {Array<number>} sums\n * @param {Array<number>} sumsOfSquares\n * @return {number}\n * @example\n * ssq(0, 1, [-1, 0, 2], [1, 1, 5]);\n */\nfunction ssq(j, i, sums, sumsOfSquares) {\n let sji; // s(j, i)\n if (j > 0) {\n const muji = (sums[i] - sums[j - 1]) / (i - j + 1); // mu(j, i)\n sji =\n sumsOfSquares[i] - sumsOfSquares[j - 1] - (i - j + 1) * muji * muji;\n } else {\n sji = sumsOfSquares[i] - (sums[i] * sums[i]) / (i + 1);\n }\n if (sji < 0) {\n return 0;\n }\n return sji;\n}\n\n/**\n * Function that recursively divides and conquers computations\n * for cluster j\n *\n * @private\n * @param {number} iMin Minimum index in cluster to be computed\n * @param {number} iMax Maximum index in cluster to be computed\n * @param {number} cluster Index of the cluster currently being computed\n * @param {Array<Array<number>>} matrix\n * @param {Array<Array<number>>} backtrackMatrix\n * @param {Array<number>} sums\n * @param {Array<number>} sumsOfSquares\n */\nfunction fillMatrixColumn(\n iMin,\n iMax,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n) {\n if (iMin > iMax) {\n return;\n }\n\n // Start at midpoint between iMin and iMax\n const i = Math.floor((iMin + iMax) / 2);\n\n matrix[cluster][i] = matrix[cluster - 1][i - 1];\n backtrackMatrix[cluster][i] = i;\n\n let jlow = cluster; // the lower end for j\n\n if (iMin > cluster) {\n jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0);\n }\n jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0);\n\n let jhigh = i - 1; // the upper end for j\n if (iMax < matrix[0].length - 1) {\n /* c8 ignore start */\n jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0);\n /* c8 ignore end */\n }\n\n let sji;\n let sjlowi;\n let ssqjlow;\n let ssqj;\n for (let j = jhigh; j >= jlow; --j) {\n sji = ssq(j, i, sums, sumsOfSquares);\n\n if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) {\n break;\n }\n\n // Examine the lower bound of the cluster border\n sjlowi = ssq(jlow, i, sums, sumsOfSquares);\n\n ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1];\n\n if (ssqjlow < matrix[cluster][i]) {\n // Shrink the lower bound\n matrix[cluster][i] = ssqjlow;\n backtrackMatrix[cluster][i] = jlow;\n }\n jlow++;\n\n ssqj = sji + matrix[cluster - 1][j - 1];\n if (ssqj < matrix[cluster][i]) {\n matrix[cluster][i] = ssqj;\n backtrackMatrix[cluster][i] = j;\n }\n }\n\n fillMatrixColumn(\n iMin,\n i - 1,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n );\n fillMatrixColumn(\n i + 1,\n iMax,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfSquares\n );\n}\n\n/**\n * Initializes the main matrices used in Ckmeans and kicks\n * off the divide and conquer cluster computation strategy\n *\n * @private\n * @param {Array<number>} data sorted array of values\n * @param {Array<Array<number>>} matrix\n * @param {Array<Array<number>>} backtrackMatrix\n */\nfunction fillMatrices(data, matrix, backtrackMatrix) {\n const nValues = matrix[0].length;\n\n // Shift values by the median to improve numeric stability\n const shift = data[Math.floor(nValues / 2)];\n\n // Cumulative sum and cumulative sum of squares for all values in data array\n const sums = [];\n const sumsOfSquares = [];\n\n // Initialize first column in matrix & backtrackMatrix\n for (let i = 0, shiftedValue; i < nValues; ++i) {\n shiftedValue = data[i] - shift;\n if (i === 0) {\n sums.push(shiftedValue);\n sumsOfSquares.push(shiftedValue * shiftedValue);\n } else {\n sums.push(sums[i - 1] + shiftedValue);\n sumsOfSquares.push(\n sumsOfSquares[i - 1] + shiftedValue * shiftedValue\n );\n }\n\n // Initialize for cluster = 0\n matrix[0][i] = ssq(0, i, sums, sumsOfSquares);\n backtrackMatrix[0][i] = 0;\n }\n\n // Initialize the rest of the columns\n let iMin;\n for (let cluster = 1; cluster < matrix.length; ++cluster) {\n if (cluster < matrix.length - 1) {\n iMin = cluster;\n } else {\n // No need to compute matrix[K-1][0] ... matrix[K-1][N-2]\n iMin = nValues - 1;\n }\n\n fillMatrixColumn(\n iMin,\n nValues - 1,\n cluster,\n matrix,\n backtrackMatrix,\n sums,\n sumsOfS