UNPKG

@stdlib/stats

Version:

Standard library statistical functions.

523 lines (492 loc) 20.2 kB
/* * @license Apache-2.0 * * Copyright (c) 2021 The Stdlib Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // TypeScript Version: 4.1 /* eslint-disable max-lines */ import anova1 = require( './../../anova1' ); import bartlettTest = require( './../../bartlett-test' ); import base = require( './../../base' ); import binomialTest = require( './../../binomial-test' ); import chi2gof = require( './../../chi2gof' ); import chi2test = require( './../../chi2test' ); import flignerTest = require( './../../fligner-test' ); import incr = require( './../../incr' ); import iterators = require( './../../iter' ); import kde2d = require( './../../kde2d' ); import kruskalTest = require( './../../kruskal-test' ); import kstest = require( './../../kstest' ); import leveneTest = require( './../../levene-test' ); import lowess = require( './../../lowess' ); import padjust = require( './../../padjust' ); import pcorrtest = require( './../../pcorrtest' ); import ranks = require( './../../ranks' ); import ttest = require( './../../ttest' ); import ttest2 = require( './../../ttest2' ); import vartest = require( './../../vartest' ); import wilcoxon = require( './../../wilcoxon' ); import ztest = require( './../../ztest' ); import ztest2 = require( './../../ztest2' ); /** * Interface describing the `stats` namespace. */ interface Namespace { /** * Perform a one-way analysis of variance (ANOVA). * * @param x - measured values * @param factor - array of treatments * @param options - function options * @param options.alpha - significance level (default: 0.05) * @throws must provide valid options * @throws `factor` must have at least two unique elements * @throws length of `x` must be greater than or equal to two * @throws `x` and `factor` must have the same length * @returns test results * * @example * var x = [ 1, 3, 5, 2, 4, 6, 8, 7, 10, 11, 12, 15 ]; * var f = [ 'control', 'treatA', 'treatB', 'treatC', 'control', 'treatA', 'treatB', 'treatC', 'control', 'treatA', 'treatB', 'treatC' ]; * var out = ns.anova1( x, f ); * // returns {...} */ anova1: typeof anova1; /** * Compute Bartlett’s test for equal variances. * * @param arr0 - first numeric array * @param args - subsequent numeric arrays and an optional options object * @throws must provide valid options * @returns test results */ bartlettTest: typeof bartlettTest; /** * Base (i.e., lower-level) statistical functions. */ base: typeof base; /** * Computes an exact test for the success probability in a Bernoulli experiment. * * @param x - number of successes or two-element array with successes and failures * @param n - total number of observations * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less`, or `greater`; default: 'two-sided') * @param options.p - success probability under H0 (default: 0.5) * @throws must provide valid options * @returns test results * * @example * var out = ns.binomialTest( 682, 925 ); * // returns {...} * * @example * var out = ns.binomialTest( [ 682, 243 ] ); * // returns {...} */ binomialTest: typeof binomialTest; /** * Performs a chi-square goodness-of-fit test. * * @param x - observation frequencies * @param y - expected frequencies or a discrete probability distribution name * @param args - probability mass function (PMF) arguments followed by an optional options object * @throws first argument must be an array-like object or a 1-dimensional array containing nonnegative integers * @throws second argument must be either an array-like object (or a 1-dimensional array) of nonnegative numbers, an array-like object (or a 1-dimensional array) of probabilities summing to one, or a discrete probability distribution name * @throws must provide valid options * @throws first and second arguments must have the same length * @throws first argument must contain at least one element greater than zero * @returns test results * * @example * var x = [ 89, 37, 30, 28, 2 ]; * var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ]; * * var out = ns.chi2gof( x, p ); * * var o = out.toJSON(); * // returns { 'pValue': ~0.0406, 'statistic': ~9.9901, ... } */ chi2gof: typeof chi2gof; /** * Performs a chi-square independence test. * * @param x - two-way table of observed frequencies * @param options - function options * @returns test results * * @example * var x = [ [ 20, 30 ], [ 30, 20 ] ]; * * var out = ns.chi2test( x ); * * var o = out.toJSON(); * // returns { 'rejected': false, 'alpha': 0.05, 'pValue': ~0.072, ... } */ chi2test: typeof chi2test; /** * Computes the Fligner-Killeen test for equal variances. * * @param arr0 - first numeric array * @param args - subsequent numeric arrays and an optional options object * @throws must provide valid options * @returns test results */ flignerTest: typeof flignerTest; /** * Incremental statistics. */ incr: typeof incr; /** * Statistical function iterators. */ iterators: typeof iterators; /** * Computes two-dimensional kernel density estimates. * * @param x - array of x values * @param y - array of y values * @param options - function options * @param options.h - array of length two containing the bandwidth values for x and y * @param options.n - number of partitions on the x- and y-axes (default: 25) * @param options.xMin - lower limit of x * @param options.xMax - upper limit of x * @param options.yMin - lower limit of y * @param options.yMax - upper limit of y * @param options.kernel - a string or function to specifying the used kernel function (default: 'gaussian') * @throws first and second arguments must be of the same length * @throws must provide valid options * @returns object containing the density estimates (`z`) along grid points (`x` and `y` values) * * @example * var x = [ 0.6333, 0.8643, 1.0952, 1.3262, 1.5571, 1.7881, 2.019, 2.25, 2.481, 2.7119 ]; * var y = [ -0.0468, 0.8012, 1.6492, 2.4973, 3.3454, 4.1934, 5.0415, 5.8896, 6.7376, 7.5857 ]; * var out = ns.kde2d( x, y ); */ kde2d: typeof kde2d; /** * Computes the Kruskal-Wallis test for equality of medians. * * @param arr0 - first numeric array * @param args - subsequent numeric arrays and an optional options object * @throws must provide valid options * @returns test results */ kruskalTest: typeof kruskalTest; /** * Computes a Kolmogorov-Smirnov goodness-of-fit test. * * @param x - input array holding numeric values * @param y - either a CDF function or a string denoting the name of a distribution * @param params - distribution parameters passed to reference CDF followed by an optional options object * @throws must provide valid options * @returns test result object * * @example * var out = ns.kstest( [ 2.0, 1.0, 5.0, -5.0, 3.0, 0.5, 6.0 ], 'normal', 0.0, 1.0 ); * // returns { 'pValue': ~0.015, 'statistic': ~0.556, ... } */ kstest: typeof kstest; /** * Computes Levene's test for equal variances. * * @param x - first numeric array * @param y - second numeric array * @param args - subsequent numeric arrays and an optional options object * @throws must provide valid options * @returns test results */ leveneTest: typeof leveneTest; /** * Locally-weighted polynomial regression via the LOWESS algorithm. * * ## References * * - Cleveland, William S. 1979. "Robust Locally and Smoothing Weighted Regression Scatterplots." _Journal of the American Statistical Association_ 74 (368): 829–36. doi:[10.1080/01621459.1979.10481038](https://doi.org/10.1080/01621459.1979.10481038). * - Cleveland, William S. 1981. "Lowess: A program for smoothing scatterplots by robust locally weighted regression." _American Statistician_ 35 (1): 54–55. doi:[10.2307/2683591](https://doi.org/10.2307/2683591). * * @param x - ordered x-axis values (abscissa values) * @param y - corresponding y-axis values (ordinate values) * @param options - function options * @param options.f - smoother span (proportion of points which influence smoothing at each value) * @param options.nsteps - number of iterations in the robust fit (fewer iterations translates to faster function execution) * @param options.delta - nonnegative parameter which may be used to reduce the number of computations * @param options.sorted - boolean indicating if the input array `x` is already in sorted order * @throws arguments `x` and `y` must have the same length * @returns ordered x-values and fitted values */ lowess: typeof lowess; /** * Adjusts supplied p-values for multiple comparisons via a specified method. * * ## Notes * * - The `method` parameter can be one of the following values: * * - **bh**: Benjamini-Hochberg procedure controlling the False Discovery Rate (FDR). * - **bonferroni**: Bonferroni correction fixing the family-wise error rate by multiplying the p-values with the number of comparisons. The Bonferroni correction is usually a too conservative adjustment compared to the others. * - **by**: Procedure by Benjamini & Yekutieli for controlling the False Discovery Rate (FDR) under dependence. * - **holm**: Hommel's method controlling family-wise error rate. It is uniformly more powerful than the Bonferroni correction. * - **hommel**: Hommel's method, which is valid when hypothesis tests are independent. It is more expensive to compute than the other methods. * * - By default, the number of comparisons for which the p-values should be corrected is equal to the number of provided p-values. Alternatively, it is possible to set `comparisons` to a number greater than the length of `pvals`. In that case, the methods assume `comparisons - pvals.length` unobserved p-values that are greater than all observed p-values (for Holm's method and the Bonferroni correction) or equal to `1` for the remaining methods. * * @param pvals - p-values to be adjusted * @param method - correction method * @param comparisons - number of comparisons (default: pvals.length) * @throws comparisons must be greater or equal to the number of elements in `pvals` * @returns array containing the corrected p-values * * @example * var pvalues = [ 0.008, 0.03, 0.123, 0.6, 0.2 ]; * var out = ns.padjust( pvalues, 'bonferroni' ); * // returns [ 0.04, 0.15, ..., 1, 1 ] * * @example * var pvalues = [ 0.008, 0.03, 0.123, 0.6, 0.2 ]; * var out = ns.padjust( pvalues, 'by' ); * // returns [ ~0.091, ~0.171, ..., 1, ~0.571 ] * * @example * var pvalues = [ 0.008, 0.03, 0.123, 0.6, 0.2 ]; * var out = ns.padjust( pvalues, 'bh' ); * // returns [ 0.04, 0.075, ..., 0.6, 0.25 ] * * @example * var pvalues = [ 0.008, 0.03, 0.123, 0.6, 0.2 ]; * var out = ns.padjust( pvalues, 'holm' ); * // returns [ 0.04, 0.12, ..., 0.6, 0.4 ] * * @example * var pvalues = [ 0.008, 0.03, 0.123, 0.6, 0.2 ]; * var out = ns.padjust( pvalues, 'hommel' ); * // returns [ 0.032, 0.12, ..., 0.6, 0.4 ] */ padjust: typeof padjust; /** * Computes a Pearson product-moment correlation test between paired samples. * * ## Notes * * - By default, the function performs a t-test for the null hypothesis that the data in arrays or typed arrays `x` and `y` is not correlated. A test against a different population correlation can be carried out by supplying the `rho` option. In this case, a test using the Fisher's z transform is conducted. * - The returned object comes with a `.print()` method which when invoked will print a formatted output of the results of the hypothesis test. * * @param x - first data array * @param y - second data array * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less` or `greater`; default: 'two-sided') * @param options.rho - correlation under H0 (default: 0.0) * @throws x and y must be arrays of the same length * @throws x and y must contain at least four elements * @throws must provide valid options * @returns test result object * * @example * var x = [ 2, 4, 3, 1, 2, 3 ]; * var y = [ 3, 2, 4, 1, 2, 4 ]; * var out = ns.pcorrtest( x, y ); */ pcorrtest: typeof pcorrtest; /** * Computes the sample ranks for the values of an array-like object. * * ## Notes * * - When all elements of the `array` are different, the ranks are uniquely determined. When there are equal elements (called *ties*), the `method` option determines how they are handled. The default, `'average'`, replaces the ranks of the ties by their mean. Other possible options are `'min'` and `'max'`, which replace the ranks of the ties by their minimum and maximum, respectively. `'dense'` works like `'min'`, with the difference that the next highest element after a tie is assigned the next smallest integer. Finally, `ordinal` gives each element in `arr` a distinct rank, according to the position they appear in. * - The `missing` option is used to specify how to handle missing data. By default, `NaN` or `null` are treated as missing values. `'last'`specifies that missing values are placed last, `'first'` that the are assigned the lowest ranks and `'remove'` means that they are removed from the array before the ranks are calculated. * * @param x - data array * @param options - options object * @param options.method - method name determining how ties are treated (`average`, `min`, `max`, `dense`, or `ordinal`; default: 'average') * @param options.missing - determines where missing values go (`first`,`last`, or `remove`; default: 'last') * @param options.encoding - array of values encoding missing values * @throws must provide valid options * @returns array containing the computed ranks for the elements of x * * @example * var arr = [ 1.1, 2.0, 3.5, 0.0, 2.4 ]; * var out = ns.ranks( arr ); * // returns [ 2, 3, 5, 1, 4 ] * * @example * // Ties are averaged: * arr = [ 2, 2, 1, 4, 3 ]; * out = ns.ranks( arr ); * // returns [ 2.5, 2.5, 1, 5, 4 ] * * @example * // Missing values are placed last: * arr = [ null, 2, 2, 1, 4, 3, NaN, NaN ]; * out = ns.ranks( arr ); * // returns [ 6, 2.5, 2.5, 1, 5, 4, 7 ,8 ] */ ranks: typeof ranks; /** * Computes a one-sample or paired Student's t test. * * @param x - input array * @param y - optional paired array * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less`, or `greater`; default: 'two-sided') * @param options.mu - mean under `H0` (default: 0) * @throws first argument must have at least two elements * @throws paired array must have the same length as the first argument * @throws second argument must be either a numeric array or an options object * @throws must provide valid options * @returns test results * * @example * var x = [ 4.0, 4.0, 6.0, 6.0, 5.0 ]; * var opts = { * 'mu': 5.0 * }; * var out = ns.ttest( x, opts ); * // returns {...} * * @example * var x = [ 4.0, 4.0, 6.0, 6.0, 5.0 ]; * var y = [ 5.0, 5.0, 5.5, 7.0, 5.8 ]; * var opts = { * 'alpha': 0.1 * }; * var out = ns.ttest( x, opts ); * // returns {...} */ ttest: typeof ttest; /** * Computes a two-sample Student's t test. * * @param x - first data array * @param y - second data array * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less` or `greater`; default: 'two-sided') * @param options.difference - difference in means under H0 (default: 0) * @param options.variance - whether variances are `equal` or `unequal` under H0 (default: 'unequal') * @throws must provide valid options * @returns test result object * * @example * var incrspace = require( '@stdlib/array/base/incrspace' ); * * var a = incrspace( 1, 11, 1 ); * var b = incrspace( 7, 21, 1 ); * * var out = ns.ttest2( a, b ); * var table = out.print(); */ ttest2: typeof ttest2; /** * Computes a two-sample F-test for equal variances. * * @param x - first data array * @param y - second data array * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less`, or `greater`; default: 'two-sided') * @param options.ratio - ratio of population variances under H0 (default: 1) * @throws must provide valid options * @returns test result object * * @example * var x = [ 610, 610, 550, 590, 565, 570 ]; * var y = [ 560, 550, 580, 550, 560, 590, 550, 590 ]; * * var out = ns.vartest( x, y ); */ vartest: typeof vartest; /** * Computes a Wilcoxon signed rank test. * * @param x - data array * @param y - optional paired data array * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less`, or `greater`; default: 'two-sided') * @param options.zeroMethod - method governing how zero-differences are handled (`pratt`, `wilcox`, or `zsplit`; default: 'wilcox') * @param options.correction - continuity correction adjusting the Wilcoxon rank statistic by 0.5 towards the mean (default: true) * @param options.exact - whether to force using the exact distribution instead of a normal approximation when there are more than fifty data points (default: false) * @param options.mu - location parameter under H0 (default: 0) * @throws must provide valid options * @returns test result object * * @example * var x = [ 1.83, 0.50, 1.62, 2.48, 1.68, 1.88, 1.55, 3.06, 1.30 ]; * var y = [ 0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29 ]; * var out = ns.wilcoxon( x, y ); */ wilcoxon: typeof wilcoxon; /** * Computes a one-sample z-test. * * @param x - data array * @param sigma - known standard deviation * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less` or `greater`; default: 'two-sided') * @param options.mu - mean under H0 (default: 0) * @throws sigma argument must be a positive number * @throws must provide valid options * @returns test result object * * @example * var arr = [ 4, 4, 6, 6, 5 ]; * var out = ns.ztest( arr, 1.0, { * 'mu': 5 * }); * * @example * var arr = [ 4, 4, 6, 6, 5 ]; * var out = ns.ztest( arr, 1.0, { * 'alternative': 'greater' * }); */ ztest: typeof ztest; /** * Computes a two-sample z-test. * * @param x - first data array * @param y - second data array * @param sigmax - known standard deviation of first group * @param sigmay - known standard deviation of second group * @param options - function options * @param options.alpha - significance level (default: 0.05) * @param options.alternative - alternative hypothesis (`two-sided`, `less` or `greater`; default: 'two-sided') * @param options.difference - difference in means under H0 (default: 0) * @throws sigmax argument must be a positive number * @throws sigmay argument must be a positive number * @throws must provide valid options * @returns test result object * * @example * var x = [ 2.66, 1.5, 3.25, 0.993, 2.31, 2.41, 1.76, 2.57, 2.62, 1.23 ]; // Drawn from N(2,1) * var y = [ 4.88, 2.93, 2.96, 4.5, -0.0603, 4.62, 3.35, 2.98 ]; // Drawn from N(3,2) * * var out = ns.ztest2( x, y, 1.0, 2.0 ); * // returns {...} * * var table = out.print(); */ ztest2: typeof ztest2; } /** * Standard library statistical functions. */ declare var ns: Namespace; // EXPORTS // export = ns;