simple-statistics
Version:
54 lines (46 loc) • 2.06 kB
JavaScript
import mean from "./mean.js";
/**
 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_covariance)
 * of two paired datasets: a measure of how strongly the two move together.
 * Each `x[i]` is paired with the `y[i]` at the same index.
 *
 * The result is the sum of the products of each pair's deviations from its
 * own dataset's mean, divided by `n - 1` (Bessel's correction).
 *
 * @param {Array<number>} x a sample of two or more data points
 * @param {Array<number>} y a sample of two or more data points
 * @throws {Error} if x and y do not have equal lengths
 * @throws {Error} if x or y have length of one or less
 * @returns {number} sample covariance
 * @example
 * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5
 */
function sampleCovariance(x, y) {
    const n = x.length;

    // Both samples must pair up one-to-one, so their lengths have to match.
    if (n !== y.length) {
        throw new Error("sampleCovariance requires samples with equal lengths");
    }

    // With fewer than two pairs the `n - 1` divisor below would be zero
    // (or negative), so reject such input up front.
    if (n < 2) {
        throw new Error(
            "sampleCovariance requires at least two data points in each sample"
        );
    }

    // Center each dataset on its own mean. Working with deviations rather
    // than raw values means that, e.g., [1, 2, 3] and [2, 3, 4] are not
    // penalized for sitting at different absolute levels.
    const centerX = mean(x);
    const centerY = mean(y);

    // A pair contributes positively when both values lie on the same side
    // of their respective means, and negatively when they lie on opposite
    // sides; the further from the means, the larger the contribution.
    let coProductTotal = 0;
    for (let k = 0; k < n; k += 1) {
        const deviationX = x[k] - centerX;
        const deviationY = y[k] - centerY;
        coProductTotal += deviationX * deviationY;
    }

    // Bessel's correction: divide by n - 1 instead of n to account for the
    // degree of freedom lost by estimating the means from the sample itself.
    return coProductTotal / (n - 1);
}
export default sampleCovariance;