mann-whitney-utest
Version:
The Mann-Whitney U test is a nonparametric statistical test
178 lines (143 loc) • 4.82 kB
JavaScript
// Mann-Whitney U test
// Luke Mitchell, April 2016
// https://github.com/lukem512/mann-whitney-utest
// Property name under which each observation's value is stored on the
// wrapper objects built by test() and read back by rank() and sampleRank().
var __key = 'val';
// Assign a rank to every observation wrapper in the list.
// Inspired by https://gist.github.com/gungorbudak/1c3989cc26b9567c6e50
//
// list - array of objects carrying the observation under the __key property.
//        The array is sorted in place and each object gains a `rank` property.
// Returns a new array holding the same objects in ascending order of value;
// observations with equal values share the average of the ranks they span.
var rank = function(list) {
  // Order the observations from smallest to largest (mutates the caller's array).
  list.sort(function(left, right) {
    return left[__key] - right[__key];
  });

  // Provisional 1-based rank by sorted position.
  var ordered = list.map(function(entry, position) {
    entry.rank = position + 1;
    return entry;
  });

  // Walk each run of equal values and replace the provisional ranks
  // with the average rank of the run.
  var start = 0;
  while (start < ordered.length) {
    var end = start;
    var rankSum = ordered[start].rank;
    while (ordered[end + 1] && ordered[end][__key] === ordered[end + 1][__key]) {
      end++;
      rankSum += ordered[end].rank;
    }

    var runLength = end - start + 1;
    var sharedRank = rankSum / runLength;
    for (var m = start; m <= end; m++) {
      ordered[m].rank = sharedRank;
    }

    // Continue with the first element after the run.
    start = end + 1;
  }

  return ordered;
};
// Sum the ranks that belong to one sample.
//
// rankedList   - ranked observation wrappers (objects with __key and `rank`).
// observations - the raw observation values of the sample.
// Returns the sum of the ranks of the entries matched to the sample. Each
// observation is matched at most once, so duplicates are counted correctly.
var sampleRank = function(rankedList, observations) {
  // Work on a copy so matched values can be crossed off without
  // touching the caller's array.
  var remaining = observations.slice(0);

  return rankedList.reduce(function(rankSum, entry) {
    var position = remaining.indexOf(entry[__key]);
    if (position === -1) {
      // This ranked entry does not belong to the sample (or is already used up).
      return rankSum;
    }
    // Cross the matched observation off and add its rank to the running sum.
    remaining.splice(position, 1);
    return rankSum + entry.rank;
  }, 0);
};
// Derive the U value of a sample from its rank sum.
//
// rank         - sum of the ranks of the sample's observations.
// observations - the sample's observations (only the count is used).
// Returns rank - k(k+1)/2, where k is the number of observations.
var uValue = function(rank, observations) {
  var size = observations.length;
  // Rank sum the sample would have if it held the `size` lowest ranks.
  var lowestRankSum = size * (size + 1) / 2;
  return rank - lowestRankSum;
};
// Sanity-check a pair of U values.
// The Mann-Whitney U test guarantees that the two U values add up to the
// product of the two sample sizes; this verifies that property.
//
// u       - array holding the two U values.
// samples - array holding the two samples the U values were computed from.
// Returns true when the property holds, false otherwise.
var check = module.exports.check = function(u, samples) {
  var uSum = u[0] + u[1];
  var pairCount = samples[0].length * samples[1].length;
  return uSum == pairCount;
};
// Compute the standardised test statistic (z-score) for the samples using
// the normal approximation with a correction for tied values.
// This is necessary when the sample sizes are greater than 20
// as the U tables are limited to 20x20.
// Note: despite the exported name, the returned number is
// |U - mean| / stddev, not a critical U value.
// https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Normal_approximation_and_tie_correction
//
// u       - array holding the two U values.
// samples - array holding the two samples the U values were computed from.
// Returns the non-negative z-score, or 0 when the tie-corrected variance is
// not positive (for example when every observation has the same value).
var criticalValue = module.exports.criticalValue = function(u, samples) {
  var uVal = Math.min(u[0], u[1]);
  var prod = samples[0].length * samples[1].length;
  var n = samples[0].length + samples[1].length;
  var mean = prod / 2;

  // Count how often each distinct observation value occurs across both samples
  var counts = {};
  samples.forEach(function(sample) {
    sample.forEach(function(o) {
      counts[o] = (counts[o] || 0) + 1;
    });
  });

  // Accumulate the tie correction: every value occurring t > 1 times
  // contributes (t^3 - t) / (n * (n - 1))
  var correction = 0;
  Object.keys(counts).forEach(function(value) {
    var t = counts[value];
    if (t > 1) {
      correction += (Math.pow(t, 3) - t) / (n * (n - 1));
    }
  });

  // Variance of U, corrected for ties
  var variance = (prod / 12) * ((n + 1) - correction);

  // With zero variance (all observations tied) U equals its mean, so the
  // quotient below would be 0/0 = NaN. There is no evidence of a difference
  // in that case, so report a z-score of 0 instead.
  if (!(variance > 0)) {
    return 0;
  }

  return Math.abs((uVal - mean) / Math.sqrt(variance));
};
// Test the result for significance using the normal approximation.
// criticalValue() yields a z-score, so it is compared against a critical
// z value. Comparing it against the raw U value would mix two different
// scales and report most genuinely significant results as not significant.
//
// u         - array holding the two U values.
// samples   - array holding the two samples the U values were computed from.
// zCritical - optional critical z value; defaults to 1.96, the two-sided
//             threshold at the 5% significance level.
// Returns true when the z-score exceeds the critical z value.
var significant = module.exports.significant = function(u, samples, zCritical) {
  var threshold = (typeof zCritical === 'number') ? zCritical : 1.96;
  return criticalValue(u, samples) > threshold;
};
// Perform the Mann-Whitney U test on an array of samples.
// The input should be of the form [[a, b, c], [e, f, g]]
// where {a, b, ..., g} are numeric values forming two
// samples.
//
// samples - array containing exactly two non-empty arrays of observations.
// key     - accepted for compatibility; not used by this function.
// Returns an array with the U value of each sample, in input order.
// Throws an Error when the input does not have the shape described above.
var test = module.exports.test = function(samples, key) {
  // Validate the overall shape first, then each sample in turn
  if (!Array.isArray(samples)) {
    throw new Error('Samples must be an array');
  }
  if (samples.length !== 2) {
    throw new Error('Samples must contain exactly two samples');
  }
  for (var s = 0; s < 2; s++) {
    var sample = samples[s];
    if (!sample || sample.length == 0) {
      throw new Error('Samples cannot be empty');
    }
    if (!Array.isArray(sample)) {
      throw new Error('Sample ' + s + ' must be an array');
    }
  }

  // Wrap every observation of the pooled samples in an object so that
  // rank() can attach a rank to it
  var wrap = function(value) {
    var entry = {};
    entry[__key] = value;
    return entry;
  };
  var ranked = rank(samples[0].concat(samples[1]).map(wrap));

  // Rank sum of each sample within the pooled ranking
  var rankSums = [
    sampleRank(ranked, samples[0]),
    sampleRank(ranked, samples[1])
  ];

  // U value of each sample. The second value could alternatively be derived
  // from the first, since both add up to the product of the sample sizes;
  // it is computed independently here.
  return [
    uValue(rankSums[0], samples[0]),
    uValue(rankSums[1], samples[1])
  ];
};