/*
 * pca-js — Principal Components Analysis in JavaScript
 * Version: (unspecified)
 * 668 lines (659 loc) • 23.9 kB
 */
// UMD (Universal Module Definition) wrapper: exposes the library as a
// CommonJS export, an AMD module, or the browser/global `PCA` object,
// whichever the host environment supports.
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.PCA = factory());
})(this, (function () { 'use strict';
/**
 * Throws when a value is null or undefined.
 * @param {*} value - the value to check
 * @param {string} fieldName - label used in the error message
 * @throws {Error} when value is null or undefined
 */
function assertNotNull(value, fieldName) {
    if (value === null || value === undefined) {
        throw new Error(`->${fieldName} is null or undefined`);
    }
}
/**
 * Returns `defaultValue` when `value` is undefined, otherwise `value`.
 * Note: despite the "assert" prefix this never throws — it only supplies
 * a fallback. `null` is considered defined and is passed through.
 * @param {*} value
 * @param {*} defaultValue
 */
function assertDefined(value, defaultValue) {
    if (value === undefined) {
        return defaultValue;
    }
    return value;
}
/**
 * Verifies that the given (row, col) positions of a matrix hold
 * non-null values, throwing via assertNotNull otherwise. When a pair
 * has no column index, only the row itself is checked.
 * @param {Matrix} matrix
 * @param {string} name - label embedded in error messages
 * @param {Array<Array<number>>} indices - list of [row, col] pairs
 */
function assertMatrixElements(matrix, name, indices) {
    indices.forEach(([row, col]) => {
        assertNotNull(matrix[row], `Row ${name}[${row}][${col}]`);
        if (col !== undefined) {
            assertNotNull(matrix[row][col], `Col ${name}[${row}][${col}]`);
        }
    });
}
/**
 * Throws unless `a` looks like a non-empty matrix (has at least one row
 * and the first row is truthy).
 * @param {Matrix} a - candidate matrix
 * @param {string} aName - label used in the error message
 * @throws {Error} when `a` is missing, empty, or has no first row
 */
function assertValidMatrix(a, aName) {
    // Check the reference and length before indexing so a null/undefined
    // input raises the descriptive error instead of a raw TypeError on a[0].
    if (!a || !a.length || !a[0]) {
        throw new Error(`->${aName} should be a valid matrix`);
    }
}
/**
 * Validates both matrices and confirms they are conformable for the
 * product a×b (column count of `a` equals row count of `b`).
 * @param {Matrix} a
 * @param {Matrix} b
 * @param {string} aName - label for `a` in error messages
 * @param {string} bName - label for `b` in error messages
 * @throws {Error} on invalid or non-conformable matrices
 */
function assertValidMatrices(a, b, aName, bName) {
    assertValidMatrix(a, aName);
    assertValidMatrix(b, bName);
    const aCols = a[0].length;
    if (aCols !== b.length) {
        throw new Error(`Columns in ${aName} should be the same as the number of rows in ${bName}`);
    }
}
/**
 * Rounds every entry of the matrix to `precision` decimal places for
 * neat display.
 * @param {Matrix} data
 * @param {number} precision - number of decimal places to keep
 * @returns {Matrix} a new matrix; the input is not mutated
 */
function format(data, precision) {
    const factor = Math.pow(10, precision);
    return data.map((row) => row.map((value) => Math.round(value * factor) / factor));
}
/**
 * Deep-copies a matrix. Each row is rebuilt through a Float64Array, so
 * entries are coerced to 64-bit floats and the copy shares no storage
 * with the original.
 * @param {Matrix} arr
 * @returns {Matrix} independent copy of `arr`
 */
function clone(arr) {
    return arr.map(function (row) {
        return [...new Float64Array(row)];
    });
}
/**
 * Matrix product A×B, where A is n×m and B is m×p.
 * Each output row is accumulated in a Float64Array before being copied
 * out, keeping allocation flat (memory fix for issue #11).
 * @param {Matrix} a
 * @param {Matrix} b
 * @returns {Matrix} the n×p product
 */
function multiply(a, b) {
    assertValidMatrices(a, b, "a", "b");
    const rows = a.length;
    const inner = a[0].length;
    const cols = b[0].length;
    const product = [];
    for (let r = 0; r < rows; r++) {
        const acc = new Float64Array(cols);
        // i-k-j loop order: reuse a[r][k] across the whole output row.
        for (let k = 0; k < inner; k++) {
            const left = a[r][k];
            for (let c = 0; c < cols; c++) {
                acc[c] += left * b[k][c];
            }
        }
        product.push(Array.from(acc));
    }
    return product;
}
/**
 * Element-wise difference a - b. Both matrices must have identical
 * dimensions.
 * @param {Matrix} a
 * @param {Matrix} b
 * @returns {Matrix} new matrix; neither input is mutated
 * @throws {Error} when the dimensions differ
 */
function subtract(a, b) {
    assertValidMatrix(a, "a");
    assertValidMatrix(b, "b");
    if (a.length !== b.length || a[0].length !== b[0].length)
        throw new Error("Both A and B should have the same dimensions");
    // Build each row directly instead of pre-filling a zero matrix and
    // overwriting it (also replaces the leaky `var` loop counters).
    return a.map((row, i) => row.map((value, j) => value - b[i][j]));
}
/**
 * Multiplies every entry of the matrix by a scalar factor.
 * @param {Matrix} matrix
 * @param {number} factor
 * @returns {Matrix} new matrix; the input is not mutated
 */
function scale(matrix, factor) {
    // Pass the real argument name so a validation failure reports
    // "->matrix ..." instead of the misleading "->a ..." (label bug).
    assertValidMatrix(matrix, "matrix");
    // map/map replaces the zero-fill-then-overwrite loops and `var` counters.
    return matrix.map((row) => row.map((value) => value * factor));
}
/**
 * Fused (A×B)·factor in a single pass — avoids materialising the scaled
 * intermediate matrix (memory fix for issue #11, fuses scale+multiply).
 * @param {Matrix} a
 * @param {Matrix} b
 * @param {number} factor
 * @returns {Matrix} the scaled product
 */
function multiplyAndScale(a, b, factor) {
    assertValidMatrices(a, b, "a", "b");
    const rows = a.length;
    const inner = a[0].length;
    const cols = b[0].length;
    const out = [];
    for (let r = 0; r < rows; r++) {
        const acc = new Float64Array(cols);
        for (let k = 0; k < inner; k++) {
            // Scale the left operand once per (row, k) pair.
            const scaled = a[r][k] * factor;
            for (let c = 0; c < cols; c++) {
                acc[c] += scaled * b[k][c];
            }
        }
        out.push(Array.from(acc));
    }
    return out;
}
/**
 * Generates a rows×rows matrix with every entry set to 1.
 * @param {number} rows - number of rows (and columns) to fill
 * @returns {Matrix} all-ones square matrix
 */
function unitSquareMatrix(rows) {
    // Fill with 1 directly — the original zero-filled the matrix and then
    // overwrote every cell with 1 in a second pass.
    return Array.from({ length: rows }, () => Array(rows).fill(1));
}
/**
 * Returns a transposed copy of the matrix (rows become columns).
 * The operation never happens in place: in the original code both
 * branches read their values from `matrix` (the `inplace` branch cloned
 * it but then ignored the clone), so the two branches were behaviorally
 * identical and the clone was dead work. The `inplace` parameter is kept
 * only for backward compatibility and is ignored.
 * @param {Matrix} matrix - matrix to copy and transpose
 * @param {boolean} [inplace=false] - historical flag, has no effect
 * @returns {Matrix} new transposed matrix
 */
function transpose(matrix, inplace = false) {
    assertValidMatrix(matrix, "a");
    return matrix[0].map((_, c) => matrix.map((row) => row[c]));
}
/**
 * Computes the thin SVD of A following G. H. Golub and C. Reinsch,
 * "Singular Value Decomposition and Least Squares Solutions",
 * Numer. Math. 14, 403-420 (1970). Requires m >= n (rows >= columns).
 * Stages: (1) Householder reduction to bidiagonal form, (2) accumulate
 * right-hand transformations into V, (3) accumulate left-hand
 * transformations into U, (4) shifted QR diagonalization, (5) sort.
 * @param {Matrix} A - m×n input matrix with m >= n
 * @returns {{U: Matrix, S: number[], V: Matrix}} U (m×n), S (singular
 * values, sorted in descending order) and V (n×n); A ≈ U·diag(S)·Vᵀ
 * @throws {string} "Need more rows than columns" when m < n, or a
 * no-convergence message if the QR iteration exceeds itmax sweeps
 */
function svd(A) {
// NOTE: the assert helpers throw if null values are encountered anywhere.
let temp;
let prec = Math.pow(2, -52); // machine epsilon, assumes double precision
let tolerance = 1.e-64 / prec;
let itmax = 50; // maximum QR sweeps per singular value
let c = 0;
let i = 0;
let j = 0;
let k = 0;
let l = 0;
let u = clone(A); // working copy, transformed in place into U
let m = u.length;
assertNotNull(u[0], "u");
let n = u[0].length;
if (m < n)
throw "Need more rows than columns";
let e = new Array(n); //vector1: superdiagonal of the bidiagonal form
let q = new Array(n); //vector2: diagonal — becomes the singular values
for (i = 0; i < n; i++)
e[i] = q[i] = 0.0;
let v = rep([n, n], 0);
// sqrt(a^2 + b^2) computed without intermediate overflow/underflow.
function pythag(a, b) {
a = Math.abs(a);
b = Math.abs(b);
if (a > b)
return a * Math.sqrt(1.0 + (b * b / a / a));
else if (b == 0.0)
return a;
return b * Math.sqrt(1.0 + (a * a / b / b));
}
// repeat a value along an s-dimensional matrix (here: n×n zero matrix)
function rep(s, v, k) {
let index_k = assertDefined(k, 0);
let n = s[index_k], ret = Array(n), i;
if (index_k === s.length - 1) {
// innermost dimension: fill two slots per iteration (unrolled loop)
for (i = n - 2; i >= 0; i -= 2) {
ret[i + 1] = v;
ret[i] = v;
}
if (i === -1) {
ret[0] = v;
}
return ret;
}
for (i = n - 1; i >= 0; i--) {
ret[i] = rep(s, v, index_k + 1);
}
return ret;
}
// Stage 1: Householder's reduction to bidiagonal form
let f = 0.0;
let g = 0.0;
let h = 0.0;
let x = 0.0; // tracks the largest |q[i]| + |e[i]|, used for the precision cutoff
let y = 0.0;
let z = 0.0;
let s = 0.0;
for (i = 0; i < n; i++) {
e[i] = g; //vector: superdiagonal entry carried from the previous column
s = 0.0; //sum of squares for the current column
l = i + 1; //stays i+1
for (j = i; j < m; j++) {
assertMatrixElements(u, "u[j, i]", [[j, i]]);
s += (u[j][i] * u[j][i]);
}
if (s <= tolerance)
g = 0.0;
else {
assertMatrixElements(u, "u[i, i]", [[i, i]]);
f = u[i][i];
g = Math.sqrt(s);
if (f >= 0.0)
g = -g;
h = f * g - s;
u[i][i] = f - g;
for (j = l; j < n; j++) {
s = 0.0;
for (k = i; k < m; k++) {
assertMatrixElements(u, "u[k, i], [k, j]", [[k, i], [k, j]]);
s += u[k][i] * u[k][j];
}
f = s / h;
for (k = i; k < m; k++)
u[k][j] += f * u[k][i];
}
}
q[i] = g;
s = 0.0;
for (j = l; j < n; j++) {
assertMatrixElements(u, "u[i, j]", [[i, j]]);
s = s + u[i][j] * u[i][j];
}
if (s <= tolerance)
g = 0.0;
else {
f = u[i][i + 1];
g = Math.sqrt(s);
if (f >= 0.0)
g = -g;
h = f * g - s;
assertMatrixElements(u, "u[i, i+1]", [[i, i + 1]]);
u[i][i + 1] = f - g;
for (j = l; j < n; j++)
e[j] = u[i][j] / h;
for (j = l; j < m; j++) {
s = 0.0;
for (k = l; k < n; k++) {
assertMatrixElements(u, "u[j, k], [i, k]", [[j, k], [i, k]]);
s += (u[j][k] * u[i][k]);
}
for (k = l; k < n; k++) {
assertMatrixElements(u, "u[j, k]", [[j, k]]);
u[j][k] += s * e[k];
}
}
}
y = Math.abs(q[i]) + Math.abs(e[i]);
if (y > x)
x = y;
}
// Stage 2: accumulation of right-hand transformations (builds V)
for (i = n - 1; i != -1; i += -1) {
if (g != 0.0) {
assertMatrixElements(u, "u[i, i+1]", [[i, i + 1]]);
h = g * u[i][i + 1];
for (j = l; j < n; j++) {
assertMatrixElements(u, "u[i, j]", [[i, j]]);
v[j][i] = u[i][j] / h; //u is array, v is square of columns
}
for (j = l; j < n; j++) {
s = 0.0;
for (k = l; k < n; k++) {
assertMatrixElements(u, "u[i, k]", [[i, k]]);
assertMatrixElements(v, "v[k, j]", [[k, j]]);
s += u[i][k] * v[k][j];
}
for (k = l; k < n; k++) {
assertMatrixElements(v, "v[k, j],[k, i]", [[k, j], [k, i]]);
v[k][j] += (s * v[k][i]);
}
}
}
for (j = l; j < n; j++) {
assertMatrixElements(v, "v[i, j],[j, i]", [[i, j], [j, i]]);
v[i][j] = 0;
v[j][i] = 0;
}
v[i][i] = 1;
g = e[i];
l = i;
}
// Stage 3: accumulation of left-hand transformations (u becomes U)
for (i = n - 1; i != -1; i += -1) {
l = i + 1;
g = q[i];
for (j = l; j < n; j++) {
assertMatrixElements(u, "u[i, j]", [[i, j]]);
u[i][j] = 0;
}
if (g != 0.0) {
assertMatrixElements(u, "u[i, i]", [[i, i]]);
h = u[i][i] * g;
for (j = l; j < n; j++) {
s = 0.0;
for (k = l; k < m; k++) {
assertMatrixElements(u, "u[k, j],[k, i]", [[k, j], [k, i]]);
s += u[k][i] * u[k][j];
}
f = s / h;
for (k = i; k < m; k++) {
assertMatrixElements(u, "u[k, j],[k, i]", [[k, j], [k, i]]);
u[k][j] += f * u[k][i];
}
}
for (j = i; j < m; j++) {
assertMatrixElements(u, "u[j, i]", [[j, i]]);
u[j][i] = u[j][i] / g;
}
}
else {
for (j = i; j < m; j++) {
assertMatrixElements(u, "u[j, i]", [[j, i]]);
u[j][i] = 0;
}
}
u[i][i] += 1;
}
// Stage 4: diagonalization of the bidiagonal form (shifted QR iteration)
prec = prec * x; // absolute precision threshold scaled by the matrix norm
for (k = n - 1; k != -1; k += -1) {
for (let iteration = 0; iteration < itmax; iteration++) { // test f splitting
let test_convergence = false;
for (l = k; l != -1; l += -1) {
if (Math.abs(e[l]) <= prec) {
test_convergence = true;
break;
}
if (Math.abs(q[l - 1]) <= prec)
break;
}
if (!test_convergence) { // cancellation of e[l] if l>0
c = 0.0;
s = 1.0;
let l1 = l - 1;
for (i = l; i < k + 1; i++) {
f = s * e[i];
e[i] = c * e[i];
if (Math.abs(f) <= prec)
break;
g = q[i];
h = pythag(f, g);
q[i] = h;
c = g / h;
s = -f / h;
// apply the Givens rotation to the affected columns of u
for (j = 0; j < m; j++) {
assertMatrixElements(u, "u[j, l1],[j, i]", [[j, l1], [j, i]]);
y = u[j][l1];
z = u[j][i];
u[j][l1] = y * c + (z * s);
u[j][i] = -y * s + (z * c);
}
}
}
// test of convergence
z = q[k];
if (l == k) { //convergence
if (z < 0.0) { //q[k] is made non-negative
q[k] = -z;
for (j = 0; j < n; j++) {
assertMatrixElements(v, "v[k, j]", [[j, k]]);
v[j][k] = -v[j][k];
}
}
break; //break out of iteration loop and move on to next k value
}
if (iteration >= itmax - 1)
throw `Error: no convergence for ${iteration} exceeding ${itmax - 1}`;
// shift from bottom 2x2 minor
x = q[l];
y = q[k - 1];
g = e[k - 1];
h = e[k];
f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
g = pythag(f, 1.0);
if (f < 0.0)
f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x;
else
f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x;
// next QR transformation
c = 1.0;
s = 1.0;
for (i = l + 1; i < k + 1; i++) {
g = e[i];
y = q[i];
h = s * g;
g = c * g;
z = pythag(f, h);
e[i - 1] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = -x * s + g * c;
h = y * s;
y = y * c;
for (j = 0; j < n; j++) {
assertMatrixElements(v, "v[j, i],[j, i-1]", [[j, i - 1], [j, i]]);
x = v[j][i - 1];
z = v[j][i];
v[j][i - 1] = x * c + z * s;
v[j][i] = -x * s + z * c;
}
z = pythag(f, h);
q[i - 1] = z;
c = f / z;
s = h / z;
f = c * g + s * y;
x = -s * g + c * y;
for (j = 0; j < m; j++) {
assertMatrixElements(u, "u[j, i],[j, i-1]", [[j, i - 1], [j, i]]);
y = u[j][i - 1];
z = u[j][i];
u[j][i - 1] = y * c + z * s;
u[j][i] = -y * s + z * c;
}
}
e[l] = 0.0;
e[k] = f;
q[k] = x;
}
}
// Clamp negligible singular values to exactly zero.
for (i = 0; i < q.length; i++)
if (q[i] < prec)
q[i] = 0;
// Stage 5: sort singular values into descending order, permuting the
// matching columns of u and v alongside them (insertion-style swaps).
for (i = 0; i < n; i++) {
for (j = i - 1; j >= 0; j--) {
if (q[j] < q[i]) {
c = q[j];
q[j] = q[i];
q[i] = c;
for (k = 0; k < u.length; k++) {
assertMatrixElements(u, "u[k, i]", [[k, i], [k, j]]);
temp = u[k][i];
u[k][i] = u[k][j];
u[k][j] = temp;
}
for (k = 0; k < v.length; k++) {
assertMatrixElements(v, "v[k, i]", [[k, i], [k, j]]);
temp = v[k][i];
v[k][i] = v[k][j];
v[k][j] = temp;
}
i = j;
}
}
}
return {
U: u,
S: q,
V: v
};
}
/**
 * Centers the data by subtracting the column means (the first PCA step).
 * Computes M - (1/n)·J·M, where J is the n×n all-ones matrix.
 * @param {Matrix} matrix - data in an m×n matrix format
 * @returns {Matrix} mean-centered copy of the data
 */
function computeDeviationMatrix(matrix) {
    const n = matrix.length;
    const ones = unitSquareMatrix(n);
    const meanMatrix = multiplyAndScale(ones, matrix, 1 / n);
    return subtract(matrix, meanMatrix);
}
/**
 * Computes the deviation sum-of-squares matrix Dᵀ·D from centered data.
 * @param {Matrix} deviation - data minus mean, from computeDeviationMatrix
 * @returns {Matrix} n×n sum-of-squares matrix
 */
function computeDeviationScores(deviation) {
    return multiply(transpose(deviation), deviation);
}
/**
 * Derives the variance-covariance matrix from the deviation
 * sum-of-squares, dividing by n-1 for a sample or n for a population.
 * @param {Matrix} devSumOfSquares
 * @param {boolean} sample - true when the data is a sample
 * @returns {Matrix} variance-covariance matrix
 */
function computeVarianceCovariance(devSumOfSquares, sample) {
    const divisor = sample
        ? devSumOfSquares.length - 1
        : devSumOfSquares.length;
    return scale(devSumOfSquares, 1 / divisor);
}
/**
 * Runs SVD on the given matrix and pairs each singular value with its
 * corresponding column of U.
 * @param {Matrix} matrix - output of computeDeviationScores
 * @returns {EigenObject[]} one {eigenvalue, eigenvector} per column
 */
function computeSVD(matrix) {
    const { U, S } = svd(matrix);
    return S.map((eigenvalue, col) => ({
        eigenvalue: eigenvalue,
        // Negate every component (HACK kept from the original: prevents
        // completely negative vectors).
        eigenvector: U.map((row) => -1 * row[col]),
    }));
}
/**
 * Projects the data onto the chosen eigenvectors, producing the reduced
 * dataset plus the metadata needed to reconstruct the original.
 * @param {Matrix} data - initial matrix started out with
 * @param {EigenObject[]} vectorObjs - eigenvectors selected as part of process
 * @returns {{adjustedData: Matrix, formattedAdjustedData: Matrix, avgData: Matrix, selectedVectors: Matrix}}
 */
function computeAdjustedData(data, ...vectorObjs) {
    // Eigenvectors arrive in row-normal form, so no extra transpose is needed.
    const selectedVectors = vectorObjs.map(({ eigenvector }) => eigenvector);
    const centered = computeDeviationMatrix(data);
    const adjustedData = multiply(selectedVectors, transpose(centered));
    // Negated column means; computeOriginalData subtracts this to restore them.
    const avgData = multiplyAndScale(unitSquareMatrix(data.length), data, -1 / data.length);
    return {
        adjustedData: adjustedData,
        formattedAdjustedData: format(adjustedData, 2),
        avgData: avgData,
        selectedVectors: selectedVectors,
    };
}
/**
 * Reconstructs (decompresses) the original dataset from the reduced one.
 * @param {Matrix} adjustedData - formatted or unformatted adjusted data
 * @param {Matrix} vectors - selectedVectors from computeAdjustedData
 * @param {Matrix} avgData - avgData from computeAdjustedData
 * @returns {{originalData: Matrix, formattedOriginalData: Matrix}}
 */
function computeOriginalData(adjustedData, vectors, avgData) {
    const withoutMean = transpose(multiply(transpose(vectors), adjustedData));
    // avgData holds negated means, so subtracting it adds the mean back.
    const originalData = subtract(withoutMean, avgData);
    return {
        originalData: originalData,
        formattedOriginalData: format(originalData, 2),
    };
}
/**
 * Fraction of total variance explained by the selected eigenvectors
 * (1 minus this is the loss).
 * @param {EigenObject[]} vectors - all eigen objects
 * @param {EigenObject[]} selected - the chosen subset
 * @returns {number} explained/total ratio
 */
function computePercentageExplained(vectors, ...selected) {
    // Seed reduce with 0 so empty inputs no longer throw
    // "Reduce of empty array with no initial value".
    const total = vectors.reduce((sum, v) => sum + v.eigenvalue, 0);
    const explained = selected.reduce((sum, v) => sum + v.eigenvalue, 0);
    return explained / total;
}
/**
 * Full PCA pipeline: center the data, form the deviation scores, build
 * the (population) variance-covariance matrix, then run SVD.
 * @param {Matrix} data
 * @returns {EigenObject[]} eigenvalues and eigenvectors of the matrix
 */
function getEigenVectors(data) {
    const centered = computeDeviationMatrix(data);
    const scores = computeDeviationScores(centered);
    const covariance = computeVarianceCovariance(scores, false);
    return computeSVD(covariance);
}
/**
 * Convenience helper: projects the data onto the single eigenvector
 * with the largest eigenvalue.
 * @param {Matrix} data
 * @returns projection result from computeAdjustedData
 */
function analyseTopResult(data) {
    const eigenVectors = getEigenVectors(data);
    // sort() mutates, but eigenVectors is local to this call.
    const [topVector] = eigenVectors.sort((a, b) => b.eigenvalue - a.eigenvalue);
    return computeAdjustedData(data, topVector);
}
// Public API surface, frozen so consumers cannot mutate the namespace.
var PCACore = /*#__PURE__*/Object.freeze({
__proto__: null,
analyseTopResult: analyseTopResult,
computeAdjustedData: computeAdjustedData,
computeDeviationMatrix: computeDeviationMatrix,
computeDeviationScores: computeDeviationScores,
computeOriginalData: computeOriginalData,
computePercentageExplained: computePercentageExplained,
computeSVD: computeSVD,
computeVarianceCovariance: computeVarianceCovariance,
getEigenVectors: getEigenVectors
});
// For browser global: also attach the namespace to window.PCA.
if (typeof window !== 'undefined') {
window.PCA = PCACore;
}
return PCACore;
}));