// entropyx
// Version:
// A simple data mining library, written in TypeScript
// 151 lines • 5.82 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PCA = void 0;
const ml_matrix_1 = require("ml-matrix");
/**
 * Principal component analysis based on an eigendecomposition of the
 * covariance matrix of the (optionally centered and scaled) input.
 *
 * After `fit`, the instance keeps the per-feature means and scale factors,
 * the leading components (one array of feature loadings per component),
 * their eigenvalues, and each retained eigenvalue's share of the total.
 */
class PCA {
    /**
     * @param {object} [options]
     * @param {number} [options.projectionDimension=2] Number of components to keep
     *     (capped at the number of features during `fit`).
     * @param {boolean} [options.center=true] Subtract the per-feature mean before fitting.
     * @param {boolean} [options.scale=false] Divide each feature by its scale factor
     *     (see `computeStDevs`) before fitting.
     */
    constructor(options = {}) {
        this.means = [];
        this.stDevs = [];
        this.components = [];
        this.eigenvalues = [];
        this.explainedVariance = [];
        this.projectionDimension = options.projectionDimension ?? 2;
        this.centerData = options.center ?? true;
        this.scaleData = options.scale ?? false;
    }

    /**
     * Fits the model to `data` and projects `data` onto the retained components.
     *
     * @param {number[][]} data Rows are samples, columns are features. The
     *     feature count is taken from the first row.
     * @returns {{projection: number[][], components: number[][], eigenvalues: number[], explainedVariance: number[]}}
     *     All four arrays are empty when `data` is empty. `components`,
     *     `eigenvalues` and `explainedVariance` are the instance's own arrays.
     */
    fit(data) {
        if (data.length === 0) {
            return {
                projection: [],
                components: [],
                eigenvalues: [],
                explainedVariance: [],
            };
        }
        const nFeatures = data[0].length;
        // Every step below builds new arrays, so the caller's rows are never mutated.
        let prepared = data;
        if (this.centerData) {
            this.means = this.computeMeans(prepared);
            prepared = prepared.map((row) => row.map((val, i) => val - this.means[i]));
        }
        else {
            this.means = new Array(nFeatures).fill(0);
        }
        if (this.scaleData) {
            this.stDevs = this.computeStDevs(prepared);
            prepared = prepared.map((row) => row.map((val, i) => {
                const sd = this.stDevs[i];
                // A feature with zero spread carries no information; map it to 0
                // rather than dividing by zero.
                return sd === 0 ? 0 : val / sd;
            }));
        }
        else {
            this.stDevs = new Array(nFeatures).fill(1);
        }
        const covMat = new ml_matrix_1.Matrix(this.computeCovariance(prepared));
        const eig = new ml_matrix_1.EigenvalueDecomposition(covMat, { assumeSymmetric: true });
        const sorted = this.sortEigenPairs(eig.realEigenvalues, eig.eigenvectorMatrix.to2DArray());
        const k = Math.min(this.projectionDimension, nFeatures);
        this.eigenvalues = sorted.eigenValues.slice(0, k);
        // Eigenvectors are stored column-wise; pull each of the first k columns
        // out into its own array of feature loadings.
        this.components = [];
        for (let j = 0; j < k; j++) {
            const column = [];
            for (let i = 0; i < nFeatures; i++) {
                column.push(sorted.eigenVectors[i][j]);
            }
            this.components.push(column);
        }
        const totalVar = sorted.eigenValues.reduce((acc, val) => acc + val, 0);
        // Constant data (or a single sample) has zero total variance; report a
        // share of 0 instead of the NaN that 0 / 0 would produce.
        this.explainedVariance = this.eigenvalues.map((val) => (totalVar > 0 ? val / totalVar : 0));
        const projection = this.transform(data);
        return {
            projection,
            components: this.components,
            eigenvalues: this.eigenvalues,
            explainedVariance: this.explainedVariance,
        };
    }

    /**
     * Projects rows onto the stored components using the stored means and
     * scale factors.
     *
     * @param {number[][]} data Rows to project.
     * @returns {number[][]} One array per row with one coordinate per stored
     *     component; `[]` when `data` is empty.
     */
    transform(data) {
        if (data.length === 0) {
            return [];
        }
        return data.map((row) => {
            // A falsy scale factor (0 or NaN) falls back to 1 so the division is safe.
            const normalized = row.map((val, i) => (val - this.means[i]) / (this.stDevs[i] || 1));
            return this.components.map((comp) => {
                let dot = 0;
                for (let i = 0; i < normalized.length; i++) {
                    dot += normalized[i] * comp[i];
                }
                return dot;
            });
        });
    }

    /**
     * Per-feature arithmetic mean.
     *
     * @param {number[][]} data Non-empty array of rows.
     * @returns {number[]} One mean per column of the first row.
     */
    computeMeans(data) {
        const nSamples = data.length;
        const nFeatures = data[0].length;
        const means = new Array(nFeatures).fill(0);
        for (const row of data) {
            for (let j = 0; j < nFeatures; j++) {
                means[j] += row[j];
            }
        }
        return means.map((sum) => sum / nSamples);
    }

    /**
     * Per-feature scale factor: sqrt(sum of squares / (n - 1)).
     *
     * No mean is subtracted here, so this equals the sample standard
     * deviation only when `data` has already been centered.
     *
     * @param {number[][]} data Non-empty array of rows.
     * @returns {number[]} One scale factor per column of the first row.
     */
    computeStDevs(data) {
        const nFeatures = data[0].length;
        // With a single sample n - 1 is 0; clamp so the result is finite.
        const denominator = Math.max(data.length - 1, 1);
        const sumSquares = new Array(nFeatures).fill(0);
        for (const row of data) {
            for (let j = 0; j < nFeatures; j++) {
                sumSquares[j] += row[j] ** 2;
            }
        }
        return sumSquares.map((sum) => Math.sqrt(sum / denominator));
    }

    /**
     * Cross-product matrix of the columns divided by (n - 1).
     *
     * No mean is subtracted here, so this is the sample covariance matrix
     * only when `data` has already been centered.
     *
     * @param {number[][]} data Non-empty array of rows.
     * @returns {number[][]} Square matrix with one row/column per feature.
     */
    computeCovariance(data) {
        const nFeatures = data[0].length;
        // With a single sample n - 1 is 0; clamp so the matrix stays finite
        // instead of filling with NaN/Infinity.
        const denominator = Math.max(data.length - 1, 1);
        const cov = Array.from({ length: nFeatures }, () => new Array(nFeatures).fill(0));
        for (const row of data) {
            for (let i = 0; i < nFeatures; i++) {
                for (let j = 0; j < nFeatures; j++) {
                    cov[i][j] += row[i] * row[j];
                }
            }
        }
        for (let i = 0; i < nFeatures; i++) {
            for (let j = 0; j < nFeatures; j++) {
                cov[i][j] /= denominator;
            }
        }
        return cov;
    }

    /**
     * Orders eigenvalues from largest to smallest and reorders the eigenvector
     * columns to match.
     *
     * @param {number[]} eigenValues Eigenvalues, one per eigenvector column.
     * @param {number[][]} eigenVectors Square matrix whose columns are eigenvectors.
     * @returns {{eigenValues: number[], eigenVectors: number[][]}} Sorted copies;
     *     the inputs are not modified.
     */
    sortEigenPairs(eigenValues, eigenVectors) {
        const pairs = eigenValues.map((val, i) => ({ val, idx: i }));
        pairs.sort((a, b) => b.val - a.val);
        const size = eigenVectors.length;
        const sortedVectors = Array.from({ length: size }, () => new Array(size).fill(0));
        pairs.forEach((pair, col) => {
            for (let row = 0; row < size; row++) {
                sortedVectors[row][col] = eigenVectors[row][pair.idx];
            }
        });
        return { eigenValues: pairs.map((p) => p.val), eigenVectors: sortedVectors };
    }
}
exports.PCA = PCA;
//# sourceMappingURL=pca.js.map