UNPKG

entropyx

Version:

A simple data mining library, written in TypeScript

151 lines 5.82 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PCA = void 0; const ml_matrix_1 = require("ml-matrix"); class PCA { constructor(options = {}) { this.means = []; this.stDevs = []; this.components = []; this.eigenvalues = []; this.explainedVariance = []; this.projectionDimension = options.projectionDimension ?? 2; this.centerData = options.center ?? true; this.scaleData = options.scale ?? false; } fit(data) { if (data.length === 0) { return { projection: [], components: [], eigenvalues: [], explainedVariance: [], }; } const nFeatures = data[0].length; const rawData = data.map((row) => [...row]); let transformedData = data.map((row) => [...row]); if (this.centerData) { this.means = this.computeMeans(transformedData); transformedData = transformedData.map((row) => row.map((val, i) => val - this.means[i])); } else { this.means = new Array(nFeatures).fill(0); } if (this.scaleData) { this.stDevs = this.computeStDevs(transformedData); transformedData = transformedData.map((row) => row.map((val, i) => { const sd = this.stDevs[i]; return sd === 0 ? 0 : val / sd; })); } else { this.stDevs = new Array(nFeatures).fill(1); } const covarianceMatrix = this.computeCovariance(transformedData); const covMat = new ml_matrix_1.Matrix(covarianceMatrix); const eig = new ml_matrix_1.EigenvalueDecomposition(covMat, { assumeSymmetric: true }); const eigenValuesArr = eig.realEigenvalues; const eigenVectorsMat = eig.eigenvectorMatrix.to2DArray(); const sorted = this.sortEigenPairs(eigenValuesArr, eigenVectorsMat); const sortedEigenValues = sorted.eigenValues; const sortedEigenVectors = sorted.eigenVectors; const k = Math.min(this.projectionDimension, nFeatures); this.eigenvalues = sortedEigenValues.slice(0, k); this.components = []; for (let j = 0; j < k; j++) { const column = []; for (let i = 0; i < nFeatures; i++) { column.push(sortedEigenVectors[i][j]); } this.components.push(column); } const totalVar = sortedEigenValues.reduce((acc, val) => acc + val, 0); this.explainedVariance = this.eigenvalues.map((val) => val / totalVar); const projection = this.transform(rawData); return { projection, components: this.components, eigenvalues: this.eigenvalues, explainedVariance: this.explainedVariance, }; } transform(data) { if (data.length === 0) return []; const centeredScaled = data.map((row) => row.map((val, i) => (val - this.means[i]) / (this.stDevs[i] || 1))); return centeredScaled.map((row) => { const coords = []; for (let pcIndex = 0; pcIndex < this.components.length; pcIndex++) { const comp = this.components[pcIndex]; let dot = 0; for (let i = 0; i < row.length; i++) { dot += row[i] * comp[i]; } coords.push(dot); } return coords; }); } computeMeans(data) { const nSamples = data.length; const nFeatures = data[0].length; const means = new Array(nFeatures).fill(0); for (let i = 0; i < nSamples; i++) { for (let j = 0; j < nFeatures; j++) { means[j] += data[i][j]; } } for (let j = 0; j < nFeatures; j++) { means[j] /= nSamples; } return means; } computeStDevs(data) { const nSamples = data.length; const nFeatures = data[0].length; const stDevs = new Array(nFeatures).fill(0); for (let i = 0; i < nSamples; i++) { for (let j = 0; j < nFeatures; j++) { stDevs[j] += data[i][j] ** 2; } } for (let j = 0; j < nFeatures; j++) { stDevs[j] = Math.sqrt(stDevs[j] / (nSamples - 1)); } return stDevs; } computeCovariance(data) { const nSamples = data.length; const nFeatures = data[0].length; const cov = Array.from({ length: nFeatures }, () => new Array(nFeatures).fill(0)); for (let s = 0; s < nSamples; s++) { for (let i = 0; i < nFeatures; i++) { for (let j = 0; j < nFeatures; j++) { cov[i][j] += data[s][i] * data[s][j]; } } } for (let i = 0; i < nFeatures; i++) { for (let j = 0; j < nFeatures; j++) { cov[i][j] /= nSamples - 1; } } return cov; } sortEigenPairs(eigenValues, eigenVectors) { const pairs = eigenValues.map((val, i) => ({ val, idx: i })); pairs.sort((a, b) => b.val - a.val); const sortedValues = pairs.map((p) => p.val); const sortedVectors = new Array(eigenVectors.length).fill(null).map(() => new Array(eigenVectors.length).fill(0)); for (let col = 0; col < pairs.length; col++) { const sourceCol = pairs[col].idx; for (let row = 0; row < eigenVectors.length; row++) { sortedVectors[row][col] = eigenVectors[row][sourceCol]; } } return { eigenValues: sortedValues, eigenVectors: sortedVectors }; } } exports.PCA = PCA; //# sourceMappingURL=pca.js.map