UNPKG

entropyx

Version:

A simple data mining library, written in TypeScript

186 lines 6.88 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TSNE = void 0;
const distance_1 = require("@/base/distance");
const seedrandom_1 = require("seedrandom");
/**
 * t-distributed Stochastic Neighbor Embedding (t-SNE).
 *
 * Projects high-dimensional points into a low-dimensional embedding by
 * matching pairwise similarity distributions — Gaussian in the input space,
 * Student-t (1 degree of freedom) in the embedding — via gradient descent
 * with momentum and an early-exaggeration phase.
 */
class TSNE {
    /**
     * @param {object} [options]
     * @param {number} [options.dimension=2] - Output embedding dimensionality.
     * @param {number} [options.perplexity=30] - Target perplexity of the input-space Gaussians.
     * @param {number} [options.learningRate=200] - Gradient-descent step size.
     * @param {number} [options.maxIterations=1000] - Total gradient-descent iterations.
     * @param {number} [options.earlyExaggeration=12] - Factor multiplied into P during phase one.
     * @param {number} [options.earlyExaggerationIter=250] - Length of the exaggerated phase.
     * @param {Function} [options.distanceFn] - Pairwise distance; defaults to squared Euclidean.
     * @param {number} [options.randomSeed] - Seed for reproducible embedding initialization.
     */
    constructor(options = {}) {
        this.dim = options.dimension ?? 2;
        this.perplexity = options.perplexity ?? 30;
        this.learningRate = options.learningRate ?? 200;
        this.maxIterations = options.maxIterations ?? 1000;
        this.earlyExaggeration = options.earlyExaggeration ?? 12;
        this.earlyExaggerationIter = options.earlyExaggerationIter ?? 250;
        this.distanceFn = options.distanceFn ?? distance_1.Distance.squaredEuclidean;
        if (typeof options.randomSeed === 'number') {
            // Seeded PRNG so embeddings are reproducible across runs.
            this.random = (0, seedrandom_1.alea)(options.randomSeed.toString());
        }
        else {
            this.random = Math.random;
        }
    }
    /**
     * Compute the low-dimensional embedding for `data`.
     * @param {number[][]} data - Input points, one row per sample.
     * @returns {{ embedding: number[][] }} One embedding row per input point
     *   (empty array for empty input).
     */
    fit(data) {
        const n = data.length;
        if (n === 0) {
            return { embedding: [] };
        }
        const distances = this.computePairwiseDistances(data);
        const P = this.computePMatrix(distances, this.perplexity);
        // Phase 1: exaggerated attractive forces help clusters separate early.
        this.applyExaggeration(P, this.earlyExaggeration);
        const Y = this.initializeEmbedding(n, this.dim);
        const YVel = Array.from({ length: n }, () => new Array(this.dim).fill(0));
        const totalIter = this.maxIterations;
        const stageOneIters = Math.min(this.earlyExaggerationIter, totalIter);
        const stageTwoIters = totalIter - stageOneIters;
        let iteration = 0;
        for (let i = 0; i < stageOneIters; i++) {
            this.gradientDescentStep(Y, YVel, P, iteration);
            iteration++;
        }
        // Phase 2: restore the true P and continue with plain gradient descent.
        this.removeExaggeration(P, this.earlyExaggeration);
        for (let i = 0; i < stageTwoIters; i++) {
            this.gradientDescentStep(Y, YVel, P, iteration);
            iteration++;
        }
        return { embedding: Y };
    }
    /**
     * One gradient-descent step with momentum. Mutates `Y` (positions) and
     * `YVel` (velocities) in place.
     * @param {number[][]} Y - Current embedding coordinates.
     * @param {number[][]} YVel - Per-point velocity accumulators.
     * @param {number[][]} P - (Possibly exaggerated) input-space affinities.
     * @param {number} iteration - Global iteration counter (drives the momentum schedule).
     */
    gradientDescentStep(Y, YVel, P, iteration) {
        const n = Y.length;
        // FIX: previously a hard-coded `iteration < 250` even though the
        // exaggeration phase length is configurable; the momentum switch is
        // meant to coincide with the end of early exaggeration, so tie both
        // to the same option (default 250 preserves the old behavior).
        const momentum = iteration < this.earlyExaggerationIter ? 0.5 : 0.8;
        const Q = Array.from({ length: n }, () => new Array(n).fill(0));
        const embeddingDist = Array.from({ length: n }, () => new Array(n).fill(0));
        // Unnormalized Student-t similarities in the embedding space.
        let sumQ = 0;
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const dist = this.squaredDist(Y[i], Y[j]);
                embeddingDist[i][j] = dist;
                embeddingDist[j][i] = dist;
                const qVal = 1 / (1 + dist);
                Q[i][j] = qVal;
                Q[j][i] = qVal;
                sumQ += 2 * qVal;
            }
        }
        // FIX: floor the normalizer so n === 1 (or total underflow) cannot
        // produce 0/0 = NaN.
        const qNorm = Math.max(sumQ, 1e-12);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                Q[i][j] /= qNorm;
            }
        }
        // KL-divergence gradient: dC/dy_i = 4 * sum_j (p_ij - q_ij) (y_i - y_j) / (1 + |y_i - y_j|^2).
        const grads = Array.from({ length: n }, () => new Array(this.dim).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                if (i === j)
                    continue;
                const diff = P[i][j] - Q[i][j];
                const factor = (4 * diff) / (1 + embeddingDist[i][j]);
                for (let d = 0; d < this.dim; d++) {
                    grads[i][d] += factor * (Y[i][d] - Y[j][d]);
                }
            }
        }
        // Momentum update: velocity decays toward the (negated) gradient.
        for (let i = 0; i < n; i++) {
            for (let d = 0; d < this.dim; d++) {
                YVel[i][d] = momentum * YVel[i][d] - this.learningRate * grads[i][d];
                Y[i][d] += YVel[i][d];
            }
        }
    }
    /**
     * Build the symmetrized affinity matrix P. For each row a per-point
     * precision (beta = 1 / (2 sigma^2)) is found by binary search so the
     * row's Shannon entropy matches log(perplexity).
     * @param {number[][]} distances - Pairwise input-space distances.
     * @param {number} perplexity - Target perplexity.
     * @returns {number[][]} Symmetrized, normalized affinities (zero diagonal).
     */
    computePMatrix(distances, perplexity) {
        const n = distances.length;
        const P = Array.from({ length: n }, () => new Array(n).fill(0));
        const logU = Math.log(perplexity);
        for (let i = 0; i < n; i++) {
            let beta = 1;
            let betaMin = -Infinity;
            let betaMax = Infinity;
            // Infinity on the diagonal makes exp(-dist * beta) vanish for j === i.
            const row = distances[i].map((val, j) => (i === j ? Infinity : val));
            let tries = 0;
            const maxTries = 50;
            let currP = [];
            while (tries < maxTries) {
                currP = row.map((dist) => Math.exp(-dist * beta));
                const sumP = currP.reduce((acc, v) => acc + v, 0);
                const normalizedP = currP.map((v) => v / sumP);
                let currEntropy = 0;
                for (let j = 0; j < n; j++) {
                    if (normalizedP[j] > 1e-12) {
                        currEntropy -= normalizedP[j] * Math.log(normalizedP[j]);
                    }
                }
                const entropyDiff = currEntropy - logU;
                if (Math.abs(entropyDiff) < 1e-5) {
                    break;
                }
                if (entropyDiff > 0) {
                    // Entropy too high -> distribution too flat -> increase beta.
                    betaMin = beta;
                    beta = isFinite(betaMax) ? (beta + betaMax) / 2 : beta * 2;
                }
                else {
                    betaMax = beta;
                    beta = isFinite(betaMin) ? (beta + betaMin) / 2 : beta / 2;
                }
                tries++;
            }
            const sumP = currP.reduce((acc, v) => acc + v, 0);
            if (sumP > 0) {
                for (let j = 0; j < n; j++) {
                    P[i][j] = i === j ? 0 : currP[j] / sumP;
                }
            }
            else if (n > 1) {
                // FIX: degenerate row (all similarities underflowed to 0) used
                // to divide by zero and fill the row with NaN; fall back to
                // uniform affinities over the other points instead.
                for (let j = 0; j < n; j++) {
                    P[i][j] = i === j ? 0 : 1 / (n - 1);
                }
            }
        }
        // Symmetrize and normalize: p_ij = (p_j|i + p_i|j) / (2n).
        const P_sym = Array.from({ length: n }, () => new Array(n).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P_sym[i][j] = (P[i][j] + P[j][i]) / (2 * n);
            }
        }
        return P_sym;
    }
    /**
     * Multiply every affinity by `factor` in place (early-exaggeration phase).
     * @param {number[][]} P
     * @param {number} factor
     */
    applyExaggeration(P, factor) {
        const n = P.length;
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P[i][j] *= factor;
            }
        }
    }
    /**
     * Undo `applyExaggeration` by dividing every affinity by `factor` in place.
     * @param {number[][]} P
     * @param {number} factor
     */
    removeExaggeration(P, factor) {
        const n = P.length;
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P[i][j] /= factor;
            }
        }
    }
    /**
     * Symmetric pairwise distance matrix over `data` using `this.distanceFn`
     * (zero diagonal; each pair evaluated once).
     * @param {number[][]} data
     * @returns {number[][]}
     */
    computePairwiseDistances(data) {
        const n = data.length;
        const distMat = Array.from({ length: n }, () => new Array(n).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const d = this.distanceFn(data[i], data[j]);
                distMat[i][j] = d;
                distMat[j][i] = d;
            }
        }
        return distMat;
    }
    /**
     * Random initial embedding: n points of dimension d, each coordinate
     * uniform in [-5e-4, 5e-4).
     * @param {number} n - Number of points.
     * @param {number} d - Embedding dimensionality.
     * @returns {number[][]}
     */
    initializeEmbedding(n, d) {
        const Y = [];
        for (let i = 0; i < n; i++) {
            const row = [];
            for (let j = 0; j < d; j++) {
                row.push((this.random() - 0.5) * 1e-3);
            }
            Y.push(row);
        }
        return Y;
    }
    /**
     * Squared Euclidean distance between two equal-length vectors.
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number}
     */
    squaredDist(a, b) {
        let sum = 0;
        for (let i = 0; i < a.length; i++) {
            const diff = a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }
}
exports.TSNE = TSNE;
//# sourceMappingURL=t-sne.js.map