// entropyx — a simple data mining library, written in TypeScript.
// Compiled JavaScript output: t-sne.js
Object.defineProperty(exports, "__esModule", { value: true });
exports.TSNE = void 0;
const distance_1 = require("@/base/distance");
const seedrandom_1 = require("seedrandom");
class TSNE {
    /**
     * t-distributed Stochastic Neighbor Embedding (t-SNE).
     *
     * Projects high-dimensional points into a low-dimensional embedding by
     * matching pairwise similarity distributions: Gaussian-kernel P in the
     * input space, Student-t-kernel Q in the embedding space, optimized by
     * momentum gradient descent with an early-exaggeration phase.
     *
     * @param {object} [options]
     * @param {number} [options.dimension=2] Output embedding dimensionality.
     * @param {number} [options.perplexity=30] Target perplexity (effective neighbor count).
     * @param {number} [options.learningRate=200] Gradient descent step size.
     * @param {number} [options.maxIterations=1000] Total gradient descent iterations.
     * @param {number} [options.earlyExaggeration=12] Multiplier applied to P in phase one.
     * @param {number} [options.earlyExaggerationIter=250] Iteration count of phase one.
     * @param {(a: number[], b: number[]) => number} [options.distanceFn]
     *   Pairwise distance function; defaults to squared Euclidean.
     * @param {number} [options.randomSeed] Seed for a deterministic embedding init.
     */
    constructor(options = {}) {
        this.dim = options.dimension ?? 2;
        this.perplexity = options.perplexity ?? 30;
        this.learningRate = options.learningRate ?? 200;
        this.maxIterations = options.maxIterations ?? 1000;
        this.earlyExaggeration = options.earlyExaggeration ?? 12;
        this.earlyExaggerationIter = options.earlyExaggerationIter ?? 250;
        // `??` short-circuits: the default distance module is only dereferenced
        // when the caller did not supply a distance function.
        this.distanceFn = options.distanceFn ?? distance_1.Distance.squaredEuclidean;
        if (typeof options.randomSeed === 'number') {
            this.random = (0, seedrandom_1.alea)(options.randomSeed.toString());
        }
        else {
            this.random = Math.random;
        }
    }
    /**
     * Compute the embedding for `data`.
     * @param {number[][]} data Input vectors, one row per sample.
     * @returns {{ embedding: number[][] }} One `dim`-length row per input sample.
     */
    fit(data) {
        const n = data.length;
        if (n === 0) {
            return { embedding: [] };
        }
        const distances = this.computePairwiseDistances(data);
        const P = this.computePMatrix(distances, this.perplexity);
        // Phase one: exaggerate attractive forces so well-separated clusters
        // form early in the optimization.
        this.applyExaggeration(P, this.earlyExaggeration);
        const Y = this.initializeEmbedding(n, this.dim);
        const YVel = Array.from({ length: n }, () => new Array(this.dim).fill(0));
        const totalIter = this.maxIterations;
        const stageOneIters = Math.min(this.earlyExaggerationIter, totalIter);
        const stageTwoIters = totalIter - stageOneIters;
        let iteration = 0;
        for (let i = 0; i < stageOneIters; i++) {
            this.gradientDescentStep(Y, YVel, P, iteration);
            iteration++;
        }
        // Phase two: restore the true P and refine the layout.
        this.removeExaggeration(P, this.earlyExaggeration);
        for (let i = 0; i < stageTwoIters; i++) {
            this.gradientDescentStep(Y, YVel, P, iteration);
            iteration++;
        }
        return { embedding: Y };
    }
    /**
     * One momentum gradient-descent step on the KL(P || Q) objective.
     * Mutates `Y` (positions) and `YVel` (velocities) in place.
     */
    gradientDescentStep(Y, YVel, P, iteration) {
        const n = Y.length;
        // Standard t-SNE momentum schedule: low momentum while the layout is
        // still unstable, higher momentum afterwards.
        const momentum = iteration < 250 ? 0.5 : 0.8;
        const Q = Array.from({ length: n }, () => new Array(n).fill(0));
        const embeddingDist = Array.from({ length: n }, () => new Array(n).fill(0));
        let sumQ = 0;
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const dist = this.squaredDist(Y[i], Y[j]);
                embeddingDist[i][j] = dist;
                embeddingDist[j][i] = dist;
                const qVal = 1 / (1 + dist); // Student-t kernel, 1 degree of freedom
                Q[i][j] = qVal;
                Q[j][i] = qVal;
                sumQ += 2 * qVal;
            }
        }
        // Guard against 0/0 -> NaN when n === 1 or all points coincide.
        const qNorm = Math.max(sumQ, 1e-12);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                Q[i][j] /= qNorm;
            }
        }
        const grads = Array.from({ length: n }, () => new Array(this.dim).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                if (i === j)
                    continue;
                // dKL/dy_i = 4 * (p_ij - q_ij) * (y_i - y_j) / (1 + ||y_i - y_j||^2)
                const diff = P[i][j] - Q[i][j];
                const factor = (4 * diff) / (1 + embeddingDist[i][j]);
                for (let d = 0; d < this.dim; d++) {
                    grads[i][d] += factor * (Y[i][d] - Y[j][d]);
                }
            }
        }
        for (let i = 0; i < n; i++) {
            for (let d = 0; d < this.dim; d++) {
                YVel[i][d] = momentum * YVel[i][d] - this.learningRate * grads[i][d];
                Y[i][d] += YVel[i][d];
            }
        }
    }
    /**
     * Build the symmetrized joint probability matrix P from pairwise input
     * distances, choosing a per-row Gaussian bandwidth (beta = 1 / (2*sigma^2))
     * by binary search so each row's entropy matches log(perplexity).
     */
    computePMatrix(distances, perplexity) {
        const n = distances.length;
        const P = Array.from({ length: n }, () => new Array(n).fill(0));
        const logU = Math.log(perplexity);
        for (let i = 0; i < n; i++) {
            let beta = 1;
            let betaMin = -Infinity;
            let betaMax = Infinity;
            // Infinity on the diagonal makes exp(-dist * beta) = 0, excluding
            // the self-similarity from the row.
            const row = distances[i].map((val, j) => (i === j ? Infinity : val));
            let tries = 0;
            const maxTries = 50;
            let currP = [];
            while (tries < maxTries) {
                currP = row.map((dist) => Math.exp(-dist * beta));
                // Epsilon floor: if every kernel value underflows to 0, keep the
                // entropy search on finite numbers instead of NaN.
                const sumP = Math.max(currP.reduce((acc, v) => acc + v, 0), 1e-12);
                const normalizedP = currP.map((v) => v / sumP);
                let currEntropy = 0;
                for (let j = 0; j < n; j++) {
                    if (normalizedP[j] > 1e-12) {
                        currEntropy -= normalizedP[j] * Math.log(normalizedP[j]);
                    }
                }
                const entropyDiff = currEntropy - logU;
                if (Math.abs(entropyDiff) < 1e-5) {
                    break;
                }
                if (entropyDiff > 0) {
                    // Entropy too high -> kernel too wide -> increase beta.
                    betaMin = beta;
                    beta = isFinite(betaMax) ? (beta + betaMax) / 2 : beta * 2;
                }
                else {
                    // Entropy too low -> kernel too narrow -> decrease beta.
                    betaMax = beta;
                    beta = isFinite(betaMin) ? (beta + betaMin) / 2 : beta / 2;
                }
                tries++;
            }
            // Same epsilon floor for the final row normalization.
            const sumP = Math.max(currP.reduce((acc, v) => acc + v, 0), 1e-12);
            for (let j = 0; j < n; j++) {
                P[i][j] = i === j ? 0 : currP[j] / sumP;
            }
        }
        // Symmetrize: p_ij = (p_j|i + p_i|j) / 2n, so P sums to 1 overall.
        const P_sym = Array.from({ length: n }, () => new Array(n).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P_sym[i][j] = (P[i][j] + P[j][i]) / (2 * n);
            }
        }
        return P_sym;
    }
    /** Multiply every entry of P by `factor` in place (early exaggeration). */
    applyExaggeration(P, factor) {
        const n = P.length;
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P[i][j] *= factor;
            }
        }
    }
    /** Divide every entry of P by `factor` in place (undo early exaggeration). */
    removeExaggeration(P, factor) {
        const n = P.length;
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                P[i][j] /= factor;
            }
        }
    }
    /** Symmetric pairwise distance matrix over `data` using `this.distanceFn`. */
    computePairwiseDistances(data) {
        const n = data.length;
        const distMat = Array.from({ length: n }, () => new Array(n).fill(0));
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const d = this.distanceFn(data[i], data[j]);
                distMat[i][j] = d;
                distMat[j][i] = d;
            }
        }
        return distMat;
    }
    /** n-by-d embedding initialized with small random values around 0. */
    initializeEmbedding(n, d) {
        const Y = [];
        for (let i = 0; i < n; i++) {
            const row = [];
            for (let j = 0; j < d; j++) {
                row.push((this.random() - 0.5) * 1e-3);
            }
            Y.push(row);
        }
        return Y;
    }
    /** Squared Euclidean distance between two equal-length vectors. */
    squaredDist(a, b) {
        let sum = 0;
        for (let i = 0; i < a.length; i++) {
            const diff = a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }
}
// CommonJS export of the public class (compiled from a TypeScript `export`).
exports.TSNE = TSNE;
//# sourceMappingURL=t-sne.js.map
;