clustering-tfjs
Version:
High-performance TypeScript clustering algorithms (K-Means, Spectral, Agglomerative) with TensorFlow.js acceleration and scikit-learn compatibility
184 lines (183 loc) • 7.08 kB
JavaScript
;
/**
 * Compiler-emitted helper: exposes property `key` of `source` on `target`
 * under the name `alias` (defaults to `key`).
 *
 * When `Object.create` exists the property is installed with
 * `Object.defineProperty`; otherwise a plain assignment is used. An existing
 * implementation found on `this` is reused instead of redefining the helper.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy path: copy the current value with a plain assignment.
        return function (target, source, key, alias) {
            var exposedName = alias === undefined ? key : alias;
            target[exposedName] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exposedName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused as-is or must be
        // replaced by a forwarding getter that reads source[key] on each access.
        var needsForwardingGetter;
        if (!descriptor) {
            needsForwardingGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessors are only reused when the source is flagged __esModule.
            needsForwardingGetter = !source.__esModule;
        }
        else {
            // Data properties that can still change are wrapped so reads stay live.
            needsForwardingGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsForwardingGetter) {
            descriptor = {
                enumerable: true,
                get: function () { return source[key]; },
            };
        }
        Object.defineProperty(target, exposedName, descriptor);
    };
})();
/**
 * Compiler-emitted helper: attaches `value` to `target` as its "default"
 * member. With `Object.create` available the member is defined as an
 * enumerable property via `Object.defineProperty`; otherwise it is assigned
 * directly. An existing implementation found on `this` is reused.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    return function (target, value) {
        target["default"] = value;
    };
})();
/**
 * Compiler-emitted helper used for namespace-style imports of required modules.
 *
 * A module already flagged `__esModule` is returned untouched. Anything else
 * is wrapped in a fresh object: every own property except "default" is bound
 * through `__createBinding`, and the module itself is attached as "default"
 * through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: the native key lister, or a for-in based fallback.
    var keyLister = null;
    function collectOwnKeys(obj) {
        var names = [];
        for (var name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) {
                names[names.length] = name;
            }
        }
        return names;
    }
    function ownKeys(obj) {
        if (keyLister === null) {
            keyLister = Object.getOwnPropertyNames || collectOwnKeys;
        }
        return keyLister(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = ownKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] !== "default") {
                    __createBinding(namespace, mod, names[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Flag this CommonJS module as originating from ES-module syntax; helpers such
// as __importStar return modules carrying this flag unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder only: the actual array is assigned where BENCHMARK_CONFIGS is
// defined further down in this file.
exports.BENCHMARK_CONFIGS = void 0;
// These four are function declarations later in the file; declarations are
// hoisted, so the bindings already exist when the assignments below run.
exports.benchmarkAlgorithm = benchmarkAlgorithm;
exports.getAvailableBackends = getAvailableBackends;
exports.runBenchmarkSuite = runBenchmarkSuite;
exports.formatBenchmarkResults = formatBenchmarkResults;
const tf = __importStar(require("@tensorflow/tfjs"));
const perf_hooks_1 = require("perf_hooks");
const agglomerative_1 = require("../clustering/agglomerative");
const spectral_1 = require("../clustering/spectral");
const kmeans_1 = require("../clustering/kmeans");
const synthetic_1 = require("../datasets/synthetic");
/**
 * Dataset shapes iterated by runBenchmarkSuite.
 *
 * For each entry, benchmarkAlgorithm forwards `samples`, `features` and
 * `centers` to makeBlobs (as nSamples, nFeatures and centers) and also uses
 * `centers` as the requested cluster count. `label` only appears in the
 * progress message printed by runBenchmarkSuite.
 */
exports.BENCHMARK_CONFIGS = [
{ samples: 100, features: 10, centers: 3, label: 'small' },
{ samples: 1000, features: 50, centers: 5, label: 'medium' },
{ samples: 10000, features: 100, centers: 10, label: 'large' },
];
/**
 * Runs one clustering algorithm once on a synthetic blob dataset and reports
 * wall-clock time and TensorFlow.js memory counters.
 *
 * @param {string} algorithm - One of 'kmeans', 'spectral' or 'agglomerative'.
 * @param {{samples: number, features: number, centers: number}} config -
 *   Dataset shape; `centers` is also used as the requested cluster count.
 * @param {string} backend - Backend name handed to `tf.setBackend`.
 * @returns {Promise<object>} Record with `algorithm`, `backend`, `datasetSize`,
 *   `features`, `executionTime` (ms), `memoryUsed` (bytes, after minus before),
 *   `memoryPeak`, `tensorCount` (after minus before) and `backendInitTime` (ms).
 * @throws {Error} When `algorithm` is not a supported name, or when
 *   `tf.setBackend` reports that the backend failed to initialize.
 */
async function benchmarkAlgorithm(algorithm, config, backend) {
    // Factories are lazy so the estimator is still constructed inside the
    // timed region, exactly where the original switch constructed it.
    const estimatorFactories = {
        kmeans: () => new kmeans_1.KMeans({ nClusters: config.centers, randomState: 42 }),
        spectral: () => new spectral_1.SpectralClustering({
            nClusters: config.centers,
            affinity: 'rbf',
            randomState: 42,
        }),
        agglomerative: () => new agglomerative_1.AgglomerativeClustering({
            nClusters: config.centers,
            linkage: 'average',
        }),
    };
    // Reject unknown names up front. Previously an unrecognised algorithm fell
    // through the switch and produced a timing for no work at all.
    if (!Object.prototype.hasOwnProperty.call(estimatorFactories, algorithm)) {
        const supported = Object.keys(estimatorFactories).join(', ');
        throw new Error(`Unknown algorithm: ${String(algorithm)} (expected one of: ${supported})`);
    }
    // Generate dataset
    const { X } = (0, synthetic_1.makeBlobs)({
        nSamples: config.samples,
        nFeatures: config.features,
        centers: config.centers,
        randomState: 42,
    });
    // Initialize backend
    const backendInitStart = perf_hooks_1.performance.now();
    const backendReady = await tf.setBackend(backend);
    // setBackend resolves to false when initialization failed; continuing would
    // attribute the measurement to a backend that is not actually in use.
    // Only an explicit `false` is treated as failure.
    if (backendReady === false) {
        throw new Error(`Failed to initialize backend: ${backend}`);
    }
    await tf.ready();
    const backendInitTime = perf_hooks_1.performance.now() - backendInitStart;
    // Track memory
    const memBefore = tf.memory();
    // Run clustering
    const start = perf_hooks_1.performance.now();
    const estimator = estimatorFactories[algorithm]();
    await estimator.fit(X);
    // Read labels back inside the timed region for every algorithm. Before,
    // the spectral branch skipped this step, so its timing was not measured
    // the same way as the other two. The values themselves are not needed.
    const labels = estimator.labels_;
    if (!Array.isArray(labels) && labels != null && typeof labels.array === 'function') {
        await labels.array();
    }
    const executionTime = perf_hooks_1.performance.now() - start;
    const memAfter = tf.memory();
    return {
        algorithm,
        backend,
        datasetSize: config.samples,
        features: config.features,
        executionTime,
        memoryUsed: memAfter.numBytes - memBefore.numBytes,
        // NOTE(review): this is the byte count after the run, not a sampled
        // high-water mark — confirm whether a true peak is wanted.
        memoryPeak: memAfter.numBytes,
        tensorCount: memAfter.numTensors - memBefore.numTensors,
        backendInitTime,
    };
}
/**
 * Lists the backend names the benchmark suite should try.
 *
 * 'cpu' is always included. 'tensorflow' is added when
 * '@tensorflow/tfjs-node' can be required, and 'tensorflow-gpu' when
 * '@tensorflow/tfjs-node-gpu' can be required. A package that fails to load
 * is skipped silently; this is a best-effort probe, not an error.
 *
 * @returns {Promise<string[]>} Backend names in probe order.
 */
async function getAvailableBackends() {
    // Resolves to true when the loader completes, false when it throws or rejects.
    const canLoad = async (loadModule) => {
        try {
            await Promise.resolve().then(loadModule);
            return true;
        }
        catch {
            // Optional package not available, skip
            return false;
        }
    };
    const detected = ['cpu'];
    // TODO: Add WASM backend check ('@tensorflow/tfjs-backend-wasm') when types are available
    if (await canLoad(() => __importStar(require('@tensorflow/tfjs-node')))) {
        detected.push('tensorflow');
    }
    // NOTE(review): confirm 'tensorflow-gpu' is the name the GPU package
    // registers with tf.setBackend.
    if (await canLoad(() => __importStar(require('@tensorflow/tfjs-node-gpu')))) {
        detected.push('tensorflow-gpu');
    }
    return detected;
}
/**
 * Benchmarks every algorithm on every detected backend for every entry in
 * BENCHMARK_CONFIGS, one run at a time, printing progress to the console.
 *
 * A failing run is reported through console.error and does not stop the
 * suite.
 *
 * @returns {Promise<object[]>} The records returned by benchmarkAlgorithm, in
 *   execution order.
 */
async function runBenchmarkSuite() {
    const availableBackends = await getAvailableBackends();
    const algorithmNames = ['kmeans', 'spectral', 'agglomerative'];
    const collected = [];
    // Executes one (algorithm, dataset, backend) combination and records the outcome.
    const runCase = async (algorithmName, datasetConfig, backendName) => {
        console.log(`Benchmarking ${algorithmName} on ${backendName} with ${datasetConfig.label} dataset...`);
        try {
            const outcome = await benchmarkAlgorithm(algorithmName, datasetConfig, backendName);
            collected.push(outcome);
            const elapsedMs = outcome.executionTime.toFixed(2);
            console.log(` Time: ${elapsedMs}ms`);
            const usedMb = (outcome.memoryUsed / 1024 / 1024).toFixed(2);
            console.log(` Memory: ${usedMb}MB`);
        }
        catch (failure) {
            const reason = failure instanceof Error ? failure.message : String(failure);
            console.error(` Failed: ${reason}`);
        }
    };
    console.log(`Available backends: ${availableBackends.join(', ')}`);
    for (const backendName of availableBackends) {
        for (const algorithmName of algorithmNames) {
            for (const datasetConfig of exports.BENCHMARK_CONFIGS) {
                await runCase(algorithmName, datasetConfig, backendName);
            }
        }
    }
    return collected;
}
/**
 * Renders benchmark records as a Markdown document: a title followed by a
 * table with one row per record.
 *
 * Times are printed with two decimals; memory is converted from bytes to
 * megabytes (divided by 1024 twice) and printed with two decimals.
 *
 * @param {Iterable<object>} results - Records with `algorithm`, `backend`,
 *   `datasetSize`, `features`, `executionTime`, `memoryUsed` and
 *   `backendInitTime`.
 * @returns {string} Markdown text ending in a newline.
 */
function formatBenchmarkResults(results) {
    const heading = [
        '# Benchmark Results\n\n',
        '| Algorithm | Backend | Dataset | Time (ms) | Memory (MB) | Backend Init (ms) |\n',
        '|-----------|---------|---------|-----------|-------------|-------------------|\n',
    ].join('');
    const toRow = ({ algorithm, backend, datasetSize, features, executionTime, memoryUsed, backendInitTime }) => {
        const cells = [
            algorithm,
            backend,
            `${datasetSize}x${features}`,
            executionTime.toFixed(2),
            (memoryUsed / 1024 / 1024).toFixed(2),
            backendInitTime.toFixed(2),
        ];
        return `| ${cells.join(' | ')} |\n`;
    };
    return heading + [...results].map(toRow).join('');
}