UNPKG

clustering-tfjs

Version:

High-performance TypeScript clustering algorithms (K-Means, Spectral, Agglomerative) with TensorFlow.js acceleration and scikit-learn compatibility

184 lines (183 loc) 7.08 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.BENCHMARK_CONFIGS = void 0; exports.benchmarkAlgorithm = benchmarkAlgorithm; exports.getAvailableBackends = getAvailableBackends; exports.runBenchmarkSuite = runBenchmarkSuite; exports.formatBenchmarkResults = formatBenchmarkResults; const tf = __importStar(require("@tensorflow/tfjs")); const perf_hooks_1 = require("perf_hooks"); const agglomerative_1 = require("../clustering/agglomerative"); const spectral_1 = require("../clustering/spectral"); const kmeans_1 = require("../clustering/kmeans"); const synthetic_1 = require("../datasets/synthetic"); exports.BENCHMARK_CONFIGS = [ { samples: 100, features: 10, centers: 3, label: 'small' }, { samples: 1000, features: 50, centers: 5, label: 'medium' }, { samples: 10000, features: 100, centers: 10, label: 'large' }, ]; async function benchmarkAlgorithm(algorithm, config, backend) { // Generate dataset const { X } = (0, synthetic_1.makeBlobs)({ nSamples: config.samples, nFeatures: config.features, centers: config.centers, randomState: 42, }); // Initialize backend const backendInitStart = perf_hooks_1.performance.now(); await tf.setBackend(backend); await tf.ready(); const backendInitTime = perf_hooks_1.performance.now() - backendInitStart; // Track memory const memBefore = tf.memory(); // Run clustering const start = perf_hooks_1.performance.now(); let _labels; switch (algorithm) { case 'kmeans': { const kmeans = new kmeans_1.KMeans({ nClusters: config.centers, randomState: 42 }); await kmeans.fit(X); _labels = Array.isArray(kmeans.labels_) ? kmeans.labels_ : (await kmeans.labels_.array()); break; } case 'spectral': { const spectral = new spectral_1.SpectralClustering({ nClusters: config.centers, affinity: 'rbf', randomState: 42, }); await spectral.fit(X); _labels = spectral.labels_; break; } case 'agglomerative': { const agglo = new agglomerative_1.AgglomerativeClustering({ nClusters: config.centers, linkage: 'average', }); await agglo.fit(X); _labels = Array.isArray(agglo.labels_) ? agglo.labels_ : (await agglo.labels_.array()); break; } } const executionTime = perf_hooks_1.performance.now() - start; const memAfter = tf.memory(); return { algorithm, backend, datasetSize: config.samples, features: config.features, executionTime, memoryUsed: memAfter.numBytes - memBefore.numBytes, memoryPeak: memAfter.numBytes, tensorCount: memAfter.numTensors - memBefore.numTensors, backendInitTime, }; } async function getAvailableBackends() { const backends = ['cpu']; // TODO: Add WASM backend check when types are available // Currently commented out to avoid TypeScript errors // try { // await import('@tensorflow/tfjs-backend-wasm'); // backends.push('wasm'); // } catch {} // Check if tfjs-node is available try { await Promise.resolve().then(() => __importStar(require('@tensorflow/tfjs-node'))); backends.push('tensorflow'); } catch { // tfjs-node not available, skip } // Check if tfjs-node-gpu is available try { // @ts-expect-error - tfjs-node-gpu may not be installed, this is expected await Promise.resolve().then(() => __importStar(require('@tensorflow/tfjs-node-gpu'))); backends.push('tensorflow-gpu'); } catch { // tfjs-node-gpu not available, skip } return backends; } async function runBenchmarkSuite() { const results = []; const backends = await getAvailableBackends(); const algorithms = [ 'kmeans', 'spectral', 'agglomerative', ]; console.log(`Available backends: ${backends.join(', ')}`); for (const backend of backends) { for (const algorithm of algorithms) { for (const config of exports.BENCHMARK_CONFIGS) { console.log(`Benchmarking ${algorithm} on ${backend} with ${config.label} dataset...`); try { const result = await benchmarkAlgorithm(algorithm, config, backend); results.push(result); console.log(` Time: ${result.executionTime.toFixed(2)}ms`); console.log(` Memory: ${(result.memoryUsed / 1024 / 1024).toFixed(2)}MB`); } catch (error) { console.error(` Failed: ${error instanceof Error ? error.message : String(error)}`); } } } } return results; } function formatBenchmarkResults(results) { let output = '# Benchmark Results\n\n'; output += '| Algorithm | Backend | Dataset | Time (ms) | Memory (MB) | Backend Init (ms) |\n'; output += '|-----------|---------|---------|-----------|-------------|-------------------|\n'; for (const result of results) { const dataset = `${result.datasetSize}x${result.features}`; const time = result.executionTime.toFixed(2); const memory = (result.memoryUsed / 1024 / 1024).toFixed(2); const init = result.backendInitTime.toFixed(2); output += `| ${result.algorithm} | ${result.backend} | ${dataset} | ${time} | ${memory} | ${init} |\n`; } return output; }