UNPKG

tf-kmeans-node

Version:

A library for calculating K-Means using TensorFlow, with an added save-model function, for Node.js

265 lines 12.1 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; Object.defineProperty(exports, "__esModule", { value: true }); var tf = __importStar(require("@tensorflow/tfjs")); var fs = __importStar(require("fs")); var KMeans = (function () { function KMeans(_a) { var _b = _a === void 0 ? {} : _a, _c = _b.k, k = _c === void 0 ? 2 : _c, _d = _b.maxIter, maxIter = _d === void 0 ? 
10 : _d, _e = _b.distanceFunction, distanceFunction = _e === void 0 ? KMeans.euclideanDistance : _e, _f = _b.centroids, centroids = _f === void 0 ? [] : _f; this.k = 2; this.maxIter = 10; this.distanceFunction = KMeans.euclideanDistance; this.k = k; this.maxIter = maxIter; this.distanceFunction = distanceFunction; if (centroids && centroids.length) { console.log('Recovering k-means model...'); this.centroids = tf.tensor(centroids); } } KMeans.prototype.save = function (path) { var model = { k: this.k, maxIter: this.maxIter, centroids: this.centroids.arraySync(), }; fs.writeFileSync(path, JSON.stringify(model)); return model; }; KMeans.euclideanDistance = function (values, centroids) { return tf.tidy(function () { return values.squaredDifference(centroids).sum(1).sqrt(); }); }; KMeans.cosineDistance = function (values, centroids) { return tf.tidy(function () { var k = centroids.shape[0]; values = values.reshape([1, values.shape[0]]); var dot = tf.layers.dot({ axes: -1 }); var magnitudeV = dot.apply([values, values]).sqrt(); var magnitudeC = dot.apply([centroids, centroids]).sqrt(); var down = magnitudeV.mul(magnitudeC).reshape([k]); var up = dot.apply([values, centroids]).reshape([k]); var one = tf.fill([centroids.shape[0]], 1); return one.sub(up.div(down)); }); }; KMeans.prototype.generateIndices = function (rows) { var indices = []; indices.length = rows; for (var i = 0; i < indices.length; ++i) indices[i] = i; return indices; }; KMeans.prototype.newCentroidSingle = function (values, assignments, cluster, rows) { return tf.tidy(function () { var selectedIndices = []; selectedIndices.length = rows; selectedIndices = selectedIndices.fill(cluster); var selectedIndicesT = tf.tensor(selectedIndices); var where = tf.equal(assignments, selectedIndicesT).asType('int32'); where = where.reshape([where.shape[0], 1]); var count = where.sum(); var newCentroid = values.mul(where).sum(0).div(count); return newCentroid; }); }; KMeans.prototype.newCentroids = function (values, 
assignments) { var _this = this; return tf.tidy(function () { var rows = values.shape[0]; var centroids = []; for (var cluster = 0; cluster < _this.k; ++cluster) { centroids.push(_this.newCentroidSingle(values, assignments, cluster, rows)); } return tf.stack(centroids); }); }; KMeans.prototype.assignCluster = function (value, centroids) { var _this = this; return tf.tidy(function () { var distances = _this.distanceFunction(value, centroids); return { minIndex: distances.argMin(0), minValue: distances.min(0), minCenter: centroids.gather(distances.argMin(0)), }; }); }; KMeans.prototype.assignClusters = function (values, centroids) { var _this = this; return tf.tidy(function () { var rows = values.shape[0]; var minIndexes = []; var minValues = []; var minCenters = []; for (var _i = 0, _a = _this.generateIndices(rows); _i < _a.length; _i++) { var index = _a[_i]; var value = values.gather(index); var cluster = _this.assignCluster(value, centroids); minIndexes.push(cluster.minIndex); minValues.push(cluster.minValue); minCenters.push(cluster.minCenter); value.dispose(); } return { index: tf.stack(minIndexes), distance: tf.stack(minValues), center: tf.stack(minCenters), }; }); }; KMeans.prototype.randomSample = function (vals) { var _this = this; return tf.tidy(function () { var rows = vals.shape[0]; if (rows < _this.k) throw new Error('Rows are Less than K'); var indicesRaw = tf.util.createShuffledIndices(rows).slice(0, _this.k); var indices = []; indicesRaw.forEach(function (index) { return indices.push(index); }); return tf.gatherND(vals, tf.tensor(indices, [_this.k, 1], 'int32')); }); }; KMeans.prototype.checkCentroidSimmilarity = function (newCentroids, centroids, vals) { var _this = this; return tf.tidy(function () { return newCentroids .equal(centroids) .asType('int32') .sum(1) .div(vals.shape[1]) .sum() .equal(_this.k) .dataSync()[0]; }); }; KMeans.prototype.trainSingleStep = function (values) { var _this = this; return tf.tidy(function () { var predictions = 
_this.predict(values).index; var newCentroids = _this.newCentroids(values, predictions); return [newCentroids, predictions]; }); }; KMeans.prototype.train = function (values, callback) { if (callback === void 0) { callback = function (_centroid, _predictions) { }; } this.centroids = this.randomSample(values); var iter = 0; while (true) { var _a = this.trainSingleStep(values), newCentroids = _a[0], predictions = _a[1]; var same = this.checkCentroidSimmilarity(newCentroids, this.centroids, values); if (same || iter >= this.maxIter) { newCentroids.dispose(); return predictions; } this.centroids.dispose(); this.centroids = newCentroids; ++iter; callback(this.centroids, predictions); } }; KMeans.prototype.trainAsync = function (values, callback) { var _this = this; if (callback === void 0) { callback = function (_iter, _centroid, _predictions) { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) { return [2]; }); }); }; } return __awaiter(this, void 0, void 0, function () { var iter, _a, newCentroids, predictions, same; return __generator(this, function (_b) { switch (_b.label) { case 0: this.centroids = this.randomSample(values); iter = 0; _b.label = 1; case 1: if (!true) return [3, 3]; _a = this.trainSingleStep(values), newCentroids = _a[0], predictions = _a[1]; same = this.checkCentroidSimmilarity(newCentroids, this.centroids, values); if (same || iter >= this.maxIter) { newCentroids.dispose(); return [2, predictions]; } this.centroids.dispose(); this.centroids = newCentroids; return [4, callback(iter, this.centroids, predictions)]; case 2: _b.sent(); ++iter; return [3, 1]; case 3: return [2]; } }); }); }; KMeans.prototype.predict = function (y) { var _this = this; return tf.tidy(function () { if (y.shape[1] == null) y = y.reshape([1, y.shape[0]]); return _this.assignClusters(y, _this.centroids); }); }; KMeans.prototype.dispose = function () { this.centroids.dispose(); }; return KMeans; }()); exports.default = KMeans; //# 
sourceMappingURL=index.js.map