tf-kmeans-node
Version:
A library for calculating K-Means using TensorFlow, with an added save-model function, for Node.js
265 lines • 12.1 kB
JavaScript
"use strict";
/**
 * Compiler-emitted interop helper: re-exposes property `k` of module `m` on
 * object `o` under the name `k2` (defaults to `k`).
 *
 * When `Object.create` exists the property is defined through a descriptor;
 * otherwise the value is simply copied. An implementation already present on
 * `this` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // No descriptor support detected: plain value copy.
        return function (o, m, k, k2) {
            var alias = k2 === undefined ? k : k2;
            o[alias] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is only reused as-is when the source is flagged as an ES module.
            needsGetter = !m.__esModule;
        }
        else {
            // A data property that can still change is wrapped so reads stay live.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    };
}());
/**
 * Compiler-emitted interop helper: stores `v` as the "default" member of the
 * namespace object `o`. Uses a property descriptor when `Object.create` is
 * available and a plain assignment otherwise. An implementation already
 * present on `this` takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    var defineDefault = function (o, v) {
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    };
    var assignDefault = function (o, v) {
        o["default"] = v;
    };
    return Object.create ? defineDefault : assignDefault;
}());
/**
 * Compiler-emitted interop helper for `import * as ns from "..."`.
 *
 * A module flagged with `__esModule` is returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own enumerable key except
 * "default" is bound onto it via `__createBinding`, and the module itself
 * becomes the namespace's "default" via `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * Compiler-emitted helper that drives a generator as an async function.
 *
 * @param thisArg     `this` value for the generator function.
 * @param _arguments  Arguments forwarded to the generator function (optional).
 * @param P           Promise constructor to use; falls back to the global Promise.
 * @param generator   Generator function whose yielded values are awaited.
 * @returns A promise (instance of `P`) settled with the generator's return
 *          value, or rejected with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap anything that is not already a `P` instance so `.then` is always available.
    var toPromise = function (value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    };
    return new P(function (resolve, reject) {
        var iterator;
        var onFulfilled;
        var onRejected;
        // Settle on completion, otherwise wait for the yielded value and resume.
        var advance = function (result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        onFulfilled = function (value) {
            try {
                advance(iterator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        onRejected = function (reason) {
            try {
                advance(iterator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Errors thrown here propagate out of the executor and reject the promise.
        iterator = generator.apply(thisArg, _arguments || []);
        advance(iterator.next());
    });
};
/**
 * Compiler-emitted helper that emulates a generator with an explicit state machine.
 *
 * `body` is invoked repeatedly with the state object `_` and `thisArg` as `this`; every
 * call returns an instruction `[opcode, operand]` that `step` interprets. Opcodes as
 * handled below: 0 = next, 1 = throw, 2 = return, 3 = jump to label, 4 = yield,
 * 5 = delegate to an inner iterator, 6 = error raised, 7 = leave a try region.
 *
 * @param thisArg  `this` value used when calling `body`.
 * @param body     State function; reads `_.label` / `_.sent()` and returns the next instruction.
 * @returns An object with `next`, `throw` and `return` methods (and `Symbol.iterator`
 *          when `Symbol` is available) that yields `{ value, done }` results.
 */
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_`: machine state handed to `body` — `label` is the state to run next, `sent()` returns the
// pending operand (or throws it when the pending opcode has bit 0 set), `trys` is a stack of
// active try regions and `ops` holds instructions deferred while a region unwinds.
// `f`: re-entrancy guard, `y`: inner iterator being delegated to, `t`: scratch, `g`: returned object.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Builds the public method for opcode `n`; it feeds `[n, v]` into the machine.
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
if (f) throw new TypeError("Generator is already executing.");
// Runs until an instruction returns a result or `_` is cleared (machine finished).
while (_) try {
// Mark as running. While delegating, forward the op to the matching method of `y`
// and hand its result straight back as long as the inner iterator is not done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// Delegation finished: drop `y` and turn the inner iterator's final value into the next op.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// next / throw: remember the op so `_.sent()` can return or throw its operand.
case 0: case 1: t = op; break;
// yield: advance the label and surface the operand as a not-done result.
case 4: _.label++; return { value: op[1], done: false };
// delegate: advance the label and start forwarding to the iterator in op[1].
case 5: _.label++; y = op[1]; op = [0]; continue;
// leave try region: resume the deferred op and pop the region.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// Remaining opcodes (2, 3, 6). `t` becomes the innermost try region, if any.
// NOTE(review): the region layout appears to be [start, catch, finally, end] labels — confirm against tslib.
// Error or return with no enclosing region: finish the machine.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// Jump that stays outside any region, or inside the current one: just set the label.
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// Error raised before label t[1]: continue at t[1] and keep the error available through `_.sent()`.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Still before label t[2]: run from t[2] first and defer the current op.
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
// Region fully unwound: discard its deferred op (if it had a t[2]) and pop it.
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Execute the next state of the user code.
op = body.call(thisArg, _);
// Anything thrown becomes an error instruction; the running flag and scratch are always reset.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Machine finished: rethrow when the final opcode has bit 0 or bit 2 set (e.g. 6), otherwise report completion.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
Object.defineProperty(exports, "__esModule", { value: true });
var tf = __importStar(require("@tensorflow/tfjs"));
var fs = __importStar(require("fs"));
/**
 * K-Means clustering built on TensorFlow.js tensors.
 *
 * The constructor stores the hyper-parameters (and optionally restores saved
 * centroids), `train` / `trainAsync` fit the centroids, `predict` assigns
 * samples to their nearest centroid and `save` writes the model to disk as JSON.
 */
var KMeans = (function () {
    /**
     * Throws a descriptive error when the model has no centroids yet.
     * @param {KMeans} model   Instance to check.
     * @param {string} action  Name of the operation being attempted (used in the message).
     */
    function assertHasCentroids(model, action) {
        if (!model.centroids) {
            throw new Error('KMeans.' + action + ': no centroids available; train the model or pass centroids to the constructor first');
        }
    }
    /**
     * @param {Object} [options]
     * @param {number} [options.k=2]         Number of clusters.
     * @param {number} [options.maxIter=10]  Maximum number of training iterations.
     * @param {Function} [options.distanceFunction=KMeans.euclideanDistance]
     *        Called as `distanceFunction(sample, centroids)`; its result is reduced
     *        with `argMin(0)` / `min(0)` to pick the nearest centroid.
     * @param {Array} [options.centroids=[]] Previously saved centroids as a nested
     *        array (for example the `centroids` field returned by `save`). When
     *        non-empty they are turned into a tensor and a log line is printed.
     */
    function KMeans(options) {
        var opts = options === void 0 ? {} : options;
        var centroids = opts.centroids === void 0 ? [] : opts.centroids;
        this.k = opts.k === void 0 ? 2 : opts.k;
        this.maxIter = opts.maxIter === void 0 ? 10 : opts.maxIter;
        this.distanceFunction = opts.distanceFunction === void 0 ? KMeans.euclideanDistance : opts.distanceFunction;
        if (centroids && centroids.length) {
            console.log('Recovering k-means model...');
            this.centroids = tf.tensor(centroids);
        }
    }
    /**
     * Writes `{ k, maxIter, centroids }` as JSON to `path` (synchronously) and
     * returns that object. The distance function is not part of the saved model.
     * @param {string} path  Destination file.
     * @returns {{k: number, maxIter: number, centroids: Array}} The serialised model.
     * @throws {Error} When the model has no centroids yet.
     */
    KMeans.prototype.save = function (path) {
        assertHasCentroids(this, 'save');
        var model = {
            k: this.k,
            maxIter: this.maxIter,
            centroids: this.centroids.arraySync(),
        };
        fs.writeFileSync(path, JSON.stringify(model));
        return model;
    };
    /**
     * Euclidean distance: square root of the squared differences summed over axis 1.
     * @param values     Sample tensor (a single gathered row when called from `assignClusters`).
     * @param centroids  Centroid tensor; `values` is broadcast against it.
     * @returns Tensor of distances, one per centroid row.
     */
    KMeans.euclideanDistance = function (values, centroids) {
        return tf.tidy(function () { return values.squaredDifference(centroids).sum(1).sqrt(); });
    };
    /**
     * Cosine distance: `1 - dot(v, c) / (|v| * |c|)` for every centroid `c`.
     * @param values     Rank-1 sample tensor (it is reshaped to a single row).
     * @param centroids  Centroid tensor whose first dimension is the number of centroids.
     * @returns Tensor of shape `[centroids.shape[0]]`.
     */
    KMeans.cosineDistance = function (values, centroids) {
        return tf.tidy(function () {
            var k = centroids.shape[0];
            var row = values.reshape([1, values.shape[0]]);
            var dot = tf.layers.dot({ axes: -1 });
            var magnitudeV = dot.apply([row, row]).sqrt();
            var magnitudeC = dot.apply([centroids, centroids]).sqrt();
            var down = magnitudeV.mul(magnitudeC).reshape([k]);
            var up = dot.apply([row, centroids]).reshape([k]);
            var one = tf.fill([k], 1);
            return one.sub(up.div(down));
        });
    };
    /**
     * @param {number} rows  Number of indices to produce.
     * @returns {number[]} The array `[0, 1, ..., rows - 1]`.
     */
    KMeans.prototype.generateIndices = function (rows) {
        var indices = [];
        for (var i = 0; i < rows; ++i) {
            indices.push(i);
        }
        return indices;
    };
    /**
     * Computes the new centroid of one cluster as the mean of the rows assigned to it.
     *
     * When no row is assigned to `cluster` the mean would be a division by zero
     * (a NaN centroid); in that case the cluster's current centroid is kept, if
     * the model has one.
     *
     * @param values       Data tensor, one sample per row.
     * @param assignments  Tensor with the cluster index of every row.
     * @param {number} cluster  Index of the cluster to recompute.
     * @param {number} rows     Number of rows in `values`.
     * @returns Tensor holding the centroid of `cluster`.
     */
    KMeans.prototype.newCentroidSingle = function (values, assignments, cluster, rows) {
        var _this = this;
        return tf.tidy(function () {
            var clusterIds = [];
            clusterIds.length = rows;
            clusterIds = clusterIds.fill(cluster);
            // 1 for rows belonging to `cluster`, 0 otherwise; shaped as a column so it
            // broadcasts over every feature of `values`.
            var mask = tf.equal(assignments, tf.tensor(clusterIds)).asType('int32');
            mask = mask.reshape([mask.shape[0], 1]);
            var count = mask.sum();
            if (_this.centroids && count.dataSync()[0] === 0) {
                // Empty cluster: keep the previous centroid instead of producing NaN.
                return _this.centroids.gather(cluster);
            }
            return values.mul(mask).sum(0).div(count);
        });
    };
    /**
     * Recomputes all `k` centroids from the current assignments.
     * @param values       Data tensor, one sample per row.
     * @param assignments  Tensor with the cluster index of every row.
     * @returns Stacked tensor with one centroid per cluster.
     */
    KMeans.prototype.newCentroids = function (values, assignments) {
        var _this = this;
        return tf.tidy(function () {
            var rows = values.shape[0];
            var centroids = [];
            for (var cluster = 0; cluster < _this.k; ++cluster) {
                centroids.push(_this.newCentroidSingle(values, assignments, cluster, rows));
            }
            return tf.stack(centroids);
        });
    };
    /**
     * Finds the centroid nearest to a single sample using `this.distanceFunction`.
     * @param value      Sample tensor.
     * @param centroids  Centroid tensor.
     * @returns {{minIndex, minValue, minCenter}} Index of the nearest centroid,
     *          its distance, and the centroid itself (all tensors).
     */
    KMeans.prototype.assignCluster = function (value, centroids) {
        var _this = this;
        return tf.tidy(function () {
            var distances = _this.distanceFunction(value, centroids);
            // Computed once and reused both as the result and as the gather index.
            var nearest = distances.argMin(0);
            return {
                minIndex: nearest,
                minValue: distances.min(0),
                minCenter: centroids.gather(nearest),
            };
        });
    };
    /**
     * Assigns every row of `values` to its nearest centroid, one row at a time.
     * @param values     Data tensor, one sample per row.
     * @param centroids  Centroid tensor.
     * @returns {{index, distance, center}} Stacked per-row results of `assignCluster`.
     */
    KMeans.prototype.assignClusters = function (values, centroids) {
        var _this = this;
        return tf.tidy(function () {
            var rows = values.shape[0];
            var minIndexes = [];
            var minValues = [];
            var minCenters = [];
            for (var _i = 0, _a = _this.generateIndices(rows); _i < _a.length; _i++) {
                var index = _a[_i];
                var value = values.gather(index);
                var cluster = _this.assignCluster(value, centroids);
                minIndexes.push(cluster.minIndex);
                minValues.push(cluster.minValue);
                minCenters.push(cluster.minCenter);
                // The gathered row is no longer needed once its cluster is known.
                value.dispose();
            }
            return {
                index: tf.stack(minIndexes),
                distance: tf.stack(minValues),
                center: tf.stack(minCenters),
            };
        });
    };
    /**
     * Picks `k` distinct rows of `vals` at random (used as the initial centroids).
     * @param vals  Data tensor, one sample per row.
     * @returns Tensor made of the `k` selected rows.
     * @throws {Error} When `vals` has fewer than `k` rows.
     */
    KMeans.prototype.randomSample = function (vals) {
        var _this = this;
        return tf.tidy(function () {
            var rows = vals.shape[0];
            if (rows < _this.k)
                throw new Error('Rows are Less than K');
            // Copy the shuffled indices into a plain array before building the index tensor.
            var indices = Array.from(tf.util.createShuffledIndices(rows).slice(0, _this.k));
            return tf.gatherND(vals, tf.tensor(indices, [_this.k, 1], 'int32'));
        });
    };
    /**
     * Tells whether two centroid tensors are element-wise identical.
     *
     * Per row, the share of equal components is computed (count of equal entries
     * divided by `vals.shape[1]`); the shares add up to `k` only when every
     * component of every centroid matches.
     *
     * @param newCentroids  Candidate centroids.
     * @param centroids     Current centroids.
     * @param vals          Data tensor; only `vals.shape[1]` is read.
     * @returns {number} First element of the comparison result read back with
     *          `dataSync()` (truthy when the centroids are identical).
     */
    KMeans.prototype.checkCentroidSimmilarity = function (newCentroids, centroids, vals) {
        var _this = this;
        return tf.tidy(function () {
            return newCentroids
                .equal(centroids)
                .asType('int32')
                .sum(1)
                .div(vals.shape[1])
                .sum()
                .equal(_this.k)
                .dataSync()[0];
        });
    };
    /**
     * Runs one K-Means iteration: assign rows to the current centroids, then
     * recompute the centroids from those assignments.
     * @param values  Data tensor, one sample per row.
     * @returns {Array} `[newCentroids, predictions]`.
     */
    KMeans.prototype.trainSingleStep = function (values) {
        var _this = this;
        return tf.tidy(function () {
            var predictions = _this.predict(values).index;
            var newCentroids = _this.newCentroids(values, predictions);
            return [newCentroids, predictions];
        });
    };
    /**
     * Fits the centroids synchronously, starting from `k` random rows of `values`.
     *
     * Stops when the centroids no longer change or after `maxIter` updates.
     * After every update `callback(centroids, predictions)` is invoked. The
     * centroid tensor passed to the callback is disposed on the next update;
     * the intermediate `predictions` tensors are not disposed by this method.
     *
     * @param values  Data tensor, one sample per row.
     * @param {Function} [callback]  Invoked as `callback(centroids, predictions)`.
     * @returns Tensor with the cluster index of every row for the final centroids.
     */
    KMeans.prototype.train = function (values, callback) {
        if (callback === void 0) { callback = function (_centroid, _predictions) { }; }
        this.centroids = this.randomSample(values);
        var iter = 0;
        while (true) {
            var _a = this.trainSingleStep(values), newCentroids = _a[0], predictions = _a[1];
            var same = this.checkCentroidSimmilarity(newCentroids, this.centroids, values);
            if (same || iter >= this.maxIter) {
                // The candidate is discarded; `predictions` belongs to the centroids that are kept.
                newCentroids.dispose();
                return predictions;
            }
            this.centroids.dispose();
            this.centroids = newCentroids;
            ++iter;
            callback(this.centroids, predictions);
        }
    };
    /**
     * Asynchronous variant of `train`: same loop, but the callback is awaited
     * after every centroid update and receives the iteration number first.
     *
     * @param values  Data tensor, one sample per row.
     * @param {Function} [callback]  Invoked as `callback(iter, centroids, predictions)`
     *        with the zero-based iteration number; its result is awaited.
     * @returns {Promise} Resolves with the cluster index tensor for the final centroids.
     */
    KMeans.prototype.trainAsync = function (values, callback) {
        var _this = this;
        if (callback === void 0) { callback = function (_iter, _centroid, _predictions) { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) {
            return [2];
        }); }); }; }
        return __awaiter(this, void 0, void 0, function () {
            var iter, _a, newCentroids, predictions, same;
            return __generator(this, function (_b) {
                switch (_b.label) {
                    case 0:
                        this.centroids = this.randomSample(values);
                        iter = 0;
                        _b.label = 1;
                    case 1:
                        if (!true) return [3, 3];
                        _a = this.trainSingleStep(values), newCentroids = _a[0], predictions = _a[1];
                        same = this.checkCentroidSimmilarity(newCentroids, this.centroids, values);
                        if (same || iter >= this.maxIter) {
                            newCentroids.dispose();
                            return [2, predictions];
                        }
                        this.centroids.dispose();
                        this.centroids = newCentroids;
                        return [4, callback(iter, this.centroids, predictions)];
                    case 2:
                        _b.sent();
                        ++iter;
                        return [3, 1];
                    case 3: return [2];
                }
            });
        });
    };
    /**
     * Assigns samples to the nearest centroid of the fitted model.
     * @param y  Data tensor; a rank-1 tensor is treated as a single sample.
     * @returns {{index, distance, center}} See `assignClusters`.
     * @throws {Error} When the model has no centroids yet.
     */
    KMeans.prototype.predict = function (y) {
        var _this = this;
        assertHasCentroids(this, 'predict');
        return tf.tidy(function () {
            if (y.shape[1] == null)
                y = y.reshape([1, y.shape[0]]);
            return _this.assignClusters(y, _this.centroids);
        });
    };
    /**
     * Releases the centroid tensor. Does nothing when the model has no centroids.
     */
    KMeans.prototype.dispose = function () {
        if (this.centroids) {
            this.centroids.dispose();
        }
    };
    return KMeans;
}());
// Expose the KMeans class as this module's default export.
exports.default = KMeans;
//# sourceMappingURL=index.js.map