qminer
Version:
A C++ based data analytics platform for processing large-scale real-time streams containing structured and unstructured data
399 lines (339 loc) • 14.4 kB
JavaScript
/**
* Copyright (c) 2015, Jozef Stefan Institute, Quintelligence d.o.o. and contributors
* All rights reserved.
*
* This source code is licensed under the FreeBSD license found in the
* LICENSE file in the root directory of this source tree.
*/
// JavaScript source code
var qm = require('../../index.js');
var quants = qm.analytics.quantiles;
var fs = qm.fs;
var assert = require("../../src/nodejs/scripts/assert.js");
/**
 * Tests for the `TDigest` quantile estimator: construction with default and
 * explicit parameters, inserting values and querying a quantile, and a
 * save/load round trip through a binary file.
 */
describe("TDigest test", function () {
    describe("Constructor test", function () {
        it("should return a default constructor", function () {
            var tdigest = new quants.TDigest();
            var params = tdigest.getParams();

            assert.strictEqual(params.seed, 0, "invalid seed");
            assert.strictEqual(params.clusters, 100, "invalid number of clusters");
            assert.strictEqual(params.minEps, 1e-4, "invalid max accuracy");
            assert.strictEqual(params.compression, "never", "invalid compression");
            // no value has been inserted yet
            assert(tdigest.init === false);
        });

        it("should construct using provided parameters", function () {
            var tdigest = new quants.TDigest({ seed: 1, clusters: 50, minEps: 1e-5, compression: "periodic" });
            var params = tdigest.getParams();

            assert.strictEqual(params.seed, 1, "invalid seed");
            assert.strictEqual(params.clusters, 50, "invalid number of clusters");
            assert.strictEqual(params.minEps, 1e-5, "invalid max accuracy");
            assert.strictEqual(params.compression, "periodic", "invalid compression");
            assert(tdigest.init === false);
        });
    });

    describe("Fit/quantile test", function () {
        it("It should return a the prediction for the given fit", function () {
            var tdigest = new quants.TDigest();
            assert(tdigest.init === false);

            var inputs = [10, 1, 2, 8, 9, 5, 6, 4, 7, 3];
            for (var i = 0; i < inputs.length; i++) {
                tdigest.insert(inputs[i]);
                // the model must report itself as initialized from the first insert on
                assert(tdigest.init === true);
            }

            var pred = tdigest.quantile(0.1);
            // TODO: pin the exact expected quantile. Until then, at least make
            // sure the query produces a usable number instead of asserting nothing.
            assert.strictEqual(typeof pred, "number", "quantile should return a number");
            assert(isFinite(pred));
        });
    });

    describe('Serialization Tests', function () {
        it('should serialize and deserialize', function () {
            var tdigest = new quants.TDigest({ seed: 1, clusters: 50, minEps: 1e-5 });
            tdigest.save(fs.openWrite('tdigest.bin')).close();

            var tdigest2 = new quants.TDigest(fs.openRead('tdigest.bin'));
            var params = tdigest2.getParams();

            assert.strictEqual(params.seed, 1, "invalid seed");
            assert.strictEqual(params.clusters, 50, "invalid number of clusters");
            assert.strictEqual(params.minEps, 1e-5, "invalid max accuracy");
            assert(tdigest.init === false);
            // the loaded copy, not only the original, must still be uninitialized
            assert(tdigest2.init === false);
        });
    });
});
/**
 * Tests for the `BufferedTDigest` quantile estimator: construction with default
 * and explicit parameters, a save/load round trip, and the point at which
 * buffered values become visible to `quantile` (automatic and manual flush).
 */
describe("BufferedTDigest test", function () {
    describe("Constructor test", function () {
        it("should return a default constructor", function () {
            var tdigest = new quants.BufferedTDigest();
            var params = tdigest.getParams();

            assert.strictEqual(params.seed, 0, "invalid seed");
            assert.strictEqual(params.delta, 100, "invalid delta");
            assert.strictEqual(params.bufferLen, 1000, "invalid buffer length: " + params.bufferLen);
            assert(tdigest.init === false);
        });

        it("should construct using provided parameters", function () {
            var tdigest = new quants.BufferedTDigest({
                seed: 1,
                delta: 50,
                bufferLen: 500
            });
            var params = tdigest.getParams();

            assert.strictEqual(params.seed, 1, "invalid seed");
            assert.strictEqual(params.delta, 50, "invalid delta");
            assert.strictEqual(params.bufferLen, 500, "invalid buffer length: " + params.bufferLen);
            assert(tdigest.init === false);
        });
    });

    describe('Serialization Tests', function () {
        it('should serialize and deserialize', function () {
            var tdigest = new quants.BufferedTDigest({
                seed: 1,
                delta: 50,
                bufferLen: 500
            });
            tdigest.save(fs.openWrite('bufftdigest.bin')).close();

            var tdigest2 = new quants.BufferedTDigest(fs.openRead('bufftdigest.bin'));
            var params = tdigest2.getParams();

            assert.strictEqual(params.seed, 1, "invalid seed");
            assert.strictEqual(params.delta, 50, "invalid delta");
            assert.strictEqual(params.bufferLen, 500, "invalid buffer length: " + params.bufferLen);
            assert(tdigest.init === false);
            // the loaded copy, not only the original, must still be uninitialized
            assert(tdigest2.init === false);
        });
    });

    describe('Testing buffer', function () {
        // NOTE(review): both tests below pass `clusters`, while the parameters
        // checked in the constructor tests are seed/delta/bufferLen - possibly
        // `delta` was intended here; confirm against the BufferedTDigest API.
        it('should auto flush at correct time', function () {
            var buffLen = 500;
            var tdigest = new quants.BufferedTDigest({
                seed: 1,
                clusters: 50,
                bufferLen: buffLen
            });

            // one insert short of the buffer length: every query is expected to return 0
            for (var i = 0; i < buffLen - 1; ++i) {
                tdigest.insert(3 + Math.random());
                var val = tdigest.quantile(0);
                assert.strictEqual(val, 0);
            }

            // the buffLen-th insert is expected to make the inserted values (all >= 3) visible
            tdigest.insert(3 + Math.random());
            assert(tdigest.quantile(0) > 0);
        });

        it('should flush manually at correct time', function () {
            var buffLen = 500;
            var tdigest = new quants.BufferedTDigest({
                seed: 1,
                clusters: 50,
                bufferLen: buffLen
            });

            // 100 inserts is below the buffer length, so queries are expected to return 0
            for (var i = 0; i < 100; ++i) {
                tdigest.insert(3 + Math.random());
                var val = tdigest.quantile(0);
                assert.strictEqual(val, 0);
            }

            tdigest.flush();
            assert(tdigest.quantile(0) > 0);
        });
    });
});
/**
 * Tests for the `Gk` quantile estimator: parameter handling, accuracy of the
 * quantile estimates before and after a save/load round trip, and basic
 * properties of the `cdf` function.
 */
describe('Gk test', function () {
    describe('Constructor test', function () {
        it('should set correct default parameters', function () {
            var gk = new quants.Gk();
            var params = gk.getParams();

            assert(params.eps != null);
            assert(params.autoCompress != null);
            assert(params.useBands != null);

            assert.strictEqual(params.eps, 0.01);
            assert.strictEqual(params.autoCompress, true);
            assert.strictEqual(params.useBands, true);
        });

        it('should set correct parameters', function () {
            var gk = new quants.Gk({
                eps: 0.1,
                autoCompress: false,
                useBands: false
            });
            var params = gk.getParams();

            assert(params.eps != null);
            assert(params.autoCompress != null);
            assert(params.useBands != null);

            assert.strictEqual(params.eps, 0.1);
            assert.strictEqual(params.autoCompress, false);
            assert.strictEqual(params.useBands, false);
        });
    });

    describe('Serialization test', function () {
        // The checks live inside an `it` so that a failing assertion is reported
        // as a failed test instead of throwing while the suite is being collected.
        it('should keep quantiles within the error bound before and after save/load', function () {
            // generous limit: the body performs many inserts and quantile queries
            this.timeout(10000);

            var batchSize = 1000;
            var nbatches = 10;
            var eps = .01;
            var maxRelErr = eps;

            var gk = new quants.Gk({
                eps: eps
            });

            // cumulative probabilities to query, in steps of 0.001
            // (the step is accumulated in floating point, so the end point is not exact)
            var targets = [];
            for (var p = 0; p <= 1; p += 0.001) {
                targets.push(p);
            }

            // the values 0 .. batchSize-1, re-shuffled and inserted once per batch
            var vals = [];
            for (var valN = 0; valN < batchSize; valN++) {
                vals.push(valN);
            }

            for (var batchN = 0; batchN < nbatches; batchN++) {
                // shuffle the array
                for (var i = 0; i < batchSize; i++) {
                    var swapN = Math.floor(Math.random()*batchSize);
                    var temp = vals[i];
                    vals[i] = vals[swapN];
                    vals[swapN] = temp;
                }

                for (var j = 0; j < batchSize; j++) {
                    gk.insert(vals[j]);
                }

                var quantiles = gk.quantile(targets);
                for (var targetN = 0; targetN < targets.length; targetN++) {
                    var prob = targets[targetN];
                    var quant = gk.quantile(prob);

                    // a scalar query must agree with the vectorized query
                    assert.strictEqual(quant, quantiles[targetN]);
                    // every batch holds each of 0 .. batchSize-1 once, so the
                    // quantile at `prob` is compared against prob*batchSize
                    assert(Math.floor((prob - maxRelErr)*batchSize) <= quant);
                    assert(Math.ceil((prob + maxRelErr)*batchSize) >= quant);
                }
            }

            gk.save(qm.fs.openWrite('gk-orig.dat')).close();
            var gk1 = new quants.Gk(qm.fs.openRead('gk-orig.dat'));

            // the loaded model must satisfy the same error bounds
            for (var cumProb = 0; cumProb <= 1; cumProb += 0.001) {
                var quant_hat = gk1.quantile(cumProb);
                assert(Math.floor((cumProb - maxRelErr)*batchSize) <= quant_hat);
                assert(Math.ceil((cumProb + maxRelErr)*batchSize) >= quant_hat);
            }
        });
    });

    describe('CDF test', function () {
        // shared fixture: 100 random integers from 0 .. 99
        var gk = new quants.Gk({
            eps: 0.01
        });
        for (var i = 0; i < 100; i++) {
            gk.insert(Math.floor(Math.random()*100));
        }

        it('values should be bounded between 0 and 1', function () {
            assert.strictEqual(gk.cdf(100), 1);
            assert.strictEqual(gk.cdf(-1), 0);
            assert.strictEqual(gk.cdf(1000), 1);
        });

        it('function should be non-decreasing', function () {
            var previous = 0;
            for (var x = 0; x <= 100; x++) {
                var current = gk.cdf(x);
                assert(previous <= current);
                previous = current;
            }
        });

        it('check relationship with quantile function', function () {
            // declared locally; the bare assignment used before leaked a global
            var pval = 0.5;
            var quant = gk.quantile(pval);
            assert(gk.cdf(quant) >= pval);
        });
    });
});
/**
 * Tests for the `BiasedGk` quantile estimator: parameter handling and accuracy
 * of the quantile estimates, for a low and a high target probability, before
 * and after a save/load round trip.
 */
describe('BiasedGk test', function () {
    describe('Constructor test', function () {
        it('should set correct default parameters', function () {
            var gk = new quants.BiasedGk();
            var params = gk.getParams();

            assert(params.eps != null);
            assert(params.targetProb != null);
            assert(params.compression != null);
            assert(params.useBands != null);

            assert.strictEqual(params.eps, 0.1);
            assert.strictEqual(params.targetProb, 0.01);
            assert.strictEqual(params.compression, "periodic");
            assert.strictEqual(params.useBands, true);
        });

        it('should set correct parameters', function () {
            var gk = new quants.BiasedGk({
                eps: 0.05,
                targetProb: 0.99,
                compression: "aggressive",
                useBands: false
            });
            var params = gk.getParams();

            assert(params.eps != null);
            assert(params.targetProb != null);
            assert(params.compression != null);
            assert(params.useBands != null);

            assert.strictEqual(params.eps, 0.05);
            assert.strictEqual(params.targetProb, 0.99);
            assert.strictEqual(params.compression, "aggressive");
            assert.strictEqual(params.useBands, false);
        });
    });

    describe('Serialization test', function () {
        // The checks live inside an `it` so that a failing assertion is reported
        // as a failed test instead of throwing while the suite is being collected.
        it('should keep quantiles within the error bound before and after save/load', function () {
            // generous limit: the body performs many inserts and quantile queries
            this.timeout(10000);

            var batchSize = 1000;
            var nbatches = 10;
            var eps = .1;
            var targetProb = 0.01;

            var gkLow = new quants.BiasedGk({
                eps: eps,
                targetProb: targetProb
            });
            var gkHigh = new quants.BiasedGk({
                eps: eps,
                targetProb: 1 - targetProb
            });

            // Shared bound check: the allowed absolute error is
            // ceil(refProb * eps * batchSize), measured against prob * batchSize.
            var isErrorInRange = function (refProb, prob, quant) {
                var mxRelError = refProb*eps;
                var mxError = Math.ceil(mxRelError*batchSize);
                var error = Math.abs(quant - prob*batchSize);
                return error <= mxError + 1e-9;
            };
            // low-biased model: reference probability is max(prob, targetProb)
            var isErrorInRangeLow = function (prob, quant) {
                return isErrorInRange(Math.max(prob, targetProb), prob, quant);
            };
            // high-biased model: reference probability is max(1 - prob, 1 - targetProb)
            var isErrorInRangeHigh = function (prob, quant) {
                return isErrorInRange(Math.max(1 - prob, 1 - targetProb), prob, quant);
            };

            // cumulative probabilities to query, in steps of 0.001
            // (the step is accumulated in floating point, so the end point is not exact)
            var targets = [];
            for (var p = 0; p <= 1; p += 0.001) {
                targets.push(p);
            }

            // the values 0 .. batchSize-1, re-shuffled and inserted once per batch
            var vals = [];
            for (var valN = 0; valN < batchSize; valN++) {
                vals.push(valN);
            }

            for (var batchN = 0; batchN < nbatches; batchN++) {
                // shuffle the array
                for (var i = 0; i < batchSize; i++) {
                    var swapN = Math.floor(Math.random()*batchSize);
                    var temp = vals[i];
                    vals[i] = vals[swapN];
                    vals[swapN] = temp;
                }

                for (var j = 0; j < batchSize; j++) {
                    gkLow.insert(vals[j]);
                    gkHigh.insert(vals[j]);
                }

                var quantsLow = gkLow.quantile(targets);
                var quantsHigh = gkHigh.quantile(targets);

                for (var targetN = 0; targetN < targets.length; targetN++) {
                    var prob = targets[targetN];
                    var quantLow = gkLow.quantile(prob);
                    var quantHigh = gkHigh.quantile(prob);

                    // scalar queries must agree with the vectorized queries
                    assert.strictEqual(quantLow, quantsLow[targetN]);
                    assert.strictEqual(quantHigh, quantsHigh[targetN]);

                    assert(isErrorInRangeLow(prob, quantLow));
                    assert(isErrorInRangeHigh(prob, quantHigh));
                }
            }

            gkLow.save(qm.fs.openWrite('gkLow-orig.dat')).close();
            gkHigh.save(qm.fs.openWrite('gkHigh-orig.dat')).close();

            var gkLow1 = new quants.BiasedGk(qm.fs.openRead('gkLow-orig.dat'));
            var gkHigh1 = new quants.BiasedGk(qm.fs.openRead('gkHigh-orig.dat'));

            // the loaded models must satisfy the same error bounds
            for (var cumProb = 0; cumProb <= 1; cumProb += 0.001) {
                var loadedLow = gkLow1.quantile(cumProb);
                var loadedHigh = gkHigh1.quantile(cumProb);

                assert(isErrorInRangeLow(cumProb, loadedLow));
                assert(isErrorInRangeHigh(cumProb, loadedHigh));
            }

            // the parameters must survive the round trip as well
            var params = gkLow1.getParams();

            assert(params.eps != null);
            assert(params.targetProb != null);
            assert(params.compression != null);
            assert(params.useBands != null);

            assert.strictEqual(params.eps, 0.1);
            assert.strictEqual(params.targetProb, 0.01);
            assert.strictEqual(params.compression, "periodic");
            assert.strictEqual(params.useBands, true);
        });
    });
});