qminer
Version:
A C++ based data analytics platform for processing large-scale real-time streams containing structured and unstructured data
559 lines (546 loc) • 23 kB
HTML
<html lang="en">
<head>
<meta name="generator" content="JSDoc 3">
<meta charset="utf-8">
<title>Class: Gk</title>
<link rel="stylesheet" href="https://brick.a.ssl.fastly.net/Karla:400,400i,700,700i" type="text/css">
<link rel="stylesheet" href="https://brick.a.ssl.fastly.net/Noto+Serif:400,400i,700,700i" type="text/css">
<link rel="stylesheet" href="https://brick.a.ssl.fastly.net/Inconsolata:500" type="text/css">
<link href="css/baseline.css" rel="stylesheet">
</head>
<body onload="prettyPrint()">
<nav id="jsdoc-navbar" role="navigation" class="jsdoc-navbar">
<div id="jsdoc-navbar-container">
<div id="jsdoc-navbar-content">
<a href="index.html" class="jsdoc-navbar-package-name">QMiner JavaScript API v9.4.0</a>
</div>
</div>
</nav>
<div id="jsdoc-body-container">
<div id="jsdoc-content">
<div id="jsdoc-content-container">
<div id="jsdoc-main" role="main">
<header class="page-header">
<div class="symbol-detail-labels"><span class="label label-kind">class</span> <span class="label label-static">static</span></div>
<h1><small><a href="module-analytics.html">analytics</a>.<wbr></small><span class="symbol-name">Gk</span></h1>
<p class="source-link">Source: <a href="analyticsdoc.js.html#source-line-2491">analyticsdoc.<wbr>js:2491</a></p>
<div class="symbol-classdesc">
<p>Greenwald-Khanna algorithm for online quantile estimation. Given
a cumulative probability p, the algorithm returns the approximate value of
the p-th quantile.</p>
<p>The algorithm works by keeping a summary of buckets, each summarizing a
range of values. Through the run of the algorithm new buckets are created
and periodically merged if possible.</p>
<p>It was first explained in:
"Space-Efficient Online Computation of Quantile Summaries"
http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf</p>
<p>The error is bounded by the rank of the output element (not by the absolute value).
Specifically, the worst case error in rank is bounded by eps*n, where n is the
number of elements in the summary.</p>
</div>
<dl class="dl-compact">
</dl>
</header>
<section id="summary">
<div class="summary-callout">
<h2 class="summary-callout-heading">Properties</h2>
<div class="summary-content">
<div class="summary-column">
<dl class="dl-summary-callout">
<dt><a href="module-analytics.Gk.html#.init">init</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#.memory">memory</a></dt>
<dd>
</dd>
</dl>
</div>
<div class="summary-column">
<dl class="dl-summary-callout">
<dt><a href="module-analytics.Gk.html#.samples">samples</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#.size">size</a></dt>
<dd>
</dd>
</dl>
</div>
<div class="summary-column">
</div>
</div>
</div>
<div class="summary-callout">
<h2 class="summary-callout-heading">Methods</h2>
<div class="summary-content">
<div class="summary-column">
<dl class="dl-summary-callout">
<dt><a href="module-analytics.Gk.html#.compress">compress(val)</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#.compress">compress()</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#.kolmogorovStat">kolmogorovStat(distribution)</a></dt>
<dd>
</dd>
</dl>
</div>
<div class="summary-column">
<dl class="dl-summary-callout">
<dt><a href="module-analytics.Gk.html#.kolmogorovTest">kolmogorovTest(distribution, alpha)</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#.save">save(fout)</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#cdf">cdf(vals)</a></dt>
<dd>
</dd>
</dl>
</div>
<div class="summary-column">
<dl class="dl-summary-callout">
<dt><a href="module-analytics.Gk.html#getParams">getParams()</a></dt>
<dd>
</dd>
<dt><a href="module-analytics.Gk.html#quantile">quantile(pVals)</a></dt>
<dd>
</dd>
</dl>
</div>
</div>
</div>
</section>
<section>
<h2 id="Gk">new <span class="symbol-name">Gk</span><span class="signature"><span class="signature-params">([arg])</span></span></h2>
<section>
<h3>
Example
</h3>
<div>
<pre class="prettyprint"><code>// import modules
var qm = require('qminer');
var fs = require('qminer').fs;
var quants = qm.analytics.quantiles;
// create the Gk object
var gk = new quants.Gk({
eps: 0.001,
autoCompress: true
});
// create the data used for calculating quantiles
var inputs = [10, 1, 2, 8, 9, 5, 6, 4, 7, 3];
// fit the model
for (var i = 0; i < inputs.length; i++) {
gk.insert(inputs[i]);
}
// make the estimation for the 0.1 quantile
var quant = gk.quantile(0.1);
// save the model
gk.save(fs.openWrite('gk.bin')).close();
// open the gk model under a new variable
var gk2 = new quants.Gk(fs.openRead('gk.bin'));</code></pre>
</div>
</section>
<section>
<h3>Parameter</h3>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>arg</p>
</td>
<td>
<p>(module:analytics.quantiles~GkParam or <a href="module-fs.FIn.html">module:fs.FIn</a>)</p>
</td>
<td>
<p>Yes</p>
</td>
<td>
<p>Construction arguments. There are two ways of constructing:
<br>1. Using the module:analytics.quantiles~GkParam object,
<br>2. using the file input stream <a href="module-fs.FIn.html">module:fs.FIn</a>.
</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
</dl>
</section>
<section>
<h2>Properties</h2>
<section>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".init"><span class="symbol-name">init</span></h3>
<p>Returns the current size of the algorithm's summary in number of tuples.</p>
<dl class="dl-compact">
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".memory"><span class="symbol-name">memory</span></h3>
<p>Returns the model's current memory consumption.</p>
<dl class="dl-compact">
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".samples"><span class="symbol-name">samples</span></h3>
<p>Returns the number of samples seen by the model.</p>
<dl class="dl-compact">
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".size"><span class="symbol-name">size</span></h3>
<p>Returns the current size of the algorithm's summary in number of tuples.</p>
<dl class="dl-compact">
</dl>
</section>
<h2>Methods</h2>
<section>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".insert"><span class="symbol-name">insert</span><span class="signature"><span class="signature-params">(val)</span> → <span class="signature-returns"> module:analytics.quantiles.Gk</span></span></h3>
<p>Adds a new value to the summary.</p>
<section>
<h4>
Example
</h4>
<div>
<pre class="prettyprint"><code>var qm = require('qminer');
var gk = new qm.analytics.quantiles.Gk();
gk.insert(1.0);
gk.insert(2.0);</code></pre>
</div>
</section>
<section>
<h4>Parameter</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>val</p>
</td>
<td>
<p>number</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the value</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>module:analytics.quantiles.Gk</code> reference to self</p>
</dd>
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".compress"><span class="symbol-name">compress</span><span class="signature"><span class="signature-params">()</span></span></h3>
<p>Manually runs the compression procedure.</p>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p>reference to self</p>
</dd>
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".kolmogorovStat"><span class="symbol-name">kolmogorovStat</span><span class="signature"><span class="signature-params">(distribution)</span> → <span class="signature-returns"> number</span></span></h3>
<p>Compares this distribution to <code>distribution</code> and returns the Kolmogorov-Smirnov
statistic:</p>
<p>D_n,m = sup_x|f1(x) - f2(x)|</p>
<p>where f1 and f2 are the cumulative distribution functions of this distribution and
<code>distribution</code> respectively.
</p>
<section>
<h4>Parameter</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>distribution</p>
</td>
<td>
<p>module:analytics.quantiles.Gk</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the distribution to compare against</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>number</code> - the K-S statistic</p>
</dd>
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".kolmogorovTest"><span class="symbol-name">kolmogorovTest</span><span class="signature"><span class="signature-params">(distribution, alpha)</span> → <span class="signature-returns"> boolean</span></span></h3>
<p>Compares this distribution to <code>distribution</code> using the Kolmogorov-Smirnov test with
significance <code>alpha</code>.</p>
<section>
<h4>Parameters</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>distribution</p>
</td>
<td>
<p>module:analytics.quantiles.Gk</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the distribution to compare against</p>
</td>
</tr>
<tr>
<td>
<p>alpha</p>
</td>
<td>
<p>number</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the statistical significance</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>boolean</code> - true if the distributions differ</p>
</dd>
</dl>
<div class="symbol-detail-labels"><span class="label label-static">static</span></div>
<h3 id=".save"><span class="symbol-name">save</span><span class="signature"><span class="signature-params">(fout)</span> → <span class="signature-returns"> <a href="module-fs.FOut.html">module:fs.FOut</a></span></span></h3>
<p>Saves the object's state into the output stream.</p>
<section>
<h4>Parameter</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>fout</p>
</td>
<td>
<p><a href="module-fs.FOut.html">module:fs.FOut</a></p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the output stream</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code><a href="module-fs.FOut.html">module:fs.FOut</a></code> - the output stream</p>
</dd>
</dl>
<h3 id="cdf"><span class="symbol-name">cdf</span><span class="signature"><span class="signature-params">(vals)</span> → <span class="signature-returns"> (number or Array)</span></span></h3>
<p>Provided a given value or array of values it returns the corresponding
values of the cumulative distribution function.</p>
<section>
<h4>
Example
</h4>
<div>
<pre class="prettyprint"><code>var qm = require('qminer');
var gk = new qm.analytics.quantiles.Gk({
eps: 0.1
});
gk.insert(1.0);
gk.insert(2.0);
gk.insert(1.0);
gk.insert(3.0);
gk.insert(2.0);
console.log(gk.cdf(0)); // prints the CDF for x = 0
console.log(gk.cdf(2)); // prints the CDF for x = 2
console.log(gk.cdf(4)); // prints the CDF for x = 4</code></pre>
</div>
</section>
<section>
<h4>Parameter</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>vals</p>
</td>
<td>
<p>(number or Array)</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the values which we are querying (quantiles)</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>(number or Array)</code> pVals - depending on whether the input was a single value or array the method returns a probability or array of probabilities</p>
</dd>
</dl>
<h3 id="getParams"><span class="symbol-name">getParams</span><span class="signature"><span class="signature-params">()</span> → <span class="signature-returns"> module:analytics.quantiles~GkParam</span></span></h3>
<p>Returns the model's parameters as a JavaScript object (JSON). These parameters
are the same as are set through the constructor.</p>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>module:analytics.quantiles~GkParam</code> The construction parameters.</p>
<pre class="prettyprint"><code>var qm = require('qminer');
var analytics = qm.analytics;
var gk = new analytics.quantiles.Gk();
var params = gk.getParams();

console.log(params.eps);
console.log(params.autoCompress);</code></pre>
</dd>
</dl>
<h3 id="quantile"><span class="symbol-name">quantile</span><span class="signature"><span class="signature-params">(pVals)</span> → <span class="signature-returns"> (number or Array)</span></span></h3>
<p>Given an input cumulative probability, returns a quantile associated with that
probability (e.g. for input 0.5 it will return the median).</p>
<section>
<h4>
Example
</h4>
<div>
<pre class="prettyprint"><code>var qm = require('qminer');
var gk = new qm.analytics.quantiles.Gk({
eps: 0.1
});
gk.insert(1.0);
gk.insert(2.0);
gk.insert(1.0);
gk.insert(3.0);
gk.insert(2.0);
console.log(gk.quantile(0.01)); // prints the first percentile
console.log(gk.quantile(0.25)); // prints the first quartile
console.log(gk.quantile(0.5)); // prints the median</code></pre>
</div>
</section>
<section>
<h4>Parameter</h4>
<table class="jsdoc-details-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Optional</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<p>pVals</p>
</td>
<td>
<p>(number or Array)</p>
</td>
<td>
<p> </p>
</td>
<td>
<p>the p-values which we are querying</p>
</td>
</tr>
</tbody>
</table>
</section>
<dl class="dl-compact">
<dt>Returns</dt>
<dd>
<p><code>(number or Array)</code> quantiles - depending on whether the input was a single value or array the method returns a quantile or array of quantiles</p>
</dd>
</dl>
</section>
</section>
</div>
</div>
<nav id="jsdoc-toc-nav" role="navigation"></nav>
</div>
</div>
<footer id="jsdoc-footer" class="jsdoc-footer">
<div id="jsdoc-footer-container">
<p>
</p>
</div>
</footer>
<script src="scripts/jquery.min.js"></script>
<script src="scripts/tree.jquery.js"></script>
<script src="scripts/prettify.js"></script>
<script src="scripts/jsdoc-toc.js"></script>
<script src="scripts/linenumber.js"></script>
<script src="scripts/scrollanchor.js"></script>
</body>
</html>