UNPKG

wink-statistics

Version:

Fast and Numerically Stable Statistical Analysis Utilities

140 lines (127 loc) 4.72 kB
//     wink-statistics
//     Fast and Numerically Stable Statistical Analysis Utilities.
//
//     Copyright (C) GRAYPE Systems Private Limited
//
//     This file is part of “wink-statistics”.
//
//     Permission is hereby granted, free of charge, to any person obtaining a
//     copy of this software and associated documentation files (the "Software"),
//     to deal in the Software without restriction, including without limitation
//     the rights to use, copy, modify, merge, publish, distribute, sublicense,
//     and/or sell copies of the Software, and to permit persons to whom the
//     Software is furnished to do so, subject to the following conditions:
//
//     The above copyright notice and this permission notice shall be included
//     in all copies or substantial portions of the Software.
//
//     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
//     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
//     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
//     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
//     DEALINGS IN THE SOFTWARE.

// ## streaming

// Load wink helpers for object to array conversion & sorting.
var helpers = require( 'wink-helpers' );
var getValidFD = require( './get-valid-fd.js' );

// ### freqTable
/**
 *
 * Incrementally builds a frequency table: every value arriving from the data
 * stream increments the count of its category.
 *
 * The [`build()`](https://winkjs.org/wink-statistics/Stream.html#build) takes
 * a single argument — the incoming value, either a string or a number. It is
 * used as a property key, so it is keyed by its string form. `compute()` is an
 * alias of `build()`.
 *
 * The `value()` returns the underlying category-to-count object itself (not a
 * copy).
 *
 * The [`result()`](https://winkjs.org/wink-statistics/Stream.html#result) returns
 * an object containing the frequency `table` sorted in descending order of
 * category frequency, along with the table `size` (number of categories), the
 * `sum` of frequencies, `x2` — chi-squared statistic, `df` — degree of
 * freedom (`size - 1`), and the `entropy` (in bits, as it uses `log2`).
 * It accepts an optional `fractionDigits` argument that is passed through
 * `get-valid-fd` and then used to round `percentage`, `expected`, `x2` and
 * `entropy` (the example below suggests a default of 4 — confirm against
 * `get-valid-fd.js`).
 *
 * The `x2` along with the `df` can be used to test the hypothesis, "the
 * distribution is uniform". Each row's `percentage` is the category's share of
 * the `sum` expressed in percent, and `expected` is the count every category
 * would have under a uniform distribution, i.e. `sum / size`.
 *
 * The `reset()` discards all accumulated counts.
 *
 * @memberof streaming#
 * @return {Stream} Object containing methods such as `build()`, `result()` & `reset()`.
 * @example
 * var ft = freqTable();
 * ft.build( 'Tea' );
 * ft.build( 'Tea' );
 * ft.build( 'Tea' );
 * ft.build( 'Pepsi' );
 * ft.build( 'Pepsi' );
 * ft.build( 'Gin' );
 * ft.build( 'Coke' );
 * ft.build( 'Coke' );
 * ft.value();
 * // returns { Tea: 3, Pepsi: 2, Gin: 1, Coke: 2 }
 * ft.result();
 * // returns {
 * //  table: [
 * //   { category: 'Tea', observed: 3, percentage: 37.5, expected: 2 },
 * //   { category: 'Pepsi', observed: 2, percentage: 25, expected: 2 },
 * //   { category: 'Coke', observed: 2, percentage: 25, expected: 2 },
 * //   { category: 'Gin', observed: 1, percentage: 12.5, expected: 2 }
 * //  ],
 * //  size: 4,
 * //  sum: 8,
 * //  x2: 1,
 * //  df: 3,
 * //  entropy: 1.9056
 * // }
 */
var freqTable = function () {
  // Prototype-less dictionary mapping each category to its observed count.
  var counts = Object.create( null );
  // Total number of values received so far.
  var total = 0;
  // The stream object handed back to the caller.
  var stream = Object.create( null );

  // Registers one occurrence of `x`; always returns `undefined`.
  stream.build = function ( x ) {
    counts[ x ] = ( counts[ x ] || 0 ) + 1;
    total += 1;
  }; // build()

  // Exposes the live category-to-count dictionary.
  stream.value = function () {
    return counts;
  }; // value()

  // Summarises the counts gathered so far; see the JSDoc above for the shape.
  stream.result = function ( fractionDigits ) {
    var fd = getValidFD( fractionDigits );
    // Array of `[ category, count ]` pairs.
    var pairs = helpers.object.table( counts );
    var size = pairs.length;
    // Count each category would have if the distribution were uniform.
    var uniformCount = total / size;
    var chiSquared = 0;
    var bits = 0;
    var rows;
    var summary = Object.create( null );

    pairs.sort( helpers.array.descendingOnValue );

    rows = pairs.map( function ( pair ) {
      var observed = pair[ 1 ];
      var share = observed / total;
      var deviation = observed - uniformCount;
      var row = Object.create( null );

      row.category = pair[ 0 ];
      row.observed = observed;
      row.percentage = +( share * 100 ).toFixed( fd );
      row.expected = +uniformCount.toFixed( fd );

      // Chi-squared term, (O - E)^2 / E, with the division performed first.
      chiSquared += ( deviation * ( deviation / uniformCount ) );
      // Shannon entropy term; `share` is never 0 as every stored count is >= 1.
      bits += -share * Math.log2( share );

      return row;
    } );

    summary.table = rows;
    summary.size = size;
    summary.sum = total;
    summary.x2 = +chiSquared.toFixed( fd );
    summary.df = ( size - 1 );
    summary.entropy = +bits.toFixed( fd );

    return summary;
  }; // result()

  // Drops every count and starts over with an empty table.
  stream.reset = function () {
    counts = Object.create( null );
    total = 0;
  }; // reset()

  // `compute()` is simply another name for `build()`.
  stream.compute = stream.build;

  return stream;
}; // freqTable()

module.exports = freqTable;