// xtabs
// Version:
// A cross tabulation library for Node.js
// 439 lines (386 loc) • 12.5 kB
// JavaScript
//
// Factor related code.
//
// Collect the distinct values of `x`.
//
// Non-null values are used as object keys, so they are returned in their
// string form and in the key order produced by Object.keys. If `x` contains
// at least one null, a single null is appended as the last element.
//
// x: array-like of values.
// Returns: array of distinct string keys, optionally followed by null.
var unique = function(x) {
  // A prototype-less dictionary: with a plain `{}` values such as
  // "constructor" or "toString" look like they were already seen (they are
  // inherited from Object.prototype) and would be silently dropped.
  var seen = Object.create(null);
  var hasNull = false;
  for (var i = 0; i < x.length; i++) {
    if (x[i] === null) hasNull = true;
    else seen[x[i]] = true;
  }
  var levels = Object.keys(seen);
  if (hasNull) levels.push(null);
  return levels;
};
// Map every element of `x` to its position in `table`.
//
// x:       array-like of values to look up.
// table:   array of candidate values; null entries are not indexed. When a
//          value occurs more than once, its last position wins.
// useNull: when truthy, a null in `x` maps to the last position of `table`
//          (table.length - 1); otherwise it maps to null.
// Returns: a new array of x.length entries. Non-null values that are not in
//          `table` map to undefined.
var match = function(x, table, useNull) {
  // Prototype-less dictionary, so that a value such as "toString" that is
  // absent from `table` yields undefined instead of a member inherited from
  // Object.prototype.
  var positions = Object.create(null);
  for (var i = 0; i < table.length; i++) {
    if (table[i] !== null) positions[table[i]] = i;
  }
  var nullValue = (useNull ? table.length - 1 : null);
  var res = new Array(x.length);
  for (var k = 0; k < x.length; k++) {
    res[k] = (x[k] !== null ? positions[x[k]] : nullValue);
  }
  return res;
};
// Tell whether `o` is a factor: an array that carries a `levels` property
// which is neither null nor undefined.
var isFactor = function(o) {
  if (!Array.isArray(o)) return false;
  var levels = o.levels;
  return levels !== null && levels !== undefined;
};
// Encode `x` as a factor: an array of level codes with a `levels` property
// listing the distinct values.
//
// x:       a value, an array of values, or an existing factor. A non-array
//          value is treated as a one-element array.
// useNull: when truthy, null is a level of its own (always the last one) and
//          null entries are coded with that level's index; otherwise null
//          entries stay null and null is not listed in `levels`.
// Returns: a factor. An existing factor whose null handling already matches
//          `useNull` is returned as is; otherwise a re-coded copy is returned
//          and the input factor is left untouched.
var factor = function(x, useNull) {
  var i;
  if (isFactor(x)) {
    var levels = x.levels;
    var lastIsNull = levels[levels.length - 1] === null;
    var copy;
    if (useNull && !lastIsNull) {
      // Add null as the last level. Both the codes and the levels are copied:
      // sharing the levels array with the input would alter the caller's
      // factor.
      copy = x.slice(0, x.length);
      copy.levels = levels.concat(null);
      for (i = 0; i < copy.length; i++) {
        if (copy[i] === null) copy[i] = copy.levels.length - 1;
      }
      return copy;
    }
    if (!useNull && lastIsNull) {
      // Drop the trailing null level and turn its code back into null.
      var nullCode = levels.length - 1;
      copy = x.slice(0, x.length);
      copy.levels = levels.slice(0, nullCode);
      for (i = 0; i < copy.length; i++) {
        if (copy[i] === nullCode) copy[i] = null;
      }
      return copy;
    }
    return x;
  }
  x = Array.isArray(x) ? x : [x];
  var found = unique(x);
  // unique() puts null last; remove it when null is not wanted as a level.
  if (!useNull && found[found.length - 1] === null) found.length--;
  var f = match(x, found, useNull);
  f.levels = found;
  return f;
};
// Convert a factor back to its level labels.
//
// For a factor, returns a new array in which every code is replaced by the
// corresponding entry of `f.levels`; null codes stay null. Anything that is
// not a factor is returned as String(f).
var asString = function(f) {
  if (isFactor(f)) {
    // Lookup from code to label.
    var labelByCode = {};
    var code = 0;
    while (code < f.levels.length) {
      labelByCode[code] = f.levels[code];
      code += 1;
    }
    // Translate the codes one by one.
    var labels = [];
    for (var pos = 0; pos < f.length; pos += 1) {
      var value = f[pos];
      if (value === null) {
        labels.push(null);
      } else {
        labels.push(labelByCode[value]);
      }
    }
    return labels;
  }
  return String(f);
};
// Public API: the factor helpers defined above.
exports.factor = factor;
exports.isFactor = isFactor;
exports.asString = asString;
//
// Cross tabulation related code.
//
// A multi-dimensional table stored as a flat array.
//
// dim:      array with the extent of every dimension.
// dimnames: array with one {dim: <name>, names: [<level names>]} object per
//           dimension; a level name may be null.
// array:    flat array of cell values; the last dimension varies fastest.
//
// Besides the three arguments, the instance keeps three non-enumerable helper
// properties:
//   _levelIndices    per dimension, a dictionary from level name to position;
//   _levelNullsIndex per dimension, the position of the null level (undefined
//                    when the dimension has none);
//   _multipliers     per dimension, the stride used by _offset().
var Table = function(dim, dimnames, array) {
  this.dim = dim;
  this.dimnames = dimnames;
  this.array = array;
  var ndim = this.dim.length;
  // Indices for level names.
  this._levelIndices = new Array(ndim);
  this._levelNullsIndex = new Array(ndim);
  for (var i = 0; i < ndim; i++) {
    // Prototype-less dictionary: with a plain `{}` a lookup of a name such as
    // "toString" would find the member inherited from Object.prototype and
    // pass for an existing level.
    var byName = Object.create(null);
    var nullAt = undefined;
    var names = this.dimnames[i].names;
    for (var j = 0; j < names.length; j++) {
      if (names[j] === null) {
        nullAt = j;
      } else {
        byName[names[j]] = j;
      }
    }
    this._levelIndices[i] = byName;
    this._levelNullsIndex[i] = nullAt;
  }
  // Strides used to turn a multi-index into a flat offset: the last dimension
  // has stride 1, every other one the product of the extents to its right.
  this._multipliers = new Array(ndim);
  var stride = 1;
  for (var k = ndim - 1; k >= 0; k--) {
    this._multipliers[k] = stride;
    stride *= this.dim[k];
  }
  Object.defineProperties(this, {
    _levelIndices: {enumerable: false},
    _levelNullsIndex: {enumerable: false},
    _multipliers: {enumerable: false}
  });
};
// Translate a multi-index (one position per dimension) into the offset of the
// corresponding cell in the flat `array`, using the strides in `_multipliers`.
Table.prototype._offset = function(ind) {
  var strides = this._multipliers;
  var total = 0;
  var axis = 0;
  while (axis < ind.length) {
    total = total + ind[axis] * strides[axis];
    axis += 1;
  }
  return total;
};
// Keep the helper out of property enumerations.
Object.defineProperty(Table.prototype, "_offset", {enumerable: false});
// Select cells from the table.
//
// Takes one selector per dimension, in dimension order. A selector is
//   - a number: a position along the dimension;
//   - null or any other non-number value: a level name, looked up among the
//     dimension's names;
//   - an array of the above: several positions;
//   - undefined (or omitted): every position of the dimension.
//
// Returns the table itself when called without arguments, the cell value when
// every selector picks exactly one position, and otherwise a new frozen Table
// holding the selected cells, in which dimensions with a single selected
// position are dropped.
//
// Throws an Error when a level name does not exist in its dimension.
//
// Selector arrays passed by the caller are not modified.
Table.prototype.get = function() {
  var self = this;
  var indices = Array.prototype.slice.call(arguments);
  if (indices.length === 0) return this;

  // Turn one selector entry into a numeric position along `axis`.
  var toPosition = function(axis, key) {
    if (typeof key === "number") return key;
    var found;
    if (key === null) {
      found = self._levelNullsIndex[axis];
    } else if (Object.prototype.hasOwnProperty.call(self._levelIndices[axis], key)) {
      // Own-property check: names inherited from Object.prototype are not
      // level names.
      found = self._levelIndices[axis][key];
    }
    if (found === undefined)
      throw new Error("Variable '" + key + "' does not exist.");
    return found;
  };

  // Normalize every selector to a fresh array of numeric positions. Building
  // new arrays (instead of rewriting the entries in place) keeps the
  // caller's selector arrays intact.
  for (var i = 0; i < this.dim.length; i++) {
    var requested = indices[i];
    var resolved;
    var j;
    if (Array.isArray(requested)) {
      resolved = new Array(requested.length);
      for (j = 0; j < requested.length; j++) {
        resolved[j] = toPosition(i, requested[j]);
      }
    } else if (requested === undefined) {
      // Normalize undefined to the respective dim indices.
      resolved = new Array(this.dim[i]);
      for (j = 0; j < resolved.length; j++) {
        resolved[j] = j;
      }
    } else {
      resolved = [toPosition(i, requested)];
    }
    indices[i] = resolved;
  }

  if (indices.every(function(sel) { return sel.length === 1 })) {
    // Retrieve a singular data.
    return this.array[this._offset(indices.map(function(sel) { return sel[0] }))];
  }

  // Retrieve as new Table instance, keeping only the dimensions with more
  // than one selected position.
  var dim = indices
    .map(function(sel) { return sel.length })
    .filter(function(len) { return len > 1 });
  var dimnames = this.dimnames
    .map(function(d, axis) {
      return {
        dim: d.dim,
        names: indices[axis].map(function(p) { return d.names[p] })
      };
    })
    .filter(function(d, axis) { return indices[axis].length > 1 });
  var array = new Array(dim.reduce(function(x, y) { return x * y }));

  // Walk the selected cells like an odometer (last dimension fastest):
  // pos[a] is the current position within selector a.
  var pos = new Array(indices.length);
  var a;
  for (a = 0; a < pos.length; a++) {
    pos[a] = 0;
  }
  var current = new Array(indices.length);
  for (var n = 0; n < array.length; n++) {
    // Multi-index of the source cell for the current odometer state.
    for (a = 0; a < current.length; a++) {
      current[a] = indices[a][pos[a]];
    }
    array[n] = this.array[this._offset(current)];
    // Advance the odometer by one step.
    for (a = pos.length - 1; a >= 0; a--) {
      if (pos[a] + 1 < indices[a].length) {
        pos[a] = pos[a] + 1;
        break;
      } else {
        pos[a] = 0;
      }
    }
  }
  return Object.freeze(new Table(dim, dimnames, array));
};
// Build a contingency table of counts.
//
// table(x [, useNull])
//   counts the levels of the single variable `x` (passed to factor()).
// table(x, name1, name2, ... [, useNull])
//   cross-tabulates the variables x[name1], x[name2], ...; they must all
//   have the same length.
//
// A trailing boolean argument is taken as `useNull` and handed to factor().
// Observations in which any variable has a null/undefined code are not
// counted. Returns a frozen Table.
var table = function(x) {
  var args = Array.prototype.slice.call(arguments);
  var useNull;
  if (typeof args[args.length - 1] === "boolean") {
    useNull = args.pop();
  }

  var varNames, vars;
  if (args.length > 1) {
    varNames = args.slice(1);
    vars = varNames.map(function(name) { return x[name]; });
    var expectedLength = vars[0].length;
    var mismatch = vars.some(function(v) { return v.length !== expectedLength; });
    if (mismatch) {
      throw new Error("variables must have the same length.");
    }
    vars = vars.map(function(v) { return factor(v, useNull); });
  } else {
    varNames = [""];
    vars = [factor(x, useNull)];
  }

  // One dimension per variable, named after the variable and labelled with
  // a copy of its levels.
  var dimnames = vars.map(function(v, k) {
    return {dim: varNames[k], names: v.levels.slice(0)};
  });
  var dim = vars.map(function(v) { return v.levels.length; });

  // All counts start at zero.
  var cells = dim.reduce(function(a, b) { return a * b });
  var counts = [];
  while (counts.length < cells) {
    counts.push(0);
  }

  var t = new Table(dim, dimnames, counts);

  // Tally every observation whose codes are all present.
  var cell = new Array(vars.length);
  for (var row = 0; row < vars[0].length; row++) {
    var complete = true;
    for (var k = 0; complete && k < vars.length; k++) {
      var code = vars[k][row];
      if (code == null) {
        complete = false;
      } else {
        cell[k] = code;
      }
    }
    if (complete) {
      counts[t._offset(cell)]++;
    }
  }
  return Object.freeze(t);
};
// Public API: the cross tabulation entry point defined above.
exports.table = table;
//
// Functions on Table
//
// Return a new table extended with margins (e.g. totals) along the given
// dimensions.
//
// table:   the source table; only its `dim`, `dimnames` and `array`
//          properties are read, and it is not modified.
// margins: a dimension index, or an array of dimension indices, processed in
//          the given order. A margin computed later also covers the margin
//          cells added earlier along other dimensions. Entries that are not
//          a valid dimension index are ignored.
// fun:     either one {n: <name>, f: <function>} object applied to every
//          margin, or an array with one list of such objects per entry of
//          `margins`. `n` becomes the name of the added level; `f` receives
//          the array of values along the margin's dimension (the same array
//          object is reused between calls) and returns the margin cell.
//
// Returns a frozen Table. Throws an Error when `fun` is an array whose length
// differs from the number of margins.
//
// Cells are addressed through per-dimension strides of the new layout, so the
// result is laid out correctly for any number of dimensions and any order of
// margins.
var addMargins = function(table, margins, fun) {
  if (!Array.isArray(margins)) margins = [margins];
  var funIsArray = Array.isArray(fun);
  if (funIsArray && margins.length !== fun.length)
    throw new Error("Functions' number does not match margins' number.");

  var ndim = table.dim.length;
  var i, j, k, d;

  // Whether `m` designates one of the table's dimensions.
  var isDimension = function(m) {
    return typeof m === "number" && m % 1 === 0 && m >= 0 && m < ndim;
  };
  // The list of functions that belongs to the k-th margin.
  var funsFor = function(k) {
    return funIsArray ? fun[k] : [fun];
  };
  // An array of n zeros.
  var zeros = function(n) {
    var z = [];
    while (z.length < n) z.push(0);
    return z;
  };

  // Clone dim and dimnames, then grow them by the margins to add.
  var dim = table.dim.slice(0);
  var dimnames = table.dimnames.map(function(dn) {
    return {
      dim: dn.dim,
      names: dn.names.slice(0)
    };
  });
  for (k = 0; k < margins.length; k++) {
    if (!isDimension(margins[k])) continue;
    var added = funsFor(k);
    dim[margins[k]] += added.length;
    for (j = 0; j < added.length; j++) {
      dimnames[margins[k]].names.push(added[j].n);
    }
  }

  // Strides of the new layout (last dimension fastest).
  var strides = new Array(ndim);
  var size = 1;
  for (d = ndim - 1; d >= 0; d--) {
    strides[d] = size;
    size *= dim[d];
  }
  var array = new Array(size);

  // Advance the multi-index `pos` by one step within `extent` (last
  // dimension fastest), leaving dimension `skip` untouched, and return the
  // resulting change of the flat offset in the new layout.
  var advance = function(pos, extent, skip) {
    var delta = 0;
    for (var a = pos.length - 1; a >= 0; a--) {
      if (a === skip) continue;
      if (pos[a] + 1 < extent[a]) {
        pos[a]++;
        return delta + strides[a];
      }
      delta -= pos[a] * strides[a];
      pos[a] = 0;
    }
    return delta;
  };

  // Copy the data into the new layout, leaving the margin cells empty.
  var pos = zeros(ndim);
  var offset = 0;
  for (i = 0; i < table.array.length; i++) {
    array[offset] = table.array[i];
    offset += advance(pos, table.dim, -1);
  }

  // Fill in the margins one after the other. currentDim tracks the extents
  // that hold values so far, so that later margins include earlier ones.
  var currentDim = table.dim.slice(0);
  for (k = 0; k < margins.length; k++) {
    var m = margins[k];
    if (!isDimension(m)) continue;
    var funs = funsFor(k);
    var len = currentDim[m];
    // Number of lines running along dimension m.
    var lines = 1;
    for (d = 0; d < ndim; d++) {
      if (d !== m) lines *= currentDim[d];
    }
    var values = new Array(len);
    var at = zeros(ndim);
    var base = 0;
    for (var c = 0; c < lines; c++) {
      for (j = 0; j < len; j++) {
        values[j] = array[base + j * strides[m]];
      }
      for (j = 0; j < funs.length; j++) {
        array[base + (len + j) * strides[m]] = funs[j].f(values);
      }
      base += advance(at, currentDim, m);
    }
    currentDim[m] += funs.length;
  }

  return Object.freeze(new Table(dim, dimnames, array));
};
// Turn the cells of a table into proportions.
//
// table:  the source table; only its `dim`, `dimnames` and `array`
//         properties are read.
// margin: when null or undefined, every cell is divided by the sum of all
//         cells. Otherwise the index of a dimension: for every position
//         along that dimension, the cells at that position are divided by
//         their own sum.
//
// Returns a frozen Table that reuses the `dim` and `dimnames` of `table` and
// holds a new array of proportions.
var prop = function(table, margin) {
  var array = new Array(table.array.length);
  var i;
  if (margin == null) {
    var total = 0;
    for (i = 0; i < table.array.length; i++) {
      total += table.array[i];
    }
    for (i = 0; i < array.length; i++) {
      array[i] = table.array[i] / total;
    }
  } else {
    // Number of consecutive cells that share one position along `margin`:
    // the product of the extents to its right.
    var blockSize = margin + 1 < table.dim.length ? table.dim[margin + 1] : 1;
    for (i = margin + 2; i < table.dim.length; i++) {
      blockSize *= table.dim[i];
    }
    // How many such runs exist per position: the product of the extents to
    // the left of `margin`.
    var numberOfBlocks = margin - 1 >= 0 ? table.dim[margin - 1] : 1;
    for (i = margin - 2; i >= 0; i--) {
      numberOfBlocks *= table.dim[i];
    }
    // Distance between two successive runs of the same position.
    var jumpSize = 1;
    for (i = margin; i < table.dim.length; i++) {
      jumpSize *= table.dim[i];
    }
    // Normalize the cells at position `group` of the margin dimension.
    var calcGroup = function(group) {
      var sum = 0;
      var b, c, pos;
      for (b = 0; b < numberOfBlocks; b++) {
        for (c = 0; c < blockSize; c++) {
          pos = group * blockSize + b * jumpSize + c;
          sum += table.array[pos];
        }
      }
      for (b = 0; b < numberOfBlocks; b++) {
        for (c = 0; c < blockSize; c++) {
          pos = group * blockSize + b * jumpSize + c;
          array[pos] = table.array[pos] / sum;
        }
      }
    };
    for (i = 0; i < table.dim[margin]; i++) {
      calcGroup(i);
    }
  }
  return Object.freeze(new Table(table.dim, table.dimnames, array));
};
// Public API: the functions operating on Table instances defined above.
exports.addMargins = addMargins;
exports.prop = prop;