/*
 * jsdataframe
 * Version: (not shown in this listing; see jd.version below)
 * A data frame library inspired by R and Python Pandas.
 * Listing size: 1,859 lines (1,556 loc), 132 kB. Language: JavaScript.
 */
// UMD boilerplate from https://github.com/umdjs/umd - "commonjsStrict.js" template
;(function (root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(['exports'], factory);
} else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') {
// CommonJS
factory(exports);
} else {
// Browser globals
factory((root.jsdataframe = {}));
}
}(this, function (exports) {
"use strict";
// "jd" is the library namespace: it aliases the "exports" object handed in
// by the UMD wrapper, so every jd.* assignment below becomes a public export.
var jd = exports;
// Library version string; packVector also copies it into packed output.
jd.version = '0.2.0';
/*-----------------------------------------------------------------------------
* Polyfills
*/
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isNaN
if (!Number.isNaN) {
  // NaN is the only value that is not strictly equal to itself.
  Number.isNaN = function(value) {
    return value !== value;
  };
}
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isInteger
if (!Number.isInteger) {
  // A value is an integer when it is a finite number with no fraction part.
  Number.isInteger = function(value) {
    if (typeof value !== "number" || !isFinite(value)) {
      return false;
    }
    return Math.floor(value) === value;
  };
}
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/findIndex
if (!Array.prototype.findIndex) {
  // Polyfill: returns the index of the first element for which "predicate"
  // returns a truthy value, or -1 when there is none. An optional second
  // argument is used as "this" inside the predicate.
  // Throws a TypeError when called on null/undefined or when "predicate"
  // is not a function.
  Array.prototype.findIndex = function(predicate) {
    // This file runs in strict mode, so a missing receiver shows up as
    // undefined rather than the global object; reject both null and
    // undefined.
    if (this === null || this === undefined) {
      throw new TypeError('Array.prototype.findIndex called on null or undefined');
    }
    if (typeof predicate !== 'function') {
      throw new TypeError('predicate must be a function');
    }
    var list = Object(this);
    // ">>> 0" coerces the length to an unsigned 32-bit integer
    var length = list.length >>> 0;
    var thisArg = arguments[1];
    var value;
    for (var i = 0; i < length; i++) {
      value = list[i];
      if (predicate.call(thisArg, value, i, list)) {
        return i;
      }
    }
    return -1;
  };
}
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/includes
if (!String.prototype.includes) {
  // Polyfill: reports whether "search" occurs in this string at or after
  // "start" (treated as 0 when it is not a number).
  String.prototype.includes = function(search, start) {
    var from = (typeof start === 'number') ? start : 0;
    if (from + search.length > this.length) {
      return false;
    }
    return this.indexOf(search, from) !== -1;
  };
}
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
if (!String.prototype.startsWith) {
  // Polyfill: reports whether this string has "searchString" at "position"
  // (defaults to 0).
  String.prototype.startsWith = function(searchString, position){
    var from = position || 0;
    var candidate = this.substr(from, searchString.length);
    return candidate === searchString;
  };
}
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/endsWith
if (!String.prototype.endsWith) {
  // Polyfill: reports whether this string, truncated to "position"
  // characters, ends with "searchString". An invalid or out-of-range
  // "position" falls back to the full string length.
  String.prototype.endsWith = function(searchString, position) {
    var subject = this.toString();
    var positionIsUsable = typeof position === 'number' &&
      isFinite(position) &&
      Math.floor(position) === position &&
      position <= subject.length;
    var end = positionIsUsable ? position : subject.length;
    var expectedStart = end - searchString.length;
    var foundAt = subject.indexOf(searchString, expectedStart);
    return foundAt !== -1 && foundAt === expectedStart;
  };
}
/*=============================================================================
##### ###### #### # ## ##### ## ##### # #### # # ####
# # # # # # # # # # # # # # # # ## # #
# # ##### # # # # # # # # # # # # # # # ####
# # # # # ###### ##### ###### # # # # # # # #
# # # # # # # # # # # # # # # # # ## # #
##### ###### #### ###### # # # # # # # # #### # # ####
*/
/*-----------------------------------------------------------------------------
* Define prototypes (instead of constructor functions)
*/
// Base prototype shared by all vectors. Every prototype below carries a
// string "type" tag that the rest of the file compares against (e.g.
// "obj.type === vectorProto.type") to recognise library objects.
var vectorProto = {};
vectorProto.type = 'jsdataframe.Vector';
// Dtype-specific vector prototypes; each inherits from vectorProto, so they
// share its "type" tag unless they override it.
var numVecProto = Object.create(vectorProto);
var boolVecProto = Object.create(vectorProto);
var strVecProto = Object.create(vectorProto);
var dateVecProto = Object.create(vectorProto);
// Prototype for data frames
var dfProto = {};
dfProto.type = 'jsdataframe.DataFrame';
// Supporting types
var rangeProto = {};
rangeProto.type = 'jsdataframe.Range';
var byDtypeProto = {};
byDtypeProto.type = 'jsdataframe.ByDtype';
var exclusionProto = {};
exclusionProto.type = 'jsdataframe.Exclusion';
// Private helper types
var abstractIndexProto = {};
abstractIndexProto.type = 'jsdataframe.AbstractIndex';
// nestedIndexProto inherits from abstractIndexProto but overrides the tag
var nestedIndexProto = Object.create(abstractIndexProto);
nestedIndexProto.type = 'jsdataframe.NestedIndex';
/*-----------------------------------------------------------------------------
* Constants
*/
// Set of recognised dtype names. Created with a null prototype so that
// lookups only ever see the keys assigned here.
var VALID_DTYPES = Object.create(null);
VALID_DTYPES.number = true;
VALID_DTYPES.boolean = true;
VALID_DTYPES.string = true;
VALID_DTYPES.date = true;
VALID_DTYPES.object = true;
// Missing-value marker for each dtype: NaN for numbers, null for the rest.
var NA_VALUE = {
number: NaN,
boolean: null,
string: null,
date: null,
object: null
};
// Vector prototype associated with each dtype; "object" maps to the base
// vector prototype.
var PROTO_MAP = {
number: numVecProto,
boolean: boolVecProto,
string: strVecProto,
date: dateVecProto,
object: vectorProto
};
// Element coercion function for each dtype; "object" values pass through
// unchanged. NOTE(review): the coerceTo* functions are not defined in this
// part of the file - presumably hoisted function declarations; confirm.
var COERCE_FUNC = {
number: coerceToNum,
boolean: coerceToBool,
string: coerceToStr,
date: coerceToDate,
object: function(x) { return x; }
};
// Private exports for testing purposes
jd._private_export = {};
/*=============================================================================
#### ##### ## ##### # #### ###### # # # # #### ####
# # # # # # # # # # # ## # # # #
#### # # # # # # ##### # # # # # # ####
# # ###### # # # # # # # # # # #
# # # # # # # # # # # # # ## # # # #
#### # # # # # #### # #### # # #### ####
*/
/*-----------------------------------------------------------------------------
* Vector Creation
*/
// Creates a vector from "array".
// - dtype: when undefined or null the dtype is inferred from the contents,
//   otherwise it is enforced.
// - copyArray: when undefined or truthy the vector is built on a copy of
//   "array"; pass a falsy value to use "array" directly.
// Throws if "array" is not an Array.
jd.vector = function(array, dtype, copyArray) {
  if (!Array.isArray(array)) {
    throw new Error('"array" argument must be an Array');
  }
  var shouldCopy = isUndefined(copyArray) || copyArray;
  var data = shouldCopy ? array.slice() : array;
  if (isUndefined(dtype) || dtype === null) {
    return inferVectorDtype(data);
  }
  return enforceVectorDtype(data, dtype);
};
// Builds a sequence from "start" toward "stop" in increments of "step"
// (default 1). "stop" is excluded unless "includeStop" is truthy.
// Called with a single numeric argument n, the sequence runs from 0 to n.
// Single-character strings produce a character sequence via char codes.
// Throws when the dtypes of "start" and "stop" differ, when "step" is 0,
// or when the sign of "step" points away from "stop".
jd.seq = function(start, stop, step, includeStop) {
  if (arguments.length === 1) {
    if (!isNumber(start)) {
      throw new Error('both "start" and "stop" arguments must be ' +
        'specified for non-numeric sequences');
    }
    stop = start;
    start = 0;
  } else if (inferDtype(start) !== inferDtype(stop)) {
    throw new Error('"start" and "stop" must have the same dtype');
  }
  if (isUndefined(step)) {
    step = 1;
  }
  if (isUndefined(includeStop)) {
    includeStop = false;
  }

  // Character sequences are generated over char codes, then mapped back
  if (isString(start)) {
    if (start.length !== 1 || stop.length !== 1) {
      throw new Error('both "start" and "stop" must be single characters ' +
        'for character sequences');
    }
    return jd.seq(start.charCodeAt(0), stop.charCodeAt(0), step, includeStop)
      .map(charCodeToStr);
  }

  // The step must be nonzero and point from "start" toward "stop"
  if (step === 0) {
    throw new Error('"step" must be nonzero');
  }
  if (start < stop && step < 0) {
    throw new Error('"step" must be positive when start < stop');
  }
  if (start > stop && step > 0) {
    throw new Error('"step" must be negative when start > stop');
  }

  var values = [];
  for (var curr = start; ; curr += step) {
    var inRange = (step > 0) ?
      (includeStop ? curr <= stop : curr < stop) :
      (includeStop ? curr >= stop : curr > stop);
    if (!inRange) {
      break;
    }
    values.push(curr);
  }
  return newVector(values, 'number');
};
// Converts a single UTF-16 code unit to its one-character string.
function charCodeToStr(charCode) {
  var character = String.fromCharCode(charCode);
  return character;
}
// Builds a sequence of exactly "lengthOut" elements beginning at "start"
// and advancing by "step" (default 1 when fewer than 3 arguments are
// given). A single-character "start" produces a character sequence.
jd.seqOut = function(start, lengthOut, step) {
  // "step" is coerced to a number before "lengthOut" is validated
  step = (arguments.length < 3) ? 1 : +step;
  validateNonnegInt(lengthOut, 'lengthOut');

  // Character sequences are generated over char codes, then mapped back
  if (isString(start)) {
    if (start.length !== 1) {
      throw new Error('"start" must be a single character ' +
        'for character sequences');
    }
    return jd.seqOut(start.charCodeAt(0), lengthOut, step)
      .map(charCodeToStr);
  }

  var values = allocArray(lengthOut);
  var curr = start;
  var pos = 0;
  while (pos < lengthOut) {
    values[pos] = curr;
    curr += step;
    pos++;
  }
  return newVector(values, 'number');
};
// Builds a numeric vector of "length" evenly spaced values from "start"
// to "stop" inclusive. With length 1 the result is just "start".
jd.linspace = function(start, stop, length) {
  // Coerce the bounds to numbers, then validate the requested length
  var from = +start;
  var to = +stop;
  validateNonnegInt(length, 'length');

  var values = allocArray(length);
  var delta = 0;
  if (length !== 1) {
    delta = (to - from) / (length - 1);
  }
  for (var k = 0; k < length; k++) {
    values[k] = from + k * delta;
  }
  return newVector(values, 'number');
};
// Repeats the whole of "values" "times" times in sequence
// (e.g. [1, 2] x 2 gives [1, 2, 1, 2]). The result keeps the input dtype.
jd.rep = function(values, times) {
  validateNonnegInt(times, 'times');
  values = ensureVector(values);
  var source = values.values;
  var sourceLen = source.length;
  var total = sourceLen * times;
  var repeated = allocArray(total);
  // The loop body only runs when sourceLen > 0, so the modulo is safe
  for (var pos = 0; pos < total; pos++) {
    repeated[pos] = source[pos % sourceLen];
  }
  return newVector(repeated, values.dtype);
};
// Repeats each element of "values" "times" times in place
// (e.g. [1, 2] x 2 gives [1, 1, 2, 2]). The result keeps the input dtype.
jd.repEach = function(values, times) {
  validateNonnegInt(times, 'times');
  values = ensureVector(values);
  var source = values.values;
  var total = source.length * times;
  var repeated = allocArray(total);
  // The loop body only runs when times > 0, so the division is safe
  for (var pos = 0; pos < total; pos++) {
    repeated[pos] = source[Math.floor(pos / times)];
  }
  return newVector(repeated, values.dtype);
};
// Creates a vector of the given dtype holding "times" missing values
// (the dtype's entry in NA_VALUE).
jd.repNa = function(times, dtype) {
  validateNonnegInt(times, 'times');
  validateDtype(dtype);
  var missing = NA_VALUE[dtype];
  var filled = allocArray(times);
  var pos = 0;
  while (pos < times) {
    filled[pos] = missing;
    pos++;
  }
  return newVector(filled, dtype);
};
/*-----------------------------------------------------------------------------
* DataFrame Creation
*/
// Creates a data frame from "columns", which is either:
// - an array of columns, optionally named by "colNames" (which must then
//   have the same length); names are generated when "colNames" is omitted
// - an object mapping column name -> column; "colNames", when given, fixes
//   the column order and must list every key exactly once.
// Throws an Error when "columns" is a vector, is null/undefined or a
// non-object, or when "colNames" is inconsistent with "columns".
jd.df = function(columns, colNames) {
  // Standardize 'colNames' argument to string vector if present
  if (!isUndefined(colNames)) {
    colNames = ensureStringVector(colNames);
  }

  // Reject null/undefined up front: reading ".type" from them would throw
  // an unhelpful TypeError
  if (isUndefined(columns) || columns === null) {
    throw new Error('expected "columns" to be an array or object but got: ' +
      String(columns));
  }

  // Standardize 'columns' argument to array format
  var numCols;
  if (columns.type === vectorProto.type) {
    throw new Error('"columns" should not itself be a vector');
  } else if (Array.isArray(columns)) {
    numCols = columns.length;
    if (isUndefined(colNames)) {
      colNames = generateColNames(numCols);
    } else if (colNames.size() !== numCols) {
      throw new Error('the length of "colNames" (' + colNames.size() +
        ') does not match the length of "columns" (' + numCols + ')');
    }
  } else if (typeof columns === 'object') {
    var keys = Object.keys(columns);
    numCols = keys.length;
    var colMap = columns;
    if (isUndefined(colNames)) {
      colNames = newVector(keys, 'string');
    } else {
      if (colNames.isNa().any()) {
        throw new Error('"colNames" cannot have null entries when ' +
          '"columns" is an object');
      } else if (colNames.duplicated().any()) {
        throw new Error('"colNames" cannot have duplicate entries when ' +
          '"columns" is an object');
      } else if (colNames.size() !== numCols ||
          colNames.isIn(keys).not().any()) {
        throw new Error('"colNames" must match all the keys in ' +
          '"columns" if "columns" is an object');
      }
    }
    // Rebuild "columns" as an array ordered by "colNames"
    columns = allocArray(numCols);
    for (var i = 0; i < numCols; i++) {
      columns[i] = colMap[colNames.values[i]];
    }
  } else {
    // The offending value is concatenated into the message; passing it as
    // a second argument to Error would silently drop it
    throw new Error('expected "columns" to be an array or object but got: ' +
      String(columns));
  }
  return newDataFrame(columns, colNames);
};
// Creates a data frame from an array of row objects.
// - objArray: array of objects; each object's own enumerable properties
//   supply that row's values.
// - colOrder: optional column names fixing which properties are used and in
//   what order. When omitted, columns appear in the order their keys are
//   first encountered while scanning the rows.
// A property missing from a row yields null for that cell.
// Throws when "objArray" is not an array or when "colOrder" contains null
// or duplicate entries.
jd.dfFromObjArray = function(objArray, colOrder) {
  if (!Array.isArray(objArray)) {
    throw new Error('"objArray" must be an array');
  }
  var nRow = objArray.length;
  var definedOrder = true;
  var j;
  var columns;
  if (isUndefined(colOrder)) {
    colOrder = [];
    columns = [];
    definedOrder = false;
  } else {
    colOrder = ensureStringVector(colOrder);
    if (colOrder.isNa().any()) {
      throw new Error('"colOrder" cannot have null entries');
    } else if (colOrder.duplicated().any()) {
      throw new Error('"colOrder" cannot have duplicate entries');
    }
    colOrder = colOrder.values;
    columns = allocArray(colOrder.length);
    for (j = 0; j < colOrder.length; j++) {
      columns[j] = allocArray(nRow);
    }
  }

  // Borrow the method from Object.prototype so rows with a null prototype,
  // or rows that have their own "propertyIsEnumerable" key, still work
  var hasEnumerableProp = Object.prototype.propertyIsEnumerable;

  // Populate columns
  var foundCols = Object.create(null);
  for (var i = 0; i < nRow; i++) {
    var rowObj = objArray[i];
    if (!definedOrder) {
      var keys = Object.keys(rowObj);
      for (j = 0; j < keys.length; j++) {
        var key = keys[j];
        if (!(key in foundCols)) {
          // First sighting of this key: add a column and backfill the
          // rows already processed with null
          colOrder.push(key);
          var newColArr = allocArray(nRow);
          for (var k = 0; k < i; k++) {
            newColArr[k] = null;
          }
          columns.push(newColArr);
          foundCols[key] = key;
        }
      }
    }
    for (j = 0; j < colOrder.length; j++) {
      var colName = colOrder[j];
      columns[j][i] = hasEnumerableProp.call(rowObj, colName) ?
        rowObj[colName] : null;
    }
  }
  return newDataFrame(columns, newVector(colOrder, 'string'));
};
// Creates a data frame from "matrix", an array of row arrays. Column names
// come from "colNames" when given and are generated otherwise.
// Throws when "matrix" is not an array or when "colNames" does not match
// the length of the first row (checked only if that row is non-empty).
jd.dfFromMatrix = function(matrix, colNames) {
  if (!Array.isArray(matrix)) {
    throw new Error('"matrix" must be an array');
  }
  // The first row determines the column count
  var nCol = 0;
  if (matrix.length > 0) {
    nCol = matrix[0].length;
  }
  if (isUndefined(colNames)) {
    colNames = generateColNames(nCol);
  } else {
    colNames = ensureStringVector(colNames);
  }
  if (nCol > 0 && nCol !== colNames.size()) {
    throw new Error('"colNames" must have the same length as each ' +
      'row array');
  }
  return dfFromMatrixHelper(matrix, 0, colNames);
};
// Transposes the rows of "matrix" from index "startRow" onward into column
// arrays and builds a data frame named by the "colNames" string vector.
// Throws if any used row's length differs from the number of column names.
function dfFromMatrixHelper(matrix, startRow, colNames) {
  var nCol = colNames.size();
  var nRow = matrix.length - startRow;
  var columns = allocArray(nCol);
  var col = 0;
  while (col < nCol) {
    columns[col] = allocArray(nRow);
    col++;
  }
  var row = 0;
  while (row < nRow) {
    var source = matrix[row + startRow];
    if (source.length !== nCol) {
      throw new Error('all row arrays must be of the same size');
    }
    for (col = 0; col < nCol; col++) {
      columns[col][row] = source[col];
    }
    row++;
  }
  return newDataFrame(columns, colNames);
}
// Creates a data frame from "matrix", treating its first row as the column
// names and the remaining rows as data.
// Throws when "matrix" is not an array, is empty, or when the header length
// differs from the length of the first data row.
jd.dfFromMatrixWithHeader = function(matrix) {
  if (!Array.isArray(matrix)) {
    throw new Error('"matrix" must be an array');
  }
  if (matrix.length === 0) {
    throw new Error('"matrix" must not have length 0');
  }
  var header = ensureStringVector(matrix[0]);
  var hasDataRows = matrix.length > 1;
  if (hasDataRows && header.size() !== matrix[1].length) {
    throw new Error('header row must have the same length as other ' +
      'row arrays');
  }
  return dfFromMatrixHelper(matrix, 1, header);
};
/*-----------------------------------------------------------------------------
* Conversion
*/
// Rebuilds a vector or data frame from its packed representation,
// dispatching on the "type" tag of "obj". A packed data frame is read from
// its "names" and "cols" properties.
// Throws when the tag is neither the vector nor the data frame tag.
jd.unpack = function(obj) {
  switch (obj.type) {
    case vectorProto.type:
      return unpackVector(obj);
    case dfProto.type:
      var colNames = unpackVector(obj.names);
      var columns = obj.cols.map(function(packedCol) {
        return unpackVector(packedCol);
      });
      return jd.df(columns, colNames);
    default:
      throw new Error('"obj" has unrecognized type: ' + obj.type);
  }
};
// Rebuilds a vector from a packed object's "values" and "dtype".
function unpackVector(obj) {
  var packedValues = obj.values;
  var packedDtype = obj.dtype;
  return jd.vector(packedValues, packedDtype);
}
/*-----------------------------------------------------------------------------
* Concatenation
*/
// Concatenates all arguments (vectors, or anything ensureVector accepts)
// into one vector. The dtype of the first non-"object" argument is passed
// to dtype inference as the default ("object" if there is none).
// Throws if any argument is a data frame.
jd.vCat = function() {
  var argCount = arguments.length;
  var vectors = allocArray(argCount);
  var fallbackDtype = null;
  var totalLen = 0;
  var i;

  // Pass 1: convert every argument and tally the output length
  for (i = 0; i < argCount; i++) {
    var arg = arguments[i];
    var isDataFrame = !isUndefined(arg) && arg !== null &&
      arg.type === dfProto.type;
    if (isDataFrame) {
      throw new Error('cannot pass data frame arguments to jd.vCat');
    }
    var vec = ensureVector(arg);
    if (fallbackDtype === null && vec.dtype !== 'object') {
      fallbackDtype = vec.dtype;
    }
    totalLen += vec.size();
    vectors[i] = vec;
  }
  if (fallbackDtype === null) {
    fallbackDtype = 'object';
  }

  // Pass 2: copy every element into the output array
  var merged = allocArray(totalLen);
  var writePos = 0;
  for (i = 0; i < argCount; i++) {
    var source = vectors[i].values;
    var sourceLen = source.length;
    for (var k = 0; k < sourceLen; k++) {
      merged[writePos++] = source[k];
    }
  }
  return inferVectorDtype(merged, fallbackDtype);
};
// Variadic front end for jd._colCatArray: gathers the arguments into a real
// array and delegates.
jd.colCat = function() {
  var collected = [];
  for (var pos = 0; pos < arguments.length; pos++) {
    collected.push(arguments[pos]);
  }
  return jd._colCatArray(collected);
};
// Builds a data frame by placing the elements of "array" side by side:
// - a data frame contributes all of its columns together with their names
// - a plain object (not a vector, not an array) contributes one column per
//   key, named by that key
// - anything else (undefined, null, vector, array, scalar) contributes a
//   single column with a null name.
jd._colCatArray = function(array) {
  var columns = [];
  var names = [];
  var count = array.length;
  for (var pos = 0; pos < count; pos++) {
    var elem = array[pos];
    var isNil = isUndefined(elem) || elem === null;
    var k;
    if (!isNil && elem.type === dfProto.type) {
      // Data frame: splice in every column
      var dfCols = elem._cols;
      var dfColCount = dfCols.length;
      for (k = 0; k < dfColCount; k++) {
        columns.push(dfCols[k]);
        names.push(elem._names.values[k]);
      }
    } else if (!isNil &&
        typeof elem === 'object' &&
        elem.type !== vectorProto.type &&
        !Array.isArray(elem)) {
      // Plain object: each key names the column held under it
      var keys = Object.keys(elem);
      for (k = 0; k < keys.length; k++) {
        columns.push(elem[keys[k]]);
        names.push(keys[k]);
      }
    } else {
      // Unnamed single column
      columns.push(elem);
      names.push(null);
    }
  }
  return newDataFrame(columns, newVector(names, 'string'));
};
// Variadic front end for jd._rowCatArray: gathers the arguments into a real
// array and delegates.
jd.rowCat = function() {
  var collected = [];
  for (var pos = 0; pos < arguments.length; pos++) {
    collected.push(arguments[pos]);
  }
  return jd._rowCatArray(collected);
};
// Classification codes for the elements passed to jd._rowCatArray.
// The first pass of that function records one code per element and the
// second pass switches on it to decide how the element fills rows.
var ROW_ELEM_TYPES = {
SCALAR: 0,
ARRAY: 1,
VECTOR: 2,
DATA_FRAME: 3
};
// Builds a data frame by stacking the elements of "array" as rows:
// - a scalar (including undefined, null, and any object that is not a
//   vector, array or data frame) fills one whole row with that value
// - an array or vector supplies one row, one element per column
// - a data frame supplies all of its rows.
// All arrays, vectors and (non-empty) data frames must agree on the number
// of columns, otherwise an Error is thrown.
// Column names come from the data frames: the first one seen supplies the
// names, and a name is reset to null wherever a later data frame disagrees.
jd._rowCatArray = function(array) {
var arrLen = array.length;
// Check column and row count and resolve column names
var elemTypes = allocArray(arrLen);
var colNameArr = null;
var numRows = 0;
// -1 means "no element has fixed the column count yet"
var numCols = -1;
var elem, i, j;
for (i = 0; i < arrLen; i++) {
elem = array[i];
var elemColCount;
if (isUndefined(elem) || elem === null || typeof elem !== 'object') {
elemTypes[i] = ROW_ELEM_TYPES.SCALAR;
numRows++;
// A scalar adopts the current column count, so it can never conflict
elemColCount = numCols;
} else if (elem.type === dfProto.type) {
elemTypes[i] = ROW_ELEM_TYPES.DATA_FRAME;
elemColCount = elem.nCol();
// A data frame without columns is skipped: it adds no rows and takes
// no part in the column count check
if (elemColCount === 0) {
continue;
}
numRows += elem.nRow();
// Check column names
if (colNameArr === null) {
// First data frame seen: start from a copy of its names
colNameArr = elem._names.values.slice();
} else {
// Later data frames: blank out any position whose name differs
var len = Math.min(colNameArr.length, elemColCount);
var elemNameArr = elem._names.values;
for (j = 0; j < len; j++) {
if (elemNameArr[j] !== colNameArr[j]) {
colNameArr[j] = null;
}
}
}
} else if (elem.type === vectorProto.type) {
elemTypes[i] = ROW_ELEM_TYPES.VECTOR;
numRows++;
elemColCount = elem.values.length;
} else if (Array.isArray(elem)) {
elemTypes[i] = ROW_ELEM_TYPES.ARRAY;
numRows++;
elemColCount = elem.length;
} else {
// treat object as scalar
elemTypes[i] = ROW_ELEM_TYPES.SCALAR;
numRows++;
elemColCount = numCols;
}
// Check column counts
if (numCols === -1) {
// Stays -1 when the element is a scalar seen before any sized element
numCols = elemColCount;
} else if (numCols !== elemColCount) {
throw new Error('arguments imply differing number of columns: ' +
numCols + ', ' + elemColCount);
}
}
if (numRows === 0) {
return jd.df([]);
}
// Only scalars were seen, so the result has a single column
if (numCols === -1) {
numCols = 1;
}
// Without any data frame input every column name is missing
var colNames = (colNameArr === null) ?
jd.repNa(numCols, 'string') :
newVector(colNameArr, 'string');
// Assign values for new data frame
var columns = allocArray(numCols);
for (j = 0; j < numCols; j++) {
columns[j] = allocArray(numRows);
}
// Second pass: write each element into the rows starting at "currRow"
var currRow = 0;
for (i = 0; i < arrLen; i++) {
elem = array[i];
switch (elemTypes[i]) {
case ROW_ELEM_TYPES.SCALAR:
// Broadcast the scalar across the whole row
for (j = 0; j < numCols; j++) {
columns[j][currRow] = elem;
}
currRow++;
break;
case ROW_ELEM_TYPES.VECTOR:
// Unwrap to the underlying array and share the ARRAY handling
elem = elem.values;
/* falls through */
case ROW_ELEM_TYPES.ARRAY:
for (j = 0; j < numCols; j++) {
columns[j][currRow] = elem[j];
}
currRow++;
break;
case ROW_ELEM_TYPES.DATA_FRAME:
// Copy the data frame column by column into the next nRow rows
var nRow = elem.nRow();
for (j = 0; j < numCols; j++) {
for (var k = 0; k < nRow; k++) {
columns[j][currRow + k] = elem._cols[j].values[k];
}
}
currRow += nRow;
break;
}
}
return newDataFrame(columns, colNames);
};
// Concatenates the arguments element-wise into a string vector. Each
// argument is converted with ensureVector and the element arrays are
// combined through elemStrCat, sharing one string-interning cache.
// Throws when called without arguments.
jd.strCat = function() {
  if (arguments.length === 0) {
    throw new Error('"strCat" must be called with at least one argument');
  }
  var valueArrays = [];
  for (var pos = 0; pos < arguments.length; pos++) {
    valueArrays.push(ensureVector(arguments[pos]).values);
  }
  var joined = combineMultipleArrays(
    valueArrays, elemStrCat, Object.create(null));
  return newVector(joined, 'string');
};
// Element-level string concatenation, wrapped by useStringInterning.
// Joins the string forms of all arguments; yields null as soon as any
// argument is missing.
var elemStrCat = useStringInterning(function() {
  var pieces = [];
  for (var pos = 0; pos < arguments.length; pos++) {
    var elem = arguments[pos];
    if (isMissing(elem)) {
      return null;
    }
    pieces.push(elemToString(elem));
  }
  return pieces.join('');
});
/*-----------------------------------------------------------------------------
* Printing
*/
// Constants
// Smallest values accepted for the "maxWidth" / "maxLines" print settings
var _MIN_MAX_WIDTH = 55;
var _MIN_MAX_LINES = 4;
// Printed strings longer than this are cut and suffixed with "..."
var _MAX_STR_WIDTH = 45;
// Upper limit on fraction digits shown in fixed notation
var _FIXED_NUM_DIGITS = 6;
// Fraction digits used when numbers are printed in exponential notation
var _EXP_FRAC_DIGITS = 6;
// A numeric vector is printed in exponential notation when any nonzero
// element has magnitude below the lower bound, or any element has magnitude
// above the upper bound
var _NUM_FIXED_LOWER_BOUND = Math.pow(10, 1 - _FIXED_NUM_DIGITS);
var _NUM_FIXED_UPPER_BOUND = 1e7 - 1e-9;
// Separator placed between printed columns
var _PRINT_SEP = ' ';
// Marker shown where rows or columns were left out
var _SKIP_MARKER = '..';
// Suffix appended to every printed row id
var _ROW_ID_SUFFIX = ':';
// Printing settings: width and line limits, plus the callback that
// receives the text produced by the p() methods (console.log by default).
// The setters validate their argument before storing it.
jd.printingOpts = {
  _maxWidth: 79,
  _maxLines: 10,
  _printCallback: function(stringToPrint) {
    console.log(stringToPrint);
  },
  getMaxWidth: function() {
    return this._maxWidth;
  },
  setMaxWidth: function(maxWidth) {
    validatePrintMax(maxWidth, _MIN_MAX_WIDTH, 'maxWidth');
    this._maxWidth = maxWidth;
  },
  getMaxLines: function() {
    return this._maxLines;
  },
  setMaxLines: function(maxLines) {
    validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines');
    this._maxLines = maxLines;
  },
  setPrintFunction: function(callback) {
    validateFunction(callback, 'callback');
    this._printCallback = callback;
  }
};
// Prints this vector by handing its printToString() output to the
// configured print callback.
vectorProto.p = function(maxLines) {
  var rendered = this.printToString(maxLines);
  var opts = jd.printingOpts;
  opts._printCallback(rendered);
};
// Renders this vector as text: the toString() summary, then one line per
// element (row id, separator, value), truncated to "maxLines" lines.
// "maxLines" defaults to the global print setting and is validated when
// given. An empty vector renders as just the summary.
vectorProto.printToString = function(maxLines) {
  if (!isUndefined(maxLines)) {
    validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines');
  } else {
    maxLines = jd.printingOpts._maxLines;
  }
  var count = this.values.length;
  if (count === 0) {
    return this.toString();
  }
  var idColumn = rightAlign(makeRowIds(count, maxLines));
  var valueColumn = rightAlign(this._toTruncatedPrintVector(maxLines));
  var lines = jd.strCat(idColumn, _PRINT_SEP, valueColumn);
  return this.toString() + '\n' + lines.strJoin('\n');
};
// Prints this data frame by handing its printToString() output to the
// configured print callback.
dfProto.p = function(maxLines) {
  var rendered = this.printToString(maxLines);
  var opts = jd.printingOpts;
  opts._printCallback(rendered);
};
// Renders this data frame as text: the toString() summary followed by a
// header line and one line per row, truncated to "maxLines" rows.
// Columns are added left to right until the width budget is used up; when
// it is exceeded the last column is replaced by the skip marker.
// "maxLines" defaults to the global print setting and is validated when
// given.
dfProto.printToString = function(maxLines) {
if (isUndefined(maxLines)) {
maxLines = jd.printingOpts._maxLines;
} else {
validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines');
}
// The row id column gets an empty first entry to line up with the header
var rowIds = rightAlign(jd.vCat('',
makeRowIds(this.nRow(), maxLines)));
var printVectors = [rowIds];
var colIdx = 0;
// All entries are right-aligned to one width, so the first entry's length
// is the width of the whole column
var totalWidth = rowIds.at(0).length;
// Budget: the maximum width minus room for a separator and a skip marker
var stopWidth = jd.printingOpts._maxWidth - _SKIP_MARKER.length -
_PRINT_SEP.length;
while (totalWidth <= stopWidth && colIdx < this.nCol()) {
var colVec = this._cols[colIdx]._toTruncatedPrintVector(maxLines);
// Put the column name on top of the column's printed values
var printVec = rightAlign(jd.vCat(
toPrintString(this._names.at(colIdx)),
colVec));
printVectors.push(_PRINT_SEP);
printVectors.push(printVec);
totalWidth += _PRINT_SEP.length + printVec.at(0).length;
colIdx++;
}
if (totalWidth > stopWidth) {
// Budget exceeded: drop the column added last (its separator stays) and
// show the skip marker in its place
printVectors.pop();
printVectors.push(_SKIP_MARKER);
}
// Concatenate all pieces element-wise to form the printed lines
var printLines = jd.strCat.apply(jd, printVectors);
return this.toString() + '\n' + printLines.strJoin('\n');
};
// Converts this vector to a vector of printable strings. When it holds
// more than "maxLines" elements, only the leading and trailing elements are
// kept, with the skip marker placed between them.
vectorProto._toTruncatedPrintVector = function(maxLines) {
  if (!(this.values.length > maxLines)) {
    return this._toPrintVector();
  }
  // Number of elements kept at each end
  var keepCount = Math.ceil(maxLines / 2 - 1);
  var leading = jd.rng(0, keepCount);
  var trailing = jd.rng(-keepCount);
  // Format only the kept elements, then split them around the marker
  var kept = this.s([leading, trailing])._toPrintVector();
  return jd.vCat(kept.s(leading), _SKIP_MARKER, kept.s(trailing));
};
// Maps every element through toPrintString to obtain its printable form.
vectorProto._toPrintVector = function() {
  var printable = this.map(toPrintString);
  return printable;
};
// Numeric override: formats every element in the same notation.
// Exponential notation is used when any element falls outside the
// fixed-notation thresholds; otherwise fixed notation with a shared number
// of fraction digits (the largest needed, capped at _FIXED_NUM_DIGITS).
numVecProto._toPrintVector = function() {
  var needsExponential = this.values.some(function(num) {
    return numIsBelowFixedThreshold(num) || numIsAboveFixedThreshold(num);
  });
  if (needsExponential) {
    return this.map(function(num) {
      return num.toExponential(_EXP_FRAC_DIGITS);
    });
  }
  var widestFraction = this.map(fractionDigits).max();
  var digits = Math.min(_FIXED_NUM_DIGITS, widestFraction);
  return this.map(function(num) {
    return num.toFixed(digits);
  });
};
// True for a nonzero number whose magnitude is below the fixed-notation
// lower bound. Zero is never considered too small.
function numIsBelowFixedThreshold(num) {
  if (num === 0) {
    return false;
  }
  return Math.abs(num) < _NUM_FIXED_LOWER_BOUND;
}
// True when the magnitude of "num" exceeds the fixed-notation upper bound.
function numIsAboveFixedThreshold(num) {
  var magnitude = Math.abs(num);
  return magnitude > _NUM_FIXED_UPPER_BOUND;
}
// Left-pads every element of the string vector "strVec" with spaces so all
// elements share the width of the longest one.
function rightAlign(strVec) {
  var widest = strVec.nChar().max();
  var pad = jd.rep(' ', widest).strJoin('');
  return strVec.map(function(text) {
    // Prepend the full padding, then keep the rightmost pad.length chars
    var padded = pad + text;
    return padded.slice(-pad.length);
  });
}
// Builds the printed row id column for "numRows" rows: ids 0..numRows-1
// truncated to "maxLines", each followed by the row id suffix. The skip
// marker is left as is.
function makeRowIds(numRows, maxLines) {
  var ids = jd.seq(numRows)._toTruncatedPrintVector(maxLines);
  return ids.map(function(label) {
    if (label === _SKIP_MARKER) {
      return label;
    }
    return label + _ROW_ID_SUFFIX;
  });
}
// Converts a single value to the string shown when printing.
// undefined, null and NaN print as their names. Any other value is
// stringified, cut at its first line break and shortened to at most
// _MAX_STR_WIDTH characters; every cut is marked with "...".
function toPrintString(value) {
  if (isUndefined(value)) {
    return 'undefined';
  }
  if (value === null) {
    return 'null';
  }
  if (Number.isNaN(value)) {
    return 'NaN';
  }
  var text = coerceToStr(value);
  // Keep only the first line of multi-line text
  var lineBreak = text.indexOf('\n');
  if (lineBreak !== -1) {
    text = text.slice(0, lineBreak) + '...';
  }
  // Leave room for the 3-character ellipsis when shortening
  if (text.length > _MAX_STR_WIDTH) {
    text = text.slice(0, _MAX_STR_WIDTH - 3) + '...';
  }
  return text;
}
// Expose the helper on the private export object (used for testing)
jd._private_export.toPrintString = toPrintString;
// Validates a candidate print limit. Throws when "candidate" is not a
// number (NaN included) or is below "lowerBound"; "label" names the
// setting in the error message.
function validatePrintMax(candidate, lowerBound, label) {
  var isRealNumber = typeof candidate === 'number' &&
    !Number.isNaN(candidate);
  if (!isRealNumber) {
    throw new Error('"' + label + '" must be a number');
  }
  if (candidate < lowerBound) {
    throw new Error('"' + label + '" too small');
  }
}
// Counts the digits after the decimal point in the default string form of
// "number".
// Numbers that stringify in scientific notation are not handled correctly,
// but such numbers trigger different printing logic anyway.
function fractionDigits(number) {
  var pieces = number.toString().split('.');
  if (pieces.length <= 1) {
    return 0;
  }
  return pieces[1].length;
}
// Expose the helper on the private export object (used for testing)
jd._private_export.fractionDigits = fractionDigits;
/*=============================================================================
# # ###### #### ##### #### #####
# # # # # # # # # #
# # ##### # # # # # #
# # # # # # # #####
# # # # # # # # # #
## ###### #### # #### # #
*/
// The base vector prototype has dtype "object" (PROTO_MAP maps "object"
// to vectorProto)
vectorProto.dtype = 'object';
// Sets up a vector instance: stores "array" as the backing values and
// resets the "_index" property to null.
vectorProto._init = function(array) {
  var self = this;
  self.values = array;
  self._index = null;
};
// Returns the number of elements in this vector.
vectorProto.size = function() {
  var data = this.values;
  return data.length;
};
// Returns a one-line summary of the form "Vector[dtype:<dtype>, size:<n>]".
vectorProto.toString = function() {
  var dtypePart = 'Vector[dtype:' + this.dtype;
  var sizePart = ', size:' + this.values.length;
  return dtypePart + sizePart + ']';
};
/*-----------------------------------------------------------------------------
* Conversion
*/
// Returns a shallow copy of the backing array, so callers cannot alter
// this vector through the result.
vectorProto.toArray = function() {
  var snapshot = this.values.slice();
  return snapshot;
};
// Converts this vector to "dtype". Returns this same vector when it already
// has that dtype. Conversion to "string" goes through the interning string
// coercion; every other dtype is delegated to jd.vector.
vectorProto.toDtype = function(dtype) {
  if (dtype === this.dtype) {
    return this;
  }
  if (dtype !== 'string') {
    return jd.vector(this.values, dtype);
  }
  var cache = Object.create(null);
  var asStrings = combineMultipleArrays(
    [this.values], coerceToStrInterned, cache);
  return newVector(asStrings, 'string');
};
// String coercion wrapped by useStringInterning; toDtype('string') passes
// it to combineMultipleArrays together with a fresh cache object.
var coerceToStrInterned = useStringInterning(coerceToStr);
// Returns the packed form of this vector; metadata is included because
// packVector is called with its default.
vectorProto.pack = function() {
  var self = this;
  return packVector(self);
};
// Packs "vector" into a plain object {dtype, values}; "version" and "type"
// are added too unless "includeMetadata" is false (it defaults to true).
// Date vectors are converted to numbers first but keep "date" as the
// recorded dtype. NaN entries of numeric data are stored as null.
function packVector(vector, includeMetadata) {
  if (isUndefined(includeMetadata)) {
    includeMetadata = true;
  }
  var recordedDtype = vector.dtype;
  var source = (recordedDtype === 'date') ?
    vector.toDtype('number') :
    vector;
  var packedValues;
  if (source.dtype === 'number') {
    packedValues = source.values.map(function(num) {
      return Number.isNaN(num) ? null : num;
    });
  } else {
    packedValues = source.values.slice();
  }
  var packed = {dtype: recordedDtype, values: packedValues};
  if (includeMetadata) {
    packed.version = jd.version;
    packed.type = vectorProto.type;
  }
  return packed;
}
/*-----------------------------------------------------------------------------
* Missing Values
*/
// Returns a boolean vector flagging which elements are missing.
vectorProto.isNa = function() {
  var flags = this.values.map(isMissing);
  return newVector(flags, 'boolean');
};
// Returns a vector with all missing elements removed.
vectorProto.dropNa = function() {
  var keepPresent = isNotMissing;
  return this.filter(keepPresent);
};
// Negation of isMissing, usable as a filter predicate.
function isNotMissing(value) {
  var missing = isMissing(value);
  return !missing;
}
// Returns a copy of this vector in which every missing element is replaced
// by "value", after coercing "value" to this vector's dtype.
vectorProto.replaceNa = function(value) {
  var replacement = COERCE_FUNC[this.dtype](value);
  var filled = this.values.slice();
  var pos = filled.length;
  // The visiting order does not matter: each slot is handled on its own
  while (pos--) {
    if (isMissing(filled[pos])) {
      filled[pos] = replacement;
    }
  }
  return newVector(filled, this.dtype);
};
/*-----------------------------------------------------------------------------
* Subset Selection / Modification
*/
// Selects a subset of this vector. "selector" is resolved to integer
// indices by standardIndexing; a null result means "select everything",
// in which case this vector itself is returned.
vectorProto.s = function(selector) {
  var positions = standardIndexing(selector, this.values.length);
  if (positions === null) {
    return this;
  }
  return newVector(subsetArray(this.values, positions.values), this.dtype);
};
// Returns a copy of this vector with the elements picked by "selector"
// replaced by "values". "values" is either a single value, assigned to
// every selected position, or one value per selected position.
// Throws on a length mismatch between the selection and "values".
vectorProto.sMod = function(selector, values) {
  var positions = standardIndexing(selector, this.values.length);
  if (positions === null) {
    // A null selection stands for every position
    positions = jd.seq(this.values.length);
  }
  values = ensureVector(values, this.dtype);
  validateVectorIsDtype(values, this.dtype);

  var broadcast = (values.size() === 1);
  if (!broadcast && values.size() !== positions.size()) {
    throw new Error('length mismatch: cannot assign ' + values.size() +
      ' values to a selection of length ' + positions.size());
  }

  var targets = positions.values;
  var updated = this.values.slice();
  for (var k = 0; k < targets.length; k++) {
    var replacement = broadcast ? values.values[0] : values.values[k];
    updated[targets[k]] = replacement;
  }
  return newVector(updated, this.dtype);
};
// Returns the single element at index "i", after passing "i" through
// ensureScalar and resolving it against this vector's size.
vectorProto.at = function(i) {
  var position = resolveIntIdx(ensureScalar(i), this.size());
  return this.values[position];
};
// Selects the range from index 0 up to "n" (default 6) from the front of
// this vector. Interpretation of a negative "n" is left to jd.rng.
vectorProto.head = function(n) {
  var count = isUndefined(n) ? 6 : n;
  validateInt(count, 'n');
  var range = jd.rng(0, count);
  return this.s(range);
};
// Returns the last "n" elements of this vector (default 6). A negative "n"
// instead skips the first -n elements and returns the rest.
// When "n" is larger than the vector, the whole vector is returned.
vectorProto.tail = function(n) {
  if (isUndefined(n)) {
    n = 6;
  }
  validateInt(n, 'n');
  // Clamp at 0: a negative start is treated by jd.rng as an offset from the
  // end, which would wrongly shorten the result whenever n > size()
  var start = (n < 0) ? -n : Math.max(0, this.size() - n);
  return this.s(jd.rng(start, undefined));
};
// Element-wise choice: keeps this vector's element where "cond" is true,
// takes the element of "other" where it is false, and yields this dtype's
// missing value where "cond" is missing.
// Throws when "cond" does not have the same length as this vector.
vectorProto.ifElse = function(cond, other) {
  var dtype = this.dtype;
  cond = ensureVector(cond, 'boolean');
  if (cond.size() !== this.size()) {
    throw new Error('"cond" must be the same length as this vector');
  }
  validateVectorIsDtype(cond, 'boolean');
  other = ensureVector(other, dtype);
  validateVectorIsDtype(other, dtype);

  // The missing value rides along as a one-element array
  var inputs = [this.values, cond.values, other.values, [NA_VALUE[dtype]]];
  var chosen = combineMultipleArrays(inputs, elemIfElse);
  return newVector(chosen, dtype);
};
// Element-level choice for ifElse: "naValue" when "cond" is missing,
// "thisElem" when it is truthy, "other" otherwise.
function elemIfElse(thisElem, cond, other, naValue) {
  if (isMissing(cond)) {
    return naValue;
  }
  if (cond) {
    return thisElem;
  }
  return other;
}
// Shorthand for jd.ex applied to this vector.
vectorProto.ex = function() {
  var self = this;
  return jd.ex(self);
};
/*-----------------------------------------------------------------------------
* Array.prototype Adaptations
*/
// Array.prototype.map over the elements, taking the same arguments. The
// result's dtype is inferred, with this vector's dtype as the default.
vectorProto.map = function() {
  var arrayMap = Array.prototype.map;
  var mapped = arrayMap.apply(this.values, arguments);
  return inferVectorDtype(mapped, this.dtype);
};
// Array.prototype.reduce over the elements, taking the same arguments.
vectorProto.reduce = function() {
  var arrayReduce = Array.prototype.reduce;
  return arrayReduce.apply(this.values, arguments);
};
// Array.prototype.reduceRight over the elements, taking the same arguments.
vectorProto.reduceRight = function() {
  var arrayReduceRight = Array.prototype.reduceRight;
  return arrayReduceRight.apply(this.values, arguments);
};
// Array.prototype.findIndex over the elements, taking the same arguments.
vectorProto.findIndex = function() {
  var arrayFindIndex = Array.prototype.findIndex;
  return arrayFindIndex.apply(this.values, arguments);
};
// Returns the index of the first occurrence of "searchElement", or -1.
// "object" vectors use a plain Array indexOf scan; all other dtypes go
// through the vector's index lookup, which may return null, a single
// index, or an array of indices.
vectorProto.indexOf = function(searchElement) {
  if (this.dtype === 'object') {
    return this.values.indexOf(searchElement);
  }
  var hit = this._getIndex().lookupKey([searchElement]);
  if (hit === null) {
    return -1;
  }
  return (typeof hit === 'number') ? hit : hit[0];
};
// Returns a sorted copy of this vector; the original is left untouched.
// Without "compareFunction" the library's "compare" ordering is used.
vectorProto.sort = function(compareFunction) {
  var comparator = isUndefined(compareFunction) ? compare : compareFunction;
  var sorted = this.values.slice();
  sorted.sort(comparator);
  return newVector(sorted, this.dtype);
};
// Returns a copy of this vector with the elements in reverse order; the
// original is left untouched.
vectorProto.reverse = function() {
  var flipped = this.values.slice();
  flipped.reverse();
  return newVector(flipped, this.dtype);
};
// Array.prototype.filter over the elements, taking the same arguments. The
// result keeps this vector's dtype.
vectorProto.filter = function() {
  var arrayFilter = Array.prototype.filter;
  var kept = arrayFilter.apply(this.values, arguments);
  return newVector(kept, this.dtype);
};
// Joins the string forms of all elements into one string, with
// "separator" between them (Array join semantics).
vectorProto.strJoin = function(separator) {
  var pieces = this.values.map(elemToString);
  return pieces.join(separator);
};
// Combines this vector element-wise with one or more further vectors
// ("vargs"). Call as combine(varg1, ..., vargN, func): "func" receives the
// matching elements and its results form a new vector with inferred dtype.
// Throws when no "varg" is given or when the last argument is not a
// function.
vectorProto.combine = function() {
  var argCount = arguments.length;
  if (argCount < 2) {
    throw new Error('must supply at least one "varg"');
  }
  var func = arguments[argCount - 1];
  if (typeof func !== 'function') {
    throw new Error('the last argument to "combine" must be a function');
  }
  // This vector's values come first, followed by each varg's values
  var arrays = [this.values];
  for (var k = 0; k < argCount - 1; k++) {
    arrays.push(ensureVector(arguments[k]).values);
  }
  return jd.vector(combineMultipleArrays(arrays, func));
};
/*-----------------------------------------------------------------------------
* Comparison
*/
// Element-wise equality returning a "boolean" vector.
// "other" is first passed through ensureVector (with this dtype as the
// second argument) and the two sizes are checked by validateArrayLengths,
// whose return value is used as the output length. If the dtypes differ the
// result is jd.repNa(outputLen, 'boolean'). "object" vectors are compared
// with elemObjEq, all other dtypes with elemEq.
vectorProto.eq = function(other) {
  other = ensureVector(other, this.dtype);
  var outputLen = validateArrayLengths(this.size(), other.size());
  if (this.dtype !== other.dtype) {
    return jd.repNa(outputLen, 'boolean');
  }
  var comparator = (this.dtype === 'object') ? elemObjEq : elemEq;
  var flags = combineArrays(this.values, other.values, null, comparator);
  return newVector(flags, 'boolean');
};
// Returns true when "compare" reports x and y as equal (result of 0).
function elemEq(x, y) {
  var ordering = compare(x, y);
  return ordering === 0;
}
// Shallow equality for elements of "object" dtype: true when x and y are
// strictly identical (===) or when both are NaN.
function elemObjEq(x, y) {
  if (Number.isNaN(x) && Number.isNaN(y)) {
    return true;
  }
  return x === y;
}
// Element-wise inequality: the result of eq(other) negated with not().
vectorProto.neq = function(other) {
  var equalFlags = this.eq(other);
  return equalFlags.not();
};
// Element-wise "less than" returning a "boolean" vector.
// "other" goes through ensureVector and the sizes through
// validateArrayLengths; when the dtypes differ the result is
// jd.repNa(outputLen, 'boolean'), otherwise elements are compared pairwise
// with elemLt via combineArrays.
vectorProto.lt = function(other) {
  other = ensureVector(other, this.dtype);
  var outputLen = validateArrayLengths(this.size(), other.size());
  var sameDtype = (this.dtype === other.dtype);
  if (!sameDtype) {
    return jd.repNa(outputLen, 'boolean');
  }
  var flags = combineArrays(this.values, other.values, null, elemLt);
  return newVector(flags, 'boolean');
};
// Returns true when "compare" orders x strictly before y (negative result).
function elemLt(x, y) {
  var ordering = compare(x, y);
  return ordering < 0;
}
// Element-wise "greater than" returning a "boolean" vector.
// "other" goes through ensureVector and the sizes through
// validateArrayLengths; when the dtypes differ the result is
// jd.repNa(outputLen, 'boolean'), otherwise elements are compared pairwise
// with elemGt via combineArrays.
vectorProto.gt = function(other) {
  other = ensureVector(other, this.dtype);
  var outputLen = validateArrayLengths(this.size(), other.size());
  var sameDtype = (this.dtype === other.dtype);
  if (!sameDtype) {
    return jd.repNa(outputLen, 'boolean');
  }
  var flags = combineArrays(this.values, other.values, null, elemGt);
  return newVector(flags, 'boolean');
};
// Returns true when "compare" orders x strictly after y (positive result).
function elemGt(x, y) {
  var ordering = compare(x, y);
  return ordering > 0;
}
// Element-wise "less than or equal": lt(other) combined with eq(other)
// through the boolean vector's or() method.
vectorProto.lte = function(other) {
  var lessFlags = this.lt(other);
  var equalFlags = this.eq(other);
  return lessFlags.or(equalFlags);
};
// Element-wise "greater than or equal": gt(other) combined with eq(other)
// through the boolean vector's or() method.
vectorProto.gte = function(other) {
  var greaterFlags = this.gt(other);
  var equalFlags = this.eq(other);
  return greaterFlags.or(equalFlags);
};
// Placeholder: "between" is not implemented yet and always throws.
vectorProto.between = function(lower, upper, inclusive) {
  // TODO
  var message = 'unimplemented method (TODO)';
  throw new Error(message);
};
// Whole-vector equality test returning a single boolean.
// Returns false when "other" is missing, has a different "type" marker than
// vectorProto, or differs in size or dtype. Otherwise elements are compared
// pairwise:
//   - "number" dtype: with numClose when "tolerance" is undefined, else with
//     an absolute tolerance check (both-NaN and exactly-equal pairs match)
//   - "object" dtype: with elemObjEq
//   - any other dtype: with elemEq
vectorProto.equals = function(other, tolerance) {
  if (isMissing(other) || other.type !== vectorProto.type ||
      this.size() !== other.size() || this.dtype !== other.dtype) {
    return false;
  }
  if (this === other) {
    return true;
  }
  var eqFunc = elemEq;
  if (this.dtype === 'number') {
    eqFunc = isUndefined(tolerance) ? numClose :
      function(x, y) {
        // "x === y" must be checked explicitly: for equal infinities
        // x - y is NaN, so the tolerance comparison alone would fail.
        return x === y ||
          (Number.isNaN(x) && Number.isNaN(y)) ||
          Math.abs(x - y) <= tolerance;
      };
  } else if (this.dtype === 'object') {
    eqFunc = elemObjEq;
  }
  var array1 = this.values;
  var array2 = other.values;
  for (var i = 0; i < array1.length; i++) {
    if (!eqFunc(array1[i], array2[i])) {
      return false;
    }
  }
  return true;
};
// Returns true if x and y are within 1e-7 absolute tolerance, are both NaN,
// or are exactly equal. The exact-equality check is required for infinities:
// Infinity - Infinity is NaN, so the tolerance comparison alone would report
// two equal infinite values as different.
function numClose(x, y) {
  return x === y ||
    (Number.isNaN(x) && Number.isNaN(y)) ||
    Math.abs(x - y) <= 1e-7;
}
/*-----------------------------------------------------------------------------
* Order-based
*/
// Returns the element at the position reported by idxMin(skipNa). When
// idxMin yields NaN (no usable position) the NA value registered for this
// dtype in NA_VALUE is returned instead.
vectorProto.min = function(skipNa) {
  var position = this.idxMin(skipNa);
  if (Number.isNaN(position)) {
    return NA_VALUE[this.dtype];
  }
  return this.values[position];
};
// Returns the element at the position reported by idxMax(skipNa). When
// idxMax yields NaN (no usable position) the NA value registered for this
// dtype in NA_VALUE is returned instead.
vectorProto.max = function(skipNa) {
  var position = this.idxMax(skipNa);
  if (Number.isNaN(position)) {
    return NA_VALUE[this.dtype];
  }
  return this.values[position];
};
// Cumulative minimum as a new vector of the same dtype. "skipNa" defaults to
// true. With skipNa the two-argument form of cumulativeReduce is used; without
// it the dtype's NA value is additionally passed as the second argument.
// (How cumulativeReduce treats missing values in each form is defined
// elsewhere in the file.)
vectorProto.cuMin = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  var running;
  if (skipping) {
    running = cumulativeReduce(this.values, elemMin);
  } else {
    running = cumulativeReduce(this.values, NA_VALUE[this.dtype], elemMin);
  }
  return newVector(running, this.dtype);
};
// Returns y only when "compare" orders it strictly before x; otherwise x
// (so x wins ties).
function elemMin(x, y) {
  if (compare(y, x) < 0) {
    return y;
  }
  return x;
}
// Cumulative maximum as a new vector of the same dtype. "skipNa" defaults to
// true. With skipNa the two-argument form of cumulativeReduce is used; without
// it the dtype's NA value is additionally passed as the second argument.
// (How cumulativeReduce treats missing values in each form is defined
// elsewhere in the file.)
vectorProto.cuMax = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  var running;
  if (skipping) {
    running = cumulativeReduce(this.values, elemMax);
  } else {
    running = cumulativeReduce(this.values, NA_VALUE[this.dtype], elemMax);
  }
  return newVector(running, this.dtype);
};
// Returns y only when "compare" orders it strictly after x; otherwise x
// (so x wins ties).
function elemMax(x, y) {
  if (compare(y, x) > 0) {
    return y;
  }
  return x;
}
// Returns the integer position of the first smallest non-missing element
// (as ordered by "compare"), or NaN when there is none.
// "skipNa" defaults to true; when it is falsy, encountering any missing
// element makes the result NaN immediately.
vectorProto.idxMin = function(skipNa) {
  var ignoreMissing = isUndefined(skipNa) ? true : skipNa;
  var elems = this.values;
  var bestIdx = NaN;
  var bestVal = null;
  for (var pos = 0; pos < elems.length; pos++) {
    var candidate = elems[pos];
    if (isMissing(candidate)) {
      if (!ignoreMissing) {
        return NaN;
      }
      continue;
    }
    // A NaN bestIdx means no candidate has been accepted yet, so the first
    // non-missing element is always taken.
    if (compare(candidate, bestVal) < 0 || Number.isNaN(bestIdx)) {
      bestVal = candidate;
      bestIdx = pos;
    }
  }
  return bestIdx;
};
// Returns the integer position of the first largest non-missing element
// (as ordered by "compare"), or NaN when there is none.
// "skipNa" defaults to true; when it is falsy, encountering any missing
// element makes the result NaN immediately.
vectorProto.idxMax = function(skipNa) {
  var ignoreMissing = isUndefined(skipNa) ? true : skipNa;
  var elems = this.values;
  var bestIdx = NaN;
  var bestVal = null;
  for (var pos = 0; pos < elems.length; pos++) {
    var candidate = elems[pos];
    if (isMissing(candidate)) {
      if (!ignoreMissing) {
        return NaN;
      }
      continue;
    }
    // A NaN bestIdx means no candidate has been accepted yet, so the first
    // non-missing element is always taken.
    if (compare(candidate, bestVal) > 0 || Number.isNaN(bestIdx)) {
      bestVal = candidate;
      bestIdx = pos;
    }
  }
  return bestIdx;
};
// Pairwise ("parallel") minimum of this vector and "other", returned as a
// new vector of this dtype. "other" is passed through ensureVector and must
// satisfy validateVectorIsDtype for this dtype. Pairs are combined with
// elemMin via combineArrays, which also receives this dtype's NA value.
vectorProto.pMin = function(other) {
  other = ensureVector(other, this.dtype);
  validateVectorIsDtype(other, this.dtype);
  var naValue = NA_VALUE[this.dtype];
  var lows = combineArrays(this.values, other.values, naValue, elemMin);
  return newVector(lows, this.dtype);
};
// Pairwise ("parallel") maximum of this vector and "other", returned as a
// new vector of this dtype. "other" is passed through ensureVector and must
// satisfy validateVectorIsDtype for this dtype. Pairs are combined with
// elemMax via combineArrays, which also receives this dtype's NA value.
vectorProto.pMax = function(other) {
  other = ensureVector(other, this.dtype);
  validateVectorIsDtype(other, this.dtype);
  var naValue = NA_VALUE[this.dtype];
  var highs = combineArrays(this.values, other.values, naValue, elemMax);
  return newVector(highs, this.dtype);
};
// Limits each element to the range given by the matching "lower" and
// "upper" elements and returns a new vector of this dtype. Both bounds go
// through ensureVector and validateVectorIsDtype for this dtype; the
// per-element rule is elemClip, applied through combineMultipleArrays.
vectorProto.clip = function(lower, upper) {
  lower = ensureVector(lower, this.dtype);
  upper = ensureVector(upper, this.dtype);
  validateVectorIsDtype(lower, this.dtype);
  validateVectorIsDtype(upper, this.dtype);
  var inputs = [this.values, lower.values, upper.values];
  var clipped = combineMultipleArrays(inputs, elemClip);
  return newVector(clipped, this.dtype);
};
// Clips a single element to [lower, upper]. A missing bound is ignored, and
// a missing element is returned unchanged. Throws when both bounds are
// present and lower > upper (checked before looking at the element).
function elemClip(elem, lower, upper) {
  var hasLower = !isMissing(lower);
  var hasUpper = !isMissing(upper);
  if (hasLower && hasUpper && lower > upper) {
    throw new Error('invalid range: lower (' + lower + ') > upper (' +
      upper + ')');
  }
  if (isMissing(elem)) {
    return elem;
  }
  if (hasLower && elem < lower) {
    return lower;
  }
  if (hasUpper && elem > upper) {
    return upper;
  }
  return elem;
}
// Placeholder: "rank" is not implemented yet and always throws.
vectorProto.rank = function() {
  // TODO
  var message = 'unimplemented method (TODO)';
  throw new Error(message);
};
/*-----------------------------------------------------------------------------
* Membership
*/
// Returns true if "value" occurs in this vector, using the vector's index.
// Not available for "object" vectors (validateVectorIsNotDtype). "value" is
// normalised with ensureScalar; its inferred dtype must either be null or
// match this vector's dtype, otherwise an Error is thrown.
vectorProto.contains = function(value) {
  validateVectorIsNotDtype(this, 'object');
  value = ensureScalar(value);
  var valDtype = inferDtype(value);
  var dtypeAccepted = (valDtype === null || valDtype === this.dtype);
  if (!dtypeAccepted) {
    throw new Error('"value" does not match the dtype of this vector');
  }
  var lookup = this._getIndex().lookupKey([value]);
  return lookup !== null;
};
// Membership test of this vector's elements against "values". Not available
// for "object" vectors. "values" is passed through ensureVector and must
// satisfy validateVectorIsDtype for this dtype; the answer comes from the
// has() method of the index built over "values", called with [this].
vectorProto.isIn = function(values) {
  validateVectorIsNotDtype(this, 'object');
  values = ensureVector(values, this.dtype);
  validateVectorIsDtype(values, this.dtype);
  var lookupIndex = values._getIndex();
  return lookupIndex.has([this]);
};
// Builds a two-column data frame ("value", "count") from the index's
// valueCounts() result and returns it sorted by "count" then "value", with
// sort flags [false, true] (presumably descending count, ascending value --
// confirm against df.sort). Not available for "object" vectors.
vectorProto.valueCounts = function() {
  validateVectorIsNotDtype(this, 'object');
  var tally = this._getIndex().valueCounts();
  var columns = [tally.vectors[0], tally.counts];
  var frame = jd.df(columns, ['value', 'count']);
  return frame.sort(['count', 'value'], [false, true]);
};
// Returns the first entry of the index's unique() result (the index is
// built over this single vector). Not available for "object" vectors.
vectorProto.unique = function() {
  validateVectorIsNotDtype(this, 'object');
  var uniqueVectors = this._getIndex().unique();
  return uniqueVectors[0];
};
// Returns the "size" property of this vector's index. Not available for
// "object" vectors.
vectorProto.nUnique = function() {
  validateVectorIsNotDtype(this, 'object');
  var index = this._getIndex();
  return index.size;
};
// Forwards "keep" to the duplicated() method of this vector's index and
// returns its result. Not available for "object" vectors.
vectorProto.duplicated = function(keep) {
  validateVectorIsNotDtype(this, 'object');
  var index = this._getIndex();
  return index.duplicated(keep);
};
// Placeholder: "replace" is not implemented yet and always throws.
vectorProto.replace = function() {
  // TODO
  var message = 'unimplemented method (TODO)';
  throw new Error(message);
};
// Placeholder: "describe" is not implemented yet and always throws.
vectorProto.describe = function() {
  // TODO
  var message = 'unimplemented method (TODO)';
  throw new Error(message);
};
// Private helper returning this vector's index. The index is created with
// newNestedIndex([this]) the first time it is requested (while "_index" is
// still null) and cached on the vector for later calls.
vectorProto._getIndex = function() {
  if (this._index !== null) {
    return this._index;
  }
  this._index = newNestedIndex([this]);
  return this._index;
};
/*-----------------------------------------------------------------------------
* Set Operations
*/
// Set union: concatenates this vector with "other" via jd.vCat and returns
// the unique() values of the result. Not available for "object" vectors;
// "other" is passed through ensureVector and must match this dtype.
vectorProto.union = function(other) {
  validateVectorIsNotDtype(this, 'object');
  other = ensureVector(other, this.dtype);
  validateVectorIsDtype(other, this.dtype);
  var joined = jd.vCat(this, other);
  return joined.unique();
};
// Set intersection: takes this vector's unique() values and selects, via
// s(), those flagged by isIn(other). Not available for "object" vectors;
// "other" is passed through ensureVector and must match this dtype.
vectorProto.intersect = function(other) {
  validateVectorIsNotDtype(this, 'object');
  other = ensureVector(other, this.dtype);
  validateVectorIsDtype(other, this.dtype);
  var distinct = this.unique();
  var inOther = distinct.isIn(other);
  return distinct.s(inOther);
};
// Set difference: takes this vector's unique() values and selects, via s(),
// using the ex()-wrapped result of isIn(other) (i.e. jd.ex applied to the
// membership flags). Not available for "object" vectors; "other" is passed
// through ensureVector and must match this dtype.
vectorProto.setdiff = function(other) {
  validateVectorIsNotDtype(this, 'object');
  other = ensureVector(other, this.dtype);
  validateVectorIsDtype(other, this.dtype);
  var distinct = this.unique();
  var excluded = distinct.isIn(other).ex();
  return distinct.s(excluded);
};
/*=============================================================================
# # # # # # # # ###### ####
## # # # ## ## # # # # #
# # # # # # ## # # # ##### #
# # # # # # # # # # #
# ## # # # # # # # # #
# # #### # # ## ###### ####
*/
// dtype tag carried by numVecProto (presumably the shared prototype of all
// number vectors -- its construction is outside this section).
numVecProto.dtype = 'number';
/*-----------------------------------------------------------------------------
* Operators
*/
// Element-wise addition returning a new "number" vector. "other" is passed
// through ensureVector and must be of "number" dtype. combineArrays receives
// NaN as its third argument (presumably the value used for missing pairs).
numVecProto.add = function(other) {
  var rhs = ensureVector(other, 'number');
  validateVectorIsDtype(rhs, 'number');
  var sums = combineArrays(this.values, rhs.values, NaN, numberAdd);
  return newVector(sums, 'number');
};
// Binary "+" as a function, for use as a combiner/reducer callback.
function numberAdd(x, y) {
  var total = x + y;
  return total;
}
// Element-wise subtraction (this - other) returning a new "number" vector.
// "other" is passed through ensureVector and must be of "number" dtype.
numVecProto.sub = function(other) {
  var rhs = ensureVector(other, 'number');
  validateVectorIsDtype(rhs, 'number');
  var differences = combineArrays(this.values, rhs.values, NaN, numberSub);
  return newVector(differences, 'number');
};
// Binary "-" as a function, for use as a combiner callback.
function numberSub(x, y) {
  var difference = x - y;
  return difference;
}
// Element-wise multiplication returning a new "number" vector. "other" is
// passed through ensureVector and must be of "number" dtype.
numVecProto.mul = function(other) {
  var rhs = ensureVector(other, 'number');
  validateVectorIsDtype(rhs, 'number');
  var products = combineArrays(this.values, rhs.values, NaN, numberMul);
  return newVector(products, 'number');
};
// Binary "*" as a function, for use as a combiner callback.
function numberMul(x, y) {
  var product = x * y;
  return product;
}
// Element-wise division (this / other) returning a new "number" vector.
// "other" is passed through ensureVector and must be of "number" dtype.
numVecProto.div = function(other) {
  var rhs = ensureVector(other, 'number');
  validateVectorIsDtype(rhs, 'number');
  var quotients = combineArrays(this.values, rhs.values, NaN, numberDiv);
  return newVector(quotients, 'number');
};
// Binary "/" as a function, for use as a combiner callback.
function numberDiv(x, y) {
  var quotient = x / y;
  return quotient;
}
// Element-wise remainder (JavaScript "%") returning a new "number" vector.
// "other" is passed through ensureVector and must be of "number" dtype.
numVecProto.mod = function(other) {
  var rhs = ensureVector(other, 'number');
  validateVectorIsDtype(rhs, 'number');
  var remainders = combineArrays(this.values, rhs.values, NaN, numberMod);
  return newVector(remainders, 'number');
};
// Binary "%" (remainder; result takes the sign of x) as a function.
function numberMod(x, y) {
  var remainder = x % y;
  return remainder;
}
// Element-wise exponentiation (Math.pow(this, other)) returning a new
// "number" vector. "other" is passed through ensureVector and must be of
// "number" dtype.
numVecProto.pow = function(other) {
  var exponents = ensureVector(other, 'number');
  validateVectorIsDtype(exponents, 'number');
  var powers = combineArrays(this.values, exponents.values, NaN, Math.pow);
  return newVector(powers, 'number');
};
/*-----------------------------------------------------------------------------
* Unary functions
*/
// Element-wise Math.abs, returned as a new "number" vector.
numVecProto.abs = function() {
  var mapped = this.values.map(Math.abs);
  return newVector(mapped, 'number');
};
// Element-wise Math.sqrt, returned as a new "number" vector.
numVecProto.sqrt = function() {
  var mapped = this.values.map(Math.sqrt);
  return newVector(mapped, 'number');
};
// Element-wise Math.sign, returned as a new "number" vector.
numVecProto.sign = function() {
  var mapped = this.values.map(Math.sign);
  return newVector(mapped, 'number');
};
// Element-wise Math.ceil, returned as a new "number" vector.
numVecProto.ceil = function() {
  var mapped = this.values.map(Math.ceil);
  return newVector(mapped, 'number');
};
// Element-wise Math.floor, returned as a new "number" vector.
numVecProto.floor = function() {
  var mapped = this.values.map(Math.floor);
  return newVector(mapped, 'number');
};
// Element-wise Math.round, returned as a new "number" vector.
numVecProto.round = function() {
  var mapped = this.values.map(Math.round);
  return newVector(mapped, 'number');
};
// Element-wise Math.exp, returned as a new "number" vector.
numVecProto.exp = function() {
  var mapped = this.values.map(Math.exp);
  return newVector(mapped, 'number');
};
// Element-wise Math.log (natural logarithm), returned as a new "number"
// vector.
numVecProto.log = function() {
  var mapped = this.values.map(Math.log);
  return newVector(mapped, 'number');
};
// Element-wise Math.sin, returned as a new "number" vector.
numVecProto.sin = function() {
  var mapped = this.values.map(Math.sin);
  return newVector(mapped, 'number');
};
// Element-wise Math.cos, returned as a new "number" vector.
numVecProto.cos = function() {
  var mapped = this.values.map(Math.cos);
  return newVector(mapped, 'number');
};
// Element-wise Math.tan, returned as a new "number" vector.
numVecProto.tan = function() {
  var mapped = this.values.map(Math.tan);
  return newVector(mapped, 'number');
};
// Element-wise Math.asin, returned as a new "number" vector.
numVecProto.asin = function() {
  var mapped = this.values.map(Math.asin);
  return newVector(mapped, 'number');
};
// Element-wise Math.acos, returned as a new "number" vector.
numVecProto.acos = function() {
  var mapped = this.values.map(Math.acos);
  return newVector(mapped, 'number');
};
// Element-wise Math.atan, returned as a new "number" vector.
numVecProto.atan = function() {
  var mapped = this.values.map(Math.atan);
  return newVector(mapped, 'number');
};
/*-----------------------------------------------------------------------------
* Aggregation
*/
// Sum of the elements, starting from 0. "skipNa" defaults to true. With
// skipNa the values are folded with reduceNonNa; otherwise with
// reduceUnless, which additionally receives isMissing as its condition
// (both helpers are defined elsewhere in the file).
numVecProto.sum = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  if (skipping) {
    return reduceNonNa(this.values, 0, numberAdd);
  }
  return reduceUnless(this.values, 0, isMissing, numberAdd);
};
// Cumulative sum as a new "number" vector. "skipNa" defaults to true. With
// skipNa the two-argument form of cumulativeReduce is used; without it
// NA_VALUE.number is additionally passed as the second argument.
numVecProto.cuSum = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  var running;
  if (skipping) {
    running = cumulativeReduce(this.values, numberAdd);
  } else {
    running = cumulativeReduce(this.values, NA_VALUE.number, numberAdd);
  }
  return newVector(running, 'number');
};
// Arithmetic mean of the elements. "skipNa" defaults to true. A {n, sum}
// accumulator is folded over the values with meanReducer, using reduceNonNa
// when skipping missing values and reduceUnless (with isMissing) otherwise.
// Returns NaN when the fold itself yields NaN or when no element was
// counted; otherwise sum / n.
numVecProto.mean = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  var accumulator = {n: 0, sum: 0.0};
  var outcome;
  if (skipping) {
    outcome = reduceNonNa(this.values, accumulator, meanReducer);
  } else {
    outcome = reduceUnless(this.values, accumulator, isMissing, meanReducer);
  }
  if (Number.isNaN(outcome) || outcome.n === 0) {
    return NaN;
  }
  return outcome.sum / outcome.n;
};
// Reducer for the mean: counts x and adds it to the running sum. Mutates and
// returns the same "stats" object ({n, sum}).
function meanReducer(stats, x) {
  stats.n += 1;
  stats.sum = stats.sum + x;
  return stats;
}
// Standard deviation: the square root of var(skipNa), or NaN when the
// variance is NaN.
numVecProto.stdev = function(skipNa) {
  var variance = this.var(skipNa);
  if (Number.isNaN(variance)) {
    return NaN;
  }
  return Math.sqrt(variance);
};
// Sample variance computed with the single-pass "online algorithm":
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
// "skipNa" defaults to true. A {n, mean, M2} accumulator is folded over the
// values with varReducer, using reduceNonNa when skipping missing values and
// reduceUnless (with isMissing) otherwise. Returns NaN when the fold itself
// yields NaN or fewer than two elements were counted; otherwise M2 / (n - 1).
numVecProto.var = function(skipNa) {
  var skipping = isUndefined(skipNa) ? true : skipNa;
  var accumulator = {n: 0, mean: 0.0, M2: 0.0};
  var outcome;
  if (skipping) {
    outcome = reduceNonNa(this.values, accumulator, varReducer);
  } else {
    outcome = reduceUnless(this.values, accumulator, isMissing, varReducer);
  }
  if (Number.isNaN(outcome) || outcome.n < 2) {
    return NaN;
  }
  return outcome.M2 / (outcome.n - 1);
};
// One step of the online variance update: increments the count, moves the
// running mean towards x, and accumulates into M2 the product of the
// deviations from the old and the new mean. Mutates and returns the same
// "stats" object ({n, mean, M2}).
function varReducer(stats, x) {
  stats.n += 1;
  var deltaBefore = x - stats.mean;
  stats.mean = stats.mean + deltaBefore / stats.n;
  var deltaAfter = x - stats.mean;
  stats.M2 = stats.M2 + deltaBefore * deltaAfter;
  return stats;
}
/*=============================================================================
##### #### #### # # # ###### ####
# # # # # # # # # # # #
##### # # # # # # # ##### #
#