UNPKG

jsdataframe

Version:

a data frame library inspired by R and Python Pandas

1,859 lines (1,556 loc) 132 kB
// UMD boilerplate from https://github.com/umdjs/umd - "commonjsStrict.js" template ;(function (root, factory) { if (typeof define === 'function' && define.amd) { // AMD. Register as an anonymous module. define(['exports'], factory); } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { // CommonJS factory(exports); } else { // Browser globals factory((root.jsdataframe = {})); } }(this, function (exports) { "use strict"; var jd = exports; jd.version = '0.2.0'; /*----------------------------------------------------------------------------- * Polyfills */ // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isNaN Number.isNaN = Number.isNaN || function(value) { return value !== value; }; // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isInteger Number.isInteger = Number.isInteger || function(value) { return typeof value === "number" && isFinite(value) && Math.floor(value) === value; }; // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/findIndex if (!Array.prototype.findIndex) { Array.prototype.findIndex = function(predicate) { if (this === null) { throw new TypeError('Array.prototype.findIndex called on null or undefined'); } if (typeof predicate !== 'function') { throw new TypeError('predicate must be a function'); } var list = Object(this); var length = list.length >>> 0; var thisArg = arguments[1]; var value; for (var i = 0; i < length; i++) { value = list[i]; if (predicate.call(thisArg, value, i, list)) { return i; } } return -1; }; } // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/includes if (!String.prototype.includes) { String.prototype.includes = function(search, start) { //'use strict'; if (typeof start !== 'number') { start = 0; } if (start + search.length > this.length) { return false; } else { return this.indexOf(search, start) !== -1; } }; } // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith if (!String.prototype.startsWith) { String.prototype.startsWith = function(searchString, position){ position = position || 0; return this.substr(position, searchString.length) === searchString; }; } // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/endsWith if (!String.prototype.endsWith) { String.prototype.endsWith = function(searchString, position) { var subjectString = this.toString(); if (typeof position !== 'number' || !isFinite(position) || Math.floor(position) !== position || position > subjectString.length) { position = subjectString.length; } position -= searchString.length; var lastIndex = subjectString.indexOf(searchString, position); return lastIndex !== -1 && lastIndex === position; }; } /*============================================================================= ##### ###### #### # ## ##### ## ##### # #### # # #### # # # # # # # # # # # # # # # # ## # # # # ##### # # # # # # # # # # # # # # # #### # # # # # ###### ##### ###### # # # # # # # # # # # # # # # # # # # # # # # # # ## # # ##### ###### #### ###### # # # # # # # # #### # # #### */ /*----------------------------------------------------------------------------- * Define prototypes (instead of constructor functions) */ var vectorProto = {}; vectorProto.type = 'jsdataframe.Vector'; var numVecProto = Object.create(vectorProto); var boolVecProto = Object.create(vectorProto); var strVecProto = Object.create(vectorProto); var dateVecProto = Object.create(vectorProto); var dfProto = {}; dfProto.type = 'jsdataframe.DataFrame'; // Supporting types var rangeProto = {}; rangeProto.type = 'jsdataframe.Range'; var byDtypeProto = {}; byDtypeProto.type = 'jsdataframe.ByDtype'; var exclusionProto = {}; exclusionProto.type = 'jsdataframe.Exclusion'; // Private helper types var abstractIndexProto = {}; abstractIndexProto.type = 'jsdataframe.AbstractIndex'; var nestedIndexProto = Object.create(abstractIndexProto); nestedIndexProto.type = 'jsdataframe.NestedIndex'; /*----------------------------------------------------------------------------- * Constants */ var VALID_DTYPES = Object.create(null); VALID_DTYPES.number = true; VALID_DTYPES.boolean = true; VALID_DTYPES.string = true; VALID_DTYPES.date = true; VALID_DTYPES.object = true; var NA_VALUE = { number: NaN, boolean: null, string: null, date: null, object: null }; var PROTO_MAP = { number: numVecProto, boolean: boolVecProto, string: strVecProto, date: dateVecProto, object: vectorProto }; var COERCE_FUNC = { number: coerceToNum, boolean: coerceToBool, string: coerceToStr, date: coerceToDate, object: function(x) { return x; } }; // Private exports for testing purposes jd._private_export = {}; /*============================================================================= #### ##### ## ##### # #### ###### # # # # #### #### # # # # # # # # # # # ## # # # # #### # # # # # # ##### # # # # # # #### # # ###### # # # # # # # # # # # # # # # # # # # # # # # # ## # # # # #### # # # # # #### # #### # # #### #### */ /*----------------------------------------------------------------------------- * Vector Creation */ jd.vector = function(array, dtype, copyArray) { if (!Array.isArray(array)) { throw new Error('"array" argument must be an Array'); } if (isUndefined(copyArray) || copyArray) { array = array.slice(); } if (isUndefined(dtype)) { dtype = null; } return (dtype === null) ? inferVectorDtype(array) : enforceVectorDtype(array, dtype); }; jd.seq = function(start, stop, step, includeStop) { if (arguments.length === 1) { if (!isNumber(start)) { throw new Error('both "start" and "stop" arguments must be ' + 'specified for non-numeric sequences'); } stop = start; start = 0; } else if (inferDtype(start) !== inferDtype(stop)) { throw new Error('"start" and "stop" must have the same dtype'); } step = isUndefined(step) ? 1 : step; includeStop = isUndefined(includeStop) ? false : includeStop; // Handle character sequence case if (isString(start)) { if (start.length !== 1 || stop.length !== 1) { throw new Error('both "start" and "stop" must be single characters ' + 'for character sequences'); } var charCodeSeq = jd.seq(start.charCodeAt(0), stop.charCodeAt(0), step, includeStop); return charCodeSeq.map(charCodeToStr); } // Validate step sign if (step === 0) { throw new Error('"step" must be nonzero'); } if (start < stop && step < 0) { throw new Error('"step" must be positive when start < stop'); } if (start > stop && step > 0) { throw new Error('"step" must be negative when start > stop'); } // Generate sequence var array = []; var curr = start; while ( step > 0 ? (includeStop ? curr <= stop : curr < stop) : (includeStop ? curr >= stop : curr > stop) ) { array.push(curr); curr += step; } return newVector(array, 'number'); }; function charCodeToStr(charCode) { return String.fromCharCode(charCode); } jd.seqOut = function(start, lengthOut, step) { if (arguments.length < 3) { step = 1; } // Validate arguments step = +step; validateNonnegInt(lengthOut, 'lengthOut'); // Handle character sequence case if (isString(start)) { if (start.length !== 1) { throw new Error('"start" must be a single character ' + 'for character sequences'); } var charCodeSeq = jd.seqOut(start.charCodeAt(0), lengthOut, step); return charCodeSeq.map(charCodeToStr); } // Generate sequence var array = allocArray(lengthOut); var curr = start; for (var i = 0; i < lengthOut; i++) { array[i] = curr; curr += step; } return newVector(array, 'number'); }; jd.linspace = function(start, stop, length) { // Validate arguments start = +start; stop = +stop; validateNonnegInt(length, 'length'); // Generate sequence var array = allocArray(length); var step = (length === 1) ? 0 : (stop - start) / (length - 1); for (var i = 0; i < length; i++) { array[i] = start + i * step; } return newVector(array, 'number'); }; jd.rep = function(values, times) { validateNonnegInt(times, 'times'); values = ensureVector(values); var inputArr = values.values; var inputLength = inputArr.length; var outputArr = allocArray(inputLength * times); for (var repInd = 0; repInd < times; repInd++) { var offset = repInd * inputLength; for (var inputInd = 0; inputInd < inputLength; inputInd++) { outputArr[offset + inputInd] = inputArr[inputInd]; } } return newVector(outputArr, values.dtype); }; jd.repEach = function(values, times) { validateNonnegInt(times, 'times'); values = ensureVector(values); var inputArr = values.values; var inputLength = inputArr.length; var outputArr = allocArray(inputLength * times); for (var inputInd = 0; inputInd < inputLength; inputInd++) { var offset = inputInd * times; for (var repInd = 0; repInd < times; repInd++) { outputArr[offset + repInd] = inputArr[inputInd]; } } return newVector(outputArr, values.dtype); }; jd.repNa = function(times, dtype) { validateNonnegInt(times, 'times'); validateDtype(dtype); var naValue = NA_VALUE[dtype]; var array = allocArray(times); for (var i = 0; i < times; i++) { array[i] = naValue; } return newVector(array, dtype); }; /*----------------------------------------------------------------------------- * DataFrame Creation */ jd.df = function(columns, colNames) { // Standardize 'colNames' argument to string vector if present if (!isUndefined(colNames)) { colNames = ensureStringVector(colNames); } // Standardize 'columns' argument to array format var numCols; if (columns.type === vectorProto.type) { throw new Error('"columns" should not itself be a vector'); } else if (Array.isArray(columns)) { numCols = columns.length; if (isUndefined(colNames)) { colNames = generateColNames(numCols); } else if (colNames.size() !== numCols) { throw new Error('the length of "colNames" (' + colNames.size() + ') does not match the length of "columns" (' + numCols + ')'); } } else if (typeof columns === 'object') { var keys = Object.keys(columns); numCols = keys.length; var colMap = columns; if (isUndefined(colNames)) { colNames = newVector(keys, 'string'); } else { if (colNames.isNa().any()) { throw new Error('"colNames" cannot have null entries when ' + '"columns" is an object'); } else if (colNames.duplicated().any()) { throw new Error('"colNames" cannot have duplicate entries when ' + '"columns" is an object'); } else if (colNames.size() !== numCols || colNames.isIn(keys).not().any()) { throw new Error('"colNames" must match all the keys in ' + '"columns" if "columns" is an object'); } } columns = allocArray(numCols); for (var i = 0; i < numCols; i++) { columns[i] = colMap[colNames.values[i]]; } } else { throw new Error('expected "columns" to be an array or object but got: ', columns); } return newDataFrame(columns, colNames); }; jd.dfFromObjArray = function(objArray, colOrder) { if (!Array.isArray(objArray)) { throw new Error('"objArray" must be an array'); } var nRow = objArray.length; var definedOrder = true; var j; var columns; if (isUndefined(colOrder)) { colOrder = []; columns = []; definedOrder = false; } else { colOrder = ensureStringVector(colOrder); if (colOrder.isNa().any()) { throw new Error('"colOrder" cannot have null entries'); } else if (colOrder.duplicated().any()) { throw new Error('"colOrder" cannot have duplicate entries'); } colOrder = colOrder.values; columns = allocArray(colOrder.length); for (j = 0; j < colOrder.length; j++) { columns[j] = allocArray(nRow); } } // Populate columns var foundCols = Object.create(null); for (var i = 0; i < nRow; i++) { var rowObj = objArray[i]; if (!definedOrder) { var keys = Object.keys(rowObj); for (j = 0; j < keys.length; j++) { var key = keys[j]; if (!(key in foundCols)) { colOrder.push(key); var newColArr = allocArray(nRow); for (var k = 0; k < i; k++) { newColArr[k] = null; } columns.push(newColArr); foundCols[key] = key; } } } for (j = 0; j < colOrder.length; j++) { columns[j][i] = rowObj.propertyIsEnumerable(colOrder[j]) ? rowObj[colOrder[j]] : null; } } return newDataFrame(columns, newVector(colOrder, 'string')); }; jd.dfFromMatrix = function(matrix, colNames) { if (!Array.isArray(matrix)) { throw new Error('"matrix" must be an array'); } var nCol = matrix.length > 0 ? matrix[0].length : 0; colNames = isUndefined(colNames) ? generateColNames(nCol) : ensureStringVector(colNames); if (nCol > 0 && nCol !== colNames.size()) { throw new Error('"colNames" must have the same length as each ' + 'row array'); } return dfFromMatrixHelper(matrix, 0, colNames); }; // Forms a data frame using 'matrix' starting with 'startRow' and // setting column names to the 'colNames' string vector function dfFromMatrixHelper(matrix, startRow, colNames) { var nCol = colNames.size(); var nRow = matrix.length - startRow; var columns = allocArray(nCol); var j; for (j = 0; j < nCol; j++) { columns[j] = allocArray(nRow); } for (var i = 0; i < nRow; i++) { var rowArray = matrix[i + startRow]; if (rowArray.length !== nCol) { throw new Error('all row arrays must be of the same size'); } for (j = 0; j < nCol; j++) { columns[j][i] = rowArray[j]; } } return newDataFrame(columns, colNames); } jd.dfFromMatrixWithHeader = function(matrix) { if (!Array.isArray(matrix)) { throw new Error('"matrix" must be an array'); } else if (matrix.length === 0) { throw new Error('"matrix" must not have length 0'); } var colNames = ensureStringVector(matrix[0]); if (matrix.length > 1 && colNames.size() !== matrix[1].length) { throw new Error('header row must have the same length as other ' + 'row arrays'); } return dfFromMatrixHelper(matrix, 1, colNames); }; /*----------------------------------------------------------------------------- * Conversion */ jd.unpack = function(obj) { if (obj.type === vectorProto.type) { return unpackVector(obj); } else if (obj.type === dfProto.type) { var names = unpackVector(obj.names); var cols = obj.cols.map(function(col) { return unpackVector(col); }); return jd.df(cols, names); } else { throw new Error('"obj" has unrecognized type: ' + obj.type); } }; function unpackVector(obj) { return jd.vector(obj.values, obj.dtype); } /*----------------------------------------------------------------------------- * Concatenation */ jd.vCat = function() { var numArgs = arguments.length; // First pass: determine total output length and defaultDtype var vectorArgs = allocArray(numArgs); var defaultDtype = null; var outputLen = 0; for (var i = 0; i < numArgs; i++) { var currArg = arguments[i]; if (!isUndefined(currArg) && currArg !== null && currArg.type === dfProto.type) { throw new Error('cannot pass data frame arguments to jd.vCat'); } var vector = ensureVector(currArg); if (defaultDtype === null && vector.dtype !== 'object') { defaultDtype = vector.dtype; } outputLen += vector.size(); vectorArgs[i] = vector; } defaultDtype = (defaultDtype === null) ? 'object' : defaultDtype; // Second pass: populate output array var outputArr = allocArray(outputLen); var index = 0; for (i = 0; i < numArgs; i++) { var argArray = vectorArgs[i].values; var argArrLen = argArray.length; for (var j = 0; j < argArrLen; j++) { outputArr[index] = argArray[j]; index++; } } return inferVectorDtype(outputArr, defaultDtype); }; jd.colCat = function() { var numArgs = arguments.length; var args = allocArray(numArgs); for (var i = 0; i < numArgs; i++) { args[i] = arguments[i]; } return jd._colCatArray(args); }; jd._colCatArray = function(array) { var arrLen = array.length; var columns = []; var colNameArray = []; var j; for (var i = 0; i < arrLen; i++) { var elem = array[i]; if (isUndefined(elem) || elem === null) { // treat as scalar columns.push(elem); colNameArray.push(null); } else if (elem.type === dfProto.type) { // elem is a data frame var nCol = elem._cols.length; for (j = 0; j < nCol; j++) { columns.push(elem._cols[j]); colNameArray.push(elem._names.values[j]); } } else if (typeof elem === 'object' && elem.type !== vectorProto.type && !Array.isArray(elem)) { // elem is an object for column name wrapping var keys = Object.keys(elem); for (j = 0; j < keys.length; j++) { var key = keys[j]; columns.push(elem[key]); colNameArray.push(key); } } else { // elem is a vector, array, or scalar columns.push(elem); colNameArray.push(null); } } return newDataFrame(columns, newVector(colNameArray, 'string')); }; jd.rowCat = function() { var numArgs = arguments.length; var args = allocArray(numArgs); for (var i = 0; i < numArgs; i++) { args[i] = arguments[i]; } return jd._rowCatArray(args); }; // Define types of elements for rowCatArray function var ROW_ELEM_TYPES = { SCALAR: 0, ARRAY: 1, VECTOR: 2, DATA_FRAME: 3 }; jd._rowCatArray = function(array) { var arrLen = array.length; // Check column and row count and resolve column names var elemTypes = allocArray(arrLen); var colNameArr = null; var numRows = 0; var numCols = -1; var elem, i, j; for (i = 0; i < arrLen; i++) { elem = array[i]; var elemColCount; if (isUndefined(elem) || elem === null || typeof elem !== 'object') { elemTypes[i] = ROW_ELEM_TYPES.SCALAR; numRows++; elemColCount = numCols; } else if (elem.type === dfProto.type) { elemTypes[i] = ROW_ELEM_TYPES.DATA_FRAME; elemColCount = elem.nCol(); if (elemColCount === 0) { continue; } numRows += elem.nRow(); // Check column names if (colNameArr === null) { colNameArr = elem._names.values.slice(); } else { var len = Math.min(colNameArr.length, elemColCount); var elemNameArr = elem._names.values; for (j = 0; j < len; j++) { if (elemNameArr[j] !== colNameArr[j]) { colNameArr[j] = null; } } } } else if (elem.type === vectorProto.type) { elemTypes[i] = ROW_ELEM_TYPES.VECTOR; numRows++; elemColCount = elem.values.length; } else if (Array.isArray(elem)) { elemTypes[i] = ROW_ELEM_TYPES.ARRAY; numRows++; elemColCount = elem.length; } else { // treat object as scalar elemTypes[i] = ROW_ELEM_TYPES.SCALAR; numRows++; elemColCount = numCols; } // Check column counts if (numCols === -1) { numCols = elemColCount; } else if (numCols !== elemColCount) { throw new Error('arguments imply differing number of columns: ' + numCols + ', ' + elemColCount); } } if (numRows === 0) { return jd.df([]); } if (numCols === -1) { numCols = 1; } var colNames = (colNameArr === null) ? jd.repNa(numCols, 'string') : newVector(colNameArr, 'string'); // Assign values for new data frame var columns = allocArray(numCols); for (j = 0; j < numCols; j++) { columns[j] = allocArray(numRows); } var currRow = 0; for (i = 0; i < arrLen; i++) { elem = array[i]; switch (elemTypes[i]) { case ROW_ELEM_TYPES.SCALAR: for (j = 0; j < numCols; j++) { columns[j][currRow] = elem; } currRow++; break; case ROW_ELEM_TYPES.VECTOR: elem = elem.values; /* falls through */ case ROW_ELEM_TYPES.ARRAY: for (j = 0; j < numCols; j++) { columns[j][currRow] = elem[j]; } currRow++; break; case ROW_ELEM_TYPES.DATA_FRAME: var nRow = elem.nRow(); for (j = 0; j < numCols; j++) { for (var k = 0; k < nRow; k++) { columns[j][currRow + k] = elem._cols[j].values[k]; } } currRow += nRow; break; } } return newDataFrame(columns, colNames); }; jd.strCat = function() { var numArgs = arguments.length; if (numArgs === 0) { throw new Error('"strCat" must be called with at least one argument'); } var argArrays = allocArray(numArgs); for (var i = 0; i < numArgs; i++) { argArrays[i] = ensureVector(arguments[i]).values; } var internCache = Object.create(null); var resultArr = combineMultipleArrays(argArrays, elemStrCat, internCache); return newVector(resultArr, 'string'); }; var elemStrCat = useStringInterning(function() { var argLen = arguments.length; var args = allocArray(argLen); for (var i = 0; i < argLen; i++) { var value = arguments[i]; if (isMissing(value)) { return null; } args[i] = elemToString(value); } return args.join(''); }); /*----------------------------------------------------------------------------- * Printing */ // Constants var _MIN_MAX_WIDTH = 55; var _MIN_MAX_LINES = 4; var _MAX_STR_WIDTH = 45; var _FIXED_NUM_DIGITS = 6; var _EXP_FRAC_DIGITS = 6; var _NUM_FIXED_LOWER_BOUND = Math.pow(10, 1 - _FIXED_NUM_DIGITS); var _NUM_FIXED_UPPER_BOUND = 1e7 - 1e-9; var _PRINT_SEP = ' '; var _SKIP_MARKER = '..'; var _ROW_ID_SUFFIX = ':'; jd.printingOpts = {}; jd.printingOpts._maxWidth = 79; jd.printingOpts._maxLines = 10; jd.printingOpts._printCallback = function(stringToPrint) { console.log(stringToPrint); }; jd.printingOpts.getMaxWidth = function() { return this._maxWidth; }; jd.printingOpts.setMaxWidth = function(maxWidth) { validatePrintMax(maxWidth, _MIN_MAX_WIDTH, 'maxWidth'); this._maxWidth = maxWidth; }; jd.printingOpts.getMaxLines = function() { return this._maxLines; }; jd.printingOpts.setMaxLines = function(maxLines) { validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines'); this._maxLines = maxLines; }; jd.printingOpts.setPrintFunction = function(callback) { validateFunction(callback, 'callback'); this._printCallback = callback; }; vectorProto.p = function(maxLines) { var printStr = this.printToString(maxLines); jd.printingOpts._printCallback(printStr); }; vectorProto.printToString = function(maxLines) { if (isUndefined(maxLines)) { maxLines = jd.printingOpts._maxLines; } else { validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines'); } if (this.values.length === 0) { return this.toString(); } var rowIds = rightAlign(makeRowIds(this.values.length, maxLines)); var printVector = rightAlign(this._toTruncatedPrintVector(maxLines)); var printLines = jd.strCat(rowIds, _PRINT_SEP, printVector); return this.toString() + '\n' + printLines.strJoin('\n'); }; dfProto.p = function(maxLines) { var printStr = this.printToString(maxLines); jd.printingOpts._printCallback(printStr); }; dfProto.printToString = function(maxLines) { if (isUndefined(maxLines)) { maxLines = jd.printingOpts._maxLines; } else { validatePrintMax(maxLines, _MIN_MAX_LINES, 'maxLines'); } var rowIds = rightAlign(jd.vCat('', makeRowIds(this.nRow(), maxLines))); var printVectors = [rowIds]; var colIdx = 0; var totalWidth = rowIds.at(0).length; var stopWidth = jd.printingOpts._maxWidth - _SKIP_MARKER.length - _PRINT_SEP.length; while (totalWidth <= stopWidth && colIdx < this.nCol()) { var colVec = this._cols[colIdx]._toTruncatedPrintVector(maxLines); var printVec = rightAlign(jd.vCat( toPrintString(this._names.at(colIdx)), colVec)); printVectors.push(_PRINT_SEP); printVectors.push(printVec); totalWidth += _PRINT_SEP.length + printVec.at(0).length; colIdx++; } if (totalWidth > stopWidth) { printVectors.pop(); printVectors.push(_SKIP_MARKER); } var printLines = jd.strCat.apply(jd, printVectors); return this.toString() + '\n' + printLines.strJoin('\n'); }; // Helper for converting a vector to a string vector of printable // elements, truncating if the number of elements is more than 'maxLines' vectorProto._toTruncatedPrintVector = function(maxLines) { if (this.values.length > maxLines) { var halfCount = Math.ceil(maxLines / 2 - 1); var headRange = jd.rng(0, halfCount); var tailRange = jd.rng(-halfCount); var printVec = this.s([headRange, tailRange])._toPrintVector(); return jd.vCat( printVec.s(headRange), _SKIP_MARKER, printVec.s(tailRange)); } else { return this._toPrintVector(); } }; // Helper for converting each element to a printable string vectorProto._toPrintVector = function() { return this.map(toPrintString); }; // Helper for converting each element to a printable string numVecProto._toPrintVector = function() { if (this.values.some(numIsBelowFixedThreshold) || this.values.some(numIsAboveFixedThreshold)) { return this.map(function(num) { return num.toExponential(_EXP_FRAC_DIGITS); }); } else { var fracDigits = Math.min(_FIXED_NUM_DIGITS, this.map(fractionDigits).max()); return this.map(function(num) { return num.toFixed(fracDigits); }); } }; function numIsBelowFixedThreshold(num) { return num !== 0 && Math.abs(num) < _NUM_FIXED_LOWER_BOUND; } function numIsAboveFixedThreshold(num) { return Math.abs(num) > _NUM_FIXED_UPPER_BOUND; } // Helper for right-aligning every element in a string vector, // padding with spaces so all elements are the same width function rightAlign(strVec) { var maxWidth = strVec.nChar().max(); var padding = jd.rep(' ', maxWidth).strJoin(''); return strVec.map(function(str) { return (padding + str).slice(-padding.length); }); } // Helper to create column of row ids for printing function makeRowIds(numRows, maxLines) { var printVec = jd.seq(numRows)._toTruncatedPrintVector(maxLines); return printVec.map(function(str) { return str === _SKIP_MARKER ? str : str + _ROW_ID_SUFFIX; }); } // Helper for converting a value to a printable string function toPrintString(value) { if (isUndefined(value)) { return 'undefined'; } else if (value === null) { return 'null'; } else if (Number.isNaN(value)) { return 'NaN'; } else { var str = coerceToStr(value); var lines = str.split('\n', 2); if (lines.length > 1) { str = lines[0] + '...'; } if (str.length > _MAX_STR_WIDTH) { str = str.slice(0, _MAX_STR_WIDTH - 3) + '...'; } return str; } } jd._private_export.toPrintString = toPrintString; // Helper to validate a candidate print maximum function validatePrintMax(candidate, lowerBound, label) { if (typeof candidate !== 'number' || Number.isNaN(candidate)) { throw new Error('"' + label + '" must be a number'); } else if (candidate < lowerBound) { throw new Error('"' + label + '" too small'); } } // Helper for retrieving the number of digits after the decimal point for // the given number. // This function doesn't work for numbers represented in scientific // notation, but such numbers will trigger different printing logic anyway. function fractionDigits(number) { var splitArr = number.toString().split('.'); return (splitArr.length > 1) ? splitArr[1].length : 0; } jd._private_export.fractionDigits = fractionDigits; /*============================================================================= # # ###### #### ##### #### ##### # # # # # # # # # # # # ##### # # # # # # # # # # # # # ##### # # # # # # # # # # ## ###### #### # #### # # */ vectorProto.dtype = 'object'; // Initializes the vector instance's properties vectorProto._init = function(array) { this.values = array; this._index = null; }; vectorProto.size = function() { return this.values.length; }; vectorProto.toString = function() { return 'Vector[dtype:' + this.dtype + ', size:' + this.values.length + ']'; }; /*----------------------------------------------------------------------------- * Conversion */ vectorProto.toArray = function() { return this.values.slice(); }; vectorProto.toDtype = function(dtype) { if (this.dtype === dtype) { return this; } if (dtype === 'string') { var internCache = Object.create(null); var resultArr = combineMultipleArrays( [this.values], coerceToStrInterned, internCache); return newVector(resultArr, 'string'); } return jd.vector(this.values, dtype); }; var coerceToStrInterned = useStringInterning(coerceToStr); vectorProto.pack = function() { return packVector(this); }; // Helper for packing the given vector, including metadata by default function packVector(vector, includeMetadata) { includeMetadata = isUndefined(includeMetadata) ? true : includeMetadata; var dtype = vector.dtype; if (vector.dtype === 'date') { vector = vector.toDtype('number'); } var values = (vector.dtype !== 'number') ? vector.values.slice() : vector.values.map(function(x) { return Number.isNaN(x) ? null : x; }); var result = {dtype: dtype, values: values}; if (includeMetadata) { result.version = jd.version; result.type = vectorProto.type; } return result; } /*----------------------------------------------------------------------------- * Missing Values */ vectorProto.isNa = function() { return newVector(this.values.map(isMissing), 'boolean'); }; vectorProto.dropNa = function() { return this.filter(isNotMissing); }; function isNotMissing(value) { return !isMissing(value); } vectorProto.replaceNa = function(value) { var coerceFunc = COERCE_FUNC[this.dtype]; value = coerceFunc(value); var array = this.values.slice(); for (var i = 0; i < array.length; i++) { if (isMissing(array[i])) { array[i] = value; } } return newVector(array, this.dtype); }; /*----------------------------------------------------------------------------- * Subset Selection / Modification */ vectorProto.s = function(selector) { var intIdxVec = standardIndexing(selector, this.values.length); if (intIdxVec === null) { return this; } var newArray = subsetArray(this.values, intIdxVec.values); return newVector(newArray, this.dtype); }; vectorProto.sMod = function(selector, values) { var intIdxVec = standardIndexing(selector, this.values.length); if (intIdxVec === null) { intIdxVec = jd.seq(this.values.length); } values = ensureVector(values, this.dtype); validateVectorIsDtype(values, this.dtype); var isSingleValue = (values.size() === 1); if (!isSingleValue && values.size() !== intIdxVec.size()) { throw new Error('length mismatch: cannot assign ' + values.size() + ' values to a selection of length ' + intIdxVec.size()); } var intIdxArr = intIdxVec.values; var result = this.values.slice(); for (var i = 0; i < intIdxArr.length; i++) { result[intIdxArr[i]] = isSingleValue ? values.values[0] : values.values[i]; } return newVector(result, this.dtype); }; vectorProto.at = function(i) { i = ensureScalar(i); i = resolveIntIdx(i, this.size()); return this.values[i]; }; vectorProto.head = function(n) { if (isUndefined(n)) { n = 6; } validateInt(n, 'n'); return this.s(jd.rng(0, n)); }; vectorProto.tail = function(n) { if (isUndefined(n)) { n = 6; } validateInt(n, 'n'); var start = (n < 0) ? -n : this.size() - n; return this.s(jd.rng(start, undefined)); }; vectorProto.ifElse = function(cond, other) { cond = ensureVector(cond, 'boolean'); if (cond.size() !== this.size()) { throw new Error('"cond" must be the same length as this vector'); } validateVectorIsDtype(cond, 'boolean'); other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); var resultArr = combineMultipleArrays( [this.values, cond.values, other.values, [NA_VALUE[this.dtype]]], elemIfElse ); return newVector(resultArr, this.dtype); }; function elemIfElse(thisElem, cond, other, naValue) { return ( isMissing(cond) ? naValue : cond ? thisElem : other ); } vectorProto.ex = function() { return jd.ex(this); }; /*----------------------------------------------------------------------------- * Array.prototype Adaptations */ vectorProto.map = function() { var array = Array.prototype.map.apply(this.values, arguments); return inferVectorDtype(array, this.dtype); }; vectorProto.reduce = function() { return Array.prototype.reduce.apply(this.values, arguments); }; vectorProto.reduceRight = function() { return Array.prototype.reduceRight.apply(this.values, arguments); }; vectorProto.findIndex = function() { return Array.prototype.findIndex.apply(this.values, arguments); }; vectorProto.indexOf = function(searchElement) { if (this.dtype === 'object') { return this.values.indexOf(searchElement); } var intInds = this._getIndex().lookupKey([searchElement]); if (intInds === null) { return -1; } else if (typeof intInds === 'number') { return intInds; } else { return intInds[0]; } }; vectorProto.sort = function(compareFunction) { if (isUndefined(compareFunction)) { compareFunction = compare; } var array = this.values.slice(); Array.prototype.sort.call(array, compareFunction); return newVector(array, this.dtype); }; vectorProto.reverse = function() { var array = this.values.slice(); Array.prototype.reverse.call(array); return newVector(array, this.dtype); }; vectorProto.filter = function() { var array = Array.prototype.filter.apply(this.values, arguments); return newVector(array, this.dtype); }; vectorProto.strJoin = function(separator) { return this.values.map(elemToString).join(separator); }; vectorProto.combine = function() { var numArgs = arguments.length; if (numArgs < 2) { throw new Error('must supply at least one "varg"'); } var func = arguments[numArgs - 1]; if (typeof func !== 'function') { throw new Error('the last argument to "combine" must be a function'); } var vargArrays = allocArray(numArgs); vargArrays[0] = this.values; for (var j = 1; j < numArgs; j++) { vargArrays[j] = ensureVector(arguments[j - 1]).values; } var resultArray = combineMultipleArrays(vargArrays, func); return jd.vector(resultArray); }; /*----------------------------------------------------------------------------- * Comparison */ vectorProto.eq = function(other) { other = ensureVector(other, this.dtype); var outputLen = validateArrayLengths(this.size(), other.size()); if (this.dtype !== other.dtype) { return jd.repNa(outputLen, 'boolean'); } var array = (this.dtype === 'object') ? combineArrays(this.values, other.values, null, elemObjEq) : combineArrays(this.values, other.values, null, elemEq); return newVector(array, 'boolean'); }; function elemEq(x, y) { return compare(x, y) === 0; } // Returns true if x === y or if x and y are both NaN. This is meant // for shallow equals over elements with "object" dtype only. function elemObjEq(x, y) { return (Number.isNaN(x) && Number.isNaN(y)) || x === y; } vectorProto.neq = function(other) { return this.eq(other).not(); }; vectorProto.lt = function(other) { other = ensureVector(other, this.dtype); var outputLen = validateArrayLengths(this.size(), other.size()); if (this.dtype !== other.dtype) { return jd.repNa(outputLen, 'boolean'); } var array = combineArrays(this.values, other.values, null, elemLt); return newVector(array, 'boolean'); }; function elemLt(x, y) { return compare(x, y) < 0; } vectorProto.gt = function(other) { other = ensureVector(other, this.dtype); var outputLen = validateArrayLengths(this.size(), other.size()); if (this.dtype !== other.dtype) { return jd.repNa(outputLen, 'boolean'); } var array = combineArrays(this.values, other.values, null, elemGt); return newVector(array, 'boolean'); }; function elemGt(x, y) { return compare(x, y) > 0; } vectorProto.lte = function(other) { return this.lt(other).or(this.eq(other)); }; vectorProto.gte = function(other) { return this.gt(other).or(this.eq(other)); }; vectorProto.between = function(lower, upper, inclusive) { // TODO throw new Error('unimplemented method (TODO)'); }; vectorProto.equals = function(other, tolerance) { if (isMissing(other) || other.type !== vectorProto.type || this.size() !== other.size() || this.dtype !== other.dtype) { return false; } if (this === other) { return true; } var eqFunc = elemEq; if (this.dtype === 'number') { eqFunc = isUndefined(tolerance) ? numClose : function(x, y) { return (Number.isNaN(x) && Number.isNaN(y)) || Math.abs(x - y) <= tolerance; }; } else if (this.dtype === 'object'){ eqFunc = elemObjEq; } var array1 = this.values; var array2 = other.values; for (var i = 0; i < array1.length; i++) { if (!eqFunc(array1[i], array2[i])) { return false; } } return true; }; // Returns true if x and y are within 1e-7 tolerance or are both NaN function numClose(x, y) { return (Number.isNaN(x) && Number.isNaN(y)) || Math.abs(x - y) <= 1e-7; } /*----------------------------------------------------------------------------- * Order-based */ vectorProto.min = function(skipNa) { var ind = this.idxMin(skipNa); return Number.isNaN(ind) ? NA_VALUE[this.dtype] : this.values[ind]; }; vectorProto.max = function(skipNa) { var ind = this.idxMax(skipNa); return Number.isNaN(ind) ? NA_VALUE[this.dtype] : this.values[ind]; }; vectorProto.cuMin = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var array = skipNa ? cumulativeReduce(this.values, elemMin) : cumulativeReduce(this.values, NA_VALUE[this.dtype], elemMin); return newVector(array, this.dtype); }; function elemMin(x, y) { return compare(y, x) < 0 ? y : x; } vectorProto.cuMax = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var array = skipNa ? cumulativeReduce(this.values, elemMax) : cumulativeReduce(this.values, NA_VALUE[this.dtype], elemMax); return newVector(array, this.dtype); }; function elemMax(x, y) { return compare(y, x) > 0 ? y : x; } vectorProto.idxMin = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var thisArray = this.values; var minIndex = NaN; var minValue = null; for (var i = 0; i < thisArray.length; i++) { var currVal = thisArray[i]; if (!isMissing(currVal)) { if (compare(currVal, minValue) < 0 || Number.isNaN(minIndex)) { minValue = currVal; minIndex = i; } } else if (!skipNa) { return NaN; } } return minIndex; }; vectorProto.idxMax = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var thisArray = this.values; var maxIndex = NaN; var maxValue = null; for (var i = 0; i < thisArray.length; i++) { var currVal = thisArray[i]; if (!isMissing(currVal)) { if (compare(currVal, maxValue) > 0 || Number.isNaN(maxIndex)) { maxValue = currVal; maxIndex = i; } } else if (!skipNa) { return NaN; } } return maxIndex; }; vectorProto.pMin = function(other) { other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); var array = combineArrays(this.values, other.values, NA_VALUE[this.dtype], elemMin); return newVector(array, this.dtype); }; vectorProto.pMax = function(other) { other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); var array = combineArrays(this.values, other.values, NA_VALUE[this.dtype], elemMax); return newVector(array, this.dtype); }; vectorProto.clip = function(lower, upper) { lower = ensureVector(lower, this.dtype); upper = ensureVector(upper, this.dtype); validateVectorIsDtype(lower, this.dtype); validateVectorIsDtype(upper, this.dtype); var resultArr = combineMultipleArrays( [this.values, lower.values, upper.values], elemClip ); return newVector(resultArr, this.dtype); }; function elemClip(elem, lower, upper) { var missingLower = isMissing(lower); var missingUpper = isMissing(upper); if (!missingLower && !missingUpper && lower > upper) { throw new Error('invalid range: lower (' + lower + ') > upper (' + upper + ')'); } return ( isMissing(elem) ? elem : (!missingLower && elem < lower) ? lower : (!missingUpper && elem > upper) ? upper : elem ); } vectorProto.rank = function() { // TODO throw new Error('unimplemented method (TODO)'); }; /*----------------------------------------------------------------------------- * Membership */ vectorProto.contains = function(value) { validateVectorIsNotDtype(this, 'object'); value = ensureScalar(value); var valDtype = inferDtype(value); if (valDtype !== this.dtype && valDtype !== null) { throw new Error('"value" does not match the dtype of this vector'); } return this._getIndex().lookupKey([value]) !== null; }; vectorProto.isIn = function(values) { validateVectorIsNotDtype(this, 'object'); values = ensureVector(values, this.dtype); validateVectorIsDtype(values, this.dtype); return values._getIndex().has([this]); }; vectorProto.valueCounts = function() { validateVectorIsNotDtype(this, 'object'); var valCountObj = this._getIndex().valueCounts(); var df = jd.df([valCountObj.vectors[0], valCountObj.counts], ['value', 'count']); return df.sort(['count', 'value'], [false, true]); }; vectorProto.unique = function() { validateVectorIsNotDtype(this, 'object'); return this._getIndex().unique()[0]; }; vectorProto.nUnique = function() { validateVectorIsNotDtype(this, 'object'); return this._getIndex().size; }; vectorProto.duplicated = function(keep) { validateVectorIsNotDtype(this, 'object'); return this._getIndex().duplicated(keep); }; vectorProto.replace = function() { // TODO throw new Error('unimplemented method (TODO)'); }; vectorProto.describe = function() { // TODO throw new Error('unimplemented method (TODO)'); }; // Private helper for retrieving the index or creating one if it's not // yet present vectorProto._getIndex = function() { if (this._index === null) { this._index = newNestedIndex([this]); } return this._index; }; /*----------------------------------------------------------------------------- * Set Operations */ vectorProto.union = function(other) { validateVectorIsNotDtype(this, 'object'); other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); return jd.vCat(this, other).unique(); }; vectorProto.intersect = function(other) { validateVectorIsNotDtype(this, 'object'); other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); var unique = this.unique(); return unique.s(unique.isIn(other)); }; vectorProto.setdiff = function(other) { validateVectorIsNotDtype(this, 'object'); other = ensureVector(other, this.dtype); validateVectorIsDtype(other, this.dtype); var unique = this.unique(); return unique.s(unique.isIn(other).ex()); }; /*============================================================================= # # # # # # # # ###### #### ## # # # ## ## # # # # # # # # # # # ## # # # ##### # # # # # # # # # # # # # ## # # # # # # # # # # # #### # # ## ###### #### */ numVecProto.dtype = 'number'; /*----------------------------------------------------------------------------- * Operators */ numVecProto.add = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, numberAdd); return newVector(array, 'number'); }; function numberAdd(x, y) { return x + y; } numVecProto.sub = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, numberSub); return newVector(array, 'number'); }; function numberSub(x, y) { return x - y; } numVecProto.mul = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, numberMul); return newVector(array, 'number'); }; function numberMul(x, y) { return x * y; } numVecProto.div = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, numberDiv); return newVector(array, 'number'); }; function numberDiv(x, y) { return x / y; } numVecProto.mod = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, numberMod); return newVector(array, 'number'); }; function numberMod(x, y) { return x % y; } numVecProto.pow = function(other) { other = ensureVector(other, 'number'); validateVectorIsDtype(other, 'number'); var array = combineArrays(this.values, other.values, NaN, Math.pow); return newVector(array, 'number'); }; /*----------------------------------------------------------------------------- * Unary functions */ numVecProto.abs = function() { return newVector(this.values.map(Math.abs), 'number'); }; numVecProto.sqrt = function() { return newVector(this.values.map(Math.sqrt), 'number'); }; numVecProto.sign = function() { return newVector(this.values.map(Math.sign), 'number'); }; numVecProto.ceil = function() { return newVector(this.values.map(Math.ceil), 'number'); }; numVecProto.floor = function() { return newVector(this.values.map(Math.floor), 'number'); }; numVecProto.round = function() { return newVector(this.values.map(Math.round), 'number'); }; numVecProto.exp = function() { return newVector(this.values.map(Math.exp), 'number'); }; numVecProto.log = function() { return newVector(this.values.map(Math.log), 'number'); }; numVecProto.sin = function() { return newVector(this.values.map(Math.sin), 'number'); }; numVecProto.cos = function() { return newVector(this.values.map(Math.cos), 'number'); }; numVecProto.tan = function() { return newVector(this.values.map(Math.tan), 'number'); }; numVecProto.asin = function() { return newVector(this.values.map(Math.asin), 'number'); }; numVecProto.acos = function() { return newVector(this.values.map(Math.acos), 'number'); }; numVecProto.atan = function() { return newVector(this.values.map(Math.atan), 'number'); }; /*----------------------------------------------------------------------------- * Aggregation */ numVecProto.sum = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } return skipNa ? reduceNonNa(this.values, 0, numberAdd) : reduceUnless(this.values, 0, isMissing, numberAdd); }; numVecProto.cuSum = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var array = skipNa ? cumulativeReduce(this.values, numberAdd) : cumulativeReduce(this.values, NA_VALUE.number, numberAdd); return newVector(array, 'number'); }; numVecProto.mean = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var stats = {n: 0, sum: 0.0}; var result = skipNa ? reduceNonNa(this.values, stats, meanReducer) : reduceUnless(this.values, stats, isMissing, meanReducer); return (Number.isNaN(result) || result.n === 0) ? NaN : result.sum / result.n; }; function meanReducer(stats, x) { stats.n++; stats.sum += x; return stats; } numVecProto.stdev = function(skipNa) { var variance = this.var(skipNa); return Number.isNaN(variance) ? NaN : Math.sqrt(variance); }; // Implement the "online algorithm" for variance: // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm numVecProto.var = function(skipNa) { if (isUndefined(skipNa)) { skipNa = true; } var stats = {n: 0, mean: 0.0, M2: 0.0}; var result = skipNa ? reduceNonNa(this.values, stats, varReducer) : reduceUnless(this.values, stats, isMissing, varReducer); return (Number.isNaN(result) || result.n < 2) ? NaN : result.M2 / (result.n - 1); }; function varReducer(stats, x) { stats.n++; var delta = x - stats.mean; stats.mean += delta / stats.n; stats.M2 += delta * (x - stats.mean); return stats; } /*============================================================================= ##### #### #### # # # ###### #### # # # # # # # # # # # # ##### # # # # # # # ##### # #