pandas-js
Version:
Pandas for JavaScript
1,460 lines (1,286 loc) • 75.6 kB
JavaScript
'use strict';
Object.defineProperty(exports, "__esModule", {
value: true
});
exports._concatDataFrame = exports.mergeDataFrame = undefined;
var _toConsumableArray2 = require('babel-runtime/helpers/toConsumableArray');
var _toConsumableArray3 = _interopRequireDefault(_toConsumableArray2);
var _slicedToArray2 = require('babel-runtime/helpers/slicedToArray');
var _slicedToArray3 = _interopRequireDefault(_slicedToArray2);
var _classCallCheck2 = require('babel-runtime/helpers/classCallCheck');
var _classCallCheck3 = _interopRequireDefault(_classCallCheck2);
var _createClass2 = require('babel-runtime/helpers/createClass');
var _createClass3 = _interopRequireDefault(_createClass2);
var _possibleConstructorReturn2 = require('babel-runtime/helpers/possibleConstructorReturn');
var _possibleConstructorReturn3 = _interopRequireDefault(_possibleConstructorReturn2);
var _get2 = require('babel-runtime/helpers/get');
var _get3 = _interopRequireDefault(_get2);
var _inherits2 = require('babel-runtime/helpers/inherits');
var _inherits3 = _interopRequireDefault(_inherits2);
var _typeof2 = require('babel-runtime/helpers/typeof');
var _typeof3 = _interopRequireDefault(_typeof2);
var _immutable = require('immutable');
var _immutable2 = _interopRequireDefault(_immutable);
var _exceptions = require('./exceptions');
var _generic = require('./generic');
var _generic2 = _interopRequireDefault(_generic);
var _series = require('./series');
var _series2 = _interopRequireDefault(_series);
var _utils = require('./utils');
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
// eslint-disable-next-line
// import { MultiIndex } from './multiindex';
// import { saveAs } from 'file-saver'; TODO figure out if best way
/**
 * Pivot an Array of row records into an Immutable.Map of column name -> Series.
 *
 * Each element may be an Immutable.Map or a plain object; its keys are treated
 * as column names and its values are appended to that column in row order.
 * Elements that are neither (e.g. numbers, strings) contribute nothing.
 *
 * @param {Array} array
 *  Row records
 * @param {*} index
 *  Index handed unchanged to every Series that is created
 * @returns {Immutable.Map}
 */
var parseArrayToSeriesMap = function parseArrayToSeriesMap(array, index) {
  var Immutable = _immutable2.default;
  var columnValues = Immutable.Map({});

  // Append `cell` to the running List for `column`, creating the List on first sight
  var appendCell = function appendCell(column, cell) {
    var cells = columnValues.has(column) ? columnValues.get(column).push(cell) : Immutable.List.of(cell);
    columnValues = columnValues.set(column, cells);
  };

  array.forEach(function (row) {
    if (row instanceof Immutable.Map) {
      row.keySeq().forEach(function (column) {
        appendCell(column, row.get(column));
      });
      return;
    }
    var rowType = typeof row === 'undefined' ? 'undefined' : (0, _typeof3.default)(row);
    if (rowType !== 'object') return;
    Object.keys(row).forEach(function (column) {
      appendCell(column, row[column]);
    });
  });

  // Wrap every accumulated List in a Series named after its column
  columnValues.keySeq().forEach(function (column) {
    var series = new _series2.default(columnValues.get(column), { name: column, index: index });
    columnValues = columnValues.set(column, series);
  });
  return Immutable.Map(columnValues);
};
// import { Workbook, Sheet } from './structs'; TODO
/**
* DataFrame object
*/
var DataFrame = function (_NDFrame) {
(0, _inherits3.default)(DataFrame, _NDFrame);
/**
* Two-dimensional size-mutable, potentially heterogeneous tabular data
* structure with labeled axes (rows and columns). Arithmetic operations
* align on both row and column labels. Can be thought of as an Immutable.Map-like
* container for Series objects. The primary pandas data structure
*
* @param data {Array|Object}
* Data to be stored in DataFrame
* @param {Object} kwargs
* Extra optional arguments for a DataFrame
* @param {Array|Object} [kwargs.index]
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}])
*
* // Returns:
* // x | y
* // 0 1 | 2
* // 1 2 | 3
* // 2 3 | 4
* df.toString();
*/
function DataFrame(data) {
  var kwargs = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  (0, _classCallCheck3.default)(this, DataFrame);
  var _this = (0, _possibleConstructorReturn3.default)(this, (DataFrame.__proto__ || Object.getPrototypeOf(DataFrame)).call(this, data, kwargs));
  var Immutable = _immutable2.default;
  var Series = _series2.default;

  if (Array.isArray(data)) {
    // Array of row records: the row axis comes from kwargs.index, columns from the record keys
    var rowLabels = (0, _utils.parseIndex)(kwargs.index, Immutable.List(data));
    _this.set_axis(0, rowLabels);
    _this._data = parseArrayToSeriesMap(data, _this.index);
    _this.set_axis(1, _this._data.keySeq());
  } else if (data instanceof Immutable.Map) {
    // Map of column -> Series: every Series is copied; the row axis is taken from the first column
    var copiedColumns = data.keySeq().map(function (name) {
      var column = data.get(name);
      if (!(column instanceof Series)) throw new Error('Map must have [column, series] key-value pairs');
      return [name, column.copy()];
    });
    _this._data = Immutable.OrderedMap(copiedColumns);
    _this.set_axis(1, _this._data.keySeq());
    _this.set_axis(0, _this._data.get(_this.columns.get(0)).index);
  } else if (data instanceof Immutable.List) {
    // List of List of row values
    var requested = kwargs.columns;
    var columns = void 0;
    if (Array.isArray(requested) || requested instanceof Immutable.Seq) {
      columns = Immutable.List(requested);
    } else if (requested instanceof Immutable.List) {
      columns = requested;
    } else if (typeof requested === 'undefined') {
      // No labels supplied: number the columns after the width of the first row
      columns = Immutable.Range(0, data.get(0).size).toList();
    } else {
      throw new Error('Invalid columns');
    }
    _this._values = data; // Cache the values since we're in List of List or row data already
    var columnEntries = columns.map(function (label, colIdx) {
      var cells = data.map(function (row) {
        return row.get(colIdx);
      });
      return [label, new Series(cells, { index: kwargs.index })];
    });
    _this._data = Immutable.OrderedMap(columnEntries);
    _this.set_axis(1, _this._data.keySeq());
    _this.set_axis(0, _this._data.get(_this.columns.get(0)).index);
  } else if (typeof data === 'undefined') {
    // No data at all: empty frame with empty axes
    _this._data = Immutable.Map({});
    _this.set_axis(0, Immutable.List.of());
    _this.set_axis(1, Immutable.Seq.of());
  }

  _this._setup_axes(Immutable.List.of(0, 1));
  return _this;
}
(0, _createClass3.default)(DataFrame, [{
key: 'toString',
value: function toString() {
var _this2 = this;
var string = '\t|';
this.columns.forEach(function (k) {
string += ' ' + k + ' |';
});
var headerRow = '-'.repeat(string.length);
string += '\n' + headerRow + '\n';
var stringUpdate = function stringUpdate(idx) {
var s = '';
_this2.columns.forEach(function (k) {
s += ' ' + _this2._data.get(k).iloc(idx) + ' |';
});
return s;
};
for (var idx = 0; idx < this.length; idx += 1) {
string += this.index.get(idx) + '\t|';
string += stringUpdate(idx);
string += '\n';
}
return string;
}
/**
* Return a new deep copy of the `DataFrame`
*
* pandas equivalent: [DataFrame.copy](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.copy.html)
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
* const df2 = df.copy();
* df2.index = [1, 2, 3];
* df.index // [0, 1, 2];
* df2.index // [1, 2, 3];
*/
}, {
key: 'copy',
value: function copy() {
return new DataFrame(this._data, { index: this.index });
}
// $FlowFixMe
}, {
key: Symbol.iterator,
value: function value() {
var _this3 = this;
var index = -1;
return {
next: function next() {
index += 1;
var done = !(index >= 0 && index < _this3.length);
var value = done ? undefined : _immutable2.default.Map(_this3.columns.map(function (k, idx) {
return [k, _this3.values.get(index).get(idx)];
}));
return { value: value, done: done };
}
};
}
/**
* A generator which returns [row, index location] tuples
*
* pandas equivalent: [DataFrame.iterrows](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.iterrows.html)
*
* @returns {*}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Logs 2 4 6
* for(const [row, idx] of df.iterrows()) {
* console.log(row.get('x') * 2);
* }
*/
}, {
key: 'iterrows',
value: function iterrows() {
return (0, _utils.enumerate)(this);
}
}, {
key: 'set',
/**
* Set a `Series` at `column`
*
* @param {string|number} column
* @param {Series|List|Array} series
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1}, {x: 2}, {x: 3}]);
*
* // Returns DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
* df.set('y', new Series([2, 3, 4]));
*/
value: function set(column, series) {
if (series instanceof _series2.default) return new DataFrame(this._data.set(column, series), this.kwargs);else if (series instanceof _immutable2.default.List || Array.isArray(series)) return new DataFrame(this._data.set(column, // $FlowFixMe TODO
new _series2.default(series, { index: this.index, name: column })), this.kwargs);
throw new TypeError('series must be a Series!');
}
/**
* Reset the index for a DataFrame
*
* pandas equivalent: [DataFrame.reset_index](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reset_index.html)
*
* @param {object} args
* @param {boolean} args.drop
* Drop the index when resetting? Otherwise, add as new column
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}], {index: [1, 2]});
*
* // returns DataFrame([{index: 1, x: 1, y: 2}, {index: 2, x: 2, y: 3}], {index: [0, 1]})
* df.reset_index();
*
* // returns DataFrame([{x: 1, y: 2}, {x: 2, y: 3}], {index: [0, 1]});
* df.reset_index({drop: true});
*
* const df2 = new DataFrame([{index: 1}, {index: 2}], {index: [1, 2]});
* // returns DataFrame([{level_0: 1, index: 1}, {level_0: 2, index: 2}], {index: [0, 1]});
* df2.reset_index();
*/
}, {
key: 'reset_index',
value: function reset_index() {
var _this4 = this;
var args = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : { drop: false };
if (typeof args.drop !== 'undefined' && typeof args.drop !== 'boolean') throw new TypeError('drop must be a boolean');
var drop = typeof args.drop === 'undefined' ? false : args.drop;
var indexName = 'index';
if (this.columnExists('index')) {
var i = 0;
while (this.columnExists('level_' + i)) {
i += 1;
}
indexName = 'level_' + i;
}
var data = _immutable2.default.Map(this.columns.map(function (c) {
return [c, new _series2.default(_this4.get(c).values)];
}));
if (!args.drop) data = data.set(indexName, new _series2.default(this.index));
return new DataFrame(data);
}
/**
* Return new DataFrame subset at [rowIdx, colIdx]
*
* pandas equivalent: [DataFrame.iloc](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.iloc.html)
*
* @param {number|Array.<number>} rowIdx
* @param {number|Array.<number>=} colIdx
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2, z: 3}, {x: 2, y: 3, z: 4}, {x: 3, y: 4, z: 5}]);
*
* // Returns DataFrame([{y: 3}], {index: [1]})
* df.iloc(1, 1);
*
* // Returns DataFrame([{y: 3, z: 4}], {index: [1]})
* df.iloc(1, [1, 3]);
*
* // Returns DataFrame([{y: 3, z: 4}, {y: 4, z: 5}], {index: [1, 2]})
* df.iloc([1, 3], [1, 3]);
*
* // Returns DataFrame([{y: 3}, {y: 4}], {index: [1, 2]})
* df.iloc([1, 3], 1);
*
* // Returns DataFrame([{x: 2, y: 3, z: 4}], {index: [1]})
* df.iloc(1);
*/
}, {
key: 'iloc',
value: function iloc(rowIdx, colIdx) {
var _this5 = this;
if (typeof rowIdx === 'number') {
if (typeof colIdx === 'number') {
if (colIdx < 0 || colIdx >= this.shape[1]) throw new Error('colIdx out of bounds');
var getCol = this.columns.get(colIdx);
return new DataFrame(_immutable2.default.Map([[getCol, this.get(getCol).iloc(rowIdx, rowIdx + 1)]]), { index: this.index.slice(rowIdx, rowIdx + 1) });
} else if (Array.isArray(colIdx)) {
if (colIdx.length !== 2) throw new Error('colIdx must be length 2 (start and end positions)');
if (colIdx[1] <= colIdx[0]) throw new Error('colIdx end position cannot be less than or equal tostart position');
if (colIdx[0] < 0 || colIdx[1] > this.shape[1]) throw new Error('colIdx position out of bounds');
return new DataFrame(_immutable2.default.Map(_immutable2.default.Range(colIdx[0], colIdx[1]).map(function (idx) {
var getCol = _this5.columns.get(idx);
// $FlowFixMe TODO
return [getCol, _this5.get(getCol).iloc(rowIdx, rowIdx + 1)];
}).toArray()), { index: this.index.slice(rowIdx, rowIdx + 1) });
} else if (typeof colIdx === 'undefined') {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (c) {
return (
// $FlowFixMe TODO
[c, _this5.get(c).iloc(rowIdx, rowIdx + 1)]
);
}).toArray()), { index: this.index.slice(rowIdx, rowIdx + 1) });
}
throw new TypeError('colIdx must be either integer or Array of integers');
} else if (Array.isArray(rowIdx)) {
if (typeof colIdx === 'number') {
if (colIdx < 0 || colIdx >= this.shape[1]) throw new Error('colIdx out of bounds');
var _getCol = this.columns.get(colIdx);
return new DataFrame(_immutable2.default.Map([[_getCol, this.get(_getCol).iloc(rowIdx[0], rowIdx[1])]]), { index: this.index.slice(rowIdx[0], rowIdx[1]) });
} else if (Array.isArray(colIdx)) {
if (colIdx.length !== 2) throw new Error('colIdx must be length 2 (start and end positions)');
if (colIdx[1] <= colIdx[0]) throw new Error('colIdx end position cannot be less than or equal tostart position');
if (colIdx[0] < 0 || colIdx[1] > this.shape[1]) throw new Error('colIdx position out of bounds');
return new DataFrame(_immutable2.default.Map(_immutable2.default.Range(colIdx[0], colIdx[1]).map(function (idx) {
var getCol = _this5.columns.get(idx);
// $FlowFixMe TODO
return [getCol, _this5.get(getCol).iloc(rowIdx[0], rowIdx[1])];
}).toArray()), { index: this.index.slice(rowIdx[0], rowIdx[1]) });
} else if (typeof colIdx === 'undefined') {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (c) {
return (
// $FlowFixMe TODO
[c, _this5.get(c).iloc(rowIdx[0], rowIdx[1])]
);
}).toArray()), { index: this.index.slice(rowIdx[0], rowIdx[1]) });
}
throw new TypeError('colIdx must be either integer or Array of integers');
}
throw new TypeError('rowIdx must be either integer or Array of integers');
}
/**
* Return new DataFrame composed of first n rows of this DataFrame
*
* pandas equivalent: [DataFrame.head](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.head.html)
*
* @param {number} n=10
* Integer number of n rows to return from the DataFrame
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 5}]);
*
* // returns DataFrame([{x: 1, y: 2}, {x: 2, y: 3}])
* df.head(2);
*/
}, {
key: 'head',
value: function head() {
var n = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 10;
return this.iloc([0, n]);
}
/**
* Return new DataFrame composed of last n rows of this DataFrame
*
* pandas equivalent: [DataFrame.tail](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.tail.html)
*
* @param {number} n=10
* Integer number of n rows to return from the DataFrame
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 5}]);
*
* // returns DataFrame([{x: 3, y: 4}, {x: 4, y: 5}])
* df.tail(2);
*/
}, {
key: 'tail',
value: function tail() {
var n = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 10;
return this.iloc([this.length - n, this.length]);
}
}, {
key: '_assertColumnExists',
value: function _assertColumnExists(col) {
if (!this.columnExists(col)) throw new Error('Column ' + col + ' not in DataFrame');
}
}, {
key: 'columnExists',
value: function columnExists(col) {
return this.columns.indexOf(col) >= 0;
}
/**
* Return the `Series` at the column
*
* pandas equivalent: df['column_name']
*
* @param {string|Array.<string>|Immutable.List.<string>|Immutable.Seq.<string>} columns
* Name of the column to retrieve or list of columns to retrieve
*
* @returns {Series}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns Series([1, 2, 3], {name: 'x', index: [0, 1, 2]})
* df.get('x');
*
* // Returns DataFrame([{y: 2}, {y: 3}, {y: 4}])
* df.get(['y']);
*/
}, {
key: 'get',
value: function get(columns) {
var _this6 = this;
if ((typeof columns === 'string' || typeof columns === 'number') && this.columnExists(columns)) return this._data.get(columns);else if (Array.isArray(columns) || columns instanceof _immutable2.default.List || columns instanceof _immutable2.default.Seq) {
columns.forEach(function (c) {
if (!_this6.columnExists(c)) throw new Error('KeyError: ' + c + ' not found');
});
return new DataFrame(_immutable2.default.Map(columns.map(function (c) {
return [c, _this6.get(c)];
})), this.kwargs);
}
throw new Error('KeyError: ' + columns + ' not found');
}
/**
* Return an object of same shape as self and whose corresponding entries are from self
* where cond is True and otherwise are from other.
*
* pandas equivalent [DataFrame.where](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.where.html)
*
* @param {Array|List|Series|DataFrame|number|string} other
* Iterable or value to compare to DataFrame
* @param {function} op
* Function which takes (a, b) values and returns a boolean
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}]);
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([false, true])})
* df.where(new Series([1, 3]), (a, b) => a === b);
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([false, true])})
* df.where(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([3, 3])})),
* (a, b) => a === b);
*/
}, {
key: 'where',
value: function where(other, op) {
if (!Array.isArray(other) && !(other instanceof _immutable2.default.List) && !(other instanceof _series2.default) && !(other instanceof DataFrame)) {
// noinspection Eslint
return new DataFrame(_immutable2.default.Map(this._data.mapEntries(function (_ref) {
var _ref2 = (0, _slicedToArray3.default)(_ref, 2),
k = _ref2[0],
v = _ref2[1];
return [k, v.where(other, op)];
})));
} else if (Array.isArray(other) || other instanceof _series2.default || other instanceof _immutable2.default.List) {
if ((Array.isArray(other) || other instanceof _series2.default) && other.length !== this.length) throw new Error('Array or Series must be same length as DataFrame');
if (other instanceof _immutable2.default.List && other.size !== this.length) throw new Error('Immutable List must be same size as DataFrame');
// noinspection Eslint
return new DataFrame(_immutable2.default.Map(this._data.mapEntries(function (_ref3) {
var _ref4 = (0, _slicedToArray3.default)(_ref3, 2),
k = _ref4[0],
v = _ref4[1];
return [k, v.where(other, op)];
})));
} else if (other instanceof DataFrame) {
if (!other.shape.equals(this.shape)) throw new Error('DataFrame must have the same shape');
// noinspection Eslint
return new DataFrame(_immutable2.default.Map(this._data.mapEntries(function (_ref5, idx) {
var _ref6 = (0, _slicedToArray3.default)(_ref5, 2),
k = _ref6[0],
v = _ref6[1];
// $FlowFixMe TODO
return [k, v.where(other.get(other.columns.get(idx)), op)];
})));
}
throw new Error('Unsupported comparison value, or non-matching lengths');
}
/**
* Equal to `DataFrame` and other, element wise
*
* pandas equivalent: df == val
*
* @param {Array|List|Series|DataFrame|number|string} other
* Other Iterable or scalar value to check for equal to
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([false, true])})
* df.eq(new Series([1, 3]));
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([false, false])})
* df.eq(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([1, 2])})));
*/
}, {
key: 'eq',
value: function eq(other) {
return this.where(other, function (a, b) {
return a === b;
});
}
/**
* Greater than of `DataFrame` and other, element wise
*
* pandas equivalent: df > val
*
* @param {Array|List|Series|DataFrame|number|string} other
* Other Iterable or scalar value to check for greater than
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Map({x: Series([false, false]), y: Series([true, false])})
* df.gt(new Series([1, 3]));
*
* // Returns DataFrame(Map({x: Series([false, true]), y: Series([true, true])})
* df.gt(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([1, 2])})));
*/
}, {
key: 'gt',
value: function gt(other) {
return this.where(other, function (a, b) {
return a > b;
});
}
/**
* Greater than or equal to of `DataFrame` and other, element wise
*
* pandas equivalent: df >= val
*
* @param {Array|List|Series|DataFrame|number|string} other
* Other Iterable or scalar value to check for greater than or equal to
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([true, true])})
* df.gte(new Series([1, 3]));
*
* // Returns DataFrame(Map({x: Series([true, true]), y: Series([true, true])})
* df.gte(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([1, 2])})));
*/
}, {
key: 'gte',
value: function gte(other) {
return this.where(other, function (a, b) {
return a >= b;
});
}
/**
* Less than of `DataFrame` and other, element wise
*
* pandas equivalent: df < val
*
* @param {Array|List|Series|DataFrame|number|string} other
* Other Iterable or scalar value to check for less than
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Map({x: Series([false, true]), y: Series([false, false])})
* df.lt(new Series([1, 3]));
*
* // Returns DataFrame(Map({x: Series([false, false]), y: Series([false, false])})
* df.lt(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([1, 2])})));
*/
}, {
key: 'lt',
value: function lt(other) {
return this.where(other, function (a, b) {
return a < b;
});
}
/**
* Less than or equal to of `DataFrame` and other, element wise
*
* pandas equivalent: df <= val
*
* @param {Array|List|Series|DataFrame|number|string} other
* Other Iterable or scalar value to check for less than or equal to
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Map({x: Series([true, true]), y: Series([false, true])})
* df.lte(new Series([1, 3]));
*
* // Returns DataFrame(Map({x: Series([true, false]), y: Series([false, false])})
* df.lte(new DataFrame(Map({
* a: new Series([1, 1]),
* b: new Series([1, 2])})));
*/
}, {
key: 'lte',
value: function lte(other) {
return this.where(other, function (a, b) {
return a <= b;
});
}
/**
* Merge this `DataFrame` with another `DataFrame`, optionally on some set of columns
*
* pandas equivalent: `DataFrame.merge`
*
* @param {DataFrame} df
* `DataFrame` with which to merge this `DataFrame`
* @param {Array} on
* Array of columns on which to merge
* @param {string} how='inner'
* Merge method, either 'inner' or 'outer'
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
* const df2 = new DataFrame([{x: 1, z: 3}, {x: 3, z: 5}, {x: 2, z: 10}]);
*
* // Returns
* // x | y | z
* // 0 1 | 2 | 3
* // 1 2 | 3 | 10
* // 2 3 | 4 | 5
* df.merge(df2, ['x'], 'inner');
*/
}, {
key: 'merge',
value: function merge(df, on) {
var how = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'inner';
// eslint-disable-next-line
return mergeDataFrame(this, df, on, how);
}
/**
* Convert the `DataFrame` to a csv string
*
* pandas equivalent: [DataFrame.to_csv](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html)
*
* @returns {string}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns x,y,\r\n1,2,\r\n2,3,\r\n3,4,\r\n
* df.to_csv();
*/
}, {
key: 'to_csv',
value: function to_csv() {
var _this7 = this;
var csvString = '';
this.columns.forEach(function (k) {
csvString += k + ',';
});
csvString += '\r\n';
var updateString = function updateString(idx) {
var s = ''; // $FlowFixMe TODO
_this7.columns.forEach(function (k) {
s += _this7.get(k).iloc(idx) + ',';
});
return s;
};
for (var idx = 0; idx < this.length; idx += 1) {
csvString += updateString(idx);
csvString += '\r\n';
}
return csvString;
}
/**
* Write the `DataFrame` to a Workbook object
*
* @param {string|Workbook} excel_writer
* File path or existing Workbook object
* @param {string} sheetName
* Name of the sheet which will contain the DataFrame
* @param {boolean} download
* Download the excel file?
* @param {Object} kwargs
* @param {boolean} kwargs.index=true
*
* @return {Workbook}
*
*/ // eslint-disable-next-line
}, {
key: 'to_excel',
value: function to_excel(excel_writer) {
var sheetName = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'Sheet1';
var download = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
var kwargs = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : { index: true };
throw new Error('to_excel not yet implemented');
// let wb;
//
// const sheetObject = () => {
// if (kwargs.index) {
// const colRow = Immutable.List.of('').concat(this.columns.toList());
// return new Sheet(
// Immutable.List.of(colRow)
// .concat(this.values.map((v, idx) =>
// Immutable.List.of(this.index.get(idx)).concat(v))));
// }
//
// return new Sheet(Immutable.List.of(this.columns.toList()).concat(this.values));
// };
//
// if (excel_writer instanceof Workbook) {
// wb = excel_writer.copy();
// wb.addSheet(sheetName, sheetObject());
// } else if (typeof excel_writer === 'string') {
// wb = new Workbook();
// wb.addSheet(sheetName, sheetObject());
// } else throw new Error('excel_writer must be a file path or Workbook object');
//
// function s2ab(s) {
// const buf = new ArrayBuffer(s.length);
// const view = new Uint8Array(buf);
// for (let i = 0; i < s.length; i += 1) { // noinspection Eslint
// view[i] = s.charCodeAt(i) & 0xFF;
// }
// return buf;
// }
//
// if (download) {
// saveAs(new Blob([s2ab(wb.writeWorkbook())],
// {type: "application/octet-stream"}),
// typeof excel_writer === 'string' ? excel_writer : 'StratoDem Download.xlsx');
// }
//
// return wb;
}
/**
* Convert the DataFrame to a json object
*
* pandas equivalent: [DataFrame.to_json](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html)
*
* @param kwargs
* @param {string} [kwargs.orient=columns] orientation of JSON
*
* @returns {*}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns {x: {0: 1, 1: 2, 2: 3}, y: {0: 2, 1: 3, 2: 4}}
* df.to_json();
*
* // Returns [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]
* df.to_json({orient: 'records'});
*
* // Returns {0: {x: 1, y: 2}, 1: {x: 2, y: 3}, 2: {x: 3, y: 4}}
* df.to_json({orient: 'index'});
*
* // Returns {index: [0, 1, 2], columns: ['x', 'y'], values: [[1, 2], [2, 3], [3, 4]]}
* df.to_json({orient: 'split'});
*
* // Returns [[1, 2], [2, 3], [3, 4]]
* df.to_json({orient: 'values'});
*/
}, {
key: 'to_json',
value: function to_json() {
var _this8 = this;
var kwargs = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : { orient: 'columns' };
var ALLOWED_ORIENT = ['records', 'split', 'index', 'values', 'columns'];
var orient = 'columns';
if (typeof kwargs.orient !== 'undefined') {
if (ALLOWED_ORIENT.indexOf(kwargs.orient) < 0) throw new TypeError('orient must be in ' + ALLOWED_ORIENT.toString());
orient = kwargs.orient;
}
var json = void 0;
switch (orient) {
case 'records':
return this.values.map(function (row) {
var rowObj = {};
row.forEach(function (val, idx) {
rowObj[_this8.columns.get(idx)] = val;
});
return rowObj;
}).toArray();
case 'split':
return {
index: this.index.toArray(),
columns: this.columns.toArray(),
values: this.values.toJS()
};
case 'index':
json = {};
this.values.forEach(function (row, idx) {
var rowObj = {};
row.forEach(function (val, idx2) {
rowObj[_this8.columns.get(idx2)] = val;
});
json[_this8.index.get(idx)] = rowObj;
});
return json;
case 'values':
return this.values.toJS();
case 'columns':
json = {};
this.columns.forEach(function (c) {
json[c] = _this8.get(c).to_json({ orient: 'index' });
});
return json;
default:
throw new TypeError('orient must be in ' + ALLOWED_ORIENT.toString());
}
}
/**
* Return the sum of the values in the `DataFrame` along the axis
*
* pandas equivalent: [DataFrame.sum](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sum.html)
*
* @param {number} axis=0
* Axis along which to sum values
*
* @returns {Series}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x 6
* // y 9
* // Name: , dtype: dtype('int')
* df.sum().toString();
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // 0 3
* // 1 5
* // 2 7
* // Name: , dtype: dtype('int')
* df.sum(1).toString();
*/
}, {
key: 'sum',
value: function sum() {
var _this9 = this;
var axis = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
if (axis === 0) {
return new _series2.default(this.columns.toArray().map(function (k) {
return _this9.get(k).sum();
}), { index: this.columns.toArray() });
} else if (axis === 1) {
return new _series2.default(_immutable2.default.Range(0, this.length).map(function (idx) {
return _this9.values.get(idx).reduce(function (s, k) {
return s + k;
}, 0);
}).toList(), { index: this.index });
}
throw new _exceptions.InvalidAxisError();
}
/**
* Return the mean of the values in the `DataFrame` along the axis
*
* pandas equivalent: [DataFrame.mean](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.mean.html)
*
* @param {number} axis=0
* Axis along which to average values
*
* @returns {Series}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x 2
* // y 3
* // Name: , dtype: dtype('int')
* df.mean().toString();
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // 0 1.5
* // 1 2.5
* // 2 3.5
* // Name: , dtype: dtype('float')
* df.mean(1).toString();
*/
}, {
key: 'mean',
value: function mean() {
var _this10 = this;
var axis = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
if (axis === 0) {
return new _series2.default(this.columns.toArray().map(function (k) {
return _this10.get(k).mean();
}), { index: this.columns.toArray() });
} else if (axis === 1) {
return new _series2.default(_immutable2.default.Range(0, this.length).map(function (idx) {
return _this10.values.get(idx).reduce(function (s, k) {
return s + k / _this10.columns.size;
}, 0);
}).toList(), { index: this.index });
}
throw new _exceptions.InvalidAxisError();
}
/**
* Return the standard deviation of the values in the `DataFrame` along the axis
*
* pandas equivalent: [DataFrame.std](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.std.html)
*
* @param {number} axis=0
* Axis along which to calculate the standard deviation
*
* @returns {Series}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x 1
* // y 1
* // Name: , dtype: dtype('int')
* df.std().toString();
*
* @example
* const df = new DataFrame([{x: 1, y: 1}, {x: 2, y: 2}, {x: 3, y: 3}]);
*
* // Returns
* // 0 0
* // 1 0
* // 2 0
* // Name: , dtype: dtype('int')
* df.std(1).toString();
*/
}, {
key: 'std',
value: function std() {
var _this11 = this;
var axis = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
if (axis === 0) {
return new _series2.default(this.columns.toArray().map(function (k) {
return _this11.get(k).std();
}), { index: this.columns.toArray() });
} else if (axis === 1) {
return this.variance(axis).map(function (v) {
return Math.sqrt(v);
});
}
throw new _exceptions.InvalidAxisError();
}
/**
* Return the variance of the values in the `DataFrame` along the axis
*
* pandas equivalent: [DataFrame.var](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.var.html)
*
* @param {number} axis=0
* Axis along which to calculate the variance
*
* @returns {Series}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x 1
* // y 1
* // Name: , dtype: dtype('int')
* df.variance().toString();
*
* @example
* const df = new DataFrame([{x: 1, y: 1}, {x: 2, y: 2}, {x: 3, y: 3}]);
*
* // Returns
* // 0 0
* // 1 0
* // 2 0
* // Name: , dtype: dtype('int')
* df.variance(1).toString();
*/
}, {
key: 'variance',
value: function variance() {
var _this12 = this;
var axis = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
if (axis === 0) {
return new _series2.default(this.columns.toArray().map(function (k) {
return _this12.get(k).variance();
}), { index: this.columns.toArray() });
} else if (axis === 1) {
var means = this.mean(axis).values;
return new _series2.default(_immutable2.default.Range(0, this.length).map(function (idx) {
return _this12.values.get(idx).reduce(function (s, k) {
var diff = k - means.get(idx);
return s + diff * diff / (_this12.columns.size - 1);
}, 0);
}).toArray(), { index: this.index });
}
throw new _exceptions.InvalidAxisError();
}
}, {
key: '_pairwiseDataFrame',
value: function _pairwiseDataFrame(func) {
// Apply the func between all Series in the DataFrame, takes two series and returns a value
var valArray = [];
// Calculate upper triangle
for (var idx1 = 0; idx1 < this.columns.size; idx1 += 1) {
valArray.push({});
var ds1 = this.get(this.columns.get(idx1));
for (var idx2 = idx1; idx2 < this.columns.size; idx2 += 1) {
var col2 = this.columns.get(idx2);
var ds2 = this.get(col2);
valArray[idx1][col2] = func(ds1, ds2);
}
}
// Take upper triangle and fill in lower triangle
for (var _idx = 0; _idx < this.columns.size; _idx += 1) {
var col1 = this.columns.get(_idx);
for (var _idx2 = _idx + 1; _idx2 < this.columns.size; _idx2 += 1) {
var _col = this.columns.get(_idx2);
valArray[_idx2][col1] = valArray[_idx][_col];
}
}
return new DataFrame(valArray, { index: this.columns.toList() });
}
/**
* Calculate the covariance between all `Series` in the `DataFrame`
*
* pandas equivalent: [DataFrame.cov](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.cov.html)
*
* @return {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2, z: 3}, {x: 2, y: 1, z: 5}, {x: 3, y: 0, z: 7}]);
*
* // Returns DataFrame([{x: 1, y: -1, z: 2}, {x: -1, y: 1, z: -2}, {x: 2, y: -2, z: 4}])
* df.cov();
*/
}, {
key: 'cov',
value: function cov() {
// $FlowFixMe TODO
return this._pairwiseDataFrame(function (ds1, ds2) {
return ds1.cov(ds2);
});
}
/**
* Calculate the correlation between all `Series` in the `DataFrame`
*
* pandas equivalent: [DataFrame.corr](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.corr.html)
*
* @return {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2, z: 3}, {x: 2, y: 1, z: 5}, {x: 3, y: 0, z: 7}]);
*
* // Returns DataFrame([{x: 1, y: -1, z: 1}, {x: -1, y: 1, z: -1}, {x: 1, y: -1, z: 1}])
* df.corr();
*/
}, {
key: 'corr',
value: function corr() {
// noinspection Eslint
var corrFunc = function corrFunc(ds1, ds2) {
// $FlowFixMe TODO
return ds1.values === ds2.values ? 1 : ds1.corr(ds2);
};
return this._pairwiseDataFrame(corrFunc);
}
/**
* Return the difference over a given number of periods along the axis
*
* pandas equivalent: [DataFrame.diff](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.diff.html)
*
* @param {number} periods=1
* Number of periods to use for difference calculation
* @param {number} axis=0
* Axis along which to calculate difference
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x | y
* // 0 null | null
* // 1 1 | 1
* // 2 1 | 1
* df.diff().toString();
*/
}, {
key: 'diff',
value: function diff() {
var _this13 = this;
var periods = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
var axis = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
if (typeof periods !== 'number' || !Number.isInteger(periods)) throw new Error('periods must be an integer');
if (periods <= 0) throw new Error('periods must be positive');
if (axis === 0) {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (k) {
return [k, _this13._data.get(k).diff(periods)];
})), { index: this.index });
} else if (axis === 1) {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (k, idx) {
if (idx < periods) return [k, new _series2.default(_immutable2.default.Repeat(null, _this13.length).toList(), { name: k, index: _this13.index })];
var compareCol = _this13.get(_this13.columns.get(idx - periods)); // $FlowFixMe TODO
return [k, _this13.get(k).map(function (v, vIdx) {
return v - compareCol.iloc(vIdx);
})];
})), { index: this.index });
}
throw new _exceptions.InvalidAxisError();
}
/**
* Return the percentage change over a given number of periods along the axis
*
* pandas equivalent: [DataFrame.pct_change](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pct_change.html)
*
* @param {number} periods=1
* Number of periods to use for percentage change calculation
* @param {number} axis=0
* Axis along which to calculate percentage change
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}]);
*
* // Returns
* // x | y
* // 0 null | null
* // 1 1 | 0.5
* // 2 0.5 | 0.3333
* df.pct_change().toString();
*/
}, {
key: 'pct_change',
value: function pct_change() {
var _this14 = this;
var periods = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
var axis = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
if (typeof periods !== 'number' || !Number.isInteger(periods)) throw new Error('periods must be an integer');
if (periods <= 0) throw new Error('periods must be positive');
if (axis === 0) {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (k) {
return [k, _this14._data.get(k).pct_change(periods)];
})), { index: this.index });
} else if (axis === 1) {
return new DataFrame(_immutable2.default.Map(this.columns.map(function (k, idx) {
if (idx < periods) return [k, new _series2.default(_immutable2.default.Repeat(null, _this14.length).toList(), { name: k, index: _this14.index })];
var compareCol = _this14.get(_this14.columns.get(idx - periods));
// $FlowFixMe TODO
return [k, _this14.get(k).map(function (v, vIdx) {
return v / compareCol.iloc(vIdx) - 1;
})];
})), { index: this.index });
}
throw new _exceptions.InvalidAxisError();
}
/**
* Filter the DataFrame by an Iterable (Series, Array, or List) of booleans and return the subset
*
* pandas equivalent: df[df condition]
*
* @param {Series|Array|List} iterBool
* Iterable of booleans
*
* @returns {DataFrame}
*
* @example
* const df = new DataFrame(Immutable.Map({x: new Series([1, 2]), y: new Series([2, 3])}));
*
* // Returns DataFrame(Immutable.Map({x: Series([2]), y: Series([3])}));
* df.filter(df.get('x').gt(1));
*
* // Returns DataFrame(Immutable.Map({x: Series([2]), y: Series([3])}));
* df.filter([false, true]);
*
* // Returns DataFrame(Immutable.Map({x: Series([2]), y: Series([3])}));
* df.filter(Immutable.List([false, true]));
*/
}, {
key: 'filter',
value: function filter(iterBool) {
if (!Array.isArray(iterBool) && !(iterBool instanceof _immutable2.default.List) && !(iterBool instanceof _series2.default)) throw new Error('filter must be an Array, List, or Series');
if (Array.isArray(iterBool) && iterBool.length !== this.length) throw new Error('Array must be of equal length to DataFrame');else if (iterBool instanceof _immutable2.default.List && iterBool.size !== this.length) throw new Error('List must be of equal length to DataFrame');else if (iterBool instanceof _series2.default && iterBool.length !== this.length) throw new Error('Series must be of equal length to DataFrame');
// noinspection Eslint
return new DataFrame(_immutable2.default.Map(this._data.mapEntries(function (_ref7) {
var _ref8 = (0, _slicedToArray3.default)(_ref7, 2),
k = _ref8[0],
v = _ref8[1];
return [k, v.filter(iterBool)];
})));
}
/**
* Reshape data (produce a 'pivot' table) based on column values. Uses unique values from
* index / columns to form axes of the resulting DataFrame.
*
* pandas equivalent: [DataFrame.pivot](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot.html)
*
* @param {string|number} index
* Name of the column to use as index
* @param {string|number} columns
* Name of the column to use as column values
* @param {string|number} values
* Name of the column to use as the value
*
* @returns {DataFrame}
*/
}, {
key: 'pivot',
value: function pivot(index, columns, values) {
var _this15 = this;
var uniqueVals = _immutable2.default.Map({});
var uniqueCols = _immutable2.default.List([]);
this.index.forEach(function (v, idx) {
var idxVal = _this15.get(index).iloc(idx);
var colVal = _this15.get(columns).iloc(idx);
if (uniqueVals.hasIn([idxVal, colVal])) throw new Error('pivot index and column must be unique');
var val = _this15.get(values).iloc(idx);
uniqueVals = uniqueVals.setIn([idxVal, colVal], val);
if (!uniqueCols.has(colVal)) uniqueCols = uniqueCols.push(colVal);
});
var sortedIndex = uniqueVals.keySeq().sort().toArray();
var sortedColumns = uniqueCols.sort();
var data = _immutable2.default.OrderedMap(sortedColumns.map(function (col) {
return [col, new _series2.default(sortedIndex.map(function (idx) {
var val = uniqueVals.getIn([idx, col]);
return typeof val === 'undefined' ? null : val;
}), { name: col, index: sortedIndex })];
}));
return new DataFrame(data, { index: sortedIndex });
}
/**
* Reshape data (produce a 'pivot' table) based on a set of index, columns, or values
* columns from the original DataFrame
*
* @param {Array<string>|Immutable.List|string|number} index
* Name(s) of column(s) to use as the index for the pivoted DataFrame
* @param {Array<string>|Immutable.List|string|number} columns
* Name(s) of column(s) to use as the columns for the pivoted DataFrame
* @param {Array<string>|Immutable.List|string|number} values
* Name(s) of column(s) to use as the values for the pivoted DataFrame
* @param {string} aggfunc
* Name of aggregation function
*/
// eslint-disable-next-line
}, {
key: 'pivot_table',
value: function pivot_table(index, columns, values) {
var aggfunc = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 'sum';
throw new Error('Not implemented');
// const validateCols = (cols: T_PVINDEX): Immutable.List => {
// if (Array.isArray(cols)) {
// cols.forEach(c => this._assertColumnExists(c));
// return Immutable.List(cols);
// } else if (cols instanceof Immutable.List) {
// cols.forEach(c => this._assertColumnExists(c));
// retu