data-forge
Version:
JavaScript data transformation and analysis toolkit inspired by Pandas and LINQ.
1,216 lines • 213 kB
JavaScript
"use strict";
// Compiler helper: makes constructor `d` inherit from constructor `b`,
// covering both static members and the prototype chain.
var __extends = (this && this.__extends) || (function () {
    // Fallback used when neither Object.setPrototypeOf nor __proto__ assignment is available.
    function copyOwnStatics(d, b) {
        for (var p in b) {
            if (b.hasOwnProperty(p)) {
                d[p] = b[p];
            }
        }
    }
    // Fallback used when __proto__ assignment works but Object.setPrototypeOf is missing.
    function assignProto(d, b) {
        d.__proto__ = b;
    }
    var extendStatics;
    if (Object.setPrototypeOf) {
        extendStatics = Object.setPrototypeOf;
    }
    else if ({ __proto__: [] } instanceof Array) {
        extendStatics = assignProto;
    }
    else {
        extendStatics = copyOwnStatics;
    }
    return function (d, b) {
        // Intermediate constructor so that `b` itself is never invoked while linking prototypes.
        function Surrogate() { this.constructor = d; }
        extendStatics(d, b);
        if (b === null) {
            d.prototype = Object.create(b);
            return;
        }
        Surrogate.prototype = b.prototype;
        d.prototype = new Surrogate();
    };
})();
// Compiler helper: obtains an iterator for `o`.
// Uses the object's own Symbol.iterator when it has one, otherwise walks it as an array-like by index.
var __values = (this && this.__values) || function (o) {
    var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
    if (iteratorFactory) {
        return iteratorFactory.call(o);
    }
    var cursor = 0;
    function next() {
        // Dropping the reference once the end is reached marks the iterator as finished.
        if (o && cursor >= o.length) {
            o = void 0;
        }
        return { value: o && o[cursor++], done: !o };
    }
    return { next: next };
};
// Compiler helper: reads values out of the iterable `o` into a new array.
// When `n` is supplied, at most `n` values are read; when it is undefined the iterable is drained.
// A value without Symbol.iterator is returned unchanged.
var __read = (this && this.__read) || function (o, n) {
var m = typeof Symbol === "function" && o[Symbol.iterator];
if (!m) return o;
var i = m.call(o), r, ar = [], e;
try {
while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
}
// The error is parked so the iterator can be closed before it is rethrown below.
catch (error) { e = { error: error }; }
finally {
try {
// Close the iterator only if it was left unfinished (early stop because of `n`, or an error).
if (r && !r.done && (m = i["return"])) m.call(i);
}
// The original error is rethrown even if closing the iterator also threw.
finally { if (e) throw e.error; }
}
return ar;
};
// Compiler helper: concatenates the values of every argument (each read via __read) into one array.
var __spread = (this && this.__spread) || function () {
    var combined = [];
    var position = 0;
    while (position < arguments.length) {
        combined = combined.concat(__read(arguments[position]));
        position++;
    }
    return combined;
};
// Compiler helper: gives a required module a `default` member.
// Modules already flagged with `__esModule` are passed through untouched.
var __importDefault = (this && this.__importDefault) || function (mod) {
    var alreadyEsModule = mod && mod.__esModule;
    if (alreadyEsModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var empty_iterable_1 = require("./iterables/empty-iterable");
var count_iterable_1 = require("./iterables/count-iterable");
var multi_iterable_1 = require("./iterables/multi-iterable");
var select_iterable_1 = require("./iterables/select-iterable");
var select_many_iterable_1 = require("./iterables/select-many-iterable");
var take_iterable_1 = require("./iterables/take-iterable");
var take_while_iterable_1 = require("./iterables/take-while-iterable");
var where_iterable_1 = require("./iterables/where-iterable");
var concat_iterable_1 = require("./iterables/concat-iterable");
var dataframe_window_iterable_1 = require("./iterables/dataframe-window-iterable");
var reverse_iterable_1 = require("./iterables/reverse-iterable");
var zip_iterable_1 = require("./iterables/zip-iterable");
var csv_rows_iterable_1 = require("./iterables/csv-rows-iterable");
var distinct_iterable_1 = require("./iterables/distinct-iterable");
var dataframe_rolling_window_iterable_1 = require("./iterables/dataframe-rolling-window-iterable");
var dataframe_variable_window_iterable_1 = require("./iterables/dataframe-variable-window-iterable");
var ordered_iterable_1 = require("./iterables/ordered-iterable");
var index_1 = require("./index");
var extract_element_iterable_1 = require("./iterables/extract-element-iterable");
var skip_iterable_1 = require("./iterables/skip-iterable");
var skip_while_iterable_1 = require("./iterables/skip-while-iterable");
var repeat_iterable_1 = require("./iterables/repeat-iterable");
var tile_iterable_1 = require("./iterables/tile-iterable");
var ravel_iterable_1 = require("./iterables/ravel-iterable");
// @ts-ignore
var easy_table_1 = __importDefault(require("easy-table"));
// @ts-ignore
var dayjs_1 = __importDefault(require("dayjs"));
var series_1 = require("./series");
var column_names_iterable_1 = require("./iterables/column-names-iterable");
var utils_1 = require("./utils");
var json5_1 = __importDefault(require("json5"));
// @ts-ignore
var papaparse_1 = __importDefault(require("papaparse"));
/**
* Class that represents a dataframe.
* A dataframe contains an indexed sequence of data records.
* Think of it as a spreadsheet or CSV file in memory.
*
* Each data record contains multiple named fields, the value of each field represents one row in a column of data.
* Each column of data is a named {@link Series}.
* You can think of a dataframe as a collection of named data series.
*
* @typeparam IndexT The type to use for the index.
* @typeparam ValueT The type to use for each row/data record.
*/
var DataFrame = /** @class */ (function () {
/**
* Create a dataframe.
*
* @param config This can be an array, a configuration object or a function that lazily produces a configuration object.
*
* It can be an array that specifies the data records that the dataframe contains.
*
* It can be a {@link IDataFrameConfig} that defines the data and configuration of the dataframe.
*
* Or it can be a function that lazily produces a {@link IDataFrameConfig}.
*
* @example
* <pre>
*
* const df = new DataFrame();
* </pre>
*
* @example
* <pre>
*
* const df = new DataFrame([ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }]);
* </pre>
*
* @example
* <pre>
*
* const df = new DataFrame({ index: [1, 2, 3, 4], values: [ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }] });
* </pre>
*
* @example
* <pre>
*
* const lazyInit = () => ({ index: [1, 2, 3, 4], values: [ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }] });
* const df = new DataFrame(lazyInit);
* </pre>
*/
function DataFrame(config) {
    // Function that lazily produces the configuration (set only when a function is passed in).
    this.configFn = null;
    // The content of the dataframe; null means lazy initialisation is still pending.
    this.content = null;
    // Index -> row lookup, built on demand by getRowByIndex.
    this.indexedContent = null;
    // Nothing supplied: start out as an empty dataframe.
    if (!config) {
        this.content = DataFrame.initEmpty();
        return;
    }
    // A function defers building the content until it is first needed.
    if (utils_1.isFunction(config)) {
        this.configFn = config;
        return;
    }
    // Arrays and other iterables are treated as the data records themselves,
    // anything else is treated as a configuration object.
    var isIterableOfRows = utils_1.isArray(config) ||
        utils_1.isFunction(config[Symbol.iterator]);
    this.content = isIterableOfRows
        ? DataFrame.initFromArray(config)
        : DataFrame.initFromConfig(config);
}
//
// Build dataframe content straight from an iterable of rows.
// Column names are the keys of the first row; an iterable with no rows gives no column names.
//
DataFrame.initFromArray = function (arr) {
    var firstStep = arr[Symbol.iterator]().next();
    var columnNames;
    if (firstStep.done) {
        columnNames = [];
    }
    else {
        columnNames = Object.keys(firstStep.value);
    }
    var defaultIndex = DataFrame.defaultCountIterable;
    var content = {
        index: defaultIndex,
        values: arr,
        pairs: new multi_iterable_1.MultiIterable([defaultIndex, arr]),
        isBaked: true,
        columnNames: columnNames,
    };
    return content;
};
//
// Build the content of a dataframe that has no rows and no columns.
//
DataFrame.initEmpty = function () {
    // Index, values and pairs all share the one empty iterable.
    var nothing = DataFrame.defaultEmptyIterable;
    var content = {
        index: nothing,
        values: nothing,
        pairs: nothing,
        isBaked: true,
        columnNames: [],
    };
    return content;
};
//
// Initialise dataframe column names.
//
// Converts every input name to a string and disambiguates duplicates by appending
// ".1", ".2", ... in order of appearance. Names that occur only once are left untouched.
//
// inputColumnNames: iterable of column names.
// isCaseSensitive: when truthy, names differing only by case are distinct;
//                  otherwise they are treated as duplicates of each other.
//
// Returns a new array of column names, in the same order as the input.
//
DataFrame.initColumnNames = function (inputColumnNames, isCaseSensitive) {
    // Key under which a name is counted: as-is, or lower-cased for case-insensitive matching.
    function toKey(columnName) {
        return isCaseSensitive ? columnName : columnName.toLowerCase();
    }
    // Convert column names to strings.
    var columnNames = Array.from(inputColumnNames).map(function (columnName) { return columnName.toString(); });
    // Count occurrences of each name.
    // A Map is used (rather than a plain object) so that names such as "constructor" or
    // "__proto__" are not confused with members inherited from Object.prototype.
    var occurrences = new Map();
    columnNames.forEach(function (columnName) {
        var key = toKey(columnName);
        occurrences.set(key, (occurrences.get(key) || 0) + 1);
    });
    // Next suffix to hand out for each duplicated name.
    var nextSuffix = new Map();
    return columnNames.map(function (columnName) {
        var key = toKey(columnName);
        if (occurrences.get(key) <= 1) {
            // No duplicates.
            return columnName;
        }
        // There are duplicates of this column.
        var suffix = nextSuffix.get(key) || 1;
        nextSuffix.set(key, suffix + 1);
        return columnName + "." + suffix;
    });
};
//
// Check that a value is an iterable.
//
// input: the value to validate.
// fieldName: name of the field being validated, used in the error message.
//
// Throws an Error naming the field when the value is neither an array nor an object
// with a Symbol.iterator function. null and undefined are rejected with that same
// error instead of failing on the property lookup.
//
DataFrame.checkIterable = function (input, fieldName) {
    var isIterable = utils_1.isArray(input) ||
        (input !== null && input !== undefined && utils_1.isFunction(input[Symbol.iterator]));
    if (!isIterable) {
        // Not ok
        throw new Error("Expected '" + fieldName + "' field of DataFrame config object to be an array of values or an iterable of values.");
    }
    // Ok: an array, or assumed to be an iterable.
};
//
// Initialise dataframe content from a config object.
//
// Reads the `pairs`, `columns`, `columnNames`, `caseSensitive`, `rows`, `values`,
// `considerAllRows`, `index` and `baked` fields of the config.
//
// Returns a content object with `index`, `values`, `pairs`, `isBaked` and `columnNames` fields.
//
// Throws (via checkIterable) when a supplied data field is not an array or iterable, and
// throws when `columns` is neither an iterable nor an object.
//
DataFrame.initFromConfig = function (config) {
var index;
var values;
var pairs;
var isBaked = false;
var columnNames;
if (config.pairs) {
DataFrame.checkIterable(config.pairs, "pairs");
pairs = config.pairs;
}
// `columns` takes precedence: when it is set, `columnNames`, `rows` and `values` are not consulted.
if (config.columns) {
var columnsConfig = config.columns;
if (utils_1.isArray(columnsConfig) ||
utils_1.isFunction(columnsConfig[Symbol.iterator])) {
// Columns supplied as an iterable of entries that each have `name` and `series` fields.
var iterableColumnsConfig = columnsConfig;
columnNames = Array.from(iterableColumnsConfig).map(function (column) { return column.name; });
// Re-key the entries by name so both forms of `columns` can be read as columnsConfig[name] below.
columnsConfig = utils_1.toMap(iterableColumnsConfig, function (column) { return column.name; }, function (column) { return column.series; });
}
else {
// Columns supplied as an object: its keys are the column names.
if (!utils_1.isObject(columnsConfig))
throw new Error("Expected 'columns' member of 'config' parameter to DataFrame constructor to be an object with fields that define columns.");
columnNames = Object.keys(columnsConfig);
}
// Validate and collect the data for each column, in column-name order.
var columnIterables = [];
try {
for (var columnNames_3 = __values(columnNames), columnNames_3_1 = columnNames_3.next(); !columnNames_3_1.done; columnNames_3_1 = columnNames_3.next()) {
var columnName = columnNames_3_1.value;
DataFrame.checkIterable(columnsConfig[columnName], columnName);
columnIterables.push(columnsConfig[columnName]);
}
}
// Compiler-generated: park the error, close the iterator if it was left unfinished, then rethrow.
catch (e_3_1) { e_3 = { error: e_3_1 }; }
finally {
try {
if (columnNames_3_1 && !columnNames_3_1.done && (_a = columnNames_3.return)) _a.call(columnNames_3);
}
finally { if (e_3) throw e_3.error; }
}
// NOTE(review): presumably MultiIterable walks the columns in lock-step and CsvRowsIterable
// turns each resulting array into a row object keyed by column name - confirm against those classes.
values = new csv_rows_iterable_1.CsvRowsIterable(columnNames, new multi_iterable_1.MultiIterable(columnIterables));
}
else {
if (config.columnNames) {
// NOTE(review): relies on `this` being DataFrame, i.e. on always being called as DataFrame.initFromConfig(...).
columnNames = this.initColumnNames(config.columnNames, config.caseSensitive);
}
if (config.rows) {
if (!config.columnNames) {
// No names supplied: generate "Column.<n>" names from a counter.
columnNames = new select_iterable_1.SelectIterable(new count_iterable_1.CountIterable(), function (c) { return "Column." + c.toString(); });
}
DataFrame.checkIterable(config.rows, 'rows');
values = new csv_rows_iterable_1.CsvRowsIterable(columnNames, config.rows); // Convert data from rows to columns.
}
else if (config.values) {
DataFrame.checkIterable(config.values, 'values');
values = config.values;
if (!config.columnNames) {
// No names supplied: derive them from the values themselves.
columnNames = new column_names_iterable_1.ColumnNamesIterable(values, config.considerAllRows || false);
}
}
else if (pairs) {
// Only pairs supplied: take element 1 of each pair as the row value.
values = new extract_element_iterable_1.ExtractElementIterable(pairs, 1);
if (!config.columnNames) {
columnNames = new column_names_iterable_1.ColumnNamesIterable(values, config.considerAllRows || false);
}
}
else {
// No data supplied at all.
values = DataFrame.defaultEmptyIterable;
if (!config.columnNames) {
columnNames = DataFrame.defaultEmptyIterable;
}
}
}
// Index priority: explicit `index`, then element 0 of each pair, then the default counting index.
if (config.index) {
DataFrame.checkIterable(config.index, 'index');
index = config.index;
}
else if (pairs) {
index = new extract_element_iterable_1.ExtractElementIterable(pairs, 0);
}
else {
index = DataFrame.defaultCountIterable;
}
if (!pairs) {
// Pairs were not supplied, so combine the index and values.
pairs = new multi_iterable_1.MultiIterable([index, values]);
}
// Content counts as not baked unless the config states otherwise.
if (config.baked !== undefined) {
isBaked = config.baked;
}
return {
index: index,
values: values,
pairs: pairs,
isBaked: isBaked,
columnNames: columnNames,
};
// Compiler-generated declarations for the iterator-closing code above (hoisted, so they apply even though they follow the return).
var e_3, _a;
};
//
// Build the content from the lazy configuration function, if that has not happened yet.
// Does nothing when content already exists or when there is no configuration function.
//
DataFrame.prototype.lazyInit = function () {
    var alreadyInitialised = this.content !== null;
    if (alreadyInitialised || this.configFn === null) {
        return;
    }
    var config = this.configFn();
    this.content = DataFrame.initFromConfig(config);
};
//
// Return the dataframe content, running lazy initialisation first.
//
DataFrame.prototype.getContent = function () {
    var dataFrame = this;
    dataFrame.lazyInit();
    return dataFrame.content;
};
//
// Look up a row by its index value.
// The first call builds (and caches) a Map from index value to row; later calls reuse it.
//
DataFrame.prototype.getRowByIndex = function (index) {
    if (this.indexedContent) {
        return this.indexedContent.get(index);
    }
    var lookup = new Map();
    this.indexedContent = lookup;
    var cursor;
    var step;
    var failure;
    try {
        cursor = __values(this.getContent().pairs);
        step = cursor.next();
        while (!step.done) {
            // Each pair is [index, row].
            lookup.set(step.value[0], step.value[1]);
            step = cursor.next();
        }
    }
    catch (caught) {
        // Hold on to the error so the iterator can be closed before it is rethrown.
        failure = { error: caught };
    }
    finally {
        try {
            var close = step && !step.done && cursor.return;
            if (close) {
                close.call(cursor);
            }
        }
        finally {
            if (failure) {
                throw failure.error;
            }
        }
    }
    return lookup.get(index);
};
/**
* Get an iterator to enumerate the rows of the dataframe.
* Enumerating the iterator forces lazy evaluation to complete.
* This function is automatically called by `for...of`.
*
* @return An iterator for the dataframe.
*
* @example
* <pre>
*
* for (const row of df) {
* // ... do something with the row ...
* }
* </pre>
*/
DataFrame.prototype[Symbol.iterator] = function () {
    // Iterating a dataframe yields its row values, so delegate to the values iterable.
    var rows = this.getContent().values;
    return rows[Symbol.iterator]();
};
/**
* Get the names of the columns in the dataframe.
*
* @return Returns an array of the column names in the dataframe.
*
* @example
* <pre>
*
* console.log(df.getColumnNames());
* </pre>
*/
DataFrame.prototype.getColumnNames = function () {
    // The stored column names may be any iterable; always hand back a fresh array.
    var storedNames = this.getContent().columnNames;
    return Array.from(storedNames);
};
/**
* Retrieve the collection of all columns in the dataframe.
*
* @return Returns a {@link Series} of column descriptors, each containing the name, the detected type and the {@link Series} of one column in the dataframe.
*
* @example
* <pre>
*
* for (const column of df.getColumns()) {
* console.log("Column name: ");
* console.log(column.name);
*
* console.log("Data:");
* console.log(column.series.toArray());
* }
* </pre>
*/
DataFrame.prototype.getColumns = function () {
    var dataFrame = this;
    function isMissing(value) {
        return value === undefined || value === null;
    }
    // Build the descriptor for one column: its name, detected type and series.
    function describeColumn(columnName) {
        // Leading undefined/null values are skipped so the type comes from the first real value.
        var series = dataFrame.getSeries(columnName).skipWhile(isMissing);
        var firstValue;
        if (series.any()) {
            firstValue = series.first();
        }
        return {
            name: columnName,
            type: utils_1.determineType(firstValue),
            series: series,
        };
    }
    // The column descriptors are only produced when the returned series is evaluated.
    return new series_1.Series(function () {
        var columnNames = dataFrame.getColumnNames();
        return {
            values: columnNames.map(function (columnName) { return describeColumn(columnName); }),
        };
    });
};
/**
* Cast the value of the dataframe to a new type.
* This operation has no effect but to retype the value that the dataframe contains.
*
* @return The same dataframe, but with the type changed.
*
* @example
* <pre>
*
* const castDf = df.cast<SomeOtherType>();
* </pre>
*/
DataFrame.prototype.cast = function () {
    // No conversion happens at runtime: the very same instance is handed back.
    var sameDataFrame = this;
    return sameDataFrame;
};
/**
* Get the index for the dataframe.
*
* @return The {@link Index} for the dataframe.
*
* @example
* <pre>
*
* const index = df.getIndex();
* </pre>
*/
DataFrame.prototype.getIndex = function () {
    var dataFrame = this;
    // The content is only read once the Index invokes this function.
    function produceIndexConfig() {
        return { values: dataFrame.getContent().index };
    }
    return new index_1.Index(produceIndexConfig);
};
/**
* Set a named column as the {@link Index} of the dataframe.
*
* @param columnName Name of the column to use as the new {@link Index} of the returned dataframe.
*
* @return Returns a new dataframe with the values of the specified column as the new {@link Index}.
*
* @example
* <pre>
*
* const indexedDf = df.setIndex("SomeColumn");
* </pre>
*/
DataFrame.prototype.setIndex = function (columnName) {
    if (utils_1.isString(columnName)) {
        // The values of the named column become the new index.
        var indexSeries = this.getSeries(columnName);
        return this.withIndex(indexSeries);
    }
    throw new Error("Expected 'columnName' parameter to 'DataFrame.setIndex' to be a string that specifies the name of the column to set as the index for the dataframe.");
};
/**
* Apply a new {@link Index} to the dataframe.
*
* @param newIndex The new array or iterable to be the new {@link Index} of the dataframe. Can also be a selector to choose the {@link Index} for each row in the dataframe.
*
* @return Returns a new dataframe or dataframe with the specified {@link Index} attached.
*
* @example
* <pre>
*
* const indexedDf = df.withIndex([10, 20, 30]);
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(df.getSeries("SomeColumn"));
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(row => row.SomeColumn);
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(row => row.SomeColumn + 20);
* </pre>
*/
DataFrame.prototype.withIndex = function (newIndex) {
    var source = this;
    var indexIsSelector = utils_1.isFunction(newIndex);
    if (!indexIsSelector) {
        // An explicit index is validated up front, before any lazy evaluation.
        DataFrame.checkIterable(newIndex, 'newIndex');
    }
    return new DataFrame(function () {
        var current = source.getContent();
        return {
            columnNames: current.columnNames,
            values: current.values,
            // A selector is applied to the rows through deflate; anything else is used as-is.
            index: indexIsSelector ? source.deflate(newIndex) : newIndex,
        };
    });
};
/**
* Resets the {@link Index} of the dataframe back to the default zero-based sequential integer index.
*
* @return Returns a new dataframe with the {@link Index} reset to the default zero-based index.
*
* @example
* <pre>
*
* const dfWithResetIndex = df.resetIndex();
* </pre>
*/
DataFrame.prototype.resetIndex = function () {
    var source = this;
    // Only the column names and values are carried over; no index is passed on.
    function configWithoutIndex() {
        var current = source.getContent();
        var config = {};
        config.columnNames = current.columnNames;
        config.values = current.values;
        return config;
    }
    return new DataFrame(configWithoutIndex);
};
/**
* Extract a {@link Series} from a named column in the dataframe.
*
* @param columnName Specifies the name of the column that contains the {@link Series} to retrieve.
*
* @return Returns the {@link Series} extracted from the named column in the dataframe.
*
* @example
* <pre>
*
* const series = df.getSeries("SomeColumn");
* </pre>
*/
DataFrame.prototype.getSeries = function (columnName) {
    var dataFrame = this;
    if (!utils_1.isString(columnName)) {
        throw new Error("Expected 'columnName' parameter to 'DataFrame.getSeries' function to be a string that specifies the name of the column to retreive.");
    }
    // Reads the requested field out of a single row.
    function pluckColumn(row) {
        return row[columnName];
    }
    // The series shares the dataframe's index; its values are picked from each row on demand.
    return new series_1.Series(function () {
        var content = dataFrame.getContent();
        return {
            values: new select_iterable_1.SelectIterable(content.values, pluckColumn),
            index: content.index,
        };
    });
};
/**
* Determine if the dataframe contains a {@link Series} the specified named column.
*
* @param columnName Name of the column to check for.
*
* @return Returns true if the dataframe contains the requested {@link Series}, otherwise returns false.
*
* @example
* <pre>
*
* if (df.hasSeries("SomeColumn")) {
* // ... the dataframe contains a series with the specified column name ...
* }
* </pre>
*/
DataFrame.prototype.hasSeries = function (columnName) {
    // Column names are compared without regard to case.
    var wantedName = columnName.toLowerCase();
    return this.getColumnNames().some(function (existingColumnName) {
        return existingColumnName.toLowerCase() === wantedName;
    });
};
/**
* Verify the existence of a name column and extracts the {@link Series} for it.
* Throws an exception if the requested column doesn't exist.
*
* @param columnName Name of the column to extract.
*
* @return Returns the {@link Series} for the column if it exists, otherwise it throws an exception.
*
* @example
* <pre>
*
* try {
* const series = df.expectSeries("SomeColumn");
* // ... do something with the series ...
* }
* catch (err) {
* // ... the dataframe doesn't contain the column "SomeColumn" ...
* }
* </pre>
*/
DataFrame.prototype.expectSeries = function (columnName) {
    if (this.hasSeries(columnName)) {
        return this.getSeries(columnName);
    }
    // The column is missing: report which one was asked for.
    throw new Error("Expected dataframe to contain series with column name: '" + columnName + "'.");
};
/**
* Create a new dataframe with a replaced or additional column specified by the passed-in series.
*
* @param columnNameOrSpec The name of the column to add or replace or a {@link IColumnGenSpec} that defines the columns to add.
* @param series When columnNameOrSpec is a string that identifies the column to add, this specifies the {@link Series} to add to the dataframe or a function that produces a series (given a dataframe).
*
* @return Returns a new dataframe replacing or adding a particular named column.
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries("ANewColumn", new Series([1, 2, 3]));
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries("ANewColumn", df =>
* df.getSeries("SourceData").select(aTransformation)
* );
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries({
* ANewColumn: new Series([1, 2, 3]),
* SomeOtherColumn: new Series([10, 20, 30])
* });
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries({
* ANewColumn: df => df.getSeries("SourceData").select(aTransformation)
* });
* </pre>
*/
DataFrame.prototype.withSeries = function (columnNameOrSpec, series) {
var _this = this;
// Validate the two accepted call shapes: (name, series-or-function) or (spec object).
if (!utils_1.isObject(columnNameOrSpec)) {
if (!utils_1.isString(columnNameOrSpec))
throw new Error("Expected 'columnNameOrSpec' parameter to 'DataFrame.withSeries' function to be a string that specifies the column to set or replace.");
if (!utils_1.isFunction(series)) {
if (!utils_1.isObject(series))
throw new Error("Expected 'series' parameter to 'DataFrame.withSeries' to be a Series object or a function that takes a dataframe and produces a Series.");
}
}
else {
if (!utils_1.isUndefined(series))
throw new Error("Expected 'series' parameter to 'DataFrame.withSeries' to not be set when 'columnNameOrSpec is an object.");
}
if (utils_1.isObject(columnNameOrSpec)) {
// Spec form: apply each column in turn, so a later column's generator function
// receives a dataframe that already contains the earlier columns.
var columnSpec = columnNameOrSpec;
var columnNames = Object.keys(columnSpec);
var workingDataFrame = this;
try {
for (var columnNames_4 = __values(columnNames), columnNames_4_1 = columnNames_4.next(); !columnNames_4_1.done; columnNames_4_1 = columnNames_4.next()) {
var columnName_1 = columnNames_4_1.value;
workingDataFrame = workingDataFrame.withSeries(columnName_1, columnSpec[columnName_1]);
}
}
// Compiler-generated: park the error, close the iterator if it was left unfinished, then rethrow.
catch (e_6_1) { e_6 = { error: e_6_1 }; }
finally {
try {
if (columnNames_4_1 && !columnNames_4_1.done && (_a = columnNames_4.return)) _a.call(columnNames_4);
}
finally { if (e_6) throw e_6.error; }
}
return workingDataFrame.cast();
}
var columnName = columnNameOrSpec;
// This check runs immediately, when withSeries is called, rather than inside the lazy function below.
if (this.none()) { // We have an empty data frame.
var importSeries = void 0;
if (utils_1.isFunction(series)) {
importSeries = series(this);
}
else {
importSeries = series;
}
// With no existing rows to merge into, each value of the series is wrapped in a row object holding just this column.
return importSeries.inflate(function (value) {
var row = {};
row[columnName] = value;
return row;
})
.cast();
}
// Non-empty dataframe: the merge is deferred until the new dataframe is evaluated.
return new DataFrame(function () {
var importSeries;
if (utils_1.isFunction(series)) {
importSeries = series(_this);
}
else {
importSeries = series;
}
// Lookup from index value to the new column's value, so values are matched to rows by index rather than by position.
var seriesValueMap = utils_1.toMap2(importSeries.toPairs(), function (pair) { return pair[0]; }, function (pair) { return pair[1]; });
var newColumnNames = utils_1.makeDistinct(_this.getColumnNames().concat([columnName]));
return {
columnNames: newColumnNames,
index: _this.getContent().index,
pairs: new select_iterable_1.SelectIterable(_this.getContent().pairs, function (pair) {
var index = pair[0];
var value = pair[1];
// Shallow-copy the row so the source dataframe's row object is not modified.
var modified = Object.assign({}, value);
modified[columnName] = seriesValueMap.get(index);
return [
index,
modified
];
}),
};
});
// Compiler-generated declarations for the iterator-closing code above (hoisted, so they apply even though they follow the return).
var e_6, _a;
};
/**
* Merge multiple dataframes into a single dataframe.
* Rows are merged by indexed.
* Same named columns in subsequent dataframes override columns earlier dataframes.
*
* @param dataFrames An array or series of dataframes to merge.
*
* @returns The merged data frame.
*
* @example
* <pre>
*
* const mergedDF = DataFrame.merge([df1, df2, etc]);
* </pre>
*/
DataFrame.merge = function (dataFrames) {
    // Materialise the input once. It is needed both for the rows and for the column names,
    // and a one-shot iterable (such as a generator) would be exhausted after the first pass.
    var inputs = Array.from(dataFrames);
    // Merged rows keyed by index value.
    var rowMap = new Map();
    inputs.forEach(function (dataFrame) {
        Array.from(dataFrame.toPairs()).forEach(function (pair) {
            var index = pair[0];
            if (rowMap.has(index)) {
                // Fields from later dataframes override those already merged for this index.
                Object.assign(rowMap.get(index), pair[1]);
            }
            else {
                // First row seen for this index: clone it so the source row is never modified.
                rowMap.set(index, Object.assign({}, pair[1]));
            }
        });
    });
    var allColumnNames = inputs
        .map(function (dataFrame) { return dataFrame.getColumnNames(); })
        .reduce(function (prev, next) { return prev.concat(next); }, []);
    var newColumnNames = utils_1.makeDistinct(allColumnNames);
    // Each Map entry is already an [index, row] pair.
    var mergedPairs = Array.from(rowMap.entries());
    // Order the merged rows by index value.
    mergedPairs.sort(function (a, b) {
        if (a[0] === b[0]) {
            return 0;
        }
        return a[0] > b[0] ? 1 : -1;
    });
    return new DataFrame({
        columnNames: newColumnNames,
        pairs: mergedPairs,
    });
};
/**
* Merge one or more dataframes into this dataframe.
* Rows are merged by indexed.
* Same named columns in subsequent dataframes override columns in earlier dataframes.
*
* @param otherDataFrames... One or more dataframes to merge into this dataframe.
*
* @returns The merged data frame.
*
* @example
* <pre>
*
* const mergedDF = df1.merge(df2);
* </pre>
*
* <pre>
*
* const mergedDF = df1.merge(df2, df3, etc);
* </pre>
*/
DataFrame.prototype.merge = function () {
    // This dataframe comes first, followed by every dataframe passed as an argument.
    var everyDataFrame = [this];
    for (var argNo = 0; argNo < arguments.length; argNo++) {
        everyDataFrame.push(arguments[argNo]);
    }
    return DataFrame.merge(everyDataFrame);
};
/**
* Add a series to the dataframe, but only if it doesn't already exist.
*
* @param columnNameOrSpec The name of the series to add or a {@link IColumnGenSpec} that specifies the columns to add.
* @param series If columnNameOrSpec is a string that specifies the name of the series to add, this specifies the actual {@link Series} to add or a selector that generates the series given the dataframe.
*
* @return Returns a new dataframe with the specified series added, if the series didn't already exist. Otherwise if the requested series already exists the same dataframe is returned.
*
* @example
* <pre>
*
* const updatedDf = df.ensureSeries("ANewColumn", new Series([1, 2, 3]));
* </pre>
*
* @example
* <pre>
*
* const updatedDf = df.ensureSeries("ANewColumn", df =>
* df.getSeries("AnExistingSeries").select(aTransformation)
* );
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.ensureSeries({
* ANewColumn: new Series([1, 2, 3]),
* SomeOtherColumn: new Series([10, 20, 30])
* });
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.ensureSeries({
* ANewColumn: df => df.getSeries("SourceData").select(aTransformation)
* });
* </pre>
*/
DataFrame.prototype.ensureSeries = function (columnNameOrSpec, series) {
    if (utils_1.isObject(columnNameOrSpec)) {
        // Spec form: the second parameter must not be supplied.
        if (!utils_1.isUndefined(series)) {
            throw new Error("Expected 'series' parameter to 'DataFrame.ensureSeries' to not be set when 'columnNameOrSpec is an object.");
        }
        // Ensure each column of the spec in turn, threading the result through.
        var specColumnNames = Object.keys(columnNameOrSpec);
        var workingDataFrame = this;
        for (var columnNo = 0; columnNo < specColumnNames.length; columnNo++) {
            var specColumnName = specColumnNames[columnNo];
            workingDataFrame = workingDataFrame.ensureSeries(specColumnName, columnNameOrSpec[specColumnName]);
        }
        return workingDataFrame;
    }
    // Single column form: a column name plus a series or a function producing one.
    if (!utils_1.isString(columnNameOrSpec)) {
        throw new Error("Expected 'columnNameOrSpec' parameter to 'DataFrame.ensureSeries' function to be a string that specifies the column to set or replace.");
    }
    if (!utils_1.isFunction(series) && !utils_1.isObject(series)) {
        throw new Error("Expected 'series' parameter to 'DataFrame.ensureSeries' to be a Series object or a function that takes a dataframe and produces a Series.");
    }
    // An existing column is left alone; only a missing column is added.
    return this.hasSeries(columnNameOrSpec)
        ? this
        : this.withSeries(columnNameOrSpec, series);
};
/**
* Create a new dataframe with just a subset of columns.
*
* @param columnNames Array of column names to include in the new dataframe.
*
* @return Returns a dataframe with a subset of columns from the original dataframe.
*
* @example
* <pre>
* const subsetDf = df.subset(["ColumnA", "ColumnB"]);
* </pre>
*/
DataFrame.prototype.subset = function (columnNames) {
    var dataFrame = this;
    if (!utils_1.isArray(columnNames)) {
        throw new Error("Expected 'columnNames' parameter to 'DataFrame.subset' to be an array of column names to keep.");
    }
    // Copy just the requested columns of a row into a new object.
    function pickColumns(row) {
        var picked = {};
        for (var columnNo = 0; columnNo < columnNames.length; columnNo++) {
            var columnName = columnNames[columnNo];
            picked[columnName] = row[columnName];
        }
        return picked;
    }
    // The same projection is applied to the pair form, leaving the index element alone.
    function pickColumnsOfPair(pair) {
        return [pair[0], pickColumns(pair[1])];
    }
    return new DataFrame(function () {
        var content = dataFrame.getContent();
        return {
            columnNames: columnNames,
            index: content.index,
            values: new select_iterable_1.SelectIterable(content.values, function (value) { return pickColumns(value); }),
            pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) { return pickColumnsOfPair(pair); }),
        };
    });
};
/**
* Create a new dataframe with the requested column or columns dropped.
*
* @param columnOrColumns Specifies the column name (a string) or columns (array of strings) to drop.
*
* @return Returns a new dataframe with a particular named column or columns removed.
*
* @example
* <pre>
* const modifiedDf = df.dropSeries("SomeColumn");
* </pre>
*
* @example
* <pre>
* const modifiedDf = df.dropSeries(["ColumnA", "ColumnB"]);
* </pre>
*/
DataFrame.prototype.dropSeries = function (columnOrColumns) {
    var dataFrame = this;
    // Normalise the argument to an array of the column names to drop.
    var droppedColumnNames;
    if (utils_1.isArray(columnOrColumns)) {
        droppedColumnNames = columnOrColumns;
    }
    else {
        if (!utils_1.isString(columnOrColumns)) {
            throw new Error("'DataFrame.dropSeries' expected either a string or an array or strings.");
        }
        droppedColumnNames = [columnOrColumns];
    }
    // Copy a row and remove the dropped columns from the copy.
    function withoutDroppedColumns(row) {
        var clone = Object.assign({}, row);
        for (var columnNo = 0; columnNo < droppedColumnNames.length; columnNo++) {
            delete clone[droppedColumnNames[columnNo]];
        }
        return clone;
    }
    return new DataFrame(function () {
        var content = dataFrame.getContent();
        // Keep only the column names that are not being dropped.
        var keptColumnNames = Array.from(content.columnNames).filter(function (columnName) {
            return droppedColumnNames.indexOf(columnName) === -1;
        });
        return {
            columnNames: keptColumnNames,
            index: content.index,
            values: new select_iterable_1.SelectIterable(content.values, function (value) {
                return withoutDroppedColumns(value);
            }),
            pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) {
                return [pair[0], withoutDroppedColumns(pair[1])];
            }),
        };
    });
};
/**
* Create a new dataframe with columns reordered.
* New column names create new columns (with undefined values); omitting existing column names causes those columns to be dropped.
*
* @param columnNames Specifies the new order for columns.
*
* @return Returns a new dataframe with columns reordered according to the order of the array of column names that is passed in.
*
* @example
* <pre>
* const reorderedDf = df.reorderSeries(["FirstColumn", "SecondColumn", "etc"]);
* </pre>
*/
DataFrame.prototype.reorderSeries = function (columnNames) {
    // Returns a new DataFrame whose rows contain exactly the requested columns, in the requested
    // order. A requested name that a row does not have yields an undefined value; columns that are
    // not requested are left out. Throws an Error when 'columnNames' is not an array of strings.
    var _this = this;
    var invalidArgumentMessage = "Expected parameter 'columnNames' to 'DataFrame.reorderSeries' to be an array with column names.";
    if (!utils_1.isArray(columnNames))
        throw new Error(invalidArgumentMessage);
    // Work from a private copy: the content callback below can run after this method has returned,
    // and the caller must not be able to change the result (or sidestep the validation that
    // follows) by mutating the array that was passed in.
    var orderedColumnNames = columnNames.slice();
    for (var i = 0; i < orderedColumnNames.length; ++i) {
        if (!utils_1.isString(orderedColumnNames[i]))
            throw new Error(invalidArgumentMessage);
    }
    // Builds a fresh row holding only the requested columns, inserted in the requested order.
    var reorderRow = function (row) {
        var output = {};
        for (var i = 0; i < orderedColumnNames.length; ++i) {
            var columnName = orderedColumnNames[i];
            output[columnName] = row[columnName];
        }
        return output;
    };
    return new DataFrame(function () {
        var content = _this.getContent();
        return {
            columnNames: orderedColumnNames,
            index: content.index,
            values: new select_iterable_1.SelectIterable(content.values, function (value) {
                return reorderRow(value);
            }),
            pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) {
                // pair[0] is passed through untouched; only the row in pair[1] is rewritten.
                return [pair[0], reorderRow(pair[1])];
            }),
        };
    });
};
/**
* Bring the column(s) with specified name(s) to the front of the column order, making it (or them) the first column(s) in the output dataframe.
*
* @param columnOrColumns Specifies the column or columns to bring to the front.
*
* @return Returns a new dataframe with 1 or more columns brought to the front of the column ordering.
*
* @example
* <pre>
* const modifiedDf = df.bringToFront("NewFirstColumn");
* </pre>
*
* @example
* <pre>
* const modifiedDf = df.bringToFront(["NewFirstColumn", "NewSecondColumn"]);
* </pre>
*/
DataFrame.prototype.bringToFront = function (columnOrColumns) {
var _this = this;
if (utils_1.isArray(columnOrColumns)) {
try {
for (var columnOrColumns_3 = __values(columnOrColumns), columnOrColumns_3_1 = columnOrColumns_3.next(); !columnOrColumns_3_1.done; columnOrColumns_3_1 = columnOrColumns_3.next()) {
var columnName = columnOrColumns_3_1.value;
if (!utils_1.isString(columnName)) {
throw new Error("Expect 'columnOrColumns' parameter to 'DataFrame.bringToFront' function to specify a column or columns via a string or an array of strings.");
}
}
}
catch (e_18_1) { e_18 = { error: e_18_1 }; }
finally {
try {
if (columnOrC