UNPKG

data-forge

Version:

JavaScript data transformation and analysis toolkit inspired by Pandas and LINQ.

1,216 lines 213 kB
/*
 * Module preamble: the strict-mode directive, five interop helpers and the require() calls
 * for the iterable, Index, Series and utility modules used by the DataFrame class below.
 *
 * Every helper is written as `(this && this.__name) || fallback`, so an implementation that
 * already exists on `this` takes precedence over the inline fallback:
 *  - __extends(d, b)      copies statics from b to d and chains d.prototype to b.prototype.
 *  - __values(o)          returns o[Symbol.iterator]() when available, otherwise an
 *                         index-based iterator driven by o.length.
 *  - __read(o, n)         drains up to n values (all when n is undefined) from o's iterator
 *                         into an array, calling the iterator's "return" if it stops early.
 *  - __spread(...)        concatenates the __read() result of every argument.
 *  - __importDefault(mod) returns mod when mod.__esModule is truthy, else { "default": mod }.
 *
 * NOTE(review): this looks like TypeScript compiler output that has been flattened onto a few
 * physical lines; the `// @ts-ignore` line comments need their own lines for the statements
 * after them to execute - confirm against the published package before relying on this copy.
 */
"use strict"; var __extends = (this && this.__extends) || (function () { var extendStatics = Object.setPrototypeOf || ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; return function (d, b) { extendStatics(d, b); function __() { this.constructor = d; } d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; })(); var __values = (this && this.__values) || function (o) { var m = typeof Symbol === "function" && o[Symbol.iterator], i = 0; if (m) return m.call(o); return { next: function () { if (o && i >= o.length) o = void 0; return { value: o && o[i++], done: !o }; } }; }; var __read = (this && this.__read) || function (o, n) { var m = typeof Symbol === "function" && o[Symbol.iterator]; if (!m) return o; var i = m.call(o), r, ar = [], e; try { while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value); } catch (error) { e = { error: error }; } finally { try { if (r && !r.done && (m = i["return"])) m.call(i); } finally { if (e) throw e.error; } } return ar; }; var __spread = (this && this.__spread) || function () { for (var ar = [], i = 0; i < arguments.length; i++) ar = ar.concat(__read(arguments[i])); return ar; }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? 
mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); var empty_iterable_1 = require("./iterables/empty-iterable"); var count_iterable_1 = require("./iterables/count-iterable"); var multi_iterable_1 = require("./iterables/multi-iterable"); var select_iterable_1 = require("./iterables/select-iterable"); var select_many_iterable_1 = require("./iterables/select-many-iterable"); var take_iterable_1 = require("./iterables/take-iterable"); var take_while_iterable_1 = require("./iterables/take-while-iterable"); var where_iterable_1 = require("./iterables/where-iterable"); var concat_iterable_1 = require("./iterables/concat-iterable"); var dataframe_window_iterable_1 = require("./iterables/dataframe-window-iterable"); var reverse_iterable_1 = require("./iterables/reverse-iterable"); var zip_iterable_1 = require("./iterables/zip-iterable"); var csv_rows_iterable_1 = require("./iterables/csv-rows-iterable"); var distinct_iterable_1 = require("./iterables/distinct-iterable"); var dataframe_rolling_window_iterable_1 = require("./iterables/dataframe-rolling-window-iterable"); var dataframe_variable_window_iterable_1 = require("./iterables/dataframe-variable-window-iterable"); var ordered_iterable_1 = require("./iterables/ordered-iterable"); var index_1 = require("./index"); var extract_element_iterable_1 = require("./iterables/extract-element-iterable"); var skip_iterable_1 = require("./iterables/skip-iterable"); var skip_while_iterable_1 = require("./iterables/skip-while-iterable"); var repeat_iterable_1 = require("./iterables/repeat-iterable"); var tile_iterable_1 = require("./iterables/tile-iterable"); var ravel_iterable_1 = require("./iterables/ravel-iterable"); // @ts-ignore var easy_table_1 = __importDefault(require("easy-table")); // @ts-ignore var dayjs_1 = __importDefault(require("dayjs")); var series_1 = require("./series"); var column_names_iterable_1 = require("./iterables/column-names-iterable"); var utils_1 = 
require("./utils"); var json5_1 = __importDefault(require("json5")); // @ts-ignore var papaparse_1 = __importDefault(require("papaparse")); /** * Class that represents a dataframe. * A dataframe contains an indexed sequence of data records. * Think of it as a spreadsheet or CSV file in memory. * * Each data record contains multiple named fields, the value of each field represents one row in a column of data. * Each column of data is a named {@link Series}. * You think of a dataframe a collection of named data series. * * @typeparam IndexT The type to use for the index. * @typeparam ValueT The type to use for each row/data record. */ var DataFrame = /** @class */ (function () { /** * Create a dataframe. * * @param config This can be an array, a configuration object or a function that lazily produces a configuration object. * * It can be an array that specifies the data records that the dataframe contains. * * It can be a {@link IDataFrameConfig} that defines the data and configuration of the dataframe. * * Or it can be a function that lazily produces a {@link IDataFrameConfig}. * * @example * <pre> * * const df = new DataFrame(); * </pre> * * @example * <pre> * * const df = new DataFrame([ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }]); * </pre> * * @example * <pre> * * const df = new DataFrame({ index: [1, 2, 3, 4], values: [ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }] }); * </pre> * * @example * <pre> * * const lazyInit = () => ({ index: [1, 2, 3, 4], values: [ { A: 10 }, { A: 20 }, { A: 30 }, { A: 40 }] }); * const df = new DataFrame(lazyInit); * </pre> */ function DataFrame(config) { // // Function to lazy evaluate the configuration of the dataframe. // this.configFn = null; // // The content of the dataframe. // When this is null it means the dataframe is yet to be lazy initialised. // this.content = null; // // Indexed content of the dataframe. 
// this.indexedContent = null; if (config) { if (utils_1.isFunction(config)) { this.configFn = config; } else if (utils_1.isArray(config) || utils_1.isFunction(config[Symbol.iterator])) { this.content = DataFrame.initFromArray(config); } else { this.content = DataFrame.initFromConfig(config); } } else { this.content = DataFrame.initEmpty(); } } // // Initialise dataframe content from an iterable of values. // DataFrame.initFromArray = function (arr) { var firstResult = arr[Symbol.iterator]().next(); var columnNames = !firstResult.done ? Object.keys(firstResult.value) : []; return { index: DataFrame.defaultCountIterable, values: arr, pairs: new multi_iterable_1.MultiIterable([DataFrame.defaultCountIterable, arr]), isBaked: true, columnNames: columnNames, }; }; // // Initialise an empty dataframe. // DataFrame.initEmpty = function () { return { index: DataFrame.defaultEmptyIterable, values: DataFrame.defaultEmptyIterable, pairs: DataFrame.defaultEmptyIterable, isBaked: true, columnNames: [], }; }; // // Initialise dataframe column names. // DataFrame.initColumnNames = function (inputColumnNames, isCaseSensitive) { var outputColumnNames = []; var columnNamesMap = {}; // Convert column names to strings. var columnNames = Array.from(inputColumnNames).map(function (columnName) { return columnName.toString(); }); try { // Search for duplicate column names. for (var columnNames_1 = __values(columnNames), columnNames_1_1 = columnNames_1.next(); !columnNames_1_1.done; columnNames_1_1 = columnNames_1.next()) { var columnName = columnNames_1_1.value; var columnNameLwr = isCaseSensitive !== undefined && isCaseSensitive ? 
columnName : columnName.toLowerCase(); if (columnNamesMap[columnNameLwr] === undefined) { columnNamesMap[columnNameLwr] = 1; } else { columnNamesMap[columnNameLwr] += 1; } } } catch (e_1_1) { e_1 = { error: e_1_1 }; } finally { try { if (columnNames_1_1 && !columnNames_1_1.done && (_a = columnNames_1.return)) _a.call(columnNames_1); } finally { if (e_1) throw e_1.error; } } var columnNoMap = {}; try { for (var columnNames_2 = __values(columnNames), columnNames_2_1 = columnNames_2.next(); !columnNames_2_1.done; columnNames_2_1 = columnNames_2.next()) { var columnName = columnNames_2_1.value; var columnNameLwr = isCaseSensitive !== undefined && isCaseSensitive ? columnName : columnName.toLowerCase(); if (columnNamesMap[columnNameLwr] > 1) { var curColumnNo = 1; // There are duplicates of this column. if (columnNoMap[columnNameLwr] !== undefined) { curColumnNo = columnNoMap[columnNameLwr]; } outputColumnNames.push(columnName + "." + curColumnNo); columnNoMap[columnNameLwr] = curColumnNo + 1; } else { // No duplicates. outputColumnNames.push(columnName); } } } catch (e_2_1) { e_2 = { error: e_2_1 }; } finally { try { if (columnNames_2_1 && !columnNames_2_1.done && (_b = columnNames_2.return)) _b.call(columnNames_2); } finally { if (e_2) throw e_2.error; } } return outputColumnNames; var e_1, _a, e_2, _b; }; // // Check that a value is an interable. // DataFrame.checkIterable = function (input, fieldName) { if (utils_1.isArray(input)) { // Ok } else if (utils_1.isFunction(input[Symbol.iterator])) { // Assume it's an iterable. // Ok } else { // Not ok throw new Error("Expected '" + fieldName + "' field of DataFrame config object to be an array of values or an iterable of values."); } }; // // Initialise dataframe content from a config object. 
//
    DataFrame.initFromConfig = function (config) {
        var columnNames;
        var values;
        var pairs;
        if (config.pairs) {
            DataFrame.checkIterable(config.pairs, "pairs");
            pairs = config.pairs;
        }
        if (config.columns) {
            // Column-oriented input: either an iterable of { name, series } entries
            // or an object whose keys are the column names.
            var columnLookup = config.columns;
            if (utils_1.isArray(columnLookup) || utils_1.isFunction(columnLookup[Symbol.iterator])) {
                var columnList = columnLookup;
                columnNames = Array.from(columnList).map(function (column) { return column.name; });
                columnLookup = utils_1.toMap(columnList, function (column) { return column.name; }, function (column) { return column.series; });
            }
            else if (utils_1.isObject(columnLookup)) {
                columnNames = Object.keys(columnLookup);
            }
            else {
                throw new Error("Expected 'columns' member of 'config' parameter to DataFrame constructor to be an object with fields that define columns.");
            }
            var columnIterables = columnNames.map(function (name) {
                DataFrame.checkIterable(columnLookup[name], name);
                return columnLookup[name];
            });
            values = new csv_rows_iterable_1.CsvRowsIterable(columnNames, new multi_iterable_1.MultiIterable(columnIterables));
        }
        else {
            var namesSupplied = !!config.columnNames;
            if (namesSupplied) {
                columnNames = this.initColumnNames(config.columnNames, config.caseSensitive);
            }
            if (config.rows) {
                if (!namesSupplied) {
                    // No names given: generate "Column.N" names from the count iterable.
                    columnNames = new select_iterable_1.SelectIterable(new count_iterable_1.CountIterable(), function (c) { return "Column." + c.toString(); });
                }
                DataFrame.checkIterable(config.rows, 'rows');
                // Convert data from rows to columns.
                values = new csv_rows_iterable_1.CsvRowsIterable(columnNames, config.rows);
            }
            else if (config.values) {
                DataFrame.checkIterable(config.values, 'values');
                values = config.values;
                if (!namesSupplied) {
                    columnNames = new column_names_iterable_1.ColumnNamesIterable(values, config.considerAllRows || false);
                }
            }
            else if (pairs) {
                // Values are the second element of each pair.
                values = new extract_element_iterable_1.ExtractElementIterable(pairs, 1);
                if (!namesSupplied) {
                    columnNames = new column_names_iterable_1.ColumnNamesIterable(values, config.considerAllRows || false);
                }
            }
            else {
                values = DataFrame.defaultEmptyIterable;
                if (!namesSupplied) {
                    columnNames = DataFrame.defaultEmptyIterable;
                }
            }
        }
        var index;
        if (config.index) {
            DataFrame.checkIterable(config.index, 'index');
            index = config.index;
        }
        else {
            // Without an explicit index, use the first element of each pair when pairs
            // were supplied, otherwise the default count iterable.
            index = pairs
                ? new extract_element_iterable_1.ExtractElementIterable(pairs, 0)
                : DataFrame.defaultCountIterable;
        }
        if (!pairs) {
            pairs = new multi_iterable_1.MultiIterable([index, values]);
        }
        return {
            index: index,
            values: values,
            pairs: pairs,
            isBaked: config.baked !== undefined ? config.baked : false,
            columnNames: columnNames,
        };
    };
    //
    // Ensure the dataframe content has been initialised.
    // Runs the lazy configuration function only while no content exists yet.
    //
    DataFrame.prototype.lazyInit = function () {
        var needsInit = this.content === null && this.configFn !== null;
        if (needsInit) {
            this.content = DataFrame.initFromConfig(this.configFn());
        }
    };
    //
    // Ensure the dataframe content is lazy initialised and return it.
    //
    DataFrame.prototype.getContent = function () {
        this.lazyInit();
        return this.content;
    };
    //
    // Lazy builds content index, does basic hash lookup.
// DataFrame.prototype.getRowByIndex = function (index) { if (!this.indexedContent) { this.indexedContent = new Map(); try { for (var _a = __values(this.getContent().pairs), _b = _a.next(); !_b.done; _b = _a.next()) { var pair = _b.value; this.indexedContent.set(pair[0], pair[1]); } } catch (e_4_1) { e_4 = { error: e_4_1 }; } finally { try { if (_b && !_b.done && (_c = _a.return)) _c.call(_a); } finally { if (e_4) throw e_4.error; } } } return this.indexedContent.get(index); var e_4, _c; }; /** * Get an iterator to enumerate the rows of the dataframe. * Enumerating the iterator forces lazy evaluation to complete. * This function is automatically called by `for...of`. * * @return An iterator for the dataframe. * * @example * <pre> * * for (const row of df) { * // ... do something with the row ... * } * </pre> */ DataFrame.prototype[Symbol.iterator] = function () { return this.getContent().values[Symbol.iterator](); }; /** * Get the names of the columns in the dataframe. * * @return Returns an array of the column names in the dataframe. * * @example * <pre> * * console.log(df.getColumnNames()); * </pre> */ DataFrame.prototype.getColumnNames = function () { return Array.from(this.getContent().columnNames); }; /** * Retreive the collection of all columns in the dataframe. * * @return Returns a {@link Series} containing the names of the columns in the dataframe. * * @example * <pre> * * for (const column in df.getColumns()) { * console.log("Column name: "); * console.log(column.name); * * console.log("Data:"); * console.log(column.series.toArray()); * } * </pre> */ DataFrame.prototype.getColumns = function () { var _this = this; return new series_1.Series(function () { var columnNames = _this.getColumnNames(); return { values: columnNames.map(function (columnName) { var series = _this.getSeries(columnName).skipWhile(function (value) { return value === undefined || value === null; }); var firstValue = series.any() ? 
series.first() : undefined; return { name: columnName, type: utils_1.determineType(firstValue), series: series, }; }), }; }); }; /** * Cast the value of the dataframe to a new type. * This operation has no effect but to retype the value that the dataframe contains. * * @return The same dataframe, but with the type changed. * * @example * <pre> * * const castDf = df.cast<SomeOtherType>(); * </pre> */ DataFrame.prototype.cast = function () { return this; }; /** * Get the index for the dataframe. * * @return The {@link Index} for the dataframe. * * @example * <pre> * * const index = df.getIndex(); * </pre> */ DataFrame.prototype.getIndex = function () { var _this = this; return new index_1.Index(function () { return ({ values: _this.getContent().index }); }); }; /** * Set a named column as the {@link Index} of the dataframe. * * @param columnName Name of the column to use as the new {@link Index} of the returned dataframe. * * @return Returns a new dataframe with the values of the specified column as the new {@link Index}. * * @example * <pre> * * const indexedDf = df.setIndex("SomeColumn"); * </pre> */ DataFrame.prototype.setIndex = function (columnName) { if (!utils_1.isString(columnName)) throw new Error("Expected 'columnName' parameter to 'DataFrame.setIndex' to be a string that specifies the name of the column to set as the index for the dataframe."); return this.withIndex(this.getSeries(columnName)); }; /** * Apply a new {@link Index} to the dataframe. * * @param newIndex The new array or iterable to be the new {@link Index} of the dataframe. Can also be a selector to choose the {@link Index} for each row in the dataframe. * * @return Returns a new dataframe or dataframe with the specified {@link Index} attached. 
* * @example * <pre> * * const indexedDf = df.withIndex([10, 20, 30]); * </pre> * * @example * <pre> * * const indexedDf = df.withIndex(df.getSeries("SomeColumn")); * </pre> * * @example * <pre> * * const indexedDf = df.withIndex(row => row.SomeColumn); * </pre> * * @example * <pre> * * const indexedDf = df.withIndex(row => row.SomeColumn + 20); * </pre> */ DataFrame.prototype.withIndex = function (newIndex) { var _this = this; if (utils_1.isFunction(newIndex)) { return new DataFrame(function () { var content = _this.getContent(); return { columnNames: content.columnNames, values: content.values, index: _this.deflate(newIndex), }; }); } else { DataFrame.checkIterable(newIndex, 'newIndex'); return new DataFrame(function () { var content = _this.getContent(); return { columnNames: content.columnNames, values: content.values, index: newIndex, }; }); } }; /** * Resets the {@link Index} of the dataframe back to the default zero-based sequential integer index. * * @return Returns a new dataframe with the {@link Index} reset to the default zero-based index. * * @example * <pre> * * const dfWithResetIndex = df.resetIndex(); * </pre> */ DataFrame.prototype.resetIndex = function () { var _this = this; return new DataFrame(function () { var content = _this.getContent(); return { columnNames: content.columnNames, values: content.values, }; }); }; /** * Extract a {@link Series} from a named column in the dataframe. * * @param columnName Specifies the name of the column that contains the {@link Series} to retreive. * * @return Returns the {@link Series} extracted from the named column in the dataframe. 
* * @example * <pre> * * const series = df.getSeries("SomeColumn"); * </pre> */ DataFrame.prototype.getSeries = function (columnName) { var _this = this; if (!utils_1.isString(columnName)) throw new Error("Expected 'columnName' parameter to 'DataFrame.getSeries' function to be a string that specifies the name of the column to retreive."); return new series_1.Series(function () { return ({ values: new select_iterable_1.SelectIterable(_this.getContent().values, function (row) { return row[columnName]; }), index: _this.getContent().index, }); }); }; /** * Determine if the dataframe contains a {@link Series} the specified named column. * * @param columnName Name of the column to check for. * * @return Returns true if the dataframe contains the requested {@link Series}, otherwise returns false. * * @example * <pre> * * if (df.hasSeries("SomeColumn")) { * // ... the dataframe contains a series with the specified column name ... * } * </pre> */ DataFrame.prototype.hasSeries = function (columnName) { var columnNameLwr = columnName.toLowerCase(); try { for (var _a = __values(this.getColumnNames()), _b = _a.next(); !_b.done; _b = _a.next()) { var existingColumnName = _b.value; if (existingColumnName.toLowerCase() === columnNameLwr) { return true; } } } catch (e_5_1) { e_5 = { error: e_5_1 }; } finally { try { if (_b && !_b.done && (_c = _a.return)) _c.call(_a); } finally { if (e_5) throw e_5.error; } } return false; var e_5, _c; }; /** * Verify the existence of a name column and extracts the {@link Series} for it. * Throws an exception if the requested column doesn't exist. * * @param columnName Name of the column to extract. * * @return Returns the {@link Series} for the column if it exists, otherwise it throws an exception. * * @example * <pre> * * try { * const series = df.expectSeries("SomeColumn"); * // ... do something with the series ... * } * catch (err) { * // ... the dataframe doesn't contain the column "SomeColumn" ... 
* } * </pre> */ DataFrame.prototype.expectSeries = function (columnName) { if (!this.hasSeries(columnName)) { throw new Error("Expected dataframe to contain series with column name: '" + columnName + "'."); } return this.getSeries(columnName); }; /** * Create a new dataframe with a replaced or additional column specified by the passed-in series. * * @param columnNameOrSpec The name of the column to add or replace or a {@link IColumnGenSpec} that defines the columns to add. * @param series When columnNameOrSpec is a string that identifies the column to add, this specifies the {@link Series} to add to the dataframe or a function that produces a series (given a dataframe). * * @return Returns a new dataframe replacing or adding a particular named column. * * @example * <pre> * * const modifiedDf = df.withSeries("ANewColumn", new Series([1, 2, 3])); * </pre> * * @example * <pre> * * const modifiedDf = df.withSeries("ANewColumn", df => * df.getSeries("SourceData").select(aTransformation) * ); * </pre> * * @example * <pre> * * const modifiedDf = df.withSeries({ * ANewColumn: new Series([1, 2, 3]), * SomeOtherColumn: new Series([10, 20, 30]) * }); * <pre> * * @example * <pre> * * const modifiedDf = df.withSeries({ * ANewColumn: df => df.getSeries("SourceData").select(aTransformation)) * }); * <pre> */ DataFrame.prototype.withSeries = function (columnNameOrSpec, series) { var _this = this; if (!utils_1.isObject(columnNameOrSpec)) { if (!utils_1.isString(columnNameOrSpec)) throw new Error("Expected 'columnNameOrSpec' parameter to 'DataFrame.withSeries' function to be a string that specifies the column to set or replace."); if (!utils_1.isFunction(series)) { if (!utils_1.isObject(series)) throw new Error("Expected 'series' parameter to 'DataFrame.withSeries' to be a Series object or a function that takes a dataframe and produces a Series."); } } else { if (!utils_1.isUndefined(series)) throw new Error("Expected 'series' parameter to 'DataFrame.withSeries' to not be set when 
'columnNameOrSpec is an object."); } if (utils_1.isObject(columnNameOrSpec)) { var columnSpec = columnNameOrSpec; var columnNames = Object.keys(columnSpec); var workingDataFrame = this; try { for (var columnNames_4 = __values(columnNames), columnNames_4_1 = columnNames_4.next(); !columnNames_4_1.done; columnNames_4_1 = columnNames_4.next()) { var columnName_1 = columnNames_4_1.value; workingDataFrame = workingDataFrame.withSeries(columnName_1, columnSpec[columnName_1]); } } catch (e_6_1) { e_6 = { error: e_6_1 }; } finally { try { if (columnNames_4_1 && !columnNames_4_1.done && (_a = columnNames_4.return)) _a.call(columnNames_4); } finally { if (e_6) throw e_6.error; } } return workingDataFrame.cast(); } var columnName = columnNameOrSpec; if (this.none()) { // We have an empty data frame. var importSeries = void 0; if (utils_1.isFunction(series)) { importSeries = series(this); } else { importSeries = series; } return importSeries.inflate(function (value) { var row = {}; row[columnName] = value; return row; }) .cast(); } return new DataFrame(function () { var importSeries; if (utils_1.isFunction(series)) { importSeries = series(_this); } else { importSeries = series; } var seriesValueMap = utils_1.toMap2(importSeries.toPairs(), function (pair) { return pair[0]; }, function (pair) { return pair[1]; }); var newColumnNames = utils_1.makeDistinct(_this.getColumnNames().concat([columnName])); return { columnNames: newColumnNames, index: _this.getContent().index, pairs: new select_iterable_1.SelectIterable(_this.getContent().pairs, function (pair) { var index = pair[0]; var value = pair[1]; var modified = Object.assign({}, value); modified[columnName] = seriesValueMap.get(index); return [ index, modified ]; }), }; }); var e_6, _a; }; /** * Merge multiple dataframes into a single dataframe. * Rows are merged by indexed. * Same named columns in subsequent dataframes override columns earlier dataframes. * * @param dataFrames An array or series of dataframes to merge. 
* * @returns The merged data frame. * * @example * <pre> * * const mergedDF = DataFrame.merge([df1, df2, etc]); * </pre> */ DataFrame.merge = function (dataFrames) { var rowMap = new Map(); try { for (var dataFrames_1 = __values(dataFrames), dataFrames_1_1 = dataFrames_1.next(); !dataFrames_1_1.done; dataFrames_1_1 = dataFrames_1.next()) { var dataFrame = dataFrames_1_1.value; try { for (var _a = __values(dataFrame.toPairs()), _b = _a.next(); !_b.done; _b = _a.next()) { var pair = _b.value; var index = pair[0]; if (!rowMap.has(index)) { var clone = Object.assign({}, pair[1]); rowMap.set(index, clone); } else { rowMap.set(index, Object.assign(rowMap.get(index), pair[1])); } } } catch (e_7_1) { e_7 = { error: e_7_1 }; } finally { try { if (_b && !_b.done && (_c = _a.return)) _c.call(_a); } finally { if (e_7) throw e_7.error; } } } } catch (e_8_1) { e_8 = { error: e_8_1 }; } finally { try { if (dataFrames_1_1 && !dataFrames_1_1.done && (_d = dataFrames_1.return)) _d.call(dataFrames_1); } finally { if (e_8) throw e_8.error; } } var allColumnNames = Array.from(dataFrames) .map(function (dataFrame) { return dataFrame.getColumnNames(); }) .reduce(function (prev, next) { return prev.concat(next); }, []); var newColumnNames = utils_1.makeDistinct(allColumnNames); var mergedPairs = Array.from(rowMap.keys()).map(function (index) { return [index, rowMap.get(index)]; }); mergedPairs.sort(function (a, b) { if (a[0] === b[0]) { return 0; } else if (a[0] > b[0]) { return 1; } else { return -1; } }); return new DataFrame({ columnNames: newColumnNames, pairs: mergedPairs, }); var e_8, _d, e_7, _c; }; /** * Merge one or more dataframes into this dataframe. * Rows are merged by indexed. * Same named columns in subsequent dataframes override columns in earlier dataframes. * * @param otherDataFrames... One or more dataframes to merge into this dataframe. * * @returns The merged data frame. 
* * @example * <pre> * * const mergedDF = df1.merge(df2); * </pre> * * <pre> * * const mergedDF = df1.merge(df2, df3, etc); * </pre> */ DataFrame.prototype.merge = function () { var otherDataFrames = []; for (var _i = 0; _i < arguments.length; _i++) { otherDataFrames[_i] = arguments[_i]; } return DataFrame.merge([this].concat(otherDataFrames)); }; /** * Add a series to the dataframe, but only if it doesn't already exist. * * @param columnNameOrSpec The name of the series to add or a {@link IColumnGenSpec} that specifies the columns to add. * @param series If columnNameOrSpec is a string that specifies the name of the series to add, this specifies the actual {@link Series} to add or a selector that generates the series given the dataframe. * * @return Returns a new dataframe with the specified series added, if the series didn't already exist. Otherwise if the requested series already exists the same dataframe is returned. * * @example * <pre> * * const updatedDf = df.ensureSeries("ANewColumn", new Series([1, 2, 3])); * </pre> * * @example * <pre> * * const updatedDf = df.ensureSeries("ANewColumn", df => * df.getSeries("AnExistingSeries").select(aTransformation) * ); * </pre> * * @example * <pre> * * const modifiedDf = df.ensureSeries({ * ANewColumn: new Series([1, 2, 3]), * SomeOtherColumn: new Series([10, 20, 30]) * }); * <pre> * * @example * <pre> * * const modifiedDf = df.ensureSeries({ * ANewColumn: df => df.getSeries("SourceData").select(aTransformation)) * }); * <pre> */ DataFrame.prototype.ensureSeries = function (columnNameOrSpec, series) { if (!utils_1.isObject(columnNameOrSpec)) { if (!utils_1.isString(columnNameOrSpec)) throw new Error("Expected 'columnNameOrSpec' parameter to 'DataFrame.ensureSeries' function to be a string that specifies the column to set or replace."); if (!utils_1.isFunction(series)) { if (!utils_1.isObject(series)) throw new Error("Expected 'series' parameter to 'DataFrame.ensureSeries' to be a Series object or a function that takes 
a dataframe and produces a Series."); } } else { if (!utils_1.isUndefined(series)) throw new Error("Expected 'series' parameter to 'DataFrame.ensureSeries' to not be set when 'columnNameOrSpec is an object."); } if (utils_1.isObject(columnNameOrSpec)) { var columnSpec = columnNameOrSpec; var columnNames = Object.keys(columnNameOrSpec); var workingDataFrame = this; try { for (var columnNames_5 = __values(columnNames), columnNames_5_1 = columnNames_5.next(); !columnNames_5_1.done; columnNames_5_1 = columnNames_5.next()) { var columnName_2 = columnNames_5_1.value; workingDataFrame = workingDataFrame.ensureSeries(columnName_2, columnSpec[columnName_2]); } } catch (e_9_1) { e_9 = { error: e_9_1 }; } finally { try { if (columnNames_5_1 && !columnNames_5_1.done && (_a = columnNames_5.return)) _a.call(columnNames_5); } finally { if (e_9) throw e_9.error; } } return workingDataFrame; } var columnName = columnNameOrSpec; if (this.hasSeries(columnName)) { return this; // Already have the series. } else { return this.withSeries(columnName, series); } var e_9, _a; }; /** * Create a new dataframe with just a subset of columns. * * @param columnNames Array of column names to include in the new dataframe. * * @return Returns a dataframe with a subset of columns from the original dataframe. 
* * @example * <pre> * const subsetDf = df.subset(["ColumnA", "ColumnB"]); * </pre> */ DataFrame.prototype.subset = function (columnNames) { var _this = this; if (!utils_1.isArray(columnNames)) throw new Error("Expected 'columnNames' parameter to 'DataFrame.subset' to be an array of column names to keep."); return new DataFrame(function () { var content = _this.getContent(); return { columnNames: columnNames, index: content.index, values: new select_iterable_1.SelectIterable(content.values, function (value) { var output = {}; try { for (var columnNames_6 = __values(columnNames), columnNames_6_1 = columnNames_6.next(); !columnNames_6_1.done; columnNames_6_1 = columnNames_6.next()) { var columnName = columnNames_6_1.value; output[columnName] = value[columnName]; } } catch (e_10_1) { e_10 = { error: e_10_1 }; } finally { try { if (columnNames_6_1 && !columnNames_6_1.done && (_a = columnNames_6.return)) _a.call(columnNames_6); } finally { if (e_10) throw e_10.error; } } return output; var e_10, _a; }), pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) { var output = {}; var value = pair[1]; try { for (var columnNames_7 = __values(columnNames), columnNames_7_1 = columnNames_7.next(); !columnNames_7_1.done; columnNames_7_1 = columnNames_7.next()) { var columnName = columnNames_7_1.value; output[columnName] = value[columnName]; } } catch (e_11_1) { e_11 = { error: e_11_1 }; } finally { try { if (columnNames_7_1 && !columnNames_7_1.done && (_a = columnNames_7.return)) _a.call(columnNames_7); } finally { if (e_11) throw e_11.error; } } return [pair[0], output]; var e_11, _a; }), }; }); }; /** * Create a new dataframe with the requested column or columns dropped. * * @param columnOrColumns Specifies the column name (a string) or columns (array of strings) to drop. * * @return Returns a new dataframe with a particular named column or columns removed. 
* * @example * <pre> * const modifiedDf = df.dropSeries("SomeColumn"); * </pre> * * @example * <pre> * const modifiedDf = df.dropSeries(["ColumnA", "ColumnB"]); * </pre> */ DataFrame.prototype.dropSeries = function (columnOrColumns) { var _this = this; if (!utils_1.isArray(columnOrColumns)) { if (!utils_1.isString(columnOrColumns)) throw new Error("'DataFrame.dropSeries' expected either a string or an array or strings."); columnOrColumns = [columnOrColumns]; // Convert to array for coding convenience. } return new DataFrame(function () { var content = _this.getContent(); var newColumnNames = []; try { for (var _a = __values(content.columnNames), _b = _a.next(); !_b.done; _b = _a.next()) { var columnName = _b.value; if (columnOrColumns.indexOf(columnName) === -1) { newColumnNames.push(columnName); // This column is not being dropped. } } } catch (e_12_1) { e_12 = { error: e_12_1 }; } finally { try { if (_b && !_b.done && (_c = _a.return)) _c.call(_a); } finally { if (e_12) throw e_12.error; } } return { columnNames: newColumnNames, index: content.index, values: new select_iterable_1.SelectIterable(content.values, function (value) { var clone = Object.assign({}, value); try { for (var columnOrColumns_1 = __values(columnOrColumns), columnOrColumns_1_1 = columnOrColumns_1.next(); !columnOrColumns_1_1.done; columnOrColumns_1_1 = columnOrColumns_1.next()) { var droppedColumnName = columnOrColumns_1_1.value; delete clone[droppedColumnName]; } } catch (e_13_1) { e_13 = { error: e_13_1 }; } finally { try { if (columnOrColumns_1_1 && !columnOrColumns_1_1.done && (_a = columnOrColumns_1.return)) _a.call(columnOrColumns_1); } finally { if (e_13) throw e_13.error; } } return clone; var e_13, _a; }), pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) { var clone = Object.assign({}, pair[1]); try { for (var columnOrColumns_2 = __values(columnOrColumns), columnOrColumns_2_1 = columnOrColumns_2.next(); !columnOrColumns_2_1.done; columnOrColumns_2_1 = 
columnOrColumns_2.next()) { var droppedColumnName = columnOrColumns_2_1.value; delete clone[droppedColumnName]; } } catch (e_14_1) { e_14 = { error: e_14_1 }; } finally { try { if (columnOrColumns_2_1 && !columnOrColumns_2_1.done && (_a = columnOrColumns_2.return)) _a.call(columnOrColumns_2); } finally { if (e_14) throw e_14.error; } } return [pair[0], clone]; var e_14, _a; }), }; var e_12, _c; }); }; /** * Create a new dataframe with columns reordered. * New column names create new columns (with undefined values), omitting existing column names causes those columns to be dropped. * * @param columnNames Specifies the new order for columns. * * @return Returns a new dataframe with columns reordered according to the order of the array of column names that is passed in. * * @example * <pre> * const reorderedDf = df.reorderSeries(["FirstColumn", "SecondColumn", "etc"]); * </pre> */ DataFrame.prototype.reorderSeries = function (columnNames) { var _this = this; if (!utils_1.isArray(columnNames)) throw new Error("Expected parameter 'columnNames' to 'DataFrame.reorderSeries' to be an array with column names."); try { for (var columnNames_8 = __values(columnNames), columnNames_8_1 = columnNames_8.next(); !columnNames_8_1.done; columnNames_8_1 = columnNames_8.next()) { var columnName = columnNames_8_1.value; if (!utils_1.isString(columnName)) throw new Error("Expected parameter 'columnNames' to 'DataFrame.reorderSeries' to be an array with column names."); } } catch (e_15_1) { e_15 = { error: e_15_1 }; } finally { try { if (columnNames_8_1 && !columnNames_8_1.done && (_a = columnNames_8.return)) _a.call(columnNames_8); } finally { if (e_15) throw e_15.error; } } return new DataFrame(function () { var content = _this.getContent(); return { columnNames: columnNames, index: content.index, values: new select_iterable_1.SelectIterable(content.values, function (value) { var output = {}; try { for (var columnNames_9 = __values(columnNames), columnNames_9_1 = columnNames_9.next(); 
!columnNames_9_1.done; columnNames_9_1 = columnNames_9.next()) { var columnName = columnNames_9_1.value; output[columnName] = value[columnName]; } } catch (e_16_1) { e_16 = { error: e_16_1 }; } finally { try { if (columnNames_9_1 && !columnNames_9_1.done && (_a = columnNames_9.return)) _a.call(columnNames_9); } finally { if (e_16) throw e_16.error; } } return output; var e_16, _a; }), pairs: new select_iterable_1.SelectIterable(content.pairs, function (pair) { var value = pair[1]; var output = {}; try { for (var columnNames_10 = __values(columnNames), columnNames_10_1 = columnNames_10.next(); !columnNames_10_1.done; columnNames_10_1 = columnNames_10.next()) { var columnName = columnNames_10_1.value; output[columnName] = value[columnName]; } } catch (e_17_1) { e_17 = { error: e_17_1 }; } finally { try { if (columnNames_10_1 && !columnNames_10_1.done && (_a = columnNames_10.return)) _a.call(columnNames_10); } finally { if (e_17) throw e_17.error; } } return [pair[0], output]; var e_17, _a; }), }; }); var e_15, _a; }; /** * Bring the column(s) with specified name(s) to the front of the column order, making it (or them) the first column(s) in the output dataframe. * * @param columnOrColumns Specifies the column or columns to bring to the front. * * @return Returns a new dataframe with 1 or more columns bought to the front of the column ordering. 
* * @example * <pre> * const modifiedDf = df.bringToFront("NewFirstColumn"); * </pre> * * @example * <pre> * const modifiedDf = df.bringToFront(["NewFirstColumn", "NewSecondColumn"]); * </pre> */ DataFrame.prototype.bringToFront = function (columnOrColumns) { var _this = this; if (utils_1.isArray(columnOrColumns)) { try { for (var columnOrColumns_3 = __values(columnOrColumns), columnOrColumns_3_1 = columnOrColumns_3.next(); !columnOrColumns_3_1.done; columnOrColumns_3_1 = columnOrColumns_3.next()) { var columnName = columnOrColumns_3_1.value; if (!utils_1.isString(columnName)) { throw new Error("Expect 'columnOrColumns' parameter to 'DataFrame.bringToFront' function to specify a column or columns via a string or an array of strings."); } } } catch (e_18_1) { e_18 = { error: e_18_1 }; } finally { try { if (columnOrC