data-forge
Version:
JavaScript data transformation and analysis toolkit inspired by Pandas and LINQ.
1,388 lines • 178 kB
TypeScript
import { IIndex } from './index';
import { ISeries, SelectorWithIndexFn, PredicateFn, ComparerFn, SelectorFn, AggregateFn, Zip2Fn, Zip3Fn, ZipNFn, CallbackFn, JoinFn, GapFillFn } from './series';
import { ISerializedDataFrame } from "@data-forge/serialization";
/**
 * An object whose fields specify the data for named columns.
 * Each key is a column name; the corresponding value supplies that
 * column's data as any iterable or an existing {@link Series}.
 */
export interface IColumnSpec {
[index: string]: Iterable<any> | ISeries<any, any>;
}
/**
 * Specifies the format per column when converting columns to strings.
 * Each key is a column name; the corresponding value is the format
 * string to apply to that column's values.
 */
export interface IFormatSpec {
[index: string]: string;
}
/**
 * A function that aggregates a series down to a single result.
 *
 * @typeparam IndexT The index type of the input series.
 * @typeparam ValueT The value type of the input series.
 * @typeparam OutputT The type of the aggregated result.
 */
export declare type SeriesAggregatorFn<IndexT, ValueT, OutputT> = (values: ISeries<IndexT, ValueT>) => OutputT;
/**
 * Specification that can produce multiple output columns from a single input column of a dataframe.
 * Each key is the name of an output column; the corresponding value is the
 * aggregator function that computes that output column's value.
 */
export interface IColumnAggregatorSpec {
[outputColumnName: string]: SeriesAggregatorFn<any, any, any>;
}
/**
 * Specification that can aggregate multiple input columns in a dataframe to produce multiple output columns.
 * Each key names an input column; the corresponding value is either a single
 * aggregator function or an {@link IColumnAggregatorSpec} that fans the input
 * column out to several output columns.
 */
export interface IMultiColumnAggregatorSpec {
[inputColumnName: string]: SeriesAggregatorFn<any, any, any> | IColumnAggregatorSpec;
}
/**
 * Defines the configuration for a new column: its name plus the data it contains.
 */
export interface IColumnConfig {
/**
 * The name of the new column.
 */
name: string;
/**
 * The series of values for the column, given as any iterable or an existing {@link Series}.
 */
series: Iterable<any> | ISeries<any, any>;
}
/**
 * Options for CSV output.
 *
 * The options object is passed directly to [PapaParse.unparse](https://www.papaparse.com/docs#unparse), please see [PapaParse docs for additional options](https://www.papaparse.com/docs#unparse-config-default).
 */
export interface ICSVOutputOptions {
/**
 * Enable or disable output of the CSV header line.
 * Defaults to true.
 */
header?: boolean;
}
/**
 * Used to configure a dataframe.
 * The fields are alternative (and in some cases complementary) ways of
 * supplying the dataframe's data, index, and column metadata.
 */
export interface IDataFrameConfig<IndexT, ValueT> {
/**
 * Values to put in the dataframe.
 * This should be array or iterable of JavaScript objects.
 * Each element in the array contains fields that match the columns of the dataframe.
 */
values?: Iterable<ValueT>;
/**
 * CSV style rows to put in the dataframe.
 * An array of arrays. Each element in the top level array is a row of data.
 * Each row of data contains field values in column order.
 */
rows?: Iterable<any[]>;
/**
 * The index for the dataframe.
 * If omitted the index will default to a 0-based index.
 */
index?: Iterable<IndexT>;
/**
 * Array or iterable of index,value pairs to put in the dataframe.
 * If index and values are not separately specified they can be extracted
 * from the pairs.
 */
pairs?: Iterable<[IndexT, ValueT]>;
/**
 * Array or iterable of column names that are in the dataframe.
 * The order matters. This array specifies the ordering of columns which
 * is important when rendering tables or writing out CSV data files.
 * If this is omitted column names will automatically be determined
 * from the fields of the first row/value in the dataframe.
 */
columnNames?: Iterable<string>;
/**
 * Set to true when the dataframe has been baked into memory
 * and does not need to be lazily evaluated.
 */
baked?: boolean;
/**
 * Set to true to consider all rows/values in the dataframe when
 * determining the column names. Otherwise only the first row is considered.
 * You should use this if you have irregular fields in the objects that
 * make up the rows/values of the dataframe.
 */
considerAllRows?: boolean;
/**
 * Explicitly specify data for named columns to put in the dataframe.
 */
columns?: Iterable<IColumnConfig> | IColumnSpec;
/**
 * Explicitly set this value if you want columnNames to be caseSensitive.
 * Default behaviour is to treat column names as case insensitive
 */
caseSensitive?: boolean;
}
/**
 * Represents a named column in a dataframe: its name, detected data type,
 * and the {@link Series} that holds its values.
 */
export interface IColumn {
/**
 * The name of the column.
 */
name: string;
/**
 * The data type of the column, as a string.
 */
type: string;
/**
 * The data series from the column.
 */
series: ISeries<any, any>;
}
/**
 * An object whose fields specify data for named columns, or user-defined generator
 * functions that generate the data for the columns (given the dataframe).
 */
export interface IColumnGenSpec {
[index: string]: ISeries<any, any> | SeriesSelectorFn<any, any, any>;
}
/**
 * A string-to-string mapping that specifies how to rename columns.
 * Each key is an existing column name; the corresponding value is the new name.
 */
export interface IColumnRenameSpec {
[index: string]: string;
}
/**
 * Specifies columns to transform and the user-defined selector function that does the transformation.
 * Each key is a column name; the corresponding selector transforms each value in that column.
 */
export interface IColumnTransformSpec {
[columnName: string]: SelectorWithIndexFn<any, any>;
}
/**
 * Specifies columns that should be aggregated and a user-defined aggregator function to do the aggregation.
 * Each key is a column name; the corresponding value is the aggregator applied to that column.
 */
export interface IColumnAggregateSpec {
[index: string]: AggregateFn<any, any>;
}
/**
 * A selector function that can select a series from a dataframe.
 *
 * @typeparam IndexT The index type shared by the dataframe and the selected series.
 * @typeparam DataFrameValueT The row type of the input dataframe.
 * @typeparam SeriesValueT The value type of the selected series.
 */
export declare type SeriesSelectorFn<IndexT, DataFrameValueT, SeriesValueT> = (dataFrame: IDataFrame<IndexT, DataFrameValueT>) => ISeries<IndexT, SeriesValueT>;
/**
 * A zero-argument function that produces the configuration for a dataframe,
 * allowing the configuration to be computed lazily.
 */
export declare type DataFrameConfigFn<IndexT, ValueT> = () => IDataFrameConfig<IndexT, ValueT>;
/**
 * Represents the frequency of a type in a series or dataframe.
 * Note: field names are PascalCase, matching the records produced by the library.
 */
export interface ITypeFrequency {
/**
 * Name of the column containing the value.
 */
Column: string;
/**
 * The name of the type.
 */
Type: string;
/**
 * The frequency of the type's appearance in the series or dataframe.
 */
Frequency: number;
}
/**
 * Represents the frequency of a value in a series or dataframe.
 * Note: field names are PascalCase, matching the records produced by the library.
 */
export interface IValueFrequency {
/**
 * Name of the column containing the value.
 */
Column: string;
/**
 * The value.
 */
Value: any;
/**
 * The frequency of the value's appearance in the series or dataframe.
 */
Frequency: number;
}
/**
* Interface that represents a dataframe.
* A dataframe contains an indexed sequence of data records.
* Think of it as a spreadsheet or CSV file in memory.
*
* Each data record contains multiple named fields, the value of each field represents one row in a column of data.
* Each column of data is a named {@link Series}.
* You can think of a dataframe as a collection of named data series.
*
* @typeparam IndexT The type to use for the index.
* @typeparam ValueT The type to use for each row/data record.
*/
export interface IDataFrame<IndexT = number, ValueT = any> extends Iterable<ValueT> {
/**
* Get an iterator to enumerate the rows of the dataframe.
* Enumerating the iterator forces lazy evaluation to complete.
* This function is automatically called by `for...of`.
*
* @return An iterator for the rows in the dataframe.
*
* @example
* <pre>
*
* for (const row of df) {
* // ... do something with the row ...
* }
* </pre>
*/
[Symbol.iterator](): Iterator<ValueT>;
/**
* Get the names of the columns in the dataframe.
*
* @return Returns an array of the column names in the dataframe.
*
* @example
* <pre>
*
* console.log(df.getColumnNames());
* </pre>
*/
getColumnNames(): string[];
/**
* Retrieve the collection of all columns in the dataframe.
*
* @return Returns a {@link Series} of {@link IColumn} describing each column in the dataframe.
*
* @example
* <pre>
*
* for (const column of df.getColumns()) {
* console.log("Column name: ");
* console.log(column.name);
*
* console.log("Data:");
* console.log(column.series.toArray());
* }
* </pre>
*/
getColumns(): ISeries<number, IColumn>;
/**
* Cast the value of the dataframe to a new type.
* This operation has no effect but to retype the rows that the dataframe contains.
*
* @return The same dataframe, but with the type changed.
*
* @example
* <pre>
*
* const castDf = df.cast<SomeOtherType>();
* </pre>
*/
cast<NewValueT>(): IDataFrame<IndexT, NewValueT>;
/**
* Get the index for the dataframe.
*
* @return The {@link Index} for the dataframe.
*
* @example
* <pre>
*
* const index = df.getIndex();
* </pre>
*/
getIndex(): IIndex<IndexT>;
/**
* Set a named column as the {@link Index} of the dataframe.
*
* @param columnName Name of the column to use as the new {@link Index} of the returned dataframe.
*
* @return Returns a new dataframe with the values of the specified column as the new {@link Index}.
*
* @example
* <pre>
*
* const indexedDf = df.setIndex("SomeColumn");
* </pre>
*/
setIndex<NewIndexT = any>(columnName: string): IDataFrame<NewIndexT, ValueT>;
/**
* Apply a new {@link Index} to the dataframe.
*
* @param newIndex The new array or iterable to be the new {@link Index} of the dataframe. Can also be a selector to choose the {@link Index} for each row in the dataframe.
*
* @return Returns a new dataframe with the specified {@link Index} attached.
*
* @example
* <pre>
*
* const indexedDf = df.withIndex([10, 20, 30]);
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(df.getSeries("SomeColumn"));
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(row => row.SomeColumn);
* </pre>
*
* @example
* <pre>
*
* const indexedDf = df.withIndex(row => row.SomeColumn + 20);
* </pre>
*/
withIndex<NewIndexT>(newIndex: Iterable<NewIndexT> | SelectorFn<ValueT, NewIndexT>): IDataFrame<NewIndexT, ValueT>;
/**
* Resets the {@link Index} of the dataframe back to the default zero-based sequential integer index.
*
* @return Returns a new dataframe with the {@link Index} reset to the default zero-based index.
*
* @example
* <pre>
*
* const dfWithResetIndex = df.resetIndex();
* </pre>
*/
resetIndex(): IDataFrame<number, ValueT>;
/**
* Extract a {@link Series} from a named column in the dataframe.
*
* @param columnName Specifies the name of the column that contains the {@link Series} to retrieve.
*
* @return Returns the {@link Series} extracted from the named column in the dataframe.
*
* @example
* <pre>
*
* const series = df.getSeries("SomeColumn");
* </pre>
*/
getSeries<SeriesValueT = any>(columnName: string): ISeries<IndexT, SeriesValueT>;
/**
* Determine if the dataframe contains a {@link Series} for the specified named column.
*
* @param columnName Name of the column to check for.
*
* @return Returns true if the dataframe contains the requested {@link Series}, otherwise returns false.
*
* @example
* <pre>
*
* if (df.hasSeries("SomeColumn")) {
* // ... the dataframe contains a series with the specified column name ...
* }
* </pre>
*/
hasSeries(columnName: string): boolean;
/**
* Verify the existence of a named column and extract the {@link Series} for it.
* Throws an exception if the requested column doesn't exist.
*
* @param columnName Name of the column to extract.
*
* @return Returns the {@link Series} for the column if it exists, otherwise it throws an exception.
*
* @example
* <pre>
*
* try {
* const series = df.expectSeries("SomeColumn");
* // ... do something with the series ...
* }
* catch (err) {
* // ... the dataframe doesn't contain the column "SomeColumn" ...
* }
* </pre>
*/
expectSeries<SeriesValueT>(columnName: string): ISeries<IndexT, SeriesValueT>;
/**
* Create a new dataframe with a replaced or additional column specified by the passed-in series.
*
* @param columnNameOrSpec The name of the column to add or replace or a {@link IColumnGenSpec} that defines the columns to add.
* @param series When columnNameOrSpec is a string that identifies the column to add, this specifies the {@link Series} to add to the dataframe or a function that produces a series (given a dataframe).
*
* @return Returns a new dataframe replacing or adding a particular named column.
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries("ANewColumn", new Series([1, 2, 3]));
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries("ANewColumn", df =>
* df.getSeries("SourceData").select(aTransformation)
* );
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries({
* ANewColumn: new Series([1, 2, 3]),
* SomeOtherColumn: new Series([10, 20, 30])
* });
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.withSeries({
* ANewColumn: df => df.getSeries("SourceData").select(aTransformation))
* });
* </pre>
*/
withSeries<OutputValueT = any, SeriesValueT = any>(columnNameOrSpec: string | IColumnGenSpec, series?: ISeries<IndexT, SeriesValueT> | SeriesSelectorFn<IndexT, ValueT, SeriesValueT>): IDataFrame<IndexT, OutputValueT>;
/**
* Merge one or more dataframes into this single dataframe.
* Rows are merged by index.
* Same named columns in subsequent dataframes override columns in earlier dataframes.
*
* @param otherDataFrames... One or more dataframes to merge into this dataframe.
*
* @returns The merged data frame.
*
* @example
* <pre>
*
* const mergedDF = df1.merge(df2);
* </pre>
*
* <pre>
*
* const mergedDF = df1.merge(df2, df3, etc);
* </pre>
*/
merge<MergedValueT = any>(...otherDataFrames: IDataFrame<IndexT, any>[]): IDataFrame<IndexT, MergedValueT>;
/**
* Add a series to the dataframe, but only if it doesn't already exist.
*
* @param columnNameOrSpec The name of the series to add or a {@link IColumnGenSpec} that specifies the columns to add.
* @param series If columnNameOrSpec is a string that specifies the name of the series to add, this specifies the actual {@link Series} to add or a selector that generates the series given the dataframe.
*
* @return Returns a new dataframe with the specified series added, if the series didn't already exist. Otherwise if the requested series already exists the same dataframe is returned.
*
* @example
* <pre>
*
* const updatedDf = df.ensureSeries("ANewColumn", new Series([1, 2, 3]));
* </pre>
*
* @example
* <pre>
*
* const updatedDf = df.ensureSeries("ANewColumn", df =>
* df.getSeries("AnExistingSeries").select(aTransformation)
* );
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.ensureSeries({
* ANewColumn: new Series([1, 2, 3]),
* SomeOtherColumn: new Series([10, 20, 30])
* });
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.ensureSeries({
* ANewColumn: df => df.getSeries("SourceData").select(aTransformation))
* });
* </pre>
*/
ensureSeries<SeriesValueT>(columnNameOrSpec: string | IColumnGenSpec, series?: ISeries<IndexT, SeriesValueT> | SeriesSelectorFn<IndexT, ValueT, SeriesValueT>): IDataFrame<IndexT, ValueT>;
/**
* Create a new dataframe with just a subset of columns.
*
* @param columnNames Array of column names to include in the new dataframe.
*
* @return Returns a dataframe with a subset of columns from the original dataframe.
*
* @example
* <pre>
* const subsetDf = df.subset(["ColumnA", "ColumnB"]);
* </pre>
*/
subset<NewValueT = ValueT>(columnNames: string[]): IDataFrame<IndexT, NewValueT>;
/**
* Create a new dataframe with the requested column or columns dropped.
*
* @param columnOrColumns Specifies the column name (a string) or columns (array of strings) to drop.
*
* @return Returns a new dataframe with a particular named column or columns removed.
*
* @example
* <pre>
* const modifiedDf = df.dropSeries("SomeColumn");
* </pre>
*
* @example
* <pre>
* const modifiedDf = df.dropSeries(["ColumnA", "ColumnB"]);
* </pre>
*/
dropSeries<NewValueT = ValueT>(columnOrColumns: string | string[]): IDataFrame<IndexT, NewValueT>;
/**
* Create a new dataframe with columns reordered.
* New column names create new columns (with undefined values), omitting existing column names causes those columns to be dropped.
*
* @param columnNames Specifies the new order for columns.
*
* @return Returns a new dataframe with columns reordered according to the order of the array of column names that is passed in.
*
* @example
* <pre>
* const reorderedDf = df.reorderSeries(["FirstColumn", "SecondColumn", "etc"]);
* </pre>
*/
reorderSeries<NewValueT = ValueT>(columnNames: string[]): IDataFrame<IndexT, NewValueT>;
/**
* Bring the column(s) with specified name(s) to the front of the column order, making it (or them) the first column(s) in the output dataframe.
*
* @param columnOrColumns Specifies the column or columns to bring to the front.
*
* @return Returns a new dataframe with 1 or more columns brought to the front of the column ordering.
*
* @example
* <pre>
* const modifiedDf = df.bringToFront("NewFirstColumn");
* </pre>
*
* @example
* <pre>
* const modifiedDf = df.bringToFront(["NewFirstColumn", "NewSecondColumn"]);
* </pre>
*/
bringToFront(columnOrColumns: string | string[]): IDataFrame<IndexT, ValueT>;
/**
* Bring the column(s) with specified name(s) to the back of the column order, making it (or them) the last column(s) in the output dataframe.
*
* @param columnOrColumns Specifies the column or columns to bring to the back.
*
* @return Returns a new dataframe with 1 or more columns brought to the back of the column ordering.
*
* @example
* <pre>
* const modifiedDf = df.bringToBack("NewLastColumn");
* </pre>
*
* @example
* <pre>
* const modifiedDf = df.bringToBack(["NewSecondLastColumn", "NewLastColumn"]);
* </pre>
*/
bringToBack(columnOrColumns: string | string[]): IDataFrame<IndexT, ValueT>;
/**
* Create a new dataframe with 1 or more columns renamed.
*
* @param newColumnNames A column rename spec - a JavaScript hash that maps existing column names to new column names.
*
* @return Returns a new dataframe with specified columns renamed.
*
* @example
* <pre>
*
* const renamedDf = df.renameSeries({ OldColumnName: "NewColumnName" });
* </pre>
*
* @example
* <pre>
*
* const renamedDf = df.renameSeries({
* Column1: "ColumnA",
* Column2: "ColumnB"
* });
* </pre>
*/
renameSeries<NewValueT = ValueT>(newColumnNames: IColumnRenameSpec): IDataFrame<IndexT, NewValueT>;
/**
* Extract rows from the dataframe as an array.
* Each element of the array is one row of the dataframe represented as
* a JavaScript object with the fields as the dataframe's columns.
* This forces lazy evaluation to complete.
*
* @return Returns an array of the rows contained within the dataframe.
*
* @example
* <pre>
* const values = df.toArray();
* </pre>
*/
toArray(): ValueT[];
/**
* Retrieve the index, row pairs from the dataframe as an array.
* Each pair is [index, row].
* This forces lazy evaluation to complete.
*
* @return Returns an array of pairs that contains the dataframe's rows. Each pair is a two element array that contains an index and a row.
*
* @example
* <pre>
* const pairs = df.toPairs();
* </pre>
*/
toPairs(): ([IndexT, ValueT])[];
/**
* Convert the dataframe to a JavaScript object.
*
* @param keySelector User-defined selector function that selects keys for the resulting object.
* @param valueSelector User-defined selector function that selects values for the resulting object.
*
* @return Returns a JavaScript object generated from the dataframe by applying the key and value selector functions.
*
* @example
* <pre>
*
* const someObject = df.toObject(
* row => row.SomeColumn, // Specify the column to use for field names in the output object.
* row => row.SomeOtherColumn // Specify the column to use as the value for each field.
* );
* </pre>
*/
toObject<KeyT = any, FieldT = any, OutT = any>(keySelector: (value: ValueT) => KeyT, valueSelector: (value: ValueT) => FieldT): OutT;
/**
* Bake the data frame to an array of rows where each row is an array of values in column order.
*
* @return Returns an array of rows. Each row is an array of values in column order.
*
* @example
* <pre>
* const rows = df.toRows();
* </pre>
*/
toRows(): any[][];
/**
* Generates a new dataframe by repeatedly calling a user-defined selector function on each row in the original dataframe.
*
* @param selector A user-defined selector function that transforms each row to create the new dataframe.
*
* @return Returns a new dataframe with each row transformed by the selector function.
*
* @example
* <pre>
*
* function transformRow (inputRow) {
* const outputRow = {
* // ... construct output row derived from input row ...
* };
*
* return outputRow;
* }
*
* const transformedDf = df.select(row => transformRow(row));
* </pre>
*/
select<ToT>(selector: SelectorWithIndexFn<ValueT, ToT>): IDataFrame<IndexT, ToT>;
/**
* Generates a new dataframe by repeatedly calling a user-defined selector function on each row in the original dataframe.
*
* Similar to the {@link select} function, but in this case the selector function produces a collection of output rows that are flattened and merged to create the new dataframe.
*
* @param selector A user-defined selector function that transforms each row into a collection of output rows.
*
* @return Returns a new dataframe where each row has been transformed into 0 or more new rows by the selector function.
*
* @example
* <pre>
*
* function produceOutputRows (inputRow) {
* const outputRows = [];
* while (someCondition) {
* // ... generate zero or more output rows ...
* outputRows.push(... some generated row ...);
* }
* return outputRows;
* }
*
* const modifiedDf = df.selectMany(row => produceOutputRows(row));
* </pre>
*/
selectMany<ToT>(selector: SelectorWithIndexFn<ValueT, Iterable<ToT>>): IDataFrame<IndexT, ToT>;
/**
* Transform one or more columns.
*
* This is equivalent to extracting a {@link Series} with {@link getSeries}, then transforming it with {@link Series.select},
* and finally plugging it back in as the same column using {@link withSeries}.
*
* @param columnSelectors Object with field names for each column to be transformed. Each field specifies a selector function that transforms that column.
*
* @return Returns a new dataframe with 1 or more columns transformed.
*
* @example
* <pre>
*
* const modifiedDf = df.transformSeries({
* AColumnToTransform: columnValue => transformRow(columnValue)
* });
* </pre>
*
* @example
* <pre>
*
* const modifiedDf = df.transformSeries({
* ColumnA: columnValue => transformColumnA(columnValue),
* ColumnB: columnValue => transformColumnB(columnValue)
* });
* </pre>
*/
transformSeries<NewValueT = ValueT>(columnSelectors: IColumnTransformSpec): IDataFrame<IndexT, NewValueT>;
/**
* Generate new columns based on existing rows.
*
* This is equivalent to calling {@link select} to transform the original dataframe to a new dataframe with different column,
* then using {@link withSeries} to merge the columns of both the new and original dataframes.
*
* @param generator Generator function that transforms each row to produce 1 or more new columns.
* Or use a column spec that has fields for each column, the fields specify a generate function that produces the value for each new column.
*
* @return Returns a new dataframe with 1 or more new columns.
*
* @example
* <pre>
*
* function produceNewColumns (inputRow) {
* const newColumns = {
* // ... specify new columns and their values based on the input row ...
* };
*
* return newColumns;
* };
*
* const dfWithNewSeries = df.generateSeries(row => produceNewColumns(row));
* </pre>
*
* @example
* <pre>
*
* const dfWithNewSeries = df.generateSeries({
* NewColumnA: row => produceNewColumnA(row),
* NewColumnB: row => produceNewColumnB(row),
* })
* </pre>
*/
generateSeries<NewValueT = ValueT>(generator: SelectorWithIndexFn<any, any> | IColumnTransformSpec): IDataFrame<IndexT, NewValueT>;
/**
* Converts (deflates) a dataframe to a {@link Series}.
*
* @param selector Optional user-defined selector function that transforms each row to produce the series.
*
* @return Returns a series that was created from the original dataframe.
*
* @example
* <pre>
*
* const series = df.deflate(); // Deflate to a series of object.
* </pre>
*
* @example
* <pre>
*
* const series = df.deflate(row => row.SomeColumn); // Extract a particular column.
* </pre>
*/
deflate<ToT = ValueT>(selector?: SelectorWithIndexFn<ValueT, ToT>): ISeries<IndexT, ToT>;
/**
* Inflate a named {@link Series} in the dataframe to 1 or more new series in the new dataframe.
*
* This is the equivalent of extracting the series using {@link getSeries}, transforming them with {@link Series.select}
* and then running {@link Series.inflate} to create a new dataframe, then merging each column of the new dataframe
* into the original dataframe using {@link withSeries}.
*
* @param columnName Name of the series to inflate.
* @param selector Optional selector function that transforms each value in the column to new columns. If not specified it is expected that each value in the column is an object whose fields define the new column names.
*
* @return Returns a new dataframe with a column inflated to 1 or more new columns.
*
* @example
* <pre>
*
* function newColumnGenerator (row) {
* const newColumns = {
* // ... create 1 field per new column ...
* };
*
* return newColumns;
* }
*
* const dfWithNewSeries = df.inflateSeries("SomeColumn", newColumnGenerator);
* </pre>
*/
inflateSeries<NewValueT = ValueT>(columnName: string, selector?: SelectorWithIndexFn<IndexT, any>): IDataFrame<IndexT, ValueT>;
/**
* Partition a dataframe into a {@link Series} of *data windows*.
* Each value in the new series is a chunk of data from the original dataframe.
*
* @param period The number of rows to include in each data window.
*
* @return Returns a new series, each value of which is a chunk (data window) of the original dataframe.
*
* @example
* <pre>
*
* const windows = df.window(2); // Get rows in pairs.
* const pctIncrease = windows.select(pair => (pair.last().SalesAmount - pair.first().SalesAmount) / pair.first().SalesAmount);
* console.log(pctIncrease.toString());
* </pre>
*
* @example
* <pre>
*
* const salesDf = ... // Daily sales data.
* const weeklySales = salesDf.window(7); // Partition up into weekly data sets.
* console.log(weeklySales.toString());
* </pre>
*/
window(period: number): ISeries<number, IDataFrame<IndexT, ValueT>>;
/**
* Partition a dataframe into a {@link Series} of *rolling data windows*.
* Each value in the new series is a rolling chunk of data from the original dataframe.
*
* @param period The number of data rows to include in each data window.
*
* @return Returns a new series, each value of which is a rolling chunk of the original dataframe.
*
* @example
* <pre>
*
* const salesDf = ... // Daily sales data.
* const rollingWeeklySales = salesDf.rollingWindow(7); // Get rolling window over weekly sales data.
* console.log(rollingWeeklySales.toString());
* </pre>
*/
rollingWindow(period: number): ISeries<number, IDataFrame<IndexT, ValueT>>;
/**
* Partition a dataframe into a {@link Series} of variable-length *data windows*
* where the divisions between the data chunks are
* defined by a user-provided *comparer* function.
*
* @param comparer Function that compares two adjacent data rows and returns true if they should be in the same window.
*
* @return Returns a new series, each value of which is a chunk of data from the original dataframe.
*
* @example
* <pre>
*
* function rowComparer (rowA, rowB) {
* if (... rowA should be in the same data window as rowB ...) {
* return true;
* }
* else {
* return false;
* }
* };
*
* const variableWindows = df.variableWindow(rowComparer);
*/
variableWindow(comparer: ComparerFn<ValueT, ValueT>): ISeries<number, IDataFrame<IndexT, ValueT>>;
/**
* Eliminates adjacent duplicate rows.
*
* For each group of adjacent values that are equivalent only returns the last index/row for the group,
* thus adjacent equivalent rows are collapsed down to the last row.
*
* @param selector Optional selector function to determine the value used to compare for equivalence.
*
* @return Returns a new dataframe with groups of adjacent duplicate rows collapsed to a single row per group.
*
* @example
* <pre>
*
* const dfWithDuplicateRowsRemoved = df.sequentialDistinct(row => row.ColumnA);
* </pre>
*/
sequentialDistinct<ToT = ValueT>(selector?: SelectorFn<ValueT, ToT>): IDataFrame<IndexT, ValueT>;
/**
* Aggregate the rows in the dataframe to a single result.
*
* @param seed Optional seed value for producing the aggregation.
* @param selector Function that takes the seed and then each row in the dataframe and produces the aggregate value.
*
* @return Returns a new value that has been aggregated from the dataframe using the 'selector' function.
*
* @example
* <pre>
*
* const dailySalesDf = ... daily sales figures for the past month ...
* const totalSalesForthisMonth = dailySalesDf.aggregate(
* 0, // Seed - the starting value.
* (accumulator, row) => accumulator + row.SalesAmount // Aggregation function.
* );
* </pre>
*
* @example
* <pre>
*
* const totalSalesAllTime = 500; // We'll seed the aggregation with this value.
* const dailySalesDf = ... daily sales figures for the past month ...
* const updatedTotalSalesAllTime = dailySalesDf.aggregate(
* totalSalesAllTime,
* (accumulator, row) => accumulator + row.SalesAmount
* );
* </pre>
*
* @example
* <pre>
*
* var salesDataSummary = salesDataDf.aggregate({
* TotalSales: df => df.count(),
* AveragePrice: df => df.deflate(row => row.Price).average(),
* TotalRevenue: df => df.deflate(row => row.Revenue).sum(),
* });
* </pre>
*/
aggregate<ToT = ValueT>(seedOrSelector: AggregateFn<ValueT, ToT> | ToT | IColumnAggregateSpec, selector?: AggregateFn<ValueT, ToT>): ToT;
/**
* Skip a number of rows in the dataframe.
*
* @param numValues Number of rows to skip.
*
* @return Returns a new dataframe with the specified number of rows skipped.
*
* @example
* <pre>
*
* const dfWithRowsSkipped = df.skip(10); // Skip 10 rows in the original dataframe.
* </pre>
*/
skip(numValues: number): IDataFrame<IndexT, ValueT>;
/**
* Skips values in the dataframe while a condition evaluates to true or truthy.
*
* @param predicate Returns true/truthy to continue to skip rows in the original dataframe.
*
* @return Returns a new dataframe with all initial sequential rows removed while the predicate returned true/truthy.
*
* @example
* <pre>
*
* const dfWithRowsSkipped = df.skipWhile(row => row.CustomerName === "Fred"); // Skip initial customers named Fred.
* </pre>
*/
skipWhile(predicate: PredicateFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Skips values in the dataframe until a condition evaluates to true or truthy.
*
* @param predicate Return true/truthy to stop skipping rows in the original dataframe.
*
* @return Returns a new dataframe with all initial sequential rows removed until the predicate returned true/truthy.
*
* @example
* <pre>
*
* const dfWithRowsSkipped = df.skipUntil(row => row.CustomerName === "Fred"); // Skip initial customers until we find Fred.
* </pre>
*/
skipUntil(predicate: PredicateFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Take a number of rows in the dataframe.
*
* @param numRows Number of rows to take.
*
* @return Returns a new dataframe with only the specified number of rows taken from the original dataframe.
*
* @example
* <pre>
*
* const dfWithRowsTaken = df.take(15); // Take only the first 15 rows from the original dataframe.
* </pre>
*/
take(numRows: number): IDataFrame<IndexT, ValueT>;
/**
* Takes values from the dataframe while a condition evaluates to true or truthy.
*
* @param predicate Returns true/truthy to continue to take rows from the original dataframe.
*
* @return Returns a new dataframe with only the initial sequential rows that were taken while the predicate returned true/truthy.
*
* @example
* <pre>
*
* const dfWithRowsTaken = df.takeWhile(row => row.CustomerName === "Fred"); // Take only initial customers named Fred.
* </pre>
*/
takeWhile(predicate: PredicateFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Takes values from the dataframe until a condition evaluates to true or truthy.
*
* @param predicate Return true/truthy to stop taking rows in the original dataframe.
*
* @return Returns a new dataframe with only the initial sequential rows taken until the predicate returned true/truthy.
*
* @example
* <pre>
*
* const dfWithRowsTaken = df.takeUntil(row => row.CustomerName === "Fred"); // Take all initial customers until we find Fred.
* </pre>
*/
takeUntil(predicate: PredicateFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Count the number of rows in the dataframe
*
* @return Returns the count of all rows.
*
* @example
* <pre>
*
* const numRows = df.count();
* </pre>
*/
count(): number;
/**
* Get the first row of the dataframe.
*
* @return Returns the first row of the dataframe.
*
* @example
* <pre>
*
* const firstRow = df.first();
* </pre>
*/
first(): ValueT;
/**
* Get the last row of the dataframe.
*
* @return Returns the last row of the dataframe.
*
* @example
* <pre>
*
* const lastRow = df.last();
* </pre>
*/
last(): ValueT;
/**
* Get the row, if there is one, with the specified index.
*
* @param index The index for which to retrieve the row.
*
* @return Returns the row from the specified index in the dataframe or undefined if the index is not present in the dataframe.
*
* @example
* <pre>
*
* const row = df.at(5); // Get the row at index 5 (with a default 0-based index).
* </pre>
*
* @example
* <pre>
*
* const date = ... some date ...
* // Retrieve the row with the specified date from a time-series dataframe (assuming a date index has been applied).
* const row = df.at(date);
* </pre>
*/
at(index: IndexT): ValueT | undefined;
/**
* Get X rows from the start of the dataframe.
* Pass in a negative value to get all rows at the head except for X rows at the tail.
*
* @param numValues Number of rows to take.
*
* @return Returns a new dataframe that has only the specified number of rows taken from the start of the original dataframe.
*
* @example
* <pre>
*
* const sample = df.head(10); // Take a sample of 10 rows from the start of the dataframe.
* </pre>
*/
head(numValues: number): IDataFrame<IndexT, ValueT>;
/**
* Get X rows from the end of the dataframe.
* Pass in a negative value to get all rows at the tail except X rows at the head.
*
* @param numValues Number of rows to take.
*
* @return Returns a new dataframe that has only the specified number of rows taken from the end of the original dataframe.
*
* @example
* <pre>
*
* const sample = df.tail(12); // Take a sample of 12 rows from the end of the dataframe.
* </pre>
*/
tail(numValues: number): IDataFrame<IndexT, ValueT>;
/**
* Filter the dataframe using a user-defined predicate function.
*
* @param predicate Predicate function to filter rows from the dataframe. Returns true/truthy to keep rows, or false/falsy to omit rows.
*
* @return Returns a new dataframe containing only the rows from the original dataframe that matched the predicate.
*
* @example
* <pre>
*
* const filteredDf = df.where(row => row.CustomerName === "Fred"); // Filter so we only have customers named Fred.
* </pre>
*/
where(predicate: PredicateFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Invoke a callback function for each row in the dataframe.
*
* @param callback The callback function to invoke for each row.
*
* @return Returns the original dataframe with no modifications.
*
* @example
* <pre>
*
* df.forEach(row => {
*      // ... do something with the row ...
* });
* </pre>
*/
forEach(callback: CallbackFn<ValueT>): IDataFrame<IndexT, ValueT>;
/**
* Evaluates a predicate function for every row in the dataframe to determine
* if some condition is true/truthy for **all** rows in the dataframe.
*
* @param predicate Predicate function that receives each row. It should return true/truthy for a match, otherwise false/falsy.
*
* @return Returns true if the predicate has returned true or truthy for every row in the dataframe, otherwise returns false. Returns false for an empty dataframe.
*
* @example
* <pre>
*
* const everyoneIsNamedFred = df.all(row => row.CustomerName === "Fred"); // Check if all customers are named Fred.
* </pre>
*/
all(predicate: PredicateFn<ValueT>): boolean;
/**
* Evaluates a predicate function for every row in the dataframe to determine
* if some condition is true/truthy for **any** of the rows in the dataframe.
*
* If no predicate is specified then it simply checks if the dataframe contains more than zero rows.
*
* @param predicate Optional predicate function that receives each row. It should return true/truthy for a match, otherwise false/falsy.
*
* @return Returns true if the predicate has returned truthy for any row in the sequence, otherwise returns false.
* If no predicate is passed it returns true if the dataframe contains any rows at all.
* Returns false for an empty dataframe.
*
* @example
* <pre>
*
* const anyFreds = df.any(row => row.CustomerName === "Fred"); // Do we have any customers named Fred?
* </pre>
*
* @example
* <pre>
*
* const anyCustomers = df.any(); // Do we have any customers at all?
* </pre>
*/
any(predicate?: PredicateFn<ValueT>): boolean;
/**
* Evaluates a predicate function for every row in the dataframe to determine
* if some condition is true/truthy for **none** of the rows in the dataframe.
*
* If no predicate is specified then it simply checks if the dataframe contains zero rows.
*
* @param predicate Optional predicate function that receives each row. It should return true/truthy for a match, otherwise false/falsy.
*
* @return Returns true if the predicate has returned truthy for zero rows in the dataframe, otherwise returns false.
* NOTE(review): an empty dataframe has zero matching rows, which suggests this returns true for an empty dataframe — the previous claim of false contradicted the sentence above; confirm against the implementation.
*
* @example
* <pre>
*
* const noFreds = df.none(row => row.CustomerName === "Fred"); // Do we have zero customers named Fred?
* </pre>
*
* @example
* <pre>
*
* const noCustomers = df.none(); // Do we have zero customers?
* </pre>
*/
none(predicate?: PredicateFn<ValueT>): boolean;
/**
* Gets a new dataframe containing all rows starting at and after the specified index value.
*
* @param indexValue The index value at which to start the new dataframe.
*
* @return Returns a new dataframe containing all rows starting at and after the specified index value.
*
* @example
* <pre>
*
* const df = new DataFrame({
*      index: [0, 1, 2, 3], // This is the default index.
*      values: [10, 20, 30, 40],
* });
*
* const lastHalf = df.startAt(2);
* expect(lastHalf.toArray()).to.eql([30, 40]);
* </pre>
*
* @example
* <pre>
*
* const timeSeriesDf = ... a dataframe indexed by date/time ...
*
* // Get all rows starting at (or after) a particular date.
* const allRowsFromStartDate = timeSeriesDf.startAt(new Date(2016, 5, 4));
* </pre>
*/
startAt(indexValue: IndexT): IDataFrame<IndexT, ValueT>;
/**
* Gets a new dataframe containing all rows up until and including the specified index value (inclusive).
*
* @param indexValue The index value at which to end the new dataframe.
*
* @return Returns a new dataframe containing all rows up until and including the specified index value.
*
* @example
* <pre>
*
* const df = new DataFrame({
*      index: [0, 1, 2, 3], // This is the default index.
*      values: [10, 20, 30, 40],
* });
*
* const firstHalf = df.endAt(1);
* expect(firstHalf.toArray()).to.eql([10, 20]);
* </pre>
*
* @example
* <pre>
*
* const timeSeriesDf = ... a dataframe indexed by date/time ...
*
* // Get all rows ending at a particular date.
* const allRowsUpToAndIncludingTheExactEndDate = timeSeriesDf.endAt(new Date(2016, 5, 4));
* </pre>
*/
endAt(indexValue: IndexT): IDataFrame<IndexT, ValueT>;
/**
* Gets a new dataframe containing all rows up to the specified index value (exclusive).
*
* @param indexValue The index value at which to end the new dataframe.
*
* @return Returns a new dataframe containing all rows up to (but not including) the specified index value.
*
* @example
* <pre>
*
* const df = new DataFrame({
*      index: [0, 1, 2, 3], // This is the default index.
*      values: [10, 20, 30, 40],
* });
*
* const firstHalf = df.before(2);
* expect(firstHalf.toArray()).to.eql([10, 20]);
* </pre>
*
* @example
* <pre>
*
* const timeSeriesDf = ... a dataframe indexed by date/time ...
*
* // Get all rows before the specified date.
* const allRowsBeforeEndDate = timeSeriesDf.before(new Date(2016, 5, 4));
* </pre>
*/
before(indexValue: IndexT): IDataFrame<IndexT, ValueT>;
/**
* Gets a new dataframe containing all rows after the specified index value (exclusive).
*
* @param indexValue The index value after which to start the new dataframe.
*
* @return Returns a new dataframe containing all rows after the specified index value.
*
* @example
* <pre>
*
* const df = new DataFrame({
*      index: [0, 1, 2, 3], // This is the default index.
*      values: [10, 20, 30, 40],
* });
*
* const lastHalf = df.after(1);
* expect(lastHalf.toArray()).to.eql([30, 40]);
* </pre>
*
* @example
* <pre>
*
* const timeSeriesDf = ... a dataframe indexed by date/time ...
*
* // Get all rows after the specified date.
* const allRowsAfterStartDate = timeSeriesDf.after(new Date(2016, 5, 4));
* </pre>
*/
after(indexValue: IndexT): IDataFrame<IndexT, ValueT>;
/**
* Gets a new dataframe containing all rows between the specified index values (inclusive).
*
* @param startIndexValue The index at which to start the new dataframe.
* @param endIndexValue The index at which to end the new dataframe.
*
* @return Returns a new dataframe containing all values between the specified index values (inclusive).
*
* @example
* <pre>
*
* const df = new DataFrame({
*      index: [0, 1, 2, 3, 4, 6], // This is the default index.
*      values: [10, 20, 30, 40, 50, 60],
* });
*
* const middleSection = df.between(1, 4);
* expect(middleSection.toArray()).to.eql([20, 30, 40, 50]);
* </pre>
*
* @example
* <pre>
*
* const timeSeriesDf = ... a dataframe indexed by date/time ...
*
* // Get all rows between the start and end dates (inclusive).
* const allRowsBetweenDates = timeSeriesDf.between(new Date(2016, 5, 4), new Date(2016, 5, 22));
* </pre>
*/
between(startIndexValue: IndexT, endIndexValue: IndexT): IDataFrame<IndexT, ValueT>;
/**
* Format the dataframe for display as a string.
* This forces lazy evaluation to complete.
*
* @return Generates and returns a string representation of the dataframe.
*
* @example
* <pre>
*
* console.log(df.toString());
* </pre>
*/
toString(): string;
/**
* Parse a column with string values and convert it to a column with int values.
*
* @param columnNameOrNames Specifies the column name or array of column names to parse.
*
* @return Returns a new dataframe with a particular named column parsed as ints.
*
* @example
* <pre>
*
* const withParsedColumn = df.parseInts("MyIntColumn");
* </pre>
*
* @example
* <pre>
*
* const withParsedColumns = df.parseInts(["MyIntColumnA", "MyIntColumnB"]);
* </pre>
*/
parseInts(columnNameOrNames: string | string[]): IDataFrame<IndexT, ValueT>;
/**
* Parse a column with string values and convert it to a column with float values.
*
* @param columnNameOrNames Specifies the column name or array of column names to parse.
*
* @return Returns a new dataframe with a particular named column parsed as floats.
*
* @example
* <pre>
*
* const withParsedColumn = df.parseFloats("MyFloatColumn");
* </pre>
*
* @example
* <pre>
*
* c