/*
 * arquero
 * Version:
 * Query processing and transformation of array-backed data tables.
 * 413 lines (362 loc) • 13.9 kB
 * text/typescript
 */
import { Table } from './Table.js';
import { BitSet } from './BitSet.js';
/**
* A table column value.
* Aliased to `any`, so values of this type are not type-checked.
*/
export type DataValue = any;
/**
 * Minimal contract that a table column must satisfy.
 * Arrays, typed arrays, and Arrow columns are all compatible with it.
 */
export interface ColumnType<T> {
  /** Count of rows held by the column. */
  length: number;

  /** Look up the value stored at the given row index. */
  at(row: number): T;

  /** Produce an iterator over the column values. */
  [Symbol.iterator](): Iterator<T>;

  /** Optional `toArray` method; a column is not required to provide it. */
  toArray?(): ColumnType<T>;
}
/**
* A named collection of columns.
* Each string key maps to the column stored under that name.
*/
export type ColumnData = Record<string, ColumnType<DataValue>>;
/**
* Table expression parameters.
* An object of arbitrary values keyed by string name.
*/
export type Params = Record<string, any>;
/**
 * A typed array constructor.
 * Union of the unsigned-integer, signed-integer, and floating-point
 * typed array constructors listed below.
 */
export type TypedArrayConstructor =
  // Unsigned integer arrays
  | Uint8ArrayConstructor
  | Uint16ArrayConstructor
  | Uint32ArrayConstructor
  | BigUint64ArrayConstructor
  // Signed integer arrays
  | Int8ArrayConstructor
  | Int16ArrayConstructor
  | Int32ArrayConstructor
  | BigInt64ArrayConstructor
  // Floating-point arrays
  | Float32ArrayConstructor
  | Float64ArrayConstructor;
/**
 * A typed array instance.
 * Union of the unsigned-integer, signed-integer, and floating-point
 * typed array types listed below.
 */
export type TypedArray =
  // Unsigned integer arrays
  | Uint8Array
  | Uint16Array
  | Uint32Array
  | BigUint64Array
  // Signed integer arrays
  | Int8Array
  | Int16Array
  | Int32Array
  | BigInt64Array
  // Floating-point arrays
  | Float32Array
  | Float64Array;
/**
* Table row object.
* Maps string keys to column values.
*/
export type RowObject = Record<string, DataValue>;
/** Specification describing how a table is grouped. */
export interface GroupBySpec {
  /** How many groups exist. */
  size: number;

  /** Column names for each group. */
  names: string[];

  /** Value accessor functions for each group. */
  get: RowExpression[];

  /** For each group, the index of an example table row. */
  rows: Uint32Array | number[];

  /** Group index for every row; its length is the table's total row count. */
  keys: Uint32Array | number[];
}
/**
 * An expression evaluated over a table row.
 * @param row The table row.
 * @param data The backing table data store.
 * @returns The value produced by the expression.
 */
export type RowExpression = (row: number, data: ColumnData) => DataValue;
/**
 * Column value accessor.
 * @param row The table row.
 * @returns The column value for that row.
 */
export type ColumnGetter = (row: number) => DataValue;
/**
 * Comparator function for sorting table rows.
 * @param rowA The table row index for the first row.
 * @param rowB The table row index for the second row.
 * @param data The backing table data store.
 * @returns A negative value if rowA < rowB, a positive value if
 *  rowA > rowB, otherwise zero.
 */
export type RowComparator = (
  rowA: number,
  rowB: number,
  data: ColumnData
) => number;
/**
 * Options for derived table creation.
 * Every property is optional.
 */
export interface CreateOptions {
  /** The backing column data. */
  data?: ColumnData;

  /** An ordered list of column names. */
  names?: readonly string[];

  /** An additional filter BitSet to apply. */
  filter?: BitSet;

  /**
   * The groupby specification to use, or null for no groups.
   * The type admits `null` so the documented "no groups" value type-checks
   * under `strictNullChecks`.
   */
  groups?: GroupBySpec | null;

  /**
   * The orderby comparator function to use, or null for no order.
   * The type admits `null` so the documented "no order" value type-checks
   * under `strictNullChecks`.
   */
  order?: RowComparator | null;
}
/**
 * Options selecting which rows and columns to output (presumably for table
 * printing, per the interface name).
 * NOTE(review): the original summary ("generating row objects") and the
 * `limit` wording are identical to ObjectsOptions — confirm that the stated
 * defaults also hold for printing.
 */
export interface PrintOptions {
  /** The maximum number of objects to create, default `Infinity`. */
  limit?: number;

  /** How many initial rows to skip before output begins, default `0`. */
  offset?: number;

  /**
   * An ordered set of columns to include. Accepted inputs are column name
   * strings, column integer indices, rename objects (current column names as
   * keys, new column names as values), or selection helper functions such
   * as *all*, *not*, or *range*.
   */
  columns?: Select;
}
/** Options for generating row objects. */
export interface ObjectsOptions {
  /** The maximum number of objects to create, default `Infinity`. */
  limit?: number;

  /** How many initial rows to skip before output begins, default `0`. */
  offset?: number;

  /**
   * An ordered set of columns to include. Accepted inputs are column name
   * strings, column integer indices, rename objects (current column names as
   * keys, new column names as values), or selection helper functions such
   * as *all*, *not*, or *range*.
   */
  columns?: Select;

  /**
   * The export format for groups of rows. By default (false) groups are
   * ignored and a flat array of objects is returned. Valid values:
   * - 'map' or true: Map instances
   * - 'object': standard objects
   * - 'entries': arrays in the style of Object.entries
   *
   * With the 'object' format, groupby keys are coerced to strings so they
   * can serve as property names; this can lead to undesirable behavior if
   * the groupby keys are object values. The 'map' and 'entries' formats
   * preserve the groupby key values.
   */
  grouped?: boolean | 'map' | 'object' | 'entries';
}
/**
* A reference to a column, given either as a string name
* or as an integer index.
*/
export type ColumnRef = string | number;
/** Any value that exposes a `toString` method and so can be coerced to a string. */
export interface Stringable {
  /** Coerce the value to its string form. */
  toString(): string;
}
/**
* A table expression provided as a string or as a string-coercible value
* (any object with a `toString` method).
*/
export type TableExprString = string | Stringable;
/**
* A struct object with arbitrary named properties.
* Property values are untyped (`any`).
*/
export type Struct = Record<string, any>;
/**
* A function defined over a table row.
* Receives a struct `d` and the expression parameters `$`; the return
* value is untyped.
*/
export type TableExprFunc = (d: Struct, $: Params) => any;
/**
* A table expression defined over a single table, given either as a
* function or as a string / string-coercible value.
*/
export type TableExpr = TableExprFunc | TableExprString;
/**
* A function defined over rows from two tables.
* Receives one struct per table (`a`, `b`) and the expression
* parameters `$`; the return value is untyped.
*/
export type TableExprFunc2 = (a: Struct, b: Struct, $: Params) => any;
/**
* A table expression defined over two tables, given either as a
* two-table function or as a string / string-coercible value.
*/
export type TableExpr2 = TableExprFunc2 | TableExprString;
/**
* An object that maps current column names (keys) to new column
* names (values).
*/
export type RenameMap = Record<string, string>;
/**
* A selection helper function.
* Takes a table as input and returns an array of strings
* (presumably the selected column names — confirm against callers).
*/
export type SelectHelper = (table: Table) => string[];
/**
* A single column selection entry, potentially with renaming.
* The entry may be a column name string, a column integer index, a
* rename map object with current column names as keys and new column names
* as values, or a select helper function that takes a table as input and
* returns a valid selection parameter.
*/
export type SelectEntry = ColumnRef | RenameMap | SelectHelper;
/**
* An ordered set of column selections, potentially with renaming.
* Either a single selection entry or an array of entries.
*/
export type Select = SelectEntry | SelectEntry[];
/**
* An object of column name / table expression pairs:
* keys are column names, values are single-table expressions.
*/
export type ExprObject = Record<string, TableExpr>;
/**
* An object of column name / two-table expression pairs:
* keys are column names, values are two-table expressions.
*/
export type Expr2Object = Record<string, TableExpr2>;
/**
* A single entry of an expression list, providing one or more column
* values: a column reference, a selection helper function, or an object
* of column name / table expression pairs.
*/
export type ListEntry = ColumnRef | SelectHelper | ExprObject;
/**
* An ordered set of column values, given as a single entry or an array
* of entries.
* Entries may be column name strings, column index numbers, value objects
* with output column names for keys and table expressions for values,
* or a selection helper function.
*/
export type ExprList = ListEntry | ListEntry[];
/**
* A reference to a data table instance: either a Table object or a string
* (presumably a name that identifies a table — confirm against callers).
*/
export type TableRef = Table | string;
/**
* A list of one or more table references, given as a single reference
* or an array of references.
*/
export type TableRefList = TableRef | TableRef[];
/**
* A single orderby sort criterion.
* If a string, order by the column with that name.
* If a number, order by the column with that index.
* If a function, it must be a valid table expression; aggregate functions
* are permitted, but window functions are not.
* If an object, it must have output column names for keys and valid table
* expressions for values. The output name keys will subsequently be ignored.
*/
export type OrderKey = ColumnRef | TableExpr | ExprObject;
/**
* An ordered set of orderby sort criteria, in precedence order.
* Either a single criterion or an array of criteria.
*/
export type OrderKeys = OrderKey | OrderKey[];
/**
* Column values to use as a join key, given as a column reference
* (name or index) or as a single-table expression function.
*/
export type JoinKey = ColumnRef | TableExprFunc;
/**
 * An ordered set of join keys. Accepted shapes are a single key, a
 * one-element tuple holding an array of keys, a pair of keys, or a pair
 * of key arrays.
 */
export type JoinKeys =
  | JoinKey
  | [JoinKey[]]
  | [JoinKey, JoinKey]
  | [JoinKey[], JoinKey[]];
/**
* A predicate specification for joining two tables: a set of join keys,
* a two-table expression function, or null.
*/
export type JoinPredicate = JoinKeys | TableExprFunc2 | null;
/**
 * An array of per-table join values to extract: one or two expression
 * lists, optionally followed by an object of two-table expressions.
 */
export type JoinList =
  | [ExprList]
  | [ExprList, ExprList]
  | [ExprList, ExprList, Expr2Object];
/**
* A specification of join values to extract, given as an array of
* per-table values or as an object of two-table expressions.
*/
export type JoinValues = JoinList | Expr2Object;
// -- Transform Options -----------------------------------------------------
/** Settings accepted by count transformations. */
export interface CountOptions {
  /** Name given to the output count column; defaults to `count`. */
  as?: string;
}
/** Settings accepted by derive transformations. */
export interface DeriveOptions {
  /**
   * When true, the original columns are dropped so that only the derived
   * columns remain, and the before and after options are ignored.
   * Defaults to `false`.
   */
  drop?: boolean;

  /**
   * Anchor column in front of which relocated columns are placed.
   * Any legal column selection is accepted; if it selects multiple columns,
   * only the first is used as the anchor.
   * It is an error to specify both before and after options.
   */
  before?: Select;

  /**
   * Anchor column behind which relocated columns are placed.
   * Any legal column selection is accepted; if it selects multiple columns,
   * only the last is used as the anchor.
   * It is an error to specify both before and after options.
   */
  after?: Select;
}
/** Settings accepted by relocate transformations. */
export interface RelocateOptions {
  /**
   * Anchor column in front of which relocated columns are placed.
   * Any legal column selection is accepted; if it selects multiple columns,
   * only the first is used as the anchor.
   * It is an error to specify both before and after options.
   */
  before?: Select;

  /**
   * Anchor column behind which relocated columns are placed.
   * Any legal column selection is accepted; if it selects multiple columns,
   * only the last is used as the anchor.
   * It is an error to specify both before and after options.
   */
  after?: Select;
}
/** Settings accepted by sample transformations. */
export interface SampleOptions {
  /** Whether to sample with replacement (default `false`). */
  replace?: boolean;

  /** Whether to ensure the rows are randomly ordered (default `true`). */
  shuffle?: boolean;

  /**
   * Column values that act as sampling weights: each row is sampled with
   * probability proportional to its relative weight. Provide either a
   * column name string or a table expression compatible with *derive*.
   */
  weight?: string | TableExprFunc;
}
/** Settings accepted by impute transformations. */
export interface ImputeOptions {
  /**
   * Column values to combine in order to impute missing rows. For column
   * names and indices, all unique column values are considered. Any other
   * entry should be an object of name-expression pairs whose values are
   * table expressions valid for *rollup*. All combinations of values are
   * checked for each set of unique groupby values.
   */
  expand?: ExprList;
}
/** Settings accepted by fold transformations. */
export interface FoldOptions {
  /**
   * Output column names for the key column and the value column, in that
   * order. Defaults to `['key', 'value']`.
   */
  as?: string[];
}
/** Settings accepted by pivot transformations. */
export interface PivotOptions {
  /** Upper bound on the number of new columns generated (default `Infinity`). */
  limit?: number;

  /** String placed between multiple key names (default `_`). */
  keySeparator?: string;

  /** String placed between key and value names (default `_`). */
  valueSeparator?: string;

  /** Whether new column names are sorted alphabetically (default `true`). */
  sort?: boolean;
}
/** Settings accepted by spread transformations. */
export interface SpreadOptions {
  /**
   * Whether the input columns of the spread operation are dropped from the
   * output table (default `true`).
   */
  drop?: boolean;

  /** Upper bound on the number of new columns generated (default `Infinity`). */
  limit?: number;

  /**
   * Output column names to use. Only applies when a single column is
   * spread. If the array holds fewer names than there are generated columns
   * and no limit option is specified, the additional generated columns
   * will be dropped.
   */
  as?: string[];
}
/** Settings accepted by unroll transformations. */
export interface UnrollOptions {
  /**
   * Upper bound on the number of new rows generated per array value
   * (default `Infinity`).
   */
  limit?: number;

  /**
   * Flag or column name for adding zero-based array index values as an
   * output column (default `false`). If true, a column named "index" is
   * included. If string-valued, a column with the given name is added.
   */
  index?: string | boolean;

  /**
   * Columns to drop from the output. Accepted inputs are column name
   * strings, column integer indices, objects with column names as keys, or
   * functions that take a table as input and return a valid selection
   * parameter (typically the output of selection helper functions such as
   * *all*, *not*, or *range*).
   */
  drop?: Select;
}
/** Settings accepted by join transformations. */
export interface JoinOptions {
  /**
   * Whether to perform a left outer join (default `false`). Setting both
   * the *left* and *right* flags to true indicates a full outer join.
   */
  left?: boolean;

  /**
   * Whether to perform a right outer join (default `false`). Setting both
   * the *left* and *right* flags to true indicates a full outer join.
   */
  right?: boolean;

  /**
   * Column name suffixes to append if the join produces two columns with
   * the same name. Defaults to `['_1', '_2']`.
   */
  suffix?: string[];
}