// arquero
// Version:
// Query processing and transformation of array-backed data tables.
// 440 lines (392 loc) • 13.9 kB
// JavaScript
import { functions } from './functions/index.js';
import { toArray } from '../util/to-array.js';
import { toString } from '../util/to-string.js';
/**
 * Descriptor for a single table expression operation: the operation name
 * together with the field and parameter arguments it is applied to.
 */
export class Op {
  /**
   * @param {string} name The operation name.
   * @param {any[]} fields The field arguments.
   * @param {any[]} params The parameter arguments.
   */
  constructor(name, fields, params) {
    this.name = name;
    this.fields = fields;
    this.params = params;
  }

  /**
   * Serialize this operation as arrow-function source text of the form
   * `d => op.<name>(<args>)`. Each field is rendered as a lookup on `d`,
   * each parameter is rendered by the imported toString helper, and the
   * arguments are separated by commas (fields first, then parameters).
   * @return {string} The serialized expression text.
   */
  toString() {
    const { fields, params } = this;
    const fieldArgs = fields.map(field => `d[${toString(field)}]`);
    const paramArgs = params.map(toString);
    const argList = fieldArgs.concat(paramArgs).join(',');
    return `d => op.${this.name}(${argList})`;
  }

  /**
   * Produce a plain-object form of this operation.
   * @return {{ expr: string, func: boolean }} The serialized expression
   *  text paired with a `func` flag that is always true.
   */
  toObject() {
    const expr = this.toString();
    return { expr, func: true };
  }
}
/**
 * Create an Op descriptor for the named operation.
 * Both argument lists are passed through toArray before being stored
 * (presumably so that a single value may be given in place of an array).
 * @param {string} name The operation name.
 * @param {any | any[]} [fields] The field argument(s). Defaults to an
 *  empty array when omitted.
 * @param {any | any[]} [params] The parameter argument(s). Defaults to an
 *  empty array when omitted.
 * @return {Op} The new operation descriptor.
 */
export function op(name, fields = [], params = []) {
  const fieldList = toArray(fields);
  const paramList = toArray(params);
  return new Op(name, fieldList, paramList);
}
/** Create an Op for the 'any' operation over a single field. */
export const any = (field) => {
  return op('any', field);
};

/** Create an Op for the 'count' operation, which takes no arguments. */
export const count = () => {
  return op('count');
};

/** Create an Op for the 'array_agg' operation over a single field. */
export const array_agg = (field) => {
  return op('array_agg', field);
};

/** Create an Op for the 'array_agg_distinct' operation over a single field. */
export const array_agg_distinct = (field) => {
  return op('array_agg_distinct', field);
};

/** Create an Op for the 'map_agg' operation over a key field and a value field. */
export const map_agg = (key, value) => {
  return op('map_agg', [key, value]);
};

/** Create an Op for the 'object_agg' operation over a key field and a value field. */
export const object_agg = (key, value) => {
  return op('object_agg', [key, value]);
};

/** Create an Op for the 'entries_agg' operation over a key field and a value field. */
export const entries_agg = (key, value) => {
  return op('entries_agg', [key, value]);
};
/**
* @typedef {import('../table/types.js').Struct} Struct
*/
/**
 * All table expression operations including normal functions,
 * aggregate functions, and window functions.
 *
 * The entries of `functions` are spread in first, so any same-named key
 * defined below takes precedence. Every aggregate and window entry defined
 * below is a thin factory: it calls op() and returns the resulting Op
 * descriptor (operation name, field arguments, parameter arguments)
 * rather than computing a value itself.
 * NOTE(review): the JSDoc return types below appear to describe the value
 * an operation yields inside a table expression, not the Op object that
 * is returned here; presumably that mismatch is why the entries carry
 * `// @ts-ignore`. Confirm before removing any of those directives.
 */
export const opApi = {
...functions,
/**
* Generate an object representing the current table row.
* The names are passed as operation parameters (no field arguments);
* array arguments are flattened by one level via `names.flat()`.
* @param {...string} names The column names to include in the object.
* If unspecified, all columns are included.
* @return {Struct} The generated row object.
*/
row_object: (...names) => op('row_object', null, names.flat()),
/**
* Aggregate function to count the number of records (rows).
* @return {number} The count of records.
*/
count,
/**
* Aggregate function returning an arbitrary observed value.
* @template T
* @param {T} field The data field.
* @return {T} An arbitrary observed value.
*/
any,
/**
* Aggregate function to collect an array of values.
* @template T
* @param {T} field The data field.
* @return {Array<T>} A list of values.
*/
array_agg,
/**
* Aggregate function to collect an array of distinct (unique) values.
* @template T
* @param {T} field The data field.
* @return {Array<T>} An array of unique values.
*/
array_agg_distinct,
/**
* Aggregate function to create an object given input key and value fields.
* @template K, V
* @param {K} key The object key field.
* @param {V} value The object value field.
* @return {Record<K, V>} An object of key-value pairs.
*/
object_agg,
/**
* Aggregate function to create a Map given input key and value fields.
* @template K, V
* @param {K} key The map key field.
* @param {V} value The map value field.
* @return {Map<K, V>} A Map of key-value pairs.
*/
map_agg,
/**
* Aggregate function to create an array in the style of Object.entries()
* given input key and value fields.
* @template K, V
* @param {K} key The entry key field.
* @param {V} value The entry value field.
* @return {[K, V][]} An array of [key, value] arrays.
*/
entries_agg,
/**
* Aggregate function to count the number of valid values.
* Invalid values are null, undefined, or NaN.
* @param {*} field The data field.
* @return {number} The count of valid values.
*/
// @ts-ignore
valid: (field) => op('valid', field),
/**
* Aggregate function to count the number of invalid values.
* Invalid values are null, undefined, or NaN.
* @param {*} field The data field.
* @return {number} The count of invalid values.
*/
// @ts-ignore
invalid: (field) => op('invalid', field),
/**
* Aggregate function to count the number of distinct values.
* @param {*} field The data field.
* @return {number} The count of distinct values.
*/
// @ts-ignore
distinct: (field) => op('distinct', field),
/**
* Aggregate function to determine the mode (most frequent) value.
* @template T
* @param {T} field The data field.
* @return {T} The mode value.
*/
// @ts-ignore
mode: (field) => op('mode', field),
/**
* Aggregate function to sum values.
* @param {*} field The data field.
* @return {number} The sum of the values.
*/
// @ts-ignore
sum: (field) => op('sum', field),
/**
* Aggregate function to multiply values.
* @param {*} field The data field.
* @return {number} The product of the values.
*/
// @ts-ignore
product: (field) => op('product', field),
/**
* Aggregate function for the mean (average) value.
* @param {*} field The data field.
* @return {number} The mean (average) of the values.
*/
// @ts-ignore
mean: (field) => op('mean', field),
/**
* Aggregate function for the average (mean) value.
* @param {*} field The data field.
* @return {number} The average (mean) of the values.
*/
// @ts-ignore
average: (field) => op('average', field),
/**
* Aggregate function for the sample variance.
* @param {*} field The data field.
* @return {number} The sample variance of the values.
*/
// @ts-ignore
variance: (field) => op('variance', field),
/**
* Aggregate function for the population variance.
* @param {*} field The data field.
* @return {number} The population variance of the values.
*/
// @ts-ignore
variancep: (field) => op('variancep', field),
/**
* Aggregate function for the sample standard deviation.
* @param {*} field The data field.
* @return {number} The sample standard deviation of the values.
*/
// @ts-ignore
stdev: (field) => op('stdev', field),
/**
* Aggregate function for the population standard deviation.
* @param {*} field The data field.
* @return {number} The population standard deviation of the values.
*/
// @ts-ignore
stdevp: (field) => op('stdevp', field),
/**
* Aggregate function for the minimum value.
* @template T
* @param {T} field The data field.
* @return {T} The minimum value.
*/
// @ts-ignore
min: (field) => op('min', field),
/**
* Aggregate function for the maximum value.
* @template T
* @param {T} field The data field.
* @return {T} The maximum value.
*/
// @ts-ignore
max: (field) => op('max', field),
/**
* Aggregate function to compute the quantile boundary
* of a data field for a probability threshold.
* @param {*} field The data field.
* @param {number} p The probability threshold.
* @return {number} The quantile value.
*/
// @ts-ignore
quantile: (field, p) => op('quantile', field, p),
/**
* Aggregate function for the median value.
* This is a shorthand for the 0.5 quantile value.
* @param {*} field The data field.
* @return {number} The median value.
*/
// @ts-ignore
median: (field) => op('median', field),
/**
* Aggregate function for the sample covariance between two variables.
* @param {*} field1 The first data field.
* @param {*} field2 The second data field.
* @return {number} The sample covariance of the values.
*/
// @ts-ignore
covariance: (field1, field2) => op('covariance', [field1, field2]),
/**
* Aggregate function for the population covariance between two variables.
* @param {*} field1 The first data field.
* @param {*} field2 The second data field.
* @return {number} The population covariance of the values.
*/
// @ts-ignore
covariancep: (field1, field2) => op('covariancep', [field1, field2]),
/**
* Aggregate function for the product-moment correlation between two variables.
* To instead compute a rank correlation, compute the average ranks for each
* variable and then apply this function to the result.
* @param {*} field1 The first data field.
* @param {*} field2 The second data field.
* @return {number} The correlation between the field values.
*/
// @ts-ignore
corr: (field1, field2) => op('corr', [field1, field2]),
/**
* Aggregate function for calculating a binning scheme in terms of
* the minimum bin boundary, maximum bin boundary, and step size.
* The four options are forwarded positionally as operation parameters,
* in the order [maxbins, nice, minstep, step].
* @param {*} field The data field.
* @param {number} [maxbins=15] The maximum number of allowed bins.
* @param {boolean} [nice=true] Flag indicating if the bin min and max
* should snap to "nice" human-friendly values.
* @param {number} [minstep] The minimum allowed step size between bins.
* @param {number} [step] The exact step size to use between bins.
* If specified, the maxbins and minstep arguments are ignored.
* @return {[number, number, number]} The bin [min, max, and step] values.
*/
// @ts-ignore
bins: (field, maxbins, nice, minstep, step) => op(
'bins',
field,
[maxbins, nice, minstep, step]
),
/**
* Window function to assign consecutive row numbers, starting from 1.
* @return {number} The row number value.
*/
// @ts-ignore
row_number: () => op('row_number'),
/**
* Window function to assign a rank to each value in a group, starting
* from 1. Peer values are assigned the same rank. Subsequent ranks
* reflect the number of prior values: if the first two values tie for
* rank 1, the third value is assigned rank 3.
* @return {number} The rank value.
*/
// @ts-ignore
rank: () => op('rank'),
/**
* Window function to assign a fractional (average) rank to each value in
* a group, starting from 1. Peer values are assigned the average of their
* indices: if the first two values tie, both will be assigned rank 1.5.
* @return {number} The peer-averaged rank value.
*/
// @ts-ignore
avg_rank: () => op('avg_rank'),
/**
* Window function to assign a dense rank to each value in a group,
* starting from 1. Peer values are assigned the same rank. Subsequent
* ranks do not reflect the number of prior values: if the first two
* values tie for rank 1, the third value is assigned rank 2.
* @return {number} The dense rank value.
*/
// @ts-ignore
dense_rank: () => op('dense_rank'),
/**
* Window function to assign a percentage rank to each value in a group.
* The percent is calculated as (rank - 1) / (group_size - 1).
* @return {number} The percentage rank value.
*/
// @ts-ignore
percent_rank: () => op('percent_rank'),
/**
* Window function to assign a cumulative distribution value between 0 and 1
* to each value in a group.
* @return {number} The cumulative distribution value.
*/
// @ts-ignore
cume_dist: () => op('cume_dist'),
/**
* Window function to assign a quantile (e.g., percentile) value to each
* value in a group. Accepts an integer parameter indicating the number of
* buckets to use (e.g., 100 for percentiles, 5 for quintiles).
* The bucket count is passed as an operation parameter; no field
* argument is supplied.
* @param {number} num The number of buckets for ntile calculation.
* @return {number} The quantile value.
*/
// @ts-ignore
ntile: (num) => op('ntile', null, num),
/**
* Window function to assign a value that precedes the current value by
* a specified number of positions. If no such value exists, returns a
* default value instead.
* @template T
* @param {T} field The data field.
* @param {number} [offset=1] The lag offset from the current value.
* @param {T} [defaultValue=undefined] The default value.
* @return {T} The lagging value.
*/
// @ts-ignore
lag: (field, offset, defaultValue) => op('lag', field, [offset, defaultValue]),
/**
* Window function to assign a value that follows the current value by
* a specified number of positions. If no such value exists, returns a
* default value instead.
* @template T
* @param {T} field The data field.
* @param {number} [offset=1] The lead offset from the current value.
* @param {T} [defaultValue=undefined] The default value.
* @return {T} The leading value.
*/
// @ts-ignore
lead: (field, offset, defaultValue) => op('lead', field, [offset, defaultValue]),
/**
* Window function to assign the first value in a sliding window frame.
* @template T
* @param {T} field The data field.
* @return {T} The first value in the current frame.
*/
// @ts-ignore
first_value: (field) => op('first_value', field),
/**
* Window function to assign the last value in a sliding window frame.
* @template T
* @param {T} field The data field.
* @return {T} The last value in the current frame.
*/
// @ts-ignore
last_value: (field) => op('last_value', field),
/**
* Window function to assign the nth value in a sliding window frame
* (counting from 1), or undefined if no such value exists.
* @template T
* @param {T} field The data field.
* @param {number} nth The nth position, starting from 1.
* @return {T} The nth value in the current frame.
*/
// @ts-ignore
nth_value: (field, nth) => op('nth_value', field, nth),
/**
* Window function to fill in missing values with preceding values.
* @template T
* @param {T} field The data field.
* @param {T} [defaultValue=undefined] The default value.
* @return {T} The current value if valid, otherwise the first preceding
* valid value. If no such value exists, returns the default value.
*/
// @ts-ignore
fill_down: (field, defaultValue) => op('fill_down', field, defaultValue),
/**
* Window function to fill in missing values with subsequent values.
* @template T
* @param {T} field The data field.
* @param {T} [defaultValue=undefined] The default value.
* @return {T} The current value if valid, otherwise the first subsequent
* valid value. If no such value exists, returns the default value.
*/
// @ts-ignore
fill_up: (field, defaultValue) => op('fill_up', field, defaultValue)
};