UNPKG

arquero

Version:

Query processing and transformation of array-backed data tables.

394 lines (357 loc) 9.3 kB
import { bins } from '../util/bins.js';
import { distinctMap } from '../util/distinct-map.js';
import { isBigInt } from '../util/is-bigint.js';
import { noop } from '../util/no-op.js';
import { NULL } from '../util/null.js';
import { product } from '../util/product.js';

/**
 * Complete an aggregate operator by filling in missing lifecycle hooks.
 * Any falsy `init`, `add` or `rem` property is replaced with `noop`.
 * @param {object} op The operator object to complete (mutated in place).
 * @return {object} The same operator object.
 */
function initOp(op) {
  for (const hook of ['init', 'add', 'rem']) {
    op[hook] = op[hook] || noop;
  }
  return op;
}

/**
 * Store a product value on the state and clear the `product_v` flag.
 * @param {object} s The aggregate state object.
 * @param {*} value The product value to store.
 * @return {*} The stored product value.
 */
function initProduct(s, value) {
  s.product_v = false;
  s.product = value;
  return value;
}

/**
 * Initialize an aggregate operator.
 * @callback AggregateInit
 * @param {object} state The aggregate state object.
 * @return {void}
 */

/**
 * Add a value to an aggregate operator.
 * @callback AggregateAdd
 * @param {object} state The aggregate state object.
 * @param {*} value The value to add.
 * @return {void}
 */

/**
 * Remove a value from an aggregate operator.
 * @callback AggregateRem
 * @param {object} state The aggregate state object.
 * @param {*} value The value to remove.
 * @return {void}
 */

/**
 * Retrieve an output value from an aggregate operator.
 * @callback AggregateValue
 * @param {object} state The aggregate state object.
 * @return {*} The output value.
 */

/**
 * An operator instance for an aggregate function.
 * @typedef {object} AggregateOperator
 * @property {AggregateInit} init Initialize the operator.
 * @property {AggregateAdd} [add] Add a value to the operator state.
 * @property {AggregateRem} [rem] Remove a value from the operator state.
 * @property {AggregateValue} value Retrieve an output value.
 */

/**
 * Create a new aggregate operator instance.
 * @callback AggregateCreate
 * @param {...any} params The aggregate operator parameters.
 * @return {AggregateOperator} The instantiated aggregate operator.
 */

/**
 * An operator definition for an aggregate function.
 * @typedef {object} AggregateDef
 * @property {AggregateCreate} create Create a new operator instance.
 * @property {number[]} param Two-element array containing the
 *  counts of input fields and additional parameters.
 * @property {string[]} [req] Names of operators required by this one.
 * @property {string[]} [stream] Names of operators required by this one
 *  for streaming operations (value removes).
 */

/**
 * Aggregate operator definitions, keyed by aggregate function name.
 * Every operator reads from and writes to a shared state object `s`.
 */
export const aggregateFunctions = {
  /**
   * Reports `s.count`.
   * @type {AggregateDef}
   */
  count: {
    create: () => initOp({
      value: s => s.count
    }),
    param: []
  },

  /**
   * Sets the `values` flag on init; reports `s.list.values(s.stream)`.
   * @type {AggregateDef}
   */
  array_agg: {
    create: () => initOp({
      init: s => (s.values = true),
      value: s => s.list.values(s.stream)
    }),
    param: [1]
  },

  /**
   * Sets the `values` flag on init; builds an object from the
   * entries returned by `s.list.values()`.
   * @type {AggregateDef}
   */
  object_agg: {
    create: () => initOp({
      init: s => (s.values = true),
      value: s => Object.fromEntries(s.list.values())
    }),
    param: [2]
  },

  /**
   * Sets the `values` flag on init; builds a Map from the
   * entries returned by `s.list.values()`.
   * @type {AggregateDef}
   */
  map_agg: {
    create: () => initOp({
      init: s => (s.values = true),
      value: s => new Map(s.list.values())
    }),
    param: [2]
  },

  /**
   * Sets the `values` flag on init; reports `s.list.values(s.stream)`.
   * @type {AggregateDef}
   */
  entries_agg: {
    create: () => initOp({
      init: s => (s.values = true),
      value: s => s.list.values(s.stream)
    }),
    param: [2]
  },

  /**
   * Keeps the first added value seen while `s.any` is null or undefined.
   * @type {AggregateDef}
   */
  any: {
    create: () => initOp({
      add: (s, v) => {
        if (s.any == null) s.any = v;
      },
      value: s => (s.valid ? s.any : NULL)
    }),
    param: [1]
  },

  /**
   * Reports `s.valid`.
   * @type {AggregateDef}
   */
  valid: {
    create: () => initOp({
      value: s => s.valid
    }),
    param: [1]
  },

  /**
   * Reports the difference between `s.count` and `s.valid`.
   * @type {AggregateDef}
   */
  invalid: {
    create: () => initOp({
      value: s => s.count - s.valid
    }),
    param: [1]
  },

  /**
   * Tracks values in a distinct map; the reported count is increased
   * by one whenever `s.valid` differs from `s.count`.
   * @type {AggregateDef}
   */
  distinct: {
    create: () => ({
      init: s => (s.distinct = distinctMap()),
      value: s => {
        const tracked = s.distinct.count();
        return tracked + (s.valid === s.count ? 0 : 1);
      },
      add: (s, v) => s.distinct.increment(v),
      rem: (s, v) => s.distinct.decrement(v)
    }),
    param: [1]
  },

  /**
   * Reports `s.distinct.values()`.
   * @type {AggregateDef}
   */
  array_agg_distinct: {
    create: () => initOp({
      value: s => s.distinct.values()
    }),
    param: [1],
    req: ['distinct']
  },

  /**
   * Reports the value with the highest count in `s.distinct`
   * (the first one encountered wins ties), or NULL if no count exceeds 0.
   * @type {AggregateDef}
   */
  mode: {
    create: () => initOp({
      value: s => {
        let best = NULL;
        let bestCount = 0;
        s.distinct.forEach((value, count) => {
          if (count > bestCount) {
            bestCount = count;
            best = value;
          }
        });
        return best;
      }
    }),
    param: [1],
    req: ['distinct']
  },

  /**
   * Running sum; NULL when `s.valid` is falsy.
   * @type {AggregateDef}
   */
  sum: {
    create: () => ({
      init: s => (s.sum = 0),
      value: s => (s.valid ? s.sum : NULL),
      add: (s, v) => {
        // non-BigInt inputs are coerced to number before adding
        if (!isBigInt(v)) return (s.sum += +v);
        // replace the initial numeric 0 rather than adding a BigInt to it
        return s.sum === 0 ? (s.sum = v) : (s.sum += v);
      },
      rem: (s, v) => (s.sum -= v)
    }),
    param: [1]
  },

  /**
   * Running product. Removing a zero or infinite value sets `product_v`,
   * which makes the next value request recompute from `s.list.values()`.
   * @type {AggregateDef}
   */
  product: {
    create: () => ({
      init: s => initProduct(s, 1),
      value: s => {
        // NOTE(review): yields a literal undefined here, whereas sibling
        // operators yield NULL -- confirm the two are interchangeable.
        if (!s.valid) return undefined;
        if (!s.product_v) return s.product;
        return initProduct(s, product(s.list.values()));
      },
      add: (s, v) => {
        // replace the initial numeric 1 rather than multiplying it by a BigInt
        if (isBigInt(v) && s.product === 1) return (s.product = v);
        return (s.product *= v);
      },
      rem: (s, v) => {
        // loose equality with 0 is intentional and kept as in the original
        const irreversible = v == 0 || v === Infinity || v === -Infinity;
        return irreversible ? (s.product_v = true) : (s.product /= v);
      }
    }),
    param: [1],
    stream: ['array_agg']
  },

  /**
   * Incrementally updated mean; also records the latest delta in
   * `s.mean_d`, which the variance operator reads.
   * @type {AggregateDef}
   */
  mean: {
    create: () => ({
      init: s => (s.mean = 0),
      value: s => (s.valid ? s.mean : NULL),
      add: (s, v) => {
        s.mean_d = v - s.mean;
        s.mean += s.mean_d / s.valid;
      },
      rem: (s, v) => {
        s.mean_d = v - s.mean;
        // when `s.valid` is falsy, subtracting the mean from itself resets it
        s.mean -= s.valid ? s.mean_d / s.valid : s.mean;
      }
    }),
    param: [1]
  },

  /**
   * Reports `s.mean`, or NULL when `s.valid` is falsy.
   * @type {AggregateDef}
   */
  average: {
    create: () => initOp({
      value: s => (s.valid ? s.mean : NULL)
    }),
    param: [1],
    req: ['mean']
  },

  /**
   * Accumulates `s.dev` from the mean deltas; reports
   * `s.dev / (s.valid - 1)` when `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  variance: {
    create: () => ({
      init: s => (s.dev = 0),
      value: s => (s.valid > 1 ? s.dev / (s.valid - 1) : NULL),
      add: (s, v) => (s.dev += s.mean_d * (v - s.mean)),
      rem: (s, v) => (s.dev -= s.mean_d * (v - s.mean))
    }),
    param: [1],
    req: ['mean']
  },

  /**
   * Reports `s.dev / s.valid` when `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  variancep: {
    create: () => initOp({
      value: s => {
        if (s.valid > 1) return s.dev / s.valid;
        return NULL;
      }
    }),
    param: [1],
    req: ['variance']
  },

  /**
   * Reports the square root of `s.dev / (s.valid - 1)` when
   * `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  stdev: {
    create: () => initOp({
      value: s => {
        if (s.valid > 1) return Math.sqrt(s.dev / (s.valid - 1));
        return NULL;
      }
    }),
    param: [1],
    req: ['variance']
  },

  /**
   * Reports the square root of `s.dev / s.valid` when
   * `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  stdevp: {
    create: () => initOp({
      value: s => {
        if (s.valid > 1) return Math.sqrt(s.dev / s.valid);
        return NULL;
      }
    }),
    param: [1],
    req: ['variance']
  },

  /**
   * Running minimum. Removing a value at or below the current minimum
   * marks it stale (NaN); the next value request recomputes it via
   * `s.list.min()`.
   * @type {AggregateDef}
   */
  min: {
    create: () => ({
      init: s => (s.min = NULL),
      value: s => {
        if (Number.isNaN(s.min)) s.min = s.list.min();
        return s.min;
      },
      add: (s, v) => {
        if (v < s.min || s.min === NULL) s.min = v;
      },
      rem: (s, v) => {
        if (v <= s.min) s.min = NaN;
      }
    }),
    param: [1],
    stream: ['array_agg']
  },

  /**
   * Running maximum. Removing a value at or above the current maximum
   * marks it stale (NaN); the next value request recomputes it via
   * `s.list.max()`.
   * @type {AggregateDef}
   */
  max: {
    create: () => ({
      init: s => (s.max = NULL),
      value: s => {
        if (Number.isNaN(s.max)) s.max = s.list.max();
        return s.max;
      },
      add: (s, v) => {
        if (v > s.max || s.max === NULL) s.max = v;
      },
      rem: (s, v) => {
        if (v >= s.max) s.max = NaN;
      }
    }),
    param: [1],
    stream: ['array_agg']
  },

  /**
   * Reports `s.list.quantile(p)` for the creation parameter `p`.
   * @type {AggregateDef}
   */
  quantile: {
    create: (p) => initOp({
      value: s => s.list.quantile(p)
    }),
    param: [1, 1],
    req: ['array_agg']
  },

  /**
   * Reports `s.list.quantile(0.5)`.
   * @type {AggregateDef}
   */
  median: {
    create: () => initOp({
      value: s => s.list.quantile(0.5)
    }),
    param: [1],
    req: ['array_agg']
  },

  /**
   * Incrementally tracks per-field means (`mean_x`, `mean_y`), deviation
   * sums (`dev_x`, `dev_y`) and the co-deviation sum (`cov`); reports
   * `s.cov / (s.valid - 1)` when `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  covariance: {
    create: () => ({
      init: s => {
        s.cov = s.mean_x = s.mean_y = s.dev_x = s.dev_y = 0;
      },
      value: s => (s.valid > 1 ? s.cov / (s.valid - 1) : NULL),
      add: (s, x, y) => {
        const deltaX = x - s.mean_x;
        const deltaY = y - s.mean_y;
        s.mean_x += deltaX / s.valid;
        s.mean_y += deltaY / s.valid;
        // residual of y against the already-updated mean
        const residY = y - s.mean_y;
        s.dev_x += deltaX * (x - s.mean_x);
        s.dev_y += deltaY * residY;
        s.cov += deltaX * residY;
      },
      rem: (s, x, y) => {
        const deltaX = x - s.mean_x;
        const deltaY = y - s.mean_y;
        // when `s.valid` is falsy, subtracting a mean from itself resets it
        s.mean_x -= s.valid ? deltaX / s.valid : s.mean_x;
        s.mean_y -= s.valid ? deltaY / s.valid : s.mean_y;
        const residY = y - s.mean_y;
        s.dev_x -= deltaX * (x - s.mean_x);
        s.dev_y -= deltaY * residY;
        s.cov -= deltaX * residY;
      }
    }),
    param: [2]
  },

  /**
   * Reports `s.cov / s.valid` when `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  covariancep: {
    create: () => initOp({
      value: s => {
        if (s.valid > 1) return s.cov / s.valid;
        return NULL;
      }
    }),
    param: [2],
    req: ['covariance']
  },

  /**
   * Reports `s.cov` divided by the product of the square roots of
   * `s.dev_x` and `s.dev_y` when `s.valid > 1`, else NULL.
   * @type {AggregateDef}
   */
  corr: {
    create: () => initOp({
      value: s => {
        if (s.valid > 1) {
          const scale = Math.sqrt(s.dev_x) * Math.sqrt(s.dev_y);
          return s.cov / scale;
        }
        return NULL;
      }
    }),
    param: [2],
    req: ['covariance']
  },

  /**
   * Passes `s.min`, `s.max` and the creation parameters to `bins`.
   * @type {AggregateDef}
   */
  bins: {
    create: (maxbins, nice, minstep, step) => initOp({
      value: s => bins(s.min, s.max, maxbins, nice, minstep, step)
    }),
    param: [1, 4],
    req: ['min', 'max']
  }
};