arquero
Version:
Query processing and transformation of array-backed data tables.
130 lines (112 loc) • 3.87 kB
JavaScript
import { aggregate, aggregateGet, groupOutput } from './reduce/util.js';
import { parseValue } from './util/parse.js';
import { ungroup } from './ungroup.js';
import { any } from '../op/op-api.js';
import { columnSet } from '../table/ColumnSet.js';
// TODO: enforce aggregates only (no output changes) for values
// Pivot entry point: parse the `on` and `values` arguments as 'fold'
// expressions against `table`, then pass the parsed results, together
// with the untouched `options`, to _pivot and return its result.
export function pivot(table, on, values, options) {
  const parsedOn = parseValue('fold', table, on);
  // value expressions are parsed with the preparse hook, windowing
  // switched off and the aggronly flag set
  const valueOptions = { preparse, window: false, aggronly: true };
  const parsedValues = parseValue('fold', table, values, valueOptions);
  return _pivot(table, parsedOn, parsedValues, options);
}
// Map direct field references to the "any" aggregate: every entry whose
// value has a truthy `field` property is replaced, under the same key,
// by any(...) applied to the value's string form. Other entries are
// left as they are. The map is modified in place.
function preparse(map) {
  for (const [key, value] of map) {
    if (value.field) {
      map.set(key, any(value + ''));
    }
  }
}
// Option helper: give back `value` unless it is null or undefined, in
// which case give back `defaultValue`. Falsy values such as 0, '' and
// false are kept.
const opt = (value, defaultValue) => {
  if (value == null) {
    return defaultValue;
  }
  return value;
};
// Pivot implementation working on already-parsed `on` and `values`.
// Determines the pivot keys, runs one aggregate pass per key over
// key-masked copies of the value ops, and assembles the output columns.
// options.valueSeparator (default '_') joins a value name and a key
// when there is more than one value expression.
export function _pivot(table, on, values, options = {}) {
  const pivoted = pivotKeys(table, on, options);
  const pivotNames = pivoted.keys;
  const rowKeys = pivoted.keyColumn;
  const separator = opt(options.valueSeparator, '_');

  // with a single value expression a column is named by its key alone;
  // otherwise the value name and separator are put in front of the key
  const multipleValues = values.names.length > 1;
  const nameColumn = (index, name) => multipleValues
    ? name + separator + pivotNames[index]
    : pivotNames[index];

  // wrap a field accessor so that rows belonging to another key yield
  // NaN (so the aggregate skips them); the custom toString is used for
  // proper field resolution
  const maskField = (key, field) => {
    const masked = (row, data) => (
      rowKeys[row] === key ? field(row, data) : NaN
    );
    masked.toString = () => key + ':' + field;
    return masked;
  };

  // build the list of ops to aggregate for one pivot key
  const opsForKey = key => values.ops.map(op => {
    if (op.name === 'count') {
      // fix #273: count is rewritten as a sum over an indicator that
      // is 1 for rows of this key and NaN for all other rows
      const indicator = row => (rowKeys[row] === key ? 1 : NaN);
      indicator.toString = () => key + ':1';
      return { ...op, name: 'sum', fields: [indicator] };
    }
    const fields = op.fields.map(field => maskField(key, field));
    return { ...op, fields };
  });

  // one separate aggregate operation per key
  const results = pivotNames.map(key => aggregate(table, opsForKey(key)));

  return output(values, nameColumn, table.groups(), results).new(table);
}
// Compute the pivot keys for a table.
// Returns { keys, keyColumn }: `keyColumn` holds the key value of every
// scanned row (indexed by row), `keys` the distinct key values.
// Options read here:
//   limit        - if greater than zero, at most this many keys are kept
//   sort         - whether to sort the distinct keys (default true)
//   keySeparator - joins the parts of a multi-part key (default '_')
function pivotKeys(table, on, options) {
  let maxKeys = Infinity;
  if (options.limit > 0) {
    maxKeys = +options.limit;
  }
  const sorted = opt(options.sort, true);
  const joiner = opt(options.keySeparator, '_');

  // a single accessor is used as is; several accessors are evaluated
  // in turn and their results joined into one string key
  const accessors = aggregateGet(table, on.ops, on.exprs);
  let rowKey;
  if (accessors.length === 1) {
    rowKey = accessors[0];
  } else {
    rowKey = (row, data) => accessors
      .map(accessor => accessor(row, data))
      .join(joiner);
  }

  // per-row key values, filled in while scanning the table
  const keyColumn = Array(table.totalRows());
  table.scan((row, data) => (keyColumn[row] = rowKey(row, data)));

  // distinct key values, collected by an aggregate over the
  // ungrouped table
  const distinctOp = {
    id: 0,
    name: 'array_agg_distinct',
    fields: [(row => keyColumn[row])],
    params: []
  };
  const aggregated = aggregate(ungroup(table), [ distinctOp ]);
  const distinct = aggregated[0][0];

  // default sort order, applied only when requested
  let ordered = distinct;
  if (sorted) {
    ordered = distinct.sort();
  }

  // truncate only when a finite limit was given
  const keys = Number.isFinite(maxKeys) ? ordered.slice(0, maxKeys) : ordered;
  return { keys, keyColumn };
}
// Assemble the pivoted output columns.
//   { names, exprs } - value names and their matching expressions
//   namefn           - called as namefn(pivotIndex, valueName) to get
//                      the output column name
//   groups           - grouping of the input table, if any; its `size`
//                      gives the number of output rows (1 when absent)
//   results          - one aggregate result per pivot key
// Returns the column set holding the group-by columns (when grouped)
// followed by one column per value / pivot key combination.
function output({ names, exprs }, namefn, groups, results) {
  const groupCount = groups ? groups.size : 1;
  const cols = columnSet();
  const pivotCount = results.length;
  const valueCount = names.length;

  // aggregate result of the pivot key currently being written; read by
  // `lookup` whenever an expression asks for an op value
  let current;
  const lookup = (id, row) => current[id][row];

  // write groupby fields to output
  if (groups) {
    groupOutput(cols, groups);
  }

  // write pivot values to output
  for (let v = 0; v < valueCount; ++v) {
    const expr = exprs[v];
    const isDirect = expr.field != null;

    for (let p = 0; p < pivotCount; ++p) {
      const columnName = namefn(p, names[v]);

      if (isDirect) {
        // expression is a bare op: use the aggregate column directly
        cols.add(columnName, results[p][expr.field]);
        continue;
      }

      current = results[p];
      if (groupCount > 1) {
        // multiple groups: evaluate the expression once per group
        const column = cols.add(columnName, Array(groupCount));
        for (let g = 0; g < groupCount; ++g) {
          column[g] = expr(g, null, lookup);
        }
      } else {
        // a single group: one evaluation is enough
        cols.add(columnName, [ expr(0, null, lookup) ]);
      }
    }
  }

  return cols;
}