UNPKG

arquero

Version:

Query processing and transformation of array-backed data tables.

163 lines (141 loc) 4.56 kB
import { aggregateGet } from './reduce/util.js';
import { _rollup } from './rollup.js';
import { ungroup } from './ungroup.js';
import { parseValue } from './util/parse.js';
import { parse } from '../expression/parse.js';
import { array_agg_distinct } from '../op/op-api.js';
import { columnSet } from '../table/ColumnSet.js';
import { error } from '../util/error.js';
import { isValid } from '../util/is-valid.js';
import { keyFunction } from '../util/key-function.js';
import { toString } from '../util/to-string.js';
import { unroll } from '../util/unroll.js';

/**
 * Replace invalid values in the named columns of a table with computed
 * replacement values, optionally first adding rows for key combinations
 * that are absent from the table.
 *
 * @param {object} table The input table.
 * @param {object} values Replacement value definitions, parsed with
 *  `parse(values, { table })`. Every resulting name must be an existing
 *  column of `table`.
 * @param {object} [options] Imputation options.
 * @param {*} [options.expand] Expansion key definitions. When truthy, they
 *  are parsed via `parseValue` and evaluated with `_rollup` over the
 *  ungrouped table; row 0 of each result column supplies the array of
 *  values to enumerate for that key.
 * @returns {object} The table produced by `_impute`.
 * @throws Reports `Invalid impute column ...` through `error` when a
 *  name in `values` is not a column of `table`.
 */
export function impute(table, values, options = {}) {
  values = parse(values, { table });

  // every imputed name must refer to an existing column
  values.names.forEach(name =>
    table.column(name) ? 0 : error(`Invalid impute column ${toString(name)}`)
  );

  if (options.expand) {
    const opt = { preparse, window: false, aggronly: true };
    const params = parseValue('impute', table, options.expand, opt);
    // evaluate the expansion expressions over the ungrouped table
    const result = _rollup(ungroup(table), params);
    return _impute(
      table, values, params.names,
      params.names.map(name => result.get(name, 0))
    );
  } else {
    return _impute(table, values);
  }
}

/**
 * Rewrite direct field references to a "unique" aggregate: every map
 * entry whose value has a truthy `field` property is replaced, in place,
 * by `array_agg_distinct` applied to the value's string form.
 *
 * @param {Map} map Map from output name to parsed expression value.
 */
function preparse(map) {
  map.forEach((value, key) =>
    value.field ? map.set(key, array_agg_distinct(value + '')) : 0
  );
}

/**
 * Internal imputation routine.
 *
 * @param {object} table The input table.
 * @param {object} values Parsed value definitions with `names`, `exprs`
 *  and `ops` properties.
 * @param {string[]} [keys] Names of the expansion key columns. When
 *  non-empty, the table is expanded first and imputed values are written
 *  directly into the expanded table's columns.
 * @param {Array[]} [arrays] For each entry of `keys`, the array of values
 *  to enumerate.
 * @returns {object} The expanded table (when `keys` is non-empty), or a
 *  new table created from a column set holding the imputed columns.
 */
export function _impute(table, values, keys, arrays) {
  const write = keys && keys.length;
  table = write ? expand(table, keys, arrays) : table;
  const { names, exprs, ops } = values;
  const gets = aggregateGet(table, ops, exprs);
  const cols = write ? null : columnSet(table);
  const rows = table.totalRows();

  names.forEach((name, i) => {
    const col = table.column(name);
    // write in place into the expanded table's column, or into a fresh array
    const out = write ? col : cols.add(name, Array(rows));
    const get = gets[i];

    table.scan(idx => {
      const v = col.at(idx);
      out[idx] = !isValid(v) ? get(idx) : v;
    });
  });

  return write ? table : table.create(cols);
}

/**
 * Build a table containing the scanned rows of the input table followed
 * by one appended row for each enumerated key combination that does not
 * already occur in the input.
 *
 * @param {object} table The input table.
 * @param {string[]} keys Names of the expansion key columns.
 * @param {Array[]} values For each key, the array of values to enumerate.
 * @returns {object} The expanded table, created via `cols.new(table)`.
 */
function expand(table, keys, values) {
  const groups = table.groups();
  const data = table.data();

  // expansion keys and accessors; groupby keys (if any) come first
  const keyNames = (groups ? groups.names : []).concat(keys);
  const keyGet = (groups ? groups.get : [])
    .concat(keys.map(key => table.getter(key)));

  // build hash of existing rows
  const hash = new Set();
  const keyTable = keyFunction(keyGet);
  table.scan((idx, data) => hash.add(keyTable(idx, data)));

  // initialize output table data with a copy of the scanned rows
  const names = table.columnNames();
  const cols = columnSet();
  const out = names.map(name => cols.add(name, []));
  names.forEach((name, i) => {
    const old = data[name];
    const col = out[i];
    table.scan(row => col.push(old.at(row)));
  });

  // enumerate expanded value sets and augment output table
  // keyEnum computes a key from a value vector by position
  const keyEnum = keyFunction(keyGet.map((k, i) => a => a[i]));
  // generated setter: pushes v[keyNames.indexOf(name)] onto each output
  // column; for non-key columns the index is -1, so the pushed value is
  // undefined
  const set = unroll(
    'v',
    '{' + out.map((_, i) => `_${i}.push(v[$${i}]);`).join('') + '}',
    out, names.map(name => keyNames.indexOf(name))
  );

  if (groups) {
    const row = groups.keys.length;

    // Collect the group index of every appended row first, then allocate
    // the key buffer at its exact final size. Writes past the end of a
    // typed array are silently dropped, so a buffer pre-sized from an
    // estimate could lose (or pad with zero) group keys whenever the
    // enumerated values do not line up one-to-one with the existing keys.
    const added = [];
    enumerate(groups, values, (vec, idx) => {
      if (!hash.has(keyEnum(vec))) {
        set(vec);
        added.push(idx[0]); // idx[0] is the group index
      }
    });

    const keys = new Uint32Array(row + added.length);
    keys.set(groups.keys);
    keys.set(added, row);
    cols.groupby({ ...groups, keys });
  } else {
    enumerate(groups, values, vec => {
      if (!hash.has(keyEnum(vec))) set(vec);
    });
  }

  return cols.new(table);
}

/**
 * Invoke a callback for every combination of (group, value_0, value_1,
 * ...), with the last position varying fastest.
 *
 * @param {object} [groups] Group specification with `get`, `rows` and
 *  `size` properties. When present, position 0 of the enumeration ranges
 *  over the group indices and the group key values fill the first
 *  `groups.get.length` slots of the value vector.
 * @param {Array[]} values Arrays of values to enumerate.
 * @param {function(Array, Int32Array): void} callback Called with the
 *  current value vector and the current index vector. Both are reused
 *  and mutated between calls, so the callback must not retain them.
 */
function enumerate(groups, values, callback) {
  const offset = groups ? groups.get.length : 0;
  const pad = groups ? 1 : 0;
  const len = pad + values.length;
  const lens = new Int32Array(len); // number of choices per position
  const idxs = new Int32Array(len); // current choice per position
  const set = [];                   // per-position writers into the vector

  if (groups) {
    const { get, rows, size } = groups;
    lens[0] = size;
    set.push((vec, idx) => {
      const row = rows[idx];
      for (let i = 0; i < offset; ++i) {
        vec[i] = get[i](row);
      }
    });
  }

  values.forEach((a, i) => {
    const j = i + offset;
    lens[i + pad] = a.length;
    set.push((vec, idx) => vec[j] = a[idx]);
  });

  const vec = Array(offset + values.length);

  // initialize value vector
  for (let i = 0; i < len; ++i) {
    set[i](vec, 0);
  }
  callback(vec, idxs);

  // enumerate all combinations of values
  for (let i = len - 1; i >= 0;) {
    const idx = ++idxs[i];
    if (idx < lens[i]) {
      // advanced this position: emit and restart from the last position
      set[i](vec, idx);
      callback(vec, idxs);
      i = len - 1;
    } else {
      // position exhausted: reset it and carry to the position on its left
      idxs[i] = 0;
      set[i](vec, 0);
      --i;
    }
  }
}