UNPKG

vega-transforms

Version:

Data processing transforms for Vega dataflows.

110 lines (98 loc) 4.59 kB
import {partition} from './util/util.js';
import {randomKDE, sampleCurve} from 'vega-statistics';
import {Transform, ingest} from 'vega-dataflow';
import {accessorName, extent, inherits} from 'vega-util';

/**
 * Compute kernel density estimates (KDE) for one or more data groups.
 * @constructor
 * @param {object} params - The parameters for this operator.
 * @param {Array<function(object): *>} [params.groupby] - An array of accessors
 *   to groupby.
 * @param {function(object): *} params.field - An accessor for the data field
 *   to estimate.
 * @param {number} [params.bandwidth=0] - The KDE kernel bandwidth.
 *   If zero or unspecified, the bandwidth is automatically determined.
 * @param {boolean} [params.counts=false] - A boolean flag indicating if the
 *   output values should be probability estimates (false, default) or
 *   smoothed counts (true).
 * @param {boolean} [params.cumulative=false] - A boolean flag indicating if a
 *   density (false) or cumulative distribution (true) should be generated.
 * @param {Array<number>} [params.extent] - The domain extent over which to
 *   plot the density. If unspecified, the [min, max] data extent is used.
 * @param {string} [params.resolve='independent'] - Indicates how parameters for
 *   multiple densities should be resolved. If "independent" (the default), each
 *   density may have its own domain extent and dynamic number of curve sample
 *   steps. If "shared", the KDE transform will ensure that all densities are
 *   defined over a shared domain and curve steps, enabling stacking.
 * @param {number} [params.minsteps=25] - The minimum number of curve samples
 *   for plotting the density.
 * @param {number} [params.maxsteps=200] - The maximum number of curve samples
 *   for plotting the density.
 * @param {number} [params.steps] - The exact number of curve samples for
 *   plotting the density. If specified, overrides both minsteps and maxsteps
 *   to set an exact number of uniform samples. Useful in conjunction with
 *   a fixed extent to ensure consistent sample points for stacked densities.
 * @param {Array<string>} [params.as=['value', 'density']] - The output field
 *   names for the sampled value and its (optionally scaled) estimate.
 */
export default function KDE(params) {
  Transform.call(this, null, params);
}

KDE.Definition = {
  'type': 'KDE',
  'metadata': {'generates': true},
  'params': [
    { 'name': 'groupby', 'type': 'field', 'array': true },
    { 'name': 'field', 'type': 'field', 'required': true },
    { 'name': 'cumulative', 'type': 'boolean', 'default': false },
    { 'name': 'counts', 'type': 'boolean', 'default': false },
    { 'name': 'bandwidth', 'type': 'number', 'default': 0 },
    { 'name': 'extent', 'type': 'number', 'array': true, 'length': 2 },
    { 'name': 'resolve', 'type': 'enum', 'values': ['shared', 'independent'], 'default': 'independent' },
    { 'name': 'steps', 'type': 'number' },
    { 'name': 'minsteps', 'type': 'number', 'default': 25 },
    { 'name': 'maxsteps', 'type': 'number', 'default': 200 },
    { 'name': 'as', 'type': 'string', 'array': true, 'default': ['value', 'density'] }
  ]
};

inherits(KDE, Transform, {
  /**
   * Recompute the sampled density curves when there is no cached output,
   * the input pulse reports changes, or any parameter was modified.
   * Otherwise the forked pulse is returned without adds or removes.
   * @param {object} _ - The operator parameters (see the constructor docs).
   * @param {object} pulse - The input pulse.
   * @return {object} The forked output pulse. On recompute, previously
   *   generated tuples are placed in `rem` and the new tuples in both
   *   `add` and `source`.
   */
  transform(_, pulse) {
    const out = pulse.fork(pulse.NO_SOURCE | pulse.NO_FIELDS);

    if (!this.value || pulse.changed() || _.modified()) {
      const source = pulse.materialize(pulse.SOURCE).source;
      // NOTE(review): each group is used below as an array of numeric field
      // values that also carries its groupby values in `dims` -- confirm
      // against partition() in ./util/util.js.
      const groups = partition(source, _.groupby, _.field);
      const names = (_.groupby || []).map(accessorName);
      const bandwidth = _.bandwidth;
      // The method name is derived from a boolean flag, so it can only ever
      // be 'cdf' or 'pdf'; no further validation is needed.
      const method = _.cumulative ? 'cdf' : 'pdf';
      const as = _.as || ['value', 'density'];
      const counts = _.counts;
      const values = [];

      // An explicit step count overrides both the minimum and the maximum.
      let domain = _.extent;
      let minsteps = _.steps || _.minsteps || 25;
      const maxsteps = _.steps || _.maxsteps || 200;

      if (_.resolve === 'shared') {
        // All groups are sampled over one domain with one fixed step count.
        if (!domain) domain = extent(source, _.field);
        // maxsteps already equals _.steps whenever steps is specified.
        minsteps = maxsteps;
      }

      groups.forEach(g => {
        const density = randomKDE(g, bandwidth)[method];
        // Smoothed counts scale the estimate by the group size.
        const scale = counts ? g.length : 1;
        const local = domain || extent(g);

        sampleCurve(density, local, minsteps, maxsteps).forEach(v => {
          const t = {};
          names.forEach((name, i) => {
            t[name] = g.dims[i];
          });
          t[as[0]] = v[0];
          t[as[1]] = v[1] * scale;
          values.push(ingest(t));
        });
      });

      // Retract the previously generated tuples before emitting new ones.
      if (this.value) out.rem = this.value;
      this.value = out.add = out.source = values;
    }

    return out;
  }
});