UNPKG

vega-regression

Version:

Regression transform for Vega dataflows.

126 lines (109 loc) 4.33 kB
import partition from './partition.js';
import {Transform, ingest} from 'vega-dataflow';
import {
  regressionConstant,
  regressionExp,
  regressionLinear,
  regressionLog,
  regressionPoly,
  regressionPow,
  regressionQuad,
  sampleCurve
} from 'vega-statistics';
import {accessorName, error, extent, hasOwnProperty, inherits} from 'vega-util';

// Maps each accepted value of the `method` parameter to its fitting routine.
// The keys of this object are also published as the allowed `method` values
// in Regression.Definition.
const Methods = {
  constant: regressionConstant,
  linear: regressionLinear,
  log: regressionLog,
  exp: regressionExp,
  pow: regressionPow,
  quad: regressionQuad,
  poly: regressionPoly
};

/**
 * Group-size threshold for a regression method: a group is fitted only when
 * it holds strictly more data points than the value returned here.
 * @param {string} method - The regression method name.
 * @param {number} order - The polynomial order (used by 'poly' only).
 * @return {number} `order` for 'poly', 2 for 'quad', and 1 for every other
 *   method.
 */
// NOTE(review): 'constant' falls through to 1, so single-point groups are
// skipped for constant fits as well -- confirm this is intended.
const degreesOfFreedom = (method, order) =>
  method === 'poly' ? order : method === 'quad' ? 2 : 1;

/**
 * Compute regression fits for one or more data groups.
 * @constructor
 * @param {object} params - The parameters for this operator.
 * @param {function(object): *} params.x - An accessor for the predictor data field.
 * @param {function(object): *} params.y - An accessor for the predicted data field.
 * @param {string} [params.method='linear'] - The regression method to apply.
 * @param {Array<function(object): *>} [params.groupby] - An array of accessors to groupby.
 * @param {Array<number>} [params.extent] - The domain extent over which to plot the regression line.
 * @param {number} [params.order=3] - The polynomial order. Only applies to the 'poly' method.
 * @param {boolean} [params.params=false] - If true, emit one tuple per group
 *   holding the fitted model's `coef` and `rSquared` (plus the group `keys`)
 *   instead of trend line points.
 * @param {Array<string>} [params.as] - Output field names for the trend line
 *   x and y values. Defaults to the names of the x and y accessors.
 */
export default function Regression(params) {
  Transform.call(this, null, params);
}

Regression.Definition = {
  'type': 'Regression',
  'metadata': {'generates': true},
  'params': [
    { 'name': 'x', 'type': 'field', 'required': true },
    { 'name': 'y', 'type': 'field', 'required': true },
    { 'name': 'groupby', 'type': 'field', 'array': true },
    { 'name': 'method', 'type': 'string', 'default': 'linear', 'values': Object.keys(Methods) },
    { 'name': 'order', 'type': 'number', 'default': 3 },
    { 'name': 'extent', 'type': 'number', 'array': true, 'length': 2 },
    { 'name': 'params', 'type': 'boolean', 'default': false },
    { 'name': 'as', 'type': 'string', 'array': true }
  ]
};

inherits(Regression, Transform, {
  /**
   * Fit a regression model per group and emit the resulting tuples.
   *
   * The fits are recomputed only when there is no cached result, the input
   * pulse reports changes, or a parameter was modified; otherwise the forked
   * pulse is returned without additions or removals.
   * @param {object} _ - The operator parameters (see the constructor docs).
   * @param {object} pulse - The input pulse.
   * @return {object} A fork of the input pulse. After a recompute its `add`
   *   and `source` hold the new tuples and `rem` holds the previous ones.
   */
  transform(_, pulse) {
    const out = pulse.fork(pulse.NO_SOURCE | pulse.NO_FIELDS);

    if (!this.value || pulse.changed() || _.modified()) {
      const source = pulse.materialize(pulse.SOURCE).source;
      const groups = partition(source, _.groupby);
      const names = (_.groupby || []).map(accessorName);
      const method = _.method || 'linear';
      const order = _.order == null ? 3 : _.order;
      const dof = degreesOfFreedom(method, order);
      const as = _.as || [accessorName(_.x), accessorName(_.y)];
      const fit = Methods[method];
      const values = [];

      let domain = _.extent;

      // own-property test so inherited names such as 'toString' are rejected
      if (!hasOwnProperty(Methods, method)) {
        error('Invalid regression method: ' + method);
      }

      if (domain != null) {
        if (method === 'log' && domain[0] <= 0) {
          pulse.dataflow.warn('Ignoring extent with values <= 0 for log regression.');
          // fall back to the per-group data extent computed below
          domain = null;
        }
      }

      groups.forEach(g => {
        const n = g.length;
        if (n <= dof) {
          pulse.dataflow.warn('Skipping regression with more parameters than data points.');
          return;
        }

        const model = fit(g, _.x, _.y, order);

        if (_.params) {
          // if parameter vectors requested return those
          values.push(ingest({
            keys: g.dims,
            coef: model.coef,
            rSquared: model.rSquared
          }));
          return;
        }

        // use the user-supplied extent if present, else this group's x extent
        const dom = domain || extent(g, _.x);

        // build one output tuple from an [x, y] pair; g.dims[i] is written
        // under the name of the i-th groupby accessor
        const add = p => {
          const t = {};
          for (let i = 0; i < names.length; ++i) {
            t[names[i]] = g.dims[i];
          }
          t[as[0]] = p[0];
          t[as[1]] = p[1];
          values.push(ingest(t));
        };

        if (method === 'linear' || method === 'constant') {
          // for linear or constant regression we only need the end points
          dom.forEach(x => add([x, model.predict(x)]));
        } else {
          // otherwise return trend line sample points
          // (25 and 200 are presumably sampleCurve's min/max step counts --
          // confirm against the vega-statistics documentation)
          sampleCurve(model.predict, dom, 25, 200).forEach(add);
        }
      });

      // retract the tuples emitted by the previous evaluation, if any
      if (this.value) out.rem = this.value;
      this.value = out.add = out.source = values;
    }

    return out;
  }
});