vega-regression
Version:
Regression transform for Vega dataflows.
235 lines (228 loc) • 6.77 kB
JavaScript
import { regressionLoess, regressionPoly, regressionQuad, regressionPow, regressionExp, regressionLog, regressionLinear, regressionConstant, sampleCurve } from 'vega-statistics';
import { Transform, ingest } from 'vega-dataflow';
import { inherits, accessorName, hasOwnProperty, error, extent } from 'vega-util';
function partition (data, groupby) {
var groups = [],
get = function (f) {
return f(t);
},
map,
i,
n,
t,
k,
g;
// partition data points into stack groups
if (groupby == null) {
groups.push(data);
} else {
for (map = {}, i = 0, n = data.length; i < n; ++i) {
t = data[i];
k = groupby.map(get);
g = map[k];
if (!g) {
map[k] = g = [];
g.dims = k;
groups.push(g);
}
g.push(t);
}
}
return groups;
}
/**
* Compute locally-weighted regression fits for one or more data groups.
* @constructor
* @param {object} params - The parameters for this operator.
* @param {function(object): *} params.x - An accessor for the predictor data field.
* @param {function(object): *} params.y - An accessor for the predicted data field.
* @param {Array<function(object): *>} [params.groupby] - An array of accessors to groupby.
* @param {number} [params.bandwidth=0.3] - The loess bandwidth.
*/
function Loess(params) {
Transform.call(this, null, params);
}
Loess.Definition = {
'type': 'Loess',
'metadata': {
'generates': true
},
'params': [{
'name': 'x',
'type': 'field',
'required': true
}, {
'name': 'y',
'type': 'field',
'required': true
}, {
'name': 'groupby',
'type': 'field',
'array': true
}, {
'name': 'bandwidth',
'type': 'number',
'default': 0.3
}, {
'name': 'as',
'type': 'string',
'array': true
}]
};
inherits(Loess, Transform, {
transform(_, pulse) {
const out = pulse.fork(pulse.NO_SOURCE | pulse.NO_FIELDS);
if (!this.value || pulse.changed() || _.modified()) {
const source = pulse.materialize(pulse.SOURCE).source,
groups = partition(source, _.groupby),
names = (_.groupby || []).map(accessorName),
m = names.length,
as = _.as || [accessorName(_.x), accessorName(_.y)],
values = [];
groups.forEach(g => {
regressionLoess(g, _.x, _.y, _.bandwidth || 0.3).forEach(p => {
const t = {};
for (let i = 0; i < m; ++i) {
t[names[i]] = g.dims[i];
}
t[as[0]] = p[0];
t[as[1]] = p[1];
values.push(ingest(t));
});
});
if (this.value) out.rem = this.value;
this.value = out.add = out.source = values;
}
return out;
}
});
const Methods = {
constant: regressionConstant,
linear: regressionLinear,
log: regressionLog,
exp: regressionExp,
pow: regressionPow,
quad: regressionQuad,
poly: regressionPoly
};
const degreesOfFreedom = (method, order) => method === 'poly' ? order : method === 'quad' ? 2 : 1;
/**
* Compute regression fits for one or more data groups.
* @constructor
* @param {object} params - The parameters for this operator.
* @param {function(object): *} params.x - An accessor for the predictor data field.
* @param {function(object): *} params.y - An accessor for the predicted data field.
* @param {string} [params.method='linear'] - The regression method to apply.
* @param {Array<function(object): *>} [params.groupby] - An array of accessors to groupby.
* @param {Array<number>} [params.extent] - The domain extent over which to plot the regression line.
* @param {number} [params.order=3] - The polynomial order. Only applies to the 'poly' method.
*/
function Regression(params) {
Transform.call(this, null, params);
}
Regression.Definition = {
'type': 'Regression',
'metadata': {
'generates': true
},
'params': [{
'name': 'x',
'type': 'field',
'required': true
}, {
'name': 'y',
'type': 'field',
'required': true
}, {
'name': 'groupby',
'type': 'field',
'array': true
}, {
'name': 'method',
'type': 'string',
'default': 'linear',
'values': Object.keys(Methods)
}, {
'name': 'order',
'type': 'number',
'default': 3
}, {
'name': 'extent',
'type': 'number',
'array': true,
'length': 2
}, {
'name': 'params',
'type': 'boolean',
'default': false
}, {
'name': 'as',
'type': 'string',
'array': true
}]
};
inherits(Regression, Transform, {
transform(_, pulse) {
const out = pulse.fork(pulse.NO_SOURCE | pulse.NO_FIELDS);
if (!this.value || pulse.changed() || _.modified()) {
const source = pulse.materialize(pulse.SOURCE).source,
groups = partition(source, _.groupby),
names = (_.groupby || []).map(accessorName),
method = _.method || 'linear',
order = _.order == null ? 3 : _.order,
dof = degreesOfFreedom(method, order),
as = _.as || [accessorName(_.x), accessorName(_.y)],
fit = Methods[method],
values = [];
let domain = _.extent;
if (!hasOwnProperty(Methods, method)) {
error('Invalid regression method: ' + method);
}
if (domain != null) {
if (method === 'log' && domain[0] <= 0) {
pulse.dataflow.warn('Ignoring extent with values <= 0 for log regression.');
domain = null;
}
}
groups.forEach(g => {
const n = g.length;
if (n <= dof) {
pulse.dataflow.warn('Skipping regression with more parameters than data points.');
return;
}
const model = fit(g, _.x, _.y, order);
if (_.params) {
// if parameter vectors requested return those
values.push(ingest({
keys: g.dims,
coef: model.coef,
rSquared: model.rSquared
}));
return;
}
const dom = domain || extent(g, _.x),
add = p => {
const t = {};
for (let i = 0; i < names.length; ++i) {
t[names[i]] = g.dims[i];
}
t[as[0]] = p[0];
t[as[1]] = p[1];
values.push(ingest(t));
};
if (method === 'linear' || method === 'constant') {
// for linear or constant regression we only need the end points
dom.forEach(x => add([x, model.predict(x)]));
} else {
// otherwise return trend line sample points
sampleCurve(model.predict, dom, 25, 200).forEach(add);
}
});
if (this.value) out.rem = this.value;
this.value = out.add = out.source = values;
}
return out;
}
});
export { Loess as loess, Regression as regression };
//# sourceMappingURL=vega-regression.js.map