// vega-lite — compile/data/parse: assembles the dataflow graph for a model's data.
// Vega-Lite is a concise high-level language for interactive visualization.
import {AncestorParse, DataComponent} from './index.js';
import {
Data,
isGenerator,
isGraticuleGenerator,
isInlineData,
isNamedData,
isSequenceGenerator,
isUrlData,
DataSourceType,
ParseValue,
} from '../../data.js';
import {getDataSourcesForHandlingInvalidValues, DataSourcesForHandlingInvalidValues} from '../invalid/datasources.js';
import * as log from '../../log/index.js';
import {isPathMark} from '../../mark.js';
import {
isAggregate,
isBin,
isCalculate,
isDensity,
isExtent,
isFilter,
isFlatten,
isFold,
isImpute,
isJoinAggregate,
isLoess,
isLookup,
isPivot,
isQuantile,
isRegression,
isSample,
isStack,
isTimeUnit,
isWindow,
} from '../../transform.js';
import {deepEqual, mergeDeep} from '../../util.js';
import {getMarkPropOrConfig} from '../common.js';
import {isFacetModel, isLayerModel, isUnitModel, Model} from '../model.js';
import {requiresSelectionId} from '../selection/index.js';
import {materializeSelections} from '../selection/parse.js';
import {AggregateNode} from './aggregate.js';
import {BinNode} from './bin.js';
import {CalculateNode} from './calculate.js';
import {DataFlowNode, OutputNode} from './dataflow.js';
import {DensityTransformNode} from './density.js';
import {ExtentTransformNode} from './extent.js';
import {FacetNode} from './facet.js';
import {FilterNode} from './filter.js';
import {FilterInvalidNode} from './filterinvalid.js';
import {FlattenTransformNode} from './flatten.js';
import {FoldTransformNode} from './fold.js';
import {
getImplicitFromEncoding,
getImplicitFromFilterTransform,
getImplicitFromSelection,
ParseNode,
} from './formatparse.js';
import {GeoJSONNode} from './geojson.js';
import {GeoPointNode} from './geopoint.js';
import {GraticuleNode} from './graticule.js';
import {IdentifierNode} from './identifier.js';
import {ImputeNode} from './impute.js';
import {JoinAggregateTransformNode} from './joinaggregate.js';
import {makeJoinAggregateFromFacet} from './joinaggregatefacet.js';
import {LoessTransformNode} from './loess.js';
import {LookupNode} from './lookup.js';
import {PivotTransformNode} from './pivot.js';
import {QuantileTransformNode} from './quantile.js';
import {RegressionTransformNode} from './regression.js';
import {SampleTransformNode} from './sample.js';
import {SequenceNode} from './sequence.js';
import {SourceNode} from './source.js';
import {StackNode} from './stack.js';
import {TimeUnitNode} from './timeunit.js';
import {WindowTransformNode} from './window.js';
/**
 * Finds an existing source node that the given data can be merged into, or
 * returns `null` if no compatible source exists.
 *
 * Two datasets are mergeable when their names do not conflict, their topojson
 * `feature`/`mesh` formats agree, and their contents match (deep-equal inline
 * values, identical URLs, or matching dataset names).
 *
 * @param data The data definition we want to find a home for.
 * @param sources The existing source nodes to scan.
 * @returns The matching source node, or `null` when none is compatible.
 */
export function findSource(data: Data, sources: SourceNode[]) {
  // The incoming data's format depends only on `data`, not on the candidate
  // source, so extract it once instead of on every loop iteration.
  // (`as any` because generator data in the `Data` union has no `format`.)
  const formatMesh = (data as any).format?.mesh;
  const formatFeature = (data as any).format?.feature;

  for (const other of sources) {
    const otherData = other.data;

    // if both datasets have a name defined, we cannot merge
    if (data.name && other.hasName() && data.name !== other.dataName) {
      continue;
    }

    // feature and mesh are mutually exclusive
    const otherFeature = otherData.format?.feature;
    if (formatMesh && otherFeature) {
      continue;
    }

    // we have to extract the same feature or mesh
    if ((formatFeature || otherFeature) && formatFeature !== otherFeature) {
      continue;
    }

    const otherMesh = otherData.format?.mesh;
    if ((formatMesh || otherMesh) && formatMesh !== otherMesh) {
      continue;
    }

    if (isInlineData(data) && isInlineData(otherData)) {
      if (deepEqual(data.values, otherData.values)) {
        return other;
      }
    } else if (isUrlData(data) && isUrlData(otherData)) {
      if (data.url === otherData.url) {
        return other;
      }
    } else if (isNamedData(data)) {
      if (data.name === other.dataName) {
        return other;
      }
    }
  }
  return null;
}
/**
 * Returns the root dataflow node for this model's data: either a (possibly
 * shared) source node when the model defines its own data or is the spec
 * root, or the parent's facet root / main output when data is inherited.
 */
function parseRoot(model: Model, sources: SourceNode[]): DataFlowNode {
  // Guard: no data of its own and a parent exists — inherit the parent's
  // facet root when present, otherwise the parent's main output.
  if (!model.data && model.parent) {
    const parentData = model.parent.component.data;
    return parentData.facetRoot ? parentData.facetRoot : parentData.main;
  }

  // data: null explicitly severs this model from the parent's data, so give
  // it a standalone empty source.
  if (model.data === null) {
    const emptySource = new SourceNode({values: []});
    sources.push(emptySource);
    return emptySource;
  }

  // Prefer sharing a compatible existing source over creating a new one.
  const existingSource = findSource(model.data, sources);
  if (existingSource === null) {
    const newSource = new SourceNode(model.data);
    sources.push(newSource);
    return newSource;
  }

  if (!isGenerator(model.data)) {
    existingSource.data.format = mergeDeep({}, model.data.format, existingSource.data.format);
  }
  // if the new source has a name but the existing one does not, we can set it
  if (!existingSource.hasName() && model.data.name) {
    existingSource.dataName = model.data.name;
  }
  return existingSource;
}
/**
 * Parses a transform array into a chain of connected dataflow nodes.
 *
 * `head` advances to each newly created node. When a transform produces new
 * fields with a known parse type (`derivedType`), those fields are registered
 * in `ancestorParse` so that descendant models do not try to re-parse them.
 *
 * @param head The current tail of the dataflow chain to append to.
 * @param model The model whose `transforms` array is parsed.
 * @param ancestorParse Accumulated parse information from ancestors; mutated
 *   with fields produced by these transforms.
 * @returns The new tail of the dataflow chain.
 */
export function parseTransformArray(head: DataFlowNode, model: Model, ancestorParse: AncestorParse): DataFlowNode {
  // Generates distinct output names for consecutive lookup transforms.
  let lookupCounter = 0;

  for (const t of model.transforms) {
    // Parse type of the fields this transform produces; when set, the
    // produced fields are registered in ancestorParse after the dispatch.
    let derivedType: ParseValue = undefined;
    // The node whose producedFields() get registered with `derivedType`.
    let transformNode: DataFlowNode;

    if (isCalculate(t)) {
      transformNode = head = new CalculateNode(head, t);
      derivedType = 'derived';
    } else if (isFilter(t)) {
      // A filter predicate may reference fields that need implicit parsing;
      // insert the parse node (if any) before the filter itself. Note that
      // the parse node — not the filter — is what gets registered.
      const implicit = getImplicitFromFilterTransform(t);
      transformNode = head = ParseNode.makeWithAncestors(head, {}, implicit, ancestorParse) ?? head;

      head = new FilterNode(head, model, t.filter);
    } else if (isBin(t)) {
      transformNode = head = BinNode.makeFromTransform(head, t, model);
      derivedType = 'number';
    } else if (isTimeUnit(t)) {
      derivedType = 'date';
      const parsedAs = ancestorParse.getWithExplicit(t.field);

      // Create parse node because the input to time unit is always date.
      if (parsedAs.value === undefined) {
        head = new ParseNode(head, {[t.field]: derivedType});
        ancestorParse.set(t.field, derivedType, false);
      }
      transformNode = head = TimeUnitNode.makeFromTransform(head, t);
    } else if (isAggregate(t)) {
      transformNode = head = AggregateNode.makeFromTransform(head, t);
      derivedType = 'number';
      // Aggregation constructs new tuples, so selections that rely on
      // identifiers need a fresh identifier transform afterwards.
      if (requiresSelectionId(model)) {
        head = new IdentifierNode(head);
      }
    } else if (isLookup(t)) {
      transformNode = head = LookupNode.make(head, model, t, lookupCounter++);
      derivedType = 'derived';
    } else if (isWindow(t)) {
      transformNode = head = new WindowTransformNode(head, t);
      derivedType = 'number';
    } else if (isJoinAggregate(t)) {
      transformNode = head = new JoinAggregateTransformNode(head, t);
      derivedType = 'number';
    } else if (isStack(t)) {
      transformNode = head = StackNode.makeFromTransform(head, t);
      derivedType = 'derived';
    } else if (isFold(t)) {
      transformNode = head = new FoldTransformNode(head, t);
      derivedType = 'derived';
    } else if (isExtent(t)) {
      transformNode = head = new ExtentTransformNode(head, t);
      derivedType = 'derived';
    } else if (isFlatten(t)) {
      transformNode = head = new FlattenTransformNode(head, t);
      derivedType = 'derived';
    } else if (isPivot(t)) {
      transformNode = head = new PivotTransformNode(head, t);
      derivedType = 'derived';
    } else if (isSample(t)) {
      // Sampling produces no new fields, so nothing is registered.
      head = new SampleTransformNode(head, t);
    } else if (isImpute(t)) {
      transformNode = head = ImputeNode.makeFromTransform(head, t);
      derivedType = 'derived';
    } else if (isDensity(t)) {
      transformNode = head = new DensityTransformNode(head, t);
      derivedType = 'derived';
    } else if (isQuantile(t)) {
      transformNode = head = new QuantileTransformNode(head, t);
      derivedType = 'derived';
    } else if (isRegression(t)) {
      transformNode = head = new RegressionTransformNode(head, t);
      derivedType = 'derived';
    } else if (isLoess(t)) {
      transformNode = head = new LoessTransformNode(head, t);
      derivedType = 'derived';
    } else {
      // Unknown transform: warn and skip rather than failing the whole parse.
      log.warn(log.message.invalidTransformIgnored(t));
      continue;
    }

    // Record the parse type of every field this transform produced so that
    // descendant parse nodes treat them as already parsed.
    if (transformNode && derivedType !== undefined) {
      for (const field of transformNode.producedFields() ?? []) {
        ancestorParse.set(field, derivedType, false);
      }
    }
  }

  return head;
}
/*
Description of the dataflow (http://asciiflow.com/):
+--------+
| Source |
+---+----+
|
v
FormatParse
(explicit)
|
v
Transforms
(Filter, Calculate, Binning, TimeUnit, Aggregate, Window, ...)
|
v
FormatParse
(implicit)
|
v
Binning (in `encoding`)
|
v
Timeunit (in `encoding`)
|
v
Formula From Sort Array
|
v
+--+--+
| Raw |
+-----+
|
v
Aggregate (in `encoding`)
|
v
Stack (in `encoding`)
|
v
+- - - - - - - - - - -+
| PreFilterInvalid | - - - -> scale domains
|(when scales need it)|
+- - - - - - - - - - -+
|
v
Invalid Filter (if the main data source needs it)
|
v
+----------+
| Main | - - - -> scale domains
+----------+
|
v
+- - - - - - - - - - -+
| PostFilterInvalid | - - - -> scale domains
|(when scales need it)|
+- - - - - - - - - - -+
|
v
+-------+
| Facet |----> "column", "column-layout", and "row"
+-------+
|
v
...Child data...
*/
/**
 * Parses the data of the given model and returns its data component,
 * assembling the dataflow sketched in the diagram above: source → explicit
 * parse → transforms → implicit parse → encoding-driven nodes → "raw" output
 * → aggregate/stack → invalid-value handling → "main" output → optional
 * facet root.
 */
export function parseData(model: Model): DataComponent {
  let head = parseRoot(model, model.component.data.sources);
  const {outputNodes, outputNodeRefCounts} = model.component.data;

  const data = model.data;
  // A model with its own (non-named) data starts a fresh parse chain;
  // otherwise it extends a clone of the parent's ancestorParse.
  const newData = data && (isGenerator(data) || isUrlData(data) || isInlineData(data));
  const ancestorParse =
    !newData && model.parent ? model.parent.component.data.ancestorParse.clone() : new AncestorParse();

  if (isGenerator(data)) {
    // insert generator transform
    if (isSequenceGenerator(data)) {
      head = new SequenceNode(head, data.sequence);
    } else if (isGraticuleGenerator(data)) {
      head = new GraticuleNode(head, data.graticule);
    }

    // no parsing necessary for generator
    ancestorParse.parseNothing = true;
  } else if (data?.format?.parse === null) {
    // format.parse: null means disable parsing
    ancestorParse.parseNothing = true;
  }

  head = ParseNode.makeExplicit(head, model, ancestorParse) ?? head;

  // Default discrete selections require an identifier transform to
  // uniquely identify data points. Add this transform at the head of
  // the pipeline such that the identifier field is available for all
  // subsequent datasets. During optimization, we will remove this
  // transform if it proves to be unnecessary. Additional identifier
  // transforms will be necessary when new tuples are constructed
  // (e.g., post-aggregation).
  head = new IdentifierNode(head);

  // HACK: This is equivalent for merging bin extent for union scale.
  // FIXME(https://github.com/vega/vega-lite/issues/2270): Correctly merge extent / bin node for shared bin scale
  const parentIsLayer = model.parent && isLayerModel(model.parent);
  if (isUnitModel(model) || isFacetModel(model)) {
    if (parentIsLayer) {
      head = BinNode.makeFromEncoding(head, model) ?? head;
    }
  }

  if (model.transforms.length > 0) {
    head = parseTransformArray(head, model, ancestorParse);
  }

  // create parse nodes for fields that need to be parsed (or flattened) implicitly
  const implicitSelection = getImplicitFromSelection(model);
  const implicitEncoding = getImplicitFromEncoding(model);
  head = ParseNode.makeWithAncestors(head, {}, {...implicitSelection, ...implicitEncoding}, ancestorParse) ?? head;

  if (isUnitModel(model)) {
    head = GeoJSONNode.parseAll(head, model);
    head = GeoPointNode.parseAll(head, model);
  }

  if (isUnitModel(model) || isFacetModel(model)) {
    if (!parentIsLayer) {
      // Bin nodes for non-layer children were not added above; add them here.
      head = BinNode.makeFromEncoding(head, model) ?? head;
    }

    head = TimeUnitNode.makeFromEncoding(head, model) ?? head;
    head = CalculateNode.parseAllForSortIndex(head, model);
  }

  // add an output node pre aggregation
  const raw = (head = makeOutputNode(DataSourceType.Raw, model, head));

  if (isUnitModel(model)) {
    const agg = AggregateNode.makeFromEncoding(head, model);
    if (agg) {
      head = agg;

      // Aggregation constructs new tuples (see identifier note above), so
      // selections that need identifiers require a fresh identifier node.
      if (requiresSelectionId(model)) {
        head = new IdentifierNode(head);
      }
    }

    head = ImputeNode.makeFromEncoding(head, model) ?? head;
    head = StackNode.makeFromEncoding(head, model) ?? head;
  }

  // Invalid-value handling: depending on mark type and config, scales may
  // need data from before and/or after invalid values are filtered out.
  let preFilterInvalid: OutputNode | undefined;
  let dataSourcesForHandlingInvalidValues: DataSourcesForHandlingInvalidValues | undefined;
  if (isUnitModel(model)) {
    const {markDef, mark, config} = model;
    const invalid = getMarkPropOrConfig('invalid', markDef, config);
    const {marks, scales} = (dataSourcesForHandlingInvalidValues = getDataSourcesForHandlingInvalidValues({
      invalid,
      isPath: isPathMark(mark),
    }));
    if (marks !== scales && scales === 'include-invalid-values') {
      // Create a separate preFilterInvalid dataSource if scales need pre-filter data but marks needs post-filter.
      preFilterInvalid = head = makeOutputNode(DataSourceType.PreFilterInvalid, model, head);
    }
    if (marks === 'exclude-invalid-values') {
      head = FilterInvalidNode.make(head, model, dataSourcesForHandlingInvalidValues) ?? head;
    }
  }

  // output "main" node for marks
  const main = (head = makeOutputNode(DataSourceType.Main, model, head));

  let postFilterInvalid: OutputNode | undefined;
  if (isUnitModel(model) && dataSourcesForHandlingInvalidValues) {
    const {marks, scales} = dataSourcesForHandlingInvalidValues;
    if (marks === 'include-invalid-values' && scales === 'exclude-invalid-values') {
      // Create a separate postFilterInvalid dataSource if scales need post-filter data but marks needs pre-filter.
      head = FilterInvalidNode.make(head, model, dataSourcesForHandlingInvalidValues) ?? head;
      postFilterInvalid = head = makeOutputNode(DataSourceType.PostFilterInvalid, model, head);
    }
  }

  if (isUnitModel(model)) {
    materializeSelections(model, main);
  }

  // add facet marker
  let facetRoot = null;
  if (isFacetModel(model)) {
    const facetName = model.getName('facet');

    // Derive new aggregate for facet's sort field
    // augment data source with new fields for crossed facet
    head = makeJoinAggregateFromFacet(head, model.facet) ?? head;

    facetRoot = new FacetNode(head, model, facetName, main.getSource());
    outputNodes[facetName] = facetRoot;
  }

  return {
    ...model.component.data,
    outputNodes,
    outputNodeRefCounts,
    raw,
    main,
    facetRoot,
    ancestorParse,
    preFilterInvalid,
    postFilterInvalid,
  };
}
function makeOutputNode(dataSourceType: DataSourceType, model: Model, head: DataFlowNode) {
const {outputNodes, outputNodeRefCounts} = model.component.data;
const name = model.getDataName(dataSourceType);
const node = new OutputNode(head, name, dataSourceType, outputNodeRefCounts);
outputNodes[name] = node;
return node;
}