/*
 * UNPKG
 *
 * vega-dataflow
 *
 * Version:
 *
 * Reactive dataflow processing.
 *
 * 1,584 lines (1,521 loc) 67.5 kB
 */
import { identity, isFunction, constant, array, isArray, error, id, truthy, debounce, extend, visitArray, inherits, logger, Error, hasOwnProperty } from 'vega-util'; import { responseType, read, loader } from 'vega-loader'; import { defaultLocale } from 'vega-format'; function UniqueList(idFunc) { const $ = idFunc || identity, list = [], ids = {}; list.add = _ => { const id = $(_); if (!ids[id]) { ids[id] = 1; list.push(_); } return list; }; list.remove = _ => { const id = $(_); if (ids[id]) { ids[id] = 0; const idx = list.indexOf(_); if (idx >= 0) list.splice(idx, 1); } return list; }; return list; } /** * Invoke and await a potentially async callback function. If * an error occurs, trap it and route to Dataflow.error. * @param {Dataflow} df - The dataflow instance * @param {function} callback - A callback function to invoke * and then await. The dataflow will be passed as the single * argument to the function. */ async function asyncCallback (df, callback) { try { await callback(df); } catch (err) { df.error(err); } } const TUPLE_ID_KEY = Symbol('vega_id'); let TUPLE_ID = 1; /** * Checks if an input value is a registered tuple. * @param {*} t - The value to check. * @return {boolean} True if the input is a tuple, false otherwise. */ function isTuple(t) { return !!(t && tupleid(t)); } /** * Returns the id of a tuple. * @param {object} t - The input tuple. * @return {*} the tuple id. */ function tupleid(t) { return t[TUPLE_ID_KEY]; } /** * Sets the id of a tuple. * @param {object} t - The input tuple. * @param {*} id - The id value to set. * @return {object} the input tuple. */ function setid(t, id) { t[TUPLE_ID_KEY] = id; return t; } /** * Ingest an object or value as a data tuple. * If the input value is an object, an id field will be added to it. For * efficiency, the input object is modified directly. A copy is not made. * If the input value is a literal, it will be wrapped in a new object * instance, with the value accessible as the 'data' property. 
* @param datum - The value to ingest. * @return {object} The ingested data tuple. */ function ingest$1(datum) { const t = datum === Object(datum) ? datum : { data: datum }; return tupleid(t) ? t : setid(t, TUPLE_ID++); } /** * Given a source tuple, return a derived copy. * @param {object} t - The source tuple. * @return {object} The derived tuple. */ function derive(t) { return rederive(t, ingest$1({})); } /** * Rederive a derived tuple by copying values from the source tuple. * @param {object} t - The source tuple. * @param {object} d - The derived tuple. * @return {object} The derived tuple. */ function rederive(t, d) { for (const k in t) d[k] = t[k]; return d; } /** * Replace an existing tuple with a new tuple. * @param {object} t - The existing data tuple. * @param {object} d - The new tuple that replaces the old. * @return {object} The new tuple. */ function replace(t, d) { return setid(d, tupleid(t)); } /** * Generate an augmented comparator function that provides stable * sorting by tuple id when the given comparator produces ties. * @param {function} cmp - The comparator to augment. * @param {function} [f] - Optional tuple accessor function. * @return {function} An augmented comparator function. */ function stableCompare(cmp, f) { return !cmp ? null : f ? (a, b) => cmp(a, b) || tupleid(f(a)) - tupleid(f(b)) : (a, b) => cmp(a, b) || tupleid(a) - tupleid(b); } function isChangeSet(v) { return v && v.constructor === changeset; } function changeset() { const add = [], // insert tuples rem = [], // remove tuples mod = [], // modify tuples remp = [], // remove by predicate modp = []; // modify by predicate let clean = null, reflow = false; return { constructor: changeset, insert(t) { const d = array(t), n = d.length; for (let i = 0; i < n; ++i) add.push(d[i]); return this; }, remove(t) { const a = isFunction(t) ? 
remp : rem, d = array(t), n = d.length; for (let i = 0; i < n; ++i) a.push(d[i]); return this; }, modify(t, field, value) { const m = { field: field, value: constant(value) }; if (isFunction(t)) { m.filter = t; modp.push(m); } else { m.tuple = t; mod.push(m); } return this; }, encode(t, set) { if (isFunction(t)) modp.push({ filter: t, field: set });else mod.push({ tuple: t, field: set }); return this; }, clean(value) { clean = value; return this; }, reflow() { reflow = true; return this; }, pulse(pulse, tuples) { const cur = {}, out = {}; let i, n, m, f, t, id; // build lookup table of current tuples for (i = 0, n = tuples.length; i < n; ++i) { cur[tupleid(tuples[i])] = 1; } // process individual tuples to remove for (i = 0, n = rem.length; i < n; ++i) { t = rem[i]; cur[tupleid(t)] = -1; } // process predicate-based removals for (i = 0, n = remp.length; i < n; ++i) { f = remp[i]; tuples.forEach(t => { if (f(t)) cur[tupleid(t)] = -1; }); } // process all add tuples for (i = 0, n = add.length; i < n; ++i) { t = add[i]; id = tupleid(t); if (cur[id]) { // tuple already resides in dataset // if flagged for both add and remove, cancel cur[id] = 1; } else { // tuple does not reside in dataset, add pulse.add.push(ingest$1(add[i])); } } // populate pulse rem list for (i = 0, n = tuples.length; i < n; ++i) { t = tuples[i]; if (cur[tupleid(t)] < 0) pulse.rem.push(t); } // modify helper method function modify(t, f, v) { if (v) { t[f] = v(t); } else { pulse.encode = f; } if (!reflow) out[tupleid(t)] = t; } // process individual tuples to modify for (i = 0, n = mod.length; i < n; ++i) { m = mod[i]; t = m.tuple; f = m.field; id = cur[tupleid(t)]; if (id > 0) { modify(t, f, m.value); pulse.modifies(f); } } // process predicate-based modifications for (i = 0, n = modp.length; i < n; ++i) { m = modp[i]; f = m.filter; tuples.forEach(t => { if (f(t) && cur[tupleid(t)] > 0) { modify(t, m.field, m.value); } }); pulse.modifies(m.field); } // upon reflow request, populate mod with all 
non-removed tuples // otherwise, populate mod with modified tuples only if (reflow) { pulse.mod = rem.length || remp.length ? tuples.filter(t => cur[tupleid(t)] > 0) : tuples.slice(); } else { for (id in out) pulse.mod.push(out[id]); } // set pulse garbage collection request if (clean || clean == null && (rem.length || remp.length)) { pulse.clean(true); } return pulse; } }; } const CACHE = '_:mod:_'; /** * Hash that tracks modifications to assigned values. * Callers *must* use the set method to update values. */ function Parameters() { Object.defineProperty(this, CACHE, { writable: true, value: {} }); } Parameters.prototype = { /** * Set a parameter value. If the parameter value changes, the parameter * will be recorded as modified. * @param {string} name - The parameter name. * @param {number} index - The index into an array-value parameter. Ignored if * the argument is undefined, null or less than zero. * @param {*} value - The parameter value to set. * @param {boolean} [force=false] - If true, records the parameter as modified * even if the value is unchanged. * @return {Parameters} - This parameter object. */ set(name, index, value, force) { const o = this, v = o[name], mod = o[CACHE]; if (index != null && index >= 0) { if (v[index] !== value || force) { v[index] = value; mod[index + ':' + name] = -1; mod[name] = -1; } } else if (v !== value || force) { o[name] = value; mod[name] = isArray(value) ? 1 + value.length : -1; } return o; }, /** * Tests if one or more parameters has been modified. If invoked with no * arguments, returns true if any parameter value has changed. If the first * argument is array, returns trues if any parameter name in the array has * changed. Otherwise, tests if the given name and optional array index has * changed. * @param {string} name - The parameter name to test. * @param {number} [index=undefined] - The parameter array index to test. * @return {boolean} - Returns true if a queried parameter was modified. 
*/ modified(name, index) { const mod = this[CACHE]; if (!arguments.length) { for (const k in mod) { if (mod[k]) return true; } return false; } else if (isArray(name)) { for (let k = 0; k < name.length; ++k) { if (mod[name[k]]) return true; } return false; } return index != null && index >= 0 ? index + 1 < mod[name] || !!mod[index + ':' + name] : !!mod[name]; }, /** * Clears the modification records. After calling this method, * all parameters are considered unmodified. */ clear() { this[CACHE] = {}; return this; } }; let OP_ID = 0; const PULSE = 'pulse', NO_PARAMS = new Parameters(); // Boolean Flags const SKIP$1 = 1, MODIFIED = 2; /** * An Operator is a processing node in a dataflow graph. * Each operator stores a value and an optional value update function. * Operators can accept a hash of named parameters. Parameter values can * either be direct (JavaScript literals, arrays, objects) or indirect * (other operators whose values will be pulled dynamically). Operators * included as parameters will have this operator added as a dependency. * @constructor * @param {*} [init] - The initial value for this operator. * @param {function(object, Pulse)} [update] - An update function. Upon * evaluation of this operator, the update function will be invoked and the * return value will be used as the new value of this operator. * @param {object} [params] - The parameters for this operator. * @param {boolean} [react=true] - Flag indicating if this operator should * listen for changes to upstream operators included as parameters. * @see parameters */ function Operator(init, update, params, react) { this.id = ++OP_ID; this.value = init; this.stamp = -1; this.rank = -1; this.qrank = -1; this.flags = 0; if (update) { this._update = update; } if (params) this.parameters(params, react); } function flag(bit) { return function (state) { const f = this.flags; if (arguments.length === 0) return !!(f & bit); this.flags = state ? 
f | bit : f & ~bit; return this; }; } Operator.prototype = { /** * Returns a list of target operators dependent on this operator. * If this list does not exist, it is created and then returned. * @return {UniqueList} */ targets() { return this._targets || (this._targets = UniqueList(id)); }, /** * Sets the value of this operator. * @param {*} value - the value to set. * @return {Number} Returns 1 if the operator value has changed * according to strict equality, returns 0 otherwise. */ set(value) { if (this.value !== value) { this.value = value; return 1; } else { return 0; } }, /** * Indicates that operator evaluation should be skipped on the next pulse. * This operator will still propagate incoming pulses, but its update function * will not be invoked. The skip flag is reset after every pulse, so calling * this method will affect processing of the next pulse only. */ skip: flag(SKIP$1), /** * Indicates that this operator's value has been modified on its most recent * pulse. Normally modification is checked via strict equality; however, in * some cases it is more efficient to update the internal state of an object. * In those cases, the modified flag can be used to trigger propagation. Once * set, the modification flag persists across pulses until unset. The flag can * be used with the last timestamp to test if a modification is recent. */ modified: flag(MODIFIED), /** * Sets the parameters for this operator. The parameter values are analyzed for * operator instances. If found, this operator will be added as a dependency * of the parameterizing operator. Operator values are dynamically marshalled * from each operator parameter prior to evaluation. If a parameter value is * an array, the array will also be searched for Operator instances. However, * the search does not recurse into sub-arrays or object properties. * @param {object} params - A hash of operator parameters. 
* @param {boolean} [react=true] - A flag indicating if this operator should * automatically update (react) when parameter values change. In other words, * this flag determines if the operator registers itself as a listener on * any upstream operators included in the parameters. * @param {boolean} [initonly=false] - A flag indicating if this operator * should calculate an update only upon its initial evaluation, then * deregister dependencies and suppress all future update invocations. * @return {Operator[]} - An array of upstream dependencies. */ parameters(params, react, initonly) { react = react !== false; const argval = this._argval = this._argval || new Parameters(), argops = this._argops = this._argops || [], deps = []; let name, value, n, i; const add = (name, index, value) => { if (value instanceof Operator) { if (value !== this) { if (react) value.targets().add(this); deps.push(value); } argops.push({ op: value, name: name, index: index }); } else { argval.set(name, index, value); } }; for (name in params) { value = params[name]; if (name === PULSE) { array(value).forEach(op => { if (!(op instanceof Operator)) { error('Pulse parameters must be operator instances.'); } else if (op !== this) { op.targets().add(this); deps.push(op); } }); this.source = value; } else if (isArray(value)) { argval.set(name, -1, Array(n = value.length)); for (i = 0; i < n; ++i) add(name, i, value[i]); } else { add(name, -1, value); } } this.marshall().clear(); // initialize values if (initonly) argops.initonly = true; return deps; }, /** * Internal method for marshalling parameter values. * Visits each operator dependency to pull the latest value. * @return {Parameters} A Parameters object to pass to the update function. 
*/ marshall(stamp) { const argval = this._argval || NO_PARAMS, argops = this._argops; let item, i, op, mod; if (argops) { const n = argops.length; for (i = 0; i < n; ++i) { item = argops[i]; op = item.op; mod = op.modified() && op.stamp === stamp; argval.set(item.name, item.index, op.value, mod); } if (argops.initonly) { for (i = 0; i < n; ++i) { item = argops[i]; item.op.targets().remove(this); } this._argops = null; this._update = null; } } return argval; }, /** * Detach this operator from the dataflow. * Unregisters listeners on upstream dependencies. */ detach() { const argops = this._argops; let i, n, item, op; if (argops) { for (i = 0, n = argops.length; i < n; ++i) { item = argops[i]; op = item.op; if (op._targets) { op._targets.remove(this); } } } // remove references to the source and pulse object, // if present, to prevent memory leaks of old data. this.pulse = null; this.source = null; }, /** * Delegate method to perform operator processing. * Subclasses can override this method to perform custom processing. * By default, it marshalls parameters and calls the update function * if that function is defined. If the update function does not * change the operator value then StopPropagation is returned. * If no update function is defined, this method does nothing. * @param {Pulse} pulse - the current dataflow pulse. * @return The output pulse or StopPropagation. A falsy return value * (including undefined) will let the input pulse pass through. */ evaluate(pulse) { const update = this._update; if (update) { const params = this.marshall(pulse.stamp), v = update.call(this, params, pulse); params.clear(); if (v !== this.value) { this.value = v; } else if (!this.modified()) { return pulse.StopPropagation; } } }, /** * Run this operator for the current pulse. If this operator has already * been run at (or after) the pulse timestamp, returns StopPropagation. * Internally, this method calls {@link evaluate} to perform processing. 
* If {@link evaluate} returns a falsy value, the input pulse is returned. * This method should NOT be overridden, instead overrride {@link evaluate}. * @param {Pulse} pulse - the current dataflow pulse. * @return the output pulse for this operator (or StopPropagation) */ run(pulse) { if (pulse.stamp < this.stamp) return pulse.StopPropagation; let rv; if (this.skip()) { this.skip(false); rv = 0; } else { rv = this.evaluate(pulse); } return this.pulse = rv || pulse; } }; /** * Add an operator to the dataflow graph. This function accepts a * variety of input argument types. The basic signature supports an * initial value, update function and parameters. If the first parameter * is an Operator instance, it will be added directly. If it is a * constructor for an Operator subclass, a new instance will be instantiated. * Otherwise, if the first parameter is a function instance, it will be used * as the update function and a null initial value is assumed. * @param {*} init - One of: the operator to add, the initial value of * the operator, an operator class to instantiate, or an update function. * @param {function} [update] - The operator update function. * @param {object} [params] - The operator parameters. * @param {boolean} [react=true] - Flag indicating if this operator should * listen for changes to upstream operators included as parameters. * @return {Operator} - The added operator. */ function add (init, update, params, react) { let shift = 1, op; if (init instanceof Operator) { op = init; } else if (init && init.prototype instanceof Operator) { op = new init(); } else if (isFunction(init)) { op = new Operator(null, init); } else { shift = 0; op = new Operator(init, update); } this.rank(op); if (shift) { react = params; params = update; } if (params) this.connect(op, op.parameters(params, react)); this.touch(op); return op; } /** * Connect a target operator as a dependent of source operators. 
* If necessary, this method will rerank the target operator and its * dependents to ensure propagation proceeds in a topologically sorted order. * @param {Operator} target - The target operator. * @param {Array<Operator>} - The source operators that should propagate * to the target operator. */ function connect (target, sources) { const targetRank = target.rank, n = sources.length; for (let i = 0; i < n; ++i) { if (targetRank < sources[i].rank) { this.rerank(target); return; } } } let STREAM_ID = 0; /** * Models an event stream. * @constructor * @param {function(Object, number): boolean} [filter] - Filter predicate. * Events pass through when truthy, events are suppressed when falsy. * @param {function(Object): *} [apply] - Applied to input events to produce * new event values. * @param {function(Object)} [receive] - Event callback function to invoke * upon receipt of a new event. Use to override standard event processing. */ function EventStream(filter, apply, receive) { this.id = ++STREAM_ID; this.value = null; if (receive) this.receive = receive; if (filter) this._filter = filter; if (apply) this._apply = apply; } /** * Creates a new event stream instance with the provided * (optional) filter, apply and receive functions. * @param {function(Object, number): boolean} [filter] - Filter predicate. * Events pass through when truthy, events are suppressed when falsy. * @param {function(Object): *} [apply] - Applied to input events to produce * new event values. * @see EventStream */ function stream(filter, apply, receive) { return new EventStream(filter, apply, receive); } EventStream.prototype = { _filter: truthy, _apply: identity, targets() { return this._targets || (this._targets = UniqueList(id)); }, consume(_) { if (!arguments.length) return !!this._consume; this._consume = !!_; return this; }, receive(evt) { if (this._filter(evt)) { const val = this.value = this._apply(evt), trg = this._targets, n = trg ? 
trg.length : 0; for (let i = 0; i < n; ++i) trg[i].receive(val); if (this._consume) { evt.preventDefault(); evt.stopPropagation(); } } }, filter(filter) { const s = stream(filter); this.targets().add(s); return s; }, apply(apply) { const s = stream(null, apply); this.targets().add(s); return s; }, merge() { const s = stream(); this.targets().add(s); for (let i = 0, n = arguments.length; i < n; ++i) { arguments[i].targets().add(s); } return s; }, throttle(pause) { let t = -1; return this.filter(() => { const now = Date.now(); if (now - t > pause) { t = now; return 1; } else { return 0; } }); }, debounce(delay) { const s = stream(); this.targets().add(stream(null, null, debounce(delay, e => { const df = e.dataflow; s.receive(e); if (df && df.run) df.run(); }))); return s; }, between(a, b) { let active = false; a.targets().add(stream(null, null, () => active = true)); b.targets().add(stream(null, null, () => active = false)); return this.filter(() => active); }, detach() { // ensures compatibility with operators (#2753) // remove references to other streams and filter functions that may // be bound to subcontexts that need to be garbage collected. this._filter = truthy; this._targets = null; } }; /** * Create a new event stream from an event source. * @param {object} source - The event source to monitor. The input must * support the addEventListener method. * @param {string} type - The event type. * @param {function(object): boolean} [filter] - Event filter function. * @param {function(object): *} [apply] - Event application function. * If provided, this function will be invoked and the result will be * used as the downstream event value. 
* @return {EventStream} */ function events (source, type, filter, apply) { const df = this, s = stream(filter, apply), send = function (e) { e.dataflow = df; try { s.receive(e); } catch (error) { df.error(error); } finally { df.run(); } }; let sources; if (typeof source === 'string' && typeof document !== 'undefined') { sources = document.querySelectorAll(source); } else { sources = array(source); } const n = sources.length; for (let i = 0; i < n; ++i) { sources[i].addEventListener(type, send); } return s; } function parse(data, format) { const locale = this.locale(); return read(data, format, locale.timeParse, locale.utcParse); } /** * Ingests new data into the dataflow. First parses the data using the * vega-loader read method, then pulses a changeset to the target operator. * @param {Operator} target - The Operator to target with ingested data, * typically a Collect transform instance. * @param {*} data - The input data, prior to parsing. For JSON this may * be a string or an object. For CSV, TSV, etc should be a string. * @param {object} format - The data format description for parsing * loaded data. This object is passed to the vega-loader read method. * @returns {Dataflow} */ function ingest(target, data, format) { data = this.parse(data, format); return this.pulse(target, this.changeset().insert(data)); } /** * Request data from an external source, parse it, and return a Promise. * @param {string} url - The URL from which to load the data. This string * is passed to the vega-loader load method. * @param {object} [format] - The data format description for parsing * loaded data. This object is passed to the vega-loader read method. * @return {Promise} A Promise that resolves upon completion of the request. 
* The resolved object contains the following properties: * - data: an array of parsed data (or null upon error) * - status: a code for success (0), load fail (-1), or parse fail (-2) */ async function request(url, format) { const df = this; let status = 0, data; try { data = await df.loader().load(url, { context: 'dataflow', response: responseType(format && format.type) }); try { data = df.parse(data, format); } catch (err) { status = -2; df.warn('Data ingestion failed', url, err); } } catch (err) { status = -1; df.warn('Loading failed', url, err); } return { data, status }; } async function preload(target, url, format) { const df = this, pending = df._pending || loadPending(df); pending.requests += 1; const res = await df.request(url, format); df.pulse(target, df.changeset().remove(truthy).insert(res.data || [])); pending.done(); return res; } function loadPending(df) { let accept; const pending = new Promise(a => accept = a); pending.requests = 0; pending.done = () => { if (--pending.requests === 0) { df._pending = null; accept(df); } }; return df._pending = pending; } const SKIP = { skip: true }; /** * Perform operator updates in response to events. Applies an * update function to compute a new operator value. If the update function * returns a {@link ChangeSet}, the operator will be pulsed with those tuple * changes. Otherwise, the operator value will be updated to the return value. * @param {EventStream|Operator} source - The event source to react to. * This argument can be either an EventStream or an Operator. * @param {Operator|function(object):Operator} target - The operator to update. * This argument can either be an Operator instance or (if the source * argument is an EventStream), a function that accepts an event object as * input and returns an Operator to target. * @param {function(Parameters,Event): *} [update] - Optional update function * to compute the new operator value, or a literal value to set. 
Update * functions expect to receive a parameter object and event as arguments. * This function can either return a new operator value or (if the source * argument is an EventStream) a {@link ChangeSet} instance to pulse * the target operator with tuple changes. * @param {object} [params] - The update function parameters. * @param {object} [options] - Additional options hash. If not overridden, * updated operators will be skipped by default. * @param {boolean} [options.skip] - If true, the operator will * be skipped: it will not be evaluated, but its dependents will be. * @param {boolean} [options.force] - If true, the operator will * be re-evaluated even if its value has not changed. * @return {Dataflow} */ function on (source, target, update, params, options) { const fn = source instanceof Operator ? onOperator : onStream; fn(this, source, target, update, params, options); return this; } function onStream(df, stream, target, update, params, options) { const opt = extend({}, options, SKIP); let func, op; if (!isFunction(target)) target = constant(target); if (update === undefined) { func = e => df.touch(target(e)); } else if (isFunction(update)) { op = new Operator(null, update, params, false); func = e => { op.evaluate(e); const t = target(e), v = op.value; isChangeSet(v) ? 
df.pulse(t, v, options) : df.update(t, v, opt); }; } else { func = e => df.update(target(e), update, opt); } stream.apply(func); } function onOperator(df, source, target, update, params, options) { if (update === undefined) { source.targets().add(target); } else { const opt = options || {}, op = new Operator(null, updater(target, update), params, false); op.modified(opt.force); op.rank = source.rank; // immediately follow source source.targets().add(op); // add dependency if (target) { op.skip(true); // skip first invocation op.value = target.value; // initialize value op.targets().add(target); // chain dependencies df.connect(target, [op]); // rerank as needed, #1672 } } } function updater(target, update) { update = isFunction(update) ? update : constant(update); return target ? function (_, pulse) { const value = update(_, pulse); if (!target.skip()) { target.skip(value !== this.value).value = value; } return value; } : update; } /** * Assigns a rank to an operator. Ranks are assigned in increasing order * by incrementing an internal rank counter. * @param {Operator} op - The operator to assign a rank. */ function rank(op) { op.rank = ++this._rank; } /** * Re-ranks an operator and all downstream target dependencies. This * is necessary when upstream dependencies of higher rank are added to * a target operator. * @param {Operator} op - The operator to re-rank. */ function rerank(op) { const queue = [op]; let cur, list, i; while (queue.length) { this.rank(cur = queue.pop()); if (list = cur._targets) { for (i = list.length; --i >= 0;) { queue.push(cur = list[i]); if (cur === op) error('Cycle detected in dataflow graph.'); } } } } /** * Sentinel value indicating pulse propagation should stop. 
*/ const StopPropagation = {}; // Pulse visit type flags const ADD = 1 << 0, REM = 1 << 1, MOD = 1 << 2, ADD_REM = ADD | REM, ADD_MOD = ADD | MOD, ALL = ADD | REM | MOD, REFLOW = 1 << 3, SOURCE = 1 << 4, NO_SOURCE = 1 << 5, NO_FIELDS = 1 << 6; /** * A Pulse enables inter-operator communication during a run of the * dataflow graph. In addition to the current timestamp, a pulse may also * contain a change-set of added, removed or modified data tuples, as well as * a pointer to a full backing data source. Tuple change sets may not * be fully materialized; for example, to prevent needless array creation * a change set may include larger arrays and corresponding filter functions. * The pulse provides a {@link visit} method to enable proper and efficient * iteration over requested data tuples. * * In addition, each pulse can track modification flags for data tuple fields. * Responsible transform operators should call the {@link modifies} method to * indicate changes to data fields. The {@link modified} method enables * querying of this modification state. * * @constructor * @param {Dataflow} dataflow - The backing dataflow instance. * @param {number} stamp - The current propagation timestamp. * @param {string} [encode] - An optional encoding set name, which is then * accessible as Pulse.encode. Operators can respond to (or ignore) this * setting as appropriate. This parameter can be used in conjunction with * the Encode transform in the vega-encode module. */ function Pulse(dataflow, stamp, encode) { this.dataflow = dataflow; this.stamp = stamp == null ? -1 : stamp; this.add = []; this.rem = []; this.mod = []; this.fields = null; this.encode = encode || null; } function materialize(data, filter) { const out = []; visitArray(data, filter, _ => out.push(_)); return out; } function filter(pulse, flags) { const map = {}; pulse.visit(flags, t => { map[tupleid(t)] = 1; }); return t => map[tupleid(t)] ? null : t; } function addFilter(a, b) { return a ? 
(t, i) => a(t, i) && b(t, i) : b; } Pulse.prototype = { /** * Sentinel value indicating pulse propagation should stop. */ StopPropagation, /** * Boolean flag indicating ADD (added) tuples. */ ADD, /** * Boolean flag indicating REM (removed) tuples. */ REM, /** * Boolean flag indicating MOD (modified) tuples. */ MOD, /** * Boolean flag indicating ADD (added) and REM (removed) tuples. */ ADD_REM, /** * Boolean flag indicating ADD (added) and MOD (modified) tuples. */ ADD_MOD, /** * Boolean flag indicating ADD, REM and MOD tuples. */ ALL, /** * Boolean flag indicating all tuples in a data source * except for the ADD, REM and MOD tuples. */ REFLOW, /** * Boolean flag indicating a 'pass-through' to a * backing data source, ignoring ADD, REM and MOD tuples. */ SOURCE, /** * Boolean flag indicating that source data should be * suppressed when creating a forked pulse. */ NO_SOURCE, /** * Boolean flag indicating that field modifications should be * suppressed when creating a forked pulse. */ NO_FIELDS, /** * Creates a new pulse based on the values of this pulse. * The dataflow, time stamp and field modification values are copied over. * By default, new empty ADD, REM and MOD arrays are created. * @param {number} flags - Integer of boolean flags indicating which (if any) * tuple arrays should be copied to the new pulse. The supported flag values * are ADD, REM and MOD. Array references are copied directly: new array * instances are not created. * @return {Pulse} - The forked pulse instance. * @see init */ fork(flags) { return new Pulse(this.dataflow).init(this, flags); }, /** * Creates a copy of this pulse with new materialized array * instances for the ADD, REM, MOD, and SOURCE arrays. * The dataflow, time stamp and field modification values are copied over. * @return {Pulse} - The cloned pulse instance. 
* @see init */ clone() { const p = this.fork(ALL); p.add = p.add.slice(); p.rem = p.rem.slice(); p.mod = p.mod.slice(); if (p.source) p.source = p.source.slice(); return p.materialize(ALL | SOURCE); }, /** * Returns a pulse that adds all tuples from a backing source. This is * useful for cases where operators are added to a dataflow after an * upstream data pipeline has already been processed, ensuring that * new operators can observe all tuples within a stream. * @return {Pulse} - A pulse instance with all source tuples included * in the add array. If the current pulse already has all source * tuples in its add array, it is returned directly. If the current * pulse does not have a backing source, it is returned directly. */ addAll() { let p = this; const reuse = !p.source || p.add === p.rem // special case for indexed set (e.g., crossfilter) || !p.rem.length && p.source.length === p.add.length; if (reuse) { return p; } else { p = new Pulse(this.dataflow).init(this); p.add = p.source; p.rem = []; // new operators can ignore rem #2769 return p; } }, /** * Initialize this pulse based on the values of another pulse. This method * is used internally by {@link fork} to initialize a new forked tuple. * The dataflow, time stamp and field modification values are copied over. * By default, new empty ADD, REM and MOD arrays are created. * @param {Pulse} src - The source pulse to copy from. * @param {number} flags - Integer of boolean flags indicating which (if any) * tuple arrays should be copied to the new pulse. The supported flag values * are ADD, REM and MOD. Array references are copied directly: new array * instances are not created. By default, source data arrays are copied * to the new pulse. Use the NO_SOURCE flag to enforce a null source. * @return {Pulse} - Returns this Pulse instance. 
*/ init(src, flags) { const p = this; p.stamp = src.stamp; p.encode = src.encode; if (src.fields && !(flags & NO_FIELDS)) { p.fields = src.fields; } if (flags & ADD) { p.addF = src.addF; p.add = src.add; } else { p.addF = null; p.add = []; } if (flags & REM) { p.remF = src.remF; p.rem = src.rem; } else { p.remF = null; p.rem = []; } if (flags & MOD) { p.modF = src.modF; p.mod = src.mod; } else { p.modF = null; p.mod = []; } if (flags & NO_SOURCE) { p.srcF = null; p.source = null; } else { p.srcF = src.srcF; p.source = src.source; if (src.cleans) p.cleans = src.cleans; } return p; }, /** * Schedules a function to run after pulse propagation completes. * @param {function} func - The function to run. */ runAfter(func) { this.dataflow.runAfter(func); }, /** * Indicates if tuples have been added, removed or modified. * @param {number} [flags] - The tuple types (ADD, REM or MOD) to query. * Defaults to ALL, returning true if any tuple type has changed. * @return {boolean} - Returns true if one or more queried tuple types have * changed, false otherwise. */ changed(flags) { const f = flags || ALL; return f & ADD && this.add.length || f & REM && this.rem.length || f & MOD && this.mod.length; }, /** * Forces a "reflow" of tuple values, such that all tuples in the backing * source are added to the MOD set, unless already present in the ADD set. * @param {boolean} [fork=false] - If true, returns a forked copy of this * pulse, and invokes reflow on that derived pulse. * @return {Pulse} - The reflowed pulse instance. */ reflow(fork) { if (fork) return this.fork(ALL).reflow(); const len = this.add.length, src = this.source && this.source.length; if (src && src !== len) { this.mod = this.source; if (len) this.filter(MOD, filter(this, ADD)); } return this; }, /** * Get/set metadata to pulse requesting garbage collection * to reclaim currently unused resources. 
*/ clean(value) { if (arguments.length) { this.cleans = !!value; return this; } else { return this.cleans; } }, /** * Marks one or more data field names as modified to assist dependency * tracking and incremental processing by transform operators. * @param {string|Array<string>} _ - The field(s) to mark as modified. * @return {Pulse} - This pulse instance. */ modifies(_) { const hash = this.fields || (this.fields = {}); if (isArray(_)) { _.forEach(f => hash[f] = true); } else { hash[_] = true; } return this; }, /** * Checks if one or more data fields have been modified during this pulse * propagation timestamp. * @param {string|Array<string>} _ - The field(s) to check for modified. * @param {boolean} nomod - If true, will check the modified flag even if * no mod tuples exist. If false (default), mod tuples must be present. * @return {boolean} - Returns true if any of the provided fields has been * marked as modified, false otherwise. */ modified(_, nomod) { const fields = this.fields; return !((nomod || this.mod.length) && fields) ? false : !arguments.length ? !!fields : isArray(_) ? _.some(f => fields[f]) : fields[_]; }, /** * Adds a filter function to one more tuple sets. Filters are applied to * backing tuple arrays, to determine the actual set of tuples considered * added, removed or modified. They can be used to delay materialization of * a tuple set in order to avoid expensive array copies. In addition, the * filter functions can serve as value transformers: unlike standard predicate * function (which return boolean values), Pulse filters should return the * actual tuple value to process. If a tuple set is already filtered, the * new filter function will be appended into a conjuntive ('and') query. * @param {number} flags - Flags indicating the tuple set(s) to filter. 
* @param {function(*):object} filter - Filter function that will be applied * to the tuple set array, and should return a data tuple if the value * should be included in the tuple set, and falsy (or null) otherwise. * @return {Pulse} - Returns this pulse instance. */ filter(flags, filter) { const p = this; if (flags & ADD) p.addF = addFilter(p.addF, filter); if (flags & REM) p.remF = addFilter(p.remF, filter); if (flags & MOD) p.modF = addFilter(p.modF, filter); if (flags & SOURCE) p.srcF = addFilter(p.srcF, filter); return p; }, /** * Materialize one or more tuple sets in this pulse. If the tuple set(s) have * a registered filter function, it will be applied and the tuple set(s) will * be replaced with materialized tuple arrays. * @param {number} flags - Flags indicating the tuple set(s) to materialize. * @return {Pulse} - Returns this pulse instance. */ materialize(flags) { flags = flags || ALL; const p = this; if (flags & ADD && p.addF) { p.add = materialize(p.add, p.addF); p.addF = null; } if (flags & REM && p.remF) { p.rem = materialize(p.rem, p.remF); p.remF = null; } if (flags & MOD && p.modF) { p.mod = materialize(p.mod, p.modF); p.modF = null; } if (flags & SOURCE && p.srcF) { p.source = p.source.filter(p.srcF); p.srcF = null; } return p; }, /** * Visit one or more tuple sets in this pulse. * @param {number} flags - Flags indicating the tuple set(s) to visit. * Legal values are ADD, REM, MOD and SOURCE (if a backing data source * has been set). * @param {function(object):*} - Visitor function invoked per-tuple. * @return {Pulse} - Returns this pulse instance. 
*/ visit(flags, visitor) { const p = this, v = visitor; if (flags & SOURCE) { visitArray(p.source, p.srcF, v); return p; } if (flags & ADD) visitArray(p.add, p.addF, v); if (flags & REM) visitArray(p.rem, p.remF, v); if (flags & MOD) visitArray(p.mod, p.modF, v); const src = p.source; if (flags & REFLOW && src) { const sum = p.add.length + p.mod.length; if (sum === src.length) ; else if (sum) { visitArray(src, filter(p, ADD_MOD), v); } else { // if no add/rem/mod tuples, visit source visitArray(src, p.srcF, v); } } return p; } }; /** * Represents a set of multiple pulses. Used as input for operators * that accept multiple pulses at a time. Contained pulses are * accessible via the public "pulses" array property. This pulse doe * not carry added, removed or modified tuples directly. However, * the visit method can be used to traverse all such tuples contained * in sub-pulses with a timestamp matching this parent multi-pulse. * @constructor * @param {Dataflow} dataflow - The backing dataflow instance. * @param {number} stamp - The timestamp. * @param {Array<Pulse>} pulses - The sub-pulses for this multi-pulse. */ function MultiPulse(dataflow, stamp, pulses, encode) { const p = this; let c = 0; this.dataflow = dataflow; this.stamp = stamp; this.fields = null; this.encode = encode || null; this.pulses = pulses; for (const pulse of pulses) { if (pulse.stamp !== stamp) continue; if (pulse.fields) { const hash = p.fields || (p.fields = {}); for (const f in pulse.fields) { hash[f] = 1; } } if (pulse.changed(p.ADD)) c |= p.ADD; if (pulse.changed(p.REM)) c |= p.REM; if (pulse.changed(p.MOD)) c |= p.MOD; } this.changes = c; } inherits(MultiPulse, Pulse, { /** * Creates a new pulse based on the values of this pulse. * The dataflow, time stamp and field modification values are copied over. 
* @return {Pulse} */ fork(flags) { const p = new Pulse(this.dataflow).init(this, flags & this.NO_FIELDS); if (flags !== undefined) { if (flags & p.ADD) this.visit(p.ADD, t => p.add.push(t)); if (flags & p.REM) this.visit(p.REM, t => p.rem.push(t)); if (flags & p.MOD) this.visit(p.MOD, t => p.mod.push(t)); } return p; }, changed(flags) { return this.changes & flags; }, modified(_) { const p = this, fields = p.fields; return !(fields && p.changes & p.MOD) ? 0 : isArray(_) ? _.some(f => fields[f]) : fields[_]; }, filter() { error('MultiPulse does not support filtering.'); }, materialize() { error('MultiPulse does not support materialization.'); }, visit(flags, visitor) { const p = this, pulses = p.pulses, n = pulses.length; let i = 0; if (flags & p.SOURCE) { for (; i < n; ++i) { pulses[i].visit(flags, visitor); } } else { for (; i < n; ++i) { if (pulses[i].stamp === p.stamp) { pulses[i].visit(flags, visitor); } } } return p; } }); /** * Evaluates the dataflow and returns a Promise that resolves when pulse * propagation completes. This method will increment the current timestamp * and process all updated, pulsed and touched operators. When invoked for * the first time, all registered operators will be processed. This method * should not be invoked by third-party clients, use {@link runAsync} or * {@link run} instead. * @param {string} [encode] - The name of an encoding set to invoke during * propagation. This value is added to generated Pulse instances; * operators can then respond to (or ignore) this setting as appropriate. * This parameter can be used in conjunction with the Encode transform in * the vega-encode package. * @param {function} [prerun] - An optional callback function to invoke * immediately before dataflow evaluation commences. * @param {function} [postrun] - An optional callback function to invoke * after dataflow evaluation completes. The callback will be invoked * after those registered via {@link runAfter}. 
* @return {Promise} - A promise that resolves to this dataflow after * evaluation completes. */ async function evaluate(encode, prerun, postrun) { const df = this, async = []; // if the pulse value is set, this is a re-entrant call if (df._pulse) return reentrant(df); // wait for pending datasets to load if (df._pending) await df._pending; // invoke prerun function, if provided if (prerun) await asyncCallback(df, prerun); // exit early if there are no updates if (!df._touched.length) { df.debug('Dataflow invoked, but nothing to do.'); return df; } // increment timestamp clock const stamp = ++df._clock; // set the current pulse df._pulse = new Pulse(df, stamp, encode); // initialize priority queue, reset touched operators df._touched.forEach(op => df._enqueue(op, true)); df._touched = UniqueList(id); let count = 0, op, next, error; try { while (df._heap.size() > 0) { // dequeue operator with highest priority op = df._heap.pop(); // re-queue if rank changed if (op.rank !== op.qrank) { df._enqueue(op, true); continue; } // otherwise, evaluate the operator next = op.run(df._getPulse(op, encode)); if (next.then) { // await if operator returns a promise directly next = await next; } else if (next.async) { // queue parallel asynchronous execution async.push(next.async); next = StopPropagation; } // propagate evaluation, enqueue dependent operators if (next !== StopPropagation) { if (op._targets) op._targets.forEach(op => df._enqueue(op)); } // increment visit counter ++count; } } catch (err) { df._heap.clear(); error = err; } // reset pulse map df._input = {}; df._pulse = null; df.debug(`Pulse ${stamp}: ${count} operators`); if (error) { df._postrun = []; df.error(error); } // invoke callbacks queued via runAfter if (df._postrun.length) { const pr = df._postrun.sort((a, b) => b.priority - a.priority); df._postrun = []; for (let i = 0; i < pr.length; ++i) { await asyncCallback(df, pr[i].callback); } } // invoke postrun function, if provided if (postrun) await 
asyncCallback(df, postrun); // handle non-blocking asynchronous callbacks if (async.length) { Promise.all(async).then(cb => df.runAsync(null, () => { cb.forEach(f => { try { f(df); } catch (err) { df.error(err); } }); })); } return df; } /** * Queues dataflow evaluation to run once any other queued evaluations have * completed and returns a Promise that resolves when the queued pulse * propagation completes. If provided, a callback function will be invoke