@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
515 lines • 20.6 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataflowGraph = exports.FunctionArgument = void 0;
const assert_1 = require("../../util/assert");
const vertex_1 = require("./vertex");
const arrays_1 = require("../../util/collections/arrays");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const environment_1 = require("../environments/environment");
const clone_1 = require("../environments/clone");
/**
* Helper functions to work with {@link FunctionArgument}s.
* @see {@link EmptyArgument} - the marker for empty arguments
*/
exports.FunctionArgument = {
name: 'FunctionArgument',
/**
* Checks whether the given argument is a positional argument.
* @example
* ```r
* foo(b=3, 2) # the second argument is positional
* ```
*/
isPositional(arg) {
return arg !== r_function_call_1.EmptyArgument && arg.name === undefined;
},
/**
* Checks whether the given argument is a named argument.
* @example
* ```r
* foo(b=3, 2) # the first argument is named
* ```
* @see {@link isPositional}
* @see {@link isEmpty}
* @see {@link hasName}
*/
isNamed(arg) {
return arg !== r_function_call_1.EmptyArgument && arg.name !== undefined;
},
/**
* Checks whether the given argument is an unnamed argument (either positional or empty).
* @example
* ```r
* foo(, 2) # the first argument is unnamed (empty)
* foo(3, 2) # both arguments are unnamed (positional)
* ```
* @see {@link isNamed}
*/
isUnnamed(arg) {
return arg === r_function_call_1.EmptyArgument || arg.name === undefined;
},
/**
* Checks whether the given argument is an empty argument.
* @example
* ```r
* foo(, 2) # the first argument is empty
* ```
* @see {@link isNotEmpty}
*/
isEmpty(arg) {
return arg === r_function_call_1.EmptyArgument;
},
/**
* Checks whether the given argument is not an empty argument.
* @see {@link FunctionArgument.isEmpty}
*/
isNotEmpty(arg) {
return arg !== r_function_call_1.EmptyArgument;
},
/**
* Returns the id of a non-empty argument.
* @example
* ```r
* foo(a=3, 2) # returns the node id of either `a` or `2`
* ```
* @see {@link FunctionArgument.getReference}
*/
getId(arg) {
if (arg !== r_function_call_1.EmptyArgument) {
return arg?.nodeId;
}
return undefined;
},
/**
* Returns the reference of a non-empty argument.
* @example
* ```r
* foo(a=3, 2) # returns the node id of either `3` or `2`, but skips a
* ```
* @see {@link FunctionArgument.getId}
*/
getReference(arg) {
if (arg === r_function_call_1.EmptyArgument) {
return undefined;
}
else if (arg.name === undefined) {
return arg.nodeId;
}
return arg.valueId;
},
/**
* Checks whether the given argument is a named argument with the specified name.
* Please note that this only checks whether the name is exactly identical and not whether
* R's argument matching resolves to the correct argument.
* For this, please refer to the {@link pMatch} function!
* @see {@link isNamed}
*/
hasName(arg, name) {
return exports.FunctionArgument.isNamed(arg) && arg.name === name;
}
};
/**
 * The dataflow graph holds the dataflow information found within the given AST.
 * We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}.
 * The helper object associated with the DFG is {@link Dataflow}.
 *
 * The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
 * However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
 * After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
 * However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
 *
 * Besides vertices and edges, the graph maintains an index ({@link DataflowGraph#types|`types`}) from vertex tag to the ids
 * of the vertices carrying that tag; every method that adds, retags, merges, or deserializes vertices keeps it in sync.
 *
 * Most mutating methods return the modified graph to allow for chaining.
 * @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
 * @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
 * @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
 * @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
 */
class DataflowGraph {
    _idMap;
    /*
     * Set of vertices which have sideEffects that we do not know anything about.
     * As a (temporary) solution until we have FD edges, a side effect may also store known target links
     * that have to be/should be resolved (as globals) as a separate pass before the df analysis ends.
     */
    _unknownSideEffects = new Set();
    /**
     * @param idMap - The id-map of the normalized AST this graph belongs to (may be `undefined`, see {@link setIdMap})
     */
    constructor(idMap) {
        this._idMap = idMap;
    }
    /** Ids of the vertices on the root level of the graph (vertices added with `asRoot = false` are not part of this set) */
    rootVertices = new Set();
    /** All vertices in the complete graph (including those nested in function definition) */
    vertexInformation = new Map();
    /** All edges in the complete graph (including those nested in function definition), as `source -> (target -> edge)` */
    edgeInformation = new Map();
    /** Index from vertex tag to the ids of all vertices carrying that tag */
    types = new Map();
    /**
     * Produces a plain, JSON-serializable view of the graph.
     * The `types` index and the id-map are not part of the output; {@link fromJson} rebuilds the index from the vertices.
     */
    toJSON() {
        return {
            rootVertices: Array.from(this.rootVertices),
            vertexInformation: Array.from(this.vertexInformation.entries()),
            edgeInformation: Array.from(this.edgeInformation.entries()).map(([id, edges]) => [id, Array.from(edges.entries())]),
            _unknownSideEffects: Array.from(this._unknownSideEffects)
        };
    }
    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id together with its outgoing edges (an empty map if there are none),
     * or `undefined` if the vertex does not exist
     * @see #getVertex
     */
    get(id, includeDefinedFunctions = true) {
        // if we do not want to include function definitions, only retrieve the value if the id is part of the root vertices
        const vertex = includeDefinedFunctions ? this.getVertex(id) : this.getRootVertex(id);
        return vertex === undefined ? undefined : [vertex, this.outgoingEdges(id) ?? new Map()];
    }
    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a vertex.
     * @param id - The id of the node to get
     * @returns the node info for the given id (if it exists)
     * @see #get
     * @see #getRootVertex
     */
    getVertex(id) {
        return this.vertexInformation.get(id);
    }
    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a root-level vertex.
     * @param id - The id of the node to get
     * @returns the node info for the given id (if it exists and is a root vertex)
     * @see #get
     * @see #getVertex
     */
    getRootVertex(id) {
        if (!this.rootVertices.has(id)) {
            return undefined;
        }
        return this.vertexInformation.get(id);
    }
    /**
     * @param id - The id of the source vertex
     * @returns the map `target -> edge` of all edges leaving `id`, or `undefined` if no edge starts there
     */
    outgoingEdges(id) {
        return this.edgeInformation.get(id);
    }
    /**
     * Collects all edges pointing to `id` by scanning the adjacency list of every source.
     * @param id - The id of the target vertex
     * @returns a fresh map `source -> edge` (empty if nothing points to `id`)
     */
    ingoingEdges(id) {
        const edges = new Map();
        for (const [source, outgoing] of this.edgeInformation.entries()) {
            const o = outgoing.get(id);
            if (o) {
                edges.set(source, o);
            }
        }
        return edges;
    }
    /**
     * Given a node in the normalized AST this either:
     * returns the id if the node directly exists in the DFG
     * returns the ids of all vertices in the DFG that are linked to this
     * returns undefined if the node is not part of the DFG and not linked to any node
     */
    getLinked(nodeId) {
        if (this.vertexInformation.has(nodeId)) {
            return [nodeId];
        }
        const linked = [];
        for (const [id, vtx] of this.vertexInformation) {
            if (vtx.link?.origin.includes(nodeId)) {
                linked.push(id);
            }
        }
        return linked.length > 0 ? linked : undefined;
    }
    /** Retrieves the id-map to the normalized AST attached to the dataflow graph */
    get idMap() {
        return this._idMap;
    }
    /**
     * Retrieves the set of vertices which have side effects that we do not know anything about.
     */
    get unknownSideEffects() {
        return this._unknownSideEffects;
    }
    /** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
    setIdMap(idMap) {
        this._idMap = idMap;
    }
    /**
     * @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
     * @returns pairs of vertex id and vertex information, for all vertices or only for the root-level ones
     * @see #edges
     */
    *vertices(includeDefinedFunctions) {
        if (includeDefinedFunctions) {
            yield* this.vertexInformation.entries();
        }
        else {
            for (const id of this.rootVertices) {
                yield [id, this.vertexInformation.get(id)];
            }
        }
    }
    /**
     * @param type - The vertex tag to look up in the type index
     * @returns pairs of vertex id and vertex information for all vertices indexed under `type`
     * @see #vertexIdsOfType
     */
    *verticesOfType(type) {
        const ids = this.types.get(type) ?? [];
        for (const id of ids) {
            yield [id, this.vertexInformation.get(id)];
        }
    }
    /**
     * @param type - The vertex tag to look up in the type index
     * @returns the ids of all vertices indexed under `type` (an empty array if there are none)
     * @see #verticesOfType
     */
    vertexIdsOfType(type) {
        return this.types.get(type) ?? [];
    }
    /**
     * @returns the ids of all edges in the graph together with their edge information
     * @see #vertices
     */
    *edges() {
        yield* this.edgeInformation.entries();
    }
    /**
     * Returns true if the graph contains a node with the given id.
     * @param id - The id to check for
     * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
     */
    hasVertex(id, includeDefinedFunctions = true) {
        return includeDefinedFunctions ? this.vertexInformation.has(id) : this.rootVertices.has(id);
    }
    /**
     * Returns true if the root level of the graph contains a node with the given id.
     */
    isRoot(id) {
        return this.rootVertices.has(id);
    }
    /** Returns the (live, not copied) set of root-level vertex ids */
    rootIds() {
        return this.rootVertices;
    }
    /**
     * Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
     * @param vertex - The vertex to add (its `environment` field is replaced in place, see below)
     * @param fallbackEnv - A clean environment to use if no environment is given in the vertex;
     * it is only applied to function definitions and to function calls that are not `onlyBuiltin`
     * @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph.
     * This is probably only of use, when you construct dataflow graphs for tests.
     * @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
     * If false, an already existing vertex is kept and the call is a no-op.
     * @see DataflowGraphVertexInfo
     * @see DataflowGraphVertexArgument
     */
    addVertex(vertex, fallbackEnv, asRoot = true, overwrite = false) {
        const vid = vertex.id;
        const oldVertex = this.vertexInformation.get(vid);
        if (oldVertex !== undefined && !overwrite) {
            return this;
        }
        const vtag = vertex.tag;
        // keep a clone of the original environment
        vertex.environment = vertex.environment
            ? (0, clone_1.cloneEnvironmentInformation)(vertex.environment)
            : (vtag === vertex_1.VertexType.FunctionDefinition || (vtag === vertex_1.VertexType.FunctionCall && !vertex.onlyBuiltin) ? fallbackEnv : undefined);
        this.vertexInformation.set(vid, vertex);
        if (oldVertex !== undefined) {
            // we overwrite an existing vertex: drop its previous index entry so that the id
            // is neither listed twice nor kept under a tag the vertex no longer has
            const previous = this.types.get(oldVertex.tag);
            if (previous) {
                this.types.set(oldVertex.tag, previous.filter(id => id !== vid));
            }
        }
        const has = this.types.get(vtag);
        if (has) {
            has.push(vid);
        }
        else {
            this.types.set(vtag, [vid]);
        }
        if (asRoot) {
            this.rootVertices.add(vid);
        }
        return this;
    }
    /**
     * Adds an edge of the given type(s) to the graph. Self-loops (`fromId === toId`) are ignored.
     * If there already is an edge between the two vertices, the type bits are OR-ed into the existing edge.
     * @param fromId - The id of the source vertex
     * @param toId - The id of the target vertex
     * @param type - The edge type bitmask to add
     */
    addEdge(fromId, toId, type) {
        if (fromId === toId) {
            return this;
        }
        const existingFrom = this.edgeInformation.get(fromId);
        const edgeInFrom = existingFrom?.get(toId);
        if (edgeInFrom === undefined) {
            const edge = { types: type };
            if (existingFrom === undefined) {
                this.edgeInformation.set(fromId, new Map([[toId, edge]]));
            }
            else {
                existingFrom.set(toId, edge);
            }
        }
        else {
            // adding the type
            edgeInFrom.types |= type;
        }
        return this;
    }
    /**
     * Merges the other graph into *this* one (in-place). The return value is only for convenience.
     * @param otherGraph - The graph to merge into this one (if `undefined`, nothing happens)
     * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use
     * in the context of function definitions
     */
    mergeWith(otherGraph, mergeRootVertices = true) {
        if (otherGraph === undefined) {
            return this;
        }
        this.mergeVertices(otherGraph, mergeRootVertices);
        for (const [type, ids] of otherGraph.types) {
            const existing = this.types.get(type);
            const known = new Set(existing);
            // build a fresh array so that arrays handed out earlier are not modified
            const merged = existing ? existing.slice() : [];
            for (const id of ids) {
                if (known.has(id)) {
                    // vertex is part of both graphs, do not index it twice
                    continue;
                }
                const tag = this.vertexInformation.get(id)?.tag;
                if (tag !== undefined && tag !== type) {
                    // the vertex kept after merging carries another tag and is indexed there
                    continue;
                }
                known.add(id);
                merged.push(id);
            }
            this.types.set(type, merged);
        }
        this.mergeEdges(otherGraph);
        return this;
    }
    /**
     * Merges root ids (optionally), unknown side effects, and vertices of the other graph into this one.
     * Vertices present in both graphs are combined with `mergeNodeInfos`.
     * Note that this does *not* update the `types` index, use {@link mergeWith} for a complete merge.
     * @param otherGraph - The graph to take the vertices from
     * @param mergeRootVertices - If false, the root vertices of the other graph are not added to the root level of this graph
     */
    mergeVertices(otherGraph, mergeRootVertices = true) {
        // merge root ids
        if (mergeRootVertices) {
            for (const root of otherGraph.rootVertices) {
                this.rootVertices.add(root);
            }
        }
        for (const unknown of otherGraph.unknownSideEffects) {
            this._unknownSideEffects.add(unknown);
        }
        for (const [id, info] of otherGraph.vertexInformation) {
            const currentInfo = this.vertexInformation.get(id);
            this.vertexInformation.set(id, currentInfo === undefined ? info : mergeNodeInfos(currentInfo, info));
        }
    }
    /**
     * Merges all edges of the other graph into this one; for edges present in both graphs the type bits are OR-ed.
     * Edges that only exist in the other graph are taken over by reference (the edge objects are shared afterwards).
     * @param otherGraph - The graph to take the edges from
     */
    mergeEdges(otherGraph) {
        for (const [id, edges] of otherGraph.edgeInformation.entries()) {
            for (const [target, edge] of edges) {
                const existing = this.edgeInformation.get(id);
                if (existing === undefined) {
                    this.edgeInformation.set(id, new Map([[target, edge]]));
                }
                else {
                    const get = existing.get(target);
                    if (get === undefined) {
                        existing.set(target, edge);
                    }
                    else {
                        get.types |= edge.types;
                    }
                }
            }
        }
    }
    /**
     * Marks a vertex in the graph to be a definition.
     * Vertices that already are (function or variable) definitions only get their control dependencies
     * replaced by those of the reference; all other vertices are retagged to variable definitions
     * and moved accordingly in the `types` index.
     * @param reference - The reference to the vertex to mark as definition
     * @param sourceIds - The id of the source vertex of the def, if available
     * @throws if there is no vertex for `reference.nodeId` (via `guard`)
     */
    setDefinitionOfVertex(reference, sourceIds) {
        const vertex = this.getVertex(reference.nodeId);
        (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set reference`);
        if (vertex.tag === vertex_1.VertexType.FunctionDefinition || vertex.tag === vertex_1.VertexType.VariableDefinition) {
            vertex.cds = reference.cds;
        }
        else {
            const oldTag = vertex.tag;
            vertex.tag = vertex_1.VertexType.VariableDefinition;
            if (sourceIds) {
                vertex.source = sourceIds;
            }
            this.types.set(oldTag, (this.types.get(oldTag) ?? []).filter(id => id !== reference.nodeId));
            this.types.set(vertex_1.VertexType.VariableDefinition, (this.types.get(vertex_1.VertexType.VariableDefinition) ?? []).concat([reference.nodeId]));
        }
    }
    /**
     * Marks a vertex in the graph to be a function call with the new information.
     * The stored vertex is replaced by a copy that combines the old vertex with `info`, and the
     * `types` index is updated to list the id under function calls.
     * @param info - The information about the new function call node
     * @throws if the vertex does not exist or is neither a use nor a value vertex (via `guard`)
     */
    updateToFunctionCall(info) {
        const infoId = info.id;
        const vertex = this.getVertex(infoId);
        (0, assert_1.guard)(vertex !== undefined && (vertex.tag === vertex_1.VertexType.Use || vertex.tag === vertex_1.VertexType.Value), () => `node must be a use or value node for ${JSON.stringify(info.id)} to update it to a function call but is ${vertex?.tag}`);
        const previousTag = vertex.tag;
        this.vertexInformation.set(infoId, { ...vertex, ...info, tag: vertex_1.VertexType.FunctionCall });
        this.types.set(previousTag, (this.types.get(previousTag) ?? []).filter(id => id !== infoId));
        const g = this.types.get(vertex_1.VertexType.FunctionCall);
        if (g) {
            g.push(infoId);
        }
        else {
            this.types.set(vertex_1.VertexType.FunctionCall, [infoId]);
        }
    }
    /**
     * Records that the vertex `from` is control dependent on `to` (with the branch condition `when`).
     * If you do not pass the `to` node, this will just mark the node as maybe.
     * @param from - The id of the vertex that receives the control dependency
     * @param to - The id of the vertex it depends on (normalized with `NodeId.normalize`)
     * @param when - The condition value under which the dependency holds
     * @throws if there is no vertex for `from` (via `guard`)
     */
    addControlDependency(from, to, when) {
        to = node_id_1.NodeId.normalize(to);
        const vertex = this.getVertex(from);
        (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${from} to add control dependency`);
        if (vertex.cds) {
            for (const { id, when: cond } of vertex.cds) {
                // NOTE(review): this skips the insertion if the same target is already present with a
                // *different* condition, while an identical (id, when) pair is pushed again - confirm that
                // this is intended and should not read `when === cond`
                if (id === to && when !== cond) {
                    return this;
                }
            }
        }
        else {
            vertex.cds = [];
        }
        vertex.cds.push({ id: to, when });
        return this;
    }
    /**
     * Marks the given node as having unknown side effects.
     * @param id - The id of the vertex with the side effect (normalized with `NodeId.normalize`)
     * @param target - Optional link information; a `callName` given as string is converted to a `RegExp`
     */
    markIdForUnknownSideEffects(id, target) {
        if (target) {
            this._unknownSideEffects.add({
                id: node_id_1.NodeId.normalize(id),
                linkTo: typeof target.callName === 'string' ? { ...target, callName: new RegExp(target.callName) } : target
            });
            return this;
        }
        this._unknownSideEffects.add(node_id_1.NodeId.normalize(id));
        return this;
    }
    /**
     * Constructs a dataflow graph instance from the given JSON data and returns the result.
     * This can be useful for data sent by the flowR server when analyzing it further.
     * The vertex environments found in `data` are revived in place and the `types` index is
     * rebuilt from the vertex tags, as it is not part of the serialized form (see {@link toJSON}).
     * The resulting graph has no id-map attached.
     * @param data - The JSON data to construct the graph from
     */
    static fromJson(data) {
        const graph = new DataflowGraph(undefined);
        graph.rootVertices = new Set(data.rootVertices);
        graph.vertexInformation = new Map(data.vertexInformation);
        for (const [id, vertex] of graph.vertexInformation) {
            if (vertex.environment) {
                vertex.environment = renvFromJson(vertex.environment);
            }
            // re-create the tag index, otherwise verticesOfType/vertexIdsOfType would report nothing
            const sameTag = graph.types.get(vertex.tag);
            if (sameTag) {
                sameTag.push(id);
            }
            else {
                graph.types.set(vertex.tag, [id]);
            }
        }
        graph.edgeInformation = new Map(data.edgeInformation.map(([id, edges]) => [id, new Map(edges)]));
        for (const unknown of data._unknownSideEffects) {
            graph._unknownSideEffects.add(unknown);
        }
        return graph;
    }
}
exports.DataflowGraph = DataflowGraph;
/**
 * Combines the information of two vertices that share an id.
 * The `current` vertex always wins and is returned; only if both vertices are
 * function definitions, the exit points and modes of `next` are folded into `current` (in place).
 * @param current - The vertex already present in the graph (may be modified)
 * @param next - The vertex that is merged in
 * @returns `current`
 */
function mergeNodeInfos(current, next) {
    const tagsAgree = current.tag === next.tag;
    if (!tagsAgree || current.tag !== vertex_1.VertexType.FunctionDefinition) {
        // either the vertices disagree on their kind or there is nothing to combine
        return current;
    }
    current.exitPoints = (0, arrays_1.uniqueArrayMerge)(current.exitPoints, next.exitPoints);
    const incomingModes = next.mode;
    if (!incomingModes || incomingModes.length <= 0) {
        return current;
    }
    current.mode ??= [];
    incomingModes.forEach(mode => {
        // only take over modes that are not known yet
        if (!current.mode.includes(mode)) {
            current.mode.push(mode);
        }
    });
    return current;
}
/**
 * Revives a single environment (and, recursively, its chain of parents) from its JSON form.
 * @param json - The deserialized environment with `parent`, `memory`, `builtInEnv`, and `id`
 * @returns a new `Environment` whose `id` and `memory` are taken from `json`
 */
function envFromJson(json) {
    // parents first, so that the revived environment can be linked to them on construction
    const revivedParent = json.parent ? envFromJson(json.parent) : undefined;
    // the memory is stored as a plain object, turn it back into a map
    const revivedMemory = new Map(Object.entries(json.memory));
    const env = new environment_1.Environment(revivedParent, json.builtInEnv);
    env.id = json.id;
    env.memory = revivedMemory;
    return env;
}
/**
 * Revives environment information (the current environment plus its level) from its JSON form.
 * @param json - The deserialized environment information with `current` and `level`
 * @returns an object holding the revived `current` environment and the unchanged `level`
 */
function renvFromJson(json) {
    return {
        current: envFromJson(json.current),
        level: json.level
    };
}
//# sourceMappingURL=graph.js.map