/*
 * @eagleoutice/flowr
 * Version: (not specified)
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 410 lines • 17.8 kB
 * JavaScript
 */
"use strict";
// CommonJS module prologue: flag this module as an ES-module-compatible export object.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder only: the class is assigned to `exports.DataflowGraph` right after its definition further below.
exports.DataflowGraph = void 0;
// Function declarations are hoisted, so these helpers can be exported before their definitions appear.
exports.isPositionalArgument = isPositionalArgument;
exports.isNamedArgument = isNamedArgument;
exports.getReferenceOfArgument = getReferenceOfArgument;
const assert_1 = require("../../util/assert");
const diff_dataflow_graph_1 = require("./diff-dataflow-graph");
const vertex_1 = require("./vertex");
const arrays_1 = require("../../util/collections/arrays");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const environment_1 = require("../environments/environment");
const clone_1 = require("../environments/clone");
const json_1 = require("../../util/json");
const logger_1 = require("../logger");
/**
 * Tells whether the given argument is a {@link PositionalFunctionArgument},
 * i.e., it is not the empty-argument marker and carries no `name`.
 *
 * @param arg - The function argument to inspect
 * @returns `true` if the argument is non-empty and unnamed, `false` otherwise
 */
function isPositionalArgument(arg) {
    if (arg === r_function_call_1.EmptyArgument) {
        return false;
    }
    return arg.name === undefined;
}
/**
 * Tells whether the given argument is a {@link NamedFunctionArgument},
 * i.e., it is not the empty-argument marker and carries a `name`.
 *
 * @param arg - The function argument to inspect
 * @returns `true` if the argument is non-empty and named, `false` otherwise
 */
function isNamedArgument(arg) {
    if (arg === r_function_call_1.EmptyArgument) {
        return false;
    }
    return arg.name !== undefined;
}
/**
 * Retrieves the node id referenced by an argument.
 *
 * @param arg - The function argument (may be the empty-argument marker or nullish)
 * @returns the `nodeId` of the argument, or `undefined` if the argument is empty or nullish
 */
function getReferenceOfArgument(arg) {
    return arg === r_function_call_1.EmptyArgument ? undefined : arg?.nodeId;
}
/**
* The dataflow graph holds the dataflow information found within the given AST.
* We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}
*
* The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
* However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
* After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
* However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
*
* All methods return the modified graph to allow for chaining.
*
* @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
* @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
* @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
*/
class DataflowGraph {
static DEFAULT_ENVIRONMENT = undefined;
_idMap;
/** all file paths included in this dfg */
_sourced = [];
/*
* Set of vertices which have sideEffects that we do not know anything about.
* As a (temporary) solution until we have FD edges, a side effect may also store known target links
* that have to be/should be resolved (as globals) as a separate pass before the df analysis ends.
*/
_unknownSideEffects = new Set();
constructor(idMap) {
DataflowGraph.DEFAULT_ENVIRONMENT ??= (0, environment_1.initializeCleanEnvironments)();
this._idMap = idMap;
}
/** Contains the vertices of the root level graph (i.e., included those vertices from the complete graph, that are nested within function definitions) */
rootVertices = new Set();
/** All vertices in the complete graph (including those nested in function definition) */
vertexInformation = new Map();
/** All edges in the complete graph (including those nested in function definition) */
edgeInformation = new Map();
/**
* Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
*
* @param id - The id of the node to get
* @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
* @returns the node info for the given id (if it exists)
*
* @see #getVertex
*/
get(id, includeDefinedFunctions = true) {
// if we do not want to include function definitions, only retrieve the value if the id is part of the root vertices
const vertex = this.getVertex(id, includeDefinedFunctions);
return vertex === undefined ? undefined : [vertex, this.outgoingEdges(id) ?? new Map()];
}
/**
* Get the {@link DataflowGraphVertexInfo} attached to a vertex.
*
* @param id - The id of the node to get
* @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
* @returns the node info for the given id (if it exists)
*
* @see #get
*/
getVertex(id, includeDefinedFunctions = true) {
return includeDefinedFunctions || this.rootVertices.has(id) ? this.vertexInformation.get(id) : undefined;
}
outgoingEdges(id) {
return this.edgeInformation.get(id);
}
ingoingEdges(id) {
const edges = new Map();
for (const [source, outgoing] of this.edgeInformation.entries()) {
if (outgoing.has(id)) {
edges.set(source, outgoing.get(id));
}
}
return edges;
}
/**
* Given a node in the normalized AST this either:
* * returns the id if the node directly exists in the DFG
* * returns the ids of all vertices in the DFG that are linked to this
* * returns undefined if the node is not part of the DFG and not linked to any node
*/
getLinked(nodeId) {
if (this.vertexInformation.has(nodeId)) {
return [nodeId];
}
const linked = [];
for (const [id, vtx] of this.vertexInformation) {
if (vtx.link?.origin.includes(nodeId)) {
linked.push(id);
}
}
return linked.length > 0 ? linked : undefined;
}
/** Retrieves the id-map to the normalized AST attached to the dataflow graph */
get idMap() {
return this._idMap;
}
get sourced() {
return this._sourced;
}
/** Mark this file as being part of the dfg */
addFile(source) {
this._sourced.push(source);
}
/**
* Retrieves the set of vertices which have side effects that we do not know anything about.
*/
get unknownSideEffects() {
return this._unknownSideEffects;
}
/** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
setIdMap(idMap) {
this._idMap = idMap;
}
/**
* @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
* @returns the ids of all toplevel vertices in the graph together with their vertex information
*
* @see #edges
*/
*vertices(includeDefinedFunctions) {
if (includeDefinedFunctions) {
yield* this.vertexInformation.entries();
}
else {
for (const id of this.rootVertices) {
yield [id, this.vertexInformation.get(id)];
}
}
}
/**
* @returns the ids of all edges in the graph together with their edge information
*
* @see #vertices
*/
*edges() {
yield* this.edgeInformation.entries();
}
/**
* Returns true if the graph contains a node with the given id.
*
* @param id - The id to check for
* @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
*/
hasVertex(id, includeDefinedFunctions = true) {
return includeDefinedFunctions ? this.vertexInformation.has(id) : this.rootVertices.has(id);
}
/**
* Returns true if the root level of the graph contains a node with the given id.
*/
isRoot(id) {
return this.rootVertices.has(id);
}
rootIds() {
return this.rootVertices;
}
/**
* Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
*
* @param vertex - The vertex to add
* @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph.
* This is probably only of use, when you construct dataflow graphs for tests.
* @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
*
* @see DataflowGraphVertexInfo
* @see DataflowGraphVertexArgument
*/
addVertex(vertex, asRoot = true, overwrite = false) {
const oldVertex = this.vertexInformation.get(vertex.id);
if (oldVertex !== undefined && !overwrite) {
return this;
}
const fallback = vertex.tag === vertex_1.VertexType.VariableDefinition || vertex.tag === vertex_1.VertexType.Use || vertex.tag === vertex_1.VertexType.Value || (vertex.tag === vertex_1.VertexType.FunctionCall && vertex.onlyBuiltin) ? undefined : DataflowGraph.DEFAULT_ENVIRONMENT;
// keep a clone of the original environment
const environment = vertex.environment ? (0, clone_1.cloneEnvironmentInformation)(vertex.environment) : fallback;
this.vertexInformation.set(vertex.id, {
...vertex,
environment
});
if (asRoot) {
this.rootVertices.add(vertex.id);
}
return this;
}
addEdge(from, to, type) {
const [fromId, toId] = extractEdgeIds(from, to);
if (fromId === toId) {
return this;
}
/* we now that we pass all required arguments */
const edge = { types: type };
const existingFrom = this.edgeInformation.get(fromId);
const edgeInFrom = existingFrom?.get(toId);
if (edgeInFrom === undefined) {
if (existingFrom === undefined) {
this.edgeInformation.set(fromId, new Map([[toId, edge]]));
}
else {
existingFrom.set(toId, edge);
}
}
else {
// adding the type
edgeInFrom.types |= type;
}
return this;
}
/**
* Merges the other graph into *this* one (in-place). The return value is only for convenience.
*
* @param otherGraph - The graph to merge into this one
* @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use
* in the context of function definitions
*/
mergeWith(otherGraph, mergeRootVertices = true) {
if (otherGraph === undefined) {
return this;
}
// merge root ids
if (mergeRootVertices) {
for (const root of otherGraph.rootVertices) {
this.rootVertices.add(root);
}
}
this._sourced = this._sourced.concat(otherGraph.sourced);
for (const unknown of otherGraph.unknownSideEffects) {
this._unknownSideEffects.add(unknown);
}
for (const [id, info] of otherGraph.vertexInformation) {
const currentInfo = this.vertexInformation.get(id);
this.vertexInformation.set(id, currentInfo === undefined ? info : mergeNodeInfos(currentInfo, info));
}
this.mergeEdges(otherGraph);
return this;
}
mergeEdges(otherGraph) {
for (const [id, edges] of otherGraph.edgeInformation.entries()) {
for (const [target, edge] of edges) {
const existing = this.edgeInformation.get(id);
if (existing === undefined) {
this.edgeInformation.set(id, new Map([[target, edge]]));
}
else {
const get = existing.get(target);
if (get === undefined) {
existing.set(target, edge);
}
else {
get.types |= edge.types;
}
}
}
}
}
/**
* Marks a vertex in the graph to be a definition
* @param reference - The reference to the vertex to mark as definition
*/
setDefinitionOfVertex(reference) {
const vertex = this.getVertex(reference.nodeId, true);
(0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set reference`);
if (vertex.tag === vertex_1.VertexType.FunctionDefinition || vertex.tag === vertex_1.VertexType.VariableDefinition) {
vertex.cds = reference.controlDependencies;
}
else {
this.vertexInformation.set(reference.nodeId, { ...vertex, tag: vertex_1.VertexType.VariableDefinition });
}
}
/**
* Marks a vertex in the graph to be a function call with the new information
* @param info - The information about the new function call node
*/
updateToFunctionCall(info) {
const vertex = this.getVertex(info.id, true);
(0, assert_1.guard)(vertex !== undefined && (vertex.tag === vertex_1.VertexType.Use || vertex.tag === vertex_1.VertexType.Value), () => `node must be a use or value node for ${JSON.stringify(info.id)} to update it to a function call but is ${vertex?.tag}`);
this.vertexInformation.set(info.id, { ...vertex, ...info, tag: vertex_1.VertexType.FunctionCall });
}
/** If you do not pass the `to` node, this will just mark the node as maybe */
addControlDependency(from, to, when) {
to = to ? (0, node_id_1.normalizeIdToNumberIfPossible)(to) : undefined;
const vertex = this.getVertex(from, true);
(0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${from} to add control dependency`);
vertex.cds ??= [];
if (to) {
let hasControlDependency = false;
for (const { id, when: cond } of vertex.cds) {
if (id === to && when !== cond) {
hasControlDependency = true;
break;
}
}
if (!hasControlDependency) {
vertex.cds.push({ id: to, when });
}
}
return this;
}
/** Marks the given node as having unknown side effects */
markIdForUnknownSideEffects(id, target) {
if (target) {
this._unknownSideEffects.add({ id: (0, node_id_1.normalizeIdToNumberIfPossible)(id), linkTo: typeof target.callName === 'string' ? { ...target, callName: new RegExp(target.callName) } : target });
return this;
}
this._unknownSideEffects.add((0, node_id_1.normalizeIdToNumberIfPossible)(id));
return this;
}
/**
* Constructs a dataflow graph instance from the given JSON data and returns the result.
* This can be useful for data sent by the flowR server when analyzing it further.
* @param data - The JSON data to construct the graph from
*/
static fromJson(data) {
const graph = new DataflowGraph(undefined);
graph.rootVertices = new Set(data.rootVertices);
graph.vertexInformation = new Map(data.vertexInformation);
for (const [, vertex] of graph.vertexInformation) {
if (vertex.environment) {
vertex.environment = renvFromJson(vertex.environment);
}
}
graph.edgeInformation = new Map(data.edgeInformation.map(([id, edges]) => [id, new Map(edges)]));
if (data.sourced) {
graph._sourced = data.sourced;
}
return graph;
}
}
// Replace the `void 0` placeholder from the module prologue now that the class is defined
// (class declarations, unlike function declarations, cannot be used before their definition).
exports.DataflowGraph = DataflowGraph;
/**
 * Reconciles the information of two vertices that share the same id (used when merging graphs).
 * The current information always wins; the next one is only used for consistency checks.
 *
 * @param current - The vertex information already present in the graph
 * @param next    - The vertex information of the graph that is merged in
 * @returns `current` (unchanged)
 * @throws if both vertices share a tag but disagree on scope, function call arguments, or exit points (via `guard`)
 */
function mergeNodeInfos(current, next) {
    if (current.tag !== next.tag) {
        // differing tags are not an error, we just warn and keep what we have
        logger_1.dataflowLogger.warn(() => `nodes to be joined for the same id should have the same tag, but ${JSON.stringify(current, json_1.jsonReplacer)} vs. ${JSON.stringify(next, json_1.jsonReplacer)} -- we are currently not handling cases in which vertices may be either! Keeping current.`);
        return current;
    }
    switch (current.tag) {
        case vertex_1.VertexType.VariableDefinition:
            (0, assert_1.guard)(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope');
            break;
        case vertex_1.VertexType.FunctionCall:
            (0, assert_1.guard)((0, diff_dataflow_graph_1.equalFunctionArguments)(current.id, current.args, next.args), 'nodes to be joined for the same id must have the same function call information');
            break;
        case vertex_1.VertexType.FunctionDefinition:
            (0, assert_1.guard)(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope');
            (0, assert_1.guard)((0, arrays_1.arrayEqual)(current.exitPoints, next.exitPoints), 'nodes to be joined must have same exit points');
            break;
        default:
            // all other vertex types carry nothing that has to be checked
            break;
    }
    return current;
}
/**
 * Resolves both ends of an edge, each given as a {@link ReferenceForEdge}, to plain vertex ids.
 *
 * @param from - The source, either an id or an object carrying a `nodeId`
 * @param to   - The target, either an id or an object carrying a `nodeId`
 * @returns the pair `[fromId, toId]`
 */
function extractEdgeIds(from, to) {
    // objects are unwrapped to their `nodeId`, everything else already is the id
    const idOf = (reference) => (typeof reference === 'object' ? reference.nodeId : reference);
    return [idOf(from), idOf(to)];
}
/**
 * Rebuilds a single environment (and, recursively, its chain of parents) from its plain JSON form.
 *
 * @param json - The deserialized environment with `id`, `memory` (a plain object), and an optional `parent`
 * @returns an environment object whose `memory` is a `Map` holding the own enumerable entries of `json.memory`
 */
function envFromJson(json) {
    // parents are revived first, the outermost environment has none
    const parent = json.parent ? envFromJson(json.parent) : undefined;
    const memory = new Map(Object.entries(json.memory));
    return { id: json.id, parent, memory };
}
/**
 * Rebuilds the environment information (current environment plus level) from its plain JSON form.
 *
 * @param json - The deserialized environment information with `current` and `level`
 * @returns an object with the revived `current` environment and the unchanged `level`
 */
function renvFromJson(json) {
    return {
        current: envFromJson(json.current),
        level: json.level
    };
}
//# sourceMappingURL=graph.js.map