UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

515 lines 20.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.DataflowGraph = exports.FunctionArgument = void 0; const assert_1 = require("../../util/assert"); const vertex_1 = require("./vertex"); const arrays_1 = require("../../util/collections/arrays"); const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id"); const environment_1 = require("../environments/environment"); const clone_1 = require("../environments/clone"); /** * Helper functions to work with {@link FunctionArgument}s. * @see {@link EmptyArgument} - the marker for empty arguments */ exports.FunctionArgument = { name: 'FunctionArgument', /** * Checks whether the given argument is a positional argument. * @example * ```r * foo(b=3, 2) # the second argument is positional * ``` */ isPositional(arg) { return arg !== r_function_call_1.EmptyArgument && arg.name === undefined; }, /** * Checks whether the given argument is a named argument. * @example * ```r * foo(b=3, 2) # the first argument is named * ``` * @see {@link isPositional} * @see {@link isEmpty} * @see {@link hasName} */ isNamed(arg) { return arg !== r_function_call_1.EmptyArgument && arg.name !== undefined; }, /** * Checks whether the given argument is an unnamed argument (either positional or empty). * @example * ```r * foo(, 2) # the first argument is unnamed (empty) * foo(3, 2) # both arguments are unnamed (positional) * ``` * @see {@link isNamed} */ isUnnamed(arg) { return arg === r_function_call_1.EmptyArgument || arg.name === undefined; }, /** * Checks whether the given argument is an empty argument. * @example * ```r * foo(, 2) # the first argument is empty * ``` * @see {@link isNotEmpty} */ isEmpty(arg) { return arg === r_function_call_1.EmptyArgument; }, /** * Checks whether the given argument is not an empty argument. * @see {@link FunctionArgument.isEmpty} */ isNotEmpty(arg) { return arg !== r_function_call_1.EmptyArgument; }, /** * Returns the id of a non-empty argument. * @example * ```r * foo(a=3, 2) # returns the node id of either `a` or `2` * ``` * @see {@link FunctionArgument.getReference} */ getId(arg) { if (arg !== r_function_call_1.EmptyArgument) { return arg?.nodeId; } return undefined; }, /** * Returns the reference of a non-empty argument. * @example * ```r * foo(a=3, 2) # returns the node id of either `3` or `2`, but skips a * ``` * @see {@link FunctionArgument.getId} */ getReference(arg) { if (arg === r_function_call_1.EmptyArgument) { return undefined; } else if (arg.name === undefined) { return arg.nodeId; } return arg.valueId; }, /** * Checks whether the given argument is a named argument with the specified name. * Please note that this only checks whether the name is exactly identical and not whether * R's argument matching resolves to the correct argument. * For this, please refer to the {@link pMatch} function! * @see {@link isNamed} */ hasName(arg, name) { return exports.FunctionArgument.isNamed(arg) && arg.name === name; } }; /** * The dataflow graph holds the dataflow information found within the given AST. * We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}. * The helper object associated with the DFG is {@link Dataflow}. * * The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph. * However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list. * After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices. * However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed. * * All methods return the modified graph to allow for chaining. * @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph * @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph * @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object. * @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests) */ class DataflowGraph { _idMap; /* * Set of vertices which have sideEffects that we do not know anything about. * As a (temporary) solution until we have FD edges, a side effect may also store known target links * that have to be/should be resolved (as globals) as a separate pass before the df analysis ends. */ _unknownSideEffects = new Set(); constructor(idMap) { this._idMap = idMap; } /** Contains the vertices of the root level graph (i.e., included those vertices from the complete graph, that are nested within function definitions) */ rootVertices = new Set(); /** All vertices in the complete graph (including those nested in function definition) */ vertexInformation = new Map(); /** All edges in the complete graph (including those nested in function definition) */ edgeInformation = new Map(); types = new Map(); toJSON() { return { rootVertices: Array.from(this.rootVertices), vertexInformation: Array.from(this.vertexInformation.entries()), edgeInformation: Array.from(this.edgeInformation.entries()).map(([id, edges]) => [id, Array.from(edges.entries())]), _unknownSideEffects: Array.from(this._unknownSideEffects) }; } /** * Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges. * @param id - The id of the node to get * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel * @returns the node info for the given id (if it exists) * @see #getVertex */ get(id, includeDefinedFunctions = true) { // if we do not want to include function definitions, only retrieve the value if the id is part of the root vertices const vertex = includeDefinedFunctions ? this.getVertex(id) : this.getRootVertex(id); return vertex === undefined ? undefined : [vertex, this.outgoingEdges(id) ?? new Map()]; } /** * Get the {@link DataflowGraphVertexInfo} attached to a vertex. * @param id - The id of the node to get * @returns the node info for the given id (if it exists) * @see #get * @see #getRootVertex */ getVertex(id) { return this.vertexInformation.get(id); } /** * Get the {@link DataflowGraphVertexInfo} attached to a root-level vertex. * @param id - The id of the node to get * @returns the node info for the given id (if it exists) * @see #get * @see #getVertex */ getRootVertex(id) { if (!this.rootVertices.has(id)) { return undefined; } return this.vertexInformation.get(id); } outgoingEdges(id) { return this.edgeInformation.get(id); } ingoingEdges(id) { const edges = new Map(); for (const [source, outgoing] of this.edgeInformation.entries()) { const o = outgoing.get(id); if (o) { edges.set(source, o); } } return edges; } /** * Given a node in the normalized AST this either: * returns the id if the node directly exists in the DFG * returns the ids of all vertices in the DFG that are linked to this * returns undefined if the node is not part of the DFG and not linked to any node */ getLinked(nodeId) { if (this.vertexInformation.has(nodeId)) { return [nodeId]; } const linked = []; for (const [id, vtx] of this.vertexInformation) { if (vtx.link?.origin.includes(nodeId)) { linked.push(id); } } return linked.length > 0 ? linked : undefined; } /** Retrieves the id-map to the normalized AST attached to the dataflow graph */ get idMap() { return this._idMap; } /** * Retrieves the set of vertices which have side effects that we do not know anything about. */ get unknownSideEffects() { return this._unknownSideEffects; } /** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */ setIdMap(idMap) { this._idMap = idMap; } /** * @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel * @returns the ids of all toplevel vertices in the graph together with their vertex information * @see #edges */ *vertices(includeDefinedFunctions) { if (includeDefinedFunctions) { yield* this.vertexInformation.entries(); } else { for (const id of this.rootVertices) { yield [id, this.vertexInformation.get(id)]; } } } *verticesOfType(type) { const ids = this.types.get(type) ?? []; for (const id of ids) { yield [id, this.vertexInformation.get(id)]; } } vertexIdsOfType(type) { return this.types.get(type) ?? []; } /** * @returns the ids of all edges in the graph together with their edge information * @see #vertices */ *edges() { yield* this.edgeInformation.entries(); } /** * Returns true if the graph contains a node with the given id. * @param id - The id to check for * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel */ hasVertex(id, includeDefinedFunctions = true) { return includeDefinedFunctions ? this.vertexInformation.has(id) : this.rootVertices.has(id); } /** * Returns true if the root level of the graph contains a node with the given id. */ isRoot(id) { return this.rootVertices.has(id); } rootIds() { return this.rootVertices; } /** * Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically. * @param vertex - The vertex to add * @param fallbackEnv - A clean environment to use if no environment is given in the vertex * @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph. * This is probably only of use, when you construct dataflow graphs for tests. * @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id). * @see DataflowGraphVertexInfo * @see DataflowGraphVertexArgument */ addVertex(vertex, fallbackEnv, asRoot = true, overwrite = false) { const vid = vertex.id; const oldVertex = this.vertexInformation.get(vid); if (oldVertex !== undefined && !overwrite) { return this; } const vtag = vertex.tag; // keep a clone of the original environment vertex.environment = vertex.environment ? (0, clone_1.cloneEnvironmentInformation)(vertex.environment) : (vtag === vertex_1.VertexType.FunctionDefinition || (vtag === vertex_1.VertexType.FunctionCall && !vertex.onlyBuiltin) ? fallbackEnv : undefined); this.vertexInformation.set(vid, vertex); const has = this.types.get(vertex.tag); if (has) { has.push(vid); } else { this.types.set(vertex.tag, [vid]); } if (asRoot) { this.rootVertices.add(vid); } return this; } addEdge(fromId, toId, type) { if (fromId === toId) { return this; } const existingFrom = this.edgeInformation.get(fromId); const edgeInFrom = existingFrom?.get(toId); if (edgeInFrom === undefined) { const edge = { types: type }; if (existingFrom === undefined) { this.edgeInformation.set(fromId, new Map([[toId, edge]])); } else { existingFrom.set(toId, edge); } } else { // adding the type edgeInFrom.types |= type; } return this; } /** * Merges the other graph into *this* one (in-place). The return value is only for convenience. * @param otherGraph - The graph to merge into this one * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use * in the context of function definitions */ mergeWith(otherGraph, mergeRootVertices = true) { if (otherGraph === undefined) { return this; } this.mergeVertices(otherGraph, mergeRootVertices); for (const [type, ids] of otherGraph.types) { const existing = this.types.get(type); this.types.set(type, existing ? existing.concat(ids) : ids.slice()); } this.mergeEdges(otherGraph); return this; } mergeVertices(otherGraph, mergeRootVertices = true) { // merge root ids if (mergeRootVertices) { for (const root of otherGraph.rootVertices) { this.rootVertices.add(root); } } for (const unknown of otherGraph.unknownSideEffects) { this._unknownSideEffects.add(unknown); } for (const [id, info] of otherGraph.vertexInformation) { const currentInfo = this.vertexInformation.get(id); this.vertexInformation.set(id, currentInfo === undefined ? info : mergeNodeInfos(currentInfo, info)); } } mergeEdges(otherGraph) { for (const [id, edges] of otherGraph.edgeInformation.entries()) { for (const [target, edge] of edges) { const existing = this.edgeInformation.get(id); if (existing === undefined) { this.edgeInformation.set(id, new Map([[target, edge]])); } else { const get = existing.get(target); if (get === undefined) { existing.set(target, edge); } else { get.types |= edge.types; } } } } } /** * Marks a vertex in the graph to be a definition * @param reference - The reference to the vertex to mark as definition * @param sourceIds - The id of the source vertex of the def, if available */ setDefinitionOfVertex(reference, sourceIds) { const vertex = this.getVertex(reference.nodeId); (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set reference`); if (vertex.tag === vertex_1.VertexType.FunctionDefinition || vertex.tag === vertex_1.VertexType.VariableDefinition) { vertex.cds = reference.cds; } else { const oldTag = vertex.tag; vertex.tag = vertex_1.VertexType.VariableDefinition; if (sourceIds) { vertex.source = sourceIds; } this.types.set(oldTag, (this.types.get(oldTag) ?? []).filter(id => id !== reference.nodeId)); this.types.set(vertex_1.VertexType.VariableDefinition, (this.types.get(vertex_1.VertexType.VariableDefinition) ?? []).concat([reference.nodeId])); } } /** * Marks a vertex in the graph to be a function call with the new information * @param info - The information about the new function call node */ updateToFunctionCall(info) { const infoId = info.id; const vertex = this.getVertex(infoId); (0, assert_1.guard)(vertex !== undefined && (vertex.tag === vertex_1.VertexType.Use || vertex.tag === vertex_1.VertexType.Value), () => `node must be a use or value node for ${JSON.stringify(info.id)} to update it to a function call but is ${vertex?.tag}`); const previousTag = vertex.tag; this.vertexInformation.set(infoId, { ...vertex, ...info, tag: vertex_1.VertexType.FunctionCall }); this.types.set(previousTag, (this.types.get(previousTag) ?? []).filter(id => id !== infoId)); const g = this.types.get(vertex_1.VertexType.FunctionCall); if (g) { g.push(infoId); } else { this.types.set(vertex_1.VertexType.FunctionCall, [infoId]); } } /** If you do not pass the `to` node, this will just mark the node as maybe */ addControlDependency(from, to, when) { to = node_id_1.NodeId.normalize(to); const vertex = this.getVertex(from); (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${from} to add control dependency`); if (vertex.cds) { for (const { id, when: cond } of vertex.cds) { if (id === to && when !== cond) { return this; } } } else { vertex.cds = []; } vertex.cds.push({ id: to, when }); return this; } /** Marks the given node as having unknown side effects */ markIdForUnknownSideEffects(id, target) { if (target) { this._unknownSideEffects.add({ id: node_id_1.NodeId.normalize(id), linkTo: typeof target.callName === 'string' ? { ...target, callName: new RegExp(target.callName) } : target }); return this; } this._unknownSideEffects.add(node_id_1.NodeId.normalize(id)); return this; } /** * Constructs a dataflow graph instance from the given JSON data and returns the result. * This can be useful for data sent by the flowR server when analyzing it further. * @param data - The JSON data to construct the graph from */ static fromJson(data) { const graph = new DataflowGraph(undefined); graph.rootVertices = new Set(data.rootVertices); graph.vertexInformation = new Map(data.vertexInformation); for (const [, vertex] of graph.vertexInformation) { if (vertex.environment) { vertex.environment = renvFromJson(vertex.environment); } } graph.edgeInformation = new Map(data.edgeInformation.map(([id, edges]) => [id, new Map(edges)])); for (const unknown of data._unknownSideEffects) { graph._unknownSideEffects.add(unknown); } return graph; } } exports.DataflowGraph = DataflowGraph; function mergeNodeInfos(current, next) { if (current.tag !== next.tag) { return current; } else if (current.tag === vertex_1.VertexType.FunctionDefinition) { const n = next; current.exitPoints = (0, arrays_1.uniqueArrayMerge)(current.exitPoints, n.exitPoints); if (n.mode && n.mode.length > 0) { current.mode ??= []; for (const m of n.mode) { if (!current.mode.includes(m)) { current.mode.push(m); } } } } return current; } function envFromJson(json) { const parent = json.parent ? envFromJson(json.parent) : undefined; const memory = new Map(); for (const [key, value] of Object.entries(json.memory)) { memory.set(key, value); } const obj = new environment_1.Environment(parent, json.builtInEnv); obj.id = json.id; obj.memory = memory; return obj; } function renvFromJson(json) { const current = envFromJson(json.current); return { current, level: json.level }; } //# sourceMappingURL=graph.js.map