UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

github.com/flowr-analysis/flowr

flowr-analysis/flowr

410 lines • 17.8 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataflowGraph = void 0;
exports.isPositionalArgument = isPositionalArgument;
exports.isNamedArgument = isNamedArgument;
exports.getReferenceOfArgument = getReferenceOfArgument;
const assert_1 = require("../../util/assert");
const diff_dataflow_graph_1 = require("./diff-dataflow-graph");
const vertex_1 = require("./vertex");
const arrays_1 = require("../../util/collections/arrays");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const environment_1 = require("../environments/environment");
const clone_1 = require("../environments/clone");
const json_1 = require("../../util/json");
const logger_1 = require("../logger");

/**
 * Check if the given argument is a {@link PositionalFunctionArgument}.
 *
 * @param arg - The argument to check
 * @returns true if the argument is not the empty argument and has no `name`
 */
function isPositionalArgument(arg) {
    return arg !== r_function_call_1.EmptyArgument && arg.name === undefined;
}

/**
 * Check if the given argument is a {@link NamedFunctionArgument}.
 *
 * @param arg - The argument to check
 * @returns true if the argument is not the empty argument and has a `name`
 */
function isNamedArgument(arg) {
    return arg !== r_function_call_1.EmptyArgument && arg.name !== undefined;
}

/**
 * Returns the reference of a non-empty argument.
 *
 * @param arg - The argument to retrieve the reference of
 * @returns the `nodeId` of the argument, or `undefined` if the argument is empty (or nullish)
 */
function getReferenceOfArgument(arg) {
    if (arg !== r_function_call_1.EmptyArgument) {
        return arg?.nodeId;
    }
    return undefined;
}

/**
 * The dataflow graph holds the dataflow information found within the given AST.
 * We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}
 *
 * The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
 * However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
 * After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
 * However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
 *
 * All methods return the modified graph to allow for chaining.
 *
 * @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
 * @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
 * @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
 * @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
 */
class DataflowGraph {
    /** Environment used as fallback by {@link addVertex}; lazily initialized by the first constructor call */
    static DEFAULT_ENVIRONMENT = undefined;
    _idMap;
    /** all file paths included in this dfg */
    _sourced = [];
    /*
     * Set of vertices which have sideEffects that we do not know anything about.
     * As a (temporary) solution until we have FD edges, a side effect may also store known target links
     * that have to be/should be resolved (as globals) as a separate pass before the df analysis ends.
     */
    _unknownSideEffects = new Set();

    /**
     * @param idMap - The id-map to the normalized AST that should be attached to this graph
     */
    constructor(idMap) {
        DataflowGraph.DEFAULT_ENVIRONMENT ??= (0, environment_1.initializeCleanEnvironments)();
        this._idMap = idMap;
    }

    /** Contains the ids of the vertices of the root level graph (i.e., without those vertices of the complete graph that were added as non-root, like the ones nested within function definitions) */
    rootVertices = new Set();
    /** All vertices in the complete graph (including those nested in function definition) */
    vertexInformation = new Map();
    /** All edges in the complete graph (including those nested in function definition) */
    edgeInformation = new Map();

    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
     *
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id together with its outgoing edges (an empty map if there are none),
     *          or `undefined` if the vertex does not exist
     *
     * @see #getVertex
     */
    get(id, includeDefinedFunctions = true) {
        // if we do not want to include function definitions, only retrieve the value if the id is part of the root vertices
        const vertex = this.getVertex(id, includeDefinedFunctions);
        return vertex === undefined ? undefined : [vertex, this.outgoingEdges(id) ?? new Map()];
    }

    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a vertex.
     *
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id (if it exists)
     *
     * @see #get
     */
    getVertex(id, includeDefinedFunctions = true) {
        return includeDefinedFunctions || this.rootVertices.has(id) ? this.vertexInformation.get(id) : undefined;
    }

    /**
     * @param id - The id of the source vertex
     * @returns the map from target id to edge for all edges leaving `id`, or `undefined` if none are recorded
     */
    outgoingEdges(id) {
        return this.edgeInformation.get(id);
    }

    /**
     * Collects all edges that point to the given vertex by scanning the complete adjacency list.
     *
     * @param id - The id of the target vertex
     * @returns a (freshly created) map from source id to the edge pointing at `id`
     */
    ingoingEdges(id) {
        const edges = new Map();
        for (const [source, outgoing] of this.edgeInformation.entries()) {
            if (outgoing.has(id)) {
                edges.set(source, outgoing.get(id));
            }
        }
        return edges;
    }

    /**
     * Given a node in the normalized AST this either:
     * * returns the id if the node directly exists in the DFG
     * * returns the ids of all vertices in the DFG that are linked to this
     * * returns undefined if the node is not part of the DFG and not linked to any node
     */
    getLinked(nodeId) {
        if (this.vertexInformation.has(nodeId)) {
            return [nodeId];
        }
        const linked = [];
        for (const [id, vtx] of this.vertexInformation) {
            if (vtx.link?.origin.includes(nodeId)) {
                linked.push(id);
            }
        }
        return linked.length > 0 ? linked : undefined;
    }

    /** Retrieves the id-map to the normalized AST attached to the dataflow graph */
    get idMap() {
        return this._idMap;
    }

    /** Retrieves all file paths that were marked as being part of the dfg (see {@link addFile}) */
    get sourced() {
        return this._sourced;
    }

    /** Mark this file as being part of the dfg */
    addFile(source) {
        this._sourced.push(source);
    }

    /**
     * Retrieves the set of vertices which have side effects that we do not know anything about.
     */
    get unknownSideEffects() {
        return this._unknownSideEffects;
    }

    /** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
    setIdMap(idMap) {
        this._idMap = idMap;
    }

    /**
     * @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
     * @returns the ids of all toplevel vertices in the graph together with their vertex information
     *
     * @see #edges
     */
    *vertices(includeDefinedFunctions) {
        if (includeDefinedFunctions) {
            yield* this.vertexInformation.entries();
        }
        else {
            for (const id of this.rootVertices) {
                yield [id, this.vertexInformation.get(id)];
            }
        }
    }

    /**
     * @returns the ids of all edges in the graph together with their edge information
     *
     * @see #vertices
     */
    *edges() {
        yield* this.edgeInformation.entries();
    }

    /**
     * Returns true if the graph contains a node with the given id.
     *
     * @param id - The id to check for
     * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
     */
    hasVertex(id, includeDefinedFunctions = true) {
        return includeDefinedFunctions ? this.vertexInformation.has(id) : this.rootVertices.has(id);
    }

    /**
     * Returns true if the root level of the graph contains a node with the given id.
     */
    isRoot(id) {
        return this.rootVertices.has(id);
    }

    /** Returns the (live, not copied) set of ids of all root vertices */
    rootIds() {
        return this.rootVertices;
    }

    /**
     * Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
     *
     * @param vertex - The vertex to add
     * @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph.
     *                 This is probably only of use, when you construct dataflow graphs for tests.
     * @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
     *
     * @see DataflowGraphVertexInfo
     * @see DataflowGraphVertexArgument
     */
    addVertex(vertex, asRoot = true, overwrite = false) {
        const oldVertex = this.vertexInformation.get(vertex.id);
        if (oldVertex !== undefined && !overwrite) {
            return this;
        }
        // variable definitions, uses, values and builtin-only calls get no fallback environment
        const fallback = vertex.tag === vertex_1.VertexType.VariableDefinition
            || vertex.tag === vertex_1.VertexType.Use
            || vertex.tag === vertex_1.VertexType.Value
            || (vertex.tag === vertex_1.VertexType.FunctionCall && vertex.onlyBuiltin)
            ? undefined
            : DataflowGraph.DEFAULT_ENVIRONMENT;
        // keep a clone of the original environment
        const environment = vertex.environment ? (0, clone_1.cloneEnvironmentInformation)(vertex.environment) : fallback;
        this.vertexInformation.set(vertex.id, {
            ...vertex,
            environment
        });
        if (asRoot) {
            this.rootVertices.add(vertex.id);
        }
        return this;
    }

    /**
     * Adds an edge to the graph. Edges from a vertex to itself are ignored.
     * If there already is an edge between the two vertices, the given type is combined with
     * the existing types via bitwise OR instead of adding a second edge.
     *
     * @param from - The source of the edge (either an id or an object with a `nodeId`)
     * @param to - The target of the edge (either an id or an object with a `nodeId`)
     * @param type - The type(s) of the edge
     */
    addEdge(from, to, type) {
        const [fromId, toId] = extractEdgeIds(from, to);
        if (fromId === toId) {
            return this;
        }
        /* we know that we pass all required arguments */
        const edge = { types: type };
        const existingFrom = this.edgeInformation.get(fromId);
        const edgeInFrom = existingFrom?.get(toId);
        if (edgeInFrom === undefined) {
            if (existingFrom === undefined) {
                this.edgeInformation.set(fromId, new Map([[toId, edge]]));
            }
            else {
                existingFrom.set(toId, edge);
            }
        }
        else {
            // adding the type
            edgeInFrom.types |= type;
        }
        return this;
    }

    /**
     * Merges the other graph into *this* one (in-place). The return value is only for convenience.
     * Vertex information and edge objects that are new to this graph are taken over by reference (they are not cloned).
     *
     * @param otherGraph - The graph to merge into this one
     * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use
     *                            in the context of function definitions
     */
    mergeWith(otherGraph, mergeRootVertices = true) {
        if (otherGraph === undefined) {
            return this;
        }
        // merge root ids
        if (mergeRootVertices) {
            for (const root of otherGraph.rootVertices) {
                this.rootVertices.add(root);
            }
        }
        this._sourced = this._sourced.concat(otherGraph.sourced);
        for (const unknown of otherGraph.unknownSideEffects) {
            this._unknownSideEffects.add(unknown);
        }
        for (const [id, info] of otherGraph.vertexInformation) {
            const currentInfo = this.vertexInformation.get(id);
            this.vertexInformation.set(id, currentInfo === undefined ? info : mergeNodeInfos(currentInfo, info));
        }
        this.mergeEdges(otherGraph);
        return this;
    }

    /**
     * Merges all edges of the other graph into this one (in-place).
     * Edges that exist in both graphs have their types combined via bitwise OR.
     *
     * @param otherGraph - The graph to take the edges from
     */
    mergeEdges(otherGraph) {
        for (const [id, edges] of otherGraph.edgeInformation.entries()) {
            for (const [target, edge] of edges) {
                const existing = this.edgeInformation.get(id);
                if (existing === undefined) {
                    this.edgeInformation.set(id, new Map([[target, edge]]));
                }
                else {
                    const get = existing.get(target);
                    if (get === undefined) {
                        existing.set(target, edge);
                    }
                    else {
                        get.types |= edge.types;
                    }
                }
            }
        }
    }

    /**
     * Marks a vertex in the graph to be a definition.
     * If the vertex already is a function or variable definition, only its control dependencies are updated in place;
     * otherwise the vertex is replaced by a copy tagged as variable definition.
     *
     * @param reference - The reference to the vertex to mark as definition
     */
    setDefinitionOfVertex(reference) {
        const vertex = this.getVertex(reference.nodeId, true);
        (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set reference`);
        if (vertex.tag === vertex_1.VertexType.FunctionDefinition || vertex.tag === vertex_1.VertexType.VariableDefinition) {
            vertex.cds = reference.controlDependencies;
        }
        else {
            this.vertexInformation.set(reference.nodeId, { ...vertex, tag: vertex_1.VertexType.VariableDefinition });
        }
    }

    /**
     * Marks a vertex in the graph to be a function call with the new information.
     * The vertex must exist and currently be a use or value vertex.
     *
     * @param info - The information about the new function call node
     */
    updateToFunctionCall(info) {
        const vertex = this.getVertex(info.id, true);
        (0, assert_1.guard)(vertex !== undefined && (vertex.tag === vertex_1.VertexType.Use || vertex.tag === vertex_1.VertexType.Value), () => `node must be a use or value node for ${JSON.stringify(info.id)} to update it to a function call but is ${vertex?.tag}`);
        this.vertexInformation.set(info.id, { ...vertex, ...info, tag: vertex_1.VertexType.FunctionCall });
    }

    /**
     * Adds a control dependency to the given vertex, unless the very same dependency (same id and same `when`) is already recorded.
     * If you do not pass the `to` node, this will just mark the node as maybe (i.e., ensure that its `cds` array exists).
     *
     * @param from - The id of the vertex that receives the control dependency (must exist in the graph)
     * @param to - The id of the node the vertex is control dependent on
     * @param when - Stored alongside the dependency (presumably the outcome of the condition under which the dependency holds)
     */
    addControlDependency(from, to, when) {
        // check for absence explicitly: the node id `0` is falsy but a perfectly valid target
        const target = to === undefined || to === null ? undefined : (0, node_id_1.normalizeIdToNumberIfPossible)(to);
        const vertex = this.getVertex(from, true);
        (0, assert_1.guard)(vertex !== undefined, () => `node must be defined for ${from} to add control dependency`);
        vertex.cds ??= [];
        if (target !== undefined) {
            // a dependency is only a duplicate if both the id and the `when` agree,
            // a dependency on the other branch of the same node has to be recorded separately
            const hasControlDependency = vertex.cds.some(({ id, when: cond }) => id === target && when === cond);
            if (!hasControlDependency) {
                vertex.cds.push({ id: target, when });
            }
        }
        return this;
    }

    /**
     * Marks the given node as having unknown side effects.
     *
     * @param id - The id of the node to mark
     * @param target - Optional link target information; if its `callName` is a string, it is converted to a regular expression
     */
    markIdForUnknownSideEffects(id, target) {
        if (target) {
            this._unknownSideEffects.add({
                id: (0, node_id_1.normalizeIdToNumberIfPossible)(id),
                linkTo: typeof target.callName === 'string' ? { ...target, callName: new RegExp(target.callName) } : target
            });
            return this;
        }
        this._unknownSideEffects.add((0, node_id_1.normalizeIdToNumberIfPossible)(id));
        return this;
    }

    /**
     * Constructs a dataflow graph instance from the given JSON data and returns the result.
     * This can be useful for data sent by the flowR server when analyzing it further.
     * The resulting graph has no id-map attached.
     *
     * @param data - The JSON data to construct the graph from
     */
    static fromJson(data) {
        const graph = new DataflowGraph(undefined);
        graph.rootVertices = new Set(data.rootVertices);
        graph.vertexInformation = new Map(data.vertexInformation);
        for (const [, vertex] of graph.vertexInformation) {
            if (vertex.environment) {
                vertex.environment = renvFromJson(vertex.environment);
            }
        }
        graph.edgeInformation = new Map(data.edgeInformation.map(([id, edges]) => [id, new Map(edges)]));
        if (data.sourced) {
            graph._sourced = data.sourced;
        }
        return graph;
    }
}
exports.DataflowGraph = DataflowGraph;

/**
 * Joins the information of two vertices that share the same id.
 * The current information always wins; the next one is only used to check for consistency.
 *
 * @param current - The vertex information already present
 * @param next - The vertex information to merge in
 * @returns `current`
 */
function mergeNodeInfos(current, next) {
    if (current.tag !== next.tag) {
        logger_1.dataflowLogger.warn(() => `nodes to be joined for the same id should have the same tag, but ${JSON.stringify(current, json_1.jsonReplacer)} vs. ${JSON.stringify(next, json_1.jsonReplacer)} -- we are currently not handling cases in which vertices may be either! Keeping current.`);
        return current;
    }
    if (current.tag === vertex_1.VertexType.VariableDefinition) {
        (0, assert_1.guard)(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope');
    }
    else if (current.tag === vertex_1.VertexType.FunctionCall) {
        (0, assert_1.guard)((0, diff_dataflow_graph_1.equalFunctionArguments)(current.id, current.args, next.args), 'nodes to be joined for the same id must have the same function call information');
    }
    else if (current.tag === vertex_1.VertexType.FunctionDefinition) {
        (0, assert_1.guard)(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope');
        (0, assert_1.guard)((0, arrays_1.arrayEqual)(current.exitPoints, next.exitPoints), 'nodes to be joined must have same exist points');
    }
    return current;
}

/**
 * Returns the ids of the dataflow vertices referenced by a {@link ReferenceForEdge}.
 *
 * @param from - Either an id or an object carrying the id as `nodeId`
 * @param to - Either an id or an object carrying the id as `nodeId`
 * @returns the pair `[fromId, toId]`
 */
function extractEdgeIds(from, to) {
    const fromId = typeof from === 'object' ? from.nodeId : from;
    const toId = typeof to === 'object' ? to.nodeId : to;
    return [fromId, toId];
}

/**
 * Rebuilds a single environment (recursively including its parents) from its JSON form.
 * The own enumerable entries of `json.memory` are copied into a new `Map`.
 */
function envFromJson(json) {
    const parent = json.parent ? envFromJson(json.parent) : undefined;
    const memory = new Map();
    for (const [key, value] of Object.entries(json.memory)) {
        memory.set(key, value);
    }
    return {
        id: json.id,
        parent: parent,
        memory
    };
}

/**
 * Rebuilds the environment information (the `current` environment together with its `level`) from its JSON form.
 */
function renvFromJson(json) {
    const current = envFromJson(json.current);
    return {
        current,
        level: json.level
    };
}
//# sourceMappingURL=graph.js.map