UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

236 lines (235 loc) 11.7 kB
import type { DataflowGraphEdge, EdgeType } from './edge';
import type { DataflowInformation } from '../info';
import type { DataflowGraphVertexArgument, DataflowGraphVertexFunctionCall, DataflowGraphVertexInfo } from './vertex';
import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { Identifier, IdentifierDefinition, IdentifierReference } from '../environments/identifier';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { LinkTo } from '../../queries/catalog/call-context-query/call-context-query-format';
/**
 * Describes the information we store per function body.
 * This is the {@link DataflowInformation} without its `graph` and `exitPoints`; instead of a full graph,
 * `graph` holds the set of node ids that belong to the function body.
 * The {@link DataflowFunctionFlowInformation#exitPoints} are stored within the enclosing {@link DataflowGraphVertexFunctionDefinition} vertex.
 */
export type DataflowFunctionFlowInformation = Omit<DataflowInformation, 'graph' | 'exitPoints'> & {
    graph: Set<NodeId>;
};
/**
 * A reference with a name, e.g. `a` and `b` in the following function call:
 *
 * ```r
 * foo(a = 3, b = 2)
 * ```
 *
 * @see #isNamedArgument
 * @see PositionalFunctionArgument
 */
export interface NamedFunctionArgument extends IdentifierReference {
    readonly name: string;
}
/**
 * A reference which does not have a name, like the references to the arguments `3` and `2` in the following:
 *
 * ```r
 * foo(3, 2)
 * ```
 *
 * The `name` property is either absent or `undefined`, which distinguishes it from a {@link NamedFunctionArgument}.
 *
 * @see #isPositionalArgument
 * @see NamedFunctionArgument
 */
export interface PositionalFunctionArgument extends Omit<IdentifierReference, 'name'> {
    readonly name?: undefined;
}
/** Summarizes either named (`foo(a = 3, b = 2)`), unnamed (`foo(3, 2)`), or empty (`foo(,)`) arguments within a function. */
export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | typeof EmptyArgument;
/**
 * Check if the given argument is a {@link PositionalFunctionArgument}.
 * Acts as a type guard, narrowing `arg` on success.
 */
export declare function isPositionalArgument(arg: FunctionArgument): arg is PositionalFunctionArgument;
/**
 * Check if the given argument is a {@link NamedFunctionArgument}.
 * Acts as a type guard, narrowing `arg` on success.
 */
export declare function isNamedArgument(arg: FunctionArgument): arg is NamedFunctionArgument;
/**
 * Returns the reference (node id) of a non-empty argument.
 *
 * NOTE(review): presumably yields `undefined` for an {@link EmptyArgument} - the implementation is not visible here, confirm.
 */
export declare function getReferenceOfArgument(arg: FunctionArgument): NodeId | undefined;
/**
 * A reference that is enough to indicate start and end points of an edge within the dataflow graph:
 * either just the `nodeId` and `controlDependencies` of an {@link IdentifierReference}, or a full {@link IdentifierDefinition}.
 * This type is local to the module (it is not exported).
 */
type ReferenceForEdge = Pick<IdentifierReference, 'nodeId' | 'controlDependencies'> | IdentifierDefinition;
/**
 * Maps the target of each edge to the edge information.
 */
export type OutgoingEdges<Edge extends DataflowGraphEdge = DataflowGraphEdge> = Map<NodeId, Edge>;
/**
 * Similar to {@link OutgoingEdges}, but inverted regarding the edge direction.
 * In other words, it maps the source to the edge information.
 */
export type IngoingEdges<Edge extends DataflowGraphEdge = DataflowGraphEdge> = Map<NodeId, Edge>;
/**
 * The structure of the serialized {@link DataflowGraph}.
 *
 * @see DataflowGraph#fromJson
 */
export interface DataflowGraphJson {
    /** Ids of the root vertices of the graph */
    readonly rootVertices: NodeId[];
    /** Pairs of a vertex id and the information attached to that vertex */
    readonly vertexInformation: [NodeId, DataflowGraphVertexInfo][];
    /**
     * Pairs of a node id and a list of `[node id, edge]` entries.
     * NOTE(review): presumably `[source, [target, edge][]]`, mirroring {@link OutgoingEdges} - confirm against the serializer.
     */
    readonly edgeInformation: [NodeId, [NodeId, DataflowGraphEdge][]][];
    /** Optional list of the sources recorded for the graph, each either a string or the literal `'<inline>'` */
    readonly sourced?: (string | '<inline>')[];
}
/**
 * An unknown side effect describes something that we cannot handle correctly (in all cases).
 * For example, `eval` will be marked as an unknown side effect as we have no idea of how it will affect the program.
 * Linked side effects are used whenever we know that a call may be affected by another one in a way that we cannot
 * grasp from the dataflow perspective (e.g., an indirect dependency based on the currently active graphic device).
 *
 * It is either a plain node id, or an object carrying the `id` together with the `linkTo` description.
 *
 * Note that the identifier is spelled `UnknownSidEffect` (without the `e` of "Side");
 * this is the exported name and is used as such by {@link DataflowGraph#unknownSideEffects}.
 */
export type UnknownSidEffect = NodeId | {
    id: NodeId;
    linkTo: LinkTo<RegExp>;
};
/**
 * The dataflow graph holds the dataflow information found within the given AST.
 * We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}
 *
 * The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
 * However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
 * After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
 * However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
 *
 * The methods {@link DataflowGraph#addVertex|`addVertex`}, {@link DataflowGraph#addEdge|`addEdge`},
 * {@link DataflowGraph#mergeWith|`mergeWith`}, {@link DataflowGraph#addControlDependency|`addControlDependency`}, and
 * {@link DataflowGraph#markIdForUnknownSideEffects|`markIdForUnknownSideEffects`} return the graph itself to allow for chaining.
 * The remaining modifying methods (`addFile`, `setIdMap`, `setDefinitionOfVertex`, `updateToFunctionCall`) return nothing.
 *
 * @typeParam Vertex - The type of vertex information stored in the graph
 * @typeParam Edge   - The type of edge information stored in the graph
 *
 * @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
 * @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
 * @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
 * @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
 */
export declare class DataflowGraph<Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo, Edge extends DataflowGraphEdge = DataflowGraphEdge> {
    private static DEFAULT_ENVIRONMENT;
    private _idMap;
    /** all file paths included in this dfg */
    private _sourced;
    private readonly _unknownSideEffects;
    /**
     * @param idMap - The id-map to the normalized AST to attach to the graph (may be `undefined`), see {@link DataflowGraph#idMap}
     */
    constructor(idMap: AstIdMap | undefined);
    /**
     * Contains the ids of the vertices of the root level graph.
     *
     * NOTE(review): the previous wording ("i.e., included those vertices from the complete graph, that are nested within
     * function definitions") is ambiguous. Given that {@link DataflowGraph#addVertex|`addVertex`} allows adding a vertex without
     * making it a root vertex, this presumably *excludes* vertices nested within function definitions - confirm.
     */
    protected rootVertices: Set<NodeId>;
    /** All vertices in the complete graph (including those nested in function definition) */
    private vertexInformation;
    /** All edges in the complete graph (including those nested in function definition) */
    private edgeInformation;
    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
     *
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id together with its outgoing edges (if it exists)
     *
     * @see #getVertex
     */
    get(id: NodeId, includeDefinedFunctions?: boolean): [Vertex, OutgoingEdges] | undefined;
    /**
     * Get the {@link DataflowGraphVertexInfo} attached to a vertex.
     *
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id (if it exists)
     *
     * @see #get
     */
    getVertex(id: NodeId, includeDefinedFunctions?: boolean): Vertex | undefined;
    /**
     * Get the edges leaving the vertex with the given id, keyed by their target (see {@link OutgoingEdges}).
     *
     * @param id - The id of the source vertex
     * @returns the outgoing edges, or `undefined` (presumably if none are recorded for the id - confirm)
     *
     * @see #ingoingEdges
     */
    outgoingEdges(id: NodeId): OutgoingEdges | undefined;
    /**
     * Get the edges entering the vertex with the given id, keyed by their source (see {@link IngoingEdges}).
     *
     * @param id - The id of the target vertex
     * @returns the ingoing edges, or `undefined` (presumably if none are recorded for the id - confirm)
     *
     * @see #outgoingEdges
     */
    ingoingEdges(id: NodeId): IngoingEdges | undefined;
    /**
     * Given a node in the normalized AST this either:
     * * returns the id if the node directly exists in the DFG
     * * returns the ids of all vertices in the DFG that are linked to this
     * * returns undefined if the node is not part of the DFG and not linked to any node
     */
    getLinked(nodeId: NodeId): NodeId[] | undefined;
    /** Retrieves the id-map to the normalized AST attached to the dataflow graph */
    get idMap(): AstIdMap | undefined;
    /** Retrieves the sources recorded for this dfg (file paths or `'<inline>'`), see {@link DataflowGraph#addFile|`addFile`} */
    get sourced(): (string | '<inline>')[];
    /** Mark this file as being part of the dfg */
    addFile(source: string | '<inline>'): void;
    /**
     * Retrieves the set of vertices which have side effects that we do not know anything about.
     *
     * @see UnknownSidEffect
     * @see #markIdForUnknownSideEffects
     */
    get unknownSideEffects(): Set<UnknownSidEffect>;
    /** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
    setIdMap(idMap: AstIdMap): void;
    /**
     * @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
     * @returns the ids of the vertices in the graph together with their vertex information
     *          (only the toplevel vertices, unless `includeDefinedFunctions` is true)
     *
     * @see #edges
     */
    vertices(includeDefinedFunctions: boolean): IterableIterator<[NodeId, Vertex]>;
    /**
     * @returns pairs of a node id and the {@link OutgoingEdges} recorded for it
     *          (presumably the id of the source vertex of these edges - confirm)
     *
     * @see #vertices
     */
    edges(): IterableIterator<[NodeId, OutgoingEdges]>;
    /**
     * Returns true if the graph contains a node with the given id.
     *
     * @param id - The id to check for
     * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
     */
    hasVertex(id: NodeId, includeDefinedFunctions?: boolean): boolean;
    /**
     * Returns true if the root level of the graph contains a node with the given id.
     */
    isRoot(id: NodeId): boolean;
    /**
     * Returns the ids of the root vertices of the graph as a read-only set.
     *
     * @see #isRoot
     */
    rootIds(): ReadonlySet<NodeId>;
    /**
     * Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
     *
     * @param vertex - The vertex to add
     * @param asRoot - If false, this will only add the vertex but not add it to the {@link rootIds|root vertices} of the graph.
     *                 This is probably only of use, when you construct dataflow graphs for tests.
     * @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
     * @returns this graph (to allow for chaining)
     *
     * @see DataflowGraphVertexInfo
     * @see DataflowGraphVertexArgument
     */
    addVertex(vertex: DataflowGraphVertexArgument & Omit<Vertex, keyof DataflowGraphVertexArgument>, asRoot?: boolean, overwrite?: boolean): this;
    /**
     * Adds an edge between two vertices which are identified by their node ids.
     *
     * @param from - The id of the source of the edge
     * @param to   - The id of the target of the edge
     * @param type - The type of the edge, either as an {@link EdgeType} or as a plain number
     *               (NOTE(review): presumably a combination of edge type flags - confirm)
     * @returns this graph (to allow for chaining)
     */
    addEdge(from: NodeId, to: NodeId, type: EdgeType | number): this;
    /**
     * Adds an edge between two vertices which are given as references (see the module-local `ReferenceForEdge`).
     *
     * @param from - The reference to the source of the edge
     * @param to   - The reference to the target of the edge
     * @param type - The type of the edge, either as an {@link EdgeType} or as a plain number
     * @returns this graph (to allow for chaining)
     */
    addEdge(from: ReferenceForEdge, to: ReferenceForEdge, type: EdgeType | number): this;
    /**
     * Adds an edge between two vertices, each of which may be given either by its node id or as a reference.
     *
     * @param from - The id of, or the reference to, the source of the edge
     * @param to   - The id of, or the reference to, the target of the edge
     * @param type - The type of the edge, either as an {@link EdgeType} or as a plain number
     * @returns this graph (to allow for chaining)
     */
    addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, type: EdgeType | number): this;
    /**
     * Merges the other graph into *this* one (in-place). The return value is only for convenience.
     *
     * @param otherGraph - The graph to merge into this one (may be `undefined`)
     * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices.
     *                            This is probably only of use in the context of function definitions.
     * @returns this graph (to allow for chaining)
     */
    mergeWith(otherGraph: DataflowGraph<Vertex, Edge> | undefined, mergeRootVertices?: boolean): this;
    private mergeEdges;
    /**
     * Marks a vertex in the graph to be a definition
     * @param reference - The reference to the vertex to mark as definition
     */
    setDefinitionOfVertex(reference: IdentifierReference): void;
    /**
     * Marks a vertex in the graph to be a function call with the new information
     * @param info - The information about the new function call node
     */
    updateToFunctionCall(info: DataflowGraphVertexFunctionCall): void;
    /**
     * Adds a control dependency to the given node.
     * If you do not pass the `to` node, this will just mark the node as maybe.
     *
     * @param from - The id of the node that receives the control dependency
     * @param to   - The id of the node that `from` depends on (optional, see above)
     * @param when - NOTE(review): presumably the outcome of the controlling condition under which the dependency holds - confirm
     * @returns this graph (to allow for chaining)
     */
    addControlDependency(from: NodeId, to?: NodeId, when?: boolean): this;
    /**
     * Marks the given node as having unknown side effects
     *
     * @param id     - The id of the node to mark
     * @param target - Optional link description (NOTE(review): presumably stored as the `linkTo` of an {@link UnknownSidEffect} - confirm)
     * @returns this graph (to allow for chaining)
     *
     * @see #unknownSideEffects
     */
    markIdForUnknownSideEffects(id: NodeId, target?: LinkTo): this;
    /**
     * Constructs a dataflow graph instance from the given JSON data and returns the result.
     * This can be useful for data sent by the flowR server when analyzing it further.
     *
     * @param data - The JSON data to construct the graph from
     * @returns the dataflow graph constructed from the given data
     */
    static fromJson(data: DataflowGraphJson): DataflowGraph;
}
/**
 * Describes an environment by its numeric `id`, its `parent` environment, and its `memory`,
 * which maps each {@link Identifier} to the list of its {@link IdentifierDefinition|definitions}.
 *
 * NOTE(review): going by the name, this is presumably the serialized (JSON) form of an environment.
 * As `parent` is not optional, it is unclear from this declaration how the outermost environment is represented - confirm.
 */
export interface IEnvironmentJson {
    readonly id: number;
    parent: IEnvironmentJson;
    memory: Record<Identifier, IdentifierDefinition[]>;
}
export {};