UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

297 lines (296 loc) 14.5 kB
import type { DfEdge, EdgeType } from './edge';
import type { DataflowInformation } from '../info';
import { type DataflowGraphVertexArgument, type DataflowGraphVertexFunctionCall, type DataflowGraphVertexInfo } from './vertex';
import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { BrandedIdentifier, IdentifierDefinition, IdentifierReference } from '../environments/identifier';
import { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { type REnvironmentInformation } from '../environments/environment';
import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { LinkTo } from '../../queries/catalog/call-context-query/call-context-query-format';

/**
 * The information kept for every function body: a {@link DataflowInformation}
 * whose `graph` is replaced by a set of node ids and whose `exitPoints` are dropped.
 * The {@link DataflowFunctionFlowInformation#exitPoints} live on the enclosing
 * {@link DataflowGraphVertexFunctionDefinition} vertex instead.
 */
export type DataflowFunctionFlowInformation = Omit<DataflowInformation, 'graph' | 'exitPoints'> & {
    graph: Set<NodeId>;
};

/**
 * An argument reference that carries a name, such as `a` and `b` in this call:
 *
 * ```r
 * foo(a = 3, b = 2)
 * ```
 *
 * NOTE(review): the previous comment pointed to `#isNamedArgument`, which is not
 * declared in this file; the visible check is `FunctionArgument.isNamed`.
 * @see {@link FunctionArgument.isNamed|`FunctionArgument.isNamed`}
 * @see PositionalFunctionArgument
 */
export interface NamedFunctionArgument extends IdentifierReference {
    readonly name: string;
    readonly valueId: NodeId | undefined;
}

/**
 * An argument reference without a name, such as the references to `3` and `2` here:
 *
 * ```r
 * foo(3, 2)
 * ```
 * @see NamedFunctionArgument
 */
export interface PositionalFunctionArgument extends Omit<IdentifierReference, 'name'> {
    readonly name?: undefined;
}

/**
 * Any argument of a function call: named (`foo(a = 3, b = 2)`), unnamed (`foo(3, 2)`),
 * or empty (`foo(,)`).
 * Use the {@link FunctionArgument} helper functions to tell the variants apart.
 * @see {@link FunctionArgument.isNamed|`FunctionArgument.isNamed`} - to check for named arguments
 * @see {@link FunctionArgument.isPositional|`FunctionArgument.isPositional`} - to check for positional arguments
 * @see {@link FunctionArgument.isEmpty|`FunctionArgument.isEmpty`} - to check for empty arguments
 */
export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | typeof EmptyArgument;

/**
 * Helper functions for working with {@link FunctionArgument}s.
 * @see {@link EmptyArgument} - the marker for empty arguments
 */
export declare const FunctionArgument: {
    readonly name: "FunctionArgument";
    /**
     * Type guard for positional arguments.
     * @example
     * ```r
     * foo(b=3, 2) # the second argument is positional
     * ```
     */
    readonly isPositional: (this: void, arg: FunctionArgument) => arg is PositionalFunctionArgument;
    /**
     * Type guard for named arguments.
     * @example
     * ```r
     * foo(b=3, 2) # the first argument is named
     * ```
     * @see {@link isPositional}
     * @see {@link isEmpty}
     * @see {@link hasName}
     */
    readonly isNamed: (this: void, arg: FunctionArgument) => arg is NamedFunctionArgument;
    /**
     * Type guard for unnamed arguments, i.e., arguments that are either positional or empty.
     * @example
     * ```r
     * foo(, 2)  # the first argument is unnamed (empty)
     * foo(3, 2) # both arguments are unnamed (positional)
     * ```
     * @see {@link isNamed}
     */
    readonly isUnnamed: (this: void, arg: FunctionArgument) => arg is PositionalFunctionArgument | typeof EmptyArgument;
    /**
     * Type guard for the empty argument.
     * @example
     * ```r
     * foo(, 2) # the first argument is empty
     * ```
     * @see {@link isNotEmpty}
     */
    readonly isEmpty: (this: void, arg: unknown) => arg is typeof EmptyArgument;
    /**
     * Type guard for everything that is not the empty argument.
     * @see {@link FunctionArgument.isEmpty}
     */
    readonly isNotEmpty: <T>(this: void, arg: T) => arg is Exclude<T, typeof EmptyArgument>;
    /**
     * Yields the id of a non-empty argument.
     * @example
     * ```r
     * foo(a=3, 2) # returns the node id of either `a` or `2`
     * ```
     * @see {@link FunctionArgument.getReference}
     */
    readonly getId: (this: void, arg: FunctionArgument) => NodeId | undefined;
    /**
     * Yields the reference of a non-empty argument.
     * @example
     * ```r
     * foo(a=3, 2) # returns the node id of either `3` or `2`, but skips a
     * ```
     * @see {@link FunctionArgument.getId}
     */
    readonly getReference: (this: void, arg: FunctionArgument) => NodeId | undefined;
    /**
     * Type guard for a named argument that carries exactly the given name.
     * This compares the name for exact identity only; it does not decide whether
     * R's argument matching would resolve to that argument.
     * For the latter, use the {@link pMatch} function.
     * @see {@link isNamed}
     */
    readonly hasName: (this: void, arg: FunctionArgument, name: string | undefined) => arg is NamedFunctionArgument;
};

/**
 * Maps an edge's target to the edge information.
 */
export type OutgoingEdges<Edge extends DfEdge = DfEdge> = Map<NodeId, Edge>;

/**
 * The counterpart of {@link OutgoingEdges} with the edge direction inverted:
 * it maps an edge's source to the edge information.
 */
export type IngoingEdges<Edge extends DfEdge = DfEdge> = Map<NodeId, Edge>;

/**
 * The shape of a serialized {@link DataflowGraph}.
 */
export interface DataflowGraphJson {
    readonly rootVertices: NodeId[];
    readonly vertexInformation: Array<[NodeId, DataflowGraphVertexInfo]>;
    readonly edgeInformation: Array<[NodeId, Array<[NodeId, DfEdge]>]>;
    readonly _unknownSideEffects: UnknownSideEffect[];
}

/**
 * An unknown side effect marks something that we cannot handle correctly (in all cases).
 * For instance, `load` is marked as an unknown side effect because we have no idea how it affects the program.
 * The linked form is used whenever we know that a call may be affected by another one in a way that
 * cannot be captured from the dataflow perspective (e.g., an indirect dependency based on the
 * currently active graphic device).
 */
export type UnknownSideEffect = NodeId | {
    id: NodeId;
    linkTo: LinkTo<RegExp>;
};

/**
 * The dataflow graph holds the dataflow information found within the given AST.
 * Its directed edges are differentiated by {@link EdgeType}, its vertices by {@link DataflowGraphVertexArgument}.
 * The helper object associated with the DFG is {@link Dataflow}.
 *
 * Vertices are organized hierarchically: a function-definition node contains the node ids of its subgraph.
 * All *edges*, however, are hoisted to the top level in the form of an (attributed) adjacency list.
 * Once the dataflow analysis is done, every source and target of an edge *must* be part of the vertices.
 * During construction this does not have to hold, as edges may point from or to vertices that are yet to be constructed.
 *
 * All methods return the modified graph to allow for chaining.
 * @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
 * @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
 * @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
 * @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
 */
export declare class DataflowGraph<Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo, Edge extends DfEdge = DfEdge> {
    private _idMap;
    private readonly _unknownSideEffects;
    /**
     * The vertices of the root level graph.
     * NOTE(review): the previous comment said this "included" the vertices nested within function
     * definitions, which seems at odds with {@link isRoot} and the `includeDefinedFunctions` flags;
     * presumably nested vertices are excluded here — confirm against the implementation.
     */
    protected rootVertices: Set<NodeId>;
    /** All vertices in the complete graph (including those nested in function definition) */
    private vertexInformation;
    /** All edges in the complete graph (including those nested in function definition) */
    private edgeInformation;
    private readonly types;
    /**
     * @param idMap - The id-map to the normalized AST to attach to the graph (see {@link idMap}), if any
     */
    constructor(idMap: AstIdMap | undefined);
    /** Produces the {@link DataflowGraphJson} representation of this graph. */
    toJSON(): DataflowGraphJson;
    /**
     * Retrieves the {@link DataflowGraphVertexInfo} attached to a node together with all of its outgoing edges.
     * @param id - The id of the node to get
     * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
     * @returns the node info for the given id (if it exists)
     * @see #getVertex
     */
    get(id: NodeId, includeDefinedFunctions?: boolean): [Vertex, OutgoingEdges] | undefined;
    /**
     * Retrieves the {@link DataflowGraphVertexInfo} attached to a vertex.
     * @param id - The id of the node to get
     * @returns the node info for the given id (if it exists)
     * @see #get
     * @see #getRootVertex
     */
    getVertex(id: NodeId): Vertex | undefined;
    /**
     * Retrieves the {@link DataflowGraphVertexInfo} attached to a root-level vertex.
     * @param id - The id of the node to get
     * @returns the node info for the given id (if it exists)
     * @see #get
     * @see #getVertex
     */
    getRootVertex(id: NodeId): Vertex | undefined;
    /**
     * @param id - The id of the vertex whose outgoing edges are requested
     * @returns the {@link OutgoingEdges} for the given id, or `undefined` (presumably if none are recorded — confirm)
     */
    outgoingEdges(id: NodeId): OutgoingEdges | undefined;
    /**
     * @param id - The id of the vertex whose ingoing edges are requested
     * @returns the {@link IngoingEdges} for the given id, or `undefined` (presumably if none are recorded — confirm)
     */
    ingoingEdges(id: NodeId): IngoingEdges | undefined;
    /**
     * For a node of the normalized AST, this yields one of:
     * - the id itself, if the node directly exists in the DFG
     * - the ids of all vertices in the DFG that are linked to it
     * - `undefined`, if the node is neither part of the DFG nor linked to any node
     */
    getLinked(nodeId: NodeId): NodeId[] | undefined;
    /** The id-map to the normalized AST that is attached to the dataflow graph */
    get idMap(): AstIdMap | undefined;
    /**
     * The set of vertices which have side effects that we do not know anything about.
     */
    get unknownSideEffects(): Set<UnknownSideEffect>;
    /** Sets the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
    setIdMap(idMap: AstIdMap): void;
    /**
     * @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
     * @returns the ids of all toplevel vertices in the graph together with their vertex information
     * @see #edges
     */
    vertices(includeDefinedFunctions: boolean): MapIterator<[NodeId, Vertex]>;
    /**
     * @param type - The vertex tag to select
     * @returns the ids and vertex information of the vertices whose `tag` is `type`
     */
    verticesOfType<T extends Vertex['tag']>(type: T): MapIterator<[NodeId, Vertex & {
        tag: T;
    }]>;
    /**
     * @param type - The vertex tag to select
     * @returns node ids for the given tag (presumably the id-only counterpart of {@link verticesOfType} — confirm)
     */
    vertexIdsOfType<T extends Vertex['tag']>(type: T): NodeId[];
    /**
     * @returns the ids of all edges in the graph together with their edge information
     * @see #vertices
     */
    edges(): MapIterator<[NodeId, OutgoingEdges]>;
    /**
     * Tells whether the graph contains a node with the given id.
     * @param id - The id to check for
     * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
     */
    hasVertex(id: NodeId, includeDefinedFunctions?: boolean): boolean;
    /**
     * Tells whether the root level of the graph contains a node with the given id.
     */
    isRoot(id: NodeId): boolean;
    /** A read-only set of node ids (presumably those of the root-level vertices — confirm). */
    rootIds(): ReadonlySet<NodeId>;
    /**
     * Adds a new vertex to the graph; for ease of use, some arguments are optional and filled in automatically.
     * @param vertex - The vertex to add
     * @param fallbackEnv - A clean environment to use if no environment is given in the vertex
     * @param asRoot - If false, this will only add the vertex but not add it to the {@link rootIds|root vertices} of the graph.
     *                 This is probably only of use when you construct dataflow graphs for tests.
     * @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
     * @see DataflowGraphVertexInfo
     * @see DataflowGraphVertexArgument
     */
    addVertex(vertex: DataflowGraphVertexArgument & Omit<Vertex, keyof DataflowGraphVertexArgument>, fallbackEnv: REnvironmentInformation, asRoot?: boolean, overwrite?: boolean): this;
    /**
     * Adds an edge to the graph.
     * @param fromId - The id of the edge's source
     * @param toId - The id of the edge's target
     * @param type - The {@link EdgeType} of the edge (or a plain number; presumably a combination of edge types — confirm)
     */
    addEdge(fromId: NodeId, toId: NodeId, type: EdgeType | number): this;
    /**
     * Merges the other graph into *this* one (in-place). The return value is only for convenience.
     * @param otherGraph - The graph to merge into this one
     * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices;
     *                            this is probably only of use in the context of function definitions
     */
    mergeWith(otherGraph: DataflowGraph<Vertex, Edge> | undefined, mergeRootVertices?: boolean): this;
    /**
     * Vertex-only merge of another graph.
     * NOTE(review): presumably the vertex part of {@link mergeWith}, with `mergeRootVertices` having the same meaning — confirm.
     */
    mergeVertices(otherGraph: DataflowGraph<Vertex, Edge>, mergeRootVertices?: boolean): void;
    private mergeEdges;
    /**
     * Marks a vertex in the graph to be a definition
     * @param reference - The reference to the vertex to mark as definition
     * @param sourceIds - The id of the source vertex of the def, if available
     */
    setDefinitionOfVertex(reference: IdentifierReference, sourceIds: readonly NodeId[] | undefined): void;
    /**
     * Marks a vertex in the graph to be a function call with the new information
     * @param info - The information about the new function call node
     */
    updateToFunctionCall(info: DataflowGraphVertexFunctionCall): void;
    /**
     * If you do not pass the `to` node, this will just mark the node as maybe.
     * NOTE(review): `to` is a required parameter in this signature, so the sentence above may be outdated — confirm.
     */
    addControlDependency(from: NodeId, to: NodeId, when?: boolean): this;
    /** Marks the given node as having unknown side effects */
    markIdForUnknownSideEffects(id: NodeId, target?: LinkTo<RegExp | string>): this;
    /**
     * Builds a dataflow graph instance from the given JSON data and returns it.
     * This can be useful for data sent by the flowR server when analyzing it further.
     * @param data - The JSON data to construct the graph from
     */
    static fromJson(data: DataflowGraphJson): DataflowGraph;
}

/**
 * JSON shape of an environment (presumably the serialized form of an R environment — confirm).
 * Each environment links to its `parent` and maps identifiers to their definitions in `memory`.
 */
export interface IEnvironmentJson {
    readonly id: number;
    parent: IEnvironmentJson;
    memory: Record<BrandedIdentifier, IdentifierDefinition[]>;
    builtInEnv: true | undefined;
}