/*
 * @eagleoutice/flowr
 * Version:
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 297 lines (296 loc) • 14.5 kB
 * TypeScript
 */
import type { DfEdge, EdgeType } from './edge';
import type { DataflowInformation } from '../info';
import { type DataflowGraphVertexArgument, type DataflowGraphVertexFunctionCall, type DataflowGraphVertexInfo } from './vertex';
import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { BrandedIdentifier, IdentifierDefinition, IdentifierReference } from '../environments/identifier';
import { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { type REnvironmentInformation } from '../environments/environment';
import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { LinkTo } from '../../queries/catalog/call-context-query/call-context-query-format';
/**
* Describes the information we store per function body.
*
* Structurally, this is the {@link DataflowInformation} without its `graph` and `exitPoints` members,
* where `graph` is re-added as a plain set of node ids instead of a full graph object.
* The exit points are deliberately not part of this type: they are stored within the
* enclosing {@link DataflowGraphVertexFunctionDefinition} vertex.
*/
export type DataflowFunctionFlowInformation = Omit<DataflowInformation, 'graph' | 'exitPoints'> & {
/** The node ids belonging to the function body (presumably the ids of the vertices of its subgraph — confirm against the producer). */
graph: Set<NodeId>;
};
/**
* A reference with a name, e.g. `a` and `b` in the following function call:
*
* ```r
* foo(a = 3, b = 2)
* ```
* @see {@link FunctionArgument.isNamed} - to check whether an argument is named
* @see PositionalFunctionArgument
*/
export interface NamedFunctionArgument extends IdentifierReference {
/** The name of the argument as written at the call site (e.g., `a` in `foo(a = 3)`). */
readonly name: string;
/**
* Presumably the id of the node holding the argument's value (e.g., `3` in `foo(a = 3)`),
* and `undefined` if there is no such node — TODO confirm against the code that creates these arguments.
*/
readonly valueId: NodeId | undefined;
}
/**
* A reference which does not have a name, like the references to the arguments `3` and `2` in the following:
*
* ```r
* foo(3, 2)
* ```
*
* It has the shape of an {@link IdentifierReference} except for `name`, which may only be absent or `undefined`.
* @see NamedFunctionArgument
* @see {@link FunctionArgument.isPositional} - to check whether an argument is positional
*/
export interface PositionalFunctionArgument extends Omit<IdentifierReference, 'name'> {
/** Never set for positional arguments; typed as `undefined` so that it cannot be confused with {@link NamedFunctionArgument#name}. */
readonly name?: undefined;
}
/**
* Summarizes either named (`foo(a = 3, b = 2)`), unnamed (`foo(3, 2)`), or empty (`foo(,)`) arguments within a function.
* Empty arguments are represented by the {@link EmptyArgument} marker rather than by an object.
*
* Please note that the name `FunctionArgument` refers to this type as well as to the helper object of the same name,
* which offers functions to check for the specific variants.
* @see {@link FunctionArgument.isNamed|`FunctionArgument.isNamed`} - to check for named arguments
* @see {@link FunctionArgument.isPositional|`FunctionArgument.isPositional`} - to check for positional arguments
* @see {@link FunctionArgument.isEmpty|`FunctionArgument.isEmpty`} - to check for empty arguments
*/
export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | typeof EmptyArgument;
/**
* Helper functions to work with {@link FunctionArgument}s.
*
* All helpers are declared with `this: void`, i.e., they do not rely on a receiver and can be
* passed around on their own (for example, as a callback to `filter`).
* @see {@link EmptyArgument} - the marker for empty arguments
*/
export declare const FunctionArgument: {
/** The name of this helper object. */
readonly name: "FunctionArgument";
/**
* Checks whether the given argument is a positional argument.
* @example
* ```r
* foo(b=3, 2) # the second argument is positional
* ```
* @see {@link isNamed}
* @see {@link isUnnamed}
*/
readonly isPositional: (this: void, arg: FunctionArgument) => arg is PositionalFunctionArgument;
/**
* Checks whether the given argument is a named argument.
* @example
* ```r
* foo(b=3, 2) # the first argument is named
* ```
* @see {@link isPositional}
* @see {@link isEmpty}
* @see {@link hasName}
*/
readonly isNamed: (this: void, arg: FunctionArgument) => arg is NamedFunctionArgument;
/**
* Checks whether the given argument is an unnamed argument (either positional or empty).
* @example
* ```r
* foo(, 2) # the first argument is unnamed (empty)
* foo(3, 2) # both arguments are unnamed (positional)
* ```
* @see {@link isNamed}
*/
readonly isUnnamed: (this: void, arg: FunctionArgument) => arg is PositionalFunctionArgument | typeof EmptyArgument;
/**
* Checks whether the given argument is an empty argument.
* In contrast to the other checks, this accepts arbitrary values (`unknown`).
* @example
* ```r
* foo(, 2) # the first argument is empty
* ```
* @see {@link isNotEmpty}
*/
readonly isEmpty: (this: void, arg: unknown) => arg is typeof EmptyArgument;
/**
* Checks whether the given argument is not an empty argument.
* On success, the type of `arg` is narrowed to `T` without the {@link EmptyArgument} marker.
* @see {@link FunctionArgument.isEmpty}
*/
readonly isNotEmpty: <T>(this: void, arg: T) => arg is Exclude<T, typeof EmptyArgument>;
/**
* Returns the id of a non-empty argument.
* @example
* ```r
* foo(a=3, 2) # returns the node id of either `a` or `2`
* ```
* @returns the node id of the argument; presumably `undefined` for an empty argument (confirm against the implementation)
* @see {@link FunctionArgument.getReference}
*/
readonly getId: (this: void, arg: FunctionArgument) => NodeId | undefined;
/**
* Returns the reference of a non-empty argument.
* @example
* ```r
* foo(a=3, 2) # returns the node id of either `3` or `2`, but skips `a`
* ```
* @returns the node id of the referenced value; presumably `undefined` if there is none, e.g., for an empty argument (confirm against the implementation)
* @see {@link FunctionArgument.getId}
*/
readonly getReference: (this: void, arg: FunctionArgument) => NodeId | undefined;
/**
* Checks whether the given argument is a named argument with the specified name.
* Please note that this only checks whether the name is exactly identical and not whether
* R's argument matching resolves to the correct argument.
* For this, please refer to the {@link pMatch} function!
*
* NOTE(review): `name` may be `undefined`; how this case is treated is not visible from the declaration — confirm against the implementation.
* @see {@link isNamed}
*/
readonly hasName: (this: void, arg: FunctionArgument, name: string | undefined) => arg is NamedFunctionArgument;
};
/**
* Maps the target of each edge to the respective edge information.
* @see {@link IngoingEdges} - for the inverted direction
*/
export type OutgoingEdges<Edge extends DfEdge = DfEdge> = Map<NodeId, Edge>;
/**
* Similar to {@link OutgoingEdges}, but inverted regarding the edge direction.
* In other words, it maps the source to the edge information.
*
* Please note that both types are structurally identical (a map from node id to edge),
* so the type checker does not prevent mixing them up; they differ only in the meaning of the key.
*/
export type IngoingEdges<Edge extends DfEdge = DfEdge> = Map<NodeId, Edge>;
/**
* The structure of the serialized {@link DataflowGraph}.
* Maps and sets are represented as plain arrays (of entries) so that the structure is JSON-compatible.
* @see {@link DataflowGraph#toJSON|`toJSON`} - to produce this structure
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a graph from it
*/
export interface DataflowGraphJson {
/** The ids of the root-level vertices. */
readonly rootVertices: NodeId[];
/** Pairs of a vertex id and the information attached to that vertex. */
readonly vertexInformation: [NodeId, DataflowGraphVertexInfo][];
/** Pairs of a node id and its list of `[node id, edge]` entries (presumably the source with its targets, mirroring {@link OutgoingEdges} — confirm). */
readonly edgeInformation: [NodeId, [NodeId, DfEdge][]][];
/** The unknown side effects recorded for the graph. */
readonly _unknownSideEffects: UnknownSideEffect[];
}
/**
* An unknown side effect describes something that we cannot handle correctly (in all cases).
* For example, `load` will be marked as an unknown side effect as we have no idea of how it will affect the program.
* Linked side effects are used whenever we know that a call may be affected by another one in a way that we cannot
* grasp from the dataflow perspective (e.g., an indirect dependency based on the currently active graphic device).
*
* Accordingly, a value of this type is either a plain node id,
* or an object which pairs the node id with a description of what it is linked to (a linked side effect).
*/
export type UnknownSideEffect = NodeId | {
/** The id of the node with the side effect. */
id: NodeId;
/** Describes what the node is linked to. */
linkTo: LinkTo<RegExp>;
};
/**
* The dataflow graph holds the dataflow information found within the given AST.
* We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}.
* The helper object associated with the DFG is {@link Dataflow}.
*
* The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
* However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
* After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
* However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
*
* Most modifying methods return the modified graph (`this`) to allow for chaining.
* The exceptions are {@link DataflowGraph#setIdMap|`setIdMap`}, {@link DataflowGraph#mergeVertices|`mergeVertices`},
* {@link DataflowGraph#setDefinitionOfVertex|`setDefinitionOfVertex`}, and {@link DataflowGraph#updateToFunctionCall|`updateToFunctionCall`}, which return nothing.
* @typeParam Vertex - The type of the vertex information stored in the graph
* @typeParam Edge - The type of the edge information stored in the graph
* @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
* @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
* @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
*/
export declare class DataflowGraph<Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo, Edge extends DfEdge = DfEdge> {
/** Presumably the backing field of the {@link DataflowGraph#idMap|`idMap`} getter (not visible from the declaration). */
private _idMap;
/** Presumably the backing field of the {@link DataflowGraph#unknownSideEffects|`unknownSideEffects`} getter (not visible from the declaration). */
private readonly _unknownSideEffects;
/**
* @param idMap - The id-map of the normalized AST to attach to the graph, or `undefined` if there is none (yet)
* @see #setIdMap
*/
constructor(idMap: AstIdMap | undefined);
/**
* Contains the vertices of the root level graph.
* NOTE(review): the original comment continued with "(i.e., included those vertices from the complete graph, that are nested within function definitions)".
* Given that there is a separate field for all vertices and that the `includeDefinedFunctions` flags distinguish the toplevel from function definitions,
* this was probably meant to read "excluding" — confirm against the implementation.
*/
protected rootVertices: Set<NodeId>;
/** All vertices in the complete graph (including those nested in function definition) */
private vertexInformation;
/** All edges in the complete graph (including those nested in function definition) */
private edgeInformation;
/** NOTE(review): the purpose is not visible from the declaration; presumably an index of the vertices by their tag (see {@link DataflowGraph#verticesOfType|`verticesOfType`}) — confirm. */
private readonly types;
/**
* Serializes the graph into the {@link DataflowGraphJson} structure.
* @see #fromJson
*/
toJSON(): DataflowGraphJson;
/**
* Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
* @param id - The id of the node to get
* @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
* @returns the node info for the given id (if it exists)
* @see #getVertex
*/
get(id: NodeId, includeDefinedFunctions?: boolean): [Vertex, OutgoingEdges] | undefined;
/**
* Get the {@link DataflowGraphVertexInfo} attached to a vertex.
* @param id - The id of the node to get
* @returns the node info for the given id (if it exists)
* @see #get
* @see #getRootVertex
*/
getVertex(id: NodeId): Vertex | undefined;
/**
* Get the {@link DataflowGraphVertexInfo} attached to a root-level vertex.
* @param id - The id of the node to get
* @returns the node info for the given id (if it exists)
* @see #get
* @see #getVertex
*/
getRootVertex(id: NodeId): Vertex | undefined;
/**
* Get the outgoing edges of a vertex.
* @param id - The id of the source vertex
* @returns the edges keyed by their target (see {@link OutgoingEdges}), or `undefined` (presumably if no outgoing edges are recorded for the id)
* @see #ingoingEdges
*/
outgoingEdges(id: NodeId): OutgoingEdges | undefined;
/**
* Get the ingoing edges of a vertex.
* @param id - The id of the target vertex
* @returns the edges keyed by their source (see {@link IngoingEdges}), or `undefined` (presumably if no ingoing edges exist for the id)
* @see #outgoingEdges
*/
ingoingEdges(id: NodeId): IngoingEdges | undefined;
/**
* Given a node in the normalized AST this either:
* returns the id if the node directly exists in the DFG
* returns the ids of all vertices in the DFG that are linked to this
* returns undefined if the node is not part of the DFG and not linked to any node
*/
getLinked(nodeId: NodeId): NodeId[] | undefined;
/** Retrieves the id-map to the normalized AST attached to the dataflow graph */
get idMap(): AstIdMap | undefined;
/**
* Retrieves the set of vertices which have side effects that we do not know anything about.
*/
get unknownSideEffects(): Set<UnknownSideEffect>;
/** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
setIdMap(idMap: AstIdMap): void;
/**
* Please note that, in contrast to {@link DataflowGraph#get|`get`} and {@link DataflowGraph#hasVertex|`hasVertex`}, the flag is not optional here.
* @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
* @returns the ids of the vertices in the graph together with their vertex information (only the toplevel ones unless `includeDefinedFunctions` is set)
* @see #edges
*/
vertices(includeDefinedFunctions: boolean): MapIterator<[NodeId, Vertex]>;
/**
* Iterates over the vertices with the given tag.
* @param type - The tag of the vertices to retrieve
* @returns the ids of the matching vertices together with their vertex information, typed according to the tag
* @see #vertexIdsOfType
*/
verticesOfType<T extends Vertex['tag']>(type: T): MapIterator<[NodeId, Vertex & {
tag: T;
}]>;
/**
* Similar to {@link DataflowGraph#verticesOfType|`verticesOfType`}, but only returns the ids.
* @param type - The tag of the vertices to retrieve
*/
vertexIdsOfType<T extends Vertex['tag']>(type: T): NodeId[];
/**
* @returns the ids of all edges in the graph together with their edge information
* @see #vertices
*/
edges(): MapIterator<[NodeId, OutgoingEdges]>;
/**
* Returns true if the graph contains a node with the given id.
* @param id - The id to check for
* @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
*/
hasVertex(id: NodeId, includeDefinedFunctions?: boolean): boolean;
/**
* Returns true if the root level of the graph contains a node with the given id.
*/
isRoot(id: NodeId): boolean;
/**
* Returns the ids of the root-level vertices as a read-only set.
* @see #isRoot
*/
rootIds(): ReadonlySet<NodeId>;
/**
* Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
* @param vertex - The vertex to add
* @param fallbackEnv - A clean environment to use if no environment is given in the vertex
* @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph.
* This is probably only of use, when you construct dataflow graphs for tests.
* @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
* @see DataflowGraphVertexInfo
* @see DataflowGraphVertexArgument
*/
addVertex(vertex: DataflowGraphVertexArgument & Omit<Vertex, keyof DataflowGraphVertexArgument>, fallbackEnv: REnvironmentInformation, asRoot?: boolean, overwrite?: boolean): this;
/**
* Adds an edge to the graph.
* As described for the class, the source and target do not have to exist as vertices (yet) during the construction.
* @param fromId - The id of the source of the edge
* @param toId - The id of the target of the edge
* @param type - The type of the edge; a plain `number` is accepted as well (presumably to pass a combination of several {@link EdgeType}s — confirm)
*/
addEdge(fromId: NodeId, toId: NodeId, type: EdgeType | number): this;
/**
* Merges the other graph into *this* one (in-place). The return value is only for convenience.
* @param otherGraph - The graph to merge into this one
* @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use
* in the context of function definitions
*/
mergeWith(otherGraph: DataflowGraph<Vertex, Edge> | undefined, mergeRootVertices?: boolean): this;
/**
* Presumably the vertex-only part of {@link DataflowGraph#mergeWith|`mergeWith`} (confirm against the implementation).
* In contrast to `mergeWith`, the other graph must be given and nothing is returned.
* @param otherGraph - The graph whose vertices are to be merged into this one
* @param mergeRootVertices - See {@link DataflowGraph#mergeWith|`mergeWith`}
*/
mergeVertices(otherGraph: DataflowGraph<Vertex, Edge>, mergeRootVertices?: boolean): void;
/** Presumably the edge counterpart of {@link DataflowGraph#mergeVertices|`mergeVertices`} (not visible from the declaration). */
private mergeEdges;
/**
* Marks a vertex in the graph to be a definition
* @param reference - The reference to the vertex to mark as definition
* @param sourceIds - The id of the source vertex of the def, if available
*/
setDefinitionOfVertex(reference: IdentifierReference, sourceIds: readonly NodeId[] | undefined): void;
/**
* Marks a vertex in the graph to be a function call with the new information
* @param info - The information about the new function call node
*/
updateToFunctionCall(info: DataflowGraphVertexFunctionCall): void;
/**
* If you do not pass the `to` node, this will just mark the node as maybe
*
* NOTE(review): according to the signature, `to` is a required parameter (only `when` is optional),
* so the sentence above may be outdated — confirm against the implementation.
*/
addControlDependency(from: NodeId, to: NodeId, when?: boolean): this;
/**
* Marks the given node as having unknown side effects
* @param id - The id of the node to mark
* @param target - If given, describes what the side effect is linked to (see {@link UnknownSideEffect}).
* Please note that this accepts `RegExp | string`, while {@link UnknownSideEffect} only holds `RegExp` (presumably, strings are converted — confirm).
*/
markIdForUnknownSideEffects(id: NodeId, target?: LinkTo<RegExp | string>): this;
/**
* Constructs a dataflow graph instance from the given JSON data and returns the result.
* This can be useful for data sent by the flowR server when analyzing it further.
* @param data - The JSON data to construct the graph from
*/
static fromJson(data: DataflowGraphJson): DataflowGraph;
}
/**
* Judging by the name, this describes the JSON form of an environment (TODO confirm where it is produced and consumed).
*
* NOTE(review): `parent` is declared as required and of the same type, so the type itself describes a chain
* without a terminating element; how the outermost environment is represented is not visible here — confirm.
*/
export interface IEnvironmentJson {
/** The numeric id of the environment. */
readonly id: number;
/** The parent of this environment, in the same format. */
parent: IEnvironmentJson;
/** Maps each identifier to the list of definitions stored for it. */
memory: Record<BrandedIdentifier, IdentifierDefinition[]>;
/** Either `true` or `undefined` (presumably `true` marks the built-in environment — confirm). */
builtInEnv: true | undefined;
}