/*
 * Listing header captured together with this file:
 * Package: @eagleoutice/flowr
 * Version: (not captured)
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 236 lines (235 loc) • 11.7 kB
 * TypeScript
 */
import type { DataflowGraphEdge, EdgeType } from './edge';
import type { DataflowInformation } from '../info';
import type { DataflowGraphVertexArgument, DataflowGraphVertexFunctionCall, DataflowGraphVertexInfo } from './vertex';
import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { Identifier, IdentifierDefinition, IdentifierReference } from '../environments/identifier';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { LinkTo } from '../../queries/catalog/call-context-query/call-context-query-format';
/**
* Describes the information we store per function body.
*
* This is the {@link DataflowInformation} without its `graph` and `exitPoints` properties;
* `graph` is re-declared below as a set of node ids instead.
* The {@link DataflowFunctionFlowInformation#exitPoints} are stored within the enclosing {@link DataflowGraphVertexFunctionDefinition} vertex.
*/
export type DataflowFunctionFlowInformation = Omit<DataflowInformation, 'graph' | 'exitPoints'> & {
/** Node ids standing in for the omitted `graph` (presumably the ids of the vertices that form the subgraph of the function body — confirm with the producer). */
graph: Set<NodeId>;
};
/**
* A reference with a name, e.g. `a` and `b` in the following function call:
*
* ```r
* foo(a = 3, b = 2)
* ```
*
* It is an {@link IdentifierReference} whose `name` is guaranteed to be a string.
*
* @see #isNamedArgument
* @see PositionalFunctionArgument
*/
export interface NamedFunctionArgument extends IdentifierReference {
/** The name the argument is passed with (`a` or `b` in the example above). */
readonly name: string;
}
/**
* A reference which does not have a name, like the references to the arguments `3` and `2` in the following:
*
* ```r
* foo(3, 2)
* ```
*
* It is an {@link IdentifierReference} with the `name` property removed and re-declared so that it can only be absent or `undefined`.
*
* @see #isPositionalArgument
* @see NamedFunctionArgument
*/
export interface PositionalFunctionArgument extends Omit<IdentifierReference, 'name'> {
/** Never set for positional arguments; declared as `undefined` so that the `name` property separates this type from {@link NamedFunctionArgument}. */
readonly name?: undefined;
}
/**
* Summarizes either named (`foo(a = 3, b = 2)`), unnamed (`foo(3, 2)`), or empty (`foo(,)`) arguments within a function call.
* Empty arguments are represented by the {@link EmptyArgument} marker value.
*
* @see #isNamedArgument
* @see #isPositionalArgument
*/
export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | typeof EmptyArgument;
/**
* Check if the given argument is a {@link PositionalFunctionArgument}.
* This is a type guard: if it returns `true`, `arg` is narrowed to {@link PositionalFunctionArgument}.
*
* @param arg - The argument to check
*
* @see #isNamedArgument
*/
export declare function isPositionalArgument(arg: FunctionArgument): arg is PositionalFunctionArgument;
/**
* Check if the given argument is a {@link NamedFunctionArgument}.
* This is a type guard: if it returns `true`, `arg` is narrowed to {@link NamedFunctionArgument}.
*
* @param arg - The argument to check
*
* @see #isPositionalArgument
*/
export declare function isNamedArgument(arg: FunctionArgument): arg is NamedFunctionArgument;
/**
* Returns the reference of a non-empty argument.
*
* @param arg - The argument to retrieve the reference of
* @returns the node id of the reference, or `undefined`
* (NOTE(review): presumably `undefined` is returned exactly for the {@link EmptyArgument} — the implementation is not part of this declaration, confirm)
*/
export declare function getReferenceOfArgument(arg: FunctionArgument): NodeId | undefined;
/**
* A reference that is enough to indicate start and end points of an edge within the dataflow graph.
* This is either just the `nodeId` and `controlDependencies` of an {@link IdentifierReference}, or a complete {@link IdentifierDefinition}.
* The type is not exported and only used by the reference-based overloads of {@link DataflowGraph#addEdge}.
*/
type ReferenceForEdge = Pick<IdentifierReference, 'nodeId' | 'controlDependencies'> | IdentifierDefinition;
/**
* Maps the target of each edge to the edge information.
* The source of these edges is implied by the context in which the map is obtained (e.g., {@link DataflowGraph#outgoingEdges}).
*
* @see IngoingEdges
*/
export type OutgoingEdges<Edge extends DataflowGraphEdge = DataflowGraphEdge> = Map<NodeId, Edge>;
/**
* Similar to {@link OutgoingEdges}, but inverted regarding the edge direction.
* In other words, it maps the source of each edge to the edge information.
*
* Please note that this alias is structurally identical to {@link OutgoingEdges},
* so the compiler will not catch it if one is used in place of the other.
*/
export type IngoingEdges<Edge extends DataflowGraphEdge = DataflowGraphEdge> = Map<NodeId, Edge>;
/**
* The structure of the serialized {@link DataflowGraph}.
*
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a graph from this structure
*/
export interface DataflowGraphJson {
/** The ids of the vertices on the root level of the graph */
readonly rootVertices: NodeId[];
/** Pairs of a vertex id and the information attached to that vertex */
readonly vertexInformation: [NodeId, DataflowGraphVertexInfo][];
/** For each listed id, the `[id, edge]` pairs attached to it (presumably a source with its `[target, edge]` pairs, mirroring {@link OutgoingEdges} — confirm with the serializer) */
readonly edgeInformation: [NodeId, [NodeId, DataflowGraphEdge][]][];
/** The files included in the graph, if recorded. The `'<inline>'` literal only documents intent, as `string | '<inline>'` is simply `string` for the compiler. */
readonly sourced?: (string | '<inline>')[];
}
/**
* An unknown side effect describes something that we cannot handle correctly (in all cases).
* For example, `eval` will be marked as an unknown side effect as we have no idea of how it will affect the program.
* Linked side effects are used whenever we know that a call may be affected by another one in a way that we cannot
* grasp from the dataflow perspective (e.g., an indirect dependency based on the currently active graphic device).
*
* The type is therefore either the plain id of the affected node or, for linked side effects,
* an object with the id and the description of what to link it to.
*
* NOTE(review): the exported name reads like a typo of `UnknownSideEffect`; renaming it would break existing imports.
*/
export type UnknownSidEffect = NodeId | {
/** The id of the node with the side effect */
id: NodeId;
/** Describes what the node should be linked to */
linkTo: LinkTo<RegExp>;
};
/**
* The dataflow graph holds the dataflow information found within the given AST.
* We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument}
*
* The vertices of the graph are organized in a hierarchical fashion, with a function-definition node containing the node ids of its subgraph.
* However, all *edges* are hoisted at the top level in the form of an (attributed) adjacency list.
* After the dataflow analysis, all sources and targets of the edges *must* be part of the vertices.
* However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
*
* Methods declared to return `this` (e.g., `addVertex`, `addEdge`, `mergeWith`) return the modified graph to allow for chaining.
* Please note that some modifying methods (`addFile`, `setIdMap`, `setDefinitionOfVertex`, `updateToFunctionCall`) return nothing.
*
* @typeParam Vertex - The type of the vertex information stored in the graph
* @typeParam Edge - The type of the edge information stored in the graph
*
* @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
* @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
* @see {@link emptyGraph|`emptyGraph`} - to create an empty graph (useful in tests)
*/
export declare class DataflowGraph<Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo, Edge extends DataflowGraphEdge = DataflowGraphEdge> {
/** NOTE(review): neither type nor usage are visible in this declaration; presumably the environment used for vertices that do not provide their own — confirm */
private static DEFAULT_ENVIRONMENT;
/** Presumably the backing field of {@link DataflowGraph#idMap|`idMap`} */
private _idMap;
/** all file paths included in this dfg */
private _sourced;
/** Presumably the backing field of {@link DataflowGraph#unknownSideEffects|`unknownSideEffects`} */
private readonly _unknownSideEffects;
/**
* @param idMap - The id-map of the normalized AST to attach to the graph (may be `undefined`, see {@link DataflowGraph#setIdMap|`setIdMap`} to set it explicitly)
*/
constructor(idMap: AstIdMap | undefined);
/**
* The ids of the vertices on the root level of the graph.
*
* NOTE(review): the previous comment described this as *including* the vertices nested within function definitions.
* This contradicts {@link DataflowGraph#isRoot|`isRoot`} and the `asRoot` parameter of {@link DataflowGraph#addVertex|`addVertex`},
* which both treat the root vertices as the root level only — please confirm.
*/
protected rootVertices: Set<NodeId>;
/** All vertices in the complete graph (including those nested in function definition) */
private vertexInformation;
/** All edges in the complete graph (including those nested in function definition) */
private edgeInformation;
/**
* Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges.
*
* @param id - The id of the node to get
* @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
* @returns the node info for the given id together with its outgoing edges (if the node exists)
*
* @see #getVertex
*/
get(id: NodeId, includeDefinedFunctions?: boolean): [Vertex, OutgoingEdges] | undefined;
/**
* Get the {@link DataflowGraphVertexInfo} attached to a vertex.
*
* @param id - The id of the node to get
* @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel
* @returns the node info for the given id (if it exists)
*
* @see #get
*/
getVertex(id: NodeId, includeDefinedFunctions?: boolean): Vertex | undefined;
/**
* Get the edges that start at the given node.
*
* @param id - The id of the node which is the source of the edges
* @returns a map from each target to the respective edge information, or `undefined` (presumably if no outgoing edges are recorded for the id — confirm)
*
* @see #ingoingEdges
*/
outgoingEdges(id: NodeId): OutgoingEdges | undefined;
/**
* Get the edges that end at the given node.
*
* @param id - The id of the node which is the target of the edges
* @returns a map from each source to the respective edge information, or `undefined` (presumably if no ingoing edges are found for the id — confirm)
*
* @see #outgoingEdges
*/
ingoingEdges(id: NodeId): IngoingEdges | undefined;
/**
* Given a node in the normalized AST this either:
* * returns the id if the node directly exists in the DFG
* * returns the ids of all vertices in the DFG that are linked to this
* * returns undefined if the node is not part of the DFG and not linked to any node
*/
getLinked(nodeId: NodeId): NodeId[] | undefined;
/** Retrieves the id-map to the normalized AST attached to the dataflow graph */
get idMap(): AstIdMap | undefined;
/** Retrieves the files marked as being part of the dfg (see {@link DataflowGraph#addFile|`addFile`}) */
get sourced(): (string | '<inline>')[];
/** Mark this file as being part of the dfg */
addFile(source: string | '<inline>'): void;
/**
* Retrieves the set of vertices which have side effects that we do not know anything about.
*
* @see #markIdForUnknownSideEffects
*/
get unknownSideEffects(): Set<UnknownSidEffect>;
/** Allows setting the id-map explicitly (which should only be used when, e.g., you plan to compare two dataflow graphs on the same AST-basis) */
setIdMap(idMap: AstIdMap): void;
/**
* @param includeDefinedFunctions - If true this will iterate over function definitions as well and not just the toplevel
* (in contrast to, e.g., {@link DataflowGraph#get|`get`}, this parameter is required)
* @returns the ids of the vertices in the graph together with their vertex information
* (only those on the toplevel unless `includeDefinedFunctions` is true)
*
* @see #edges
*/
vertices(includeDefinedFunctions: boolean): IterableIterator<[NodeId, Vertex]>;
/**
* @returns pairs of a node id and the {@link OutgoingEdges} attached to it
* (as {@link OutgoingEdges} are keyed by their target, the id is presumably the source of the edges)
*
* @see #vertices
*/
edges(): IterableIterator<[NodeId, OutgoingEdges]>;
/**
* Returns true if the graph contains a node with the given id.
*
* @param id - The id to check for
* @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel
*/
hasVertex(id: NodeId, includeDefinedFunctions?: boolean): boolean;
/**
* Returns true if the root level of the graph contains a node with the given id.
*
* @param id - The id to check for
*
* @see #rootIds
*/
isRoot(id: NodeId): boolean;
/**
* Returns the ids of the root vertices of the graph as a read-only set.
*
* @see #isRoot
*/
rootIds(): ReadonlySet<NodeId>;
/**
* Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically.
*
* @param vertex - The vertex to add
* @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph.
* This is probably only of use, when you construct dataflow graphs for tests.
* @param overwrite - If true, this will overwrite the vertex if it already exists in the graph (based on the id).
* @returns this graph, to allow for chaining
*
* @see DataflowGraphVertexInfo
* @see DataflowGraphVertexArgument
*/
addVertex(vertex: DataflowGraphVertexArgument & Omit<Vertex, keyof DataflowGraphVertexArgument>, asRoot?: boolean, overwrite?: boolean): this;
/**
* Adds an edge between two nodes identified by their ids.
*
* @param from - The id of the source of the edge
* @param to - The id of the target of the edge
* @param type - The type of the edge, either as {@link EdgeType} or as a number (presumably a combination of several edge types — confirm)
* @returns this graph, to allow for chaining
*/
addEdge(from: NodeId, to: NodeId, type: EdgeType | number): this;
/**
* Adds an edge between two nodes identified by references.
*
* @param from - The reference to the source of the edge
* @param to - The reference to the target of the edge
* @param type - The type of the edge, either as {@link EdgeType} or as a number (presumably a combination of several edge types — confirm)
* @returns this graph, to allow for chaining
*/
addEdge(from: ReferenceForEdge, to: ReferenceForEdge, type: EdgeType | number): this;
/**
* Adds an edge between two nodes, each identified by either its id or a reference.
*
* @param from - The id of, or reference to, the source of the edge
* @param to - The id of, or reference to, the target of the edge
* @param type - The type of the edge, either as {@link EdgeType} or as a number (presumably a combination of several edge types — confirm)
* @returns this graph, to allow for chaining
*/
addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, type: EdgeType | number): this;
/**
* Merges the other graph into *this* one (in-place). The return value is only for convenience.
*
* @param otherGraph - The graph to merge into this one (presumably nothing happens if this is `undefined` — confirm)
* @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices.
* This is probably only of use in the context of function definitions.
* @returns this graph, to allow for chaining
*/
mergeWith(otherGraph: DataflowGraph<Vertex, Edge> | undefined, mergeRootVertices?: boolean): this;
/** Presumably the edge-related part of {@link DataflowGraph#mergeWith|`mergeWith`} (the signature is not part of this declaration) */
private mergeEdges;
/**
* Marks a vertex in the graph to be a definition
* @param reference - The reference to the vertex to mark as definition
*/
setDefinitionOfVertex(reference: IdentifierReference): void;
/**
* Marks a vertex in the graph to be a function call with the new information
* @param info - The information about the new function call node
*/
updateToFunctionCall(info: DataflowGraphVertexFunctionCall): void;
/**
* Adds a control dependency to the given node.
* If you do not pass the `to` node, this will just mark the node as maybe.
*
* @param from - The id of the node which receives the control dependency
* @param to - The id of the node that `from` depends on (optional, see above)
* @param when - NOTE(review): presumably the outcome of `to` under which `from` is executed — confirm
* @returns this graph, to allow for chaining
*/
addControlDependency(from: NodeId, to?: NodeId, when?: boolean): this;
/**
* Marks the given node as having unknown side effects
*
* @param id - The id of the node to mark
* @param target - Optional description of what the node should be linked to (presumably recorded as the `linkTo` of the {@link UnknownSidEffect} — confirm)
* @returns this graph, to allow for chaining
*
* @see #unknownSideEffects
*/
markIdForUnknownSideEffects(id: NodeId, target?: LinkTo): this;
/**
* Constructs a dataflow graph instance from the given JSON data and returns the result.
* This can be useful for data sent by the flowR server when analyzing it further.
* @param data - The JSON data to construct the graph from
* @returns the constructed graph (with the default type parameters for vertices and edges)
*/
static fromJson(data: DataflowGraphJson): DataflowGraph;
}
/**
* NOTE(review): judging by the name, this is the JSON representation of an environment as it may appear in serialized
* dataflow information; it is not referenced by any other declaration in this file — confirm the intended use.
*/
export interface IEnvironmentJson {
/** The numeric id of the environment */
readonly id: number;
/**
* The parent of the environment.
* NOTE(review): as this is required and of the same type, the declaration does not describe how the outermost environment (without a parent) is represented — confirm.
*/
parent: IEnvironmentJson;
/** Maps each identifier to the definitions stored for it */
memory: Record<Identifier, IdentifierDefinition[]>;
}
export {};