@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
462 lines (461 loc) • 20.7 kB
TypeScript
import type { CfgExpressionVertex, CfgStatementVertex, ControlFlowInformation } from './control-flow-graph';
import type { DataflowCfgGuidedVisitorConfiguration } from './dfg-cfg-guided-visitor';
import { DataflowAwareCfgGuidedVisitor } from './dfg-cfg-guided-visitor';
import type { NormalizedAst, ParentInformation } from '../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { SyntaxCfgGuidedVisitorConfiguration } from './syntax-cfg-guided-visitor';
import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { Origin } from '../dataflow/origin/dfg-get-origin';
import type { DataflowGraphVertexFunctionCall, DataflowGraphVertexFunctionDefinition, DataflowGraphVertexUse, DataflowGraphVertexValue, DataflowGraphVertexVariableDefinition } from '../dataflow/graph/vertex';
import type { RString } from '../r-bridge/lang-4.x/ast/model/nodes/r-string';
import type { RNumber } from '../r-bridge/lang-4.x/ast/model/nodes/r-number';
import type { RLogical } from '../r-bridge/lang-4.x/ast/model/nodes/r-logical';
import type { DataflowGraph, FunctionArgument } from '../dataflow/graph/graph';
import type { NoInfo, RNode } from '../r-bridge/lang-4.x/ast/model/model';
import type { RSymbol } from '../r-bridge/lang-4.x/ast/model/nodes/r-symbol';
import type { BuiltInProcessorMapper } from '../dataflow/environments/built-in';
import type { RExpressionList } from '../r-bridge/lang-4.x/ast/model/nodes/r-expression-list';
/**
* The configuration accepted by the {@link SemanticCfgGuidedVisitor}.
*
* It merges the {@link DataflowCfgGuidedVisitorConfiguration} with the {@link SyntaxCfgGuidedVisitorConfiguration}
* and does not declare any additional members of its own.
*/
export interface SemanticCfgGuidedVisitorConfiguration<OtherInfo = NoInfo, ControlFlow extends ControlFlowInformation = ControlFlowInformation, Ast extends NormalizedAst<OtherInfo> = NormalizedAst<OtherInfo>, Dfg extends DataflowGraph = DataflowGraph> extends DataflowCfgGuidedVisitorConfiguration<ControlFlow, Dfg>, SyntaxCfgGuidedVisitorConfiguration<OtherInfo, ControlFlow, Ast> {
}
/**
* This visitor extends the {@link DataflowAwareCfgGuidedVisitor} by additionally dispatching dedicated events for separate function calls,
* providing more information.
* In a way, this is the mixin of syntactic and dataflow guided visitation.
*
* Overwrite the functions starting with `on` to implement your logic.
* In general, there is just one special case that you need to be aware of:
*
* In the context of a function call, flowR may be unsure to which origin the call relates!
* Consider the following example:
*
* ```r
* if(u) foo <- library else foo <- rm
* foo(x)
* ```
*
* Obtaining the origins of the call to `foo` will return both built-in functions `library` and `rm`.
* The general semantic visitor cannot decide on how to combine these cases,
* and it is up to your overload of {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|onDispatchFunctionCallOrigins}
* to decide how to handle this.
*
* Use {@link BasicCfgGuidedVisitor#start} to start the traversal.
*/
export declare class SemanticCfgGuidedVisitor<OtherInfo = NoInfo, ControlFlow extends ControlFlowInformation = ControlFlowInformation, Ast extends NormalizedAst<OtherInfo> = NormalizedAst<OtherInfo>, Dfg extends DataflowGraph = DataflowGraph, Config extends SemanticCfgGuidedVisitorConfiguration<OtherInfo, ControlFlow, Ast, Dfg> = SemanticCfgGuidedVisitorConfiguration<OtherInfo, ControlFlow, Ast, Dfg>> extends DataflowAwareCfgGuidedVisitor<ControlFlow, Dfg, Config> {
/**
* A helper function to get the normalized AST node for the given id.
*
* NOTE(review): this comment originally stated that the helper fails if the node does not exist,
* but the declared return type includes `undefined`. This suggests that a missing node is reported
* as `undefined` instead - confirm against the implementation.
*
* @param id - the id of the node to look up
*/
protected getNormalizedAst(id: NodeId): RNode<OtherInfo & ParentInformation> | undefined;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
* This now dispatches the value to the appropriate event handler based on its type.
*/
protected visitValue(val: DataflowGraphVertexValue): void;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitVariableUse} for the base implementation.
*
* This function is called for every use of a variable in the program and dispatches the appropriate event.
* You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} instead.
*
* @protected
*/
protected visitVariableUse(vertex: DataflowGraphVertexUse): void;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitVariableDefinition} for the base implementation.
*
* This function is called for every variable definition in the program and dispatches the appropriate event.
* You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableDefinition|`onVariableDefinition`} instead.
*
* @protected
*/
protected visitVariableDefinition(vertex: DataflowGraphVertexVariableDefinition): void;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitFunctionDefinition} for the base implementation.
*
* This function is called for every function definition in the program and dispatches the appropriate event.
* You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onFunctionDefinition|`onFunctionDefinition`} instead.
*
* @protected
*/
protected visitFunctionDefinition(vertex: DataflowGraphVertexFunctionDefinition): void;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitFunctionCall} for the base implementation.
*
* This function is called for every function call in the program and dispatches the appropriate event.
* You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} for anonymous calls,
* or {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|`onDispatchFunctionCallOrigins`} for named calls (or just overwrite
* the events you are interested in directly).
*
* @protected
*/
protected visitFunctionCall(vertex: DataflowGraphVertexFunctionCall): void;
/**
* See {@link DataflowAwareCfgGuidedVisitor#visitUnknown} for the base implementation.
* This function is called for every unknown vertex in the program.
* It dispatches the appropriate event based on the type of the vertex.
* In case you have to overwrite this function, please make sure to still call this implementation to get a correctly working {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`}.
*
* @protected
*/
protected visitUnknown(vertex: CfgStatementVertex | CfgExpressionVertex): void;
/**
* Handles a function call that has multiple targets (e.g., two potential built-in definitions).
* This function is responsible for calling {@link onDispatchFunctionCallOrigin} for each of the origins,
* and aggregating their results (which is just additive by default).
* If you want to change the behavior in case of multiple potential function definition targets, simply overwrite this function
* with the logic you desire.
*
* @param call    - the vertex of the function call to dispatch
* @param origins - the potential origins of the call, each of which is handed to {@link onDispatchFunctionCallOrigin}
*
* @protected
*/
protected onDispatchFunctionCallOrigins(call: DataflowGraphVertexFunctionCall, origins: readonly string[]): void;
/**
* This function is responsible for dispatching the appropriate event
* based on a given dataflow vertex. The default serves as a backend
* for the event functions, but you may overwrite and extend this function at will.
*
* @param call   - the vertex of the function call to dispatch
* @param origin - the origin to dispatch for, either a key of the {@link BuiltInProcessorMapper} or any other string
*
* @see {@link onDispatchFunctionCallOrigins} for the aggregation in case the function call target is ambiguous.
*
* @protected
*/
protected onDispatchFunctionCallOrigin(call: DataflowGraphVertexFunctionCall, origin: keyof typeof BuiltInProcessorMapper | string): void;
/**
* This event is called for the root program node, i.e., the program that is being analyzed.
*
* @protected
*/
protected onProgram(_data: RExpressionList<OtherInfo>): void;
/**
* A helper function to request the {@link getOriginInDfg|origins} of the given node.
*
* @param id - the id of the node to request the origins for
* @returns the origins of the node, or `undefined` (presumably if no origin can be determined - confirm against {@link getOriginInDfg})
*/
protected getOrigins(id: NodeId): Origin[] | undefined;
/** Called for every occurrence of a `NULL` in the program. */
protected onNullConstant(_data: {
vertex: DataflowGraphVertexValue;
node: RSymbol<OtherInfo & ParentInformation, 'NULL'>;
}): void;
/**
* Called for every constant string value in the program.
*
* For example, `"Hello World"` in `print("Hello World")`.
*/
protected onStringConstant(_data: {
vertex: DataflowGraphVertexValue;
node: RString;
}): void;
/**
* Called for every constant number value in the program.
*
* For example, `42` in `print(42)`.
*/
protected onNumberConstant(_data: {
vertex: DataflowGraphVertexValue;
node: RNumber;
}): void;
/**
* Called for every constant logical value in the program.
*
* For example, `TRUE` in `if(TRUE) { ... }`.
*/
protected onLogicalConstant(_data: {
vertex: DataflowGraphVertexValue;
node: RLogical;
}): void;
/**
* Called for every variable that is read within the program.
* You can use {@link getOrigins} to get the origins of the variable.
*
* For example, `x` in `print(x)`.
*/
protected onVariableUse(_data: {
vertex: DataflowGraphVertexUse;
}): void;
/**
* Called for every variable that is written within the program.
* You can use {@link getOrigins} to get the origins of the variable.
*
* For example, `x` in `x <- 42` or `x` in `assign("x", 42)`.
* See {@link SemanticCfgGuidedVisitor#onAssignmentCall} for the assignment call. That event handler additionally provides you with information on the source.
*/
protected onVariableDefinition(_data: {
vertex: DataflowGraphVertexVariableDefinition;
}): void;
/**
* Called for every anonymous function definition.
*
* For example, `function(x) { x + 1 }` in `lapply(1:10, function(x) { x + 1 })`.
*
* The `parameters` field is optional, so handlers have to cope with it being absent.
*/
protected onFunctionDefinition(_data: {
vertex: DataflowGraphVertexFunctionDefinition;
parameters?: readonly NodeId[];
}): void;
/**
* This event triggers for every anonymous call within the program.
*
* For example, `(function(x) { x + 1 })(42)` or the second call in `a()()`.
*
* This is separate from {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`} which is used for named function calls that do not trigger any of the more specific call events.
* The main differentiation for these calls is that you may not infer their semantics from any name alone and probably _have_
* to rely on {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get more information.
*
* Note that, unlike the other call events, the call vertex is passed as `vertex` and not as `call`.
*
* @protected
*/
protected onUnnamedCall(_data: {
vertex: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every function call that is not handled by a specific overload,
* and hence may be a function that targets a user-defined function. In a way, these are functions that are named,
* but flowR does not specifically care about them (currently) with respect to their dataflow impact.
*
* Use {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get the origins of the call.
*
* For example, this triggers for `foo(x)` in
*
* ```r
* foo <- function(x) { x + 1 }
* foo(x)
* ```
*
* This explicitly will not trigger for scenarios in which the function has no name (i.e., if it is anonymous).
* For such cases, you may rely on the {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} event.
* The main reason for this separation is part of flowR's handling of these functions, as anonymous calls cannot be resolved using the active environment.
*
* @protected
*/
protected onDefaultFunctionCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to the `eval` function.
*
* For example, `eval` in `eval(parse(text = "x + 1"))`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onEvalFunctionCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to any of the `*apply` functions.
*
* For example, `lapply` in `lapply(1:10, function(x) { x + 1 })`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onApplyFunctionCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every expression list - implicit or explicit, _but_ not for the root program (see {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`} for that).
*
* For example, this triggers for the expression list created by `{` and `}` in `if (TRUE) { x <- 1; y <- 2; }`. But also for the implicit
* expression list `x <- x + 1` in `for(x in 1:10) x <- x + 1`.
*
* @protected
*/
protected onExpressionList(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to the `source` function.
*
* For example, `source` in `source("script.R")`.
*
* By default, this does not provide the resolved source file. Yet you can access the {@link DataflowGraph} to ask for sourced files.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onSourceCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every subsetting call, i.e., for every call to `[[`, `[`, or `$`.
*
* @protected
*/
protected onAccessCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to the `if` function, which is used to implement the `if-then-else` control flow.
*
* Each of `condition`, `then`, and `else` may be `undefined`
* (presumably whenever the respective part is missing or cannot be identified - confirm against the implementation).
*
* @protected
*/
protected onIfThenElseCall(_data: {
call: DataflowGraphVertexFunctionCall;
condition: NodeId | undefined;
then: NodeId | undefined;
else: NodeId | undefined;
}): void;
/**
* This event triggers for every call to the `get` function, which is used to access a variable by its name.
*
* For example, `get` in `get("x")`.
*
* Please be aware that, with flowR resolving the `get` during the dataflow analysis,
* this may very well trigger a {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} event as well.
*
* @protected
*/
protected onGetCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to the `rm` function, which is used to remove variables from the environment.
*
* For example, `rm` in `rm(x)`.
*
* @protected
*/
protected onRmCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to a function which loads a library.
*
* For example, `library` in `library(dplyr)`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onLibraryCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every assignment call, e.g., for every call to `<-` or `=` that assigns a value to a variable.
*
* For example, this triggers for `<-` in `x <- 42` or `assign` in `assign("x", 42)`.
* This also triggers for the `data.table` assign `:=` active within subsetting calls, e.g., `DT[, x := 42]`.
*
* Please be aware that replacements (e.g. assignments with a function call on the target side) like `names(x) <- 3` are subject to {@link SemanticCfgGuidedVisitor#onReplacementCall|`onReplacementCall`} instead.
*
* Both `target` and `source` are optional, so handlers have to cope with either of them being absent.
*
* @protected
*/
protected onAssignmentCall(_data: {
call: DataflowGraphVertexFunctionCall;
target?: NodeId;
source?: NodeId;
}): void;
/**
* This event triggers for every call to a special binary operator, i.e., every binary function call that starts and ends with a `%` sign.
*
* For example, this triggers for `%in%` in `x %in% y`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onSpecialBinaryOpCall(_data: {
call: DataflowGraphVertexFunctionCall;
lhs?: FunctionArgument;
rhs?: FunctionArgument;
}): void;
/**
* This event triggers for every call to R's pipe operator, i.e., for every call to `|>`.
*
* Both `lhs` and `rhs` are optional, so handlers have to cope with either of them being absent.
*
* @protected
*/
protected onPipeCall(_data: {
call: DataflowGraphVertexFunctionCall;
lhs?: FunctionArgument;
rhs?: FunctionArgument;
}): void;
/**
* This event triggers for every call to the `quote` function, which is used to quote expressions.
*
* For example, `quote` in `quote(x + 1)`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onQuoteCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call to the `for` loop function, which is used to implement the `for` loop control flow.
*
* For example, this triggers for `for` in `for(i in 1:10) { print(i) }`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onForLoopCall(_data: {
call: DataflowGraphVertexFunctionCall;
variable: FunctionArgument;
vector: FunctionArgument;
body: FunctionArgument;
}): void;
/**
* This event triggers for every call to the `while` loop function, which is used to implement the `while` loop control flow.
*
* For example, this triggers for `while` in `while(i < 10) { i <- i + 1 }`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onWhileLoopCall(_data: {
call: DataflowGraphVertexFunctionCall;
condition: FunctionArgument;
body: FunctionArgument;
}): void;
/**
* This event triggers for every call to the `repeat` loop function, which is used to implement the `repeat` loop control flow.
*
* For example, this triggers for `repeat` in `repeat { i <- i + 1; if(i >= 10) break }`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onRepeatLoopCall(_data: {
call: DataflowGraphVertexFunctionCall;
body: FunctionArgument;
}): void;
/**
* This event triggers for every call to a function that replaces a value in a container, such as `names(x) <- 3`.
*
* This is different from {@link SemanticCfgGuidedVisitor#onAssignmentCall|`onAssignmentCall`} in that it does not assign a value to a variable,
* but rather replaces a value in a container.
*
* For example, this triggers for `names` in `names(x) <- 3`, but not for `x <- 3`.
*
* Both `source` and `target` may be `undefined`, so handlers have to cope with either of them being absent.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
* @protected
*/
protected onReplacementCall(_data: {
call: DataflowGraphVertexFunctionCall;
source: NodeId | undefined;
target: NodeId | undefined;
}): void;
/**
* This event triggers for every call that (to the knowledge of flowR) constructs a (new) list.
*
* For example, this triggers for `list` in `list(1, 2, 3)`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onListCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
/**
* This event triggers for every call that (to the knowledge of flowR) constructs a (new) vector.
*
* For example, this triggers for `c` in `c(1, 2, 3)`.
*
* More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
*
* @protected
*/
protected onVectorCall(_data: {
call: DataflowGraphVertexFunctionCall;
}): void;
}