@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
525 lines • 23.1 kB
JavaScript
"use strict";
// Flag the `exports` object with a (non-enumerable) `__esModule` property set to `true`.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the named export up front as `undefined`; it is assigned the actual class
// once that class has been defined at the end of this file.
exports.SemanticCfgGuidedVisitor = void 0;
const dfg_cfg_guided_visitor_1 = require("./dfg-cfg-guided-visitor");
const dfg_get_origin_1 = require("../dataflow/origin/dfg-get-origin");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const edge_1 = require("../dataflow/graph/edge");
const assert_1 = require("../util/assert");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
/**
 * This visitor extends the {@link DataflowAwareCfgGuidedVisitor} by dispatching visitors for separate function calls as well,
 * providing more information!
 * In a way, this is the mixin of syntactic and dataflow guided visitation.
 *
 * Overwrite the functions starting with `on` to implement your logic.
 * In general, there is just one special case that you need to be aware of:
 *
 * In the context of a function call, flowR may be unsure to which origin the call relates!
 * Consider the following example:
 *
 * ```r
 * if(u) foo <- library else foo <- rm
 * foo(x)
 * ```
 *
 * Obtaining the origins of the call to `foo` will return both built-in functions `library` and `rm`.
 * The general semantic visitor cannot decide on how to combine these cases,
 * and it is up to your overload of {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|onDispatchFunctionCallOrigins}
 * to decide how to handle this.
 *
 * Use {@link BasicCfgGuidedVisitor#start} to start the traversal.
 */
class SemanticCfgGuidedVisitor extends dfg_cfg_guided_visitor_1.DataflowAwareCfgGuidedVisitor {
    /**
     * A helper function to look up the normalized AST node for the given id in the id map of the configured normalized AST.
     *
     * This lookup does not fail for unknown ids: it hands back whatever `idMap.get` yields, and all callers
     * within this class explicitly handle a missing result.
     *
     * @param id - the id of the node to look up
     * @returns the normalized AST node registered for `id`, or the (falsy) result of `idMap.get` if there is none
     */
    getNormalizedAst(id) {
        return this.config.normalizedAst.idMap.get(id);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
     * This now dispatches the value to the appropriate event handler based on its type.
     *
     * Values without a normalized AST node are skipped silently. A symbol value is expected to be the `NULL` constant,
     * and any type other than string, number, logical, or symbol is handed to `guard` with a failing condition.
     *
     * @param val - the value vertex that is being visited
     */
    visitValue(val) {
        super.visitValue(val);
        const astNode = this.getNormalizedAst(val.id);
        if (!astNode) {
            return;
        }
        switch (astNode.type) {
            case type_1.RType.String:
                return this.onStringConstant({ vertex: val, node: astNode });
            case type_1.RType.Number:
                return this.onNumberConstant({ vertex: val, node: astNode });
            case type_1.RType.Logical:
                return this.onLogicalConstant({ vertex: val, node: astNode });
            case type_1.RType.Symbol:
                // the only symbol that is expected to show up as a value is `NULL`
                (0, assert_1.guard)(astNode.lexeme === 'NULL', `Expected NULL constant, got ${astNode.lexeme}`);
                return this.onNullConstant({ vertex: val, node: astNode });
        }
        // only reached for types that are not handled by the switch above
        (0, assert_1.guard)(false, `Unexpected value type ${astNode.type} for value ${astNode.lexeme}`);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableUse} for the base implementation.
     *
     * This function is called for every use of a variable in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} instead.
     *
     * @protected
     */
    visitVariableUse(vertex) {
        super.visitVariableUse(vertex);
        this.onVariableUse({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableDefinition} for the base implementation.
     *
     * This function is called for every variable definition in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableDefinition|`onVariableDefinition`} instead.
     *
     * @protected
     */
    visitVariableDefinition(vertex) {
        super.visitVariableDefinition(vertex);
        this.onVariableDefinition({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionDefinition} for the base implementation.
     *
     * This function is called for every function definition in the program and dispatches the appropriate event.
     * If the normalized AST node of the vertex is a function definition, the ids of its parameters are passed along as `parameters`;
     * otherwise, the event only receives the vertex.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onFunctionDefinition|`onFunctionDefinition`} instead.
     *
     * @protected
     */
    visitFunctionDefinition(vertex) {
        super.visitFunctionDefinition(vertex);
        const ast = this.getNormalizedAst(vertex.id);
        if (ast?.type === type_1.RType.FunctionDefinition) {
            this.onFunctionDefinition({ vertex, parameters: ast.parameters.map(p => p.info.id) });
        }
        else {
            this.onFunctionDefinition({ vertex });
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionCall} for the base implementation.
     *
     * This function is called for every function call in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} for anonymous calls,
     * or {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|`onDispatchFunctionCallOrigins`} for named calls (or just overwrite
     * the events you are interested in directly).
     *
     * @protected
     */
    visitFunctionCall(vertex) {
        super.visitFunctionCall(vertex);
        if (vertex.origin === 'unnamed') {
            this.onUnnamedCall({ vertex });
        }
        else {
            this.onDispatchFunctionCallOrigins(vertex, vertex.origin);
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitUnknown} for the base implementation.
     * This function is called for every unknown vertex in the program.
     * It dispatches the appropriate event based on the type of the vertex
     * (currently, this is only the root expression list, i.e., an expression list without a parent).
     * In case you have to overwrite this function please make sure to still call this implementation to get a correctly working {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`}.
     *
     * @protected
     */
    visitUnknown(vertex) {
        super.visitUnknown(vertex);
        const ast = this.getNormalizedAst(vertex.id);
        if (ast && ast.type === type_1.RType.ExpressionList && ast.info.parent === undefined) {
            this.onProgram(ast);
        }
    }
    /**
     * Given a function call that has multiple targets (e.g., two potential built-in definitions).
     * This function is responsible for calling {@link onDispatchFunctionCallOrigin} for each of the origins,
     * and aggregating their results (which is just additive by default).
     * If you want to change the behavior in case of multiple potential function definition targets, simply overwrite this function
     * with the logic you desire.
     *
     * @protected
     */
    onDispatchFunctionCallOrigins(call, origins) {
        for (const origin of origins) {
            this.onDispatchFunctionCallOrigin(call, origin);
        }
    }
    /**
     * This function is responsible for dispatching the appropriate event
     * based on a given dataflow vertex. The default serves as a backend
     * for the event functions, but you may overwrite and extend this function at will.
     *
     * Every origin that is not handled explicitly ends up in {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`}.
     *
     * @see {@link onDispatchFunctionCallOrigins} for the aggregation in case the function call target is ambiguous.
     *
     * @protected
     */
    onDispatchFunctionCallOrigin(call, origin) {
        switch (origin) {
            case 'builtin:eval':
                return this.onEvalFunctionCall({ call });
            case 'builtin:apply':
                return this.onApplyFunctionCall({ call });
            case 'builtin:expression-list':
                return this.onExpressionList({ call });
            case 'builtin:source':
                return this.onSourceCall({ call });
            case 'builtin:access':
                return this.onAccessCall({ call });
            case 'builtin:if-then-else': {
                // recover dead arguments from ast
                const ast = this.getNormalizedAst(call.id);
                if (ast?.type === type_1.RType.IfThenElse) {
                    return this.onIfThenElseCall({
                        call,
                        condition: ast.condition.info.id,
                        then: ast.then.info.id,
                        else: ast.otherwise?.info.id
                    });
                }
                // No matching AST node: fall back to the call arguments. The call may carry fewer than
                // three arguments (e.g., no else-branch), so a missing argument is treated like an empty one.
                const argumentId = (index) => {
                    const arg = call.args[index];
                    return arg === undefined || arg === r_function_call_1.EmptyArgument ? undefined : arg.nodeId;
                };
                return this.onIfThenElseCall({
                    call,
                    condition: argumentId(0),
                    then: argumentId(1),
                    else: argumentId(2)
                });
            }
            case 'builtin:get':
                return this.onGetCall({ call });
            case 'builtin:rm':
                return this.onRmCall({ call });
            case 'builtin:list':
                return this.onListCall({ call });
            case 'builtin:vector':
                return this.onVectorCall({ call });
            case 'table:assign':
            case 'builtin:assignment':
                // target and source are both `undefined` if they cannot be determined unambiguously
                return this.onAssignmentCall({ call, ...resolveAssignmentTargetAndSource(this.config.dfg, call.id) });
            case 'builtin:special-bin-op':
                if (call.args.length !== 2) {
                    return this.onSpecialBinaryOpCall({ call });
                }
                return this.onSpecialBinaryOpCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case 'builtin:pipe':
                if (call.args.length !== 2) {
                    return this.onPipeCall({ call });
                }
                return this.onPipeCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case 'builtin:quote':
                return this.onQuoteCall({ call });
            case 'builtin:for-loop':
                return this.onForLoopCall({ call, variable: call.args[0], vector: call.args[1], body: call.args[2] });
            case 'builtin:repeat-loop':
                return this.onRepeatLoopCall({ call, body: call.args[0] });
            case 'builtin:while-loop':
                return this.onWhileLoopCall({ call, condition: call.args[0], body: call.args[1] });
            case 'builtin:replacement':
                // target and source are both `undefined` if they cannot be determined unambiguously
                return this.onReplacementCall({ call, ...resolveAssignmentTargetAndSource(this.config.dfg, call.id) });
            case 'builtin:library':
                return this.onLibraryCall({ call });
            case 'builtin:default':
            default:
                return this.onDefaultFunctionCall({ call });
        }
    }
    /**
     * This event is called for the root program node, i.e., the program that is being analyzed.
     *
     * @protected
     */
    onProgram(_data) { }
    /**
     * A helper function to request the {@link getOriginInDfg|origins} of the given node.
     */
    getOrigins(id) {
        return (0, dfg_get_origin_1.getOriginInDfg)(this.config.dfg, id);
    }
    /** Called for every occurrence of a `NULL` in the program. */
    onNullConstant(_data) { }
    /**
     * Called for every constant string value in the program.
     *
     * For example, `"Hello World"` in `print("Hello World")`.
     */
    onStringConstant(_data) { }
    /**
     * Called for every constant number value in the program.
     *
     * For example, `42` in `print(42)`.
     */
    onNumberConstant(_data) { }
    /**
     * Called for every constant logical value in the program.
     *
     * For example, `TRUE` in `if(TRUE) { ... }`.
     */
    onLogicalConstant(_data) { }
    /**
     * Called for every variable that is read within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `print(x)`.
     */
    onVariableUse(_data) { }
    /**
     * Called for every variable that is written within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `x <- 42` or `x` in `assign("x", 42)`.
     * See {@link SemanticCfgGuidedVisitor#onAssignmentCall} for the assignment call. This event handler also provides you with information on the source.
     */
    onVariableDefinition(_data) { }
    /**
     * Called for every anonymous function definition.
     *
     * For example, `function(x) { x + 1 }` in `lapply(1:10, function(x) { x + 1 })`.
     */
    onFunctionDefinition(_data) { }
    /**
     * This event triggers for every anonymous call within the program.
     *
     * For example, `(function(x) { x + 1 })(42)` or the second call in `a()()`.
     *
     * This is separate from {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`} which is used for named function calls that do not trigger any of these events.
     * The main differentiation for these calls is that you may not infer their semantics from any name alone and probably _have_
     * to rely on {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get more information.
     *
     * @protected
     */
    onUnnamedCall(_data) { }
    /**
     * This event triggers for every function call that is not handled by a specific overload,
     * and hence may be a function that targets a user-defined function. In a way, these are functions that are named,
     * but flowR does not specifically care about them (currently) wrt. to their dataflow impact.
     *
     * Use {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get the origins of the call.
     *
     * For example, this triggers for `foo(x)` in
     *
     * ```r
     * foo <- function(x) { x + 1 }
     * foo(x)
     * ```
     *
     * This explicitly will not trigger for scenarios in which the function has no name (i.e., if it is anonymous).
     * For such cases, you may rely on the {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} event.
     * The main reason for this separation is part of flowR's handling of these functions, as anonymous calls cannot be resolved using the active environment.
     *
     * @protected
     */
    onDefaultFunctionCall(_data) { }
    /**
     * This event triggers for every call to the `eval` function.
     *
     * For example, `eval` in `eval(parse(text = "x + 1"))`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onEvalFunctionCall(_data) { }
    /**
     * This event triggers for every call to any of the `*apply` functions.
     *
     * For example, `lapply` in `lapply(1:10, function(x) { x + 1 })`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onApplyFunctionCall(_data) { }
    /**
     * This event triggers for every expression list - implicit or explicit, _but_ not for the root program (see {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`} for that).
     *
     * For example, this triggers for the expression list created by `{` and `}` in `if (TRUE) { x <- 1; y <- 2; }`. But also for the implicit
     * expression list `x <- x + 1` in `for(x in 1:10) x <- x + 1`.
     *
     * @protected
     */
    onExpressionList(_data) { }
    /**
     * This event triggers for every call to the `source` function.
     *
     * For example, `source` in `source("script.R")`.
     *
     * By default, this does not provide the resolved source file. Yet you can access the {@link DataflowGraph} to ask for sourced files.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onSourceCall(_data) { }
    /**
     * This event triggers for every subsetting call, i.e., for every call to `[[`, `[`, or `$`.
     *
     * @protected
     */
    onAccessCall(_data) { }
    /**
     * This event triggers for every call to the `if` function, which is used to implement the `if-then-else` control flow.
     *
     * The event data carries the ids of the `condition`, `then`, and `else` parts; each of them may be `undefined`
     * (e.g., `else` if there is no else-branch).
     *
     * @protected
     */
    onIfThenElseCall(_data) { }
    /**
     * This event triggers for every call to the `get` function, which is used to access variables in the global environment.
     *
     * For example, `get` in `get("x")`.
     *
     * Please be aware, that with flowR resolving the `get` during the dataflow analysis,
     * this may very well trigger a {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} event as well.
     *
     * @protected
     */
    onGetCall(_data) { }
    /**
     * This event triggers for every call to the `rm` function, which is used to remove variables from the environment.
     *
     * For example, `rm` in `rm(x)`.
     *
     * @protected
     */
    onRmCall(_data) { }
    /**
     * This event triggers for every call to a function which loads a library.
     *
     * For example, `library` in `library(dplyr)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onLibraryCall(_data) { }
    /**
     * This event triggers for every assignment call, i.e., for every call to `<-` or `=` that assigns a value to a variable.
     *
     * For example, this triggers for `<-` in `x <- 42` or `assign` in `assign("x", 42)`.
     * This also triggers for the `data.table` assign `:=` active within subsetting calls, e.g., `DT[, x := 42]`.
     *
     * The `target` and `source` of the event data are both `undefined` if they cannot be determined unambiguously from the dataflow graph.
     *
     * Please be aware that replacements (e.g. assignments with a function call on the target side) like `names(x) <- 3` are subject to {@link SemanticCfgGuidedVisitor#onReplacementCall|`onReplacementCall`} instead.
     * @protected
     */
    onAssignmentCall(_data) { }
    /**
     * This event triggers for every call to a special binary operator, i.e., every binary function call that starts and ends with a `%` sign.
     *
     * For example, this triggers for `%in%` in `x %in% y`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onSpecialBinaryOpCall(_data) { }
    /**
     * This event triggers for every call to R's pipe operator, i.e., for every call to `|>`.
     *
     * @protected
     */
    onPipeCall(_data) { }
    /**
     * This event triggers for every call to the `quote` function, which is used to quote expressions.
     *
     * For example, `quote` in `quote(x + 1)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onQuoteCall(_data) { }
    /**
     * This event triggers for every call to the `for` loop function, which is used to implement the `for` loop control flow.
     *
     * For example, this triggers for `for` in `for(i in 1:10) { print(i) }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onForLoopCall(_data) { }
    /**
     * This event triggers for every call to the `while` loop function, which is used to implement the `while` loop control flow.
     *
     * For example, this triggers for `while` in `while(i < 10) { i <- i + 1 }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onWhileLoopCall(_data) { }
    /**
     * This event triggers for every call to the `repeat` loop function, which is used to implement the `repeat` loop control flow.
     *
     * For example, this triggers for `repeat` in `repeat { i <- i + 1; if(i >= 10) break }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onRepeatLoopCall(_data) { }
    /**
     * This event triggers for every call to a function that replaces a value in a container, such as `names(x) <- 3`.
     *
     * This is different from {@link SemanticCfgGuidedVisitor#onAssignmentCall|`onAssignmentCall`} in that it does not assign a value to a variable,
     * but rather replaces a value in a container.
     *
     * For example, this triggers for `names` in `names(x) <- 3`, but not for `x <- 3`.
     *
     * The `target` and `source` of the event data are both `undefined` if they cannot be determined unambiguously from the dataflow graph.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onReplacementCall(_data) { }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) list.
     *
     * For example, this triggers for `list` in `list(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onListCall(_data) { }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) vector.
     *
     * For example, this triggers for `c` in `c(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onVectorCall(_data) { }
}
/**
 * Module-private helper that determines the target and the source of an assignment-like call
 * (used for the assignment and replacement events) from the dataflow graph.
 *
 * The target is the single vertex the call is linked to with an outgoing `Returns` edge.
 * The source is the single vertex the target is linked to with an outgoing `DefinedBy` edge,
 * ignoring the edge that points back to the call itself.
 * If any of these steps does not produce exactly one candidate, both are reported as `undefined`.
 *
 * @param dfg    - the dataflow graph to query (has to provide `outgoingEdges`)
 * @param callId - the id of the assignment-like call vertex
 * @returns an object of the shape `{ target, source }`, either both ids or both `undefined`
 */
function resolveAssignmentTargetAndSource(dfg, callId) {
    const unresolved = { target: undefined, source: undefined };
    const outgoing = dfg.outgoingEdges(callId);
    if (!outgoing) {
        return unresolved;
    }
    const targets = [...outgoing.entries()].filter(([, e]) => (0, edge_1.edgeIncludesType)(e.types, edge_1.EdgeType.Returns));
    if (targets.length !== 1) {
        return unresolved;
    }
    const target = targets[0][0];
    const targetOut = dfg.outgoingEdges(target);
    if (!targetOut) {
        return unresolved;
    }
    // the target is also defined by the call itself, which is not the source we are looking for
    const sources = [...targetOut.entries()].filter(([t, e]) => (0, edge_1.edgeIncludesType)(e.types, edge_1.EdgeType.DefinedBy) && t !== callId);
    if (sources.length !== 1) {
        return unresolved;
    }
    return { target, source: sources[0][0] };
}
// Expose the class as this module's `SemanticCfgGuidedVisitor` export.
exports.SemanticCfgGuidedVisitor = SemanticCfgGuidedVisitor;
//# sourceMappingURL=semantic-cfg-guided-visitor.js.map