UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

645 lines 30 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SemanticCfgGuidedVisitor = void 0;
const control_flow_graph_1 = require("./control-flow-graph");
const dfg_cfg_guided_visitor_1 = require("./dfg-cfg-guided-visitor");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const edge_1 = require("../dataflow/graph/edge");
const assert_1 = require("../util/assert");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const convert_values_1 = require("../r-bridge/lang-4.x/convert-values");
const df_helper_1 = require("../dataflow/graph/df-helper");
const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name");
/**
 * Returns the `nodeId` of a call argument, or `undefined` if the argument slot is
 * either missing (the call has fewer arguments than the requested index) or explicitly
 * the {@link EmptyArgument} marker.
 *
 * Guarding against the missing slot matters for calls with fewer arguments than expected
 * (e.g., an `if` without an `else`), where reading `.nodeId` would otherwise throw.
 */
function argumentNodeId(arg) {
    if (arg === undefined || arg === r_function_call_1.EmptyArgument) {
        return undefined;
    }
    return arg.nodeId;
}
/**
 * Shared resolution of the `target` and `source` ids for assignment-like and replacement calls.
 *
 * The target is the single vertex the call points to with a `Returns` edge, the source is the single
 * vertex (other than the call itself) that this target points to with a `DefinedBy` edge.
 * If either of them is missing or ambiguous (i.e., there is not exactly one candidate), both are reported as `undefined`.
 */
function resolveAssignmentEndpoints(dfg, callId) {
    const unresolved = { target: undefined, source: undefined };
    const outgoing = dfg.outgoingEdges(callId);
    if (!outgoing) {
        return unresolved;
    }
    const targets = outgoing.entries()
        .filter(([, e]) => edge_1.DfEdge.includesType(e, edge_1.EdgeType.Returns))
        .toArray();
    if (targets.length !== 1) {
        return unresolved;
    }
    const target = targets[0][0];
    const targetOut = dfg.outgoingEdges(target);
    if (!targetOut) {
        return unresolved;
    }
    const sources = targetOut.entries()
        // the call itself is also linked with the target, so it must not count as a source
        .filter(([t, e]) => edge_1.DfEdge.includesType(e, edge_1.EdgeType.DefinedBy) && t !== callId)
        .toArray();
    if (sources.length !== 1) {
        return unresolved;
    }
    return { target, source: sources[0][0] };
}
/**
 * This visitor extends on the {@link DataflowAwareCfgGuidedVisitor} by dispatching visitors for separate function calls as well,
 * providing more information!
 * In a way, this is the mixin of syntactic and dataflow guided visitation.
 *
 * Overwrite the functions starting with `on` to implement your logic.
 * In general, there is just one special case that you need to be aware of:
 *
 * In the context of a function call, flowR may be unsure to which origin the call relates!
 * Consider the following example:
 *
 * ```r
 * if(u) foo <- library else foo <- rm
 * foo(x)
 * ```
 *
 * Obtaining the origins of the call to `foo` will return both built-in functions `library` and `rm`.
 * The general semantic visitor cannot decide on how to combine these cases,
 * and it is up to your overload of {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|onDispatchFunctionCallOrigins}
 * to decide how to handle this.
 *
 * Use {@link BasicCfgGuidedVisitor#start} to start the traversal.
 */
class SemanticCfgGuidedVisitor extends dfg_cfg_guided_visitor_1.DataflowAwareCfgGuidedVisitor {
    /**
     * A helper function to get the normalized AST node for the given id.
     * Returns `undefined` if the id is `undefined` or if the id map of the normalized AST has no entry for it.
     */
    getNormalizedAst(id) {
        return id === undefined ? undefined : this.config.normalizedAst.idMap.get(id);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
     * This now dispatches the value to the appropriate event handler based on its type.
     * Values without a normalized AST node are ignored; any other unexpected AST type trips the guard.
     */
    visitValue(val) {
        super.visitValue(val);
        const astNode = this.getNormalizedAst(val.id);
        if (!astNode) {
            return;
        }
        switch (astNode.type) {
            case type_1.RType.String:
                return this.onStringConstant({ vertex: val, node: astNode });
            case type_1.RType.Number:
                return this.onNumberConstant({ vertex: val, node: astNode });
            case type_1.RType.Logical:
                return this.onLogicalConstant({ vertex: val, node: astNode });
            case type_1.RType.Symbol:
                // `NULL` is a symbol syntactically, but gets its own event
                if (astNode.lexeme === convert_values_1.RNull) {
                    return this.onNullConstant({ vertex: val, node: astNode });
                }
                else {
                    return this.onSymbolConstant({ vertex: val, node: astNode });
                }
        }
        (0, assert_1.guard)(false, `Unexpected value type ${astNode.type} for value ${astNode.lexeme}`);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableUse} for the base implementation.
     *
     * This function is called for every use of a variable in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} instead.
     * @protected
     */
    visitVariableUse(vertex) {
        super.visitVariableUse(vertex);
        this.onVariableUse({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableDefinition} for the base implementation.
     *
     * This function is called for every variable definition in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableDefinition|`onVariableDefinition`} instead.
     * @protected
     */
    visitVariableDefinition(vertex) {
        super.visitVariableDefinition(vertex);
        this.onVariableDefinition({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionDefinition} for the base implementation.
     *
     * This function is called for every function definition in the program and dispatches the appropriate event.
     * The parameter ids are only passed along if the normalized AST node of the vertex is a function definition.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onFunctionDefinition|`onFunctionDefinition`} instead.
     * @protected
     */
    visitFunctionDefinition(vertex) {
        super.visitFunctionDefinition(vertex);
        const ast = this.getNormalizedAst(vertex.id);
        if (ast?.type === type_1.RType.FunctionDefinition) {
            this.onFunctionDefinition({ vertex, parameters: ast.parameters.map(p => p.info.id) });
        }
        else {
            this.onFunctionDefinition({ vertex });
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionCall} for the base implementation.
     *
     * This function is called for every function call in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} for anonymous calls,
     * or {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|`onDispatchFunctionCallOrigins`} for named calls (or just overwrite
     * the events you are interested in directly).
     * @protected
     */
    visitFunctionCall(vertex) {
        super.visitFunctionCall(vertex);
        if (vertex.origin === built_in_proc_name_1.BuiltInProcName.Unnamed) {
            this.onUnnamedCall({ call: vertex });
        }
        else {
            this.onDispatchFunctionCallOrigins(vertex, vertex.origin);
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitUnknown} for the base implementation.
     * This function is called for every unknown vertex in the program.
     * It dispatches the appropriate event based on the type of the vertex.
     * In case you have to overwrite this function please make sure to still call this implementation to get a correctly working {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`}.
     * @protected
     */
    visitUnknown(vertex) {
        super.visitUnknown(vertex);
        const ast = this.getNormalizedAst(control_flow_graph_1.CfgVertex.getId(vertex));
        // only the expression list without a parent is the root program
        if (ast && ast.type === type_1.RType.ExpressionList && ast.info.parent === undefined) {
            this.onProgram(ast);
        }
    }
    /**
     * Given a function call that has multiple targets (e.g., two potential built-in definitions).
     * This function is responsible for calling {@link onDispatchFunctionCallOrigin} for each of the origins,
     * and aggregating their results (which is just additive by default).
     * If you want to change the behavior in case of multiple potential function definition targets, simply overwrite this function
     * with the logic you desire.
     * @protected
     */
    onDispatchFunctionCallOrigins(call, origins) {
        for (const origin of origins) {
            this.onDispatchFunctionCallOrigin(call, origin);
        }
    }
    /**
     * This function is responsible for dispatching the appropriate event
     * based on a given dataflow vertex. The default serves as a backend
     * for the event functions, but you may overwrite and extend this function at will.
     * @see {@link onDispatchFunctionCallOrigins} for the aggregation in case the function call target is ambiguous.
     * @protected
     */
    onDispatchFunctionCallOrigin(call, origin) {
        switch (origin) {
            case built_in_proc_name_1.BuiltInProcName.Eval:
                return this.onEvalFunctionCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Apply:
                return this.onApplyFunctionCall({ call });
            case built_in_proc_name_1.BuiltInProcName.ExpressionList:
                return this.onExpressionList({ call });
            case built_in_proc_name_1.BuiltInProcName.Source:
                return this.onSourceCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Access:
                return this.onAccessCall({ call });
            case built_in_proc_name_1.BuiltInProcName.IfThenElse: {
                // recover dead arguments from ast
                const ast = this.getNormalizedAst(call.id);
                if (!ast || ast.type !== type_1.RType.IfThenElse) {
                    // no if-then-else AST node to consult, fall back to the call arguments;
                    // any of them may be missing or empty
                    return this.onIfThenElseCall({
                        call,
                        condition: argumentNodeId(call.args[0]),
                        yes: argumentNodeId(call.args[1]),
                        no: argumentNodeId(call.args[2])
                    });
                }
                else {
                    return this.onIfThenElseCall({
                        call,
                        condition: ast.condition.info.id,
                        yes: ast.then.info.id,
                        no: ast.otherwise?.info.id
                    });
                }
            }
            case built_in_proc_name_1.BuiltInProcName.Get:
                return this.onGetCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Rm:
                return this.onRmCall({ call });
            case built_in_proc_name_1.BuiltInProcName.List:
                return this.onListCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Vector:
                return this.onVectorCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Assignment:
            case built_in_proc_name_1.BuiltInProcName.AssignmentLike:
            case built_in_proc_name_1.BuiltInProcName.TableAssignment:
                return this.onAssignmentCall({ call, ...resolveAssignmentEndpoints(this.config.dfg, call.id) });
            case built_in_proc_name_1.BuiltInProcName.SpecialBinOp:
                if (call.args.length !== 2) {
                    return this.onSpecialBinaryOpCall({ call });
                }
                return this.onSpecialBinaryOpCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case built_in_proc_name_1.BuiltInProcName.Pipe:
                if (call.args.length !== 2) {
                    return this.onPipeCall({ call });
                }
                return this.onPipeCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case built_in_proc_name_1.BuiltInProcName.Quote:
                return this.onQuoteCall({ call });
            case built_in_proc_name_1.BuiltInProcName.ForLoop:
                return this.onForLoopCall({ call, variable: call.args[0], vector: call.args[1], body: call.args[2] });
            case built_in_proc_name_1.BuiltInProcName.RepeatLoop:
                return this.onRepeatLoopCall({ call, body: call.args[0] });
            case built_in_proc_name_1.BuiltInProcName.WhileLoop:
                return this.onWhileLoopCall({ call, condition: call.args[0], body: call.args[1] });
            case built_in_proc_name_1.BuiltInProcName.Replacement:
                return this.onReplacementCall({ call, ...resolveAssignmentEndpoints(this.config.dfg, call.id) });
            case built_in_proc_name_1.BuiltInProcName.Library:
                return this.onLibraryCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Try:
                return this.onTryCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Stop:
                return this.onStopCall({ call });
            case built_in_proc_name_1.BuiltInProcName.StopIfNot:
                return this.onStopIfNotCall({ call });
            case built_in_proc_name_1.BuiltInProcName.RegisterHook:
                return this.onRegisterHookCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Local:
                return this.onLocalCall({ call });
            case built_in_proc_name_1.BuiltInProcName.S3Dispatch:
                return this.onS3DispatchCall({ call });
            case built_in_proc_name_1.BuiltInProcName.S3DispatchNext:
                return this.onS3DispatchNextCall({ call });
            case built_in_proc_name_1.BuiltInProcName.S7NewGeneric:
                return this.onS7NewGenericCall({ call });
            case built_in_proc_name_1.BuiltInProcName.S7Dispatch:
                return this.onS7DispatchCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Break:
                return this.onBreakCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Return:
                return this.onReturnCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Unnamed:
                return this.onUnnamedCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Recall:
                return this.onRecallCall({ call });
            case built_in_proc_name_1.BuiltInProcName.Default:
            case built_in_proc_name_1.BuiltInProcName.DefaultReadAllArgs:
            case built_in_proc_name_1.BuiltInProcName.Function:
            case built_in_proc_name_1.BuiltInProcName.FunctionDefinition:
                return this.onDefaultFunctionCall({ call });
            default:
                (0, assert_1.assertUnreachable)(origin);
        }
    }
    /**
     * This event is called for the root program node, i.e., the program that is being analyzed.
     * @protected
     */
    onProgram(_data) { }
    /**
     * A helper function to request the {@link getOriginInDfg|origins} of the given node.
     */
    getOrigins(id) {
        return df_helper_1.Dataflow.origin(this.config.dfg, id);
    }
    /**
     * Called for every occurrence of a `NULL` in the program.
     *
     * For other symbols that are not referenced as a variable, see {@link SemanticCfgGuidedVisitor#onSymbolConstant|`onSymbolConstant`}.
     */
    onNullConstant(_data) { }
    /**
     * Called for every constant string value in the program.
     *
     * For example, `"Hello World"` in `print("Hello World")`.
     */
    onStringConstant(_data) { }
    /**
     * Called for every constant number value in the program.
     *
     * For example, `42` in `print(42)`.
     */
    onNumberConstant(_data) { }
    /**
     * Called for every constant logical value in the program.
     *
     * For example, `TRUE` in `if(TRUE) { ... }`.
     */
    onLogicalConstant(_data) { }
    /**
     * Called for every constant symbol value in the program.
     *
     * For example, `foo` in `library(foo)` or `a` in `l$a`. This most likely happens as part of non-standard-evaluation, i.e., the symbol is not evaluated to a value,
     * but used as a symbol in and of itself.
     *
     * Please note, that due to its special behaviors, `NULL` is handled in {@link SemanticCfgGuidedVisitor#onNullConstant|`onNullConstant`} and not here.
     */
    onSymbolConstant(_data) { }
    /**
     * Called for every variable that is read within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `print(x)`.
     */
    onVariableUse(_data) { }
    /**
     * Called for every variable that is written within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `x <- 42` or `x` in `assign("x", 42)`.
     * See {@link SemanticCfgGuidedVisitor#onAssignmentCall} for the assignment call. This event handler also provides you with information on the source.
     */
    onVariableDefinition(_data) { }
    /**
     * Called for every anonymous function definition.
     *
     * For example, `function(x) { x + 1 }` in `lapply(1:10, function(x) { x + 1 })`.
     */
    onFunctionDefinition(_data) { }
    /**
     * This event triggers for every anonymous call within the program.
     *
     * For example, `(function(x) { x + 1 })(42)` or the second call in `a()()`.
     *
     * This is separate from {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`} which is used for named function calls that do not trigger any of these events.
     * The main differentiation for these calls is that you may not infer their semantics from any name alone and probably _have_
     * to rely on {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get more information.
     * @protected
     */
    onUnnamedCall(_data) { }
    /**
     * This event triggers for every function call that is not handled by a specific overload,
     * and hence may be a function that targets a user-defined function. In a way, these are functions that are named,
     * but flowR does not specifically care about them (currently) wrt. to their dataflow impact.
     *
     * Use {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get the origins of the call.
     *
     * For example, this triggers for `foo(x)` in
     *
     * ```r
     * foo <- function(x) { x + 1 }
     * foo(x)
     * ```
     *
     * This explicitly will not trigger for scenarios in which the function has no name (i.e., if it is anonymous).
     * For such cases, you may rely on the {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} event.
     * The main reason for this separation is part of flowR's handling of these functions, as anonymous calls cannot be resolved using the active environment.
     * @protected
     */
    onDefaultFunctionCall(_data) { }
    /**
     * This event triggers for every call to the `eval` function.
     *
     * For example, `eval` in `eval(parse(text = "x + 1"))`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onEvalFunctionCall(_data) { }
    /**
     * This event triggers for every call to any of the `*apply` functions.
     *
     * For example, `lapply` in `lapply(1:10, function(x) { x + 1 })`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onApplyFunctionCall(_data) { }
    /**
     * This event triggers for every expression list - implicit or explicit, _but_ not for the root program (see {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`} for that).
     *
     * For example, this triggers for the expression list created by `{` and `}` in `if (TRUE) { x <- 1; y <- 2; }`. But also for the implicit
     * expression list `x <- x + 1` in `for(x in 1:10) x <- x + 1`.
     * @protected
     */
    onExpressionList(_data) { }
    /**
     * This event triggers for every call to the `source` function.
     *
     * For example, `source` in `source("script.R")`.
     *
     * By default, this does not provide the resolved source file. Yet you can access the {@link DataflowGraph} to ask for sourced files.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onSourceCall(_data) { }
    /**
     * This event triggers for every subsetting call, i.e., for every call to `[[`, `[`, or `$`.
     * @protected
     */
    onAccessCall(_data) { }
    /**
     * This event triggers for every call to the `if` function, which is used to implement the `if-then-else` control flow.
     *
     * The `condition`, `yes`, and `no` ids passed along may each be `undefined` if the respective part is absent.
     * @protected
     */
    onIfThenElseCall(_data) { }
    /**
     * This event triggers for every call to the `get` function, which is used to access variables in the global environment.
     *
     * For example, `get` in `get("x")`.
     *
     * Please be aware, that with flowR resolving the `get` during the dataflow analysis,
     * this may very well trigger a {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} event as well.
     * @protected
     */
    onGetCall(_data) { }
    /**
     * This event triggers for every call to the `rm` function, which is used to remove variables from the environment.
     *
     * For example, `rm` in `rm(x)`.
     * @protected
     */
    onRmCall(_data) { }
    /**
     * This event triggers for every call to a function which loads a library.
     *
     * For example, `library` in `library(dplyr)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onLibraryCall(_data) { }
    /**
     * This event triggers for every assignment call, i.e., for every call to `<-` or `=` that assigns a value to a variable.
     *
     * For example, this triggers for `<-` in `x <- 42` or `assign` in `assign("x", 42)`.
     * This also triggers for the `data.table` assign `:=` active within subsetting calls, e.g., `DT[, x := 42]`.
     *
     * The `target` and `source` passed along are both `undefined` whenever they cannot be determined uniquely.
     *
     * Please be aware that replacements (e.g. assignments with a function call on the target side) like `names(x) <- 3` are subject to {@link SemanticCfgGuidedVisitor#onReplacementCall|`onReplacementCall`} instead.
     * @protected
     */
    onAssignmentCall(_data) { }
    /**
     * This event triggers for every call to a special binary operator, i.e., every binary function call that starts and ends with a `%` sign.
     *
     * For example, this triggers for `%in%` in `x %in% y`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onSpecialBinaryOpCall(_data) { }
    /**
     * This event triggers for every call to R's pipe operator, i.e., for every call to `|>`.
     * @protected
     */
    onPipeCall(_data) { }
    /**
     * This event triggers for every call to the `quote` function, which is used to quote expressions.
     *
     * For example, `quote` in `quote(x + 1)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onQuoteCall(_data) { }
    /**
     * This event triggers for every call to the `for` loop function, which is used to implement the `for` loop control flow.
     *
     * For example, this triggers for `for` in `for(i in 1:10) { print(i) }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onForLoopCall(_data) { }
    /**
     * This event triggers for every call to the `while` loop function, which is used to implement the `while` loop control flow.
     *
     * For example, this triggers for `while` in `while(i < 10) { i <- i + 1 }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onWhileLoopCall(_data) { }
    /**
     * This event triggers for every call to the `repeat` loop function, which is used to implement the `repeat` loop control flow.
     *
     * For example, this triggers for `repeat` in `repeat { i <- i + 1; if(i >= 10) break }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onRepeatLoopCall(_data) { }
    /**
     * This event triggers for every call to a function that replaces a value in a container, such as `names(x) <- 3`.
     *
     * This is different from {@link SemanticCfgGuidedVisitor#onAssignmentCall|`onAssignmentCall`} in that it does not assign a value to a variable,
     * but rather replaces a value in a container.
     *
     * For example, this triggers for `names` in `names(x) <- 3`, but not for `x <- 3`.
     *
     * The `target` and `source` passed along are both `undefined` whenever they cannot be determined uniquely.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onReplacementCall(_data) { }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) list.
     *
     * For example, this triggers for `list` in `list(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onListCall(_data) { }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) vector.
     *
     * For example, this triggers for `c` in `c(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onVectorCall(_data) { }
    /**
     * This event triggers for every call to the `stop` function.
     *
     * For example, this triggers for `stop` in `stop()`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onStopCall(_data) { }
    /**
     * This event triggers for every call to the `stopifnot` function.
     *
     * For example, this triggers for `stopifnot` in `stopifnot(x > 0)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onStopIfNotCall(_data) { }
    /**
     * This event triggers for every call to the `try` function, which is used to catch possible errors.
     *
     * For example, this triggers for `try` in `try(stop("error"))`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onTryCall(_data) { }
    /**
     * This event triggers for every call to a function that performs a local call, such as `local`.
     *
     * For example, this triggers for `local` in `local({ x <- 1; y <- 2; x + y })`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onLocalCall(_data) { }
    /**
     * This event triggers for every call to a function that performs an S3-like dispatch.
     *
     * For example, this triggers for `UseMethod` in `UseMethod("print")`.
     * @see {@link SemanticCfgGuidedVisitor#onS3DispatchNextCall|`onS3DispatchNextCall`} for `NextMethod` calls.
     * @protected
     */
    onS3DispatchCall(_data) { }
    /**
     * This event triggers for every call to a function that performs an S3-like *next* dispatch.
     *
     * For example, this triggers for `NextMethod`.
     * @see {@link SemanticCfgGuidedVisitor#onS3DispatchCall|`onS3DispatchCall`} for `UseMethod` calls.
     * @protected
     */
    onS3DispatchNextCall(_data) { }
    /**
     * This event triggers for every call to a function that creates a new S7 generic, such as `new_generic`.
     * @protected
     */
    onS7NewGenericCall(_data) { }
    /**
     * This event triggers for every call to a function that performs an S7 dispatch, such as `S7_dispatch`.
     * @protected
     */
    onS7DispatchCall(_data) { }
    /**
     * This event triggers for every call to a function that registers a hook, such as `on.exit`.
     *
     * For example, this triggers for `on.exit` in `on.exit(print("exiting function"))`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onRegisterHookCall(_data) { }
    /**
     * This event triggers for every call to `break` to exit a loop.
     *
     * For example, this triggers for `break` in `repeat { break }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onBreakCall(_data) { }
    /**
     * This event triggers for every call to `return` to explicitly return a value in a function.
     *
     * For example, this triggers for `return` in `f <- function() { return(42) }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onReturnCall(_data) { }
    /**
     * This event triggers for every call to `Recall`, which is used to recall the function closure (usually in recursive functions).
     * @protected
     */
    onRecallCall(_data) { }
}
exports.SemanticCfgGuidedVisitor = SemanticCfgGuidedVisitor;
//# sourceMappingURL=semantic-cfg-guided-visitor.js.map