UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

525 lines 23.1 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SemanticCfgGuidedVisitor = void 0;
const dfg_cfg_guided_visitor_1 = require("./dfg-cfg-guided-visitor");
const dfg_get_origin_1 = require("../dataflow/origin/dfg-get-origin");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const edge_1 = require("../dataflow/graph/edge");
const assert_1 = require("../util/assert");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
/**
 * Returns the node id of a call argument, or `undefined` if the argument is
 * either the {@link EmptyArgument} marker or not present at all
 * (i.e., the call has fewer arguments than the requested index).
 */
function argumentNodeId(arg) {
    if (arg === undefined || arg === r_function_call_1.EmptyArgument) {
        return undefined;
    }
    return arg.nodeId;
}
/**
 * Tries to recover the target and the source of an assignment-like call from the dataflow graph.
 *
 * The target is the single vertex the call points to with a `Returns` edge,
 * the source is the single vertex (other than the call itself) the target points to with a `DefinedBy` edge.
 * If either of them is missing or ambiguous, both are reported as `undefined`.
 */
function resolveAssignmentTargetAndSource(dfg, callId) {
    const unresolved = { target: undefined, source: undefined };
    const outgoing = dfg.outgoingEdges(callId);
    if (!outgoing) {
        return unresolved;
    }
    const targets = [...outgoing.entries()].filter(([, e]) => (0, edge_1.edgeIncludesType)(e.types, edge_1.EdgeType.Returns));
    if (targets.length !== 1) {
        return unresolved;
    }
    const target = targets[0][0];
    const targetOut = dfg.outgoingEdges(target);
    if (!targetOut) {
        return unresolved;
    }
    // the target is also defined by the call itself, which is not the source we are looking for
    const sources = [...targetOut.entries()].filter(([t, e]) => (0, edge_1.edgeIncludesType)(e.types, edge_1.EdgeType.DefinedBy) && t !== callId);
    if (sources.length !== 1) {
        return unresolved;
    }
    return { target, source: sources[0][0] };
}
/**
 * This visitor extends on the {@link DataflowAwareCfgGuidedVisitor} by dispatching visitors for separate function calls as well,
 * providing more information!
 * In a way, this is the mixin of syntactic and dataflow guided visitation.
 *
 * Overwrite the functions starting with `on` to implement your logic.
 * In general, there is just one special case that you need to be aware of:
 *
 * In the context of a function call, flowR may be unsure to which origin the call relates!
 * Consider the following example:
 *
 * ```r
 * if(u) foo <- library else foo <- rm
 * foo(x)
 * ```
 *
 * Obtaining the origins of the call to `foo` will return both built-in functions `library` and `rm`.
 * The general semantic visitor cannot decide on how to combine these cases,
 * and it is up to your overload of {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|onDispatchFunctionCallOrigins}
 * to decide how to handle this.
 *
 * Use {@link BasicCfgGuidedVisitor#start} to start the traversal.
 */
class SemanticCfgGuidedVisitor extends dfg_cfg_guided_visitor_1.DataflowAwareCfgGuidedVisitor {
    /**
     * A helper function to get the normalized AST node for the given id.
     * Returns `undefined` if the id map of the normalized AST has no entry for the id.
     */
    getNormalizedAst(id) {
        return this.config.normalizedAst.idMap.get(id);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitValue} for the base implementation.
     * This now dispatches the value to the appropriate event handler based on its type.
     * Values without a normalized AST node are ignored, any value type other than
     * string, number, logical, or the `NULL` symbol trips a guard.
     */
    visitValue(val) {
        super.visitValue(val);
        const astNode = this.getNormalizedAst(val.id);
        if (!astNode) {
            return;
        }
        switch (astNode.type) {
            case type_1.RType.String:
                return this.onStringConstant({ vertex: val, node: astNode });
            case type_1.RType.Number:
                return this.onNumberConstant({ vertex: val, node: astNode });
            case type_1.RType.Logical:
                return this.onLogicalConstant({ vertex: val, node: astNode });
            case type_1.RType.Symbol:
                // the only symbol that is expected to show up as a value is `NULL`
                (0, assert_1.guard)(astNode.lexeme === 'NULL', `Expected NULL constant, got ${astNode.lexeme}`);
                return this.onNullConstant({ vertex: val, node: astNode });
        }
        (0, assert_1.guard)(false, `Unexpected value type ${astNode.type} for value ${astNode.lexeme}`);
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableUse} for the base implementation.
     *
     * This function is called for every use of a variable in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} instead.
     *
     * @protected
     */
    visitVariableUse(vertex) {
        super.visitVariableUse(vertex);
        this.onVariableUse({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitVariableDefinition} for the base implementation.
     *
     * This function is called for every variable definition in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onVariableDefinition|`onVariableDefinition`} instead.
     *
     * @protected
     */
    visitVariableDefinition(vertex) {
        super.visitVariableDefinition(vertex);
        this.onVariableDefinition({ vertex });
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionDefinition} for the base implementation.
     *
     * This function is called for every function definition in the program and dispatches the appropriate event.
     * The ids of the parameters are only provided if the normalized AST node is a function definition.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onFunctionDefinition|`onFunctionDefinition`} instead.
     *
     * @protected
     */
    visitFunctionDefinition(vertex) {
        super.visitFunctionDefinition(vertex);
        const ast = this.getNormalizedAst(vertex.id);
        if (ast?.type === type_1.RType.FunctionDefinition) {
            this.onFunctionDefinition({ vertex, parameters: ast.parameters.map(p => p.info.id) });
        }
        else {
            this.onFunctionDefinition({ vertex });
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitFunctionCall} for the base implementation.
     *
     * This function is called for every function call in the program and dispatches the appropriate event.
     * You probably do not have to overwrite it and just use {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} for anonymous calls,
     * or {@link SemanticCfgGuidedVisitor#onDispatchFunctionCallOrigins|`onDispatchFunctionCallOrigins`} for named calls (or just overwrite
     * the events you are interested in directly).
     *
     * @protected
     */
    visitFunctionCall(vertex) {
        super.visitFunctionCall(vertex);
        if (vertex.origin === 'unnamed') {
            this.onUnnamedCall({ vertex });
        }
        else {
            this.onDispatchFunctionCallOrigins(vertex, vertex.origin);
        }
    }
    /**
     * See {@link DataflowAwareCfgGuidedVisitor#visitUnknown} for the base implementation.
     * This function is called for every unknown vertex in the program.
     * It dispatches the appropriate event based on the type of the vertex.
     * In case you have to overwrite this function please make sure to still call this implementation to get a correctly working {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`}.
     *
     * @protected
     */
    visitUnknown(vertex) {
        super.visitUnknown(vertex);
        const ast = this.getNormalizedAst(vertex.id);
        // an expression list without a parent is the root of the program
        if (ast && ast.type === type_1.RType.ExpressionList && ast.info.parent === undefined) {
            this.onProgram(ast);
        }
    }
    /**
     * Given a function call that has multiple targets (e.g., two potential built-in definitions).
     * This function is responsible for calling {@link onDispatchFunctionCallOrigin} for each of the origins,
     * and aggregating their results (which is just additive by default).
     * If you want to change the behavior in case of multiple potential function definition targets, simply overwrite this function
     * with the logic you desire.
     *
     * @protected
     */
    onDispatchFunctionCallOrigins(call, origins) {
        for (const origin of origins) {
            this.onDispatchFunctionCallOrigin(call, origin);
        }
    }
    /**
     * This function is responsible for dispatching the appropriate event
     * based on a given dataflow vertex. The default serves as a backend
     * for the event functions, but you may overwrite and extend this function at will.
     *
     * Every origin without a dedicated case ends up in {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`}.
     *
     * @see {@link onDispatchFunctionCallOrigins} for the aggregation in case the function call target is ambiguous.
     *
     * @protected
     */
    onDispatchFunctionCallOrigin(call, origin) {
        switch (origin) {
            case 'builtin:eval':
                return this.onEvalFunctionCall({ call });
            case 'builtin:apply':
                return this.onApplyFunctionCall({ call });
            case 'builtin:expression-list':
                return this.onExpressionList({ call });
            case 'builtin:source':
                return this.onSourceCall({ call });
            case 'builtin:access':
                return this.onAccessCall({ call });
            case 'builtin:if-then-else': {
                // recover dead arguments from ast
                const ast = this.getNormalizedAst(call.id);
                if (!ast || ast.type !== type_1.RType.IfThenElse) {
                    // without a syntactic if-then-else we have to rely on the call arguments,
                    // which may be empty or missing altogether (e.g., if there is no else branch)
                    return this.onIfThenElseCall({
                        call,
                        condition: argumentNodeId(call.args[0]),
                        then: argumentNodeId(call.args[1]),
                        else: argumentNodeId(call.args[2])
                    });
                }
                else {
                    return this.onIfThenElseCall({
                        call,
                        condition: ast.condition.info.id,
                        then: ast.then.info.id,
                        else: ast.otherwise?.info.id
                    });
                }
            }
            case 'builtin:get':
                return this.onGetCall({ call });
            case 'builtin:rm':
                return this.onRmCall({ call });
            case 'builtin:list':
                return this.onListCall({ call });
            case 'builtin:vector':
                return this.onVectorCall({ call });
            case 'table:assign':
            case 'builtin:assignment':
                return this.onAssignmentCall({ call, ...resolveAssignmentTargetAndSource(this.config.dfg, call.id) });
            case 'builtin:special-bin-op':
                if (call.args.length !== 2) {
                    return this.onSpecialBinaryOpCall({ call });
                }
                return this.onSpecialBinaryOpCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case 'builtin:pipe':
                if (call.args.length !== 2) {
                    return this.onPipeCall({ call });
                }
                return this.onPipeCall({ call, lhs: call.args[0], rhs: call.args[1] });
            case 'builtin:quote':
                return this.onQuoteCall({ call });
            case 'builtin:for-loop':
                return this.onForLoopCall({ call, variable: call.args[0], vector: call.args[1], body: call.args[2] });
            case 'builtin:repeat-loop':
                return this.onRepeatLoopCall({ call, body: call.args[0] });
            case 'builtin:while-loop':
                return this.onWhileLoopCall({ call, condition: call.args[0], body: call.args[1] });
            case 'builtin:replacement':
                return this.onReplacementCall({ call, ...resolveAssignmentTargetAndSource(this.config.dfg, call.id) });
            case 'builtin:library':
                return this.onLibraryCall({ call });
            case 'builtin:default':
            default:
                return this.onDefaultFunctionCall({ call });
        }
    }
    /**
     * This event is called for the root program node, i.e., the program that is being analyzed.
     *
     * @protected
     */
    onProgram(_data) {
    }
    /**
     * A helper function to request the {@link getOriginInDfg|origins} of the given node.
     */
    getOrigins(id) {
        return (0, dfg_get_origin_1.getOriginInDfg)(this.config.dfg, id);
    }
    /** Called for every occurrence of a `NULL` in the program. */
    onNullConstant(_data) {
    }
    /**
     * Called for every constant string value in the program.
     *
     * For example, `"Hello World"` in `print("Hello World")`.
     */
    onStringConstant(_data) {
    }
    /**
     * Called for every constant number value in the program.
     *
     * For example, `42` in `print(42)`.
     */
    onNumberConstant(_data) {
    }
    /**
     * Called for every constant logical value in the program.
     *
     * For example, `TRUE` in `if(TRUE) { ... }`.
     */
    onLogicalConstant(_data) {
    }
    /**
     * Called for every variable that is read within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `print(x)`.
     */
    onVariableUse(_data) {
    }
    /**
     * Called for every variable that is written within the program.
     * You can use {@link getOrigins} to get the origins of the variable.
     *
     * For example, `x` in `x <- 42` or `x` in `assign("x", 42)`.
     * See {@link SemanticCfgGuidedVisitor#onAssignmentCall} for the assignment call. This event handler also provides you with information on the source.
     */
    onVariableDefinition(_data) {
    }
    /**
     * Called for every anonymous function definition.
     *
     * For example, `function(x) { x + 1 }` in `lapply(1:10, function(x) { x + 1 })`.
     */
    onFunctionDefinition(_data) {
    }
    /**
     * This event triggers for every anonymous call within the program.
     *
     * For example, `(function(x) { x + 1 })(42)` or the second call in `a()()`.
     *
     * This is separate from {@link SemanticCfgGuidedVisitor#onDefaultFunctionCall|`onDefaultFunctionCall`} which is used for named function calls that do not trigger any of these events.
     * The main differentiation for these calls is that you may not infer their semantics from any name alone and probably _have_
     * to rely on {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get more information.
     *
     * @protected
     */
    onUnnamedCall(_data) {
    }
    /**
     * This event triggers for every function call that is not handled by a specific overload,
     * and hence may be a function that targets a user-defined function. In a way, these are functions that are named,
     * but flowR does not specifically care about them (currently) with respect to their dataflow impact.
     *
     * Use {@link SemanticCfgGuidedVisitor#getOrigins|`getOrigins`} to get the origins of the call.
     *
     * For example, this triggers for `foo(x)` in
     *
     * ```r
     * foo <- function(x) { x + 1 }
     * foo(x)
     * ```
     *
     * This explicitly will not trigger for scenarios in which the function has no name (i.e., if it is anonymous).
     * For such cases, you may rely on the {@link SemanticCfgGuidedVisitor#onUnnamedCall|`onUnnamedCall`} event.
     * The main reason for this separation is part of flowR's handling of these functions, as anonymous calls cannot be resolved using the active environment.
     *
     * @protected
     */
    onDefaultFunctionCall(_data) {
    }
    /**
     * This event triggers for every call to the `eval` function.
     *
     * For example, `eval` in `eval(parse(text = "x + 1"))`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onEvalFunctionCall(_data) {
    }
    /**
     * This event triggers for every call to any of the `*apply` functions.
     *
     * For example, `lapply` in `lapply(1:10, function(x) { x + 1 })`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onApplyFunctionCall(_data) {
    }
    /**
     * This event triggers for every expression list - implicit or explicit, _but_ not for the root program (see {@link SemanticCfgGuidedVisitor#onProgram|`onProgram`} for that).
     *
     * For example, this triggers for the expression list created by `{` and `}` in `if (TRUE) { x <- 1; y <- 2; }`. But also for the implicit
     * expression list `x <- x + 1` in `for(x in 1:10) x <- x + 1`.
     *
     * @protected
     */
    onExpressionList(_data) {
    }
    /**
     * This event triggers for every call to the `source` function.
     *
     * For example, `source` in `source("script.R")`.
     *
     * By default, this does not provide the resolved source file. Yet you can access the {@link DataflowGraph} to ask for sourced files.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onSourceCall(_data) {
    }
    /**
     * This event triggers for every subsetting call, i.e., for every call to `[[`, `[`, or `$`.
     *
     * @protected
     */
    onAccessCall(_data) {
    }
    /**
     * This event triggers for every call to the `if` function, which is used to implement the `if-then-else` control flow.
     *
     * The `condition`, `then`, and `else` entries of the event data hold the respective node ids
     * and are `undefined` if the corresponding part is empty or missing.
     *
     * @protected
     */
    onIfThenElseCall(_data) {
    }
    /**
     * This event triggers for every call to the `get` function, which is used to access variables in the global environment.
     *
     * For example, `get` in `get("x")`.
     *
     * Please be aware, that with flowR resolving the `get` during the dataflow analysis,
     * this may very well trigger a {@link SemanticCfgGuidedVisitor#onVariableUse|`onVariableUse`} event as well.
     *
     * @protected
     */
    onGetCall(_data) {
    }
    /**
     * This event triggers for every call to the `rm` function, which is used to remove variables from the environment.
     *
     * For example, `rm` in `rm(x)`.
     *
     * @protected
     */
    onRmCall(_data) {
    }
    /**
     * This event triggers for every call to a function which loads a library.
     *
     * For example, `library` in `library(dplyr)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onLibraryCall(_data) {
    }
    /**
     * This event triggers for every assignment call, i.e., for every call to `<-` or `=` that assigns a value to a variable.
     *
     * For example, this triggers for `<-` in `x <- 42` or `assign` in `assign("x", 42)`.
     * This also triggers for the `data.table` assign `:=` active within subsetting calls, e.g., `DT[, x := 42]`.
     *
     * The `target` and `source` entries of the event data are both `undefined` if they cannot be
     * recovered unambiguously from the dataflow graph.
     *
     * Please be aware that replacements (e.g. assignments with a function call on the target side) like `names(x) <- 3` are subject to {@link SemanticCfgGuidedVisitor#onReplacementCall|`onReplacementCall`} instead.
     * @protected
     */
    onAssignmentCall(_data) {
    }
    /**
     * This event triggers for every call to a special binary operator, i.e., every binary function call that starts and ends with a `%` sign.
     *
     * For example, this triggers for `%in%` in `x %in% y`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onSpecialBinaryOpCall(_data) {
    }
    /**
     * This event triggers for every call to R's pipe operator, i.e., for every call to `|>`.
     *
     * @protected
     */
    onPipeCall(_data) {
    }
    /**
     * This event triggers for every call to the `quote` function, which is used to quote expressions.
     *
     * For example, `quote` in `quote(x + 1)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onQuoteCall(_data) {
    }
    /**
     * This event triggers for every call to the `for` loop function, which is used to implement the `for` loop control flow.
     *
     * For example, this triggers for `for` in `for(i in 1:10) { print(i) }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onForLoopCall(_data) {
    }
    /**
     * This event triggers for every call to the `while` loop function, which is used to implement the `while` loop control flow.
     *
     * For example, this triggers for `while` in `while(i < 10) { i <- i + 1 }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onWhileLoopCall(_data) {
    }
    /**
     * This event triggers for every call to the `repeat` loop function, which is used to implement the `repeat` loop control flow.
     *
     * For example, this triggers for `repeat` in `repeat { i <- i + 1; if(i >= 10) break }`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onRepeatLoopCall(_data) {
    }
    /**
     * This event triggers for every call to a function that replaces a value in a container, such as `names(x) <- 3`.
     *
     * This is different from {@link SemanticCfgGuidedVisitor#onAssignmentCall|`onAssignmentCall`} in that it does not assign a value to a variable,
     * but rather replaces a value in a container.
     *
     * For example, this triggers for `names` in `names(x) <- 3`, but not for `x <- 3`.
     *
     * The `target` and `source` entries of the event data are both `undefined` if they cannot be
     * recovered unambiguously from the dataflow graph.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     * @protected
     */
    onReplacementCall(_data) {
    }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) list.
     *
     * For example, this triggers for `list` in `list(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onListCall(_data) {
    }
    /**
     * This event triggers for every call that (to the knowledge of flowr) constructs a (new) vector.
     *
     * For example, this triggers for `c` in `c(1, 2, 3)`.
     *
     * More specifically, this relates to the corresponding {@link BuiltInProcessorMapper} handler.
     *
     * @protected
     */
    onVectorCall(_data) {
    }
}
exports.SemanticCfgGuidedVisitor = SemanticCfgGuidedVisitor;
//# sourceMappingURL=semantic-cfg-guided-visitor.js.map