UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

318 lines 16 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AbstractInterpretationVisitor = void 0; const control_flow_graph_1 = require("../control-flow/control-flow-graph"); const semantic_cfg_guided_visitor_1 = require("../control-flow/semantic-cfg-guided-visitor"); const vertex_1 = require("../dataflow/graph/vertex"); const model_1 = require("../r-bridge/lang-4.x/ast/model/model"); const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const type_1 = require("../r-bridge/lang-4.x/ast/model/type"); const assert_1 = require("../util/assert"); const abstract_domain_1 = require("./domains/abstract-domain"); const state_abstract_domain_1 = require("./domains/state-abstract-domain"); const df_helper_1 = require("../dataflow/graph/df-helper"); const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name"); /** * A control flow graph visitor to perform abstract interpretation. * * However, the visitor does not yet support inter-procedural abstract interpretation and abstract condition semantics. */ class AbstractInterpretationVisitor extends semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor { /** * The abstract trace of the abstract interpretation visitor mapping node IDs to the abstract state at the respective node. */ trace = new Map(); /** * The current abstract state domain at the currently processed AST node. */ _currentState; /** * A set of nodes representing variable definitions that have already been visited but whose assignment has not yet been processed. */ unassigned = new Set(); /** * Whether the current abstract state has been copied/cloned and is save to modify in place. */ stateCopied = false; constructor(config) { super({ ...config, defaultVisitingOrder: 'forward', defaultVisitingType: 'exit' }); this._currentState = new state_abstract_domain_1.MutableStateAbstractDomain(new Map()); } get currentState() { return this._currentState; } removeState(node) { if (!this.stateCopied) { this._currentState = this._currentState.create(this.currentState.value); this.stateCopied = true; } this._currentState.remove(node); } updateState(node, value) { if (!this.stateCopied) { this._currentState = this._currentState.create(this.currentState.value); this.stateCopied = true; } this._currentState.set(node, value); } /** * Resolves the inferred abstract value of an AST node. * This requires that the abstract interpretation visitor has been completed, or at least started. * @param id - The ID of the node to get the inferred value for * @param state - An optional state abstract domain used to resolve the inferred abstract value (defaults to the state at the requested node) * @returns The inferred abstract value of the node, or `undefined` if no value was inferred for the node */ getAbstractValue(id, state) { const node = (id === undefined || typeof id === 'object') ? id : this.getNormalizedAst(id); state ??= node !== undefined ? this.getAbstractState(node.info.id) : undefined; if (node === undefined) { return; } else if (state?.has(node.info.id)) { return state.get(node.info.id); } const vertex = this.getDataflowGraph(node.info.id); const call = (0, vertex_1.isFunctionCallVertex)(vertex) ? vertex : undefined; const origins = Array.isArray(call?.origin) ? call.origin : []; if (node.type === type_1.RType.Symbol) { const values = this.getVariableOrigins(node.info.id).map(origin => state?.get(origin)); if (values.length > 0 && values.every(assert_1.isNotUndefined)) { return abstract_domain_1.AbstractDomain.joinAll(values); } } else if (node.type === type_1.RType.Argument && node.value !== undefined) { return this.getAbstractValue(node.value, state); } else if (node.type === type_1.RType.ExpressionList && node.children.length > 0) { return this.getAbstractValue(node.children.at(-1), state); } else if (origins.includes(built_in_proc_name_1.BuiltInProcName.Pipe)) { if (node.type === type_1.RType.Pipe || node.type === type_1.RType.BinaryOp) { return this.getAbstractValue(node.rhs, state); } else if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) { return this.getAbstractValue(call.args[1].nodeId, state); } } else if (origins.includes(built_in_proc_name_1.BuiltInProcName.IfThenElse)) { let values = []; if (node.type === type_1.RType.IfThenElse && node.otherwise !== undefined) { values = [node.then, node.otherwise].map(entry => this.getAbstractValue(entry, state)); } else if (call?.args.every(arg => arg !== r_function_call_1.EmptyArgument) && call.args.length === 3) { values = call.args.slice(1, 3).map(entry => this.getAbstractValue(entry.nodeId, state)); } if (values.length > 0 && values.every(assert_1.isNotUndefined)) { return abstract_domain_1.AbstractDomain.joinAll(values); } } } /** * Gets the inferred abstract state at the location of a specific AST node. * This requires that the abstract interpretation visitor has been completed, or at least started. * @param id - The ID of the node to get the abstract state at * @returns The abstract state at the node, or `undefined` if the node has no abstract state (i.e. the node has not been visited or is unreachable). */ getAbstractState(id) { return id === undefined ? undefined : this.trace.get(id); } /** * Gets the inferred abstract state at the end of the program (exit nodes of the control flow graph). * This requires that the abstract interpretation visitor has been completed, or at least started. * @returns The inferred abstract state at the end of the program */ getEndState() { const exitPoints = this.config.controlFlow.exitPoints.map(id => this.getCfgVertex(id)).filter(assert_1.isNotUndefined); const exitNodes = exitPoints.map(control_flow_graph_1.CfgVertex.getRootId).filter(assert_1.isNotUndefined); const states = exitNodes.map(node => this.trace.get(node)).filter(assert_1.isNotUndefined); return abstract_domain_1.AbstractDomain.joinAll(states, this._currentState.top()); } /** * Gets the inferred abstract trace mapping AST nodes to the inferred abstract state at the respective node. * @returns The inferred abstract trace of the program */ getAbstractTrace() { return this.trace; } start() { (0, assert_1.guard)(this.trace.size === 0, 'Abstract interpretation visitor has already been started'); super.start(); this.unassigned.clear(); } startVisitor(start) { const stack = Array.from(start); while (stack.length > 0) { const current = stack.pop(); if (!this.visitNode(current)) { continue; } for (const next of this.config.controlFlow.graph.ingoingEdges(current)?.keys().toArray().reverse() ?? []) { if (!stack.includes(next)) { // prevent double entries in working list stack.push(next); } } } } visitNode(vertexId) { const vertex = this.getCfgVertex(vertexId); // skip exit vertices of widening points and entry vertices of complex nodes if (vertex === undefined || this.shouldSkipVertex(vertex)) { return true; } const predecessors = this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex)); const predecessorStates = predecessors.map(pred => this.trace.get(pred)).filter(assert_1.isNotUndefined); // retrieve new abstract state by joining states of predecessor nodes if (predecessorStates.length === 1) { this._currentState = predecessorStates[0]; } else { this._currentState = abstract_domain_1.AbstractDomain.joinAll(predecessorStates, this._currentState.top()); this.stateCopied = true; } const nodeId = control_flow_graph_1.CfgVertex.getRootId(vertex); // differentiate between widening points and other vertices if (this.isWideningPoint(nodeId)) { const oldState = this.trace.get(nodeId) ?? this._currentState.top(); if (this.shouldWiden(vertex)) { this._currentState = oldState.widen(this._currentState); this.stateCopied = true; } this.trace.set(nodeId, this._currentState); this.stateCopied = false; const visitedCount = this.visited.get(nodeId) ?? 0; this.visited.set(nodeId, visitedCount + 1); // continue visiting after widening point if visited for the first time or the state changed return visitedCount === 0 || !oldState.equals(this._currentState); } else { this.onVisitNode(vertexId); // discard the inferred abstract state when encountering functions with unknown side effects (e.g. `eval`) if (this.config.dfg.unknownSideEffects.has(nodeId)) { this._currentState = this._currentState.top(); this.stateCopied = true; } this.trace.set(nodeId, this._currentState); this.stateCopied = false; const predecessorVisits = predecessors.map(pred => this.visited.get(pred) ?? 0); const visitedCount = this.visited.get(nodeId) ?? 0; this.visited.set(nodeId, visitedCount + 1); // continue visiting if vertex is not a join vertex or number of visits of predecessors is the same return predecessors.length <= 1 || predecessorVisits.every(visits => visits === predecessorVisits[0]); } } onDispatchFunctionCallOrigin(call, origin) { super.onDispatchFunctionCallOrigin(call, origin); switch (origin) { case built_in_proc_name_1.BuiltInProcName.ExpressionList: case built_in_proc_name_1.BuiltInProcName.IfThenElse: case built_in_proc_name_1.BuiltInProcName.ForLoop: case built_in_proc_name_1.BuiltInProcName.WhileLoop: case built_in_proc_name_1.BuiltInProcName.RepeatLoop: case built_in_proc_name_1.BuiltInProcName.FunctionDefinition: case built_in_proc_name_1.BuiltInProcName.Assignment: case built_in_proc_name_1.BuiltInProcName.AssignmentLike: case built_in_proc_name_1.BuiltInProcName.TableAssignment: case built_in_proc_name_1.BuiltInProcName.Replacement: case built_in_proc_name_1.BuiltInProcName.Access: case built_in_proc_name_1.BuiltInProcName.Pipe: case built_in_proc_name_1.BuiltInProcName.Break: case built_in_proc_name_1.BuiltInProcName.Return: return; default: return this.onFunctionCall({ call }); } } onVariableDefinition({ vertex }) { if (this.currentState.get(vertex.id) === undefined) { this.unassigned.add(vertex.id); } } onAssignmentCall({ target, source }) { if (target === undefined || source === undefined) { return; } const value = this.getAbstractValue(source); this.unassigned.delete(target); if (value !== undefined) { this.updateState(target, value); this.trace.set(target, this._currentState); this.stateCopied = false; } } onReplacementCall({ target }) { if (target !== undefined) { this.unassigned.delete(target); } } /** * This event triggers for every function call that is not a condition, loop, assignment, replacement call, or access operation. * * * For example, this triggers for `data.frame` in `x <- data.frame(id = 1:5, name = letters[1:5])`. * * This bundles all function calls that are no conditions, loops, assignments, replacement calls, and access operations. * @protected */ onFunctionCall(_data) { } /** Gets all AST nodes for the predecessor vertices that are leaf nodes and exit vertices */ getPredecessorNodes(vertexId) { return this.config.controlFlow.graph.outgoingEdges(vertexId)?.keys() // outgoing dependency edges are ingoing CFG edges .map(id => this.getCfgVertex(id)) .flatMap(vertex => { if (vertex === undefined) { return []; } else if (this.shouldSkipVertex(vertex)) { return this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex)); } else { return [control_flow_graph_1.CfgVertex.getRootId(vertex)]; } }) .toArray() ?? []; } /** Gets each variable origin that has already been visited and whose assignment has already been processed */ getVariableOrigins(nodeId) { return df_helper_1.Dataflow.origin(this.config.dfg, nodeId) ?.filter(origin => origin.type === 0 /* OriginType.ReadVariableOrigin */) .map(origin => origin.id) .filter(origin => this.trace.has(origin) && !this.unassigned.has(origin)) ?? []; } /** We only perform widening at `for`, `while`, or `repeat` loops with more than one ingoing CFG edge */ isWideningPoint(nodeId) { const ingoingEdges = this.config.controlFlow.graph.outgoingEdges(nodeId)?.size; // outgoing dependency edges are ingoing CFG edges if (ingoingEdges === undefined || ingoingEdges <= 1) { return false; } const node = this.getNormalizedAst(nodeId); if (model_1.RLoopConstructs.is(node)) { return true; } const dataflowVertex = this.getDataflowGraph(nodeId); if (dataflowVertex?.tag !== vertex_1.VertexType.FunctionCall || !Array.isArray(dataflowVertex.origin)) { return false; } const origin = dataflowVertex.origin; return origin.includes(built_in_proc_name_1.BuiltInProcName.ForLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.WhileLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.RepeatLoop); } /** * Checks whether a control flow graph vertex should be skipped during visitation. * By default, we only process entry vertices of widening points, vertices of leaf nodes, and exit vertices (no entry nodes of complex nodes). */ shouldSkipVertex(vertex) { if (this.isWideningPoint(control_flow_graph_1.CfgVertex.getRootId(vertex))) { // skip exit vertices of widening points return control_flow_graph_1.CfgVertex.isMarker(vertex); } return !control_flow_graph_1.CfgVertex.isMarker(vertex) && !control_flow_graph_1.CfgVertex.isBlock(vertex) && control_flow_graph_1.CfgVertex.getEnd(vertex) !== undefined; } /** * Whether widening should be performed at a widening point. * By default, we perform widening when the number of visits of the widening point reaches the widening threshold of the config. */ shouldWiden(wideningPoint) { return (this.visited.get(control_flow_graph_1.CfgVertex.getId(wideningPoint)) ?? 0) >= this.config.ctx.config.abstractInterpretation.wideningThreshold; } } exports.AbstractInterpretationVisitor = AbstractInterpretationVisitor; //# sourceMappingURL=absint-visitor.js.map