/*
 * UNPKG
 *
 * @eagleoutice/flowr
 *
 * Version:
 *
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 *
 * 326 lines 17 kB
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbstractInterpretationVisitor = void 0;
const control_flow_graph_1 = require("../control-flow/control-flow-graph");
const semantic_cfg_guided_visitor_1 = require("../control-flow/semantic-cfg-guided-visitor");
const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name");
const df_helper_1 = require("../dataflow/graph/df-helper");
const vertex_1 = require("../dataflow/graph/vertex");
const model_1 = require("../r-bridge/lang-4.x/ast/model/model");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const assert_1 = require("../util/assert");
const abstract_domain_1 = require("./domains/abstract-domain");
const unsupported_functions_1 = require("./unsupported-functions");
/**
 * A control flow graph visitor to perform abstract interpretation.
 *
 * However, the visitor does not yet support inter-procedural abstract interpretation and abstract condition semantics.
 */
class AbstractInterpretationVisitor extends semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor {
    /**
     * The abstract trace of the abstract interpretation visitor mapping node IDs to the abstract state at the respective node.
     */
    trace = new Map();
    /**
     * The current abstract state domain at the currently processed AST node.
     */
    currentState;
    /**
     * The current worklist stack of next vertex IDs to visit.
     */
    stack = [];
    /**
     * A set of nodes representing variable definitions that have already been visited but whose assignment has not yet been processed.
     */
    unassigned = new Set();
    /**
     * A map mapping assignments of replacement calls to their replacement calls for replacement calls that have already been visited but whose assignment has not yet been processed.
     */
    replacements = new Map();
    /**
     * Creates the visitor. The given configuration is forwarded to the base visitor with
     * `defaultVisitingOrder` forced to `'forward'` and `defaultVisitingType` forced to `'exit'`.
     * @param config      - The visitor configuration (spread into the configuration passed to the base class)
     * @param stateDomain - The state abstract domain; its `top()` element is used as the initial current state
     */
    constructor(config, stateDomain) {
        super({ ...config, defaultVisitingOrder: 'forward', defaultVisitingType: 'exit' });
        this.currentState = stateDomain.top();
    }
    /**
     * Resolves the inferred abstract value of an AST node.
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     * @param id - The ID of the node to get the inferred value for (an already resolved node object is used as is)
     * @param state - An optional state abstract domain used to resolve the inferred abstract value (defaults to the state at the requested node)
     * @returns The inferred abstract value of the node, or `undefined` if no value was inferred for the node
     */
    getAbstractValue(id, state) {
        // objects are treated as already resolved AST nodes, everything else is looked up as a node ID
        const node = (id === undefined || typeof id === 'object') ? id : this.getNormalizedAst(id);
        state ??= node !== undefined ? this.getAbstractState(node.info.id) : undefined;
        if (state?.isBottom()) {
            return this.currentState.domain.bottom();
        }
        else if (node === undefined) {
            return;
        }
        else if (state?.has(node.info.id)) {
            return state.get(node.info.id);
        }
        const vertex = this.getDataflowGraph(node.info.id);
        const call = (0, vertex_1.isFunctionCallVertex)(vertex) ? vertex : undefined;
        const origins = Array.isArray(call?.origin) ? call.origin : [];
        if (node.type === type_1.RType.Symbol) {
            // join the values of all (already assigned) origins of the variable
            const values = this.getVariableOrigins(node.info.id)
                .map(origin => (this.getAbstractState(origin)?.isBottom() ? this.currentState.domain.bottom() : state?.get(origin)));
            if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
                return abstract_domain_1.AbstractDomain.joinAll(values);
            }
        }
        else if (node.type === type_1.RType.Argument && node.value !== undefined) {
            return this.getAbstractValue(node.value, state);
        }
        else if (node.type === type_1.RType.ExpressionList && node.children.length > 0) {
            // the value of an expression list is the value of its last child
            return this.getAbstractValue(node.children.at(-1), state);
        }
        else if (origins.includes(built_in_proc_name_1.BuiltInProcName.Pipe)) {
            if (node.type === type_1.RType.Pipe || node.type === type_1.RType.BinaryOp) {
                return this.getAbstractValue(node.rhs, state);
            }
            else if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) {
                return this.getAbstractValue(call.args[1].nodeId, state);
            }
        }
        else if (origins.includes(built_in_proc_name_1.BuiltInProcName.IfThenElse)) {
            // join the values of both branches; only resolved if both branches exist and have a value
            let values = [];
            if (node.type === type_1.RType.IfThenElse && node.otherwise !== undefined) {
                values = [node.then, node.otherwise].map(entry => this.getAbstractValue(entry, state));
            }
            else if (call?.args.every(arg => arg !== r_function_call_1.EmptyArgument) && call.args.length === 3) {
                values = call.args.slice(1, 3).map(entry => this.getAbstractValue(entry.nodeId, state));
            }
            if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
                return abstract_domain_1.AbstractDomain.joinAll(values);
            }
        }
        // no value could be inferred for the node
        return undefined;
    }
    /**
     * Gets the inferred abstract state at the location of a specific AST node.
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     * @param id - The ID of the node to get the abstract state at
     * @returns The abstract state at the node, or `undefined` if the node has no abstract state (i.e. the node has not been visited or is unreachable).
     */
    getAbstractState(id) {
        return id === undefined ? undefined : this.trace.get(id);
    }
    /**
     * Gets the inferred abstract state at the end of the program (exit nodes of the control flow graph).
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     * @returns The inferred abstract state at the end of the program
     */
    getEndState() {
        const exitPoints = this.config.controlFlow.exitPoints.map(id => this.getCfgVertex(id)).filter(assert_1.isNotUndefined);
        const exitNodes = exitPoints.map(control_flow_graph_1.CfgVertex.getRootId).filter(assert_1.isNotUndefined);
        const states = exitNodes.map(node => this.trace.get(node)).filter(assert_1.isNotUndefined);
        return abstract_domain_1.AbstractDomain.joinAll(states, this.currentState.bottom());
    }
    /**
     * Gets the inferred abstract trace mapping AST nodes to the inferred abstract state at the respective node.
     * @returns The inferred abstract trace of the program
     */
    getAbstractTrace() {
        return this.trace;
    }
    /**
     * Starts the abstract interpretation.
     * Checks via `guard` that the trace is still empty (i.e. the visitor has not been started before),
     * runs the base visitor, and afterwards clears the set of unassigned variable definitions.
     */
    start() {
        (0, assert_1.guard)(this.trace.size === 0, 'Abstract interpretation visitor has already been started');
        super.start();
        this.unassigned.clear();
    }
    /**
     * Runs the worklist loop: vertices are popped from the stack and visited, and whenever
     * {@link visitNode} returns `true`, the successors of the vertex are pushed onto the stack.
     * @param start - The vertex IDs to initialize the worklist stack with
     */
    startVisitor(start) {
        this.stack = Array.from(start);
        while (this.stack.length > 0) {
            const current = this.stack.pop();
            if (!this.visitNode(current)) {
                continue;
            }
            // ingoing edges of the graph are used as the successors to visit next; reversed as the stack is popped from the end
            const successors = this.config.controlFlow.graph.ingoingEdges(current)?.keys().toArray().reverse() ?? [];
            for (const next of successors) {
                if (!this.stack.includes(next)) { // prevent double entries in working list
                    this.stack.push(next);
                }
            }
        }
    }
    /**
     * Visits a single control flow graph vertex, updates the current abstract state and records it in the trace.
     * @param vertexId - The ID of the control flow graph vertex to visit
     * @returns Whether the successors of the vertex should be visited next
     */
    visitNode(vertexId) {
        const vertex = this.getCfgVertex(vertexId);
        // skip exit vertices of widening points and entry vertices of complex nodes
        if (vertex === undefined || this.shouldSkipVertex(vertex)) {
            return true;
        }
        // retrieve new abstract state by joining states of predecessor nodes
        const predecessors = this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex));
        const predecessorStates = predecessors.map(pred => this.trace.get(pred)).filter(assert_1.isNotUndefined);
        this.currentState = abstract_domain_1.AbstractDomain.joinAll(predecessorStates, this.currentState.top());
        const nodeId = control_flow_graph_1.CfgVertex.getRootId(vertex);
        // differentiate between widening points and other vertices
        if (this.isWideningPoint(nodeId)) {
            const oldState = this.trace.get(nodeId);
            if (oldState !== undefined && this.shouldWiden(vertex)) {
                this.currentState = oldState.widen(this.currentState);
            }
            this.trace.set(nodeId, this.currentState);
            const visitedCount = this.visited.get(nodeId) ?? 0;
            this.visited.set(nodeId, visitedCount + 1);
            // continue visiting after widening point if visited for the first time or the state changed
            return visitedCount === 0 || !oldState?.equals(this.currentState);
        }
        else {
            this.onVisitNode(vertexId);
            // discard the inferred abstract state when encountering unsupported function calls
            if (this.isUnsupportedFunctionCall(nodeId)) {
                this.currentState = this.currentState.top();
            }
            this.trace.set(nodeId, this.currentState);
            const predecessorVisits = predecessors.map(pred => this.visited.get(pred) ?? 0);
            const visitedCount = this.visited.get(nodeId) ?? 0;
            this.visited.set(nodeId, visitedCount + 1);
            // continue visiting if vertex is not a join vertex or number of visits of predecessors is the same
            return predecessors.length <= 1 || this.stack.length === 0 || predecessorVisits.every(visits => visits === predecessorVisits[0]);
        }
    }
    /**
     * Processes the replacement calls that were deferred for the node of the given vertex
     * (registered in {@link onDispatchFunctionCallOrigin}) by triggering {@link onReplacementCall} for each of them.
     * @param vertex - The control flow graph vertex that is visited
     */
    visitUnknown(vertex) {
        const nodeId = control_flow_graph_1.CfgVertex.getRootId(vertex);
        const replacements = this.replacements.get(nodeId);
        if (replacements !== undefined) {
            // remove the entry first, so each deferred replacement call is only processed once
            this.replacements.delete(nodeId);
            for (const replacement of replacements) {
                const call = this.getDataflowGraph(replacement);
                if ((0, vertex_1.isFunctionCallVertex)(call)) {
                    this.onReplacementCall({ call, ...this.getSourceAndTarget(call) });
                }
            }
        }
    }
    /**
     * Dispatches a function call based on its origin.
     * Replacement calls are deferred: they are registered in {@link replacements} under the first parent node
     * that has no dataflow graph vertex and are processed once that node is visited (see {@link visitUnknown}).
     * All other calls are dispatched by the base class; calls whose origin is none of the origins listed below
     * additionally trigger {@link onFunctionCall}.
     * @param call   - The dataflow graph vertex of the function call
     * @param origin - The origin of the function call to dispatch
     */
    onDispatchFunctionCallOrigin(call, origin) {
        if (origin === built_in_proc_name_1.BuiltInProcName.Replacement) {
            const node = this.getNormalizedAst(call.id);
            const assignment = model_1.RNode.iterateParents(node, this.config.normalizedAst.idMap)
                .find(parent => this.getDataflowGraph(parent.info.id) === undefined);
            if (node !== undefined && assignment !== undefined) {
                const replacements = this.replacements.get(assignment.info.id) ?? [];
                replacements.push(node.info.id);
                this.replacements.set(assignment.info.id, replacements);
                return;
            }
        }
        super.onDispatchFunctionCallOrigin(call, origin);
        switch (origin) {
            case built_in_proc_name_1.BuiltInProcName.ExpressionList:
            case built_in_proc_name_1.BuiltInProcName.IfThenElse:
            case built_in_proc_name_1.BuiltInProcName.ForLoop:
            case built_in_proc_name_1.BuiltInProcName.WhileLoop:
            case built_in_proc_name_1.BuiltInProcName.RepeatLoop:
            case built_in_proc_name_1.BuiltInProcName.FunctionDefinition:
            case built_in_proc_name_1.BuiltInProcName.Assignment:
            case built_in_proc_name_1.BuiltInProcName.AssignmentLike:
            case built_in_proc_name_1.BuiltInProcName.TableAssignment:
            case built_in_proc_name_1.BuiltInProcName.Replacement:
            case built_in_proc_name_1.BuiltInProcName.Access:
            case built_in_proc_name_1.BuiltInProcName.Pipe:
            case built_in_proc_name_1.BuiltInProcName.Break:
            case built_in_proc_name_1.BuiltInProcName.Return:
                return;
            default:
                return this.onFunctionCall({ call });
        }
    }
    /**
     * Marks a variable definition as unassigned if the current state holds no value for it yet.
     * The mark is removed again in {@link onAssignmentCall} or {@link onReplacementCall}.
     */
    onVariableDefinition({ vertex }) {
        if (this.currentState.get(vertex.id) === undefined) {
            this.unassigned.add(vertex.id);
        }
    }
    /**
     * Handles an assignment: the target is no longer marked as unassigned and, if a value could be
     * resolved for the source, the value is stored for the target in the current state and the
     * current state is recorded for the target in the trace.
     */
    onAssignmentCall({ target, source }) {
        if (target === undefined || source === undefined) {
            return;
        }
        const value = this.getAbstractValue(source);
        this.unassigned.delete(target);
        if (value !== undefined) {
            this.currentState.set(target, value);
            this.trace.set(target, this.currentState);
        }
    }
    /**
     * Handles a replacement call by removing the unassigned mark of its target (if there is a target).
     */
    onReplacementCall({ target }) {
        if (target !== undefined) {
            this.unassigned.delete(target);
        }
    }
    /**
     * This event triggers for every function call that is not a condition, loop, assignment, replacement call, or access operation.
     *
     * For example, this triggers for `data.frame` in `x <- data.frame(id = 1:5, name = letters[1:5])`.
     *
     * This bundles all function calls that are no conditions, loops, assignments, replacement calls, and access operations.
     * @protected
     */
    onFunctionCall(_data) { }
    /** Gets all AST nodes for the predecessor vertices that are leaf nodes and exit vertices */
    getPredecessorNodes(vertexId) {
        return this.config.controlFlow.graph.outgoingEdges(vertexId)?.keys() // outgoing dependency edges are ingoing CFG edges
            .map(id => this.getCfgVertex(id))
            .flatMap(vertex => {
            if (vertex === undefined) {
                return [];
            }
            else if (this.shouldSkipVertex(vertex)) {
                // look through skipped vertices to their own predecessors
                return this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex));
            }
            else {
                return [control_flow_graph_1.CfgVertex.getRootId(vertex)];
            }
        })
            .toArray() ?? [];
    }
    /** Gets each variable origin that has already been visited and whose assignment has already been processed */
    getVariableOrigins(nodeId) {
        return df_helper_1.Dataflow.origin(this.config.dfg, nodeId)
            ?.filter(origin => origin.type === 0 /* OriginType.ReadVariableOrigin */)
            .map(origin => origin.id)
            .filter(origin => this.trace.has(origin) && !this.unassigned.has(origin)) ?? [];
    }
    /** Checks whether a node represents an unsupported (environment-changing) function call (e.g. `eval`, `load`, `attach`, `rm`, ...) */
    isUnsupportedFunctionCall(nodeId) {
        return unsupported_functions_1.UnsupportedFunctions.isUnsupportedCall(this.getDataflowGraph(nodeId));
    }
    /** We only perform widening at `for`, `while`, or `repeat` loops with more than one ingoing CFG edge */
    isWideningPoint(nodeId) {
        const ingoingEdges = this.config.controlFlow.graph.outgoingEdges(nodeId)?.size; // outgoing dependency edges are ingoing CFG edges
        if (ingoingEdges === undefined || ingoingEdges <= 1) {
            return false;
        }
        else if (model_1.RLoopConstructs.is(this.getNormalizedAst(nodeId))) {
            return true;
        }
        const dataflowVertex = this.getDataflowGraph(nodeId);
        if (dataflowVertex?.tag !== vertex_1.VertexType.FunctionCall || !Array.isArray(dataflowVertex.origin)) {
            return false;
        }
        const origin = dataflowVertex.origin;
        return origin.includes(built_in_proc_name_1.BuiltInProcName.ForLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.WhileLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.RepeatLoop);
    }
    /**
     * Checks whether a control flow graph vertex should be skipped during visitation.
     * By default, we only process entry vertices of widening points, vertices of leaf nodes, and exit vertices (no entry nodes of complex nodes).
     */
    shouldSkipVertex(vertex) {
        if (this.isWideningPoint(control_flow_graph_1.CfgVertex.getRootId(vertex))) {
            // skip exit vertices of widening points
            return control_flow_graph_1.CfgVertex.isMarker(vertex);
        }
        return !control_flow_graph_1.CfgVertex.isMarker(vertex) && !control_flow_graph_1.CfgVertex.isBlock(vertex) && control_flow_graph_1.CfgVertex.getEnd(vertex) !== undefined;
    }
    /**
     * Whether widening should be performed at a widening point.
     * By default, we perform widening when the number of visits of the widening point reaches the widening threshold of the config.
     */
    shouldWiden(wideningPoint) {
        return (this.visited.get(control_flow_graph_1.CfgVertex.getId(wideningPoint)) ?? 0) >= this.config.ctx.config.abstractInterpretation.wideningThreshold;
    }
}
exports.AbstractInterpretationVisitor = AbstractInterpretationVisitor;
//# sourceMappingURL=absint-visitor.js.map