/*
 * @eagleoutice/flowr
 * Version: (not specified)
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 326 lines • 17 kB
 * JavaScript
 */
"use strict";
// Set the `__esModule` flag on the exports object (a marker conventionally emitted when ES modules are compiled to CommonJS).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front (initialised to `undefined`); the class is assigned to it after its definition.
exports.AbstractInterpretationVisitor = void 0;
const control_flow_graph_1 = require("../control-flow/control-flow-graph");
const semantic_cfg_guided_visitor_1 = require("../control-flow/semantic-cfg-guided-visitor");
const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name");
const df_helper_1 = require("../dataflow/graph/df-helper");
const vertex_1 = require("../dataflow/graph/vertex");
const model_1 = require("../r-bridge/lang-4.x/ast/model/model");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const assert_1 = require("../util/assert");
const abstract_domain_1 = require("./domains/abstract-domain");
const unsupported_functions_1 = require("./unsupported-functions");
/**
 * Visitor over the control flow graph that performs an abstract interpretation of the program.
 *
 * Limitations: inter-procedural abstract interpretation and abstract condition semantics are not supported yet.
 */
class AbstractInterpretationVisitor extends semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor {
    /**
     * The abstract trace: maps node IDs to the abstract state recorded at the respective node.
     */
    trace = new Map();
    /**
     * The abstract state domain at the AST node that is currently being processed.
     */
    currentState;
    /**
     * The worklist (used as a stack) of vertex IDs that still have to be visited.
     */
    stack = [];
    /**
     * Variable-definition nodes that were already visited but whose assignment has not been processed yet.
     */
    unassigned = new Set();
    /**
     * Pending replacement calls, keyed by the ID of their assignment: the replacement calls were already visited,
     * but the assignment they belong to has not been processed yet.
     */
    replacements = new Map();
    /**
     * @param config      - The visitor configuration; the visiting order is forced to `'forward'` and the visiting type to `'exit'`
     * @param stateDomain - The state abstract domain; its `top()` element is used as the initial current state
     */
    constructor(config, stateDomain) {
        const visitorConfig = { ...config, defaultVisitingOrder: 'forward', defaultVisitingType: 'exit' };
        super(visitorConfig);
        this.currentState = stateDomain.top();
    }
    /**
     * Resolves the abstract value inferred for an AST node.
     * The visitor must have been completed, or at least started, for this to yield results.
     * @param id    - The ID of the node to resolve, or the node object itself (objects and `undefined` are used as-is, anything else is looked up in the normalized AST)
     * @param state - Optional state abstract domain to resolve the value in (defaults to the state recorded at the requested node)
     * @returns The inferred abstract value of the node, or `undefined` if no value was inferred for it
     */
    getAbstractValue(id, state) {
        const { RType } = type_1;
        const { BuiltInProcName } = built_in_proc_name_1;
        let node;
        if (id === undefined || typeof id === 'object') {
            node = id;
        }
        else {
            node = this.getNormalizedAst(id);
        }
        // fall back to the state recorded at the node if the caller did not provide one
        const resolved = state ?? (node === undefined ? undefined : this.getAbstractState(node.info.id));
        if (resolved?.isBottom()) {
            return this.currentState.domain.bottom();
        }
        if (node === undefined) {
            return;
        }
        const nodeId = node.info.id;
        if (resolved?.has(nodeId)) {
            return resolved.get(nodeId);
        }
        const vertex = this.getDataflowGraph(nodeId);
        const call = (0, vertex_1.isFunctionCallVertex)(vertex) ? vertex : undefined;
        const origins = Array.isArray(call?.origin) ? call.origin : [];
        // joins the values only if there is at least one value and none of them is undefined
        const joinIfComplete = (values) => {
            if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
                return abstract_domain_1.AbstractDomain.joinAll(values);
            }
            return undefined;
        };
        if (node.type === RType.Symbol) {
            // a symbol evaluates to the join of the values of its (already assigned) variable origins
            const values = this.getVariableOrigins(nodeId).map(origin => {
                const originIsBottom = this.getAbstractState(origin)?.isBottom();
                return originIsBottom ? this.currentState.domain.bottom() : resolved?.get(origin);
            });
            return joinIfComplete(values);
        }
        if (node.type === RType.Argument && node.value !== undefined) {
            return this.getAbstractValue(node.value, resolved);
        }
        if (node.type === RType.ExpressionList && node.children.length > 0) {
            // an expression list evaluates to its last child
            return this.getAbstractValue(node.children.at(-1), resolved);
        }
        if (origins.includes(BuiltInProcName.Pipe)) {
            if (node.type === RType.Pipe || node.type === RType.BinaryOp) {
                return this.getAbstractValue(node.rhs, resolved);
            }
            if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) {
                return this.getAbstractValue(call.args[1].nodeId, resolved);
            }
            return;
        }
        if (origins.includes(BuiltInProcName.IfThenElse)) {
            let branchValues = [];
            if (node.type === RType.IfThenElse && node.otherwise !== undefined) {
                branchValues = [node.then, node.otherwise].map(branch => this.getAbstractValue(branch, resolved));
            }
            else if (call?.args.every(arg => arg !== r_function_call_1.EmptyArgument) && call.args.length === 3) {
                // arguments 1 and 2 are taken as the two branches
                branchValues = call.args.slice(1, 3).map(arg => this.getAbstractValue(arg.nodeId, resolved));
            }
            return joinIfComplete(branchValues);
        }
        return;
    }
    /**
     * Looks up the abstract state inferred at the location of a specific AST node.
     * The visitor must have been completed, or at least started, for this to yield results.
     * @param id - The ID of the node whose abstract state is requested
     * @returns The abstract state at the node, or `undefined` if there is none (i.e. the node has not been visited or is unreachable)
     */
    getAbstractState(id) {
        if (id === undefined) {
            return undefined;
        }
        return this.trace.get(id);
    }
    /**
     * Computes the abstract state at the end of the program by joining the states of the exit points of the control flow graph.
     * The visitor must have been completed, or at least started, for this to yield results.
     * @returns The inferred abstract state at the end of the program
     */
    getEndState() {
        const exitStates = this.config.controlFlow.exitPoints
            .map(id => this.getCfgVertex(id))
            .filter(assert_1.isNotUndefined)
            .map(control_flow_graph_1.CfgVertex.getRootId)
            .filter(assert_1.isNotUndefined)
            .map(rootId => this.trace.get(rootId))
            .filter(assert_1.isNotUndefined);
        return abstract_domain_1.AbstractDomain.joinAll(exitStates, this.currentState.bottom());
    }
    /**
     * Provides the abstract trace that maps AST nodes to the abstract state inferred at the respective node.
     * @returns The inferred abstract trace of the program
     */
    getAbstractTrace() {
        return this.trace;
    }
    /**
     * Starts the visitor. Guards (via `guard`) that the trace is still empty, i.e. that the visitor was not started before,
     * and clears the set of unassigned variable definitions once the traversal is done.
     */
    start() {
        (0, assert_1.guard)(this.trace.size === 0, 'Abstract interpretation visitor has already been started');
        super.start();
        this.unassigned.clear();
    }
    /**
     * Runs the worklist loop: pops vertices from the stack, visits them, and schedules their successors
     * whenever {@link visitNode} signals that visiting should continue.
     * @param start - The vertex IDs to start visiting from
     */
    startVisitor(start) {
        this.stack = Array.from(start);
        while (this.stack.length > 0) {
            const current = this.stack.pop();
            if (this.visitNode(current)) {
                const edges = this.config.controlFlow.graph.ingoingEdges(current);
                const successors = edges?.keys().toArray().reverse() ?? [];
                for (const next of successors) {
                    // prevent double entries in working list
                    if (this.stack.includes(next)) {
                        continue;
                    }
                    this.stack.push(next);
                }
            }
        }
    }
    /**
     * Visits a single control flow graph vertex and updates the current state and the trace.
     * @param vertexId - The ID of the control flow graph vertex to visit
     * @returns Whether the successors of the vertex should be scheduled for visiting
     */
    visitNode(vertexId) {
        const { CfgVertex } = control_flow_graph_1;
        const vertex = this.getCfgVertex(vertexId);
        // skip exit vertices of widening points and entry vertices of complex nodes
        if (vertex === undefined || this.shouldSkipVertex(vertex)) {
            return true;
        }
        // the new abstract state is the join of the states of all predecessor nodes
        const predecessors = this.getPredecessorNodes(CfgVertex.getId(vertex));
        const incomingStates = predecessors.map(pred => this.trace.get(pred)).filter(assert_1.isNotUndefined);
        this.currentState = abstract_domain_1.AbstractDomain.joinAll(incomingStates, this.currentState.top());
        const nodeId = CfgVertex.getRootId(vertex);
        if (this.isWideningPoint(nodeId)) {
            // widening points: possibly widen against the previously recorded state
            const previousState = this.trace.get(nodeId);
            if (previousState !== undefined && this.shouldWiden(vertex)) {
                this.currentState = previousState.widen(this.currentState);
            }
            this.trace.set(nodeId, this.currentState);
            const visitsSoFar = this.visited.get(nodeId) ?? 0;
            this.visited.set(nodeId, visitsSoFar + 1);
            // continue visiting after widening point if visited for the first time or the state changed
            if (visitsSoFar === 0) {
                return true;
            }
            return !previousState?.equals(this.currentState);
        }
        // all other vertices: dispatch the visit and record the resulting state
        this.onVisitNode(vertexId);
        // discard the inferred abstract state when encountering unsupported function calls
        if (this.isUnsupportedFunctionCall(nodeId)) {
            this.currentState = this.currentState.top();
        }
        this.trace.set(nodeId, this.currentState);
        const predecessorVisits = predecessors.map(pred => this.visited.get(pred) ?? 0);
        const visitsSoFar = this.visited.get(nodeId) ?? 0;
        this.visited.set(nodeId, visitsSoFar + 1);
        // continue visiting if vertex is not a join vertex or number of visits of predecessors is the same
        if (predecessors.length <= 1 || this.stack.length === 0) {
            return true;
        }
        const [firstVisits] = predecessorVisits;
        return predecessorVisits.every(visits => visits === firstVisits);
    }
    /**
     * Processes the replacement calls that were deferred for the node of the given vertex (if any)
     * by triggering {@link onReplacementCall} for each of them.
     * @param vertex - The control flow graph vertex that is visited
     */
    visitUnknown(vertex) {
        const nodeId = control_flow_graph_1.CfgVertex.getRootId(vertex);
        const pending = this.replacements.get(nodeId);
        if (pending === undefined) {
            return;
        }
        this.replacements.delete(nodeId);
        for (const replacement of pending) {
            const call = this.getDataflowGraph(replacement);
            if (!(0, vertex_1.isFunctionCallVertex)(call)) {
                continue;
            }
            this.onReplacementCall({ call, ...this.getSourceAndTarget(call) });
        }
    }
    /**
     * Dispatches a function call by its origin.
     * Replacement calls are deferred (stored in {@link replacements}) if a parent without a dataflow vertex is found;
     * every origin that is not in the list below additionally triggers {@link onFunctionCall}.
     * @param call   - The function call vertex to dispatch
     * @param origin - The origin of the function call
     */
    onDispatchFunctionCallOrigin(call, origin) {
        const { BuiltInProcName } = built_in_proc_name_1;
        if (origin === BuiltInProcName.Replacement) {
            const node = this.getNormalizedAst(call.id);
            const assignment = model_1.RNode.iterateParents(node, this.config.normalizedAst.idMap)
                .find(parent => this.getDataflowGraph(parent.info.id) === undefined);
            if (node !== undefined && assignment !== undefined) {
                const assignmentId = assignment.info.id;
                const pending = this.replacements.get(assignmentId) ?? [];
                pending.push(node.info.id);
                this.replacements.set(assignmentId, pending);
                return;
            }
        }
        super.onDispatchFunctionCallOrigin(call, origin);
        // origins for which `onFunctionCall` is not triggered
        const excludedOrigins = [
            BuiltInProcName.ExpressionList,
            BuiltInProcName.IfThenElse,
            BuiltInProcName.ForLoop,
            BuiltInProcName.WhileLoop,
            BuiltInProcName.RepeatLoop,
            BuiltInProcName.FunctionDefinition,
            BuiltInProcName.Assignment,
            BuiltInProcName.AssignmentLike,
            BuiltInProcName.TableAssignment,
            BuiltInProcName.Replacement,
            BuiltInProcName.Access,
            BuiltInProcName.Pipe,
            BuiltInProcName.Break,
            BuiltInProcName.Return,
        ];
        if (excludedOrigins.includes(origin)) {
            return;
        }
        return this.onFunctionCall({ call });
    }
    /**
     * Remembers a variable definition as unassigned if the current state holds no value for it yet.
     */
    onVariableDefinition({ vertex }) {
        const { id } = vertex;
        if (this.currentState.get(id) === undefined) {
            this.unassigned.add(id);
        }
    }
    /**
     * Handles an assignment: marks the target as assigned and, if a value could be resolved for the source,
     * stores that value for the target in the current state and records the state in the trace.
     */
    onAssignmentCall({ target, source }) {
        if (target === undefined || source === undefined) {
            return;
        }
        const sourceValue = this.getAbstractValue(source);
        this.unassigned.delete(target);
        if (sourceValue === undefined) {
            return;
        }
        this.currentState.set(target, sourceValue);
        this.trace.set(target, this.currentState);
    }
    /**
     * Handles a replacement call by marking its target (if any) as assigned.
     */
    onReplacementCall({ target }) {
        if (target === undefined) {
            return;
        }
        this.unassigned.delete(target);
    }
    /**
     * Event hook for every function call that is not a condition, loop, assignment, replacement call, or access operation.
     *
     * For example, this triggers for `data.frame` in `x <- data.frame(id = 1:5, name = letters[1:5])`.
     *
     * The default implementation does nothing.
     * @protected
     */
    onFunctionCall(_data) { }
    /** Collects the AST nodes of all predecessor vertices that are leaf nodes or exit vertices; skipped vertices are resolved recursively */
    getPredecessorNodes(vertexId) {
        const { CfgVertex } = control_flow_graph_1;
        // outgoing dependency edges are ingoing CFG edges
        const edges = this.config.controlFlow.graph.outgoingEdges(vertexId);
        const nodes = [];
        if (edges === undefined || edges === null) {
            return nodes;
        }
        for (const id of edges.keys()) {
            const vertex = this.getCfgVertex(id);
            if (vertex === undefined) {
                continue;
            }
            if (this.shouldSkipVertex(vertex)) {
                // look through skipped vertices to their own predecessors
                for (const node of this.getPredecessorNodes(CfgVertex.getId(vertex))) {
                    nodes.push(node);
                }
            }
            else {
                nodes.push(CfgVertex.getRootId(vertex));
            }
        }
        return nodes;
    }
    /** Collects the variable origins of a node that were already visited and whose assignment was already processed */
    getVariableOrigins(nodeId) {
        const origins = df_helper_1.Dataflow.origin(this.config.dfg, nodeId);
        if (origins === undefined || origins === null) {
            return [];
        }
        const readOriginIds = origins
            .filter(({ type }) => type === 0 /* OriginType.ReadVariableOrigin */)
            .map(({ id }) => id);
        return readOriginIds.filter(id => this.trace.has(id) && !this.unassigned.has(id));
    }
    /** Tells whether a node represents an unsupported (environment-changing) function call (e.g. `eval`, `load`, `attach`, `rm`, ...) */
    isUnsupportedFunctionCall(nodeId) {
        const dataflowVertex = this.getDataflowGraph(nodeId);
        return unsupported_functions_1.UnsupportedFunctions.isUnsupportedCall(dataflowVertex);
    }
    /** Widening is only performed at `for`, `while`, or `repeat` loops with more than one ingoing CFG edge */
    isWideningPoint(nodeId) {
        const { BuiltInProcName } = built_in_proc_name_1;
        // outgoing dependency edges are ingoing CFG edges
        const ingoingCount = this.config.controlFlow.graph.outgoingEdges(nodeId)?.size;
        if (ingoingCount === undefined || ingoingCount <= 1) {
            return false;
        }
        if (model_1.RLoopConstructs.is(this.getNormalizedAst(nodeId))) {
            return true;
        }
        // otherwise, check whether the node is a function call that originates from a loop
        const dataflowVertex = this.getDataflowGraph(nodeId);
        const isCall = dataflowVertex?.tag === vertex_1.VertexType.FunctionCall;
        if (!isCall || !Array.isArray(dataflowVertex.origin)) {
            return false;
        }
        const callOrigins = dataflowVertex.origin;
        const loopOrigins = [BuiltInProcName.ForLoop, BuiltInProcName.WhileLoop, BuiltInProcName.RepeatLoop];
        return loopOrigins.some(loopOrigin => callOrigins.includes(loopOrigin));
    }
    /**
     * Decides whether a control flow graph vertex is skipped during visitation.
     * By default, only entry vertices of widening points, vertices of leaf nodes, and exit vertices are processed (no entry nodes of complex nodes).
     */
    shouldSkipVertex(vertex) {
        const { CfgVertex } = control_flow_graph_1;
        const atWideningPoint = this.isWideningPoint(CfgVertex.getRootId(vertex));
        const isMarker = CfgVertex.isMarker(vertex);
        if (atWideningPoint) {
            // skip exit vertices of widening points
            return isMarker;
        }
        if (isMarker || CfgVertex.isBlock(vertex)) {
            return false;
        }
        return CfgVertex.getEnd(vertex) !== undefined;
    }
    /**
     * Decides whether widening is performed at a widening point.
     * By default, widening is performed once the number of visits of the widening point reaches the widening threshold of the config.
     */
    shouldWiden(wideningPoint) {
        const visits = this.visited.get(control_flow_graph_1.CfgVertex.getId(wideningPoint)) ?? 0;
        const { wideningThreshold } = this.config.ctx.config.abstractInterpretation;
        return visits >= wideningThreshold;
    }
}
// Publish the visitor class as a named export of this module.
exports.AbstractInterpretationVisitor = AbstractInterpretationVisitor;
//# sourceMappingURL=absint-visitor.js.map