@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
318 lines • 16 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbstractInterpretationVisitor = void 0;
const control_flow_graph_1 = require("../control-flow/control-flow-graph");
const semantic_cfg_guided_visitor_1 = require("../control-flow/semantic-cfg-guided-visitor");
const vertex_1 = require("../dataflow/graph/vertex");
const model_1 = require("../r-bridge/lang-4.x/ast/model/model");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const assert_1 = require("../util/assert");
const abstract_domain_1 = require("./domains/abstract-domain");
const state_abstract_domain_1 = require("./domains/state-abstract-domain");
const df_helper_1 = require("../dataflow/graph/df-helper");
const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name");
/**
 * Makes sure the current abstract state of a visitor can be modified in place.
 *
 * If the state has not been copied yet (`stateCopied` is `false`), the current state may still be
 * referenced by the abstract trace, so it is replaced by a new state created from its value
 * and the visitor is marked as owning a copy.
 * @param visitor - The abstract interpretation visitor whose current state is about to be modified
 */
function ensureStateCopied(visitor) {
    if (!visitor.stateCopied) {
        visitor._currentState = visitor._currentState.create(visitor.currentState.value);
        visitor.stateCopied = true;
    }
}
/**
 * A control flow graph visitor to perform abstract interpretation.
 *
 * However, the visitor does not yet support inter-procedural abstract interpretation and abstract condition semantics.
 */
class AbstractInterpretationVisitor extends semantic_cfg_guided_visitor_1.SemanticCfgGuidedVisitor {
    /**
     * The abstract trace of the abstract interpretation visitor mapping node IDs to the abstract state at the respective node.
     */
    trace = new Map();
    /**
     * The current abstract state domain at the currently processed AST node.
     */
    _currentState;
    /**
     * A set of nodes representing variable definitions that have already been visited but whose assignment has not yet been processed.
     */
    unassigned = new Set();
    /**
     * Whether the current abstract state has been copied/cloned and is safe to modify in place.
     */
    stateCopied = false;
    /**
     * Creates the visitor with a forward visiting order and the `exit` visiting type (overriding the respective
     * entries of the passed config) and starts with an empty mutable abstract state.
     * @param config - The visitor configuration that is forwarded to the base visitor
     */
    constructor(config) {
        super({ ...config, defaultVisitingOrder: 'forward', defaultVisitingType: 'exit' });
        this._currentState = new state_abstract_domain_1.MutableStateAbstractDomain(new Map());
    }
    /** The current abstract state at the currently processed node. */
    get currentState() {
        return this._currentState;
    }
    /**
     * Removes the abstract value of a node from the current abstract state.
     * The current state is copied first if it has not been copied yet.
     * @param node - The ID of the node whose value should be removed
     */
    removeState(node) {
        ensureStateCopied(this);
        this._currentState.remove(node);
    }
    /**
     * Sets the abstract value of a node in the current abstract state.
     * The current state is copied first if it has not been copied yet.
     * @param node  - The ID of the node whose value should be set
     * @param value - The abstract value to store for the node
     */
    updateState(node, value) {
        ensureStateCopied(this);
        this._currentState.set(node, value);
    }
    /**
     * Resolves the inferred abstract value of an AST node.
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     *
     * If the state holds no value for the node itself, the value is derived from the node:
     * - symbols: the join of the values of all already assigned variable origins (only if every origin has a value)
     * - arguments: the value of the argument value
     * - expression lists: the value of the last child
     * - pipes: the value of the right-hand side (or of the second call argument)
     * - if-then-else: the join of the values of both branches (only if both branches have a value)
     * @param id - The ID of the node to get the inferred value for (a node object is accepted as well)
     * @param state - An optional state abstract domain used to resolve the inferred abstract value (defaults to the state at the requested node)
     * @returns The inferred abstract value of the node, or `undefined` if no value was inferred for the node
     */
    getAbstractValue(id, state) {
        const node = (id === undefined || typeof id === 'object') ? id : this.getNormalizedAst(id);
        state ??= node !== undefined ? this.getAbstractState(node.info.id) : undefined;
        if (node === undefined) {
            return;
        }
        else if (state?.has(node.info.id)) {
            return state.get(node.info.id);
        }
        const vertex = this.getDataflowGraph(node.info.id);
        const call = (0, vertex_1.isFunctionCallVertex)(vertex) ? vertex : undefined;
        // only function calls with an origin list can be recognized as built-in pipes or conditionals below
        const origins = Array.isArray(call?.origin) ? call.origin : [];
        if (node.type === type_1.RType.Symbol) {
            const values = this.getVariableOrigins(node.info.id).map(origin => state?.get(origin));
            if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
                return abstract_domain_1.AbstractDomain.joinAll(values);
            }
        }
        else if (node.type === type_1.RType.Argument && node.value !== undefined) {
            return this.getAbstractValue(node.value, state);
        }
        else if (node.type === type_1.RType.ExpressionList && node.children.length > 0) {
            return this.getAbstractValue(node.children.at(-1), state);
        }
        else if (origins.includes(built_in_proc_name_1.BuiltInProcName.Pipe)) {
            if (node.type === type_1.RType.Pipe || node.type === type_1.RType.BinaryOp) {
                return this.getAbstractValue(node.rhs, state);
            }
            else if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) {
                return this.getAbstractValue(call.args[1].nodeId, state);
            }
        }
        else if (origins.includes(built_in_proc_name_1.BuiltInProcName.IfThenElse)) {
            let values = [];
            if (node.type === type_1.RType.IfThenElse && node.otherwise !== undefined) {
                values = [node.then, node.otherwise].map(entry => this.getAbstractValue(entry, state));
            }
            else if (call?.args.every(arg => arg !== r_function_call_1.EmptyArgument) && call.args.length === 3) {
                // skip the first argument and use the remaining two arguments as the branches
                values = call.args.slice(1, 3).map(entry => this.getAbstractValue(entry.nodeId, state));
            }
            if (values.length > 0 && values.every(assert_1.isNotUndefined)) {
                return abstract_domain_1.AbstractDomain.joinAll(values);
            }
        }
    }
    /**
     * Gets the inferred abstract state at the location of a specific AST node.
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     * @param id - The ID of the node to get the abstract state at
     * @returns The abstract state at the node, or `undefined` if the node has no abstract state (i.e. the node has not been visited or is unreachable).
     */
    getAbstractState(id) {
        return id === undefined ? undefined : this.trace.get(id);
    }
    /**
     * Gets the inferred abstract state at the end of the program (exit nodes of the control flow graph).
     * This requires that the abstract interpretation visitor has been completed, or at least started.
     * @returns The inferred abstract state at the end of the program
     */
    getEndState() {
        const exitPoints = this.config.controlFlow.exitPoints.map(id => this.getCfgVertex(id)).filter(assert_1.isNotUndefined);
        const exitNodes = exitPoints.map(control_flow_graph_1.CfgVertex.getRootId).filter(assert_1.isNotUndefined);
        const states = exitNodes.map(node => this.trace.get(node)).filter(assert_1.isNotUndefined);
        return abstract_domain_1.AbstractDomain.joinAll(states, this._currentState.top());
    }
    /**
     * Gets the inferred abstract trace mapping AST nodes to the inferred abstract state at the respective node.
     * @returns The inferred abstract trace of the program
     */
    getAbstractTrace() {
        return this.trace;
    }
    /**
     * Starts the abstract interpretation.
     * Fails the guard if the trace is not empty, i.e. if the visitor has already been started.
     */
    start() {
        (0, assert_1.guard)(this.trace.size === 0, 'Abstract interpretation visitor has already been started');
        super.start();
        this.unassigned.clear();
    }
    /**
     * Visits the control flow graph using a working list that is processed as a stack.
     * The vertices reached via the ingoing edges of a vertex are only scheduled if {@link visitNode} requests to continue.
     * @param start - The IDs of the vertices to start the visitation at
     */
    startVisitor(start) {
        const stack = Array.from(start);
        while (stack.length > 0) {
            const current = stack.pop();
            if (!this.visitNode(current)) {
                continue;
            }
            // NOTE(review): presumably these are the CFG successors, as the graph stores edges in dependency direction (see getPredecessorNodes)
            for (const next of this.config.controlFlow.graph.ingoingEdges(current)?.keys().toArray().reverse() ?? []) {
                if (!stack.includes(next)) { // prevent double entries in working list
                    stack.push(next);
                }
            }
        }
    }
    /**
     * Processes a single control flow graph vertex: derives the current abstract state from the states of the
     * predecessor nodes, applies widening at widening points or the node semantics otherwise, and stores the
     * resulting state in the abstract trace.
     * @param vertexId - The ID of the control flow graph vertex to visit
     * @returns Whether the visitation should continue after this vertex
     */
    visitNode(vertexId) {
        const vertex = this.getCfgVertex(vertexId);
        // skip exit vertices of widening points and entry vertices of complex nodes
        if (vertex === undefined || this.shouldSkipVertex(vertex)) {
            return true;
        }
        const predecessors = this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex));
        const predecessorStates = predecessors.map(pred => this.trace.get(pred)).filter(assert_1.isNotUndefined);
        // retrieve new abstract state by joining states of predecessor nodes
        if (predecessorStates.length === 1) {
            // the state of the predecessor is shared with the trace, so it must be copied before it is modified in place
            this._currentState = predecessorStates[0];
            this.stateCopied = false;
        }
        else {
            this._currentState = abstract_domain_1.AbstractDomain.joinAll(predecessorStates, this._currentState.top());
            this.stateCopied = true;
        }
        const nodeId = control_flow_graph_1.CfgVertex.getRootId(vertex);
        // differentiate between widening points and other vertices
        if (this.isWideningPoint(nodeId)) {
            const oldState = this.trace.get(nodeId) ?? this._currentState.top();
            if (this.shouldWiden(vertex)) {
                this._currentState = oldState.widen(this._currentState);
                this.stateCopied = true;
            }
            // once stored in the trace, the state is shared again and must not be modified in place
            this.trace.set(nodeId, this._currentState);
            this.stateCopied = false;
            const visitedCount = this.visited.get(nodeId) ?? 0;
            this.visited.set(nodeId, visitedCount + 1);
            // continue visiting after widening point if visited for the first time or the state changed
            return visitedCount === 0 || !oldState.equals(this._currentState);
        }
        else {
            this.onVisitNode(vertexId);
            // discard the inferred abstract state when encountering functions with unknown side effects (e.g. `eval`)
            if (this.config.dfg.unknownSideEffects.has(nodeId)) {
                this._currentState = this._currentState.top();
                this.stateCopied = true;
            }
            // once stored in the trace, the state is shared again and must not be modified in place
            this.trace.set(nodeId, this._currentState);
            this.stateCopied = false;
            const predecessorVisits = predecessors.map(pred => this.visited.get(pred) ?? 0);
            const visitedCount = this.visited.get(nodeId) ?? 0;
            this.visited.set(nodeId, visitedCount + 1);
            // continue visiting if vertex is not a join vertex or number of visits of predecessors is the same
            return predecessors.length <= 1 || predecessorVisits.every(visits => visits === predecessorVisits[0]);
        }
    }
    /**
     * Forwards the dispatch to the base visitor and additionally triggers {@link onFunctionCall} for every
     * function call origin that is not one of the origins listed below.
     * @param call   - The function call vertex that is dispatched
     * @param origin - The origin of the function call that is dispatched
     */
    onDispatchFunctionCallOrigin(call, origin) {
        super.onDispatchFunctionCallOrigin(call, origin);
        switch (origin) {
            case built_in_proc_name_1.BuiltInProcName.ExpressionList:
            case built_in_proc_name_1.BuiltInProcName.IfThenElse:
            case built_in_proc_name_1.BuiltInProcName.ForLoop:
            case built_in_proc_name_1.BuiltInProcName.WhileLoop:
            case built_in_proc_name_1.BuiltInProcName.RepeatLoop:
            case built_in_proc_name_1.BuiltInProcName.FunctionDefinition:
            case built_in_proc_name_1.BuiltInProcName.Assignment:
            case built_in_proc_name_1.BuiltInProcName.AssignmentLike:
            case built_in_proc_name_1.BuiltInProcName.TableAssignment:
            case built_in_proc_name_1.BuiltInProcName.Replacement:
            case built_in_proc_name_1.BuiltInProcName.Access:
            case built_in_proc_name_1.BuiltInProcName.Pipe:
            case built_in_proc_name_1.BuiltInProcName.Break:
            case built_in_proc_name_1.BuiltInProcName.Return:
                return;
            default:
                return this.onFunctionCall({ call });
        }
    }
    /**
     * Marks a variable definition as not yet assigned if the current state holds no value for it.
     */
    onVariableDefinition({ vertex }) {
        if (this.currentState.get(vertex.id) === undefined) {
            this.unassigned.add(vertex.id);
        }
    }
    /**
     * Marks the target of an assignment as assigned and, if a value can be resolved for the source,
     * stores that value for the target in the current state and records the state for the target in the trace.
     */
    onAssignmentCall({ target, source }) {
        if (target === undefined || source === undefined) {
            return;
        }
        const value = this.getAbstractValue(source);
        this.unassigned.delete(target);
        if (value !== undefined) {
            this.updateState(target, value);
            // once stored in the trace, the state is shared again and must not be modified in place
            this.trace.set(target, this._currentState);
            this.stateCopied = false;
        }
    }
    /**
     * Marks the target of a replacement call as assigned.
     */
    onReplacementCall({ target }) {
        if (target !== undefined) {
            this.unassigned.delete(target);
        }
    }
    /**
     * This event triggers for every function call whose origin is not filtered out by {@link onDispatchFunctionCallOrigin},
     * i.e. that is not an expression list, condition, loop, function definition, assignment, replacement call,
     * access operation, pipe, `break`, or `return`.
     *
     * For example, this triggers for `data.frame` in `x <- data.frame(id = 1:5, name = letters[1:5])`.
     *
     * The default implementation does nothing.
     * @protected
     */
    onFunctionCall(_data) { }
    /** Gets all AST nodes for the predecessor vertices that are leaf nodes and exit vertices */
    getPredecessorNodes(vertexId) {
        return this.config.controlFlow.graph.outgoingEdges(vertexId)?.keys() // outgoing dependency edges are ingoing CFG edges
            .map(id => this.getCfgVertex(id))
            .flatMap(vertex => {
            if (vertex === undefined) {
                return [];
            }
            else if (this.shouldSkipVertex(vertex)) {
                // look through skipped vertices and use their predecessors instead
                return this.getPredecessorNodes(control_flow_graph_1.CfgVertex.getId(vertex));
            }
            else {
                return [control_flow_graph_1.CfgVertex.getRootId(vertex)];
            }
        })
            .toArray() ?? [];
    }
    /** Gets each variable origin that has already been visited and whose assignment has already been processed */
    getVariableOrigins(nodeId) {
        return df_helper_1.Dataflow.origin(this.config.dfg, nodeId)
            ?.filter(origin => origin.type === 0 /* OriginType.ReadVariableOrigin */)
            .map(origin => origin.id)
            .filter(origin => this.trace.has(origin) && !this.unassigned.has(origin)) ?? [];
    }
    /** We only perform widening at `for`, `while`, or `repeat` loops with more than one ingoing CFG edge */
    isWideningPoint(nodeId) {
        const ingoingEdges = this.config.controlFlow.graph.outgoingEdges(nodeId)?.size; // outgoing dependency edges are ingoing CFG edges
        if (ingoingEdges === undefined || ingoingEdges <= 1) {
            return false;
        }
        const node = this.getNormalizedAst(nodeId);
        if (model_1.RLoopConstructs.is(node)) {
            return true;
        }
        // otherwise, check whether the node is a function call that originates from a built-in loop
        const dataflowVertex = this.getDataflowGraph(nodeId);
        if (dataflowVertex?.tag !== vertex_1.VertexType.FunctionCall || !Array.isArray(dataflowVertex.origin)) {
            return false;
        }
        const origin = dataflowVertex.origin;
        return origin.includes(built_in_proc_name_1.BuiltInProcName.ForLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.WhileLoop) || origin.includes(built_in_proc_name_1.BuiltInProcName.RepeatLoop);
    }
    /**
     * Checks whether a control flow graph vertex should be skipped during visitation.
     * By default, we only process entry vertices of widening points, vertices of leaf nodes, and exit vertices (no entry nodes of complex nodes).
     */
    shouldSkipVertex(vertex) {
        if (this.isWideningPoint(control_flow_graph_1.CfgVertex.getRootId(vertex))) {
            // skip exit vertices of widening points
            return control_flow_graph_1.CfgVertex.isMarker(vertex);
        }
        return !control_flow_graph_1.CfgVertex.isMarker(vertex) && !control_flow_graph_1.CfgVertex.isBlock(vertex) && control_flow_graph_1.CfgVertex.getEnd(vertex) !== undefined;
    }
    /**
     * Whether widening should be performed at a widening point.
     * By default, we perform widening when the number of visits of the widening point reaches the widening threshold of the config.
     */
    shouldWiden(wideningPoint) {
        // NOTE(review): visitNode records visits under `CfgVertex.getRootId`, while this lookup uses `CfgVertex.getId`;
        // presumably both agree for the (non-marker) entry vertices of widening points - confirm
        return (this.visited.get(control_flow_graph_1.CfgVertex.getId(wideningPoint)) ?? 0) >= this.config.ctx.config.abstractInterpretation.wideningThreshold;
    }
}
exports.AbstractInterpretationVisitor = AbstractInterpretationVisitor;
//# sourceMappingURL=absint-visitor.js.map