// @eagleoutice/flowr
// Version: (not present in this capture)
// Static Dataflow Analyzer and Program Slicer for the R Programming Language
// 111 lines • 6.22 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.inferDataFrameShapes = inferDataFrameShapes;
exports.resolveIdToDataFrameShape = resolveIdToDataFrameShape;
exports.getVariableOrigins = getVariableOrigins;
const control_flow_graph_1 = require("../../control-flow/control-flow-graph");
const vertex_1 = require("../../dataflow/graph/vertex");
const dfg_get_origin_1 = require("../../dataflow/origin/dfg-get-origin");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const assert_1 = require("../../util/assert");
const absint_info_1 = require("./absint-info");
const absint_visitor_1 = require("./absint-visitor");
const domain_1 = require("./domain");
/**
 * Runs the data frame shape inference over a program and reports the abstract state at its exit.
 *
 * A {@link DataFrameShapeInferenceVisitor} is created from the given inputs and started. Afterwards,
 * every exit point of the control flow graph is resolved to its CFG vertex and then to the AST node
 * of that vertex's root ID; exit points that cannot be resolved at either step are skipped.
 * The data frame state recorded on each remaining exit node (`info.dataFrame.domain`, or an empty
 * `Map` if none is recorded) is collected and all collected states are joined.
 *
 * @param cfinfo - The control flow information (provides `exitPoints` and `graph`)
 * @param dfg - The data flow graph, handed to the visitor
 * @param ast - The normalized AST (its `idMap` resolves node IDs to AST nodes)
 * @param config - The flowR configuration, handed to the visitor
 * @returns The result of joining the abstract data frame states of all resolvable exit nodes
 */
function inferDataFrameShapes(cfinfo, dfg, ast, config) {
    const { getVertexRootId } = control_flow_graph_1;
    const { joinDataFrameStates } = domain_1;

    // run the inference first; the states read below are taken from the AST nodes afterwards
    new absint_visitor_1.DataFrameShapeInferenceVisitor({
        controlFlow: cfinfo,
        dfg,
        normalizedAst: ast,
        flowrConfig: config,
    }).start();

    const exitStates = [];
    for (const exitId of cfinfo.exitPoints) {
        const exitVertex = cfinfo.graph.getVertex(exitId);
        if (!assert_1.isNotUndefined(exitVertex)) {
            continue;
        }
        const exitNode = ast.idMap.get(getVertexRootId(exitVertex));
        if (!assert_1.isNotUndefined(exitNode)) {
            continue;
        }
        // exit nodes without a recorded state contribute an empty state
        exitStates.push(exitNode.info.dataFrame?.domain ?? new Map());
    }
    return joinDataFrameStates(...exitStates);
}
/**
 * Looks up the abstract data frame shape for an AST node.
 * The state consulted is either the explicitly passed `domain` or, if none is passed, the state
 * stored on the node itself (`info.dataFrame.domain`), so the shape inference must have run before.
 *
 * If the state has a direct entry for the node, that entry is returned. Otherwise the shape is derived:
 * - symbol: join of the state entries of all origins reported by {@link getVariableOrigins}
 * - argument with a value: shape of the value
 * - non-empty expression list: shape of the last child
 * - pipe node, or a call whose origins contain `builtin:pipe`: shape of the right-hand side / second argument
 * - if-then-else node with an else branch, or a three-argument call whose origins contain
 *   `builtin:if-then-else` and that has no empty argument: join of the shapes of both branches
 *
 * A join is only produced if there is at least one operand and every operand could be resolved.
 *
 * @param id - The node or node ID to get the data frame shape for
 * @param dfg - The data flow graph used to resolve IDs, vertices, and variable origins
 * @param domain - Optional abstract data frame state to look shapes up in (defaults to the state at the requested node)
 * @returns The abstract data frame shape of the node, or `undefined` if none could be resolved
 */
function resolveIdToDataFrameShape(id, dfg, domain) {
    // objects (and `undefined`) are taken as the node itself, everything else is treated as a node ID
    let node;
    if (id === undefined || typeof id === 'object') {
        node = id;
    }
    else {
        node = dfg?.idMap?.get(id);
    }
    const state = domain ?? node?.info.dataFrame?.domain;
    if (dfg === undefined || node === undefined || state === undefined) {
        return undefined;
    }
    const nodeId = node.info.id;
    if (state.has(nodeId)) {
        return state.get(nodeId);
    }

    const vertex = dfg.getVertex(nodeId);
    const call = vertex?.tag === vertex_1.VertexType.FunctionCall ? vertex : undefined;
    const origins = Array.isArray(call?.origin) ? call.origin : [];

    // nested lookups always reuse the state selected above
    const resolve = (target) => resolveIdToDataFrameShape(target, dfg, state);
    // joins the shapes only if there is at least one and none of them is unresolved
    const joinIfAllResolved = (shapes) => {
        if (shapes.length === 0 || !shapes.every(assert_1.isNotUndefined)) {
            return undefined;
        }
        const { joinDataFrames } = domain_1;
        return joinDataFrames(...shapes);
    };

    const { RType } = type_1;
    const { EmptyArgument } = r_function_call_1;

    // the order of the following checks is significant: the first matching case decides the result
    if (node.type === RType.Symbol) {
        const originShapes = getVariableOrigins(nodeId, dfg).map(origin => state.get(origin.info.id));
        return joinIfAllResolved(originShapes);
    }
    if (node.type === RType.Argument && node.value !== undefined) {
        return resolve(node.value);
    }
    if (node.type === RType.ExpressionList && node.children.length > 0) {
        return resolve(node.children[node.children.length - 1]);
    }
    if (node.type === RType.Pipe) {
        return resolve(node.rhs);
    }
    if (origins.includes('builtin:pipe')) {
        if (node.type === RType.BinaryOp) {
            return resolve(node.rhs);
        }
        if (call?.args.length === 2 && call?.args[1] !== EmptyArgument) {
            return resolve(call.args[1].nodeId);
        }
        return undefined;
    }
    if (node.type === RType.IfThenElse) {
        if (node.otherwise === undefined) {
            return undefined;
        }
        const branchShapes = [node.then, node.otherwise].map(branch => resolve(branch));
        return joinIfAllResolved(branchShapes);
    }
    const isCompleteIfCall = origins.includes('builtin:if-then-else') && call?.args.every(arg => arg !== EmptyArgument);
    if (isCompleteIfCall && call.args.length === 3) {
        // args[0] is skipped; only the second and third argument are joined
        const branchShapes = call.args.slice(1, 3).map(arg => resolve(arg.nodeId));
        return joinIfAllResolved(branchShapes);
    }
    return undefined;
}
/**
 * Collects the origin nodes of a variable that are usable for shape resolution.
 *
 * The origins reported by `getOriginInDfg` are reduced to those that
 * - are read-variable origins,
 * - resolve to an AST node via the `idMap` of the data flow graph,
 * - already carry a data frame state (`info.dataFrame.domain`), i.e., have been visited, and
 * - are not marked as `Unassigned`, i.e., their assignment has been processed.
 *
 * @param node - The ID of the node to get the origins for (passed on to `getOriginInDfg`)
 * @param dfg - The data flow graph for resolving the origins
 * @returns The origin nodes of the variable in the order they were reported; empty if no origins are reported
 */
function getVariableOrigins(node, dfg) {
    const { getOriginInDfg } = dfg_get_origin_1;
    const reported = getOriginInDfg(dfg, node);
    if (reported === undefined || reported === null) {
        return [];
    }
    const usable = [];
    for (const entry of reported) {
        // 0 is the inlined value of OriginType.ReadVariableOrigin
        if (entry.type !== 0) {
            continue;
        }
        const originNode = dfg.idMap?.get(entry.id);
        if (!assert_1.isNotUndefined(originNode)) {
            continue;
        }
        // without a state the origin has not been visited yet
        if (originNode.info.dataFrame?.domain === undefined) {
            continue;
        }
        // origins still marked as unassigned are skipped
        if ((0, absint_info_1.hasDataFrameInfoMarker)(originNode, absint_info_1.DataFrameInfoMarker.Unassigned)) {
            continue;
        }
        usable.push(originNode);
    }
    return usable;
}
//# sourceMappingURL=shape-inference.js.map