UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

111 lines 6.22 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.inferDataFrameShapes = inferDataFrameShapes;
exports.resolveIdToDataFrameShape = resolveIdToDataFrameShape;
exports.getVariableOrigins = getVariableOrigins;
const control_flow_graph_1 = require("../../control-flow/control-flow-graph");
const vertex_1 = require("../../dataflow/graph/vertex");
const dfg_get_origin_1 = require("../../dataflow/origin/dfg-get-origin");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const assert_1 = require("../../util/assert");
const absint_info_1 = require("./absint-info");
const absint_visitor_1 = require("./absint-visitor");
const domain_1 = require("./domain");
/**
 * Runs the data frame shape inference (abstract interpretation over the control flow graph of a program).
 * The inferred data frame shapes are attached directly to the AST (see {@link AbstractInterpretationInfo}).
 *
 * @param cfinfo - The control flow information; provides the control flow graph and its exit points
 * @param dfg    - The data flow graph handed to the visitor to resolve variable origins and function arguments
 * @param ast    - The normalized abstract syntax tree; its `idMap` is used to resolve node IDs to AST nodes
 * @param config - The flowR configuration handed to the visitor
 * @returns The join of the abstract data frame states found at the exit nodes of the control flow graph
 * (see {@link DataFrameStateDomain}). An exit node without an attached state contributes an empty `Map`.
 * The abstract states of all other nodes are only available through the AST.
 */
function inferDataFrameShapes(cfinfo, dfg, ast, config) {
    new absint_visitor_1.DataFrameShapeInferenceVisitor({
        controlFlow: cfinfo,
        dfg,
        normalizedAst: ast,
        flowrConfig: config
    }).start();
    // exit point IDs -> CFG vertices (IDs without a vertex are dropped)
    const exitVertices = cfinfo.exitPoints
        .map(exitId => cfinfo.graph.getVertex(exitId))
        .filter(assert_1.isNotUndefined);
    // CFG vertices -> AST nodes via the root ID of each vertex (unknown IDs are dropped)
    const exitAstNodes = exitVertices
        .map(exitVertex => ast.idMap.get((0, control_flow_graph_1.getVertexRootId)(exitVertex)))
        .filter(assert_1.isNotUndefined);
    const exitStates = exitAstNodes.map(exitNode => {
        return exitNode.info.dataFrame?.domain ?? new Map();
    });
    return (0, domain_1.joinDataFrameStates)(...exitStates);
}
/**
 * Looks up the abstract data frame shape of an AST node.
 * The shape inference must have been run beforehand via {@link inferDataFrameShapes}.
 *
 * If the state holds no direct entry for the node, the shape is derived structurally: symbols use the join of
 * their variable origins, arguments use their value, expression lists use their last child, pipes use their
 * right-hand side, and if-then-else constructs use the join of both branches.
 *
 * @param id     - The node (any object is treated as a node) or the node ID (looked up in `dfg.idMap`)
 * @param dfg    - The data flow graph used to resolve the data frame shape
 * @param domain - An optional abstract data frame state to resolve against (defaults to the state attached to the requested node)
 * @returns The abstract data frame shape of the node, or `undefined` if no shape could be resolved
 */
function resolveIdToDataFrameShape(id, dfg, domain) {
    const node = id === undefined || typeof id === 'object' ? id : dfg?.idMap?.get(id);
    const state = domain ?? node?.info.dataFrame?.domain;
    if (dfg === undefined || node === undefined || state === undefined) {
        return undefined;
    }
    const nodeId = node.info.id;
    if (state.has(nodeId)) {
        return state.get(nodeId);
    }
    // only function call vertices carry the origin list consulted below
    const vertex = dfg.getVertex(node.info.id);
    const call = vertex?.tag === vertex_1.VertexType.FunctionCall ? vertex : undefined;
    const origins = Array.isArray(call?.origin) ? call.origin : [];
    const resolve = target => resolveIdToDataFrameShape(target, dfg, state);
    // NOTE: the order of the checks below matters, the first matching case decides the result
    if (node.type === type_1.RType.Symbol) {
        const originShapes = getVariableOrigins(node.info.id, dfg).map(origin => state.get(origin.info.id));
        return joinShapesIfAllKnown(originShapes);
    }
    if (node.type === type_1.RType.Argument && node.value !== undefined) {
        return resolve(node.value);
    }
    if (node.type === type_1.RType.ExpressionList && node.children.length > 0) {
        return resolve(node.children[node.children.length - 1]);
    }
    if (node.type === type_1.RType.Pipe) {
        return resolve(node.rhs);
    }
    if (origins.includes('builtin:pipe')) {
        if (node.type === type_1.RType.BinaryOp) {
            return resolve(node.rhs);
        }
        if (call?.args.length === 2 && call?.args[1] !== r_function_call_1.EmptyArgument) {
            return resolve(call.args[1].nodeId);
        }
        return undefined;
    }
    if (node.type === type_1.RType.IfThenElse) {
        // without an else branch, no shape is resolved
        if (node.otherwise === undefined) {
            return undefined;
        }
        return joinShapesIfAllKnown([node.then, node.otherwise].map(branch => resolve(branch)));
    }
    if (origins.includes('builtin:if-then-else') && call?.args.every(arg => arg !== r_function_call_1.EmptyArgument)) {
        if (call.args.length !== 3) {
            return undefined;
        }
        // the arguments at index 1 and 2 are resolved and joined
        return joinShapesIfAllKnown(call.args.slice(1, 3).map(branch => resolve(branch.nodeId)));
    }
    return undefined;
}
/**
 * Joins the given data frame shapes, but only if there is at least one shape and none of them is `undefined`.
 *
 * @param shapes - The (possibly undefined) shapes to join
 * @returns The joined shape, or `undefined` if the list is empty or contains an undefined entry
 */
function joinShapesIfAllKnown(shapes) {
    if (shapes.length === 0 || !shapes.every(assert_1.isNotUndefined)) {
        return undefined;
    }
    return (0, domain_1.joinDataFrames)(...shapes);
}
/**
 * Collects the origins of a variable in the data flow graph that have already been visited.
 *
 * @param node - The node to get the origins for
 * @param dfg  - The data flow graph for resolving the origins
 * @returns The origin nodes of the variable (empty if no origins are available)
 */
function getVariableOrigins(node, dfg) {
    const READ_VARIABLE_ORIGIN = 0 /* OriginType.ReadVariableOrigin */;
    const allOrigins = (0, dfg_get_origin_1.getOriginInDfg)(dfg, node);
    if (allOrigins === undefined || allOrigins === null) {
        return [];
    }
    const readOrigins = allOrigins.filter(origin => origin.type === READ_VARIABLE_ORIGIN);
    const originNodes = readOrigins
        .map(entry => dfg.idMap?.get(entry.id))
        .filter(assert_1.isNotUndefined);
    // keep only origins that have already been visited (i.e., have a state attached) ...
    const visitedOrigins = originNodes.filter(origin => origin.info.dataFrame?.domain !== undefined);
    // ... and whose assignment has already been processed (i.e., that are not marked as unassigned)
    return visitedOrigins.filter(origin => {
        return !(0, absint_info_1.hasDataFrameInfoMarker)(origin, absint_info_1.DataFrameInfoMarker.Unassigned);
    });
}
//# sourceMappingURL=shape-inference.js.map