UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

468 lines 21.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ResolvedCallSuffix = void 0; exports.extractCfg = extractCfg; exports.extractCfgQuick = extractCfgQuick; exports.cfg2quads = cfg2quads; const quads_1 = require("../util/quads"); const fold_1 = require("../r-bridge/lang-4.x/ast/model/processing/fold"); const convert_values_1 = require("../r-bridge/lang-4.x/convert-values"); const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const linker_1 = require("../dataflow/internal/linker"); const vertex_1 = require("../dataflow/graph/vertex"); const control_flow_graph_1 = require("./control-flow-graph"); const cfg_simplification_1 = require("./cfg-simplification"); const assert_1 = require("../util/assert"); const cfgFolds = { foldNumber: cfgLeaf(control_flow_graph_1.CfgVertexType.Expression), foldString: cfgLeaf(control_flow_graph_1.CfgVertexType.Expression), foldLogical: cfgLeaf(control_flow_graph_1.CfgVertexType.Expression), foldSymbol: cfgLeaf(control_flow_graph_1.CfgVertexType.Expression), foldAccess: cfgAccess, foldBinaryOp: cfgBinaryOp, foldPipe: cfgBinaryOp, foldUnaryOp: cfgUnaryOp, other: { foldComment: cfgIgnore, foldLineDirective: cfgIgnore }, loop: { foldFor: cfgFor, foldRepeat: cfgRepeat, foldWhile: cfgWhile, foldBreak: cfgBreak, foldNext: cfgNext }, foldIfThenElse: cfgIfThenElse, foldExprList: cfgExprList, functions: { foldFunctionDefinition: cfgFunctionDefinition, foldFunctionCall: cfgFunctionCall, foldParameter: cfgArgumentOrParameter, foldArgument: cfgArgumentOrParameter } }; function dataflowCfgFolds(dataflowGraph) { return { ...cfgFolds, functions: { ...cfgFolds.functions, foldFunctionCall: cfgFunctionCallWithDataflow(dataflowGraph) } }; } /** * Given a normalized AST, this approximates the control flow graph of the program. * This view is different from the computation of the dataflow graph and may differ, * especially because it focuses on intra-procedural analysis. * * @param ast - the normalized AST * @param config - the flowR config * @param graph - additional dataflow facts to consider by the control flow extraction * @param simplifications - a list of simplification passes to apply to the control flow graph * * @see {@link extractCfgQuick} - for a simplified version of this function */ function extractCfg(ast, config, graph, simplifications) { return (0, cfg_simplification_1.simplifyControlFlowInformation)((0, fold_1.foldAst)(ast.ast, graph ? dataflowCfgFolds(graph) : cfgFolds), { ast, dfg: graph, config }, simplifications); } /** * A version of {@link extractCfg} that is much quicker and does not apply any simplifciations or dataflow information. */ function extractCfgQuick(ast) { return (0, fold_1.foldAst)(ast.ast, cfgFolds); } function cfgLeaf(type) { return ({ info: { id } }) => { return { graph: new control_flow_graph_1.ControlFlowGraph().addVertex({ id, type }), breaks: [], nexts: [], returns: [], exitPoints: [id], entryPoints: [id] }; }; } function cfgBreak(leaf) { return { ...cfgLeaf(control_flow_graph_1.CfgVertexType.Statement)(leaf), breaks: [leaf.info.id], exitPoints: [] }; } function cfgNext(leaf) { return { ...cfgLeaf(control_flow_graph_1.CfgVertexType.Statement)(leaf), nexts: [leaf.info.id], exitPoints: [] }; } function cfgIgnore(_leaf) { return { graph: new control_flow_graph_1.ControlFlowGraph(), breaks: [], nexts: [], returns: [], exitPoints: [], entryPoints: [] }; } function identifyMayStatementType(node) { return node.info.role === "expr-list-child" /* RoleInParent.ExpressionListChild */ ? control_flow_graph_1.CfgVertexType.Statement : control_flow_graph_1.CfgVertexType.Expression; } function cfgIfThenElse(ifNode, condition, then, otherwise) { const ifId = ifNode.info.id; const graph = new control_flow_graph_1.ControlFlowGraph(); graph.addVertex({ id: ifId, type: identifyMayStatementType(ifNode), mid: condition.exitPoints, end: [ifId + '-exit'] }); graph.addVertex({ id: ifId + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: ifId }); graph.mergeWith(condition.graph); graph.mergeWith(then.graph); if (otherwise) { graph.mergeWith(otherwise.graph); } for (const e of condition.exitPoints) { for (const entryPoint of then.entryPoints) { graph.addEdge(entryPoint, e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: ifId }); } for (const entryPoint of otherwise?.entryPoints ?? []) { graph.addEdge(entryPoint, e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: ifId }); } } for (const entryPoint of condition.entryPoints) { graph.addEdge(entryPoint, ifId, { label: 0 /* CfgEdgeType.Fd */ }); } for (const exit of [...then.exitPoints, ...otherwise?.exitPoints ?? []]) { graph.addEdge(ifId + '-exit', exit, { label: 0 /* CfgEdgeType.Fd */ }); } if (!otherwise) { for (const e of condition.exitPoints) { graph.addEdge(ifId + '-exit', e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: ifId }); } } return { graph, breaks: [...then.breaks, ...otherwise?.breaks ?? []], nexts: [...then.nexts, ...otherwise?.nexts ?? []], returns: [...then.returns, ...otherwise?.returns ?? []], exitPoints: [ifId + '-exit'], entryPoints: [ifId] }; } function cfgRepeat(repeat, body) { const graph = body.graph; graph.addVertex({ id: repeat.info.id, type: identifyMayStatementType(repeat), end: [repeat.info.id + '-exit'] }); graph.addVertex({ id: repeat.info.id + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: repeat.info.id }); for (const entryPoint of body.entryPoints) { graph.addEdge(entryPoint, repeat.info.id, { label: 0 /* CfgEdgeType.Fd */ }); } // loops automatically for (const next of [...body.nexts, ...body.exitPoints]) { graph.addEdge(repeat.info.id, next, { label: 0 /* CfgEdgeType.Fd */ }); } for (const breakPoint of body.breaks) { graph.addEdge(repeat.info.id + '-exit', breakPoint, { label: 0 /* CfgEdgeType.Fd */ }); } return { graph, breaks: [], nexts: [], returns: body.returns, exitPoints: [repeat.info.id + '-exit'], entryPoints: [repeat.info.id] }; } function cfgWhile(whileLoop, condition, body) { const whileId = whileLoop.info.id; const graph = condition.graph; graph.addVertex({ id: whileId, type: identifyMayStatementType(whileLoop), mid: condition.exitPoints, end: [whileId + '-exit'] }); graph.addVertex({ id: whileId + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: whileId }); graph.mergeWith(body.graph); for (const entry of condition.entryPoints) { graph.addEdge(entry, whileId, { label: 0 /* CfgEdgeType.Fd */ }); } for (const e of condition.exitPoints) { for (const entry of body.entryPoints) { graph.addEdge(entry, e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: whileId }); } } for (const next of [...body.nexts, ...body.exitPoints]) { graph.addEdge(whileId, next, { label: 0 /* CfgEdgeType.Fd */ }); } for (const breakPoint of body.breaks) { graph.addEdge(whileId + '-exit', breakPoint, { label: 0 /* CfgEdgeType.Fd */ }); } // while can break on the condition as well for (const e of condition.exitPoints) { graph.addEdge(whileId + '-exit', e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: whileId }); } return { graph, breaks: [], nexts: [], returns: body.returns, exitPoints: [whileId + '-exit'], entryPoints: [whileId] }; } function cfgFor(forLoop, variable, vector, body) { const forLoopId = forLoop.info.id; const graph = variable.graph; graph.addVertex({ id: forLoopId, type: identifyMayStatementType(forLoop), end: [forLoopId + '-exit'], mid: variable.exitPoints }); graph.mergeWith(vector.graph); graph.mergeWith(body.graph); for (const entry of vector.entryPoints) { graph.addEdge(entry, forLoopId, { label: 0 /* CfgEdgeType.Fd */ }); } for (const exit of vector.exitPoints) { for (const entry of variable.entryPoints) { graph.addEdge(entry, exit, { label: 0 /* CfgEdgeType.Fd */ }); } } for (const e of variable.exitPoints) { for (const entry of body.entryPoints) { graph.addEdge(entry, e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: forLoopId }); } } for (const next of [...body.nexts, ...body.exitPoints]) { graph.addEdge(forLoopId, next, { label: 0 /* CfgEdgeType.Fd */ }); } for (const breakPoint of body.breaks) { graph.addEdge(forLoopId + '-exit', breakPoint, { label: 0 /* CfgEdgeType.Fd */ }); } const isNotEndless = body.exitPoints.length > 0 || body.breaks.length > 0; if (isNotEndless) { graph.addVertex({ id: forLoopId + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: forLoopId }); for (const e of variable.exitPoints) { graph.addEdge(forLoopId + '-exit', e, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: forLoopId }); } } return { graph, breaks: [], nexts: [], returns: body.returns, exitPoints: isNotEndless ? [forLoopId + '-exit'] : [], entryPoints: [forLoopId] }; } function cfgFunctionDefinition(fn, params, body) { const fnId = fn.info.id; const graph = new control_flow_graph_1.ControlFlowGraph(); let paramExits = params.flatMap(e => e.exitPoints); const children = [...paramExits, fnId + '-exit']; graph.addVertex({ id: fnId + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: fnId }, false); graph.addVertex({ id: fnId, children, type: identifyMayStatementType(fn), mid: paramExits, end: [fnId + '-exit'] }); graph.mergeWith(body.graph, true); children.push(...body.graph.rootIds()); for (const param of params) { graph.mergeWith(param.graph, true); children.push(...param.graph.rootIds()); for (const entry of param.entryPoints) { graph.addEdge(entry, fnId, { label: 0 /* CfgEdgeType.Fd */ }); } } if (paramExits.length === 0) { paramExits = [fnId]; } for (const e of paramExits) { for (const entry of body.entryPoints) { graph.addEdge(entry, e, { label: 0 /* CfgEdgeType.Fd */ }); } } // breaks and nexts should be illegal but safe is safe, I guess for (const next of body.returns.concat(body.breaks, body.nexts, body.exitPoints)) { graph.addEdge(fnId + '-exit', next, { label: 0 /* CfgEdgeType.Fd */ }); } return { graph: graph, breaks: [], nexts: [], returns: [], exitPoints: [fnId], entryPoints: [fnId] }; } function cfgFunctionCall(call, name, args, exit = 'exit') { const callId = call.info.id; const graph = name.graph; const info = { graph, breaks: [...name.breaks], nexts: [...name.nexts], returns: [...name.returns], exitPoints: [callId + '-' + exit], entryPoints: [callId] }; graph.addVertex({ id: callId, type: identifyMayStatementType(call), mid: name.exitPoints, end: [callId + '-' + exit] }); for (const entryPoint of name.entryPoints) { graph.addEdge(entryPoint, callId, { label: 0 /* CfgEdgeType.Fd */ }); } graph.addVertex({ id: callId + '-' + exit, type: control_flow_graph_1.CfgVertexType.EndMarker, root: callId }); let lastArgExits = name.exitPoints; for (const arg of args) { if (arg === r_function_call_1.EmptyArgument) { continue; } graph.mergeWith(arg.graph); info.breaks = info.breaks.concat(arg.breaks); info.nexts = info.nexts.concat(arg.nexts); info.returns = info.returns.concat(arg.returns); for (const entry of arg.entryPoints) { for (const exit of lastArgExits) { graph.addEdge(entry, exit, { label: 0 /* CfgEdgeType.Fd */ }); } } lastArgExits = arg.exitPoints; } for (const exit of lastArgExits) { graph.addEdge(callId + '-exit', exit, { label: 0 /* CfgEdgeType.Fd */ }); } // should not contain any breaks, nexts, or returns, (except for the body if something like 'break()') return info; } exports.ResolvedCallSuffix = '-resolved-call-exit'; function cfgFunctionCallWithDataflow(graph) { return (call, name, args) => { const baseCfg = cfgFunctionCall(call, name, args); /* try to resolve the call and link the target definitions */ const targets = (0, linker_1.getAllFunctionCallTargets)(call.info.id, graph); const exits = []; const callVertex = baseCfg.graph.getVertex(call.info.id); (0, assert_1.guard)(callVertex !== undefined, 'cfgFunctionCallWithDataflow: call vertex not found'); for (const target of targets) { // we have to filter out non-func-call targets as the call targets contains names and call ids if ((0, vertex_1.isFunctionDefinitionVertex)(graph.getVertex(target))) { callVertex.callTargets ??= new Set(); callVertex.callTargets.add(target); exits.push(target + '-exit'); } } if (exits.length > 0) { baseCfg.graph.addVertex({ id: call.info.id + exports.ResolvedCallSuffix, type: control_flow_graph_1.CfgVertexType.EndMarker, root: call.info.id }); for (const exit of [...baseCfg.exitPoints, ...exits]) { baseCfg.graph.addEdge(call.info.id + exports.ResolvedCallSuffix, exit, { label: 0 /* CfgEdgeType.Fd */ }); } return { ...baseCfg, exitPoints: [call.info.id + exports.ResolvedCallSuffix] }; } else { return baseCfg; } }; } function cfgArgumentOrParameter(node, name, value) { const graph = new control_flow_graph_1.ControlFlowGraph(); const info = { graph, breaks: [], nexts: [], returns: [], exitPoints: [node.info.id + '-exit'], entryPoints: [node.info.id] }; let currentExitPoints = name?.exitPoints ?? [node.info.id]; graph.addVertex({ id: node.info.id, type: control_flow_graph_1.CfgVertexType.Expression, mid: currentExitPoints, end: [node.info.id + '-exit'] }); if (name) { graph.mergeWith(name.graph); info.breaks = info.breaks.concat(name.breaks); info.nexts = info.nexts.concat(name.nexts); info.returns = info.returns.concat(name.returns); for (const entry of name.entryPoints) { graph.addEdge(entry, node.info.id, { label: 0 /* CfgEdgeType.Fd */ }); } } if (value) { graph.mergeWith(value.graph); info.breaks = info.breaks.concat(value.breaks); info.nexts = info.nexts.concat(value.nexts); info.returns = info.returns.concat(value.returns); for (const exitPoint of currentExitPoints) { for (const entry of value.entryPoints) { graph.addEdge(entry, exitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } } currentExitPoints = value.exitPoints; } graph.addVertex({ id: node.info.id + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: node.info.id }); for (const exit of currentExitPoints) { graph.addEdge(node.info.id + '-exit', exit, { label: 0 /* CfgEdgeType.Fd */ }); } return info; } function cfgBinaryOp(binOp, lhs, rhs) { const graph = new control_flow_graph_1.ControlFlowGraph().mergeWith(lhs.graph).mergeWith(rhs.graph); const result = { graph, breaks: [...lhs.breaks, ...rhs.breaks], nexts: [...lhs.nexts, ...rhs.nexts], returns: [...lhs.returns, ...rhs.returns], entryPoints: [binOp.info.id], exitPoints: [binOp.info.id + '-exit'] }; graph.addVertex({ id: binOp.info.id, type: binOp.flavor === 'assignment' ? control_flow_graph_1.CfgVertexType.Statement : control_flow_graph_1.CfgVertexType.Expression, end: [binOp.info.id + '-exit'] }); graph.addVertex({ id: binOp.info.id + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: binOp.info.id }); for (const exitPoint of lhs.exitPoints) { for (const entryPoint of rhs.entryPoints) { result.graph.addEdge(entryPoint, exitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } } for (const entryPoint of lhs.entryPoints) { graph.addEdge(entryPoint, binOp.info.id, { label: 0 /* CfgEdgeType.Fd */ }); } for (const exitPoint of rhs.exitPoints) { graph.addEdge(binOp.info.id + '-exit', exitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } return result; } function cfgAccess(access, name, accessors) { const result = { ...name }; const graph = result.graph; graph.addVertex({ id: access.info.id, type: control_flow_graph_1.CfgVertexType.Expression, mid: name.exitPoints, end: [access.info.id + '-exit'] }); result.entryPoints = [access.info.id]; for (const entry of name.entryPoints) { graph.addEdge(entry, access.info.id, { label: 0 /* CfgEdgeType.Fd */ }); } result.exitPoints = name.exitPoints; for (const accessor of accessors) { if (accessor === r_function_call_1.EmptyArgument) { continue; } graph.mergeWith(accessor.graph); for (const exitPoint of result.exitPoints) { for (const entry of accessor.entryPoints) { graph.addEdge(entry, exitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } } result.exitPoints = accessor.exitPoints; result.breaks = result.breaks.concat(accessor.breaks); result.nexts = result.nexts.concat(accessor.nexts); result.returns = result.returns.concat(accessor.returns); } for (const exitPoint of result.exitPoints) { graph.addEdge(access.info.id + '-exit', exitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } graph.addVertex({ id: access.info.id + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: access.info.id }); result.exitPoints = [access.info.id + '-exit']; return result; } function cfgUnaryOp(unary, operand) { const graph = operand.graph; graph.addVertex({ id: unary.info.id, type: control_flow_graph_1.CfgVertexType.EndMarker, root: unary.info.id }); for (const entry of operand.exitPoints) { graph.addEdge(unary.info.id, entry, { label: 0 /* CfgEdgeType.Fd */ }); } return { ...operand, graph, exitPoints: [unary.info.id] }; } function cfgExprList(node, _grouping, expressions) { const result = { graph: new control_flow_graph_1.ControlFlowGraph(), breaks: [], nexts: [], returns: [], exitPoints: [node.info.id], entryPoints: [node.info.id] }; const vtx = { id: node.info.id, type: control_flow_graph_1.CfgVertexType.Expression, end: [] }; result.graph.addVertex(vtx); for (const expression of expressions) { for (const previousExitPoint of result.exitPoints) { for (const entryPoint of expression.entryPoints) { result.graph.addEdge(entryPoint, previousExitPoint, { label: 0 /* CfgEdgeType.Fd */ }); } } result.graph.mergeWith(expression.graph); result.breaks = result.breaks.concat(expression.breaks); result.nexts = result.nexts.concat(expression.nexts); result.returns = result.returns.concat(expression.returns); result.exitPoints = expression.exitPoints; } if (result.exitPoints.length > 0) { result.graph.addVertex({ id: node.info.id + '-exit', type: control_flow_graph_1.CfgVertexType.EndMarker, root: node.info.id }); vtx.end = [node.info.id + '-exit']; } else { vtx.end = undefined; } for (const exit of result.exitPoints) { result.graph.addEdge(node.info.id + '-exit', exit, { label: 0 /* CfgEdgeType.Fd */ }); } result.exitPoints = result.exitPoints.length > 0 ? [node.info.id + '-exit'] : []; return result; } /** * Convert a cfg to RDF quads. * * @see {@link df2quads} * @see {@link serialize2quads} * @see {@link graph2quads} */ function cfg2quads(cfg, config) { return (0, quads_1.graph2quads)({ rootIds: [...cfg.graph.rootIds()], vertices: [...cfg.graph.vertices().entries()] .map(([id, v]) => ({ id, children: v.children })), edges: [...cfg.graph.edges()].flatMap(([fromId, targets]) => [...targets].map(([toId, info]) => ({ from: fromId, to: toId, type: info.label, when: info.when }))), entryPoints: cfg.entryPoints, exitPoints: cfg.exitPoints, breaks: cfg.breaks, nexts: cfg.nexts, returns: cfg.returns }, config); } //# sourceMappingURL=extract-cfg.js.map