UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

551 lines 23.5 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ControlFlowGraph = void 0;
exports.emptyControlFlowInformation = emptyControlFlowInformation;
exports.extractCFG = extractCFG;
exports.equalCfg = equalCfg;
exports.cfg2quads = cfg2quads;
const set_1 = require("../set");
const quads_1 = require("../quads");
const log_1 = require("../log");
const json_1 = require("../json");
const fold_1 = require("../../r-bridge/lang-4.x/ast/model/processing/fold");
const convert_values_1 = require("../../r-bridge/lang-4.x/convert-values");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const linker_1 = require("../../dataflow/internal/linker");
const vertex_1 = require("../../dataflow/graph/vertex");

/**
 * This class represents the control flow graph of an R program.
 * The control flow may be hierarchical when confronted with function definitions
 * (see {@link CfgVertex} and {@link ControlFlowGraph#rootVertexIds|rootVertexIds()}).
 *
 * Throughout this file edges are added as `addEdge(successor, predecessor, …)`, i.e. an edge
 * points from a vertex to the vertex that precedes it.
 * The labels used are `'FD'` and `'CD'` (presumably flow- and control-dependency — confirm
 * against the type declarations).
 */
class ControlFlowGraph {
    /** ids of all vertices that are not nested within another vertex */
    rootVertices = new Set();
    /** vertex id -> vertex */
    vertexInformation = new Map();
    /** source vertex id -> (target vertex id -> edge) */
    edgeInformation = new Map();

    /**
     * Adds the given vertex to the graph.
     *
     * @param vertex     - the vertex to add, identified by `vertex.id`
     * @param rootVertex - whether the id should also be recorded as a root vertex
     * @returns this graph (for chaining)
     * @throws Error if a vertex with the same id is already present
     */
    addVertex(vertex, rootVertex = true) {
        if (this.vertexInformation.has(vertex.id)) {
            throw new Error(`Node with id ${vertex.id} already exists`);
        }
        this.vertexInformation.set(vertex.id, vertex);
        if (rootVertex) {
            this.rootVertices.add(vertex.id);
        }
        return this;
    }

    /**
     * Adds (or overwrites) the edge `from -> to`. The endpoints are not checked for existence.
     *
     * @returns this graph (for chaining)
     */
    addEdge(from, to, edge) {
        let targets = this.edgeInformation.get(from);
        if (targets === undefined) {
            targets = new Map();
            this.edgeInformation.set(from, targets);
        }
        targets.set(to, edge);
        return this;
    }

    /** Returns the map of outgoing edges (target id -> edge) of `node`, or `undefined` if it has none. */
    outgoing(node) {
        return this.edgeInformation.get(node);
    }

    /** Returns the set of root vertex ids (the internal set, not a copy). */
    rootVertexIds() {
        return this.rootVertices;
    }

    /** Returns the map of all vertices (the internal map, not a copy). */
    vertices() {
        return this.vertexInformation;
    }

    /** Returns the map of all edges (the internal map, not a copy). */
    edges() {
        return this.edgeInformation;
    }

    /**
     * Copies all vertices and edges of `other` into this graph.
     *
     * @param other       - the graph to merge into this one (not modified)
     * @param forceNested - if true, none of the merged vertices is registered as a root vertex;
     *                      otherwise a vertex is a root iff it is a root in `other`
     * @returns this graph (for chaining)
     * @throws Error if `other` contains a vertex id that already exists in this graph (see {@link addVertex})
     */
    merge(other, forceNested = false) {
        for (const [id, node] of other.vertexInformation) {
            this.addVertex(node, forceNested ? false : other.rootVertices.has(id));
        }
        for (const [from, edges] of other.edgeInformation) {
            for (const [to, edge] of edges) {
                this.addEdge(from, to, edge);
            }
        }
        return this;
    }
}
exports.ControlFlowGraph = ControlFlowGraph;

/**
 * Creates control flow information with a fresh, empty graph and no
 * returns, breaks, nexts, entry points, or exit points.
 */
function emptyControlFlowInformation() {
    return {
        returns: [],
        breaks: [],
        nexts: [],
        entryPoints: [],
        exitPoints: [],
        graph: new ControlFlowGraph()
    };
}

/** The fold functions handed to `foldAst` to build the control flow information bottom-up. */
const cfgFolds = {
    foldNumber: cfgLeaf("expression" /* CfgVertexType.Expression */),
    foldString: cfgLeaf("expression" /* CfgVertexType.Expression */),
    foldLogical: cfgLeaf("expression" /* CfgVertexType.Expression */),
    foldSymbol: cfgLeaf("expression" /* CfgVertexType.Expression */),
    foldAccess: cfgAccess,
    foldBinaryOp: cfgBinaryOp,
    foldPipe: cfgBinaryOp,
    foldUnaryOp: cfgUnaryOp,
    other: {
        foldComment: cfgIgnore,
        foldLineDirective: cfgIgnore
    },
    loop: {
        foldFor: cfgFor,
        foldRepeat: cfgRepeat,
        foldWhile: cfgWhile,
        foldBreak: cfgBreak,
        foldNext: cfgNext
    },
    foldIfThenElse: cfgIfThenElse,
    foldExprList: cfgExprList,
    functions: {
        foldFunctionDefinition: cfgFunctionDefinition,
        foldFunctionCall: cfgFunctionCall,
        foldParameter: cfgArgumentOrParameter,
        foldArgument: cfgArgumentOrParameter
    }
};

/**
 * Returns a copy of {@link cfgFolds} in which function calls are folded with
 * {@link cfgFunctionCallWithDataflow}, using the given dataflow graph to resolve call targets.
 */
function dataflowCfgFolds(dataflowGraph) {
    return {
        ...cfgFolds,
        functions: {
            ...cfgFolds.functions,
            foldFunctionCall: cfgFunctionCallWithDataflow(dataflowGraph)
        }
    };
}

/**
 * Given a normalized AST this approximates the control flow graph of the program.
 * This view is different from the computation of the dataflow graph and may differ,
 * especially because it focuses on intra-procedural analysis.
 *
 * @param ast - the normalized AST
 * @param graph - additional dataflow facts to consider by the control flow extraction
 */
function extractCFG(ast, graph) {
    return (0, fold_1.foldAst)(ast.ast, graph ? dataflowCfgFolds(graph) : cfgFolds);
}

/**
 * Creates a fold function for leaf nodes: the result is a graph with a single vertex
 * (id `leaf.info.id`, name `leaf.type`, the given `type`) that is both entry and exit point.
 */
function cfgLeaf(type) {
    return (leaf) => {
        const graph = new ControlFlowGraph();
        graph.addVertex({ id: leaf.info.id, name: leaf.type, type });
        return {
            graph,
            breaks: [],
            nexts: [],
            returns: [],
            exitPoints: [leaf.info.id],
            entryPoints: [leaf.info.id]
        };
    };
}

/** Folds a `break`: a statement leaf whose id is additionally reported in `breaks`. */
function cfgBreak(leaf) {
    return { ...cfgLeaf("statement" /* CfgVertexType.Statement */)(leaf), breaks: [leaf.info.id] };
}

/** Folds a `next`: a statement leaf whose id is additionally reported in `nexts`. */
function cfgNext(leaf) {
    return { ...cfgLeaf("statement" /* CfgVertexType.Statement */)(leaf), nexts: [leaf.info.id] };
}

/** Folds nodes that do not take part in the control flow (comments, line directives) to empty information. */
function cfgIgnore(_leaf) {
    return {
        graph: new ControlFlowGraph(),
        breaks: [],
        nexts: [],
        returns: [],
        exitPoints: [],
        entryPoints: []
    };
}

/** A node is a statement if it is the direct child of an expression list, otherwise an expression. */
function identifyMayStatementType(node) {
    return node.info.role === "expr-list-child" /* RoleInParent.ExpressionListChild */
        ? "statement" /* CfgVertexType.Statement */
        : "expression" /* CfgVertexType.Expression */;
}

/**
 * Folds an if-then-else. Creates the vertex `<id>` (entry) and the end marker `<id>-exit` (exit).
 *
 * @param ifNode    - the if node
 * @param condition - folded condition
 * @param then      - folded then-branch
 * @param otherwise - folded else-branch, may be absent
 */
function cfgIfThenElse(ifNode, condition, then, otherwise) {
    const graph = new ControlFlowGraph();
    graph.addVertex({ id: ifNode.info.id, name: ifNode.type, type: identifyMayStatementType(ifNode) });
    graph.addVertex({ id: ifNode.info.id + '-exit', name: 'if-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(condition.graph);
    graph.merge(then.graph);
    if (otherwise) {
        graph.merge(otherwise.graph);
    }
    // the branches are entered from the condition, depending on its outcome
    for (const exitPoint of condition.exitPoints) {
        for (const entryPoint of then.entryPoints) {
            graph.addEdge(entryPoint, exitPoint, { label: 'CD', when: convert_values_1.RTrue, caused: ifNode.info.id });
        }
        for (const entryPoint of otherwise?.entryPoints ?? []) {
            graph.addEdge(entryPoint, exitPoint, { label: 'CD', when: convert_values_1.RFalse, caused: ifNode.info.id });
        }
    }
    for (const entryPoint of condition.entryPoints) {
        graph.addEdge(entryPoint, ifNode.info.id, { label: 'FD' });
    }
    for (const exit of [...then.exitPoints, ...(otherwise?.exitPoints ?? [])]) {
        graph.addEdge(ifNode.info.id + '-exit', exit, { label: 'FD' });
    }
    if (!otherwise) {
        // without an else-branch a false condition leads directly to the exit marker
        for (const exitPoint of condition.exitPoints) {
            graph.addEdge(ifNode.info.id + '-exit', exitPoint, { label: 'CD', when: convert_values_1.RFalse, caused: ifNode.info.id });
        }
    }
    return {
        graph,
        breaks: [...then.breaks, ...(otherwise?.breaks ?? [])],
        nexts: [...then.nexts, ...(otherwise?.nexts ?? [])],
        returns: [...then.returns, ...(otherwise?.returns ?? [])],
        exitPoints: [ifNode.info.id + '-exit'],
        entryPoints: [ifNode.info.id]
    };
}

/**
 * Folds a repeat loop. Reuses (and extends) the graph of `body`; creates the vertex `<id>` (entry)
 * and the end marker `<id>-exit` (exit). Breaks and nexts of the body are consumed by the loop.
 */
function cfgRepeat(repeat, body) {
    const graph = body.graph;
    graph.addVertex({ id: repeat.info.id, name: repeat.type, type: identifyMayStatementType(repeat) });
    graph.addVertex({ id: repeat.info.id + '-exit', name: 'repeat-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    for (const entryPoint of body.entryPoints) {
        graph.addEdge(entryPoint, repeat.info.id, { label: 'FD' });
    }
    // loops automatically
    for (const next of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(repeat.info.id, next, { label: 'FD' });
    }
    for (const breakPoint of body.breaks) {
        graph.addEdge(repeat.info.id + '-exit', breakPoint, { label: 'FD' });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [repeat.info.id + '-exit'],
        entryPoints: [repeat.info.id]
    };
}

/**
 * Folds a while loop. Reuses (and extends) the graph of `condition`; creates the vertex `<id>` (entry)
 * and the end marker `<id>-exit` (exit). Breaks and nexts of the body are consumed by the loop.
 */
function cfgWhile(whileLoop, condition, body) {
    const graph = condition.graph;
    graph.addVertex({ id: whileLoop.info.id, name: whileLoop.type, type: identifyMayStatementType(whileLoop) });
    graph.addVertex({ id: whileLoop.info.id + '-exit', name: 'while-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(body.graph);
    for (const entry of condition.entryPoints) {
        graph.addEdge(entry, whileLoop.info.id, { label: 'FD' });
    }
    // the body is entered if the condition holds
    for (const exit of condition.exitPoints) {
        for (const entry of body.entryPoints) {
            graph.addEdge(entry, exit, { label: 'CD', when: convert_values_1.RTrue, caused: whileLoop.info.id });
        }
    }
    for (const entryPoint of body.entryPoints) {
        graph.addEdge(whileLoop.info.id, entryPoint, { label: 'FD' });
    }
    // after the body (or a next) we are back at the loop
    for (const next of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(whileLoop.info.id, next, { label: 'FD' });
    }
    for (const breakPoint of body.breaks) {
        graph.addEdge(whileLoop.info.id + '-exit', breakPoint, { label: 'FD' });
    }
    // while can break on the condition as well
    for (const exit of condition.exitPoints) {
        graph.addEdge(whileLoop.info.id + '-exit', exit, { label: 'CD', when: convert_values_1.RFalse, caused: whileLoop.info.id });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [whileLoop.info.id + '-exit'],
        entryPoints: [whileLoop.info.id]
    };
}

/**
 * Folds a for loop. Reuses (and extends) the graph of `variable`; creates the vertex `<id>` (entry)
 * and the end marker `<id>-exit` (exit). The vector is linked before the variable, the variable
 * before the body. Breaks and nexts of the body are consumed by the loop.
 */
function cfgFor(forLoop, variable, vector, body) {
    const graph = variable.graph;
    graph.addVertex({ id: forLoop.info.id, name: forLoop.type, type: identifyMayStatementType(forLoop) });
    graph.addVertex({ id: forLoop.info.id + '-exit', name: 'for-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(vector.graph);
    graph.merge(body.graph);
    for (const entry of vector.entryPoints) {
        graph.addEdge(entry, forLoop.info.id, { label: 'FD' });
    }
    for (const exit of vector.exitPoints) {
        for (const entry of variable.entryPoints) {
            graph.addEdge(entry, exit, { label: 'FD' });
        }
    }
    // the body is entered from the loop variable (mirrors the condition of a while loop)
    for (const exit of variable.exitPoints) {
        for (const entry of body.entryPoints) {
            graph.addEdge(entry, exit, { label: 'CD', when: convert_values_1.RTrue, caused: forLoop.info.id });
        }
    }
    for (const next of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(forLoop.info.id, next, { label: 'FD' });
    }
    for (const breakPoint of body.breaks) {
        graph.addEdge(forLoop.info.id + '-exit', breakPoint, { label: 'FD' });
    }
    // like while on its condition, for can leave the loop at the loop variable as well
    for (const exit of variable.exitPoints) {
        graph.addEdge(forLoop.info.id + '-exit', exit, { label: 'CD', when: convert_values_1.RFalse, caused: forLoop.info.id });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [forLoop.info.id + '-exit'],
        entryPoints: [forLoop.info.id]
    };
}

/**
 * Folds a function definition. The definition itself is a single root vertex `<id>` (entry and exit);
 * the parameters, the body, and the markers `<id>-params` and `<id>-exit` are merged as nested
 * (non-root) vertices and recorded in the `children` of the definition vertex.
 */
function cfgFunctionDefinition(fn, params, body) {
    const graph = new ControlFlowGraph();
    // NOTE: this array is shared with the vertex below, so later pushes extend the vertex' children
    const children = [fn.info.id + '-params', fn.info.id + '-exit'];
    graph.addVertex({ id: fn.info.id + '-params', name: 'function-parameters', type: "mid-marker" /* CfgVertexType.MidMarker */ }, false);
    graph.addVertex({ id: fn.info.id + '-exit', name: 'function-exit', type: "end-marker" /* CfgVertexType.EndMarker */ }, false);
    graph.addVertex({ id: fn.info.id, name: fn.type, children, type: identifyMayStatementType(fn) });
    graph.merge(body.graph, true);
    children.push(...body.graph.rootVertexIds());
    for (const param of params) {
        graph.merge(param.graph, true);
        children.push(...param.graph.rootVertexIds());
        for (const entry of param.entryPoints) {
            graph.addEdge(entry, fn.info.id, { label: 'FD' });
        }
        for (const exit of param.exitPoints) {
            graph.addEdge(fn.info.id + '-params', exit, { label: 'FD' });
        }
    }
    if (params.length === 0) {
        graph.addEdge(fn.info.id + '-params', fn.info.id, { label: 'FD' });
    }
    for (const entry of body.entryPoints) {
        graph.addEdge(entry, fn.info.id + '-params', { label: 'FD' });
    }
    // breaks and nexts should be illegal but safe is safe i guess
    for (const next of [...body.returns, ...body.breaks, ...body.nexts, ...body.exitPoints]) {
        graph.addEdge(fn.info.id + '-exit', next, { label: 'FD' });
    }
    return {
        graph: graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [fn.info.id],
        entryPoints: [fn.info.id]
    };
}

/**
 * Folds a function call. Reuses (and extends) the graph of `name`; creates the vertex `<id>` (entry),
 * the mid marker `<id>-name` after the name, and the end marker `<id>-exit` (exit).
 * The arguments are chained one after another, empty arguments are skipped.
 */
function cfgFunctionCall(call, name, args) {
    const graph = name.graph;
    const info = {
        graph,
        breaks: [...name.breaks],
        nexts: [...name.nexts],
        returns: [...name.returns],
        exitPoints: [call.info.id + '-exit'],
        entryPoints: [call.info.id]
    };
    graph.addVertex({ id: call.info.id, name: call.type, type: identifyMayStatementType(call) });
    for (const entryPoint of name.entryPoints) {
        graph.addEdge(entryPoint, call.info.id, { label: 'FD' });
    }
    graph.addVertex({ id: call.info.id + '-name', name: 'call-name', type: "mid-marker" /* CfgVertexType.MidMarker */ });
    for (const exitPoint of name.exitPoints) {
        graph.addEdge(call.info.id + '-name', exitPoint, { label: 'FD' });
    }
    graph.addVertex({ id: call.info.id + '-exit', name: 'call-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    // exit points of whatever precedes the next argument (initially the name marker)
    let lastArgExits = [call.info.id + '-name'];
    for (const arg of args) {
        if (arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        graph.merge(arg.graph);
        info.breaks.push(...arg.breaks);
        info.nexts.push(...arg.nexts);
        info.returns.push(...arg.returns);
        for (const entry of arg.entryPoints) {
            for (const exit of lastArgExits) {
                graph.addEdge(entry, exit, { label: 'FD' });
            }
        }
        lastArgExits = arg.exitPoints;
    }
    for (const exit of lastArgExits) {
        graph.addEdge(call.info.id + '-exit', exit, { label: 'FD' });
    }
    // should not contain any breaks, nexts, or returns, (except for the body if something like 'break()')
    return info;
}

/**
 * Creates a fold function for function calls that, on top of {@link cfgFunctionCall}, links the call
 * to the function definitions it may target according to the given dataflow graph.
 * If at least one definition is found, the exit point becomes the new end marker
 * `<id>-resolved-call-exit`, which follows the regular call exit and every `<target>-exit`.
 */
function cfgFunctionCallWithDataflow(graph) {
    return (call, name, args) => {
        const baseCFG = cfgFunctionCall(call, name, args);
        /* try to resolve the call and link the target definitions */
        const targets = (0, linker_1.getAllFunctionCallTargets)(call.info.id, graph);
        const exits = [];
        for (const target of targets) {
            // we have to filter out non func-call targets as the call targets contains names and call ids
            if ((0, vertex_1.isFunctionDefinitionVertex)(graph.getVertex(target))) {
                baseCFG.graph.addEdge(call.info.id, target, { label: 'FD' });
                exits.push(target + '-exit');
            }
        }
        if (exits.length > 0) {
            baseCFG.graph.addVertex({
                id: call.info.id + '-resolved-call-exit',
                name: 'resolved-call-exit',
                type: "end-marker" /* CfgVertexType.EndMarker */
            });
            for (const exit of [...baseCFG.exitPoints, ...exits]) {
                baseCFG.graph.addEdge(call.info.id + '-resolved-call-exit', exit, { label: 'FD' });
            }
            return {
                ...baseCFG,
                exitPoints: [call.info.id + '-resolved-call-exit']
            };
        }
        else {
            return baseCFG;
        }
    };
}

/**
 * Folds an argument or a parameter. Creates the vertex `<id>` (entry), the mid marker
 * `<id>-before-value` between the optional name and the optional value, and the end marker
 * `<id>-exit` (exit).
 */
function cfgArgumentOrParameter(node, name, value) {
    const graph = new ControlFlowGraph();
    const info = {
        graph,
        breaks: [],
        nexts: [],
        returns: [],
        exitPoints: [node.info.id + '-exit'],
        entryPoints: [node.info.id]
    };
    graph.addVertex({ id: node.info.id, name: node.type, type: "expression" /* CfgVertexType.Expression */ });
    // exit points of the part processed so far
    let currentExitPoint = [node.info.id];
    if (name) {
        graph.merge(name.graph);
        info.breaks.push(...name.breaks);
        info.nexts.push(...name.nexts);
        info.returns.push(...name.returns);
        for (const entry of name.entryPoints) {
            graph.addEdge(entry, node.info.id, { label: 'FD' });
        }
        currentExitPoint = name.exitPoints;
    }
    graph.addVertex({ id: node.info.id + '-before-value', name: 'before-value', type: "mid-marker" /* CfgVertexType.MidMarker */ });
    for (const exitPoints of currentExitPoint) {
        graph.addEdge(node.info.id + '-before-value', exitPoints, { label: 'FD' });
    }
    currentExitPoint = [node.info.id + '-before-value'];
    if (value) {
        graph.merge(value.graph);
        info.breaks.push(...value.breaks);
        info.nexts.push(...value.nexts);
        info.returns.push(...value.returns);
        for (const exitPoint of currentExitPoint) {
            for (const entry of value.entryPoints) {
                graph.addEdge(entry, exitPoint, { label: 'FD' });
            }
        }
        currentExitPoint = value.exitPoints;
    }
    graph.addVertex({ id: node.info.id + '-exit', name: 'exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    for (const exit of currentExitPoint) {
        graph.addEdge(node.info.id + '-exit', exit, { label: 'FD' });
    }
    return info;
}

/**
 * Folds a binary operation (also used for pipes). Creates the vertex `<id>` (entry) and the end
 * marker `<id>-exit` (exit); the left-hand side is linked before the right-hand side.
 * Assignments are typed as statements, everything else as expressions.
 */
function cfgBinaryOp(binOp, lhs, rhs) {
    const graph = new ControlFlowGraph().merge(lhs.graph).merge(rhs.graph);
    const result = {
        graph,
        breaks: [...lhs.breaks, ...rhs.breaks],
        nexts: [...lhs.nexts, ...rhs.nexts],
        returns: [...lhs.returns, ...rhs.returns],
        entryPoints: [binOp.info.id],
        exitPoints: [binOp.info.id + '-exit']
    };
    graph.addVertex({
        id: binOp.info.id,
        name: binOp.type,
        type: binOp.flavor === 'assignment' ? "statement" /* CfgVertexType.Statement */ : "expression" /* CfgVertexType.Expression */
    });
    graph.addVertex({ id: binOp.info.id + '-exit', name: 'binOp-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    for (const exitPoint of lhs.exitPoints) {
        for (const entryPoint of rhs.entryPoints) {
            result.graph.addEdge(entryPoint, exitPoint, { label: 'FD' });
        }
    }
    for (const entryPoint of lhs.entryPoints) {
        graph.addEdge(entryPoint, binOp.info.id, { label: 'FD' });
    }
    for (const exitPoint of rhs.exitPoints) {
        graph.addEdge(binOp.info.id + '-exit', exitPoint, { label: 'FD' });
    }
    return result;
}

/**
 * Folds an access operation. Reuses and mutates the fold result of `name` (its graph as well as its
 * entry and exit points); creates the vertex `<id>` (entry) and the end marker `<id>-exit` (exit).
 * Every non-empty accessor is linked between these two vertices.
 */
function cfgAccess(access, name, accessors) {
    const result = name;
    const graph = result.graph;
    graph.addVertex({ id: access.info.id, name: access.type, type: "expression" /* CfgVertexType.Expression */ });
    graph.addVertex({ id: access.info.id + '-exit', name: 'access-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    for (const entry of name.entryPoints) {
        graph.addEdge(entry, access.info.id, { label: 'FD' });
    }
    for (const exit of name.exitPoints) {
        graph.addEdge(access.info.id, exit, { label: 'FD' });
    }
    // only overwrite the entry/exit points after the old ones have been linked above
    result.entryPoints = [access.info.id];
    result.exitPoints = [access.info.id + '-exit'];
    for (const accessor of accessors) {
        if (accessor === r_function_call_1.EmptyArgument) {
            continue;
        }
        graph.merge(accessor.graph);
        for (const entry of accessor.entryPoints) {
            graph.addEdge(entry, access.info.id, { label: 'FD' });
        }
        for (const exit of accessor.exitPoints) {
            graph.addEdge(access.info.id + '-exit', exit, { label: 'FD' });
        }
    }
    return result;
}

/**
 * Folds a unary operation. Reuses (and extends) the graph of `operand`; the vertex `<id>` is added
 * as an end marker after the operand and becomes the only exit point. Entry points, breaks, nexts,
 * and returns are taken over from the operand.
 */
function cfgUnaryOp(unary, operand) {
    const graph = operand.graph;
    graph.addVertex({ id: unary.info.id, name: unary.type, type: "end-marker" /* CfgVertexType.EndMarker */ });
    for (const entry of operand.exitPoints) {
        graph.addEdge(unary.info.id, entry, { label: 'FD' });
    }
    return { ...operand, graph, exitPoints: [unary.info.id] };
}

/**
 * Folds an expression list by chaining the given expressions in order: the entry points are those
 * of the first expression, the exit points those of the last one. No vertex is created for the
 * list itself.
 */
function cfgExprList(_node, _grouping, expressions) {
    const result = {
        graph: new ControlFlowGraph(),
        breaks: [],
        nexts: [],
        returns: [],
        exitPoints: [],
        entryPoints: []
    };
    let first = true;
    for (const expression of expressions) {
        if (first) {
            result.entryPoints = expression.entryPoints;
            first = false;
        }
        else {
            for (const previousExitPoint of result.exitPoints) {
                for (const entryPoint of expression.entryPoints) {
                    result.graph.addEdge(entryPoint, previousExitPoint, { label: 'FD' });
                }
            }
        }
        result.graph.merge(expression.graph);
        result.breaks.push(...expression.breaks);
        result.nexts.push(...expression.nexts);
        result.returns.push(...expression.returns);
        result.exitPoints = expression.exitPoints;
    }
    return result;
}

/**
 * Returns true if both child lists are absent, or if both are present and contain the same ids
 * in the same order.
 */
function equalChildren(a, b) {
    if (!a || !b) {
        // vertices without children are only equal to other vertices without children
        return !a && !b;
    }
    if (a.length !== b.length) {
        return false;
    }
    for (let i = 0; i < a.length; ++i) {
        if (a[i] !== b[i]) {
            return false;
        }
    }
    return true;
}

/**
 * Returns true if the given CFG equals the other CFG. False otherwise.
 *
 * Two graphs are equal if they have the same root vertex ids, the same vertices (compared by id,
 * name, and children), and the same edges (compared by endpoints and label only).
 * The first difference found is reported with `log.debug`.
 */
function equalCfg(a, b) {
    if (!a || !b) {
        return a === b;
    }
    else if (!(0, set_1.setEquals)(a.rootVertexIds(), b.rootVertexIds())) {
        log_1.log.debug(`root vertex ids differ ${JSON.stringify(a.rootVertexIds(), json_1.jsonReplacer)} vs. ${JSON.stringify(b.rootVertexIds(), json_1.jsonReplacer)}.`);
        return false;
    }
    const aVert = a.vertices();
    const bVert = b.vertices();
    if (aVert.size !== bVert.size) {
        log_1.log.debug(`vertex count differs ${aVert.size} vs. ${bVert.size}.`);
        return false;
    }
    for (const [id, aInfo] of aVert) {
        const bInfo = bVert.get(id);
        // a vertex differs if it is missing, has another name, or has *different* children
        if (bInfo === undefined || aInfo.name !== bInfo.name || !equalChildren(aInfo.children, bInfo.children)) {
            log_1.log.debug(`vertex ${id} differs ${JSON.stringify(aInfo, json_1.jsonReplacer)} vs. ${JSON.stringify(bInfo, json_1.jsonReplacer)}.`);
            return false;
        }
    }
    const aEdges = a.edges();
    const bEdges = b.edges();
    // the sizes compared here are the number of vertices with outgoing edges
    if (aEdges.size !== bEdges.size) {
        log_1.log.debug(`edge count differs ${aEdges.size} vs. ${bEdges.size}.`);
        return false;
    }
    for (const [from, aTo] of aEdges) {
        const bTo = bEdges.get(from);
        if (bTo === undefined || aTo.size !== bTo.size) {
            log_1.log.debug(`edge count for ${from} differs ${aTo.size} vs. ${bTo?.size ?? '?'}.`);
            return false;
        }
        for (const [to, aEdge] of aTo) {
            const bEdge = bTo.get(to);
            if (bEdge === undefined || aEdge.label !== bEdge.label) {
                log_1.log.debug(`edge ${from} -> ${to} differs ${JSON.stringify(aEdge, json_1.jsonReplacer)} vs. ${JSON.stringify(bEdge, json_1.jsonReplacer)}.`);
                return false;
            }
        }
    }
    return true;
}

/**
 * Serializes the given control flow information (graph, entry/exit points, breaks, nexts, returns)
 * by handing a plain-object view of it to `graph2quads`.
 *
 * @see df2quads
 * @see serialize2quads
 * @see graph2quads
 */
function cfg2quads(cfg, config) {
    return (0, quads_1.graph2quads)({
        rootIds: [...cfg.graph.rootVertexIds()],
        vertices: [...cfg.graph.vertices().entries()]
            .map(([id, v]) => ({
            id,
            name: v.name,
            children: v.children
        })),
        edges: [...cfg.graph.edges()].flatMap(([fromId, targets]) => [...targets].map(([toId, info]) => ({
            from: fromId,
            to: toId,
            type: info.label,
            when: info.when
        }))),
        entryPoints: cfg.entryPoints,
        exitPoints: cfg.exitPoints,
        breaks: cfg.breaks,
        nexts: cfg.nexts,
        returns: cfg.returns
    }, config);
}
//# sourceMappingURL=cfg.js.map