@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
551 lines • 23.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ControlFlowGraph = void 0;
exports.emptyControlFlowInformation = emptyControlFlowInformation;
exports.extractCFG = extractCFG;
exports.equalCfg = equalCfg;
exports.cfg2quads = cfg2quads;
const set_1 = require("../set");
const quads_1 = require("../quads");
const log_1 = require("../log");
const json_1 = require("../json");
const fold_1 = require("../../r-bridge/lang-4.x/ast/model/processing/fold");
const convert_values_1 = require("../../r-bridge/lang-4.x/convert-values");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const linker_1 = require("../../dataflow/internal/linker");
const vertex_1 = require("../../dataflow/graph/vertex");
/**
 * Control flow graph of an R program.
 *
 * Vertices are stored by id, edges as a nested map (`from` id -> `to` id -> edge information).
 * Vertices that are not added as root vertices are considered nested (see how function
 * definitions register their children), which makes the graph hierarchical.
 */
class ControlFlowGraph {
    /** ids of all vertices that were registered as root vertices */
    rootVertices = new Set();
    /** vertex id -> vertex information */
    vertexInformation = new Map();
    /** source vertex id -> (target vertex id -> edge information) */
    edgeInformation = new Map();

    /**
     * Registers a new vertex.
     *
     * @param vertex     - the vertex to add, identified by its `id`
     * @param rootVertex - whether the id should additionally be recorded as a root vertex
     * @returns this graph (to allow chaining)
     * @throws Error if a vertex with the same id is already present
     */
    addVertex(vertex, rootVertex = true) {
        const known = this.vertexInformation;
        if (known.has(vertex.id)) {
            throw new Error(`Node with id ${vertex.id} already exists`);
        }
        known.set(vertex.id, vertex);
        if (rootVertex) {
            this.rootVertices.add(vertex.id);
        }
        return this;
    }

    /**
     * Stores `edge` for the pair (`from`, `to`); an existing edge for the same pair is replaced.
     *
     * @returns this graph (to allow chaining)
     */
    addEdge(from, to, edge) {
        let targets = this.edgeInformation.get(from);
        if (targets === undefined) {
            targets = new Map();
            this.edgeInformation.set(from, targets);
        }
        targets.set(to, edge);
        return this;
    }

    /** The map of outgoing edges (target id -> edge) of `node`, or `undefined` if it has none. */
    outgoing(node) {
        return this.edgeInformation.get(node);
    }

    /** The set of ids registered as root vertices. */
    rootVertexIds() {
        return this.rootVertices;
    }

    /** All vertices of the graph, keyed by id. */
    vertices() {
        return this.vertexInformation;
    }

    /** All edges of the graph as a nested map. */
    edges() {
        return this.edgeInformation;
    }

    /**
     * Copies all vertices and edges of `other` into this graph.
     *
     * @param other       - the graph to take vertices and edges from
     * @param forceNested - if set, none of the copied vertices is registered as a root vertex,
     *                      otherwise they keep the root status they have in `other`
     * @returns this graph (to allow chaining)
     * @throws Error if `other` contains a vertex id that is already present (see `addVertex`)
     */
    merge(other, forceNested = false) {
        other.vertexInformation.forEach((vertex, id) => {
            const keepAsRoot = forceNested ? false : other.rootVertices.has(id);
            this.addVertex(vertex, keepAsRoot);
        });
        other.edgeInformation.forEach((targets, from) => {
            targets.forEach((edge, to) => {
                this.addEdge(from, to, edge);
            });
        });
        return this;
    }
}
exports.ControlFlowGraph = ControlFlowGraph;
/**
 * Creates control flow information that describes "nothing":
 * a fresh, empty graph and no returns, breaks, nexts, entry points, or exit points.
 * Every call produces new arrays and a new graph.
 */
function emptyControlFlowInformation() {
    const graph = new ControlFlowGraph();
    const returns = [];
    const breaks = [];
    const nexts = [];
    const entryPoints = [];
    const exitPoints = [];
    return { returns, breaks, nexts, entryPoints, exitPoints, graph };
}
/**
 * Fold table that assigns each kind of normalized AST node the function
 * which builds the control flow information for that node.
 */
const cfgFolds = (() => {
    // numbers, strings, logicals, and symbols are all plain expression leaves
    const expressionLeaf = cfgLeaf("expression" /* CfgVertexType.Expression */);
    // comments and line directives do not contribute any vertices
    const other = {
        foldComment: cfgIgnore,
        foldLineDirective: cfgIgnore
    };
    const loop = {
        foldFor: cfgFor,
        foldRepeat: cfgRepeat,
        foldWhile: cfgWhile,
        foldBreak: cfgBreak,
        foldNext: cfgNext
    };
    // parameters and arguments share one builder
    const functions = {
        foldFunctionDefinition: cfgFunctionDefinition,
        foldFunctionCall: cfgFunctionCall,
        foldParameter: cfgArgumentOrParameter,
        foldArgument: cfgArgumentOrParameter
    };
    return {
        foldNumber: expressionLeaf,
        foldString: expressionLeaf,
        foldLogical: expressionLeaf,
        foldSymbol: expressionLeaf,
        foldAccess: cfgAccess,
        // pipes are handled exactly like binary operators
        foldBinaryOp: cfgBinaryOp,
        foldPipe: cfgBinaryOp,
        foldUnaryOp: cfgUnaryOp,
        other,
        loop,
        foldIfThenElse: cfgIfThenElse,
        foldExprList: cfgExprList,
        functions
    };
})();
/**
 * Produces a copy of the default fold table in which function calls are
 * handled by the dataflow-aware builder bound to the given dataflow graph.
 * The default table itself is left untouched.
 *
 * @param dataflowGraph - the dataflow graph passed on to `cfgFunctionCallWithDataflow`
 */
function dataflowCfgFolds(dataflowGraph) {
    const functions = Object.assign({}, cfgFolds.functions, {
        foldFunctionCall: cfgFunctionCallWithDataflow(dataflowGraph)
    });
    return Object.assign({}, cfgFolds, { functions });
}
/**
 * Given a normalized AST, this approximates the control flow graph of the program.
 * This view is different from the computation of the dataflow graph and may differ,
 * especially because it focuses on intra-procedural analysis.
 *
 * @param ast   - the normalized AST (its `ast` property is the root that gets folded)
 * @param graph - optional dataflow graph; if given, the dataflow-aware folds are used
 *                instead of the default ones
 * @returns whatever folding the AST with the selected fold table produces
 */
function extractCFG(ast, graph) {
    const { foldAst } = fold_1;
    let folds;
    if (graph) {
        folds = dataflowCfgFolds(graph);
    }
    else {
        folds = cfgFolds;
    }
    return foldAst(ast.ast, folds);
}
/**
 * Creates a builder for leaf nodes of the given CFG vertex type.
 * The returned builder produces a graph with exactly one vertex (named after the node's type)
 * which serves as both the only entry and the only exit point.
 *
 * @param type - the CFG vertex type assigned to the created vertex
 */
function cfgLeaf(type) {
    return (leaf) => {
        const leafId = leaf.info.id;
        const graph = new ControlFlowGraph();
        graph.addVertex({ id: leafId, name: leaf.type, type });
        return {
            graph,
            breaks: [],
            nexts: [],
            returns: [],
            exitPoints: [leafId],
            entryPoints: [leafId]
        };
    };
}
/**
 * Builds the control flow information of a `break`:
 * a statement leaf whose id is additionally reported in `breaks`.
 */
function cfgBreak(leaf) {
    const statementLeaf = cfgLeaf("statement" /* CfgVertexType.Statement */);
    const leafInfo = statementLeaf(leaf);
    return Object.assign({}, leafInfo, { breaks: [leaf.info.id] });
}
/**
 * Builds the control flow information of a `next`:
 * a statement leaf whose id is additionally reported in `nexts`.
 */
function cfgNext(leaf) {
    const statementLeaf = cfgLeaf("statement" /* CfgVertexType.Statement */);
    const leafInfo = statementLeaf(leaf);
    return Object.assign({}, leafInfo, { nexts: [leaf.info.id] });
}
/**
 * Builder for nodes that do not take part in the control flow (used for comments and line directives):
 * yields an empty graph without any entry or exit points. The node itself is not inspected.
 */
function cfgIgnore(_leaf) {
    const graph = new ControlFlowGraph();
    const info = { graph };
    for (const key of ['breaks', 'nexts', 'returns', 'exitPoints', 'entryPoints']) {
        info[key] = [];
    }
    return info;
}
/**
 * Decides which CFG vertex type a node receives: nodes that are direct children of an
 * expression list count as statements, everything else as an expression.
 */
function identifyMayStatementType(node) {
    if (node.info.role === "expr-list-child" /* RoleInParent.ExpressionListChild */) {
        return "statement" /* CfgVertexType.Statement */;
    }
    return "expression" /* CfgVertexType.Expression */;
}
/**
 * Builds the control flow information of an `if` (with optional `else`).
 *
 * A vertex for the `if` itself and an `<id>-exit` end marker are created; the graphs of the
 * condition and of both branches are merged in. Branch entries are connected to the exits of the
 * condition with 'CD' edges (`RTrue` for then, `RFalse` for else). Without an `else`, the exit
 * marker is connected to the condition exits with an `RFalse` 'CD' edge.
 *
 * @param ifNode    - the if node (supplies id, type, and role)
 * @param condition - control flow information of the condition
 * @param then      - control flow information of the then-branch
 * @param otherwise - control flow information of the else-branch, if there is one
 * @returns information with the `if` vertex as entry and the exit marker as exit; breaks, nexts,
 *          and returns are collected from the branches only
 */
function cfgIfThenElse(ifNode, condition, then, otherwise) {
    const ifId = ifNode.info.id;
    const exitId = ifId + '-exit';
    const elseEntries = otherwise?.entryPoints ?? [];
    // collects `key` of the then-branch followed by that of the else-branch (if present)
    const ofBranches = (key) => [...then[key], ...(otherwise?.[key] ?? [])];
    const graph = new ControlFlowGraph();
    graph.addVertex({ id: ifId, name: ifNode.type, type: identifyMayStatementType(ifNode) });
    graph.addVertex({ id: exitId, name: 'if-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(condition.graph);
    graph.merge(then.graph);
    if (otherwise) {
        graph.merge(otherwise.graph);
    }
    // the outcome of the condition selects the branch
    condition.exitPoints.forEach((conditionExit) => {
        then.entryPoints.forEach((thenEntry) => {
            graph.addEdge(thenEntry, conditionExit, { label: 'CD', when: convert_values_1.RTrue, caused: ifId });
        });
        elseEntries.forEach((elseEntry) => {
            graph.addEdge(elseEntry, conditionExit, { label: 'CD', when: convert_values_1.RFalse, caused: ifId });
        });
    });
    condition.entryPoints.forEach((conditionEntry) => {
        graph.addEdge(conditionEntry, ifId, { label: 'FD' });
    });
    ofBranches('exitPoints').forEach((branchExit) => {
        graph.addEdge(exitId, branchExit, { label: 'FD' });
    });
    if (!otherwise) {
        // no else: a false condition leads straight to the exit marker
        condition.exitPoints.forEach((conditionExit) => {
            graph.addEdge(exitId, conditionExit, { label: 'CD', when: convert_values_1.RFalse, caused: ifId });
        });
    }
    return {
        graph,
        breaks: ofBranches('breaks'),
        nexts: ofBranches('nexts'),
        returns: ofBranches('returns'),
        exitPoints: [exitId],
        entryPoints: [ifId]
    };
}
/**
 * Builds the control flow information of a `repeat` loop.
 * The graph of the body is reused (and extended in place) with a vertex for the loop
 * and a `<id>-exit` end marker.
 *
 * @param repeat - the repeat node (supplies id, type, and role)
 * @param body   - control flow information of the loop body
 * @returns information with the loop vertex as entry and the exit marker as exit; breaks and
 *          nexts are consumed by the loop, returns of the body are passed on
 */
function cfgRepeat(repeat, body) {
    const loopId = repeat.info.id;
    const exitId = loopId + '-exit';
    const { graph } = body;
    graph.addVertex({ id: loopId, name: repeat.type, type: identifyMayStatementType(repeat) });
    graph.addVertex({ id: exitId, name: 'repeat-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    body.entryPoints.forEach((bodyEntry) => {
        graph.addEdge(bodyEntry, loopId, { label: 'FD' });
    });
    // both a `next` and the regular end of the body lead back to the loop vertex
    const backToLoop = [...body.nexts, ...body.exitPoints];
    backToLoop.forEach((point) => {
        graph.addEdge(loopId, point, { label: 'FD' });
    });
    // a `break` is the only connection to the exit marker
    body.breaks.forEach((breakPoint) => {
        graph.addEdge(exitId, breakPoint, { label: 'FD' });
    });
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [exitId],
        entryPoints: [loopId]
    };
}
/**
 * Builds the control flow information of a `while` loop.
 * The graph of the condition is reused (and extended in place): it receives a vertex for the
 * loop, a `<id>-exit` end marker, and the merged graph of the body.
 *
 * @param whileLoop - the while node (supplies id, type, and role)
 * @param condition - control flow information of the loop condition
 * @param body      - control flow information of the loop body
 * @returns information with the loop vertex as entry and the exit marker as exit; breaks and
 *          nexts are consumed by the loop, returns of the body are passed on
 */
function cfgWhile(whileLoop, condition, body) {
    const loopId = whileLoop.info.id;
    const exitId = loopId + '-exit';
    const { graph } = condition;
    graph.addVertex({ id: loopId, name: whileLoop.type, type: identifyMayStatementType(whileLoop) });
    graph.addVertex({ id: exitId, name: 'while-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(body.graph);
    condition.entryPoints.forEach((conditionEntry) => {
        graph.addEdge(conditionEntry, loopId, { label: 'FD' });
    });
    // the body is only entered if the condition holds
    condition.exitPoints.forEach((conditionExit) => {
        body.entryPoints.forEach((bodyEntry) => {
            graph.addEdge(bodyEntry, conditionExit, { label: 'CD', when: convert_values_1.RTrue, caused: loopId });
        });
    });
    body.entryPoints.forEach((bodyEntry) => {
        graph.addEdge(loopId, bodyEntry, { label: 'FD' });
    });
    // both a `next` and the regular end of the body lead back to the loop vertex
    [...body.nexts, ...body.exitPoints].forEach((point) => {
        graph.addEdge(loopId, point, { label: 'FD' });
    });
    body.breaks.forEach((breakPoint) => {
        graph.addEdge(exitId, breakPoint, { label: 'FD' });
    });
    // besides a `break`, a false condition leaves the loop as well
    condition.exitPoints.forEach((conditionExit) => {
        graph.addEdge(exitId, conditionExit, { label: 'CD', when: convert_values_1.RFalse, caused: loopId });
    });
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [exitId],
        entryPoints: [loopId]
    };
}
/**
 * Builds the control flow information of a `for` loop.
 * The graph of the loop variable is reused (and extended in place): it receives a vertex for
 * the loop, a `<id>-exit` end marker, and the merged graphs of the vector and the body.
 *
 * @param forLoop  - the for node (supplies id, type, and role)
 * @param variable - control flow information of the loop variable
 * @param vector   - control flow information of the vector that is iterated
 * @param body     - control flow information of the loop body
 * @returns information with the loop vertex as entry and the exit marker as exit; breaks and
 *          nexts are consumed by the loop, returns of the body are passed on
 */
function cfgFor(forLoop, variable, vector, body) {
    const loopId = forLoop.info.id;
    const exitId = loopId + '-exit';
    const { graph } = variable;
    graph.addVertex({ id: loopId, name: forLoop.type, type: identifyMayStatementType(forLoop) });
    graph.addVertex({ id: exitId, name: 'for-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    graph.merge(vector.graph);
    graph.merge(body.graph);
    vector.entryPoints.forEach((vectorEntry) => {
        graph.addEdge(vectorEntry, loopId, { label: 'FD' });
    });
    // the loop variable is connected to the end of the vector
    vector.exitPoints.forEach((vectorExit) => {
        variable.entryPoints.forEach((variableEntry) => {
            graph.addEdge(variableEntry, vectorExit, { label: 'FD' });
        });
    });
    // entering the body is the 'true' outcome at the loop variable
    variable.exitPoints.forEach((variableExit) => {
        body.entryPoints.forEach((bodyEntry) => {
            graph.addEdge(bodyEntry, variableExit, { label: 'CD', when: convert_values_1.RTrue, caused: loopId });
        });
    });
    // both a `next` and the regular end of the body lead back to the loop vertex
    [...body.nexts, ...body.exitPoints].forEach((point) => {
        graph.addEdge(loopId, point, { label: 'FD' });
    });
    body.breaks.forEach((breakPoint) => {
        graph.addEdge(exitId, breakPoint, { label: 'FD' });
    });
    // besides a `break`, the 'false' outcome at the loop variable leaves the loop as well
    variable.exitPoints.forEach((variableExit) => {
        graph.addEdge(exitId, variableExit, { label: 'CD', when: convert_values_1.RFalse, caused: loopId });
    });
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [exitId],
        entryPoints: [loopId]
    };
}
/**
 * Builds the control flow information of a function definition.
 *
 * The definition becomes a single root vertex whose `children` list names everything nested in
 * it: the `<id>-params` mid marker, the `<id>-exit` end marker, and the root vertices of the body
 * and of all parameters. The graphs of body and parameters are merged as nested (non-root) vertices.
 *
 * @param fn     - the function definition node (supplies id, type, and role)
 * @param params - control flow information of the parameters
 * @param body   - control flow information of the function body
 * @returns information with the definition vertex as the only entry and exit point;
 *          returns of the body are passed on
 */
function cfgFunctionDefinition(fn, params, body) {
    const fnId = fn.info.id;
    const paramsId = fnId + '-params';
    const exitId = fnId + '-exit';
    const graph = new ControlFlowGraph();
    // this very array is stored in the vertex, so ids pushed later on show up there as well
    const children = [paramsId, exitId];
    graph.addVertex({ id: paramsId, name: 'function-parameters', type: "mid-marker" /* CfgVertexType.MidMarker */ }, false);
    graph.addVertex({ id: exitId, name: 'function-exit', type: "end-marker" /* CfgVertexType.EndMarker */ }, false);
    graph.addVertex({ id: fnId, name: fn.type, children, type: identifyMayStatementType(fn) });
    graph.merge(body.graph, true);
    for (const bodyRoot of body.graph.rootVertexIds()) {
        children.push(bodyRoot);
    }
    params.forEach((param) => {
        graph.merge(param.graph, true);
        for (const paramRoot of param.graph.rootVertexIds()) {
            children.push(paramRoot);
        }
        param.entryPoints.forEach((paramEntry) => {
            graph.addEdge(paramEntry, fnId, { label: 'FD' });
        });
        param.exitPoints.forEach((paramExit) => {
            graph.addEdge(paramsId, paramExit, { label: 'FD' });
        });
    });
    if (params.length === 0) {
        // without parameters the marker is attached to the definition directly
        graph.addEdge(paramsId, fnId, { label: 'FD' });
    }
    body.entryPoints.forEach((bodyEntry) => {
        graph.addEdge(bodyEntry, paramsId, { label: 'FD' });
    });
    // breaks and nexts should not occur here, but they are connected to the exit to be safe
    const leavingPoints = [...body.returns, ...body.breaks, ...body.nexts, ...body.exitPoints];
    leavingPoints.forEach((point) => {
        graph.addEdge(exitId, point, { label: 'FD' });
    });
    return {
        graph: graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [fnId],
        entryPoints: [fnId]
    };
}
/**
 * Builds the control flow information of a function call.
 * The graph of the called name is reused (and extended in place): it receives a vertex for the
 * call, a `<id>-name` mid marker placed after the name, and a `<id>-exit` end marker. The
 * arguments are chained one after another between these two markers; empty arguments are skipped.
 *
 * @param call - the call node (supplies id, type, and role)
 * @param name - control flow information of the called name
 * @param args - control flow information of the arguments (or `EmptyArgument` markers)
 * @returns information with the call vertex as entry and the exit marker as exit; breaks, nexts,
 *          and returns are collected from the name and all non-empty arguments
 */
function cfgFunctionCall(call, name, args) {
    const callId = call.info.id;
    const nameMarkerId = callId + '-name';
    const exitId = callId + '-exit';
    const { graph } = name;
    const info = {
        graph,
        breaks: Array.from(name.breaks),
        nexts: Array.from(name.nexts),
        returns: Array.from(name.returns),
        exitPoints: [exitId],
        entryPoints: [callId]
    };
    graph.addVertex({ id: callId, name: call.type, type: identifyMayStatementType(call) });
    name.entryPoints.forEach((nameEntry) => {
        graph.addEdge(nameEntry, callId, { label: 'FD' });
    });
    graph.addVertex({ id: nameMarkerId, name: 'call-name', type: "mid-marker" /* CfgVertexType.MidMarker */ });
    name.exitPoints.forEach((nameExit) => {
        graph.addEdge(nameMarkerId, nameExit, { label: 'FD' });
    });
    graph.addVertex({ id: exitId, name: 'call-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    // exit points of whatever precedes the next argument (initially the name marker)
    let previousExits = [nameMarkerId];
    for (const arg of args) {
        if (arg !== r_function_call_1.EmptyArgument) {
            graph.merge(arg.graph);
            info.breaks.push(...arg.breaks);
            info.nexts.push(...arg.nexts);
            info.returns.push(...arg.returns);
            for (const argEntry of arg.entryPoints) {
                for (const previousExit of previousExits) {
                    graph.addEdge(argEntry, previousExit, { label: 'FD' });
                }
            }
            previousExits = arg.exitPoints;
        }
    }
    previousExits.forEach((lastExit) => {
        graph.addEdge(exitId, lastExit, { label: 'FD' });
    });
    // should not contain any breaks, nexts, or returns, (except for the body if something like 'break()')
    return info;
}
/**
 * Creates a function call builder that additionally links calls to the function definitions
 * they may resolve to, based on the given dataflow graph.
 *
 * For every call target that is a function definition vertex in the dataflow graph, an edge from
 * the call to the target is added. If at least one such target exists, a
 * `<id>-resolved-call-exit` end marker is introduced, connected to the regular exit points of
 * the call and to the `-exit` markers of all resolved targets, and used as the new exit point.
 * Otherwise, the result of `cfgFunctionCall` is returned unchanged.
 *
 * @param graph - the dataflow graph used to resolve call targets
 */
function cfgFunctionCallWithDataflow(graph) {
    return (call, name, args) => {
        const callId = call.info.id;
        const baseCFG = cfgFunctionCall(call, name, args);
        /* try to resolve the call and link the target definitions */
        const targets = (0, linker_1.getAllFunctionCallTargets)(callId, graph);
        const targetExits = [];
        for (const target of targets) {
            // the call targets contain names and call ids as well, only function definitions are of interest
            const isDefinition = (0, vertex_1.isFunctionDefinitionVertex)(graph.getVertex(target));
            if (!isDefinition) {
                continue;
            }
            baseCFG.graph.addEdge(callId, target, { label: 'FD' });
            targetExits.push(target + '-exit');
        }
        if (targetExits.length === 0) {
            return baseCFG;
        }
        const resolvedExitId = callId + '-resolved-call-exit';
        baseCFG.graph.addVertex({
            id: resolvedExitId,
            name: 'resolved-call-exit',
            type: "end-marker" /* CfgVertexType.EndMarker */
        });
        baseCFG.exitPoints.concat(targetExits).forEach((exit) => {
            baseCFG.graph.addEdge(resolvedExitId, exit, { label: 'FD' });
        });
        return Object.assign({}, baseCFG, { exitPoints: [resolvedExitId] });
    };
}
/**
 * Builds the control flow information of an argument or a parameter.
 *
 * The resulting chain is: node vertex, the name (if present), a `<id>-before-value` mid marker,
 * the value (if present), and finally the `<id>-exit` end marker.
 *
 * @param node  - the argument/parameter node (supplies id and type)
 * @param name  - control flow information of the name, if there is one
 * @param value - control flow information of the (default) value, if there is one
 * @returns information with the node vertex as entry and the exit marker as exit; breaks, nexts,
 *          and returns are collected from name and value
 */
function cfgArgumentOrParameter(node, name, value) {
    const nodeId = node.info.id;
    const beforeValueId = nodeId + '-before-value';
    const exitId = nodeId + '-exit';
    const graph = new ControlFlowGraph();
    const info = { graph, breaks: [], nexts: [], returns: [], exitPoints: [exitId], entryPoints: [nodeId] };
    // takes over the jumps (breaks, nexts, returns) of a nested part
    const collectJumps = (part) => {
        info.breaks.push(...part.breaks);
        info.nexts.push(...part.nexts);
        info.returns.push(...part.returns);
    };
    graph.addVertex({ id: nodeId, name: node.type, type: "expression" /* CfgVertexType.Expression */ });
    // the points the next part of the chain has to be connected to
    let pending = [nodeId];
    if (name) {
        graph.merge(name.graph);
        collectJumps(name);
        name.entryPoints.forEach((nameEntry) => {
            graph.addEdge(nameEntry, nodeId, { label: 'FD' });
        });
        pending = name.exitPoints;
    }
    graph.addVertex({ id: beforeValueId, name: 'before-value', type: "mid-marker" /* CfgVertexType.MidMarker */ });
    pending.forEach((point) => {
        graph.addEdge(beforeValueId, point, { label: 'FD' });
    });
    pending = [beforeValueId];
    if (value) {
        graph.merge(value.graph);
        collectJumps(value);
        pending.forEach((point) => {
            value.entryPoints.forEach((valueEntry) => {
                graph.addEdge(valueEntry, point, { label: 'FD' });
            });
        });
        pending = value.exitPoints;
    }
    graph.addVertex({ id: exitId, name: 'exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    pending.forEach((point) => {
        graph.addEdge(exitId, point, { label: 'FD' });
    });
    return info;
}
/**
 * Builds the control flow information of a binary operation (also used for pipes).
 *
 * A new graph receives the merged graphs of both operands, a vertex for the operation
 * (a statement if the operator's flavor is 'assignment', an expression otherwise),
 * and a `<id>-exit` end marker. The right-hand side is chained after the left-hand side.
 *
 * @param binOp - the operation node (supplies id, type, and flavor)
 * @param lhs   - control flow information of the left-hand side
 * @param rhs   - control flow information of the right-hand side
 * @returns information with the operation vertex as entry and the exit marker as exit; breaks,
 *          nexts, and returns are collected from both operands
 */
function cfgBinaryOp(binOp, lhs, rhs) {
    const opId = binOp.info.id;
    const exitId = opId + '-exit';
    const withLhs = new ControlFlowGraph().merge(lhs.graph);
    const graph = withLhs.merge(rhs.graph);
    const result = {
        graph,
        breaks: lhs.breaks.concat(rhs.breaks),
        nexts: lhs.nexts.concat(rhs.nexts),
        returns: lhs.returns.concat(rhs.returns),
        entryPoints: [opId],
        exitPoints: [exitId]
    };
    let vertexType;
    if (binOp.flavor === 'assignment') {
        vertexType = "statement" /* CfgVertexType.Statement */;
    }
    else {
        vertexType = "expression" /* CfgVertexType.Expression */;
    }
    graph.addVertex({ id: opId, name: binOp.type, type: vertexType });
    graph.addVertex({ id: exitId, name: 'binOp-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    lhs.exitPoints.forEach((lhsExit) => {
        rhs.entryPoints.forEach((rhsEntry) => {
            result.graph.addEdge(rhsEntry, lhsExit, { label: 'FD' });
        });
    });
    lhs.entryPoints.forEach((lhsEntry) => {
        graph.addEdge(lhsEntry, opId, { label: 'FD' });
    });
    rhs.exitPoints.forEach((rhsExit) => {
        graph.addEdge(exitId, rhsExit, { label: 'FD' });
    });
    return result;
}
/**
 * Builds the control flow information of an access operation.
 *
 * The information object of the accessed name is reused: its graph is extended in place with a
 * vertex for the access and an `<id>-exit` end marker, its entry and exit points are overwritten,
 * and the very same object is returned. Empty accessor arguments are skipped.
 *
 * @param access    - the access node (supplies id and type)
 * @param name      - control flow information of the accessed name (modified by this function!)
 * @param accessors - control flow information of the accessors (or `EmptyArgument` markers)
 * @returns `name`, now with the access vertex as entry and the exit marker as exit; the breaks,
 *          nexts, and returns of the accessors are not added to it
 */
function cfgAccess(access, name, accessors) {
    const accessId = access.info.id;
    const exitId = accessId + '-exit';
    const { graph } = name;
    graph.addVertex({ id: accessId, name: access.type, type: "expression" /* CfgVertexType.Expression */ });
    graph.addVertex({ id: exitId, name: 'access-exit', type: "end-marker" /* CfgVertexType.EndMarker */ });
    name.entryPoints.forEach((nameEntry) => {
        graph.addEdge(nameEntry, accessId, { label: 'FD' });
    });
    // NOTE(review): together with the edges above this links the access vertex and the name in
    // both directions, while the exit marker is only reachable via accessors - confirm this is intended
    name.exitPoints.forEach((nameExit) => {
        graph.addEdge(accessId, nameExit, { label: 'FD' });
    });
    name.entryPoints = [accessId];
    name.exitPoints = [exitId];
    const present = accessors.filter((accessor) => accessor !== r_function_call_1.EmptyArgument);
    for (const accessor of present) {
        graph.merge(accessor.graph);
        accessor.entryPoints.forEach((accessorEntry) => {
            graph.addEdge(accessorEntry, accessId, { label: 'FD' });
        });
        accessor.exitPoints.forEach((accessorExit) => {
            graph.addEdge(exitId, accessorExit, { label: 'FD' });
        });
    }
    return name;
}
/**
 * Builds the control flow information of a unary operation.
 * The graph of the operand is reused (and extended in place) with a vertex for the operation,
 * which is registered as an end marker and connected to all exit points of the operand.
 *
 * @param unary   - the operation node (supplies id and type)
 * @param operand - control flow information of the operand
 * @returns a copy of the operand's information whose only exit point is the operation vertex
 */
function cfgUnaryOp(unary, operand) {
    const unaryId = unary.info.id;
    const { graph } = operand;
    graph.addVertex({ id: unaryId, name: unary.type, type: "end-marker" /* CfgVertexType.EndMarker */ });
    operand.exitPoints.forEach((operandExit) => {
        graph.addEdge(unaryId, operandExit, { label: 'FD' });
    });
    return Object.assign({}, operand, { graph, exitPoints: [unaryId] });
}
/**
 * Builds the control flow information of an expression list by chaining its expressions:
 * the entry points of each expression are connected to the exit points of its predecessor.
 *
 * Expressions that have neither entry nor exit points (e.g., ignored nodes or empty nested
 * expression lists) do not take part in the chain. Their graph and jumps are still taken over,
 * but they neither become the entry of the list nor cut the connection between their neighbors.
 *
 * @param _node       - the expression list node (unused)
 * @param _grouping   - the grouping of the list (unused)
 * @param expressions - control flow information of the contained expressions, in order
 * @returns information whose entry points are those of the first expression with control flow
 *          and whose exit points are those of the last one; breaks, nexts, and returns are
 *          collected from all expressions
 */
function cfgExprList(_node, _grouping, expressions) {
    const result = { graph: new ControlFlowGraph(), breaks: [], nexts: [], returns: [], exitPoints: [], entryPoints: [] };
    let first = true;
    for (const expression of expressions) {
        // without this check, a flow-less expression would reset the exit points to `[]`
        // and thereby disconnect everything that follows from everything before it
        const hasFlow = expression.entryPoints.length > 0 || expression.exitPoints.length > 0;
        if (hasFlow) {
            if (first) {
                result.entryPoints = expression.entryPoints;
                first = false;
            }
            else {
                for (const previousExitPoint of result.exitPoints) {
                    for (const entryPoint of expression.entryPoints) {
                        result.graph.addEdge(entryPoint, previousExitPoint, { label: 'FD' });
                    }
                }
            }
        }
        result.graph.merge(expression.graph);
        result.breaks.push(...expression.breaks);
        result.nexts.push(...expression.nexts);
        result.returns.push(...expression.returns);
        if (hasFlow) {
            result.exitPoints = expression.exitPoints;
        }
    }
    return result;
}
/**
 * Compares two (optional) lists of child ids.
 * Two absent lists are considered equal; an absent list never equals a present one.
 * Present lists are equal if they have the same length and the same ids in the same order.
 */
function equalChildren(a, b) {
    if (a == null || b == null) {
        return a == null && b == null;
    }
    if (a.length !== b.length) {
        return false;
    }
    return a.every((child, index) => child === b[index]);
}
/**
 * Returns true if the given CFG equals the other CFG. False otherwise.
 *
 * Two graphs are considered equal if they have the same root vertex ids, the same vertices
 * (compared by id, name, and children), and the same edges (compared by source, target, and label).
 * If one of the graphs is missing, they are only equal if both are the very same value.
 * The first difference found is reported with a debug log message.
 */
function equalCfg(a, b) {
    if (!a || !b) {
        return a === b;
    }
    else if (!(0, set_1.setEquals)(a.rootVertexIds(), b.rootVertexIds())) {
        log_1.log.debug(`root vertex ids differ ${JSON.stringify(a.rootVertexIds(), json_1.jsonReplacer)} vs. ${JSON.stringify(b.rootVertexIds(), json_1.jsonReplacer)}.`);
        return false;
    }
    const aVert = a.vertices();
    const bVert = b.vertices();
    if (aVert.size !== bVert.size) {
        log_1.log.debug(`vertex count differs ${aVert.size} vs. ${bVert.size}.`);
        return false;
    }
    for (const [id, aInfo] of aVert) {
        const bInfo = bVert.get(id);
        // a vertex differs if it is missing, named differently, or has different children
        if (bInfo === undefined || aInfo.name !== bInfo.name || !equalChildren(aInfo.children, bInfo.children)) {
            log_1.log.debug(`vertex ${id} differs ${JSON.stringify(aInfo, json_1.jsonReplacer)} vs. ${JSON.stringify(bInfo, json_1.jsonReplacer)}.`);
            return false;
        }
    }
    const aEdges = a.edges();
    const bEdges = b.edges();
    if (aEdges.size !== bEdges.size) {
        log_1.log.debug(`edge count differs ${aEdges.size} vs. ${bEdges.size}.`);
        return false;
    }
    for (const [from, aTo] of aEdges) {
        const bTo = bEdges.get(from);
        if (bTo === undefined || aTo.size !== bTo.size) {
            log_1.log.debug(`edge count for ${from} differs ${aTo.size} vs. ${bTo?.size ?? '?'}.`);
            return false;
        }
        for (const [to, aEdge] of aTo) {
            const bEdge = bTo.get(to);
            if (bEdge === undefined || aEdge.label !== bEdge.label) {
                log_1.log.debug(`edge ${from} -> ${to} differs ${JSON.stringify(aEdge, json_1.jsonReplacer)} vs. ${JSON.stringify(bEdge, json_1.jsonReplacer)}.`);
                return false;
            }
        }
    }
    return true;
}
/**
 * Serializes the given control flow information to quads.
 * Vertices are reduced to their id, name, and children; edges are flattened to objects with
 * source, target, label (as `type`), and `when`. Everything else is passed on as is.
 *
 * @param cfg    - the control flow information to serialize
 * @param config - configuration handed to `graph2quads` unchanged
 *
 * @see df2quads
 * @see serialize2quads
 * @see graph2quads
 */
function cfg2quads(cfg, config) {
    const { graph2quads } = quads_1;
    const { graph } = cfg;
    const rootIds = Array.from(graph.rootVertexIds());
    const vertices = [];
    for (const [id, vertex] of graph.vertices().entries()) {
        vertices.push({ id, name: vertex.name, children: vertex.children });
    }
    const edges = [];
    for (const [fromId, targets] of graph.edges()) {
        for (const [toId, info] of targets) {
            edges.push({ from: fromId, to: toId, type: info.label, when: info.when });
        }
    }
    const serializable = {
        rootIds,
        vertices,
        edges,
        entryPoints: cfg.entryPoints,
        exitPoints: cfg.exitPoints,
        breaks: cfg.breaks,
        nexts: cfg.nexts,
        returns: cfg.returns
    };
    return graph2quads(serializable, config);
}
//# sourceMappingURL=cfg.js.map