@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
468 lines • 21.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ResolvedCallSuffix = void 0;
exports.extractCfg = extractCfg;
exports.extractCfgQuick = extractCfgQuick;
exports.cfg2quads = cfg2quads;
const quads_1 = require("../util/quads");
const fold_1 = require("../r-bridge/lang-4.x/ast/model/processing/fold");
const convert_values_1 = require("../r-bridge/lang-4.x/convert-values");
const r_function_call_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const linker_1 = require("../dataflow/internal/linker");
const vertex_1 = require("../dataflow/graph/vertex");
const control_flow_graph_1 = require("./control-flow-graph");
const cfg_simplification_1 = require("./cfg-simplification");
const assert_1 = require("../util/assert");
/**
 * Default fold table handed to `foldAst`: maps every fold callback to the
 * function in this file that builds the control flow information for it.
 * Number, string, logical and symbol nodes become single expression leaves,
 * comments and line directives contribute nothing.
 */
const cfgFolds = (() => {
    // every call creates a fresh leaf handler, exactly one per leaf kind
    const expressionLeaf = () => cfgLeaf(control_flow_graph_1.CfgVertexType.Expression);
    const other = {
        foldComment: cfgIgnore,
        foldLineDirective: cfgIgnore
    };
    const loop = {
        foldFor: cfgFor,
        foldRepeat: cfgRepeat,
        foldWhile: cfgWhile,
        foldBreak: cfgBreak,
        foldNext: cfgNext
    };
    const functions = {
        foldFunctionDefinition: cfgFunctionDefinition,
        foldFunctionCall: cfgFunctionCall,
        foldParameter: cfgArgumentOrParameter,
        foldArgument: cfgArgumentOrParameter
    };
    return {
        foldNumber: expressionLeaf(),
        foldString: expressionLeaf(),
        foldLogical: expressionLeaf(),
        foldSymbol: expressionLeaf(),
        foldAccess: cfgAccess,
        foldBinaryOp: cfgBinaryOp,
        // pipes are wired up exactly like binary operators
        foldPipe: cfgBinaryOp,
        foldUnaryOp: cfgUnaryOp,
        other,
        loop,
        foldIfThenElse: cfgIfThenElse,
        foldExprList: cfgExprList,
        functions
    };
})();
/**
 * Builds a copy of {@link cfgFolds} in which only the function-call fold is
 * replaced by the dataflow-aware variant bound to the given graph.
 * The shared `cfgFolds` table itself is left untouched.
 *
 * @param dataflowGraph - the dataflow graph passed on to `cfgFunctionCallWithDataflow`
 * @returns a new fold table
 */
function dataflowCfgFolds(dataflowGraph) {
    const functions = Object.assign({}, cfgFolds.functions, {
        foldFunctionCall: cfgFunctionCallWithDataflow(dataflowGraph)
    });
    return Object.assign({}, cfgFolds, { functions });
}
/**
 * Approximates the control flow graph of a program from its normalized AST.
 * The AST is folded with {@link cfgFolds} (or, if a dataflow graph is given,
 * with the dataflow-aware table from {@link dataflowCfgFolds}) and the result
 * is then handed to `simplifyControlFlowInformation`.
 *
 * @param ast - the normalized AST (its `ast` property is the root that is folded)
 * @param config - the flowR config, forwarded to the simplification step
 * @param graph - optional dataflow graph; when present, calls are resolved against it
 * @param simplifications - simplification passes forwarded to `simplifyControlFlowInformation`
 *
 * @see {@link extractCfgQuick} - for a simplified version of this function
 */
function extractCfg(ast, config, graph, simplifications) {
    const folds = graph ? dataflowCfgFolds(graph) : cfgFolds;
    const rawCfg = (0, fold_1.foldAst)(ast.ast, folds);
    const context = { ast, dfg: graph, config };
    return (0, cfg_simplification_1.simplifyControlFlowInformation)(rawCfg, context, simplifications);
}
/**
 * A version of {@link extractCfg} that only folds the AST with the default
 * {@link cfgFolds}: it applies no simplifications and uses no dataflow information.
 *
 * @param ast - the normalized AST (its `ast` property is the root that is folded)
 */
function extractCfgQuick(ast) {
    const { foldAst } = fold_1;
    return foldAst(ast.ast, cfgFolds);
}
/**
 * Creates a fold handler for leaf nodes.
 * The returned handler produces a graph with a single vertex of the given
 * type; that vertex is both the only entry and the only exit point.
 *
 * @param type - the vertex type to use for the leaf
 */
function cfgLeaf(type) {
    return (node) => {
        const { id } = node.info;
        const graph = new control_flow_graph_1.ControlFlowGraph().addVertex({ id, type });
        return {
            graph,
            breaks: [],
            nexts: [],
            returns: [],
            exitPoints: [id],
            entryPoints: [id]
        };
    };
}
/**
 * Control flow information for a `break`: a statement leaf that registers
 * itself in `breaks` and has no regular exit point.
 */
function cfgBreak(leaf) {
    const statementLeaf = cfgLeaf(control_flow_graph_1.CfgVertexType.Statement)(leaf);
    return Object.assign({}, statementLeaf, {
        breaks: [leaf.info.id],
        exitPoints: []
    });
}
/**
 * Control flow information for a `next`: a statement leaf that registers
 * itself in `nexts` and has no regular exit point.
 */
function cfgNext(leaf) {
    const statementLeaf = cfgLeaf(control_flow_graph_1.CfgVertexType.Statement)(leaf);
    return Object.assign({}, statementLeaf, {
        nexts: [leaf.info.id],
        exitPoints: []
    });
}
/**
 * Fold handler for nodes that do not take part in the control flow
 * (used for comments and line directives): returns an empty graph
 * without any entry or exit points. The node itself is not inspected.
 */
function cfgIgnore(_leaf) {
    const emptyGraph = new control_flow_graph_1.ControlFlowGraph();
    return {
        graph: emptyGraph,
        breaks: [],
        nexts: [],
        returns: [],
        exitPoints: [],
        entryPoints: []
    };
}
/**
 * Picks the vertex type for a node that may act as a statement:
 * direct children of an expression list are statements, everything else
 * is an expression.
 */
function identifyMayStatementType(node) {
    const { Statement, Expression } = control_flow_graph_1.CfgVertexType;
    if (node.info.role === "expr-list-child" /* RoleInParent.ExpressionListChild */) {
        return Statement;
    }
    return Expression;
}
/**
 * Builds the control flow information for an if-then(-else).
 *
 * A vertex for the `if` and an end marker `<id>-exit` are created, and the
 * graphs of condition, then-branch and (optional) else-branch are merged in.
 * Throughout this file the first argument of `addEdge` is the vertex that is
 * reached later, the second one the vertex it follows.
 *
 * @param ifNode - the if node; its id names the vertex and the end marker
 * @param condition - control flow information of the condition
 * @param then - control flow information of the then-branch
 * @param otherwise - control flow information of the else-branch, if there is one
 */
function cfgIfThenElse(ifNode, condition, then, otherwise) {
    const ifId = ifNode.info.id;
    const exitId = ifId + '-exit';
    const elseEntries = otherwise?.entryPoints ?? [];
    const cfg = new control_flow_graph_1.ControlFlowGraph();
    cfg.addVertex({ id: ifId, type: identifyMayStatementType(ifNode), mid: condition.exitPoints, end: [exitId] });
    cfg.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: ifId });
    cfg.mergeWith(condition.graph);
    cfg.mergeWith(then.graph);
    if (otherwise) {
        cfg.mergeWith(otherwise.graph);
    }
    // the branches are control dependent on the outcome of the condition
    for (const conditionExit of condition.exitPoints) {
        for (const thenEntry of then.entryPoints) {
            cfg.addEdge(thenEntry, conditionExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: ifId });
        }
        for (const elseEntry of elseEntries) {
            cfg.addEdge(elseEntry, conditionExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: ifId });
        }
    }
    // the condition is evaluated right after entering the if
    for (const conditionEntry of condition.entryPoints) {
        cfg.addEdge(conditionEntry, ifId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // both branches end in the shared end marker
    for (const thenExit of then.exitPoints) {
        cfg.addEdge(exitId, thenExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    for (const elseExit of otherwise?.exitPoints ?? []) {
        cfg.addEdge(exitId, elseExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // without an else-branch, a false condition leads directly to the end marker
    if (!otherwise) {
        for (const conditionExit of condition.exitPoints) {
            cfg.addEdge(exitId, conditionExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: ifId });
        }
    }
    // breaks, nexts and returns of both branches are passed up unchanged
    const fromBothBranches = (key) => [...then[key], ...(otherwise?.[key] ?? [])];
    return {
        graph: cfg,
        breaks: fromBothBranches('breaks'),
        nexts: fromBothBranches('nexts'),
        returns: fromBothBranches('returns'),
        exitPoints: [exitId],
        entryPoints: [ifId]
    };
}
/**
 * Builds the control flow information for a `repeat` loop.
 * The body's graph is reused and extended in place with a vertex for the
 * loop and an end marker `<id>-exit`.
 *
 * @param repeat - the repeat node
 * @param body - control flow information of the loop body
 * @returns information whose only exit point is the end marker; breaks and
 *          nexts of the body are consumed by the loop, returns are passed up
 */
function cfgRepeat(repeat, body) {
    const repeatId = repeat.info.id;
    const exitId = repeatId + '-exit';
    const { graph } = body;
    graph.addVertex({ id: repeatId, type: identifyMayStatementType(repeat), end: [exitId] });
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: repeatId });
    for (const bodyEntry of body.entryPoints) {
        graph.addEdge(bodyEntry, repeatId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // a `next` and the regular end of the body both jump back to the loop head
    for (const backJump of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(repeatId, backJump, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // only a `break` reaches the end marker
    for (const breakPoint of body.breaks) {
        graph.addEdge(exitId, breakPoint, { label: 0 /* CfgEdgeType.Fd */ });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [exitId],
        entryPoints: [repeatId]
    };
}
/**
 * Builds the control flow information for a `while` loop.
 * The condition's graph is reused and extended in place with a vertex for
 * the loop, an end marker `<id>-exit` and the merged body graph.
 *
 * @param whileLoop - the while node
 * @param condition - control flow information of the loop condition
 * @param body - control flow information of the loop body
 * @returns information whose only exit point is the end marker; breaks and
 *          nexts of the body are consumed by the loop, returns are passed up
 */
function cfgWhile(whileLoop, condition, body) {
    const whileId = whileLoop.info.id;
    const exitId = whileId + '-exit';
    const { graph } = condition;
    graph.addVertex({ id: whileId, type: identifyMayStatementType(whileLoop), mid: condition.exitPoints, end: [exitId] });
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: whileId });
    graph.mergeWith(body.graph);
    for (const conditionEntry of condition.entryPoints) {
        graph.addEdge(conditionEntry, whileId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // the body only runs if the condition holds
    for (const conditionExit of condition.exitPoints) {
        for (const bodyEntry of body.entryPoints) {
            graph.addEdge(bodyEntry, conditionExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: whileId });
        }
    }
    // a `next` and the regular end of the body both jump back to the loop head
    for (const backJump of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(whileId, backJump, { label: 0 /* CfgEdgeType.Fd */ });
    }
    for (const breakPoint of body.breaks) {
        graph.addEdge(exitId, breakPoint, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // besides `break`, the loop is also left when the condition is false
    for (const conditionExit of condition.exitPoints) {
        graph.addEdge(exitId, conditionExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: whileId });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: [exitId],
        entryPoints: [whileId]
    };
}
/**
 * Builds the control flow information for a `for` loop.
 * The loop variable's graph is reused and extended in place with a vertex
 * for the loop and the merged graphs of vector and body. The vector is
 * evaluated first, then the variable, then (conditionally) the body.
 *
 * The end marker `<id>-exit` is only created if the body has a regular exit
 * point or contains a break; otherwise the result has no exit points.
 * NOTE(review): in that case the loop has no way out in the graph even though
 * the vector could be exhausted (or empty) - confirm that this is intended.
 *
 * @param forLoop - the for node
 * @param variable - control flow information of the loop variable
 * @param vector - control flow information of the iterated vector
 * @param body - control flow information of the loop body
 */
function cfgFor(forLoop, variable, vector, body) {
    const forLoopId = forLoop.info.id;
    const exitId = forLoopId + '-exit';
    const { graph } = variable;
    graph.addVertex({ id: forLoopId, type: identifyMayStatementType(forLoop), end: [exitId], mid: variable.exitPoints });
    graph.mergeWith(vector.graph);
    graph.mergeWith(body.graph);
    for (const vectorEntry of vector.entryPoints) {
        graph.addEdge(vectorEntry, forLoopId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    for (const vectorExit of vector.exitPoints) {
        for (const variableEntry of variable.entryPoints) {
            graph.addEdge(variableEntry, vectorExit, { label: 0 /* CfgEdgeType.Fd */ });
        }
    }
    // the body runs as long as there is another element for the variable
    for (const variableExit of variable.exitPoints) {
        for (const bodyEntry of body.entryPoints) {
            graph.addEdge(bodyEntry, variableExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RTrue, caused: forLoopId });
        }
    }
    // a `next` and the regular end of the body both jump back to the loop head
    for (const backJump of [...body.nexts, ...body.exitPoints]) {
        graph.addEdge(forLoopId, backJump, { label: 0 /* CfgEdgeType.Fd */ });
    }
    for (const breakPoint of body.breaks) {
        graph.addEdge(exitId, breakPoint, { label: 0 /* CfgEdgeType.Fd */ });
    }
    const isNotEndless = body.exitPoints.length > 0 || body.breaks.length > 0;
    if (isNotEndless) {
        graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: forLoopId });
        for (const variableExit of variable.exitPoints) {
            graph.addEdge(exitId, variableExit, { label: 1 /* CfgEdgeType.Cd */, when: convert_values_1.RFalse, caused: forLoopId });
        }
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: body.returns,
        exitPoints: isNotEndless ? [exitId] : [],
        entryPoints: [forLoopId]
    };
}
/**
 * Builds the control flow information for a function definition.
 *
 * The definition itself is a single vertex that is both entry and exit point
 * of the result. Parameters and body are merged with the second `mergeWith`
 * argument set to `true` and recorded in the `children` of that vertex,
 * together with the end marker `<id>-exit`, which is added with `false` as
 * second `addVertex` argument.
 *
 * @param fn - the function definition node
 * @param params - control flow information of every parameter
 * @param body - control flow information of the function body
 * @returns information without breaks, nexts or returns - they all end in the
 *          end marker of the function
 */
function cfgFunctionDefinition(fn, params, body) {
    const fnId = fn.info.id;
    const exitId = fnId + '-exit';
    const graph = new control_flow_graph_1.ControlFlowGraph();
    const paramExits = params.flatMap(param => param.exitPoints);
    // this array is handed to the vertex and extended afterwards on purpose
    const children = [...paramExits, exitId];
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: fnId }, false);
    graph.addVertex({ id: fnId, children, type: identifyMayStatementType(fn), mid: paramExits, end: [exitId] });
    graph.mergeWith(body.graph, true);
    children.push(...body.graph.rootIds());
    for (const param of params) {
        graph.mergeWith(param.graph, true);
        children.push(...param.graph.rootIds());
        for (const paramEntry of param.entryPoints) {
            graph.addEdge(paramEntry, fnId, { label: 0 /* CfgEdgeType.Fd */ });
        }
    }
    // without parameters the body directly follows the definition vertex
    const bodyPredecessors = paramExits.length === 0 ? [fnId] : paramExits;
    for (const predecessor of bodyPredecessors) {
        for (const bodyEntry of body.entryPoints) {
            graph.addEdge(bodyEntry, predecessor, { label: 0 /* CfgEdgeType.Fd */ });
        }
    }
    // breaks and nexts should be illegal but safe is safe, I guess
    const leavingPoints = [...body.returns, ...body.breaks, ...body.nexts, ...body.exitPoints];
    for (const leavingPoint of leavingPoints) {
        graph.addEdge(exitId, leavingPoint, { label: 0 /* CfgEdgeType.Fd */ });
    }
    return {
        graph,
        breaks: [],
        nexts: [],
        returns: [],
        exitPoints: [fnId],
        entryPoints: [fnId]
    };
}
/**
 * Builds the control flow information for a function call.
 *
 * The graph of the called name is reused and extended in place with a vertex
 * for the call and an end marker `<id>-<exit>`. The name is evaluated first,
 * then every non-empty argument in order; the last of them leads to the end
 * marker.
 *
 * @param call - the function call node
 * @param name - control flow information of the called name
 * @param args - control flow information of the arguments, may contain `EmptyArgument`
 * @param exit - suffix used to name the end marker (defaults to `'exit'`)
 * @returns information whose only exit point is the end marker; breaks, nexts
 *          and returns of the name and all arguments are passed up
 */
function cfgFunctionCall(call, name, args, exit = 'exit') {
    const callId = call.info.id;
    // computed once, so that the end marker, the exit point and the final edges
    // always agree - also for a custom `exit` suffix
    const exitId = callId + '-' + exit;
    const graph = name.graph;
    const info = { graph, breaks: [...name.breaks], nexts: [...name.nexts], returns: [...name.returns], exitPoints: [exitId], entryPoints: [callId] };
    graph.addVertex({ id: callId, type: identifyMayStatementType(call), mid: name.exitPoints, end: [exitId] });
    for (const entryPoint of name.entryPoints) {
        graph.addEdge(entryPoint, callId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: callId });
    let lastArgExits = name.exitPoints;
    for (const arg of args) {
        if (arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        graph.mergeWith(arg.graph);
        info.breaks = info.breaks.concat(arg.breaks);
        info.nexts = info.nexts.concat(arg.nexts);
        info.returns = info.returns.concat(arg.returns);
        // every argument follows whatever was evaluated before it
        for (const entry of arg.entryPoints) {
            for (const previousExit of lastArgExits) {
                graph.addEdge(entry, previousExit, { label: 0 /* CfgEdgeType.Fd */ });
            }
        }
        lastArgExits = arg.exitPoints;
    }
    for (const lastExit of lastArgExits) {
        graph.addEdge(exitId, lastExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    // should not contain any breaks, nexts, or returns, (except for the body if something like 'break()')
    return info;
}
/**
 * Suffix appended to the id of a call to name the additional end marker that
 * `cfgFunctionCallWithDataflow` creates when at least one call target is a
 * function definition in the dataflow graph.
 */
exports.ResolvedCallSuffix = '-resolved-call-exit';
/**
 * Creates a function-call fold that additionally links the call to its
 * targets, as far as the given dataflow graph knows them.
 *
 * The returned handler first builds the plain call information with
 * `cfgFunctionCall`. Every target that is a function definition vertex in the
 * dataflow graph is recorded in `callTargets` of the call vertex. If there is
 * at least one such target, a further end marker (call id plus
 * `ResolvedCallSuffix`) is added which follows the regular exit points of the
 * call as well as the `-exit` markers of all targets, and which becomes the
 * only exit point. Otherwise the plain information is returned as is.
 *
 * @param graph - the dataflow graph used to resolve call targets
 */
function cfgFunctionCallWithDataflow(graph) {
    return (call, name, args) => {
        const callId = call.info.id;
        const baseCfg = cfgFunctionCall(call, name, args);
        /* try to resolve the call and link the target definitions */
        const targets = (0, linker_1.getAllFunctionCallTargets)(callId, graph);
        const targetExits = [];
        const callVertex = baseCfg.graph.getVertex(callId);
        (0, assert_1.guard)(callVertex !== undefined, 'cfgFunctionCallWithDataflow: call vertex not found');
        for (const target of targets) {
            // we have to filter out non-func-call targets as the call targets contains names and call ids
            if (!(0, vertex_1.isFunctionDefinitionVertex)(graph.getVertex(target))) {
                continue;
            }
            callVertex.callTargets ??= new Set();
            callVertex.callTargets.add(target);
            targetExits.push(target + '-exit');
        }
        if (targetExits.length === 0) {
            return baseCfg;
        }
        const resolvedExitId = callId + exports.ResolvedCallSuffix;
        baseCfg.graph.addVertex({
            id: resolvedExitId,
            type: control_flow_graph_1.CfgVertexType.EndMarker,
            root: callId
        });
        for (const predecessor of [...baseCfg.exitPoints, ...targetExits]) {
            baseCfg.graph.addEdge(resolvedExitId, predecessor, { label: 0 /* CfgEdgeType.Fd */ });
        }
        return Object.assign({}, baseCfg, { exitPoints: [resolvedExitId] });
    };
}
/**
 * Builds the control flow information shared by arguments and parameters.
 *
 * A fresh graph gets a vertex for the node and an end marker `<id>-exit`.
 * If present, the name is evaluated first and the value afterwards; whatever
 * comes last leads to the end marker. With neither name nor value the node
 * vertex is directly followed by its end marker.
 *
 * @param node - the argument or parameter node
 * @param name - control flow information of the name, if there is one
 * @param value - control flow information of the (default) value, if there is one
 */
function cfgArgumentOrParameter(node, name, value) {
    const nodeId = node.info.id;
    const exitId = nodeId + '-exit';
    const graph = new control_flow_graph_1.ControlFlowGraph();
    const info = { graph, breaks: [], nexts: [], returns: [], exitPoints: [exitId], entryPoints: [nodeId] };
    // merges the graph of a part and passes its breaks, nexts and returns up
    const absorb = (part) => {
        graph.mergeWith(part.graph);
        info.breaks = info.breaks.concat(part.breaks);
        info.nexts = info.nexts.concat(part.nexts);
        info.returns = info.returns.concat(part.returns);
    };
    let pendingExits = name?.exitPoints ?? [nodeId];
    graph.addVertex({ id: nodeId, type: control_flow_graph_1.CfgVertexType.Expression, mid: pendingExits, end: [exitId] });
    if (name) {
        absorb(name);
        for (const nameEntry of name.entryPoints) {
            graph.addEdge(nameEntry, nodeId, { label: 0 /* CfgEdgeType.Fd */ });
        }
    }
    if (value) {
        absorb(value);
        for (const pendingExit of pendingExits) {
            for (const valueEntry of value.entryPoints) {
                graph.addEdge(valueEntry, pendingExit, { label: 0 /* CfgEdgeType.Fd */ });
            }
        }
        pendingExits = value.exitPoints;
    }
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: nodeId });
    for (const pendingExit of pendingExits) {
        graph.addEdge(exitId, pendingExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    return info;
}
/**
 * Builds the control flow information for a binary operation (also used for pipes).
 *
 * A fresh graph receives both operand graphs, a vertex for the operation and
 * an end marker `<id>-exit`. The left-hand side is evaluated first, the
 * right-hand side afterwards. Operations whose `flavor` is `'assignment'`
 * become statements, all others expressions.
 *
 * @param binOp - the binary operation node
 * @param lhs - control flow information of the left-hand side
 * @param rhs - control flow information of the right-hand side
 */
function cfgBinaryOp(binOp, lhs, rhs) {
    const opId = binOp.info.id;
    const exitId = opId + '-exit';
    const { Statement, Expression, EndMarker } = control_flow_graph_1.CfgVertexType;
    const graph = new control_flow_graph_1.ControlFlowGraph().mergeWith(lhs.graph).mergeWith(rhs.graph);
    const result = {
        graph,
        breaks: lhs.breaks.concat(rhs.breaks),
        nexts: lhs.nexts.concat(rhs.nexts),
        returns: lhs.returns.concat(rhs.returns),
        entryPoints: [opId],
        exitPoints: [exitId]
    };
    const type = binOp.flavor === 'assignment' ? Statement : Expression;
    graph.addVertex({ id: opId, type, end: [exitId] });
    graph.addVertex({ id: exitId, type: EndMarker, root: opId });
    for (const lhsExit of lhs.exitPoints) {
        for (const rhsEntry of rhs.entryPoints) {
            graph.addEdge(rhsEntry, lhsExit, { label: 0 /* CfgEdgeType.Fd */ });
        }
    }
    for (const lhsEntry of lhs.entryPoints) {
        graph.addEdge(lhsEntry, opId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    for (const rhsExit of rhs.exitPoints) {
        graph.addEdge(exitId, rhsExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    return result;
}
/**
 * Builds the control flow information for an access.
 *
 * The graph of the accessed name is reused and extended in place with a
 * vertex for the access and an end marker `<id>-exit`. The name is evaluated
 * first, then every non-empty accessor in order; the last of them leads to
 * the end marker.
 *
 * @param access - the access node
 * @param name - control flow information of the accessed name
 * @param accessors - control flow information of the accessors, may contain `EmptyArgument`
 * @returns a copy of the name's information with new entry/exit points and
 *          with the breaks, nexts and returns of all accessors appended
 */
function cfgAccess(access, name, accessors) {
    const accessId = access.info.id;
    const exitId = accessId + '-exit';
    const result = { ...name };
    const { graph } = result;
    graph.addVertex({ id: accessId, type: control_flow_graph_1.CfgVertexType.Expression, mid: name.exitPoints, end: [exitId] });
    for (const nameEntry of name.entryPoints) {
        graph.addEdge(nameEntry, accessId, { label: 0 /* CfgEdgeType.Fd */ });
    }
    let pendingExits = name.exitPoints;
    for (const accessor of accessors) {
        if (accessor === r_function_call_1.EmptyArgument) {
            continue;
        }
        graph.mergeWith(accessor.graph);
        for (const pendingExit of pendingExits) {
            for (const accessorEntry of accessor.entryPoints) {
                graph.addEdge(accessorEntry, pendingExit, { label: 0 /* CfgEdgeType.Fd */ });
            }
        }
        pendingExits = accessor.exitPoints;
        result.breaks = result.breaks.concat(accessor.breaks);
        result.nexts = result.nexts.concat(accessor.nexts);
        result.returns = result.returns.concat(accessor.returns);
    }
    // the edges to the end marker are added before the marker vertex itself
    for (const pendingExit of pendingExits) {
        graph.addEdge(exitId, pendingExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: accessId });
    result.entryPoints = [accessId];
    result.exitPoints = [exitId];
    return result;
}
/**
 * Builds the control flow information for a unary operation.
 *
 * The operand's graph is reused and extended in place with one vertex for the
 * operation. That vertex is an end marker whose root is itself; it follows
 * all exit points of the operand and becomes the only exit point. The entry
 * points (and everything else) are taken over from the operand.
 *
 * @param unary - the unary operation node
 * @param operand - control flow information of the operand
 */
function cfgUnaryOp(unary, operand) {
    const unaryId = unary.info.id;
    const { graph } = operand;
    graph.addVertex({ id: unaryId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: unaryId });
    for (const operandExit of operand.exitPoints) {
        graph.addEdge(unaryId, operandExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    return Object.assign({}, operand, { graph, exitPoints: [unaryId] });
}
/**
 * Builds the control flow information for an expression list.
 *
 * A fresh graph gets a vertex for the list; the expressions are then chained
 * one after another, each following the exit points of its predecessor.
 * If the last expression (or the empty list itself) has exit points, an end
 * marker `<id>-exit` is added and becomes the only exit point. Otherwise no
 * end marker is created, the `end` of the list vertex is `undefined` and the
 * result has no exit points.
 *
 * @param node - the expression list node
 * @param _grouping - unused
 * @param expressions - control flow information of the contained expressions
 */
function cfgExprList(node, _grouping, expressions) {
    const listId = node.info.id;
    const exitId = listId + '-exit';
    const graph = new control_flow_graph_1.ControlFlowGraph();
    const result = { graph, breaks: [], nexts: [], returns: [], exitPoints: [listId], entryPoints: [listId] };
    // kept as a variable because `end` is only known after all expressions were processed
    const listVertex = { id: listId, type: control_flow_graph_1.CfgVertexType.Expression, end: [] };
    graph.addVertex(listVertex);
    for (const expression of expressions) {
        for (const previousExit of result.exitPoints) {
            for (const expressionEntry of expression.entryPoints) {
                graph.addEdge(expressionEntry, previousExit, { label: 0 /* CfgEdgeType.Fd */ });
            }
        }
        graph.mergeWith(expression.graph);
        result.breaks = result.breaks.concat(expression.breaks);
        result.nexts = result.nexts.concat(expression.nexts);
        result.returns = result.returns.concat(expression.returns);
        result.exitPoints = expression.exitPoints;
    }
    if (result.exitPoints.length === 0) {
        // nothing leaves the list regularly, so there is no end marker
        listVertex.end = undefined;
        result.exitPoints = [];
        return result;
    }
    graph.addVertex({ id: exitId, type: control_flow_graph_1.CfgVertexType.EndMarker, root: listId });
    listVertex.end = [exitId];
    for (const lastExit of result.exitPoints) {
        graph.addEdge(exitId, lastExit, { label: 0 /* CfgEdgeType.Fd */ });
    }
    result.exitPoints = [exitId];
    return result;
}
/**
 * Convert a cfg to RDF quads.
 *
 * Only the id and the children of every vertex, and the endpoints, label
 * (as `type`) and `when` of every edge are handed to `graph2quads`, together
 * with the root ids and the entry points, exit points, breaks, nexts and
 * returns of the given control flow information.
 *
 * @param cfg - the control flow information to convert
 * @param config - passed on to `graph2quads` unchanged
 *
 * @see {@link df2quads}
 * @see {@link serialize2quads}
 * @see {@link graph2quads}
 */
function cfg2quads(cfg, config) {
    const { graph } = cfg;
    const rootIds = [...graph.rootIds()];
    const vertices = [];
    for (const [id, vertex] of [...graph.vertices().entries()]) {
        vertices.push({ id, children: vertex.children });
    }
    const edges = [];
    for (const [fromId, targets] of [...graph.edges()]) {
        for (const [toId, info] of [...targets]) {
            edges.push({ from: fromId, to: toId, type: info.label, when: info.when });
        }
    }
    const serializable = {
        rootIds,
        vertices,
        edges,
        entryPoints: cfg.entryPoints,
        exitPoints: cfg.exitPoints,
        breaks: cfg.breaks,
        nexts: cfg.nexts,
        returns: cfg.returns
    };
    return (0, quads_1.graph2quads)(serializable, config);
}
//# sourceMappingURL=extract-cfg.js.map