/*
 * @eagleoutice/flowr
 * Version:
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 259 lines • 14.7 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.processFunctionDefinition = processFunctionDefinition;
exports.retrieveActiveEnvironment = retrieveActiveEnvironment;
exports.updateNestedFunctionClosures = updateNestedFunctionClosures;
exports.updateNestedFunctionCalls = updateNestedFunctionCalls;
const processor_1 = require("../../../../../processor");
const linker_1 = require("../../../../linker");
const known_call_handling_1 = require("../known-call-handling");
const unpack_argument_1 = require("../argument/unpack-argument");
const assert_1 = require("../../../../../../util/assert");
const logger_1 = require("../../../../../logger");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const graph_1 = require("../../../../../graph/graph");
const identifier_1 = require("../../../../../environments/identifier");
const overwrite_1 = require("../../../../../environments/overwrite");
const vertex_1 = require("../../../../../graph/vertex");
const scoping_1 = require("../../../../../environments/scoping");
const environment_1 = require("../../../../../environments/environment");
const resolve_by_name_1 = require("../../../../../environments/resolve-by-name");
const edge_1 = require("../../../../../graph/edge");
const log_1 = require("../../../../../../util/log");
const built_in_1 = require("../../../../../environments/built-in");
/**
 * Processes a function definition: the parameters and the body are analyzed within a fresh environment,
 * collected in a subgraph, and represented by a single function-definition vertex in the returned graph.
 *
 * @param name   - node of the definition; `name.content` is used in messages, `name.info.id` becomes the id of the created vertex
 * @param args   - the parameters of the function followed by its body as the last element
 * @param rootId - only forwarded to `processKnownFunctionCall` if there are no arguments at all
 * @param data   - processor information; `environment`, `completeAst.idMap`, and `controlDependencies` are read here
 * @returns dataflow information with empty `unknownReferences`, `in`, `out`, and `exitPoints`; the entry point is the id of the
 *          definition and the environment is the one `data` carried on entry
 */
function processFunctionDefinition(name, args, rootId, data) {
// without any argument there is no body either, so we fall back to the handling of a known function call
if (args.length < 1) {
logger_1.dataflowLogger.warn(`Function Definition ${name.content} does not have an argument, skipping`);
return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'default' }).information;
}
/* we remove the last argument, as it is the body */
const parameters = args.slice(0, -1);
const bodyArg = (0, unpack_argument_1.unpackArgument)(args[args.length - 1]);
(0, assert_1.guard)(bodyArg !== undefined, () => `Function Definition ${JSON.stringify(args)} has missing body! This is bad!`);
// remembered so that the returned information carries the environment exactly as it came in
const originalEnvironment = data.environment;
// within a function def we do not pass on the outer binds as they could be overwritten when called
data = prepareFunctionEnvironment(data);
// collects the graphs of all parameters and (later) of the body
const subgraph = new graph_1.DataflowGraph(data.completeAst.idMap);
// handed to every `linkInputs` call for the parameters
// NOTE(review): presumably `linkInputs` appends the reads it cannot resolve to this array - confirm in the linker
let readInParameters = [];
for (const param of parameters) {
(0, assert_1.guard)(param !== r_function_call_1.EmptyArgument, () => `Empty argument in function definition ${name.content}, ${JSON.stringify(args)}`);
const processed = (0, processor_1.processDataflowFor)(param, data);
subgraph.mergeWith(processed.graph);
const read = [...processed.in, ...processed.unknownReferences];
// the reads are linked against the environment *before* the environment of this parameter is written over it
(0, linker_1.linkInputs)(read, data.environment, readInParameters, subgraph, false);
// the next parameter (and in the end the body) is processed with the environment of this parameter written over the previous one
data = { ...data, environment: (0, overwrite_1.overwriteEnvironment)(data.environment, processed.environment) };
}
// environment after all parameters have been processed
const paramsEnvironments = data.environment;
const body = (0, processor_1.processDataflowFor)(bodyArg, data);
// As we know, parameters cannot technically duplicate (i.e., their names are unique), we overwrite their environments.
// This is the correct behavior, even if someone uses non-`=` arguments in functions.
const bodyEnvironment = body.environment;
// links the reads of the parameters to the parameter environment or to the writes of the body, only the unresolved reads remain
readInParameters = findPromiseLinkagesForParameters(subgraph, readInParameters, paramsEnvironments, body);
const readInBody = [...body.in, ...body.unknownReferences];
// there is no uncertainty regarding the arguments, as if a function header is executed, so is its body
// (the linker receives a copy of the remaining parameter reads, `readInParameters` itself is not passed on)
const remainingRead = (0, linker_1.linkInputs)(readInBody, paramsEnvironments, readInParameters.slice(), body.graph, true /* functions do not have to be called */);
// functions can be called multiple times,
// so if they have a global effect, we have to link them as if they would be executed a loop
/* theoretically, we should just check if there is a global effect-write somewhere within */
if (remainingRead.length > 0) {
const nameIdShares = (0, linker_1.produceNameSharedIdMap)(remainingRead);
// ids of all definitions stored in the current (i.e., innermost) scope of the body environment
const definedInLocalEnvironment = new Set([...bodyEnvironment.current.memory.values()].flat().map(d => d.nodeId));
// Everything that is in body.out but not within the local environment populated for the function scope is a potential escape ~> global definition
const globalBodyOut = body.out.filter(d => !definedInLocalEnvironment.has(d.nodeId));
(0, linker_1.linkCircularRedefinitionsWithinALoop)(body.graph, nameIdShares, globalBodyOut);
}
subgraph.mergeWith(body.graph);
// environment at the end of the function: the body environment written over the parameter environment
const outEnvironment = (0, overwrite_1.overwriteEnvironment)(paramsEnvironments, bodyEnvironment);
// every named read that is still open is added as a use vertex to the subgraph
for (const read of remainingRead) {
if (read.name) {
subgraph.addVertex({
tag: vertex_1.VertexType.Use,
id: read.nodeId,
environment: undefined,
cds: undefined
});
}
}
// the subflow stored with the function-definition vertex; its `graph` only holds the root ids of the subgraph
const flow = {
unknownReferences: [],
in: remainingRead,
out: [],
entryPoint: body.entryPoint,
graph: new Set(subgraph.rootIds()),
environment: outEnvironment
};
// resolves the open references of the function definitions found in the subgraph against the out-environment of this function
updateNestedFunctionClosures(subgraph, outEnvironment, name.info.id);
const exitPoints = body.exitPoints;
const graph = new graph_1.DataflowGraph(data.completeAst.idMap).mergeWith(subgraph, false);
graph.addVertex({
tag: vertex_1.VertexType.FunctionDefinition,
id: name.info.id,
// the vertex stores the out-environment with its innermost scope popped
environment: (0, scoping_1.popLocalEnvironment)(outEnvironment),
cds: data.controlDependencies,
subflow: flow,
// only the node ids of return and default exit points are kept, without any exit point this is the empty array
exitPoints: exitPoints?.filter(e => e.type === 1 /* ExitPointType.Return */ || e.type === 0 /* ExitPointType.Default */).map(e => e.nodeId) ?? []
});
return {
/* nothing escapes a function definition, but the function itself, will be forced in assignment: { nodeId: functionDefinition.info.id, scope: data.activeScope, used: 'always', name: functionDefinition.info.id as string } */
unknownReferences: [],
in: [],
out: [],
exitPoints: [],
entryPoint: name.info.id,
graph,
environment: originalEnvironment
};
}
/**
 * Writes the environment of the caller over a base environment after both have been brought to the same level.
 * This is no longer necessary when we update environments to be back to front (e.g., with a list of environments).
 * The bigger environment is favored: whichever side is on the lower level receives additional local
 * environments until the levels match, nothing is ever popped.
 *
 * @param callerEnvironment - environment of the caller, a clean environment is initialized if it is `null` or `undefined`
 * @param baseEnvironment   - environment the caller environment is written over
 * @returns the result of `overwriteEnvironment` applied to the aligned base and caller environments
 */
function retrieveActiveEnvironment(callerEnvironment, baseEnvironment) {
    let caller = callerEnvironment ?? (0, environment_1.initializeCleanEnvironments)(true);
    let base = baseEnvironment;
    let targetLevel = caller.level ?? 0;
    // the base is below the caller: lift the base
    while (base.level < targetLevel) {
        base = (0, scoping_1.pushLocalEnvironment)(base);
    }
    // the base is above the caller: lift the caller and follow its level
    while (base.level > targetLevel) {
        caller = (0, scoping_1.pushLocalEnvironment)(caller);
        targetLevel = caller.level;
    }
    return (0, overwrite_1.overwriteEnvironment)(base, caller);
}
/**
 * Update the closure links of all nested function definitions.
 * Every open reference (`subflow.in`) of every function-definition vertex in the graph is resolved by name
 * against the given environment. Each definition found is linked with a `Reads` edge, and the reference is
 * only dropped from `subflow.in` if at least one of the definitions is neither a built-in constant nor a
 * built-in function. References without a name or without any resolution stay open.
 *
 * @param graph - dataflow graph to collect the function definitions from and to update the closure links for
 * @param outEnvironment - active environment on resolving closures (i.e., exit of the function definition)
 * @param fnId - id of the function definition to update the closure links for (only used for the trace messages)
 */
function updateNestedFunctionClosures(graph, outEnvironment, fnId) {
    const isBuiltInReference = (reference) => (0, identifier_1.isReferenceType)(reference.type, identifier_1.ReferenceType.BuiltInConstant | identifier_1.ReferenceType.BuiltInFunction);
    // we visit *all* vertices - including those nested within function definitions - but only care for the definitions
    for (const [vertexId, vertex] of graph.vertices(true)) {
        if (vertex.tag !== vertex_1.VertexType.FunctionDefinition) {
            continue;
        }
        const { subflow } = vertex;
        const stillOpen = [];
        for (const openRef of subflow.in) {
            const definitions = openRef.name ? (0, resolve_by_name_1.resolveByName)(openRef.name, outEnvironment, openRef.type) : undefined;
            if (definitions !== undefined) {
                (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Found ${definitions.length} references to open ref ${vertexId} in closure of function definition ${fnId}`);
                let sawNonBuiltIn = false;
                for (const definition of definitions) {
                    graph.addEdge(openRef, definition, edge_1.EdgeType.Reads);
                    if (!isBuiltInReference(definition)) {
                        sawNonBuiltIn = true;
                    }
                }
                if (sawNonBuiltIn) {
                    // resolved to something that is not built-in, the reference is closed
                    continue;
                }
            }
            // either unresolved or only resolved to built-ins: keep the reference open
            stillOpen.push(openRef);
        }
        (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Keeping ${stillOpen.length} references to open ref ${vertexId} in closure of function definition ${fnId}`);
        subflow.in = stillOpen;
    }
}
/**
 * Update the closure links of all nested function calls, this is probably to be done once at the end of the script.
 * For every named function-call vertex that is not marked as `onlyBuiltin`, the call targets are retrieved with
 * `getAllFunctionCallTargets`. A target that is a use vertex is linked with a `Reads` edge. A target that is a
 * function definition is linked with a `Calls` edge, and its open references (`subflow.in`) are resolved by name
 * against the environment of the call: every definition that is not built-in receives a `DefinedByOnCall` edge
 * from the reference and a `DefinesOnCall` edge from the call. Afterwards only the references that could not be
 * resolved at all remain in `subflow.in` of the target.
 *
 * @param graph - dataflow graph to collect the function calls from and to update the closure links for
 * @param outEnvironment - active environment on resolving closures (i.e., exit of the function definition)
 */
function updateNestedFunctionCalls(graph, outEnvironment) {
    // The level adjustments are deliberately carried over from one call vertex to the next,
    // each call continues with the environment the previous call left behind.
    let levelledEnvironment = outEnvironment;
    // track *all* function calls - including those nested within function definitions of the current graph
    for (const [id, { onlyBuiltin, tag, environment, name }] of graph.vertices(true)) {
        if (tag !== vertex_1.VertexType.FunctionCall || !name || onlyBuiltin) {
            continue;
        }
        // only the call environment counts!
        if (environment) {
            while (levelledEnvironment.level > environment.level) {
                levelledEnvironment = (0, scoping_1.popLocalEnvironment)(levelledEnvironment);
            }
            while (levelledEnvironment.level < environment.level) {
                levelledEnvironment = (0, scoping_1.pushLocalEnvironment)(levelledEnvironment);
            }
        }
        const effectiveEnvironment = environment ? (0, overwrite_1.overwriteEnvironment)(levelledEnvironment, environment) : levelledEnvironment;
        const targets = (0, linker_1.getAllFunctionCallTargets)(id, graph, effectiveEnvironment);
        for (const target of targets) {
            const targetVertex = graph.getVertex(target);
            if (targetVertex?.tag !== vertex_1.VertexType.FunctionDefinition) {
                // support reads on symbols
                if (targetVertex?.tag === vertex_1.VertexType.Use) {
                    graph.addEdge(id, target, edge_1.EdgeType.Reads);
                }
                continue;
            }
            graph.addEdge(id, target, edge_1.EdgeType.Calls);
            const remainingIn = [];
            for (const ingoing of targetVertex.subflow.in) {
                const resolved = ingoing.name ? (0, resolve_by_name_1.resolveByName)(ingoing.name, effectiveEnvironment, ingoing.type) : undefined;
                if (resolved === undefined) {
                    remainingIn.push(ingoing);
                    continue;
                }
                // the message names the open reference and the called definition (it used to print the id of the call for both)
                (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Found ${resolved.length} references to open ref ${ingoing.nodeId} in closure of function definition ${target}`);
                for (const def of resolved) {
                    // built-in definitions do not receive on-call edges
                    if (!(0, built_in_1.isBuiltIn)(def.nodeId)) {
                        graph.addEdge(ingoing, def, edge_1.EdgeType.DefinedByOnCall);
                        graph.addEdge(id, def, edge_1.EdgeType.DefinesOnCall);
                    }
                }
            }
            (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Keeping ${remainingIn.length} open references in closure of function definition ${target} after resolving call ${id}`);
            targetVertex.subflow.in = remainingIn;
        }
    }
}
/**
 * Creates the processor information used within a function definition: a clean environment that receives
 * one local environment for each level of the current environment plus an additional one.
 * The passed `data` is not modified, a shallow copy with the new environment is returned.
 *
 * @param data - processor information, only `data.environment.level` is read
 * @returns a copy of `data` whose `environment` is the freshly created one
 */
function prepareFunctionEnvironment(data) {
    let environment = (0, environment_1.initializeCleanEnvironments)();
    // as many scopes as the surrounding environment has levels, and another one on top
    const scopesToPush = data.environment.level + 1;
    let pushed = 0;
    while (pushed < scopesToPush) {
        environment = (0, scoping_1.pushLocalEnvironment)(environment);
        pushed += 1;
    }
    return { ...data, environment };
}
/**
 * Links the reads that occur within the parameters of a function definition.
 *
 * Within something like `f <- function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }`
 * `a` will be defined by `b` and `b` will be a promise object bound by the first definition of b it can find.
 * This means that this function returns `2` due to the first `b <- 1` definition.
 * If the code is `f <- function(a=b, m=3) { if(m > 3) { b <- 1; }; a; b <- 5; a + 1 }`, we need a link to `b <- 1` and `b <- 5`
 * as `b` can be defined by either one of them.
 * <p>
 * <b>Currently we may be unable to narrow down every definition within the body as we have not implemented ways to track what covers the first definitions precisely</b>
 *
 * @param parameters       - graph that receives the `Reads` edges
 * @param readInParameters - reads that are still open after the parameters have been processed
 * @param parameterEnvs    - environment of the parameters, tried first for every read
 * @param body             - dataflow information of the function body, only `body.out` is consulted (and not modified)
 * @returns the reads that are neither resolved by the parameter environment nor written within the body
 */
function findPromiseLinkagesForParameters(parameters, readInParameters, parameterEnvs, body) {
    // Orders by the stringified node id in descending order, we can ignore equality as nodeIds are unique.
    // NOTE(review): the previous comment spoke of retrieving the *lowest* id, yet this comparator places the
    // (lexicographically) highest id first - confirm which order is intended before relying on it.
    const byDescendingId = (a, b) => String(a.nodeId) < String(b.nodeId) ? 1 : -1;
    const unresolved = [];
    for (const read of readInParameters) {
        // first, we try to bind again within parameters - if we have it, fine
        const boundByParameters = read.name ? (0, resolve_by_name_1.resolveByName)(read.name, parameterEnvs, read.type) : undefined;
        let targets;
        if (boundByParameters !== undefined) {
            targets = boundByParameters;
        }
        else {
            // if not resolved, all outs of the body with the same name are potential reads
            const candidates = body.out.filter(out => out.name === read.name).sort(byDescendingId);
            if (candidates.length === 0) {
                unresolved.push(read);
                continue;
            }
            // if the first candidate has no control dependencies, we can safely use it instead of all of them
            const [first] = candidates;
            targets = first.controlDependencies === undefined ? [first] : candidates;
        }
        for (const target of targets) {
            parameters.addEdge(read, target, edge_1.EdgeType.Reads);
        }
    }
    return unresolved;
}
//# sourceMappingURL=built-in-function-definition.js.map