UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

259 lines 14.7 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.processFunctionDefinition = processFunctionDefinition;
exports.retrieveActiveEnvironment = retrieveActiveEnvironment;
exports.updateNestedFunctionClosures = updateNestedFunctionClosures;
exports.updateNestedFunctionCalls = updateNestedFunctionCalls;
const processor_1 = require("../../../../../processor");
const linker_1 = require("../../../../linker");
const known_call_handling_1 = require("../known-call-handling");
const unpack_argument_1 = require("../argument/unpack-argument");
const assert_1 = require("../../../../../../util/assert");
const logger_1 = require("../../../../../logger");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const graph_1 = require("../../../../../graph/graph");
const identifier_1 = require("../../../../../environments/identifier");
const overwrite_1 = require("../../../../../environments/overwrite");
const vertex_1 = require("../../../../../graph/vertex");
const scoping_1 = require("../../../../../environments/scoping");
const environment_1 = require("../../../../../environments/environment");
const resolve_by_name_1 = require("../../../../../environments/resolve-by-name");
const edge_1 = require("../../../../../graph/edge");
const log_1 = require("../../../../../../util/log");
const built_in_1 = require("../../../../../environments/built-in");
/**
 * Processes a function definition: the last element of `args` is treated as the body,
 * all preceding elements as the parameters.
 * Parameters and body are processed within a fresh environment (see `prepareFunctionEnvironment`),
 * their graphs are collected in a subgraph, and the resulting subflow is attached to a
 * `FunctionDefinition` vertex that carries the id of `name`.
 *
 * @param name   - node of the definition; `name.content` is used in messages, `name.info.id` as id of the created vertex
 * @param args   - the parameters, followed by the body as last element
 * @param rootId - only forwarded to `processKnownFunctionCall` if `args` is empty
 * @param data   - processor information; `environment`, `completeAst.idMap` and `controlDependencies` are read here
 * @returns dataflow information with empty `in`/`out`/`unknownReferences`/`exitPoints`, the definition as entry point,
 *          the graph containing the definition vertex, and the environment as it was before the definition
 */
function processFunctionDefinition(name, args, rootId, data) {
    if (args.length < 1) {
        logger_1.dataflowLogger.warn(`Function Definition ${name.content} does not have an argument, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'default' }).information;
    }
    /* we remove the last argument, as it is the body */
    const parameters = args.slice(0, -1);
    const bodyArg = (0, unpack_argument_1.unpackArgument)(args[args.length - 1]);
    (0, assert_1.guard)(bodyArg !== undefined, () => `Function Definition ${JSON.stringify(args)} has missing body! This is bad!`);
    // remembered so that the definition itself does not change the environment of the caller
    const originalEnvironment = data.environment;
    // within a function def we do not pass on the outer binds as they could be overwritten when called
    data = prepareFunctionEnvironment(data);
    const subgraph = new graph_1.DataflowGraph(data.completeAst.idMap);
    let readInParameters = [];
    for (const param of parameters) {
        (0, assert_1.guard)(param !== r_function_call_1.EmptyArgument, () => `Empty argument in function definition ${name.content}, ${JSON.stringify(args)}`);
        const processed = (0, processor_1.processDataflowFor)(param, data);
        subgraph.mergeWith(processed.graph);
        const read = [...processed.in, ...processed.unknownReferences];
        (0, linker_1.linkInputs)(read, data.environment, readInParameters, subgraph, false);
        // later parameters (and the body) see the definitions of the earlier ones
        data = { ...data, environment: (0, overwrite_1.overwriteEnvironment)(data.environment, processed.environment) };
    }
    const paramsEnvironments = data.environment;
    const body = (0, processor_1.processDataflowFor)(bodyArg, data);
    // As we know, parameters cannot technically duplicate (i.e., their names are unique), we overwrite their environments.
    // This is the correct behavior, even if someone uses non-`=` arguments in functions.
    const bodyEnvironment = body.environment;
    readInParameters = findPromiseLinkagesForParameters(subgraph, readInParameters, paramsEnvironments, body);
    const readInBody = [...body.in, ...body.unknownReferences];
    // there is no uncertainty regarding the arguments, as if a function header is executed, so is its body
    const remainingRead = (0, linker_1.linkInputs)(readInBody, paramsEnvironments, readInParameters.slice(), body.graph, true /* functions do not have to be called */);
    // functions can be called multiple times,
    // so if they have a global effect, we have to link them as if they would be executed a loop
    /* theoretically, we should just check if there is a global effect-write somewhere within */
    if (remainingRead.length > 0) {
        const nameIdShares = (0, linker_1.produceNameSharedIdMap)(remainingRead);
        const definedInLocalEnvironment = new Set([...bodyEnvironment.current.memory.values()].flat().map(d => d.nodeId));
        // Everything that is in body.out but not within the local environment populated for the function scope is a potential escape ~> global definition
        const globalBodyOut = body.out.filter(d => !definedInLocalEnvironment.has(d.nodeId));
        (0, linker_1.linkCircularRedefinitionsWithinALoop)(body.graph, nameIdShares, globalBodyOut);
    }
    subgraph.mergeWith(body.graph);
    const outEnvironment = (0, overwrite_1.overwriteEnvironment)(paramsEnvironments, bodyEnvironment);
    // every named reference that is still open gets a use vertex within the subgraph
    for (const read of remainingRead) {
        if (read.name) {
            subgraph.addVertex({
                tag: vertex_1.VertexType.Use,
                id: read.nodeId,
                environment: undefined,
                cds: undefined
            });
        }
    }
    const flow = {
        unknownReferences: [],
        in: remainingRead,
        out: [],
        entryPoint: body.entryPoint,
        graph: new Set(subgraph.rootIds()),
        environment: outEnvironment
    };
    updateNestedFunctionClosures(subgraph, outEnvironment, name.info.id);
    const exitPoints = body.exitPoints;
    const graph = new graph_1.DataflowGraph(data.completeAst.idMap).mergeWith(subgraph, false);
    graph.addVertex({
        tag: vertex_1.VertexType.FunctionDefinition,
        id: name.info.id,
        environment: (0, scoping_1.popLocalEnvironment)(outEnvironment),
        cds: data.controlDependencies,
        subflow: flow,
        // only exit points of type Return and Default are recorded with the definition
        exitPoints: exitPoints?.filter(e => e.type === 1 /* ExitPointType.Return */ || e.type === 0 /* ExitPointType.Default */).map(e => e.nodeId) ?? []
    });
    return {
        /* nothing escapes a function definition, but the function itself, will be forced in assignment: { nodeId: functionDefinition.info.id, scope: data.activeScope, used: 'always', name: functionDefinition.info.id as string } */
        unknownReferences: [],
        in: [],
        out: [],
        exitPoints: [],
        entryPoint: name.info.id,
        graph,
        environment: originalEnvironment
    };
}
// this is no longer necessary when we update environments to be back to front (e.g., with a list of environments)
// this favors the bigger environment
/**
 * Brings both environments to the same level by pushing local environments onto whichever one is lower,
 * and returns the result of overwriting `baseEnvironment` with `callerEnvironment`.
 *
 * @param callerEnvironment - replaced by `initializeCleanEnvironments(true)` if it is `null` or `undefined`
 * @param baseEnvironment   - environment that is overwritten by the caller environment
 */
function retrieveActiveEnvironment(callerEnvironment, baseEnvironment) {
    callerEnvironment ??= (0, environment_1.initializeCleanEnvironments)(true);
    let level = callerEnvironment.level ?? 0;
    if (baseEnvironment.level !== level) {
        while (baseEnvironment.level < level) {
            baseEnvironment = (0, scoping_1.pushLocalEnvironment)(baseEnvironment);
        }
        while (baseEnvironment.level > level) {
            callerEnvironment = (0, scoping_1.pushLocalEnvironment)(callerEnvironment);
            level = callerEnvironment.level;
        }
    }
    return (0, overwrite_1.overwriteEnvironment)(baseEnvironment, callerEnvironment);
}
/**
 * Update the closure links of all nested function definitions
 * @param graph - dataflow graph to collect the function definitions from and to update the closure links for
 * @param outEnvironment - active environment on resolving closures (i.e., exit of the function definition)
 * @param fnId - id of the function definition to update the closure links for
 */
function updateNestedFunctionClosures(graph, outEnvironment, fnId) {
    // track *all* function definitions - including those nested within the current graph,
    // try to resolve their 'in' by only using the lowest scope which will be popped after this definition
    for (const [id, { subflow, tag }] of graph.vertices(true)) {
        if (tag !== vertex_1.VertexType.FunctionDefinition) {
            continue;
        }
        const ingoingRefs = subflow.in;
        const remainingIn = [];
        for (const ingoing of ingoingRefs) {
            const resolved = ingoing.name ? (0, resolve_by_name_1.resolveByName)(ingoing.name, outEnvironment, ingoing.type) : undefined;
            if (resolved === undefined) {
                // unnamed or unresolved references stay open
                remainingIn.push(ingoing);
                continue;
            }
            (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Found ${resolved.length} references to open ref ${id} in closure of function definition ${fnId}`);
            let allBuiltIn = true;
            for (const ref of resolved) {
                graph.addEdge(ingoing, ref, edge_1.EdgeType.Reads);
                if (!(0, identifier_1.isReferenceType)(ref.type, identifier_1.ReferenceType.BuiltInConstant | identifier_1.ReferenceType.BuiltInFunction)) {
                    allBuiltIn = false;
                }
            }
            // references that only resolve to built-ins are linked, but kept open as well
            if (allBuiltIn) {
                remainingIn.push(ingoing);
            }
        }
        (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Keeping ${remainingIn.length} references to open ref ${id} in closure of function definition ${fnId}`);
        subflow.in = remainingIn;
    }
}
/**
 * Update the closure links of all nested function calls, this is probably to be done once at the end of the script
 * @param graph - dataflow graph to collect the function calls from and to update the closure links for
 * @param outEnvironment - active environment on resolving closures (i.e., exit of the function definition)
 */
function updateNestedFunctionCalls(graph, outEnvironment) {
    // track *all* function calls - including those nested within the current graph,
    // and try to resolve the open 'in' references of the function definitions they target
    for (const [id, { onlyBuiltin, tag, environment, name }] of graph.vertices(true)) {
        if (tag !== vertex_1.VertexType.FunctionCall || !name || onlyBuiltin) {
            continue;
        }
        // only the call environment counts!
        // NOTE(review): `outEnvironment` is reassigned here, so the level adjustment of one call
        // carries over to all vertices visited afterwards - confirm that this is intended.
        if (environment) {
            while (outEnvironment.level > environment.level) {
                outEnvironment = (0, scoping_1.popLocalEnvironment)(outEnvironment);
            }
            while (outEnvironment.level < environment.level) {
                outEnvironment = (0, scoping_1.pushLocalEnvironment)(outEnvironment);
            }
        }
        const effectiveEnvironment = environment ? (0, overwrite_1.overwriteEnvironment)(outEnvironment, environment) : outEnvironment;
        const targets = (0, linker_1.getAllFunctionCallTargets)(id, graph, effectiveEnvironment);
        for (const target of targets) {
            const targetVertex = graph.getVertex(target);
            if (targetVertex?.tag !== vertex_1.VertexType.FunctionDefinition) {
                // support reads on symbols
                if (targetVertex?.tag === vertex_1.VertexType.Use) {
                    graph.addEdge(id, target, edge_1.EdgeType.Reads);
                }
                continue;
            }
            graph.addEdge(id, target, edge_1.EdgeType.Calls);
            const ingoingRefs = targetVertex.subflow.in;
            const remainingIn = [];
            for (const ingoing of ingoingRefs) {
                const resolved = ingoing.name ? (0, resolve_by_name_1.resolveByName)(ingoing.name, effectiveEnvironment, ingoing.type) : undefined;
                if (resolved === undefined) {
                    remainingIn.push(ingoing);
                    continue;
                }
                // `id` is the call, `target` the function definition it resolves to
                (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Found ${resolved.length} references to open ref ${id} in closure of function definition ${target}`);
                for (const def of resolved) {
                    if (!(0, built_in_1.isBuiltIn)(def.nodeId)) {
                        graph.addEdge(ingoing, def, edge_1.EdgeType.DefinedByOnCall);
                        graph.addEdge(id, def, edge_1.EdgeType.DefinesOnCall);
                    }
                }
            }
            (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Keeping ${remainingIn.length} references to open ref ${id} in closure of function definition ${target}`);
            targetVertex.subflow.in = remainingIn;
        }
    }
}
/**
 * Creates the environment in which the parameters and the body of a function definition are processed:
 * clean environments with `data.environment.level + 1` local environments pushed on top.
 *
 * @param data - processor information, only `environment.level` is read
 * @returns a shallow copy of `data` that uses the new environment
 */
function prepareFunctionEnvironment(data) {
    let env = (0, environment_1.initializeCleanEnvironments)();
    for (let i = 0; i < data.environment.level + 1 /* add another env */; i++) {
        env = (0, scoping_1.pushLocalEnvironment)(env);
    }
    return { ...data, environment: env };
}
/**
 * Within something like `f <- function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }`
 * `a` will be defined by `b` and `b` will be a promise object bound by the first definition of b it can find.
 * This means that this function returns `2` due to the first `b <- 1` definition.
 * If the code is `f <- function(a=b, m=3) { if(m > 3) { b <- 1; }; a; b <- 5; a + 1 }`, we need a link to `b <- 1` and `b <- 5`
 * as `b` can be defined by either one of them.
 * <p>
 * <b>Currently we may be unable to narrow down every definition within the body as we have not implemented ways to track what covers the first definitions precisely</b>
 *
 * @param parameters       - graph of the parameters, receives the new `Reads` edges
 * @param readInParameters - references read within the parameters that are still open
 * @param parameterEnvs    - environment after processing all parameters
 * @param body             - dataflow information of the function body, only `body.out` is read
 * @returns all references of `readInParameters` that could be linked neither to a parameter nor to a write of the body
 */
function findPromiseLinkagesForParameters(parameters, readInParameters, parameterEnvs, body) {
    // first, we try to bind again within parameters - if we have it, fine
    const remainingRead = [];
    for (const read of readInParameters) {
        const resolved = read.name ? (0, resolve_by_name_1.resolveByName)(read.name, parameterEnvs, read.type) : undefined;
        if (resolved !== undefined) {
            for (const ref of resolved) {
                parameters.addEdge(read, ref, edge_1.EdgeType.Reads);
            }
            continue;
        }
        // If not resolved, link all outs within the body as potential reads.
        // Regarding the sort, we can ignore equality as nodeIds are unique.
        // We sort to get a single candidate first - if it has no control dependencies, we can safely use it instead of all of them.
        // NOTE(review): the comparator orders *descending* by the string form of the id, so index 0 holds the
        // lexicographically greatest id (e.g., "9" is placed before "10"), not the lowest one - confirm that this is intended.
        const writingOuts = body.out.filter(o => o.name === read.name).sort((a, b) => String(a.nodeId) < String(b.nodeId) ? 1 : -1);
        if (writingOuts.length === 0) {
            remainingRead.push(read);
            continue;
        }
        if (writingOuts[0].controlDependencies === undefined) {
            parameters.addEdge(read, writingOuts[0], edge_1.EdgeType.Reads);
            continue;
        }
        for (const out of writingOuts) {
            parameters.addEdge(read, out, edge_1.EdgeType.Reads);
        }
    }
    return remainingRead;
}
//# sourceMappingURL=built-in-function-definition.js.map