UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

328 lines 16.8 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.findNonLocalReads = findNonLocalReads; exports.produceNameSharedIdMap = produceNameSharedIdMap; exports.linkArgumentsOnCall = linkArgumentsOnCall; exports.linkFunctionCallWithSingleTarget = linkFunctionCallWithSingleTarget; exports.linkFunctionCalls = linkFunctionCalls; exports.getAllFunctionCallTargets = getAllFunctionCallTargets; exports.getAllLinkedFunctionDefinitions = getAllLinkedFunctionDefinitions; exports.linkInputs = linkInputs; exports.linkCircularRedefinitionsWithinALoop = linkCircularRedefinitionsWithinALoop; const defaultmap_1 = require("../../util/collections/defaultmap"); const assert_1 = require("../../util/assert"); const log_1 = require("../../util/log"); const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id"); const identifier_1 = require("../environments/identifier"); const graph_1 = require("../graph/graph"); const logger_1 = require("../logger"); const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const edge_1 = require("../graph/edge"); const type_1 = require("../../r-bridge/lang-4.x/ast/model/type"); const vertex_1 = require("../graph/vertex"); const resolve_by_name_1 = require("../environments/resolve-by-name"); const built_in_1 = require("../environments/built-in"); const prefix_1 = require("../../util/prefix"); function findNonLocalReads(graph, ignore) { const ignores = new Set(ignore.map(i => i.nodeId)); const ids = new Set([...graph.vertices(true)] .filter(([_, info]) => info.tag === vertex_1.VertexType.Use || info.tag === vertex_1.VertexType.FunctionCall) .map(([id, _]) => id)); /* find all variable use ids which do not link to a given id */ const nonLocalReads = []; for (const id of ids) { if (ignores.has(id)) { continue; } const outgoing = graph.outgoingEdges(id); const name = (0, node_id_1.recoverName)(id, graph.idMap); const origin = graph.getVertex(id, true); if (outgoing === undefined) { nonLocalReads.push({ name: (0, node_id_1.recoverName)(id, graph.idMap), nodeId: id, controlDependencies: undefined, type: origin?.tag === vertex_1.VertexType.FunctionCall ? identifier_1.ReferenceType.Function : identifier_1.ReferenceType.Variable }); continue; } for (const [target, { types }] of outgoing) { if ((0, edge_1.edgeIncludesType)(types, edge_1.EdgeType.Reads) && !ids.has(target)) { if (!name) { logger_1.dataflowLogger.warn('found non-local read without name for id ' + id); } nonLocalReads.push({ name: (0, node_id_1.recoverName)(id, graph.idMap), nodeId: id, controlDependencies: undefined, type: origin?.tag === vertex_1.VertexType.FunctionCall ? identifier_1.ReferenceType.Function : identifier_1.ReferenceType.Variable }); break; } } } return nonLocalReads; } function produceNameSharedIdMap(references) { const nameIdShares = new defaultmap_1.DefaultMap(() => []); for (const reference of references) { if (reference.name) { nameIdShares.get(reference.name).push(reference); } } return nameIdShares; } function linkArgumentsOnCall(args, params, graph) { const nameArgMap = new Map(args.filter(graph_1.isNamedArgument).map(a => [a.name, a])); const nameParamMap = new Map(params.filter(p => p !== undefined && p.name !== undefined && p.name.content !== undefined).map(p => [p.name.content, p])); const specialDotParameter = params.find(p => p.special); // all parameters matched by name const matchedParameters = new Set(); // first map names for (const [name, arg] of nameArgMap) { const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, [...nameParamMap.keys()]) ?? name; const param = nameParamMap.get(pmatchName); if (param !== undefined && param.name) { logger_1.dataflowLogger.trace(`mapping named argument "${name}" to parameter "${param.name.content}"`); graph.addEdge(arg.nodeId, param.name.info.id, edge_1.EdgeType.DefinesOnCall); graph.addEdge(param.name.info.id, arg.nodeId, edge_1.EdgeType.DefinedByOnCall); matchedParameters.add(name); } else if (specialDotParameter !== undefined && specialDotParameter.name) { logger_1.dataflowLogger.trace(`mapping named argument "${name}" to dot-dot-dot parameter`); graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, edge_1.EdgeType.DefinesOnCall); graph.addEdge(specialDotParameter.name.info.id, arg.nodeId, edge_1.EdgeType.DefinedByOnCall); } } const remainingParameter = params.filter(p => !p || !p.name || !matchedParameters.has(p.name.content)); const remainingArguments = args.filter(a => !(0, graph_1.isNamedArgument)(a)); for (let i = 0; i < remainingArguments.length; i++) { const arg = remainingArguments[i]; if (arg === r_function_call_1.EmptyArgument) { logger_1.dataflowLogger.trace(`skipping value argument for ${i}`); continue; } if (remainingParameter.length <= i) { if (specialDotParameter !== undefined) { logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to dot-dot-dot parameter`); graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, edge_1.EdgeType.DefinesOnCall); graph.addEdge(specialDotParameter.name.info.id, arg.nodeId, edge_1.EdgeType.DefinedByOnCall); } else { logger_1.dataflowLogger.warn(`skipping argument ${i} as there is no corresponding parameter - R should block that`); } continue; } const param = remainingParameter[i]; logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to parameter "${param.name?.content ?? '??'}"`); if (param.name) { graph.addEdge(arg.nodeId, param.name.info.id, edge_1.EdgeType.DefinesOnCall); graph.addEdge(param.name.info.id, arg.nodeId, edge_1.EdgeType.DefinedByOnCall); } } } function linkFunctionCallArguments(targetId, idMap, functionCallName, functionRootId, callArgs, finalGraph) { // we get them by just choosing the rhs of the definition const linkedFunction = idMap.get(targetId); if (linkedFunction === undefined) { logger_1.dataflowLogger.trace(`no function definition found for ${functionCallName} (${functionRootId})`); return; } if (linkedFunction.type !== type_1.RType.FunctionDefinition) { logger_1.dataflowLogger.trace(`function call definition base ${functionCallName} does not lead to a function definition (${functionRootId}) but got ${linkedFunction.type}`); return; } (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `linking arguments for ${functionCallName} (${functionRootId}) to ${JSON.stringify(linkedFunction.location)}`); linkArgumentsOnCall(callArgs, linkedFunction.parameters, finalGraph); } function linkFunctionCallWithSingleTarget(graph, def, info, idMap) { const id = info.id; if (info.environment !== undefined) { // for each open ingoing reference, try to resolve it here, and if so, add a read edge from the call to signal that it reads it for (const ingoing of def.subflow.in) { const defs = ingoing.name ? (0, resolve_by_name_1.resolveByName)(ingoing.name, info.environment, ingoing.type) : undefined; if (defs === undefined) { continue; } for (const def of defs) { if (!(0, built_in_1.isBuiltIn)(def.nodeId)) { graph.addEdge(ingoing, def, edge_1.EdgeType.DefinedByOnCall); graph.addEdge(id, def, edge_1.EdgeType.DefinesOnCall); } } } } const exitPoints = def.exitPoints; for (const exitPoint of exitPoints) { graph.addEdge(id, exitPoint, edge_1.EdgeType.Returns); } const defName = (0, node_id_1.recoverName)(def.id, idMap); (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `recording expression-list-level call from ${(0, node_id_1.recoverName)(info.id, idMap)} to ${defName}`); graph.addEdge(id, def.id, edge_1.EdgeType.Calls); linkFunctionCallArguments(def.id, idMap, defName, id, info.args, graph); } /* there is _a lot_ potential for optimization here */ function linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions) { const edges = graph.outgoingEdges(id); if (edges === undefined) { /* no outgoing edges */ return; } const readBits = edge_1.EdgeType.Reads | edge_1.EdgeType.Calls; const functionDefinitionReadIds = [...edges].filter(([_, e]) => (0, edge_1.edgeDoesNotIncludeType)(e.types, edge_1.EdgeType.Argument) && (0, edge_1.edgeIncludesType)(e.types, readBits)).map(([target, _]) => target); const functionDefs = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph)[0]; for (const def of functionDefs.values()) { (0, assert_1.guard)(def.tag === vertex_1.VertexType.FunctionDefinition, () => `expected function definition, but got ${def.tag}`); linkFunctionCallWithSingleTarget(graph, def, info, idMap); } if (thisGraph.isRoot(id)) { calledFunctionDefinitions.push({ functionCall: id, called: [...functionDefs.values()] }); } } /** * Returns the called functions within the current graph, which can be used to merge the environments with the call. * Furthermore, it links the corresponding arguments. * * @param graph - The graph to use for search and resolution traversals (ideally a superset of the `thisGraph`) * @param idMap - The map to resolve ids to names * @param thisGraph - The graph to search for function calls in */ function linkFunctionCalls(graph, idMap, thisGraph) { const functionCalls = [...thisGraph.vertices(true)] .filter(([_, info]) => info.tag === vertex_1.VertexType.FunctionCall); const calledFunctionDefinitions = []; for (const [id, info] of functionCalls) { linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions); } return calledFunctionDefinitions; } /** * convenience function returning all known call targets, as well as the name source which defines them */ function getAllFunctionCallTargets(call, graph, environment) { let found = []; const callVertex = graph.get(call, true); if (callVertex === undefined) { return []; } const [info, outgoingEdges] = callVertex; if (info.tag !== vertex_1.VertexType.FunctionCall) { return []; } if (info.name !== undefined && (environment !== undefined || info.environment !== undefined)) { const functionCallDefs = (0, resolve_by_name_1.resolveByName)(info.name, environment ?? info.environment, identifier_1.ReferenceType.Function)?.map(d => d.nodeId) ?? []; for (const [target, outgoingEdge] of outgoingEdges.entries()) { if ((0, edge_1.edgeIncludesType)(outgoingEdge.types, edge_1.EdgeType.Calls)) { functionCallDefs.push(target); } } const [functionCallTargets, builtInTargets] = getAllLinkedFunctionDefinitions(new Set(functionCallDefs), graph); for (const target of functionCallTargets) { found.push(target.id); } found = found.concat(...builtInTargets, functionCallDefs); } return found; } function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGraph) { let potential = [...functionDefinitionReadIds]; const visited = new Set(); const result = new Set(); const builtIns = new Set(); while (potential.length > 0) { const currentId = potential.pop(); // do not traverse builtins further if ((0, built_in_1.isBuiltIn)(currentId)) { builtIns.add(currentId); continue; } const currentInfo = dataflowGraph.get(currentId, true); if (currentInfo === undefined) { continue; } visited.add(currentId); const outgoingEdges = [...currentInfo[1]]; const returnEdges = outgoingEdges.filter(([_, e]) => (0, edge_1.edgeIncludesType)(e.types, edge_1.EdgeType.Returns)); if (returnEdges.length > 0) { // only traverse return edges and do not follow `calls` etc. as this indicates that we have a function call which returns a result, and not the function calls itself potential = potential.concat(...returnEdges.map(([target]) => target).filter(id => !visited.has(id))); continue; } const followBits = edge_1.EdgeType.Reads | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.DefinedByOnCall; const followEdges = outgoingEdges.filter(([_, e]) => (0, edge_1.edgeIncludesType)(e.types, followBits)); if (currentInfo[0].subflow !== undefined) { result.add(currentInfo[0]); } // trace all joined reads potential = potential.concat(followEdges.map(([target]) => target).filter(id => !visited.has(id))); } return [result, builtIns]; } /** * This method links a set of read variables to definitions in an environment. * * @param referencesToLinkAgainstEnvironment - The set of references to link against the environment * @param environmentInformation - The environment information to link against * @param givenInputs - The existing list of inputs that might be extended * @param graph - The graph to enter the found links * @param maybeForRemaining - Each input that can not be linked, will be added to `givenInputs`. If this flag is `true`, it will be marked as `maybe`. * * @returns the given inputs, possibly extended with the remaining inputs (those of `referencesToLinkAgainstEnvironment` that could not be linked against the environment) */ function linkInputs(referencesToLinkAgainstEnvironment, environmentInformation, givenInputs, graph, maybeForRemaining) { for (const bodyInput of referencesToLinkAgainstEnvironment) { const probableTarget = bodyInput.name ? (0, resolve_by_name_1.resolveByName)(bodyInput.name, environmentInformation, bodyInput.type) : undefined; if (probableTarget === undefined) { log_1.log.trace(`found no target for ${bodyInput.name}`); if (maybeForRemaining) { bodyInput.controlDependencies ??= []; } givenInputs.push(bodyInput); } else { let allBuiltIn = true; for (const target of probableTarget) { // we can stick with maybe even if readId.attribute is always graph.addEdge(bodyInput, target, edge_1.EdgeType.Reads); if (!(0, identifier_1.isReferenceType)(target.type, identifier_1.ReferenceType.BuiltInConstant | identifier_1.ReferenceType.BuiltInFunction)) { allBuiltIn = false; } } if (allBuiltIn) { givenInputs.push(bodyInput); } } } // data.graph.get(node.id).definedAtPosition = false return givenInputs; } /** all loops variables which are open read (not already bound by a redefinition within the loop) get a maybe read marker to their last definition within the loop * e.g. with: * ```R * for(i in 1:10) { * x_1 <- x_2 + 1 * } * ``` * `x_2` must get a read marker to `x_1` as `x_1` is the active redefinition in the second loop iteration. */ function linkCircularRedefinitionsWithinALoop(graph, openIns, outgoing) { // first, we preprocess out so that only the last definition of a given identifier survives // this implicitly assumes that the outgoing references are ordered const lastOutgoing = new Map(); for (const out of outgoing) { if (out.name) { lastOutgoing.set(out.name, out); } } for (const [name, targets] of openIns.entries()) { for (const out of lastOutgoing.values()) { if (out.name === name) { for (const target of targets) { graph.addEdge(target.nodeId, out.nodeId, edge_1.EdgeType.Reads); } } } } } //# sourceMappingURL=linker.js.map