UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

533 lines 23.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.findNonLocalReads = findNonLocalReads; exports.produceNameSharedIdMap = produceNameSharedIdMap; exports.linkArgumentsOnCall = linkArgumentsOnCall; exports.pMatch = pMatch; exports.linkFunctionCallWithSingleTarget = linkFunctionCallWithSingleTarget; exports.linkFunctionCalls = linkFunctionCalls; exports.getAllFunctionCallTargets = getAllFunctionCallTargets; exports.getAllLinkedFunctionDefinitions = getAllLinkedFunctionDefinitions; exports.linkInputs = linkInputs; exports.linkCircularRedefinitionsWithinALoop = linkCircularRedefinitionsWithinALoop; exports.reapplyLoopExitPoints = reapplyLoopExitPoints; const defaultmap_1 = require("../../util/collections/defaultmap"); const assert_1 = require("../../util/assert"); const log_1 = require("../../util/log"); const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id"); const identifier_1 = require("../environments/identifier"); const graph_1 = require("../graph/graph"); const logger_1 = require("../logger"); const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const edge_1 = require("../graph/edge"); const type_1 = require("../../r-bridge/lang-4.x/ast/model/type"); const vertex_1 = require("../graph/vertex"); const resolve_by_name_1 = require("../environments/resolve-by-name"); const prefix_1 = require("../../util/prefix"); const info_1 = require("../info"); const unnamed_call_handling_1 = require("./process/functions/call/unnamed-call-handling"); const built_in_proc_name_1 = require("../environments/built-in-proc-name"); /** * Find all reads within the graph that do not reference a local definition in the graph. */ function findNonLocalReads(graph, ignores = new Set()) { const defs = new Set(graph.vertexIdsOfType(vertex_1.VertexType.VariableDefinition).concat(graph.vertexIdsOfType(vertex_1.VertexType.FunctionDefinition))); /* find all variable use ids which do not link to a given id */ const nonLocalReads = []; for (const ids of [graph.vertexIdsOfType(vertex_1.VertexType.Use), graph.vertexIdsOfType(vertex_1.VertexType.FunctionCall)]) { for (const nodeId of ids) { if (ignores.has(nodeId)) { continue; } const outgoing = graph.outgoingEdges(nodeId); const origin = graph.getVertex(nodeId); const name = (0, node_id_1.recoverName)(nodeId, graph.idMap); const type = origin?.tag === vertex_1.VertexType.FunctionCall ? identifier_1.ReferenceType.Function : identifier_1.ReferenceType.Variable; const identifierRef = { nodeId, name, type }; if (outgoing === undefined) { nonLocalReads.push(identifierRef); continue; } for (const [target, e] of outgoing) { if (edge_1.DfEdge.includesType(e, edge_1.EdgeType.Reads) && !defs.has(target)) { nonLocalReads.push(identifierRef); break; } } } } return nonLocalReads; } /** * Produces a map from names to all identifier references sharing that name. */ function produceNameSharedIdMap(references) { const nameIdShares = new defaultmap_1.DefaultMap(() => []); for (const reference of references) { const rn = reference.name; if (rn) { nameIdShares.get(rn).push(reference); } } return nameIdShares; } /** * Links the given arguments to the given parameters within the given graph. * This follows the `pmatch` semantics of R * @see https://cran.r-project.org/doc/manuals/R-lang.html#Argument-matching * This returns the resolved map from argument ids to parameter ids. * If you just want to match by name, use {@link pMatch}. */ function linkArgumentsOnCall(args, params, graph) { const nameArgMap = new Map(args.filter(graph_1.FunctionArgument.isNamed).map(a => [a.name, a])); const nameParamMap = new Map(params.filter(p => p?.name?.content !== undefined) .map(p => [p.name.content, p])); const maps = new Map(); const specialDotParameter = params.find(p => p.special); const sid = specialDotParameter?.name.info.id; // all parameters matched by name const matchedParameters = new Set(); const paramNames = nameParamMap.keys().toArray(); // first map names for (const [name, { nodeId: argId }] of nameArgMap) { const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, paramNames) ?? name; const param = nameParamMap.get(pmatchName); if (param?.name) { const pid = param.name.info.id; graph.addEdge(argId, pid, edge_1.EdgeType.DefinesOnCall); graph.addEdge(pid, argId, edge_1.EdgeType.DefinedByOnCall); maps.set(argId, pid); matchedParameters.add(name); } else if (sid) { graph.addEdge(argId, sid, edge_1.EdgeType.DefinesOnCall); graph.addEdge(sid, argId, edge_1.EdgeType.DefinedByOnCall); maps.set(argId, sid); } } const remainingParameter = params.filter(p => !p?.name || !matchedParameters.has(p.name.content)); const remainingArguments = args.filter(graph_1.FunctionArgument.isUnnamed); for (let i = 0; i < remainingArguments.length; i++) { const arg = remainingArguments[i]; if (arg === r_function_call_1.EmptyArgument) { continue; } const aid = arg.nodeId; if (remainingParameter.length <= i) { if (sid) { graph.addEdge(aid, sid, edge_1.EdgeType.DefinesOnCall); graph.addEdge(sid, aid, edge_1.EdgeType.DefinedByOnCall); maps.set(aid, sid); } else { logger_1.dataflowLogger.warn(`skipping argument ${i} as there is no corresponding parameter - R should block that`); } continue; } const param = remainingParameter[i]; logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${aid}) to parameter "${param.name?.content ?? '??'}"`); if (param.name) { const pid = param.name.info.id; graph.addEdge(aid, pid, edge_1.EdgeType.DefinesOnCall); graph.addEdge(pid, aid, edge_1.EdgeType.DefinedByOnCall); maps.set(aid, pid); } } return maps; } /** * Links the given arguments to the given parameters within the given graph by name only. * @example * ```ts * const parameterSpec = { * 'paramName': 'paramId', * 'anotherParamName': 'anotherParamId', * // we recommend to always add '...' to your specification * // this way you can collect all arguments that could not be matched! * '...': '...' * } as const; * * const match = pMatch(convertFnArguments(args), parameterSpec); * const addParam = match.get('paramId'); * ``` * @note * To obtain the arguments from a {@link RFunctionCall}[], either use {@link processAllArguments} (also available via {@link processKnownFunctionCall}) * or convert them with {@link convertFnArguments}. */ function pMatch(args, params) { const nameArgMap = new Map(args.filter(graph_1.FunctionArgument.isNamed).map(a => [a.name, a])); const maps = new Map(); function addToMaps(key, value) { const e = maps.get(key); if (e) { e.push(value); } else { maps.set(key, [value]); } } const sid = params['...']; const paramNames = Object.keys(params); // all parameters matched by name const matchedParameters = new Set(); // first map names for (const [name, { nodeId: argId }] of nameArgMap) { const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, paramNames) ?? name; const param = params[pmatchName]; if (param) { addToMaps(param, argId); matchedParameters.add(name); } else if (sid) { addToMaps(sid, argId); } } const remainingParameter = paramNames.filter(p => !matchedParameters.has(p)); const remainingArguments = args.filter(graph_1.FunctionArgument.isUnnamed); for (let i = 0; i < remainingArguments.length; i++) { const arg = remainingArguments[i]; if (arg === r_function_call_1.EmptyArgument) { continue; } const aid = arg.nodeId; if (remainingParameter.length <= i) { if (sid) { addToMaps(sid, aid); } continue; } const param = params[remainingParameter[i]]; if (param) { addToMaps(param, aid); } } return maps; } /** * Links the function call arguments to the target function definition and returns a map from argument ids to parameter ids. */ function linkFunctionCallArguments(targetId, idMap, functionCallName, functionRootId, callArgs, finalGraph) { // we get them by just choosing the rhs of the definition const linkedFunction = idMap.get(targetId); if (linkedFunction === undefined) { logger_1.dataflowLogger.trace(`no fdef found for ${functionCallName} (${functionRootId})`); return; } if (linkedFunction.type !== type_1.RType.FunctionDefinition) { logger_1.dataflowLogger.trace(`function call definition base ${functionCallName} does not lead to a function definition (${functionRootId}) but got ${linkedFunction.type}`); return; } return linkArgumentsOnCall(callArgs, linkedFunction.parameters, finalGraph); } /** * Links a function call with a single target function definition. */ function linkFunctionCallWithSingleTarget(graph, { subflow: fnSubflow, exitPoints, id: fnId, params }, info, idMap) { const id = info.id; if (info.environment !== undefined) { // for each open ingoing reference, try to resolve it here, and if so, add a read edge from the call to signal that it reads it for (const ingoing of fnSubflow.in) { const defs = ingoing.name ? (0, resolve_by_name_1.resolveByName)(ingoing.name, info.environment, ingoing.type) : undefined; if (defs === undefined) { continue; } for (const { nodeId, type, value } of defs) { if (!node_id_1.NodeId.isBuiltIn(nodeId)) { graph.addEdge(ingoing.nodeId, nodeId, edge_1.EdgeType.DefinedByOnCall); graph.addEdge(id, nodeId, edge_1.EdgeType.DefinesOnCall); if (type === identifier_1.ReferenceType.Function && ingoing.type === identifier_1.ReferenceType.S7MethodPrefix && Array.isArray(value)) { for (const v of value) { graph.addEdge(id, v, edge_1.EdgeType.Calls); graph.addEdge(ingoing.nodeId, v, edge_1.EdgeType.Calls); // add s7 to vertex const vInfo = graph.getVertex(v); if (vInfo && vInfo.tag === vertex_1.VertexType.FunctionDefinition) { vInfo.mode ??= []; if (!vInfo.mode.includes('s7')) { vInfo.mode.push('s7'); } } } } } } } } const propagateExitPoints = []; for (const exitPoint of exitPoints) { graph.addEdge(id, exitPoint.nodeId, edge_1.EdgeType.Returns); if ((0, info_1.doesExitPointPropagateCalls)(exitPoint.type)) { // add the exit point to the call! propagateExitPoints.push(exitPoint); } } const defName = (0, node_id_1.recoverName)(fnId, idMap); (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `recording expr-list-level call from ${(0, node_id_1.recoverName)(info.id, idMap)} to ${defName}`); graph.addEdge(id, fnId, edge_1.EdgeType.Calls); applyForForcedArgs(graph, info.id, params, linkFunctionCallArguments(fnId, idMap, defName, id, info.args, graph)); return propagateExitPoints; } /** for each parameter that we link that gets forced, add a reads edge from the call to argument to show that it reads it */ function applyForForcedArgs(graph, callId, readParams, maps) { if (maps === undefined) { return; } for (const [arg, param] of maps.entries()) { if (readParams[String(param)]) { graph.addEdge(callId, arg, edge_1.EdgeType.Reads); } } } const FCallLinkReadBits = edge_1.EdgeType.Reads | edge_1.EdgeType.Calls | edge_1.EdgeType.DefinedByOnCall; /* there is _a lot_ potential for optimization here */ function linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions) { const edges = graph.outgoingEdges(id); if (edges === undefined) { /* no outgoing edges */ return; } const functionDefinitionReadIds = new Set(); for (const [t, e] of edges.entries()) { if (!node_id_1.NodeId.isBuiltIn(t) && edge_1.DfEdge.doesNotIncludeType(e, edge_1.EdgeType.Argument) && edge_1.DfEdge.includesType(e, FCallLinkReadBits)) { functionDefinitionReadIds.add(t); } } const [functionDefs] = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph); const propagateExitPoints = []; for (const def of functionDefs.values()) { // we can skip this if we already linked it const oEdge = graph.outgoingEdges(id)?.get(def.id); if (oEdge && edge_1.DfEdge.includesType(oEdge, edge_1.EdgeType.Calls)) { continue; } for (const ep of linkFunctionCallWithSingleTarget(graph, def, info, idMap)) { propagateExitPoints.push(ep); } } if (thisGraph.isRoot(id) && functionDefs.size > 0) { calledFunctionDefinitions.push({ functionCall: id, called: functionDefs.values().toArray(), propagateExitPoints }); } } /** * Returns the called functions within the current graph, which can be used to merge the environments with the call. * Furthermore, it links the corresponding arguments. * @param graph - The graph to use for search and resolution traversals (ideally a superset of the `thisGraph`) * @param idMap - The map to resolve ids to names * @param thisGraph - The graph to search for function calls in */ function linkFunctionCalls(graph, idMap, thisGraph) { const calledFunctionDefinitions = []; for (const [id, info] of thisGraph.verticesOfType(vertex_1.VertexType.FunctionCall)) { if (!info.onlyBuiltin) { linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions); } } return calledFunctionDefinitions; } /** * convenience function returning all known call targets, as well as the name source which defines them */ function getAllFunctionCallTargets(call, graph, environment) { const found = new Set(); const callVertex = graph.get(call, true); if (callVertex === undefined) { return []; } const [info, outgoingEdges] = callVertex; if (info.tag !== vertex_1.VertexType.FunctionCall) { return []; } if (environment !== undefined || info.environment !== undefined) { let functionCallDefs = []; const refType = info.origin.includes(built_in_proc_name_1.BuiltInProcName.S3Dispatch) ? identifier_1.ReferenceType.S3MethodPrefix : info.origin.includes(built_in_proc_name_1.BuiltInProcName.S7Dispatch) ? identifier_1.ReferenceType.S7MethodPrefix : identifier_1.ReferenceType.Function; if (info.name !== undefined && !identifier_1.Identifier.getName(info.name).startsWith(unnamed_call_handling_1.UnnamedFunctionCallPrefix)) { functionCallDefs = (0, resolve_by_name_1.resolveByName)(info.name, environment ?? info.environment, refType)?.map(d => d.nodeId) ?? []; } for (const [target, outgoingEdge] of outgoingEdges.entries()) { if (edge_1.DfEdge.includesType(outgoingEdge, edge_1.EdgeType.Calls)) { functionCallDefs.push(target); } } const [functionCallTargets, builtInTargets] = getAllLinkedFunctionDefinitions(new Set(functionCallDefs), graph); for (const target of functionCallTargets) { found.add(target.id); } for (const arr of [builtInTargets, functionCallDefs]) { for (const target of arr) { found.add(target); } } } return Array.from(found); } const LinkedFnFollowBits = edge_1.EdgeType.Reads | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.DefinedByOnCall; /** * Finds all linked function definitions starting from the given set of read ids. * This is a complicated function, please only call it if you know what you are doing. * For example, if you are interested in the called functions of a function call, use {@link getAllFunctionCallTargets} instead. * This function here expects you to handle the accessed objects yourself (e.g,. already resolve the first layer of reads/returns/calls/... or resolve the identifier by name) * and then pass in the relevant read ids. * @example * Consider a scenario like this: * ```R * x <- function() 3 * x() * ``` * To resolve the call `x` in the second line, use {@link getAllFunctionCallTargets}! * To know what fdefs the definition of `x` in the first line links to, you can use {@link getAllLinkedFunctionDefinitions|this function}. */ function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGraph) { const result = new Set(); const builtIns = new Set(); if (functionDefinitionReadIds.size === 0) { return [result, builtIns]; } const potential = Array.from(functionDefinitionReadIds); const visited = new Set(); while (potential.length !== 0) { const cid = potential.pop(); visited.add(cid); if (node_id_1.NodeId.isBuiltIn(cid)) { builtIns.add(cid); continue; } const vertex = dataflowGraph.getVertex(cid); if (vertex === undefined) { continue; } // Found a function definition if (vertex.subflow !== undefined) { result.add(vertex); continue; } let hasReturnEdge = false; const outgoing = dataflowGraph.outgoingEdges(cid) ?? []; for (const [target, e] of outgoing) { if (edge_1.DfEdge.includesType(e, edge_1.EdgeType.Returns)) { hasReturnEdge = true; if (!visited.has(target)) { potential.push(target); } } } if (vertex.tag === vertex_1.VertexType.FunctionCall || hasReturnEdge || (vertex.tag === vertex_1.VertexType.VariableDefinition && vertex.par)) { continue; } for (const [target, e] of outgoing) { if (edge_1.DfEdge.includesType(e, LinkedFnFollowBits) && !visited.has(target)) { potential.push(target); } } } return [result, builtIns]; } /** * This method links a set of read variables to definitions in an environment. * @param referencesToLinkAgainstEnvironment - The set of references to link against the environment * @param environmentInformation - The environment information to link against * @param givenInputs - The existing list of inputs that might be extended * @param graph - The graph to enter the found links * @param maybeForRemaining - Each input that can not be linked, will be added to `givenInputs`. If this flag is `true`, it will be marked as `maybe`. * @returns the given inputs, possibly extended with the remaining inputs (those of `referencesToLinkAgainstEnvironment` that could not be linked against the environment) */ function linkInputs(referencesToLinkAgainstEnvironment, environmentInformation, givenInputs, graph, maybeForRemaining) { for (const bodyInput of referencesToLinkAgainstEnvironment) { const probableTarget = bodyInput.name ? (0, resolve_by_name_1.resolveByName)(bodyInput.name, environmentInformation, bodyInput.type) : undefined; if (probableTarget === undefined) { if (maybeForRemaining) { bodyInput.cds ??= []; } givenInputs.push(bodyInput); } else { let allBuiltIn = true; for (const target of probableTarget) { // we can stick with maybe even if readId.attribute is always graph.addEdge(bodyInput.nodeId, target.nodeId, edge_1.EdgeType.Reads); if (!(0, identifier_1.isReferenceType)(target.type, identifier_1.ReferenceType.BuiltInConstant | identifier_1.ReferenceType.BuiltInFunction)) { allBuiltIn = false; } } if (allBuiltIn) { givenInputs.push(bodyInput); } } } // data.graph.get(node.id).definedAtPosition = false return givenInputs; } /** * all loops variables which are open read (not already bound by a redefinition within the loop) get a maybe read marker to their last definition within the loop * e.g. with: * ```R * for(i in 1:10) { * x_1 <- x_2 + 1 * } * ``` * `x_2` must get a read marker to `x_1` as `x_1` is the active redefinition in the second loop iteration. */ function linkCircularRedefinitionsWithinALoop(graph, openIns, outgoing) { // first, we preprocess out so that only the last definition of a given identifier survives // this implicitly assumes that the outgoing references are ordered const lastOutgoing = new Map(); for (const out of outgoing) { const on = out.name; if (on) { lastOutgoing.set(on, out); } } for (const [name, targets] of openIns.entries()) { for (const { name: outName, nodeId } of lastOutgoing.values()) { if (outName !== undefined && identifier_1.Identifier.matches(outName, name)) { for (const target of targets) { graph.addEdge(target.nodeId, nodeId, edge_1.EdgeType.Reads); } } } } } /** * Reapplies the loop exit points' control dependencies to the given identifier references. */ function reapplyLoopExitPoints(exits, references, graph) { // just apply the cds of all exit points not already present const exitCds = exits.flatMap(e => e.cds?.map(info_1.negateControlDependency)) .filter(assert_1.isNotUndefined) .map(cd => ({ ...cd, byIteration: true })); const seenRefs = new Set(); for (const ref of references) { if (seenRefs.has(ref.nodeId)) { continue; } seenRefs.add(ref.nodeId); for (const cd of exitCds) { const { id: cId } = cd; let setVertex = false; if (ref.cds) { if (!ref.cds?.find(c => c.id === cId)) { ref.cds.push(cd); setVertex = true; } } else { ref.cds = [cd]; setVertex = true; } if (setVertex) { const vertex = graph.getVertex(ref.nodeId); if (vertex) { if (vertex.cds) { if (!vertex.cds?.find(c => c.id === cId)) { vertex.cds.push(cd); } } else { vertex.cds = [cd]; } } } } } } //# sourceMappingURL=linker.js.map