@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
533 lines • 23.9 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.findNonLocalReads = findNonLocalReads;
exports.produceNameSharedIdMap = produceNameSharedIdMap;
exports.linkArgumentsOnCall = linkArgumentsOnCall;
exports.pMatch = pMatch;
exports.linkFunctionCallWithSingleTarget = linkFunctionCallWithSingleTarget;
exports.linkFunctionCalls = linkFunctionCalls;
exports.getAllFunctionCallTargets = getAllFunctionCallTargets;
exports.getAllLinkedFunctionDefinitions = getAllLinkedFunctionDefinitions;
exports.linkInputs = linkInputs;
exports.linkCircularRedefinitionsWithinALoop = linkCircularRedefinitionsWithinALoop;
exports.reapplyLoopExitPoints = reapplyLoopExitPoints;
const defaultmap_1 = require("../../util/collections/defaultmap");
const assert_1 = require("../../util/assert");
const log_1 = require("../../util/log");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const identifier_1 = require("../environments/identifier");
const graph_1 = require("../graph/graph");
const logger_1 = require("../logger");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const edge_1 = require("../graph/edge");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const vertex_1 = require("../graph/vertex");
const resolve_by_name_1 = require("../environments/resolve-by-name");
const prefix_1 = require("../../util/prefix");
const info_1 = require("../info");
const unnamed_call_handling_1 = require("./process/functions/call/unnamed-call-handling");
const built_in_proc_name_1 = require("../environments/built-in-proc-name");
/**
 * Collects every `Use` and `FunctionCall` vertex of the graph that is not fully explained by the graph itself.
 * A vertex is reported if it has no outgoing edges at all, or if at least one of its `Reads` edges
 * targets something other than a variable- or function-definition vertex of this graph.
 * @param graph   - The dataflow graph to inspect
 * @param ignores - Node ids that are skipped entirely (defaults to an empty set)
 * @returns one `{ nodeId, name, type }` reference per reported vertex (uses first, then function calls);
 *          `type` is `ReferenceType.Function` for function call vertices and `ReferenceType.Variable` otherwise
 */
function findNonLocalReads(graph, ignores = new Set()) {
    const { VertexType } = vertex_1;
    const localDefinitions = new Set(graph.vertexIdsOfType(VertexType.VariableDefinition).concat(graph.vertexIdsOfType(VertexType.FunctionDefinition)));
    const candidateGroups = [graph.vertexIdsOfType(VertexType.Use), graph.vertexIdsOfType(VertexType.FunctionCall)];
    const collected = [];
    for (const group of candidateGroups) {
        for (const nodeId of group) {
            if (ignores.has(nodeId)) {
                continue;
            }
            const edges = graph.outgoingEdges(nodeId);
            const vertex = graph.getVertex(nodeId);
            const name = (0, node_id_1.recoverName)(nodeId, graph.idMap);
            const isCall = vertex?.tag === VertexType.FunctionCall;
            const type = isCall ? identifier_1.ReferenceType.Function : identifier_1.ReferenceType.Variable;
            // a vertex without any outgoing edge is non-local by definition
            let isNonLocal = edges === undefined;
            if (!isNonLocal) {
                for (const [target, edge] of edges) {
                    if (edge_1.DfEdge.includesType(edge, edge_1.EdgeType.Reads) && !localDefinitions.has(target)) {
                        // one read leaving the local definitions is enough
                        isNonLocal = true;
                        break;
                    }
                }
            }
            if (isNonLocal) {
                collected.push({ nodeId, name, type });
            }
        }
    }
    return collected;
}
/**
 * Groups the given identifier references by their name.
 * References without a (truthy) name are left out.
 * @param references - The references to group
 * @returns a `DefaultMap` from each name to the references carrying it, in input order
 */
function produceNameSharedIdMap(references) {
    const groupedByName = new defaultmap_1.DefaultMap(() => []);
    for (const ref of references) {
        const { name } = ref;
        if (!name) {
            continue;
        }
        groupedByName.get(name).push(ref);
    }
    return groupedByName;
}
/**
 * Links the given arguments to the given parameters within the given graph.
 * This follows the `pmatch` semantics of R
 * @see https://cran.r-project.org/doc/manuals/R-lang.html#Argument-matching
 *
 * Named arguments are matched first (the parameter name is chosen by `findByPrefixIfUnique`, falling back to the
 * argument name itself). The unnamed arguments are then assigned, in order, to the parameters that were not matched
 * by name. Arguments without a matching parameter are linked to the special (`...`) parameter if there is one.
 * Every link adds a `DefinesOnCall` edge (argument to parameter) and a `DefinedByOnCall` edge (parameter to argument).
 * @param args   - The arguments of the call (`EmptyArgument`s are skipped but still occupy their position)
 * @param params - The parameters of the called function definition
 * @param graph  - The graph that receives the edges
 * @returns the resolved map from argument ids to parameter ids.
 * If you just want to match by name, use {@link pMatch}.
 */
function linkArgumentsOnCall(args, params, graph) {
    const nameArgMap = new Map(args.filter(graph_1.FunctionArgument.isNamed).map(a => [a.name, a]));
    const nameParamMap = new Map(params.filter(p => p?.name?.content !== undefined)
        .map(p => [p.name.content, p]));
    const maps = new Map();
    // records one argument -> parameter link, in the graph (both directions) and in the result
    const link = (argId, paramId) => {
        graph.addEdge(argId, paramId, edge_1.EdgeType.DefinesOnCall);
        graph.addEdge(paramId, argId, edge_1.EdgeType.DefinedByOnCall);
        maps.set(argId, paramId);
    };
    // tolerate holes in the parameter list, just like the other accesses to `params` do
    const specialDotParameter = params.find(p => p?.special);
    const sid = specialDotParameter?.name.info.id;
    // compare against undefined: a node id of `0` is falsy but still a valid id
    const hasSpecialParameter = sid !== undefined;
    // names of all parameters matched by name
    const matchedParameters = new Set();
    const paramNames = Array.from(nameParamMap.keys());
    // first map names
    for (const [name, { nodeId: argId }] of nameArgMap) {
        const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, paramNames) ?? name;
        const param = nameParamMap.get(pmatchName);
        if (param?.name) {
            link(argId, param.name.info.id);
            // remember the *parameter* name (not the possibly abbreviated argument name),
            // otherwise a partially matched parameter would be handed out again to a positional argument
            matchedParameters.add(pmatchName);
        }
        else if (hasSpecialParameter) {
            link(argId, sid);
        }
    }
    const remainingParameter = params.filter(p => !p?.name || !matchedParameters.has(p.name.content));
    const remainingArguments = args.filter(graph_1.FunctionArgument.isUnnamed);
    for (let i = 0; i < remainingArguments.length; i++) {
        const arg = remainingArguments[i];
        if (arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const aid = arg.nodeId;
        if (remainingParameter.length <= i) {
            // more positional arguments than parameters: only `...` can take them
            if (hasSpecialParameter) {
                link(aid, sid);
            }
            else {
                logger_1.dataflowLogger.warn(`skipping argument ${i} as there is no corresponding parameter - R should block that`);
            }
            continue;
        }
        const param = remainingParameter[i];
        logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${aid}) to parameter "${param?.name?.content ?? '??'}"`);
        if (param?.name) {
            link(aid, param.name.info.id);
        }
    }
    return maps;
}
/**
 * Links the given arguments to the given parameters by name only (no graph is involved).
 * Named arguments are resolved first (the parameter name is chosen by `findByPrefixIfUnique`, falling back to the
 * argument name itself); unnamed arguments are then assigned, in order, to the parameters not matched by name.
 * Arguments that cannot be matched are collected under the id given for `'...'` (if the specification has one).
 * @example
 * ```ts
 * const parameterSpec = {
 *   'paramName': 'paramId',
 *   'anotherParamName': 'anotherParamId',
 *   // we recommend to always add '...' to your specification
 *   // this way you can collect all arguments that could not be matched!
 *   '...': '...'
 * } as const;
 *
 * const match = pMatch(convertFnArguments(args), parameterSpec);
 * const addParam = match.get('paramId');
 * ```
 * @note
 * To obtain the arguments from a {@link RFunctionCall}[], either use {@link processAllArguments} (also available via {@link processKnownFunctionCall})
 * or convert them with {@link convertFnArguments}.
 * @param args   - The arguments to match (`EmptyArgument`s are skipped but still occupy their position)
 * @param params - The specification, mapping parameter names to the ids used as keys of the result
 * @returns a map from parameter id to the node ids of all arguments matched to it
 */
function pMatch(args, params) {
    const nameArgMap = new Map(args.filter(graph_1.FunctionArgument.isNamed).map(a => [a.name, a]));
    const maps = new Map();
    function addToMaps(key, value) {
        const e = maps.get(key);
        if (e) {
            e.push(value);
        }
        else {
            maps.set(key, [value]);
        }
    }
    const sid = params['...'];
    const paramNames = Object.keys(params);
    // names of all parameters matched by name
    const matchedParameters = new Set();
    // first map names
    for (const [name, { nodeId: argId }] of nameArgMap) {
        const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, paramNames) ?? name;
        // only consult own keys, so argument names such as `constructor` or `toString`
        // are never resolved through `Object.prototype`
        const param = Object.hasOwn(params, pmatchName) ? params[pmatchName] : undefined;
        if (param) {
            addToMaps(param, argId);
            // remember the *parameter* name (not the possibly abbreviated argument name),
            // otherwise a partially matched parameter would be handed out again to a positional argument
            matchedParameters.add(pmatchName);
        }
        else if (sid) {
            addToMaps(sid, argId);
        }
    }
    const remainingParameter = paramNames.filter(p => !matchedParameters.has(p));
    const remainingArguments = args.filter(graph_1.FunctionArgument.isUnnamed);
    for (let i = 0; i < remainingArguments.length; i++) {
        const arg = remainingArguments[i];
        if (arg === r_function_call_1.EmptyArgument) {
            continue;
        }
        const aid = arg.nodeId;
        if (remainingParameter.length <= i) {
            // more positional arguments than parameters: only `...` can take them
            if (sid) {
                addToMaps(sid, aid);
            }
            continue;
        }
        const param = params[remainingParameter[i]];
        if (param) {
            addToMaps(param, aid);
        }
    }
    return maps;
}
/**
 * Looks up the node behind `targetId` and, if it is a function definition, links the call arguments to its parameters.
 * @param targetId         - Id of the presumed function definition
 * @param idMap            - Map used to resolve `targetId` to its AST node
 * @param functionCallName - Name of the call, only used for trace output
 * @param functionRootId   - Id of the call, only used for trace output
 * @param callArgs         - The arguments of the call
 * @param finalGraph       - The graph that receives the argument edges
 * @returns the map from argument ids to parameter ids produced by {@link linkArgumentsOnCall},
 *          or `undefined` if the target is unknown or not a function definition
 */
function linkFunctionCallArguments(targetId, idMap, functionCallName, functionRootId, callArgs, finalGraph) {
    // the target is the rhs of the definition
    const target = idMap.get(targetId);
    if (target !== undefined && target.type === type_1.RType.FunctionDefinition) {
        return linkArgumentsOnCall(callArgs, target.parameters, finalGraph);
    }
    const reason = target === undefined
        ? `no fdef found for ${functionCallName} (${functionRootId})`
        : `function call definition base ${functionCallName} does not lead to a function definition (${functionRootId}) but got ${target.type}`;
    logger_1.dataflowLogger.trace(reason);
    return undefined;
}
/**
 * Links a function call with a single target function definition.
 *
 * In order, this
 * 1. resolves (only if the call has an environment) every open reference of the definition's subflow by name and
 *    adds `DefinedByOnCall`/`DefinesOnCall` edges to each non-built-in definition found; for S7 method prefixes that
 *    resolve to a function with an array value, it also adds `Calls` edges to every listed id and tags the
 *    corresponding function definition vertices with the mode `'s7'`,
 * 2. adds a `Returns` edge from the call to every exit point of the definition,
 * 3. adds the `Calls` edge from the call to the definition, and
 * 4. links the call arguments to the parameters, adding `Reads` edges for forced parameters.
 * @param graph - The graph that receives the edges
 * @param def   - The function definition vertex (its `subflow`, `exitPoints`, `id`, and `params` are used)
 * @param info  - The function call vertex
 * @param idMap - The map to resolve ids to names
 * @returns the exit points of the definition for which `doesExitPointPropagateCalls` holds
 */
function linkFunctionCallWithSingleTarget(graph, { subflow: fnSubflow, exitPoints, id: fnId, params }, info, idMap) {
    const callId = info.id;
    const { EdgeType } = edge_1;
    // without an environment there is nothing to resolve the open references against
    const openReferences = info.environment === undefined ? [] : fnSubflow.in;
    for (const open of openReferences) {
        if (!open.name) {
            continue;
        }
        const resolved = (0, resolve_by_name_1.resolveByName)(open.name, info.environment, open.type);
        if (resolved === undefined) {
            continue;
        }
        for (const { nodeId: defId, type: defType, value } of resolved) {
            if (node_id_1.NodeId.isBuiltIn(defId)) {
                continue;
            }
            // the call reads this definition on behalf of the function body
            graph.addEdge(open.nodeId, defId, EdgeType.DefinedByOnCall);
            graph.addEdge(callId, defId, EdgeType.DefinesOnCall);
            const isS7Dispatch = defType === identifier_1.ReferenceType.Function
                && open.type === identifier_1.ReferenceType.S7MethodPrefix
                && Array.isArray(value);
            if (!isS7Dispatch) {
                continue;
            }
            for (const method of value) {
                graph.addEdge(callId, method, EdgeType.Calls);
                graph.addEdge(open.nodeId, method, EdgeType.Calls);
                // remember on the definition vertex that it is used as an s7 method
                const methodVertex = graph.getVertex(method);
                if (!methodVertex || methodVertex.tag !== vertex_1.VertexType.FunctionDefinition) {
                    continue;
                }
                methodVertex.mode ??= [];
                if (!methodVertex.mode.includes('s7')) {
                    methodVertex.mode.push('s7');
                }
            }
        }
    }
    const propagating = [];
    for (const exit of exitPoints) {
        graph.addEdge(callId, exit.nodeId, EdgeType.Returns);
        if ((0, info_1.doesExitPointPropagateCalls)(exit.type)) {
            // this exit point has to be added to the call as well
            propagating.push(exit);
        }
    }
    const targetName = (0, node_id_1.recoverName)(fnId, idMap);
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `recording expr-list-level call from ${(0, node_id_1.recoverName)(info.id, idMap)} to ${targetName}`);
    graph.addEdge(callId, fnId, EdgeType.Calls);
    const argumentToParameter = linkFunctionCallArguments(fnId, idMap, targetName, callId, info.args, graph);
    applyForForcedArgs(graph, info.id, params, argumentToParameter);
    return propagating;
}
/**
 * For each linked parameter that gets forced, adds a `Reads` edge from the call to the argument to show that the call reads it.
 * @param graph      - The graph that receives the edges
 * @param callId     - Id of the function call
 * @param readParams - Record keyed by the stringified parameter id; a truthy entry marks the parameter as forced
 * @param maps       - Map from argument ids to parameter ids; nothing happens if it is `undefined`
 */
function applyForForcedArgs(graph, callId, readParams, maps) {
    if (maps !== undefined) {
        for (const [argumentId, parameterId] of maps.entries()) {
            const isForced = readParams[String(parameterId)];
            if (!isForced) {
                continue;
            }
            graph.addEdge(callId, argumentId, edge_1.EdgeType.Reads);
        }
    }
}
/** Edge types (combined bitmask) that `linkFunctionCall` follows from a call vertex to find candidate definitions. */
const FCallLinkReadBits = edge_1.EdgeType.DefinedByOnCall | edge_1.EdgeType.Calls | edge_1.EdgeType.Reads;
/**
 * Links a single function call vertex to all function definitions it may refer to.
 * Candidates are the non-built-in targets of the call's outgoing edges that are not `Argument` edges and carry
 * one of the `FCallLinkReadBits` types; they are resolved with {@link getAllLinkedFunctionDefinitions}.
 * Definitions the call already has a `Calls` edge to are not linked again.
 * If the call is a root vertex of `thisGraph` and at least one definition was found, an entry
 * `{ functionCall, called, propagateExitPoints }` is appended to `calledFunctionDefinitions`.
 *
 * there is _a lot_ potential for optimization here
 */
function linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions) {
    const outgoing = graph.outgoingEdges(id);
    if (outgoing === undefined) {
        // nothing to follow
        return;
    }
    const candidateIds = new Set();
    for (const [target, edge] of outgoing.entries()) {
        if (node_id_1.NodeId.isBuiltIn(target)) {
            continue;
        }
        if (!edge_1.DfEdge.doesNotIncludeType(edge, edge_1.EdgeType.Argument)) {
            continue;
        }
        if (edge_1.DfEdge.includesType(edge, FCallLinkReadBits)) {
            candidateIds.add(target);
        }
    }
    const [functionDefs] = getAllLinkedFunctionDefinitions(new Set(candidateIds), graph);
    const propagateExitPoints = [];
    for (const def of functionDefs.values()) {
        const existing = graph.outgoingEdges(id)?.get(def.id);
        const alreadyLinked = existing && edge_1.DfEdge.includesType(existing, edge_1.EdgeType.Calls);
        if (alreadyLinked) {
            // this definition was linked before
            continue;
        }
        for (const exitPoint of linkFunctionCallWithSingleTarget(graph, def, info, idMap)) {
            propagateExitPoints.push(exitPoint);
        }
    }
    if (thisGraph.isRoot(id) && functionDefs.size > 0) {
        const called = functionDefs.values().toArray();
        calledFunctionDefinitions.push({ functionCall: id, called, propagateExitPoints });
    }
}
/**
 * Returns the called functions within the current graph, which can be used to merge the environments with the call.
 * Furthermore, it links the corresponding arguments.
 * Function call vertices flagged as `onlyBuiltin` are skipped.
 * @param graph - The graph to use for search and resolution traversals (ideally a superset of the `thisGraph`)
 * @param idMap - The map to resolve ids to names
 * @param thisGraph - The graph to search for function calls in
 * @returns the entries collected by `linkFunctionCall` for the processed calls
 */
function linkFunctionCalls(graph, idMap, thisGraph) {
    const collected = [];
    const callVertices = thisGraph.verticesOfType(vertex_1.VertexType.FunctionCall);
    for (const [callId, callInfo] of callVertices) {
        if (callInfo.onlyBuiltin) {
            continue;
        }
        linkFunctionCall(graph, callId, callInfo, idMap, thisGraph, collected);
    }
    return collected;
}
/**
 * Convenience function returning all known call targets, as well as the name source which defines them.
 * @param call        - Id of the function call vertex
 * @param graph       - The dataflow graph containing the call
 * @param environment - Environment to resolve the call name in; the environment stored on the call is used if omitted
 * @returns the ids of all linked function definitions, built-in targets, and the resolved name definitions / `Calls` targets.
 *          The result is empty if the id is unknown, is not a function call, or no environment is available.
 */
function getAllFunctionCallTargets(call, graph, environment) {
    const entry = graph.get(call, true);
    if (entry === undefined) {
        return [];
    }
    const [info, outgoingEdges] = entry;
    if (info.tag !== vertex_1.VertexType.FunctionCall) {
        return [];
    }
    if (environment === undefined && info.environment === undefined) {
        return [];
    }
    // S3 and S7 dispatches are resolved with their own reference type
    let refType;
    if (info.origin.includes(built_in_proc_name_1.BuiltInProcName.S3Dispatch)) {
        refType = identifier_1.ReferenceType.S3MethodPrefix;
    }
    else if (info.origin.includes(built_in_proc_name_1.BuiltInProcName.S7Dispatch)) {
        refType = identifier_1.ReferenceType.S7MethodPrefix;
    }
    else {
        refType = identifier_1.ReferenceType.Function;
    }
    let candidates = [];
    const hasResolvableName = info.name !== undefined
        && !identifier_1.Identifier.getName(info.name).startsWith(unnamed_call_handling_1.UnnamedFunctionCallPrefix);
    if (hasResolvableName) {
        const definitions = (0, resolve_by_name_1.resolveByName)(info.name, environment ?? info.environment, refType);
        candidates = definitions?.map(d => d.nodeId) ?? [];
    }
    for (const [target, edge] of outgoingEdges.entries()) {
        if (edge_1.DfEdge.includesType(edge, edge_1.EdgeType.Calls)) {
            candidates.push(target);
        }
    }
    const [linkedDefinitions, builtInTargets] = getAllLinkedFunctionDefinitions(new Set(candidates), graph);
    const targets = new Set();
    for (const definition of linkedDefinitions) {
        targets.add(definition.id);
    }
    for (const builtIn of builtInTargets) {
        targets.add(builtIn);
    }
    for (const candidate of candidates) {
        targets.add(candidate);
    }
    return Array.from(targets);
}
/** Edge types (combined bitmask) that `getAllLinkedFunctionDefinitions` follows when searching for function definitions. */
const LinkedFnFollowBits = edge_1.EdgeType.DefinedByOnCall | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.Reads;
/**
 * Finds all linked function definitions starting from the given set of read ids.
 * This is a complicated function, please only call it if you know what you are doing.
 * For example, if you are interested in the called functions of a function call, use {@link getAllFunctionCallTargets} instead.
 * This function here expects you to handle the accessed objects yourself (e.g., already resolve the first layer of reads/returns/calls/... or resolve the identifier by name)
 * and then pass in the relevant read ids.
 * @example
 * Consider a scenario like this:
 * ```R
 * x <- function() 3
 * x()
 * ```
 * To resolve the call `x` in the second line, use {@link getAllFunctionCallTargets}!
 * To know what fdefs the definition of `x` in the first line links to, you can use {@link getAllLinkedFunctionDefinitions|this function}.
 * @param functionDefinitionReadIds - The ids to start the search from (not modified)
 * @param dataflowGraph             - The graph to traverse
 * @returns a pair of (1) the set of reached vertices that have a `subflow` (i.e., function definitions)
 *          and (2) the set of reached built-in ids
 */
function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGraph) {
    const result = new Set();
    const builtIns = new Set();
    if (functionDefinitionReadIds.size === 0) {
        return [result, builtIns];
    }
    const potential = Array.from(functionDefinitionReadIds);
    const visited = new Set();
    while (potential.length !== 0) {
        const cid = potential.pop();
        // an id can be queued several times before it is first processed (it is only marked on pop),
        // so skip the repeats instead of traversing the same vertex again
        if (visited.has(cid)) {
            continue;
        }
        visited.add(cid);
        if (node_id_1.NodeId.isBuiltIn(cid)) {
            builtIns.add(cid);
            continue;
        }
        const vertex = dataflowGraph.getVertex(cid);
        if (vertex === undefined) {
            continue;
        }
        // Found a function definition
        if (vertex.subflow !== undefined) {
            result.add(vertex);
            continue;
        }
        let hasReturnEdge = false;
        const outgoing = dataflowGraph.outgoingEdges(cid) ?? [];
        // returns edges are always followed
        for (const [target, e] of outgoing) {
            if (edge_1.DfEdge.includesType(e, edge_1.EdgeType.Returns)) {
                hasReturnEdge = true;
                if (!visited.has(target)) {
                    potential.push(target);
                }
            }
        }
        // for calls, vertices with a returns edge, and parameter definitions, nothing but the returns edges is followed
        if (vertex.tag === vertex_1.VertexType.FunctionCall || hasReturnEdge || (vertex.tag === vertex_1.VertexType.VariableDefinition && vertex.par)) {
            continue;
        }
        for (const [target, e] of outgoing) {
            if (edge_1.DfEdge.includesType(e, LinkedFnFollowBits) && !visited.has(target)) {
                potential.push(target);
            }
        }
    }
    return [result, builtIns];
}
/**
 * This method links a set of read variables to definitions in an environment.
 * Every reference that resolves gets a `Reads` edge to each resolved definition.
 * @param referencesToLinkAgainstEnvironment - The set of references to link against the environment
 * @param environmentInformation - The environment information to link against
 * @param givenInputs - The existing list of inputs that might be extended (it is modified in place)
 * @param graph - The graph to enter the found links
 * @param maybeForRemaining - Each input that can not be linked, will be added to `givenInputs`. If this flag is `true`, it will be marked as `maybe`
 *                            (its `cds` is initialized with an empty array if it has none).
 * @returns the given inputs, possibly extended with the remaining inputs (those of `referencesToLinkAgainstEnvironment` that could not be linked against the environment,
 *          as well as those that resolved exclusively to built-in constants or functions)
 */
function linkInputs(referencesToLinkAgainstEnvironment, environmentInformation, givenInputs, graph, maybeForRemaining) {
    const builtInTypes = identifier_1.ReferenceType.BuiltInConstant | identifier_1.ReferenceType.BuiltInFunction;
    for (const reference of referencesToLinkAgainstEnvironment) {
        const targets = reference.name
            ? (0, resolve_by_name_1.resolveByName)(reference.name, environmentInformation, reference.type)
            : undefined;
        if (targets === undefined) {
            // unresolved: stays an open input
            if (maybeForRemaining) {
                reference.cds ??= [];
            }
            givenInputs.push(reference);
            continue;
        }
        let hasNonBuiltInTarget = false;
        for (const { nodeId: targetId, type: targetType } of targets) {
            graph.addEdge(reference.nodeId, targetId, edge_1.EdgeType.Reads);
            if (!(0, identifier_1.isReferenceType)(targetType, builtInTypes)) {
                hasNonBuiltInTarget = true;
            }
        }
        // references that only hit built-ins remain open inputs as well
        if (!hasNonBuiltInTarget) {
            givenInputs.push(reference);
        }
    }
    return givenInputs;
}
/**
 * All loop variables which are open reads (not already bound by a redefinition within the loop) get a read edge
 * to their last definition within the loop, e.g. with:
 * ```R
 * for(i in 1:10) {
 *   x_1 <- x_2 + 1
 * }
 * ```
 * `x_2` must get a read marker to `x_1` as `x_1` is the active redefinition in the second loop iteration.
 * @param graph    - The graph that receives the `Reads` edges
 * @param openIns  - Map from a name to the open references reading it
 * @param outgoing - The references written by the loop body; for each name only the last entry is considered,
 *                   which implicitly assumes that they are ordered
 */
function linkCircularRedefinitionsWithinALoop(graph, openIns, outgoing) {
    // keep only the last definition of every named identifier
    const latestByName = new Map();
    for (const written of outgoing) {
        const { name } = written;
        if (name) {
            latestByName.set(name, written);
        }
    }
    const finalDefinitions = Array.from(latestByName.values());
    for (const [openName, readers] of openIns.entries()) {
        for (const definition of finalDefinitions) {
            if (definition.name === undefined || !identifier_1.Identifier.matches(definition.name, openName)) {
                continue;
            }
            for (const reader of readers) {
                graph.addEdge(reader.nodeId, definition.nodeId, edge_1.EdgeType.Reads);
            }
        }
    }
}
/**
 * Reapplies the loop exit points' control dependencies to the given identifier references.
 * The control dependencies of all exit points are negated, flagged with `byIteration: true`, and then added to every
 * reference (each node id is handled once) that does not already carry a control dependency with the same id.
 * Whenever a reference received a new control dependency, the same object is also added to its vertex in the graph
 * (again only if the vertex has none with that id yet).
 * @param exits      - The exit points of the loop
 * @param references - The references to extend (modified in place)
 * @param graph      - The graph whose vertices are updated alongside
 */
function reapplyLoopExitPoints(exits, references, graph) {
    const negated = exits.flatMap(exit => exit.cds?.map(info_1.negateControlDependency));
    const present = negated.filter(assert_1.isNotUndefined);
    const exitCds = present.map(cd => ({ ...cd, byIteration: true }));
    // adds `cd` to `holder.cds` unless one with the same id is present; tells whether it was added
    const addIfMissing = (holder, cd) => {
        if (!holder.cds) {
            holder.cds = [cd];
            return true;
        }
        if (holder.cds.some(c => c.id === cd.id)) {
            return false;
        }
        holder.cds.push(cd);
        return true;
    };
    const handledIds = new Set();
    for (const ref of references) {
        if (handledIds.has(ref.nodeId)) {
            continue;
        }
        handledIds.add(ref.nodeId);
        for (const cd of exitCds) {
            if (!addIfMissing(ref, cd)) {
                continue;
            }
            // keep the vertex in sync with the reference
            const vertex = graph.getVertex(ref.nodeId);
            if (vertex) {
                addIfMissing(vertex, cd);
            }
        }
    }
}
//# sourceMappingURL=linker.js.map