/*
 * @eagleoutice/flowr
 * Version: (not given)
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 319 lines, 16.2 kB, JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.findNonLocalReads = findNonLocalReads;
exports.produceNameSharedIdMap = produceNameSharedIdMap;
exports.linkArgumentsOnCall = linkArgumentsOnCall;
exports.linkFunctionCallWithSingleTarget = linkFunctionCallWithSingleTarget;
exports.linkFunctionCalls = linkFunctionCalls;
exports.getAllFunctionCallTargets = getAllFunctionCallTargets;
exports.getAllLinkedFunctionDefinitions = getAllLinkedFunctionDefinitions;
exports.linkInputs = linkInputs;
exports.linkCircularRedefinitionsWithinALoop = linkCircularRedefinitionsWithinALoop;
const defaultmap_1 = require("../../util/defaultmap");
const assert_1 = require("../../util/assert");
const log_1 = require("../../util/log");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const identifier_1 = require("../environments/identifier");
const graph_1 = require("../graph/graph");
const logger_1 = require("../logger");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const edge_1 = require("../graph/edge");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const vertex_1 = require("../graph/vertex");
const resolve_by_name_1 = require("../environments/resolve-by-name");
const built_in_1 = require("../environments/built-in");
const static_slicer_1 = require("../../slicing/static/static-slicer");
const prefix_1 = require("../../util/prefix");
/**
 * Collects references for all `Use` and `FunctionCall` vertices of the graph that are not resolved
 * by another use/call vertex: either the vertex has no outgoing edges at all, or at least one of its
 * `Reads` edges points to a vertex that is neither a use nor a function call.
 *
 * @param graph  - The graph to inspect (vertices, outgoing edges and the id map are read from it)
 * @param ignore - References whose `nodeId` must not be reported
 *
 * @returns one reference (name, node id, reference type) per vertex that qualifies
 */
function findNonLocalReads(graph, ignore) {
    const skippedIds = new Set(ignore.map(reference => reference.nodeId));
    const readerIds = new Set();
    for (const [vertexId, vertex] of graph.vertices(true)) {
        const tag = vertex.tag;
        if (tag === vertex_1.VertexType.Use || tag === vertex_1.VertexType.FunctionCall) {
            readerIds.add(vertexId);
        }
    }
    /* find all variable use ids which do not link to a given id */
    const collected = [];
    for (const id of readerIds) {
        if (skippedIds.has(id)) {
            continue;
        }
        const edges = graph.outgoingEdges(id);
        const name = (0, node_id_1.recoverName)(id, graph.idMap);
        const vertex = graph.getVertex(id, true);
        // both reporting paths below emit exactly the same reference shape
        const asReference = () => ({
            name,
            nodeId: id,
            controlDependencies: undefined,
            type: vertex?.tag === vertex_1.VertexType.FunctionCall ? identifier_1.ReferenceType.Function : identifier_1.ReferenceType.Variable
        });
        if (edges === undefined) {
            collected.push(asReference());
            continue;
        }
        for (const [target, edge] of edges) {
            const readsOutside = (0, edge_1.edgeIncludesType)(edge.types, edge_1.EdgeType.Reads) && !readerIds.has(target);
            if (!readsOutside) {
                continue;
            }
            if (!name) {
                logger_1.dataflowLogger.warn('found non-local read without name for id ' + id);
            }
            collected.push(asReference());
            // one report per vertex is enough
            break;
        }
    }
    return collected;
}
/**
 * Groups the given references by their name.
 * References without a (truthy) name are left out.
 *
 * @param references - The references to group
 *
 * @returns a default map from name to the list of references sharing that name (in input order)
 */
function produceNameSharedIdMap(references) {
    const sharedByName = new defaultmap_1.DefaultMap(() => []);
    for (const candidate of references) {
        const key = candidate.name;
        if (!key) {
            continue;
        }
        const bucket = sharedByName.get(key);
        bucket.push(candidate);
    }
    return sharedByName;
}
/**
 * Links the arguments of a call to the parameters of the called function by adding a
 * `DefinesOnCall` edge (argument to parameter) and a `DefinedByOnCall` edge (parameter to argument).
 *
 * Named arguments are handled first: the argument name is resolved to a parameter name via
 * `findByPrefixIfUnique` (falling back to the name itself); if no parameter matches, the argument
 * is linked to the special (dot-dot-dot) parameter when there is one.
 * The remaining (unnamed) arguments are then assigned by position to the parameters that were not
 * matched by name; surplus arguments go to the special parameter, or are skipped with a warning.
 *
 * @param args   - The arguments of the call (named, unnamed, or `EmptyArgument`)
 * @param params - The parameters of the function definition
 * @param graph  - The graph that receives the edges
 */
function linkArgumentsOnCall(args, params, graph) {
    const nameArgMap = new Map(args.filter(graph_1.isNamedArgument).map(a => [a.name, a]));
    const nameParamMap = new Map(params.map(p => [p.name.content, p]));
    const parameterNames = [...nameParamMap.keys()];
    const specialDotParameter = params.find(p => p.special);
    // every link consists of the same pair of opposing edges
    const link = (argumentId, parameter) => {
        graph.addEdge(argumentId, parameter.name.info.id, edge_1.EdgeType.DefinesOnCall);
        graph.addEdge(parameter.name.info.id, argumentId, edge_1.EdgeType.DefinedByOnCall);
    };
    // names of all parameters matched by name
    const matchedParameters = new Set();
    // first map names
    for (const [name, arg] of nameArgMap) {
        const pmatchName = (0, prefix_1.findByPrefixIfUnique)(name, parameterNames) ?? name;
        const param = nameParamMap.get(pmatchName);
        if (param !== undefined) {
            logger_1.dataflowLogger.trace(`mapping named argument "${name}" to parameter "${param.name.content}"`);
            link(arg.nodeId, param);
            // record the parameter's own name: the argument name may only be a prefix of it, and the
            // filter below compares against parameter names
            matchedParameters.add(param.name.content);
        }
        else if (specialDotParameter !== undefined) {
            logger_1.dataflowLogger.trace(`mapping named argument "${name}" to dot-dot-dot parameter`);
            link(arg.nodeId, specialDotParameter);
        }
    }
    const remainingParameter = params.filter(p => !matchedParameters.has(p.name.content));
    const remainingArguments = args.filter(a => !(0, graph_1.isNamedArgument)(a));
    for (const [i, arg] of remainingArguments.entries()) {
        if (arg === r_function_call_1.EmptyArgument) {
            // an empty argument still occupies its position
            logger_1.dataflowLogger.trace(`skipping value argument for ${i}`);
            continue;
        }
        if (remainingParameter.length <= i) {
            if (specialDotParameter !== undefined) {
                logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to dot-dot-dot parameter`);
                link(arg.nodeId, specialDotParameter);
            }
            else {
                logger_1.dataflowLogger.warn(`skipping argument ${i} as there is no corresponding parameter - R should block that`);
            }
            continue;
        }
        const param = remainingParameter[i];
        logger_1.dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to parameter "${param.name.content}"`);
        link(arg.nodeId, param);
    }
}
/**
 * Looks up the node registered for `targetId` and, if it is a function definition,
 * links the call arguments to the parameters of that definition.
 * If the id is unknown or does not refer to a function definition, only a trace is logged.
 *
 * @param targetId         - Id of the presumed function definition
 * @param idMap            - Map used to resolve `targetId` to its node
 * @param functionCallName - Name used in log messages
 * @param functionRootId   - Id of the call, used in log messages
 * @param callArgs         - The arguments of the call
 * @param finalGraph       - The graph that receives the argument links
 */
function linkFunctionCallArguments(targetId, idMap, functionCallName, functionRootId, callArgs, finalGraph) {
    // we get them by just choosing the rhs of the definition
    const candidate = idMap.get(targetId);
    if (candidate === undefined) {
        logger_1.dataflowLogger.trace(`no function definition found for ${functionCallName} (${functionRootId})`);
        return;
    }
    const isDefinition = candidate.type === type_1.RType.FunctionDefinition;
    if (!isDefinition) {
        logger_1.dataflowLogger.trace(`function call definition base ${functionCallName} does not lead to a function definition (${functionRootId}) but got ${candidate.type}`);
        return;
    }
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `linking arguments for ${functionCallName} (${functionRootId}) to ${JSON.stringify(candidate.location)}`);
    linkArgumentsOnCall(callArgs, candidate.parameters, finalGraph);
}
/**
 * Links a single function call to one function definition:
 * - if the call carries an environment, every named open read of the definition's subflow is
 *   resolved in that environment and connected with `DefinedByOnCall`/`DefinesOnCall` edges,
 * - the call gets a `Returns` edge to every exit point of the definition,
 * - the call gets a `Calls` edge to the definition,
 * - finally the call arguments are linked to the definition's parameters.
 *
 * @param graph - The graph that receives the edges
 * @param def   - The function definition vertex (its `subflow.in`, `exitPoints` and `id` are read)
 * @param info  - The function call vertex (its `id`, `environment` and `args` are read)
 * @param idMap - The map to resolve ids to names
 */
function linkFunctionCallWithSingleTarget(graph, def, info, idMap) {
    const callId = info.id;
    if (info.environment !== undefined) {
        // for each open ingoing reference, try to resolve it here, and if so, add a read edge from the call to signal that it reads it
        for (const openRead of def.subflow.in) {
            if (!openRead.name) {
                continue;
            }
            const resolved = (0, resolve_by_name_1.resolveByName)(openRead.name, info.environment, openRead.type);
            if (resolved === undefined) {
                continue;
            }
            for (const definition of resolved) {
                graph.addEdge(openRead, definition, edge_1.EdgeType.DefinedByOnCall);
                graph.addEdge(callId, definition, edge_1.EdgeType.DefinesOnCall);
            }
        }
    }
    for (const exitPoint of def.exitPoints) {
        graph.addEdge(callId, exitPoint, edge_1.EdgeType.Returns);
    }
    const defName = (0, node_id_1.recoverName)(def.id, idMap);
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `recording expression-list-level call from ${(0, node_id_1.recoverName)(info.id, idMap)} to ${defName}`);
    graph.addEdge(callId, def.id, edge_1.EdgeType.Calls);
    linkFunctionCallArguments(def.id, idMap, defName, callId, info.args, graph);
}
/* there is _a lot_ potential for optimization here */
/**
 * Links one function call vertex to all function definitions reachable from it.
 * The search starts at the targets of the call's outgoing `Reads`/`Calls` edges that are not
 * (also) `Argument` edges. If the call is a root vertex of `thisGraph`, the call together with the
 * definitions found is appended to `calledFunctionDefinitions`.
 *
 * @param graph                     - The graph used for edge lookup and linking
 * @param id                        - Id of the function call vertex
 * @param info                      - The function call vertex
 * @param idMap                     - The map to resolve ids to names
 * @param thisGraph                 - The graph asked whether `id` is a root vertex
 * @param calledFunctionDefinitions - Accumulator that is extended in place
 */
function linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions) {
    const outgoing = graph.outgoingEdges(id);
    if (outgoing === undefined) {
        /* no outgoing edges */
        return;
    }
    const readOrCall = edge_1.EdgeType.Reads | edge_1.EdgeType.Calls;
    const startIds = new Set();
    for (const [target, edge] of outgoing) {
        const isArgument = !(0, edge_1.edgeDoesNotIncludeType)(edge.types, edge_1.EdgeType.Argument);
        if (!isArgument && (0, edge_1.edgeIncludesType)(edge.types, readOrCall)) {
            startIds.add(target);
        }
    }
    const definitions = getAllLinkedFunctionDefinitions(startIds, graph);
    for (const definition of definitions.values()) {
        (0, assert_1.guard)(definition.tag === vertex_1.VertexType.FunctionDefinition, () => `expected function definition, but got ${definition.tag}`);
        linkFunctionCallWithSingleTarget(graph, definition, info, idMap);
    }
    if (thisGraph.isRoot(id)) {
        calledFunctionDefinitions.push({ functionCall: id, called: [...definitions.values()] });
    }
}
/**
 * Links every function call vertex of `thisGraph` to the function definitions it may call
 * (including the corresponding arguments) and reports which definitions were called.
 *
 * @param graph - The graph to use for search and resolution traversals (ideally a superset of the `thisGraph`)
 * @param idMap - The map to resolve ids to names
 * @param thisGraph - The graph to search for function calls in
 *
 * @returns the entries collected by the per-call linking (call id plus called definitions)
 */
function linkFunctionCalls(graph, idMap, thisGraph) {
    // collect all calls up front, linking only starts once the list is complete
    const callVertices = [];
    for (const entry of thisGraph.vertices(true)) {
        if (entry[1].tag === vertex_1.VertexType.FunctionCall) {
            callVertices.push(entry);
        }
    }
    const calledFunctionDefinitions = [];
    callVertices.forEach(([callId, callInfo]) => {
        linkFunctionCall(graph, callId, callInfo, idMap, thisGraph, calledFunctionDefinitions);
    });
    return calledFunctionDefinitions;
}
/**
 * Convenience function returning all known call targets, as well as the name source which defines them.
 *
 * The result lists the ids of all linked function definitions first, followed by the ids the
 * search started from (definitions resolved by name plus targets of outgoing `Calls` edges).
 * An empty array is returned if `call` is unknown, is not a function call, has no name, or if
 * neither `environment` nor the call's own environment is available.
 *
 * @param call        - Id of the function call vertex
 * @param graph       - The graph to look the call up in
 * @param environment - Optional environment used for name resolution (defaults to the call's own)
 */
function getAllFunctionCallTargets(call, graph, environment) {
    const callVertex = graph.get(call, true);
    if (callVertex === undefined) {
        return [];
    }
    const [info, outgoingEdges] = callVertex;
    if (info.tag !== vertex_1.VertexType.FunctionCall) {
        return [];
    }
    const environmentKnown = environment !== undefined || info.environment !== undefined;
    if (info.name === undefined || !environmentKnown) {
        return [];
    }
    const resolved = (0, resolve_by_name_1.resolveByName)(info.name, environment ?? info.environment, identifier_1.ReferenceType.Function);
    const startIds = resolved?.map(definition => definition.nodeId) ?? [];
    for (const [target, edge] of outgoingEdges.entries()) {
        if ((0, edge_1.edgeIncludesType)(edge.types, edge_1.EdgeType.Calls)) {
            startIds.push(target);
        }
    }
    const linkedDefinitions = getAllLinkedFunctionDefinitions(new Set(startIds), graph);
    return [...Array.from(linkedDefinitions, definition => definition.id), ...startIds];
}
/**
 * Starting from the given ids, follows edges through the dataflow graph and collects every
 * vertex that carries a `subflow`.
 *
 * For a vertex with outgoing `Returns` edges only those are followed; otherwise the vertex is
 * collected (if it has a `subflow`) and its `Reads`, `DefinedBy` and `DefinedByOnCall` edges are
 * followed. Built-ins and ids unknown to the graph are skipped.
 *
 * @param functionDefinitionReadIds - The ids to start the search from
 * @param dataflowGraph             - The graph to traverse
 *
 * @returns the set of collected vertices
 */
function getAllLinkedFunctionDefinitions(functionDefinitionReadIds, dataflowGraph) {
    const followBits = edge_1.EdgeType.Reads | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.DefinedByOnCall;
    const worklist = Array.from(functionDefinitionReadIds);
    const seen = new Set();
    const definitions = new Set();
    // queues the targets of the given edges unless they have been processed already
    const enqueueTargets = (edges) => {
        for (const [target] of edges) {
            if (!seen.has(target)) {
                worklist.push(target);
            }
        }
    };
    while (worklist.length > 0) {
        const currentId = worklist.pop();
        // do not traverse builtins
        if (currentId === built_in_1.BuiltIn) {
            continue;
        }
        const entry = dataflowGraph.get(currentId, true);
        if (entry === undefined) {
            static_slicer_1.slicerLogger.trace('skipping unknown link');
            continue;
        }
        seen.add(currentId);
        const [vertex, edgeMap] = entry;
        const edges = [...edgeMap];
        const returning = edges.filter(([_, edge]) => (0, edge_1.edgeIncludesType)(edge.types, edge_1.EdgeType.Returns));
        if (returning.length > 0) {
            // only traverse return edges and do not follow `calls` etc. as this indicates that we have a function call which returns a result, and not the function calls itself
            enqueueTargets(returning);
            continue;
        }
        if (vertex.subflow !== undefined) {
            definitions.add(vertex);
        }
        // trace all joined reads
        enqueueTargets(edges.filter(([_, edge]) => (0, edge_1.edgeIncludesType)(edge.types, followBits)));
    }
    return definitions;
}
/**
 * Links a set of read references to their definitions in an environment.
 * Every reference that resolves gets a `Reads` edge to each of its definitions;
 * every reference that does not resolve (or has no name) is appended to `givenInputs`.
 *
 * @param referencesToLinkAgainstEnvironment - The set of references to link against the environment
 * @param environmentInformation - The environment information to link against
 * @param givenInputs - The existing list of inputs that might be extended (it is modified in place)
 * @param graph - The graph to enter the found links
 * @param maybeForRemaining - If `true`, each input that can not be linked gets `controlDependencies` set to an empty list, unless it already has a value
 *
 * @returns the given inputs, possibly extended with the remaining inputs (those of `referencesToLinkAgainstEnvironment` that could not be linked against the environment)
 */
function linkInputs(referencesToLinkAgainstEnvironment, environmentInformation, givenInputs, graph, maybeForRemaining) {
    for (const reference of referencesToLinkAgainstEnvironment) {
        const targets = reference.name
            ? (0, resolve_by_name_1.resolveByName)(reference.name, environmentInformation, reference.type)
            : undefined;
        if (targets !== undefined) {
            // we can stick with maybe even if readId.attribute is always
            for (const definition of targets) {
                graph.addEdge(reference, definition, edge_1.EdgeType.Reads);
            }
            continue;
        }
        log_1.log.trace(`found no target for ${reference.name}`);
        if (maybeForRemaining) {
            reference.controlDependencies ??= [];
        }
        givenInputs.push(reference);
    }
    return givenInputs;
}
/**
 * All loop variables which are open reads (not already bound by a redefinition within the loop)
 * get a read edge to their last definition within the loop, e.g. with:
 * ```R
 * for(i in 1:10) {
 *     x_1 <- x_2 + 1
 * }
 * ```
 * `x_2` must get a read marker to `x_1` as `x_1` is the active redefinition in the second loop iteration.
 *
 * @param graph    - The graph that receives the `Reads` edges
 * @param openIns  - Open reads of the loop, grouped by name (name to list of references)
 * @param outgoing - The references written within the loop; for every name the last entry wins
 */
function linkCircularRedefinitionsWithinALoop(graph, openIns, outgoing) {
    // first, we preprocess out so that only the last definition of a given identifier survives
    // this implicitly assumes that the outgoing references are ordered
    const lastOutgoing = new Map();
    for (const out of outgoing) {
        if (out.name) {
            lastOutgoing.set(out.name, out);
        }
    }
    for (const [name, targets] of openIns.entries()) {
        // the map is keyed by name, so a direct lookup replaces scanning all of its values
        const lastDefinition = lastOutgoing.get(name);
        if (lastDefinition === undefined) {
            continue;
        }
        for (const target of targets) {
            graph.addEdge(target.nodeId, lastDefinition.nodeId, edge_1.EdgeType.Reads);
        }
    }
}
//# sourceMappingURL=linker.js.map