UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

349 lines 13.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.getAliases = getAliases; exports.resolveIdToValue = resolveIdToValue; exports.trackAliasInEnvironments = trackAliasInEnvironments; exports.trackAliasesInGraph = trackAliasesInGraph; exports.resolveToConstants = resolveToConstants; const config_1 = require("../../../config"); const node_id_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/node-id"); const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type"); const fingerprint_1 = require("../../../slicing/static/fingerprint"); const visiting_queue_1 = require("../../../slicing/static/visiting-queue"); const assert_1 = require("../../../util/assert"); const environment_1 = require("../../environments/environment"); const identifier_1 = require("../../environments/identifier"); const resolve_by_name_1 = require("../../environments/resolve-by-name"); const edge_1 = require("../../graph/edge"); const unknown_replacement_1 = require("../../graph/unknown-replacement"); const unknown_side_effect_1 = require("../../graph/unknown-side-effect"); const vertex_1 = require("../../graph/vertex"); const general_1 = require("../values/general"); const r_value_1 = require("../values/r-value"); const set_constants_1 = require("../values/sets/set-constants"); const resolve_1 = require("./resolve"); const AliasHandler = { [vertex_1.VertexType.Value]: (sourceId) => [sourceId], [vertex_1.VertexType.Use]: getUseAlias, [vertex_1.VertexType.FunctionCall]: getFunctionCallAlias, [vertex_1.VertexType.FunctionDefinition]: () => undefined, [vertex_1.VertexType.VariableDefinition]: () => undefined }; function getFunctionCallAlias(sourceId, dataflow, environment) { const identifier = (0, node_id_1.recoverName)(sourceId, dataflow.idMap); if (identifier === undefined) { return undefined; } const defs = (0, resolve_by_name_1.resolveByName)(identifier, environment, identifier_1.ReferenceType.Function); if (defs === undefined || defs.length !== 1) { return undefined; } return [sourceId]; } function getUseAlias(sourceId, dataflow, environment) { const definitions = []; // Source is Symbol -> resolve definitions of symbol const identifier = (0, node_id_1.recoverName)(sourceId, dataflow.idMap); if (identifier === undefined) { return undefined; } const defs = (0, resolve_by_name_1.resolveByName)(identifier, environment); if (defs === undefined) { return undefined; } for (const def of defs) { // If one definition is not constant (or a variable aliasing a constant) // we can't say for sure what value the source has if (def.type === identifier_1.ReferenceType.Variable) { if (def.value === undefined) { return undefined; } definitions.push(...def.value); } else if (def.type === identifier_1.ReferenceType.Constant || def.type === identifier_1.ReferenceType.BuiltInConstant) { definitions.push(def.nodeId); } else { return undefined; } } return definitions; } /** * Gets the definitions / aliases of a node * * This function is called by the built-in-assignment processor so that we can * track assignments inside the environment. The returned ids are stored in * the sourceIds value field of their InGraphIdentifierDefinition. This enables * us later, in the {@link trackAliasInEnvironments} function, to get all the * aliases of an identifier. * * @param sourceIds - node ids to get the definitions for * @param dataflow - dataflow graph * @param environment - environment * @returns node id of alias */ function getAliases(sourceIds, dataflow, environment) { const definitions = new Set(); for (const sourceId of sourceIds) { const info = dataflow.getVertex(sourceId); if (info === undefined) { return undefined; } const defs = AliasHandler[info.tag](sourceId, dataflow, environment); for (const def of defs ?? []) { definitions.add(def); } } return [...definitions]; } /** * Evaluates the value of a node in the set domain. * * resolveIdToValue tries to resolve the value using the data it has been given. * If the environment is provided the approximation is more precise, as we can * track aliases in the environment. * Otherwise, the graph is used to try and resolve the nodes value. * If neither is provided the value cannot be resolved. * * This function is also used by the Resolve Value Query and the Dependency Query * to resolve values. For e.g. in the Dependency Query it is used to resolve calls * like `lapply(c("a", "b", "c"), library, character.only = TRUE)` * * @param id - The node id or node to resolve * @param environment - The current environment used for name resolution * @param graph - The graph to resolve in * @param idMap - The id map to resolve the node if given as an id * @param full - Whether to track aliases on resolve */ function resolveIdToValue(id, { environment, graph, idMap, full = true }) { if (id === undefined) { return r_value_1.Top; } idMap ??= graph?.idMap; const node = typeof id === 'object' ? id : idMap?.get(id); if (node === undefined) { return r_value_1.Top; } switch (node.type) { case type_1.RType.Argument: case type_1.RType.Symbol: if (environment) { return full ? trackAliasInEnvironments(node.lexeme, environment, graph, idMap) : r_value_1.Top; } else if (graph && (0, config_1.getConfig)().solver.variables === config_1.VariableResolve.Alias) { return full ? trackAliasesInGraph(node.info.id, graph, idMap) : r_value_1.Top; } else { return r_value_1.Top; } case type_1.RType.FunctionCall: return (0, set_constants_1.setFrom)((0, resolve_1.resolveNode)(node, environment, graph, idMap)); case type_1.RType.String: case type_1.RType.Number: case type_1.RType.Logical: return (0, set_constants_1.setFrom)((0, general_1.valueFromRNodeConstant)(node)); default: return r_value_1.Top; } } /** * Please use {@link resolveIdToValue} * * Uses the aliases that were tracked in the environments (by the * {@link getAliases} function) to resolve a node to a value. * * * @param identifier - Identifier to resolve * @param use - Environment to use * @param graph - Dataflow graph * @param idMap - id map of Dataflow graph * @returns Value of Identifier or Top */ function trackAliasInEnvironments(identifier, use, graph, idMap) { if (identifier === undefined) { return r_value_1.Top; } const defs = (0, resolve_by_name_1.resolveByName)(identifier, use); if (defs === undefined) { return r_value_1.Top; } const values = new Set(); for (const def of defs) { if (def.type === identifier_1.ReferenceType.BuiltInConstant) { values.add((0, general_1.valueFromTsValue)(def.value)); } else if (def.type === identifier_1.ReferenceType.BuiltInFunction) { // Tracked in #1207 } else if (def.value !== undefined) { /* if there is at least one location for which we have no idea, we have to give up for now! */ if (def.value.length === 0) { return r_value_1.Top; } for (const alias of def.value) { const definitionOfAlias = idMap?.get(alias); if (definitionOfAlias !== undefined) { const value = (0, resolve_1.resolveNode)(definitionOfAlias, use, graph, idMap); if ((0, r_value_1.isTop)(value)) { return r_value_1.Top; } values.add(value); } } } } if (values.size == 0) { return r_value_1.Top; } return (0, set_constants_1.setFrom)(...values); } (0, unknown_side_effect_1.onUnknownSideEffect)((_graph, env, _id, target) => { if (target) { return; } let current = env.current; while (current) { current.memory.forEach(mem => mem.forEach((def) => { if (def.type !== identifier_1.ReferenceType.BuiltInConstant && def.type !== identifier_1.ReferenceType.BuiltInFunction && def.value !== undefined) { def.value.length = 0; } })); current = current.parent; } }); (0, unknown_replacement_1.onReplacementOperator)((args) => { if (!args.target) { return; } let current = args.env.current; while (current) { const defs = current.memory.get(args.target); defs?.forEach(def => { if (def.type !== identifier_1.ReferenceType.BuiltInConstant && def.type !== identifier_1.ReferenceType.BuiltInFunction && def.value !== undefined) { def.value.length = 0; } }); current = current.parent; } }); function isNestedInLoop(node, ast) { const parent = node?.info.parent; if (node === undefined || !parent) { return false; } const parentNode = ast.get(parent); if (parentNode === undefined) { return false; } if (parentNode.type === type_1.RType.WhileLoop || parentNode.type === type_1.RType.RepeatLoop) { return true; } return isNestedInLoop(parentNode, ast); } /** * Please use {@link resolveIdToValue} * * Tries to resolve the value of a node by traversing the dataflow graph * * @param id - node to resolve * @param graph - dataflow graph * @param idMap - idmap of dataflow graph * @returns Value of node or Top/Bottom */ function trackAliasesInGraph(id, graph, idMap) { idMap ??= graph.idMap; (0, assert_1.guard)(idMap !== undefined, 'The ID map is required to get the lineage of a node'); const start = graph.getVertex(id); (0, assert_1.guard)(start !== undefined, 'Unable to find start for alias tracking'); const queue = new visiting_queue_1.VisitingQueue(25); const clean = (0, environment_1.initializeCleanEnvironments)(); const cleanFingerprint = (0, fingerprint_1.envFingerprint)(clean); queue.add(id, clean, cleanFingerprint, false); let forceBot = false; const resultIds = []; while (queue.nonEmpty()) { const { id, baseEnvironment } = queue.next(); const res = graph.get(id); if (!res) { continue; } const [vertex, outgoingEdges] = res; const cds = vertex.cds; for (const cd of cds ?? []) { const target = graph.idMap?.get(cd.id); if (target === undefined) { continue; } if (target.type === type_1.RType.WhileLoop || target.type === type_1.RType.RepeatLoop) { forceBot = true; break; } } if (!forceBot && (cds?.length === 0 && isNestedInLoop(idMap.get(id), idMap))) { forceBot = true; } if (forceBot) { break; } if (vertex.tag === vertex_1.VertexType.Value) { resultIds.push(id); continue; } else if (vertex.tag === vertex_1.VertexType.FunctionDefinition) { resultIds.push(id); continue; } const isFn = vertex.tag === vertex_1.VertexType.FunctionCall; // travel all read and defined-by edges for (const [targetId, edge] of outgoingEdges) { if (isFn) { if (edge.types === edge_1.EdgeType.Returns || edge.types === edge_1.EdgeType.DefinedByOnCall || edge.types === edge_1.EdgeType.DefinedBy) { queue.add(targetId, baseEnvironment, cleanFingerprint, false); } continue; } // currently, they have to be exact! if (edge.types === edge_1.EdgeType.Reads || edge.types === edge_1.EdgeType.DefinedBy || edge.types === edge_1.EdgeType.DefinedByOnCall) { queue.add(targetId, baseEnvironment, cleanFingerprint, false); } } } if (forceBot || resultIds.length === 0) { return r_value_1.Bottom; } const values = new Set(); for (const id of resultIds) { const node = idMap.get(id); if (node !== undefined) { values.add((0, general_1.valueFromRNodeConstant)(node)); } } return (0, set_constants_1.setFrom)(...values); } /** * Please use {@link resolveIdToValue} * * Resolve an Identifier to a constant, if the identifier is a constant * * @param name - Identifier to resolve * @param environment - Environment to use * @returns Value of Constant or Top */ function resolveToConstants(name, environment) { if (name === undefined) { return r_value_1.Top; } const definitions = (0, resolve_by_name_1.resolveByName)(name, environment, identifier_1.ReferenceType.Constant); if (definitions === undefined) { return r_value_1.Top; } const values = new Set(); definitions.forEach(def => values.add((0, general_1.valueFromTsValue)(def.value ?? r_value_1.Top))); return (0, set_constants_1.setFrom)(...values); } //# sourceMappingURL=alias-tracking.js.map