UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

280 lines 11.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.promoteCallName = promoteCallName; exports.executeCallContextQueries = executeCallContextQueries; const node_id_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/node-id"); const vertex_1 = require("../../../dataflow/graph/vertex"); const edge_1 = require("../../../dataflow/graph/edge"); const two_layer_collector_1 = require("../../two-layer-collector"); const objects_1 = require("../../../util/objects"); const identify_link_to_last_call_relation_1 = require("./identify-link-to-last-call-relation"); const cfg_kind_1 = require("../../../project/cfg-kind"); const extract_cfg_1 = require("../../../control-flow/extract-cfg"); const identify_link_to_relation_1 = require("./identify-link-to-relation"); const identifier_1 = require("../../../dataflow/environments/identifier"); /* if the node is effected by nse, we have an ingoing nse edge */ function isQuoted(node, graph) { const vertex = graph.ingoingEdges(node); if (vertex === undefined) { return false; } return vertex.values().some(e => edge_1.DfEdge.includesType(e, edge_1.EdgeType.NonStandardEvaluation)); } function makeReport(collector) { const result = {}; for (const [kind, collected] of collector.store) { const subkinds = {}; for (const [subkind, values] of collected) { if (!Array.isArray(subkinds[subkind])) { subkinds[subkind] = []; } subkinds[subkind] ??= []; const collectIn = subkinds[subkind]; for (const value of values) { collectIn.push(value); } } result[kind] = { subkinds }; } return result; } function isSubCallQuery(query) { return 'linkTo' in query && query.linkTo !== undefined; } /** * */ function promoteCallName(callName, exact = false) { if (Array.isArray(callName)) { const s = new Set(callName); return (t) => s.has(t); } else if (exact) { const s = new Set([typeof callName === 'string' ? callName : callName.source]); return (t) => s.has(t); } else { const r = new RegExp(callName); return (t) => r.test(t); } } function promoteQueryCallNames(queries) { let requiresCfg = false; const promotedQueries = queries.map(q => { if (isSubCallQuery(q)) { requiresCfg = true; return { ...q, callName: promoteCallName(q.callName, q.callNameExact), fileFilter: q.fileFilter && { ...q.fileFilter, filter: promoteCallName(q.fileFilter.filter) }, linkTo: Array.isArray(q.linkTo) ? q.linkTo.map(l => ({ ...l, callName: promoteCallName(l.callName) })) : { ...q.linkTo, /* we have to add another promotion layer whenever we add something without this call name */ callName: promoteCallName(q.linkTo.callName) } }; } else { return { ...q, callName: promoteCallName(q.callName, q.callNameExact), fileFilter: q.fileFilter && { ...q.fileFilter, filter: promoteCallName(q.fileFilter.filter) } }; } }); return { promotedQueries, requiresCfg }; } /* maybe we want to add caches to this */ function retrieveAllCallAliases(nodeId, graph) { /* we want the names of all functions called at the source id, including synonyms and returns */ const aliases = new Map(); const visited = new Set(); /* we store the current call name */ const queue = [[(0, node_id_1.recoverContent)(nodeId, graph) ?? '', nodeId]]; while (queue.length > 0) { const [str, id] = queue.shift(); if (visited.has(id)) { continue; } visited.add(id); if (id !== nodeId) { const present = aliases.get(str); if (present) { present.push(id); } else { aliases.set(str, [id]); } } const vertex = graph.get(id); if (vertex === undefined) { continue; } const [info, outgoing] = vertex; if (info.tag !== vertex_1.VertexType.FunctionCall) { const wantedTypes = edge_1.EdgeType.Reads | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.DefinedByOnCall; const x = outgoing.entries() .filter(([, e]) => edge_1.DfEdge.includesType(e, wantedTypes)) .map(([t]) => [(0, node_id_1.recoverContent)(t, graph) ?? '', t]) .toArray(); /** only follow defined-by and reads */ queue.push(...x); continue; } let track = edge_1.EdgeType.Calls | edge_1.EdgeType.Reads | edge_1.EdgeType.DefinedBy | edge_1.EdgeType.DefinedByOnCall; if (id !== nodeId) { track |= edge_1.EdgeType.Returns; } const out = outgoing.entries() .filter(([, e]) => edge_1.DfEdge.includesType(e, track) && (nodeId !== id || edge_1.DfEdge.doesNotIncludeType(e, edge_1.EdgeType.Argument))) .map(([t]) => t); for (const call of out) { queue.push([(0, node_id_1.recoverContent)(call, graph) ?? (0, node_id_1.recoverContent)(id, graph) ?? '', call]); } } return aliases; } function removeIdenticalDuplicates(collector) { for (const [, collected] of collector.store) { for (const [subkind, values] of collected) { const seen = new Set(); const newValues = values.filter(v => { const str = JSON.stringify(v); if (seen.has(str)) { return false; } seen.add(str); return true; }); collected.set(subkind, newValues); } } } function doesFilepathMatch(file, filter) { if (filter === undefined) { return true; } if (file === undefined) { return filter.includeUndefinedFiles ?? true; } return filter.filter(file); } function isParameterDefaultValue(nodeId, ast) { let node = ast.idMap.get(nodeId); while (node !== undefined) { if (node.info.role === "param-v" /* RoleInParent.ParameterDefaultValue */) { return true; } const nip = node.info.parent; node = nip ? ast.idMap.get(nip) : undefined; } return false; } /** * Multi-stage call context query resolve. * * 1. Resolve all calls in the DF graph that match the respective {@link DefaultCallContextQueryFormat#callName} regex. * 2. If there is an alias attached, consider all call traces. * 3. Identify their respective call targets, if {@link DefaultCallContextQueryFormat#callTargets} is set to be non-any. * This happens during the main resolution! * 4. Attach `linkTo` calls to the respective calls. */ async function executeCallContextQueries({ analyzer }, queries) { const dataflow = await analyzer.dataflow(); const ast = await analyzer.normalize(); /* omit performance page load */ const now = Date.now(); /* the node id and call targets if present */ const initialIdCollector = new two_layer_collector_1.TwoLayerCollector(); /* promote all strings to regex patterns */ const { promotedQueries, requiresCfg } = promoteQueryCallNames(queries); let cfg = undefined; if (requiresCfg) { cfg = await analyzer.controlflow(undefined, cfg_kind_1.CfgKind.Quick); } const calls = cfg ? (0, extract_cfg_1.getCallsInCfg)(cfg, dataflow.graph) : undefined; const queriesWhichWantAliases = promotedQueries.filter(q => q.includeAliases); for (const [nodeId, info] of dataflow.graph.verticesOfType(vertex_1.VertexType.FunctionCall)) { /* if we have a vertex, and we check for aliased calls, we want to know if we define this as desired! */ if (queriesWhichWantAliases.length > 0) { /* * yes, we make an expensive call target check, we can probably do a lot of optimization here, e.g., * by checking all of these queries would be satisfied otherwise, * in general, we first want a call to happen, i.e., trace the called targets of this! */ const targets = retrieveAllCallAliases(nodeId, dataflow.graph); for (const [l, ids] of targets.entries()) { for (const query of queriesWhichWantAliases) { if (query.callName(l)) { initialIdCollector.add(query.kind ?? '.', query.subkind ?? '.', (0, objects_1.compactRecord)({ id: nodeId, name: info.name, aliasRoots: ids })); } } } } const n = identifier_1.Identifier.getName(info.name); for (const query of promotedQueries.filter(q => !q.includeAliases && q.callName(n))) { const file = ast.idMap.get(nodeId)?.info.file; if (!doesFilepathMatch(file, query.fileFilter)) { continue; } let targets = undefined; if (query.callTargets) { targets = (0, identify_link_to_last_call_relation_1.satisfiesCallTargets)(info, dataflow.graph, query.callTargets); if (targets === 'no') { continue; } } if (isQuoted(nodeId, dataflow.graph)) { /* if the call is quoted, we do not want to link to it */ continue; } else if (query.ignoreParameterValues && isParameterDefaultValue(nodeId, ast)) { continue; } let linkedIds = undefined; if (cfg && 'linkTo' in query && query.linkTo !== undefined) { const linked = Array.isArray(query.linkTo) ? query.linkTo : [query.linkTo]; for (const link of linked) { /* if we have a linkTo query, we have to find the last call */ const linkTos = await (0, identify_link_to_relation_1.identifyLinkToRelation)(nodeId, analyzer, link, calls); if (linkTos) { linkedIds ??= new Set(); for (const l of linkTos) { if (link.attachLinkInfo) { linkedIds.add({ id: l, info: link.attachLinkInfo }); } else { linkedIds.add(l); } } } } } initialIdCollector.add(query.kind ?? '.', query.subkind ?? '.', (0, objects_1.compactRecord)({ id: nodeId, name: info.name, calls: targets, linkedIds: linkedIds ? Array.from(linkedIds) : undefined })); } } removeIdenticalDuplicates(initialIdCollector); return { '.meta': { timing: Date.now() - now, }, kinds: makeReport(initialIdCollector) }; } //# sourceMappingURL=call-context-query-executor.js.map