UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

463 lines 22.4 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.processAssignmentLike = processAssignmentLike;
exports.processAssignment = processAssignment;
exports.markAsAssignment = markAsAssignment;
const known_call_handling_1 = require("../known-call-handling");
const log_1 = require("../../../../../../util/log");
const unpack_argument_1 = require("../argument/unpack-argument");
const process_named_call_1 = require("../../../process-named-call");
const make_argument_1 = require("../argument/make-argument");
const type_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/type");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const logger_1 = require("../../../../../logger");
const identifier_1 = require("../../../../../environments/identifier");
const overwrite_1 = require("../../../../../environments/overwrite");
const retriever_1 = require("../../../../../../r-bridge/retriever");
const vertex_1 = require("../../../../../graph/vertex");
const define_1 = require("../../../../../environments/define");
const edge_1 = require("../../../../../graph/edge");
const resolve_by_name_1 = require("../../../../../environments/resolve-by-name");
const built_in_envir_utils_1 = require("./built-in-envir-utils");
const named_call_handling_1 = require("../named-call-handling");
const built_in_1 = require("../../../../../environments/built-in");
const unknown_side_effect_1 = require("../../../../../graph/unknown-side-effect");
const alias_tracking_1 = require("../../../../../eval/resolve/alias-tracking");
const r_value_1 = require("../../../../../eval/values/r-value");
const built_in_proc_name_1 = require("../../../../../environments/built-in-proc-name");
const built_in_new_env_1 = require("./built-in-new-env");
/**
 * Builds a synthetic symbol node that names the replacement function for an assignment target.
 * The node reuses `info`, `lexeme` and `location` of `target`; only its `content` differs.
 * @param target          - the node on the left-hand side of the assignment
 * @param prefix          - identifier of the called function or access operator, suffixed via `Identifier.mapName`
 * @param superAssignment - if truthy the suffix is `<<-`, otherwise `<-`
 */
function toReplacementSymbol(target, prefix, superAssignment) {
    return {
        type: type_1.RType.Symbol,
        info: target.info,
        /* they are all mapped to `<-` in R, but we mark super as well */
        content: identifier_1.Identifier.mapName(prefix, n => n + (superAssignment ? '<<-' : '<-')),
        lexeme: target.lexeme,
        location: target.location
    };
}
/**
 * Returns the first two entries of `args` swapped (as a fresh two-element array) if
 * `config.swapSourceAndTarget` is set, and `args` itself (same reference) otherwise.
 */
function getEffectiveOrder(config, args) {
    return config.swapSourceAndTarget ? [args[1], args[0]] : args;
}
/**
 * Follows the `accessed` chain of nested access nodes down to its innermost node.
 * @returns that innermost node if it is a symbol, `undefined` otherwise
 */
function findRootAccess(node) {
    let current = node;
    while (current.type === type_1.RType.Access) {
        current = current.accessed;
    }
    return current.type === type_1.RType.Symbol ? current : undefined;
}
/**
 * Processes a call to the replacement function `functionName`.
 * Only if the name resolves to exactly one built-in function, the built-in replacement processor is used
 * (with `assignRootId` set to `rootId`); everything else is handed to `processAsNamedCall`.
 * @param rootId       - id of the assignment that triggered the replacement
 * @param functionName - synthetic symbol as produced by {@link toReplacementSymbol}
 * @param data         - the dataflow processor information
 * @param name         - identifier of the replacement function
 * @param args         - arguments of the replacement call
 */
function tryReplacement(rootId, functionName, data, name, args) {
    const resolved = (0, resolve_by_name_1.resolveByName)(functionName.content, data.environment, identifier_1.ReferenceType.Function) ?? [];
    // yield for unsupported pass along!
    if (resolved.length !== 1 || resolved[0].type !== identifier_1.ReferenceType.BuiltInFunction) {
        return (0, process_named_call_1.processAsNamedCall)(functionName, data, name, args);
    }
    const info = built_in_1.BuiltInProcessorMapper[built_in_proc_name_1.BuiltInProcName.Replacement]({
        type: type_1.RType.Symbol,
        info: functionName.info,
        content: name,
        lexeme: functionName.lexeme,
        location: functionName.location
    }, (0, make_argument_1.wrapArgumentsUnnamed)(args, data.completeAst.idMap), functionName.info.id, data, {
        ...resolved[0].config,
        assignRootId: rootId
    });
    (0, named_call_handling_1.markAsOnlyBuiltIn)(info.graph, functionName.info.id);
    return info;
}
/**
 * In contrast to `processAssignment`, this function allows more flexible handling of assignment-like functions.
 *
 * The target and the source are looked up by their configured name (`config.target.name`, `config.source.name`)
 * among the named arguments and, failing that, by their configured index (`config.target.idx`, `config.source.idx`)
 * among the unnamed arguments. If both are found, `processAssignment` receives exactly `[target, source]`;
 * otherwise it receives `args` unchanged. In either case `mayHaveMoreArgs` is forced to `true`.
 * @param name   - the name node of the called function
 * @param args   - the arguments of the call, in arbitrary order
 * @param rootId - id of the call
 * @param data   - the dataflow processor information
 * @param config - assignment configuration, including the `source` and `target` descriptors
 */
function processAssignmentLike(name, args, rootId, data, config) {
    const argsWithNames = new Map();
    const argsWithoutNames = [];
    for (const arg of args) {
        const argName = arg === r_function_call_1.EmptyArgument ? undefined : arg.name?.content;
        if (argName === undefined) {
            argsWithoutNames.push(arg);
        }
        else {
            argsWithNames.set(argName, arg);
        }
    }
    const source = argsWithNames.get(config.source.name) ??
        (config.source.idx === undefined ? undefined : argsWithoutNames[config.source.idx]);
    const target = argsWithNames.get(config.target.name) ??
        (config.target.idx === undefined ? undefined : argsWithoutNames[config.target.idx]);
    /* only reorder if both could be identified, otherwise pass the arguments along untouched */
    const orderedArgs = source && target ? [target, source] : args;
    return processAssignment(name, orderedArgs, rootId, data, { ...config, mayHaveMoreArgs: true });
}
/**
 * Processes an assignment, i.e., `<target> <- <source>`, by handling it as a call of the
 * function `<-` with the arguments `(<target>, <source>)`.
 * This includes handling of replacement functions (e.g., `names(x) <- ...` is treated as a call of `names<-` with `(x, ...)`).
 *
 * Depending on the type of the target node the assignment is
 * - (symbol) processed as a definition of that symbol, or, with `config.targetVariable`, of the single string value the symbol resolves to,
 * - (named function call or access, with `config.canBeReplacement`) processed as a replacement-function call,
 * - (access, without `config.canBeReplacement`) processed as a definition of the root symbol of the access chain,
 * - (string) processed like an assignment to the symbol named by the unquoted string.
 *
 * Everything else is processed as a normal call and marked as an unknown side effect.
 * @param name   - the name node of the assignment function
 * @param args   - the arguments; after applying `config.swapSourceAndTarget` we expect `(target, source)`, i.e., `<target> <- <source>`
 * @param rootId - id of the assignment call
 * @param data   - the dataflow processor information
 * @param config - assignment configuration
 */
function processAssignment(name, args, rootId, data, config) {
    if (!config.mayHaveMoreArgs && args.length !== 2) {
        logger_1.dataflowLogger.warn(`Assignment ${identifier_1.Identifier.toString(name.content)} has something else than 2 arguments, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, forceArgs: config.forceArgs, origin: 'default' }).information;
    }
    /* route into a custom environment when envir resolves to a tracked env variable */
    if (config.environmentArg) {
        const routed = tryRouteToCustomEnv(name, args, rootId, data, config);
        if (routed !== undefined) {
            return routed;
        }
    }
    const effectiveArgs = getEffectiveOrder(config, args);
    const { target, source } = extractSourceAndTarget(effectiveArgs);
    if (target === undefined || source === undefined) {
        logger_1.dataflowLogger.warn(`Assignment ${identifier_1.Identifier.toString(name.content)} has an undefined target or source, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, forceArgs: config.forceArgs, origin: 'default' }).information;
    }
    const { type, named } = target;
    if (type === type_1.RType.Symbol) {
        if (!config.targetVariable) {
            const res = (0, known_call_handling_1.processKnownFunctionCall)({
                name,
                args,
                rootId,
                data,
                reverseOrder: !config.swapSourceAndTarget,
                forceArgs: config.forceArgs,
                origin: built_in_proc_name_1.BuiltInProcName.Assignment
            });
            return processAssignmentToSymbol({
                ...config,
                nameOfAssignmentFunction: name.content,
                source,
                targetId: target.info.id,
                args: getEffectiveOrder(config, res.processedArguments),
                rootId,
                data,
                information: res.information,
            });
        }
        else {
            // try to resolve the variable first
            const n = (0, alias_tracking_1.resolveIdToValue)(target.info.id, {
                environment: data.environment,
                resolve: data.ctx.config.solver.variables,
                idMap: data.completeAst.idMap,
                full: true,
                ctx: data.ctx
            });
            /* only a single, known string value names the variable to define; everything else falls through to the unknown-impact handling below */
            if (n.type === 'set' && n.elements.length === 1 && n.elements[0].type === 'string') {
                const val = n.elements[0].value;
                if ((0, r_value_1.isValue)(val)) {
                    const res = (0, known_call_handling_1.processKnownFunctionCall)({
                        name,
                        args,
                        rootId,
                        data,
                        reverseOrder: !config.swapSourceAndTarget,
                        forceArgs: config.forceArgs,
                        origin: built_in_proc_name_1.BuiltInProcName.Assignment
                    });
                    return processAssignmentToSymbol({
                        ...config,
                        nameOfAssignmentFunction: name.content,
                        source,
                        targetId: target.info.id,
                        targetName: val.str,
                        args: getEffectiveOrder(config, res.processedArguments),
                        rootId,
                        data,
                        information: res.information,
                    });
                }
            }
        }
    }
    else if (config.canBeReplacement && type === type_1.RType.FunctionCall && named) {
        /* as replacement functions take precedence over the lhs fn-call (i.e., `names(x) <- ...` is independent from the definition of `names`), we do not have to process the call */
        logger_1.dataflowLogger.debug(`Assignment ${identifier_1.Identifier.toString(name.content)} has a function call as target ==> replacement function ${target.lexeme}`);
        const replacement = toReplacementSymbol(target, target.functionName.content, config.superAssignment ?? false);
        return tryReplacement(rootId, replacement, data, replacement.content, [...target.arguments, source]);
    }
    else if (config.canBeReplacement && type === type_1.RType.Access) {
        logger_1.dataflowLogger.debug(`Assignment ${identifier_1.Identifier.toString(name.content)} has an access-type node as target ==> replacement function ${target.lexeme}`);
        const replacement = toReplacementSymbol(target, target.operator, config.superAssignment ?? false);
        const envRouted = tryRouteDollarEnvAssign(rootId, data, config, target, source, replacement);
        if (envRouted !== undefined) {
            return envRouted;
        }
        return tryReplacement(rootId, replacement, data, replacement.content, [(0, make_argument_1.toUnnamedArgument)(target.accessed, data.completeAst.idMap), ...target.access, source]);
    }
    else if (type === type_1.RType.Access) {
        /* without replacement semantics the assignment is attributed to the root symbol of the access chain */
        const rootArg = findRootAccess(target);
        if (rootArg) {
            const res = (0, known_call_handling_1.processKnownFunctionCall)({
                name,
                args: [rootArg, source],
                rootId,
                data,
                reverseOrder: !config.swapSourceAndTarget,
                forceArgs: config.forceArgs,
                origin: built_in_proc_name_1.BuiltInProcName.Assignment
            });
            return processAssignmentToSymbol({
                ...config,
                nameOfAssignmentFunction: name.content,
                source,
                targetId: rootArg.info.id,
                args: getEffectiveOrder(config, res.processedArguments),
                rootId,
                data,
                information: res.information,
            });
        }
    }
    else if (type === type_1.RType.String) {
        return processAssignmentToString(target, args, name, rootId, data, config, source);
    }
    logger_1.dataflowLogger.warn(`Assignment ${identifier_1.Identifier.toString(name.content)} has an unknown target type ${target.type} => unknown impact`);
    const info = (0, known_call_handling_1.processKnownFunctionCall)({
        name,
        args: effectiveArgs,
        rootId,
        data,
        forceArgs: config.forceArgs,
        origin: built_in_proc_name_1.BuiltInProcName.Assignment
    }).information;
    (0, unknown_side_effect_1.handleUnknownSideEffect)(info.graph, info.environment, rootId);
    return info;
}
/**
 * Unpacks the first argument as the target and the second argument as the source of an assignment.
 * Either may be whatever `unpackArg` returns for it (the caller checks both for `undefined`).
 */
function extractSourceAndTarget(args) {
    const source = (0, unpack_argument_1.unpackArg)(args[1]);
    const target = (0, unpack_argument_1.unpackArg)(args[0]);
    return { source, target };
}
/**
 * Promotes the ingoing/unknown references of target (an assignment) to definitions
 * @param rootId        - id of the assignment, recorded as `definedAt`
 * @param target        - processed target whose `in` and `unknownReferences` are promoted
 * @param referenceType - reference type given to every produced definition
 * @param data          - the dataflow processor information (provides `cds`)
 * @param makeMaybe     - if `data.cds` is nullish, use an empty `cds` array instead of `undefined`
 * @param value         - value recorded with every produced definition
 */
function produceWrittenNodes(rootId, target, referenceType, data, makeMaybe, value) {
    const written = [];
    for (const refs of [target.in, target.unknownReferences]) {
        for (const ref of refs) {
            written.push({
                nodeId: ref.nodeId,
                name: ref.name,
                type: referenceType,
                definedAt: rootId,
                cds: data.cds ?? (makeMaybe ? [] : undefined),
                value
            });
        }
    }
    return written;
}
/**
 * Handles assignments whose target is a string (e.g. `"x" <- v`) by replacing the value of the
 * target argument with a symbol named by the unquoted string and processing it like a symbol assignment.
 */
function processAssignmentToString(target, args, name, rootId, data, config, source) {
    const symbol = {
        type: type_1.RType.Symbol,
        info: target.info,
        content: (0, retriever_1.removeRQuotes)(target.lexeme),
        lexeme: target.lexeme,
        location: target.location,
    };
    // treat first argument to Symbol; include extra args (e.g. envir=) so they get graph vertices
    const extraArgs = config.mayHaveMoreArgs ? Array.from(args).slice(2) : [];
    /* with swapped order the target is the second argument, otherwise the first */
    const mappedArgs = config.swapSourceAndTarget ?
        [args[0], { ...args[1], value: symbol }, ...extraArgs] :
        [{ ...args[0], value: symbol }, args[1], ...extraArgs];
    const res = (0, known_call_handling_1.processKnownFunctionCall)({
        name,
        args: mappedArgs,
        rootId,
        data,
        reverseOrder: !config.swapSourceAndTarget,
        forceArgs: config.forceArgs,
        origin: built_in_proc_name_1.BuiltInProcName.Assignment
    });
    return processAssignmentToSymbol({
        ...config,
        nameOfAssignmentFunction: name.content,
        source,
        targetId: symbol.info.id,
        args: getEffectiveOrder(config, res.processedArguments),
        rootId,
        data,
        information: res.information
    });
}
/**
 * Derives the reference type of an assignment target from the entry-point vertex of its source:
 * function definitions yield `Function`, uses and function calls yield `Unknown`, everything else `Variable`.
 * For function definitions, every mode in `fnModes` that is not yet present is appended to the vertex's `mode` (in place).
 */
function checkTargetReferenceType(sourceInfo, fnModes) {
    const vert = sourceInfo.graph.getVertex(sourceInfo.entryPoint);
    switch (vert?.tag) {
        case vertex_1.VertexType.FunctionDefinition:
            if (fnModes && fnModes.length > 0) {
                vert.mode ??= [];
                for (const m of fnModes) {
                    if (!vert.mode.includes(m)) {
                        vert.mode.push(m);
                    }
                }
            }
            return identifier_1.ReferenceType.Function;
        case vertex_1.VertexType.Use:
        case vertex_1.VertexType.FunctionCall:
            return identifier_1.ReferenceType.Unknown;
        default:
            return identifier_1.ReferenceType.Variable;
    }
}
/**
 * Returns `true` when the entry-point of `sourceInfo` is a call to a `new.env`-family function.
 * Used by {@link processAssignmentToSymbol} to attach an initial {@link InGraphIdentifierDefinition#envState}.
 */
function isEnvCreatorSource(sourceInfo) {
    const vert = sourceInfo.graph.getVertex(sourceInfo.entryPoint);
    return (0, vertex_1.isFunctionCallVertex)(vert) && vert.origin.includes(built_in_proc_name_1.BuiltInProcName.NewEnv);
}
/**
 * When `e$x <- val` and `e` holds a tracked {@link InGraphIdentifierDefinition#envState},
 * adds the field `x` into that envState instead of redefining the whole `e` object.
 * Returns `undefined` when routing is not applicable, i.e., if the operator is not `$`, the accessed
 * node is not a symbol, the symbol does not resolve to a tracked environment, or there is no field node.
 * The `config` parameter is currently unused.
 */
function tryRouteDollarEnvAssign(rootId, data, config, target, source, replacement) {
    if (target.operator !== '$' || target.accessed.type !== type_1.RType.Symbol) {
        return undefined;
    }
    const envirResolution = (0, built_in_envir_utils_1.resolveSymbolToEnvir)(target.accessed.content, target.accessed.info.id, data);
    if (!envirResolution) {
        return undefined;
    }
    const fieldNode = (0, unpack_argument_1.unpackArg)(target.access[0]);
    if (!fieldNode) {
        return undefined;
    }
    const fieldName = (fieldNode.type === type_1.RType.String ? fieldNode.content.str : fieldNode.lexeme);
    /* run normal $<- replacement to get correct graph structure (reads of e, x, source) */
    const normalResult = tryReplacement(rootId, replacement, data, replacement.content, [(0, make_argument_1.toUnnamedArgument)(target.accessed, data.completeAst.idMap), ...target.access, source]);
    const fieldDef = {
        type: identifier_1.ReferenceType.Variable,
        name: fieldName,
        nodeId: fieldNode.info.id,
        definedAt: rootId,
        cds: data.cds
    };
    const newEnvState = (0, define_1.define)(fieldDef, false, envirResolution.envDef.envState);
    const updatedEnvDef = { ...envirResolution.envDef, definedAt: rootId, envState: newEnvState };
    /* drop the existing definitions of the environment variable before installing the updated one */
    const strippedEnv = {
        current: normalResult.environment.current.removeAll([{ name: envirResolution.envDef.name }]),
        level: normalResult.environment.level
    };
    return { ...normalResult, environment: (0, define_1.define)(updatedEnvDef, false, strippedEnv) };
}
/**
 * When `config.environmentArg` identifies an `envir`-like parameter (e.g. `'envir'` for `assign`)
 * and that argument resolves to a variable with a tracked {@link InGraphIdentifierDefinition#envState},
 * this function routes the written definitions into that custom environment instead of the current
 * global scope.
 * @returns `undefined` if routing is not possible
 */
function tryRouteToCustomEnv(name, args, rootId, data, config) {
    const resolution = (0, built_in_envir_utils_1.resolveEnvirArg)(args, data, config.environmentArg);
    if (!resolution) {
        return undefined;
    }
    /* run the normal assignment path to get the correct graph structure */
    const normalResult = processAssignment(name, args, rootId, data, {
        ...config,
        environmentArg: undefined // prevent re-entry
    });
    normalResult.graph.addEdge(rootId, resolution.envirNodeId, edge_1.EdgeType.Reads);
    /* pass rootId as definedAt so only defs made at this call site are routed */
    return (0, built_in_envir_utils_1.routeWrittenToCustomEnv)(normalResult, resolution.envDef, rootId, rootId);
}
/**
 * Consider a call like `x <- v`
 *
 * Defines `nodeToDefine` in the environment of `information` (which is modified in place), links it to its
 * sources and to the assignment with defined-by edges, and strips the read edges going out of the defined node.
 * @param information - the information to define the assignment within
 * @param nodeToDefine - `x`
 * @param sourceIds - `v`
 * @param rootIdOfAssignment - `<-`
 * @param data - The dataflow analysis fold backpack (currently unused)
 * @param assignmentConfig - configuration for the assignment processing
 */
function markAsAssignment(information, nodeToDefine, sourceIds, rootIdOfAssignment, data, assignmentConfig) {
    information.environment = (0, define_1.define)(nodeToDefine, assignmentConfig?.superAssignment, information.environment);
    information.graph.setDefinitionOfVertex(nodeToDefine, sourceIds);
    const nid = nodeToDefine.nodeId;
    /* a quoted source is not evaluated, so the definition is not linked to it */
    if (!assignmentConfig?.quoteSource) {
        for (const sourceId of sourceIds) {
            information.graph.addEdge(nid, sourceId, edge_1.EdgeType.DefinedBy);
        }
    }
    information.graph.addEdge(nid, rootIdOfAssignment, edge_1.EdgeType.DefinedBy);
    // kinda dirty, but we have to remove existing read edges for the symbol, added by the child
    const out = information.graph.outgoingEdges(nid);
    for (const [id, edge] of (out ?? [])) {
        edge.types &= ~edge_1.EdgeType.Reads;
        /* an edge without any remaining type is removed completely */
        if (edge.types === 0) {
            out.delete(id);
        }
    }
}
/**
 * Helper function whenever it is known that the _target_ of an assignment is a (single) symbol (i.e. `x <- ...`, but not `names(x) <- ...`).
 *
 * Besides the regular assignment configuration, `config` carries
 * - `nameOfAssignmentFunction`, `rootId`: name and id of the assignment call,
 * - `source`: the AST node of the source,
 * - `args`: the processed `[target, source]` arguments,
 * - `targetId`, `targetName`: the id of the target and, optionally, an explicit name to define (otherwise the references of the target are promoted to definitions),
 * - `data`, `information`: the processor information and the dataflow information of the processed call.
 */
function processAssignmentToSymbol(config) {
    const { nameOfAssignmentFunction, source, args: [targetArg, sourceArg], targetId, targetName, rootId, data, information, makeMaybe, quoteSource } = config;
    const referenceType = checkTargetReferenceType(sourceArg, config.modesForFn);
    const useSourceIds = [sourceArg.graph.hasVertex(source.info.id) ? source.info.id : sourceArg.entryPoint];
    const aliases = (0, alias_tracking_1.getAliases)(useSourceIds, information.graph, information.environment);
    const writeNodes = targetName ? [{
            nodeId: targetId,
            name: targetName,
            type: referenceType,
            definedAt: rootId,
            cds: data.cds ?? (makeMaybe ? [] : undefined),
            value: aliases
        }]
        : produceWrittenNodes(rootId, targetArg, referenceType, data, makeMaybe ?? false, aliases);
    if (data.ctx.config.solver.trackEnvironments) {
        /* either the source creates a fresh environment, or it is a symbol that already holds a tracked one */
        let envState;
        if (isEnvCreatorSource(sourceArg)) {
            envState = (0, built_in_new_env_1.createFreshEnvState)(data, sourceArg);
        }
        else if (source.type === type_1.RType.Symbol) {
            const defs = (0, resolve_by_name_1.resolveByName)(source.content, data.environment, identifier_1.ReferenceType.Variable);
            envState = defs?.find(d => d.envState !== undefined)?.envState;
        }
        if (envState) {
            for (let i = 0; i < writeNodes.length; i++) {
                writeNodes[i] = { ...writeNodes[i], envState };
            }
        }
    }
    /* a message of level warn (4) is only emitted if the minimum level of the logger is at most warn, so only then it is worth to stringify */
    if (writeNodes.length !== 1 && log_1.log.settings.minLevel <= 4 /* LogLevel.Warn */) {
        log_1.log.warn(`Unexpected write number in assignment: ${JSON.stringify(writeNodes)}`);
    }
    // we drop the first arg which we use to pass along arguments :D
    const readFromSourceWritten = sourceArg.out.slice(1);
    const readTargets = [
        { nodeId: rootId, name: nameOfAssignmentFunction, cds: data.cds, type: identifier_1.ReferenceType.Function }
    ].concat(sourceArg.unknownReferences, sourceArg.in, targetName ? targetArg.in : targetArg.in.filter(i => i.nodeId !== targetId), readFromSourceWritten);
    information.environment = (0, overwrite_1.overwriteEnvironment)(sourceArg.environment, targetArg.environment);
    // install assigned variables in environment
    for (const write of writeNodes) {
        markAsAssignment(information, write, useSourceIds, rootId, data, config);
    }
    information.graph.addEdge(rootId, targetArg.entryPoint, edge_1.EdgeType.Returns);
    if (quoteSource) {
        information.graph.addEdge(rootId, source.info.id, edge_1.EdgeType.NonStandardEvaluation);
    }
    else {
        // we read the source
        information.graph.addEdge(rootId, source.info.id, edge_1.EdgeType.Reads);
    }
    return {
        environment: information.environment,
        graph: information.graph,
        exitPoints: information.exitPoints,
        hooks: information.hooks,
        unknownReferences: [],
        entryPoint: rootId,
        in: readTargets,
        out: writeNodes.concat(readFromSourceWritten),
    };
}
//# sourceMappingURL=built-in-assignment.js.map