UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

288 lines 14.8 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.inferWdFromScript = inferWdFromScript; exports.platformBasename = platformBasename; exports.platformDirname = platformDirname; exports.findSource = findSource; exports.processSourceCall = processSourceCall; exports.sourceRequest = sourceRequest; exports.standaloneSourceFile = standaloneSourceFile; const processor_1 = require("../../../../../processor"); const info_1 = require("../../../../../info"); const config_1 = require("../../../../../../config"); const known_call_handling_1 = require("../known-call-handling"); const retriever_1 = require("../../../../../../r-bridge/retriever"); const decorate_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate"); const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const logger_1 = require("../../../../../logger"); const type_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/type"); const overwrite_1 = require("../../../../../environments/overwrite"); const log_1 = require("../../../../../../util/log"); const parser_1 = require("../../../../../../r-bridge/lang-4.x/ast/parser/json/parser"); const shell_executor_1 = require("../../../../../../r-bridge/shell-executor"); const assert_1 = require("../../../../../../util/assert"); const path_1 = __importDefault(require("path")); const general_1 = require("../../../../../eval/values/general"); const r_value_1 = require("../../../../../eval/values/r-value"); const unknown_side_effect_1 = require("../../../../../graph/unknown-side-effect"); const alias_tracking_1 = require("../../../../../eval/resolve/alias-tracking"); const edge_1 = require("../../../../../graph/edge"); const built_in_proc_name_1 = require("../../../../../environments/built-in-proc-name"); /** * Infers working directories based on the given option and reference chain */ function inferWdFromScript(option, referenceChain) { switch (option) { case config_1.InferWorkingDirectory.MainScript: return referenceChain[0] ? [platformDirname(referenceChain[0])] : []; case config_1.InferWorkingDirectory.ActiveScript: { const secondToLast = referenceChain.at(-1); return secondToLast ? [platformDirname(secondToLast)] : []; } case config_1.InferWorkingDirectory.AnyScript: return referenceChain.filter(assert_1.isNotUndefined).map(e => platformDirname(e)); case config_1.InferWorkingDirectory.No: default: return []; } } const AnyPathSeparator = /[/\\]/g; /** * Return the basename of a path in a platform-agnostic way * @see {@link platformDirname} - for the dirname counterpart */ function platformBasename(p) { const normalized = p.replaceAll(path_1.default.win32.sep, path_1.default.posix.sep); return path_1.default.posix.basename(normalized); } /** * Return the dirname of a path in a platform-agnostic way */ function platformDirname(p) { const normalized = p.replaceAll(path_1.default.win32.sep, path_1.default.posix.sep); return path_1.default.posix.dirname(normalized); } function returnPlatformPath(p) { return p.replaceAll(AnyPathSeparator, path_1.default.sep); } function applyReplacements(path, replacements) { const results = []; for (const replacement of replacements) { const newPath = Object.entries(replacement).reduce((acc, [key, value]) => acc.replaceAll(new RegExp(key, 'g'), value), path); results.push(newPath); } return results; } /** * Tries to find sourced by a source request and returns the first path that exists * @param resolveSource - options for lax file sourcing * @param seed - the path originally requested in the `source` call * @param data - more information on the loading context */ function findSource(resolveSource, seed, data) { const capitalization = resolveSource?.ignoreCapitalization ?? false; const explorePaths = (resolveSource?.searchPath ?? []).concat(inferWdFromScript(resolveSource?.inferWorkingDirectory ?? config_1.InferWorkingDirectory.No, data.referenceChain)); let tryPaths = [seed]; switch (resolveSource?.dropPaths ?? config_1.DropPathsOption.No) { case config_1.DropPathsOption.Once: { const first = platformBasename(seed); tryPaths.push(first); break; } case config_1.DropPathsOption.All: { const paths = platformDirname(seed).split(AnyPathSeparator); const basename = platformBasename(seed); tryPaths.push(basename); if (paths.length === 1 && paths[0] === '.') { break; } for (let i = 0; i < paths.length; i++) { tryPaths.push(path_1.default.join(...paths.slice(i), basename)); } break; } case config_1.DropPathsOption.No: default: break; } if (resolveSource?.applyReplacements) { const r = resolveSource.applyReplacements; tryPaths = tryPaths.flatMap(t => applyReplacements(t, r)); } const found = []; for (const explore of [undefined, ...explorePaths]) { for (const tryPath of tryPaths) { const effectivePath = explore ? path_1.default.join(explore, tryPath) : tryPath; const context = data.ctx.files; const get = context.exists(effectivePath, capitalization) ?? context.exists(returnPlatformPath(effectivePath), capitalization); if (get && !found.includes(returnPlatformPath(get))) { found.push(returnPlatformPath(get)); } } } if (log_1.log.settings.minLevel >= 3 /* LogLevel.Info */) { log_1.log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`); } return found; } /** * Processes a named function call (i.e., not an anonymous function) */ function processSourceCall(name, args, rootId, data, config) { if (args.length !== 1) { logger_1.dataflowLogger.warn(`Expected exactly one argument for source currently, but got ${args.length} instead, skipping`); return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'default' }).information; } const information = config.includeFunctionCall ? (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: built_in_proc_name_1.BuiltInProcName.Source }).information : info_1.DataflowInformation.initialize(rootId, data); const sourceFileArgument = args[0]; if (!config.forceFollow && data.ctx.config.ignoreSourceCalls) { (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Skipping source call ${JSON.stringify(sourceFileArgument)} (disabled in config file)`); (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId); return information; } let sourceFile; if (sourceFileArgument !== r_function_call_1.EmptyArgument && sourceFileArgument?.value?.type === type_1.RType.String) { sourceFile = [(0, retriever_1.removeRQuotes)(sourceFileArgument.lexeme)]; } else if (sourceFileArgument !== r_function_call_1.EmptyArgument) { const resolved = (0, general_1.valueSetGuard)((0, alias_tracking_1.resolveIdToValue)(sourceFileArgument.info.id, { environment: data.environment, idMap: data.completeAst.idMap, resolve: data.ctx.config.solver.variables, ctx: data.ctx })); sourceFile = resolved?.elements.map(r => r.type === 'string' && (0, r_value_1.isValue)(r.value) ? r.value.str : undefined).filter(assert_1.isNotUndefined); } if (sourceFile?.length === 1) { const path = (0, retriever_1.removeRQuotes)(sourceFile[0]); let filepath = path ? findSource(data.ctx.config.solver.resolveSource, path, data) : path; if (!Array.isArray(filepath)) { filepath = filepath ? [filepath] : undefined; } if (filepath !== undefined && filepath.length > 0) { let result = information; const origCds = data.cds?.slice() ?? []; for (const f of filepath) { // check if the sourced file has already been dataflow analyzed, and if so, skip it const limit = data.ctx.config.solver.resolveSource?.repeatedSourceLimit ?? 0; const findCount = data.referenceChain.filter(e => e !== undefined && f === e).length; if (findCount > limit) { logger_1.dataflowLogger.warn(`Found cycle (>=${limit + 1}) in dataflow analysis for ${JSON.stringify(filepath)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`); (0, unknown_side_effect_1.handleUnknownSideEffect)(result.graph, result.environment, rootId); continue; } if (filepath.length > 1) { data = { ...data, cds: [...origCds, { id: rootId, when: true, file: f }] }; } result = sourceRequest(rootId, { request: 'file', content: f }, data, result, true, (0, decorate_1.sourcedDeterministicCountingIdGenerator)((findCount > 0 ? findCount + '::' : '') + f, name.location)); } return result; } } (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Non-constant argument ${JSON.stringify(sourceFile)} for source is currently not supported, skipping`); (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId); return information; } /** * Processes a source request with the given dataflow processor information and existing dataflow information * Otherwise, this can be an {@link RProjectFile} representing a standalone source file */ function sourceRequest(rootId, request, data, information, makeMaybe, getId) { // parse, normalize and dataflow the sourced file let dataflow; let fst; let filePath; if ('root' in request) { fst = request; filePath = request.filePath; } else { const textRequest = data.ctx.files.resolveRequest(request); if (textRequest === undefined && request.request === 'file') { // if translation failed there is nothing we can do!! logger_1.dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`); (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId); return information; } else { (0, assert_1.guard)(textRequest !== undefined, `Expected text request to be defined for sourced file ${JSON.stringify(request)}`); } const parsed = (!data.parser.async ? data.parser : new shell_executor_1.RShellExecutor()).parse(textRequest.r); const normalized = (typeof parsed !== 'string' ? (0, parser_1.normalizeTreeSitter)({ files: [{ parsed, filePath: textRequest.path }] }, getId, data.ctx.config) : (0, parser_1.normalize)({ files: [{ parsed, filePath: textRequest.path }] }, getId)); fst = normalized.ast.files[0]; // this can be improved, see issue #628 for (const [k, v] of normalized.idMap) { data.completeAst.idMap.set(k, v); } // add to the main ast if (!data.completeAst.ast.files.some(f => f.filePath === fst.filePath)) { data.completeAst.ast.files.push(fst); } filePath = textRequest.path; } try { dataflow = (0, processor_1.processDataflowFor)(fst.root, { ...data, environment: information.environment, referenceChain: [...data.referenceChain, fst.filePath] }); } catch (e) { logger_1.dataflowLogger.error(`Failed to analyze sourced file ${JSON.stringify(request)}, skipping: ${e.message}`); logger_1.dataflowLogger.error(e.stack); (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId); return information; } // take the entry point as well as all the written references, and give them a control dependency to the source call to show that they are conditional if (!String(rootId).startsWith('file-')) { if (makeMaybe) { if (dataflow.graph.hasVertex(dataflow.entryPoint)) { dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true); } for (const out of dataflow.out) { dataflow.graph.addControlDependency(out.nodeId, rootId, true); } } else { if (dataflow.graph.hasVertex(dataflow.entryPoint)) { dataflow.graph.addEdge(dataflow.entryPoint, rootId, edge_1.EdgeType.Reads); } for (const out of dataflow.out) { dataflow.graph.addEdge(out.nodeId, rootId, edge_1.EdgeType.Reads); } } } data.ctx.files.addConsideredFile(filePath ?? '<inline>'); // update our graph with the sourced file's information return { ...information, environment: (0, overwrite_1.overwriteEnvironment)(information.environment, dataflow.environment), graph: information.graph.mergeWith(dataflow.graph), in: information.in.concat(dataflow.in), out: information.out.concat(dataflow.out), unknownReferences: information.unknownReferences.concat(dataflow.unknownReferences), exitPoints: dataflow.exitPoints }; } /** * Processes a standalone source file (i.e., not from a source function call) */ function standaloneSourceFile(idx, file, data, information) { // check if the sourced file has already been dataflow analyzed, and if so, skip it if (data.referenceChain.some(e => e !== undefined && e === file.filePath)) { logger_1.dataflowLogger.info(`Found loop in dataflow analysis for ${JSON.stringify(file.filePath)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`); (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, file.root.info.id); return information; } return sourceRequest('file-' + idx, file, { ...data, environment: information.environment, referenceChain: [...data.referenceChain, file.filePath] }, information, false); } //# sourceMappingURL=built-in-source.js.map