UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

227 lines 11.9 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.setSourceProvider = setSourceProvider;
exports.inferWdFromScript = inferWdFromScript;
exports.findSource = findSource;
exports.processSourceCall = processSourceCall;
exports.sourceRequest = sourceRequest;
exports.standaloneSourceFile = standaloneSourceFile;
const processor_1 = require("../../../../../processor");
const info_1 = require("../../../../../info");
const config_1 = require("../../../../../../config");
const known_call_handling_1 = require("../known-call-handling");
const retriever_1 = require("../../../../../../r-bridge/retriever");
const decorate_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const logger_1 = require("../../../../../logger");
const type_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/type");
const overwrite_1 = require("../../../../../environments/overwrite");
const log_1 = require("../../../../../../util/log");
const fs_1 = __importDefault(require("fs"));
const parser_1 = require("../../../../../../r-bridge/lang-4.x/ast/parser/json/parser");
const shell_executor_1 = require("../../../../../../r-bridge/shell-executor");
const resolve_by_name_1 = require("../../../../../environments/resolve-by-name");
const assert_1 = require("../../../../../../util/assert");
const path_1 = __importDefault(require("path"));

/**
 * Provider used by this module to check whether a candidate path exists (`exists`)
 * and to turn a path into a parse request (`createRequest`).
 * Replaceable at runtime via {@link setSourceProvider}.
 */
let sourceProvider = (0, retriever_1.requestProviderFromFile)();

/**
 * Replaces the module-wide source provider.
 * @param provider - object offering the `exists(path, ignoreCapitalization)` and `createRequest(path)` operations used below
 */
function setSourceProvider(provider) {
    sourceProvider = provider;
}

/**
 * Derives candidate working directories from the chain of requests that led to the current `source` call.
 * Only requests with `request === 'file'` contribute a directory; for any other request kind,
 * `content` is not a path and is ignored.
 * @param option         - one of the `InferWorkingDirectory` configuration values
 * @param referenceChain - the requests leading to the current call, first element being the main script
 * @returns the inferred directories (posix-style separators), possibly empty
 */
function inferWdFromScript(option, referenceChain) {
    switch (option) {
        case config_1.InferWorkingDirectory.MainScript: {
            const main = referenceChain[0];
            return main?.request === 'file' ? [platformDirname(main.content)] : [];
        }
        case config_1.InferWorkingDirectory.ActiveScript: {
            const active = referenceChain[referenceChain.length - 1];
            // same guard as for the main script: a non-file request carries no path to take a dirname of
            return active?.request === 'file' ? [platformDirname(active.content)] : [];
        }
        case config_1.InferWorkingDirectory.AnyScript:
            return referenceChain
                .filter(e => e.request === 'file')
                .map(e => platformDirname(e.content));
        case config_1.InferWorkingDirectory.No:
        default:
            return [];
    }
}

/** Matches both the posix (`/`) and the windows (`\`) path separator. */
const AnyPathSeparator = /[/\\]/g;

/** Rewrites all windows separators in `p` to posix separators so that `path.posix` helpers can be applied. */
function toPosixSeparators(p) {
    return p.replaceAll(path_1.default.win32.sep, path_1.default.posix.sep);
}

/** Returns the last path segment of `p`, accepting either separator style as input. */
function platformBasename(p) {
    return path_1.default.posix.basename(toPosixSeparators(p));
}

/** Returns the directory part of `p` (with posix separators), accepting either separator style as input. */
function platformDirname(p) {
    return path_1.default.posix.dirname(toPosixSeparators(p));
}

/** Rewrites every separator in `p` (posix or windows) to the separator of the platform we are running on. */
function returnPlatformPath(p) {
    return p.replaceAll(AnyPathSeparator, path_1.default.sep);
}

/**
 * Tries to find the file referenced by a source request and returns all candidate paths that exist,
 * in order of preference (the first entry is the one used by {@link processSourceCall}).
 *
 * Candidates are built from the requested path itself, optionally with leading folders dropped
 * (`dropPaths` option), each tried as-is and below every configured search path / inferred working directory.
 *
 * @param seed - the path originally requested in the `source` call
 * @param data - more information on the loading context (only `data.referenceChain` is read here)
 * @returns the existing candidates, converted to platform separators and free of duplicates
 */
function findSource(seed, data) {
    const config = (0, config_1.getConfig)().solver.resolveSource;
    const capitalization = config?.ignoreCapitalization ?? false;
    const explorePaths = [
        ...(config?.searchPath ?? []),
        ...inferWdFromScript(config?.inferWorkingDirectory ?? config_1.InferWorkingDirectory.No, data.referenceChain)
    ];
    const tryPaths = [seed];
    switch (config?.dropPaths ?? config_1.DropPathsOption.No) {
        case config_1.DropPathsOption.Once:
            tryPaths.push(platformBasename(seed));
            break;
        case config_1.DropPathsOption.All: {
            const folders = platformDirname(seed).split(AnyPathSeparator);
            if (folders.length === 1 && folders[0] === '.') {
                // the seed has no folder component, there is nothing to drop
                break;
            }
            const basename = platformBasename(seed);
            // drop 0, 1, ..., all leading folders; `<=` so that the bare basename is tried as well
            // (otherwise `All` would try fewer candidates than `Once`)
            for (let i = 0; i <= folders.length; i++) {
                tryPaths.push(path_1.default.join(...folders.slice(i), basename));
            }
            break;
        }
        default:
        case config_1.DropPathsOption.No:
            break;
    }
    const found = [];
    // `undefined` stands for "use the candidate without any prefix directory"
    for (const explore of [undefined, ...explorePaths]) {
        for (const tryPath of tryPaths) {
            const effectivePath = explore ? path_1.default.join(explore, tryPath) : tryPath;
            const platformPath = returnPlatformPath(effectivePath);
            const exists = sourceProvider.exists(effectivePath, capitalization)
                ?? sourceProvider.exists(platformPath, capitalization);
            // de-duplicate on the value we actually store, not on the pre-conversion path
            if (exists && !found.includes(platformPath)) {
                found.push(platformPath);
            }
        }
    }
    log_1.log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`);
    return found;
}

/**
 * Processes a call to `source`: resolves the (single) argument to a path, locates the file,
 * and merges the dataflow of the sourced file into the information of the call.
 * Whenever the file cannot be determined or followed, the call is marked as having unknown side effects.
 *
 * @param name   - the node of the called function name (its `location` seeds the id generator)
 * @param args   - the call arguments; exactly one is expected
 * @param rootId - id of the call node
 * @param data   - processing context (environment, reference chain, complete AST, ...)
 * @param config - `includeFunctionCall`: also process the call itself as a known function call;
 *                 `forceFollow`: follow the source even if `ignoreSourceCalls` is set in the configuration
 * @returns the resulting dataflow information
 */
function processSourceCall(name, args, rootId, data, config) {
    if (args.length !== 1) {
        logger_1.dataflowLogger.warn(`Expected exactly one argument for source currently, but got ${args.length} instead, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data }).information;
    }
    const information = config.includeFunctionCall
        ? (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data }).information
        : (0, info_1.initializeCleanDataflowInformation)(rootId, data);
    const sourceFileArgument = args[0];
    if (!config.forceFollow && (0, config_1.getConfig)().ignoreSourceCalls) {
        (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Skipping source call ${JSON.stringify(sourceFileArgument)} (disabled in config file)`);
        information.graph.markIdForUnknownSideEffects(rootId);
        return information;
    }
    // collect the candidate path(s): either a string literal, or whatever the argument resolves to
    let sourceFile;
    if (sourceFileArgument !== r_function_call_1.EmptyArgument && sourceFileArgument?.value?.type === type_1.RType.String) {
        sourceFile = [(0, retriever_1.removeRQuotes)(sourceFileArgument.lexeme)];
    }
    else if (sourceFileArgument !== r_function_call_1.EmptyArgument) {
        sourceFile = (0, resolve_by_name_1.resolveValueOfVariable)(sourceFileArgument.value?.lexeme, data.environment, data.completeAst.idMap)
            ?.map(x => (typeof x === 'object' && x && 'str' in x) ? x.str : undefined)
            .filter(assert_1.isNotUndefined);
    }
    // only an unambiguous (single) path is followed
    if (sourceFile && sourceFile.length === 1) {
        const sourcePath = (0, retriever_1.removeRQuotes)(sourceFile[0]);
        // an empty path is passed on unchanged (it fails the existence check in sourceRequest);
        // otherwise take the most preferred existing candidate, if any
        const filepath = sourcePath ? findSource(sourcePath, data)[0] : sourcePath;
        if (filepath !== undefined) {
            const request = sourceProvider.createRequest(filepath);
            // check if the sourced file has already been dataflow analyzed, and if so, skip it
            const limit = (0, config_1.getConfig)().solver.resolveSource?.repeatedSourceLimit ?? 0;
            const findCount = data.referenceChain
                .filter(e => e.request === request.request && e.content === request.content)
                .length;
            if (findCount > limit) {
                logger_1.dataflowLogger.warn(`Found cycle (>=${limit + 1}) in dataflow analysis for ${JSON.stringify(request)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
                information.graph.markIdForUnknownSideEffects(rootId);
                return information;
            }
            // prefix the id seed with the repetition count so that repeated sources of the same file get distinct ids
            const idSeed = (findCount > 0 ? findCount + '::' : '') + sourcePath;
            return sourceRequest(rootId, request, data, information, (0, decorate_1.sourcedDeterministicCountingIdGenerator)(idSeed, name.location));
        }
    }
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Non-constant argument ${JSON.stringify(sourceFile)} for source is currently not supported, skipping`);
    information.graph.markIdForUnknownSideEffects(rootId);
    return information;
}

/**
 * Parses, normalizes and dataflow-analyzes the given request and merges the result into `information`.
 * On any failure (missing file, exception while processing) the call `rootId` is marked as having
 * unknown side effects and `information` is returned.
 *
 * Note: `information.graph` and `data.completeAst.idMap` are updated in place.
 *
 * @param rootId      - id of the node that triggered the sourcing
 * @param request     - the parse request to follow
 * @param data        - processing context; `data.parser` is used unless it is asynchronous
 * @param information - dataflow information collected so far
 * @param getId       - id generator used while normalizing the sourced file
 * @returns the merged dataflow information, with the exit points of the sourced file
 */
function sourceRequest(rootId, request, data, information, getId) {
    if (request.request === 'file') {
        /* check if the file exists and if not, fail */
        if (!fs_1.default.existsSync(request.content)) {
            logger_1.dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`);
            information.graph.markIdForUnknownSideEffects(rootId);
            return information;
        }
    }
    // parse, normalize and dataflow the sourced file
    let normalized;
    let dataflow;
    try {
        const file = request.request === 'file' ? request.content : undefined;
        // parsing happens synchronously here, so an async parser is substituted by a shell executor
        const parser = !data.parser.async ? data.parser : new shell_executor_1.RShellExecutor();
        const parsed = parser.parse(request);
        // a non-string parse result is handed to the tree-sitter normalizer, a string to the default one
        normalized = typeof parsed !== 'string'
            ? (0, parser_1.normalizeTreeSitter)({ parsed }, getId, file)
            : (0, parser_1.normalize)({ parsed }, getId, file);
        dataflow = (0, processor_1.processDataflowFor)(normalized.ast, {
            ...data,
            currentRequest: request,
            environment: information.environment,
            referenceChain: [...data.referenceChain, request]
        });
    }
    catch (e) {
        logger_1.dataflowLogger.error(`Failed to analyze sourced file ${JSON.stringify(request)}, skipping: ${e.message}`);
        logger_1.dataflowLogger.error(e.stack);
        information.graph.markIdForUnknownSideEffects(rootId);
        return information;
    }
    // take the entry point as well as all the written references, and give them a control dependency to the source call to show that they are conditional
    if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
        dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true);
    }
    for (const out of dataflow.out) {
        dataflow.graph.addControlDependency(out.nodeId, rootId, true);
    }
    dataflow.graph.addFile(request.request === 'file' ? request.content : '<inline>');
    // update our graph with the sourced file's information
    // (shallow copy: the graph object is shared with `information` and merged in place)
    const newInformation = { ...information };
    newInformation.environment = (0, overwrite_1.overwriteEnvironment)(information.environment, dataflow.environment);
    newInformation.graph.mergeWith(dataflow.graph);
    // this can be improved, see issue #628
    for (const [k, v] of normalized.idMap) {
        data.completeAst.idMap.set(k, v);
    }
    return {
        ...newInformation,
        in: newInformation.in.concat(dataflow.in),
        out: newInformation.out.concat(dataflow.out),
        unknownReferences: newInformation.unknownReferences.concat(dataflow.unknownReferences),
        exitPoints: dataflow.exitPoints
    };
}

/**
 * Sources a request outside of a `source` call in the analyzed code.
 * File requests are re-created through the current source provider; other requests are passed on as given.
 * If the request is already part of the reference chain, it is not followed again and
 * `uniqueSourceId` is marked as having unknown side effects.
 *
 * @param inputRequest   - the request to source
 * @param data           - processing context
 * @param uniqueSourceId - id to attribute the sourcing to; also part of the id prefix for the sourced nodes
 * @param information    - dataflow information collected so far
 * @returns the resulting dataflow information
 */
function standaloneSourceFile(inputRequest, data, uniqueSourceId, information) {
    const isFile = inputRequest.request === 'file';
    const path = isFile ? inputRequest.content : '-inline-';
    /* this way we can still pass content */
    const request = isFile ? sourceProvider.createRequest(inputRequest.content) : inputRequest;
    // check if the sourced file has already been dataflow analyzed, and if so, skip it
    if (data.referenceChain.find(e => e.request === request.request && e.content === request.content)) {
        logger_1.dataflowLogger.info(`Found loop in dataflow analysis for ${JSON.stringify(request)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
        information.graph.markIdForUnknownSideEffects(uniqueSourceId);
        return information;
    }
    return sourceRequest(uniqueSourceId, request, {
        ...data,
        currentRequest: request,
        environment: information.environment,
        referenceChain: [...data.referenceChain, inputRequest]
    }, information, (0, decorate_1.deterministicPrefixIdGenerator)(path + '::' + uniqueSourceId));
}
//# sourceMappingURL=built-in-source.js.map