// @eagleoutice/flowr
// Version: (unspecified)
// Static Dataflow Analyzer and Program Slicer for the R Programming Language
// 239 lines • 13 kB — JavaScript (transpiled CommonJS output)
;
// TypeScript `esModuleInterop` helper: reuse an already-installed helper on `this`
// if present, otherwise wrap a CommonJS export so it is reachable via `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        // already an ES module namespace object — pass through untouched
        return mod;
    }
    return { "default": mod };
};
// Mark this CommonJS module as a transpiled ES module and expose its public API.
Object.defineProperty(exports, "__esModule", { value: true });
exports.setSourceProvider = setSourceProvider;
exports.inferWdFromScript = inferWdFromScript;
exports.findSource = findSource;
exports.processSourceCall = processSourceCall;
exports.sourceRequest = sourceRequest;
exports.standaloneSourceFile = standaloneSourceFile;
const processor_1 = require("../../../../../processor");
const info_1 = require("../../../../../info");
const config_1 = require("../../../../../../config");
const known_call_handling_1 = require("../known-call-handling");
const retriever_1 = require("../../../../../../r-bridge/retriever");
const decorate_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const logger_1 = require("../../../../../logger");
const type_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/type");
const overwrite_1 = require("../../../../../environments/overwrite");
const log_1 = require("../../../../../../util/log");
const fs_1 = __importDefault(require("fs"));
const parser_1 = require("../../../../../../r-bridge/lang-4.x/ast/parser/json/parser");
const shell_executor_1 = require("../../../../../../r-bridge/shell-executor");
const assert_1 = require("../../../../../../util/assert");
const path_1 = __importDefault(require("path"));
const general_1 = require("../../../../../eval/values/general");
const r_value_1 = require("../../../../../eval/values/r-value");
const unknown_side_effect_1 = require("../../../../../graph/unknown-side-effect");
const alias_tracking_1 = require("../../../../../eval/resolve/alias-tracking");
// Provider used to locate and load `source()`d files; defaults to reading from the filesystem.
let sourceProvider = (0, retriever_1.requestProviderFromFile)();
/**
 * Replaces the active source provider (e.g. to serve file contents from memory in tests).
 * @param provider - the new provider; must offer `exists` and `createRequest` (see usages below)
 */
function setSourceProvider(provider) {
    sourceProvider = provider;
}
/**
 * Infers candidate working directories from the chain of scripts sourced so far.
 * @param option - the {@link config_1.InferWorkingDirectory} strategy to apply
 * @param referenceChain - requests processed so far (oldest first)
 * @returns a list of directory paths (possibly empty)
 */
function inferWdFromScript(option, referenceChain) {
    if (option === config_1.InferWorkingDirectory.MainScript) {
        const main = referenceChain[0];
        return main?.request === 'file' ? [platformDirname(main.content)] : [];
    }
    if (option === config_1.InferWorkingDirectory.ActiveScript) {
        const active = referenceChain.at(-1);
        return active ? [platformDirname(active.content)] : [];
    }
    if (option === config_1.InferWorkingDirectory.AnyScript) {
        return referenceChain
            .filter(entry => entry.request === 'file')
            .map(entry => platformDirname(entry.content));
    }
    // InferWorkingDirectory.No and any unknown option
    return [];
}
// Matches any path separator (POSIX `/` or Windows `\`) anywhere in a string.
const AnyPathSeparator = /[/\\]/g;
/** Basename of `p`, accepting both `/` and `\` as separators. */
function platformBasename(p) {
    const posixForm = p.split(path_1.default.win32.sep).join(path_1.default.posix.sep);
    return path_1.default.posix.basename(posixForm);
}
/** Dirname of `p`, accepting both `/` and `\` as separators. */
function platformDirname(p) {
    const posixForm = p.split(path_1.default.win32.sep).join(path_1.default.posix.sep);
    return path_1.default.posix.dirname(posixForm);
}
/** Rewrites every separator in `p` to the current platform's separator. */
function returnPlatformPath(p) {
    return p.split(AnyPathSeparator).join(path_1.default.sep);
}
/**
 * Applies each replacement map to `path` independently, producing one rewritten
 * path per map. Within one map, entries are applied in insertion order and each
 * key is interpreted as a (global) regular expression.
 * @param path - the path to rewrite
 * @param replacements - list of `{ pattern: substitute }` maps
 * @returns one rewritten path per replacement map
 */
function applyReplacements(path, replacements) {
    return replacements.map(replacement => {
        let rewritten = path;
        for (const [pattern, substitute] of Object.entries(replacement)) {
            rewritten = rewritten.replace(new RegExp(pattern, 'g'), substitute);
        }
        return rewritten;
    });
}
/**
 * Tries to find sourced by a source request and returns the first path that exists
 * @param seed - the path originally requested in the `source` call
 * @param data - more information on the loading context
 */
function findSource(seed, data) {
    const config = (0, config_1.getConfig)().solver.resolveSource;
    const capitalization = config?.ignoreCapitalization ?? false;
    // candidate directories to prepend: configured search paths plus inferred working directories
    const explorePaths = [
        ...(config?.searchPath ?? []),
        ...(inferWdFromScript(config?.inferWorkingDirectory ?? config_1.InferWorkingDirectory.No, data.referenceChain))
    ];
    let tryPaths = [seed];
    switch (config?.dropPaths ?? config_1.DropPathsOption.No) {
        case config_1.DropPathsOption.Once: {
            // additionally try the bare filename
            tryPaths.push(platformBasename(seed));
            break;
        }
        case config_1.DropPathsOption.All: {
            // additionally try the bare filename and every suffix of the directory prefix
            const paths = platformDirname(seed).split(AnyPathSeparator);
            const basename = platformBasename(seed);
            tryPaths.push(basename);
            if (paths.length === 1 && paths[0] === '.') {
                break;
            }
            for (let i = 0; i < paths.length; i++) {
                tryPaths.push(path_1.default.join(...paths.slice(i), basename));
            }
            break;
        }
        default:
        case config_1.DropPathsOption.No:
            break;
    }
    if (config?.applyReplacements) {
        const r = config.applyReplacements;
        tryPaths = tryPaths.flatMap(t => applyReplacements(t, r));
    }
    const found = [];
    // `undefined` first: try each candidate as-is before prefixing any explore directory
    for (const explore of [undefined, ...explorePaths]) {
        for (const tryPath of tryPaths) {
            const effectivePath = explore ? path_1.default.join(explore, tryPath) : tryPath;
            // NOTE(review): `??` only falls back to the platform-separator variant when
            // `exists` yields null/undefined — a `false` result short-circuits. Confirm
            // the provider really signals "unknown" as nullish; otherwise this should be `||`.
            const get = sourceProvider.exists(effectivePath, capitalization) ?? sourceProvider.exists(returnPlatformPath(effectivePath), capitalization);
            // fix: dedupe against the *stored* (platform-normalized) form; previously the raw
            // `effectivePath` was compared against normalized entries, so the same file could
            // be pushed twice when the separators differed
            const platformPath = returnPlatformPath(effectivePath);
            if (get && !found.includes(platformPath)) {
                found.push(platformPath);
            }
        }
    }
    if (log_1.log.settings.minLevel >= 3 /* LogLevel.Info */) {
        log_1.log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`);
    }
    return found;
}
/**
 * Dataflow handler for R's `source(file)` built-in: resolves the sourced file,
 * guards against repeated/recursive sourcing via the reference chain, and inlines
 * the sourced file's dataflow into the current analysis.
 * @param name - the function-call name node (used for id generation below)
 * @param args - the call's arguments; only single-argument calls are analyzed
 * @param rootId - id of the `source` call vertex in the dataflow graph
 * @param data - the processor context (environment, complete AST, reference chain, ...)
 * @param config - handler flags: `includeFunctionCall`, `forceFollow`
 * @returns the (possibly extended) dataflow information for the call
 */
function processSourceCall(name, args, rootId, data, config) {
    // only the single-argument form is supported; fall back to generic call handling
    if (args.length !== 1) {
        logger_1.dataflowLogger.warn(`Expected exactly one argument for source currently, but got ${args.length} instead, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'default' }).information;
    }
    const information = config.includeFunctionCall ?
        (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'builtin:source' }).information
        : (0, info_1.initializeCleanDataflowInformation)(rootId, data);
    const sourceFileArgument = args[0];
    // the global config may disable following source calls entirely (unless forced)
    if (!config.forceFollow && (0, config_1.getConfig)().ignoreSourceCalls) {
        (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Skipping source call ${JSON.stringify(sourceFileArgument)} (disabled in config file)`);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
        return information;
    }
    let sourceFile;
    if (sourceFileArgument !== r_function_call_1.EmptyArgument && sourceFileArgument?.value?.type === type_1.RType.String) {
        // literal string argument: take the lexeme directly
        sourceFile = [(0, retriever_1.removeRQuotes)(sourceFileArgument.lexeme)];
    }
    else if (sourceFileArgument !== r_function_call_1.EmptyArgument) {
        // non-literal argument: try to resolve it to a set of string values via alias tracking
        const resolved = (0, general_1.valueSetGuard)((0, alias_tracking_1.resolveIdToValue)(sourceFileArgument.info.id, { environment: data.environment, idMap: data.completeAst.idMap }));
        sourceFile = resolved?.elements.map(r => r.type === 'string' && (0, r_value_1.isValue)(r.value) ? r.value.str : undefined).filter(assert_1.isNotUndefined);
    }
    // only proceed when the argument resolves to exactly one candidate path
    if (sourceFile && sourceFile.length === 1) {
        const path = (0, retriever_1.removeRQuotes)(sourceFile[0]);
        let filepath = path ? findSource(path, data) : path;
        if (Array.isArray(filepath)) {
            // findSource may return several hits; use the first one
            filepath = filepath?.[0];
        }
        if (filepath !== undefined) {
            const request = sourceProvider.createRequest(filepath);
            // check if the sourced file has already been dataflow analyzed, and if so, skip it
            const limit = (0, config_1.getConfig)().solver.resolveSource?.repeatedSourceLimit ?? 0;
            const findCount = data.referenceChain.filter(e => e.request === request.request && e.content === request.content).length;
            if (findCount > limit) {
                logger_1.dataflowLogger.warn(`Found cycle (>=${limit + 1}) in dataflow analysis for ${JSON.stringify(request)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
                (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
                return information;
            }
            // the repeat count is mixed into the id prefix so re-sourced files get distinct node ids
            return sourceRequest(rootId, request, data, information, (0, decorate_1.sourcedDeterministicCountingIdGenerator)((findCount > 0 ? findCount + '::' : '') + path, name.location));
        }
    }
    // unresolvable / multi-valued argument: record an unknown side effect instead
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Non-constant argument ${JSON.stringify(sourceFile)} for source is currently not supported, skipping`);
    (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
    return information;
}
/**
 * Parses, normalizes and dataflow-analyzes the given request, then merges the
 * result into `information` (environment, graph, references, id map).
 * @param rootId - id of the triggering `source` call; sourced content becomes control-dependent on it
 * @param request - the file/text request to analyze
 * @param data - the processor context to analyze the request under
 * @param information - the dataflow information accumulated so far
 * @param getId - id generator for the sourced AST's nodes
 * @returns merged dataflow information, or `information` unchanged on failure
 */
function sourceRequest(rootId, request, data, information, getId) {
    if (request.request === 'file') {
        /* check if the file exists and if not, fail */
        if (!fs_1.default.existsSync(request.content)) {
            logger_1.dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`);
            (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
            return information;
        }
    }
    // parse, normalize and dataflow the sourced file
    let normalized;
    let dataflow;
    try {
        const file = request.request === 'file' ? request.content : undefined;
        // an async parser cannot be used synchronously here, so fall back to a fresh RShellExecutor
        const parsed = (!data.parser.async ? data.parser : new shell_executor_1.RShellExecutor()).parse(request);
        // string output comes from the shell/JSON path, non-string from tree-sitter
        normalized = (typeof parsed !== 'string' ?
            (0, parser_1.normalizeTreeSitter)({ parsed }, getId, file) : (0, parser_1.normalize)({ parsed }, getId, file));
        dataflow = (0, processor_1.processDataflowFor)(normalized.ast, {
            ...data,
            currentRequest: request,
            environment: information.environment,
            // extend the chain so nested `source` calls can detect repetition/cycles
            referenceChain: [...data.referenceChain, request]
        });
    }
    catch (e) {
        logger_1.dataflowLogger.error(`Failed to analyze sourced file ${JSON.stringify(request)}, skipping: ${e.message}`);
        logger_1.dataflowLogger.error(e.stack);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
        return information;
    }
    // take the entry point as well as all the written references, and give them a control dependency to the source call to show that they are conditional
    if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
        dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true);
    }
    for (const out of dataflow.out) {
        dataflow.graph.addControlDependency(out.nodeId, rootId, true);
    }
    dataflow.graph.addFile(request.request === 'file' ? request.content : '<inline>');
    // update our graph with the sourced file's information
    const newInformation = { ...information };
    newInformation.environment = (0, overwrite_1.overwriteEnvironment)(information.environment, dataflow.environment);
    newInformation.graph.mergeWith(dataflow.graph);
    // this can be improved, see issue #628
    for (const [k, v] of normalized.idMap) {
        data.completeAst.idMap.set(k, v);
    }
    return {
        ...newInformation,
        in: newInformation.in.concat(dataflow.in),
        out: newInformation.out.concat(dataflow.out),
        unknownReferences: newInformation.unknownReferences.concat(dataflow.unknownReferences),
        exitPoints: dataflow.exitPoints
    };
}
/**
 * Analyzes a request as if it were `source`d at the top level, skipping it if the
 * same request already occurs in the reference chain (i.e. a sourcing loop).
 * @param inputRequest - the file/text request to analyze
 * @param data - the processor context
 * @param uniqueSourceId - id used for control dependencies and id-prefix generation
 * @param information - the dataflow information accumulated so far
 * @returns the merged dataflow information (or `information` unchanged on a loop)
 */
function standaloneSourceFile(inputRequest, data, uniqueSourceId, information) {
    const isFile = inputRequest.request === 'file';
    const path = isFile ? inputRequest.content : '-inline-';
    /* this way we can still pass content */
    const request = isFile ? sourceProvider.createRequest(inputRequest.content) : inputRequest;
    // check if the sourced file has already been dataflow analyzed, and if so, skip it
    const alreadyAnalyzed = data.referenceChain.some(e => e.request === request.request && e.content === request.content);
    if (alreadyAnalyzed) {
        logger_1.dataflowLogger.info(`Found loop in dataflow analysis for ${JSON.stringify(request)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, uniqueSourceId);
        return information;
    }
    return sourceRequest(uniqueSourceId, request, {
        ...data,
        currentRequest: request,
        environment: information.environment,
        referenceChain: [...data.referenceChain, inputRequest]
    }, information, (0, decorate_1.deterministicPrefixIdGenerator)(path + '::' + uniqueSourceId));
}
//# sourceMappingURL=built-in-source.js.map