// @eagleoutice/flowr — Static Dataflow Analyzer and Program Slicer for the R Programming Language
// Version: (unspecified) — compiled JavaScript output (288 lines • 14.8 kB)
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.inferWdFromScript = inferWdFromScript;
exports.platformBasename = platformBasename;
exports.platformDirname = platformDirname;
exports.findSource = findSource;
exports.processSourceCall = processSourceCall;
exports.sourceRequest = sourceRequest;
exports.standaloneSourceFile = standaloneSourceFile;
const processor_1 = require("../../../../../processor");
const info_1 = require("../../../../../info");
const config_1 = require("../../../../../../config");
const known_call_handling_1 = require("../known-call-handling");
const retriever_1 = require("../../../../../../r-bridge/retriever");
const decorate_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate");
const r_function_call_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const logger_1 = require("../../../../../logger");
const type_1 = require("../../../../../../r-bridge/lang-4.x/ast/model/type");
const overwrite_1 = require("../../../../../environments/overwrite");
const log_1 = require("../../../../../../util/log");
const parser_1 = require("../../../../../../r-bridge/lang-4.x/ast/parser/json/parser");
const shell_executor_1 = require("../../../../../../r-bridge/shell-executor");
const assert_1 = require("../../../../../../util/assert");
const path_1 = __importDefault(require("path"));
const general_1 = require("../../../../../eval/values/general");
const r_value_1 = require("../../../../../eval/values/r-value");
const unknown_side_effect_1 = require("../../../../../graph/unknown-side-effect");
const alias_tracking_1 = require("../../../../../eval/resolve/alias-tracking");
const edge_1 = require("../../../../../graph/edge");
const built_in_proc_name_1 = require("../../../../../environments/built-in-proc-name");
/**
 * Infers working-directory candidates from the configured option and the
 * chain of files that (transitively) sourced the current one.
 */
function inferWdFromScript(option, referenceChain) {
    switch (option) {
        case config_1.InferWorkingDirectory.MainScript: {
            // the entry script is the first element of the reference chain
            const main = referenceChain[0];
            return main ? [platformDirname(main)] : [];
        }
        case config_1.InferWorkingDirectory.ActiveScript: {
            // the currently active script is the last element of the reference chain
            const active = referenceChain.at(-1);
            return active ? [platformDirname(active)] : [];
        }
        case config_1.InferWorkingDirectory.AnyScript:
            // consider the directory of every script in the chain
            return referenceChain.filter(assert_1.isNotUndefined).map(e => platformDirname(e));
        case config_1.InferWorkingDirectory.No:
        default:
            return [];
    }
}
// matches both POSIX ('/') and Windows ('\') path separators; global flag for replace-all semantics
const AnyPathSeparator = /[/\\]/g;
/**
 * Return the basename of a path in a platform-agnostic way
 * (Windows separators are treated like POSIX ones).
 * @see {@link platformDirname} - for the dirname counterpart
 */
function platformBasename(p) {
    const posix = path_1.default.posix;
    // normalize Windows backslashes to '/' so the POSIX basename works uniformly
    return posix.basename(p.replaceAll(path_1.default.win32.sep, posix.sep));
}
/**
 * Return the dirname of a path in a platform-agnostic way
 * (Windows separators are treated like POSIX ones).
 * @see {@link platformBasename} - for the basename counterpart
 */
function platformDirname(p) {
    const posix = path_1.default.posix;
    // normalize Windows backslashes to '/' so the POSIX dirname works uniformly
    return posix.dirname(p.replaceAll(path_1.default.win32.sep, posix.sep));
}
/** Normalize every separator in the given path to the current platform's separator. */
function returnPlatformPath(p) {
    // AnyPathSeparator is global, so a single replace() call rewrites all separators
    return p.replace(AnyPathSeparator, path_1.default.sep);
}
/**
 * Applies each replacement map to the given path and collects the results.
 * Every map entry is interpreted as a (global) regex pattern mapped to its
 * replacement string; one rewritten path is produced per map.
 */
function applyReplacements(path, replacements) {
    return replacements.map(replacement => {
        let rewritten = path;
        for (const [pattern, substitute] of Object.entries(replacement)) {
            rewritten = rewritten.replaceAll(new RegExp(pattern, 'g'), substitute);
        }
        return rewritten;
    });
}
/**
 * Tries to locate the file(s) targeted by a `source(...)` request and returns
 * every existing path that matches (a de-duplicated list, not just the first hit).
 * @param resolveSource - options for lax file sourcing (search paths, capitalization, drop/replace rules)
 * @param seed - the path originally requested in the `source` call
 * @param data - more information on the loading context (reference chain, file context)
 */
function findSource(resolveSource, seed, data) {
    const capitalization = resolveSource?.ignoreCapitalization ?? false;
    // directories to additionally probe: explicitly configured search paths plus
    // working directories inferred from the chain of sourcing scripts
    const explorePaths = (resolveSource?.searchPath ?? []).concat(inferWdFromScript(resolveSource?.inferWorkingDirectory ?? config_1.InferWorkingDirectory.No, data.referenceChain));
    let tryPaths = [seed];
    switch (resolveSource?.dropPaths ?? config_1.DropPathsOption.No) {
        case config_1.DropPathsOption.Once: {
            // additionally try the bare filename with the directory part dropped
            const first = platformBasename(seed);
            tryPaths.push(first);
            break;
        }
        case config_1.DropPathsOption.All: {
            // additionally try the bare filename and every directory-suffix variant of the seed
            const paths = platformDirname(seed).split(AnyPathSeparator);
            const basename = platformBasename(seed);
            tryPaths.push(basename);
            if (paths.length === 1 && paths[0] === '.') {
                break; // seed had no directory component, nothing more to drop
            }
            for (let i = 0; i < paths.length; i++) {
                tryPaths.push(path_1.default.join(...paths.slice(i), basename));
            }
            break;
        }
        case config_1.DropPathsOption.No:
        default:
            break;
    }
    if (resolveSource?.applyReplacements) {
        // expand every candidate by the configured regex replacements
        const r = resolveSource.applyReplacements;
        tryPaths = tryPaths.flatMap(t => applyReplacements(t, r));
    }
    const found = [];
    // probe each candidate as-is first (explore === undefined), then prefixed with every explore directory
    for (const explore of [undefined, ...explorePaths]) {
        for (const tryPath of tryPaths) {
            const effectivePath = explore ? path_1.default.join(explore, tryPath) : tryPath;
            const context = data.ctx.files;
            // check both the raw candidate and its platform-separator form
            const get = context.exists(effectivePath, capitalization) ?? context.exists(returnPlatformPath(effectivePath), capitalization);
            if (get && !found.includes(returnPlatformPath(get))) {
                // collect hits in platform form, de-duplicated
                found.push(returnPlatformPath(get));
            }
        }
    }
    if (log_1.log.settings.minLevel >= 3 /* LogLevel.Info */) {
        // guard the (potentially expensive) stringification behind the log level
        log_1.log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`);
    }
    return found;
}
/**
 * Processes a call to the `source` function (i.e., a named call, not an anonymous function):
 * resolves the argument to one or more file paths and folds the dataflow of every
 * located file into the current dataflow information.
 */
function processSourceCall(name, args, rootId, data, config) {
    if (args.length !== 1) {
        // only the single-argument form `source(<file>)` is handled; fall back to generic call handling
        logger_1.dataflowLogger.warn(`Expected exactly one argument for source currently, but got ${args.length} instead, skipping`);
        return (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: 'default' }).information;
    }
    // either include the call vertex itself in the result or start from a fresh dataflow info
    const information = config.includeFunctionCall ?
        (0, known_call_handling_1.processKnownFunctionCall)({ name, args, rootId, data, origin: built_in_proc_name_1.BuiltInProcName.Source }).information
        : info_1.DataflowInformation.initialize(rootId, data);
    const sourceFileArgument = args[0];
    if (!config.forceFollow && data.ctx.config.ignoreSourceCalls) {
        // configured to not follow source calls: record the call as an opaque side effect instead
        (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Skipping source call ${JSON.stringify(sourceFileArgument)} (disabled in config file)`);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
        return information;
    }
    let sourceFile;
    if (sourceFileArgument !== r_function_call_1.EmptyArgument && sourceFileArgument?.value?.type === type_1.RType.String) {
        // string literal argument: take the path straight from the lexeme
        sourceFile = [(0, retriever_1.removeRQuotes)(sourceFileArgument.lexeme)];
    }
    else if (sourceFileArgument !== r_function_call_1.EmptyArgument) {
        // non-literal argument: try to resolve it to a set of string values via alias/value tracking
        const resolved = (0, general_1.valueSetGuard)((0, alias_tracking_1.resolveIdToValue)(sourceFileArgument.info.id, { environment: data.environment, idMap: data.completeAst.idMap, resolve: data.ctx.config.solver.variables, ctx: data.ctx }));
        sourceFile = resolved?.elements.map(r => r.type === 'string' && (0, r_value_1.isValue)(r.value) ? r.value.str : undefined).filter(assert_1.isNotUndefined);
    }
    if (sourceFile?.length === 1) {
        const path = (0, retriever_1.removeRQuotes)(sourceFile[0]);
        // findSource may yield several candidate files for a single requested path
        let filepath = path ? findSource(data.ctx.config.solver.resolveSource, path, data) : path;
        if (!Array.isArray(filepath)) {
            filepath = filepath ? [filepath] : undefined;
        }
        if (filepath !== undefined && filepath.length > 0) {
            let result = information;
            // remember the original control dependencies; each candidate file gets its own extension below
            const origCds = data.cds?.slice() ?? [];
            for (const f of filepath) {
                // check if the sourced file has already been dataflow analyzed (too often), and if so, skip it
                const limit = data.ctx.config.solver.resolveSource?.repeatedSourceLimit ?? 0;
                // number of times this file already appears in the chain of sourcing files
                const findCount = data.referenceChain.filter(e => e !== undefined && f === e).length;
                if (findCount > limit) {
                    logger_1.dataflowLogger.warn(`Found cycle (>=${limit + 1}) in dataflow analysis for ${JSON.stringify(filepath)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
                    (0, unknown_side_effect_1.handleUnknownSideEffect)(result.graph, result.environment, rootId);
                    continue;
                }
                if (filepath.length > 1) {
                    // multiple candidates: mark each file's contents as conditional on this source call
                    data = { ...data, cds: [...origCds, { id: rootId, when: true, file: f }] };
                }
                // ids of the sourced AST are namespaced by occurrence count and file name
                // so repeated sourcing of the same file yields distinct node ids
                result = sourceRequest(rootId, {
                    request: 'file',
                    content: f
                }, data, result, true, (0, decorate_1.sourcedDeterministicCountingIdGenerator)((findCount > 0 ? findCount + '::' : '') + f, name.location));
            }
            return result;
        }
    }
    // could not resolve the argument to a usable constant path: treat the call as an opaque side effect
    (0, log_1.expensiveTrace)(logger_1.dataflowLogger, () => `Non-constant argument ${JSON.stringify(sourceFile)} for source is currently not supported, skipping`);
    (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
    return information;
}
/**
 * Processes a source request with the given dataflow processor information and existing dataflow information.
 * The request is either a file/text request that is resolved, parsed and normalized here,
 * or an already-normalized file (detected via its `root` property, e.g., a standalone project file).
 * @param rootId - id of the triggering `source` call (or `'file-<idx>'` for standalone files)
 * @param request - what to load (see above)
 * @param data - the dataflow processor information
 * @param information - dataflow information accumulated so far; returned unchanged on failure
 * @param makeMaybe - if true, link sourced results with (maybe) control dependencies instead of read edges
 * @param getId - id generator used when normalizing a newly parsed file
 */
function sourceRequest(rootId, request, data, information, makeMaybe, getId) {
    // parse, normalize and dataflow the sourced file
    let dataflow;
    let fst;
    let filePath;
    if ('root' in request) {
        // already normalized: use the file directly
        fst = request;
        filePath = request.filePath;
    }
    else {
        // resolve the request to its textual content (and canonical path)
        const textRequest = data.ctx.files.resolveRequest(request);
        if (textRequest === undefined && request.request === 'file') {
            // if translation failed there is nothing we can do!!
            logger_1.dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`);
            (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
            return information;
        }
        else {
            (0, assert_1.guard)(textRequest !== undefined, `Expected text request to be defined for sourced file ${JSON.stringify(request)}`);
        }
        // NOTE(review): an async parser is swapped for a fresh RShellExecutor here,
        // presumably to keep sourcing synchronous — confirm against the parser API
        const parsed = (!data.parser.async ? data.parser : new shell_executor_1.RShellExecutor()).parse(textRequest.r);
        // non-string parse output is a tree-sitter tree; a string is the JSON parser's output
        const normalized = (typeof parsed !== 'string' ?
            (0, parser_1.normalizeTreeSitter)({ files: [{ parsed, filePath: textRequest.path }] }, getId, data.ctx.config)
            : (0, parser_1.normalize)({ files: [{ parsed, filePath: textRequest.path }] }, getId));
        fst = normalized.ast.files[0];
        // merge the new ids into the complete ast map; this can be improved, see issue #628
        for (const [k, v] of normalized.idMap) {
            data.completeAst.idMap.set(k, v);
        }
        // add to the main ast (only once per file path)
        if (!data.completeAst.ast.files.some(f => f.filePath === fst.filePath)) {
            data.completeAst.ast.files.push(fst);
        }
        filePath = textRequest.path;
    }
    try {
        // dataflow-analyze the sourced root, seeded with the current environment
        // and with the reference chain extended by the sourced file (for cycle detection)
        dataflow = (0, processor_1.processDataflowFor)(fst.root, {
            ...data,
            environment: information.environment,
            referenceChain: [...data.referenceChain, fst.filePath]
        });
    }
    catch (e) {
        // analysis failure must not abort the surrounding analysis: log and treat as opaque side effect
        logger_1.dataflowLogger.error(`Failed to analyze sourced file ${JSON.stringify(request)}, skipping: ${e.message}`);
        logger_1.dataflowLogger.error(e.stack);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, rootId);
        return information;
    }
    // take the entry point as well as all the written references, and give them a control dependency to the source call to show that they are conditional
    // (standalone files carry a 'file-' prefixed rootId and have no call site to link to)
    if (!String(rootId).startsWith('file-')) {
        if (makeMaybe) {
            if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
                dataflow.graph.addControlDependency(dataflow.entryPoint, rootId, true);
            }
            for (const out of dataflow.out) {
                dataflow.graph.addControlDependency(out.nodeId, rootId, true);
            }
        }
        else {
            // unconditional sourcing: link entry point and outputs with plain read edges
            if (dataflow.graph.hasVertex(dataflow.entryPoint)) {
                dataflow.graph.addEdge(dataflow.entryPoint, rootId, edge_1.EdgeType.Reads);
            }
            for (const out of dataflow.out) {
                dataflow.graph.addEdge(out.nodeId, rootId, edge_1.EdgeType.Reads);
            }
        }
    }
    data.ctx.files.addConsideredFile(filePath ?? '<inline>');
    // update our graph with the sourced file's information
    return {
        ...information,
        environment: (0, overwrite_1.overwriteEnvironment)(information.environment, dataflow.environment),
        graph: information.graph.mergeWith(dataflow.graph),
        in: information.in.concat(dataflow.in),
        out: information.out.concat(dataflow.out),
        unknownReferences: information.unknownReferences.concat(dataflow.unknownReferences),
        exitPoints: dataflow.exitPoints
    };
}
/**
 * Processes a standalone source file (i.e., one analyzed directly rather than
 * reached through a `source(...)` function call).
 */
function standaloneSourceFile(idx, file, data, information) {
    const path = file.filePath;
    // guard against loops: skip files that already appear in the reference chain
    const alreadyAnalyzed = data.referenceChain.some(e => e !== undefined && e === path);
    if (alreadyAnalyzed) {
        logger_1.dataflowLogger.info(`Found loop in dataflow analysis for ${JSON.stringify(path)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`);
        (0, unknown_side_effect_1.handleUnknownSideEffect)(information.graph, information.environment, file.root.info.id);
        return information;
    }
    // analyze with the current environment and the chain extended by this file
    const nextData = {
        ...data,
        environment: information.environment,
        referenceChain: [...data.referenceChain, path]
    };
    return sourceRequest('file-' + idx, file, nextData, information, false);
}
//# sourceMappingURL=built-in-source.js.map