UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

285 lines 16.2 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.FlowrConfig = exports.DropPathsOption = exports.InferWorkingDirectory = exports.VariableResolve = void 0; const objects_1 = require("./util/objects"); const path_1 = __importDefault(require("path")); const fs_1 = __importDefault(require("fs")); const log_1 = require("./util/log"); const files_1 = require("./util/files"); const joi_1 = __importDefault(require("joi")); const object_path_1 = __importDefault(require("object-path")); var VariableResolve; (function (VariableResolve) { /** Don't resolve constants at all */ VariableResolve["Disabled"] = "disabled"; /** Use alias tracking to resolve */ VariableResolve["Alias"] = "alias"; /** Only resolve directly assigned builtin constants */ VariableResolve["Builtin"] = "builtin"; })(VariableResolve || (exports.VariableResolve = VariableResolve = {})); /** * How to infer the working directory from a script */ var InferWorkingDirectory; (function (InferWorkingDirectory) { /** Don't infer the working directory */ InferWorkingDirectory["No"] = "no"; /** Infer the working directory from the main script */ InferWorkingDirectory["MainScript"] = "main-script"; /** Infer the working directory from the active script */ InferWorkingDirectory["ActiveScript"] = "active-script"; /** Infer the working directory from any script */ InferWorkingDirectory["AnyScript"] = "any-script"; })(InferWorkingDirectory || (exports.InferWorkingDirectory = InferWorkingDirectory = {})); /** * How to handle fixed strings in a source path */ var DropPathsOption; (function (DropPathsOption) { /** Don't drop any parts of the sourced path */ DropPathsOption["No"] = "no"; /** try to drop everything but the filename */ DropPathsOption["Once"] = "once"; /** try to drop every folder of the path */ DropPathsOption["All"] = "all"; })(DropPathsOption || (exports.DropPathsOption = DropPathsOption = {})); const defaultEngineConfigs = { 'tree-sitter': { type: 'tree-sitter' }, 'r-shell': { type: 'r-shell' } }; /** * Helper Object to work with {@link FlowrConfig}, provides the default config and the Joi schema for validation. */ exports.FlowrConfig = { name: 'FlowrConfig', /** * The default configuration for flowR, used when no config file is found or when a config file is missing some options. * You can use this as a base for your own config and only specify the options you want to change. */ default() { return { ignoreSourceCalls: false, semantics: { environment: { overwriteBuiltIns: { loadDefaults: true, definitions: [] } } }, repl: { quickStats: false, dfProcessorHeat: false }, project: { resolveUnknownPathsOnDisk: true }, engines: [], defaultEngine: 'tree-sitter', solver: { variables: VariableResolve.Alias, evalStrings: true, resolveSource: { dropPaths: DropPathsOption.No, ignoreCapitalization: true, inferWorkingDirectory: InferWorkingDirectory.ActiveScript, searchPath: [], repeatedSourceLimit: 2 }, instrument: { dataflowExtractors: undefined }, slicer: { threshold: 50, autoExtend: false } }, abstractInterpretation: { wideningThreshold: 4, dataFrame: { maxColNames: 50, readLoadedData: { readExternalFiles: true, maxReadLines: 1e6 } } } }; }, /** * The Joi schema for validating a config file, use this to validate your config file before using it. You can also use this to generate documentation for the config file format. */ Schema: joi_1.default.object({ ignoreSourceCalls: joi_1.default.boolean().optional().description('Whether source calls should be ignored, causing {@link processSourceCall}\'s behavior to be skipped.'), semantics: joi_1.default.object({ environment: joi_1.default.object({ overwriteBuiltIns: joi_1.default.object({ loadDefaults: joi_1.default.boolean().optional().description('Should the default configuration still be loaded?'), definitions: joi_1.default.array().items(joi_1.default.object()).optional().description('The definitions to load/overwrite.') }).optional().description('Do you want to overwrite (parts) of the builtin definition?') }).optional().description('Semantics regarding how to handle the R environment.') }).description('Configure language semantics and how flowR handles them.'), repl: joi_1.default.object({ quickStats: joi_1.default.boolean().optional().description('Whether to show quick stats in the REPL after each evaluation.'), dfProcessorHeat: joi_1.default.boolean().optional().description('This instruments the dataflow processors to count how often each processor is called.') }).description('Configuration options for the REPL.'), project: joi_1.default.object({ resolveUnknownPathsOnDisk: joi_1.default.boolean().optional().description('Whether to resolve unknown paths loaded by the r project disk when trying to source/analyze files.') }).description('Project specific configuration options.'), engines: joi_1.default.array().items(joi_1.default.alternatives(joi_1.default.object({ type: joi_1.default.string().required().valid('tree-sitter').description('Use the tree sitter engine.'), wasmPath: joi_1.default.string().optional().description('The path to the tree-sitter-r WASM binary to use. If this is undefined, this uses the default path.'), treeSitterWasmPath: joi_1.default.string().optional().description('The path to the tree-sitter WASM binary to use. If this is undefined, this uses the default path.'), lax: joi_1.default.boolean().optional().description('Whether to use the lax parser for parsing R code (allowing for syntax errors). If this is undefined, the strict parser will be used.') }).description('The configuration for the tree sitter engine.'), joi_1.default.object({ type: joi_1.default.string().required().valid('r-shell').description('Use the R shell engine.'), rPath: joi_1.default.string().optional().description('The path to the R executable to use. If this is undefined, this uses the default path.') }).description('The configuration for the R shell engine.'))).description('The engine or set of engines to use for interacting with R code. An empty array means all available engines will be used.'), defaultEngine: joi_1.default.string().optional().valid('tree-sitter', 'r-shell').description('The default engine to use for interacting with R code. If this is undefined, an arbitrary engine from the specified list will be used.'), solver: joi_1.default.object({ variables: joi_1.default.string().valid(...Object.values(VariableResolve)).description('How to resolve variables and their values.'), evalStrings: joi_1.default.boolean().description('Should we include eval(parse(text="...")) calls in the dataflow graph?'), instrument: joi_1.default.object({ dataflowExtractors: joi_1.default.any().optional().description('These keys are only intended for use within code, allowing to instrument the dataflow analyzer!') }), resolveSource: joi_1.default.object({ dropPaths: joi_1.default.string().valid(...Object.values(DropPathsOption)).description('Allow to drop the first or all parts of the sourced path, if it is relative.'), ignoreCapitalization: joi_1.default.boolean().description('Search for filenames matching in the lowercase.'), inferWorkingDirectory: joi_1.default.string().valid(...Object.values(InferWorkingDirectory)).description('Try to infer the working directory from the main or any script to analyze.'), searchPath: joi_1.default.array().items(joi_1.default.string()).description('Additionally search in these paths.'), repeatedSourceLimit: joi_1.default.number().optional().description('How often the same file can be sourced within a single run? Please be aware: in case of cyclic sources this may not reach a fixpoint so give this a sensible limit.'), applyReplacements: joi_1.default.array().items(joi_1.default.object()).description('Provide name replacements for loaded files') }).optional().description('If lax source calls are active, flowR searches for sourced files much more freely, based on the configurations you give it. This option is only in effect if `ignoreSourceCalls` is set to false.'), slicer: joi_1.default.object({ threshold: joi_1.default.number().optional().description('The maximum number of iterations to perform on a single function call during slicing.'), autoExtend: joi_1.default.boolean().optional().description('If set, the slicer will gain an additional post-pass.') }).optional().description('The configuration for the slicer.') }).description('How to resolve constants, constraints, cells, ...'), abstractInterpretation: joi_1.default.object({ wideningThreshold: joi_1.default.number().min(1).description('The threshold for the number of visitations of a node at which widening should be performed to ensure the termination of the fixpoint iteration.'), dataFrame: joi_1.default.object({ maxColNames: joi_1.default.number().min(0).description('The maximum number of columns names to infer for data frames before over-approximating the column names to top.'), readLoadedData: joi_1.default.object({ readExternalFiles: joi_1.default.boolean().description('Whether data frame shapes should be extracted from loaded external files, such as CSV files.'), maxReadLines: joi_1.default.number().min(1).description('The maximum number of lines to read when extracting data frame shapes from loaded files, such as CSV files.') }).description('Configuration options for reading data frame shapes from loaded external data files, such as CSV files.') }).description('The configuration of the shape inference for data frames.') }).description('The configuration options for abstract interpretation.') }).description('The configuration file format for flowR.'), /** * Parses the given JSON string as a flowR config file, returning the resulting config object if the parsing and validation were successful, or `undefined` if there was an error. */ parse(jsonString) { try { const parsed = JSON.parse(jsonString); const validate = exports.FlowrConfig.Schema.validate(parsed); if (!validate.error) { // assign default values to all config options except for the specified ones return (0, objects_1.deepMergeObject)(exports.FlowrConfig.default(), parsed); } else { log_1.log.error(`Failed to validate config ${jsonString}: ${validate.error.message}`); return undefined; } } catch (e) { log_1.log.error(`Failed to parse config ${jsonString}: ${e.message}`); } }, /** * Creates a new flowr config that has the updated values. */ // eslint-disable-next-line @typescript-eslint/no-invalid-void-type amend(config, amendmentFunc) { const newConfig = exports.FlowrConfig.clone(config); return amendmentFunc(newConfig) ?? newConfig; }, /** * Clones the given flowr config object. */ clone(config) { return (0, objects_1.deepClonePreserveUnclonable)(config); }, /** * Loads the flowr config from the given file or the default locations. * Please note that you can also use this without a path parameter to * infer the config from flowR's default locations. * This is mostly useful for user-facing features. */ fromFile(configFile, configWorkingDirectory = process.cwd()) { try { return loadConfigFromFile(configFile, configWorkingDirectory); } catch (e) { log_1.log.error(`Failed to load config: ${e.message}`); return exports.FlowrConfig.default(); } }, /** * Gets the configuration for the given engine type from the config. */ getForEngine(config, engine) { const engines = config.engines; if (engines.length > 0) { return engines.find(e => e.type === engine); } else { return defaultEngineConfigs[engine]; } }, /** * Returns a new config object with the given value set at the given key, where the key is a dot-separated path to the value in the config object. * @see {@link setInConfigInPlace} for a version that modifies the config object in place instead of returning a new one. * @example * ```ts * const config = FlowrConfig.default(); * const newConfig = FlowrConfig.setInConfig(config, 'solver.variables', VariableResolve.Builtin); * console.log(config.solver.variables); // Output: "alias" * console.log(newConfig.solver.variables); // Output: "builtin" * ``` */ setInConfig(config, key, value) { const clone = exports.FlowrConfig.clone(config); object_path_1.default.set(clone, key, value); return clone; }, /** * Modifies the given config object in place by setting the given value at the given key, where the key is a dot-separated path to the value in the config object. * @see {@link setInConfig} for a version that returns a new config object instead of modifying the given one in place. */ setInConfigInPlace(config, key, value) { object_path_1.default.set(config, key, value); }, }; function loadConfigFromFile(configFile, workingDirectory) { if (configFile !== undefined) { if (path_1.default.isAbsolute(configFile) && fs_1.default.existsSync(configFile)) { log_1.log.trace(`Found config at ${configFile} (absolute)`); const ret = exports.FlowrConfig.parse(fs_1.default.readFileSync(configFile, { encoding: 'utf-8' })); if (ret) { log_1.log.info(`Using config ${JSON.stringify(ret)}`); return ret; } } let searchPath = path_1.default.resolve(workingDirectory); do { const configPath = path_1.default.join(searchPath, configFile); if (fs_1.default.existsSync(configPath)) { log_1.log.trace(`Found config at ${configPath}`); const ret = exports.FlowrConfig.parse(fs_1.default.readFileSync(configPath, { encoding: 'utf-8' })); if (ret) { log_1.log.info(`Using config ${JSON.stringify(ret)}`); return ret; } } // move up to parent directory searchPath = (0, files_1.getParentDirectory)(searchPath); } while (fs_1.default.existsSync(searchPath)); } log_1.log.info('Using default config'); return exports.FlowrConfig.default(); } //# sourceMappingURL=config.js.map