UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

154 lines 9.88 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.SEEDED_RANDOMNESS = void 0; const linter_format_1 = require("../linter-format"); const range_1 = require("../../util/range"); const flowr_search_builder_1 = require("../../search/flowr-search-builder"); const search_enrichers_1 = require("../../search/search-executor/search-enrichers"); const identifier_1 = require("../../dataflow/environments/identifier"); const flowr_search_filters_1 = require("../../search/flowr-search-filters"); const default_builtin_config_1 = require("../../dataflow/environments/default-builtin-config"); const graph_1 = require("../../dataflow/graph/graph"); const cascade_action_1 = require("../../queries/catalog/call-context-query/cascade-action"); const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id"); const linter_tags_1 = require("../linter-tags"); const alias_tracking_1 = require("../../dataflow/eval/resolve/alias-tracking"); const general_1 = require("../../dataflow/eval/values/general"); const config_1 = require("../../config"); const vertex_1 = require("../../dataflow/graph/vertex"); const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call"); const r_value_1 = require("../../dataflow/eval/values/r-value"); const info_1 = require("../../dataflow/info"); const built_in_proc_name_1 = require("../../dataflow/environments/built-in-proc-name"); exports.SEEDED_RANDOMNESS = { createSearch: (config) => flowr_search_builder_1.Q.all().filter(vertex_1.VertexType.FunctionCall) .with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true }) .filter({ name: flowr_search_filters_1.FlowrFilter.MatchesEnrichment, args: { enrichment: search_enrichers_1.Enrichment.CallTargets, test: (0, flowr_search_filters_1.testFunctionsIgnoringPackage)(config.randomnessConsumers) } }) .with(search_enrichers_1.Enrichment.LastCall, [ { callName: config.randomnessProducers.filter(p => p.type === 'function').map(p => p.name) }, { callName: getDefaultAssignments().flatMap(b => b.names).map(identifier_1.Identifier.getName), cascadeIf: () => cascade_action_1.CascadeAction.Continue } ]), processSearchResult: (elements, config, { dataflow, analyzer }) => { const assignmentProducers = new Set(config.randomnessProducers.filter(p => p.type === 'assignment').map(p => p.name)); const assignmentArgIndexes = new Map(getDefaultAssignments().flatMap(a => a.names.map(n => ([identifier_1.Identifier.getName(n), a.config?.swapSourceAndTarget ? 1 : 0])))); const metadata = { consumerCalls: 0, callsWithFunctionProducers: 0, callsWithAssignmentProducers: 0, callsWithNonConstantProducers: 0, callsWithOtherBranchProducers: 0 }; return { results: elements.getElements() // map and filter consumers .flatMap(element => (0, search_enrichers_1.enrichmentContent)(element, search_enrichers_1.Enrichment.CallTargets).targets.map(target => { metadata.consumerCalls++; return { involvedId: element.node.info.id, loc: range_1.SourceLocation.fromNode(element.node) ?? range_1.SourceLocation.invalid(), target: target, searchElement: element }; })) // filter by calls that aren't preceded by a randomness producer .flatMap(element => { const dfgElement = dataflow.graph.getVertex(element.searchElement.node.info.id); const cds = dfgElement ? new Set(dfgElement.cds) : new Set(); const producers = (0, search_enrichers_1.enrichmentContent)(element.searchElement, search_enrichers_1.Enrichment.LastCall).linkedIds .map(e => dataflow.graph.getVertex(e.node.info.id)); const { assignment, func } = Object.groupBy(producers, f => assignmentArgIndexes.has(identifier_1.Identifier.getName(f.name)) ? 'assignment' : 'func'); let nonConstant = false; const cdsOfProduces = new Set(); // function calls are already taken care of through the LastCall enrichment itself for (const f of func ?? []) { if (isConstantArgument(dataflow.graph, f, 0, analyzer.inspectContext())) { const fCds = new Set(f.cds).difference(cds); metadata.callsWithFunctionProducers++; if (fCds.size <= 0 || (0, info_1.happensInEveryBranchSet)(fCds)) { return []; } else { for (const f of fCds) { cdsOfProduces.add(f); } } } else { nonConstant = true; } } // assignments have to be queried for their destination for (const a of assignment ?? []) { const argIdx = assignmentArgIndexes.get(identifier_1.Identifier.getName(a.name)); const dest = graph_1.FunctionArgument.getReference(a.args[argIdx]); if (dest !== undefined && assignmentProducers.has((0, node_id_1.recoverName)(dest, dataflow.graph.idMap))) { // we either have arg index 0 or 1 for the assignmentProducers destination, so we select the assignment value as 1-argIdx here if (isConstantArgument(dataflow.graph, a, 1 - argIdx, analyzer.inspectContext())) { const aCds = new Set(a.cds).difference(cds); if (aCds.size <= 0 || (0, info_1.happensInEveryBranchSet)(aCds)) { metadata.callsWithAssignmentProducers++; return []; } else { for (const f of aCds) { cdsOfProduces.add(f); } } } } } if ((0, info_1.happensInEveryBranchSet)(cdsOfProduces)) { // all producers happen in every branch, so we are good return []; } if (nonConstant) { metadata.callsWithNonConstantProducers++; } if (cdsOfProduces.size > 0) { metadata.callsWithOtherBranchProducers++; } return [{ involvedId: element.involvedId, certainty: cdsOfProduces.size > 0 ? linter_format_1.LintingResultCertainty.Uncertain : linter_format_1.LintingResultCertainty.Certain, function: element.target, loc: element.loc }]; }), '.meta': metadata }; }, info: { defaultConfig: { randomnessProducers: [{ type: 'function', name: 'set.seed' }, { type: 'assignment', name: '.Random.seed' }], randomnessConsumers: ['jitter', 'sample', 'sample.int', 'arima.sim', 'kmeans', 'princomp', 'rcauchy', 'rchisq', 'rexp', 'rgamma', 'rgeom', 'rlnorm', 'rlogis', 'rmultinom', 'rnbinom', 'rnorm', 'rpois', 'runif', 'pointLabel', 'some', 'rbernoulli', 'rdunif', 'generateSeedVectors'], }, tags: [linter_tags_1.LintingRuleTag.Robustness, linter_tags_1.LintingRuleTag.Reproducibility], // only finds proper randomness producers and consumers due to its config, but will not find all producers/consumers since not all existing deprecated functions will be in the config certainty: linter_format_1.LintingRuleCertainty.BestEffort, name: 'Seeded Randomness', description: 'Checks whether randomness-based function calls are preceded by a random seed generation function. For consistent reproducibility, functions that use randomness should only be called after a constant random seed is set using a function like `set.seed`.' }, prettyPrint: { [linter_format_1.LintingPrettyPrintContext.Query]: (result, _meta) => `Function \`${result.function}\` at ${range_1.SourceLocation.format(result.loc)}`, [linter_format_1.LintingPrettyPrintContext.Full]: (result, _meta) => `Function \`${result.function}\` at ${range_1.SourceLocation.format(result.loc)} is called without a preceding random seed function like \`set.seed\`` } }; function getDefaultAssignments() { return default_builtin_config_1.DefaultBuiltinConfig.filter(b => b.type === 'function' && (b.processor === built_in_proc_name_1.BuiltInProcName.Assignment || b.processor === built_in_proc_name_1.BuiltInProcName.AssignmentLike)); } function isConstantArgument(graph, call, argIndex, ctx) { const args = call.args.filter(arg => arg !== r_function_call_1.EmptyArgument && !arg.name).map(graph_1.FunctionArgument.getReference); const values = (0, general_1.valueSetGuard)((0, alias_tracking_1.resolveIdToValue)(args[argIndex], { graph: graph, resolve: config_1.VariableResolve.Alias, ctx })); return values?.elements.every(v => v.type === 'number' || v.type === 'logical' || v.type === 'string' || v.type === 'interval' && v.startInclusive && v.endInclusive && v.start.type === 'number' && v.end.type === 'number' && (0, r_value_1.asValue)(v.start.value).num === (0, r_value_1.asValue)(v.end.value).num) ?? false; } //# sourceMappingURL=seeded-randomness.js.map