@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
154 lines • 9.88 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SEEDED_RANDOMNESS = void 0;
const linter_format_1 = require("../linter-format");
const range_1 = require("../../util/range");
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
const search_enrichers_1 = require("../../search/search-executor/search-enrichers");
const identifier_1 = require("../../dataflow/environments/identifier");
const flowr_search_filters_1 = require("../../search/flowr-search-filters");
const default_builtin_config_1 = require("../../dataflow/environments/default-builtin-config");
const graph_1 = require("../../dataflow/graph/graph");
const cascade_action_1 = require("../../queries/catalog/call-context-query/cascade-action");
const node_id_1 = require("../../r-bridge/lang-4.x/ast/model/processing/node-id");
const linter_tags_1 = require("../linter-tags");
const alias_tracking_1 = require("../../dataflow/eval/resolve/alias-tracking");
const general_1 = require("../../dataflow/eval/values/general");
const config_1 = require("../../config");
const vertex_1 = require("../../dataflow/graph/vertex");
const r_function_call_1 = require("../../r-bridge/lang-4.x/ast/model/nodes/r-function-call");
const r_value_1 = require("../../dataflow/eval/values/r-value");
const info_1 = require("../../dataflow/info");
const built_in_proc_name_1 = require("../../dataflow/environments/built-in-proc-name");
/**
 * Linting rule "Seeded Randomness".
 *
 * The rule object bundles four parts:
 *  - `createSearch`: builds the flowR search that selects calls to the configured randomness consumers
 *    and attaches the potential randomness producers found by the `LastCall` enrichment,
 *  - `processSearchResult`: turns the search elements into linting results and collects counters in `.meta`,
 *  - `info`: default configuration, tags, certainty, name and description of the rule,
 *  - `prettyPrint`: formatters for a single result, keyed by `LintingPrettyPrintContext`.
 */
exports.SEEDED_RANDOMNESS = {
// Start from all function-call vertices ...
createSearch: (config) => flowr_search_builder_1.Q.all().filter(vertex_1.VertexType.FunctionCall)
// ... attach their call targets (restricted via `onlyBuiltin: true`) ...
.with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true })
// ... and keep only the calls whose targets pass the test built from `config.randomnessConsumers`.
.filter({
name: flowr_search_filters_1.FlowrFilter.MatchesEnrichment,
args: {
enrichment: search_enrichers_1.Enrichment.CallTargets,
test: (0, flowr_search_filters_1.testFunctionsIgnoringPackage)(config.randomnessConsumers)
}
})
// Link each remaining call to candidate producers: the configured producers of type 'function' (by name),
// and every default assignment function (those always answer with `CascadeAction.Continue`).
.with(search_enrichers_1.Enrichment.LastCall, [
{ callName: config.randomnessProducers.filter(p => p.type === 'function').map(p => p.name) },
{ callName: getDefaultAssignments().flatMap(b => b.names).map(identifier_1.Identifier.getName), cascadeIf: () => cascade_action_1.CascadeAction.Continue }
]),
// Converts the enriched search elements into linting results.
// Returns `{ results, '.meta' }`, where `results` holds one entry per consumer call target that is not
// covered by a constant-seeded producer, and `.meta` holds the counters declared below.
processSearchResult: (elements, config, { dataflow, analyzer }) => {
// names of the configured producers of type 'assignment' (the default config lists `.Random.seed`)
const assignmentProducers = new Set(config.randomnessProducers.filter(p => p.type === 'assignment').map(p => p.name));
// maps the name of each default assignment function to the argument index of its destination:
// 1 if the builtin config sets `swapSourceAndTarget`, otherwise 0
const assignmentArgIndexes = new Map(getDefaultAssignments().flatMap(a => a.names.map(n => ([identifier_1.Identifier.getName(n), a.config?.swapSourceAndTarget ? 1 : 0]))));
// counters that are mutated while the results are computed and returned as `.meta`
const metadata = {
consumerCalls: 0,
callsWithFunctionProducers: 0,
callsWithAssignmentProducers: 0,
callsWithNonConstantProducers: 0,
callsWithOtherBranchProducers: 0
};
return {
results: elements.getElements()
// map and filter consumers
// (one candidate per call target of each element; `consumerCalls` therefore counts targets, not elements)
.flatMap(element => (0, search_enrichers_1.enrichmentContent)(element, search_enrichers_1.Enrichment.CallTargets).targets.map(target => {
metadata.consumerCalls++;
return {
involvedId: element.node.info.id,
loc: range_1.SourceLocation.fromNode(element.node) ?? range_1.SourceLocation.invalid(),
target: target,
searchElement: element
};
}))
// filter by calls that aren't preceded by a randomness producer
// (returning `[]` from the callback drops the candidate, returning a one-element array reports it)
.flatMap(element => {
const dfgElement = dataflow.graph.getVertex(element.searchElement.node.info.id);
// `cds` of the consumer call itself; empty if the call has no vertex in the dataflow graph
const cds = dfgElement ? new Set(dfgElement.cds) : new Set();
const producers = (0, search_enrichers_1.enrichmentContent)(element.searchElement, search_enrichers_1.Enrichment.LastCall).linkedIds
.map(e => dataflow.graph.getVertex(e.node.info.id));
// split the linked vertices into default assignments and everything else (treated as function producers)
// NOTE(review): `f.name` throws if `getVertex` returned undefined for a linked id — confirm this cannot happen here
const { assignment, func } = Object.groupBy(producers, f => assignmentArgIndexes.has(identifier_1.Identifier.getName(f.name)) ? 'assignment' : 'func');
// set when at least one function producer is called with a non-constant first argument
let nonConstant = false;
// collects the `cds` entries that constant producers have in addition to the consumer call
const cdsOfProduces = new Set();
// function calls are already taken care of through the LastCall enrichment itself
for (const f of func ?? []) {
if (isConstantArgument(dataflow.graph, f, 0, analyzer.inspectContext())) {
// entries in the producer's `cds` that the consumer call does not share
const fCds = new Set(f.cds).difference(cds);
// NOTE(review): counted for every constant function producer, whereas `callsWithAssignmentProducers`
// below is only counted when the assignment suppresses the result — confirm the asymmetry is intended
metadata.callsWithFunctionProducers++;
if (fCds.size <= 0 || (0, info_1.happensInEveryBranchSet)(fCds)) {
// nothing extra guards the producer, or `happensInEveryBranchSet` accepts the extra entries: do not report this call
return [];
}
else {
// remember the extra entries for the combined check after both loops
// (the loop variable `f` shadows the outer producer `f`)
for (const f of fCds) {
cdsOfProduces.add(f);
}
}
}
else {
nonConstant = true;
}
}
// assignments have to be queried for their destination
for (const a of assignment ?? []) {
const argIdx = assignmentArgIndexes.get(identifier_1.Identifier.getName(a.name));
const dest = graph_1.FunctionArgument.getReference(a.args[argIdx]);
// only assignments whose destination name is one of the configured assignment producers are relevant
if (dest !== undefined && assignmentProducers.has((0, node_id_1.recoverName)(dest, dataflow.graph.idMap))) {
// we either have arg index 0 or 1 for the assignmentProducers destination, so we select the assignment value as 1-argIdx here
if (isConstantArgument(dataflow.graph, a, 1 - argIdx, analyzer.inspectContext())) {
const aCds = new Set(a.cds).difference(cds);
if (aCds.size <= 0 || (0, info_1.happensInEveryBranchSet)(aCds)) {
metadata.callsWithAssignmentProducers++;
return [];
}
else {
for (const f of aCds) {
cdsOfProduces.add(f);
}
}
}
}
}
// combined check over the extra entries of all constant producers seen above
if ((0, info_1.happensInEveryBranchSet)(cdsOfProduces)) {
// all producers happen in every branch, so we are good
return [];
}
// from here on the call is reported; the counters only describe why
if (nonConstant) {
metadata.callsWithNonConstantProducers++;
}
if (cdsOfProduces.size > 0) {
metadata.callsWithOtherBranchProducers++;
}
// Uncertain if some constant producer exists under additional `cds` entries, Certain otherwise
return [{
involvedId: element.involvedId,
certainty: cdsOfProduces.size > 0 ? linter_format_1.LintingResultCertainty.Uncertain : linter_format_1.LintingResultCertainty.Certain,
function: element.target,
loc: element.loc
}];
}),
'.meta': metadata
};
},
// static description of the rule
info: {
// `randomnessProducers` entries are either { type: 'function' } (matched by call name in `createSearch`)
// or { type: 'assignment' } (matched by assignment destination in `processSearchResult`)
defaultConfig: {
randomnessProducers: [{ type: 'function', name: 'set.seed' }, { type: 'assignment', name: '.Random.seed' }],
randomnessConsumers: ['jitter', 'sample', 'sample.int', 'arima.sim', 'kmeans', 'princomp', 'rcauchy', 'rchisq', 'rexp', 'rgamma', 'rgeom', 'rlnorm', 'rlogis', 'rmultinom', 'rnbinom', 'rnorm', 'rpois', 'runif', 'pointLabel', 'some', 'rbernoulli', 'rdunif', 'generateSeedVectors'],
},
tags: [linter_tags_1.LintingRuleTag.Robustness, linter_tags_1.LintingRuleTag.Reproducibility],
// only finds proper randomness producers and consumers due to its config, but will not find all producers/consumers since not all existing deprecated functions will be in the config
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
name: 'Seeded Randomness',
description: 'Checks whether randomness-based function calls are preceded by a random seed generation function. For consistent reproducibility, functions that use randomness should only be called after a constant random seed is set using a function like `set.seed`.'
},
// formatters for a single linting result; both print the reported function and its formatted source location,
// the `Full` variant additionally explains the finding
prettyPrint: {
[linter_format_1.LintingPrettyPrintContext.Query]: (result, _meta) => `Function \`${result.function}\` at ${range_1.SourceLocation.format(result.loc)}`,
[linter_format_1.LintingPrettyPrintContext.Full]: (result, _meta) => `Function \`${result.function}\` at ${range_1.SourceLocation.format(result.loc)} is called without a preceding random seed function like \`set.seed\``
}
};
/**
 * Collects the entries of the default builtin configuration that describe assignments.
 *
 * An entry qualifies if its `type` is 'function' and its `processor` is either
 * `BuiltInProcName.Assignment` or `BuiltInProcName.AssignmentLike`.
 *
 * @returns a new array with the matching entries, in configuration order
 */
function getDefaultAssignments() {
    const procNames = built_in_proc_name_1.BuiltInProcName;
    const isAssignmentEntry = (entry) => {
        if (entry.type !== 'function') {
            return false;
        }
        return entry.processor === procNames.Assignment || entry.processor === procNames.AssignmentLike;
    };
    return default_builtin_config_1.DefaultBuiltinConfig.filter(entry => isAssignmentEntry(entry));
}
/**
 * Checks whether the positional argument at `argIndex` of `call` resolves to constant values only.
 *
 * Empty arguments and named arguments are skipped before indexing, so `argIndex` counts the remaining
 * (unnamed) arguments. The selected reference is resolved with `VariableResolve.Alias`.
 *
 * @param graph    - dataflow graph passed on to the resolver
 * @param call     - call vertex whose `args` are inspected
 * @param argIndex - index into the unnamed, non-empty arguments
 * @param ctx      - context passed on to the resolver
 * @returns `true` if the resolved value passes `valueSetGuard` and every element is a number, logical,
 *          string, or a closed numeric interval with identical bounds; `false` otherwise
 *          (including when the guard yields nothing)
 */
function isConstantArgument(graph, call, argIndex, ctx) {
    const positionalRefs = call.args
        .filter(arg => arg !== r_function_call_1.EmptyArgument && !arg.name)
        .map(graph_1.FunctionArgument.getReference);
    const valueSet = (0, general_1.valueSetGuard)((0, alias_tracking_1.resolveIdToValue)(positionalRefs[argIndex], { graph: graph, resolve: config_1.VariableResolve.Alias, ctx }));
    if (valueSet === undefined || valueSet === null) {
        return false;
    }
    // classifies a single resolved value
    const isConstant = (value) => {
        switch (value.type) {
            case 'number':
            case 'logical':
            case 'string':
                return true;
            case 'interval': {
                // only a closed interval whose numeric bounds coincide denotes a single value
                if (!value.startInclusive || !value.endInclusive) {
                    return false;
                }
                if (value.start.type !== 'number' || value.end.type !== 'number') {
                    return false;
                }
                return (0, r_value_1.asValue)(value.start.value).num === (0, r_value_1.asValue)(value.end.value).num;
            }
            default:
                return false;
        }
    };
    return valueSet.elements.every(value => isConstant(value)) ?? false;
}
//# sourceMappingURL=seeded-randomness.js.map