@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
120 lines • 7.03 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.DATA_FRAME_ACCESS_VALIDATION = void 0;
const shape_inference_1 = require("../../abstract-interpretation/data-frame/shape-inference");
const satisfiable_domain_1 = require("../../abstract-interpretation/domains/satisfiable-domain");
const cfg_kind_1 = require("../../project/cfg-kind");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
const search_enrichers_1 = require("../../search/search-executor/search-enrichers");
const logic_1 = require("../../util/logic");
const range_1 = require("../../util/range");
const linter_format_1 = require("../linter-format");
const linter_tags_1 = require("../linter-tags");
const identifier_1 = require("../../dataflow/environments/identifier");
const config_1 = require("../../config");
exports.DATA_FRAME_ACCESS_VALIDATION = {
createSearch: () => flowr_search_builder_1.Q.all().with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true }),
processSearchResult: async (elements, config, data) => {
let ctx = data.analyzer.inspectContext();
ctx = {
...ctx,
config: config_1.FlowrConfig.amend(data.analyzer.flowrConfig, flowrConfig => {
if (config.readLoadedData !== undefined) {
flowrConfig.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles = config.readLoadedData;
}
return flowrConfig;
})
};
const cfg = await data.analyzer.controlflow(undefined, cfg_kind_1.CfgKind.NoFunctionDefs);
const inference = new shape_inference_1.DataFrameShapeInferenceVisitor({ controlFlow: cfg, dfg: data.dataflow.graph, normalizedAst: data.normalize, ctx });
inference.start();
const accessOperations = getAccessOperations(elements, inference);
const accesses = [];
for (const [nodeId, operations] of accessOperations) {
const access = { nodeId };
for (const operation of operations) {
access.operand ??= operation.operand;
access.operandShape ??= inference.getAbstractValue(operation.operand);
if (operation.operation === 'accessCols' && operation.columns !== undefined) {
access.accessedCols ??= [];
access.accessedCols.push(...operation.columns);
}
else if (operation.operation === 'accessRows' && operation.rows !== undefined) {
access.accessedRows ??= [];
access.accessedRows.push(...operation.rows);
}
}
accesses.push(access);
}
const operations = accessOperations.entries().flatMap(([, operations]) => operations).toArray();
const metadata = {
numOperations: accessOperations.size,
numAccesses: operations.length,
totalAccessed: operations
.map(operation => operation.operation === 'accessCols' ? operation.columns?.length ?? 0 : operation.rows?.length ?? 0)
.reduce((a, b) => a + b, 0)
};
const results = accesses
.flatMap(access => findInvalidDataFrameAccesses(access)
.map(accessed => ({ nodeId: access.nodeId, operand: access.operand, ...accessed })))
.map(({ nodeId, operand, ...accessed }) => ({
...accessed,
node: data.normalize.idMap.get(nodeId),
operand: operand === undefined ? undefined : data.normalize.idMap.get(operand),
}))
.map(({ node, operand, ...accessed }) => ({
...accessed,
involvedId: node?.info.id,
access: node?.lexeme ?? '???',
...(operand?.type === type_1.RType.Symbol ? { operand: identifier_1.Identifier.getName(operand.content) } : {}),
range: range_1.SourceRange.fromNode(node) ?? range_1.SourceRange.invalid(),
certainty: linter_format_1.LintingResultCertainty.Certain
}));
return { results, '.meta': metadata };
},
prettyPrint: {
[linter_format_1.LintingPrettyPrintContext.Query]: result => `Access of ${result.type} ` +
(typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' ' +
(result.operand === undefined ? `at \`${result.access}\`` : `of \`${result.operand}\``) + ` at ${range_1.SourceRange.format(result.range)}`,
[linter_format_1.LintingPrettyPrintContext.Full]: result => `Accessed ${result.type} ` +
(typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' does not exist ' +
(result.operand === undefined ? `at \`${result.access}\`` : `in \`${result.operand}\``) + ` at ${range_1.SourceRange.format(result.range)}`
},
info: {
name: 'Dataframe Access Validation',
tags: [linter_tags_1.LintingRuleTag.Bug, linter_tags_1.LintingRuleTag.Usability, linter_tags_1.LintingRuleTag.Reproducibility],
// this rule is unable to detect all cases of dataframe access, but sufficiently ensures returned results are valid
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
description: 'Validates the existence of accessed columns and rows of dataframes.',
defaultConfig: { readLoadedData: false }
}
};
/**
 * Collects, per searched element, the column/row access operations that the
 * shape inference recorded for the element's node.
 *
 * @param elements  - search result exposing `getElements()`; each element carries `node.info.id`
 * @param inference - object exposing `getAbstractOperations(nodeId)`
 * @returns a `Map` from node id to its access operations (`operation`, `operand`
 *          and the remaining arguments, without `type` and `options`); nodes
 *          without any access operation are omitted
 */
function getAccessOperations(elements, inference) {
    const isAccess = ({ operation }) => operation === 'accessCols' || operation === 'accessRows';
    // Drop the `type` and `options` fields, keep everything else.
    const toAccessOperation = ({ operation, operand, type: _type, options: _options, ...args }) => {
        return { operation, operand, ...args };
    };
    const operationsByNode = new Map();
    for (const element of elements.getElements()) {
        const nodeId = element.node.info.id;
        const recorded = inference.getAbstractOperations(nodeId);
        const accessOperations = recorded?.filter(isAccess).map(toAccessOperation) ?? [];
        if (accessOperations.length > 0) {
            operationsByNode.set(nodeId, accessOperations);
        }
    }
    return operationsByNode;
}
/**
 * Determines which of the accessed rows and columns can never exist according
 * to the inferred shape of the accessed operand.
 *
 * Rows and numeric columns are checked against `operandShape.rows` /
 * `operandShape.cols` with the `LessOrEqual` comparator, column names against
 * `operandShape.colnames` with the `SubsetOrEqual` comparator. An access is
 * only reported when the check yields `Ternary.Never`.
 *
 * @returns a list of `{ type: 'row' | 'column', accessed }` entries, rows first;
 *          empty when no operand shape is known
 */
function findInvalidDataFrameAccesses({ operandShape, accessedCols, accessedRows }) {
    const violations = [];
    if (operandShape === undefined) {
        return violations;
    }
    const isNever = (verdict) => verdict === logic_1.Ternary.Never;
    const rowCannotExist = (row) => {
        return isNever(operandShape.rows.satisfies(row, satisfiable_domain_1.NumericalComparator.LessOrEqual));
    };
    const columnCannotExist = (column) => {
        switch (typeof column) {
            case 'string':
                return isNever(operandShape.colnames.satisfies([column], satisfiable_domain_1.SetComparator.SubsetOrEqual));
            case 'number':
                return isNever(operandShape.cols.satisfies(column, satisfiable_domain_1.NumericalComparator.LessOrEqual));
            default:
                // Anything that is neither a name nor an index is not checked.
                return false;
        }
    };
    for (const row of accessedRows ?? []) {
        if (rowCannotExist(row)) {
            violations.push({ type: 'row', accessed: row });
        }
    }
    for (const column of accessedCols ?? []) {
        if (columnCannotExist(column)) {
            violations.push({ type: 'column', accessed: column });
        }
    }
    return violations;
}
//# sourceMappingURL=dataframe-access-validation.js.map