@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
116 lines • 6.67 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.DATA_FRAME_ACCESS_VALIDATION = void 0;
const absint_info_1 = require("../../abstract-interpretation/data-frame/absint-info");
const domain_1 = require("../../abstract-interpretation/data-frame/domain");
const shape_inference_1 = require("../../abstract-interpretation/data-frame/shape-inference");
const config_1 = require("../../config");
const extract_cfg_1 = require("../../control-flow/extract-cfg");
const type_1 = require("../../r-bridge/lang-4.x/ast/model/type");
const flowr_search_builder_1 = require("../../search/flowr-search-builder");
const search_enrichers_1 = require("../../search/search-executor/search-enrichers");
const dfg_1 = require("../../util/mermaid/dfg");
const range_1 = require("../../util/range");
const linter_format_1 = require("../linter-format");
const linter_tags_1 = require("../linter-tags");
exports.DATA_FRAME_ACCESS_VALIDATION = {
createSearch: () => flowr_search_builder_1.Q.all().with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true }),
processSearchResult: (elements, config, data) => {
const flowrConfig = (0, config_1.amendConfig)(data.config, flowrConfig => {
if (config.readLoadedData !== undefined) {
flowrConfig.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles = config.readLoadedData;
}
return flowrConfig;
});
const cfg = (0, extract_cfg_1.extractCfg)(data.normalize, flowrConfig, data.dataflow.graph);
(0, shape_inference_1.inferDataFrameShapes)(cfg, data.dataflow.graph, data.normalize, flowrConfig);
const accessOperations = getAccessOperations(elements);
const accesses = [];
for (const [nodeId, operations] of accessOperations) {
const access = { nodeId };
for (const operation of operations) {
access.operand ??= operation.operand;
access.operandShape ??= (0, shape_inference_1.resolveIdToDataFrameShape)(operation.operand, data.dataflow.graph);
if (operation.operation === 'accessCols' && operation.columns !== undefined) {
access.accessedCols ??= [];
access.accessedCols.push(...operation.columns);
}
else if (operation.operation === 'accessRows' && operation.rows !== undefined) {
access.accessedRows ??= [];
access.accessedRows.push(...operation.rows);
}
}
accesses.push(access);
}
const operations = [...accessOperations.entries()].flatMap(([, operations]) => operations);
const metadata = {
numOperations: accessOperations.size,
numAccesses: operations.length,
totalAccessed: operations
.map(operation => operation.operation === 'accessCols' ? operation.columns?.length ?? 0 : operation.rows?.length ?? 0)
.reduce((a, b) => a + b, 0)
};
const results = accesses
.flatMap(access => findInvalidDataFrameAccesses(access)
.map(accessed => ({ nodeId: access.nodeId, operand: access.operand, ...accessed })))
.map(({ nodeId, operand, ...accessed }) => ({
...accessed,
node: data.normalize.idMap.get(nodeId),
operand: operand !== undefined ? data.normalize.idMap.get(operand) : undefined,
}))
.map(({ node, operand, ...accessed }) => ({
...accessed,
access: node?.lexeme ?? '???',
...(operand?.type === type_1.RType.Symbol ? { operand: operand.content } : {}),
range: node?.info.fullRange ?? node?.location ?? (0, range_1.rangeFrom)(-1, -1, -1, -1),
certainty: linter_format_1.LintingResultCertainty.Certain
}));
return { results, '.meta': metadata };
},
prettyPrint: {
[linter_format_1.LintingPrettyPrintContext.Query]: result => `Access of ${result.type} ` +
(typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' ' +
(result.operand !== undefined ? `of \`${result.operand}\`` : `at \`${result.access}\``) + ` at ${(0, dfg_1.formatRange)(result.range)}`,
[linter_format_1.LintingPrettyPrintContext.Full]: result => `Accessed ${result.type} ` +
(typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' does not exist ' +
(result.operand !== undefined ? `in \`${result.operand}\`` : `at \`${result.access}\``) + ` at ${(0, dfg_1.formatRange)(result.range)}`
},
info: {
name: 'Dataframe Access Validation',
tags: [linter_tags_1.LintingRuleTag.Bug, linter_tags_1.LintingRuleTag.Usability, linter_tags_1.LintingRuleTag.Reproducibility],
// this rule is unable to detect all cases of dataframe access, but sufficiently ensures returned results are valid
certainty: linter_format_1.LintingRuleCertainty.BestEffort,
description: 'Validates the existance of accessed columns and rows of dataframes.',
defaultConfig: { readLoadedData: false }
}
};
/**
 * Collects the column and row access operations of all search elements whose node
 * passes `hasDataFrameExpressionInfo`.
 *
 * @param elements - object providing `getElements()`, each element exposing a `node`
 * @returns a `Map` from node id to that node's `accessCols`/`accessRows` operations
 *          (without their `type` and `options` fields); nodes without any such
 *          operation are left out
 */
function getAccessOperations(elements) {
    const isAccess = ({ operation }) => operation === 'accessCols' || operation === 'accessRows';
    // Drop `type` and `options`, keep everything else of the operation.
    const withoutTypeAndOptions = ({ operation, operand, type: _type, options: _options, ...args }) => ({ operation, operand, ...args });
    const dataFrameNodes = elements.getElements()
        .map(({ node }) => node)
        .filter(absint_info_1.hasDataFrameExpressionInfo);
    const accessesByNode = new Map();
    for (const { info } of dataFrameNodes) {
        const accessOps = info.dataFrame.operations
            .filter(isAccess)
            .map(withoutTypeAndOptions);
        if (accessOps.length > 0) {
            accessesByNode.set(info.id, accessOps);
        }
    }
    return accessesByNode;
}
/**
 * Determines which accessed rows and columns are not covered by the shape of the
 * accessed operand.
 *
 * @param access - access description; `operandShape`, `accessedCols` and `accessedRows` are read
 * @returns a list of `{ type, accessed }` entries, rows first and columns second;
 *          empty when `operandShape` is `undefined`
 */
function findInvalidDataFrameAccesses({ operandShape, accessedCols, accessedRows }) {
    const violations = [];
    // Nothing can be validated without a known shape.
    if (operandShape === undefined) {
        return violations;
    }
    const report = (type, accessed) => {
        violations.push({ type, accessed });
    };
    for (const rowIndex of accessedRows ?? []) {
        const rowExists = (0, domain_1.satisfiesLeqInterval)(operandShape.rows, rowIndex);
        if (!rowExists) {
            report('row', rowIndex);
        }
    }
    for (const column of accessedCols ?? []) {
        switch (typeof column) {
            case 'string':
                // Columns accessed by name are checked against the column names.
                if (!(0, domain_1.satisfiesColsNames)(operandShape.colnames, column)) {
                    report('column', column);
                }
                break;
            case 'number':
                // Columns accessed by index are checked against the column count.
                if (!(0, domain_1.satisfiesLeqInterval)(operandShape.cols, column)) {
                    report('column', column);
                }
                break;
            default:
                // Any other kind of column access is not validated.
                break;
        }
    }
    return violations;
}
//# sourceMappingURL=dataframe-access-validation.js.map