UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

120 lines 7.03 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.DATA_FRAME_ACCESS_VALIDATION = void 0; const shape_inference_1 = require("../../abstract-interpretation/data-frame/shape-inference"); const satisfiable_domain_1 = require("../../abstract-interpretation/domains/satisfiable-domain"); const cfg_kind_1 = require("../../project/cfg-kind"); const type_1 = require("../../r-bridge/lang-4.x/ast/model/type"); const flowr_search_builder_1 = require("../../search/flowr-search-builder"); const search_enrichers_1 = require("../../search/search-executor/search-enrichers"); const logic_1 = require("../../util/logic"); const range_1 = require("../../util/range"); const linter_format_1 = require("../linter-format"); const linter_tags_1 = require("../linter-tags"); const identifier_1 = require("../../dataflow/environments/identifier"); const config_1 = require("../../config"); exports.DATA_FRAME_ACCESS_VALIDATION = { createSearch: () => flowr_search_builder_1.Q.all().with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true }), processSearchResult: async (elements, config, data) => { let ctx = data.analyzer.inspectContext(); ctx = { ...ctx, config: config_1.FlowrConfig.amend(data.analyzer.flowrConfig, flowrConfig => { if (config.readLoadedData !== undefined) { flowrConfig.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles = config.readLoadedData; } return flowrConfig; }) }; const cfg = await data.analyzer.controlflow(undefined, cfg_kind_1.CfgKind.NoFunctionDefs); const inference = new shape_inference_1.DataFrameShapeInferenceVisitor({ controlFlow: cfg, dfg: data.dataflow.graph, normalizedAst: data.normalize, ctx }); inference.start(); const accessOperations = getAccessOperations(elements, inference); const accesses = []; for (const [nodeId, operations] of accessOperations) { const access = { nodeId }; for (const operation of operations) { access.operand ??= operation.operand; access.operandShape ??= inference.getAbstractValue(operation.operand); if (operation.operation === 'accessCols' && operation.columns !== undefined) { access.accessedCols ??= []; access.accessedCols.push(...operation.columns); } else if (operation.operation === 'accessRows' && operation.rows !== undefined) { access.accessedRows ??= []; access.accessedRows.push(...operation.rows); } } accesses.push(access); } const operations = accessOperations.entries().flatMap(([, operations]) => operations).toArray(); const metadata = { numOperations: accessOperations.size, numAccesses: operations.length, totalAccessed: operations .map(operation => operation.operation === 'accessCols' ? operation.columns?.length ?? 0 : operation.rows?.length ?? 0) .reduce((a, b) => a + b, 0) }; const results = accesses .flatMap(access => findInvalidDataFrameAccesses(access) .map(accessed => ({ nodeId: access.nodeId, operand: access.operand, ...accessed }))) .map(({ nodeId, operand, ...accessed }) => ({ ...accessed, node: data.normalize.idMap.get(nodeId), operand: operand === undefined ? undefined : data.normalize.idMap.get(operand), })) .map(({ node, operand, ...accessed }) => ({ ...accessed, involvedId: node?.info.id, access: node?.lexeme ?? '???', ...(operand?.type === type_1.RType.Symbol ? { operand: identifier_1.Identifier.getName(operand.content) } : {}), range: range_1.SourceRange.fromNode(node) ?? range_1.SourceRange.invalid(), certainty: linter_format_1.LintingResultCertainty.Certain })); return { results, '.meta': metadata }; }, prettyPrint: { [linter_format_1.LintingPrettyPrintContext.Query]: result => `Access of ${result.type} ` + (typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' ' + (result.operand === undefined ? `at \`${result.access}\`` : `of \`${result.operand}\``) + ` at ${range_1.SourceRange.format(result.range)}`, [linter_format_1.LintingPrettyPrintContext.Full]: result => `Accessed ${result.type} ` + (typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' does not exist ' + (result.operand === undefined ? `at \`${result.access}\`` : `in \`${result.operand}\``) + ` at ${range_1.SourceRange.format(result.range)}` }, info: { name: 'Dataframe Access Validation', tags: [linter_tags_1.LintingRuleTag.Bug, linter_tags_1.LintingRuleTag.Usability, linter_tags_1.LintingRuleTag.Reproducibility], // this rule is unable to detect all cases of dataframe access, but sufficiently ensures returned results are valid certainty: linter_format_1.LintingRuleCertainty.BestEffort, description: 'Validates the existence of accessed columns and rows of dataframes.', defaultConfig: { readLoadedData: false } } }; function getAccessOperations(elements, inference) { return new Map(elements.getElements() .map(element => [element.node.info.id, inference.getAbstractOperations(element.node.info.id) ?.filter(({ operation }) => operation === 'accessCols' || operation === 'accessRows') .map(({ operation, operand, type: _type, options: _options, ...args }) => ({ operation, operand, ...args })) ?? [] ]) .filter(([, operations]) => operations.length > 0)); } function findInvalidDataFrameAccesses({ operandShape, accessedCols, accessedRows }) { const invalidAccesses = []; if (operandShape !== undefined) { for (const row of accessedRows ?? []) { if (operandShape.rows.satisfies(row, satisfiable_domain_1.NumericalComparator.LessOrEqual) === logic_1.Ternary.Never) { invalidAccesses.push({ type: 'row', accessed: row }); } } for (const col of accessedCols ?? []) { if (typeof col === 'string' && operandShape.colnames.satisfies([col], satisfiable_domain_1.SetComparator.SubsetOrEqual) === logic_1.Ternary.Never) { invalidAccesses.push({ type: 'column', accessed: col }); } else if (typeof col === 'number' && operandShape.cols.satisfies(col, satisfiable_domain_1.NumericalComparator.LessOrEqual) === logic_1.Ternary.Never) { invalidAccesses.push({ type: 'column', accessed: col }); } } } return invalidAccesses; } //# sourceMappingURL=dataframe-access-validation.js.map