UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

116 lines 6.67 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.DATA_FRAME_ACCESS_VALIDATION = void 0; const absint_info_1 = require("../../abstract-interpretation/data-frame/absint-info"); const domain_1 = require("../../abstract-interpretation/data-frame/domain"); const shape_inference_1 = require("../../abstract-interpretation/data-frame/shape-inference"); const config_1 = require("../../config"); const extract_cfg_1 = require("../../control-flow/extract-cfg"); const type_1 = require("../../r-bridge/lang-4.x/ast/model/type"); const flowr_search_builder_1 = require("../../search/flowr-search-builder"); const search_enrichers_1 = require("../../search/search-executor/search-enrichers"); const dfg_1 = require("../../util/mermaid/dfg"); const range_1 = require("../../util/range"); const linter_format_1 = require("../linter-format"); const linter_tags_1 = require("../linter-tags"); exports.DATA_FRAME_ACCESS_VALIDATION = { createSearch: () => flowr_search_builder_1.Q.all().with(search_enrichers_1.Enrichment.CallTargets, { onlyBuiltin: true }), processSearchResult: (elements, config, data) => { const flowrConfig = (0, config_1.amendConfig)(data.config, flowrConfig => { if (config.readLoadedData !== undefined) { flowrConfig.abstractInterpretation.dataFrame.readLoadedData.readExternalFiles = config.readLoadedData; } return flowrConfig; }); const cfg = (0, extract_cfg_1.extractCfg)(data.normalize, flowrConfig, data.dataflow.graph); (0, shape_inference_1.inferDataFrameShapes)(cfg, data.dataflow.graph, data.normalize, flowrConfig); const accessOperations = getAccessOperations(elements); const accesses = []; for (const [nodeId, operations] of accessOperations) { const access = { nodeId }; for (const operation of operations) { access.operand ??= operation.operand; access.operandShape ??= (0, shape_inference_1.resolveIdToDataFrameShape)(operation.operand, data.dataflow.graph); if (operation.operation === 'accessCols' && operation.columns !== undefined) { access.accessedCols ??= []; access.accessedCols.push(...operation.columns); } else if (operation.operation === 'accessRows' && operation.rows !== undefined) { access.accessedRows ??= []; access.accessedRows.push(...operation.rows); } } accesses.push(access); } const operations = [...accessOperations.entries()].flatMap(([, operations]) => operations); const metadata = { numOperations: accessOperations.size, numAccesses: operations.length, totalAccessed: operations .map(operation => operation.operation === 'accessCols' ? operation.columns?.length ?? 0 : operation.rows?.length ?? 0) .reduce((a, b) => a + b, 0) }; const results = accesses .flatMap(access => findInvalidDataFrameAccesses(access) .map(accessed => ({ nodeId: access.nodeId, operand: access.operand, ...accessed }))) .map(({ nodeId, operand, ...accessed }) => ({ ...accessed, node: data.normalize.idMap.get(nodeId), operand: operand !== undefined ? data.normalize.idMap.get(operand) : undefined, })) .map(({ node, operand, ...accessed }) => ({ ...accessed, access: node?.lexeme ?? '???', ...(operand?.type === type_1.RType.Symbol ? { operand: operand.content } : {}), range: node?.info.fullRange ?? node?.location ?? (0, range_1.rangeFrom)(-1, -1, -1, -1), certainty: linter_format_1.LintingResultCertainty.Certain })); return { results, '.meta': metadata }; }, prettyPrint: { [linter_format_1.LintingPrettyPrintContext.Query]: result => `Access of ${result.type} ` + (typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' ' + (result.operand !== undefined ? `of \`${result.operand}\`` : `at \`${result.access}\``) + ` at ${(0, dfg_1.formatRange)(result.range)}`, [linter_format_1.LintingPrettyPrintContext.Full]: result => `Accessed ${result.type} ` + (typeof result.accessed === 'string' ? `"${result.accessed}"` : result.accessed) + ' does not exist ' + (result.operand !== undefined ? `in \`${result.operand}\`` : `at \`${result.access}\``) + ` at ${(0, dfg_1.formatRange)(result.range)}` }, info: { name: 'Dataframe Access Validation', tags: [linter_tags_1.LintingRuleTag.Bug, linter_tags_1.LintingRuleTag.Usability, linter_tags_1.LintingRuleTag.Reproducibility], // this rule is unable to detect all cases of dataframe access, but sufficiently ensures returned results are valid certainty: linter_format_1.LintingRuleCertainty.BestEffort, description: 'Validates the existance of accessed columns and rows of dataframes.', defaultConfig: { readLoadedData: false } } }; function getAccessOperations(elements) { return new Map(elements.getElements() .map(element => element.node) .filter(absint_info_1.hasDataFrameExpressionInfo) .map(node => [node.info.id, node.info.dataFrame.operations .filter(({ operation }) => operation === 'accessCols' || operation === 'accessRows') .map(({ operation, operand, type: _type, options: _options, ...args }) => ({ operation, operand, ...args })) ]) .filter(([, operations]) => operations.length > 0)); } function findInvalidDataFrameAccesses({ operandShape, accessedCols, accessedRows }) { const invalidAccesses = []; if (operandShape !== undefined) { for (const row of accessedRows ?? []) { if (!(0, domain_1.satisfiesLeqInterval)(operandShape.rows, row)) { invalidAccesses.push({ type: 'row', accessed: row }); } } for (const col of accessedCols ?? []) { if (typeof col === 'string' && !(0, domain_1.satisfiesColsNames)(operandShape.colnames, col)) { invalidAccesses.push({ type: 'column', accessed: col }); } else if (typeof col === 'number' && !(0, domain_1.satisfiesLeqInterval)(operandShape.cols, col)) { invalidAccesses.push({ type: 'column', accessed: col }); } } } return invalidAccesses; } //# sourceMappingURL=dataframe-access-validation.js.map