UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

384 lines 18.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataFrameOperationNames = exports.ConstraintType = void 0;
exports.applyDataFrameSemantics = applyDataFrameSemantics;
exports.getConstraintType = getConstraintType;
// The required modules are deliberately kept as namespace objects (not destructured),
// so that their members are looked up at call time rather than at load time.
const assert_1 = require("../../util/assert");
const lattice_1 = require("../domains/lattice");
const positive_interval_domain_1 = require("../domains/positive-interval-domain");
const dataframe_domain_1 = require("./dataframe-domain");

/**
 * Represents the different types of resulting constraints that are inferred by abstract data frame operations.
 * Every name maps to its numeric value and every numeric value maps back to its name.
 */
const ConstraintType = {};
exports.ConstraintType = ConstraintType;
[
    /** The inferred constraints must hold for the operand at the point of the operation */
    'OperandPrecondition',
    /** The inferred constraints are applied to the operand during the operation */
    'OperandModification',
    /** The inferred constraints must hold for the returned result of the operation */
    'ResultPostcondition'
].forEach((name, ordinal) => {
    ConstraintType[name] = ordinal;
    ConstraintType[ordinal] = name;
});

/** Creates a factory that pairs a semantics applier function with the given constraint type. */
const constrained = (type) => (apply) => ({ apply, type });
const precondition = constrained(ConstraintType.OperandPrecondition);
const modification = constrained(ConstraintType.OperandModification);
const postcondition = constrained(ConstraintType.ResultPostcondition);

/**
 * Mapper for defining the abstract data frame operations and mapping them to semantics applier functions,
 * including information about the type of the resulting constraints that are inferred by the operation.
 * The key order is significant, as it determines the order of {@link exports.DataFrameOperationNames}.
 */
const DataFrameSemanticsMapper = {
    create: postcondition(applyCreateSemantics),
    read: postcondition(applyReadSemantics),
    accessCols: precondition(applyAccessColsSemantics),
    accessRows: precondition(applyAccessRowsSemantics),
    assignCols: modification(applyAssignColsSemantics),
    assignRows: modification(applyAssignRowsSemantics),
    setColNames: modification(applySetColNamesSemantics),
    addCols: postcondition(applyAddColsSemantics),
    addRows: postcondition(applyAddRowsSemantics),
    removeCols: postcondition(applyRemoveColsSemantics),
    removeRows: postcondition(applyRemoveRowsSemantics),
    concatCols: postcondition(applyConcatColsSemantics),
    concatRows: postcondition(applyConcatRowsSemantics),
    subsetCols: postcondition(applySubsetColsSemantics),
    subsetRows: postcondition(applySubsetRowsSemantics),
    filterRows: postcondition(applyFilterRowsSemantics),
    mutateCols: postcondition(applyMutateColsSemantics),
    groupBy: postcondition(applyGroupBySemantics),
    summarize: postcondition(applySummarizeSemantics),
    join: postcondition(applyJoinSemantics),
    unknown: postcondition(applyUnknownSemantics),
    identity: postcondition(applyIdentitySemantics)
};

/** The names of all abstract data frame operations */
exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);

/**
 * Applies the abstract semantics of an abstract data frame operation with respect to the data frame shape domain.
 * This expects that all arguments have already been sanitized according to the original concrete data frame function
 * (e.g. by replacing duplicate/invalid column names).
 * @param operation - The name of the abstract operation to apply the semantics of
 * @param value     - The abstract data frame shape of the operand of the abstract operation
 * @param args      - The arguments for applying the abstract semantics of the abstract operation
 * @param options   - The optional additional options of the abstract operation
 * @returns The resulting new data frame shape constraints.
 * The semantic type of the resulting constraints depends on the {@link ConstraintType} of the abstract operation.
 */
function applyDataFrameSemantics(operation, value, args, options) {
    return DataFrameSemanticsMapper[operation].apply(value, args, options);
}

/**
 * Gets the default resulting constraint type for an abstract data frame operation.
 * @param operation - The name of the abstract operation to look up
 */
function getConstraintType(operation) {
    const entry = DataFrameSemanticsMapper[operation];
    return entry.type;
}

/** Bundles the three shape components into a new data frame shape domain value. */
function makeShape(colnames, cols, rows) {
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows });
}

/** Checks whether an index entry is a string (i.e. a column name). */
function isString(entry) {
    return typeof entry === 'string';
}

/** Checks whether an index entry is a number (i.e. a numeric index). */
function isNumber(entry) {
    return typeof entry === 'number';
}

/** Folds `max([index, index])` over all given indices, starting from the given interval. */
function raiseToIndices(interval, indices) {
    return indices.reduce((current, index) => current.max([index, index]), interval);
}

/** Checks whether the column interval of the shape is not bottom and has a lower bound of 0. */
function mayHaveNoColumns(value) {
    const bounds = value.cols.value;
    return bounds !== lattice_1.Bottom && bounds[0] === 0;
}

/** Shape after columns with the given names were written: the names are unioned in and between 0 and all of them count as new. */
function withPossiblyNewCols(value, names) {
    const count = names.length;
    return makeShape(
        value.colnames.union(setRange(names)),
        value.cols.add([0, count]).max([count, count]),
        value.rows
    );
}

/** Shape after an unknown set of columns may have been added: column names and column count are widened upwards. */
function withUnknownNewCols(value) {
    return makeShape(value.colnames.widenUp(), value.cols.widenUp(), value.rows);
}

/**
 * Semantics of creating a data frame.
 * The column count is exact if the column names are known; `rows` may be an interval (array) or an exact number.
 * Unknown components fall back to the top interval.
 */
function applyCreateSemantics(value, { colnames, rows }) {
    const names = setRange(colnames);
    let colCount;
    if (colnames !== undefined) {
        colCount = [colnames.length, colnames.length];
    } else {
        colCount = positive_interval_domain_1.PosIntervalTop;
    }
    let rowCount;
    if (Array.isArray(rows)) {
        rowCount = rows;
    } else if (typeof rows === 'number') {
        rowCount = [rows, rows];
    } else {
        rowCount = positive_interval_domain_1.PosIntervalTop;
    }
    return makeShape(value.colnames.create(names), value.cols.create(colCount), value.rows.create(rowCount));
}

/** Semantics of reading a data frame, which are the same as for creating one from the given column names and rows. */
function applyReadSemantics(value, { colnames, rows }) {
    return applyCreateSemantics(value, { colnames, rows });
}

/**
 * Semantics of accessing columns.
 * String accesses are unioned into the column names, numeric accesses raise the column interval via `max`.
 * Unknown or mixed accesses leave the shape unchanged.
 */
function applyAccessColsSemantics(value, { columns }) {
    if (columns === undefined || columns === null) {
        return value;
    }
    if (columns.every(isString)) {
        return makeShape(value.colnames.union(setRange(columns)), value.cols, value.rows);
    }
    if (columns.every(isNumber)) {
        return makeShape(value.colnames, raiseToIndices(value.cols, columns), value.rows);
    }
    return value;
}

/** Semantics of accessing rows: known row indices raise the row interval via `max`, unknown ones leave the shape unchanged. */
function applyAccessRowsSemantics(value, { rows }) {
    if (rows === undefined) {
        return value;
    }
    return makeShape(value.colnames, value.cols, raiseToIndices(value.rows, rows));
}

/**
 * Semantics of assigning columns.
 * String targets may add up to that many new columns, numeric targets raise the column interval and widen
 * the column names upwards, and unknown or mixed targets widen both the column names and the column count upwards.
 */
function applyAssignColsSemantics(value, { columns }) {
    if (columns?.every(isString)) {
        return withPossiblyNewCols(value, columns);
    }
    if (columns?.every(isNumber)) {
        return makeShape(value.colnames.widenUp(), raiseToIndices(value.cols, columns), value.rows);
    }
    return withUnknownNewCols(value);
}

/** Semantics of assigning rows: known row indices raise the row interval via `max`, unknown ones widen it upwards. */
function applyAssignRowsSemantics(value, { rows }) {
    let assignedRows;
    if (rows !== undefined) {
        assignedRows = raiseToIndices(value.rows, rows);
    } else {
        assignedRows = value.rows.widenUp();
    }
    return makeShape(value.colnames, value.cols, assignedRows);
}

/**
 * Semantics of setting the column names.
 * With the `partial` option, the existing names are widened downwards and unioned with the new names.
 * Otherwise, the names are replaced; the replacement is additionally widened upwards unless every new name is
 * defined and there are at least as many new names as the upper bound of the column interval.
 */
function applySetColNamesSemantics(value, { colnames }, options) {
    if (options?.partial) {
        return makeShape(value.colnames.widenDown().union(setRange(colnames)), value.cols, value.rows);
    }
    const allColNames = colnames?.every(assert_1.isNotUndefined)
        && value.cols.value !== lattice_1.Bottom
        && colnames.length >= value.cols.value[1];
    const replaced = value.colnames.create(setRange(colnames));
    return makeShape(allColNames ? replaced : replaced.widenUp(), value.cols, value.rows);
}

/** Semantics of adding columns: known names are unioned in and counted exactly, unknown ones widen names and count upwards. */
function applyAddColsSemantics(value, { colnames }) {
    if (colnames === undefined) {
        return withUnknownNewCols(value);
    }
    const count = colnames.length;
    return makeShape(value.colnames.union(setRange(colnames)), value.cols.add([count, count]), value.rows);
}

/**
 * Semantics of adding rows.
 * If the operand may have no columns (lower column bound of 0), the column names become top and the
 * column count is changed as well (`add([1, 1])` for a known row count, top otherwise).
 */
function applyAddRowsSemantics(value, { rows }) {
    const addedRows = () => (rows !== undefined ? value.rows.add([rows, rows]) : value.rows.widenUp());
    if (mayHaveNoColumns(value)) {
        return new dataframe_domain_1.DataFrameDomain({
            ...value,
            colnames: value.colnames.top(),
            cols: rows !== undefined ? value.cols.add([1, 1]) : value.cols.top(),
            rows: addedRows()
        });
    }
    return makeShape(value.colnames, value.cols, addedRows());
}

/**
 * Semantics of removing columns.
 * Known names are subtracted from the column names and their count from the column interval.
 * With the `maybe` option, the second component of the subtracted pair is 0 instead of the count
 * (NOTE(review): presumably so that the upper bound is kept - confirm against the interval domain's `subtract`).
 * Unknown names widen both the column names and the column count downwards.
 */
function applyRemoveColsSemantics(value, { colnames }, options) {
    const onlyMaybe = options?.maybe;
    if (colnames === undefined) {
        return makeShape(value.colnames.widenDown(), value.cols.widenDown(), value.rows);
    }
    const count = colnames.length;
    return makeShape(
        value.colnames.subtract(setRange(colnames)),
        value.cols.subtract([count, onlyMaybe ? 0 : count]),
        value.rows
    );
}

/**
 * Semantics of removing rows.
 * A known row count is subtracted from the row interval; with the `maybe` option, the second component of the
 * subtracted pair is 0 instead of the count. An unknown row count widens the row interval downwards.
 */
function applyRemoveRowsSemantics(value, { rows }, options) {
    const onlyMaybe = options?.maybe;
    let remainingRows;
    if (rows === undefined) {
        remainingRows = value.rows.widenDown();
    } else {
        remainingRows = value.rows.subtract([rows, onlyMaybe ? 0 : rows]);
    }
    return makeShape(value.colnames, value.cols, remainingRows);
}

/** Semantics of concatenating the columns of another data frame: names are unioned, column counts are added. */
function applyConcatColsSemantics(value, { other }) {
    return makeShape(value.colnames.union(other.colnames), value.cols.add(other.cols), value.rows);
}

/**
 * Semantics of concatenating the rows of another data frame: the row counts are added.
 * If the operand may have no columns (lower column bound of 0), the column names and the column count
 * are additionally joined with those of the other data frame.
 */
function applyConcatRowsSemantics(value, { other }) {
    if (mayHaveNoColumns(value)) {
        return new dataframe_domain_1.DataFrameDomain({
            ...value,
            colnames: value.colnames.join(other.colnames),
            cols: value.cols.join(other.cols),
            rows: value.rows.add(other.rows)
        });
    }
    return makeShape(value.colnames, value.cols, value.rows.add(other.rows));
}

/**
 * Semantics of subsetting columns.
 * - `duplicateCols`: the column names become top and the column count is created from the number of names (top if unknown).
 * - `renamedCols`: the column names become top and the column count is limited via `min` (widened downwards if unknown).
 * - otherwise: the column names are intersected with the selection and the column count is limited via `min`
 *   (both widened downwards if the selection is unknown).
 */
function applySubsetColsSemantics(value, { colnames }, options) {
    const isKnown = colnames !== undefined;
    const limitedCols = () => (isKnown ? value.cols.min([colnames.length, colnames.length]) : value.cols.widenDown());
    if (options?.duplicateCols) {
        return makeShape(
            value.colnames.top(),
            isKnown ? value.cols.create([colnames.length, colnames.length]) : value.cols.top(),
            value.rows
        );
    }
    if (options?.renamedCols) {
        return makeShape(value.colnames.top(), limitedCols(), value.rows);
    }
    return makeShape(
        isKnown ? value.colnames.intersect(setRange(colnames)) : value.colnames.widenDown(),
        limitedCols(),
        value.rows
    );
}

/**
 * Semantics of subsetting rows.
 * With the `duplicateRows` option, the row interval is created from the row count (top if unknown).
 * Otherwise, it is limited via `min` (widened downwards if unknown).
 */
function applySubsetRowsSemantics(value, { rows }, options) {
    const isKnown = rows !== undefined;
    if (options?.duplicateRows) {
        return makeShape(value.colnames, value.cols, isKnown ? value.rows.create([rows, rows]) : value.rows.top());
    }
    return makeShape(value.colnames, value.cols, isKnown ? value.rows.min([rows, rows]) : value.rows.widenDown());
}

/**
 * Semantics of filtering rows by a condition.
 * A truthy condition keeps the rows, a condition of exactly `false` results in `[0, 0]` rows,
 * and any other condition widens the row interval downwards.
 */
function applyFilterRowsSemantics(value, { condition }) {
    const filteredRows = () => {
        if (condition) {
            return value.rows;
        }
        if (condition === false) {
            return value.rows.create([0, 0]);
        }
        return value.rows.widenDown();
    };
    return makeShape(value.colnames, value.cols, filteredRows());
}

/** Semantics of mutating columns: known names may add up to that many new columns, unknown ones widen names and count upwards. */
function applyMutateColsSemantics(value, { colnames }) {
    if (colnames === undefined) {
        return withUnknownNewCols(value);
    }
    return withPossiblyNewCols(value, colnames);
}

/** Semantics of grouping: with the `mutatedCols` option, the grouping columns may be added as new columns. */
function applyGroupBySemantics(value, { by }, options) {
    if (!options?.mutatedCols) {
        // Group by only marks columns as groups but does not change the shape itself
        return value;
    }
    return makeShape(value.colnames.union(setRange(by)), value.cols.add([0, by.length]), value.rows);
}

/**
 * Semantics of summarizing.
 * Known names are unioned into the column names after joining them with the empty set range, and the column
 * count is passed through `add([0, n])` followed by `min([n, Infinity])`; unknown names widen both upwards.
 * The row interval is always passed through `min([1, Infinity])` followed by `max([0, 1])`.
 */
function applySummarizeSemantics(value, { colnames }) {
    const isKnown = colnames !== undefined;
    return makeShape(
        isKnown ? value.colnames.join(setRange([])).union(setRange(colnames)) : value.colnames.widenUp(),
        isKnown ? value.cols.add([0, colnames.length]).min([colnames.length, Infinity]) : value.cols.widenUp(),
        value.rows.min([1, Infinity]).max([0, 1])
    );
}

/** Merges two intervals by creating the maximum of the lower bounds and adding the upper bounds (bottom if either is bottom). */
function mergeInterval(interval1, interval2) {
    if (interval1.value === lattice_1.Bottom || interval2.value === lattice_1.Bottom) {
        return interval1.bottom();
    }
    const lower = Math.max(interval1.value[0], interval2.value[0]);
    const upper = interval1.value[1] + interval2.value[1];
    return new positive_interval_domain_1.PosIntervalDomain([lower, upper]);
}

/** Creates the Cartesian product of two intervals by keeping the lower bound of `lower` and multiplying the upper bounds. */
function productInterval(lower, interval1, interval2) {
    const anyBottom = lower.value === lattice_1.Bottom
        || interval1.value === lattice_1.Bottom
        || interval2.value === lattice_1.Bottom;
    if (anyBottom) {
        return lower.bottom();
    }
    return new positive_interval_domain_1.PosIntervalDomain([lower.value[0], interval1.value[1] * interval2.value[1]]);
}

/** Converts the upper bound of a column name set to a list: `[]` for bottom, `undefined` for top, the elements otherwise. */
function listUpperBound(upper) {
    if (upper === lattice_1.Bottom) {
        return [];
    }
    if (upper === lattice_1.Top) {
        return undefined;
    }
    return [...upper];
}

/**
 * Determines for a join
 * - `duplicateCols`: the columns that may be renamed due to occurring in both data frames (`undefined` if unknown)
 * - `productRows`: whether the resulting rows may be a Cartesian product of the rows of the data frames
 */
function analyzeJoinColumns(value, other, by, options) {
    if (options?.natural) {
        const shared = value.colnames.intersect(other.colnames).upper();
        const noneShared = shared !== lattice_1.Bottom && shared !== lattice_1.Top && shared.size === 0;
        return { duplicateCols: [], productRows: noneShared };
    }
    if (by === undefined) {
        return { duplicateCols: undefined, productRows: true };
    }
    if (by.length === 0) {
        const shared = value.colnames.intersect(other.colnames).upper();
        return { duplicateCols: listUpperBound(shared), productRows: true };
    }
    if (by.every(assert_1.isNotUndefined)) {
        const remaining = value.colnames.intersect(other.colnames).subtract(setRange(by)).upper();
        return { duplicateCols: listUpperBound(remaining), productRows: false };
    }
    return { duplicateCols: undefined, productRows: false };
}

/** Row interval argument for the side of an outer join that may not match: `[0, upper]` if the rows are a value, bottom otherwise. */
function optionalSide(rows) {
    return rows.isValue() ? [0, rows.value[1]] : lattice_1.Bottom;
}

/** Computes the row interval of a join of the given type, before a possible Cartesian product is considered. */
function computeJoinRows(joinType, value, other) {
    switch (joinType) {
        case 'inner':
            return value.rows.max(other.rows).widenDown();
        case 'left':
            return value.rows.max(optionalSide(other.rows));
        case 'right':
            return other.rows.max(optionalSide(value.rows));
        case 'full':
            return mergeInterval(value.rows, other.rows);
        default: {
            // Called as a plain function (without a receiver), exactly like the compiled `(0, fn)(...)` form
            const { assertUnreachable } = assert_1;
            assertUnreachable(joinType);
            return undefined;
        }
    }
}

/** Computes the column names of a join result from the columns that may be renamed (`undefined` if unknown). */
function joinedColNames(value, other, duplicateCols) {
    if (duplicateCols === undefined) {
        return value.colnames.top();
    }
    const allNames = value.colnames.union(other.colnames);
    if (duplicateCols.length > 0) {
        return allNames.subtract(setRange(duplicateCols)).widenUp();
    }
    return allNames;
}

/**
 * Semantics of joining with another data frame.
 * The join type is taken from `options.join` (defaulting to `'inner'`), and `options.natural` selects a natural join.
 * If `by` is known, the column counts are added and the number of `by` columns is subtracted; otherwise the column
 * intervals are merged. The rows may become a Cartesian product depending on the join columns.
 */
function applyJoinSemantics(value, { other, by }, options) {
    const { duplicateCols, productRows } = analyzeJoinColumns(value, other, by, options);
    const rows = computeJoinRows(options?.join ?? 'inner', value, other);
    return new dataframe_domain_1.DataFrameDomain({
        ...value,
        colnames: joinedColNames(value, other, duplicateCols),
        cols: by !== undefined
            ? value.cols.add(other.cols).subtract([by.length, by.length])
            : mergeInterval(value.cols, other.cols),
        rows: productRows ? productInterval(rows, value.rows, other.rows) : rows
    });
}

/** Semantics of an operation that does not change the shape: the operand is returned as is. */
function applyIdentitySemantics(value,
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
_args) {
    return value;
}

/** Semantics of an unknown operation: the result is the top element of the operand's domain. */
function applyUnknownSemantics(value,
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
_args) {
    return value.top();
}

/**
 * Builds the `{ min, range }` argument for the column name domain from a list of possibly undefined column names.
 * `min` holds all defined names. `range` is empty if every name is defined, and top if any name is undefined
 * or if the list itself is missing.
 */
function setRange(colnames) {
    const names = colnames?.filter(assert_1.isNotUndefined) ?? [];
    const isComplete = names.length === colnames?.length;
    return { min: names, range: isComplete ? [] : lattice_1.Top };
}
//# sourceMappingURL=semantics.js.map