UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

363 lines 16.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataFrameOperationNames = exports.ConstraintType = void 0;
exports.applyDataFrameSemantics = applyDataFrameSemantics;
exports.getConstraintType = getConstraintType;
const assert_1 = require("../../util/assert");
const domain_1 = require("./domain");
/**
 * Represents the different types of resulting constraints that are inferred by abstract data frame operations.
 */
var ConstraintType;
(function (ConstraintType) {
    /** The inferred constraints must hold for the operand at the point of the operation */
    ConstraintType[ConstraintType["OperandPrecondition"] = 0] = "OperandPrecondition";
    /** The inferred constraints are applied to the operand during the operation */
    ConstraintType[ConstraintType["OperandModification"] = 1] = "OperandModification";
    /** The inferred constraints must hold for the returned result of the operation */
    ConstraintType[ConstraintType["ResultPostcondition"] = 2] = "ResultPostcondition";
})(ConstraintType || (exports.ConstraintType = ConstraintType = {}));
/**
 * Mapper for defining the abstract data frame operations and mapping them to semantics applier functions,
 * including information about the type of the resulting constraints that are inferred by the operation.
 */
const DataFrameSemanticsMapper = {
    'create': { apply: applyCreateSemantics, type: ConstraintType.ResultPostcondition },
    'read': { apply: applyReadSemantics, type: ConstraintType.ResultPostcondition },
    'accessCols': { apply: applyAccessColsSemantics, type: ConstraintType.OperandPrecondition },
    'accessRows': { apply: applyAccessRowsSemantics, type: ConstraintType.OperandPrecondition },
    'assignCols': { apply: applyAssignColsSemantics, type: ConstraintType.OperandModification },
    'assignRows': { apply: applyAssignRowsSemantics, type: ConstraintType.OperandModification },
    'setColNames': { apply: applySetColNamesSemantics, type: ConstraintType.OperandModification },
    'addCols': { apply: applyAddColsSemantics, type: ConstraintType.ResultPostcondition },
    'addRows': { apply: applyAddRowsSemantics, type: ConstraintType.ResultPostcondition },
    'removeCols': { apply: applyRemoveColsSemantics, type: ConstraintType.ResultPostcondition },
    'removeRows': { apply: applyRemoveRowsSemantics, type: ConstraintType.ResultPostcondition },
    'concatCols': { apply: applyConcatColsSemantics, type: ConstraintType.ResultPostcondition },
    'concatRows': { apply: applyConcatRowsSemantics, type: ConstraintType.ResultPostcondition },
    'subsetCols': { apply: applySubsetColsSemantics, type: ConstraintType.ResultPostcondition },
    'subsetRows': { apply: applySubsetRowsSemantics, type: ConstraintType.ResultPostcondition },
    'filterRows': { apply: applyFilterRowsSemantics, type: ConstraintType.ResultPostcondition },
    'mutateCols': { apply: applyMutateColsSemantics, type: ConstraintType.ResultPostcondition },
    'groupBy': { apply: applyGroupBySemantics, type: ConstraintType.ResultPostcondition },
    'summarize': { apply: applySummarizeSemantics, type: ConstraintType.ResultPostcondition },
    'join': { apply: applyJoinSemantics, type: ConstraintType.ResultPostcondition },
    'unknown': { apply: applyUnknownSemantics, type: ConstraintType.ResultPostcondition },
    'identity': { apply: applyIdentitySemantics, type: ConstraintType.ResultPostcondition }
};
/** The names of all abstract data frame operations */
exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);
/**
 * Applies the abstract semantics of an abstract data frame operation with respect to the data frame shape domain.
 * This expects that all arguments have already been sanitized according to the original concrete data frame function (e.g. by replacing duplicate/invalid column names).
 *
 * @param operation - The name of the abstract operation to apply the semantics of
 * @param value     - The abstract data frame shape of the operand of the abstract operation
 * @param args      - The arguments for applying the abstract semantics of the abstract operation
 * @param options   - The optional additional options of the abstract operation
 * @returns The resulting new data frame shape constraints.
 * The semantic type of the resulting constraints depends on the {@link ConstraintType} of the abstract operation.
 */
function applyDataFrameSemantics(operation, value, args, options) {
    const applier = DataFrameSemanticsMapper[operation];
    return applier.apply(value, args, options);
}
/**
 * Gets the default resulting constraint type for an abstract data frame operation.
 *
 * @param operation - The name of the abstract operation (a key of the semantics mapper)
 * @returns The {@link ConstraintType} registered for the operation
 */
function getConstraintType(operation) {
    return DataFrameSemanticsMapper[operation].type;
}
/**
 * Semantics of creating a data frame. The operand `value` is not used.
 *
 * @param value    - Ignored
 * @param colnames - The column names of the created data frame (entries may be `undefined`), or `undefined`
 * @param rows     - The number of rows, either as a number, as an interval array, or `undefined`
 * @returns A shape whose column names are `colnames` if all are defined (top otherwise),
 * whose column interval is the exact number of names (top if `colnames` is undefined),
 * and whose row interval is derived from `rows` (top if `rows` is neither an array nor a number).
 */
function applyCreateSemantics(value, { colnames, rows }) {
    const cols = colnames?.length;
    return {
        colnames: colnames?.every(assert_1.isNotUndefined) ? colnames : domain_1.ColNamesTop,
        cols: cols !== undefined ? [cols, cols] : domain_1.IntervalTop,
        rows: Array.isArray(rows) ? rows : typeof rows === 'number' ? [rows, rows] : domain_1.IntervalTop
    };
}
/**
 * Semantics of reading a data frame; delegates to {@link applyCreateSemantics} with the same arguments.
 */
function applyReadSemantics(value, { colnames, rows }) {
    return applyCreateSemantics(value, { colnames, rows });
}
/**
 * Semantics of accessing columns of the operand.
 * If all accessed columns are strings, they are joined into the column names of the operand.
 * If all accessed columns are numbers, the column interval is combined with each index via `maxInterval`.
 * Otherwise (mixed or undefined `columns`), the operand is returned unchanged.
 */
function applyAccessColsSemantics(value, { columns }) {
    if (columns?.every(col => typeof col === 'string')) {
        return {
            ...value,
            colnames: (0, domain_1.joinColNames)(value.colnames, columns)
        };
    }
    else if (columns?.every(col => typeof col === 'number')) {
        return {
            ...value,
            cols: columns.reduce((a, b) => (0, domain_1.maxInterval)(a, [b, b]), value.cols)
        };
    }
    return value;
}
/**
 * Semantics of accessing rows of the operand.
 * If `rows` is defined, the row interval is combined with each accessed index via `maxInterval`;
 * otherwise the operand is returned unchanged.
 */
function applyAccessRowsSemantics(value, { rows }) {
    if (rows !== undefined) {
        return {
            ...value,
            rows: rows.reduce((a, b) => (0, domain_1.maxInterval)(a, [b, b]), value.rows)
        };
    }
    return value;
}
/**
 * Semantics of assigning to columns of the operand.
 * For string columns, the names are joined into the column names and the column interval is widened by up to
 * the number of assigned columns (and combined with that number via `maxInterval`).
 * For numeric columns, the column names become top and the column interval is combined with each index via `maxInterval`.
 * Otherwise, the column names become top and the column interval is extended to infinity.
 */
function applyAssignColsSemantics(value, { columns }) {
    if (columns?.every(col => typeof col === 'string')) {
        const cols = columns.length;
        return {
            ...value,
            colnames: (0, domain_1.joinColNames)(value.colnames, columns),
            cols: (0, domain_1.maxInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, cols])
        };
    }
    else if (columns?.every(col => typeof col === 'number')) {
        return {
            ...value,
            colnames: domain_1.ColNamesTop,
            cols: columns.reduce((a, b) => (0, domain_1.maxInterval)(a, [b, b]), value.cols)
        };
    }
    return {
        ...value,
        colnames: domain_1.ColNamesTop,
        cols: (0, domain_1.extendIntervalToInfinity)(value.cols)
    };
}
/**
 * Semantics of assigning to rows of the operand.
 * If `rows` is defined, the row interval is combined with each assigned index via `maxInterval`;
 * otherwise the row interval is extended to infinity.
 */
function applyAssignRowsSemantics(value, { rows }) {
    if (rows !== undefined) {
        return {
            ...value,
            rows: rows.reduce((a, b) => (0, domain_1.maxInterval)(a, [b, b]), value.rows)
        };
    }
    return {
        ...value,
        rows: (0, domain_1.extendIntervalToInfinity)(value.rows)
    };
}
/**
 * Semantics of setting the column names of the operand.
 * With `options.partial`, fully defined `colnames` are joined into the existing names (top otherwise).
 * Without it, `colnames` replace the existing names only if all of them are defined and there are at least
 * as many names as the upper bound of the column interval; otherwise the column names become top.
 */
function applySetColNamesSemantics(value, { colnames }, options) {
    if (options?.partial) {
        return {
            ...value,
            colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
        };
    }
    const cols = colnames?.length;
    // the provided names only describe the full set of names if they cover the maximum possible number of columns
    const allColNames = value.cols !== domain_1.IntervalBottom && cols !== undefined && cols >= value.cols[1];
    return {
        ...value,
        colnames: allColNames && colnames?.every(assert_1.isNotUndefined) ? colnames : domain_1.ColNamesTop,
    };
}
/**
 * Semantics of adding columns to the operand.
 * Fully defined `colnames` are joined into the existing names (top otherwise); the column interval is increased
 * by the number of added columns, or extended to infinity if `colnames` is undefined.
 */
function applyAddColsSemantics(value, { colnames }) {
    const cols = colnames?.length;
    return {
        ...value,
        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
        cols: cols !== undefined ? (0, domain_1.addInterval)(value.cols, [cols, cols]) : (0, domain_1.extendIntervalToInfinity)(value.cols)
    };
}
/**
 * Semantics of adding rows to the operand.
 * The row interval is increased by `rows`, or extended to infinity if `rows` is undefined.
 */
function applyAddRowsSemantics(value, { rows }) {
    return {
        ...value,
        rows: rows !== undefined ? (0, domain_1.addInterval)(value.rows, [rows, rows]) : (0, domain_1.extendIntervalToInfinity)(value.rows)
    };
}
/**
 * Semantics of removing columns from the operand.
 * The defined entries of `colnames` are subtracted from the column names (names are unchanged if `colnames` is undefined).
 * The column interval is reduced by `[cols, cols]`, or by `[cols, 0]` with `options.maybe`,
 * and is extended to zero if `colnames` is undefined.
 */
function applyRemoveColsSemantics(value, { colnames }, options) {
    const cols = colnames?.length;
    // with `maybe`, the columns are not necessarily removed, so a different interval is subtracted
    const removed = options?.maybe ? [cols, 0] : [cols, cols];
    return {
        ...value,
        colnames: colnames !== undefined ? (0, domain_1.subtractColNames)(value.colnames, colnames.filter(assert_1.isNotUndefined)) : value.colnames,
        cols: cols !== undefined ? (0, domain_1.subtractInterval)(value.cols, removed) : (0, domain_1.extendIntervalToZero)(value.cols)
    };
}
/**
 * Semantics of removing rows from the operand.
 * The row interval is reduced by `[rows, rows]`, or by `[rows, 0]` with `options.maybe`,
 * and is extended to zero if `rows` is undefined. The columns of the operand are not changed.
 */
function applyRemoveRowsSemantics(value, { rows }, options) {
    if (options?.maybe) {
        // removing rows must only affect the row interval (not the column interval)
        return {
            ...value,
            rows: rows !== undefined ? (0, domain_1.subtractInterval)(value.rows, [rows, 0]) : (0, domain_1.extendIntervalToZero)(value.rows)
        };
    }
    return {
        ...value,
        rows: rows !== undefined ? (0, domain_1.subtractInterval)(value.rows, [rows, rows]) : (0, domain_1.extendIntervalToZero)(value.rows)
    };
}
/**
 * Semantics of concatenating the columns of `other` to the operand:
 * joins the column names and adds the column intervals.
 */
function applyConcatColsSemantics(value, { other }) {
    return {
        ...value,
        colnames: (0, domain_1.joinColNames)(value.colnames, other.colnames),
        cols: (0, domain_1.addInterval)(value.cols, other.cols)
    };
}
/**
 * Semantics of concatenating the rows of `other` to the operand: adds the row intervals.
 * If the lower bound of the operand's column interval is 0, the column names and column intervals
 * of both data frames are additionally joined.
 */
function applyConcatRowsSemantics(value, { other }) {
    if (value.cols !== domain_1.IntervalBottom && value.cols[0] === 0) {
        return {
            ...value,
            colnames: (0, domain_1.joinColNames)(value.colnames, other.colnames),
            cols: (0, domain_1.joinInterval)(value.cols, other.cols),
            rows: (0, domain_1.addInterval)(value.rows, other.rows)
        };
    }
    return {
        ...value,
        rows: (0, domain_1.addInterval)(value.rows, other.rows)
    };
}
/**
 * Semantics of subsetting the columns of the operand.
 * With `options.duplicateCols`, the column names become top and the column interval is exactly the number of
 * selected columns (top if `colnames` is undefined).
 * With `options.renamedCols`, the column names become top and the column interval is combined with the
 * number of selected columns via `minInterval` (extended to zero if `colnames` is undefined).
 * Otherwise, fully defined `colnames` are met with the existing names (names are kept unchanged otherwise),
 * with the same column interval as in the renamed case.
 */
function applySubsetColsSemantics(value, { colnames }, options) {
    const cols = colnames?.length;
    if (options?.duplicateCols) {
        return {
            ...value,
            colnames: domain_1.ColNamesTop,
            cols: cols !== undefined ? [cols, cols] : domain_1.IntervalTop
        };
    }
    else if (options?.renamedCols) {
        return {
            ...value,
            colnames: domain_1.ColNamesTop,
            cols: cols !== undefined ? (0, domain_1.minInterval)(value.cols, [cols, cols]) : (0, domain_1.extendIntervalToZero)(value.cols)
        };
    }
    return {
        ...value,
        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.meetColNames)(value.colnames, colnames) : value.colnames,
        cols: cols !== undefined ? (0, domain_1.minInterval)(value.cols, [cols, cols]) : (0, domain_1.extendIntervalToZero)(value.cols)
    };
}
/**
 * Semantics of subsetting the rows of the operand.
 * With `options.duplicateRows`, the row interval is exactly `rows` (top if `rows` is undefined).
 * Otherwise, the row interval is combined with `rows` via `minInterval` (extended to zero if `rows` is undefined).
 */
function applySubsetRowsSemantics(value, { rows }, options) {
    if (options?.duplicateRows) {
        return {
            ...value,
            rows: rows !== undefined ? [rows, rows] : domain_1.IntervalTop
        };
    }
    return {
        ...value,
        rows: rows !== undefined ? (0, domain_1.minInterval)(value.rows, [rows, rows]) : (0, domain_1.extendIntervalToZero)(value.rows)
    };
}
/**
 * Semantics of filtering the rows of the operand by a condition.
 * A truthy `condition` keeps the row interval, a `condition` of exactly `false` yields `[0, 0]`,
 * and any other condition (e.g. `undefined`) extends the row interval to zero.
 */
function applyFilterRowsSemantics(value, { condition }) {
    return {
        ...value,
        rows: condition ? value.rows : condition === false ? [0, 0] : (0, domain_1.extendIntervalToZero)(value.rows)
    };
}
/**
 * Semantics of mutating columns of the operand.
 * Fully defined `colnames` are joined into the existing names (top otherwise); the column interval is widened
 * by up to the number of mutated columns (and combined with that number via `maxInterval`),
 * or extended to infinity if `colnames` is undefined.
 */
function applyMutateColsSemantics(value, { colnames }) {
    const cols = colnames?.length;
    return {
        ...value,
        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
        cols: cols !== undefined ? (0, domain_1.maxInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, cols]) : (0, domain_1.extendIntervalToInfinity)(value.cols)
    };
}
/**
 * Semantics of grouping the operand by the columns `by`.
 * Only with `options.mutatedCols` the shape changes: fully defined `by` columns are joined into the
 * column names (top otherwise) and the column interval is widened by up to `by.length`.
 */
function applyGroupBySemantics(value, { by }, options) {
    if (options?.mutatedCols) {
        return {
            ...value,
            colnames: by.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, by) : domain_1.ColNamesTop,
            cols: (0, domain_1.addInterval)(value.cols, [0, by.length])
        };
    }
    // Group by only marks columns as groups but does not change the shape itself
    return value;
}
/**
 * Semantics of summarizing the operand.
 * Fully defined `colnames` are joined into the existing names (top otherwise).
 * The column interval is derived from the column interval of the operand and the number of summarized columns,
 * or extended to infinity if `colnames` is undefined.
 * The row interval is derived from the row interval of the operand via `minInterval`/`maxInterval` with `[1, Infinity]` and `[0, 1]`.
 */
function applySummarizeSemantics(value, { colnames }) {
    const cols = colnames?.length;
    return {
        ...value,
        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
        // the fallback must widen the column interval (not the row interval) of the operand
        cols: cols !== undefined ? (0, domain_1.minInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, Infinity]) : (0, domain_1.extendIntervalToInfinity)(value.cols),
        rows: (0, domain_1.maxInterval)((0, domain_1.minInterval)(value.rows, [1, Infinity]), [0, 1])
    };
}
/**
 * Semantics of joining the operand with the data frame `other`.
 *
 * @param value   - The abstract shape of the operand (the first data frame of the join)
 * @param other   - The abstract shape of the second data frame of the join
 * @param by      - The columns to join by (entries may be `undefined`), or `undefined` if unknown
 * @param options - `join` selects the join type (`'inner'` by default, `'left'`, `'right'`, or `'full'`),
 * `natural` marks a natural join
 * @returns The shape of the join result: the column names are top if columns may be renamed, the column interval
 * is computed from both column intervals and the number of `by` columns, and the row interval depends on the
 * join type and on whether the result may be a Cartesian product.
 */
function applyJoinSemantics(value, { other, by }, options) {
    // Merge two intervals by creating the maximum of the lower bounds and adding the upper bounds
    const mergeInterval = (interval1, interval2) => {
        if (interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
            return domain_1.IntervalBottom;
        }
        else {
            return [Math.max(interval1[0], interval2[0]), interval1[1] + interval2[1]];
        }
    };
    // Creating the Cartesian product of two intervals by keeping the lower bound and multiplying the upper bounds
    const productInterval = (lower, interval1, interval2) => {
        if (lower === domain_1.IntervalBottom || interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
            return domain_1.IntervalBottom;
        }
        else {
            // `0 * Infinity` is `NaN` in JavaScript, but a product with an empty upper bound is empty
            const upper = interval1[1] === 0 || interval2[1] === 0 ? 0 : interval1[1] * interval2[1];
            return [lower[0], upper];
        }
    };
    const commonCols = (0, domain_1.meetColNames)(value.colnames, other.colnames);
    let duplicateCols; // whether columns may be renamed due to occurrence in both data frames
    let productRows; // whether the resulting rows may be a Cartesian product of the rows of the data frames
    if (options?.natural) {
        duplicateCols = false;
        productRows = commonCols !== domain_1.ColNamesTop && commonCols.length === 0;
    }
    else if (by === undefined) {
        duplicateCols = true;
        productRows = true;
    }
    else if (by.length === 0) {
        duplicateCols = commonCols === domain_1.ColNamesTop || commonCols.length > 0;
        productRows = true;
    }
    else if (by.every(assert_1.isNotUndefined)) {
        const remainingCols = (0, domain_1.subtractColNames)(commonCols, by);
        duplicateCols = remainingCols === domain_1.ColNamesTop || remainingCols.length > 0;
        productRows = false;
    }
    else {
        duplicateCols = true;
        productRows = false;
    }
    const joinType = options?.join ?? 'inner';
    let rows;
    switch (joinType) {
        case 'inner':
            rows = (0, domain_1.extendIntervalToZero)((0, domain_1.minInterval)(value.rows, other.rows));
            break;
        case 'left':
            rows = value.rows;
            break;
        case 'right':
            rows = other.rows;
            break;
        case 'full':
            rows = mergeInterval(value.rows, other.rows);
            break;
        default:
            (0, assert_1.assertUnreachable)(joinType);
    }
    const byCols = by?.length;
    return {
        ...value,
        colnames: duplicateCols ? domain_1.ColNamesTop : (0, domain_1.joinColNames)(value.colnames, other.colnames),
        cols: byCols !== undefined ? (0, domain_1.subtractInterval)((0, domain_1.addInterval)(value.cols, other.cols), [byCols, byCols]) : mergeInterval(value.cols, other.cols),
        rows: productRows ? productInterval(rows, value.rows, other.rows) : rows
    };
}
/**
 * Semantics of an operation that does not change the shape: returns the operand unchanged.
 */
function applyIdentitySemantics(value, 
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
_args) {
    return value;
}
/**
 * Semantics of an unknown operation: ignores the operand and returns the top element of the data frame domain.
 */
function applyUnknownSemantics(_value, 
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
_args) {
    return domain_1.DataFrameTop;
}
//# sourceMappingURL=semantics.js.map