/*
 * @eagleoutice/flowr
 * Version:
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 363 lines • 16.6 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataFrameOperationNames = exports.ConstraintType = void 0;
exports.applyDataFrameSemantics = applyDataFrameSemantics;
exports.getConstraintType = getConstraintType;
const assert_1 = require("../../util/assert");
const domain_1 = require("./domain");
/**
* Represents the different types of resulting constraints that are inferred by abstract data frame operations.
*
* The object is populated in both directions: every member name maps to its numeric value
* (e.g. `ConstraintType.OperandPrecondition === 0`) and every numeric value maps back to its
* member name (e.g. `ConstraintType[0] === "OperandPrecondition"`).
*/
var ConstraintType;
(function (ConstraintType) {
/** The inferred constraints must hold for the operand at the point of the operation */
ConstraintType[ConstraintType["OperandPrecondition"] = 0] = "OperandPrecondition";
/** The inferred constraints are applied to the operand during the operation */
ConstraintType[ConstraintType["OperandModification"] = 1] = "OperandModification";
/** The inferred constraints must hold for the returned result of the operation */
ConstraintType[ConstraintType["ResultPostcondition"] = 2] = "ResultPostcondition";
// Reuses an already existing enum object if there is one, otherwise creates and exports a fresh one
})(ConstraintType || (exports.ConstraintType = ConstraintType = {}));
/**
* Mapper for defining the abstract data frame operations and mapping them to semantics applier functions,
* including information about the type of the resulting constraints that are inferred by the operation.
*
* Each entry is keyed by the operation name and holds:
* - `apply`: the applier function that is called as `apply(value, args, options)`
* - `type`: the {@link ConstraintType} of the constraints produced by the operation
*/
const DataFrameSemanticsMapper = {
'create': { apply: applyCreateSemantics, type: ConstraintType.ResultPostcondition },
'read': { apply: applyReadSemantics, type: ConstraintType.ResultPostcondition },
'accessCols': { apply: applyAccessColsSemantics, type: ConstraintType.OperandPrecondition },
'accessRows': { apply: applyAccessRowsSemantics, type: ConstraintType.OperandPrecondition },
'assignCols': { apply: applyAssignColsSemantics, type: ConstraintType.OperandModification },
'assignRows': { apply: applyAssignRowsSemantics, type: ConstraintType.OperandModification },
'setColNames': { apply: applySetColNamesSemantics, type: ConstraintType.OperandModification },
'addCols': { apply: applyAddColsSemantics, type: ConstraintType.ResultPostcondition },
'addRows': { apply: applyAddRowsSemantics, type: ConstraintType.ResultPostcondition },
'removeCols': { apply: applyRemoveColsSemantics, type: ConstraintType.ResultPostcondition },
'removeRows': { apply: applyRemoveRowsSemantics, type: ConstraintType.ResultPostcondition },
'concatCols': { apply: applyConcatColsSemantics, type: ConstraintType.ResultPostcondition },
'concatRows': { apply: applyConcatRowsSemantics, type: ConstraintType.ResultPostcondition },
'subsetCols': { apply: applySubsetColsSemantics, type: ConstraintType.ResultPostcondition },
'subsetRows': { apply: applySubsetRowsSemantics, type: ConstraintType.ResultPostcondition },
'filterRows': { apply: applyFilterRowsSemantics, type: ConstraintType.ResultPostcondition },
'mutateCols': { apply: applyMutateColsSemantics, type: ConstraintType.ResultPostcondition },
'groupBy': { apply: applyGroupBySemantics, type: ConstraintType.ResultPostcondition },
'summarize': { apply: applySummarizeSemantics, type: ConstraintType.ResultPostcondition },
'join': { apply: applyJoinSemantics, type: ConstraintType.ResultPostcondition },
'unknown': { apply: applyUnknownSemantics, type: ConstraintType.ResultPostcondition },
'identity': { apply: applyIdentitySemantics, type: ConstraintType.ResultPostcondition }
};
/** The names of all abstract data frame operations (the keys of the mapper, in their definition order) */
exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);
/**
 * Evaluates an abstract data frame operation on a data frame shape by delegating to the
 * semantics applier that is registered for the operation in the semantics mapper.
 * The arguments are expected to be sanitized already with respect to the original concrete
 * data frame function (e.g. duplicate/invalid column names have been replaced).
 *
 * @param operation - The name of the abstract operation whose semantics should be applied
 * @param value     - The abstract data frame shape of the operand of the operation
 * @param args      - The arguments of the abstract operation
 * @param options   - The optional additional options of the abstract operation
 * @returns The new data frame shape constraints. How they are to be interpreted depends on the
 * {@link ConstraintType} of the operation.
 */
function applyDataFrameSemantics(operation, value, args, options) {
    // Look up the mapper entry of the operation and hand all operands to its applier
    return DataFrameSemanticsMapper[operation].apply(value, args, options);
}
/**
 * Looks up the default type of the constraints that result from an abstract data frame operation.
 *
 * @param operation - The name of the abstract operation
 * @returns The `type` registered for the operation in the semantics mapper
 */
function getConstraintType(operation) {
    const { type } = DataFrameSemanticsMapper[operation];
    return type;
}
/**
 * Abstract semantics of creating a data frame.
 *
 * @param value    - The operand shape (not used for the result)
 * @param colnames - The column names of the created data frame; entries may be `undefined`
 * @param rows     - The number of rows, either as a number, as an interval array, or `undefined`
 * @returns A shape with the given column names (or the top element if any name is missing),
 * an exact column count (or the top interval if the names are missing entirely) and the row
 * interval derived from `rows` (or the top interval if it is neither an array nor a number).
 */
function applyCreateSemantics(value, { colnames, rows }) {
    const colCount = colnames?.length;
    const allNamesKnown = colnames?.every(assert_1.isNotUndefined);
    let rowInterval;
    if (Array.isArray(rows)) {
        // Already an interval, use it as is
        rowInterval = rows;
    }
    else if (typeof rows === 'number') {
        rowInterval = [rows, rows];
    }
    else {
        rowInterval = domain_1.IntervalTop;
    }
    return {
        colnames: allNamesKnown ? colnames : domain_1.ColNamesTop,
        cols: colCount === undefined ? domain_1.IntervalTop : [colCount, colCount],
        rows: rowInterval
    };
}
/**
 * Abstract semantics of reading a data frame; this is handled exactly like creating a data frame
 * with the given column names and rows.
 *
 * @param value    - The operand shape, passed through to the create semantics
 * @param colnames - The column names of the read data frame
 * @param rows     - The number of rows of the read data frame
 * @returns The result of the create semantics for `colnames` and `rows`
 */
function applyReadSemantics(value, { colnames, rows }) {
    const createArgs = { colnames, rows };
    return applyCreateSemantics(value, createArgs);
}
/**
 * Abstract semantics of accessing columns of a data frame.
 *
 * @param value   - The shape of the accessed data frame
 * @param columns - The accessed columns, either all given by name (strings) or all by index (numbers)
 * @returns For names, the shape with the accessed names joined into the column names; for indices,
 * the shape with the column interval combined (via `maxInterval`) with every accessed index;
 * otherwise (no columns or mixed kinds) the unchanged `value`.
 */
function applyAccessColsSemantics(value, { columns }) {
    const { joinColNames, maxInterval } = domain_1;
    const allOfType = (type) => columns?.every(col => typeof col === type);
    if (allOfType('string')) {
        const colnames = joinColNames(value.colnames, columns);
        return { ...value, colnames };
    }
    if (allOfType('number')) {
        // Fold every accessed index as a singleton interval into the column interval
        const widen = (interval, index) => maxInterval(interval, [index, index]);
        return { ...value, cols: columns.reduce(widen, value.cols) };
    }
    return value;
}
/**
 * Abstract semantics of accessing rows of a data frame.
 *
 * @param value - The shape of the accessed data frame
 * @param rows  - The accessed row indices, or `undefined` if they are unknown
 * @returns The shape with the row interval combined (via `maxInterval`) with every accessed
 * index, or the unchanged `value` if no rows are given.
 */
function applyAccessRowsSemantics(value, { rows }) {
    if (rows === undefined) {
        return value;
    }
    const { maxInterval } = domain_1;
    // Fold every accessed index as a singleton interval into the row interval
    const widen = (interval, index) => maxInterval(interval, [index, index]);
    return { ...value, rows: rows.reduce(widen, value.rows) };
}
/**
 * Abstract semantics of assigning to columns of a data frame.
 *
 * @param value   - The shape of the data frame that is assigned to
 * @param columns - The assigned columns, either all given by name (strings) or all by index (numbers)
 * @returns
 * - for names: the names are joined into the column names and the column interval becomes
 *   `maxInterval(addInterval(cols, [0, n]), [n, n])` for `n` assigned names
 * - for indices: the column names become the top element and every index is folded into the
 *   column interval via `maxInterval`
 * - otherwise: the column names become the top element and the column interval is extended to infinity
 */
function applyAssignColsSemantics(value, { columns }) {
    const { ColNamesTop, addInterval, extendIntervalToInfinity, joinColNames, maxInterval } = domain_1;
    const allOfType = (type) => columns?.every(col => typeof col === type);
    if (allOfType('string')) {
        const count = columns.length;
        const colnames = joinColNames(value.colnames, columns);
        const grown = addInterval(value.cols, [0, count]);
        return { ...value, colnames, cols: maxInterval(grown, [count, count]) };
    }
    if (allOfType('number')) {
        const widen = (interval, index) => maxInterval(interval, [index, index]);
        return { ...value, colnames: ColNamesTop, cols: columns.reduce(widen, value.cols) };
    }
    // Nothing is known about the assigned columns
    return { ...value, colnames: ColNamesTop, cols: extendIntervalToInfinity(value.cols) };
}
/**
 * Abstract semantics of assigning to rows of a data frame.
 *
 * @param value - The shape of the data frame that is assigned to
 * @param rows  - The assigned row indices, or `undefined` if they are unknown
 * @returns The shape with every index folded into the row interval via `maxInterval`, or with
 * the row interval extended to infinity if the rows are unknown.
 */
function applyAssignRowsSemantics(value, { rows }) {
    const { extendIntervalToInfinity, maxInterval } = domain_1;
    if (rows === undefined) {
        return { ...value, rows: extendIntervalToInfinity(value.rows) };
    }
    const widen = (interval, index) => maxInterval(interval, [index, index]);
    return { ...value, rows: rows.reduce(widen, value.rows) };
}
/**
 * Abstract semantics of setting the column names of a data frame.
 *
 * @param value    - The shape of the data frame whose column names are set
 * @param colnames - The new column names; entries may be `undefined`
 * @param options  - `partial`: whether only some of the column names are set
 * @returns
 * - with `partial`: the names are joined into the existing column names if all of them are
 *   defined, otherwise the column names become the top element
 * - otherwise: the names replace the column names if all of them are defined and their count
 *   is at least the upper bound of the (non-bottom) column interval; otherwise the column names
 *   become the top element
 */
function applySetColNamesSemantics(value, { colnames }, options) {
    const { ColNamesTop, IntervalBottom, joinColNames } = domain_1;
    const namesKnown = colnames?.every(assert_1.isNotUndefined);
    if (options?.partial) {
        return {
            ...value,
            colnames: namesKnown ? joinColNames(value.colnames, colnames) : ColNamesTop
        };
    }
    const count = colnames?.length;
    // The names only describe the full set of columns if there are enough of them
    const coversAllCols = value.cols !== IntervalBottom && count !== undefined && count >= value.cols[1];
    return {
        ...value,
        colnames: coversAllCols && namesKnown ? colnames : ColNamesTop
    };
}
/**
 * Abstract semantics of adding columns to a data frame.
 *
 * @param value    - The shape of the data frame the columns are added to
 * @param colnames - The names of the added columns; entries may be `undefined`
 * @returns The shape with the names joined into the column names (or the top element if a name
 * is missing) and the number of added columns added to the column interval (or the column
 * interval extended to infinity if the added columns are unknown).
 */
function applyAddColsSemantics(value, { colnames }) {
    const { ColNamesTop, addInterval, extendIntervalToInfinity, joinColNames } = domain_1;
    const added = colnames?.length;
    const namesKnown = colnames?.every(assert_1.isNotUndefined);
    const resultNames = namesKnown ? joinColNames(value.colnames, colnames) : ColNamesTop;
    let resultCols;
    if (added === undefined) {
        resultCols = extendIntervalToInfinity(value.cols);
    }
    else {
        resultCols = addInterval(value.cols, [added, added]);
    }
    return { ...value, colnames: resultNames, cols: resultCols };
}
/**
 * Abstract semantics of adding rows to a data frame.
 *
 * @param value - The shape of the data frame the rows are added to
 * @param rows  - The number of added rows, or `undefined` if it is unknown
 * @returns The shape with `[rows, rows]` added to the row interval, or with the row interval
 * extended to infinity if the number of added rows is unknown.
 */
function applyAddRowsSemantics(value, { rows }) {
    const { addInterval, extendIntervalToInfinity } = domain_1;
    if (rows === undefined) {
        return { ...value, rows: extendIntervalToInfinity(value.rows) };
    }
    return { ...value, rows: addInterval(value.rows, [rows, rows]) };
}
/**
 * Abstract semantics of removing columns from a data frame.
 *
 * @param value    - The shape of the data frame the columns are removed from
 * @param colnames - The names of the removed columns; entries may be `undefined`
 * @param options  - `maybe`: whether the columns are only possibly removed
 * @returns The shape with the defined names subtracted from the column names and an interval
 * subtracted from the column interval: `[n, 0]` with `maybe`, `[n, n]` otherwise (for `n` given
 * names). If the removed columns are unknown, the column names are kept and the column interval
 * is extended to zero.
 */
function applyRemoveColsSemantics(value, { colnames }, options) {
    const { extendIntervalToZero, subtractColNames, subtractInterval } = domain_1;
    if (colnames === undefined) {
        return { ...value, colnames: value.colnames, cols: extendIntervalToZero(value.cols) };
    }
    const removed = colnames.length;
    const knownNames = colnames.filter(assert_1.isNotUndefined);
    // The two modes only differ in the interval that is subtracted from the column interval
    const delta = options?.maybe ? [removed, 0] : [removed, removed];
    return {
        ...value,
        colnames: subtractColNames(value.colnames, knownNames),
        cols: subtractInterval(value.cols, delta)
    };
}
/**
 * Abstract semantics of removing rows from a data frame.
 * Only the row interval of the shape is affected; the column names and column interval are kept.
 *
 * @param value   - The shape of the data frame the rows are removed from
 * @param rows    - The number of removed rows, or `undefined` if it is unknown
 * @param options - `maybe`: whether the rows are only possibly removed
 * @returns The shape with an interval subtracted from the row interval: `[rows, 0]` with `maybe`,
 * `[rows, rows]` otherwise. If the number of removed rows is unknown, the row interval is
 * extended to zero.
 */
function applyRemoveRowsSemantics(value, { rows }, options) {
    if (rows === undefined) {
        return { ...value, rows: (0, domain_1.extendIntervalToZero)(value.rows) };
    }
    // Both modes operate on the rows and only differ in the interval that is subtracted
    const delta = options?.maybe ? [rows, 0] : [rows, rows];
    return { ...value, rows: (0, domain_1.subtractInterval)(value.rows, delta) };
}
/**
 * Abstract semantics of concatenating the columns of another data frame to a data frame.
 *
 * @param value - The shape of the data frame
 * @param other - The shape of the data frame whose columns are concatenated
 * @returns The shape of `value` with the column names of both shapes joined and the column
 * intervals of both shapes added.
 */
function applyConcatColsSemantics(value, { other }) {
    const { addInterval, joinColNames } = domain_1;
    const colnames = joinColNames(value.colnames, other.colnames);
    const cols = addInterval(value.cols, other.cols);
    return { ...value, colnames, cols };
}
/**
 * Abstract semantics of concatenating the rows of another data frame to a data frame.
 *
 * @param value - The shape of the data frame
 * @param other - The shape of the data frame whose rows are concatenated
 * @returns The shape of `value` with the row intervals of both shapes added. If the column
 * interval of `value` is not bottom and has a lower bound of 0, the column names and the column
 * intervals of both shapes are additionally joined.
 */
function applyConcatRowsSemantics(value, { other }) {
    const { IntervalBottom, addInterval, joinColNames, joinInterval } = domain_1;
    const lowerColBoundIsZero = value.cols !== IntervalBottom && value.cols[0] === 0;
    if (!lowerColBoundIsZero) {
        return { ...value, rows: addInterval(value.rows, other.rows) };
    }
    const colnames = joinColNames(value.colnames, other.colnames);
    const cols = joinInterval(value.cols, other.cols);
    const rows = addInterval(value.rows, other.rows);
    return { ...value, colnames, cols, rows };
}
/**
 * Abstract semantics of selecting a subset of the columns of a data frame.
 *
 * @param value    - The shape of the data frame
 * @param colnames - The names of the selected columns; entries may be `undefined`
 * @param options  - `duplicateCols`: whether columns may be selected multiple times,
 *                   `renamedCols`: whether selected columns may be renamed
 * @returns
 * - with `duplicateCols`: the column names become the top element and the column interval is
 *   exactly the number of selected names (or the top interval if unknown)
 * - with `renamedCols`: the column names become the top element and the column interval is
 *   `minInterval(cols, [n, n])` (or extended to zero if unknown)
 * - otherwise: the column names are met with the selected names if all are defined (else kept)
 *   and the column interval is computed as for `renamedCols`
 */
function applySubsetColsSemantics(value, { colnames }, options) {
    const { ColNamesTop, IntervalTop, extendIntervalToZero, meetColNames, minInterval } = domain_1;
    const selected = colnames?.length;
    if (options?.duplicateCols) {
        const exactCols = selected === undefined ? IntervalTop : [selected, selected];
        return { ...value, colnames: ColNamesTop, cols: exactCols };
    }
    // The remaining two cases bound the number of columns in the same way
    const boundedCols = selected === undefined
        ? extendIntervalToZero(value.cols)
        : minInterval(value.cols, [selected, selected]);
    if (options?.renamedCols) {
        return { ...value, colnames: ColNamesTop, cols: boundedCols };
    }
    const namesKnown = colnames?.every(assert_1.isNotUndefined);
    return {
        ...value,
        colnames: namesKnown ? meetColNames(value.colnames, colnames) : value.colnames,
        cols: boundedCols
    };
}
/**
 * Abstract semantics of selecting a subset of the rows of a data frame.
 *
 * @param value   - The shape of the data frame
 * @param rows    - The number of selected rows, or `undefined` if it is unknown
 * @param options - `duplicateRows`: whether rows may be selected multiple times
 * @returns With `duplicateRows`, the row interval is exactly `[rows, rows]` (or the top interval
 * if unknown); otherwise it is `minInterval(value.rows, [rows, rows])` (or the row interval
 * extended to zero if unknown).
 */
function applySubsetRowsSemantics(value, { rows }, options) {
    const { IntervalTop, extendIntervalToZero, minInterval } = domain_1;
    const rowsKnown = rows !== undefined;
    let resultRows;
    if (options?.duplicateRows) {
        resultRows = rowsKnown ? [rows, rows] : IntervalTop;
    }
    else if (rowsKnown) {
        resultRows = minInterval(value.rows, [rows, rows]);
    }
    else {
        resultRows = extendIntervalToZero(value.rows);
    }
    return { ...value, rows: resultRows };
}
/**
 * Abstract semantics of filtering the rows of a data frame by a condition.
 *
 * @param value     - The shape of the data frame
 * @param condition - The statically known value of the filter condition, if any
 * @returns The shape with the row interval kept for a truthy condition, set to `[0, 0]` for a
 * condition that is exactly `false`, and extended to zero in every other case.
 */
function applyFilterRowsSemantics(value, { condition }) {
    let rows;
    if (condition) {
        rows = value.rows;
    }
    else if (condition === false) {
        rows = [0, 0];
    }
    else {
        rows = (0, domain_1.extendIntervalToZero)(value.rows);
    }
    return { ...value, rows };
}
/**
 * Abstract semantics of mutating (adding or overwriting) columns of a data frame.
 *
 * @param value    - The shape of the data frame
 * @param colnames - The names of the mutated columns; entries may be `undefined`
 * @returns The shape with the names joined into the column names (or the top element if a name
 * is missing) and the column interval `maxInterval(addInterval(cols, [0, n]), [n, n])` for `n`
 * mutated columns (or the column interval extended to infinity if they are unknown).
 */
function applyMutateColsSemantics(value, { colnames }) {
    const { ColNamesTop, addInterval, extendIntervalToInfinity, joinColNames, maxInterval } = domain_1;
    const mutated = colnames?.length;
    const namesKnown = colnames?.every(assert_1.isNotUndefined);
    const resultNames = namesKnown ? joinColNames(value.colnames, colnames) : ColNamesTop;
    let resultCols;
    if (mutated === undefined) {
        resultCols = extendIntervalToInfinity(value.cols);
    }
    else {
        const grown = addInterval(value.cols, [0, mutated]);
        resultCols = maxInterval(grown, [mutated, mutated]);
    }
    return { ...value, colnames: resultNames, cols: resultCols };
}
/**
 * Abstract semantics of grouping a data frame by columns.
 *
 * @param value   - The shape of the data frame
 * @param by      - The names of the grouping columns; entries may be `undefined`
 * @param options - `mutatedCols`: whether the grouping may add or overwrite columns
 * @returns The unchanged `value` unless `mutatedCols` is set. In that case the grouping names are
 * joined into the column names (or the top element if a name is missing) and `[0, by.length]`
 * is added to the column interval.
 */
function applyGroupBySemantics(value, { by }, options) {
    // Group by only marks columns as groups but does not change the shape itself
    if (!options?.mutatedCols) {
        return value;
    }
    const { ColNamesTop, addInterval, joinColNames } = domain_1;
    const namesKnown = by.every(assert_1.isNotUndefined);
    return {
        ...value,
        colnames: namesKnown ? joinColNames(value.colnames, by) : ColNamesTop,
        cols: addInterval(value.cols, [0, by.length])
    };
}
/**
 * Abstract semantics of summarizing a data frame.
 *
 * @param value    - The shape of the data frame
 * @param colnames - The names of the summary columns; entries may be `undefined`
 * @returns The shape with
 * - the names joined into the column names (or the top element if a name is missing),
 * - the column interval `minInterval(addInterval(cols, [0, n]), [n, Infinity])` for `n` summary
 *   columns, or the column interval extended to infinity if the summary columns are unknown,
 * - the row interval `maxInterval(minInterval(rows, [1, Infinity]), [0, 1])`.
 */
function applySummarizeSemantics(value, { colnames }) {
    const cols = colnames?.length;
    return {
        ...value,
        colnames: colnames?.every(assert_1.isNotUndefined) ? (0, domain_1.joinColNames)(value.colnames, colnames) : domain_1.ColNamesTop,
        // The fallback must be derived from the column interval, as this field describes the number of columns
        cols: cols !== undefined
            ? (0, domain_1.minInterval)((0, domain_1.addInterval)(value.cols, [0, cols]), [cols, Infinity])
            : (0, domain_1.extendIntervalToInfinity)(value.cols),
        rows: (0, domain_1.maxInterval)((0, domain_1.minInterval)(value.rows, [1, Infinity]), [0, 1])
    };
}
/**
* Abstract semantics of joining the data frame shape `value` with the data frame shape `other`.
*
* @param value - The shape of the first data frame
* @param other - The shape of the second data frame
* @param by - The names of the columns to join by; may be `undefined` and may contain `undefined` entries
* @param options - `natural`: if set, columns are never treated as duplicated and the rows are only
* treated as a possible Cartesian product if the shapes provably share no column names,
* `join`: the join type (`'inner'`, `'left'`, `'right'`, or `'full'`), defaults to `'inner'`
* @returns The shape of `value` with the column names, column interval, and row interval replaced
* by the values inferred for the join result
*/
function applyJoinSemantics(value, { other, by }, options) {
// Merge two intervals by creating the maximum of the lower bounds and adding the upper bounds
const mergeInterval = (interval1, interval2) => {
if (interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
return domain_1.IntervalBottom;
}
else {
return [Math.max(interval1[0], interval2[0]), interval1[1] + interval2[1]];
}
};
// Creating the Cartesian product of two intervals by keeping the lower bound and multiplying the upper bounds
const productInterval = (lower, interval1, interval2) => {
if (lower === domain_1.IntervalBottom || interval1 === domain_1.IntervalBottom || interval2 === domain_1.IntervalBottom) {
return domain_1.IntervalBottom;
}
else {
return [lower[0], interval1[1] * interval2[1]];
}
};
// The column names that occur in both shapes
const commonCols = (0, domain_1.meetColNames)(value.colnames, other.colnames);
let duplicateCols; // whether columns may be renamed due to occurrence in both data frames
let productRows; // whether the resulting rows may be a Cartesian product of the rows of the data frames
if (options?.natural) {
duplicateCols = false;
productRows = commonCols !== domain_1.ColNamesTop && commonCols.length === 0;
}
else if (by === undefined) {
// Nothing is known about the join columns
duplicateCols = true;
productRows = true;
}
else if (by.length === 0) {
// Joining by no columns at all: every common column may be duplicated
duplicateCols = commonCols === domain_1.ColNamesTop || commonCols.length > 0;
productRows = true;
}
else if (by.every(assert_1.isNotUndefined)) {
// All join columns are known: only common columns that are not joined by may be duplicated
const remainingCols = (0, domain_1.subtractColNames)(commonCols, by);
duplicateCols = remainingCols === domain_1.ColNamesTop || remainingCols.length > 0;
productRows = false;
}
else {
// Some join columns are unknown
duplicateCols = true;
productRows = false;
}
const joinType = options?.join ?? 'inner';
let rows;
switch (joinType) {
case 'inner':
rows = (0, domain_1.extendIntervalToZero)((0, domain_1.minInterval)(value.rows, other.rows));
break;
case 'left':
rows = value.rows;
break;
case 'right':
rows = other.rows;
break;
case 'full':
rows = mergeInterval(value.rows, other.rows);
break;
default:
// NOTE(review): presumably signals an unsupported join type; behavior is defined by the assert utility
(0, assert_1.assertUnreachable)(joinType);
}
const byCols = by?.length;
return {
...value,
colnames: duplicateCols ? domain_1.ColNamesTop : (0, domain_1.joinColNames)(value.colnames, other.colnames),
// With known join columns, the columns of both shapes are added and [byCols, byCols] is subtracted from the sum
cols: byCols !== undefined ? (0, domain_1.subtractInterval)((0, domain_1.addInterval)(value.cols, other.cols), [byCols, byCols]) : mergeInterval(value.cols, other.cols),
rows: productRows ? productInterval(rows, value.rows, other.rows) : rows
};
}
/**
 * Abstract semantics of an operation that leaves the data frame untouched.
 *
 * @param value - The shape of the data frame
 * @param _args - The (empty) arguments of the operation, ignored
 * @returns The very same `value` that was passed in
 */
function applyIdentitySemantics(value, _args) {
    return value;
}
/**
 * Abstract semantics of an unknown operation: nothing can be said about the result.
 *
 * @param _value - The shape of the data frame, ignored
 * @param _args  - The (empty) arguments of the operation, ignored
 * @returns The top element of the data frame shape domain
 */
function applyUnknownSemantics(_value, _args) {
    const { DataFrameTop } = domain_1;
    return DataFrameTop;
}
//# sourceMappingURL=semantics.js.map