@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
384 lines • 18.6 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataFrameOperationNames = exports.ConstraintType = void 0;
exports.applyDataFrameSemantics = applyDataFrameSemantics;
exports.getConstraintType = getConstraintType;
const assert_1 = require("../../util/assert");
const lattice_1 = require("../domains/lattice");
const positive_interval_domain_1 = require("../domains/positive-interval-domain");
const dataframe_domain_1 = require("./dataframe-domain");
/**
* Represents the different types of resulting constraints that are inferred by abstract data frame operations.
* Each member is registered in both directions: the member name maps to its numeric value
* and the numeric value maps back to the member name.
*/
var ConstraintType;
(function (ConstraintType) {
/** The inferred constraints must hold for the operand at the point of the operation */
ConstraintType[ConstraintType["OperandPrecondition"] = 0] = "OperandPrecondition";
/** The inferred constraints are applied to the operand during the operation */
ConstraintType[ConstraintType["OperandModification"] = 1] = "OperandModification";
/** The inferred constraints must hold for the returned result of the operation */
ConstraintType[ConstraintType["ResultPostcondition"] = 2] = "ResultPostcondition";
// Populate the existing enum object if there is one; otherwise create a fresh object and publish it as `exports.ConstraintType`
})(ConstraintType || (exports.ConstraintType = ConstraintType = {}));
/**
 * Lookup table from the name of every abstract data frame operation to
 *  - `apply`: the function that applies the abstract semantics of the operation, and
 *  - `type`: the {@link ConstraintType} of the constraints that the operation infers.
 *
 * The declaration order of the keys is the order of the exported operation names below.
 */
const DataFrameSemanticsMapper = {
    // constraints on the returned result
    create: { apply: applyCreateSemantics, type: ConstraintType.ResultPostcondition },
    read: { apply: applyReadSemantics, type: ConstraintType.ResultPostcondition },
    // constraints that must already hold for the operand
    accessCols: { apply: applyAccessColsSemantics, type: ConstraintType.OperandPrecondition },
    accessRows: { apply: applyAccessRowsSemantics, type: ConstraintType.OperandPrecondition },
    // constraints applied to the operand itself
    assignCols: { apply: applyAssignColsSemantics, type: ConstraintType.OperandModification },
    assignRows: { apply: applyAssignRowsSemantics, type: ConstraintType.OperandModification },
    setColNames: { apply: applySetColNamesSemantics, type: ConstraintType.OperandModification },
    // constraints on the returned result
    addCols: { apply: applyAddColsSemantics, type: ConstraintType.ResultPostcondition },
    addRows: { apply: applyAddRowsSemantics, type: ConstraintType.ResultPostcondition },
    removeCols: { apply: applyRemoveColsSemantics, type: ConstraintType.ResultPostcondition },
    removeRows: { apply: applyRemoveRowsSemantics, type: ConstraintType.ResultPostcondition },
    concatCols: { apply: applyConcatColsSemantics, type: ConstraintType.ResultPostcondition },
    concatRows: { apply: applyConcatRowsSemantics, type: ConstraintType.ResultPostcondition },
    subsetCols: { apply: applySubsetColsSemantics, type: ConstraintType.ResultPostcondition },
    subsetRows: { apply: applySubsetRowsSemantics, type: ConstraintType.ResultPostcondition },
    filterRows: { apply: applyFilterRowsSemantics, type: ConstraintType.ResultPostcondition },
    mutateCols: { apply: applyMutateColsSemantics, type: ConstraintType.ResultPostcondition },
    groupBy: { apply: applyGroupBySemantics, type: ConstraintType.ResultPostcondition },
    summarize: { apply: applySummarizeSemantics, type: ConstraintType.ResultPostcondition },
    join: { apply: applyJoinSemantics, type: ConstraintType.ResultPostcondition },
    unknown: { apply: applyUnknownSemantics, type: ConstraintType.ResultPostcondition },
    identity: { apply: applyIdentitySemantics, type: ConstraintType.ResultPostcondition }
};
/** The names of all abstract data frame operations, in the declaration order of the mapper keys */
exports.DataFrameOperationNames = Object.keys(DataFrameSemanticsMapper);
/**
 * Applies the abstract semantics of an abstract data frame operation with respect to the data frame shape domain.
 * This expects that all arguments have already been sanitized according to the original concrete data frame function (e.g. by replacing duplicate/invalid column names).
 * @param operation - The name of the abstract operation to apply the semantics of
 * @param value - The abstract data frame shape of the operand of the abstract operation
 * @param args - The arguments for applying the abstract semantics of the abstract operation
 * @param options - The optional additional options of the abstract operation
 * @returns The resulting new data frame shape constraints.
 * The semantic type of the resulting constraints depends on the {@link ConstraintType} of the abstract operation.
 * @throws TypeError if `operation` is not the name of an abstract data frame operation defined in the mapper
 */
function applyDataFrameSemantics(operation, value, args, options) {
    // Accept own keys of the mapper only: inherited names such as `toString` or `constructor` would otherwise resolve
    // to functions of Object.prototype, whose `.apply` is Function.prototype.apply and would silently return garbage
    if (!Object.prototype.hasOwnProperty.call(DataFrameSemanticsMapper, operation)) {
        throw new TypeError(`Unknown abstract data frame operation: ${String(operation)}`);
    }
    const applier = DataFrameSemanticsMapper[operation];
    return applier.apply(value, args, options);
}
/**
 * Gets the default resulting constraint type for an abstract data frame operation.
 * @param operation - The name of the abstract operation to look up
 * @returns The {@link ConstraintType} registered for the operation in the mapper
 * @throws TypeError if `operation` is not the name of an abstract data frame operation defined in the mapper
 */
function getConstraintType(operation) {
    // Accept own keys of the mapper only, so that inherited names such as `toString` do not silently yield `undefined`
    if (!Object.prototype.hasOwnProperty.call(DataFrameSemanticsMapper, operation)) {
        throw new TypeError(`Unknown abstract data frame operation: ${String(operation)}`);
    }
    return DataFrameSemanticsMapper[operation].type;
}
/**
 * Abstract semantics of creating a data frame: every component of the shape is re-created via `create()`
 * from the given column names and row information.
 * @param value - The shape whose component domains are used to create the new values
 * @param colnames - The column names of the created data frame, or `undefined` if they are unknown
 * @param rows - The row count as an interval array or a single number; anything else is treated as unknown
 * @returns The shape of the created data frame
 */
function applyCreateSemantics(value, { colnames, rows }) {
    const nameSet = setRange(colnames);
    let colInterval;
    if (colnames !== undefined) {
        colInterval = [colnames.length, colnames.length];
    }
    else {
        colInterval = positive_interval_domain_1.PosIntervalTop;
    }
    let rowInterval;
    if (Array.isArray(rows)) {
        rowInterval = rows;
    }
    else if (typeof rows === 'number') {
        rowInterval = [rows, rows];
    }
    else {
        rowInterval = positive_interval_domain_1.PosIntervalTop;
    }
    const colnamesDomain = value.colnames.create(nameSet);
    const colsDomain = value.cols.create(colInterval);
    const rowsDomain = value.rows.create(rowInterval);
    return new dataframe_domain_1.DataFrameDomain({ colnames: colnamesDomain, cols: colsDomain, rows: rowsDomain });
}
/**
 * Abstract semantics of reading a data frame; delegates to the semantics of creating a data frame
 * with the same column names and rows.
 * @param value - The shape of the operand
 * @param colnames - The column names of the read data frame, if known
 * @param rows - The row information of the read data frame, if known
 * @returns The shape returned by {@link applyCreateSemantics}
 */
function applyReadSemantics(value, { colnames, rows }) {
    const createArgs = { colnames, rows };
    return applyCreateSemantics(value, createArgs);
}
/**
 * Abstract semantics of accessing columns of a data frame.
 * If all accessed columns are strings, they are united with the column names; if all are numbers,
 * the column count is combined with each index via `max()`. Otherwise the shape is returned unchanged.
 * @param value - The shape of the accessed data frame
 * @param columns - The accessed columns (names or indices), or `undefined` if unknown
 * @returns The inferred shape, or `value` itself if nothing can be inferred
 */
function applyAccessColsSemantics(value, { columns }) {
    const allOfType = (type) => columns?.every((entry) => typeof entry === type) === true;
    if (allOfType('string')) {
        const colnames = value.colnames.union(setRange(columns));
        return new dataframe_domain_1.DataFrameDomain({ colnames, cols: value.cols, rows: value.rows });
    }
    if (allOfType('number')) {
        const colnames = value.colnames;
        let cols = value.cols;
        columns.forEach((index) => {
            cols = cols.max([index, index]);
        });
        return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: value.rows });
    }
    return value;
}
/**
 * Abstract semantics of accessing rows of a data frame: the row count is combined with each
 * accessed row index via `max()`.
 * @param value - The shape of the accessed data frame
 * @param rows - The accessed row indices, or `undefined` if unknown
 * @returns The inferred shape, or `value` itself if the rows are unknown
 */
function applyAccessRowsSemantics(value, { rows }) {
    if (rows === undefined) {
        return value;
    }
    const colnames = value.colnames;
    const cols = value.cols;
    let rowInterval = value.rows;
    rows.forEach((row) => {
        rowInterval = rowInterval.max([row, row]);
    });
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: rowInterval });
}
/**
 * Abstract semantics of assigning columns of a data frame.
 * String columns are united with the column names and may add up to that many columns;
 * numeric columns widen the column names upwards and combine the column count with each index via `max()`;
 * in every other case both the column names and the column count are widened upwards.
 * @param value - The shape of the data frame that is assigned to
 * @param columns - The assigned columns (names or indices), or `undefined` if unknown
 * @returns The shape after the assignment; the rows are taken over from `value`
 */
function applyAssignColsSemantics(value, { columns }) {
    const allOfType = (type) => columns?.every((entry) => typeof entry === type) === true;
    let colnames;
    let cols;
    if (allOfType('string')) {
        const assigned = columns.length;
        colnames = value.colnames.union(setRange(columns));
        cols = value.cols.add([0, assigned]).max([assigned, assigned]);
    }
    else if (allOfType('number')) {
        colnames = value.colnames.widenUp();
        cols = value.cols;
        columns.forEach((index) => {
            cols = cols.max([index, index]);
        });
    }
    else {
        colnames = value.colnames.widenUp();
        cols = value.cols.widenUp();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: value.rows });
}
/**
 * Abstract semantics of assigning rows of a data frame: known row indices are combined with the
 * row count via `max()`, unknown rows widen the row count upwards.
 * @param value - The shape of the data frame that is assigned to
 * @param rows - The assigned row indices, or `undefined` if unknown
 * @returns The shape after the assignment; column names and column count are taken over from `value`
 */
function applyAssignRowsSemantics(value, { rows }) {
    const colnames = value.colnames;
    const cols = value.cols;
    let rowInterval;
    if (rows === undefined) {
        rowInterval = value.rows.widenUp();
    }
    else {
        rowInterval = value.rows;
        rows.forEach((row) => {
            rowInterval = rowInterval.max([row, row]);
        });
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: rowInterval });
}
/**
 * Abstract semantics of setting the column names of a data frame.
 * With `options.partial`, the existing names are widened downwards and united with the new names.
 * Otherwise the names are re-created from the new names; they are additionally widened upwards unless
 * every new name is defined, the column count is not bottom, and there are at least as many names
 * as the second entry of the column count interval.
 * @param value - The shape of the data frame whose column names are set
 * @param colnames - The new column names (entries may be `undefined`), or `undefined` if unknown
 * @param options - Optional flags; only `partial` is read
 * @returns The shape with updated column names; column count and rows are taken over from `value`
 */
function applySetColNamesSemantics(value, { colnames }, options) {
    let newNames;
    if (options?.partial) {
        newNames = value.colnames.widenDown().union(setRange(colnames));
    }
    else {
        const coversAllCols = colnames?.every(assert_1.isNotUndefined)
            && value.cols.value !== lattice_1.Bottom
            && colnames.length >= value.cols.value[1];
        const exactNames = value.colnames.create(setRange(colnames));
        newNames = coversAllCols ? exactNames : exactNames.widenUp();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames: newNames, cols: value.cols, rows: value.rows });
}
/**
 * Abstract semantics of adding columns to a data frame: known names are united with the column names
 * and their number is added to the column count; unknown names widen both upwards.
 * @param value - The shape of the data frame the columns are added to
 * @param colnames - The names of the added columns, or `undefined` if unknown
 * @returns The shape with the added columns; the rows are taken over from `value`
 */
function applyAddColsSemantics(value, { colnames }) {
    if (colnames === undefined) {
        return new dataframe_domain_1.DataFrameDomain({
            colnames: value.colnames.widenUp(),
            cols: value.cols.widenUp(),
            rows: value.rows
        });
    }
    const added = colnames.length;
    return new dataframe_domain_1.DataFrameDomain({
        colnames: value.colnames.union(setRange(colnames)),
        cols: value.cols.add([added, added]),
        rows: value.rows
    });
}
/**
 * Abstract semantics of adding rows to a data frame.
 * If the column count is not bottom and its first interval entry is 0, the column names become top and the
 * column count is changed as well; otherwise only the row count changes.
 * @param value - The shape of the data frame the rows are added to
 * @param rows - The number of added rows, or `undefined` if unknown
 * @returns The shape with the added rows
 */
function applyAddRowsSemantics(value, { rows }) {
    const rowsKnown = rows !== undefined;
    const grownRows = () => rowsKnown ? value.rows.add([rows, rows]) : value.rows.widenUp();
    const mayHaveNoCols = value.cols.value !== lattice_1.Bottom && value.cols.value[0] === 0;
    if (!mayHaveNoCols) {
        return new dataframe_domain_1.DataFrameDomain({
            colnames: value.colnames,
            cols: value.cols,
            rows: grownRows()
        });
    }
    return new dataframe_domain_1.DataFrameDomain({
        ...value,
        colnames: value.colnames.top(),
        cols: rowsKnown ? value.cols.add([1, 1]) : value.cols.top(),
        rows: grownRows()
    });
}
/**
 * Abstract semantics of removing columns from a data frame.
 * Known names are subtracted from the column names and their number is subtracted from the column count;
 * with `options.maybe`, only the first entry of the subtracted interval is the number of names and the second is 0.
 * Unknown names widen both the column names and the column count downwards.
 * @param value - The shape of the data frame the columns are removed from
 * @param colnames - The names of the removed columns, or `undefined` if unknown
 * @param options - Optional flags; only `maybe` is read
 * @returns The shape without the removed columns; the rows are taken over from `value`
 */
function applyRemoveColsSemantics(value, { colnames }, options) {
    const onlyMaybe = Boolean(options?.maybe);
    if (colnames === undefined) {
        return new dataframe_domain_1.DataFrameDomain({
            colnames: value.colnames.widenDown(),
            cols: value.cols.widenDown(),
            rows: value.rows
        });
    }
    const removed = colnames.length;
    const removedInterval = onlyMaybe ? [removed, 0] : [removed, removed];
    return new dataframe_domain_1.DataFrameDomain({
        colnames: value.colnames.subtract(setRange(colnames)),
        cols: value.cols.subtract(removedInterval),
        rows: value.rows
    });
}
/**
 * Abstract semantics of removing rows from a data frame.
 * A known number of rows is subtracted from the row count; with `options.maybe`, the subtracted interval is
 * `[rows, 0]` instead of `[rows, rows]`. An unknown number widens the row count downwards.
 * @param value - The shape of the data frame the rows are removed from
 * @param rows - The number of removed rows, or `undefined` if unknown
 * @param options - Optional flags; only `maybe` is read
 * @returns The shape without the removed rows; column names and column count are taken over from `value`
 */
function applyRemoveRowsSemantics(value, { rows }, options) {
    const onlyMaybe = Boolean(options?.maybe);
    const colnames = value.colnames;
    const cols = value.cols;
    let remaining;
    if (rows === undefined) {
        remaining = value.rows.widenDown();
    }
    else {
        remaining = value.rows.subtract(onlyMaybe ? [rows, 0] : [rows, rows]);
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: remaining });
}
/**
 * Abstract semantics of concatenating the columns of two data frames: the column names are united
 * and the column counts are added.
 * @param value - The shape of the first data frame
 * @param other - The shape of the data frame whose columns are appended
 * @returns The combined shape; the rows are taken over from `value`
 */
function applyConcatColsSemantics(value, { other }) {
    const colnames = value.colnames.union(other.colnames);
    const cols = value.cols.add(other.cols);
    const rows = value.rows;
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows });
}
/**
 * Abstract semantics of concatenating the rows of two data frames.
 * The row counts are always added. If the column count of `value` is not bottom and its first interval entry is 0,
 * the column names and column counts of both data frames are additionally joined.
 * @param value - The shape of the first data frame
 * @param other - The shape of the data frame whose rows are appended
 * @returns The combined shape
 */
function applyConcatRowsSemantics(value, { other }) {
    const colBounds = value.cols.value;
    const mayHaveNoCols = colBounds !== lattice_1.Bottom && colBounds[0] === 0;
    if (!mayHaveNoCols) {
        return new dataframe_domain_1.DataFrameDomain({
            colnames: value.colnames,
            cols: value.cols,
            rows: value.rows.add(other.rows)
        });
    }
    return new dataframe_domain_1.DataFrameDomain({
        ...value,
        colnames: value.colnames.join(other.colnames),
        cols: value.cols.join(other.cols),
        rows: value.rows.add(other.rows)
    });
}
/**
 * Abstract semantics of selecting a subset of the columns of a data frame.
 *  - `options.duplicateCols`: the column names become top and the column count is re-created from the number of selected names (or top if unknown).
 *  - `options.renamedCols`: the column names become top and the column count is combined with the number of selected names via `min()` (or widened downwards if unknown).
 *  - otherwise: the column names are intersected with the selected names and the column count is handled as for `renamedCols`; unknown names widen both downwards.
 * `duplicateCols` takes precedence over `renamedCols`.
 * @param value - The shape of the data frame that is subsetted
 * @param colnames - The selected column names, or `undefined` if unknown
 * @param options - Optional flags; `duplicateCols` and `renamedCols` are read
 * @returns The shape of the subset; the rows are taken over from `value`
 */
function applySubsetColsSemantics(value, { colnames }, options) {
    const known = colnames !== undefined;
    let names;
    let cols;
    if (options?.duplicateCols) {
        names = value.colnames.top();
        cols = known ? value.cols.create([colnames.length, colnames.length]) : value.cols.top();
    }
    else {
        if (options?.renamedCols) {
            names = value.colnames.top();
        }
        else if (known) {
            names = value.colnames.intersect(setRange(colnames));
        }
        else {
            names = value.colnames.widenDown();
        }
        cols = known ? value.cols.min([colnames.length, colnames.length]) : value.cols.widenDown();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames: names, cols, rows: value.rows });
}
/**
 * Abstract semantics of selecting a subset of the rows of a data frame.
 * With `options.duplicateRows`, the row count is re-created from the number of selected rows (or top if unknown);
 * otherwise it is combined with that number via `min()` (or widened downwards if unknown).
 * @param value - The shape of the data frame that is subsetted
 * @param rows - The number of selected rows, or `undefined` if unknown
 * @param options - Optional flags; only `duplicateRows` is read
 * @returns The shape of the subset; column names and column count are taken over from `value`
 */
function applySubsetRowsSemantics(value, { rows }, options) {
    const known = rows !== undefined;
    const colnames = value.colnames;
    const cols = value.cols;
    let selected;
    if (options?.duplicateRows) {
        selected = known ? value.rows.create([rows, rows]) : value.rows.top();
    }
    else {
        selected = known ? value.rows.min([rows, rows]) : value.rows.widenDown();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: selected });
}
/**
 * Abstract semantics of filtering the rows of a data frame by a condition.
 * A truthy condition keeps the row count, a condition that is exactly `false` re-creates it as `[0, 0]`,
 * and any other condition widens the row count downwards.
 * @param value - The shape of the filtered data frame
 * @param condition - The statically known value of the filter condition, if any
 * @returns The shape after filtering; column names and column count are taken over from `value`
 */
function applyFilterRowsSemantics(value, { condition }) {
    const colnames = value.colnames;
    const cols = value.cols;
    let rows;
    if (condition) {
        rows = value.rows;
    }
    else if (condition === false) {
        rows = value.rows.create([0, 0]);
    }
    else {
        rows = value.rows.widenDown();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows });
}
/**
 * Abstract semantics of mutating columns of a data frame.
 * Known names are united with the column names; the column count has `[0, n]` added and is then combined with
 * `[n, n]` via `max()`, where `n` is the number of names. Unknown names widen both upwards.
 * @param value - The shape of the mutated data frame
 * @param colnames - The names of the mutated columns, or `undefined` if unknown
 * @returns The shape after the mutation; the rows are taken over from `value`
 */
function applyMutateColsSemantics(value, { colnames }) {
    let names;
    let cols;
    if (colnames !== undefined) {
        const mutated = colnames.length;
        names = value.colnames.union(setRange(colnames));
        cols = value.cols.add([0, mutated]).max([mutated, mutated]);
    }
    else {
        names = value.colnames.widenUp();
        cols = value.cols.widenUp();
    }
    return new dataframe_domain_1.DataFrameDomain({ colnames: names, cols, rows: value.rows });
}
/**
 * Abstract semantics of grouping a data frame by columns.
 * Grouping itself leaves the shape untouched; only with `options.mutatedCols` are the group columns united
 * with the column names and `[0, by.length]` added to the column count.
 * @param value - The shape of the grouped data frame
 * @param by - The columns to group by
 * @param options - Optional flags; only `mutatedCols` is read
 * @returns `value` itself, or a new shape including the mutated group columns
 */
function applyGroupBySemantics(value, { by }, options) {
    if (!options?.mutatedCols) {
        // Without mutated columns, the groups are only marked and the shape stays as it is
        return value;
    }
    const colnames = value.colnames.union(setRange(by));
    const cols = value.cols.add([0, by.length]);
    return new dataframe_domain_1.DataFrameDomain({ colnames, cols, rows: value.rows });
}
/**
 * Abstract semantics of summarizing a data frame.
 * For `n` known summary columns, the column names are joined with the range of an empty name list and united with
 * the new names, and the column count has `[0, n]` added and is combined with `[n, +Infinity]` via `min()`.
 * Unknown summary columns widen both upwards.
 * The row count is always combined with `[1, +Infinity]` via `min()` and then with `[0, 1]` via `max()`.
 * @param value - The shape of the summarized data frame
 * @param colnames - The names of the summary columns, or `undefined` if unknown
 * @returns The shape of the summary
 */
function applySummarizeSemantics(value, { colnames }) {
    let names;
    let cols;
    if (colnames !== undefined) {
        const summarized = colnames.length;
        names = value.colnames.join(setRange([])).union(setRange(colnames));
        cols = value.cols.add([0, summarized]).min([summarized, Infinity]);
    }
    else {
        names = value.colnames.widenUp();
        cols = value.cols.widenUp();
    }
    const rows = value.rows.min([1, Infinity]).max([0, 1]);
    return new dataframe_domain_1.DataFrameDomain({ colnames: names, cols, rows });
}
/**
* Applies the abstract semantics of joining the data frame shape `value` with the data frame shape `other`.
* @param value - The abstract shape of the first data frame of the join
* @param other - The abstract shape of the data frame that `value` is joined with
* @param by - The columns to join by; may be `undefined`, empty, or contain `undefined` entries
* @param options - Optional settings: `join` selects the join type (`'inner'` if absent, otherwise `'left'`, `'right'`, or `'full'`),
* `natural` selects the natural join branch
* @returns A new data frame shape with the inferred column names, column count, and row count
*/
function applyJoinSemantics(value, { other, by }, options) {
// Merge two intervals by creating the maximum of the lower bounds and adding the upper bounds
// (yields bottom as soon as one of the intervals is bottom)
const mergeInterval = (interval1, interval2) => {
if (interval1.value === lattice_1.Bottom || interval2.value === lattice_1.Bottom) {
return interval1.bottom();
}
else {
return new positive_interval_domain_1.PosIntervalDomain([Math.max(interval1.value[0], interval2.value[0]), interval1.value[1] + interval2.value[1]]);
}
};
// Creating the Cartesian product of two intervals by keeping the lower bound and multiplying the upper bounds
// (the lower bound is taken from `lower`; yields bottom as soon as one of the three intervals is bottom)
const productInterval = (lower, interval1, interval2) => {
if (lower.value === lattice_1.Bottom || interval1.value === lattice_1.Bottom || interval2.value === lattice_1.Bottom) {
return lower.bottom();
}
else {
return new positive_interval_domain_1.PosIntervalDomain([lower.value[0], interval1.value[1] * interval2.value[1]]);
}
};
let duplicateCols; // columns that may be renamed due to occurring in both data frames
let productRows; // whether the resulting rows may be a Cartesian product of the rows of the data frames
if (options?.natural) {
// Natural join: no columns are treated as renamed; the rows may only be a product if the
// result of `upper()` on the common column names is an actual set (neither bottom nor top) that is empty
const commonCols = value.colnames.intersect(other.colnames).upper();
duplicateCols = [];
productRows = commonCols !== lattice_1.Bottom && commonCols !== lattice_1.Top && commonCols.size === 0;
}
else if (by === undefined) {
// Unknown join columns: the renamed columns are unknown and the rows may be a product
duplicateCols = undefined;
productRows = true;
}
else if (by.length === 0) {
// No join columns: every common column may be renamed (bottom -> none, top -> unknown) and the rows may be a product
const commonCols = value.colnames.intersect(other.colnames).upper();
duplicateCols = commonCols !== lattice_1.Bottom ? commonCols !== lattice_1.Top ? [...commonCols] : undefined : [];
productRows = true;
}
else if (by.every(assert_1.isNotUndefined)) {
// All join columns are known: only the common columns that are not joined by may be renamed (bottom -> none, top -> unknown)
const remainingCols = value.colnames.intersect(other.colnames).subtract(setRange(by)).upper();
duplicateCols = remainingCols !== lattice_1.Bottom ? remainingCols !== lattice_1.Top ? [...remainingCols] : undefined : [];
productRows = false;
}
else {
// Some join columns are unknown: the renamed columns are unknown, but the rows are not treated as a product
duplicateCols = undefined;
productRows = false;
}
// The join type defaults to an inner join if no type is given
const joinType = options?.join ?? 'inner';
let rows;
switch (joinType) {
case 'inner':
rows = value.rows.max(other.rows).widenDown();
break;
case 'left':
// Combine the rows of `value` with `[0, upper]` of the rows of `other` (or bottom if `other.rows` is not a value)
rows = value.rows.max(other.rows.isValue() ? [0, other.rows.value[1]] : lattice_1.Bottom);
break;
case 'right':
// Mirror of the left join with the roles of `value` and `other` swapped
rows = other.rows.max(value.rows.isValue() ? [0, value.rows.value[1]] : lattice_1.Bottom);
break;
case 'full':
rows = mergeInterval(value.rows, other.rows);
break;
default:
(0, assert_1.assertUnreachable)(joinType);
}
return new dataframe_domain_1.DataFrameDomain({
...value,
// Unknown renamed columns -> top; some renamed columns -> union without them, widened upwards; none -> plain union
colnames: duplicateCols === undefined ? value.colnames.top() : duplicateCols.length > 0 ? value.colnames.union(other.colnames).subtract(setRange(duplicateCols)).widenUp() : value.colnames.union(other.colnames),
// Known join columns are subtracted once from the added column counts; otherwise the column counts are merged
cols: by !== undefined ? value.cols.add(other.cols).subtract([by.length, by.length]) : mergeInterval(value.cols, other.cols),
rows: productRows ? productInterval(rows, value.rows, other.rows) : rows
});
}
/**
 * Abstract semantics of an operation that does not change the data frame shape.
 * @param value - The shape of the operand
 * @param _args - Unused; the operation takes no arguments
 * @returns The very same shape object that was passed in
 */
function applyIdentitySemantics(value, _args) {
    const unchanged = value;
    return unchanged;
}
/**
 * Abstract semantics of an unknown operation: nothing is assumed about the result,
 * so the `top()` of the operand shape is returned.
 * @param value - The shape of the operand
 * @param _args - Unused; the operation takes no arguments
 * @returns The result of `value.top()`
 */
function applyUnknownSemantics(value, _args) {
    const anyShape = value.top();
    return anyShape;
}
/**
 * Converts a list of column names with possibly unknown entries into a set range.
 * @param colnames - The column names, where unknown names are `undefined`; may itself be absent
 * @returns An object whose `min` holds the defined names and whose `range` is an empty list
 * if every name is defined, and `Top` otherwise (also when `colnames` is absent)
 */
function setRange(colnames) {
    if (colnames == null) {
        return { min: [], range: lattice_1.Top };
    }
    const known = colnames.filter(assert_1.isNotUndefined);
    const allKnown = known.length === colnames.length;
    return { min: known, range: allKnown ? [] : lattice_1.Top };
}
//# sourceMappingURL=semantics.js.map