@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
405 lines • 16.3 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputTraceType = exports.InputType = void 0;
exports.classifyInput = classifyInput;
const graph_1 = require("../../../dataflow/graph/graph");
const objects_1 = require("../../../util/objects");
const vertex_1 = require("../../../dataflow/graph/vertex");
const df_helper_1 = require("../../../dataflow/graph/df-helper");
const edge_1 = require("../../../dataflow/graph/edge");
const identifier_1 = require("../../../dataflow/environments/identifier");
const assert_1 = require("../../../util/assert");
const arrays_1 = require("../../../util/collections/arrays");
const built_in_proc_name_1 = require("../../../dataflow/environments/built-in-proc-name");
const record_1 = require("../../../util/record");
const r_number_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-number");
const r_string_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-string");
const r_logical_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-logical");
const r_symbol_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol");
const convert_values_1 = require("../../../r-bridge/lang-4.x/convert-values");
/**
 * Tells whether an input type is one of the two constant kinds.
 * @param type - the input type to test
 * @returns `true` for `InputType.Constant` and `InputType.DerivedConstant`, `false` for anything else
 */
function isConstantLike(type) {
    switch (type) {
        case InputType.Constant:
        case InputType.DerivedConstant:
            return true;
        default:
            return false;
    }
}
/**
 * Collapses a sequence of values into the single value they all agree on.
 *
 * The result is `undefined` when the sequence is empty, when any entry is `undefined`
 * (one unknown value makes the overall value unknown), or when two entries differ
 * under strict equality.
 * @param values - iterable of candidate values
 * @returns the shared value, or `undefined` as described above
 */
function singleValue(values) {
    let common;
    let isFirst = true;
    for (const candidate of values) {
        const disagrees = !isFirst && candidate !== common;
        if (candidate === undefined || disagrees) {
            return undefined;
        }
        common = candidate;
        isFirst = false;
    }
    return common;
}
/**
 * Gathers the pieces of several partial classifications (types, control-dependency
 * types, values, and whether every merged trace was pure) so they can be combined into
 * one result. Use {@link build} to obtain the combined classification.
 */
class ClassificationAccumulator {
    constructor() {
        this.types = [];
        this.cds = [];
        this.values = [];
        this.allPure = true;
    }
    /**
     * Folds an already computed classification into this accumulator.
     * @param c - classification providing `types`, `value`, `trace` and optionally `cds`
     */
    merge(c) {
        const { types, value, cds, trace } = c;
        this.types.push(...types);
        this.values.push(value);
        if (cds) {
            this.cds.push(...cds);
        }
        // a single non-pure trace is enough to lose the "pure" status
        this.allPure &&= trace === InputTraceType.Pure;
    }
    /** Records a contribution about which nothing is known (Unknown type, no value). */
    pushUnknown() {
        this.types.push(InputType.Unknown);
        this.values.push(undefined);
    }
    /**
     * Produces the combined classification.
     * @param id - id to store in the result
     * @returns an object with `id`, de-duplicated `types` (just Unknown when nothing was collected)
     * and `trace` (Pure while `allPure` holds, Alias otherwise); `cds` is present only when
     * control-dependency types were collected, `value` only when every type is constant-like
     * and `singleValue` yields a value for the collected values
     */
    build(id) {
        const { uniqueArray } = arrays_1;
        const types = this.types.length > 0 ? uniqueArray(this.types) : [InputType.Unknown];
        const src = {
            id,
            types,
            trace: this.allPure ? InputTraceType.Pure : InputTraceType.Alias
        };
        if (this.cds.length > 0) {
            src.cds = uniqueArray(this.cds);
        }
        const value = types.every(isConstantLike) ? singleValue(this.values) : undefined;
        if (value !== undefined) {
            src.value = value;
        }
        return src;
    }
}
/**
 * Classifies vertices of a dataflow graph by the kind of input they stem from
 * (see {@link InputType}) and memoizes the result per vertex id.
 *
 * - `dfg` is the graph used for vertex and origin lookups.
 * - `config` maps input-type names (plus the key `pure`) to lists that are matched
 *   against function calls with `matchesList`.
 * - `fullDfg`, when given, is used instead of `dfg` for the defined-by-on-call lookup.
 */
class InputClassifier {
    dfg;
    config;
    /** Classification per vertex id; also holds the temporary placeholders that break cycles. */
    cache = new Map();
    fullDfg;
    constructor(dfg, config, fullDfg) {
        this.dfg = dfg;
        this.config = config;
        this.fullDfg = fullDfg;
    }
    /**
     * @param id - vertex id to inspect
     * @returns whether `id` has at least one outgoing edge that includes the DefinedByOnCall type
     */
    isDefinedByOnCall(id) {
        const out = (this.fullDfg ?? this.dfg).outgoingEdges(id);
        if (out == null) {
            return false;
        }
        // a plain loop avoids depending on Iterator.prototype.some and on a throwaway Map
        for (const e of out.values()) {
            if (edge_1.DfEdge.includesType(e, edge_1.EdgeType.DefinedByOnCall)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Reads the literal value of the AST node registered for `id` in the graph's id map.
     * @returns the number, string or logical content of the node, `null` for the NULL symbol,
     * and `undefined` when there is no such node or it is none of these
     */
    extractConstantValue(id) {
        const node = this.dfg.idMap?.get(id);
        if (node === undefined) {
            return undefined;
        }
        if (r_number_1.RNumber.is(node)) {
            return node.content.num;
        }
        if (r_string_1.RString.is(node)) {
            return node.content.str;
        }
        if (r_logical_1.RLogical.is(node)) {
            return node.content;
        }
        if (r_symbol_1.RSymbol.is(node) && node.content === convert_values_1.RNull) {
            return null;
        }
        return undefined;
    }
    /**
     * Classifies a single vertex, dispatching on its tag. Results are cached by vertex id.
     * @param vertex - the dataflow vertex to classify
     * @returns the classification of `vertex`; when the vertex is reached again while it is
     * still being classified, the temporary Unknown placeholder is returned instead
     */
    classifyEntry(vertex) {
        const cached = this.cache.get(vertex.id);
        if (cached) {
            return cached;
        }
        // insert temporary unknown to break cycles
        this.cache.set(vertex.id, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
        switch (vertex.tag) {
            case vertex_1.VertexType.Value: {
                const src = { id: vertex.id, types: [InputType.Constant], trace: InputTraceType.Unknown };
                const v = this.extractConstantValue(vertex.id);
                if (v !== undefined) {
                    src.value = v;
                }
                return this.classifyCdsAndReturn(vertex, src);
            }
            case vertex_1.VertexType.FunctionCall:
                return this.classifyFunctionCall(vertex);
            case vertex_1.VertexType.VariableDefinition:
                return this.classifyVariableDefinition(vertex);
            case vertex_1.VertexType.Use:
                return this.classifyVariable(vertex);
            default:
                return this.classifyCdsAndReturn(vertex, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
    }
    /**
     * Index of the argument that decides how often/whether the body of a control-flow call runs.
     * @returns `0` for if-then-else and while calls, `1` for for-loop calls, `undefined` otherwise
     */
    conditionArgumentIndex(call) {
        const { BuiltInProcName } = built_in_proc_name_1;
        if (call.origin.includes(BuiltInProcName.IfThenElse) || call.origin.includes(BuiltInProcName.WhileLoop)) {
            return 0;
        }
        if (call.origin.includes(BuiltInProcName.ForLoop)) {
            return 1;
        }
        return undefined;
    }
    /**
     * Classifies a function call vertex:
     * - if/while/for calls take over the classification of their deciding argument;
     * - calls not listed in `config.pure` get every input type whose config list matches
     *   the call (Unknown if none does);
     * - calls listed as pure are classified from the types of their arguments.
     */
    classifyFunctionCall(call) {
        const conditionIndex = this.conditionArgumentIndex(call);
        if (conditionIndex !== undefined) {
            const condition = graph_1.FunctionArgument.getReference(call.args[conditionIndex]);
            // compare against undefined explicitly: a falsy id such as 0 is still a valid reference
            if (condition !== undefined) {
                const vtx = this.dfg.getVertex(condition);
                if (vtx) {
                    // work on a copy carrying the call's id, so that attaching the call's control
                    // dependencies does not modify the entry cached for the condition itself
                    return this.classifyCdsAndReturn(call, { ...this.classifyEntry(vtx), id: call.id });
                }
            }
        }
        if (!matchesList(call, this.config.pure)) {
            const types = [];
            const inputTypes = record_1.Record.values(InputType);
            for (const [type, entry] of record_1.Record.entries(this.config)) {
                if (inputTypes.includes(type) && matchesList(call, entry)) {
                    types.push(type);
                }
            }
            if (types.length === 0) {
                // if it is not pure, we cannot classify based on the inputs, in that case we do not know!
                types.push(InputType.Unknown);
            }
            return this.classifyCdsAndReturn(call, { id: call.id, types, trace: InputTraceType.Unknown });
        }
        // Otherwise, classify by arguments; pure functions get Known/Pure handling
        const argTypes = [];
        const cdTypes = [];
        for (const arg of call.args) {
            if (graph_1.FunctionArgument.isEmpty(arg)) {
                continue;
            }
            const ref = graph_1.FunctionArgument.getReference(arg);
            if (ref === undefined) {
                argTypes.push(InputType.Unknown);
                continue;
            }
            const argVtx = this.dfg.getVertex(ref);
            if (!argVtx) {
                argTypes.push(InputType.Unknown);
                continue;
            }
            const classified = this.classifyEntry(argVtx);
            // collect all observed types from this argument
            argTypes.push(...classified.types);
            if (classified.cds) {
                cdTypes.push(...classified.cds);
            }
        }
        const cds = cdTypes.length > 0 ? (0, arrays_1.uniqueArray)(cdTypes) : undefined;
        // all arguments only contain constant-like types -> derived constant
        const allConstLike = argTypes.length > 0 && argTypes.every(isConstantLike);
        if (allConstLike) {
            return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: [InputType.DerivedConstant], trace: InputTraceType.Pure, cds }));
        }
        argTypes.push(InputType.DerivedConstant);
        return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: (0, arrays_1.uniqueArray)(argTypes), trace: InputTraceType.Known, cds }));
    }
    /**
     * Classifies a variable use by combining the classifications of all of its origins.
     * Without origin information the result is Scope if the use has a defined-by-on-call
     * edge and Unknown otherwise.
     */
    classifyVariable(vtx) {
        const origins = df_helper_1.Dataflow.origin(this.dfg, vtx.id);
        if (origins === undefined) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: this.isDefinedByOnCall(vtx.id) ? [InputType.Scope] : [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = new ClassificationAccumulator();
        for (const o of origins) {
            if (o.type === 4 /* OriginType.ConstantOrigin */) {
                acc.types.push(InputType.DerivedConstant);
                acc.values.push(this.extractConstantValue(o.id));
            }
            else if (o.type === 0 /* OriginType.ReadVariableOrigin */ || o.type === 1 /* OriginType.WriteVariableOrigin */) {
                this.classifyVariableOrigin(o.id, acc);
            }
            else if (o.type === 2 /* OriginType.FunctionCallOrigin */ || o.type === 3 /* OriginType.BuiltInFunctionOrigin */) {
                this.classifyByVertex(o.id, acc);
            }
            else {
                acc.pushUnknown();
            }
        }
        return this.classifyCdsAndReturn(vtx, acc.build(vtx.id));
    }
    /**
     * Resolves a variable definition or use origin, handling the special cases of
     * scope-escaped variables (DefinedByOnCall) and parameter definitions.
     * The outcome is added to `acc`; nothing is returned.
     */
    classifyVariableOrigin(definitionId, acc) {
        const v = this.dfg.getVertex(definitionId);
        if (!v) {
            acc.pushUnknown();
            return;
        }
        // if the referenced definition is linked via defined-by-on-call to another
        // id (e.g., a parameter linked to a caller argument), mark it as a Scope origin
        // (the checks below still run and add their own contribution)
        if (this.isDefinedByOnCall(v.id)) {
            acc.types.push(InputType.Scope);
            acc.values.push(undefined);
            acc.allPure = false;
        }
        // if this is a variable definition that is a parameter, classify as Parameter
        if (v.tag === vertex_1.VertexType.VariableDefinition && this.dfg.idMap?.get(v.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
            acc.types.push(InputType.Parameter);
            acc.values.push(undefined);
            return;
        }
        acc.merge(this.classifyEntry(v));
    }
    /** Merges the classification of the vertex with the given id into `acc`, or Unknown if there is no such vertex. */
    classifyByVertex(id, acc) {
        const v = this.dfg.getVertex(id);
        if (v) {
            acc.merge(this.classifyEntry(v));
        }
        else {
            acc.pushUnknown();
        }
    }
    /**
     * Classifies a variable definition: parameter names become Parameter, definitions
     * without recorded sources become Unknown, everything else combines its sources.
     */
    classifyVariableDefinition(vtx) {
        // parameter definitions are classified as Parameter
        if (this.dfg.idMap?.get(vtx.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Parameter], trace: InputTraceType.Unknown });
        }
        const sources = vtx.source;
        if (sources === undefined || sources.length === 0) {
            // fallback to unknown if we cannot find the value
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = new ClassificationAccumulator();
        for (const tid of sources) {
            this.classifyByVertex(tid, acc);
        }
        return this.classifyCdsAndReturn(vtx, acc.build(vtx.id));
    }
    /**
     * Finalizes a classification: adds the types (and transitive control-dependency types)
     * of every control dependency of `vtx` to `src.cds`, stores `src` in the cache under
     * `vtx.id`, and returns it.
     *
     * Note that `src` is modified in place, so callers must pass an object they own.
     */
    classifyCdsAndReturn(vtx, src) {
        if (vtx.cds) {
            const collected = [];
            for (const c of vtx.cds) {
                const cv = this.dfg.getVertex(c.id);
                if (!cv) {
                    continue;
                }
                const e = this.classifyEntry(cv);
                collected.push(...e.types);
                if (e.cds) {
                    collected.push(...e.cds);
                }
            }
            const cds = (0, arrays_1.uniqueArray)(collected.concat(src.cds ?? []));
            if (cds.length > 0) {
                src.cds = cds;
            }
        }
        // never expose an empty list of control-dependency types
        if (src.cds?.length === 0) {
            delete src.cds;
        }
        this.cache.set(vtx.id, src);
        return src;
    }
}
/**
 * Lattice flattening until we have a taint engine :)
 * Please note that the classifier considers this basis with a set-lift,
 * joining differing lattice elements.
 *
 *```
 *            [ Unknown ]
 *                 |
 *  [Param] [File] [Net] [User], ...
 *                 |
 *        [ DerivedConstant ]
 *                 |
 *           [ Constant ]
 *```
 *
 */
// `var` is kept on purpose: it mirrors the hoisted binding of the compiled enum,
// which the definitions earlier in this file refer to.
var InputType = {
    Parameter: "param",
    File: "file",
    Network: "net",
    Random: "rand",
    /** Calls to system/system2 and similar */
    System: "system",
    /** Calls to .C / Fortran interfaces (foreign function interfaces) */
    Ffi: "ffi",
    /** Language objects (quote/substitute/etc.) */
    Lang: "lang",
    /** Global options / option accessors (options, getOption) */
    Options: "options",
    /** Interactive user input (file choosers, prompts, dialogs, menu selections) */
    User: "user",
    Constant: "const",
    /** Read from environment/call scope */
    Scope: "scope",
    /** Pure calculations from constants that lead to a constant */
    DerivedConstant: "dconst",
    Unknown: "unknown"
};
exports.InputType = InputType;
/**
 * Describes how a classification was traced back to its origin.
 */
// `var` is kept on purpose: it mirrors the hoisted binding of the compiled enum,
// which the definitions earlier in this file refer to.
var InputTraceType = {
    /** Derived only from aliasing */
    Alias: "alias",
    /** Derived from pure function chains */
    Pure: "pure",
    /** Derived from known but not necessarily all pure function chains */
    Known: "known",
    /** Not fully known origin */
    Unknown: "unknown"
};
exports.InputTraceType = InputTraceType;
/**
 * Checks whether a function call is named by one of the entries of a list.
 * An entry matches if it equals the call's id, or if it is an identifier
 * that matches the call's name.
 * @param fn - the call to look for (its `id` and `name` are used)
 * @param list - the entries to compare against; may be `undefined`
 * @returns `true` on the first matching entry, `false` if the list is missing, empty, or has no match
 */
function matchesList(fn, list) {
    const nothingToMatch = list === undefined || list.length === 0;
    if (!nothingToMatch) {
        const { Identifier } = identifier_1;
        for (const candidate of list) {
            if (candidate === fn.id) {
                return true;
            }
            if (Identifier.is(candidate) && Identifier.matches(candidate, fn.name)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Classifies the inputs of the vertex with the given id:
 * - for a function call, every non-empty argument whose reference resolves to a vertex of
 *   `dfg` is classified, and the result holds one entry per such argument (in argument order);
 * - for anything else, the vertex itself is treated as the "input", so the result holds
 *   exactly one entry - the one for the id you passed in.
 *
 * An id that is unknown to `dfg` yields an empty array.
 *
 * Dependencies are traced through the dataflow graph using the function lists in `config`.
 * For the scope escape analysis, pass on the full, non-reduced DFG as `fullDfg`.
 */
function classifyInput(id, dfg, config, fullDfg) {
    const target = dfg.getVertex(id);
    if (!target) {
        return [];
    }
    const classifier = new InputClassifier(dfg, config, fullDfg);
    if (target.tag !== vertex_1.VertexType.FunctionCall) {
        return [classifier.classifyEntry(target)];
    }
    const { FunctionArgument } = graph_1;
    const sources = [];
    for (const argument of target.args) {
        // empty arguments, arguments without reference and dangling references are skipped
        const reference = FunctionArgument.isEmpty(argument) ? undefined : FunctionArgument.getReference(argument);
        const argumentVertex = reference === undefined ? undefined : dfg.getVertex(reference);
        if (argumentVertex !== undefined) {
            sources.push(classifier.classifyEntry(argumentVertex));
        }
    }
    return sources;
}
//# sourceMappingURL=simple-input-classifier.js.map