/*
 * @eagleoutice/flowr
 * Version:
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * 308 lines • 13.2 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputTraceType = exports.InputType = void 0;
exports.classifyInput = classifyInput;
const graph_1 = require("../../../dataflow/graph/graph");
const objects_1 = require("../../../util/objects");
const vertex_1 = require("../../../dataflow/graph/vertex");
const df_helper_1 = require("../../../dataflow/graph/df-helper");
const identifier_1 = require("../../../dataflow/environments/identifier");
const assert_1 = require("../../../util/assert");
const arrays_1 = require("../../../util/collections/arrays");
const built_in_proc_name_1 = require("../../../dataflow/environments/built-in-proc-name");
/**
 * Classifies vertices of a dataflow graph by the kind of input their value
 * derives from (see `InputType`) and by how that value was traced (see
 * `InputTraceType`). One result is memoized per vertex id.
 */
class InputClassifier {
    /** Dataflow graph; accessed through `getVertex` and the optional `idMap`. */
    dfg;
    /** Function lists (`pureFns`, `readFileFns`, `networkFns`, `randomFns`) used to classify calls. */
    config;
    /** Results keyed by vertex id; also holds the temporary placeholder that breaks cycles. */
    cache = new Map();

    /**
     * @param dfg    - dataflow graph to read vertices from
     * @param config - lists of function identifiers per category
     */
    constructor(dfg, config) {
        this.dfg = dfg;
        this.config = config;
    }

    /**
     * Classifies a single vertex, dispatching on its tag.
     *
     * @param vertex - the dataflow vertex to classify
     * @returns the (cached) classification `{ id, type, trace, cds? }`
     */
    classifyEntry(vertex) {
        const cached = this.cache.get(vertex.id);
        if (cached) {
            return cached;
        }
        // insert temporary unknown to break cycles: a recursive visit of the same vertex sees this entry
        this.cache.set(vertex.id, { id: vertex.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        switch (vertex.tag) {
            case vertex_1.VertexType.Value:
                return this.classifyCdsAndReturn(vertex, { id: vertex.id, type: [InputType.Constant], trace: InputTraceType.Unknown });
            case vertex_1.VertexType.FunctionCall:
                return this.classifyFunctionCall(vertex);
            case vertex_1.VertexType.VariableDefinition:
                return this.classifyVariableDefinition(vertex);
            case vertex_1.VertexType.Use:
                return this.classifyVariable(vertex);
            default:
                return this.classifyCdsAndReturn(vertex, { id: vertex.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
    }

    /**
     * Classifies a function call:
     * - if/while/for calls take over the classification of their condition argument,
     * - calls not listed in `config.pureFns` are File/Network/Random if listed accordingly, otherwise Unknown,
     * - pure calls are classified by the union of their arguments' types.
     *
     * @param call - a function-call vertex
     * @returns the classification stored for `call.id`
     */
    classifyFunctionCall(call) {
        const conditionVtx = this.getConditionVertex(call);
        if (conditionVtx) {
            // Copy instead of reusing the condition's cached object: classifyCdsAndReturn mutates
            // its argument and caches it under the call id, which must not leak the call's control
            // dependencies (or a foreign id) into the condition's own entry.
            return this.classifyCdsAndReturn(call, { ...this.classifyEntry(conditionVtx), id: call.id });
        }
        if (!matchesList(call, this.config.pureFns)) {
            return this.classifyCdsAndReturn(call, { id: call.id, type: [this.impureCallType(call)], trace: InputTraceType.Unknown });
        }
        // Otherwise, classify by arguments; pure functions get Known/Pure handling
        const argTypes = [];
        const cdTypes = [];
        for (const arg of call.args) {
            if (graph_1.FunctionArgument.isEmpty(arg)) {
                continue;
            }
            const ref = graph_1.FunctionArgument.getReference(arg);
            const argVtx = ref === undefined ? undefined : this.dfg.getVertex(ref);
            if (!argVtx) {
                // no reference or no vertex for it: nothing we can say about this argument
                argTypes.push(InputType.Unknown);
                continue;
            }
            const classified = this.classifyEntry(argVtx);
            // collect all observed types from this argument
            argTypes.push(...classified.type);
            if (classified.cds) {
                cdTypes.push(...classified.cds);
            }
        }
        const cds = cdTypes.length > 0 ? (0, arrays_1.uniqueArray)(cdTypes) : undefined;
        // all arguments only contain constant-like types -> derived constant
        const allConstLike = argTypes.length > 0 && argTypes.every(t => t === InputType.Constant || t === InputType.DerivedConstant);
        if (allConstLike) {
            return this.classifyCdsAndReturn(call, { id: call.id, type: [InputType.DerivedConstant], trace: InputTraceType.Pure, cds });
        }
        // a pure call without any classified argument is still a derived constant, but only with a Known trace
        const types = argTypes.length === 0 ? [InputType.DerivedConstant] : (0, arrays_1.uniqueArray)(argTypes);
        return this.classifyCdsAndReturn(call, { id: call.id, type: types, trace: InputTraceType.Known, cds });
    }

    /**
     * Returns the vertex of the controlling argument of an if/while call (first argument)
     * or a for call (second argument), or `undefined` if `call` is no such call or the
     * argument cannot be resolved to a vertex.
     */
    getConditionVertex(call) {
        const { IfThenElse, WhileLoop, ForLoop } = built_in_proc_name_1.BuiltInProcName;
        let index;
        if (call.origin.includes(IfThenElse) || call.origin.includes(WhileLoop)) {
            index = 0;
        }
        else if (call.origin.includes(ForLoop)) {
            index = 1;
        }
        else {
            return undefined;
        }
        const ref = graph_1.FunctionArgument.getReference(call.args[index]);
        // compare against undefined explicitly: a plain truthiness check would drop a node id of 0
        return ref === undefined ? undefined : this.dfg.getVertex(ref);
    }

    /** Maps a call that is not listed as pure to File, Network, Random or (if in none of the lists) Unknown. */
    impureCallType(call) {
        if (matchesList(call, this.config.readFileFns)) {
            return InputType.File;
        }
        if (matchesList(call, this.config.networkFns)) {
            return InputType.Network;
        }
        if (matchesList(call, this.config.randomFns)) {
            return InputType.Random;
        }
        // if it is not pure, we cannot classify based on the inputs, in that case we do not know!
        return InputType.Unknown;
    }

    /**
     * Classifies a variable use by merging the classifications of all of its origins
     * as reported by `Dataflow.origin`.
     *
     * @param vtx - a use vertex
     * @returns the classification stored for `vtx.id`
     */
    classifyVariable(vtx) {
        const origins = df_helper_1.Dataflow.origin(this.dfg, vtx.id);
        if (origins === undefined) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = { types: [], cds: [], allPure: true };
        for (const o of origins) {
            switch (o.type) {
                case 4 /* OriginType.ConstantOrigin */:
                    acc.types.push(InputType.Constant);
                    break;
                case 0 /* OriginType.ReadVariableOrigin */:
                case 1 /* OriginType.WriteVariableOrigin */: {
                    const v = this.dfg.getVertex(o.id);
                    // if this is a variable definition that is a parameter, classify as Parameter
                    if (v && v.tag === vertex_1.VertexType.VariableDefinition && this.isParameter(v.id)) {
                        acc.types.push(InputType.Parameter);
                    }
                    else {
                        this.mergeVertex(v, acc);
                    }
                    break;
                }
                case 2 /* OriginType.FunctionCallOrigin */:
                case 3 /* OriginType.BuiltInFunctionOrigin */:
                    this.mergeVertex(this.dfg.getVertex(o.id), acc);
                    break;
                default:
                    // unknown origin type
                    acc.types.push(InputType.Unknown);
            }
        }
        return this.finishMerged(vtx, acc);
    }

    /**
     * Classifies a variable definition: parameters are `Parameter`, everything else is
     * the merge of the classifications of the vertices listed in `vtx.source`.
     *
     * @param vtx - a variable-definition vertex
     * @returns the classification stored for `vtx.id`
     */
    classifyVariableDefinition(vtx) {
        // parameter definitions are classified as Parameter
        if (this.isParameter(vtx.id)) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Parameter], trace: InputTraceType.Unknown });
        }
        const sources = vtx.source;
        if (sources === undefined || sources.length === 0) {
            // fallback to unknown if we cannot find the value
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = { types: [], cds: [], allPure: true };
        for (const tid of sources) {
            this.mergeVertex(this.dfg.getVertex(tid), acc);
        }
        return this.finishMerged(vtx, acc);
    }

    /** Whether the AST node registered for `id` in `dfg.idMap` has the parameter-name role. */
    isParameter(id) {
        return this.dfg.idMap?.get(id)?.info.role === "param-n" /* RoleInParent.ParameterName */;
    }

    /**
     * Classifies `v` and folds the result into `acc` (`{ types, cds, allPure }`).
     * A missing vertex contributes `Unknown` and leaves `allPure` untouched.
     */
    mergeVertex(v, acc) {
        if (!v) {
            acc.types.push(InputType.Unknown);
            return;
        }
        const c = this.classifyEntry(v);
        acc.types.push(...c.type);
        if (c.cds) {
            acc.cds.push(...c.cds);
        }
        if (c.trace !== InputTraceType.Pure) {
            acc.allPure = false;
        }
    }

    /** Turns an accumulator filled by `mergeVertex` into the final, cached entry for `vtx`. */
    finishMerged(vtx, acc) {
        const type = acc.types.length === 0 ? [InputType.Unknown] : (0, arrays_1.uniqueArray)(acc.types);
        const trace = acc.allPure ? InputTraceType.Pure : InputTraceType.Alias;
        return this.classifyCdsAndReturn(vtx, { id: vtx.id, type, trace, cds: (0, arrays_1.uniqueArray)(acc.cds) });
    }

    /**
     * Extends `src.cds` with the types (and transitive `cds`) of every control dependency
     * of `vtx`, stores `src` in the cache under `vtx.id` and returns it.
     *
     * `src` is modified in place, so callers must pass an object they own. An empty or
     * undefined `cds` is removed so that all entries have the same shape.
     *
     * @param vtx - the vertex the entry belongs to
     * @param src - the entry computed so far
     * @returns `src`
     */
    classifyCdsAndReturn(vtx, src) {
        if (vtx.cds) {
            const inherited = [];
            for (const c of vtx.cds) {
                const cv = this.dfg.getVertex(c.id);
                if (!cv) {
                    continue;
                }
                const e = this.classifyEntry(cv);
                inherited.push(...e.type);
                if (e.cds) {
                    inherited.push(...e.cds);
                }
            }
            const cds = (0, arrays_1.uniqueArray)(inherited.concat(src.cds ?? []));
            if (cds.length > 0) {
                src.cds = cds;
            }
        }
        if (src.cds === undefined || src.cds.length === 0) {
            delete src.cds;
        }
        this.cache.set(vtx.id, src);
        return src;
    }
}
/**
 * The kinds of input a classified value may be derived from.
 *
 * Lattice flattening until we have a taint engine :)
 *
 *```
 *             [ Unknown ]
 *      /      /    |    \      \
 * [Param] [File] [Net] [Rand] [Scope]
 *      \      \    |    /      /
 *         [ DerivedConstant ]
 *                  |
 *            [ Constant ]
 *```
 *
 */
var InputType = {
    Parameter: "param",
    File: "file",
    Network: "net",
    Random: "rand",
    Constant: "const",
    /** Read from environment/call scope */
    Scope: "scope",
    /** Pure calculations from constants that lead to a constant */
    DerivedConstant: "dconst",
    Unknown: "unknown",
};
exports.InputType = InputType;
/** Describes how the classified type of a value was traced back through the dataflow graph. */
var InputTraceType = {
    /** Derived only from aliasing */
    Alias: "alias",
    /** Derived from pure function chains */
    Pure: "pure",
    /** Derived from known but not necessarily all pure function chains */
    Known: "known",
    /** Not fully known origin */
    Unknown: "unknown",
};
exports.InputTraceType = InputTraceType;
/**
 * Checks whether the call vertex `fn` is covered by `list`.
 *
 * An entry covers the call if it is strictly equal to the call's `id` or if
 * `Identifier.matches(entry, fn.name)` holds.
 *
 * @param fn   - call vertex; its `id` and `name` are inspected
 * @param list - candidate entries; may be missing or empty
 * @returns `true` if at least one entry covers the call, `false` otherwise
 */
function matchesList(fn, list) {
    const hasCandidates = Boolean(list) && list.length !== 0;
    if (!hasCandidates) {
        return false;
    }
    return list.some(candidate => candidate === fn.id || identifier_1.Identifier.matches(candidate, fn.name));
}
/**
 * Classifies the inputs of the vertex with the given id. The id is expected to refer to either:
 * - a function call - in this case every non-empty argument that resolves to a vertex of the
 *   dataflow graph is classified, and one entry per such argument is returned (in argument order)
 * - anything else - in that case the node itself is considered the "input"; the returned array
 *   then contains exactly one entry, the one for the id you passed in.
 *
 * Dependencies are traced through the dataflow graph using the function lists given in `config`.
 *
 * @param id     - id of the vertex to inspect
 * @param dfg    - dataflow graph containing the vertex
 * @param config - function lists handed to the classifier
 * @returns the classification entries; empty if the graph has no vertex for `id`
 */
function classifyInput(id, dfg, config) {
    const root = dfg.getVertex(id);
    if (!root) {
        return [];
    }
    const classifier = new InputClassifier(dfg, config);
    if (root.tag !== vertex_1.VertexType.FunctionCall) {
        return [classifier.classifyEntry(root)];
    }
    // one callback per argument keeps the lookup/classification order of a plain loop
    return root.args.flatMap(argument => {
        if (graph_1.FunctionArgument.isEmpty(argument)) {
            return [];
        }
        const reference = graph_1.FunctionArgument.getReference(argument);
        if (reference === undefined) {
            return [];
        }
        const target = dfg.getVertex(reference);
        return target === undefined ? [] : [classifier.classifyEntry(target)];
    });
}
//# sourceMappingURL=simple-input-classifier.js.map