UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

308 lines 13.2 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputTraceType = exports.InputType = void 0;
exports.classifyInput = classifyInput;
const graph_1 = require("../../../dataflow/graph/graph");
const objects_1 = require("../../../util/objects");
const vertex_1 = require("../../../dataflow/graph/vertex");
const df_helper_1 = require("../../../dataflow/graph/df-helper");
const identifier_1 = require("../../../dataflow/environments/identifier");
const assert_1 = require("../../../util/assert");
const arrays_1 = require("../../../util/collections/arrays");
const built_in_proc_name_1 = require("../../../dataflow/environments/built-in-proc-name");
/**
 * Classifies vertices of a dataflow graph by the kind of input they represent
 * (see {@link InputType}) and by how that classification was derived
 * (see {@link InputTraceType}).
 *
 * Every classification is an object `{ id, type, trace, cds? }` where `type` is a list
 * of input types, and `cds` (if present) lists the input types observed on control dependencies.
 * Results are memoized per vertex id in `cache`.
 */
class InputClassifier {
    dfg;
    config;
    cache = new Map();
    /**
     * @param dfg    - the dataflow graph to resolve vertices in
     * @param config - lists of function identifiers (`pureFns`, `readFileFns`, `networkFns`, `randomFns`)
     */
    constructor(dfg, config) {
        this.dfg = dfg;
        this.config = config;
    }
    /**
     * Classifies a single vertex, dispatching on its tag.
     * Returns the cached classification if the vertex has been seen before.
     */
    classifyEntry(vertex) {
        const cached = this.cache.get(vertex.id);
        if (cached) {
            return cached;
        }
        // insert temporary unknown to break cycles
        this.cache.set(vertex.id, { id: vertex.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        switch (vertex.tag) {
            case vertex_1.VertexType.Value:
                return this.classifyCdsAndReturn(vertex, { id: vertex.id, type: [InputType.Constant], trace: InputTraceType.Unknown });
            case vertex_1.VertexType.FunctionCall:
                return this.classifyFunctionCall(vertex);
            case vertex_1.VertexType.VariableDefinition:
                return this.classifyVariableDefinition(vertex);
            case vertex_1.VertexType.Use:
                return this.classifyVariable(vertex);
            default:
                return this.classifyCdsAndReturn(vertex, { id: vertex.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
    }
    /**
     * Returns the index of the argument that decides the classification of a control-flow call:
     * `0` for if-then-else and while loops, `1` for for loops, `undefined` for any other call.
     */
    conditionArgumentIndex(call) {
        if (call.origin.includes(built_in_proc_name_1.BuiltInProcName.IfThenElse) || call.origin.includes(built_in_proc_name_1.BuiltInProcName.WhileLoop)) {
            return 0;
        }
        if (call.origin.includes(built_in_proc_name_1.BuiltInProcName.ForLoop)) {
            return 1;
        }
        return undefined;
    }
    /**
     * Creates an independent copy of `entry` that is keyed to `id`.
     * Required whenever the classification of one vertex is reused for another one, because
     * {@link classifyCdsAndReturn} mutates and caches the object it receives.
     */
    copyEntryFor(id, entry) {
        const copy = { ...entry, id, type: [...entry.type] };
        if (entry.cds) {
            copy.cds = [...entry.cds];
        }
        return copy;
    }
    /**
     * Classifies a function call:
     * - if/while/for calls take over the classification of their condition argument (if it can be resolved),
     * - calls not listed in `config.pureFns` are classified as file/network/random input if listed accordingly, and as unknown otherwise,
     * - calls listed in `config.pureFns` are classified by the union of their argument classifications.
     */
    classifyFunctionCall(call) {
        const conditionIndex = this.conditionArgumentIndex(call);
        if (conditionIndex !== undefined) {
            const condition = graph_1.FunctionArgument.getReference(call.args[conditionIndex]);
            // test for absence explicitly (as the argument loop below does), so that a
            // falsy but valid reference such as the numeric id 0 is not skipped
            if (condition !== undefined && condition !== null) {
                const vtx = this.dfg.getVertex(condition);
                if (vtx) {
                    // classify a copy: the cached entry of the condition must neither receive the
                    // control dependencies of this call nor be returned under the condition's id
                    return this.classifyCdsAndReturn(call, this.copyEntryFor(call.id, this.classifyEntry(vtx)));
                }
            }
        }
        if (!matchesList(call, this.config.pureFns)) {
            if (matchesList(call, this.config.readFileFns)) {
                return this.classifyCdsAndReturn(call, { id: call.id, type: [InputType.File], trace: InputTraceType.Unknown });
            }
            else if (matchesList(call, this.config.networkFns)) {
                return this.classifyCdsAndReturn(call, { id: call.id, type: [InputType.Network], trace: InputTraceType.Unknown });
            }
            else if (matchesList(call, this.config.randomFns)) {
                return this.classifyCdsAndReturn(call, { id: call.id, type: [InputType.Random], trace: InputTraceType.Unknown });
            }
            else {
                // if it is not pure, we cannot classify based on the inputs, in that case we do not know!
                return this.classifyCdsAndReturn(call, { id: call.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
            }
        }
        // Otherwise, classify by arguments; pure functions get Known/Pure handling
        const argTypes = [];
        const cdTypes = [];
        for (const arg of call.args) {
            if (graph_1.FunctionArgument.isEmpty(arg)) {
                continue;
            }
            const ref = graph_1.FunctionArgument.getReference(arg);
            if (ref === undefined) {
                argTypes.push(InputType.Unknown);
                continue;
            }
            const argVtx = this.dfg.getVertex(ref);
            if (!argVtx) {
                argTypes.push(InputType.Unknown);
                continue;
            }
            const classified = this.classifyEntry(argVtx);
            // collect all observed types from this argument
            argTypes.push(...classified.type);
            if (classified.cds) {
                cdTypes.push(...classified.cds);
            }
        }
        const cds = cdTypes.length > 0 ? (0, arrays_1.uniqueArray)(cdTypes) : undefined;
        // all arguments only contain constant-like types -> derived constant
        const allConstLike = argTypes.length > 0 && argTypes.every(t => t === InputType.Constant || t === InputType.DerivedConstant);
        if (allConstLike) {
            return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, type: [InputType.DerivedConstant], trace: InputTraceType.Pure, cds }));
        }
        // a pure call without (classifiable) arguments is treated as a derived constant
        const types = argTypes.length === 0 ? [InputType.DerivedConstant] : (0, arrays_1.uniqueArray)(argTypes);
        return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, type: types, trace: InputTraceType.Known, cds }));
    }
    /**
     * Folds the classification of `v` into the accumulator `acc` (`{ types, cds, allPure }`).
     * A missing vertex contributes {@link InputType.Unknown} and leaves `allPure` untouched.
     */
    mergeVertex(acc, v) {
        if (!v) {
            acc.types.push(InputType.Unknown);
            return;
        }
        const c = this.classifyEntry(v);
        acc.types.push(...c.type);
        if (c.cds) {
            acc.cds.push(...c.cds);
        }
        if (c.trace !== InputTraceType.Pure) {
            acc.allPure = false;
        }
    }
    /**
     * Turns an accumulator filled by {@link mergeVertex} into the final classification of `vtx`:
     * types and cds are de-duplicated, no types at all yields unknown, and the trace is
     * pure only if every merged classification was pure (alias otherwise).
     */
    finishMerged(vtx, acc) {
        const type = acc.types.length === 0 ? [InputType.Unknown] : (0, arrays_1.uniqueArray)(acc.types);
        const trace = acc.allPure ? InputTraceType.Pure : InputTraceType.Alias;
        return this.classifyCdsAndReturn(vtx, { id: vtx.id, type, trace, cds: acc.cds.length === 0 ? undefined : (0, arrays_1.uniqueArray)(acc.cds) });
    }
    /**
     * Classifies a variable use by merging the classifications of all of its origins
     * as reported by `Dataflow.origin`. Without origins the use is unknown.
     */
    classifyVariable(vtx) {
        const origins = df_helper_1.Dataflow.origin(this.dfg, vtx.id);
        if (origins === undefined) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = { types: [], cds: [], allPure: true };
        for (const o of origins) {
            if (o.type === 4 /* OriginType.ConstantOrigin */) {
                acc.types.push(InputType.Constant);
                continue;
            }
            if (o.type === 0 /* OriginType.ReadVariableOrigin */ || o.type === 1 /* OriginType.WriteVariableOrigin */) {
                const v = this.dfg.getVertex(o.id);
                // if this is a variable definition that is a parameter, classify as Parameter
                if (v && v.tag === vertex_1.VertexType.VariableDefinition && this.dfg.idMap?.get(v.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
                    acc.types.push(InputType.Parameter);
                    continue;
                }
                this.mergeVertex(acc, v);
                continue;
            }
            if (o.type === 2 /* OriginType.FunctionCallOrigin */ || o.type === 3 /* OriginType.BuiltInFunctionOrigin */) {
                this.mergeVertex(acc, this.dfg.getVertex(o.id));
                continue;
            }
            // unknown origin type
            acc.types.push(InputType.Unknown);
        }
        return this.finishMerged(vtx, acc);
    }
    /**
     * Classifies a variable definition: parameter names are classified as parameter,
     * everything else by merging the classifications of the vertices listed in `vtx.source`.
     */
    classifyVariableDefinition(vtx) {
        // parameter definitions are classified as Parameter
        if (this.dfg.idMap?.get(vtx.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) {
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Parameter], trace: InputTraceType.Unknown });
        }
        const sources = vtx.source;
        if (sources === undefined || sources.length === 0) {
            // fallback to unknown if we cannot find the value
            return this.classifyCdsAndReturn(vtx, { id: vtx.id, type: [InputType.Unknown], trace: InputTraceType.Unknown });
        }
        const acc = { types: [], cds: [], allPure: true };
        for (const tid of sources) {
            this.mergeVertex(acc, this.dfg.getVertex(tid));
        }
        return this.finishMerged(vtx, acc);
    }
    /**
     * Adds the input types found on the control dependencies of `vtx` to `src.cds`,
     * stores `src` in the cache under `vtx.id`, and returns it.
     *
     * Note that `src` is modified in place, so it must not be shared with another cache entry.
     */
    classifyCdsAndReturn(vtx, src) {
        if (vtx.cds) {
            const cds = (0, arrays_1.uniqueArray)(vtx.cds.flatMap(c => {
                const cv = this.dfg.getVertex(c.id);
                if (!cv) {
                    return undefined;
                }
                const e = this.classifyEntry(cv);
                return e.cds ? [...e.type, ...e.cds] : [...e.type];
            }).filter(assert_1.isNotUndefined).concat(src.cds ?? []));
            if (cds.length > 0) {
                src.cds = cds;
            }
        }
        // never report an empty list of control-dependency types
        if (src.cds?.length === 0) {
            delete src.cds;
        }
        this.cache.set(vtx.id, src);
        return src;
    }
}
/**
 * Lattice flattening until we have a taint engine :)
 *
 *```
 *              [ Unknown ]
 *        /    /     |     \     \
 *[Param] [File]   [Net]  [Rand] [Scope]
 *        \    \     |     /     /
 *           [ DerivedConstant ]
 *                   |
 *              [ Constant ]
 *```
 *
 */
var InputType;
(function (InputType) {
    InputType["Parameter"] = "param";
    InputType["File"] = "file";
    InputType["Network"] = "net";
    InputType["Random"] = "rand";
    InputType["Constant"] = "const";
    /** Read from environment/call scope */
    InputType["Scope"] = "scope";
    /** Pure calculations from constants that lead to a constant */
    InputType["DerivedConstant"] = "dconst";
    InputType["Unknown"] = "unknown";
})(InputType || (exports.InputType = InputType = {}));
var InputTraceType;
(function (InputTraceType) {
    /** Derived only from aliasing */
    InputTraceType["Alias"] = "alias";
    /** Derived from pure function chains */
    InputTraceType["Pure"] = "pure";
    /** Derived from known but not necessarily all pure function chains */
    InputTraceType["Known"] = "known";
    /** Not fully known origin */
    InputTraceType["Unknown"] = "unknown";
})(InputTraceType || (exports.InputTraceType = InputTraceType = {}));
/**
 * Checks whether the function call vertex `fn` is covered by `list`, i.e., whether any entry
 * equals the id of the call or matches the name of the call (via `Identifier.matches`).
 * A missing or empty list matches nothing.
 */
function matchesList(fn, list) {
    if (!list || list.length === 0) {
        return false;
    }
    return list.some(id => fn.id === id || identifier_1.Identifier.matches(id, fn.name));
}
/**
 * Takes the given id which is expected to either be:
 * - a function call - in this case all arguments are considered to be inputs (additionally to all read edges from the function call in the dataflow graph)
 * - anything else - in that case the node itself is considered as an "input" - please note that in these scenarios the *return* value will only contain one mapping - that for the id you passed in.
 *
 * This method traces the dependencies in the dataflow graph using the specification of functions passed in.
 * If the id cannot be resolved to a vertex of the graph, the result is empty.
 */
function classifyInput(id, dfg, config) {
    const vtx = dfg.getVertex(id);
    if (!vtx) {
        return [];
    }
    const c = new InputClassifier(dfg, config);
    if (vtx.tag !== vertex_1.VertexType.FunctionCall) {
        return [c.classifyEntry(vtx)];
    }
    const ret = [];
    for (const arg of vtx.args) {
        if (graph_1.FunctionArgument.isEmpty(arg)) {
            continue;
        }
        const ref = graph_1.FunctionArgument.getReference(arg);
        if (ref === undefined) {
            continue;
        }
        const argVtx = dfg.getVertex(ref);
        if (argVtx === undefined) {
            continue;
        }
        ret.push(c.classifyEntry(argVtx));
    }
    return ret;
}
//# sourceMappingURL=simple-input-classifier.js.map