UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

405 lines 16.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.InputTraceType = exports.InputType = void 0; exports.classifyInput = classifyInput; const graph_1 = require("../../../dataflow/graph/graph"); const objects_1 = require("../../../util/objects"); const vertex_1 = require("../../../dataflow/graph/vertex"); const df_helper_1 = require("../../../dataflow/graph/df-helper"); const edge_1 = require("../../../dataflow/graph/edge"); const identifier_1 = require("../../../dataflow/environments/identifier"); const assert_1 = require("../../../util/assert"); const arrays_1 = require("../../../util/collections/arrays"); const built_in_proc_name_1 = require("../../../dataflow/environments/built-in-proc-name"); const record_1 = require("../../../util/record"); const r_number_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-number"); const r_string_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-string"); const r_logical_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-logical"); const r_symbol_1 = require("../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol"); const convert_values_1 = require("../../../r-bridge/lang-4.x/convert-values"); function isConstantLike(type) { return type === InputType.Constant || type === InputType.DerivedConstant; } /** Returns the common value shared by all defined entries, or `undefined` if they disagree or all are `undefined`. */ function singleValue(values) { let result; let seen = false; for (const v of values) { if (v === undefined) { return undefined; } if (!seen) { result = v; seen = true; } else if (v !== result) { return undefined; } } return result; } /** * Accumulates types, control-dependency types, values, and purity while traversing origin * chains. Call {@link build} to produce the resulting {@link InputSource}. */ class ClassificationAccumulator { types = []; cds = []; values = []; allPure = true; merge(c) { this.types.push(...c.types); this.values.push(c.value); if (c.cds) { this.cds.push(...c.cds); } if (c.trace !== InputTraceType.Pure) { this.allPure = false; } } pushUnknown() { this.types.push(InputType.Unknown); this.values.push(undefined); } build(id) { const types = this.types.length === 0 ? [InputType.Unknown] : (0, arrays_1.uniqueArray)(this.types); const trace = this.allPure ? InputTraceType.Pure : InputTraceType.Alias; const src = { id, types, trace }; const cds = this.cds.length === 0 ? undefined : (0, arrays_1.uniqueArray)(this.cds); if (cds) { src.cds = cds; } if (types.every(isConstantLike)) { const v = singleValue(this.values); if (v !== undefined) { src.value = v; } } return src; } } class InputClassifier { dfg; config; cache = new Map(); fullDfg; constructor(dfg, config, fullDfg) { this.dfg = dfg; this.config = config; this.fullDfg = fullDfg; } isDefinedByOnCall(id) { const out = (this.fullDfg ?? this.dfg).outgoingEdges(id) ?? new Map(); return out.values().some(e => edge_1.DfEdge.includesType(e, edge_1.EdgeType.DefinedByOnCall)); } extractConstantValue(id) { const node = this.dfg.idMap?.get(id); if (node === undefined) { return undefined; } if (r_number_1.RNumber.is(node)) { return node.content.num; } if (r_string_1.RString.is(node)) { return node.content.str; } if (r_logical_1.RLogical.is(node)) { return node.content; } if (r_symbol_1.RSymbol.is(node) && node.content === convert_values_1.RNull) { return null; } return undefined; } classifyEntry(vertex) { const cached = this.cache.get(vertex.id); if (cached) { return cached; } // insert temporary unknown to break cycles this.cache.set(vertex.id, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown }); switch (vertex.tag) { case vertex_1.VertexType.Value: { const src = { id: vertex.id, types: [InputType.Constant], trace: InputTraceType.Unknown }; const v = this.extractConstantValue(vertex.id); if (v !== undefined) { src.value = v; } return this.classifyCdsAndReturn(vertex, src); } case vertex_1.VertexType.FunctionCall: return this.classifyFunctionCall(vertex); case vertex_1.VertexType.VariableDefinition: return this.classifyVariableDefinition(vertex); case vertex_1.VertexType.Use: return this.classifyVariable(vertex); default: return this.classifyCdsAndReturn(vertex, { id: vertex.id, types: [InputType.Unknown], trace: InputTraceType.Unknown }); } } classifyFunctionCall(call) { if (call.origin.includes(built_in_proc_name_1.BuiltInProcName.IfThenElse) || call.origin.includes(built_in_proc_name_1.BuiltInProcName.WhileLoop)) { const condition = graph_1.FunctionArgument.getReference(call.args[0]); if (condition) { const vtx = this.dfg.getVertex(condition); if (vtx) { return this.classifyCdsAndReturn(call, this.classifyEntry(vtx)); } } } else if (call.origin.includes(built_in_proc_name_1.BuiltInProcName.ForLoop)) { const condition = graph_1.FunctionArgument.getReference(call.args[1]); if (condition) { const vtx = this.dfg.getVertex(condition); if (vtx) { return this.classifyCdsAndReturn(call, this.classifyEntry(vtx)); } } } if (!matchesList(call, this.config.pure)) { const types = []; for (const [type, entry] of record_1.Record.entries(this.config)) { if (record_1.Record.values(InputType).includes(type) && matchesList(call, entry)) { types.push(type); } } if (types.length === 0) { // if it is not pure, we cannot classify based on the inputs, in that case we do not know! types.push(InputType.Unknown); } return this.classifyCdsAndReturn(call, { id: call.id, types, trace: InputTraceType.Unknown }); } // Otherwise, classify by arguments; pure functions get Known/Pure handling const argTypes = []; const cdTypes = []; for (const arg of call.args) { if (graph_1.FunctionArgument.isEmpty(arg)) { continue; } const ref = graph_1.FunctionArgument.getReference(arg); if (ref === undefined) { argTypes.push(InputType.Unknown); continue; } const argVtx = this.dfg.getVertex(ref); if (!argVtx) { argTypes.push(InputType.Unknown); continue; } const classified = this.classifyEntry(argVtx); // collect all observed types from this argument argTypes.push(...classified.types); if (classified.cds) { cdTypes.push(...classified.cds); } } const cds = cdTypes.length > 0 ? (0, arrays_1.uniqueArray)(cdTypes) : undefined; // all arguments only contain constant-like types -> derived constant const allConstLike = argTypes.length > 0 && argTypes.every(isConstantLike); if (allConstLike) { return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: [InputType.DerivedConstant], trace: InputTraceType.Pure, cds })); } argTypes.push(InputType.DerivedConstant); return this.classifyCdsAndReturn(call, (0, objects_1.compactRecord)({ id: call.id, types: (0, arrays_1.uniqueArray)(argTypes), trace: InputTraceType.Known, cds })); } classifyVariable(vtx) { const origins = df_helper_1.Dataflow.origin(this.dfg, vtx.id); if (origins === undefined) { return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: this.isDefinedByOnCall(vtx.id) ? [InputType.Scope] : [InputType.Unknown], trace: InputTraceType.Unknown }); } const acc = new ClassificationAccumulator(); for (const o of origins) { if (o.type === 4 /* OriginType.ConstantOrigin */) { acc.types.push(InputType.DerivedConstant); acc.values.push(this.extractConstantValue(o.id)); } else if (o.type === 0 /* OriginType.ReadVariableOrigin */ || o.type === 1 /* OriginType.WriteVariableOrigin */) { this.classifyVariableOrigin(o.id, acc); } else if (o.type === 2 /* OriginType.FunctionCallOrigin */ || o.type === 3 /* OriginType.BuiltInFunctionOrigin */) { this.classifyByVertex(o.id, acc); } else { acc.pushUnknown(); } } return this.classifyCdsAndReturn(vtx, acc.build(vtx.id)); } /** * Resolves a variable definition or use origin, handling the special cases of * scope-escaped variables (DefinedByOnCall) and parameter definitions. */ classifyVariableOrigin(definitionId, acc) { const v = this.dfg.getVertex(definitionId); if (!v) { acc.pushUnknown(); return; } // if the referenced definition is linked via defined-by-on-call to another // id (e.g., a parameter linked to a caller argument), mark it as a Scope origin if (this.isDefinedByOnCall(v.id)) { acc.types.push(InputType.Scope); acc.values.push(undefined); acc.allPure = false; } // if this is a variable definition that is a parameter, classify as Parameter if (v.tag === vertex_1.VertexType.VariableDefinition && this.dfg.idMap?.get(v.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) { acc.types.push(InputType.Parameter); acc.values.push(undefined); return; } acc.merge(this.classifyEntry(v)); } classifyByVertex(id, acc) { const v = this.dfg.getVertex(id); if (v) { acc.merge(this.classifyEntry(v)); } else { acc.pushUnknown(); } } classifyVariableDefinition(vtx) { // parameter definitions are classified as Parameter if (this.dfg.idMap?.get(vtx.id)?.info.role === "param-n" /* RoleInParent.ParameterName */) { return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Parameter], trace: InputTraceType.Unknown }); } const sources = vtx.source; if (sources === undefined || sources.length === 0) { // fallback to unknown if we cannot find the value return this.classifyCdsAndReturn(vtx, { id: vtx.id, types: [InputType.Unknown], trace: InputTraceType.Unknown }); } const acc = new ClassificationAccumulator(); for (const tid of sources) { const tv = this.dfg.getVertex(tid); if (tv) { acc.merge(this.classifyEntry(tv)); } else { acc.pushUnknown(); } } return this.classifyCdsAndReturn(vtx, acc.build(vtx.id)); } classifyCdsAndReturn(vtx, src) { if (vtx.cds) { const cds = (0, arrays_1.uniqueArray)(vtx.cds.flatMap(c => { const cv = this.dfg.getVertex(c.id); if (!cv) { return undefined; } const e = this.classifyEntry(cv); return e.cds ? [...e.types, ...e.cds] : [...e.types]; }).filter(assert_1.isNotUndefined).concat(src.cds ?? [])); if (cds.length > 0) { src.cds = cds; } } if (src.cds?.length === 0) { delete src.cds; } this.cache.set(vtx.id, src); return src; } } /** * Lattice flattening until we have a taint engine :) * Please note that the classifier considers this basis with a set-lift, * joining differing lattice elements. * *``` * [ Unknown ] * | * [Param] [File] [Net] [User], ... * | * [ DerivedConstant ] * | * [ Constant ] *``` * */ var InputType; (function (InputType) { InputType["Parameter"] = "param"; InputType["File"] = "file"; InputType["Network"] = "net"; InputType["Random"] = "rand"; /** Calls to system/system2 and similar */ InputType["System"] = "system"; /** Calls to .C / Fortran interfaces (foreign function interfaces) */ InputType["Ffi"] = "ffi"; /** Language objects (quote/substitute/etc.) */ InputType["Lang"] = "lang"; /** Global options / option accessors (options, getOption) */ InputType["Options"] = "options"; /** Interactive user input (file choosers, prompts, dialogs, menu selections) */ InputType["User"] = "user"; InputType["Constant"] = "const"; /** Read from environment/call scope */ InputType["Scope"] = "scope"; /** Pure calculations from constants that lead to a constant */ InputType["DerivedConstant"] = "dconst"; InputType["Unknown"] = "unknown"; })(InputType || (exports.InputType = InputType = {})); var InputTraceType; (function (InputTraceType) { /** Derived only from aliasing */ InputTraceType["Alias"] = "alias"; /** Derived from pure function chains */ InputTraceType["Pure"] = "pure"; /** Derived from known but not necessarily all pure function chains */ InputTraceType["Known"] = "known"; /** Not fully known origin */ InputTraceType["Unknown"] = "unknown"; })(InputTraceType || (exports.InputTraceType = InputTraceType = {})); function matchesList(fn, list) { if (list === undefined || list.length === 0) { return false; } for (const id of list) { if (fn.id === id || (identifier_1.Identifier.is(id) && identifier_1.Identifier.matches(id, fn.name))) { return true; } } return false; } /** * Takes the given id which is expected to either be: * - a function call - in this case all arguments are considered to be inputs (additionally to all read edges from the function call in the dataflow graph) * - anything else - in that case the node itself is considered as an "input" - please note that in these scenarios the *return* value will only contain one mapping - that for the id you passed in. * * This method traces the dependencies in the dataflow graph using the specification of functions passed in. * For the scope escape analysis, pass on the full, non-reduced DFG as `fullDfg`. */ function classifyInput(id, dfg, config, fullDfg) { const vtx = dfg.getVertex(id); if (!vtx) { return []; } const c = new InputClassifier(dfg, config, fullDfg); if (vtx.tag === vertex_1.VertexType.FunctionCall) { const ret = []; const args = vtx.args; for (const arg of args) { if (graph_1.FunctionArgument.isEmpty(arg)) { continue; } const ref = graph_1.FunctionArgument.getReference(arg); if (ref === undefined) { continue; } const argVtx = dfg.getVertex(ref); if (argVtx === undefined) { continue; } ret.push(c.classifyEntry(argVtx)); } return ret; } else { return [ c.classifyEntry(vtx) ]; } } //# sourceMappingURL=simple-input-classifier.js.map