UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

209 lines (208 loc) 10.8 kB
import type { BuiltInIdentifierConstant, BuiltInIdentifierDefinition } from './built-in';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { ControlDependency } from '../info';

/** Safe-guard type: keeps branded identifiers from being mixed up with normal strings. */
export type BrandedIdentifier = string & {
    __brand?: 'identifier';
};

/** Safe-guard type: keeps branded namespaces from being mixed up with normal strings. */
export type BrandedNamespace = string & {
    __brand?: 'namespace';
};

/**
 * The fundamental representation of a binding name in R: an identifier referred to by its name.
 *
 * Two shapes are possible:
 * - a simple name such as `a`, or
 * - a namespaced name such as `pkg::a`, which is stored as `['a', 'pkg']`.
 *
 * The namespace is deliberately stored second, so that the actual name of a namespaced
 * identifier is directly available via `id[0]`.
 * @see {@link Identifier.getName} - to get the name part
 * @see {@link Identifier.getNamespace} - to get the namespace part
 * @see {@link Identifier.accessesInternal} - to check if the identifier accesses internal objects (`:::`)
 * @see {@link Identifier.toString} - to convert the identifier to a string representation
 */
export type Identifier =
    | BrandedIdentifier
    | [id: BrandedIdentifier, namespace: BrandedNamespace, internal?: boolean];

/**
 * Collection of helpers for working with {@link Identifier|identifiers}.
 * To decide whether two identifiers match according to R's scoping rules, use {@link Identifier.matches}!
 * @example
 * ```ts
 * const id1 = Identifier.make('a', 'pkg');
 * const id2 = Identifier.parse('pkg::a');
 * const id3 = Identifier.parse('a');
 * Identifier.matches(id1, id2); // true
 * Identifier.matches(id3, id2); // true, as id3 has no namespace
 * ```
 */
export declare const Identifier: {
    readonly name: 'Identifier';

    /**
     * Build an identifier from a name and, optionally, a namespace.
     * Note that `internal` only counts if a namespace is provided as well!
     */
    readonly make: (
        this: void,
        name: BrandedIdentifier,
        namespace?: BrandedNamespace,
        internal?: boolean
    ) => Identifier;

    /**
     * Type guard verifying that an unknown element has a valid identifier shape.
     */
    readonly is: (this: void, id: unknown) => id is Identifier;

    /**
     * Parse an identifier from its string representation.
     * Be aware that writing `"pkg::a"` in R refers to a symbol named `pkg::a`
     * and NOT to the namespaced identifier `a` in package `pkg`.
     * For that scenario, see {@link Identifier.make} instead.
     */
    readonly parse: (this: void, str: string) => Identifier;

    /**
     * Retrieve the name part of the identifier.
     */
    readonly getName: (this: void, id: Identifier) => BrandedIdentifier;

    /**
     * Retrieve the namespace part of the identifier, or `undefined` if there is none.
     */
    readonly getNamespace: (this: void, id: Identifier) => BrandedNamespace | undefined;

    /**
     * Tell whether the identifier accesses internal objects (`:::`).
     */
    readonly accessesInternal: (this: void, id: Identifier) => boolean | undefined;

    /**
     * Produce a **valid R** string representation of the identifier.
     * Namespaces that contain `::` are properly quoted to avoid confusion.
     * @example
     * ```ts
     * Identifier.toString('a') // 'a'
     * Identifier.toString(['a', 'pkg']) // 'pkg::a'
     * Identifier.toString(['a', 'pkg:::internal', true]) // '"pkg:::internal":::a'
     * ```
     */
    readonly toString: (this: void, id: Identifier) => string;

    /**
     * Check whether two identifiers match. Note that this differs from eq!
     * - If the first identifier is not namespaced, it matches any namespace.
     * - If we search for S3 methods (`s3=true`), the target may have an additional suffix after a dot.
     * - If the first identifier is internal, it matches any target (internal or not).
     */
    readonly matches: (this: void, id: Identifier, target: Identifier, s3?: boolean) => boolean;

    /** The special identifier for the `...` argument. */
    readonly dotdotdot: (this: void) => BrandedIdentifier;

    /**
     * Tell whether the identifier is the special `...` argument or one of its accesses (`..1`, `..2`, etc.).
     * For namespaced identifiers this always returns false.
     */
    readonly isDotDotDotAccess: (this: void, id: Identifier) => boolean;

    /**
     * Functor over the name of the identifier.
     */
    readonly mapName: (
        this: void,
        id: Identifier,
        fn: (name: BrandedIdentifier) => BrandedIdentifier
    ) => Identifier;

    /**
     * Functor over the namespace of the identifier.
     */
    readonly mapNamespace: (
        this: void,
        id: Identifier,
        fn: (ns: BrandedNamespace) => BrandedNamespace
    ) => Identifier;

    /**
     * Convert the identifier to its array representation.
     */
    readonly toArray: (
        this: void,
        id: Identifier
    ) => [BrandedIdentifier, BrandedNamespace | undefined, boolean | undefined];
};

/**
 * The type of a reference. Every reference has exactly one reference type, stored as the respective number.
 * When checking, however, we may want to accept one of several types,
 * which is possible by combining the respective bitmasks.
 *
 * Reference types matter because R separates a variable definition from a function
 * when it resolves an {@link Identifier|identifier}.
 * In `c <- 3; print(c(1, 2))` the call to `c` works normally (as the vector constructor),
 * whereas writing `c <- function(...) ..1` overshadows the built-in and causes `print`
 * to only output the first element.
 * @see {@link isReferenceType} - for checking if a (potentially joint) reference type contains a certain type
 * @see {@link ReferenceTypeReverseMapping} - for debugging
 */
export declare enum ReferenceType {
    /** The identifier type is unknown */
    Unknown = 1,
    /** The identifier is defined by a function (includes built-in function) */
    Function = 2,
    /** The identifier is defined by a variable (includes parameter and argument) */
    Variable = 4,
    /** The identifier is defined by a constant (includes built-in constant) */
    Constant = 8,
    /** The identifier is defined by a parameter (which we know nothing about at the moment) */
    Parameter = 16,
    /** The identifier is defined by an argument (which we know nothing about at the moment) */
    Argument = 32,
    /** The identifier is defined by a built-in value/constant */
    BuiltInConstant = 64,
    /** The identifier is defined by a built-in function */
    BuiltInFunction = 128,
    /** Prefix to identify S3 methods; use this, for example, to dispatch a call to `f` which will then link to `f.*` */
    S3MethodPrefix = 256,
    /** Prefix to identify S7 methods; use this, for example, to dispatch a call to `f` which will then link to `f<7>*` */
    S7MethodPrefix = 512
}

/** Reverse mapping of the reference types: get the name from the bitmask (useful for debugging). */
export declare const ReferenceTypeReverseMapping: Map<ReferenceType, string>;

/**
 * Check if the reference types have an overlapping type!
 */
export declare function isReferenceType(t: ReferenceType, target: ReferenceType): boolean;

/**
 * All types of reference (definitions) that can appear within a graph, i.e., those that are not
 * built-in (like the default definition for the assignment operator `<-`).
 * @see {@link InGraphIdentifierDefinition} - for the definition of an identifier within the graph
 */
export type InGraphReferenceType = Exclude<
    ReferenceType,
    ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction
>;

/**
 * An identifier reference points to a variable, like `a` in `b <- a`.
 * Without any surrounding code, `a` produces the identifier reference `a`.
 * Likewise, `b` creates a reference (although it will be an
 * {@link IdentifierDefinition|identifier definition}, which adds even more information).
 *
 * In general, references are merely pointers (with meta-information) to a vertex in the
 * {@link DataflowGraph|dataflow graph}.
 * In the context of the extractor, for example, they indicate the references that are currently
 * (during the analysis at this given node)
 * {@link DataflowInformation#in|read (`in`)}, {@link DataflowInformation#out|written (`out`)},
 * or {@link DataflowInformation#unknownReferences|unknown (`unknownReferences`)}.
 * @see {@link InGraphIdentifierDefinition}
 */
export interface IdentifierReference {
    /**
     * Id of the node representing the reference in the {@link NormalizedAst|normalized AST}
     * and the {@link DataflowGraph|dataflow graph}.
     */
    readonly nodeId: NodeId;

    /**
     * Name the reference is identified by (e.g., the name of the variable);
     * undefined if the reference is "artificial" (e.g., anonymous).
     */
    readonly name: Identifier | undefined;

    /** Type of the reference to be resolved */
    readonly type: ReferenceType;

    /**
     * If the reference is only effective when, for example, an if-then-else condition is true,
     * this references the root of the `if`.
     * As a hacky intermediate solution (until we have pointer-analysis), an empty array may indicate
     * a `maybe` which is due to pointer access (e.g., in `a[x] <- 3`).
     */
    cds?: ControlDependency[] | undefined;
}

/**
 * The definition of an {@link Identifier|identifier} within the {@link DataflowGraph|graph}.
 * It extends the {@link IdentifierReference} by adding the {@link NodeId} of the definition
 * (and by using `type` to mark the object type).
 *
 * For a code snippet like `a <- 3`, the symbol processor first creates an
 * {@link IdentifierReference|identifier reference} for `a` to reference the use,
 * and then promotes it to an {@link InGraphIdentifierDefinition|identifier definition}.
 * @see {@link IdentifierReference}
 */
export interface InGraphIdentifierDefinition extends IdentifierReference {
    readonly type: InGraphReferenceType;

    /**
     * The assignment node which ultimately defined this identifier
     * (the arrow operator for e.g. `x <- 3`, or the `assign` call in `assign("x", 3)`).
     */
    readonly definedAt: NodeId;

    /**
     * For value tracking: all nodeIds of constant values that may be made available to this identifier.
     * For example, in `x <- 3; y <- x`, the definition of `y` will have the value `3` in its value set.
     */
    readonly value?: NodeId[];
}

/**
 * The stored definition of an identifier within an {@link IEnvironment}.
 *
 * {@link BuiltInIdentifierDefinition} and {@link BuiltInIdentifierConstant} are used for built-in
 * functions and constants only, so the most important one for your day-to-day R script is the
 * {@link InGraphIdentifierDefinition}.
 */
export type IdentifierDefinition =
    | InGraphIdentifierDefinition
    | BuiltInIdentifierDefinition
    | BuiltInIdentifierConstant;