UNPKG

refa

Version:

A library for finite automata and regular expressions in the context of JS RegExp

1,423 lines 116 kB
import { AST, RegExpParser } from "@eslint-community/regexpp"; export interface SourceLocation { start: number; end: number; } interface NodeBase { type: Node["type"]; parent: Node["parent"]; source?: SourceLocation; } export type Element = CharacterClass | Alternation | Quantifier | Assertion | Unknown; export type Parent = Expression | Alternation | Quantifier | Assertion; export type Node = Expression | CharacterClass | Alternation | Quantifier | Assertion | Concatenation | Unknown; export interface Alternation extends NodeBase { type: "Alternation"; parent: Concatenation; alternatives: Concatenation[]; } export interface Assertion extends NodeBase { type: "Assertion"; parent: Concatenation; alternatives: Concatenation[]; kind: "ahead" | "behind"; negate: boolean; } export interface Quantifier extends NodeBase { type: "Quantifier"; parent: Concatenation; alternatives: Concatenation[]; lazy: boolean; min: number; max: number; } export interface CharacterClass extends NodeBase { type: "CharacterClass"; parent: Concatenation; characters: CharSet; } export interface Unknown extends NodeBase { type: "Unknown"; parent: Concatenation; id: string; } export interface Expression extends NodeBase { type: "Expression"; parent: null; alternatives: Concatenation[]; } export interface Concatenation extends NodeBase { type: "Concatenation"; parent: Parent; elements: Element[]; } type NodeIdent = { type: Node["type"]; }; type NoParentArray<T> = { [K in keyof T]: NoParent<T[K]>; }; type NoParentNode<T extends NodeIdent> = { [K in keyof NoParentNodePick<T>]: NoParent<NoParentNodePick<T>[K]>; }; type NoParentNodePick<T extends NodeIdent> = Pick<T, Exclude<keyof T, "parent">>; /** * A view of an AST node that hides the `parent` property. */ export type NoParent<T> = T extends NodeIdent ? NoParentNode<T> : T extends unknown[] ? NoParentArray<T> : T; /** * Sets the `parent` properties of the given node and all of its child nodes. * * @param node * @param parent The parent of `node`. 
*/ export function setParent<T extends Node>(node: T | NoParent<T>, parent: T["parent"]): asserts node is T; /** * Sets the `source` property of the given node and all of its child nodes. * * If `source` is not a function, then the source object will be copied for all `source` properties to be set. The * object will be copied using the `start` and `end` properties alone, other properties will not be copied. * * @param node * @param source * @param overwrite */ export function setSource( node: NoParent<Node>, source: SourceLocation | (() => SourceLocation), overwrite?: boolean ): void; export interface VisitAstHandler { onAlternationEnter?(node: Alternation): void; onAlternationLeave?(node: Alternation): void; onAssertionEnter?(node: Assertion): void; onAssertionLeave?(node: Assertion): void; onCharacterClassEnter?(node: CharacterClass): void; onCharacterClassLeave?(node: CharacterClass): void; onConcatenationEnter?(node: Concatenation): void; onConcatenationLeave?(node: Concatenation): void; onExpressionEnter?(node: Expression): void; onExpressionLeave?(node: Expression): void; onQuantifierEnter?(node: Quantifier): void; onQuantifierLeave?(node: Quantifier): void; onUnknownEnter?(node: Unknown): void; onUnknownLeave?(node: Unknown): void; } export interface VisitNoParentAstHandler { onAlternationEnter?(node: NoParent<Alternation>): void; onAlternationLeave?(node: NoParent<Alternation>): void; onAssertionEnter?(node: NoParent<Assertion>): void; onAssertionLeave?(node: NoParent<Assertion>): void; onCharacterClassEnter?(node: NoParent<CharacterClass>): void; onCharacterClassLeave?(node: NoParent<CharacterClass>): void; onConcatenationEnter?(node: NoParent<Concatenation>): void; onConcatenationLeave?(node: NoParent<Concatenation>): void; onExpressionEnter?(node: NoParent<Expression>): void; onExpressionLeave?(node: NoParent<Expression>): void; onQuantifierEnter?(node: NoParent<Quantifier>): void; onQuantifierLeave?(node: NoParent<Quantifier>): void; 
onUnknownEnter?(node: NoParent<Unknown>): void; onUnknownLeave?(node: NoParent<Unknown>): void; } /** * Calls the given visitor on the given node and all of its children. * * If the given visitor throws an error, the traversal will stop and the error will be re-thrown. * * @param node * @param visitor */ export function visitAst(node: Node, visitor: VisitAstHandler): void; export function visitAst(node: NoParent<Node>, visitor: VisitNoParentAstHandler): void; /** * A transformer is an algorithm that takes an AST subtree and makes any number of modifications to the given subtree. * They cannot see or modify anything outside the given subtree. Transformers are assumed to behave like a set of pure * functions. * * Transformers are always applied bottom-up. * * The most simple transformer is an empty object (`{}`). This is equivalent to a no-op transformer that does not change * the given AST. */ export interface Transformer { /** * An optional name useful for diagnostics. */ readonly name?: string; onAlternation?(node: NoParent<Alternation>, context: TransformContext): void; onAssertion?(node: NoParent<Assertion>, context: TransformContext): void; onCharacterClass?(node: NoParent<CharacterClass>, context: TransformContext): void; onConcatenation?(node: NoParent<Concatenation>, context: TransformContext): void; onExpression?(node: NoParent<Expression>, context: TransformContext): void; onQuantifier?(node: NoParent<Quantifier>, context: TransformContext): void; onUnknown?(node: NoParent<Unknown>, context: TransformContext): void; } export interface TransformContext { /** * The maximum character of all character sets in the AST. * * If the expression to transform does not contain any characters at the start of the transformation, then this * value will be `0`. */ readonly maxCharacter: Char; /** * Signals that the transformer changed the AST. */ readonly signalMutation: () => void; } /** * A transformer that runs all given transformers in sequential order.
* * The combined transformer is a special transformer in that the {@link transform} function knows about it. */ export class CombinedTransformer implements Transformer { readonly name = "CombinedTransformer"; /** * The transformers that will be applied in order. * * Note: These transformers are not necessarily the ones given to the constructor. If a transformer is a * `CombinedTransformer`, then its transformers will be used instead (think of it as flattening combined * transformers). */ readonly transformers: readonly Transformer[]; constructor(transformers: Iterable<Transformer>); onAlternation(node: NoParent<Alternation>, context: TransformContext): void; onAssertion(node: NoParent<Assertion>, context: TransformContext): void; onCharacterClass(node: NoParent<CharacterClass>, context: TransformContext): void; onConcatenation(node: NoParent<Concatenation>, context: TransformContext): void; onExpression(node: NoParent<Expression>, context: TransformContext): void; onQuantifier(node: NoParent<Quantifier>, context: TransformContext): void; onUnknown(node: NoParent<Unknown>, context: TransformContext): void; } /** * Creates a new transformer that performs all given transformers in sequential order. * * If only one transformer is given, the returned transformer will be functionally equivalent. If no transformers are * given, the returned transformer will be equivalent to a noop transformer. * * The given iterable can be changed and reused after this function returns. * * @param transformers * @deprecated Use `new CombinedTransformer(transformers)` instead. */ export function combineTransformers(transformers: Iterable<Transformer>): CombinedTransformer; export interface TransformEvents { /** * An optional callback that will be called at the start of every pass. * * @param ast The AST that will be transformed. * @param pass The number of the pass that will be performed. Starts at `1`.
*/ onPassStart?: (ast: NoParent<Expression>, pass: number) => void; /** * An optional callback that will be called every time a transformer mutates the AST. * * @param ast The AST that was transformed. * @param node The node that was mutated by the transformer. Descendants of this node may have been mutated as well. * @param transformer The transformer that mutated the AST. */ onChange?: (ast: NoParent<Expression>, node: NoParent<Node>, transformer: Transformer) => void; } export interface TransformOptions { /** * The maximum number of times the transformer will be applied to the AST. * * This is only a maximum. The transformer will be stopped before this number is reached if the AST isn't modified * anymore. * * @default 10 */ maxPasses?: number; /** * Optional events to observe the transformation process. */ events?: TransformEvents; } /** * Transforms the given expression according to the given transformer. * * __Do not__ use the given `ast` object again after calling this function, the object will be in an undefined state. * * Note: This function knows about {@link CombinedTransformer} and will give it special treatment. Instead of applying * the transformer as is, it will apply all of its transformers instead. While this does not change the behavior of the * transformer, it does change which transformers the {@link TransformEvents} will see. Instead of seeing the combined * transformer, they will see the individual transformers. * * @param transformer * @param ast * @param options */ export function transform( transformer: Transformer, ast: NoParent<Expression>, options?: Readonly<TransformOptions> ): NoParent<Expression>; /** * A character base is constructed from a collection of character sets. It holds a list of disjoint, non-empty * character sets - the base sets - that can be used to construct every character set in the collection it was * constructed from. * * ## Guarantees * * - The base sets are guaranteed to be mutually disjoint and non-empty.
* * - Every character set in the collection can be constructed by combining (union) a unique set of base sets. * * - The list of base sets is guaranteed to be as small as possible. There are at most `min(n^2, o)` base sets where `n` * is the number of unique, non-empty character sets in the collection, and `o` is the number of characters in the * union of all character sets in the collection. * * ## Use case * * The primary purpose of base sets is to remap alphabets. Some FA operations scale with the number of characters in the * alphabet of the FA (e.g. DFA minimization). * * Base sets can be used to determine which characters in an FA's alphabet *Σ* cannot be distinguished by the FA *A*. * Two characters *a,b* in *Σ* are indistinguishable if for all inputs *w* the following hold true: * * 1. *w* is accepted by *A* iff *w* with all occurrences of *a* replaced with *b* is accepted by *A*. * 2. *w* is accepted by *A* iff *w* with all occurrences of *b* replaced with *a* is accepted by *A*. * * Two indistinguishable characters are guaranteed to be in the same base set. * * By treating each base set as a character, it is possible to create a new (smaller) alphabet *Γ* (*|Γ| <= |Σ|*) such * that the FA *A* still behaves the same. * * Since *Γ* is typically (several orders of magnitude) smaller, operations that scale with the size of the alphabet * can be done more quickly. */ export class CharBase { /** * A list of disjoint, non-empty character sets. * * See {@link CharBase} to learn more. */ readonly sets: readonly CharSet[]; /** * Create the base sets of the given collection of character sets. * * See {@link CharBase} to learn more. * * @param charSets * @throws `RangeError` if the collection contains two character sets with different maximums. */ constructor(charSets: Iterable<CharSet>); /** * Splits the given character set into its base sets. * * The returned array will be a list of indexes of base sets necessary to construct the given character sets. 
The * indexes will be sorted and occur at most once. * * **Note**: This assumes that `charSet` is either empty or can be constructed from the base sets. If the * assumption is not met, the output of this function will be undefined. * * @param charSet */ split(charSet: CharSet): number[]; } export interface ReadonlyCharMap<T> extends Iterable<[CharRange, T]> { /** * Returns whether this map is empty. * * This is equivalent to `this.size === 0` and `this.entryCount === 0`. */ readonly isEmpty: boolean; /** * The number of characters in this map. This is different from {@link entryCount}. * * This is equivalent to `[...this.keys()].reduce((count, range) => count + range.max - range.min + 1, 0)`. */ readonly size: number; /** * The number of entries in this map. * * This is different from {@link size}. In general, you should use {@link size}, because it has the same semantics * as `Set#size` and `Map#size`. * * This is equivalent to `[...this.entries()].length`. */ readonly entryCount: number; /** * Returns whether the given character is a key in the map. * * @param char */ has(char: Char): boolean; /** * Returns whether every character in the given range is a key in the map. * * This is equivalent to: `[...chars].every(char => this.has(char))`. * * @param chars */ hasEvery(chars: CharRange): boolean; /** * Returns whether some character in the given range is a key in the map. * * This is equivalent to: `[...chars].some(char => this.has(char))`. * * @param chars */ hasSome(chars: CharRange): boolean; /** * Returns the value associated with the given character or `undefined` if the character is not a key in the map. * * @param char */ get(char: Char): T | undefined; /** * Invokes the given callback for every item of the character map. * * This method is implemented more efficiently than other iterator based methods, so choose `forEach` wherever * possible.
* * @param callback */ forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => void): void; /** * Returns all ranges of characters that are keys in the map. * * Keys will be returned in the same order as `this.entries()`. */ keys(): Iterable<CharRange>; /** * Returns all values in the map. Values might not be unique if more than one range maps to the same value. * * Values will be returned in the same order as `this.entries()`. */ values(): Iterable<T>; /** * Returns all key-value pairs in the map. * * Entries will be returned in the order of ascending ranges. */ entries(): Iterable<[CharRange, T]>; /** * Returns a mapping from the values of this map to its keys. */ invert(maxCharacter: Char): Map<T, CharSet>; /** * Returns a new map with all values mapped by the given function. * * If no function is given, the identity function is used. */ copy(): CharMap<T>; copy<U>(mapFn: (value: T) => U): CharMap<U>; } /** * A map from characters to generic values. * * The map guarantees that there are no adjacent character ranges that map to equal values, i.e. adjacent ranges with * equal values will always be iterated as one character range. The equality of values is given by JavaScript's strict equality operator (`===`). */ export class CharMap<T> implements ReadonlyCharMap<T> { get isEmpty(): boolean; get size(): number; get entryCount(): number; has(char: Char): boolean; hasEvery(chars: CharRange): boolean; hasSome(chars: CharRange): boolean; get(char: Char): T | undefined; set(char: Char, value: T): void; /** * Sets the value for all characters in the given range. * * This is equivalent to `[...chars].forEach(char => this.set(char, value))`. * * @param chars * @param value */ setRange(chars: CharRange, value: T): void; /** * Sets the value for all characters in the given character set. * * This is equivalent to `[...charSet.characters()].forEach(char => this.set(char, value))`.
* * @param charSet * @param value */ setCharSet(charSet: CharSet, value: T): void; delete(char: Char): boolean; /** * Deletes all characters in the given range. * * This is equivalent to `[...range].forEach(char => this.delete(char))`. * * @param range */ deleteRange(range: CharRange): void; /** * Deletes all entries in the map. */ clear(): void; copy(): CharMap<T>; copy<U>(mapFn: (value: T) => U): CharMap<U>; map(mapFn: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => T): void; mapRange( range: CharRange, mapFn: (value: T | undefined, chars: CharRange, map: ReadonlyCharMap<T>) => T | undefined ): void; filter(conditionFn: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => boolean): void; invert(maxCharacter: Char): Map<T, CharSet>; forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => void): void; keys(): IterableIterator<CharRange>; values(): IterableIterator<T>; entries(): IterableIterator<[CharRange, T]>; [Symbol.iterator](): IterableIterator<[CharRange, T]>; } /** * An immutable interval of {@link Char}s with inclusive ends. * * Each interval contains all characters `x` with `min <= x <= max`. */ export interface CharRange { /** * The inclusive minimum of the interval. * * This value has to be less or equal to {@link max}. */ readonly min: Char; /** * The inclusive maximum of the interval. * * This value has to be greater or equal to {@link min}. */ readonly max: Char; } /** * An immutable set of {@link Char}s represented as a sorted set of disjoint non-adjacent intervals ({@link CharRange}). * * All characters in the set have to be between 0 (inclusive) and the maximum of the set (inclusive). */ export class CharSet { /** * The greatest character which can be element of the set. */ readonly maximum: Char; /** * An array of ranges representing this character set. * * The array must be guaranteed to have the following properties at all times: * * 1. Any two ranges are disjoint. * 2. Any two ranges are non-adjacent. * 3. 
0 <= `min` <= `max` <= `this.maximum` for all ranges. * 4. All ranges are sorted by ascending `min`. */ readonly ranges: readonly CharRange[]; /** * Returns `true` if this set doesn't contain any characters. */ get isEmpty(): boolean; /** * Returns `true` if all characters in the range from 0 to `this.maximum`, including 0 and `this.maximum`, are in * the set. */ get isAll(): boolean; /** * Returns the number of unique characters in the set. * * The returned number will be at least `0` and at most `this.maximum + 1`. */ get size(): number; /** * Returns an iterable of all characters in this set. * * Characters are sorted by ascending order and each character is yielded exactly once. * * Note: The iterable is stable. It can be iterated multiple times. */ characters(): Iterable<Char>; /** * Returns a string representation of the character set. */ toString(): string; /** * Returns a string representation of the ranges of this character set. * * The string representation has the following rules: * * 1. Each character is represented as a hexadecimal number. * 2. Each range where `min == max` will be represented by the `min` character. * 3. Each range where `min != max` will be represented by `min` followed by `".."` followed by `max`. * 4. The sequence of ranges will be joined together using `", "`. * * The returned string representation will have the following format: * * ``` * string = [ ranges ] * ranges = range *( ", " range ) * range = +hex [ ".." +hex ] * hex = "a" | "b" | "c" | "d" | "e" | "f" | digit * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" * ``` */ toRangesString(): string; /** * Returns a string representation of the Unicode ranges of this character set. * * The primary purpose of this function is to provide an easy way to get a readable representation of a Unicode or * Unicode-like character set. The format is optimized for ease of reading for humans.
* * The format follows these rules: * - If the character set is empty, `empty` will be returned. * - If the character set contains all characters, `all` will be returned. * - Ranges may be negated, which is indicated with `not`. E.g. `not a b` is the character set that contains all * characters except for a and b. * - A contiguous range of characters is represented using `min-max` where `min` and `max` are formatted characters. * - Single characters are formatted as either: * - a Unicode character (e.g. `a`), * - a quoted Unicode character (e.g. `'-'`), or * - a Unicode escape (e.g. `U+FF`). * * The returned string representation will have the following format: * * ``` * string = "all" | "empty" | ranges | "not " ranges * ranges = range *( " " range ) * range = char [ "-" char ] * char = literal | quoted | escape * literal = ?Printable Unicode characters? * quoted = "'" ?any character? "'" * escape = "U+" +hex * hex = "A" | "B" | "C" | "D" | "E" | "F" | digit * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" * ``` */ toUnicodeString(): string; /** * Returns an empty character set with the given maximum. * * @param maximum The greatest character which can be element of the set. */ static empty(maximum: Char): CharSet; /** * Returns a complete character set with the given maximum. * * @param maximum The greatest character which will be element of the set. */ static all(maximum: Char): CharSet; /** * Returns a character set which contains the given characters. * * @param maximum The greatest character which will be element of the set. * @param characters A sorted collection of characters. * @throws `RangeError` if the given collection is not sorted or contains characters greater than `maximum`. */ static fromCharacters(maximum: Char, characters: Iterable<Char>): CharSet; /** * Returns a character set which contains the given range. * * @param maximum The greatest character which will be element of the set.
* @param range * @throws `RangeError` if the maximum of the given range is greater than `maximum`. */ static fromRange(maximum: Char, range: CharRange): CharSet; /** * Returns a character set which contains the given character. * * @param maximum The greatest character which will be element of the set. * @param char * @throws `RangeError` if the given character is greater than `maximum`. */ static fromCharacter(maximum: Char, char: Char): CharSet; /** * Returns whether this and the given character set are equivalent. * * Two `CharSet`s are equal if and only if: * * 1. They have the same maximum. * 2. They contain the same characters. * * Since each set of characters has a unique range representation, 2 equal `CharSet`s are guaranteed to have equal * ranges. * * A `CharSet` and a `CharRange` are equal if and only if they contain the same characters. * * @param other */ equals(other: CharSet | CharRange): boolean; /** * Compares this set with given set and returns an integer value describing their relation. Two equivalent sets are * always guaranteed to return 0. * * The order defined by this function is guaranteed to be a * [total order](https://en.wikipedia.org/wiki/Total_order). Apart from this, no other guarantees are given. * * @param other */ compare(other: CharSet): number; /** * Returns a character set with the given maximum. * * The ranges of the returned character set are equivalent to the ranges of * `this.intersect({ min: 0, max: newMaximum })`. * * @param newMaximum * @returns */ resize(newMaximum: Char): CharSet; /** * Returns [the complement](https://en.wikipedia.org/wiki/Complement_(set_theory)) of this set. * * The returned set will have the same maximum as this set. */ negate(): CharSet; /** * Returns [the union](https://en.wikipedia.org/wiki/Union_(set_theory)) of this set and all given sets and * character ranges. * * The returned set will have the same maximum as this set.
* * @param data * @throws `RangeError` If the maximum of one of the given sets differs from the maximum of this set or if the * maximum of one of the given ranges is greater than the maximum of this set. */ union(...data: (Iterable<CharRange> | CharSet)[]): CharSet; /** * Returns [the intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)) of this set and the given * set/ranges of characters. * * The returned set will have the same maximum as this set. * * @param other * @throws `RangeError` If the maximum of the given set differs from the maximum of this set. */ intersect(other: CharSet | CharRange): CharSet; /** * Returns a set that contains all characters of this set that are not in the given set/range. * * The returned set will have the same maximum as this set. * * @param other * @throws `RangeError` If the maximum of the given set differs from the maximum of this set. */ without(other: CharSet | CharRange): CharSet; /** * Returns whether this set contains the given character. * * @param character */ has(character: Char): boolean; /** * Returns whether `this ⊇ other`. * * @param other */ isSupersetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊆ other`. * * @param other */ isSubsetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊃ other`. * * @param other */ isProperSupersetOf(other: CharSet | CharRange): boolean; /** * Returns whether `this ⊂ other`. * * @param other */ isProperSubsetOf(other: CharSet | CharRange): boolean; /** * Returns whether this set and the given set (or range) are disjoint. * * @param other */ isDisjointWith(other: CharSet | CharRange): boolean; /** * Returns any one of the common characters of this set and the given set or range. * * If this character set is disjoint with the given character set/range, then `undefined` will be returned. * * @param other */ commonCharacter(other: CharSet | CharRange): Char | undefined; } /** * A character is a non-negative integer. 
* * This is one of the core concepts of refa. Instead of operating on JavaScript strings, UTF16 character codes, or * Unicode code points, this library uses plain numbers instead. This makes refa agnostic to text encodings and even * text in general since the integers used as character may represent arbitrary concepts. * * There are only 2 restrictions on the numbers that can be characters: * * 1. They have to be non-negative integers. * 2. They can be at most `Number.MAX_SAFE_INTEGER`. * * --- * * This type serves as a way to document characters. It is a clear way to signal that a value is not just any number. */ export type Char = number & { __char?: never; }; /** * A word is a finite sequence of {@link Char}s. * * This is one of the core concepts of refa. Instead of operating on JavaScript strings, all functions operate on * {@link Char}s and char arrays (= words). This means that refa is agnostic to text encodings, the string * representation of JavaScript, and even text itself. * * This type serves as a way to document words. It should _not_ be used interchangeably with `Char[]` or `number[]`. */ export type Word = Char[]; /** * An immutable finite sequence of {@link Char}s. * * This is an immutable view on a {@link Word}. */ export type ReadonlyWord = readonly Char[]; export interface FiniteAutomaton { /** * Returns whether this FA accepts the empty language meaning that it doesn't accept any words. */ readonly isEmpty: boolean; /** * Returns whether the formal language accepted by this FA contains finitely many words. * * __Note__: Finite does not mean that all words can be iterated in practice. E.g. the set of all Unicode words with * 10 or less characters contains 2.6e54 many words and can be accepted by a DFA with only 11 states. */ readonly isFinite: boolean; /** * The maximum character that is part of the alphabet of the words that this FA can accept. */ readonly maxCharacter: Char; /** * Returns whether this FA accepts the given word.
* * @param word The characters of the word to test. */ test(word: ReadonlyWord): boolean; /** * Returns an iterable that will yield all words accepted by this FA. Words are yielded by ascending length. * * If this FA accepts infinitely many words, the iterable will never end. */ words(): Iterable<Word>; /** * Returns an iterable that will yield all word sets accepted by this FA. Word sets are yielded by ascending length. * * If this FA accepts infinitely many words, the iterable will never end. If this FA is finite, the iterable will * end after at most `2^O(n)` word sets (`n` = number of states). * * If you analyse the words of an FA, consider using this method instead of `words`. If this method yields `k` word * sets, then `words` will yield up to `O(k * m ^ l)` words (`m` = number of possible characters, `l` = the maximum * length of any of the `k` word sets). */ wordSets(): Iterable<WordSet>; /** * Returns a string representation of this FA. */ toString(): string; /** * Returns the AST of a regular expression that accepts the same language as this FA. * * @param options */ toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>; /** * Returns the string representation of this FA in the * [DOT format](https://en.wikipedia.org/wiki/DOT_(graph_description_language)). * * The output of this function can be passed to any graph visualization program. This can be a * [local installation](https://graphviz.org/download/) or an [online editor](https://edotor.net/). * * By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s. It's possible to provide a * custom stringify function using the `charSetToString` parameter. * * @param charSetToString */ toDot(charSetToString?: (charSet: CharSet) => string): string; /** * Returns the string representation of this FA in the [Mermaid format](https://mermaid.js.org/). * * By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s.
It's possible to provide a * custom stringify function using the `charSetToString` parameter. * * @param charSetToString */ toMermaid(charSetToString?: (charSet: CharSet) => string): string; } /** * A graph iterator for all states of an FA with final states. * * @template S The type of a state in the FA to iterate. * @template O The type of the value each state maps to. */ export interface FAIterator<S, O = Iterable<S>> { /** * The initial state of the FA. */ readonly initial: S; /** * Returns the value a state maps to. * * Callers of this function are allowed to call the function **without** a `this` argument. * * @see {@link stableOut} */ readonly getOut: (state: S) => O; /** * Whether the {@link getOut} function is stable during the lifetime of the iterator. * * Stable means that if `getOut` gets called for the same state more than once, it will always return the same * value. * * The sameness of states is defined by * [the key equality of the Map class](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map#key_equality). * * The sameness of returned values is not defined by this interface and depends on the iterator. * * I.e. a stable `getOut` function may return new collections/iterators on subsequent invocations as long as the * collections/iterators are considered equal (e.g. equal could be defined as "same elements") by the consumer of * the iterator. * * @default false */ readonly stableOut?: boolean; /** * Returns whether the given state is a final state. * * This function is guaranteed to be deterministic during the time the iterator is used. It is also guaranteed to be * sufficiently fast, usually `O(1)` can be assumed. * * Callers of this function are allowed to call the function **without** a `this` argument. */ readonly isFinal: (state: S) => boolean; } /** * A factory for the nodes of finite automata. */ export interface NodeFactory<S> { /** * Creates a new state.
* * @throws {@link TooManyNodesError} * May be thrown if the number of created nodes exceeds some limit. */ readonly createNode: () => S; } /** * An FA builder has the responsibility of constructing a finite automaton. * * The constructed FA is always owned by the builder. * * @template S The type of a state. * @template T The transition type of the values linking states. */ export interface FABuilder<S, T> extends NodeFactory<S> { /** * The initial state of the FA. */ readonly initial: S; /** * Makes the given state behave like a final state of this FA. * * This does not necessarily mean that the given state will be a final state. I.e. calling `makeFinal(s)` does not * necessitate that `isFinal(s)` is true. * * The implementation has to guarantee that calling this method for the same state more than once is allowed. */ readonly makeFinal: (state: S) => void; /** * Returns whether the given state is a final state. * * This operation is assumed to be semantically equivalent to {@link FAIterator.isFinal}. */ readonly isFinal: (state: S) => boolean; /** * Links the two given states using the given transition. * * Calling this operation more than once for the given `from` and `to` states is not guaranteed to succeed. */ readonly linkNodes: (from: S, to: S, transition: T) => void; } /** * An {@link FAIterator} where transitions are a map of states to character sets. * * This is a commonly used interface when dealing with FA. It's the common core all currently implemented FA support. */ export type TransitionIterator<T> = FAIterator<T, ReadonlyMap<T, CharSet>>; /** * A graph or FA that can create a {@link TransitionIterator}. */ export interface TransitionIterable<T> { readonly maxCharacter: Char; readonly transitionIterator: () => TransitionIterator<T>; } export interface ToRegexOptions { /** * The maximum number of RE AST nodes the implementation is allowed to create.
* * If the implementation has to create more nodes to create the RE, a `TooManyNodesError` will be thrown. This * maximum will be checked before any optimization passes. * * @default 10000 */ maxNodes?: number; /** * The maximum number of optimization passes that will be done after the initial RE AST was created. * * The initial AST is usually a lot more complex than necessary. Optimizations are then applied in order to minimize * the AST until this limit is reached or the AST can be optimized no further. * * The default number of passes is implementation defined. */ maxOptimizationPasses?: number; } /** * A readonly {@link DFA}. */ export interface ReadonlyDFA extends FiniteAutomaton, TransitionIterable<DFA.ReadonlyNode> { /** * The initial state of the DFA. */ readonly initial: DFA.ReadonlyNode; /** * The set of final states of the DFA. * * This set may be empty or contain nodes not reachable from the initial state. */ readonly finals: ReadonlySet<DFA.ReadonlyNode>; stateIterator(): FAIterator<DFA.ReadonlyNode>; /** * Yields all nodes reachable from the initial state including the initial state. * * This may include trap states, but it will not include unreachable final states. * * The order in which nodes will be returned is implementation defined and may change after any operation that * modifies the DFA. * * Modifying the DFA while iterating will result in implementation-defined behavior. The implementation may stop the * iteration or yield any nodes. * * This operation runs in _O(E + V)_ where _V_ is the number of nodes reachable from the initial state and _E_ is * the number of transitions. */ nodes(): Iterable<DFA.ReadonlyNode>; /** * Returns the number of nodes reachable from the initial state including the initial state. * * This returns the number of nodes returned by {@link nodes}. */ countNodes(): number; /** * Creates a new DFA equivalent to this one.
*/ copy(factory?: NodeFactory<DFA.Node>): DFA; /** * Returns whether this and the given DFA are structurally equal meaning that all nodes and all transitions are * equal. * * @param other */ structurallyEqual(other: ReadonlyDFA): boolean; } /** * A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton). * * This class implements DFAs with the following properties: * * - There is exactly one initial state. * * - There may be any number of final states. * * This is implemented using a `Set` of states. * * - No epsilon transitions. * * - A transition always consumes a character. * * (All character sets are guaranteed to be non-empty.) * * - Transitions are unordered. * * As a consequence, `/aa|bb/` and `/bb|aa/` have the same state machine. * * - Between any two states, there can at most be one transition. */ export class DFA implements ReadonlyDFA { readonly initial: DFA.Node; readonly finals: Set<DFA.Node>; readonly maxCharacter: Char; get isEmpty(): boolean; get isFinite(): boolean; stateIterator(): FAIterator<DFA.ReadonlyNode>; transitionIterator(): TransitionIterator<DFA.ReadonlyNode>; nodes(): Iterable<DFA.Node>; countNodes(): number; test(word: ReadonlyWord): boolean; wordSets(): Iterable<WordSet>; words(): Iterable<Word>; toString(): string; toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>; toDot(charSetToString?: (charSet: CharSet) => string): string; toMermaid(charSetToString?: (charSet: CharSet) => string): string; copy(factory?: NodeFactory<DFA.Node>): DFA; structurallyEqual(other: ReadonlyDFA): boolean; removeUnreachable(): void; /** * [Minimizes](https://en.wikipedia.org/wiki/DFA_minimization) this DFA. */ minimize(): void; /** * Complements this DFA. * * This DFA after calling this function will accept all words that are not accepted by this DFA before calling this * function. * * This operation will create at most 1 node with the given factory.
* * @param factory */ complement(factory?: NodeFactory<DFA.Node>): void; /** * Modifies this DFA such that all prefixes of all accepted words are also accepted. * * If the language of this DFA is empty, then it will remain empty. * * Unreachable states will be removed by this operation. */ prefixes(): void; /** * Creates a new DFA which matches no words. The language of the returned DFA is empty. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static empty(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA; /** * Creates a new DFA which matches only the empty word. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static emptyWord(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA; /** * Creates a new DFA which matches all words. * * This operation will create exactly 1 node with the given factory. * * @param options * @param factory */ static all(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA; /** * Creates a new DFA which matches the given characters. * * This operation will create at most 2 nodes with the given factory. * * @param charSet * @param factory */ static fromCharSet(charSet: CharSet, factory?: NodeFactory<DFA.Node>): DFA; /** * Returns a new DFA which is equivalent to the intersection of the two given FA. * * @param left * @param right * @param factory */ static fromIntersection<L, R>( left: TransitionIterable<L>, right: TransitionIterable<R>, factory?: NodeFactory<DFA.Node> ): DFA; /** * Creates a new DFA which matches all and only all of the given words. * * @param words * @param options * @param factory */ static fromWords( words: Iterable<ReadonlyWord>, options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node> ): DFA; /** * Creates a new DFA which matches all and only all of the given word sets.
* * @param wordSets * @param options * @param factory */ static fromWordSets( wordSets: Iterable<ReadonlyWordSet>, options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node> ): DFA; static fromFA<InputNode>(fa: TransitionIterable<InputNode>, factory?: NodeFactory<DFA.Node>): DFA; static fromTransitionIterator<InputNode>( iter: TransitionIterator<InputNode>, options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node> ): DFA; static fromBuilder(builder: DFA.Builder, options: Readonly<DFA.Options>): DFA; } /** * A namespace for DFA-specific classes and interfaces. * * @see {@link DFA} (class) */ export namespace DFA { interface ReadonlyNode { readonly out: ReadonlyCharMap<ReadonlyNode>; } class Node implements ReadonlyNode { readonly out: CharMap<Node>; link(to: Node, via: CharSet): void; unlink(to: Node): void; /** * Unlinks all outgoing and incoming transitions of this node. */ unlinkAll(): void; } /** * An unlimited node factory that will simply call the {@link Node} constructor. */ const nodeFactory: NodeFactory<Node>; /** * A {@link NodeFactory} for DFA nodes that carries a `limit`. * * NOTE(review): presumably `createNode` throws a `TooManyNodesError` once more than `limit` nodes were created - * confirm against the implementation. */ class LimitedNodeFactory implements NodeFactory<Node> { readonly limit: number; constructor(limit?: number); createNode(): Node; } /** * An {@link FABuilder} for DFA {@link Node}s that uses {@link CharSet}s as transitions. */ class Builder implements FABuilder<Node, CharSet> { readonly initial: Node; readonly finals: Set<Node>; readonly factory: NodeFactory<Node>; constructor(factory: NodeFactory<Node>); makeFinal(state: Node): void; isFinal(state: Node): boolean; linkNodes(from: Node, to: Node, transition: CharSet): void; createNode(): Node; } interface Options { /** * The maximum numerical value any character can have. * * This will be the maximum of all underlying {@link CharSet}s. */ maxCharacter: Char; } } /** * A readonly {@link ENFA}. */ export interface ReadonlyENFA extends FiniteAutomaton, TransitionIterable<ENFA.ReadonlyNode> { /** * The initial state of the ENFA. */ readonly initial: ENFA.ReadonlyNode; /** * The final state of the ENFA. * * This state may not be reachable from the initial state.
*/ readonly final: ENFA.ReadonlyNode; /** * Whether this ENFA is in its normal form. * * @see {@link ENFA} */ readonly isNormalized: boolean; stateIterator(resolveEpsilon: boolean): FAIterator<ENFA.ReadonlyNode>; /** * Yields all nodes reachable from the initial state including the initial state. * * This may include trap states, but it will not include the final state if it is unreachable from the initial * state. * * The order in which nodes will be returned is implementation defined and may change after any operation that * modifies the ENFA. * * Modifying the ENFA while iterating will result in implementation-defined behavior. The implementation may stop * the iteration or yield any nodes. * * This operation runs in _O(E + V)_ where _E_ is the number of transitions and _V_ is the number of nodes reachable * from the initial state. */ nodes(): Iterable<ENFA.ReadonlyNode>; /** * Returns the number of nodes reachable from the initial state including the initial state. * * This returns the number of nodes returned by {@link nodes}. */ countNodes(): number; /** * Creates a mutable copy of this ENFA. */ copy(factory?: NodeFactory<ENFA.Node>): ENFA; } /** * A [nondeterministic finite automaton](https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton) with epsilon * transitions. * * This class implements NFAs with the following properties: * * - There is exactly one initial state. * * - There is exactly one final state. * * - There are epsilon transitions. * * - A transition is either an epsilon transition or consumes a character. * * Epsilon transitions are represented using `null` and characters are represented using non-empty `CharSet`s. * * - Transitions are ordered. * * As a consequence, `/aa|bb/` and `/bb|aa/` have different state machines in this NFA implementation. * * Order is only guaranteed as long as no transitions are removed. Order is defined by the key order of the JavaScript * `Map` class.
* * - Between any two states, there can at most be one transition. * * Unlike the {@link NFA} class, transitions cannot be merged. As a consequence, `/a|a/` and `/a/` have different * state machines in this NFA implementation. * * ## Normal form * * The normal form of this ENFA implementation has the following restrictions: * * - The initial state must not have incoming transitions. * - The final state must not have outgoing transitions. * - The initial state and final state are different states. * * Non-normalized ENFAs will either be tolerated or normalized by operations. */ export class ENFA implements ReadonlyENFA { initial: ENFA.Node; final: ENFA.Node; readonly maxCharacter: Char; get isEmpty(): boolean; get isFinite(): boolean; get isNormalized(): boolean; /** * Brings this ENFA into its normal form. * * This operation will create at most 2 nodes with the given factory. * * @param factory * @see {@link ENFA} */ normalize(factory?: NodeFactory<ENFA.Node>): void; stateIterator(resolveEpsilon: boolean): FAIterator<ENFA.ReadonlyNode>; transitionIterator(): TransitionIterator<ENFA.ReadonlyNode>; nodes(): Iterable<ENFA.Node>; countNodes(): number; copy(factory?: NodeFactory<ENFA.Node>): ENFA; test(word: ReadonlyWord): boolean; wordSets(): Iterable<WordSet>; words(): Iterable<Word>; toString(): string; toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>; toDot(charSetToString?: (charSet: CharSet) => string): string; toMermaid(charSetToString?: (charSet: CharSet) => string): string; /** * Modifies this ENFA to accept the concatenation of this ENFA and the given FA. * * @param other * @param factory */ append<O>(other: TransitionIterable<O>, factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept the concatenation of this ENFA and the other ENFA. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes.
The initial and final states of the other * ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again. * * This operation will create at most 4 nodes with the given factory. * * @param other * @param factory */ appendInto(other: ENFA, factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept the concatenation of the given FA and this ENFA. * * @param other * @param factory */ prepend<O>(other: TransitionIterable<O>, factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept the concatenation of the other ENFA and this ENFA. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other * ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again. * * This operation will create at most 4 nodes with the given factory. * * @param other * @param factory */ prependInto(other: ENFA, factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept the language of this ENFA and the language of the given FA. * * If the union kind is `left`, then this ENFA will be modified to accept `<other>|<this>`. Otherwise, it will be * modified to accept `<this>|<other>`. * * @param other * @param kind * @param factory */ union<O>(other: TransitionIterable<O>, kind?: "left" | "right", factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept the language of this ENFA and the language of the other ENFA. * * If the union kind is `left`, then this ENFA will be modified to accept `<other>|<this>`. Otherwise, it will be * modified to accept `<this>|<other>`. * * This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other * ENFA will be in an **invalid state** after this operation completes.
The initial and final states of the other * ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again. * * This operation will create at most 6 nodes with the given factory. * * @param other * @param kind * @param factory */ unionInto(other: ENFA, kind?: "left" | "right", factory?: NodeFactory<ENFA.Node>): void; /** * Modifies this ENFA to accept at least `min` and at most `max` concatenations of itself. * * Both `min` and `max` have to be non-negative integers with `min <= max`. * `max` is also allowed to be `Infinity`. * * @param min * @param max * @param lazy * @param factory */ quantify(min: number, max: number, lazy?: boolean, factory?: NodeFactory<ENFA.Node>): void; /** * Removes the empty word from the accepted language of this ENFA. * * Unreachable states will be removed by this operation. * * @param factory */ withoutEmptyWord(factory?: NodeFactory<ENFA.Node>): void; /** * All states which cannot be reached from the initial state or cannot reach (or are) a final state, will be * removed. */ removeUnreachable(): void; /** * Modifies this ENFA such that all prefixes of a