refa
Version:
A library for finite automata and regular expressions in the context of JS RegExp
1,423 lines • 116 kB
TypeScript
import { AST, RegExpParser } from "@eslint-community/regexpp";
/**
* A span inside some source text, described by a `start` and an `end` position.
*
* NOTE(review): presumably both values are character offsets into the source of a regular expression; whether `end`
* is inclusive or exclusive cannot be determined from this declaration - confirm.
*/
export interface SourceLocation {
/** The position at which the span starts. */
start: number;
/** The position at which the span ends. */
end: number;
}
/**
* The properties shared by all AST nodes.
*
* Every concrete node interface in this file extends this interface and narrows `type` and `parent`.
*/
interface NodeBase {
/** The discriminant that names the kind of this node. */
type: Node["type"];
/** The parent of this node. This is `null` for {@link Expression} nodes. */
parent: Node["parent"];
/** An optional source location associated with this node. */
source?: SourceLocation;
}
/** All node types that can be an element of a {@link Concatenation} (see `Concatenation.elements`). */
export type Element = CharacterClass | Alternation | Quantifier | Assertion | Unknown;
/** All node types that can be the parent of a {@link Concatenation} (see `Concatenation.parent`). */
export type Parent = Expression | Alternation | Quantifier | Assertion;
/** The union of all AST node types. */
export type Node = Expression | CharacterClass | Alternation | Quantifier | Assertion | Concatenation | Unknown;
/**
* An AST node that holds a list of alternatives, each of which is a {@link Concatenation}.
*/
export interface Alternation extends NodeBase {
type: "Alternation";
parent: Concatenation;
/** The alternatives of this node. */
alternatives: Concatenation[];
}
/**
* An AST node for an assertion over a list of alternatives.
*
* NOTE(review): presumably `kind` selects between a lookahead (`"ahead"`) and a lookbehind (`"behind"`), and `negate`
* marks a negated assertion - confirm against the code that creates these nodes.
*/
export interface Assertion extends NodeBase {
type: "Assertion";
parent: Concatenation;
/** The alternatives of this assertion. */
alternatives: Concatenation[];
/** The direction of the assertion. */
kind: "ahead" | "behind";
/** Whether the assertion is negated. */
negate: boolean;
}
/**
* An AST node that quantifies a list of alternatives using the bounds `min` and `max`.
*
* NOTE(review): presumably `max` may be `Infinity` for unbounded quantifiers and `lazy` mirrors lazy quantifiers of
* JS RegExp - neither is visible from this declaration, confirm.
*/
export interface Quantifier extends NodeBase {
type: "Quantifier";
parent: Concatenation;
/** The alternatives that are quantified. */
alternatives: Concatenation[];
/** Whether the quantifier is lazy. */
lazy: boolean;
/** The lower bound of the quantifier. */
min: number;
/** The upper bound of the quantifier. */
max: number;
}
/**
* An AST node that holds a set of characters.
*/
export interface CharacterClass extends NodeBase {
type: "CharacterClass";
parent: Concatenation;
/** The characters of this character class. */
characters: CharSet;
}
/**
* An AST node with unknown content that is identified by an `id`.
*
* NOTE(review): how `id` is chosen and interpreted is not visible from this declaration - confirm with the producer of
* these nodes.
*/
export interface Unknown extends NodeBase {
type: "Unknown";
parent: Concatenation;
/** The identifier of this unknown node. */
id: string;
}
/**
* The root node of an AST. It is the only node type whose `parent` is `null`.
*/
export interface Expression extends NodeBase {
type: "Expression";
parent: null;
/** The alternatives of this expression. */
alternatives: Concatenation[];
}
/**
* An AST node that holds a list of elements.
*
* Concatenations are the items of the `alternatives` arrays of all {@link Parent} nodes.
*/
export interface Concatenation extends NodeBase {
type: "Concatenation";
parent: Parent;
/** The elements of this concatenation. */
elements: Element[];
}
/**
 * A view of an AST node that hides the `parent` property.
 *
 * The view is applied recursively: node-like objects lose their `parent` property, arrays are mapped element by
 * element, and every other type is passed through unchanged.
 */
export type NoParent<T> = T extends NodeIdent
    ? NoParentNode<T>
    : T extends unknown[]
      ? NoParentArray<T>
      : T;
/** The minimal shape used to recognize an AST node: anything that has a node `type`. */
type NodeIdent = {
    type: Node["type"];
};
/** The given node type without its `parent` property. */
type NoParentNodePick<T extends NodeIdent> = Omit<T, "parent">;
/** The given node type without `parent` and with {@link NoParent} applied to all remaining properties. */
type NoParentNode<T extends NodeIdent> = {
    [Key in keyof NoParentNodePick<T>]: NoParent<NoParentNodePick<T>[Key]>;
};
/** The given array type with {@link NoParent} applied to all of its elements. */
type NoParentArray<T> = {
    [Index in keyof T]: NoParent<T[Index]>;
};
/**
* Sets the `parent` properties of the given node and all of its child nodes.
*
* This is an assertion function: after it returns, `node` is typed as `T`, i.e. as a node with `parent` properties.
*
* @param node The node whose `parent` property, and those of all of its child nodes, will be set.
* @param parent The parent of `node`.
*/
export function setParent<T extends Node>(node: T | NoParent<T>, parent: T["parent"]): asserts node is T;
/**
* Sets the `source` property of the given node and all of its child nodes.
*
* If `source` is not a function, then the source object will be copied for all `source` properties to be set. The
* object will be copied using the `start` and `end` properties alone, other properties will not be copied.
*
* @param node The node whose `source` property, and those of all of its child nodes, will be set.
* @param source The source location to set, or a function that returns the source location to set.
* @param overwrite NOTE(review): presumably controls whether `source` properties that are already set get replaced;
* the default is not visible from this declaration - confirm.
*/
export function setSource(
node: NoParent<Node>,
source: SourceLocation | (() => SourceLocation),
overwrite?: boolean
): void;
/**
* The visitor accepted by {@link visitAst} for ASTs with `parent` properties.
*
* All callbacks are optional. There is one `Enter` and one `Leave` callback for each type of node.
*
* NOTE(review): presumably `Enter` is called before the children of a node are visited and `Leave` afterwards -
* confirm against the implementation of `visitAst`.
*/
export interface VisitAstHandler {
onAlternationEnter?(node: Alternation): void;
onAlternationLeave?(node: Alternation): void;
onAssertionEnter?(node: Assertion): void;
onAssertionLeave?(node: Assertion): void;
onCharacterClassEnter?(node: CharacterClass): void;
onCharacterClassLeave?(node: CharacterClass): void;
onConcatenationEnter?(node: Concatenation): void;
onConcatenationLeave?(node: Concatenation): void;
onExpressionEnter?(node: Expression): void;
onExpressionLeave?(node: Expression): void;
onQuantifierEnter?(node: Quantifier): void;
onQuantifierLeave?(node: Quantifier): void;
onUnknownEnter?(node: Unknown): void;
onUnknownLeave?(node: Unknown): void;
}
/**
* The visitor accepted by {@link visitAst} for ASTs without `parent` properties (see {@link NoParent}).
*
* All callbacks are optional. There is one `Enter` and one `Leave` callback for each type of node.
*
* NOTE(review): presumably `Enter` is called before the children of a node are visited and `Leave` afterwards -
* confirm against the implementation of `visitAst`.
*/
export interface VisitNoParentAstHandler {
onAlternationEnter?(node: NoParent<Alternation>): void;
onAlternationLeave?(node: NoParent<Alternation>): void;
onAssertionEnter?(node: NoParent<Assertion>): void;
onAssertionLeave?(node: NoParent<Assertion>): void;
onCharacterClassEnter?(node: NoParent<CharacterClass>): void;
onCharacterClassLeave?(node: NoParent<CharacterClass>): void;
onConcatenationEnter?(node: NoParent<Concatenation>): void;
onConcatenationLeave?(node: NoParent<Concatenation>): void;
onExpressionEnter?(node: NoParent<Expression>): void;
onExpressionLeave?(node: NoParent<Expression>): void;
onQuantifierEnter?(node: NoParent<Quantifier>): void;
onQuantifierLeave?(node: NoParent<Quantifier>): void;
onUnknownEnter?(node: NoParent<Unknown>): void;
onUnknownLeave?(node: NoParent<Unknown>): void;
}
/**
* Calls the given visitor on the given node and all of its children.
*
* If the given visitor throws an error, the traversal will stop and the error will be re-thrown.
*
* There are two overloads: one for nodes with `parent` properties, which takes a {@link VisitAstHandler}, and one for
* {@link NoParent} nodes, which takes a {@link VisitNoParentAstHandler}.
*
* @param node The root of the subtree to visit.
* @param visitor The callbacks to invoke for the visited nodes.
*/
export function visitAst(node: Node, visitor: VisitAstHandler): void;
export function visitAst(node: NoParent<Node>, visitor: VisitNoParentAstHandler): void;
/**
* A transformer is some algorithm that takes an AST subtree and makes any number of modifications to the given
* subtree. Transformers cannot see or modify anything outside the given subtree. Transformers are assumed to behave
* like a set of pure functions.
*
* Transformers are always applied bottom-up.
*
* The most simple transformer is an empty object (`{}`). This is equivalent to a no-op transformer that does not change
* the given AST.
*
* All `on*` callbacks are optional. There is one callback for each type of node. Each callback receives the node to
* transform and the {@link TransformContext} of the current transformation.
*/
export interface Transformer {
/**
* An optional name useful for diagnostics.
*/
readonly name?: string;
onAlternation?(node: NoParent<Alternation>, context: TransformContext): void;
onAssertion?(node: NoParent<Assertion>, context: TransformContext): void;
onCharacterClass?(node: NoParent<CharacterClass>, context: TransformContext): void;
onConcatenation?(node: NoParent<Concatenation>, context: TransformContext): void;
onExpression?(node: NoParent<Expression>, context: TransformContext): void;
onQuantifier?(node: NoParent<Quantifier>, context: TransformContext): void;
onUnknown?(node: NoParent<Unknown>, context: TransformContext): void;
}
/**
* The context object that is passed to every callback of a {@link Transformer}.
*/
export interface TransformContext {
/**
* The maximum character of all character sets in the AST.
*
* If the expression to transform does not contain any characters at the start of the transformation, then this
* value will be `0`.
*/
readonly maxCharacter: Char;
/**
* Signals that the transformer changed the AST.
*/
readonly signalMutation: () => void;
}
/**
* A transformer that runs all given transformers in sequential order.
*
* The combined transformer is a special transformer in that the {@link transform} function knows about it.
*/
export class CombinedTransformer implements Transformer {
readonly name = "CombinedTransformer";
/**
* The transformers that will be applied in order.
*
* Note: These transformers are not necessarily the ones given to the constructor. If a transformer is a
* `CombinedTransformer`, then its transformers will be used instead (think of it as flattening combined
* transformers).
*/
readonly transformers: readonly Transformer[];
/**
* @param transformers The transformers to combine. See {@link CombinedTransformer.transformers} for how nested
* combined transformers are handled.
*/
constructor(transformers: Iterable<Transformer>);
onAlternation(node: NoParent<Alternation>, context: TransformContext): void;
onAssertion(node: NoParent<Assertion>, context: TransformContext): void;
onCharacterClass(node: NoParent<CharacterClass>, context: TransformContext): void;
onConcatenation(node: NoParent<Concatenation>, context: TransformContext): void;
onExpression(node: NoParent<Expression>, context: TransformContext): void;
onQuantifier(node: NoParent<Quantifier>, context: TransformContext): void;
onUnknown(node: NoParent<Unknown>, context: TransformContext): void;
}
/**
* Creates a new transformer that performs all given transformers in sequential order.
*
* If only one transformer is given, the returned transformer will be functionally equivalent. If no transformers are
* given, the returned transformer will be equivalent to a noop transformer.
*
* The given iterable can be changed and reused after this function returns.
*
* @param transformers The transformers to combine.
* @deprecated Use `new CombinedTransformer(transformers)` instead.
*/
export function combineTransformers(transformers: Iterable<Transformer>): CombinedTransformer;
/**
* Optional callbacks to observe the transformation process of {@link transform}.
*/
export interface TransformEvents {
/**
* An optional callback that will be called at the start of every pass.
*
* @param ast The AST that will be transformed.
* @param pass The number of the pass that will be performed. Starts at `1`.
*/
onPassStart?: (ast: NoParent<Expression>, pass: number) => void;
/**
* An optional callback that will be called every time a transformer mutates the AST.
*
* @param ast The AST that was transformed.
* @param node The node that was mutated by the transformer. Descendants of this node may have been mutated as well.
* @param transformer The transformer that mutated the AST.
*/
onChange?: (ast: NoParent<Expression>, node: NoParent<Node>, transformer: Transformer) => void;
}
/**
* The options of {@link transform}.
*/
export interface TransformOptions {
/**
* The maximum number of times the transformer will be applied to the AST.
*
* This is only a maximum. The transformer will be stopped before this number is reached if the AST isn't modified
* anymore.
*
* @default 10
*/
maxPasses?: number;
/**
* Optional events to observe the transformation process.
*/
events?: TransformEvents;
}
/**
* Transforms the given expression according to the given transformer.
*
* __Do not__ use the given `ast` object again after calling this function, the object will be in an undefined state.
* Use the returned expression instead.
*
* Note: This function knows about {@link CombinedTransformer} and will give it special treatment. Instead of applying
* the transformer as is, it will apply all of its transformers instead. While this does not change the behavior of the
* transformer, it does change which transformers the {@link TransformEvents} will see. Instead of seeing the combined
* transformer, they will see the individual transformers.
*
* @param transformer The transformer to apply.
* @param ast The expression to transform.
* @param options Optional settings for the transformation. See {@link TransformOptions}.
* @returns The transformed expression.
*/
export function transform(
transformer: Transformer,
ast: NoParent<Expression>,
options?: Readonly<TransformOptions>
): NoParent<Expression>;
/**
* A character base is constructed from a collection of character sets. It holds a list of disjoint, non-empty
* character sets - the base sets - that can be used to construct every character set in the collection it was
* constructed from.
*
* ## Guarantees
*
* - The base sets are guaranteed to be mutually disjoint and non-empty.
*
* - Every character set in the collection can be constructed by combining (union) a unique set of base sets.
*
* - The list of base sets is guaranteed to be as small as possible. There are at most `min(2^n - 1, o)` base sets
* where `n` is the number of unique, non-empty character sets in the collection, and `o` is the number of characters
* in the union of all character sets in the collection.
*
* ## Use case
*
* The primary purpose of base sets is to remap alphabets. Some FA operations scale with the number of characters in the
* alphabet of the FA (e.g. DFA minimization).
*
* Base sets can be used to determine which characters in an FA's alphabet *Σ* cannot be distinguished by the FA *A*.
* Two characters *a,b* in *Σ* are indistinguishable if for all inputs *w* the following hold true:
*
* 1. *w* is accepted by *A* iff *w* with all occurrences of *a* replaced with *b* is accepted by *A*.
* 2. *w* is accepted by *A* iff *w* with all occurrences of *b* replaced with *a* is accepted by *A*.
*
* Two indistinguishable characters are guaranteed to be in the same base set.
*
* By treating each base set as a character, it is possible to create a new (smaller) alphabet *Γ* (*|Γ| <= |Σ|*) such
* that the FA *A* still behaves the same.
*
* Since *Γ* is typically (several orders of magnitude) smaller, operations that scale with the size of the alphabet
* can be done more quickly.
*/
export class CharBase {
/**
* A list of disjoint, non-empty character sets.
*
* See {@link CharBase} to learn more.
*/
readonly sets: readonly CharSet[];
/**
* Create the base sets of the given collection of character sets.
*
* See {@link CharBase} to learn more.
*
* @param charSets The collection of character sets to create the base sets from.
* @throws `RangeError` if the collection contains two character sets with different maximums.
*/
constructor(charSets: Iterable<CharSet>);
/**
* Splits the given character set into its base sets.
*
* The returned array will be a list of indexes of base sets necessary to construct the given character set. The
* indexes will be sorted and occur at most once.
*
* **Note**: This assumes that `charSet` is either empty or can be constructed from the base sets. If the
* assumption is not met, the output of this function will be undefined.
*
* @param charSet The character set to split.
*/
split(charSet: CharSet): number[];
}
/**
* A read-only view of a map from characters to generic values.
*
* Iterating the map yields the same key-value pairs as {@link entries}: ranges of characters together with the value
* they map to.
*/
export interface ReadonlyCharMap<T> extends Iterable<[CharRange, T]> {
/**
* Returns whether this map is empty.
*
* This is equivalent to `this.size === 0` and `this.entryCount === 0`.
*/
readonly isEmpty: boolean;
/**
* The number of characters in this map. This is different from {@link entryCount}.
*
* This is equivalent to `[...this.keys()].reduce((count, range) => count + range.max - range.min + 1, 0)`.
*/
readonly size: number;
/**
* The number of entries in this map.
*
* This is different from {@link size}. In general, you should use {@link size}, because it has the same semantics
* as `Set#size` and `Map#size`.
*
* This is equivalent to `[...this.entries()].length`.
*/
readonly entryCount: number;
/**
* Returns whether the given character is a key in the map.
*
* @param char
*/
has(char: Char): boolean;
/**
* Returns whether every character in the given range is a key in the map.
*
* This is equivalent to: `[...chars].every(char => this.has(char))`.
*
* @param chars
*/
hasEvery(chars: CharRange): boolean;
/**
* Returns whether some character in the given range is a key in the map.
*
* This is equivalent to: `[...chars].some(char => this.has(char))`.
*
* @param chars
*/
hasSome(chars: CharRange): boolean;
/**
* Returns the value associated with the given character or `undefined` if the character is not a key in the map.
*
* @param char
*/
get(char: Char): T | undefined;
/**
* Invokes the given callback for every item of the character map.
*
* This method is implemented more efficiently than other iterator based methods, so choose `forEach` wherever
* possible.
*
* @param callback
*/
forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => void): void;
/**
* Returns all ranges of characters that are keys in the map.
*
* Keys will be returned in the same order as `this.entries()`.
*/
keys(): Iterable<CharRange>;
/**
* Returns all values in the map. Values might not be unique if more than one range maps to the same value.
*
* Values will be returned in the same order as `this.entries()`.
*/
values(): Iterable<T>;
/**
* Returns all key-value pairs in the map.
*
* Entries will be returned in the order of ascending ranges.
*/
entries(): Iterable<[CharRange, T]>;
/**
* Returns a mapping from the values of this map to its keys.
*
* @param maxCharacter NOTE(review): presumably the maximum of the returned character sets - confirm.
*/
invert(maxCharacter: Char): Map<T, CharSet>;
/**
* Returns a new map with all values mapped by the given function.
*
* If no function is given, the identity function is used.
*/
copy(): CharMap<T>;
copy<U>(mapFn: (value: T) => U): CharMap<U>;
}
/**
* A map from characters to generic values.
*
* The map guarantees that there are no adjacent character ranges that map to equal values, i.e. adjacent characters
* that map to equal values will always be iterated as one character range. The equality of values is given by
* JavaScript's strict equality operator (`===`).
*
* See {@link ReadonlyCharMap} for the documentation of all read-only members.
*/
export class CharMap<T> implements ReadonlyCharMap<T> {
get isEmpty(): boolean;
get size(): number;
get entryCount(): number;
has(char: Char): boolean;
hasEvery(chars: CharRange): boolean;
hasSome(chars: CharRange): boolean;
get(char: Char): T | undefined;
/**
* Sets the value for the given character.
*
* @param char
* @param value
*/
set(char: Char, value: T): void;
/**
* Sets the value for all characters in the given range.
*
* This is equivalent to `[...chars].forEach(char => this.set(char, value))`.
*
* @param chars
* @param value
*/
setRange(chars: CharRange, value: T): void;
/**
* Sets the value for all characters in the given character set.
*
* This is equivalent to `[...charSet.characters()].forEach(char => this.set(char, value))`.
*
* @param charSet
* @param value
*/
setCharSet(charSet: CharSet, value: T): void;
/**
* Deletes the given character from the map.
*
* NOTE(review): the returned boolean presumably reports whether the character was a key in the map (like
* `Map#delete`) - confirm.
*
* @param char
*/
delete(char: Char): boolean;
/**
* Deletes all characters in the given range.
*
* This is equivalent to `[...range].forEach(char => this.delete(char))`.
*
* @param range
*/
deleteRange(range: CharRange): void;
/**
* Deletes all entries in the map.
*/
clear(): void;
copy(): CharMap<T>;
copy<U>(mapFn: (value: T) => U): CharMap<U>;
/**
* NOTE(review): presumably replaces every value of this map in place with the value returned by `mapFn` - confirm.
*/
map(mapFn: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => T): void;
/**
* NOTE(review): presumably like {@link map} but restricted to the characters in `range`, where `undefined` stands for
* "no value" both as the input and as the output of `mapFn` - confirm.
*/
mapRange(
range: CharRange,
mapFn: (value: T | undefined, chars: CharRange, map: ReadonlyCharMap<T>) => T | undefined
): void;
/**
* NOTE(review): presumably removes all entries for which `conditionFn` returns `false` - confirm.
*/
filter(conditionFn: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => boolean): void;
invert(maxCharacter: Char): Map<T, CharSet>;
forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => void): void;
keys(): IterableIterator<CharRange>;
values(): IterableIterator<T>;
entries(): IterableIterator<[CharRange, T]>;
[Symbol.iterator](): IterableIterator<[CharRange, T]>;
}
/**
* An immutable interval of {@link Char}s with inclusive ends.
*
* Each interval contains all characters `x` with `min <= x <= max`.
*/
export interface CharRange {
/**
* The inclusive minimum of the interval.
*
* This value has to be less than or equal to {@link max}.
*/
readonly min: Char;
/**
* The inclusive maximum of the interval.
*
* This value has to be greater than or equal to {@link min}.
*/
readonly max: Char;
}
/**
* An immutable set of {@link Char}s represented as a sorted set of disjoint non-adjacent intervals ({@link CharRange}).
*
* All characters in the set have to be between 0 (inclusive) and the maximum of the set (inclusive).
*/
export class CharSet {
/**
* The greatest character which can be element of the set.
*/
readonly maximum: Char;
/**
* An array of ranges representing this character set.
*
* The array must be guaranteed to have the following properties at all times:
*
* 1. Any two ranges are disjoint.
* 2. Any two ranges are non-adjacent.
* 3. 0 <= `min` <= `max` <= `this.maximum` for all ranges.
* 4. All ranges are sorted by ascending `min`.
*/
readonly ranges: readonly CharRange[];
/**
* Returns `true` if this set doesn't contain any characters.
*/
get isEmpty(): boolean;
/**
* Returns `true` if all characters in the range from 0 to `this.maximum`, including 0 and `this.maximum`, are in
* the set.
*/
get isAll(): boolean;
/**
* Returns the number of unique characters in the set.
*
* The returned number will be at least `0` and at most `this.maximum + 1`.
*/
get size(): number;
/**
* Returns an iterable of all characters in this set.
*
* Characters are sorted by ascending order and each character is yielded exactly once.
*
* Note: The iterable is stable. It can be iterated multiple times.
*/
characters(): Iterable<Char>;
/**
* Returns a string representation of the character set.
*/
toString(): string;
/**
* Returns a string representation of the ranges of this character set.
*
* The string representation has the following rules:
*
* 1. Each character is represented as a hexadecimal number.
* 2. Each range where `min == max` will be represented by the `min` character.
* 3. Each range where `min != max` will be represented by `min` followed by `".."` followed by `max`.
* 4. The sequence of ranges will be joined together using `", "`.
*
* The returned string representation will have the following format:
*
* ```
* string = [ ranges ]
* ranges = range *( ", " range )
* range = +hex [ ".." +hex ]
* hex = "a" | "b" | "c" | "d" | "e" | "f" | digit
* digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
* ```
*/
toRangesString(): string;
/**
* Returns a string representation of the Unicode ranges of this character set.
*
* The primary purpose of this function is to provide an easy way to get a readable representation of a Unicode or
* Unicode-like character set. The format is optimized for ease of reading for humans.
*
* The format follows these rules:
* - If the character set is empty, `empty` will be returned.
* - If the character set contains all characters, `all` will be returned.
* - Ranges may be negated, which is indicated with `not`. E.g. `not a b` is the character set that contains all
* characters except for a and b.
* - A contiguous range of characters is represented using `min-max` where `min` and `max` are formatted characters.
* - Single characters are formatted as either:
* - a Unicode character (e.g. `a`),
* - a quoted Unicode character (e.g. `'-'`), or
* - a Unicode escape (e.g. `U+FF`).
*
* The returned string representation will have the following format:
*
* ```
* string = "all" | "empty" | ranges | "not " ranges
* ranges = range *( " " range )
* range = char [ "-" char ]
* char = literal | quoted | escape
* literal = ?Printable Unicode characters?
* quoted = "'" ?any character? "'"
* escape = "U+" +hex
* hex = "A" | "B" | "C" | "D" | "E" | "F" | digit
* digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
* ```
*/
toUnicodeString(): string;
/**
* Returns an empty character set with the given maximum.
*
* @param maximum The greatest character which can be element of the set.
*/
static empty(maximum: Char): CharSet;
/**
* Returns a complete character set with the given maximum.
*
* @param maximum The greatest character which will be element of the set.
*/
static all(maximum: Char): CharSet;
/**
* Returns a character set which contains the given characters.
*
* @param maximum The greatest character which can be element of the set.
* @param characters A sorted collection of characters.
* @throws `RangeError` if the given collection is not sorted or contains characters greater than `maximum`.
*/
static fromCharacters(maximum: Char, characters: Iterable<Char>): CharSet;
/**
* Returns a character set which contains the given range.
*
* @param maximum The greatest character which can be element of the set.
* @param range
* @throws `RangeError` if the maximum of the given range is greater than `maximum`.
*/
static fromRange(maximum: Char, range: CharRange): CharSet;
/**
* Returns a character set which contains the given character.
*
* @param maximum The greatest character which can be element of the set.
* @param char
* @throws `RangeError` if the given character is greater than `maximum`.
*/
static fromCharacter(maximum: Char, char: Char): CharSet;
/**
* Returns whether this and the given character set are equivalent.
*
* Two `CharSet`s are equal if and only if:
*
* 1. They have the same maximum.
* 2. They contain the same characters.
*
* Since each set of characters has a unique range representation, 2 equal `CharSet`s are guaranteed to have equal
* ranges.
*
* A `CharSet` and a `CharRange` are equal if and only if they contain the same characters.
*
* @param other
*/
equals(other: CharSet | CharRange): boolean;
/**
* Compares this set with the given set and returns an integer value describing their relation. Two equivalent sets
* are always guaranteed to return 0.
*
* The order defined by this function is guaranteed to be a
* [total order](https://en.wikipedia.org/wiki/Total_order). Apart from this, no other guarantees are given.
*
* @param other
*/
compare(other: CharSet): number;
/**
* Returns a character set with the given maximum.
*
* The ranges of the returned character set are equivalent to the ranges of
* `this.intersect({ min: 0, max: newMaximum })`.
*
* @param newMaximum
* @returns
*/
resize(newMaximum: Char): CharSet;
/**
* Returns [the complement](https://en.wikipedia.org/wiki/Complement_(set_theory)) of this set.
*
* The returned set will have the same maximum as this set.
*/
negate(): CharSet;
/**
* Returns [the union](https://en.wikipedia.org/wiki/Union_(set_theory)) of this set and all given sets and
* character ranges.
*
* The returned set will have the same maximum as this set.
*
* @param data
* @throws `RangeError` If the maximum of one of the given sets differs from the maximum of this set or if the
* maximum of one of the given ranges is greater than the maximum of this set.
*/
union(...data: (Iterable<CharRange> | CharSet)[]): CharSet;
/**
* Returns [the intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)) of this set and the given
* set/ranges of characters.
*
* The returned set will have the same maximum as this set.
*
* @param other
* @throws `RangeError` If the maximum of the given set differs from the maximum of this set.
*/
intersect(other: CharSet | CharRange): CharSet;
/**
* Returns a set that contains all characters of this set that are not in the given set/range.
*
* The returned set will have the same maximum as this set.
*
* @param other
* @throws `RangeError` If the maximum of the given set differs from the maximum of this set.
*/
without(other: CharSet | CharRange): CharSet;
/**
* Returns whether this set contains the given character.
*
* @param character
*/
has(character: Char): boolean;
/**
* Returns whether `this ⊇ other`.
*
* @param other
*/
isSupersetOf(other: CharSet | CharRange): boolean;
/**
* Returns whether `this ⊆ other`.
*
* @param other
*/
isSubsetOf(other: CharSet | CharRange): boolean;
/**
* Returns whether `this ⊃ other`.
*
* @param other
*/
isProperSupersetOf(other: CharSet | CharRange): boolean;
/**
* Returns whether `this ⊂ other`.
*
* @param other
*/
isProperSubsetOf(other: CharSet | CharRange): boolean;
/**
* Returns whether this set and the given set (or range) are disjoint.
*
* @param other
*/
isDisjointWith(other: CharSet | CharRange): boolean;
/**
* Returns any one of the common characters of this set and the given set or range.
*
* If this character set is disjoint with the given character set/range, then `undefined` will be returned.
*
* @param other
*/
commonCharacter(other: CharSet | CharRange): Char | undefined;
}
/**
* A character is a non-negative integer.
*
* This is one of the core concepts of refa. Instead of operating on JavaScript strings, UTF16 character codes, or
* Unicode code points, this library uses plain numbers instead. This makes refa agnostic to text encodings and even
* text in general since the integers used as characters may represent arbitrary concepts.
*
* There are only 2 restrictions on the numbers that can be characters:
*
* 1. They have to be non-negative integers.
* 2. They can be at most `Number.MAX_SAFE_INTEGER`.
*
* ---
*
* This type serves as a way to document characters. It is a clear way to signal that a value is not just any number.
*
* The optional `__char` property of type `never` is a type-level marker only; it cannot hold a value.
*/
export type Char = number & {
__char?: never;
};
/**
* A word is a finite sequence of {@link Char}s.
*
* This is one of the core concepts of refa. Instead of operating on JavaScript strings, all functions operate on
* {@link Char}s and char arrays (= words). This means that refa is agnostic to text encodings, the string
* representation of JavaScript, and even text itself.
*
* This type serves as a way to document words. It should _not_ be used interchangeably with `Char[]` or `number[]`.
*/
export type Word = Char[];
/**
* An immutable finite sequence of {@link Char}s.
*
* This is an immutable view of a {@link Word}.
*/
export type ReadonlyWord = readonly Char[];
/**
* The common interface of finite automata (FA).
*
* It offers properties of the accepted language, a membership test, iteration over accepted words, and conversions
* to a regular expression AST and to textual graph formats.
*/
export interface FiniteAutomaton {
/**
* Returns whether this FA accepts the empty language meaning that it doesn't accept any words.
*/
readonly isEmpty: boolean;
/**
* Returns whether the formal language accepted by this FA contains finitely many words.
*
* __Note__: Finite does not mean that all words can be iterated in practice. E.g. the set of all Unicode words with
* 10 or less characters contains 2.6e54 many words and can be accepted by a DFA with only 11 states.
*/
readonly isFinite: boolean;
/**
* The maximum character that is part of the alphabet of the words that this FA can accept.
*/
readonly maxCharacter: Char;
/**
* Returns whether this FA accepts the given word.
*
* @param word The characters of the word to test.
*/
test(word: ReadonlyWord): boolean;
/**
* Returns an iterable that will yield all words accepted by this FA. Words are yielded by ascending length.
*
* If this FA accepts infinitely many words, the iterable will never end.
*/
words(): Iterable<Word>;
/**
* Returns an iterable that will yield all word sets accepted by this FA. Word sets are yielded by ascending length.
*
* If this FA accepts infinitely many words, the iterable will never end. If this FA is finite, the iterable will
* end after at most `2^O(n)` word sets (`n` = number of states).
*
* If you analyse the words of an FA, consider using this method instead of `words`. If this method yields `k` word
* sets, then `words` will yield up to `O(k * m ^ l)` words (`m` = number of possible characters, `l` = the maximum
* length of any of the `k` word sets).
*/
wordSets(): Iterable<WordSet>;
/**
* Returns a string representation of this FA.
*/
toString(): string;
/**
* Returns the AST of a regular expression that accepts the same language as this FA.
*
* @param options
*/
toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>;
/**
* Returns the string representation of this FA in the
* [DOT format](https://en.wikipedia.org/wiki/DOT_(graph_description_language)).
*
* The output of this function can be passed to any graph visualization program. This can be a
* [local installation](https://graphviz.org/download/) or an [online editor](https://edotor.net/).
*
* By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s. It's possible to provide a
* custom stringify function using the `charSetToString` parameter.
*
* @param charSetToString
*/
toDot(charSetToString?: (charSet: CharSet) => string): string;
/**
* Returns the string representation of this FA in the [Mermaid format](https://mermaid.js.org/).
*
* By default, {@link CharSet#toUnicodeString} is used to represent {@link CharSet}s. It's possible to provide a
* custom stringify function using the `charSetToString` parameter.
*
* @param charSetToString
*/
toMermaid(charSetToString?: (charSet: CharSet) => string): string;
}
/**
* A graph iterator for all states of an FA with final states.
*
* @template S The type of a state in the FA to iterate.
* @template O The type of the value each state maps to.
*/
export interface FAIterator<S, O = Iterable<S>> {
/**
* The initial state of the FA.
*/
readonly initial: S;
/**
* Returns the value a state maps to.
*
* Callers of this function are allowed to call the function **without** a `this` argument.
*
* @see {@link stableOut}
*/
readonly getOut: (state: S) => O;
/**
* Whether the {@link getOut} function is stable during the lifetime of the iterator.
*
* Stable means that if `getOut` gets called for the same state more than once, it will always return the same
* value.
*
* The sameness of states is defined by
* [the key equality of the Map class](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map#key_equality).
*
* The sameness of returned values is not defined by this interface and depends on the iterator.
*
* I.e. a stable `getOut` function may return new collections/iterators on subsequent invocations as long as the
* collections/iterators are considered equal (e.g. equal could be defined as "same elements") by the consumer of
* the iterator.
*
* @default false
*/
readonly stableOut?: boolean;
/**
* Returns whether the given state is a final state.
*
* This function is guaranteed to be deterministic during the time the iterator is used. It is also guaranteed to be
* sufficiently fast, usually `O(1)` can be assumed.
*
* Callers of this function are allowed to call the function **without** a `this` argument.
*/
readonly isFinal: (state: S) => boolean;
}
/**
* A factory for the nodes of finite automata.
*
* @template S The type of the nodes (states) created by the factory.
*/
export interface NodeFactory<S> {
/**
* Creates a new state.
*
* @throws {@link TooManyNodesError}
* May be thrown if the number of created nodes exceeds some limit.
*/
readonly createNode: () => S;
}
/**
* An FA builder has the responsibility of constructing a finite automaton.
*
* The constructed FA is always owned by the builder.
*
* @template S The type of a state.
* @template T The transition type of the values linking states.
*/
export interface FABuilder<S, T> extends NodeFactory<S> {
/**
* The initial state of the FA.
*/
readonly initial: S;
/**
* Makes the given state behave like a final state of this FA.
*
* This does not necessarily mean that the given state will be a final state. I.e. calling `makeFinal(s)` does not
* necessitate that `isFinal(s)` is true.
*
* The implementation has to guarantee that calling this method for the same state more than once is allowed.
*/
readonly makeFinal: (state: S) => void;
/**
* Returns whether the given state is a final state.
*
* This operation is assumed to be semantically equivalent to {@link FAIterator.isFinal}.
*/
readonly isFinal: (state: S) => boolean;
/**
* Links the two given states using the given transition.
*
* Calling this operation more than once for the given `from` and `to` states is not guaranteed to succeed.
*/
readonly linkNodes: (from: S, to: S, transition: T) => void;
}
/**
* An {@link FAIterator} where the transitions of a state are a map of states to character sets.
*
* This is a commonly used interface when dealing with FA. It's the common core all currently implemented FA support.
*
* @template T The type of a state.
*/
export type TransitionIterator<T> = FAIterator<T, ReadonlyMap<T, CharSet>>;
/**
* A graph or FA that can create a {@link TransitionIterator}.
*
* @template T The type of a state of the created {@link TransitionIterator}.
*/
export interface TransitionIterable<T> {
readonly maxCharacter: Char;
/**
* Returns a {@link TransitionIterator} for this graph or FA.
*/
readonly transitionIterator: () => TransitionIterator<T>;
}
/**
* Options for `toRegex` operations that convert an FA into an RE AST (e.g. {@link DFA.toRegex}).
*/
export interface ToRegexOptions {
/**
* The maximum number of RE AST nodes the implementation is allowed to create.
*
* If the implementation has to create more nodes to create the RE, a `TooManyNodesError` will be thrown. This
* maximum will be checked before any optimization passes.
*
* @default 10000
*/
maxNodes?: number;
/**
* The maximum number of optimization passes that will be done after the initial RE AST was created.
*
* The initial AST is usually a lot more complex than necessary. Optimizations are then applied in order to minimize
* the AST until this limit is reached or the AST can be optimized no further.
*
* The default number of passes is implementation defined.
*/
maxOptimizationPasses?: number;
}
/**
* A readonly {@link DFA}.
*/
export interface ReadonlyDFA extends FiniteAutomaton, TransitionIterable<DFA.ReadonlyNode> {
/**
* The initial state of the DFA.
*/
readonly initial: DFA.ReadonlyNode;
/**
* The set of final states of the DFA.
*
* This set may be empty or contain nodes not reachable from the initial state.
*/
readonly finals: ReadonlySet<DFA.ReadonlyNode>;
/**
* Returns an {@link FAIterator} whose states are nodes of this DFA.
*/
stateIterator(): FAIterator<DFA.ReadonlyNode>;
/**
* Yields all nodes reachable from the initial state including the initial state.
*
* This may include trap states, but it will not include unreachable final states.
*
* The order in which nodes will be returned is implementation defined and may change after any operation that
* modifies the DFA.
*
* Modifying the DFA while iterating will result in implementation-defined behavior. The implementation may stop the
* iteration or yield any nodes.
*
* This operation runs in _O(E + V)_ where _E_ is the number of nodes reachable from the initial state and _V_ is
* the number of transitions.
*/
nodes(): Iterable<DFA.ReadonlyNode>;
/**
* Returns the number of nodes reachable from the initial state including the initial state.
*
* This returns the number of nodes returned by {@link nodes}.
*/
countNodes(): number;
/**
* Creates a new DFA equivalent to this one.
*
* @param factory The node factory used to create the nodes of the new DFA.
*/
copy(factory?: NodeFactory<DFA.Node>): DFA;
/**
* Returns whether this and the given DFA are structurally equal meaning that all nodes and all transitions are
* equal.
*
* @param other The DFA to compare this DFA with.
*/
structurallyEqual(other: ReadonlyDFA): boolean;
}
/**
* A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton).
*
* This class implements DFAs with the following properties:
*
* - There is exactly one initial state.
*
* - There may be any number of final states.
*
* This is implemented using a `Set` of states.
*
* - No epsilon transitions.
*
* - A transition always consumes a character.
*
* (All character sets are guaranteed to be non-empty.)
*
* - Transitions are unordered.
*
* As a consequence, `/aa|bb/` and `/bb|aa/` have the same state machine.
*
* - Between any two states, there can at most be one transition.
*/
export class DFA implements ReadonlyDFA {
readonly initial: DFA.Node;
readonly finals: Set<DFA.Node>;
readonly maxCharacter: Char;
get isEmpty(): boolean;
get isFinite(): boolean;
stateIterator(): FAIterator<DFA.ReadonlyNode>;
transitionIterator(): TransitionIterator<DFA.ReadonlyNode>;
nodes(): Iterable<DFA.Node>;
countNodes(): number;
test(word: ReadonlyWord): boolean;
wordSets(): Iterable<WordSet>;
words(): Iterable<Word>;
toString(): string;
toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>;
toDot(charSetToString?: (charSet: CharSet) => string): string;
toMermaid(charSetToString?: (charSet: CharSet) => string): string;
copy(factory?: NodeFactory<DFA.Node>): DFA;
structurallyEqual(other: ReadonlyDFA): boolean;
removeUnreachable(): void;
/**
* [Minimizes](https://en.wikipedia.org/wiki/DFA_minimization) this DFA.
*/
minimize(): void;
/**
* Complements this DFA.
*
* This DFA after calling this function will accept all words that are not accepted by this DFA before calling this
* function.
*
* This operation will create at most 1 node with the given factory.
*
* @param factory The node factory used to create the new node, if one is needed.
*/
complement(factory?: NodeFactory<DFA.Node>): void;
/**
* Modifies this DFA such that all prefixes of all accepted words are also accepted.
*
* If the language of this DFA is empty, then it will remain empty.
*
* Unreachable states will be removed by this operation.
*/
prefixes(): void;
/**
* Creates a new DFA which matches no words. The language of the returned DFA is empty.
*
* This operation will create exactly 1 node with the given factory.
*
* @param options The options for the new DFA (see {@link DFA.Options}).
* @param factory The node factory used to create the node of the new DFA.
*/
static empty(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA;
/**
* Creates a new DFA which matches only the empty word.
*
* This operation will create exactly 1 node with the given factory.
*
* @param options The options for the new DFA (see {@link DFA.Options}).
* @param factory The node factory used to create the node of the new DFA.
*/
static emptyWord(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA;
/**
* Creates a new DFA which matches all words.
*
* This operation will create exactly 1 node with the given factory.
*
* @param options The options for the new DFA (see {@link DFA.Options}).
* @param factory The node factory used to create the node of the new DFA.
*/
static all(options: Readonly<DFA.Options>, factory?: NodeFactory<DFA.Node>): DFA;
/**
* Creates a new DFA which matches the given characters.
*
* This operation will create at most 2 nodes with the given factory.
*
* @param charSet The characters the new DFA matches.
* @param factory The node factory used to create the nodes of the new DFA.
*/
static fromCharSet(charSet: CharSet, factory?: NodeFactory<DFA.Node>): DFA;
/**
* Returns a new DFA which is equivalent to the intersection of the two given FA.
*
* @param left The first of the two FA to intersect.
* @param right The second of the two FA to intersect.
* @param factory The node factory used to create the nodes of the new DFA.
*/
static fromIntersection<L, R>(
left: TransitionIterable<L>,
right: TransitionIterable<R>,
factory?: NodeFactory<DFA.Node>
): DFA;
/**
* Creates a new DFA which matches all and only all of the given words.
*
* @param words The words the new DFA matches.
* @param options The options for the new DFA (see {@link DFA.Options}).
* @param factory The node factory used to create the nodes of the new DFA.
*/
static fromWords(
words: Iterable<ReadonlyWord>,
options: Readonly<DFA.Options>,
factory?: NodeFactory<DFA.Node>
): DFA;
/**
* Creates a new DFA which matches all and only all of the given word sets.
*
* @param wordSets The word sets the new DFA matches.
* @param options The options for the new DFA (see {@link DFA.Options}).
* @param factory The node factory used to create the nodes of the new DFA.
*/
static fromWordSets(
wordSets: Iterable<ReadonlyWordSet>,
options: Readonly<DFA.Options>,
factory?: NodeFactory<DFA.Node>
): DFA;
/**
* Creates a DFA from the given FA.
*
* NOTE(review): presumably the returned DFA accepts the same language as the given FA - confirm against the
* implementation.
*/
static fromFA<InputNode>(fa: TransitionIterable<InputNode>, factory?: NodeFactory<DFA.Node>): DFA;
/**
* Creates a DFA from the given transition iterator.
*
* NOTE(review): presumably the returned DFA accepts the language described by the iterator - confirm against the
* implementation.
*/
static fromTransitionIterator<InputNode>(
iter: TransitionIterator<InputNode>,
options: Readonly<DFA.Options>,
factory?: NodeFactory<DFA.Node>
): DFA;
/**
* Creates a DFA from the given {@link DFA.Builder} and options.
*/
static fromBuilder(builder: DFA.Builder, options: Readonly<DFA.Options>): DFA;
}
/**
* A namespace for DFA-specific classes and interfaces.
*
* @see {@link DFA} (class)
*/
export namespace DFA {
/**
* A readonly view of a {@link Node}.
*/
interface ReadonlyNode {
readonly out: ReadonlyCharMap<ReadonlyNode>;
}
/**
* A node (state) of a {@link DFA}.
*/
class Node implements ReadonlyNode {
readonly out: CharMap<Node>;
/**
* Links this node to the given node via the given character set.
*/
link(to: Node, via: CharSet): void;
/**
* Unlinks this node from the given node.
*/
unlink(to: Node): void;
/**
* Unlinks all outgoing and incoming transitions of this node.
*/
unlinkAll(): void;
}
/**
* An unlimited node factory that will simply call the {@link Node} constructor.
*/
const nodeFactory: NodeFactory<Node>;
/**
* A {@link NodeFactory} for {@link Node}s with a `limit`.
*
* NOTE(review): presumably `createNode` throws a `TooManyNodesError` once the limit is exceeded - confirm against
* the implementation.
*/
class LimitedNodeFactory implements NodeFactory<Node> {
readonly limit: number;
constructor(limit?: number);
createNode(): Node;
}
/**
* An {@link FABuilder} for DFAs that links {@link Node}s using {@link CharSet} transitions.
*/
class Builder implements FABuilder<Node, CharSet> {
readonly initial: Node;
readonly finals: Set<Node>;
readonly factory: NodeFactory<Node>;
constructor(factory: NodeFactory<Node>);
makeFinal(state: Node): void;
isFinal(state: Node): boolean;
linkNodes(from: Node, to: Node, transition: CharSet): void;
createNode(): Node;
}
/**
* Options used when creating a {@link DFA}.
*/
interface Options {
/**
* The maximum numerical value any character can have.
*
* This will be the maximum of all underlying {@link CharSet}s.
*/
maxCharacter: Char;
}
}
/**
* A readonly {@link ENFA}.
*/
export interface ReadonlyENFA extends FiniteAutomaton, TransitionIterable<ENFA.ReadonlyNode> {
/**
* The initial state of the ENFA.
*/
readonly initial: ENFA.ReadonlyNode;
/**
* The final state of the ENFA.
*
* This state may not be reachable from the initial state.
*/
readonly final: ENFA.ReadonlyNode;
/**
* Whether this ENFA is in its normal form.
*
* @see {@link ENFA}
*/
readonly isNormalized: boolean;
/**
* Returns an {@link FAIterator} whose states are nodes of this ENFA.
*
* NOTE(review): `resolveEpsilon` presumably controls whether epsilon transitions are resolved by the iterator -
* confirm against the implementation.
*/
stateIterator(resolveEpsilon: boolean): FAIterator<ENFA.ReadonlyNode>;
/**
* Yields all nodes reachable from the initial state including the initial state.
*
* This may include trap states, but it will not include the final state if it is unreachable from the initial
* state.
*
* The order in which nodes will be returned is implementation defined and may change after any operation that
* modifies the ENFA.
*
* Modifying the ENFA while iterating will result in implementation-defined behavior. The implementation may stop
* the iteration or yield any nodes.
*
* This operation runs in _O(E + V)_ where _E_ is the number of nodes reachable from the initial state and _V_ is
* the number of transitions.
*/
nodes(): Iterable<ENFA.ReadonlyNode>;
/**
* Returns the number of nodes reachable from the initial state including the initial state.
*
* This returns the number of nodes returned by {@link nodes}.
*/
countNodes(): number;
/**
* Create a mutable copy of this ENFA.
*
* @param factory The node factory used to create the nodes of the copy.
*/
copy(factory?: NodeFactory<ENFA.Node>): ENFA;
}
/**
* A [nondeterministic finite automaton](https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton) with epsilon
* transitions.
*
* This class implements NFAs with the following properties:
*
* - There is exactly one initial state.
*
* - There is exactly one final state.
*
* - There are epsilon transitions.
*
* - A transition is either an epsilon transition or consumes a character.
*
* Epsilon transitions are represented using `null` and characters are represented using non-empty `CharSet`s.
*
* - Transitions are ordered.
*
* As a consequence, `/aa|bb/` and `/bb|aa/` have different state machines in this NFA implementation.
*
* Order is only guaranteed as long as no transitions are removed. Order is defined by the key order of the JavaScript
* `Map` class.
*
* - Between any two states, there can at most be one transition.
*
* Unlike the {@link NFA} class, transitions cannot be merged. As a consequence, `/a|a/` and `/a/` have different
* state machines in this NFA implementation.
*
* ## Normal form
*
* The normal form of this ENFA implementation has the following restrictions:
*
* - The initial state must not have incoming transitions.
* - The final state must not have outgoing transitions.
* - The initial state and final state are different states.
*
* Non-normalized ENFAs will either be tolerated or normalized by operations.
*/
export class ENFA implements ReadonlyENFA {
initial: ENFA.Node;
final: ENFA.Node;
readonly maxCharacter: Char;
get isEmpty(): boolean;
get isFinite(): boolean;
get isNormalized(): boolean;
/**
* Brings this ENFA into its normal form.
*
* This operation will create at most 2 nodes with the given factory.
*
* @param factory
* @see {@link ENFA}
*/
normalize(factory?: NodeFactory<ENFA.Node>): void;
stateIterator(resolveEpsilon: boolean): FAIterator<ENFA.ReadonlyNode>;
transitionIterator(): TransitionIterator<ENFA.ReadonlyNode>;
nodes(): Iterable<ENFA.Node>;
countNodes(): number;
copy(factory?: NodeFactory<ENFA.Node>): ENFA;
test(word: ReadonlyWord): boolean;
wordSets(): Iterable<WordSet>;
words(): Iterable<Word>;
toString(): string;
toRegex(options?: Readonly<ToRegexOptions>): NoParent<Expression>;
toDot(charSetToString?: (charSet: CharSet) => string): string;
toMermaid(charSetToString?: (charSet: CharSet) => string): string;
/**
* Modifies this ENFA to accept the concatenation of this ENFA and the given FA.
*
* @param other
* @param factory
*/
append<O>(other: TransitionIterable<O>, factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept the concatenation of this ENFA and the other ENFA.
*
* This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other
* ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other
* ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again.
*
* This operation will create at most 4 nodes with the given factory.
*
* @param other
* @param factory
*/
appendInto(other: ENFA, factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept the concatenation of the given FA and this ENFA.
*
* @param other
* @param factory
*/
prepend<O>(other: TransitionIterable<O>, factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept the concatenation of the other ENFA and this ENFA.
*
* This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other
* ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other
* ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again.
*
* This operation will create at most 4 nodes with the given factory.
*
* @param other
* @param factory
*/
prependInto(other: ENFA, factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept the language of this ENFA and the language of the given FA.
*
* If the union kind is `left`, then this ENFA will be modified to accept `<other>|<this>`. Otherwise, it will be
* modified to accept `<this>|<other>`.
*
* @param other
* @param kind
* @param factory
*/
union<O>(other: TransitionIterable<O>, kind?: "left" | "right", factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept the language of this ENFA and the language of the other ENFA.
*
* If the union kind is `left`, then this ENFA will be modified to accept `<other>|<this>`. Otherwise, it will be
* modified to accept `<this>|<other>`.
*
* This operation is implemented by moving (not copying) the states from the other ENFA into this ENFA. The other
* ENFA will be in an **invalid state** after this operation completes. The initial and final states of the other
* ENFA will be random nodes of this ENFA. Make sure that you never use the other ENFA again.
*
* This operation will create at most 6 nodes with the given factory.
*
* @param other
* @param kind
* @param factory
*/
unionInto(other: ENFA, kind?: "left" | "right", factory?: NodeFactory<ENFA.Node>): void;
/**
* Modifies this ENFA to accept at least `min` and at most `max` concatenations of itself.
*
* Both `min` and `max` have to be non-negative integers with `min <= max`.
* `max` is also allowed to be `Infinity`.
*
* @param min
* @param max
* @param lazy
* @param factory
*/
quantify(min: number, max: number, lazy?: boolean, factory?: NodeFactory<ENFA.Node>): void;
/**
* Removes the empty word from the accepted language of this ENFA.
*
* Unreachable states will be removed by this operation.
*
* @param factory
*/
withoutEmptyWord(factory?: NodeFactory<ENFA.Node>): void;
/**
* All states which cannot be reached from the initial state or cannot reach (or are) a final state, will be
* removed.
*/
removeUnreachable(): void;
/**
* Modifies this ENFA such that all prefixes of a