chevrotain
Version:
Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers
1,790 lines (1,619 loc) • 80.5 kB
TypeScript
// UMD global declaration: when this file is consumed from a non-module script,
// the module's exports are reachable through the global name `chevrotain`.
export as namespace chevrotain
// String constant exported under the name VERSION (presumably the library's version - confirm).
export declare const VERSION: string
/**
* This class does not actually exist, nor is it exposed at runtime.
* It is just a helper to avoid duplication in the Type Definitions
* of `CstParser` and `EmbeddedActionsParser`.
*/
declare abstract class BaseParser {
/**
* This must be called at the end of a Parser constructor.
* See: http://chevrotain.io/docs/tutorial/step2_parsing.html#under-the-hood
*/
protected performSelfAnalysis(): void
/**
* It is recommended to reuse the same Parser instance
* by passing an empty array to the input argument
* and only later setting the input by using the input property.
* See: http://chevrotain.io/docs/FAQ.html#major-performance-benefits
*
* NOTE(review): the constructor declared here has no `input` argument;
* the token vector is exposed through the `input` property of this class.
*
* @param tokenVocabulary - A data structure containing all the Tokens used by the Parser.
* @param config - The Parser's configuration.
*/
constructor(tokenVocabulary: TokenVocabulary, config?: IParserConfig)
/**
* List of recognition exceptions (presumably those collected while parsing - confirm).
*/
errors: IRecognitionException[]
/**
* Flag indicating the Parser is at the recording phase.
* Can be used to implement methods similar to {@link BaseParser.ACTION}
* or any other logic that requires knowledge of the recording phase.
* See:
* - https://chevrotain.io/docs/guide/internals.html#grammar-recording
* to learn more on the recording phase and how Chevrotain works.
*/
RECORDING_PHASE: boolean
/**
* Resets the parser state, should be overridden for custom parsers which "carry" additional state.
* When overriding, remember to also invoke the super implementation!
*/
reset(): void
/**
* @returns a constructor whose instances implement `ICstVisitor`.
*/
getBaseCstVisitorConstructor(): {
new (...args: any[]): ICstVisitor<any, any>
}
/**
* @returns a constructor whose instances implement `ICstVisitor`
* (presumably with default visit implementations, going by the name - confirm).
*/
getBaseCstVisitorConstructorWithDefaults(): {
new (...args: any[]): ICstVisitor<any, any>
}
/**
* @returns a record of `Rule` objects indexed by string keys (presumably the rule names - confirm).
*/
getGAstProductions(): Record<string, Rule>
/**
* @returns the grammar productions as an array of `ISerializedGast` objects.
*/
getSerializedGastProductions(): ISerializedGast[]
/**
* @param startRuleName
* @param precedingInput - The token vector up to (not including) the content assist point
*/
computeContentAssist(
startRuleName: string,
precedingInput: IToken[]
): ISyntacticContentAssistPath[]
/**
* @param grammarRule - The rule to try and parse in backtracking mode.
* @param args - argument to be passed to the grammar rule execution
*
* @return a lookahead function that will try to parse the given grammarRule and will return true if it succeeds.
*/
protected BACKTRACK<T>(
grammarRule: (...args: any[]) => T,
args?: any[]
): () => boolean
/**
* The Semantic Actions wrapper.
* Should be used to wrap semantic actions that either:
* - May fail when executing in "recording phase".
* - Have global side effects that should be avoided during "recording phase".
*
* For more information see:
* - https://chevrotain.io/docs/guide/internals.html#grammar-recording
*/
protected ACTION<T>(impl: () => T): T
/**
* Like `CONSUME` with the numerical suffix as a parameter, e.g:
* consume(0, X) === CONSUME(X)
* consume(1, X) === CONSUME1(X)
* consume(2, X) === CONSUME2(X)
* ...
* @see CONSUME
*/
protected consume(
idx: number,
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* Like `OPTION` with the numerical suffix as a parameter, e.g:
* option(0, X) === OPTION(X)
* option(1, X) === OPTION1(X)
* option(2, X) === OPTION2(X)
* ...
* @see OPTION
*/
protected option<OUT>(
idx: number,
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* Like `OR` with the numerical suffix as a parameter, e.g:
* or(0, X) === OR(X)
* or(1, X) === OR1(X)
* or(2, X) === OR2(X)
* ...
*
* NOTE(review): the non-generic (`any`) overload is declared before the generic
* one here, whereas `OR`..`OR9` declare the generic overload first. TypeScript
* tries overloads in declaration order, so the ordering may affect the inferred
* return type - confirm this difference is intentional.
*
* @see OR
*/
protected or(
idx: number,
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>
): any
protected or<T>(
idx: number,
altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>
): T
/**
* Like `MANY` with the numerical suffix as a parameter, e.g:
* many(0, X) === MANY(X)
* many(1, X) === MANY1(X)
* many(2, X) === MANY2(X)
* ...
* @see MANY
*/
protected many(
idx: number,
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* Like `AT_LEAST_ONE` with the numerical suffix as a parameter, e.g:
* atLeastOne(0, X) === AT_LEAST_ONE(X)
* atLeastOne(1, X) === AT_LEAST_ONE1(X)
* atLeastOne(2, X) === AT_LEAST_ONE2(X)
* ...
* @see AT_LEAST_ONE
*/
protected atLeastOne(
idx: number,
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
*
* A Parsing DSL method used to consume a single Token.
* In EBNF terms this is equivalent to a Terminal.
*
* A Token will be consumed, IFF the next token in the token vector matches <tokType>.
* otherwise the parser may attempt to perform error recovery (if enabled).
*
* The index in the method name indicates the unique occurrence of a terminal consumption
* inside the top level rule. What this means is that if a terminal appears
* more than once in a single rule, each appearance must have a **different** index.
*
* For example:
* ```
* this.RULE("qualifiedName", () => {
* this.CONSUME1(Identifier);
* this.MANY(() => {
* this.CONSUME1(Dot);
* // here we use CONSUME2 because the terminal
* // 'Identifier' has already appeared previously in
* // the rule 'qualifiedName'
* this.CONSUME2(Identifier);
* });
* })
* ```
*
* - See more details on the [unique suffixes requirement](http://chevrotain.io/docs/FAQ.html#NUMERICAL_SUFFIXES).
*
* @param tokType - The Type of the token to be consumed.
* @param options - optional properties to modify the behavior of CONSUME.
*/
protected CONSUME(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME1(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME2(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME3(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME4(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME5(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME6(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME7(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME8(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* @see CONSUME
* @hidden
*/
protected CONSUME9(
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken
/**
* Parsing DSL Method that Indicates an Optional production.
* in EBNF notation this is equivalent to: "[...]".
*
* Note that there are two syntax forms:
* - Passing the grammar action directly:
* ```
* this.OPTION(() => {
* this.CONSUME(Digit)}
* );
* ```
*
* - using an "options" object:
* ```
* this.OPTION({
* GATE:predicateFunc,
* DEF: () => {
* this.CONSUME(Digit)
* }});
* ```
*
* The optional 'GATE' property in "options" object form can be used to add constraints
* to invoking the grammar action.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the optional production in its top rule.
*
* @param actionORMethodDef - The grammar action to optionally invoke once
* or an "OPTIONS" object describing the grammar action and optional properties.
*/
// TODO: return `OUT | undefined` explicitly
protected OPTION<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION1<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION2<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION3<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION4<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION5<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION6<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION7<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION8<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* @see OPTION
* @hidden
*/
protected OPTION9<OUT>(
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT
/**
* Parsing DSL method that indicates a choice between a set of alternatives must be made.
* This is equivalent to an EBNF alternation (A | B | C | D ...), except
* that the alternatives are ordered like in a PEG grammar.
* This means that the **first** matching alternative is always chosen.
*
* There are several forms for the inner alternatives array:
*
* - Passing alternatives array directly:
* ```
* this.OR([
* { ALT:() => { this.CONSUME(One) }},
* { ALT:() => { this.CONSUME(Two) }},
* { ALT:() => { this.CONSUME(Three) }}
* ])
* ```
*
* - Passing alternative array directly with predicates (GATE):
* ```
* this.OR([
* { GATE: predicateFunc1, ALT:() => { this.CONSUME(One) }},
* { GATE: predicateFuncX, ALT:() => { this.CONSUME(Two) }},
* { GATE: predicateFuncX, ALT:() => { this.CONSUME(Three) }}
* ])
* ```
*
* - These syntax forms can also be mixed:
* ```
* this.OR([
* {
* GATE: predicateFunc1,
* ALT:() => { this.CONSUME(One) }
* },
* { ALT:() => { this.CONSUME(Two) }},
* { ALT:() => { this.CONSUME(Three) }}
* ])
* ```
*
* - Additionally an "options" object may be used:
* ```
* this.OR({
* DEF:[
* { ALT:() => { this.CONSUME(One) }},
* { ALT:() => { this.CONSUME(Two) }},
* { ALT:() => { this.CONSUME(Three) }}
* ],
* // OPTIONAL property
* ERR_MSG: "A Number"
* })
* ```
*
* The 'predicateFuncX' in the long form can be used to add constraints to choosing the alternative.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the alternation production in its top rule.
*
* @param altsOrOpts - A set of alternatives or an "OPTIONS" object describing the alternatives and optional properties.
*
* @returns The result of invoking the chosen alternative.
*/
protected OR<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR1<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR1(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR2<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR2(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR3<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR3(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR4<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR4(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR5<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR5(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR6<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR6(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR7<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR7(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR8<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR8(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* @see OR
* @hidden
*/
protected OR9<T>(altsOrOpts: IOrAlt<T>[] | OrMethodOpts<T>): T
protected OR9(altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>): any
/**
* Parsing DSL method, that indicates a repetition of zero or more.
* This is equivalent to EBNF repetition {...}.
*
* Note that there are two syntax forms:
* - Passing the grammar action directly:
* ```
* this.MANY(() => {
* this.CONSUME(Comma)
* this.CONSUME(Digit)
* })
* ```
*
* - using an "options" object:
* ```
* this.MANY({
* GATE: predicateFunc,
* DEF: () => {
* this.CONSUME(Comma)
* this.CONSUME(Digit)
* }
* });
* ```
*
* The optional 'GATE' property in "options" object form can be used to add constraints
* to invoking the grammar action.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the repetition production in its top rule.
*
* @param actionORMethodDef - The grammar action to optionally invoke multiple times
* or an "OPTIONS" object describing the grammar action and optional properties.
*
*/
protected MANY(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY1(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY2(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY3(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY4(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY5(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY6(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY7(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY8(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* @see MANY
* @hidden
*/
protected MANY9(
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void
/**
* Parsing DSL method, that indicates a repetition of zero or more with a separator
* Token between the repetitions.
*
* Example:
*
* ```
* this.MANY_SEP({
* SEP: Comma,
* DEF: () => {
* this.CONSUME(Number);
* // ...
* }
* })
* ```
*
* Note that because this DSL method always requires more than one argument the options object is always required
* and it is not possible to use a shorter form like in the MANY DSL method.
*
* Note that for the purposes of deciding on whether or not another iteration exists
* only a single Token is examined (the separator). Therefore if the grammar being implemented is
* so "crazy" as to require multiple tokens to identify an item separator please use the more basic DSL methods
* to implement it.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the repetition production in its top rule.
*
* @param options - An object defining the grammar of each iteration and the separator between iterations
*
*/
protected MANY_SEP(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP1(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP2(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP3(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP4(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP5(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP6(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP7(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP8(options: ManySepMethodOpts<any>): void
/**
* @see MANY_SEP
* @hidden
*/
protected MANY_SEP9(options: ManySepMethodOpts<any>): void
/**
* Convenience method, same as MANY but the repetition is of one or more.
* Failing to match at least one repetition will result in a parsing error.
*
* @see MANY
*
* @param actionORMethodDef - The grammar action to optionally invoke multiple times
* or an "OPTIONS" object describing the grammar action and optional properties.
*
*/
protected AT_LEAST_ONE(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE1(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE2(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE3(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE4(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE5(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE6(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE7(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE8(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* @see AT_LEAST_ONE
* @hidden
*/
protected AT_LEAST_ONE9(
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void
/**
* Convenience method, same as MANY_SEP but the repetition is of one or more.
* failing to match at least one repetition will result in a parsing error and
* cause the parser to attempt error recovery.
*
* Note that an additional optional property ERR_MSG can be used to provide custom error messages.
*
* The declared return type is `void`; no iteration result is returned.
*
* @see MANY_SEP
*
* @param options - An object defining the grammar of each iteration and the separator between iterations
*/
protected AT_LEAST_ONE_SEP(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP1(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP2(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP3(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP4(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP5(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP6(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP7(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP8(options: AtLeastOneSepMethodOpts<any>): void
/**
* @see AT_LEAST_ONE_SEP
* @hidden
*/
protected AT_LEAST_ONE_SEP9(options: AtLeastOneSepMethodOpts<any>): void
/**
* Returns an "imaginary" Token to insert when Single Token Insertion is done.
* Override this if you require special behavior in your grammar.
* For example if an IntegerToken is required provide one with the image '0' so it would be valid syntactically.
*/
protected getTokenToInsert(tokType: TokenType): IToken
/**
* By default all token types may be inserted. This behavior may be overridden in inheriting Recognizers.
* For example: one may decide that only punctuation tokens may be inserted automatically as they have no additional
* semantic value. A mandatory semicolon has no additional semantic meaning, but an Integer may have additional meaning
* depending on its int value and context. (Inserting an integer 0 in cardinality: "[1..]" will cause semantic issues
* as the max of the cardinality will be greater than the min value, and this is a false error!)
*/
protected canTokenTypeBeInsertedInRecovery(tokType: TokenType): boolean
/**
* @deprecated - will be removed in the future
*/
protected getNextPossibleTokenTypes(
grammarPath: ITokenGrammarPath
): TokenType[]
/**
* The token vector of this parser; the constructor documentation above
* recommends setting the input through this property when reusing an instance.
*/
input: IToken[]
protected SKIP_TOKEN(): IToken
/**
* Look-Ahead for the Token Vector
* LA(1) is the next Token ahead.
* LA(n) is the nth Token ahead.
* LA(0) is the previously consumed Token.
*
* Looking beyond the end of the Token Vector or before its beginning
* will return an IToken of type EOF {@link EOF}.
* This behavior can be used to avoid infinite loops.
*
* This is often used to implement custom lookahead logic for GATES.
* https://chevrotain.io/docs/features/gates.html
*/
protected LA(howMuch: number): IToken
}
/**
* A Parser that outputs a Concrete Syntax Tree.
* Every rule and subrule invocation declared on this class is typed as producing a `CstNode`.
* For in-depth docs see:
* - https://chevrotain.io/docs/tutorial/step3_adding_actions_root.html#alternatives
* - https://chevrotain.io/docs/guide/concrete_syntax_tree.html
*/
export declare class CstParser extends BaseParser {
/**
* Creates a Grammar Rule
*
* @param name - The rule's name.
* @param implementation - The function implementing the rule.
* @param config - Optional rule configuration.
* @returns a function typed as returning a `CstNode`, accepting an optional
* `idxInCallingRule` followed by any further arguments.
*/
protected RULE(
name: string,
implementation: (...implArgs: any[]) => any,
config?: IRuleConfig<CstNode>
): (idxInCallingRule?: number, ...args: any[]) => CstNode
/**
* Overrides a Grammar Rule
* See usage example in: https://github.com/chevrotain/chevrotain/blob/master/examples/parser/versioning/versioning.js
*
* NOTE(review): the type parameter `T` is not referenced anywhere in this signature.
*/
protected OVERRIDE_RULE<T>(
name: string,
implementation: (...implArgs: any[]) => any,
config?: IRuleConfig<CstNode>
): (idxInCallingRule?: number, ...args: any[]) => CstNode
/**
* Like `SUBRULE` with the numerical suffix as a parameter, e.g:
* subrule(0, X) === SUBRULE(X)
* subrule(1, X) === SUBRULE1(X)
* subrule(2, X) === SUBRULE2(X)
* ...
* @see SUBRULE
*/
protected subrule(
idx: number,
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* The Parsing DSL Method is used by one rule to call another.
* It is equivalent to a non-Terminal in EBNF notation.
*
* This may seem redundant as it does not actually do much.
* However using it is **mandatory** for all sub rule invocations.
*
* Calling another rule without wrapping in SUBRULE(...)
* will cause errors/mistakes in the Parser's self analysis phase,
* which will lead to errors in error recovery/automatic lookahead calculation
* and any other functionality relying on the Parser's self analysis
* output.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the sub rule invocation in its rule.
*
* @param ruleToCall - The rule (as returned by `RULE`) to invoke.
* @param options - Optional properties for the subrule invocation.
*/
protected SUBRULE(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE1(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE2(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE3(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE4(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE5(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE6(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE7(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE8(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE9(
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode
}
/**
* A Parser that relies on end user's embedded actions to control its output.
* Rules declared on this class are generic in `T`, the type returned by the
* rule's implementation function.
* For more details see:
* - https://chevrotain.io/docs/tutorial/step3_adding_actions_root.html#alternatives
* - https://chevrotain.io/docs/tutorial/step3b_adding_actions_embedded.html#simple-example
*/
export declare class EmbeddedActionsParser extends BaseParser {
/**
* Creates a Grammar Rule
*
* @param name - The rule's name.
* @param implementation - The function implementing the rule; its return type is `T`.
* @param config - Optional rule configuration.
* @returns a function typed as returning `T`, accepting an optional
* `idxInCallingRule` followed by any further arguments.
*/
protected RULE<T>(
name: string,
implementation: (...implArgs: any[]) => T,
config?: IRuleConfig<T>
): (idxInCallingRule?: number, ...args: any[]) => T
/**
* Overrides a Grammar Rule
* See usage example in: https://github.com/chevrotain/chevrotain/blob/master/examples/parser/versioning/versioning.js
*
* @param name - The name of the rule.
* @param impl - The function implementing the rule; its return type is `T`.
* @param config - Optional rule configuration.
*/
protected OVERRIDE_RULE<T>(
name: string,
impl: (...implArgs: any[]) => T,
config?: IRuleConfig<T>
): (idxInCallingRule?: number, ...args: any[]) => T
/**
* Like `SUBRULE` with the numerical suffix as a parameter, e.g:
* subrule(0, X) === SUBRULE(X)
* subrule(1, X) === SUBRULE1(X)
* subrule(2, X) === SUBRULE2(X)
* ...
* @see SUBRULE
*/
protected subrule<T>(
idx: number,
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* The Parsing DSL Method is used by one rule to call another.
* It is equivalent to a non-Terminal in EBNF notation.
*
* This may seem redundant as it does not actually do much.
* However using it is **mandatory** for all sub rule invocations.
*
* Calling another rule without wrapping in SUBRULE(...)
* will cause errors/mistakes in the Parser's self analysis phase,
* which will lead to errors in error recovery/automatic lookahead calculation
* and any other functionality relying on the Parser's self analysis
* output.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the sub rule invocation in its rule.
*
* @param ruleToCall - The rule (as returned by `RULE`) to invoke.
* @param options - Optional properties for the subrule invocation.
*/
protected SUBRULE<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE1<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE2<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE3<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE4<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE5<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE6<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE7<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE8<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
/**
* @see SUBRULE
* @hidden
*/
protected SUBRULE9<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T
}
/**
* Numeric codes for parser definition errors
* (presumably the kinds of problems detected in a grammar's definition - confirm).
*
* The values are spelled out explicitly: in an ambient (`declare`) enum a member
* without an initializer is treated as computed rather than constant.
*/
export declare enum ParserDefinitionErrorType {
INVALID_RULE_NAME = 0,
DUPLICATE_RULE_NAME = 1,
INVALID_RULE_OVERRIDE = 2,
DUPLICATE_PRODUCTIONS = 3,
UNRESOLVED_SUBRULE_REF = 4,
LEFT_RECURSION = 5,
NONE_LAST_EMPTY_ALT = 6,
AMBIGUOUS_ALTS = 7,
CONFLICT_TOKENS_RULES_NAMESPACE = 8,
INVALID_TOKEN_NAME = 9,
NO_NON_EMPTY_LOOKAHEAD = 10,
AMBIGUOUS_PREFIX_ALTS = 11,
TOO_MANY_ALTS = 12
}
/**
* Shape of the entries in {@link Lexer.lexerDefinitionErrors}.
*/
export interface ILexerDefinitionError {
// Text of the error.
message: string
// Category of the error.
type: LexerDefinitionErrorType
// Optional; presumably the TokenTypes the error relates to - confirm.
tokenTypes?: TokenType[]
}
/**
* Lexer class: constructed from a lexer definition (plus optional configuration)
* and exposes `tokenize` to turn text into an {@link ILexingResult}.
*/
export declare class Lexer {
// NOTE(review): undocumented here; presumably a marker value for tokens that should be skipped - confirm.
static SKIPPED: string
/**
* A Constant to mark "abstract" TokenTypes that are used
* purely as token categories.
* See: {@link createToken.categories}
*/
static NA: RegExp
// Array of lexer definition errors; see ILexerConfig.deferDefinitionErrorsHandling for how they are reported.
lexerDefinitionErrors: ILexerDefinitionError[]
/**
* @param lexerDefinition -
* Structure composed of Tokens Types this lexer will identify.
*
* In the simple case the structure is an array of TokenTypes.
* In the case of {@link IMultiModeLexerDefinition} the structure is an object with two properties:
* 1. a "modes" property where each value is an array of TokenTypes.
* 2. a "defaultMode" property specifying the initial lexer mode.
*
* for example:
*
* ```
* {
* modes : {
* modeX : [Token1, Token2],
* modeY : [Token3, Token4]
* },
*
* defaultMode : "modeY"
* }
* ```
*
* A lexer with {@link MultiModesDefinition} is simply multiple Lexers where only one Lexer(mode) can be active at the same time.
* This is useful for lexing languages where there are different lexing rules depending on context.
*
* The current lexing mode is selected via a "mode stack".
* The last (peek) value in the stack will be the current mode of the lexer.
* Defining entering and exiting lexer modes is done using the "push_mode" and "pop_mode" properties
* of the {@link createToken.config} parameter.
*
* - The Lexer will match the **first** pattern that matches, therefore the order of Token Types is significant.
* For example when one pattern may match a prefix of another pattern.
*
* Note that there are situations in which we may wish to order the longer pattern after the shorter one.
* For example: [keywords vs Identifiers](https://github.com/chevrotain/chevrotain/tree/master/examples/lexer/keywords_vs_identifiers).
*
* @param config - Optional lexer configuration, see {@link ILexerConfig}.
*/
constructor(
lexerDefinition: TokenType[] | IMultiModeLexerDefinition,
config?: ILexerConfig
)
/**
* Will lex (tokenize) a string.
* Note that this can be called repeatedly on different strings as this method
* does not modify the state of the Lexer.
*
* @param text - The string to lex
* @param [initialMode] - The initial Lexer Mode to start with, by default this will be the first mode in the lexer's
* definition. If the lexer has no explicit modes it will be the implicit single 'default_mode' mode.
* @returns the lexing result: tokens, token groups and lexing errors.
*/
tokenize(text: string, initialMode?: string): ILexingResult
}
/**
* The value produced by {@link Lexer.tokenize}.
*/
export interface ILexingResult {
/** Array of `IToken` objects. */
tokens: IToken[]
/** Arrays of `IToken` objects, indexed by group name. */
groups: Record<string, IToken[]>
/** Array of `ILexingError` objects. */
errors: ILexingError[]
}
/**
* Shape of the entries in {@link ILexingResult.errors}.
* NOTE(review): the field meanings below are inferred from their names - confirm.
*/
export interface ILexingError {
// Presumably the offset in the input text where the error starts.
offset: number
// Presumably the line of the error location.
line: number
// Presumably the column of the error location.
column: number
// Presumably the length of the offending text.
length: number
// Text of the error.
message: string
}
/**
* The optional configuration object accepted as the second argument of the lexer's constructor.
* Every property is optional.
*/
export interface ILexerConfig {
/**
* An optional flag indicating that lexer definition errors
* should not automatically cause an error to be raised.
* This can be useful when wishing to indicate lexer errors in another manner
* than simply throwing an error (for example in an online playground).
*/
deferDefinitionErrorsHandling?: boolean
/**
* "full" location information means all six combinations of /(end|start)(Line|Column|Offset)/ properties.
* "onlyStart" means that only startLine, startColumn and startOffset will be tracked.
* "onlyOffset" means that only the startOffset will be tracked.
*
* The less position tracking the faster the Lexer will be and the less memory used.
* However the difference is not large (~10% On V8), thus reduced location tracking options should only be used
* in edge cases where every last ounce of performance is needed.
*/
// TODO: consider renaming this to LocationTracking to align with NodeLocationTracking option on the ParserConfig.
positionTracking?: "full" | "onlyStart" | "onlyOffset"
/**
* A regExp defining custom line terminators.
* This will be used to calculate the line and column information.
*
* Note that the regExp should use the global flag, for example: /\n/g
*
* The default is: /\n|\r\n?/g
*
* But some grammars have a different definition, for example in ECMAScript:
* https://www.ecma-international.org/ecma-262/8.0/index.html#sec-line-terminators
* U+2028 and U+2029 are also treated as line terminators.
*
* In that case we would use /\n|\r|\u2028|\u2029/g
*
* Note that it is also possible to supply an optimized RegExp like implementation
* as only a subset of the RegExp APIs is needed, see {@link ILineTerminatorsTester}
* for details.
*
* Keep in mind that for the default pattern: /\n|\r\n?/g an optimized implementation is already built-in.
* This means the optimization is only relevant for lexers overriding the default pattern.
*/
lineTerminatorsPattern?: RegExp | ILineTerminatorsTester
/**
* Characters or CharCodes that represent line terminators for this lexer.
* This always needs to be provided when using a custom {@link ILexerConfig.lineTerminatorsPattern}.
* In the future this duplication may be removed or reduced.
*/
lineTerminatorCharacters?: (number | string)[]
/**
* When true this flag will cause the Lexer to throw an Error
* when it is unable to perform all of its performance optimizations.
*
* In addition error messages will be printed to the console with details on
* how to resolve the optimizations issues.
*
* Use this flag to guarantee higher lexer performance.
* The optimizations can boost the lexer's performance anywhere from 30%
* to 100%+ depending on the number of TokenTypes used.
*/
ensureOptimizations?: boolean
/**
* Can be used to disable lexer optimizations
* if there is a suspicion they are causing incorrect behavior.
* Note that this would have negative performance implications.
*/
safeMode?: boolean
/**
* A custom error message provider.
* Can be used to override the default error messages.
* For example:
* - Translating the error messages to a different language.
* - Changing the formatting.
*/
errorMessageProvider?: ILexerErrorMessageProvider
/**
* Enabling this flag will print performance tracing logs during lexer
* initialization (constructor invocation), this is useful to narrow down the cause
* of an initialization performance problem.
*
* You can also pass a numerical value which affects the verbosity
* of the traces, this number is the maximum nesting level of the traces, e.g:
* 0: Traces disabled === 'false'
* 1: Top Level traces only.
* 2: One level of nested inner traces.
* ...
*
* Note that passing the boolean `true` is identical to passing the numerical value `Infinity`
*/
traceInitPerf?: boolean | number
/**
* This flag will avoid running the Lexer validations during Lexer initialization.
*
* This can substantially improve the Lexer's initialization (constructor) time.
* @see ILexerConfig.traceInitPerf to measure the Lexer validations cost for your Lexer.
*
* Note that the Lexer validations are **extremely useful** during development time,
* e.g: Detecting empty/invalid regExp Patterns.
* So they should not be skipped during development flows.
* - For example: via a conditional that checks an env variable.
*/
skipValidations?: boolean
}
/**
* The contract of a custom lexer error message provider,
* see {@link ILexerConfig.errorMessageProvider}.
*/
export interface ILexerErrorMessageProvider {
/**
* An Unexpected Character Error occurs when the lexer is unable to match a range of one or more
* characters in the input text against any of the Token Types in its Lexer definition.
*
* @param fullText - Full original input text.
*
* @param startOffset - Offset in input text where error starts.
*
* @param length - Error length.
*
* @param line - Line number where the error occurred. (optional)
* Will not be provided when lexer is not defined to track lines/columns
*
* @param column - Column number where the error occurred. (optional)
* Will not be provided when lexer is not defined to track lines/columns
*
* @returns The error message text.
*/
buildUnexpectedCharactersMessage(
fullText: string,
startOffset: number,
length: number,
line?: number,
column?: number
): string
/**
* Unable To Pop Lexer Mode Error happens when the lexer tries to pop the last remaining mode from the mode stack
* so that there is no longer any active lexer mode.
* This error is only relevant for multi-mode lexers.
*
* @param token - The Token that requested pop mode.
*
* @returns The error message text.
*/
buildUnableToPopLexerModeMessage(token: IToken): string
}
/**
* This is the default logic Chevrotain uses to construct lexing error messages.
* It can be used as a reference or as a starting point to customize a lexer's
* error messages.
*
* - See: {@link ILexerConfig.errorMessageProvider}
*/
export declare const defaultLexerErrorProvider: ILexerErrorMessageProvider
/**
* The part of the regExp interface a chevrotain lexer relies on
* in order to compute line/column info.
*/
export interface ILineTerminatorsTester {
/**
* Behaves like a regExp's lastIndex when the global flag is enabled:
* after every match it should be updated to point to the offset at which the next
* match attempt starts.
*/
lastIndex: number
/**
* Behaves like regExp.test
*/
test: (text: string) => boolean
}
/**
* The kinds of values accepted as a Token Type's pattern (see {@link ITokenConfig.pattern}):
* a RegExp, a string, or one of the two Custom Token Pattern APIs
* ({@link CustomPatternMatcherFunc} or an object with an `exec` method).
*/
export type TokenPattern =
| RegExp
| string
| CustomPatternMatcherFunc
| ICustomPattern
/**
* The configuration object accepted by {@link createToken}.
* Only the `name` property is mandatory.
*/
export interface ITokenConfig {
/** The name of the Token Type. */
name: string
/**
* Categories enable polymorphism on Token Types.
* A TokenType X with categories C1, C2, ... ,Cn can
* be matched by the parser against any of those categories.
* In practical terms this means that:
* CONSUME(C1) can match a Token of type X.
*/
categories?: TokenType | TokenType[]
/**
* The Label is a human readable name to be used
* in error messages and syntax diagrams.
*
* For example a TokenType may be called LCurly, which is
* short for "left curly brace". The much easier to understand
* label could simply be "{".
*/
label?: string
/**
* This defines what sequence of characters would be matched
* to this TokenType when Lexing.
*
* For Custom Patterns see: http://chevrotain.io/docs/guide/custom_token_patterns.html
*/
pattern?: TokenPattern
/**
* The group property will cause the lexer to collect
* Tokens of this type separately from the other Tokens.
*
* For example this could be used to collect comments for
* post processing.
*
* See: https://github.com/chevrotain/chevrotain/tree/master/examples/lexer/token_groups
*/
group?: string
/**
* A name of a Lexer mode to "enter" once this Token Type has been matched.
* Lexer modes can be used to support different sets of possible Token Types.
*
* Lexer Modes work as a stack of Lexers, so "entering" a mode means pushing it to the top of the stack.
*
* See: https://github.com/chevrotain/chevrotain/tree/master/examples/lexer/multi_mode_lexer
*/
push_mode?: string
/**
* If "pop_mode" is true the Lexer will pop the last mode of the modes stack and
* continue lexing using the new mode at the top of the stack.
*
* See: https://github.com/chevrotain/chevrotain/tree/master/examples/lexer/multi_mode_lexer
*/
pop_mode?: boolean
/**
* The "longer_alt" property will cause the Lexer to attempt matching against another Token Type
* every time this Token Type has been matched.
*
* This feature can be useful when two Token Types have common prefixes which
* cannot be resolved (only) by the ordering of the Tokens in the lexer definition.
*
* For example see: https://github.com/chevrotain/chevrotain/tree/master/examples/lexer/keywords_vs_identifiers
* For resolving the keywords vs Identifier ambiguity.
*/
longer_alt?: TokenType
/**
* Can a String matching this Token Type's pattern possibly contain a line terminator?
* If it can, but the line_breaks property is not set to true, this will cause inaccuracies
* in the Lexer's line / column tracking.
*/
line_breaks?: boolean
/**
* Possible starting characters or charCodes of the pattern.
* These will be used to optimize the Lexer's performance.
*
* These are normally **automatically** computed, however the option to explicitly
* specify those can enable optimizations even when the automatic analysis fails.
*
* e.g:
* * string hints should be one character long.
* ```
* { start_chars_hint: ["a", "b"] }
* ```
*
* * number hints are the result of running ".charCodeAt(0)" on the strings.
* ```
* { start_chars_hint: [97, 98] }
* ```
*
* * For unicode characters outside the BMP use the first of their surrogate pairs.
* for example: The '💩' character is represented by surrogate pairs: '\uD83D\uDCA9'
* and D83D is 55357 in decimal.
* * Note that "💩".charCodeAt(0) === 55357
*/
start_chars_hint?: (string | number)[]
}
/**
* Creates a new TokenType which can then be used
* to define a Lexer and Parser.
*
* @param config - The definition of the new TokenType, see {@link ITokenConfig}.
*
* @returns The newly created TokenType.
*/
export declare function createToken(config: ITokenConfig): TokenType
/**
* Utility to create Chevrotain IToken "instances".
* Note that Chevrotain tokens are not real instances of their TokenTypes
* and thus the `instanceof` operator cannot be used with them.
*
* Mind the parameter order: the location arguments come in start/end pairs
* (offsets, then lines, then columns).
*
* @param tokType - The TokenType of the new token.
* @param image - The textual representation of the token.
* @param startOffset - Offset of the first character of the token.
* @param endOffset - Offset of the last character of the token.
* @param startLine - Line of the first character of the token.
* @param endLine - Line of the last character of the token.
* @param startColumn - Column of the first character of the token.
* @param endColumn - Column of the last character of the token.
*
* @returns The new {@link IToken}.
*/
export declare function createTokenInstance(
tokType: TokenType,
image: string,
startOffset: number,
endOffset: number,
startLine: number,
endLine: number,
startColumn: number,
endColumn: number
): IToken
/**
* API #1 for [Custom Token Patterns](http://chevrotain.io/docs/guide/custom_token_patterns.html).
*
* A function which attempts to match a Token at a given offset of the input text.
* NOTE(review): presumably a `null` result signals that no match was found at the offset - confirm
* against the Custom Token Patterns guide.
*/
export declare type CustomPatternMatcherFunc = (
/**
* The full input string.
*/
text: string,
/**
* The offset at which to attempt a match
*/
offset: number,
/**
* Previously scanned Tokens
*/
tokens: IToken[],
/**
* Token Groups
*/
groups: {
[groupName: string]: IToken[]
}
) => CustomPatternMatcherReturn | RegExpExecArray | null // RegExpExecArray included for legacy reasons
/**
* The successful result of a {@link CustomPatternMatcherFunc}:
* a single element string array which may additionally carry a custom `payload` property
* (see {@link IToken.payload}).
* NOTE(review): the single string is presumably the matched text - confirm.
*/
export type CustomPatternMatcherReturn = [string] & { payload?: any }
/**
* A Token Type, as returned by the {@link createToken} API
* and referenced by Token "instances" via {@link IToken.tokenType}.
*
* NOTE(review): the upper case properties presumably hold the values given in the matching
* lower case {@link ITokenConfig} properties (e.g. PATTERN <-> pattern), while the remaining
* optional properties look like internal bookkeeping - confirm before relying on either.
*/
export interface TokenType {
name: string
GROUP?: string
PATTERN?: TokenPattern
LABEL?: string
LONGER_ALT?: TokenType
POP_MODE?: boolean
PUSH_MODE?: string
LINE_BREAKS?: boolean
CATEGORIES?: TokenType[]
tokenTypeIdx?: number
categoryMatches?: number[]
categoryMatchesMap?: {
[tokType: number]: boolean
}
isParent?: boolean
START_CHARS_HINT?: (string | number)[]
}
/**
* API #2 for [Custom Token Patterns](http://chevrotain.io/docs/guide/custom_token_patterns.html).
*
* An object form of a custom pattern: instead of passing a matcher function directly,
* the matcher is supplied as the object's `exec` property.
* This interface is one of the members of the exported {@link TokenPattern} union,
* so it is explicitly exported like the other declarations of this file.
*/
export interface ICustomPattern {
/** The matcher function, see {@link CustomPatternMatcherFunc} for its contract. */
exec: CustomPatternMatcherFunc
}
/**
* Things to note:
* - The offset range is inclusive to exclusive.
* NOTE(review): `endOffset` below is documented as the offset of the **last** character,
* which reads as an inclusive end - confirm which of the two statements is intended.
*
* - A lineTerminator as the last character does not affect the Token's line numbering.
* In other words a new line only starts **after** a line terminator.
*
* - A Token's image is its **literal** text.
* e.g unicode escaping is untouched.
*/
export interface IToken {
/** The textual representation of the Token as it appeared in the text. */
image: string
/** Offset of the first character of the Token. */
startOffset: number
/** Line of the first character of the Token. */
startLine?: number
/** Column of the first character of the Token. */
startColumn?: number
/** Offset of the last character of the Token. */
endOffset?: number
/** Line of the last character of the Token. */
endLine?: number
/** Column of the last character of the Token. */
endColumn?: number
/** This marks if a Token does not really exist and has been inserted "artificially" during parsing in rule error recovery. */
isInsertedInRecovery?: boolean
/** A numeric index representing the type of the Token, use <getTokenConstructor> to get the Token Type from a token "instance". */
tokenTypeIdx: number
/**
* The actual Token Type of this Token "instance"
* This is the same Object returned by the "createToken" API.
* This property is very useful for debugging the Lexing and Parsing phases.
*/
tokenType: TokenType
/**
* Custom Payload value, this is an optional feature of Custom Token Patterns
* For additional details see the docs:
* https://chevrotain.io/docs/guide/custom_token_patterns.html#custom-payloads
*/
payload?: any
}
/**
* Returns a name for the given TokenType.
* NOTE(review): presumably this is the TokenType's `name` property - confirm.
*
* @param tokType - The TokenType to inspect.
*/
export declare function tokenName(tokType: TokenType): string
/**
* Returns a human readable label for a TokenType if such exists,
* otherwise will return the TokenType's name.
*
* Labels are useful in improving the readability of error messages and syntax diagrams.
* To define labels provide the label property in the {@link createToken} config parameter.
*
* @param tokType - The TokenType whose label (or name) is requested.
*
* @returns The TokenType's label when one was defined, otherwise its name.
*/
export declare function tokenLabel(tokType: TokenType): string
/**
* A Utility method to check if a token is of the type of the argument TokenType.
* This utility is needed because Chevrotain tokens support "categories" which means
* A TokenType may have multiple categories.
*
* This means a simple comparison using the {@link IToken.tokenType} property may not suffice.
* For example:
*
* ```
* import { createToken, tokenMatcher, Lexer } from "chevrotain"
*
* // An "abstract" Token used only for categorization purposes.
* const NumberTokType = createToken({ name: "NumberTokType", pattern: Lexer.NA })
*
* const IntegerTokType = createToken({
* name: "IntegerTokType",
* pattern: /\d+/,
* // Integer "Is A" Number
* categories: [NumberTokType]
* })
*
* const DecimalTokType = createToken({
* name: "DecimalTokType",
* pattern: /\d+\.\d+/,
* // Decimal "Is A" Number
* categories: [NumberTokType]
* })
*
* // Will always be false as the tokenType property can only
* // be Integer or Decimal Token Types as the Number TokenType is "abstract".
* if (myToken.tokenType === NumberTokType) { /* ... *\/ }
*
* // Will be true when myToken is of Type Integer or Decimal.
* // Because the hierarchy defined by the categories is taken into account.
* if (tokenMatcher(myToken, NumberTokType)) { /* ... *\/ }
* ```
*
* @param token - The token "instance" to check.
* @param tokType - The TokenType (possibly a category) to match the token against.
*
* @returns true iff the token matches the TokenType.
*/
export declare function tokenMatcher(token: IToken, tokType: TokenType): boolean
/**
* A dictionary from a lexer mode's name to the Token Types of that mode.
* Used as the `modes` property of {@link IMultiModeLexerDefinition}.
*/
export declare type MultiModesDefinition = {
[modeName: string]: TokenType[]
}
/**
* The definition of a lexer with multiple modes,
* it is one of the two forms accepted by the lexer's constructor.
*/
export interface IMultiModeLexerDefinition {
/** The Token Types of each lexer mode, keyed by the mode's name. */
modes: MultiModesDefinition
/** NOTE(review): presumably the name of one of the keys of `modes`, used as the starting mode - confirm. */
defaultMode: string
}
/** A dictionary of Token Types, keyed by a token name. */
export type TokenTypeDictionary = { [tokenName: string]: TokenType }
/**
* A data structure containing all the Tokens used by a Parser.
* It may be given as a dictionary of Token Types, as a flat array of Token Types
* or as a multi-mode lexer definition.
*/
export declare type TokenVocabulary =
| TokenTypeDictionary
| TokenType[]
| IMultiModeLexerDefinition
/**
* Optional configuration of a single grammar rule (production).
* The type parameter T is the type of the value produced by `recoveryValueFunc`.
*/
export interface IRuleConfig<T> {
/**
* The function which will be invoked to produce the returned value for a production that has not been
* successfully executed and from which the parser recovered.
*/
recoveryValueFunc?: () => T
/**
* Enable/Disable re-sync error recovery for this specific production.
*/
resyncEnabled?: boolean
}
export interface DSLMethodOpts<T> {
/**
* The Grammar to process in this method.
*/
DEF: GrammarAction<T>
/**
* A semantic constraint on this DSL method
* @see https://github.com/chevrotain/chevrotain/blob/master/examples/parser/predicate_lookahead/predicate_lookahead.js
* For farther details.
*/
GATE?: () => boolean
/**
* Maximum number of "following tokens" which would be used to
* Choose between the alternatives.
*
* By default this value is determined by the {@link IParserConfig.maxLookahead} value.
* A Higher value may be used for a specific