chevrotain
Version:
Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive descent parsers
952 lines (878 loc) • 32 kB
text/typescript
import {
AtLeastOneSepMethodOpts,
ConsumeMethodOpts,
DSLMethodOpts,
DSLMethodOptsWithErr,
GrammarAction,
IOrAlt,
IParserConfig,
IRuleConfig,
IToken,
ManySepMethodOpts,
OrMethodOpts,
ParserMethod,
SubruleMethodOpts,
TokenType,
TokenTypeDictionary,
TokenVocabulary,
} from "@chevrotain/types";
import {
AT_LEAST_ONE_IDX,
AT_LEAST_ONE_SEP_IDX,
BITS_FOR_METHOD_TYPE,
BITS_FOR_OCCURRENCE_IDX,
MANY_IDX,
MANY_SEP_IDX,
OPTION_IDX,
OR_IDX,
} from "../../grammar/keys.js";
import {
isRecognitionException,
MismatchedTokenException,
NotAllInputParsedException,
} from "../../exceptions_public.js";
import { PROD_TYPE } from "../../grammar/lookahead.js";
import {
AbstractNextTerminalAfterProductionWalker,
NextTerminalAfterAtLeastOneSepWalker,
NextTerminalAfterAtLeastOneWalker,
NextTerminalAfterManySepWalker,
NextTerminalAfterManyWalker,
} from "../../grammar/interpreter.js";
import {
DEFAULT_RULE_CONFIG,
END_OF_FILE,
IParserState,
TokenMatcher,
} from "../parser.js";
import { IN_RULE_RECOVERY_EXCEPTION } from "./recoverable.js";
import { EOF } from "../../../scan/tokens_public.js";
import { MixedInParser } from "./parser_traits.js";
import {
augmentTokenTypes,
isTokenType,
tokenStructuredMatcher,
tokenStructuredMatcherNoCategories,
} from "../../../scan/tokens.js";
import { Rule } from "@chevrotain/gast";
import { ParserMethodInternal } from "../types.js";
/**
* This trait is responsible for the runtime parsing engine
* Used by the official API (recognizer_api.ts)
*/
export class RecognizerEngine {
// A non-empty stack means the parser is currently backtracking (see isBackTracking).
isBackTrackingStack: boolean[];
// Name of the concrete parser class, taken from `this.constructor.name` during initialization.
className: string;
// Short names of the rules currently being invoked; the active depth is tracked by RULE_STACK_IDX.
RULE_STACK: number[];
// For every active rule, the subrule index it was invoked with in its calling rule;
// the active depth is tracked by RULE_OCCURRENCE_STACK_IDX.
RULE_OCCURRENCE_STACK: number[];
// Depth counters for the pre-allocated state stacks.
// Using index-based access (arr[++idx] = val / idx--) instead of push/pop
// avoids method-call overhead on every rule entry/exit.
// A value of -1 means the corresponding stack is empty.
RULE_STACK_IDX: number;
RULE_OCCURRENCE_STACK_IDX: number;
// Initialized to an empty array here; it is not populated anywhere in this trait.
definedRulesNames: string[];
// Token name -> TokenType. Always contains an "EOF" entry after initialization.
tokensMap: { [fqn: string]: TokenType };
// Initialized to an empty object here; it is not populated anywhere in this trait.
gastProductionsCache: Record<string, Rule>;
// Two-way mapping between a rule's numeric short name and its human readable full name,
// filled in by defineRule.
shortRuleNameToFull: Record<string, string>;
fullRuleNameToShort: Record<string, number>;
// The shortName Index must be coded "after" the first 8bits to enable building unique lookahead keys
// Starts at 256 and is incremented once per defined rule.
ruleShortNameIdx: number;
// Token matching strategy, selected during initialization depending on whether
// any token type in the vocabulary uses categories.
tokenMatcher: TokenMatcher;
// Index of the most recent SUBRULE call; it is written by subruleInternal right before the
// sub rule is invoked and read when that rule records its occurrence on RULE_OCCURRENCE_STACK.
subruleIdx: number;
// Cached value of the current rule's short name to avoid repeated RULE_STACK[length-1] lookups.
// Updated on rule entry/exit and state reload.
currRuleShortName: number;
/**
 * Initializes every piece of state owned by the recognizer engine trait and
 * builds the token name -> TokenType map from the supplied vocabulary.
 *
 * @param tokenVocabulary - An array of token types, a dictionary of token types,
 *   or an object with a `modes` property whose values are arrays of token types.
 * @param config - The parser configuration; it is only inspected here in order to
 *   reject the removed `serializedGrammar` property.
 * @throws Error when the configuration contains `serializedGrammar`, when the
 *   vocabulary is an empty array or looks like a token vector, or when the
 *   vocabulary is not an object at all (including `null` / `undefined`).
 */
initRecognizerEngine(
  tokenVocabulary: TokenVocabulary,
  config: IParserConfig,
) {
  this.className = this.constructor.name;
  // TODO: would using an ES6 Map or plain object be faster (CST building scenario)
  this.shortRuleNameToFull = {};
  this.fullRuleNameToShort = {};
  this.ruleShortNameIdx = 256;
  this.tokenMatcher = tokenStructuredMatcherNoCategories;
  this.subruleIdx = 0;
  this.currRuleShortName = 0;
  this.definedRulesNames = [];
  this.tokensMap = {};
  this.isBackTrackingStack = [];
  this.RULE_STACK = [];
  this.RULE_STACK_IDX = -1;
  this.RULE_OCCURRENCE_STACK = [];
  this.RULE_OCCURRENCE_STACK_IDX = -1;
  this.gastProductionsCache = {};

  if (Object.hasOwn(config, "serializedGrammar")) {
    throw Error(
      "The Parser's configuration can no longer contain a <serializedGrammar> property.\n" +
        "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_6-0-0\n" +
        "\tFor Further details.",
    );
  }

  // Reject anything that is not an object before touching it:
  // `Object.hasOwn` throws a bare TypeError for `null` / `undefined`, which used to
  // pre-empt this descriptive error message.
  if (typeof tokenVocabulary !== "object" || tokenVocabulary === null) {
    throw new Error(
      "<tokensDictionary> argument must be An Array of Token constructors," +
        " A dictionary of Token constructors or an IMultiModeLexerDefinition",
    );
  }

  if (Array.isArray(tokenVocabulary)) {
    // This only checks for Token vocabularies provided as arrays.
    // That is good enough because the main objective is to detect users of pre-V4.0 APIs
    // rather than all edge cases of empty Token vocabularies.
    if ((tokenVocabulary as any[]).length === 0) {
      throw Error(
        "A Token Vocabulary cannot be empty.\n" +
          "\tNote that the first argument for the parser constructor\n" +
          "\tis no longer a Token vector (since v4.0).",
      );
    }
    if (typeof (tokenVocabulary as any[])[0].startOffset === "number") {
      throw Error(
        "The Parser constructor no longer accepts a token vector as the first argument.\n" +
          "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_4-0-0\n" +
          "\tFor Further details.",
      );
    }
  }

  // Flatten the per-mode token lists once; the result is needed both for building
  // the tokens map and for the categories check further down.
  const hasModes = Object.hasOwn(tokenVocabulary, "modes");
  const modesTokenTypes: any[] = hasModes
    ? (Object.values((tokenVocabulary as any).modes) as any[][]).flat()
    : [];

  // Later entries win when several token types share the same name.
  const toNameMap = (tokTypes: TokenType[]) => {
    const byName: { [tokenName: string]: TokenType } = {};
    tokTypes.forEach((tokType) => {
      byName[tokType.name] = tokType;
    });
    return byName;
  };

  if (Array.isArray(tokenVocabulary)) {
    this.tokensMap = toNameMap(tokenVocabulary as TokenType[]);
  } else if (hasModes && modesTokenTypes.every(isTokenType)) {
    this.tokensMap = toNameMap(modesTokenTypes);
  } else {
    // A plain dictionary of token types (shallow copied so EOF can be added safely).
    this.tokensMap = { ...(tokenVocabulary as TokenTypeDictionary) };
  }

  // always add EOF to the tokenNames -> constructors map. it is useful to assure all the input has been
  // parsed with a clear error message ("expecting EOF but found ...")
  this.tokensMap["EOF"] = EOF;

  const allTokenTypes: any[] = hasModes
    ? modesTokenTypes
    : Object.values(tokenVocabulary);
  // Only an existing, empty `categoryMatches` array counts as "no categories".
  // A token type without the property yields `undefined` here, which is NOT equal to 0,
  // so the category-aware matcher is selected for it.
  const noTokenCategoriesUsed = allTokenTypes.every(
    (tokenConstructor: any) => tokenConstructor.categoryMatches?.length === 0,
  );
  this.tokenMatcher = noTokenCategoriesUsed
    ? tokenStructuredMatcherNoCategories
    : tokenStructuredMatcher;

  // Because ES2015+ syntax should be supported for creating Token classes
  // We cannot assume that the Token classes were created using the "extendToken" utilities
  // Therefore we must augment the Token classes both on Lexer initialization and on Parser initialization
  augmentTokenTypes(Object.values(this.tokensMap));
}
/**
 * Wraps a grammar rule implementation with the engine's bookkeeping
 * (rule stack updates, error recovery and root-level parsing hooks).
 *
 * @param ruleName - Full, human readable name of the rule.
 * @param impl - The user supplied rule implementation.
 * @param config - Per-rule configuration; `resyncEnabled` and `recoveryValueFunc`
 *   fall back to DEFAULT_RULE_CONFIG when they are not own properties.
 * @returns The root-level rule function, augmented with `ruleName`,
 *   `originalGrammarAction` and `coreRule` (the variant without root-level hooks).
 * @throws Error when called after self analysis has already been performed.
 */
defineRule<ARGS extends unknown[], R>(
  this: MixedInParser,
  ruleName: string,
  impl: (...args: ARGS) => R,
  config: IRuleConfig<R>,
): ParserMethodInternal<ARGS, R> {
  if (this.selfAnalysisDone) {
    throw Error(
      `Grammar rule <${ruleName}> may not be defined after the 'performSelfAnalysis' method has been called'\n` +
        `Make sure that all grammar rule definitions are done before 'performSelfAnalysis' is called.`,
    );
  }
  const resyncEnabled: boolean = Object.hasOwn(config, "resyncEnabled")
    ? (config.resyncEnabled as boolean) // assumes end user provides the correct config value/type
    : DEFAULT_RULE_CONFIG.resyncEnabled;
  const recoveryValueFunc = Object.hasOwn(config, "recoveryValueFunc")
    ? (config.recoveryValueFunc as () => R) // assumes end user provides the correct config value/type
    : DEFAULT_RULE_CONFIG.recoveryValueFunc;

  // performance optimization: Use small integers as keys for the longer human readable "full" rule names.
  // this greatly improves Map access time (as much as 8% for some performance benchmarks).
  const shortName =
    this.ruleShortNameIdx << (BITS_FOR_METHOD_TYPE + BITS_FOR_OCCURRENCE_IDX);
  this.ruleShortNameIdx++;
  this.shortRuleNameToFull[shortName] = ruleName;
  this.fullRuleNameToShort[ruleName] = shortName;

  let coreRuleFunction: ParserMethod<ARGS, R>;
  // Micro optimization, only check the condition **once** on rule definition
  // instead of **every single** rule invocation.
  if (this.outputCst === true) {
    // CST building variant: the rule's own return value is ignored and
    // the CST node at the top of the CST stack is returned instead.
    coreRuleFunction = function invokeRuleWithTryCst(
      this: MixedInParser,
      ...args: ARGS
    ): R {
      try {
        this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx);
        impl.apply(this, args);
        const cst = this.CST_STACK[this.CST_STACK.length - 1];
        this.cstPostRule(cst);
        return cst as unknown as R;
      } catch (e) {
        return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R;
      } finally {
        this.ruleFinallyStateUpdate();
      }
    };
  } else {
    // Plain variant: returns whatever the rule implementation returns.
    coreRuleFunction = function invokeRuleWithTry(
      this: MixedInParser,
      ...args: ARGS
    ): R {
      try {
        this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx);
        return impl.apply(this, args);
      } catch (e) {
        return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R;
      } finally {
        this.ruleFinallyStateUpdate();
      }
    };
  }

  // wrapper to allow before/after parsing hooks
  const rootRuleFunction: ParserMethod<ARGS, R> = function rootRule(
    this: MixedInParser,
    ...args: ARGS
  ): R {
    this.onBeforeParse(ruleName);
    try {
      return coreRuleFunction.apply(this, args);
    } finally {
      // runs even when the rule throws
      this.onAfterParse(ruleName);
    }
  };

  const wrappedGrammarRule: ParserMethodInternal<ARGS, R> = Object.assign(
    rootRuleFunction as any,
    { ruleName, originalGrammarAction: impl, coreRule: coreRuleFunction },
  );
  return wrappedGrammarRule;
}
/**
 * Handles an error thrown while a rule was being invoked.
 *
 * @param e - The error caught by the rule wrapper.
 * @param resyncEnabledConfig - Whether re-sync recovery is enabled for this rule.
 * @param recoveryValueFunc - Produces the value to return from a recovered rule.
 * @returns The partial CST node (flagged with `recoveredNode`) or the result of
 *   `recoveryValueFunc` when the error was recovered from.
 * @throws The original error when it is not a recognition exception or when it
 *   has to be handled further up the call stack.
 */
invokeRuleCatch(
  this: MixedInParser,
  e: Error,
  resyncEnabledConfig: boolean,
  recoveryValueFunc: Function,
): unknown {
  const isRootInvocation = this.RULE_STACK_IDX === 0;
  // during backtracking reSync recovery is disabled, otherwise we can't be certain the backtracking
  // path is really the most valid one
  const canReSync =
    resyncEnabledConfig && !this.isBackTracking() && this.recoveryEnabled;

  if (!isRecognitionException(e)) {
    // some other Error type which we don't know how to handle (for example a built in JavaScript Error)
    throw e;
  }
  const recogError: any = e;

  if (!canReSync) {
    if (isRootInvocation) {
      // otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case
      this.moveToTerminatedState();
      // the parser should never throw one of its own errors outside its flow.
      // even if error recovery is disabled
      return recoveryValueFunc(e);
    }
    // to be recovered further up the call stack
    throw recogError;
  }

  const reSyncTokType = this.findReSyncTokenType();
  const recoverHere = this.isInCurrentRuleReSyncSet(reSyncTokType);
  if (recoverHere) {
    recogError.resyncedTokens = this.reSyncTo(reSyncTokType);
  }

  if (!this.outputCst) {
    if (recoverHere) {
      return recoveryValueFunc(e);
    }
    // to be handled further up the call stack
    throw recogError;
  }

  // CST output: whatever was built so far is kept and flagged as a recovered node.
  const partialCst: any = this.CST_STACK[this.CST_STACK.length - 1];
  partialCst.recoveredNode = true;
  if (recoverHere) {
    return partialCst;
  }
  // attach the partial result so a calling rule can still add it to its own CST
  recogError.partialCstResult = partialCst;
  throw recogError;
}
// Implementation of parsing DSL
/**
 * Entry point for OPTION: resolves the lookahead key for this occurrence
 * and delegates to optionInternalLogic.
 */
optionInternal<OUT>(
  this: MixedInParser,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
  occurrence: number,
): OUT | undefined {
  return this.optionInternalLogic(
    actionORMethodDef,
    occurrence,
    this.getKeyForAutomaticLookahead(OPTION_IDX, occurrence),
  );
}
/**
 * Runs an optional production when its lookahead (and GATE, if any) allows it.
 *
 * @param actionORMethodDef - Either the action itself or an options object with DEF and an optional GATE.
 * @param occurrence - Occurrence index of this OPTION (not used by the logic itself).
 * @param key - Key of the cached lookahead function.
 * @returns The action's result, or `undefined` when the option was not taken.
 */
optionInternalLogic<OUT>(
  this: MixedInParser,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
  occurrence: number,
  key: number,
): OUT | undefined {
  const cachedLaFunc = this.getLaFuncFromCache(key);

  if (typeof actionORMethodDef === "function") {
    return cachedLaFunc.call(this) === true
      ? actionORMethodDef.call(this)
      : undefined;
  }

  const { DEF: action, GATE: gate } = actionORMethodDef;
  // When a GATE is present it is evaluated first and short-circuits the lookahead.
  const shouldEnter =
    gate === undefined
      ? cachedLaFunc.call(this)
      : gate.call(this) && cachedLaFunc.call(this);
  return shouldEnter === true ? action.call(this) : undefined;
}
/**
 * Entry point for AT_LEAST_ONE: resolves the lookahead key for this occurrence
 * and delegates to atLeastOneInternalLogic.
 */
atLeastOneInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>,
): void {
  const key = this.getKeyForAutomaticLookahead(AT_LEAST_ONE_IDX, prodOccurrence);
  return this.atLeastOneInternalLogic(prodOccurrence, actionORMethodDef, key);
}
/**
 * Runs a mandatory repetition: the action must be executed at least once and
 * is repeated for as long as the lookahead allows and the input keeps advancing.
 *
 * @param prodOccurrence - Occurrence index of this AT_LEAST_ONE.
 * @param actionORMethodDef - Either the action itself or an options object (DEF, optional GATE / ERR_MSG).
 * @param key - Key of the cached lookahead function.
 * @throws The early exit exception when not even a single iteration can be entered.
 */
atLeastOneInternalLogic<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>,
  key: number,
): void {
  let laFunc = this.getLaFuncFromCache(key);
  let action;
  if (typeof actionORMethodDef === "function") {
    action = actionORMethodDef;
  } else {
    action = actionORMethodDef.DEF;
    const gate = actionORMethodDef.GATE;
    if (gate !== undefined) {
      // combine the GATE with the grammar based lookahead
      const ungatedLaFunc = laFunc;
      laFunc = () => gate.call(this) && ungatedLaFunc.call(this);
    }
  }

  if ((<Function>laFunc).call(this) !== true) {
    throw this.raiseEarlyExitException(
      prodOccurrence,
      PROD_TYPE.REPETITION_MANDATORY,
      (<DSLMethodOptsWithErr<OUT>>actionORMethodDef).ERR_MSG,
    );
  }

  // The first iteration is unconditional at this point; every following iteration
  // requires both a matching lookahead and progress in the previous iteration.
  let progressed;
  do {
    progressed = this.doSingleRepetition(action);
  } while ((<Function>laFunc).call(this) === true && progressed === true);

  // note that while it may seem that this can cause an error because by using a recursive call to
  // AT_LEAST_ONE we change the grammar to AT_LEAST_TWO, AT_LEAST_THREE ... , the possible recursive call
  // from the tryInRepetitionRecovery(...) will only happen IFF there really are TWO/THREE/.... items.

  // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
  this.attemptInRepetitionRecovery(
    this.atLeastOneInternal,
    [prodOccurrence, actionORMethodDef],
    <any>laFunc,
    AT_LEAST_ONE_IDX,
    prodOccurrence,
    NextTerminalAfterAtLeastOneWalker,
  );
}
/**
 * Entry point for AT_LEAST_ONE_SEP: resolves the lookahead key for this occurrence
 * and delegates to atLeastOneSepFirstInternalLogic.
 */
atLeastOneSepFirstInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  options: AtLeastOneSepMethodOpts<OUT>,
): void {
  this.atLeastOneSepFirstInternalLogic(
    prodOccurrence,
    options,
    this.getKeyForAutomaticLookahead(AT_LEAST_ONE_SEP_IDX, prodOccurrence),
  );
}
/**
 * Runs a mandatory, separator delimited repetition: one iteration is required,
 * further iterations are taken for as long as the separator token comes next.
 *
 * @param prodOccurrence - Occurrence index of this AT_LEAST_ONE_SEP.
 * @param options - DEF (the action), SEP (the separator token type) and an optional ERR_MSG.
 * @param key - Key of the cached lookahead function for the first iteration.
 * @throws The early exit exception when the first iteration cannot be entered.
 */
atLeastOneSepFirstInternalLogic<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  options: AtLeastOneSepMethodOpts<OUT>,
  key: number,
): void {
  const { DEF: action, SEP: separator } = options;
  const firstIterationLaFunc = this.getLaFuncFromCache(key);

  if (firstIterationLaFunc.call(this) !== true) {
    throw this.raiseEarlyExitException(
      prodOccurrence,
      PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,
      options.ERR_MSG,
    );
  }

  // 1st iteration
  (<GrammarAction<OUT>>action).call(this);

  // TODO: Optimization can move this function construction into "attemptInRepetitionRecovery"
  //  because it is only needed in error recovery scenarios.
  const isSeparatorNext = () => this.tokenMatcher(this.LA_FAST(1), separator);

  // 2nd..nth iterations
  while (this.tokenMatcher(this.LA_FAST(1), separator) === true) {
    // this CONSUME will never enter recovery because
    // the loop condition already verified that the separator is the next token.
    this.CONSUME(separator);
    // No need for checking infinite loop here due to consuming the separator.
    (<GrammarAction<OUT>>action).call(this);
  }

  // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
  this.attemptInRepetitionRecovery(
    this.repetitionSepSecondInternal,
    [
      prodOccurrence,
      separator,
      isSeparatorNext,
      action,
      NextTerminalAfterAtLeastOneSepWalker,
    ],
    isSeparatorNext,
    AT_LEAST_ONE_SEP_IDX,
    prodOccurrence,
    NextTerminalAfterAtLeastOneSepWalker,
  );
}
/**
 * Entry point for MANY: resolves the lookahead key for this occurrence
 * and delegates to manyInternalLogic.
 */
manyInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
): void {
  const key = this.getKeyForAutomaticLookahead(MANY_IDX, prodOccurrence);
  return this.manyInternalLogic(prodOccurrence, actionORMethodDef, key);
}
/**
 * Runs a zero-or-more repetition: the action is repeated for as long as the
 * lookahead (and GATE, if any) allows and the previous iteration advanced the input.
 *
 * @param prodOccurrence - Occurrence index of this MANY.
 * @param actionORMethodDef - Either the action itself or an options object with DEF and an optional GATE.
 * @param key - Key of the cached lookahead function.
 */
manyInternalLogic<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
  key: number,
) {
  let laFunc = this.getLaFuncFromCache(key);
  let action;
  if (typeof actionORMethodDef === "function") {
    action = actionORMethodDef;
  } else {
    action = actionORMethodDef.DEF;
    const gate = actionORMethodDef.GATE;
    if (gate !== undefined) {
      // combine the GATE with the grammar based lookahead
      const ungatedLaFunc = laFunc;
      laFunc = () => gate.call(this) && ungatedLaFunc.call(this);
    }
  }

  // Becomes false once an iteration did not advance the input (guards against infinite loops).
  let progressed = true;
  while (laFunc.call(this) === true && progressed === true) {
    progressed = this.doSingleRepetition(action);
  }

  // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
  this.attemptInRepetitionRecovery(
    this.manyInternal,
    [prodOccurrence, actionORMethodDef],
    <any>laFunc,
    MANY_IDX,
    prodOccurrence,
    NextTerminalAfterManyWalker,
    // The notStuck parameter is only relevant when "attemptInRepetitionRecovery"
    // is invoked from manyInternal, in the MANY_SEP case and AT_LEAST_ONE[_SEP]
    // An infinite loop cannot occur as:
    // - Either the lookahead is guaranteed to consume something (Single Token Separator)
    // - AT_LEAST_ONE by definition is guaranteed to consume something (or error out).
    progressed,
  );
}
/**
 * Entry point for MANY_SEP: resolves the lookahead key for this occurrence
 * and delegates to manySepFirstInternalLogic.
 */
manySepFirstInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  options: ManySepMethodOpts<OUT>,
): void {
  this.manySepFirstInternalLogic(
    prodOccurrence,
    options,
    this.getKeyForAutomaticLookahead(MANY_SEP_IDX, prodOccurrence),
  );
}
/**
 * Runs an optional, separator delimited repetition: nothing happens when the
 * first iteration cannot be entered, otherwise iterations continue for as long
 * as the separator token comes next.
 *
 * @param prodOccurrence - Occurrence index of this MANY_SEP.
 * @param options - DEF (the action) and SEP (the separator token type).
 * @param key - Key of the cached lookahead function for the first iteration.
 */
manySepFirstInternalLogic<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  options: ManySepMethodOpts<OUT>,
  key: number,
): void {
  const { DEF: action, SEP: separator } = options;
  const firstIterationLaFunc = this.getLaFuncFromCache(key);

  if (firstIterationLaFunc.call(this) !== true) {
    // zero iterations is a valid outcome for MANY_SEP
    return;
  }

  // 1st iteration
  action.call(this);

  const isSeparatorNext = () => this.tokenMatcher(this.LA_FAST(1), separator);

  // 2nd..nth iterations
  while (this.tokenMatcher(this.LA_FAST(1), separator) === true) {
    // this CONSUME will never enter recovery because
    // the loop condition already verified that the separator is the next token.
    this.CONSUME(separator);
    // No need for checking infinite loop here due to consuming the separator.
    action.call(this);
  }

  // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
  this.attemptInRepetitionRecovery(
    this.repetitionSepSecondInternal,
    [
      prodOccurrence,
      separator,
      isSeparatorNext,
      action,
      NextTerminalAfterManySepWalker,
    ],
    isSeparatorNext,
    MANY_SEP_IDX,
    prodOccurrence,
    NextTerminalAfterManySepWalker,
  );
}
/**
 * Continues a separator delimited repetition (MANY_SEP or AT_LEAST_ONE_SEP).
 * It is handed to "attemptInRepetitionRecovery" by the "first" variants of both
 * productions (and by itself) as the function that resumes the repetition.
 *
 * @param prodOccurrence - Occurrence index of the repetition.
 * @param separator - The separator token type.
 * @param separatorLookAheadFunc - Returns true when the separator is the next token.
 * @param action - The repetition's action.
 * @param nextTerminalAfterWalker - The walker class matching the kind of repetition being continued.
 */
repetitionSepSecondInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  separator: TokenType,
  separatorLookAheadFunc: () => boolean,
  action: GrammarAction<OUT>,
  nextTerminalAfterWalker: typeof AbstractNextTerminalAfterProductionWalker,
): void {
  while (separatorLookAheadFunc()) {
    // note that this CONSUME will never enter recovery because
    // the separatorLookAheadFunc checks that the separator really does exist.
    this.CONSUME(separator);
    action.call(this);
  }

  // Keep the DSL method index consistent with the walker, exactly like the "first"
  // variants do: MANY_SEP_IDX goes with NextTerminalAfterManySepWalker, while
  // AT_LEAST_ONE_SEP_IDX goes with NextTerminalAfterAtLeastOneSepWalker.
  // Previously AT_LEAST_ONE_SEP_IDX was always passed, even when continuing a MANY_SEP.
  const dslMethodIdx =
    nextTerminalAfterWalker === NextTerminalAfterManySepWalker
      ? MANY_SEP_IDX
      : AT_LEAST_ONE_SEP_IDX;

  // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
  this.attemptInRepetitionRecovery(
    this.repetitionSepSecondInternal,
    [
      prodOccurrence,
      separator,
      separatorLookAheadFunc,
      action,
      nextTerminalAfterWalker,
    ],
    separatorLookAheadFunc,
    dslMethodIdx,
    prodOccurrence,
    nextTerminalAfterWalker,
  );
}
/**
 * Executes one iteration of a repetition.
 *
 * @param action - The repetition's action, invoked with the parser as `this`.
 * @returns true when the lexer position advanced during the iteration,
 *   false when the repetition is "stuck" (potential infinite loop).
 */
doSingleRepetition(this: MixedInParser, action: Function): any {
  const startPosition = this.getLexerPosition();
  action.call(this);
  return this.getLexerPosition() > startPosition;
}
/**
 * Implementation of OR: asks the cached lookahead function which alternative
 * to take and invokes it.
 *
 * @param altsOrOpts - Either the alternatives array or an options object holding it under DEF.
 * @param occurrence - Occurrence index of this OR.
 * @returns The result of the chosen alternative's ALT function.
 */
orInternal<T>(
  this: MixedInParser,
  altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
  occurrence: number,
): T {
  const laKey = this.getKeyForAutomaticLookahead(OR_IDX, occurrence);
  const alts = Array.isArray(altsOrOpts) ? altsOrOpts : altsOrOpts.DEF;
  const altIdx = this.getLaFuncFromCache(laKey).call(this, alts);
  if (altIdx !== undefined) {
    return (alts[altIdx] as any).ALT.call(this);
  }
  // no alternative matched; ERR_MSG is simply undefined when a plain array was supplied
  this.raiseNoAltException(
    occurrence,
    (altsOrOpts as OrMethodOpts<unknown>).ERR_MSG,
  );
}
/**
 * Unwinds the rule state when a rule exits: pops both state stacks
 * (by decrementing their depth counters) and restores the cached short name.
 */
ruleFinallyStateUpdate(this: MixedInParser): void {
  const parentIdx = --this.RULE_STACK_IDX;
  this.RULE_OCCURRENCE_STACK_IDX--;
  // Point the cached short name back at the calling rule.
  // Nothing is restored when the outermost rule exits, the cached value is left as is.
  if (parentIdx >= 0) {
    this.currRuleShortName = this.RULE_STACK[parentIdx];
  }
  // NOOP when cst is disabled
  this.cstFinallyStateUpdate();
}
/**
 * Implementation of SUBRULE: invokes another rule and records its result
 * as a non-terminal of the current rule.
 *
 * @param ruleToCall - The rule to invoke.
 * @param idx - Occurrence index of this SUBRULE call.
 * @param options - Optional ARGS passed to the rule and LABEL used instead of the rule's name.
 * @returns The invoked rule's result.
 */
subruleInternal<ARGS extends unknown[], R>(
  this: MixedInParser,
  ruleToCall: ParserMethodInternal<ARGS, R>,
  idx: number,
  options?: SubruleMethodOpts<ARGS>,
): R {
  try {
    const args = options !== undefined ? options.ARGS : undefined;
    // must be set before the rule is invoked, the rule reads it upon entry
    this.subruleIdx = idx;
    // Use coreRule to bypass root-level hooks (onBeforeParse/onAfterParse)
    const ruleResult = ruleToCall.coreRule.apply(this, args);
    const label =
      options !== undefined && options.LABEL !== undefined
        ? options.LABEL
        : ruleToCall.ruleName;
    this.cstPostNonTerminal(ruleResult, label);
    return ruleResult;
  } catch (e) {
    throw this.subruleInternalError(e, options, ruleToCall.ruleName);
  }
}
/**
 * Error path of SUBRULE: when a recognition exception carries a partial CST result,
 * that result is recorded as a non-terminal and removed from the exception.
 * The error is always re-thrown.
 *
 * @param e - The error thrown by the invoked rule.
 * @param options - The SUBRULE options; LABEL overrides the rule name when defined.
 * @param ruleName - Name of the invoked rule.
 * @throws Always re-throws `e`.
 */
subruleInternalError(
  this: MixedInParser,
  e: any,
  options: SubruleMethodOpts<unknown[]> | undefined,
  ruleName: string,
): void {
  const partialCst = isRecognitionException(e) ? e.partialCstResult : undefined;
  if (partialCst !== undefined) {
    const label =
      options !== undefined && options.LABEL !== undefined
        ? options.LABEL
        : ruleName;
    this.cstPostNonTerminal(partialCst, label);
    delete e.partialCstResult;
  }
  throw e;
}
/**
 * Implementation of CONSUME: matches the next token against the expected token type,
 * falls back to in-rule recovery on failure and records the consumed token as a terminal.
 *
 * @param tokType - The expected token type.
 * @param idx - Occurrence index of this CONSUME call.
 * @param options - Optional LABEL (used instead of the token type's name) and ERR_MSG.
 * @returns The consumed token, or the token produced by the recovery.
 */
consumeInternal(
  this: MixedInParser,
  tokType: TokenType,
  idx: number,
  options: ConsumeMethodOpts | undefined,
): IToken {
  let consumedToken!: IToken;
  try {
    const candidate = this.LA_FAST(1);
    if (this.tokenMatcher(candidate, tokType) !== true) {
      // always throws, control continues in the catch clause below
      this.consumeInternalError(tokType, candidate, options);
    } else {
      this.consumeToken();
      consumedToken = candidate;
    }
  } catch (eFromConsumption) {
    consumedToken = this.consumeInternalRecovery(
      tokType,
      idx,
      eFromConsumption,
    );
  }

  const label =
    options !== undefined && options.LABEL !== undefined
      ? options.LABEL
      : tokType.name;
  this.cstPostTerminal(label, consumedToken);
  return consumedToken;
}
/**
 * Builds, saves and throws a MismatchedTokenException for a failed CONSUME.
 *
 * @param tokType - The expected token type.
 * @param nextToken - The token actually found.
 * @param options - A truthy ERR_MSG replaces the message built by the error message provider.
 * @throws Always throws the saved MismatchedTokenException.
 */
consumeInternalError(
  this: MixedInParser,
  tokType: TokenType,
  nextToken: IToken,
  options: ConsumeMethodOpts | undefined,
): void {
  const previousToken = this.LA(0);
  const msg =
    options !== undefined && options.ERR_MSG
      ? options.ERR_MSG
      : this.errorMessageProvider.buildMismatchTokenMessage({
          expected: tokType,
          actual: nextToken,
          previous: previousToken,
          ruleName: this.getCurrRuleFullName(),
        });
  throw this.SAVE_ERROR(
    new MismatchedTokenException(msg, nextToken, previousToken),
  );
}
/**
 * Attempts in-rule recovery after a failed CONSUME.
 *
 * @param tokType - The expected token type.
 * @param idx - Occurrence index of the failed CONSUME call.
 * @param eFromConsumption - The error thrown while consuming.
 * @returns The token produced by the in-rule recovery.
 * @throws `eFromConsumption` when recovery is not applicable or failed,
 *   or any unrelated error thrown by the recovery attempt.
 */
consumeInternalRecovery(
  this: MixedInParser,
  tokType: TokenType,
  idx: number,
  eFromConsumption: Error,
): IToken {
  // no recovery allowed during backtracking, otherwise backtracking may recover invalid syntax and accept it
  // but the original syntax could have been parsed successfully without any backtracking + recovery
  const mayRecover =
    this.recoveryEnabled &&
    // TODO: more robust checking of the exception type. Perhaps Typescript extending expressions?
    eFromConsumption.name === "MismatchedTokenException" &&
    !this.isBackTracking();
  if (!mayRecover) {
    throw eFromConsumption;
  }

  const follows = this.getFollowsForInRuleRecovery(<any>tokType, idx);
  try {
    return this.tryInRuleRecovery(<any>tokType, follows);
  } catch (eFromInRuleRecovery) {
    // A failed in-rule recovery re-throws the original error in order to
    // trigger reSync error recovery; anything else is propagated untouched.
    throw eFromInRuleRecovery.name === IN_RULE_RECOVERY_EXCEPTION
      ? eFromConsumption
      : eFromInRuleRecovery;
  }
}
/**
 * Captures a snapshot of the recognizer state.
 *
 * @returns The current errors, the exported lexer state, a copy of the active
 *   part of the rule stack and a reference to the current CST stack.
 */
saveRecogState(this: MixedInParser): IParserState {
  // errors is a getter which will clone the errors array
  const errorsSnapshot = this.errors;
  // Only the active portion of the stack is relevant, slots beyond the depth counter are stale
  const activeDepth = this.RULE_STACK_IDX + 1;
  const ruleStackSnapshot = this.RULE_STACK.slice(0, activeDepth);
  return {
    errors: errorsSnapshot,
    lexerState: this.exportLexerState(),
    RULE_STACK: ruleStackSnapshot,
    CST_STACK: this.CST_STACK,
  };
}
/**
 * Restores the errors, the lexer state and the rule stack from a snapshot.
 *
 * @param newState - The snapshot to restore from.
 */
reloadRecogState(this: MixedInParser, newState: IParserState) {
  this.errors = newState.errors;
  this.importLexerState(newState.lexerState);

  // Write the saved entries back into the existing array instead of replacing it
  const savedStack = newState.RULE_STACK;
  const savedDepth = savedStack.length;
  for (let slot = 0; slot < savedDepth; slot++) {
    this.RULE_STACK[slot] = savedStack[slot];
  }
  const topIdx = savedDepth - 1;
  this.RULE_STACK_IDX = topIdx;

  // Re-sync the cached short name with the restored stack (left as is for an empty stack)
  if (topIdx >= 0) {
    this.currRuleShortName = this.RULE_STACK[topIdx];
  }
}
/**
 * Records the entry into a rule: pushes onto both state stacks
 * and updates the cached short name.
 *
 * @param shortName - Numeric short name of the rule being entered.
 * @param fullName - Full name of the rule, forwarded to the CST state update.
 * @param idxInCallingRule - The subrule index the rule was invoked with.
 */
ruleInvocationStateUpdate(
  this: MixedInParser,
  shortName: number,
  fullName: string,
  idxInCallingRule: number,
): void {
  const occurrenceSlot = ++this.RULE_OCCURRENCE_STACK_IDX;
  this.RULE_OCCURRENCE_STACK[occurrenceSlot] = idxInCallingRule;

  const ruleSlot = ++this.RULE_STACK_IDX;
  this.RULE_STACK[ruleSlot] = shortName;
  this.currRuleShortName = shortName;

  // NOOP when cst is disabled
  this.cstInvocationStateUpdate(fullName);
}
/** @returns true while the backtracking stack holds at least one entry. */
isBackTracking(this: MixedInParser): boolean {
  return this.isBackTrackingStack.length > 0;
}
/** @returns The full name registered for the cached short name of the current rule. */
getCurrRuleFullName(this: MixedInParser): string {
  return this.shortRuleNameToFull[this.currRuleShortName];
}
/**
 * Translates a rule's numeric short name into its full name.
 *
 * @param shortName - The numeric short name assigned when the rule was defined.
 */
shortRuleNameToFullName(this: MixedInParser, shortName: number) {
  const fullName = this.shortRuleNameToFull[shortName];
  return fullName;
}
/** @returns true when the next token matches EOF. */
public isAtEndOfInput(this: MixedInParser): boolean {
  const nextToken = this.LA(1);
  return this.tokenMatcher(nextToken, EOF);
}
/**
 * Resets the recognizer to its initial state: lexer state, subrule index,
 * cached rule name, backtracking stack, errors, stack depths and CST stack.
 */
public reset(this: MixedInParser): void {
  this.resetLexerState();
  this.subruleIdx = 0;
  this.currRuleShortName = 0;
  this.isBackTrackingStack = [];
  this.errors = [];
  // Only the depth counters are reset, the stack arrays themselves are kept
  // to avoid re-allocation. Stale number values in unused slots are harmless.
  this.RULE_STACK_IDX = this.RULE_OCCURRENCE_STACK_IDX = -1;
  // TODO: extract a specific reset for TreeBuilder trait
  this.CST_STACK = [];
}
/**
 * Hook called before the root-level parsing rule is invoked.
 * This is only called when a rule is invoked directly by the consumer
 * (e.g., `parser.json()`), not when invoked as a sub-rule via SUBRULE.
 *
 * The default implementation appends `maxLookahead + 1` END_OF_FILE sentinel
 * tokens to the token vector, so forward lookahead needs no bounds checks.
 * NOTE(review): an override presumably has to keep this padding
 * (e.g. by calling the default implementation) - confirm against the lookahead code.
 *
 * @param ruleName - The name of the root rule being invoked.
 */
onBeforeParse(this: MixedInParser, ruleName: string): void {
  // Pad with sentinels for bounds-free forward LA()
  const sentinelCount = this.maxLookahead + 1;
  for (let remaining = sentinelCount; remaining > 0; remaining--) {
    this.tokVector.push(END_OF_FILE);
  }
}
/**
 * Hook called after the root-level parsing rule has completed (or thrown).
 * This is only called when a rule is invoked directly by the consumer
 * (e.g., `parser.json()`), not when invoked as a sub-rule via SUBRULE.
 *
 * This hook is called in a `finally` block, so it executes regardless of
 * whether parsing succeeded or threw an error.
 *
 * The default implementation saves a NotAllInputParsedException when the next
 * token is not EOF, and then removes the trailing END_OF_FILE sentinel tokens
 * from the token vector.
 *
 * @param ruleName - The name of the root rule that was invoked.
 */
onAfterParse(this: MixedInParser, ruleName: string): void {
  if (this.isAtEndOfInput() === false) {
    const firstRedundantTok = this.LA(1);
    const errMsg = this.errorMessageProvider.buildNotAllInputParsedMessage({
      firstRedundant: firstRedundantTok,
      // Report the rule name handed to this hook. The rule stack has already been
      // unwound at this point, so the cached "current rule" is only a stale leftover
      // (and is 0, i.e. unmapped, after a reset).
      ruleName,
    });
    this.SAVE_ERROR(
      new NotAllInputParsedException(errMsg, firstRedundantTok),
    );
  }
  // undo the padding of sentinels for bounds-free forward LA() in onBeforeParse
  while (this.tokVector.at(-1) === END_OF_FILE) {
    this.tokVector.pop();
  }
}
}