chevrotain
Version:
Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers
312 lines • 14.5 kB
JavaScript
import { createTokenInstance, EOF, tokenMatcher, } from "../../../scan/tokens_public.js";
import { NextAfterTokenWalker, } from "../../grammar/interpreter.js";
import { MismatchedTokenException } from "../../exceptions_public.js";
import { IN } from "../../constants.js";
import { DEFAULT_PARSER_CONFIG } from "../parser.js";
// Sentinel follow key used for the bottom entry of the rule stack. It is compared by
// identity (`===`) and `getFollowSetFromFollowKey` maps it to the follow set `[EOF]`.
export const EOF_FOLLOW_KEY = {};
// Value assigned to the `name` property of every `InRuleRecoveryException` instance.
export const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException";
/**
 * Error thrown by `tryInRuleRecovery` when neither single token insertion nor single token
 * deletion can repair the input.
 */
export class InRuleRecoveryException extends Error {
/**
 * @param message - description of the failure, forwarded unchanged to `Error`.
 */
constructor(message) {
super(message);
// Replace the inherited `name` with the exported constant.
this.name = IN_RULE_RECOVERY_EXCEPTION;
}
}
/**
 * Trait implementing the parser's error recovery and fault tolerance logic.
 *
 * The methods rely on members that are not defined in this class (for example `LA`, `LA_FAST`,
 * `SKIP_TOKEN`, `tokenMatcher`, `RULE_STACK`, `errorMessageProvider`); they are expected to be
 * present on the object this trait is mixed into.
 */
export class Recoverable {
  /**
   * Resets the recovery caches and resolves the `recoveryEnabled` flag.
   *
   * @param config - parser configuration; an own `recoveryEnabled` property takes precedence
   *   over `DEFAULT_PARSER_CONFIG.recoveryEnabled`.
   */
  initRecoverable(config) {
    this.firstAfterRepMap = {};
    this.resyncFollows = {};
    // The end user is trusted to provide a value of the correct type.
    if (Object.hasOwn(config, "recoveryEnabled")) {
      this.recoveryEnabled = config.recoveryEnabled;
    } else {
      this.recoveryEnabled = DEFAULT_PARSER_CONFIG.recoveryEnabled;
    }
    // Performance optimization: the method on the prototype is a NO-OP, so the real
    // implementation is only installed on the instance when the feature is switched on.
    if (this.recoveryEnabled) {
      this.attemptInRepetitionRecovery = attemptInRepetitionRecovery;
    }
  }

  /**
   * Builds the placeholder token used by single token insertion recovery.
   *
   * @param tokType - token type of the token to fabricate.
   * @returns a token with an empty image, `NaN` for the remaining six arguments of
   *   `createTokenInstance`, and `isInsertedInRecovery` set to `true`.
   */
  getTokenToInsert(tokType) {
    const insertedToken = createTokenInstance(tokType, "", NaN, NaN, NaN, NaN, NaN, NaN);
    insertedToken.isInsertedInRecovery = true;
    return insertedToken;
  }

  /**
   * Tells whether a token of the given type may be inserted during recovery.
   * This implementation allows every token type.
   */
  canTokenTypeBeInsertedInRecovery(tokType) {
    return true;
  }

  /**
   * Tells whether a token may be deleted during recovery when `tokType` is expected.
   * This implementation always allows it.
   */
  canTokenTypeBeDeletedInRecovery(tokType) {
    return true;
  }

  /**
   * Skips tokens until the repetition can either be exited or re-entered; if the re-sync
   * token is reached first, the saved lexer state is restored instead.
   *
   * @param grammarRule - repetition implementation to re-invoke after a successful re-sync.
   * @param grammarRuleArgs - arguments passed to `grammarRule`.
   * @param lookAheadFunc - called with the parser as `this`; truthy when another iteration
   *   of the repetition can start at the current position.
   * @param expectedTokType - token type expected right after the repetition.
   */
  tryInRepetitionRecovery(grammarRule, grammarRuleArgs, lookAheadFunc, expectedTokType) {
    // TODO: can the resyncTokenType be cached?
    const reSyncTokType = this.findReSyncTokenType();
    const savedLexerState = this.exportLexerState();
    const skippedTokens = [];
    const offendingToken = this.LA_FAST(1);
    let currToken = this.LA_FAST(1);

    // The re-sync happens preemptively, before any error was detected, therefore the error
    // that would have been thrown has to be reproduced and recorded here.
    const reportMismatch = () => {
      const previousToken = this.LA(0);
      const msg = this.errorMessageProvider.buildMismatchTokenMessage({
        expected: expectedTokType,
        actual: offendingToken,
        previous: previousToken,
        ruleName: this.getCurrRuleFullName(),
      });
      const error = new MismatchedTokenException(msg, offendingToken, this.LA(0));
      // The most recently collected token is the one the scan stopped at, so it is left out.
      error.resyncedTokens = skippedTokens.slice(0, -1);
      this.SAVE_ERROR(error);
    };

    for (;;) {
      // Reached a point where the repetition can be safely exited.
      if (this.tokenMatcher(currToken, expectedTokType)) {
        reportMismatch();
        return; // returning here avoids reverting the input position
      }
      // Enough tokens were skipped to start another iteration of the repetition.
      if (lookAheadFunc.call(this)) {
        reportMismatch();
        // Recursive invocation, so several re-syncs are possible within one repetition.
        grammarRule.apply(this, grammarRuleArgs);
        return; // returning here avoids reverting the input position
      }
      if (this.tokenMatcher(currToken, reSyncTokType)) {
        break;
      }
      currToken = this.SKIP_TOKEN();
      this.addToResyncTokens(currToken, skippedTokens);
    }

    // No CLOSER re-sync point exists inside the repetition: reset the state. The parsing
    // exception we tried to prevent will happen in the NEXT parsing step and may still be
    // handled later by "between rules" re-sync recovery.
    this.importLexerState(savedLexerState);
  }

  /**
   * Decides whether in-repetition recovery is worth attempting.
   *
   * @param expectTokAfterLastMatch - token type expected after the repetition.
   * @param nextTokIdx - occurrence index of that token inside the rule.
   * @param notStuck - exactly `false` when the calling repetition is stuck.
   * @returns `true` only when none of the checks below rules recovery out.
   */
  shouldInRepetitionRecoveryBeTried(expectTokAfterLastMatch, nextTokIdx, notStuck) {
    // A stuck MANY repetition must not attempt recovery, it could loop forever.
    if (notStuck === false) {
      return false;
    }
    // Nothing to recover from when the next token is already the expected one. Recovery is
    // also disabled while backtracking, as it could make the parser prefer a backtracking
    // path containing recovered errors over a valid grammar path.
    const nextIsExpected = this.tokenMatcher(this.LA_FAST(1), expectTokAfterLastMatch);
    if (nextIsExpected || this.isBackTracking()) {
      return false;
    }
    // In-rule recovery (single token insertion or deletion) is always preferred when it is
    // possible, because it changes the input stream the least (greedy algorithm).
    const follows = this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx);
    return !this.canPerformInRuleRecovery(expectTokAfterLastMatch, follows);
  }

  // TODO: should this be a member method or a utility? it does not have any state or usage of 'this'...
  // TODO: should this be more explicitly part of the public API?
  /**
   * Walks the top rule of the grammar path and returns what the walker reports.
   *
   * @param grammarPath - path object as produced by `getCurrentGrammarPath`.
   * @returns the result of `NextAfterTokenWalker#startWalking`.
   */
  getNextPossibleTokenTypes(grammarPath) {
    const rootRuleName = grammarPath.ruleStack[0];
    const rootProduction = this.getGAstProductions()[rootRuleName];
    const walker = new NextAfterTokenWalker(rootProduction, grammarPath);
    return walker.startWalking();
  }

  // Error Recovery functionality
  /**
   * Computes the token types that may follow `tokType` at occurrence `tokIdxInRule`,
   * based on the current grammar path.
   */
  getFollowsForInRuleRecovery(tokType, tokIdxInRule) {
    return this.getNextPossibleTokenTypes(this.getCurrentGrammarPath(tokType, tokIdxInRule));
  }

  /**
   * Attempts single token insertion first and single token deletion second.
   *
   * @param expectedTokType - token type the parser expected.
   * @param follows - token types that may follow the expected token.
   * @returns the inserted placeholder token, or the token found after the deleted one.
   * @throws InRuleRecoveryException when neither strategy applies.
   */
  tryInRuleRecovery(expectedTokType, follows) {
    if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) {
      return this.getTokenToInsert(expectedTokType);
    }
    if (!this.canRecoverWithSingleTokenDeletion(expectedTokType)) {
      throw new InRuleRecoveryException("sad sad panda");
    }
    // Deletion: skip the offending token, then consume the expected one behind it.
    const tokenAfterDeleted = this.SKIP_TOKEN();
    this.consumeToken();
    return tokenAfterDeleted;
  }

  /**
   * Tells whether either in-rule strategy (insertion, then deletion) is applicable.
   */
  canPerformInRuleRecovery(expectedToken, follows) {
    const insertionPossible = this.canRecoverWithSingleTokenInsertion(expectedToken, follows);
    return insertionPossible || this.canRecoverWithSingleTokenDeletion(expectedToken);
  }

  /**
   * Insertion is possible when the token type may be inserted and the next token matches
   * one of the token types that may follow the expected token.
   */
  canRecoverWithSingleTokenInsertion(expectedTokType, follows) {
    const insertable = this.canTokenTypeBeInsertedInRecovery(expectedTokType);
    // The possible following tokens must be known to perform single token insertion.
    if (!insertable || follows.length === 0) {
      return false;
    }
    const mismatchedTok = this.LA_FAST(1);
    const matchingFollow = follows.find((followTokType) =>
      this.tokenMatcher(mismatchedTok, followTokType),
    );
    return matchingFollow !== undefined;
  }

  /**
   * Deletion is possible when deleting is allowed and the token after the next one is the
   * expected token.
   */
  canRecoverWithSingleTokenDeletion(expectedTokType) {
    if (!this.canTokenTypeBeDeletedInRecovery(expectedTokType)) {
      return false;
    }
    // LA_FAST is deliberately not used: LA(2) might be un-safe with maxLookahead=1
    // in some edge cases (?)
    const tokenAfterNext = this.LA(2);
    return this.tokenMatcher(tokenAfterNext, expectedTokType);
  }

  /**
   * Checks membership of `tokenTypeIdx` in the follow set of the current rule invocation.
   */
  isInCurrentRuleReSyncSet(tokenTypeIdx) {
    return this.getFollowSetFromFollowKey(this.getCurrFollowKey()).includes(tokenTypeIdx);
  }

  /**
   * Scans forward from the next token and returns the first re-sync token type (taken from
   * the flattened follow sets) that matches a token of the input.
   */
  findReSyncTokenType() {
    const reSyncCandidates = this.flattenFollowSet();
    // The scan always terminates: EOF is always in the follow stack and also always
    // (virtually) in the input.
    let k = 1;
    let scannedToken = this.LA_FAST(1);
    for (;;) {
      const matched = reSyncCandidates.find((candidate) => tokenMatcher(scannedToken, candidate));
      if (matched !== undefined) {
        return matched;
      }
      k += 1;
      scannedToken = this.LA(k);
    }
  }

  /**
   * Returns the follow key describing the current rule invocation, or `EOF_FOLLOW_KEY`
   * when `RULE_STACK_IDX` is 0.
   */
  getCurrFollowKey() {
    // The rule name is always added to the stack before the rule is invoked, so index 0
    // denotes a stack holding a single rule.
    if (this.RULE_STACK_IDX === 0) {
      return EOF_FOLLOW_KEY;
    }
    const ownShortName = this.currRuleShortName;
    const idxInCallingRule = this.getLastExplicitRuleOccurrenceIndex();
    const callerShortName = this.getPreviousExplicitRuleShortName();
    const ruleName = this.shortRuleNameToFullName(ownShortName);
    const inRule = this.shortRuleNameToFullName(callerShortName);
    return { ruleName, idxInCallingRule, inRule };
  }

  /**
   * Builds one follow key per entry of the rule stack; the first entry is always
   * `EOF_FOLLOW_KEY`, every other entry names the rule, its occurrence and its caller.
   */
  buildFullFollowKeyStack() {
    const ruleStack = this.RULE_STACK;
    const occurrenceStack = this.RULE_OCCURRENCE_STACK;
    const depth = this.RULE_STACK_IDX + 1;
    const followKeys = new Array(depth);
    if (depth > 0) {
      followKeys[0] = EOF_FOLLOW_KEY;
    }
    for (let level = 1; level < depth; level++) {
      followKeys[level] = {
        ruleName: this.shortRuleNameToFullName(ruleStack[level]),
        idxInCallingRule: occurrenceStack[level],
        inRule: this.shortRuleNameToFullName(ruleStack[level - 1]),
      };
    }
    return followKeys;
  }

  /**
   * Concatenates the follow sets of every follow key on the stack into one flat array.
   */
  flattenFollowSet() {
    return this.buildFullFollowKeyStack().flatMap((followKey) =>
      this.getFollowSetFromFollowKey(followKey),
    );
  }

  /**
   * Resolves a follow key to its follow set.
   *
   * @returns `[EOF]` for `EOF_FOLLOW_KEY`, otherwise the `resyncFollows` entry stored under
   *   `ruleName + idxInCallingRule + IN + inRule`.
   */
  getFollowSetFromFollowKey(followKey) {
    if (followKey === EOF_FOLLOW_KEY) {
      return [EOF];
    }
    const { ruleName, idxInCallingRule, inRule } = followKey;
    const followName = ruleName + idxInCallingRule + IN + inRule;
    return this.resyncFollows[followName];
  }

  // Including a virtual EOF token in the list of re-synced tokens makes no sense: EOF does
  // not really exist and carries no useful information (line/column numbers).
  /**
   * Appends `token` to `resyncTokens` unless it matches EOF.
   *
   * @returns the same `resyncTokens` array that was passed in.
   */
  addToResyncTokens(token, resyncTokens) {
    const isVirtualEof = this.tokenMatcher(token, EOF);
    if (isVirtualEof) {
      return resyncTokens;
    }
    resyncTokens.push(token);
    return resyncTokens;
  }

  /**
   * Skips tokens until the next token matches `tokType`.
   *
   * @returns the collected tokens without the last one.
   */
  reSyncTo(tokType) {
    const collected = [];
    let upcoming = this.LA_FAST(1);
    while (this.tokenMatcher(upcoming, tokType) === false) {
      upcoming = this.SKIP_TOKEN();
      this.addToResyncTokens(upcoming, collected);
    }
    // The last token is not part of the error.
    return collected.slice(0, -1);
  }

  /**
   * NO-OP by default. `initRecoverable` shadows it on the instance with the function (not
   * method) of the same name when recovery is enabled.
   */
  attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) {
    // intentionally empty
  }

  /**
   * Captures the current rule and occurrence stacks together with the given token.
   *
   * @param tokType - stored as `lastTok`.
   * @param tokIdxInRule - stored as `lastTokOccurrence`.
   */
  getCurrentGrammarPath(tokType, tokIdxInRule) {
    return {
      ruleStack: this.getHumanReadableRuleStack(),
      occurrenceStack: this.RULE_OCCURRENCE_STACK.slice(0, this.RULE_OCCURRENCE_STACK_IDX + 1),
      lastTok: tokType,
      lastTokOccurrence: tokIdxInRule,
    };
  }

  /**
   * Translates the first `RULE_STACK_IDX + 1` entries of `RULE_STACK` into full rule names.
   */
  getHumanReadableRuleStack() {
    const depth = this.RULE_STACK_IDX + 1;
    const fullNames = new Array(depth);
    for (let level = 0; level < depth; level++) {
      const shortName = this.RULE_STACK[level];
      fullNames[level] = this.shortRuleNameToFullName(shortName);
    }
    return fullNames;
  }
}
/**
 * Real implementation behind `Recoverable#attemptInRepetitionRecovery`; it must be invoked with
 * the parser as `this`.
 *
 * Looks up (or computes and caches in `firstAfterRepMap`) what follows the repetition, then
 * delegates to `tryInRepetitionRecovery` when `shouldInRepetitionRecoveryBeTried` agrees.
 *
 * @param prodFunc - repetition implementation, forwarded to `tryInRepetitionRecovery`.
 * @param args - arguments for `prodFunc`.
 * @param lookaheadFunc - lookahead function of the repetition.
 * @param dslMethodIdx - first argument of `getKeyForAutomaticLookahead`.
 * @param prodOccurrence - occurrence index of the repetition; also passed to the walker.
 * @param nextToksWalker - walker class; `startWalking()` must return an object exposing
 *   `token`, `occurrence` and `isEndOfRule`.
 * @param notStuck - forwarded to `shouldInRepetitionRecoveryBeTried`.
 */
export function attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) {
  const cacheKey = this.getKeyForAutomaticLookahead(dslMethodIdx, prodOccurrence);
  let firstAfterRepInfo = this.firstAfterRepMap[cacheKey];
  if (firstAfterRepInfo === undefined) {
    // Cache miss: walk the grammar of the current rule once and remember the outcome.
    const ruleName = this.getCurrRuleFullName();
    const ruleGrammar = this.getGAstProductions()[ruleName];
    firstAfterRepInfo = new nextToksWalker(ruleGrammar, prodOccurrence).startWalking();
    this.firstAfterRepMap[cacheKey] = firstAfterRepInfo;
  }

  const { token, occurrence, isEndOfRule } = firstAfterRepInfo;
  // Special edge case of a TOP most repetition after which the input should END:
  // expecting EOF forces an attempt for in-rule recovery in that scenario.
  const inputShouldEndHere = this.RULE_STACK_IDX === 0 && isEndOfRule && token === undefined;
  const expectTokAfterLastMatch = inputShouldEndHere ? EOF : token;
  const nextTokIdx = inputShouldEndHere ? 1 : occurrence;

  // Nothing to re-sync to. This check was extracted from `shouldInRepetitionRecoveryBeTried`
  // to act as a type-guard.
  if (expectTokAfterLastMatch === undefined || nextTokIdx === undefined) {
    return;
  }
  if (!this.shouldInRepetitionRecoveryBeTried(expectTokAfterLastMatch, nextTokIdx, notStuck)) {
    return;
  }
  // TODO: performance optimization: instead of passing the original args here, we modify
  // the args param (or create a new one) and make sure the lookahead func is explicitly provided
  // to avoid searching the cache for it once more.
  this.tryInRepetitionRecovery(prodFunc, args, lookaheadFunc, expectTokAfterLastMatch);
}
//# sourceMappingURL=recoverable.js.map