UNPKG

chevrotain

Version:

Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers

chevrotain.io/docs/

Chevrotain/chevrotain

312 lines • 14.5 kB

JavaScript

import {
  createTokenInstance,
  EOF,
  tokenMatcher,
} from "../../../scan/tokens_public.js";
import { NextAfterTokenWalker } from "../../grammar/interpreter.js";
import { MismatchedTokenException } from "../../exceptions_public.js";
import { IN } from "../../constants.js";
import { DEFAULT_PARSER_CONFIG } from "../parser.js";

/** Sentinel follow key; `getFollowSetFromFollowKey` maps it to `[EOF]`. */
export const EOF_FOLLOW_KEY = {};

/** Value assigned to the `name` property of every `InRuleRecoveryException`. */
export const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException";

/**
 * Thrown by `tryInRuleRecovery` when neither single token insertion nor
 * single token deletion can be applied.
 */
export class InRuleRecoveryException extends Error {
  /**
   * @param {string} message - forwarded unchanged to the `Error` constructor.
   */
  constructor(message) {
    super(message);
    this.name = IN_RULE_RECOVERY_EXCEPTION;
  }
}

/**
 * This trait is responsible for the error recovery and fault tolerant logic.
 *
 * Its methods rely on members that are not defined in this class
 * (`LA`, `LA_FAST`, `SKIP_TOKEN`, `tokenMatcher`, `RULE_STACK`, ...) and are
 * expected to be supplied by whatever object the methods end up running on.
 */
export class Recoverable {
  /**
   * Resets the recovery related state and decides whether recovery is enabled.
   *
   * @param {object} config - parser configuration; only an own
   *   `recoveryEnabled` property is consulted here.
   */
  initRecoverable(config) {
    this.firstAfterRepMap = {};
    this.resyncFollows = {};

    if (Object.hasOwn(config, "recoveryEnabled")) {
      // assumes end user provides the correct config value/type
      this.recoveryEnabled = config.recoveryEnabled;
    } else {
      this.recoveryEnabled = DEFAULT_PARSER_CONFIG.recoveryEnabled;
    }

    // Performance optimization (original author's note): the default NO-OP
    // method is expected to be inlined, which effectively means this optional
    // feature does not exist when it is not used. Only when recovery is
    // enabled is the real implementation installed on the instance.
    if (this.recoveryEnabled) {
      this.attemptInRepetitionRecovery = attemptInRepetitionRecovery;
    }
  }

  /**
   * Creates the token that stands in for a missing token during recovery.
   *
   * @param tokType - token type of the token to fabricate.
   * @returns a token with an empty image, `NaN` for every numeric argument
   *   passed to `createTokenInstance`, and `isInsertedInRecovery === true`.
   */
  getTokenToInsert(tokType) {
    const insertedTok = createTokenInstance(
      tokType,
      "",
      NaN,
      NaN,
      NaN,
      NaN,
      NaN,
      NaN,
    );
    insertedTok.isInsertedInRecovery = true;
    return insertedTok;
  }

  /**
   * Whether a token of the given type may be inserted during recovery.
   * This implementation ignores `tokType` and always answers `true`.
   *
   * @returns {boolean}
   */
  canTokenTypeBeInsertedInRecovery(tokType) {
    return true;
  }

  /**
   * Whether a token of the given type may be deleted during recovery.
   * This implementation ignores `tokType` and always answers `true`.
   *
   * @returns {boolean}
   */
  canTokenTypeBeDeletedInRecovery(tokType) {
    return true;
  }

  /**
   * Skips tokens until one of three things happens:
   *  - the current token matches `expectedTokType` (the repetition can be exited),
   *  - `lookAheadFunc` succeeds (another iteration of the repetition can start),
   *  - the re-sync token type found by `findReSyncTokenType` is reached, in
   *    which case the saved lexer state is restored.
   *
   * @param {Function} grammarRule - invoked with `grammarRuleArgs` (and this
   *   object as `this`) when another iteration can be started.
   * @param {Array} grammarRuleArgs - arguments applied to `grammarRule`.
   * @param {Function} lookAheadFunc - called with this object as `this`.
   * @param expectedTokType - token type expected after the repetition.
   */
  tryInRepetitionRecovery(
    grammarRule,
    grammarRuleArgs,
    lookAheadFunc,
    expectedTokType,
  ) {
    // TODO: can the resyncTokenType be cached?
    const reSyncTokType = this.findReSyncTokenType();
    const lexerSnapshot = this.exportLexerState();
    const skippedTokens = [];
    const nextTokenWithoutResync = this.LA_FAST(1);
    let cursorTok = this.LA_FAST(1);

    const reportMismatch = () => {
      const previousToken = this.LA(0);
      // We are preemptively re-syncing before an error has been detected,
      // therefore we must reproduce the error that would have been thrown.
      const msg = this.errorMessageProvider.buildMismatchTokenMessage({
        expected: expectedTokType,
        actual: nextTokenWithoutResync,
        previous: previousToken,
        ruleName: this.getCurrRuleFullName(),
      });
      const mismatchError = new MismatchedTokenException(
        msg,
        nextTokenWithoutResync,
        this.LA(0),
      );
      // The last collected token is dropped before the list is attached to the
      // error. (Original author's note: "the first token here will be the
      // original cause of the error, this is not part of the resyncedTokens
      // property.")
      mismatchError.resyncedTokens = skippedTokens.slice(0, -1);
      this.SAVE_ERROR(mismatchError);
    };

    for (;;) {
      // re-synced to a point where we can safely exit the repetition
      if (this.tokenMatcher(cursorTok, expectedTokType)) {
        reportMismatch();
        return; // must return here to avoid reverting the inputIdx
      }

      // we skipped enough tokens so we can resync right back into another
      // iteration of the repetition grammar rule
      if (lookAheadFunc.call(this)) {
        reportMismatch();
        // recursive invocation in order to support multiple re-syncs in the
        // same top level repetition grammar rule
        grammarRule.apply(this, grammarRuleArgs);
        return; // must return here to avoid reverting the inputIdx
      }

      // passed the re-sync point, stop skipping
      if (this.tokenMatcher(cursorTok, reSyncTokType)) {
        break;
      }

      cursorTok = this.SKIP_TOKEN();
      this.addToResyncTokens(cursorTok, skippedTokens);
    }

    // We were unable to find a CLOSER point to resync inside the Repetition,
    // reset the state. The parsing exception we were trying to prevent will
    // happen in the NEXT parsing step. It may be handled by "between rules"
    // resync recovery later in the flow.
    this.importLexerState(lexerSnapshot);
  }

  /**
   * Decides whether in-repetition recovery is worth attempting.
   *
   * @param expectTokAfterLastMatch - token type expected after the repetition.
   * @param nextTokIdx - occurrence index passed on to `getFollowsForInRuleRecovery`.
   * @param notStuck - when strictly `false`, recovery is never attempted.
   * @returns {boolean}
   */
  shouldInRepetitionRecoveryBeTried(
    expectTokAfterLastMatch,
    nextTokIdx,
    notStuck,
  ) {
    // Edge case of arriving from a MANY repetition which is stuck.
    // Attempting recovery in this case could cause an infinite loop.
    if (notStuck === false) {
      return false;
    }

    // no need to recover, next token is what we expect...
    const nextIsExpected = this.tokenMatcher(
      this.LA_FAST(1),
      expectTokAfterLastMatch,
    );
    if (nextIsExpected) {
      return false;
    }

    // Error recovery is disabled during backtracking as it can make the parser
    // ignore a valid grammar path and prefer some backtracking path that
    // includes recovered errors.
    if (this.isBackTracking()) {
      return false;
    }

    // If we can perform inRule recovery (single token insertion or deletion)
    // we always prefer that recovery algorithm because, if it works, it makes
    // the least amount of changes to the input stream (greedy algorithm).
    const inRuleRecoveryPossible = this.canPerformInRuleRecovery(
      expectTokAfterLastMatch,
      this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx),
    );
    return !inRuleRecoveryPossible;
  }

  // TODO: should this be a member method or a utility? it does not have any state or usage of 'this'...
  // TODO: should this be more explicitly part of the public API?
  /**
   * Walks the production named by the first entry of `grammarPath.ruleStack`
   * with a `NextAfterTokenWalker` and returns whatever `startWalking()` yields.
   *
   * @param {object} grammarPath - see `getCurrentGrammarPath` for its shape.
   */
  getNextPossibleTokenTypes(grammarPath) {
    const rootRuleName = grammarPath.ruleStack[0];
    const rootProduction = this.getGAstProductions()[rootRuleName];
    const walker = new NextAfterTokenWalker(rootProduction, grammarPath);
    return walker.startWalking();
  }

  // Error Recovery functionality

  /**
   * Builds the current grammar path for the given token and returns the
   * result of `getNextPossibleTokenTypes` for that path.
   *
   * @param tokType - stored as `lastTok` in the grammar path.
   * @param tokIdxInRule - stored as `lastTokOccurrence` in the grammar path.
   */
  getFollowsForInRuleRecovery(tokType, tokIdxInRule) {
    return this.getNextPossibleTokenTypes(
      this.getCurrentGrammarPath(tokType, tokIdxInRule),
    );
  }

  /**
   * Attempts single token insertion first and single token deletion second.
   *
   * @param expectedTokType - the token type that failed to match.
   * @param {Array} follows - token types that may follow `expectedTokType`.
   * @returns the inserted token, or the token returned by `SKIP_TOKEN()`
   *   when the deletion strategy was used.
   * @throws {InRuleRecoveryException} when neither strategy is applicable.
   */
  tryInRuleRecovery(expectedTokType, follows) {
    if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) {
      return this.getTokenToInsert(expectedTokType);
    }

    if (!this.canRecoverWithSingleTokenDeletion(expectedTokType)) {
      throw new InRuleRecoveryException("sad sad panda");
    }

    const tokAfterSkip = this.SKIP_TOKEN();
    this.consumeToken();
    return tokAfterSkip;
  }

  /**
   * @returns whether single token insertion or single token deletion can be
   *   used to recover from a mismatch on `expectedToken`.
   */
  canPerformInRuleRecovery(expectedToken, follows) {
    if (this.canRecoverWithSingleTokenInsertion(expectedToken, follows)) {
      return true;
    }
    return this.canRecoverWithSingleTokenDeletion(expectedToken);
  }

  /**
   * Insertion is possible when the token type may be inserted, `follows` is
   * not empty, and the next token (`LA_FAST(1)`) matches one of `follows`.
   *
   * @returns {boolean}
   */
  canRecoverWithSingleTokenInsertion(expectedTokType, follows) {
    // must know the possible following tokens to perform single token insertion
    if (
      !this.canTokenTypeBeInsertedInRecovery(expectedTokType) ||
      follows.length === 0
    ) {
      return false;
    }

    const mismatchedTok = this.LA_FAST(1);
    const matchingFollow = follows.find((followTokType) =>
      this.tokenMatcher(mismatchedTok, followTokType),
    );
    return matchingFollow !== undefined;
  }

  /**
   * Deletion is possible when the token type may be deleted and the token
   * after the next one (`LA(2)`) matches `expectedTokType`.
   */
  canRecoverWithSingleTokenDeletion(expectedTokType) {
    if (!this.canTokenTypeBeDeletedInRecovery(expectedTokType)) {
      return false;
    }

    // not using LA_FAST because LA(2) might be un-safe with maxLookahead=1
    // in some edge cases (?)
    const tokAfterNext = this.LA(2);
    return this.tokenMatcher(tokAfterNext, expectedTokType);
  }

  /**
   * @returns {boolean} whether the follow set of the current follow key
   *   includes `tokenTypeIdx`.
   */
  isInCurrentRuleReSyncSet(tokenTypeIdx) {
    const reSyncSet = this.getFollowSetFromFollowKey(this.getCurrFollowKey());
    return reSyncSet.includes(tokenTypeIdx);
  }

  /**
   * Scans forward through the input (without consuming it) until a token
   * matches one of the token types in the flattened follow set.
   *
   * @returns the first token type of the flattened follow set that matches.
   */
  findReSyncTokenType() {
    const candidateTokTypes = this.flattenFollowSet();
    // this loop will always terminate as EOF is always in the follow stack and
    // also always (virtually) in the input
    let lookaheadTok = this.LA_FAST(1);
    for (let k = 2; ; k++) {
      const matched = candidateTokTypes.find((candidate) =>
        tokenMatcher(lookaheadTok, candidate),
      );
      if (matched !== undefined) {
        return matched;
      }
      lookaheadTok = this.LA(k);
    }
  }

  /**
   * @returns `EOF_FOLLOW_KEY` when `RULE_STACK_IDX` is 0, otherwise an object
   *   with `ruleName`, `idxInCallingRule` and `inRule` for the current rule.
   */
  getCurrFollowKey() {
    // the length is at least one as we always add the ruleName to the stack
    // before invoking the rule.
    if (this.RULE_STACK_IDX === 0) {
      return EOF_FOLLOW_KEY;
    }

    const currShortName = this.currRuleShortName;
    const idxInCallingRule = this.getLastExplicitRuleOccurrenceIndex();
    const callerShortName = this.getPreviousExplicitRuleShortName();
    const ruleName = this.shortRuleNameToFullName(currShortName);
    const inRule = this.shortRuleNameToFullName(callerShortName);
    return { ruleName, idxInCallingRule, inRule };
  }

  /**
   * Builds one follow key per rule stack entry, indices 0..RULE_STACK_IDX.
   * The entry at index 0 is always `EOF_FOLLOW_KEY`.
   *
   * @returns {Array}
   */
  buildFullFollowKeyStack() {
    const ruleStack = this.RULE_STACK;
    const occurrenceStack = this.RULE_OCCURRENCE_STACK;
    const depth = this.RULE_STACK_IDX + 1;
    const followKeys = new Array(depth);

    for (let level = 0; level < depth; level++) {
      followKeys[level] =
        level === 0
          ? EOF_FOLLOW_KEY
          : {
              ruleName: this.shortRuleNameToFullName(ruleStack[level]),
              idxInCallingRule: occurrenceStack[level],
              inRule: this.shortRuleNameToFullName(ruleStack[level - 1]),
            };
    }

    return followKeys;
  }

  /**
   * @returns {Array} the follow sets of every key produced by
   *   `buildFullFollowKeyStack`, concatenated one level deep.
   */
  flattenFollowSet() {
    return this.buildFullFollowKeyStack().flatMap((followKey) =>
      this.getFollowSetFromFollowKey(followKey),
    );
  }

  /**
   * @param followKey - `EOF_FOLLOW_KEY` or an object with `ruleName`,
   *   `idxInCallingRule` and `inRule`.
   * @returns `[EOF]` for `EOF_FOLLOW_KEY`, otherwise the `resyncFollows`
   *   entry stored under `ruleName + idxInCallingRule + IN + inRule`.
   */
  getFollowSetFromFollowKey(followKey) {
    if (followKey === EOF_FOLLOW_KEY) {
      return [EOF];
    }

    const { ruleName, idxInCallingRule, inRule } = followKey;
    const followName = ruleName + idxInCallingRule + IN + inRule;
    return this.resyncFollows[followName];
  }

  // It does not make any sense to include a virtual EOF token in the list of
  // resynced tokens as EOF does not really exist and thus does not contain any
  // useful information (line/column numbers).
  /**
   * Appends `token` to `resyncTokens` unless it matches `EOF`.
   *
   * @returns {Array} the same `resyncTokens` array that was passed in.
   */
  addToResyncTokens(token, resyncTokens) {
    const isEof = this.tokenMatcher(token, EOF);
    if (!isEof) {
      resyncTokens.push(token);
    }
    return resyncTokens;
  }

  /**
   * Skips tokens until the current one matches `tokType`.
   *
   * @returns {Array} the collected tokens, without the last collected one.
   */
  reSyncTo(tokType) {
    const skippedTokens = [];
    let cursorTok = this.LA_FAST(1);

    while (this.tokenMatcher(cursorTok, tokType) === false) {
      cursorTok = this.SKIP_TOKEN();
      this.addToResyncTokens(cursorTok, skippedTokens);
    }

    // the last token is not part of the error.
    return skippedTokens.slice(0, -1);
  }

  /**
   * By default this is a NO-OP.
   * The actual implementation is the module level function (not method) of the
   * same name, which `initRecoverable` installs when recovery is enabled.
   */
  attemptInRepetitionRecovery(
    prodFunc,
    args,
    lookaheadFunc,
    dslMethodIdx,
    prodOccurrence,
    nextToksWalker,
    notStuck,
  ) {
    // intentionally empty
  }

  /**
   * @param tokType - stored as `lastTok`.
   * @param tokIdxInRule - stored as `lastTokOccurrence`.
   * @returns {object} `{ ruleStack, occurrenceStack, lastTok, lastTokOccurrence }`
   *   where `occurrenceStack` is a copy of `RULE_OCCURRENCE_STACK` up to and
   *   including index `RULE_OCCURRENCE_STACK_IDX`.
   */
  getCurrentGrammarPath(tokType, tokIdxInRule) {
    return {
      ruleStack: this.getHumanReadableRuleStack(),
      occurrenceStack: this.RULE_OCCURRENCE_STACK.slice(
        0,
        this.RULE_OCCURRENCE_STACK_IDX + 1,
      ),
      lastTok: tokType,
      lastTokOccurrence: tokIdxInRule,
    };
  }

  /**
   * @returns {Array} the full rule names for `RULE_STACK` entries
   *   0..RULE_STACK_IDX, in stack order.
   */
  getHumanReadableRuleStack() {
    const depth = this.RULE_STACK_IDX + 1;
    const fullNames = new Array(depth);
    for (let level = 0; level < depth; level++) {
      fullNames[level] = this.shortRuleNameToFullName(this.RULE_STACK[level]);
    }
    return fullNames;
  }
}

/**
 * Real implementation of `Recoverable#attemptInRepetitionRecovery`; must be
 * invoked with the parser instance as `this`.
 *
 * Looks up (and caches in `firstAfterRepMap`) what is expected after the
 * repetition, and starts in-repetition recovery when
 * `shouldInRepetitionRecoveryBeTried` approves.
 *
 * @param {Function} prodFunc - forwarded to `tryInRepetitionRecovery`.
 * @param {Array} args - forwarded to `tryInRepetitionRecovery`.
 * @param {Function} lookaheadFunc - forwarded to `tryInRepetitionRecovery`.
 * @param dslMethodIdx - used, with `prodOccurrence`, to build the cache key.
 * @param prodOccurrence - occurrence index of the repetition.
 * @param nextToksWalker - constructor called as
 *   `new nextToksWalker(ruleGrammar, prodOccurrence)`; the instance must
 *   provide `startWalking()`.
 * @param notStuck - forwarded to `shouldInRepetitionRecoveryBeTried`.
 */
export function attemptInRepetitionRecovery(
  prodFunc,
  args,
  lookaheadFunc,
  dslMethodIdx,
  prodOccurrence,
  nextToksWalker,
  notStuck,
) {
  const cacheKey = this.getKeyForAutomaticLookahead(
    dslMethodIdx,
    prodOccurrence,
  );

  let firstAfterRepInfo = this.firstAfterRepMap[cacheKey];
  if (firstAfterRepInfo === undefined) {
    const currRuleName = this.getCurrRuleFullName();
    const ruleGrammar = this.getGAstProductions()[currRuleName];
    firstAfterRepInfo = new nextToksWalker(
      ruleGrammar,
      prodOccurrence,
    ).startWalking();
    this.firstAfterRepMap[cacheKey] = firstAfterRepInfo;
  }

  const { token, occurrence, isEndOfRule } = firstAfterRepInfo;
  let expectTokAfterLastMatch = token;
  let nextTokIdx = occurrence;

  // Special edge case of a TOP most repetition after which the input should
  // END. This will force an attempt for inRule recovery in that scenario.
  const isTopLevelTail =
    this.RULE_STACK_IDX === 0 &&
    isEndOfRule &&
    expectTokAfterLastMatch === undefined;
  if (isTopLevelTail) {
    expectTokAfterLastMatch = EOF;
    nextTokIdx = 1;
  }

  // We don't have anything to re-sync to...
  // This condition was extracted from `shouldInRepetitionRecoveryBeTried` to
  // act as a type-guard.
  if (expectTokAfterLastMatch === undefined || nextTokIdx === undefined) {
    return;
  }

  const shouldTry = this.shouldInRepetitionRecoveryBeTried(
    expectTokAfterLastMatch,
    nextTokIdx,
    notStuck,
  );
  if (!shouldTry) {
    return;
  }

  // TODO: performance optimization: instead of passing the original args here, we modify
  // the args param (or create a new one) and make sure the lookahead func is explicitly provided
  // to avoid searching the cache for it once more.
  this.tryInRepetitionRecovery(
    prodFunc,
    args,
    lookaheadFunc,
    expectTokAfterLastMatch,
  );
}
//# sourceMappingURL=recoverable.js.map