UNPKG

chevrotain

Version:

Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers

290 lines 14.1 kB
import { createTokenInstance, EOF } from "../../../scan/tokens_public"; import { cloneArr, contains, dropRight, find, flatten, has, isEmpty, map } from "../../../utils/utils"; import { MismatchedTokenException } from "../../exceptions_public"; import { IN } from "../../constants"; import { DEFAULT_PARSER_CONFIG } from "../parser"; export var EOF_FOLLOW_KEY = {}; export var IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException"; export function InRuleRecoveryException(message) { this.name = IN_RULE_RECOVERY_EXCEPTION; this.message = message; } InRuleRecoveryException.prototype = Error.prototype; /** * This trait is responsible for the error recovery and fault tolerant logic */ var Recoverable = /** @class */ (function () { function Recoverable() { } Recoverable.prototype.initRecoverable = function (config) { this.firstAfterRepMap = {}; this.resyncFollows = {}; this.recoveryEnabled = has(config, "recoveryEnabled") ? config.recoveryEnabled : DEFAULT_PARSER_CONFIG.recoveryEnabled; // performance optimization, NOOP will be inlined which // effectively means that this optional feature does not exist // when not used. if (this.recoveryEnabled) { this.attemptInRepetitionRecovery = attemptInRepetitionRecovery; } }; Recoverable.prototype.getTokenToInsert = function (tokType) { var tokToInsert = createTokenInstance(tokType, "", NaN, NaN, NaN, NaN, NaN, NaN); tokToInsert.isInsertedInRecovery = true; return tokToInsert; }; Recoverable.prototype.canTokenTypeBeInsertedInRecovery = function (tokType) { return true; }; Recoverable.prototype.tryInRepetitionRecovery = function (grammarRule, grammarRuleArgs, lookAheadFunc, expectedTokType) { var _this = this; // TODO: can the resyncTokenType be cached? var reSyncTokType = this.findReSyncTokenType(); var savedLexerState = this.exportLexerState(); var resyncedTokens = []; var passedResyncPoint = false; var nextTokenWithoutResync = this.LA(1); var currToken = this.LA(1); var generateErrorMessage = function () { var previousToken = _this.LA(0); // we are preemptively re-syncing before an error has been detected, therefor we must reproduce // the error that would have been thrown var msg = _this.errorMessageProvider.buildMismatchTokenMessage({ expected: expectedTokType, actual: nextTokenWithoutResync, previous: previousToken, ruleName: _this.getCurrRuleFullName() }); var error = new MismatchedTokenException(msg, nextTokenWithoutResync, _this.LA(0)); // the first token here will be the original cause of the error, this is not part of the resyncedTokens property. error.resyncedTokens = dropRight(resyncedTokens); _this.SAVE_ERROR(error); }; while (!passedResyncPoint) { // re-synced to a point where we can safely exit the repetition/ if (this.tokenMatcher(currToken, expectedTokType)) { generateErrorMessage(); return; // must return here to avoid reverting the inputIdx } else if (lookAheadFunc.call(this)) { // we skipped enough tokens so we can resync right back into another iteration of the repetition grammar rule generateErrorMessage(); // recursive invocation in other to support multiple re-syncs in the same top level repetition grammar rule grammarRule.apply(this, grammarRuleArgs); return; // must return here to avoid reverting the inputIdx } else if (this.tokenMatcher(currToken, reSyncTokType)) { passedResyncPoint = true; } else { currToken = this.SKIP_TOKEN(); this.addToResyncTokens(currToken, resyncedTokens); } } // we were unable to find a CLOSER point to resync inside the Repetition, reset the state. // The parsing exception we were trying to prevent will happen in the NEXT parsing step. it may be handled by // "between rules" resync recovery later in the flow. this.importLexerState(savedLexerState); }; Recoverable.prototype.shouldInRepetitionRecoveryBeTried = function (expectTokAfterLastMatch, nextTokIdx, notStuck) { // Edge case of arriving from a MANY repetition which is stuck // Attempting recovery in this case could cause an infinite loop if (notStuck === false) { return false; } // arguments to try and perform resync into the next iteration of the many are missing if (expectTokAfterLastMatch === undefined || nextTokIdx === undefined) { return false; } // no need to recover, next token is what we expect... if (this.tokenMatcher(this.LA(1), expectTokAfterLastMatch)) { return false; } // error recovery is disabled during backtracking as it can make the parser ignore a valid grammar path // and prefer some backtracking path that includes recovered errors. if (this.isBackTracking()) { return false; } // if we can perform inRule recovery (single token insertion or deletion) we always prefer that recovery algorithm // because if it works, it makes the least amount of changes to the input stream (greedy algorithm) //noinspection RedundantIfStatementJS if (this.canPerformInRuleRecovery(expectTokAfterLastMatch, this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx))) { return false; } return true; }; // Error Recovery functionality Recoverable.prototype.getFollowsForInRuleRecovery = function (tokType, tokIdxInRule) { var grammarPath = this.getCurrentGrammarPath(tokType, tokIdxInRule); var follows = this.getNextPossibleTokenTypes(grammarPath); return follows; }; Recoverable.prototype.tryInRuleRecovery = function (expectedTokType, follows) { if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) { var tokToInsert = this.getTokenToInsert(expectedTokType); return tokToInsert; } if (this.canRecoverWithSingleTokenDeletion(expectedTokType)) { var nextTok = this.SKIP_TOKEN(); this.consumeToken(); return nextTok; } throw new InRuleRecoveryException("sad sad panda"); }; Recoverable.prototype.canPerformInRuleRecovery = function (expectedToken, follows) { return (this.canRecoverWithSingleTokenInsertion(expectedToken, follows) || this.canRecoverWithSingleTokenDeletion(expectedToken)); }; Recoverable.prototype.canRecoverWithSingleTokenInsertion = function (expectedTokType, follows) { var _this = this; if (!this.canTokenTypeBeInsertedInRecovery(expectedTokType)) { return false; } // must know the possible following tokens to perform single token insertion if (isEmpty(follows)) { return false; } var mismatchedTok = this.LA(1); var isMisMatchedTokInFollows = find(follows, function (possibleFollowsTokType) { return _this.tokenMatcher(mismatchedTok, possibleFollowsTokType); }) !== undefined; return isMisMatchedTokInFollows; }; Recoverable.prototype.canRecoverWithSingleTokenDeletion = function (expectedTokType) { var isNextTokenWhatIsExpected = this.tokenMatcher(this.LA(2), expectedTokType); return isNextTokenWhatIsExpected; }; Recoverable.prototype.isInCurrentRuleReSyncSet = function (tokenTypeIdx) { var followKey = this.getCurrFollowKey(); var currentRuleReSyncSet = this.getFollowSetFromFollowKey(followKey); return contains(currentRuleReSyncSet, tokenTypeIdx); }; Recoverable.prototype.findReSyncTokenType = function () { var allPossibleReSyncTokTypes = this.flattenFollowSet(); // this loop will always terminate as EOF is always in the follow stack and also always (virtually) in the input var nextToken = this.LA(1); var k = 2; while (true) { var nextTokenType = nextToken.tokenType; if (contains(allPossibleReSyncTokTypes, nextTokenType)) { return nextTokenType; } nextToken = this.LA(k); k++; } }; Recoverable.prototype.getCurrFollowKey = function () { // the length is at least one as we always add the ruleName to the stack before invoking the rule. if (this.RULE_STACK.length === 1) { return EOF_FOLLOW_KEY; } var currRuleShortName = this.getLastExplicitRuleShortName(); var currRuleIdx = this.getLastExplicitRuleOccurrenceIndex(); var prevRuleShortName = this.getPreviousExplicitRuleShortName(); return { ruleName: this.shortRuleNameToFullName(currRuleShortName), idxInCallingRule: currRuleIdx, inRule: this.shortRuleNameToFullName(prevRuleShortName) }; }; Recoverable.prototype.buildFullFollowKeyStack = function () { var _this = this; var explicitRuleStack = this.RULE_STACK; var explicitOccurrenceStack = this.RULE_OCCURRENCE_STACK; return map(explicitRuleStack, function (ruleName, idx) { if (idx === 0) { return EOF_FOLLOW_KEY; } return { ruleName: _this.shortRuleNameToFullName(ruleName), idxInCallingRule: explicitOccurrenceStack[idx], inRule: _this.shortRuleNameToFullName(explicitRuleStack[idx - 1]) }; }); }; Recoverable.prototype.flattenFollowSet = function () { var _this = this; var followStack = map(this.buildFullFollowKeyStack(), function (currKey) { return _this.getFollowSetFromFollowKey(currKey); }); return flatten(followStack); }; Recoverable.prototype.getFollowSetFromFollowKey = function (followKey) { if (followKey === EOF_FOLLOW_KEY) { return [EOF]; } var followName = followKey.ruleName + followKey.idxInCallingRule + IN + followKey.inRule; return this.resyncFollows[followName]; }; // It does not make any sense to include a virtual EOF token in the list of resynced tokens // as EOF does not really exist and thus does not contain any useful information (line/column numbers) Recoverable.prototype.addToResyncTokens = function (token, resyncTokens) { if (!this.tokenMatcher(token, EOF)) { resyncTokens.push(token); } return resyncTokens; }; Recoverable.prototype.reSyncTo = function (tokType) { var resyncedTokens = []; var nextTok = this.LA(1); while (this.tokenMatcher(nextTok, tokType) === false) { nextTok = this.SKIP_TOKEN(); this.addToResyncTokens(nextTok, resyncedTokens); } // the last token is not part of the error. return dropRight(resyncedTokens); }; Recoverable.prototype.attemptInRepetitionRecovery = function (prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) { // by default this is a NO-OP // The actual implementation is with the function(not method) below }; Recoverable.prototype.getCurrentGrammarPath = function (tokType, tokIdxInRule) { var pathRuleStack = this.getHumanReadableRuleStack(); var pathOccurrenceStack = cloneArr(this.RULE_OCCURRENCE_STACK); var grammarPath = { ruleStack: pathRuleStack, occurrenceStack: pathOccurrenceStack, lastTok: tokType, lastTokOccurrence: tokIdxInRule }; return grammarPath; }; Recoverable.prototype.getHumanReadableRuleStack = function () { var _this = this; return map(this.RULE_STACK, function (currShortName) { return _this.shortRuleNameToFullName(currShortName); }); }; return Recoverable; }()); export { Recoverable }; export function attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) { var key = this.getKeyForAutomaticLookahead(dslMethodIdx, prodOccurrence); var firstAfterRepInfo = this.firstAfterRepMap[key]; if (firstAfterRepInfo === undefined) { var currRuleName = this.getCurrRuleFullName(); var ruleGrammar = this.getGAstProductions()[currRuleName]; var walker = new nextToksWalker(ruleGrammar, prodOccurrence); firstAfterRepInfo = walker.startWalking(); this.firstAfterRepMap[key] = firstAfterRepInfo; } var expectTokAfterLastMatch = firstAfterRepInfo.token; var nextTokIdx = firstAfterRepInfo.occurrence; var isEndOfRule = firstAfterRepInfo.isEndOfRule; // special edge case of a TOP most repetition after which the input should END. // this will force an attempt for inRule recovery in that scenario. if (this.RULE_STACK.length === 1 && isEndOfRule && expectTokAfterLastMatch === undefined) { expectTokAfterLastMatch = EOF; nextTokIdx = 1; } if (this.shouldInRepetitionRecoveryBeTried(expectTokAfterLastMatch, nextTokIdx, notStuck)) { // TODO: performance optimization: instead of passing the original args here, we modify // the args param (or create a new one) and make sure the lookahead func is explicitly provided // to avoid searching the cache for it once more. this.tryInRepetitionRecovery(prodFunc, args, lookaheadFunc, expectTokAfterLastMatch); } } //# sourceMappingURL=recoverable.js.map