chevrotain
Version:
Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers
290 lines • 14.1 kB
JavaScript
import { createTokenInstance, EOF } from "../../../scan/tokens_public";
import { cloneArr, contains, dropRight, find, flatten, has, isEmpty, map } from "../../../utils/utils";
import { MismatchedTokenException } from "../../exceptions_public";
import { IN } from "../../constants";
import { DEFAULT_PARSER_CONFIG } from "../parser";
/**
 * Sentinel "follow key" used for the outermost rule.
 * It is compared by identity (===) and maps to a follow set containing only EOF.
 */
export var EOF_FOLLOW_KEY = {};
/**
 * Value stored in the `name` property of every InRuleRecoveryException.
 */
export var IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException";
/**
 * Error thrown when neither single token insertion nor single token deletion
 * can recover from a mismatched token inside a rule.
 *
 * The Error constructor is intentionally not invoked, so instances do not
 * carry a `stack` property.
 *
 * @param {string} message - description of the failed recovery attempt.
 */
export function InRuleRecoveryException(message) {
    this.name = IN_RULE_RECOVERY_EXCEPTION;
    this.message = message;
}
// Inherit from Error through a dedicated prototype object.
// Assigning Error.prototype itself would make every Error an
// "instanceof InRuleRecoveryException" and would leak any later additions
// to this prototype onto all built-in errors.
InRuleRecoveryException.prototype = Object.create(Error.prototype, {
    constructor: {
        value: InRuleRecoveryException,
        writable: true,
        configurable: true
    }
});
/**
 * Trait (mixin) that holds the error recovery / fault tolerance logic.
 *
 * Many members used below (LA, SKIP_TOKEN, consumeToken, tokenMatcher,
 * RULE_STACK, RULE_OCCURRENCE_STACK, SAVE_ERROR, ...) are not defined here;
 * presumably they are provided by the parser this trait is mixed into.
 */
var Recoverable = /** @class */ (function () {
    function Recoverable() {
    }
    var proto = Recoverable.prototype;
    /**
     * Resets the recovery caches and reads the `recoveryEnabled` option.
     *
     * @param config - parser configuration; when it has no own
     *   `recoveryEnabled` entry the value from DEFAULT_PARSER_CONFIG is used.
     */
    proto.initRecoverable = function (config) {
        this.firstAfterRepMap = {};
        this.resyncFollows = {};
        if (has(config, "recoveryEnabled")) {
            this.recoveryEnabled = config.recoveryEnabled;
        }
        else {
            this.recoveryEnabled = DEFAULT_PARSER_CONFIG.recoveryEnabled;
        }
        // Performance optimization: the prototype's attemptInRepetitionRecovery is
        // an empty function, so the feature costs (almost) nothing unless recovery
        // is enabled and the real implementation is installed on the instance.
        if (this.recoveryEnabled) {
            this.attemptInRepetitionRecovery = attemptInRepetitionRecovery;
        }
    };
    /**
     * Creates the token that stands in for a missing one during recovery.
     *
     * @param tokType - the token type that was expected.
     * @returns a token with an empty image, NaN for the remaining numeric
     *   arguments and `isInsertedInRecovery` set to true.
     */
    proto.getTokenToInsert = function (tokType) {
        var inserted = createTokenInstance(tokType, "", NaN, NaN, NaN, NaN, NaN, NaN);
        inserted.isInsertedInRecovery = true;
        return inserted;
    };
    /**
     * Hook deciding whether a token type may be inserted during recovery.
     * This default implementation allows every token type.
     */
    proto.canTokenTypeBeInsertedInRecovery = function (tokType) {
        return true;
    };
    /**
     * Attempts to recover inside a repetition by skipping tokens until either
     * the token expected after the repetition is found, or another iteration
     * of the repetition can start. If the re-sync token type is reached first,
     * the saved lexer state is restored and nothing is reported.
     *
     * @param grammarRule - the repetition DSL function to re-invoke.
     * @param grammarRuleArgs - arguments for grammarRule.
     * @param lookAheadFunc - invoked with `this`; truthy when another iteration can start.
     * @param expectedTokType - token type expected after the repetition.
     */
    proto.tryInRepetitionRecovery = function (grammarRule, grammarRuleArgs, lookAheadFunc, expectedTokType) {
        var self = this;
        // TODO: can the resyncTokenType be cached?
        var resyncLimitTokType = this.findReSyncTokenType();
        var lexerStateBeforeRecovery = this.exportLexerState();
        var skippedTokens = [];
        var offendingToken = this.LA(1);
        var cursor = this.LA(1);
        // The re-sync happens before any error was actually detected, so the
        // mismatch error that would have been thrown has to be reproduced here.
        var saveMismatchError = function () {
            var tokenBeforeError = self.LA(0);
            var message = self.errorMessageProvider.buildMismatchTokenMessage({
                expected: expectedTokType,
                actual: offendingToken,
                previous: tokenBeforeError,
                ruleName: self.getCurrRuleFullName()
            });
            var mismatch = new MismatchedTokenException(message, offendingToken, self.LA(0));
            // The last collected token is the one we re-synced to; it is dropped
            // so it is not reported among the skipped tokens.
            mismatch.resyncedTokens = dropRight(skippedTokens);
            self.SAVE_ERROR(mismatch);
        };
        while (true) {
            if (this.tokenMatcher(cursor, expectedTokType)) {
                // re-synced to a point where the repetition can safely be exited.
                saveMismatchError();
                return; // returning here avoids reverting the lexer state
            }
            if (lookAheadFunc.call(this)) {
                // enough tokens were skipped to start another iteration of the repetition.
                saveMismatchError();
                // re-invoking the rule supports several re-syncs within the same repetition.
                grammarRule.apply(this, grammarRuleArgs);
                return; // returning here avoids reverting the lexer state
            }
            if (this.tokenMatcher(cursor, resyncLimitTokType)) {
                // passed the re-sync point without finding a closer place to recover.
                break;
            }
            cursor = this.SKIP_TOKEN();
            this.addToResyncTokens(cursor, skippedTokens);
        }
        // No closer re-sync point exists inside the repetition: undo the skipping.
        // The parsing exception we tried to prevent will occur in the next parsing
        // step and may be handled by "between rules" re-sync recovery later on.
        this.importLexerState(lexerStateBeforeRecovery);
    };
    /**
     * Decides whether in-repetition recovery is worth attempting.
     *
     * @param expectTokAfterLastMatch - token type expected after the repetition.
     * @param nextTokIdx - occurrence index of that token in the rule.
     * @param notStuck - false when arriving from a repetition that is stuck.
     * @returns {boolean} true only when none of the guards below applies.
     */
    proto.shouldInRepetitionRecoveryBeTried = function (expectTokAfterLastMatch, nextTokIdx, notStuck) {
        // A stuck repetition must not attempt recovery (risk of an infinite loop),
        // and without the expected token / occurrence there is nothing to re-sync to.
        if (notStuck === false ||
            expectTokAfterLastMatch === undefined ||
            nextTokIdx === undefined) {
            return false;
        }
        // Nothing to recover from when the next token is already the expected one.
        // Recovery is also disabled while backtracking, as it could make the parser
        // prefer a backtracking path with recovered errors over a valid grammar path.
        if (this.tokenMatcher(this.LA(1), expectTokAfterLastMatch) ||
            this.isBackTracking()) {
            return false;
        }
        // In-rule recovery (single token insertion or deletion) is always preferred
        // because it changes the input stream the least.
        var follows = this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx);
        return !this.canPerformInRuleRecovery(expectTokAfterLastMatch, follows);
    };
    // Error Recovery functionality
    /**
     * Computes the token types that may follow `tokType` at the current
     * position, by handing the current grammar path to getNextPossibleTokenTypes.
     */
    proto.getFollowsForInRuleRecovery = function (tokType, tokIdxInRule) {
        var currentPath = this.getCurrentGrammarPath(tokType, tokIdxInRule);
        return this.getNextPossibleTokenTypes(currentPath);
    };
    /**
     * Performs in-rule recovery: single token insertion is tried first, then
     * single token deletion.
     *
     * @returns the inserted token, or the value returned by SKIP_TOKEN() when
     *   recovering by deletion (consumeToken() is called afterwards).
     * @throws InRuleRecoveryException when neither strategy applies.
     */
    proto.tryInRuleRecovery = function (expectedTokType, follows) {
        if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) {
            return this.getTokenToInsert(expectedTokType);
        }
        if (!this.canRecoverWithSingleTokenDeletion(expectedTokType)) {
            throw new InRuleRecoveryException("sad sad panda");
        }
        var tokenAfterSkip = this.SKIP_TOKEN();
        this.consumeToken();
        return tokenAfterSkip;
    };
    /**
     * @returns a truthy value when single token insertion or single token
     *   deletion can recover from the current mismatch.
     */
    proto.canPerformInRuleRecovery = function (expectedToken, follows) {
        var viaInsertion = this.canRecoverWithSingleTokenInsertion(expectedToken, follows);
        return viaInsertion || this.canRecoverWithSingleTokenDeletion(expectedToken);
    };
    /**
     * Insertion is possible when the token type may be inserted, the follows
     * are known, and the mismatched token LA(1) matches one of the follows.
     *
     * @returns {boolean}
     */
    proto.canRecoverWithSingleTokenInsertion = function (expectedTokType, follows) {
        var self = this;
        // the possible following tokens must be known to perform single token insertion
        if (!this.canTokenTypeBeInsertedInRecovery(expectedTokType) || isEmpty(follows)) {
            return false;
        }
        var offendingToken = this.LA(1);
        var matchingFollow = find(follows, function (followTokType) {
            return self.tokenMatcher(offendingToken, followTokType);
        });
        return matchingFollow !== undefined;
    };
    /**
     * Deletion is possible when the token after the mismatched one, LA(2),
     * is of the expected type.
     */
    proto.canRecoverWithSingleTokenDeletion = function (expectedTokType) {
        return this.tokenMatcher(this.LA(2), expectedTokType);
    };
    /**
     * Checks whether the given token type belongs to the follow (re-sync) set
     * of the rule currently being parsed.
     */
    proto.isInCurrentRuleReSyncSet = function (tokenTypeIdx) {
        var reSyncSet = this.getFollowSetFromFollowKey(this.getCurrFollowKey());
        return contains(reSyncSet, tokenTypeIdx);
    };
    /**
     * Scans ahead from LA(1) and returns the type of the first token that is
     * part of the flattened follow set of the whole rule stack.
     */
    proto.findReSyncTokenType = function () {
        var reSyncCandidates = this.flattenFollowSet();
        // This loop is expected to always terminate: EOF is always in the follow
        // stack and, per the original note, also always (virtually) in the input.
        for (var distance = 1;; distance++) {
            var candidateType = this.LA(distance).tokenType;
            if (contains(reSyncCandidates, candidateType)) {
                return candidateType;
            }
        }
    };
    /**
     * Builds the follow key of the current rule invocation.
     *
     * @returns EOF_FOLLOW_KEY for the outermost rule, otherwise an object with
     *   `ruleName`, `idxInCallingRule` and `inRule`.
     */
    proto.getCurrFollowKey = function () {
        // the length is at least one as the ruleName is always added to the stack
        // before invoking the rule.
        if (this.RULE_STACK.length === 1) {
            return EOF_FOLLOW_KEY;
        }
        var ruleShortName = this.getLastExplicitRuleShortName();
        var occurrenceInCaller = this.getLastExplicitRuleOccurrenceIndex();
        var callerShortName = this.getPreviousExplicitRuleShortName();
        var followKey = {};
        followKey.ruleName = this.shortRuleNameToFullName(ruleShortName);
        followKey.idxInCallingRule = occurrenceInCaller;
        followKey.inRule = this.shortRuleNameToFullName(callerShortName);
        return followKey;
    };
    /**
     * Builds one follow key per entry of the rule stack; the entry at index 0
     * always maps to EOF_FOLLOW_KEY.
     */
    proto.buildFullFollowKeyStack = function () {
        var self = this;
        var ruleStack = this.RULE_STACK;
        var occurrenceStack = this.RULE_OCCURRENCE_STACK;
        return map(ruleStack, function (shortName, depth) {
            return depth === 0
                ? EOF_FOLLOW_KEY
                : {
                    ruleName: self.shortRuleNameToFullName(shortName),
                    idxInCallingRule: occurrenceStack[depth],
                    inRule: self.shortRuleNameToFullName(ruleStack[depth - 1])
                };
        });
    };
    /**
     * @returns the follow sets of every rule on the stack, flattened into a
     *   single array.
     */
    proto.flattenFollowSet = function () {
        var self = this;
        var followSets = map(this.buildFullFollowKeyStack(), function (followKey) {
            return self.getFollowSetFromFollowKey(followKey);
        });
        return flatten(followSets);
    };
    /**
     * Looks up the follow set registered for a follow key.
     *
     * @returns `[EOF]` for EOF_FOLLOW_KEY, otherwise the `resyncFollows` entry
     *   stored under ruleName + idxInCallingRule + IN + inRule.
     */
    proto.getFollowSetFromFollowKey = function (followKey) {
        if (followKey === EOF_FOLLOW_KEY) {
            return [EOF];
        }
        var lookupName = followKey.ruleName + followKey.idxInCallingRule + IN + followKey.inRule;
        return this.resyncFollows[lookupName];
    };
    /**
     * Appends `token` to `resyncTokens` unless it matches EOF.
     * A virtual EOF token does not really exist and thus carries no useful
     * information (line/column numbers), so it is never recorded.
     *
     * @returns the same `resyncTokens` array.
     */
    proto.addToResyncTokens = function (token, resyncTokens) {
        var isVirtualEof = this.tokenMatcher(token, EOF);
        if (!isVirtualEof) {
            resyncTokens.push(token);
        }
        return resyncTokens;
    };
    /**
     * Skips tokens until the current one matches `tokType`.
     *
     * @returns the tokens collected while skipping, without the last one.
     */
    proto.reSyncTo = function (tokType) {
        var skippedTokens = [];
        var cursor = this.LA(1);
        for (;;) {
            if (this.tokenMatcher(cursor, tokType) !== false) {
                break;
            }
            cursor = this.SKIP_TOKEN();
            this.addToResyncTokens(cursor, skippedTokens);
        }
        // the last token is not part of the error.
        return dropRight(skippedTokens);
    };
    /**
     * Deliberately empty default. When recovery is enabled, initRecoverable
     * shadows it on the instance with the module level function of the same name.
     */
    proto.attemptInRepetitionRecovery = function (prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) {
        // NO-OP by default, the actual implementation is a function (not a method).
    };
    /**
     * Describes the current position in the grammar.
     *
     * @returns an object holding the full-name rule stack, a copy of the
     *   occurrence stack, and the given token type and occurrence index.
     */
    proto.getCurrentGrammarPath = function (tokType, tokIdxInRule) {
        return {
            ruleStack: this.getHumanReadableRuleStack(),
            occurrenceStack: cloneArr(this.RULE_OCCURRENCE_STACK),
            lastTok: tokType,
            lastTokOccurrence: tokIdxInRule
        };
    };
    /**
     * @returns the rule stack with every short rule name converted through
     *   shortRuleNameToFullName.
     */
    proto.getHumanReadableRuleStack = function () {
        var self = this;
        return map(this.RULE_STACK, function (shortName) {
            return self.shortRuleNameToFullName(shortName);
        });
    };
    return Recoverable;
}());
export { Recoverable };
/**
 * Real implementation of in-repetition recovery; it is invoked with the
 * parser as `this`.
 *
 * It looks up (and caches in `firstAfterRepMap`) what follows the repetition
 * and, if `shouldInRepetitionRecoveryBeTried` agrees, delegates to
 * `tryInRepetitionRecovery`.
 *
 * @param prodFunc - the repetition DSL function, forwarded to tryInRepetitionRecovery.
 * @param args - arguments for prodFunc.
 * @param lookaheadFunc - lookahead function of the repetition.
 * @param dslMethodIdx - passed to getKeyForAutomaticLookahead to build the cache key.
 * @param prodOccurrence - occurrence index of the repetition in the current rule.
 * @param nextToksWalker - constructor whose instances expose startWalking(),
 *   returning an object with `token`, `occurrence` and `isEndOfRule`.
 * @param notStuck - forwarded to shouldInRepetitionRecoveryBeTried.
 */
export function attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, dslMethodIdx, prodOccurrence, nextToksWalker, notStuck) {
    var cacheKey = this.getKeyForAutomaticLookahead(dslMethodIdx, prodOccurrence);
    var afterRepetition = this.firstAfterRepMap[cacheKey];
    if (afterRepetition === undefined) {
        // first visit of this repetition: walk the rule's grammar once and cache the result.
        var ruleFullName = this.getCurrRuleFullName();
        var ruleProduction = this.getGAstProductions()[ruleFullName];
        afterRepetition = new nextToksWalker(ruleProduction, prodOccurrence).startWalking();
        this.firstAfterRepMap[cacheKey] = afterRepetition;
    }
    var tokTypeAfterRepetition = afterRepetition.token;
    var occurrenceAfterRepetition = afterRepetition.occurrence;
    var repetitionEndsRule = afterRepetition.isEndOfRule;
    // Special edge case of a TOP most repetition after which the input should END:
    // expecting EOF forces an attempt for in-rule recovery in that scenario.
    var isTrailingTopLevelRepetition = this.RULE_STACK.length === 1 &&
        repetitionEndsRule &&
        tokTypeAfterRepetition === undefined;
    if (isTrailingTopLevelRepetition) {
        tokTypeAfterRepetition = EOF;
        occurrenceAfterRepetition = 1;
    }
    if (!this.shouldInRepetitionRecoveryBeTried(tokTypeAfterRepetition, occurrenceAfterRepetition, notStuck)) {
        return;
    }
    // TODO: performance optimization: instead of passing the original args here, we modify
    // the args param (or create a new one) and make sure the lookahead func is explicitly provided
    // to avoid searching the cache for it once more.
    this.tryInRepetitionRecovery(prodFunc, args, lookaheadFunc, tokTypeAfterRepetition);
}
//# sourceMappingURL=recoverable.js.map