UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

www.antlr-ng.org

mike-lischke/antlr-ng

337 lines (336 loc) • 15.5 kB

JavaScript

const __defProp = Object.defineProperty;
// Bundler helper: pins a stable `name` property on a class so it survives minification/renaming.
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

import {
    ATN, ATNState, BailErrorStrategy, BitSet, DefaultErrorStrategy, InputMismatchException, ParserInterpreter,
    PredictionMode, StarLoopEntryState, Trees
} from "antlr4ng";

import { ClassFactory } from "../ClassFactory.js";
import { GrammarInterpreterRuleContext } from "./GrammarInterpreterRuleContext.js";

/**
 * A {@link ParserInterpreter} that knows the grammar it interprets. It records the outer alternative number
 * chosen for each rule context while parsing and offers static helpers that re-parse an input with a forced
 * decision to obtain alternative parse trees.
 */
class GrammarParserInterpreter extends ParserInterpreter {
    static {
        __name(this, "GrammarParserInterpreter");
    }

    /**
     * We want to stop and track the first error but we cannot bail out like {@link BailErrorStrategy} as consume()
     * constructs trees. We make sure to create an error node during recovery with this strategy. We consume() 1
     * token during the "bail out of rule" mechanism in recover() and let it fall out of the rule to finish
     * constructing trees. For recovery in line, we throw InputMismatchException to engage recover().
     */
    static BailButConsumeErrorStrategy = class BailButConsumeErrorStrategy extends DefaultErrorStrategy {
        static {
            __name(this, "BailButConsumeErrorStrategy");
        }

        /** Input index of the first error seen, or -1 while no error has occurred. Set once, never overwritten. */
        firstErrorTokenIndex = -1;

        /**
         * Remembers the position of the first error and skips a single token so parsing can continue.
         *
         * @param recognizer The parser that hit the error.
         * @param e The exception that triggered recovery (not inspected here).
         */
        recover(recognizer, e) {
            const input = recognizer.inputStream;
            if (this.firstErrorTokenIndex === -1) {
                this.firstErrorTokenIndex = input.index;
            }

            // Never consume the last token of the stream.
            if (input.index < input.size - 1) {
                recognizer.consume();
            }
        }

        /**
         * Remembers the position of the first error and always escalates to {@link recover} by throwing.
         *
         * @param recognizer The parser that hit the error.
         *
         * @throws InputMismatchException Always.
         */
        recoverInline(recognizer) {
            if (this.firstErrorTokenIndex === -1) {
                this.firstErrorTokenIndex = recognizer.inputStream.index;
            }

            throw new InputMismatchException(recognizer);
        }

        /**
         * Intentionally a no-op: this strategy does not resynchronize.
         *
         * @param recognizer The parser asking for synchronization (ignored).
         */
        sync(recognizer) { }
    };

    /**
     * The grammar associated with this interpreter. Unlike the {@link ParserInterpreter} from the standard
     * distribution, this can reference Grammar, which is in the tools area not purely runtime.
     */
    g;

    /** Bit set of ATN state numbers whose prediction determines the outer alternative number of a context. */
    decisionStatesThatSetOuterAltNumInContext;

    /**
     * Cache {@link LeftRecursiveRule.getPrimaryAlts()} and {@link LeftRecursiveRule.getRecursiveOpAlts()} for states
     * in {@link decisionStatesThatSetOuterAltNumInContext}. It only caches decisions in left-recursive rules.
     */
    stateToAltsMap = [];

    /**
     * Supports two call shapes, distinguished by whether the second argument is an {@link ATN}:
     *
     * - `(g, atn, input)`: file name, vocabulary and rule names are taken from the grammar.
     * - `(g, grammarFileName, vocabulary, ruleNames, atn, input)`: everything is given explicitly.
     *
     * @param args The constructor arguments in one of the two shapes above.
     */
    constructor(...args) {
        if (args[1] instanceof ATN) {
            const [g, atn, input] = args;
            super(g.fileName, g.getVocabulary(), g.getRuleNames(), atn, input);
            this.g = g;
        } else {
            const [g, grammarFileName, vocabulary, ruleNames, atn, input] = args;
            super(grammarFileName, vocabulary, ruleNames, atn, input);
            this.g = g;
        }

        // Must be computed for BOTH overloads: visitDecisionState() dereferences this set unconditionally,
        // so leaving it undefined makes the very first decision of a parse throw.
        this.decisionStatesThatSetOuterAltNumInContext = this.findOuterMostDecisionStates();
    }

    /**
     * Given an ambiguous parse information, return the list of ambiguous parse trees. An ambiguity occurs when a
     * specific token sequence can be recognized in more than one way by the grammar. These ambiguities are detected
     * only at decision points.
     *
     * The list of trees includes the actual interpretation (that for the minimum alternative number) and all
     * ambiguous alternatives. The actual interpretation is always first.
     *
     * This method reuses the same physical input token stream used to detect the ambiguity by the original parser
     * in the first place. This method resets/seeks within but does not alter originalParser.
     *
     * The trees are rooted at the node whose start..stop token indices include the start and stop indices of this
     * ambiguity event. That is, the trees returned will always include the complete ambiguous sub phrase
     * identified by the ambiguity event. The subtrees returned will also always contain the node associated with
     * the overridden decision.
     *
     * Be aware that this method does NOT notify error or parse listeners as it would trigger duplicate or otherwise
     * unwanted events.
     *
     * This uses a temporary ParserATNSimulator and a ParserInterpreter so we don't mess up any statistics, event
     * lists, etc... The parse tree constructed while identifying/making ambiguityInfo is not affected by this method
     * as it creates a new parser interp to get the ambiguous interpretations.
     *
     * Nodes in the returned ambig trees are independent of the original parse tree (constructed while
     * identifying/creating ambiguityInfo).
     *
     * @param g From which grammar should we drive alternative numbers and alternative labels.
     * @param originalParser The parser used to create ambiguityInfo; it is not modified by this routine and can be
     *                       either a generated or interpreted parser. Its token stream is reset/seek()'d.
     * @param tokens A stream of tokens to use with the temporary parser. This will often be just the token stream
     *               within the original parser but here it is for flexibility.
     * @param decision Which decision to try different alternatives for.
     * @param alts The set of alternatives to try while re-parsing.
     * @param startIndex The index of the first token of the ambiguous input or other input of interest.
     * @param stopIndex The index of the last token of the ambiguous input. The start and stop indexes are used
     *                  primarily to identify how much of the resulting parse tree to return.
     * @param startRuleIndex The start rule for the entire grammar, not the ambiguous decision. We re-parse the entire
     *                       input and so we need the original start rule.
     *
     * @returns The list of all possible interpretations of the input for the decision in ambiguityInfo. The actual
     *          interpretation chosen by the parser is always given first because this method retests the input in
     *          alternative order and ANTLR always resolves ambiguities by choosing the first alternative that matches
     *          the input.
     */
    static getAllPossibleParseTrees(g, originalParser, tokens, decision, alts, startIndex, stopIndex,
        startRuleIndex) {
        const trees = [];
        const parser = GrammarParserInterpreter.deriveTempParserInterpreter(g, originalParser, tokens);

        // Clamp the region end to the second-to-last token when it reaches the last token of the stream.
        const regionStop = stopIndex >= tokens.size - 1 ? tokens.size - 2 : stopIndex;

        // Re-parse the entire input once per requested alternative, in ascending alternative order.
        let alt = alts.nextSetBit(0);
        while (alt !== undefined && alt >= 0) {
            parser.reset();
            parser.addDecisionOverride(decision, startIndex, alt);
            const root = parser.parse(startRuleIndex);

            // Use the higher of the overridden decision tree and the tree enclosing all tokens.
            let ambigSubTree = Trees.getRootOfSubtreeEnclosingRegion(root, startIndex, regionStop);
            if (Trees.isAncestorOf(parser.overrideDecisionRoot, ambigSubTree)) {
                ambigSubTree = parser.overrideDecisionRoot;
            }

            trees.push(ambigSubTree);
            alt = alts.nextSetBit(alt + 1);
        }

        return trees;
    }

    /**
     * Very similar to {@link getAllPossibleParseTrees} except that it re-parses the input for every alternative
     * in a decision, not just the ambiguous ones (there is no alts parameter here). This method also tries to reduce
     * the size of the parse trees by stripping away children of the tree that are completely out of range of
     * startIndex..stopIndex. Also, because errors are expected, we use a specialized error handler that more or less
     * bails out but that also consumes the first erroneous token at least. This ensures that an error node will be
     * in the parse tree for display.
     *
     * NOTES:
     * We must parse the entire input now with decision overrides we cannot parse a subset because it could be that
     * a decision above our decision of interest needs to read way past lookaheadInfo.stopIndex. It seems like there
     * is no escaping the use of a full and complete token stream if we are resetting to token index 0 and re-parsing
     * from the start symbol. It's not easy to restart parsing somewhere in the middle like a continuation because
     * our call stack does not match the tree stack because of left recursive rule rewriting.
     *
     * @param g The grammar from which to derive alternative numbers and alternative labels.
     * @param originalParser The parser for context information.
     * @param tokens The token stream to use with the new parser.
     * @param startRuleIndex The start rule for the entire grammar.
     * @param decision The decision to try different alternatives for.
     * @param startIndex The index of the first token of the ambiguous input.
     * @param stopIndex The index of the last token of the ambiguous input.
     *
     * @returns list of parse trees, one for each alternative in a decision given the same input.
     */
    static getLookaheadParseTrees(g, originalParser, tokens, startRuleIndex, decision, startIndex, stopIndex) {
        const trees = [];
        const parser = GrammarParserInterpreter.deriveTempParserInterpreter(g, originalParser, tokens);
        const decisionState = originalParser.atn.decisionToState[decision];
        const altCount = decisionState.transitions.length;

        for (let alt = 1; alt <= altCount; alt++) {
            // A fresh handler per alternative, so each run records its own first error position.
            const errorHandler = new GrammarParserInterpreter.BailButConsumeErrorStrategy();
            parser.errorHandler = errorHandler;
            parser.reset();
            parser.addDecisionOverride(decision, startIndex, alt);
            const root = parser.parse(startRuleIndex);

            // Cut the tree off at the first error, if there was one.
            let stopTreeAt = errorHandler.firstErrorTokenIndex >= 0 ? errorHandler.firstErrorTokenIndex : stopIndex;

            // Never look beyond the range of tokens actually covered by the tree.
            const overallRange = root.getSourceInterval();
            if (stopTreeAt > overallRange.stop) {
                stopTreeAt = overallRange.stop;
            }

            // Use the higher of the overridden decision tree and the tree enclosing all tokens.
            let subtree = Trees.getRootOfSubtreeEnclosingRegion(root, startIndex, stopTreeAt);
            if (Trees.isAncestorOf(parser.overrideDecisionRoot, subtree)) {
                subtree = parser.overrideDecisionRoot;
            }

            Trees.stripChildrenOutOfRange(subtree, parser.overrideDecisionRoot, startIndex, stopTreeAt);
            trees.push(subtree);
        }

        return trees;
    }

    /**
     * Derives a new parser from an old one that has knowledge of the grammar. The Grammar object is used to correctly
     * compute outer alternative numbers for parse tree nodes. A parser of the same type is created for subclasses
     * of {@link ParserInterpreter}.
     *
     * The returned parser reads from `tokens`, uses a {@link BailErrorStrategy}, has all error and parse listeners
     * removed and predicts in `LL_EXACT_AMBIG_DETECTION` mode.
     *
     * @param g The grammar from which to derive alternative numbers and alternative labels.
     * @param originalParser The parser to derive from.
     * @param tokens The token stream to use with the new parser.
     *
     * @returns A new parser that can be used to parse the same input as the original parser.
     *
     * @throws Error If the constructor of the original interpreter class fails; the original error is attached
     *               as `cause`.
     */
    static deriveTempParserInterpreter(g, originalParser, tokens) {
        let parser;
        if (originalParser instanceof ParserInterpreter) {
            try {
                // Instantiate the same (sub)class as the incoming parser via its (g, atn, input) signature.
                const ctor = originalParser.constructor;
                parser = new ctor(g, originalParser.atn, originalParser.tokenStream);
            } catch (e) {
                if (!(e instanceof Error)) {
                    throw e;
                }

                // Keep the underlying failure reachable instead of discarding it.
                throw new Error(`can't create parser to match incoming ${originalParser.constructor.name}`,
                    { cause: e });
            }
        } else {
            // Not an interpreter: fall back to a plain interpreter built from the parser's metadata.
            parser = new ParserInterpreter(
                originalParser.grammarFileName,
                originalParser.vocabulary,
                originalParser.ruleNames,
                originalParser.atn,
                tokens
            );
        }

        parser.inputStream = tokens;

        // Make sure this temporary parser neither reports errors nor notifies any listener.
        parser.errorHandler = new BailErrorStrategy();
        parser.removeErrorListeners();
        parser.removeParseListeners();
        parser.interpreter.predictionMode = PredictionMode.LL_EXACT_AMBIG_DETECTION;

        return parser;
    }

    /**
     * Creates the rule context type used by this interpreter.
     *
     * @param parent The parent context.
     * @param invokingStateNumber The number of the ATN state that invoked the rule.
     * @param ruleIndex The index of the rule the context is created for.
     *
     * @returns A new {@link GrammarInterpreterRuleContext}.
     */
    createInterpreterRuleContext(parent, invokingStateNumber, ruleIndex) {
        return new GrammarInterpreterRuleContext(ruleIndex, parent, invokingStateNumber);
    }

    /**
     * Override this method so that we can record which alternative was taken at each decision point. For non-left
     * recursive rules, it's simple. Set decisionStatesThatSetOuterAltNumInContext indicates which decision states
     * should set the outer alternative number.
     *
     * Left recursive rules are much more complicated to deal with: there is typically a decision for the primary
     * alternatives and a decision to choose between the recursive operator alternatives. For example, the following
     * left recursive rule has two primary and 2 recursive alternatives.
     *
     * ```antlr
     * e : e '*' e
     *   | '-' INT
     *   | e '+' e
     *   | ID
     *   ;
     * ```
     *
     * ANTLR rewrites that rule to be
     *
     * ```antlr
     * e[int precedence]
     *     : ('-' INT | ID)
     *     ( {...}? '*' e[5]
     *     | {...}? '+' e[3]
     *     )*
     *     ;
     * ```
     *
     * So, there are two decisions associated with picking the outermost alt. This complicates our tracking
     * significantly. The outermost alternative number is a function of the decision (ATN state) within a left
     * recursive rule and the predicted alternative coming back from adaptivePredict().
     *
     * We use stateToAltsMap as a cache to avoid expensive calls to getRecursiveOpAlts().
     *
     * @param p The decision state to visit.
     *
     * @returns The prediction made by the interpreter for this decision state.
     */
    visitDecisionState(p) {
        const predictedAlt = super.visitDecisionState(p);

        // Remember the context in which the overridden decision was taken.
        if (p.transitions.length > 1 && p.decision === this.overrideDecision
            && this.inputStream.index === this.overrideDecisionInputIndex) {
            this.overrideDecisionRoot = this.context;
        }

        if (!this.decisionStatesThatSetOuterAltNumInContext.get(p.stateNumber)) {
            return predictedAlt;
        }

        const ctx = this.context;
        ctx.setAltNumber(predictedAlt);

        const rule = this.g.getRule(p.ruleIndex);
        if (this.atn.ruleToStartState[rule.index]?.isLeftRecursiveRule) {
            // Map the predicted alternative of the rewritten rule back to the alternative of the original rule.
            let alts = this.stateToAltsMap[p.stateNumber];
            if (!alts) {
                const stateType = p.constructor.stateType;
                if (stateType === ATNState.BLOCK_START) {
                    alts = rule.getPrimaryAlts();
                    this.stateToAltsMap[p.stateNumber] = alts; // Cache it.
                } else if (stateType === ATNState.STAR_BLOCK_START) {
                    alts = rule.getRecursiveOpAlts();
                    this.stateToAltsMap[p.stateNumber] = alts; // Cache it.
                }
            }

            ctx.setAltNumber(alts[predictedAlt]);
        }

        return predictedAlt;
    }

    /**
     * Identify the ATN states where we need to set the outer alt number. For regular rules, that's the block at the
     * target to rule start state. For left-recursive rules, we track the primary block, which looks just like a
     * regular rule's outer block, and the star loop block (always there even if 1 alt).
     *
     * @returns A set of ATN state numbers.
     */
    findOuterMostDecisionStates() {
        const track = new BitSet();
        const numberOfDecisions = this.atn.getNumberOfDecisions();

        for (let i = 0; i < numberOfDecisions; i++) {
            const decisionState = this.atn.getDecisionState(i);
            const startState = this.atn.ruleToStartState[decisionState.ruleIndex];

            if (decisionState instanceof StarLoopEntryState) {
                // Only loop entries flagged as precedence decisions are tracked, via the block they lead into.
                if (decisionState.precedenceRuleDecision) {
                    const blockStart = decisionState.transitions[0].target;
                    track.set(blockStart.stateNumber);
                }
            } else if (startState?.transitions[0].target === decisionState) {
                // Always track the outermost block of a rule, if it exists.
                track.set(decisionState.stateNumber);
            }
        }

        return track;
    }

    static {
        // Register a factory so other modules can create this interpreter through ClassFactory.
        ClassFactory.createGrammarParserInterpreter = (g, atn, input) => {
            return new GrammarParserInterpreter(g, atn, input);
        };
    }
}

export { GrammarParserInterpreter };