antlr-ng
Version:
Next generation ANTLR Tool
158 lines (157 loc) • 10.1 kB
TypeScript
import { ATN, BitSet, DecisionState, InterpreterRuleContext, Parser, ParserInterpreter, ParserRuleContext, TokenStream, Vocabulary } from "antlr4ng";
import type { IGrammar, IGrammarParserInterpreter } from "../types.js";
/**
* A heavier weight {@link ParserInterpreter} that creates parse trees that track alternative numbers for subtree roots.
*
* In addition to the runtime interpreter state, it keeps a reference to the tool-side grammar ({@link IGrammar}),
* which is used to compute outer alternative numbers for parse tree nodes.
*/
export declare class GrammarParserInterpreter extends ParserInterpreter implements IGrammarParserInterpreter {
/**
* We want to stop and track the first error but we cannot bail out like {@link BailErrorStrategy} as consume()
* constructs trees. We make sure to create an error node during recovery with this strategy. We consume() one
* token during the "bail out of rule" mechanism in recover() and let it fall out of the rule to finish
* constructing trees. For inline recovery, we throw InputMismatchException to engage recover().
*/
private static BailButConsumeErrorStrategy;
/**
* The grammar associated with this interpreter. Unlike the {@link ParserInterpreter} from the standard
* distribution, this can reference Grammar, which is in the tools area not purely runtime.
*/
protected readonly g: IGrammar;
/**
* The decision states that should set the outer alternative number in the rule context
* (consulted while visiting decision states, see {@link visitDecisionState}).
*
* NOTE(review): presumably filled from the result of findOuterMostDecisionStates() - confirm in the
* implementation.
*/
protected decisionStatesThatSetOuterAltNumInContext: BitSet;
/**
* Cache {@link LeftRecursiveRule.getPrimaryAlts()} and {@link LeftRecursiveRule.getRecursiveOpAlts()} for states
* in {@link decisionStatesThatSetOuterAltNumInContext}. It only caches decisions in left-recursive rules.
*/
private stateToAltsMap;
/**
* Creates an interpreter for the given grammar.
*
* NOTE(review): this overload takes no file name, vocabulary or rule names; presumably they are taken from
* the grammar - confirm in the implementation.
*
* @param g The grammar associated with this interpreter.
* @param atn The ATN to interpret.
* @param input The token stream to parse.
*/
constructor(g: IGrammar, atn: ATN, input: TokenStream);
/**
* Creates an interpreter for the given grammar with explicitly specified parser metadata.
*
* @param g The grammar associated with this interpreter.
* @param grammarFileName The name of the grammar file.
* @param vocabulary The vocabulary to use.
* @param ruleNames The names of the parser rules.
* @param atn The ATN to interpret.
* @param input The token stream to parse.
*/
constructor(g: IGrammar, grammarFileName: string, vocabulary: Vocabulary, ruleNames: string[], atn: ATN, input: TokenStream);
/**
* Given ambiguous parse information, return the list of ambiguous parse trees. An ambiguity occurs when a
* specific token sequence can be recognized in more than one way by the grammar. These ambiguities are detected
* only at decision points.
*
* Note: "ambiguityInfo" below refers to the ambiguity event being examined. It is not a parameter of this
* method; the event is described by `decision`, `alts`, `startIndex` and `stopIndex`.
*
* The list of trees includes the actual interpretation (that for the minimum alternative number) and all
* ambiguous alternatives. The actual interpretation is always first.
*
* This method reuses the same physical input token stream used to detect the ambiguity by the original parser
* in the first place. This method resets/seeks within but does not alter originalParser.
*
* The trees are rooted at the node whose start..stop token indices include the start and stop indices of this
* ambiguity event. That is, the trees returned will always include the complete ambiguous sub phrase
* identified by the ambiguity event. The subtrees returned will also always contain the node associated with
* the overridden decision.
*
* Be aware that this method does NOT notify error or parse listeners as it would trigger duplicate or otherwise
* unwanted events.
*
* This uses a temporary ParserATNSimulator and a ParserInterpreter so we don't mess up any statistics, event
* lists, etc. The parse tree constructed while identifying/making ambiguityInfo is not affected by this method
* as it creates a new parser interpreter to get the ambiguous interpretations.
*
* Nodes in the returned ambiguous trees are independent of the original parse tree (constructed while
* identifying/creating ambiguityInfo).
*
* @param g The grammar from which to derive alternative numbers and alternative labels.
* @param originalParser The parser used to create ambiguityInfo; it is not modified by this routine and can be
* either a generated or interpreted parser. Its token stream *is* reset/seek()'d.
* @param tokens A stream of tokens to use with the temporary parser. This will often be just the token stream
* within the original parser but here it is for flexibility.
* @param decision Which decision to try different alternatives for.
* @param alts The set of alternatives to try while re-parsing.
* @param startIndex The index of the first token of the ambiguous input or other input of interest.
* @param stopIndex The index of the last token of the ambiguous input. The start and stop indexes are used
* primarily to identify how much of the resulting parse tree to return.
* @param startRuleIndex The start rule for the entire grammar, not the ambiguous decision. We re-parse the entire
* input and so we need the original start rule.
*
* @returns The list of all possible interpretations of the input for the decision in ambiguityInfo. The actual
* interpretation chosen by the parser is always given first because this method retests the input in
* alternative order and ANTLR always resolves ambiguities by choosing the first alternative that matches
* the input.
*/
static getAllPossibleParseTrees(g: IGrammar, originalParser: Parser, tokens: TokenStream, decision: number, alts: BitSet, startIndex: number, stopIndex: number, startRuleIndex: number): ParserRuleContext[];
/**
* Re-parses the input once for every alternative of a decision and collects the resulting parse trees.
*
* Very similar to {@link getAllPossibleParseTrees} except that it re-parses the input for every alternative
* in a decision, not just the ambiguous ones (there is no alts parameter here). This method also tries to reduce
* the size of the parse trees by stripping away children of the tree that are completely out of range of
* startIndex..stopIndex. Also, because errors are expected, we use a specialized error handler that more or less
* bails out but that also consumes the first erroneous token at least. This ensures that an error node will be
* in the parse tree for display.
*
* NOTES:
* We must parse the entire input now with decision overrides. We cannot parse a subset because it could be that
* a decision above our decision of interest needs to read way past stopIndex. It seems like there
* is no escaping the use of a full and complete token stream if we are resetting to token index 0 and re-parsing
* from the start symbol. It's not easy to restart parsing somewhere in the middle like a continuation because
* our call stack does not match the tree stack because of left recursive rule rewriting.
*
* Unlike {@link getAllPossibleParseTrees}, `originalParser` must be a {@link ParserInterpreter} here, and the
* parameter order differs (`startRuleIndex` comes before `decision`).
*
* @param g The grammar from which to derive alternative numbers and alternative labels.
* @param originalParser The parser for context information.
* @param tokens The token stream to use with the new parser.
* @param startRuleIndex The start rule for the entire grammar.
* @param decision The decision to try different alternatives for.
* @param startIndex The index of the first token of the ambiguous input.
* @param stopIndex The index of the last token of the ambiguous input.
*
* @returns The list of parse trees, one for each alternative in a decision given the same input.
*/
static getLookaheadParseTrees(g: IGrammar, originalParser: ParserInterpreter, tokens: TokenStream, startRuleIndex: number, decision: number, startIndex: number, stopIndex: number): ParserRuleContext[];
/**
* Derives a new parser from an old one that has knowledge of the grammar. The Grammar object is used to correctly
* compute outer alternative numbers for parse tree nodes. A parser of the same type is created for subclasses
* of {@link ParserInterpreter}.
*
* @param g The grammar from which to derive alternative numbers and alternative labels.
* @param originalParser The parser to derive from.
* @param tokens The token stream to use with the new parser.
*
* @returns A new parser that can be used to parse the same input as the original parser.
*/
static deriveTempParserInterpreter(g: IGrammar, originalParser: Parser, tokens: TokenStream): ParserInterpreter;
/**
* Creates the rule context node used for a rule invocation.
*
* NOTE(review): presumably overrides the {@link ParserInterpreter} factory method so that the created contexts
* can carry the outer alternative number - confirm in the implementation.
*
* @param parent The context of the invoking rule.
* @param invokingStateNumber The number of the ATN state that invoked the rule.
* @param ruleIndex The index of the rule for which the context is created.
*
* @returns The new rule context.
*/
protected createInterpreterRuleContext(parent: ParserRuleContext, invokingStateNumber: number, ruleIndex: number): InterpreterRuleContext;
/**
* Override this method so that we can record which alternative was taken at each decision point. For non-left
* recursive rules, it's simple. The set decisionStatesThatSetOuterAltNumInContext indicates which decision states
* should set the outer alternative number.
*
* Left recursive rules are much more complicated to deal with: there is typically a decision for the primary
* alternatives and a decision to choose between the recursive operator alternatives. For example, the following
* left recursive rule has two primary and two recursive alternatives.
*
* ```antlr
* e : e '*' e
* | '-' INT
* | e '+' e
* | ID
* ;
* ```
*
* ANTLR rewrites that rule to be
*
* ```antlr
* e[int precedence]
* : ('-' INT | ID)
* ( {...}? '*' e[5]
* | {...}? '+' e[3]
* )*
* ;
* ```
*
* So, there are two decisions associated with picking the outermost alt. This complicates our tracking
* significantly. The outermost alternative number is a function of the decision (ATN state) within a left
* recursive rule and the predicted alternative coming back from adaptivePredict().
*
* We use stateToAltsMap as a cache to avoid expensive calls to getRecursiveOpAlts().
*
* @param p The decision state to visit.
*
* @returns The prediction made by the interpreter for this decision state.
*/
protected visitDecisionState(p: DecisionState): number;
/**
* Identify the ATN states where we need to set the outer alt number. For regular rules, that's the block that is
* the target of the rule start state. For left-recursive rules, we track the primary block, which looks just like
* a regular rule's outer block, and the star loop block (always there even if there is only one alt).
*
* @returns A set of ATN state numbers.
*/
private findOuterMostDecisionStates;
}