antlr4-runtime: JavaScript runtime for ANTLR4

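For context, a hedged sketch of how a generated JavaScript parser wires up and uses this simulator; the names below follow the common generated-code pattern (_interp, _input, _ctx) and are assumptions that may differ by tool version:

    // illustrative only; not part of the file below
    // inside a generated parser's constructor:
    //     this._interp = new ParserATNSimulator(this, atn, decisionsToDFA, sharedContextCache);
    // inside a generated rule method, predicting among the alternatives of decision 0:
    //     const alt = this._interp.adaptivePredict(this._input, 0, this._ctx);
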
/* Copyright (c) 2012-2022 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

import ATN from './ATN.js';
import ATNState from '../state/ATNState.js';
import RuleStopState from '../state/RuleStopState.js';
import ATNConfig from './ATNConfig.js';
import ATNConfigSet from './ATNConfigSet.js';
import Token from '../Token.js';
import DFAState from '../dfa/DFAState.js';
import PredPrediction from '../dfa/PredPrediction.js';
import ATNSimulator from './ATNSimulator.js';
import PredictionMode from './PredictionMode.js';
import RuleContext from '../context/RuleContext.js';
import SemanticContext from './SemanticContext.js';
import PredictionContext from '../context/PredictionContext.js';
import Interval from '../misc/Interval.js';
import Transition from '../transition/Transition.js';
import SetTransition from '../transition/SetTransition.js';
import NotSetTransition from '../transition/NotSetTransition.js';
import RuleTransition from '../transition/RuleTransition.js';
import ActionTransition from '../transition/ActionTransition.js';
import NoViableAltException from '../error/NoViableAltException.js';
import SingletonPredictionContext from '../context/SingletonPredictionContext.js';
import {predictionContextFromRuleContext} from '../context/PredictionContextUtils.js';
import AtomTransition from "../transition/AtomTransition.js";
import arrayToString from "../utils/arrayToString.js";
import BitSet from "../misc/BitSet.js";
import DoubleDict from "../utils/DoubleDict.js";
import HashSet from "../misc/HashSet.js";

/**
 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
 *
 * <p>
 * The basic complexity of the adaptive strategy makes it harder to understand.
 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction
 * requests go through the DFA first. If they reach a state without an edge for
 * the current symbol, the algorithm fails over to the ATN simulation to
 * complete the DFA path for the current input (until it finds a conflict state
 * or uniquely predicting state).</p>
 *
 * <p>
 * All of that is done without using the outer context because we want to create
 * a DFA that is not dependent upon the rule invocation stack when we do a
 * prediction. One DFA works in all contexts. We avoid using context not
 * necessarily because it's slower, although it can be, but because of the DFA
 * caching problem. The closure routine only considers the rule invocation stack
 * created during prediction beginning in the decision rule. For example, if
 * prediction occurs without invoking another rule's ATN, there are no context
 * stacks in the configurations. When lack of context leads to a conflict, we
 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
 * strategy (versus full LL(*)).</p>
 *
 * <p>
 * When SLL yields a configuration set with conflict, we rewind the input and
 * retry the ATN simulation, this time using full outer context without adding
 * to the DFA. Configuration context stacks will be the full invocation stacks
 * from the start rule. If we get a conflict using full context, then we can
 * definitively say we have a true ambiguity for that input sequence. If we
 * don't get a conflict, it implies that the decision is sensitive to the outer
 * context. (It is not context-sensitive in the sense of context-sensitive
 * grammars.)</p>
 *
 * <p>
 * The next time we reach this DFA state with an SLL conflict, through DFA
 * simulation, we will again retry the ATN simulation using full context mode.
 * This is slow because we can't save the results and have to "interpret" the
 * ATN each time we get that input.</p>
 *
 * <p>
 * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
 *
 * <p>
 * We could cache results from full context to predicted alternative easily and
 * that saves a lot of time but doesn't work in presence of predicates. The set
 * of visible predicates from the ATN start state changes depending on the
 * context, because closure can fall off the end of a rule. I tried to cache
 * tuples (stack context, semantic context, predicted alt) but it was slower
 * than interpreting and much more complicated. Also required a huge amount of
 * memory. The goal is not to create the world's fastest parser anyway. I'd like
 * to keep this algorithm simple. By launching multiple threads, we can improve
 * the speed of parsing across a large number of files.</p>
 *
 * <p>
 * There is no strict ordering between the amount of input used by SLL vs LL,
 * which makes it really hard to build a cache for full context. Let's say that
 * we have input A B C that leads to an SLL conflict with full context X. That
 * implies that using X we might only use A B but we could also use A B C D to
 * resolve conflict. Input A B C D could predict alternative 1 in one position
 * in the input and A B C E could predict alternative 2 in another position in
 * input. The conflicting SLL configurations could still be non-unique in the
 * full context prediction, which would lead us to requiring more input than the
 * original A B C. To make a prediction cache work, we have to track the exact
 * input used during the previous prediction. That amounts to a cache that maps
 * X to a specific DFA for that context.</p>
 *
 * <p>
 * Something should be done for left-recursive expression predictions. They are
 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
 * with full LL thing Sam does.</p>
 *
 * <p>
 * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
 *
 * <p>
 * We avoid doing full context retry when the outer context is empty, we did not
 * dip into the outer context by falling off the end of the decision state rule,
 * or when we force SLL mode.</p>
 *
 * <p>
 * As an example of the not-dip-into-outer-context case, consider super
 * constructor calls versus function calls. One grammar might look like
 * this:</p>
 *
 * <pre>
 * ctorBody
 *   : '{' superCall? stat* '}'
 *   ;
 * </pre>
 *
 * <p>
 * Or, you might see something like</p>
 *
 * <pre>
 * stat
 *   : superCall ';'
 *   | expression ';'
 *   | ...
 *   ;
 * </pre>
 *
 * <p>
 * In both cases I believe that no closure operations will dip into the outer
 * context. In the first case ctorBody in the worst case will stop at the '}'.
 * In the 2nd case it should stop at the ';'. Both cases should stay within the
 * entry rule and not dip into the outer context.</p>
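 *
 * <p>
 * For illustration only (not part of the original documentation): a minimal,
 * hedged sketch of how application code forces SLL mode, or selects the other
 * prediction modes used by this simulator, assuming the generated parser
 * exposes the simulator as {@code parser._interp}; that is the usual generated
 * wiring but may differ by runtime version.</p>
 *
 * <pre>
 * // assumption: parser is a generated antlr4 JavaScript parser instance
 * parser._interp.predictionMode = PredictionMode.SLL; // force SLL; never retry with full LL
 * // parser._interp.predictionMode = PredictionMode.LL; // default: SLL first, full LL on conflict
 * // parser._interp.predictionMode = PredictionMode.LL_EXACT_AMBIG_DETECTION; // full LL plus exact ambiguity reporting
 * </pre>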
 *
 * <p>
 * <strong>PREDICATES</strong></p>
 *
 * <p>
 * Predicates are always evaluated if present, in both SLL and LL. SLL and
 * LL simulation deals with predicates differently. SLL collects predicates as
 * it performs closure operations like ANTLR v3 did. It delays predicate
 * evaluation until it reaches an accept state. This allows us to cache the SLL
 * ATN simulation whereas, if we had evaluated predicates on-the-fly during
 * closure, the DFA state configuration sets would be different and we couldn't
 * build up a suitable DFA.</p>
 *
 * <p>
 * When building a DFA accept state during ATN simulation, we evaluate any
 * predicates and return the sole semantically valid alternative. If there is
 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
 * we throw an exception. Alternatives without predicates act like they have
 * true predicates. The simple way to think about it is to strip away all
 * alternatives with false predicates and choose the minimum alternative that
 * remains.</p>
 *
 * <p>
 * When we start in the DFA and reach an accept state that's predicated, we test
 * those and return the minimum semantically viable alternative. If no
 * alternatives are viable, we throw an exception.</p>
 *
 * <p>
 * During full LL ATN simulation, closure always evaluates predicates
 * on-the-fly. This is crucial to reducing the configuration set size during
 * closure. It hits a landmine when parsing with the Java grammar, for example,
 * without this on-the-fly evaluation.</p>
 *
 * <p>
 * <strong>SHARING DFA</strong></p>
 *
 * <p>
 * All instances of the same parser share the same decision DFAs through a
 * static field. Each instance gets its own ATN simulator but they share the
 * same {@link //decisionToDFA} field. They also share a
 * {@link PredictionContextCache} object that makes sure that all
 * {@link PredictionContext} objects are shared among the DFA states. This makes
 * a big size difference.</p>
 *
 * <p>
 * <strong>THREAD SAFETY</strong></p>
 *
 * <p>
 * The {@link ParserATNSimulator} locks on the {@link //decisionToDFA} field when
 * it adds a new DFA object to that array. {@link //addDFAEdge}
 * locks on the DFA for the current decision when setting the
 * {@link DFAState//edges} field. {@link //addDFAState} locks on
 * the DFA for the current decision when looking up a DFA state to see if it
 * already exists. We must make sure that all requests to add DFA states that
 * are equivalent result in the same shared DFA object. This is because lots of
 * threads will be trying to update the DFA at once. The
 * {@link //addDFAState} method also locks inside the DFA lock
 * but this time on the shared context cache when it rebuilds the
 * configurations' {@link PredictionContext} objects using cached
 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
 * safe as long as we can guarantee that all threads referencing
 * {@code s.edge[t]} get the same physical target {@link DFAState}, or
 * {@code null}. Once into the DFA, the DFA simulation does not reference the
 * {@link DFA//states} map. It follows the {@link DFAState//edges} field to new
 * targets. The DFA simulator will either find {@link DFAState//edges} to be
 * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
 * {@code dfa.edges[t]} to be non-null. The
 * {@link //addDFAEdge} method could be racing to set the field
 * but in either case the DFA simulator works; if it finds {@code null}, it
 * requests ATN simulation. It could also race trying to get {@code dfa.edges[t]},
 * but either way it will work because it's not doing a test and set operation.</p>
 *
 * <p>
 * <strong>Starting with SLL then failing over to combined SLL/LL (Two-Stage
 * Parsing)</strong></p>
 *
 * <p>
 * Sam pointed out that if SLL does not give a syntax error, then there is no
 * point in doing full LL, which is slower. We only have to try LL if we get a
 * syntax error. For maximum speed, Sam starts the parser set to pure SLL
 * mode with the {@link BailErrorStrategy}:</p>
 *
 * <pre>
 * parser.{@link Parser//getInterpreter() getInterpreter()}.{@link //setPredictionMode setPredictionMode}{@code (}{@link PredictionMode//SLL}{@code )};
 * parser.{@link Parser//setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
 * </pre>
 *
 * <p>
 * If it does not get a syntax error, then we're done. If it does get a syntax
 * error, we need to retry with the combined SLL/LL strategy.</p>
 *
 * <p>
 * The reason this works is as follows. If there are no SLL conflicts, then the
 * grammar is SLL (at least for that input set). If there is an SLL conflict,
 * the full LL analysis must yield a set of viable alternatives which is a
 * subset of the alternatives reported by SLL. If the LL set is a singleton,
 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL
 * set, the decision is SLL. If the LL set has size &gt; 1, then that decision
 * is truly ambiguous on the current input. If the LL set is smaller, then the
 * SLL conflict resolution might choose an alternative that the full LL would
 * rule out as a possibility based upon better context information. If that's
 * the case, then the SLL parse will definitely get an error because the full LL
 * analysis says it's not viable. If SLL conflict resolution chooses an
 * alternative within the LL set, then both SLL and LL would choose the same
 * alternative because they both choose the minimum of multiple conflicting
 * alternatives.</p>
 *
 * <p>
 * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
 * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
 * parsing will get an error because SLL will pursue alternative 1. If
 * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
 * choose the same alternative because alternative one is the minimum of either
 * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
 * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
 *
 * <p>
 * Of course, if the input is invalid, then we will get an error for sure in
 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over
 * the input.</p>
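 *
 * <p>
 * For illustration only (not part of the original documentation): a hedged
 * two-stage sketch in this JavaScript runtime. It assumes the usual generated
 * wiring, i.e. the simulator is reachable as {@code parser._interp}, the error
 * strategy as {@code parser._errHandler}, {@code tokens} is the token stream
 * passed to the parser, and {@code startRule} is the entry rule; names and
 * export paths may differ by runtime version.</p>
 *
 * <pre>
 * // first pass: pure SLL, bail out on the first syntax error
 * parser._interp.predictionMode = PredictionMode.SLL;
 * parser._errHandler = new BailErrorStrategy();
 * let tree;
 * try {
 *     tree = parser.startRule();
 * } catch (e) {
 *     // SLL failed; rewind and retry with full LL before reporting errors
 *     tokens.seek(0);
 *     parser.reset();
 *     parser._errHandler = new DefaultErrorStrategy();
 *     parser._interp.predictionMode = PredictionMode.LL;
 *     tree = parser.startRule();
 * }
 * </pre>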
 */
export default class ParserATNSimulator extends ATNSimulator {

    constructor(parser, atn, decisionToDFA, sharedContextCache) {
        super(atn, sharedContextCache);
        this.parser = parser;
        this.decisionToDFA = decisionToDFA;
        // SLL, LL, or LL + exact ambig detection?
        this.predictionMode = PredictionMode.LL;
        // LAME globals to avoid parameters!!!!! I need these down deep in predTransition
        this._input = null;
        this._startIndex = 0;
        this._outerContext = null;
        this._dfa = null;
        /**
         * Each prediction operation uses a cache for merge of prediction contexts.
         * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap
         * isn't synchronized but we're ok since two threads shouldn't reuse same
         * parser/atnsim object because it can only handle one input at a time.
         * This maps graphs a and b to merged result c. (a,b)&rarr;c. We can avoid
         * the merge if we ever see a and b again. Note that (b,a)&rarr;c should
         * also be examined during cache lookup.
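         *
         * For illustration only, a minimal sketch of that intended lookup. It
         * assumes {@link DoubleDict} exposes {@code get(a, b)} and
         * {@code set(a, b, value)}, and uses the {@code merge} helper from
         * PredictionContextUtils with an assumed signature:
         *
         * <pre>
         * // illustrative sketch; not code executed by this class as written
         * let merged = mergeCache.get(a, b) || mergeCache.get(b, a);
         * if (!merged) {
         *     merged = merge(a, b, rootIsWildcard, mergeCache);
         *     mergeCache.set(a, b, merged);
         * }
         * </pre>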
*/ this.mergeCache = null; this.debug = false; this.debug_closure = false; this.debug_add = false; this.trace_atn_sim = false; this.dfa_debug = false; this.retry_debug = false; } reset() {} adaptivePredict(input, decision, outerContext) { if (this.debug || this.trace_atn_sim) { console.log("adaptivePredict decision " + decision + " exec LA(1)==" + this.getLookaheadName(input) + " line " + input.LT(1).line + ":" + input.LT(1).column); } this._input = input; this._startIndex = input.index; this._outerContext = outerContext; const dfa = this.decisionToDFA[decision]; this._dfa = dfa; const m = input.mark(); const index = input.index; // Now we are certain to have a specific decision's DFA // But, do we still need an initial state? try { let s0; if (dfa.precedenceDfa) { // the start state for a precedence DFA depends on the current // parser precedence, and is provided by a DFA method. s0 = dfa.getPrecedenceStartState(this.parser.getPrecedence()); } else { // the start state for a "regular" DFA is just s0 s0 = dfa.s0; } if (s0===null) { if (outerContext===null) { outerContext = RuleContext.EMPTY; } if (this.debug ) { console.log("predictATN decision " + dfa.decision + " exec LA(1)==" + this.getLookaheadName(input) + ", outerContext=" + outerContext.toString(this.parser.ruleNames)); } const fullCtx = false; let s0_closure = this.computeStartState(dfa.atnStartState, RuleContext.EMPTY, fullCtx); if( dfa.precedenceDfa) { // If this is a precedence DFA, we use applyPrecedenceFilter // to convert the computed start state to a precedence start // state. We then use DFA.setPrecedenceStartState to set the // appropriate start state for the precedence level rather // than simply setting DFA.s0. // dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway s0_closure = this.applyPrecedenceFilter(s0_closure); s0 = this.addDFAState(dfa, new DFAState(null, s0_closure)); dfa.setPrecedenceStartState(this.parser.getPrecedence(), s0); } else { s0 = this.addDFAState(dfa, new DFAState(null, s0_closure)); dfa.s0 = s0; } } const alt = this.execATN(dfa, s0, input, index, outerContext); if (this.debug) { console.log("DFA after predictATN: " + dfa.toString(this.parser.literalNames, this.parser.symbolicNames)); } return alt; } finally { this._dfa = null; this.mergeCache = null; // wack cache after each prediction input.seek(index); input.release(m); } } /** * Performs ATN simulation to compute a predicted alternative based * upon the remaining input, but also updates the DFA cache to avoid * having to traverse the ATN again for the same input sequence. * * There are some key conditions we're looking for after computing a new * set of ATN configs (proposed DFA state): * if the set is empty, there is no viable alternative for current symbol * does the state uniquely predict an alternative? * does the state have a conflict that would prevent us from * putting it on the work list? * * We also have some key operations to do: * add an edge from previous DFA state to potentially new DFA state, D, * upon current symbol but only if adding to work list, which means in all * cases except no viable alternative (and possibly non-greedy decisions?) 
* collecting predicates and adding semantic context to DFA accept states * adding rule context to context-sensitive DFA accept states * consuming an input symbol * reporting a conflict * reporting an ambiguity * reporting a context sensitivity * reporting insufficient predicates * * cover these cases: * dead end * single alt * single alt + preds * conflict * conflict + preds * */ execATN(dfa, s0, input, startIndex, outerContext ) { if (this.debug || this.trace_atn_sim) { console.log("execATN decision " + dfa.decision + ", DFA state " + s0 + ", LA(1)==" + this.getLookaheadName(input) + " line " + input.LT(1).line + ":" + input.LT(1).column); } let alt; let previousD = s0; if (this.debug) { console.log("s0 = " + s0); } let t = input.LA(1); for(;;) { // while more work let D = this.getExistingTargetState(previousD, t); if(D===null) { D = this.computeTargetState(dfa, previousD, t); } if(D===ATNSimulator.ERROR) { // if any configs in previous dipped into outer context, that // means that input up to t actually finished entry rule // at least for SLL decision. Full LL doesn't dip into outer // so don't need special case. // We will get an error no matter what so delay until after // decision; better error message. Also, no reachable target // ATN states in SLL implies LL will also get nowhere. // If conflict in states that dip out, choose min since we // will get error no matter what. const e = this.noViableAlt(input, outerContext, previousD.configs, startIndex); input.seek(startIndex); alt = this.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext); if(alt!==ATN.INVALID_ALT_NUMBER) { return alt; } else { throw e; } } if(D.requiresFullContext && this.predictionMode !== PredictionMode.SLL) { // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) let conflictingAlts = null; if (D.predicates!==null) { if (this.debug) { console.log("DFA state has preds in DFA sim LL failover"); } const conflictIndex = input.index; if(conflictIndex !== startIndex) { input.seek(startIndex); } conflictingAlts = this.evalSemanticContext(D.predicates, outerContext, true); if (conflictingAlts.length===1) { if(this.debug) { console.log("Full LL avoided"); } return conflictingAlts.minValue(); } if (conflictIndex !== startIndex) { // restore the index so reporting the fallback to full // context occurs with the index at the correct spot input.seek(conflictIndex); } } if (this.dfa_debug) { console.log("ctx sensitive state " + outerContext +" in " + D); } const fullCtx = true; const s0_closure = this.computeStartState(dfa.atnStartState, outerContext, fullCtx); this.reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index); alt = this.execATNWithFullContext(dfa, D, s0_closure, input, startIndex, outerContext); return alt; } if (D.isAcceptState) { if (D.predicates===null) { return D.prediction; } const stopIndex = input.index; input.seek(startIndex); const alts = this.evalSemanticContext(D.predicates, outerContext, true); if (alts.length===0) { throw this.noViableAlt(input, outerContext, D.configs, startIndex); } else if (alts.length===1) { return alts.minValue(); } else { // report ambiguity after predicate evaluation to make sure the correct set of ambig alts is reported. this.reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs); return alts.minValue(); } } previousD = D; if (t !== Token.EOF) { input.consume(); t = input.LA(1); } } } /** * Get an existing target state for an edge in the DFA. 
If the target state * for the edge has not yet been computed or is otherwise not available, * this method returns {@code null}. * * @param previousD The current DFA state * @param t The next input symbol * @return The existing target DFA state for the given input symbol * {@code t}, or {@code null} if the target state for this edge is not * already cached */ getExistingTargetState(previousD, t) { const edges = previousD.edges; if (edges===null) { return null; } else { return edges[t + 1] || null; } } /** * Compute a target state for an edge in the DFA, and attempt to add the * computed state and corresponding edge to the DFA. * * @param dfa The DFA * @param previousD The current DFA state * @param t The next input symbol * * @return The computed target DFA state for the given input symbol * {@code t}. If {@code t} does not lead to a valid DFA state, this method * returns {@link //ERROR */ computeTargetState(dfa, previousD, t) { const reach = this.computeReachSet(previousD.configs, t, false); if(reach===null) { this.addDFAEdge(dfa, previousD, t, ATNSimulator.ERROR); return ATNSimulator.ERROR; } // create new target state; we'll add to DFA after it's complete let D = new DFAState(null, reach); const predictedAlt = this.getUniqueAlt(reach); if (this.debug) { const altSubSets = PredictionMode.getConflictingAltSubsets(reach); console.log("SLL altSubSets=" + arrayToString(altSubSets) + /*", previous=" + previousD.configs + */ ", configs=" + reach + ", predict=" + predictedAlt + ", allSubsetsConflict=" + PredictionMode.allSubsetsConflict(altSubSets) + ", conflictingAlts=" + this.getConflictingAlts(reach)); } if (predictedAlt!==ATN.INVALID_ALT_NUMBER) { // NO CONFLICT, UNIQUELY PREDICTED ALT D.isAcceptState = true; D.configs.uniqueAlt = predictedAlt; D.prediction = predictedAlt; } else if (PredictionMode.hasSLLConflictTerminatingPrediction(this.predictionMode, reach)) { // MORE THAN ONE VIABLE ALTERNATIVE D.configs.conflictingAlts = this.getConflictingAlts(reach); D.requiresFullContext = true; // in SLL-only mode, we will stop at this state and return the minimum alt D.isAcceptState = true; D.prediction = D.configs.conflictingAlts.minValue(); } if (D.isAcceptState && D.configs.hasSemanticContext) { this.predicateDFAState(D, this.atn.getDecisionState(dfa.decision)); if( D.predicates!==null) { D.prediction = ATN.INVALID_ALT_NUMBER; } } // all adds to dfa are done after we've created full D state D = this.addDFAEdge(dfa, previousD, t, D); return D; } predicateDFAState(dfaState, decisionState) { // We need to test all predicates, even in DFA states that // uniquely predict alternative. const nalts = decisionState.transitions.length; // Update DFA so reach becomes accept state with (predicate,alt) // pairs if preds found for conflicting alts const altsToCollectPredsFrom = this.getConflictingAltsOrUniqueAlt(dfaState.configs); const altToPred = this.getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); if (altToPred!==null) { dfaState.predicates = this.getPredicatePredictions(altsToCollectPredsFrom, altToPred); dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds } else { // There are preds in configs but they might go away // when OR'd together like {p}? || NONE == NONE. 
If neither // alt has preds, resolve to min alt dfaState.prediction = altsToCollectPredsFrom.minValue(); } } // comes back with reach.uniqueAlt set to a valid alt execATNWithFullContext(dfa, D, // how far we got before failing over s0, input, startIndex, outerContext) { if (this.debug || this.trace_atn_sim) { console.log("execATNWithFullContext "+s0); } const fullCtx = true; let foundExactAmbig = false; let reach; let previous = s0; input.seek(startIndex); let t = input.LA(1); let predictedAlt = -1; for (;;) { // while more work reach = this.computeReachSet(previous, t, fullCtx); if (reach===null) { // if any configs in previous dipped into outer context, that // means that input up to t actually finished entry rule // at least for LL decision. Full LL doesn't dip into outer // so don't need special case. // We will get an error no matter what so delay until after // decision; better error message. Also, no reachable target // ATN states in SLL implies LL will also get nowhere. // If conflict in states that dip out, choose min since we // will get error no matter what. const e = this.noViableAlt(input, outerContext, previous, startIndex); input.seek(startIndex); const alt = this.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); if(alt!==ATN.INVALID_ALT_NUMBER) { return alt; } else { throw e; } } const altSubSets = PredictionMode.getConflictingAltSubsets(reach); if(this.debug) { console.log("LL altSubSets=" + altSubSets + ", predict=" + PredictionMode.getUniqueAlt(altSubSets) + ", resolvesToJustOneViableAlt=" + PredictionMode.resolvesToJustOneViableAlt(altSubSets)); } reach.uniqueAlt = this.getUniqueAlt(reach); // unique prediction? if(reach.uniqueAlt!==ATN.INVALID_ALT_NUMBER) { predictedAlt = reach.uniqueAlt; break; } else if (this.predictionMode !== PredictionMode.LL_EXACT_AMBIG_DETECTION) { predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets); if(predictedAlt !== ATN.INVALID_ALT_NUMBER) { break; } } else { // In exact ambiguity mode, we never try to terminate early. // Just keeps scarfing until we know what the conflict is if (PredictionMode.allSubsetsConflict(altSubSets) && PredictionMode.allSubsetsEqual(altSubSets)) { foundExactAmbig = true; predictedAlt = PredictionMode.getSingleViableAlt(altSubSets); break; } // else there are multiple non-conflicting subsets or // we're not sure what the ambiguity is yet. // So, keep going. } previous = reach; if( t !== Token.EOF) { input.consume(); t = input.LA(1); } } // If the configuration set uniquely predicts an alternative, // without conflict, then we know that it's a full LL decision // not SLL. if (reach.uniqueAlt !== ATN.INVALID_ALT_NUMBER ) { this.reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index); return predictedAlt; } // We do not check predicates here because we have checked them // on-the-fly when doing full context prediction. // // In non-exact ambiguity detection mode, we might actually be able to // detect an exact ambiguity, but I'm not going to spend the cycles // needed to check. We only emit ambiguity warnings in exact ambiguity // mode. // // For example, we might know that we have conflicting configurations. // But, that does not mean that there is no way forward without a // conflict. 
It's possible to have nonconflicting alt subsets as in: // altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] // from // // [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), // (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] // // In this case, (17,1,[5 $]) indicates there is some next sequence that // would resolve this without conflict to alternative 1. Any other viable // next sequence, however, is associated with a conflict. We stop // looking for input because no amount of further lookahead will alter // the fact that we should predict alternative 1. We just can't say for // sure that there is an ambiguity without looking further. this.reportAmbiguity(dfa, D, startIndex, input.index, foundExactAmbig, null, reach); return predictedAlt; } computeReachSet(closure, t, fullCtx) { if (this.debug) { console.log("in computeReachSet, starting closure: " + closure); } if( this.mergeCache===null) { this.mergeCache = new DoubleDict(); } const intermediate = new ATNConfigSet(fullCtx); // Configurations already in a rule stop state indicate reaching the end // of the decision rule (local context) or end of the start rule (full // context). Once reached, these configurations are never updated by a // closure operation, so they are handled separately for the performance // advantage of having a smaller intermediate set when calling closure. // // For full-context reach operations, separate handling is required to // ensure that the alternative matching the longest overall sequence is // chosen when multiple such configurations can match the input. let skippedStopStates = null; // First figure out where we can reach on input t for (let i=0; i<closure.items.length;i++) { const c = closure.items[i]; if(this.debug) { console.log("testing " + this.getTokenName(t) + " at " + c); } if (c.state instanceof RuleStopState) { if (fullCtx || t === Token.EOF) { if (skippedStopStates===null) { skippedStopStates = []; } skippedStopStates.push(c); if(this.debug_add) { console.log("added " + c + " to skippedStopStates"); } } continue; } for(let j=0;j<c.state.transitions.length;j++) { const trans = c.state.transitions[j]; const target = this.getReachableTarget(trans, t); if (target!==null) { const cfg = new ATNConfig({state:target}, c); intermediate.add(cfg, this.mergeCache); if(this.debug_add) { console.log("added " + cfg + " to intermediate"); } } } } // Now figure out where the reach operation can take us... let reach = null; // This block optimizes the reach operation for intermediate sets which // trivially indicate a termination state for the overall // adaptivePredict operation. // // The conditions assume that intermediate // contains all configurations relevant to the reach set, but this // condition is not true when one or more configurations have been // withheld in skippedStopStates, or when the current symbol is EOF. // if (skippedStopStates===null && t!==Token.EOF) { if (intermediate.items.length===1) { // Don't pursue the closure if there is just one state. // It can only have one alternative; just add to result // Also don't pursue the closure if there is unique alternative // among the configurations. reach = intermediate; } else if (this.getUniqueAlt(intermediate)!==ATN.INVALID_ALT_NUMBER) { // Also don't pursue the closure if there is unique alternative // among the configurations. reach = intermediate; } } // If the reach set could not be trivially determined, perform a closure // operation on the intermediate set to compute its initial value. 
// if (reach===null) { reach = new ATNConfigSet(fullCtx); const closureBusy = new HashSet(); const treatEofAsEpsilon = t === Token.EOF; for (let k=0; k<intermediate.items.length;k++) { this.closure(intermediate.items[k], reach, closureBusy, false, fullCtx, treatEofAsEpsilon); } } if (t === Token.EOF) { // After consuming EOF no additional input is possible, so we are // only interested in configurations which reached the end of the // decision rule (local context) or end of the start rule (full // context). Update reach to contain only these configurations. This // handles both explicit EOF transitions in the grammar and implicit // EOF transitions following the end of the decision or start rule. // // When reach==intermediate, no closure operation was performed. In // this case, removeAllConfigsNotInRuleStopState needs to check for // reachable rule stop states as well as configurations already in // a rule stop state. // // This is handled before the configurations in skippedStopStates, // because any configurations potentially added from that list are // already guaranteed to meet this condition whether or not it's // required. // reach = this.removeAllConfigsNotInRuleStopState(reach, reach === intermediate); } // If skippedStopStates!==null, then it contains at least one // configuration. For full-context reach operations, these // configurations reached the end of the start rule, in which case we // only add them back to reach if no configuration during the current // closure operation reached such a state. This ensures adaptivePredict // chooses an alternative matching the longest overall sequence when // multiple alternatives are viable. // if (skippedStopStates!==null && ( (! fullCtx) || (! PredictionMode.hasConfigInRuleStopState(reach)))) { for (let l=0; l<skippedStopStates.length;l++) { reach.add(skippedStopStates[l], this.mergeCache); } } if ( this.trace_atn_sim ) { console.log("computeReachSet "+closure+" -> "+reach); } if (reach.items.length===0) { return null; } else { return reach; } } /** * Return a configuration set containing only the configurations from * {@code configs} which are in a {@link RuleStopState}. If all * configurations in {@code configs} are already in a rule stop state, this * method simply returns {@code configs}. * * <p>When {@code lookToEndOfRule} is true, this method uses * {@link ATN//nextTokens} for each configuration in {@code configs} which is * not already in a rule stop state to see if a rule stop state is reachable * from the configuration via epsilon-only transitions.</p> * * @param configs the configuration set to update * @param lookToEndOfRule when true, this method checks for rule stop states * reachable by epsilon-only transitions from each configuration in * {@code configs}. 
* * @return {@code configs} if all configurations in {@code configs} are in a * rule stop state, otherwise return a new configuration set containing only * the configurations from {@code configs} which are in a rule stop state */ removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule) { if (PredictionMode.allConfigsInRuleStopStates(configs)) { return configs; } const result = new ATNConfigSet(configs.fullCtx); for(let i=0; i<configs.items.length;i++) { const config = configs.items[i]; if (config.state instanceof RuleStopState) { result.add(config, this.mergeCache); continue; } if (lookToEndOfRule && config.state.epsilonOnlyTransitions) { const nextTokens = this.atn.nextTokens(config.state); if (nextTokens.contains(Token.EPSILON)) { const endOfRuleState = this.atn.ruleToStopState[config.state.ruleIndex]; result.add(new ATNConfig({state:endOfRuleState}, config), this.mergeCache); } } } return result; } computeStartState(p, ctx, fullCtx) { // always at least the implicit call to start rule const initialContext = predictionContextFromRuleContext(this.atn, ctx); const configs = new ATNConfigSet(fullCtx); if ( this.trace_atn_sim ) { console.log("computeStartState from ATN state " + p + " initialContext=" + initialContext.toString(this.parser)); } for(let i=0;i<p.transitions.length;i++) { const target = p.transitions[i].target; const c = new ATNConfig({ state:target, alt:i+1, context:initialContext }, null); const closureBusy = new HashSet(); this.closure(c, configs, closureBusy, true, fullCtx, false); } return configs; } /** * This method transforms the start state computed by * {@link //computeStartState} to the special start state used by a * precedence DFA for a particular precedence value. The transformation * process applies the following changes to the start state's configuration * set. * * <ol> * <li>Evaluate the precedence predicates for each configuration using * {@link SemanticContext//evalPrecedence}.</li> * <li>Remove all configurations which predict an alternative greater than * 1, for which another configuration that predicts alternative 1 is in the * same ATN state with the same prediction context. This transformation is * valid for the following reasons: * <ul> * <li>The closure block cannot contain any epsilon transitions which bypass * the body of the closure, so all states reachable via alternative 1 are * part of the precedence alternatives of the transformed left-recursive * rule.</li> * <li>The "primary" portion of a left recursive rule cannot contain an * epsilon transition, so the only way an alternative other than 1 can exist * in a state that is also reachable via alternative 1 is by nesting calls * to the left-recursive rule, with the outer calls not being at the * preferred precedence level.</li> * </ul> * </li> * </ol> * * <p> * The prediction context must be considered by this filter to address * situations like the following. * </p> * <code> * <pre> * grammar TA; * prog: statement* EOF; * statement: letterA | statement letterA 'b' ; * letterA: 'a'; * </pre> * </code> * <p> * If the above grammar, the ATN state immediately before the token * reference {@code 'a'} in {@code letterA} is reachable from the left edge * of both the primary and closure blocks of the left-recursive rule * {@code statement}. The prediction context associated with each of these * configurations distinguishes between them, and prevents the alternative * which stepped out to {@code prog} (and then back in to {@code statement} * from being eliminated by the filter. 
* </p> * * @param configs The configuration set computed by * {@link //computeStartState} as the start state for the DFA. * @return The transformed configuration set representing the start state * for a precedence DFA at a particular precedence level (determined by * calling {@link Parser//getPrecedence}) */ applyPrecedenceFilter(configs) { let config; const statesFromAlt1 = []; const configSet = new ATNConfigSet(configs.fullCtx); for(let i=0; i<configs.items.length; i++) { config = configs.items[i]; // handle alt 1 first if (config.alt !== 1) { continue; } const updatedContext = config.semanticContext.evalPrecedence(this.parser, this._outerContext); if (updatedContext===null) { // the configuration was eliminated continue; } statesFromAlt1[config.state.stateNumber] = config.context; if (updatedContext !== config.semanticContext) { configSet.add(new ATNConfig({semanticContext:updatedContext}, config), this.mergeCache); } else { configSet.add(config, this.mergeCache); } } for(let i=0; i<configs.items.length; i++) { config = configs.items[i]; if (config.alt === 1) { // already handled continue; } // In the future, this elimination step could be updated to also // filter the prediction context for alternatives predicting alt>1 // (basically a graph subtraction algorithm). if (!config.precedenceFilterSuppressed) { const context = statesFromAlt1[config.state.stateNumber] || null; if (context!==null && context.equals(config.context)) { // eliminated continue; } } configSet.add(config, this.mergeCache); } return configSet; } getReachableTarget(trans, ttype) { if (trans.matches(ttype, 0, this.atn.maxTokenType)) { return trans.target; } else { return null; } } getPredsForAmbigAlts(ambigAlts, configs, nalts) { // REACH=[1|1|[]|0:0, 1|2|[]|0:1] // altToPred starts as an array of all null contexts. The entry at index i // corresponds to alternative i. altToPred[i] may have one of three values: // 1. null: no ATNConfig c is found such that c.alt==i // 2. SemanticContext.NONE: At least one ATNConfig c exists such that // c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, // alt i has at least one unpredicated config. // 3. Non-NONE Semantic Context: There exists at least one, and for all // ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. // // From this, it is clear that NONE||anything==NONE. // let altToPred = []; for(let i=0;i<configs.items.length;i++) { const c = configs.items[i]; if(ambigAlts.has( c.alt )) { altToPred[c.alt] = SemanticContext.orContext(altToPred[c.alt] || null, c.semanticContext); } } let nPredAlts = 0; for (let i =1;i< nalts+1;i++) { const pred = altToPred[i] || null; if (pred===null) { altToPred[i] = SemanticContext.NONE; } else if (pred !== SemanticContext.NONE) { nPredAlts += 1; } } // nonambig alts are null in altToPred if (nPredAlts===0) { altToPred = null; } if (this.debug) { console.log("getPredsForAmbigAlts result " + arrayToString(altToPred)); } return altToPred; } getPredicatePredictions(ambigAlts, altToPred) { const pairs = []; let containsPredicate = false; for (let i=1; i<altToPred.length;i++) { const pred = altToPred[i]; // unpredicated is indicated by SemanticContext.NONE if( ambigAlts!==null && ambigAlts.has( i )) { pairs.push(new PredPrediction(pred, i)); } if (pred !== SemanticContext.NONE) { containsPredicate = true; } } if (! 
containsPredicate) { return null; } return pairs; } /** * This method is used to improve the localization of error messages by * choosing an alternative rather than throwing a * {@link NoViableAltException} in particular prediction scenarios where the * {@link //ERROR} state was reached during ATN simulation. * * <p> * The default implementation of this method uses the following * algorithm to identify an ATN configuration which successfully parsed the * decision entry rule. Choosing such an alternative ensures that the * {@link ParserRuleContext} returned by the calling rule will be complete * and valid, and the syntax error will be reported later at a more * localized location.</p> * * <ul> * <li>If a syntactically valid path or paths reach the end of the decision rule and * they are semantically valid if predicated, return the min associated alt.</li> * <li>Else, if a semantically invalid but syntactically valid path exist * or paths exist, return the minimum associated alt. * </li> * <li>Otherwise, return {@link ATN//INVALID_ALT_NUMBER}.</li> * </ul> * * <p> * In some scenarios, the algori