antlr4-runtime
Version:
JavaScript runtime for ANTLR4
644 lines (600 loc) • 27.1 kB
JavaScript
/* Copyright (c) 2012-2022 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
import Token from '../Token.js';
import Lexer from './../Lexer.js';
import ATN from './ATN.js';
import ATNSimulator from './ATNSimulator.js';
import DFAState from '../dfa/DFAState.js';
import OrderedATNConfigSet from './OrderedATNConfigSet.js';
import PredictionContext from '../context/PredictionContext.js';
import SingletonPredictionContext from '../context/SingletonPredictionContext.js';
import RuleStopState from '../state/RuleStopState.js';
import LexerATNConfig from './LexerATNConfig.js';
import Transition from '../transition/Transition.js';
import LexerActionExecutor from './LexerActionExecutor.js';
import LexerNoViableAltException from '../error/LexerNoViableAltException.js';
function resetSimState(sim) {
sim.index = -1;
sim.line = 0;
sim.column = -1;
sim.dfaState = null;
}
class SimState {
constructor() {
resetSimState(this);
}
reset() {
resetSimState(this);
}
}
export default class LexerATNSimulator extends ATNSimulator {
/**
* When we hit an accept state in either the DFA or the ATN, we
* have to notify the character stream to start buffering characters
* via {@link IntStream//mark} and record the current state. The current sim state
* includes the current index into the input, the current line,
* and current character position in that line. Note that the Lexer is
* tracking the starting line and characterization of the token. These
* variables track the "state" of the simulator when it hits an accept state.
*
* <p>We track these variables separately for the DFA and ATN simulation
* because the DFA simulation often has to fail over to the ATN
* simulation. If the ATN simulation fails, we need the DFA to fall
* back to its previously accepted state, if any. If the ATN succeeds,
* then the ATN does the accept and the DFA simulator that invoked it
* can simply return the predicted token type.</p>
*/
constructor(recog, atn, decisionToDFA, sharedContextCache) {
super(atn, sharedContextCache);
this.decisionToDFA = decisionToDFA;
this.recog = recog;
/**
* The current token's starting index into the character stream.
* Shared across DFA to ATN simulation in case the ATN fails and the
* DFA did not have a previous accept state. In this case, we use the
* ATN-generated exception object
*/
this.startIndex = -1;
// line number 1..n within the input///
this.line = 1;
/**
* The index of the character relative to the beginning of the line
* 0..n-1
*/
this.column = 0;
this.mode = Lexer.DEFAULT_MODE;
/**
* Used during DFA/ATN exec to record the most recent accept configuration
* info
*/
this.prevAccept = new SimState();
}
copyState(simulator) {
this.column = simulator.column;
this.line = simulator.line;
this.mode = simulator.mode;
this.startIndex = simulator.startIndex;
}
match(input, mode) {
this.mode = mode;
const mark = input.mark();
try {
this.startIndex = input.index;
this.prevAccept.reset();
const dfa = this.decisionToDFA[mode];
if (dfa.s0 === null) {
return this.matchATN(input);
} else {
return this.execATN(input, dfa.s0);
}
} finally {
input.release(mark);
}
}
reset() {
this.prevAccept.reset();
this.startIndex = -1;
this.line = 1;
this.column = 0;
this.mode = Lexer.DEFAULT_MODE;
}
matchATN(input) {
const startState = this.atn.modeToStartState[this.mode];
if (LexerATNSimulator.debug) {
console.log("matchATN mode " + this.mode + " start: " + startState);
}
const old_mode = this.mode;
const s0_closure = this.computeStartState(input, startState);
const suppressEdge = s0_closure.hasSemanticContext;
s0_closure.hasSemanticContext = false;
const next = this.addDFAState(s0_closure);
if (!suppressEdge) {
this.decisionToDFA[this.mode].s0 = next;
}
const predict = this.execATN(input, next);
if (LexerATNSimulator.debug) {
console.log("DFA after matchATN: " + this.decisionToDFA[old_mode].toLexerString());
}
return predict;
}
execATN(input, ds0) {
if (LexerATNSimulator.debug) {
console.log("start state closure=" + ds0.configs);
}
if (ds0.isAcceptState) {
// allow zero-length tokens
this.captureSimState(this.prevAccept, input, ds0);
}
let t = input.LA(1);
let s = ds0; // s is current/from DFA state
for (; ;) { // while more work
if (LexerATNSimulator.debug) {
console.log("execATN loop starting closure: " + s.configs);
}
/**
* As we move src->trg, src->trg, we keep track of the previous trg to
* avoid looking up the DFA state again, which is expensive.
* If the previous target was already part of the DFA, we might
* be able to avoid doing a reach operation upon t. If s!=null,
* it means that semantic predicates didn't prevent us from
* creating a DFA state. Once we know s!=null, we check to see if
* the DFA state has an edge already for t. If so, we can just reuse
* it's configuration set; there's no point in re-computing it.
* This is kind of like doing DFA simulation within the ATN
* simulation because DFA simulation is really just a way to avoid
* computing reach/closure sets. Technically, once we know that
* we have a previously added DFA state, we could jump over to
* the DFA simulator. But, that would mean popping back and forth
* a lot and making things more complicated algorithmically.
* This optimization makes a lot of sense for loops within DFA.
* A character will take us back to an existing DFA state
* that already has lots of edges out of it. e.g., .* in comments.
* print("Target for:" + str(s) + " and:" + str(t))
*/
let target = this.getExistingTargetState(s, t);
// print("Existing:" + str(target))
if (target === null) {
target = this.computeTargetState(input, s, t);
// print("Computed:" + str(target))
}
if (target === ATNSimulator.ERROR) {
break;
}
// If this is a consumable input element, make sure to consume before
// capturing the accept state so the input index, line, and char
// position accurately reflect the state of the interpreter at the
// end of the token.
if (t !== Token.EOF) {
this.consume(input);
}
if (target.isAcceptState) {
this.captureSimState(this.prevAccept, input, target);
if (t === Token.EOF) {
break;
}
}
t = input.LA(1);
s = target; // flip; current DFA target becomes new src/from state
}
return this.failOrAccept(this.prevAccept, input, s.configs, t);
}
/**
* Get an existing target state for an edge in the DFA. If the target state
* for the edge has not yet been computed or is otherwise not available,
* this method returns {@code null}.
*
* @param s The current DFA state
* @param t The next input symbol
* @return The existing target DFA state for the given input symbol
* {@code t}, or {@code null} if the target state for this edge is not
* already cached
*/
getExistingTargetState(s, t) {
if (s.edges === null || t < LexerATNSimulator.MIN_DFA_EDGE || t > LexerATNSimulator.MAX_DFA_EDGE) {
return null;
}
let target = s.edges[t - LexerATNSimulator.MIN_DFA_EDGE];
if (target === undefined) {
target = null;
}
if (LexerATNSimulator.debug && target !== null) {
console.log("reuse state " + s.stateNumber + " edge to " + target.stateNumber);
}
return target;
}
/**
* Compute a target state for an edge in the DFA, and attempt to add the
* computed state and corresponding edge to the DFA.
*
* @param input The input stream
* @param s The current DFA state
* @param t The next input symbol
*
* @return The computed target DFA state for the given input symbol
* {@code t}. If {@code t} does not lead to a valid DFA state, this method
* returns {@link //ERROR}.
*/
computeTargetState(input, s, t) {
const reach = new OrderedATNConfigSet();
// if we don't find an existing DFA state
// Fill reach starting from closure, following t transitions
this.getReachableConfigSet(input, s.configs, reach, t);
if (reach.items.length === 0) { // we got nowhere on t from s
if (!reach.hasSemanticContext) {
// we got nowhere on t, don't throw out this knowledge; it'd
// cause a failover from DFA later.
this.addDFAEdge(s, t, ATNSimulator.ERROR);
}
// stop when we can't match any more char
return ATNSimulator.ERROR;
}
// Add an edge from s to target DFA found/created for reach
return this.addDFAEdge(s, t, null, reach);
}
failOrAccept(prevAccept, input, reach, t) {
if (this.prevAccept.dfaState !== null) {
const lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor;
this.accept(input, lexerActionExecutor, this.startIndex,
prevAccept.index, prevAccept.line, prevAccept.column);
return prevAccept.dfaState.prediction;
} else {
// if no accept and EOF is first char, return EOF
if (t === Token.EOF && input.index === this.startIndex) {
return Token.EOF;
}
throw new LexerNoViableAltException(this.recog, input, this.startIndex, reach);
}
}
/**
* Given a starting configuration set, figure out all ATN configurations
* we can reach upon input {@code t}. Parameter {@code reach} is a return
* parameter.
*/
getReachableConfigSet(input, closure, reach, t) {
// this is used to skip processing for configs which have a lower priority
// than a config that already reached an accept state for the same rule
let skipAlt = ATN.INVALID_ALT_NUMBER;
for (let i = 0; i < closure.items.length; i++) {
const cfg = closure.items[i];
const currentAltReachedAcceptState = (cfg.alt === skipAlt);
if (currentAltReachedAcceptState && cfg.passedThroughNonGreedyDecision) {
continue;
}
if (LexerATNSimulator.debug) {
console.log("testing %s at %s\n", this.getTokenName(t), cfg
.toString(this.recog, true));
}
for (let j = 0; j < cfg.state.transitions.length; j++) {
const trans = cfg.state.transitions[j]; // for each transition
const target = this.getReachableTarget(trans, t);
if (target !== null) {
let lexerActionExecutor = cfg.lexerActionExecutor;
if (lexerActionExecutor !== null) {
lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - this.startIndex);
}
const treatEofAsEpsilon = (t === Token.EOF);
const config = new LexerATNConfig({state: target, lexerActionExecutor: lexerActionExecutor}, cfg);
if (this.closure(input, config, reach,
currentAltReachedAcceptState, true, treatEofAsEpsilon)) {
// any remaining configs for this alt have a lower priority
// than the one that just reached an accept state.
skipAlt = cfg.alt;
}
}
}
}
}
accept(input, lexerActionExecutor, startIndex, index, line, charPos) {
if (LexerATNSimulator.debug) {
console.log("ACTION %s\n", lexerActionExecutor);
}
// seek to after last char in token
input.seek(index);
this.line = line;
this.column = charPos;
if (lexerActionExecutor !== null && this.recog !== null) {
lexerActionExecutor.execute(this.recog, input, startIndex);
}
}
getReachableTarget(trans, t) {
if (trans.matches(t, 0, Lexer.MAX_CHAR_VALUE)) {
return trans.target;
} else {
return null;
}
}
computeStartState(input, p) {
const initialContext = PredictionContext.EMPTY;
const configs = new OrderedATNConfigSet();
for (let i = 0; i < p.transitions.length; i++) {
const target = p.transitions[i].target;
const cfg = new LexerATNConfig({state: target, alt: i + 1, context: initialContext}, null);
this.closure(input, cfg, configs, false, false, false);
}
return configs;
}
/**
* Since the alternatives within any lexer decision are ordered by
* preference, this method stops pursuing the closure as soon as an accept
* state is reached. After the first accept state is reached by depth-first
* search from {@code config}, all other (potentially reachable) states for
* this rule would have a lower priority.
*
* @return {Boolean} {@code true} if an accept state is reached, otherwise
* {@code false}.
*/
closure(input, config, configs,
currentAltReachedAcceptState, speculative, treatEofAsEpsilon) {
let cfg = null;
if (LexerATNSimulator.debug) {
console.log("closure(" + config.toString(this.recog, true) + ")");
}
if (config.state instanceof RuleStopState) {
if (LexerATNSimulator.debug) {
if (this.recog !== null) {
console.log("closure at %s rule stop %s\n", this.recog.ruleNames[config.state.ruleIndex], config);
} else {
console.log("closure at rule stop %s\n", config);
}
}
if (config.context === null || config.context.hasEmptyPath()) {
if (config.context === null || config.context.isEmpty()) {
configs.add(config);
return true;
} else {
configs.add(new LexerATNConfig({state: config.state, context: PredictionContext.EMPTY}, config));
currentAltReachedAcceptState = true;
}
}
if (config.context !== null && !config.context.isEmpty()) {
for (let i = 0; i < config.context.length; i++) {
if (config.context.getReturnState(i) !== PredictionContext.EMPTY_RETURN_STATE) {
const newContext = config.context.getParent(i); // "pop" return state
const returnState = this.atn.states[config.context.getReturnState(i)];
cfg = new LexerATNConfig({state: returnState, context: newContext}, config);
currentAltReachedAcceptState = this.closure(input, cfg,
configs, currentAltReachedAcceptState, speculative,
treatEofAsEpsilon);
}
}
}
return currentAltReachedAcceptState;
}
// optimization
if (!config.state.epsilonOnlyTransitions) {
if (!currentAltReachedAcceptState || !config.passedThroughNonGreedyDecision) {
configs.add(config);
}
}
for (let j = 0; j < config.state.transitions.length; j++) {
const trans = config.state.transitions[j];
cfg = this.getEpsilonTarget(input, config, trans, configs, speculative, treatEofAsEpsilon);
if (cfg !== null) {
currentAltReachedAcceptState = this.closure(input, cfg, configs,
currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
}
}
return currentAltReachedAcceptState;
}
// side-effect: can alter configs.hasSemanticContext
getEpsilonTarget(input, config, trans,
configs, speculative, treatEofAsEpsilon) {
let cfg = null;
if (trans.serializationType === Transition.RULE) {
const newContext = SingletonPredictionContext.create(config.context, trans.followState.stateNumber);
cfg = new LexerATNConfig({state: trans.target, context: newContext}, config);
} else if (trans.serializationType === Transition.PRECEDENCE) {
throw "Precedence predicates are not supported in lexers.";
} else if (trans.serializationType === Transition.PREDICATE) {
// Track traversing semantic predicates. If we traverse,
// we cannot add a DFA state for this "reach" computation
// because the DFA would not test the predicate again in the
// future. Rather than creating collections of semantic predicates
// like v3 and testing them on prediction, v4 will test them on the
// fly all the time using the ATN not the DFA. This is slower but
// semantically it's not used that often. One of the key elements to
// this predicate mechanism is not adding DFA states that see
// predicates immediately afterwards in the ATN. For example,
// a : ID {p1}? | ID {p2}? ;
// should create the start state for rule 'a' (to save start state
// competition), but should not create target of ID state. The
// collection of ATN states the following ID references includes
// states reached by traversing predicates. Since this is when we
// test them, we cannot cash the DFA state target of ID.
if (LexerATNSimulator.debug) {
console.log("EVAL rule " + trans.ruleIndex + ":" + trans.predIndex);
}
configs.hasSemanticContext = true;
if (this.evaluatePredicate(input, trans.ruleIndex, trans.predIndex, speculative)) {
cfg = new LexerATNConfig({state: trans.target}, config);
}
} else if (trans.serializationType === Transition.ACTION) {
if (config.context === null || config.context.hasEmptyPath()) {
// execute actions anywhere in the start rule for a token.
//
// TODO: if the entry rule is invoked recursively, some
// actions may be executed during the recursive call. The
// problem can appear when hasEmptyPath() is true but
// isEmpty() is false. In this case, the config needs to be
// split into two contexts - one with just the empty path
// and another with everything but the empty path.
// Unfortunately, the current algorithm does not allow
// getEpsilonTarget to return two configurations, so
// additional modifications are needed before we can support
// the split operation.
const lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
this.atn.lexerActions[trans.actionIndex]);
cfg = new LexerATNConfig({state: trans.target, lexerActionExecutor: lexerActionExecutor}, config);
} else {
// ignore actions in referenced rules
cfg = new LexerATNConfig({state: trans.target}, config);
}
} else if (trans.serializationType === Transition.EPSILON) {
cfg = new LexerATNConfig({state: trans.target}, config);
} else if (trans.serializationType === Transition.ATOM ||
trans.serializationType === Transition.RANGE ||
trans.serializationType === Transition.SET) {
if (treatEofAsEpsilon) {
if (trans.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE)) {
cfg = new LexerATNConfig({state: trans.target}, config);
}
}
}
return cfg;
}
/**
* Evaluate a predicate specified in the lexer.
*
* <p>If {@code speculative} is {@code true}, this method was called before
* {@link //consume} for the matched character. This method should call
* {@link //consume} before evaluating the predicate to ensure position
* sensitive values, including {@link Lexer//getText}, {@link Lexer//getLine},
* and {@link Lexer//getcolumn}, properly reflect the current
* lexer state. This method should restore {@code input} and the simulator
* to the original state before returning (i.e. undo the actions made by the
* call to {@link //consume}.</p>
*
* @param input The input stream.
* @param ruleIndex The rule containing the predicate.
* @param predIndex The index of the predicate within the rule.
* @param speculative {@code true} if the current index in {@code input} is
* one character before the predicate's location.
*
* @return {@code true} if the specified predicate evaluates to
* {@code true}.
*/
evaluatePredicate(input, ruleIndex,
predIndex, speculative) {
// assume true if no recognizer was provided
if (this.recog === null) {
return true;
}
if (!speculative) {
return this.recog.sempred(null, ruleIndex, predIndex);
}
const savedcolumn = this.column;
const savedLine = this.line;
const index = input.index;
const marker = input.mark();
try {
this.consume(input);
return this.recog.sempred(null, ruleIndex, predIndex);
} finally {
this.column = savedcolumn;
this.line = savedLine;
input.seek(index);
input.release(marker);
}
}
captureSimState(settings, input, dfaState) {
settings.index = input.index;
settings.line = this.line;
settings.column = this.column;
settings.dfaState = dfaState;
}
addDFAEdge(from_, tk, to, cfgs) {
if (to === undefined) {
to = null;
}
if (cfgs === undefined) {
cfgs = null;
}
if (to === null && cfgs !== null) {
// leading to this call, ATNConfigSet.hasSemanticContext is used as a
// marker indicating dynamic predicate evaluation makes this edge
// dependent on the specific input sequence, so the static edge in the
// DFA should be omitted. The target DFAState is still created since
// execATN has the ability to resynchronize with the DFA state cache
// following the predicate evaluation step.
//
// TJP notes: next time through the DFA, we see a pred again and eval.
// If that gets us to a previously created (but dangling) DFA
// state, we can continue in pure DFA mode from there.
// /
const suppressEdge = cfgs.hasSemanticContext;
cfgs.hasSemanticContext = false;
to = this.addDFAState(cfgs);
if (suppressEdge) {
return to;
}
}
// add the edge
if (tk < LexerATNSimulator.MIN_DFA_EDGE || tk > LexerATNSimulator.MAX_DFA_EDGE) {
// Only track edges within the DFA bounds
return to;
}
if (LexerATNSimulator.debug) {
console.log("EDGE " + from_ + " -> " + to + " upon " + tk);
}
if (from_.edges === null) {
// make room for tokens 1..n and -1 masquerading as index 0
from_.edges = [];
}
from_.edges[tk - LexerATNSimulator.MIN_DFA_EDGE] = to; // connect
return to;
}
/**
* Add a new DFA state if there isn't one with this set of
* configurations already. This method also detects the first
* configuration containing an ATN rule stop state. Later, when
* traversing the DFA, we will know which rule to accept.
*/
addDFAState(configs) {
const proposed = new DFAState(null, configs);
let firstConfigWithRuleStopState = null;
for (let i = 0; i < configs.items.length; i++) {
const cfg = configs.items[i];
if (cfg.state instanceof RuleStopState) {
firstConfigWithRuleStopState = cfg;
break;
}
}
if (firstConfigWithRuleStopState !== null) {
proposed.isAcceptState = true;
proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor;
proposed.prediction = this.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex];
}
const dfa = this.decisionToDFA[this.mode];
const existing = dfa.states.get(proposed);
if (existing !== null) {
return existing;
}
const newState = proposed;
newState.stateNumber = dfa.states.length;
configs.setReadonly(true);
newState.configs = configs;
dfa.states.add(newState);
return newState;
}
getDFA(mode) {
return this.decisionToDFA[mode];
}
// Get the text matched so far for the current token.
getText(input) {
// index is first lookahead char, don't include.
return input.getText(this.startIndex, input.index - 1);
}
consume(input) {
const curChar = input.LA(1);
if (curChar === "\n".charCodeAt(0)) {
this.line += 1;
this.column = 0;
} else {
this.column += 1;
}
input.consume();
}
getTokenName(tt) {
if (tt === -1) {
return "EOF";
} else {
return "'" + String.fromCharCode(tt) + "'";
}
}
}
LexerATNSimulator.debug = false;
LexerATNSimulator.dfa_debug = false;
LexerATNSimulator.MIN_DFA_EDGE = 0;
LexerATNSimulator.MAX_DFA_EDGE = 127; // forces unicode to stay in ATN