/*
 * antlr-ng
 * Version:
 * Next generation ANTLR Tool
 * 619 lines (618 loc) • 19.5 kB
 * JavaScript
 */
/** Local alias for `Object.defineProperty`. */
const __defProp = Object.defineProperty;
/**
 * Defines the `name` property of `target` as `value` (marked configurable) and returns `target`.
 *
 * @param target The object (here: a class) whose `name` property is defined.
 * @param value The value to store in the `name` property.
 * @returns `target`, as returned by `Object.defineProperty`.
 */
const __name = (target, value) => {
    return __defProp(target, "name", { value, configurable: true });
};
import {
ATN,
ATNState,
ActionTransition,
AtomTransition,
BasicBlockStartState,
BasicState,
BlockEndState,
EpsilonTransition,
IntervalSet,
LL1Analyzer,
LoopEndState,
NotSetTransition,
PlusBlockStartState,
PlusLoopbackState,
PrecedencePredicateTransition,
PredicateTransition,
RuleStartState,
RuleStopState,
RuleTransition,
SetTransition,
StarBlockStartState,
StarLoopEntryState,
StarLoopbackState,
Token,
WildcardTransition
} from "antlr4ng";
import { ClassFactory } from "../ClassFactory.js";
import { Constants } from "../Constants.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { UseDefAnalyzer } from "../semantics/UseDefAnalyzer.js";
import { ActionAST } from "../tool/ast/ActionAST.js";
import { AltAST } from "../tool/ast/AltAST.js";
import { IssueCode } from "../tool/Issues.js";
import { LeftRecursiveRule } from "../tool/LeftRecursiveRule.js";
import { LexerGrammar } from "../tool/LexerGrammar.js";
import { CommonTreeNodeStream } from "../tree/CommonTreeNodeStream.js";
import { ATNBuilder } from "../tree/walkers/ATNBuilder.js";
import { ATNOptimizer } from "./ATNOptimizer.js";
import { TailEpsilonRemover } from "./TailEpsilonRemover.js";
class ParserATNFactory {
static {
__name(this, "ParserATNFactory");
}
/** The rule currently being processed; assigned by `setCurrentRuleName` and read when new states are created. */
currentRule;
/** Declared here, but neither read nor written by the code visible in this class. */
currentOuterAlt;
/** The grammar for which the ATN is built. */
g;
/** The ATN under construction. */
atn;
/** `[rule, blockStart, blockEnd]` entries recorded by `plus` and `star`; examined in `checkEpsilonClosure`. */
preventEpsilonClosureBlocks = [];
/** `[rule, blockStart, blockEnd]` entries recorded by `optional`; examined at the end of `createATN`. */
preventEpsilonOptionalBlocks = [];
constructor(g) {
this.g = g;
const atnType = g instanceof LexerGrammar ? ATN.LEXER : ATN.PARSER;
const maxTokenType = g.getMaxTokenType();
this.atn = new ATN(atnType, maxTokenType);
}
/**
* `(BLOCK (ALT .))` or `(BLOCK (ALT 'a') (ALT .))`.
*/
static blockHasWildcardAlt(block) {
for (const alt of block.children) {
if (!(alt instanceof AltAST)) {
continue;
}
const altAST = alt;
if (altAST.children.length === 1 || altAST.children.length === 2 && altAST.children[0].getType() === ANTLRv4Parser.ELEMENT_OPTIONS) {
const e = altAST.children[altAST.children.length - 1];
if (e.getType() === ANTLRv4Parser.WILDCARD) {
return true;
}
}
}
return false;
}
createATN() {
this.doCreateATN(Array.from(this.g.rules.values()));
this.addRuleFollowLinks();
this.addEOFTransitionToStartRules();
ATNOptimizer.optimize(this.g, this.atn);
this.checkEpsilonClosure();
optionalCheck:
for (const [rule, atnState1, atnState2] of this.preventEpsilonOptionalBlocks) {
let bypassCount = 0;
for (const transition of atnState1.transitions) {
const startState = transition.target;
if (startState === atnState2) {
bypassCount++;
continue;
}
const analyzer = new LL1Analyzer(this.atn);
if (analyzer.look(startState, atnState2).contains(Token.EPSILON)) {
this.g.tool.errorManager.grammarError(
IssueCode.EpsilonOptional,
this.g.fileName,
rule.ast.children[0].token,
rule.name
);
continue optionalCheck;
}
}
if (bypassCount !== 1) {
throw new Error("Expected optional block with exactly 1 bypass alternative.");
}
}
return this.atn;
}
setCurrentRuleName(name) {
this.currentRule = this.g.getRule(name) ?? void 0;
}
/** From label `A` build graph `o-A->o`. */
tokenRef(node) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const ttype = this.g.getTokenType(node.getText());
left.addTransition(new AtomTransition(right, ttype));
node.atnState = left;
return { left, right };
}
/**
* From set build single edge graph `o->o-set->o`. To conform to what an alt block looks like, must have extra
* state on left. This also handles `~A`, converted to `~{A}` set.
*/
set(associatedAST, terminals, invert) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const set = new IntervalSet();
for (const t of terminals) {
const ttype = this.g.getTokenType(t.getText());
set.addOne(ttype);
}
if (invert) {
left.addTransition(new NotSetTransition(right, set));
} else {
left.addTransition(new SetTransition(right, set));
}
associatedAST.atnState = left;
return { left, right };
}
/** Not valid for non-lexers. */
range(a, b) {
this.g.tool.errorManager.grammarError(
IssueCode.TplemjRangeInParser,
this.g.fileName,
a.token,
a.token?.text,
b.token?.text
);
return this.tokenRef(a);
}
/**
 * For a non-lexer, just build a simple token reference atom.
 *
 * @param stringLiteralAST The string literal node, passed on unchanged to `tokenRef`.
 *
 * @returns The handle created by `tokenRef`.
 */
stringLiteral(stringLiteralAST) {
return this.tokenRef(stringLiteralAST);
}
/**
 * `[Aa]` char sets are not allowed in parsers, hence no ATN fragment is built here.
 *
 * @param charSetAST Not used.
 *
 * @returns Always `null`.
 */
charSetLiteral(charSetAST) {
return null;
}
/**
* For reference to rule `r`, build
*
* ```
* o->(r) o
* ```
*
* where `(r)` is the start of rule `r` and the trailing `o` is not linked to from rule ref state directly (uses
* {@see RuleTransition.followState}).
*/
ruleRef(node) {
const h = this._ruleRef(node);
return h;
}
epsilon(...args) {
if (args.length === 1) {
const [node] = args;
const left = this.newState(BasicState);
const right = this.newState(BasicState);
this.epsilon(left, right);
node.atnState = left;
return { left, right };
}
const [a, b, prepend] = args;
if (a !== null) {
const index = prepend ? 0 : a.transitions.length;
a.addTransitionAtIndex(index, new EpsilonTransition(b));
}
return void 0;
}
/**
* Build what amounts to an epsilon transition with a semantic predicate action. The `pred` is a pointer
* into the AST of the {@link ANTLRParser#SEMPRED} token.
*/
sempred(pred) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
let p;
if (pred.getOptionString(Constants.PrecedenceOptionName)) {
const precedence = Number.parseInt(pred.getOptionString(Constants.PrecedenceOptionName) ?? "0");
p = new PrecedencePredicateTransition(right, precedence);
} else {
const isCtxDependent = UseDefAnalyzer.actionIsContextDependent(pred);
p = new PredicateTransition(right, this.currentRule.index, this.g.sempreds.get(pred), isCtxDependent);
}
left.addTransition(p);
pred.atnState = left;
return { left, right };
}
/**
* Build what amounts to an epsilon transition with an action.
* The action goes into ATN though it is ignored during prediction
* if {@see ActionTransition.actionIndex actionIndex} `< 0`.
*/
action(astOrString) {
if (astOrString instanceof ActionAST) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const a = new ActionTransition(right, this.currentRule.index, -1, false);
left.addTransition(a);
astOrString.atnState = left;
return { left, right };
}
throw new Error("This element is not valid in parsers.");
}
/**
* From `A|B|..|Z` alternative block build
*
* ```
* o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
* | ^
* |->o-B->o--|
* | |
* ... |
* | |
* |->o-Z->o--|
* ```
*
* So start node points at every alternative with epsilon transition and
* every alt right side points at a block end ATNState.
*
* Special case: only one alternative: don't make a block with alt
* begin/end.
*
* Special case: if just a list of tokens/chars/sets, then collapse to a
* single edged o-set->o graph.
*
* TODO: Set alt number (1..n) in the states?
*/
block(blkAST, ebnfRoot, alts) {
if (ebnfRoot === null) {
if (alts.length === 1) {
const h = alts[0];
blkAST.atnState = h.left;
return h;
}
const start = this.newState(BasicBlockStartState);
if (alts.length > 1) {
this.atn.defineDecisionState(start);
}
return this.makeBlock(start, blkAST, alts);
}
switch (ebnfRoot.getType()) {
case ANTLRv4Parser.OPTIONAL: {
const start = this.newState(BasicBlockStartState);
this.atn.defineDecisionState(start);
const h = this.makeBlock(start, blkAST, alts);
return this.optional(ebnfRoot, h);
}
case ANTLRv4Parser.CLOSURE: {
const star = this.newState(StarBlockStartState);
if (alts.length > 1) {
this.atn.defineDecisionState(star);
}
const h = this.makeBlock(star, blkAST, alts);
return this.star(ebnfRoot, h);
}
case ANTLRv4Parser.POSITIVE_CLOSURE: {
const plus = this.newState(PlusBlockStartState);
if (alts.length > 1) {
this.atn.defineDecisionState(plus);
}
const h = this.makeBlock(plus, blkAST, alts);
return this.plus(ebnfRoot, h);
}
default:
}
return void 0;
}
/**
 * Builds the graph for one alternative from the handles of its elements.
 *
 * @param els The element handles, passed on unchanged to `elemList`.
 *
 * @returns The handle returned by `elemList`.
 */
alt(els) {
return this.elemList(els);
}
/** Build an atom with all possible values in its label. */
wildcard(node) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
left.addTransition(new WildcardTransition(right));
node.atnState = left;
return { left, right };
}
/**
 * Labels need no ATN construct of their own in this factory.
 *
 * @param t The value to pass through.
 *
 * @returns `t`, unchanged.
 */
label(t) {
return t;
}
/**
 * List labels need no ATN construct of their own in this factory.
 *
 * @param t The value to pass through.
 *
 * @returns `t`, unchanged.
 */
listLabel(t) {
return t;
}
/**
 * Lexer alternative commands are not allowed in parsers.
 *
 * @param alt Not used.
 * @param commands Not used.
 *
 * @throws Error always.
 */
lexerAltCommands(alt, commands) {
throw new Error("This element is not allowed in parsers.");
}
/**
 * Lexer call commands are not allowed in parsers.
 *
 * @param _id Not used.
 * @param arg Not used.
 *
 * @throws Error always.
 */
lexerCallCommand(_id, arg) {
throw new Error("This element is not allowed in parsers.");
}
/**
 * Lexer commands are not allowed in parsers.
 *
 * @param id Not used.
 *
 * @throws Error always.
 */
lexerCommand(id) {
throw new Error("This element is not allowed in parsers.");
}
/** start-> rule - block -> end */
rule(ruleAST, name, blk) {
const r = this.g.getRule(name);
const start = this.atn.ruleToStartState[r.index];
this.epsilon(start, blk.left);
const stop = this.atn.ruleToStopState[r.index];
this.epsilon(blk.right, stop);
const h = { left: start, right: stop };
ruleAST.atnState = start;
return h;
}
_ruleRef(node) {
const r = this.g.getRule(node.getText());
if (r === null) {
this.g.tool.errorManager.grammarError(
IssueCode.InternalError,
this.g.fileName,
node.token,
"Rule " + node.getText() + " undefined"
);
return null;
}
const start = this.atn.ruleToStartState[r.index];
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const ast = node;
let precedence = 0;
if (ast.getOptionString(Constants.PrecedenceOptionName)) {
precedence = Number.parseInt(ast.getOptionString(Constants.PrecedenceOptionName) ?? "0");
}
const call = new RuleTransition(start, r.index, precedence, right);
left.addTransition(call);
node.atnState = left;
return { left, right };
}
newState(nodeType) {
try {
const s = new nodeType();
if (!this.currentRule) {
s.ruleIndex = -1;
} else {
s.ruleIndex = this.currentRule.index;
}
this.atn.addState(s);
return s;
} catch (cause) {
const error = new Error(`Could not create ATN state of type ${nodeType.name}.`);
error.cause = cause;
throw error;
}
}
checkEpsilonClosure() {
for (const [rule, blkStart, blkStop] of this.preventEpsilonClosureBlocks) {
const analyzer = new LL1Analyzer(this.atn);
const lookahead = analyzer.look(blkStart, blkStop);
if (lookahead.contains(Token.EPSILON)) {
const errorType = rule instanceof LeftRecursiveRule ? IssueCode.EpsilonLrFollow : IssueCode.EpsilonClosure;
this.g.tool.errorManager.grammarError(
errorType,
this.g.fileName,
rule.ast.children[0].token,
rule.name
);
}
if (lookahead.contains(Token.EOF)) {
this.g.tool.errorManager.grammarError(
IssueCode.EofClosure,
this.g.fileName,
rule.ast.children[0].token,
rule.name
);
}
}
}
doCreateATN(rules) {
this.createRuleStartAndStopATNStates();
for (const r of rules) {
const blk = r.ast.getFirstChildWithType(ANTLRv4Parser.BLOCK);
const nodes = new CommonTreeNodeStream(blk);
const b = new ATNBuilder(this.g.tool.errorManager, nodes, this);
this.setCurrentRuleName(r.name);
const h = b.ruleBlock(null);
if (h) {
this.rule(r.ast, r.name, h);
}
}
}
addFollowLink(ruleIndex, right) {
const stop = this.atn.ruleToStopState[ruleIndex];
this.epsilon(stop, right);
}
elemList(els) {
const n = els.length;
for (let i = 0; i < n - 1; i++) {
const el = els[i];
let tr = null;
if (el.left.transitions.length === 1) {
tr = el.left.transitions[0];
}
const isRuleTrans = tr instanceof RuleTransition;
if (el.left.constructor.stateType === ATNState.BASIC && el.right && el.right.constructor.stateType === ATNState.BASIC && tr !== null && (isRuleTrans && tr.followState === el.right || tr.target === el.right)) {
let handle = null;
if (i + 1 < els.length) {
handle = els[i + 1];
}
if (handle !== null) {
if (isRuleTrans) {
tr.followState = handle.left;
} else {
tr.target = handle.left;
}
}
this.atn.removeState(el.right);
} else {
this.epsilon(el.right, els[i + 1].left);
}
}
const first = els[0];
const last = els[n - 1];
let left = null;
if (first !== null) {
left = first.left;
}
let right = null;
if (last !== null) {
right = last.right;
}
return { left, right };
}
/**
* From `(A)?` build either:
*
* ```
* o--A->o
* | ^
* o---->|
* ```
*
* or, if `A` is a block, just add an empty alt to the end of the block.
*/
optional(optAST, blk) {
const blkStart = blk.left;
const blkEnd = blk.right;
this.preventEpsilonOptionalBlocks.push([this.currentRule, blkStart, blkEnd]);
const greedy = optAST.isGreedy();
blkStart.nonGreedy = !greedy;
this.epsilon(blkStart, blk.right, !greedy);
optAST.atnState = blk.left;
return blk;
}
/**
* From `(blk)+` build
*
* ```
* |---------|
* v |
* [o-blk-o]->o->o
* ```
*
* We add a decision for loop back node to the existing one at `blk` start.
*/
plus(plusAST, blk) {
const blkStart = blk.left;
const blkEnd = blk.right;
this.preventEpsilonClosureBlocks.push([this.currentRule, blkStart, blkEnd]);
const loop = this.newState(PlusLoopbackState);
loop.nonGreedy = !plusAST.isGreedy();
this.atn.defineDecisionState(loop);
const end = this.newState(LoopEndState);
blkStart.loopBackState = loop;
end.loopBackState = loop;
plusAST.atnState = loop;
this.epsilon(blkEnd, loop);
const blkAST = plusAST.children[0];
if (plusAST.isGreedy()) {
if (this.expectNonGreedy(blkAST)) {
this.g.tool.errorManager.grammarError(
IssueCode.ExpectedNonGreedyWildcardBlock,
this.g.fileName,
plusAST.token,
plusAST.token?.text
);
}
this.epsilon(loop, blkStart);
this.epsilon(loop, end);
} else {
this.epsilon(loop, end);
this.epsilon(loop, blkStart);
}
return { left: blkStart, right: end };
}
/**
* From `(blk)*` build `( blk+ )?` with *two* decisions, one for entry and one for choosing alts of `blk`.
*
* ```
* |-------------|
* v |
* o--[o-blk-o]->o o
* | ^
* -----------------|
* ```
*
* Note that the optional bypass must jump outside the loop as `(A|B)*` is not the same thing as `(A|B|)+`.
*/
star(starAST, elem) {
const blkStart = elem.left;
const blkEnd = elem.right;
this.preventEpsilonClosureBlocks.push([this.currentRule, blkStart, blkEnd]);
const entry = this.newState(StarLoopEntryState);
entry.nonGreedy = !starAST.isGreedy();
this.atn.defineDecisionState(entry);
const end = this.newState(LoopEndState);
const loop = this.newState(StarLoopbackState);
entry.loopBackState = loop;
end.loopBackState = loop;
const blkAST = starAST.children[0];
if (starAST.isGreedy()) {
if (this.expectNonGreedy(blkAST)) {
this.g.tool.errorManager.grammarError(
IssueCode.ExpectedNonGreedyWildcardBlock,
this.g.fileName,
starAST.token,
starAST.token?.text
);
}
this.epsilon(entry, blkStart);
this.epsilon(entry, end);
} else {
this.epsilon(entry, end);
this.epsilon(entry, blkStart);
}
this.epsilon(blkEnd, loop);
this.epsilon(loop, entry);
starAST.atnState = entry;
return { left: entry, right: end };
}
addRuleFollowLinks() {
for (const p of this.atn.states) {
if (p !== null && p.constructor.stateType === ATNState.BASIC && p.transitions.length === 1 && p.transitions[0] instanceof RuleTransition) {
const rt = p.transitions[0];
this.addFollowLink(rt.ruleIndex, rt.followState);
}
}
}
/**
* Add an EOF transition to any rule end ATNState that points to nothing (i.e., for all those rules not invoked
* by another rule). These are start symbols then.
*
* Return the number of grammar entry points; i.e., how many rules are not invoked by another rule (they can
* only be invoked from outside). These are the start rules.
*/
addEOFTransitionToStartRules() {
let n = 0;
const eofTarget = this.newState(BasicState);
for (const r of this.g.rules.values()) {
const stop = this.atn.ruleToStopState[r.index];
if (stop.transitions.length > 0) {
continue;
}
++n;
const t = new AtomTransition(eofTarget, Token.EOF);
stop.addTransition(t);
}
return n;
}
/**
 * Determines whether a loop over the given block is expected to be non-greedy, which is the case if the block has
 * a wildcard only alternative.
 *
 * @param blkAST The block AST to check.
 *
 * @returns The result of `ParserATNFactory.blockHasWildcardAlt` for the block.
 */
expectNonGreedy(blkAST) {
return ParserATNFactory.blockHasWildcardAlt(blkAST);
}
makeBlock(start, blkAST, alts) {
const end = this.newState(BlockEndState);
start.endState = end;
for (const alt of alts) {
this.epsilon(start, alt.left);
this.epsilon(alt.right, end);
const opt = new TailEpsilonRemover(this.atn);
opt.visit(alt.left);
}
blkAST.atnState = start;
return { left: start, right: end };
}
/** Define all the rule begin/end ATNStates to solve forward reference issues. */
createRuleStartAndStopATNStates() {
this.atn.ruleToStartState = new Array(this.g.rules.size);
this.atn.ruleToStopState = new Array(this.g.rules.size);
for (const r of this.g.rules.values()) {
const start = this.newState(RuleStartState);
const stop = this.newState(RuleStopState);
start.stopState = stop;
start.isLeftRecursiveRule = r instanceof LeftRecursiveRule;
start.ruleIndex = r.index;
stop.ruleIndex = r.index;
this.atn.ruleToStartState[r.index] = start;
this.atn.ruleToStopState[r.index] = stop;
}
}
static {
ClassFactory.createParserATNFactory = (g) => {
return new ParserATNFactory(g);
};
}
}
export {
ParserATNFactory
};