UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

619 lines (618 loc) 19.5 kB
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import {
  ATN,
  ATNState,
  ActionTransition,
  AtomTransition,
  BasicBlockStartState,
  BasicState,
  BlockEndState,
  EpsilonTransition,
  IntervalSet,
  LL1Analyzer,
  LoopEndState,
  NotSetTransition,
  PlusBlockStartState,
  PlusLoopbackState,
  PrecedencePredicateTransition,
  PredicateTransition,
  RuleStartState,
  RuleStopState,
  RuleTransition,
  SetTransition,
  StarBlockStartState,
  StarLoopEntryState,
  StarLoopbackState,
  Token,
  WildcardTransition
} from "antlr4ng";
import { ClassFactory } from "../ClassFactory.js";
import { Constants } from "../Constants.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { UseDefAnalyzer } from "../semantics/UseDefAnalyzer.js";
import { ActionAST } from "../tool/ast/ActionAST.js";
import { AltAST } from "../tool/ast/AltAST.js";
import { IssueCode } from "../tool/Issues.js";
import { LeftRecursiveRule } from "../tool/LeftRecursiveRule.js";
import { LexerGrammar } from "../tool/LexerGrammar.js";
import { CommonTreeNodeStream } from "../tree/CommonTreeNodeStream.js";
import { ATNBuilder } from "../tree/walkers/ATNBuilder.js";
import { ATNOptimizer } from "./ATNOptimizer.js";
import { TailEpsilonRemover } from "./TailEpsilonRemover.js";

/**
 * Builds the ATN for a grammar. Every element method returns a state pair `{ left, right }` (a "handle")
 * describing the entry and exit states of the sub graph created for that element.
 */
class ParserATNFactory {
  static {
    // Bundler helper: pins the class name so it stays readable in the generated output.
    __name(this, "ParserATNFactory");
  }

  /** The rule whose elements are currently converted; set via {@link setCurrentRuleName}. */
  currentRule;

  currentOuterAlt;

  /** The grammar this factory builds the ATN for. */
  g;

  /** The ATN under construction. */
  atn;

  /** `[rule, blockStart, blockEnd]` triples of `*`/`+` blocks, checked by {@link checkEpsilonClosure}. */
  preventEpsilonClosureBlocks = [];

  /** `[rule, blockStart, blockEnd]` triples of `?` blocks, checked at the end of {@link createATN}. */
  preventEpsilonOptionalBlocks = [];

  /**
   * @param g The grammar to process. A {@link LexerGrammar} instance gives a lexer ATN, anything else
   *          a parser ATN.
   */
  constructor(g) {
    this.g = g;

    const atnType = g instanceof LexerGrammar ? ATN.LEXER : ATN.PARSER;
    const maxTokenType = g.getMaxTokenType();
    this.atn = new ATN(atnType, maxTokenType);
  }

  /**
   * Tests for `(BLOCK (ALT .))` or `(BLOCK (ALT 'a') (ALT .))`.
   *
   * @param block The block AST whose children are inspected.
   *
   * @returns `true` if any alternative consists only of a wildcard (optionally preceded by element options).
   */
  static blockHasWildcardAlt(block) {
    for (const alt of block.children) {
      if (!(alt instanceof AltAST)) {
        continue;
      }

      const count = alt.children.length;
      const isSingleElement = count === 1
        || (count === 2 && alt.children[0].getType() === ANTLRv4Parser.ELEMENT_OPTIONS);
      if (isSingleElement) {
        const e = alt.children[count - 1];
        if (e.getType() === ANTLRv4Parser.WILDCARD) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Creates the full ATN: builds the rule graphs, adds follow links and EOF transitions, runs the optimizer
   * and finally validates closure and optional blocks.
   *
   * @returns The created ATN.
   *
   * @throws Error if an optional block without reported epsilon issue does not have exactly one bypass
   *         alternative.
   */
  createATN() {
    this.doCreateATN(Array.from(this.g.rules.values()));
    this.addRuleFollowLinks();
    this.addEOFTransitionToStartRules();
    ATNOptimizer.optimize(this.g, this.atn);
    this.checkEpsilonClosure();

    optionalCheck:
    for (const [rule, atnState1, atnState2] of this.preventEpsilonOptionalBlocks) {
      let bypassCount = 0;
      for (const transition of atnState1.transitions) {
        const startState = transition.target;
        if (startState === atnState2) {
          // A direct edge from block start to block end is the bypass alternative.
          bypassCount++;
          continue;
        }

        const analyzer = new LL1Analyzer(this.atn);
        if (analyzer.look(startState, atnState2).contains(Token.EPSILON)) {
          this.g.tool.errorManager.grammarError(
            IssueCode.EpsilonOptional,
            this.g.fileName,
            rule.ast.children[0].token,
            rule.name
          );

          // One report per block is enough; this also skips the bypass count check below.
          continue optionalCheck;
        }
      }

      if (bypassCount !== 1) {
        throw new Error("Expected optional block with exactly 1 bypass alternative.");
      }
    }

    return this.atn;
  }

  /**
   * Makes the rule with the given name the current rule (or `undefined` if the grammar returns nothing for it).
   *
   * @param name The name of the rule to look up in the grammar.
   */
  setCurrentRuleName(name) {
    this.currentRule = this.g.getRule(name) ?? undefined;
  }

  /**
   * From label `A` build graph `o-A->o`.
   *
   * @param node The token reference node; its `atnState` is set to the left state.
   *
   * @returns The created state pair.
   */
  tokenRef(node) {
    const left = this.newState(BasicState);
    const right = this.newState(BasicState);
    const ttype = this.g.getTokenType(node.getText());
    left.addTransition(new AtomTransition(right, ttype));
    node.atnState = left;

    return { left, right };
  }

  /**
   * From set build single edge graph `o->o-set->o`. To conform to what an alt block looks like, must have extra
   * state on left. This also handles `~A`, converted to `~{A}` set.
   *
   * @param associatedAST The AST node which gets the left state assigned as `atnState`.
   * @param terminals The terminal nodes whose token types form the set.
   * @param invert If truthy a {@link NotSetTransition} is created, otherwise a {@link SetTransition}.
   *
   * @returns The created state pair.
   */
  set(associatedAST, terminals, invert) {
    const left = this.newState(BasicState);
    const right = this.newState(BasicState);

    const set = new IntervalSet();
    for (const t of terminals) {
      set.addOne(this.g.getTokenType(t.getText()));
    }

    const transition = invert ? new NotSetTransition(right, set) : new SetTransition(right, set);
    left.addTransition(transition);
    associatedAST.atnState = left;

    return { left, right };
  }

  /**
   * Not valid for non-lexers. Reports a grammar error and falls back to a plain token reference for `a`.
   *
   * @param a The start node of the range.
   * @param b The end node of the range (only used for the error message).
   *
   * @returns The state pair for a token reference to `a`.
   */
  range(a, b) {
    // NOTE(review): the issue code identifier `TplemjRangeInParser` looks misspelled (possibly meant to be
    // `TokenRangeInParser`) - confirm against ../tool/Issues.js.
    this.g.tool.errorManager.grammarError(
      IssueCode.TplemjRangeInParser,
      this.g.fileName,
      a.token,
      a.token?.text,
      b.token?.text
    );

    return this.tokenRef(a);
  }

  /**
   * For a non-lexer, just build a simple token reference atom.
   *
   * @param stringLiteralAST The string literal node.
   *
   * @returns The state pair created by {@link tokenRef}.
   */
  stringLiteral(stringLiteralAST) {
    return this.tokenRef(stringLiteralAST);
  }

  /**
   * `[Aa]` char sets not allowed in parser.
   *
   * @param charSetAST Ignored.
   *
   * @returns Always `null`.
   */
  charSetLiteral(charSetAST) {
    return null;
  }

  /**
   * For reference to rule `r`, build
   *
   * ```
   * o->(r)  o
   * ```
   *
   * where `(r)` is the start of rule `r` and the trailing `o` is not linked to from rule ref state directly (uses
   * {@link RuleTransition.followState}).
   *
   * @param node The rule reference node.
   *
   * @returns The created state pair, or `null` if the rule is not defined.
   */
  ruleRef(node) {
    return this._ruleRef(node);
  }

  /**
   * Overloaded:
   * - `epsilon(node)`: from an empty alternative build `o-e->o`, set `node.atnState` and return the state pair.
   * - `epsilon(a, b, prepend?)`: add an epsilon transition from state `a` to state `b` (nothing happens if `a`
   *   is `null`). With a truthy `prepend` the transition is inserted as first transition of `a`, otherwise it
   *   is appended. Returns `undefined`.
   *
   * @param args Either `[node]` or `[a, b, prepend]`.
   *
   * @returns A state pair for the single argument form, otherwise `undefined`.
   */
  epsilon(...args) {
    if (args.length === 1) {
      const [node] = args;
      const left = this.newState(BasicState);
      const right = this.newState(BasicState);
      this.epsilon(left, right);
      node.atnState = left;

      return { left, right };
    }

    const [a, b, prepend] = args;
    if (a !== null) {
      const index = prepend ? 0 : a.transitions.length;
      a.addTransitionAtIndex(index, new EpsilonTransition(b));
    }

    return undefined;
  }

  /**
   * Build what amounts to an epsilon transition with a semantic predicate action. The `pred` is the AST node
   * of the semantic predicate.
   *
   * @param pred The predicate node. If it carries the precedence option a precedence predicate transition is
   *             created, otherwise a normal predicate transition for the current rule.
   *
   * @returns The created state pair.
   */
  sempred(pred) {
    const left = this.newState(BasicState);
    const right = this.newState(BasicState);

    let p;
    const precedenceOption = pred.getOptionString(Constants.PrecedenceOptionName);
    if (precedenceOption) {
      // Precedence values are decimal numbers; the explicit radix avoids e.g. hex interpretation.
      const precedence = Number.parseInt(precedenceOption, 10);
      p = new PrecedencePredicateTransition(right, precedence);
    } else {
      const isCtxDependent = UseDefAnalyzer.actionIsContextDependent(pred);
      p = new PredicateTransition(right, this.currentRule.index, this.g.sempreds.get(pred), isCtxDependent);
    }

    left.addTransition(p);
    pred.atnState = left;

    return { left, right };
  }

  /**
   * Build what amounts to an epsilon transition with an action.
   * The action goes into ATN though it is ignored during prediction
   * if {@link ActionTransition.actionIndex actionIndex} `< 0`.
   *
   * @param astOrString The action; only {@link ActionAST} instances are accepted here.
   *
   * @returns The created state pair.
   *
   * @throws Error if the argument is not an {@link ActionAST}.
   */
  action(astOrString) {
    if (!(astOrString instanceof ActionAST)) {
      throw new Error("This element is not valid in parsers.");
    }

    const left = this.newState(BasicState);
    const right = this.newState(BasicState);
    const a = new ActionTransition(right, this.currentRule.index, -1, false);
    left.addTransition(a);
    astOrString.atnState = left;

    return { left, right };
  }

  /**
   * From `A|B|..|Z` alternative block build
   *
   * ```
   * o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
   * |          ^
   * |->o-B->o--|
   * |          |
   * ...        |
   * |          |
   * |->o-Z->o--|
   * ```
   *
   * So start node points at every alternative with epsilon transition and
   * every alt right side points at a block end ATNState.
   *
   * Special case: only one alternative: don't make a block with alt
   * begin/end.
   *
   * Special case: if just a list of tokens/chars/sets, then collapse to a
   * single edged o-set->o graph.
   *
   * TODO: Set alt number (1..n) in the states?
   *
   * @param blkAST The block node; gets the start state assigned.
   * @param ebnfRoot The EBNF suffix node (`?`, `*`, `+`) or `null` for a plain block.
   * @param alts The state pairs of the alternatives.
   *
   * @returns The state pair of the block, or `undefined` for an unhandled EBNF node type.
   */
  block(blkAST, ebnfRoot, alts) {
    if (ebnfRoot === null) {
      if (alts.length === 1) {
        const h = alts[0];
        blkAST.atnState = h.left;

        return h;
      }

      const start = this.newState(BasicBlockStartState);
      if (alts.length > 1) {
        this.atn.defineDecisionState(start);
      }

      return this.makeBlock(start, blkAST, alts);
    }

    switch (ebnfRoot.getType()) {
      case ANTLRv4Parser.OPTIONAL: {
        // Always a decision: even a single alternative competes with the bypass.
        const start = this.newState(BasicBlockStartState);
        this.atn.defineDecisionState(start);
        const h = this.makeBlock(start, blkAST, alts);

        return this.optional(ebnfRoot, h);
      }

      case ANTLRv4Parser.CLOSURE: {
        const star = this.newState(StarBlockStartState);
        if (alts.length > 1) {
          this.atn.defineDecisionState(star);
        }
        const h = this.makeBlock(star, blkAST, alts);

        return this.star(ebnfRoot, h);
      }

      case ANTLRv4Parser.POSITIVE_CLOSURE: {
        const plus = this.newState(PlusBlockStartState);
        if (alts.length > 1) {
          this.atn.defineDecisionState(plus);
        }
        const h = this.makeBlock(plus, blkAST, alts);

        return this.plus(ebnfRoot, h);
      }

      default:
    }

    return undefined;
  }

  /**
   * Links the elements of an alternative, see {@link elemList}.
   *
   * @param els The state pairs of the elements.
   *
   * @returns The state pair spanning all elements.
   */
  alt(els) {
    return this.elemList(els);
  }

  /**
   * Build an atom with all possible values in its label.
   *
   * @param node The wildcard node; its `atnState` is set to the left state.
   *
   * @returns The created state pair.
   */
  wildcard(node) {
    const left = this.newState(BasicState);
    const right = this.newState(BasicState);
    left.addTransition(new WildcardTransition(right));
    node.atnState = left;

    return { left, right };
  }

  /** Labels do not create states: returns `t` unchanged. */
  label(t) {
    return t;
  }

  /** List labels do not create states: returns `t` unchanged. */
  listLabel(t) {
    return t;
  }

  /** @throws Error always. */
  lexerAltCommands(alt, commands) {
    throw new Error("This element is not allowed in parsers.");
  }

  /** @throws Error always. */
  lexerCallCommand(_id, arg) {
    throw new Error("This element is not allowed in parsers.");
  }

  /** @throws Error always. */
  lexerCommand(id) {
    throw new Error("This element is not allowed in parsers.");
  }

  /**
   * start-> rule - block -> end
   *
   * @param ruleAST The rule node; gets the rule start state assigned.
   * @param name The name of the rule.
   * @param blk The state pair of the rule's block.
   *
   * @returns The pair of rule start and rule stop state.
   */
  rule(ruleAST, name, blk) {
    const r = this.g.getRule(name);
    const start = this.atn.ruleToStartState[r.index];
    this.epsilon(start, blk.left);

    const stop = this.atn.ruleToStopState[r.index];
    this.epsilon(blk.right, stop);

    ruleAST.atnState = start;

    return { left: start, right: stop };
  }

  /**
   * Implementation of {@link ruleRef}. Reports an internal error if the referenced rule is unknown.
   *
   * @param node The rule reference node.
   *
   * @returns The created state pair, or `null` if the grammar returns `null` for the rule name.
   */
  _ruleRef(node) {
    const r = this.g.getRule(node.getText());
    if (r === null) {
      this.g.tool.errorManager.grammarError(
        IssueCode.InternalError,
        this.g.fileName,
        node.token,
        `Rule ${node.getText()} undefined`
      );

      return null;
    }

    const start = this.atn.ruleToStartState[r.index];
    const left = this.newState(BasicState);
    const right = this.newState(BasicState);

    // Precedence values are decimal numbers; the explicit radix avoids e.g. hex interpretation.
    const precedenceOption = node.getOptionString(Constants.PrecedenceOptionName);
    const precedence = precedenceOption ? Number.parseInt(precedenceOption, 10) : 0;

    const call = new RuleTransition(start, r.index, precedence, right);
    left.addTransition(call);
    node.atnState = left;

    return { left, right };
  }

  /**
   * Creates a new state of the given type, assigns the index of the current rule (or -1 if there is none)
   * and adds the state to the ATN.
   *
   * @param nodeType The constructor of the state class.
   *
   * @returns The new state.
   *
   * @throws Error (with the original error as `cause`) if creating or registering the state fails.
   */
  newState(nodeType) {
    try {
      const s = new nodeType();
      s.ruleIndex = this.currentRule ? this.currentRule.index : -1;
      this.atn.addState(s);

      return s;
    } catch (cause) {
      throw new Error(`Could not create ATN state of type ${nodeType.name}.`, { cause });
    }
  }

  /**
   * Reports an error for every recorded closure block whose lookahead between block start and block end
   * contains epsilon and/or EOF.
   */
  checkEpsilonClosure() {
    for (const [rule, blkStart, blkStop] of this.preventEpsilonClosureBlocks) {
      const analyzer = new LL1Analyzer(this.atn);
      const lookahead = analyzer.look(blkStart, blkStop);

      if (lookahead.contains(Token.EPSILON)) {
        const errorType = rule instanceof LeftRecursiveRule
          ? IssueCode.EpsilonLrFollow
          : IssueCode.EpsilonClosure;
        this.g.tool.errorManager.grammarError(
          errorType,
          this.g.fileName,
          rule.ast.children[0].token,
          rule.name
        );
      }

      if (lookahead.contains(Token.EOF)) {
        this.g.tool.errorManager.grammarError(
          IssueCode.EofClosure,
          this.g.fileName,
          rule.ast.children[0].token,
          rule.name
        );
      }
    }
  }

  /**
   * Creates the start/stop states of all grammar rules and then walks the block of each given rule with an
   * {@link ATNBuilder} to create its sub graph.
   *
   * @param rules The rules to process.
   */
  doCreateATN(rules) {
    this.createRuleStartAndStopATNStates();

    for (const r of rules) {
      const blk = r.ast.getFirstChildWithType(ANTLRv4Parser.BLOCK);
      const nodes = new CommonTreeNodeStream(blk);
      const b = new ATNBuilder(this.g.tool.errorManager, nodes, this);

      this.setCurrentRuleName(r.name);
      const h = b.ruleBlock(null);
      if (h) {
        this.rule(r.ast, r.name, h);
      }
    }
  }

  /**
   * Adds an epsilon transition from the stop state of the given rule to `right`.
   *
   * @param ruleIndex The index of the rule.
   * @param right The target state.
   */
  addFollowLink(ruleIndex, right) {
    const stop = this.atn.ruleToStopState[ruleIndex];
    this.epsilon(stop, right);
  }

  /**
   * Chains the given element handles. Where an element is a pair of basic states connected by a single
   * transition, that transition is re-targeted directly at the next element and the element's right state is
   * removed from the ATN. Otherwise an epsilon transition links the element to its successor.
   *
   * @param els The state pairs of the elements, in order.
   *
   * @returns A pair made of the left state of the first and the right state of the last element. A side is
   *          `null` if the respective element is missing (`null`, or `undefined` for an empty list).
   */
  elemList(els) {
    const n = els.length;

    // Hook up elements (visit all but the last one).
    for (let i = 0; i < n - 1; i++) {
      const el = els[i];
      const next = els[i + 1];

      const tr = el.left.transitions.length === 1 ? el.left.transitions[0] : null;
      const isRuleTrans = tr instanceof RuleTransition;

      const canCollapse = el.left.constructor.stateType === ATNState.BASIC
        && el.right
        && el.right.constructor.stateType === ATNState.BASIC
        && tr !== null
        && ((isRuleTrans && tr.followState === el.right) || tr.target === el.right);

      if (canCollapse) {
        if (next !== null) {
          // Rule transitions continue at their follow state, all others at their target.
          if (isRuleTrans) {
            tr.followState = next.left;
          } else {
            tr.target = next.left;
          }
        }

        this.atn.removeState(el.right);
      } else {
        this.epsilon(el.right, next.left);
      }
    }

    // `els[0]` is `undefined` for an empty list, so test for both `null` and `undefined` here.
    const first = els[0];
    const last = els[n - 1];

    return {
      left: first == null ? null : first.left,
      right: last == null ? null : last.right
    };
  }

  /**
   * From `(A)?` build either:
   *
   * ```
   * o--A->o
   * |     ^
   * o---->|
   * ```
   *
   * or, if `A` is a block, just add an empty alt to the end of the block.
   *
   * @param optAST The optional node; gets the block start state assigned.
   * @param blk The state pair of the block.
   *
   * @returns The given block pair (with the bypass transition added).
   */
  optional(optAST, blk) {
    const blkStart = blk.left;
    const blkEnd = blk.right;
    this.preventEpsilonOptionalBlocks.push([this.currentRule, blkStart, blkEnd]);

    const greedy = optAST.isGreedy();
    blkStart.nonGreedy = !greedy;

    // For non-greedy blocks the bypass becomes the first alternative.
    this.epsilon(blkStart, blkEnd, !greedy);
    optAST.atnState = blkStart;

    return blk;
  }

  /**
   * From `(blk)+` build
   *
   * ```
   *   |---------|
   *   v         |
   *  [o-blk-o]->o->o
   * ```
   *
   * We add a decision for loop back node to the existing one at `blk` start.
   *
   * @param plusAST The `+` node; gets the loop back state assigned.
   * @param blk The state pair of the block.
   *
   * @returns The pair of block start and loop end state.
   */
  plus(plusAST, blk) {
    const blkStart = blk.left;
    const blkEnd = blk.right;
    this.preventEpsilonClosureBlocks.push([this.currentRule, blkStart, blkEnd]);

    const greedy = plusAST.isGreedy();

    const loop = this.newState(PlusLoopbackState);
    loop.nonGreedy = !greedy;
    this.atn.defineDecisionState(loop);

    const end = this.newState(LoopEndState);
    blkStart.loopBackState = loop;
    end.loopBackState = loop;

    plusAST.atnState = loop;
    this.epsilon(blkEnd, loop);

    const blkAST = plusAST.children[0];
    if (greedy) {
      if (this.expectNonGreedy(blkAST)) {
        this.g.tool.errorManager.grammarError(
          IssueCode.ExpectedNonGreedyWildcardBlock,
          this.g.fileName,
          plusAST.token,
          plusAST.token?.text
        );
      }

      // Greedy: looping back is the first alternative, leaving the loop the second.
      this.epsilon(loop, blkStart);
      this.epsilon(loop, end);
    } else {
      // Non-greedy: leaving the loop is the first alternative.
      this.epsilon(loop, end);
      this.epsilon(loop, blkStart);
    }

    return { left: blkStart, right: end };
  }

  /**
   * From `(blk)*` build `( blk+ )?` with *two* decisions, one for entry and one for choosing alts of `blk`.
   *
   * ```
   *   |-------------|
   *   v             |
   *   o--[o-blk-o]->o  o
   *   |                ^
   *   -----------------|
   * ```
   *
   * Note that the optional bypass must jump outside the loop as `(A|B)*` is not the same thing as `(A|B|)+`.
   *
   * @param starAST The `*` node; gets the loop entry state assigned.
   * @param elem The state pair of the block.
   *
   * @returns The pair of loop entry and loop end state.
   */
  star(starAST, elem) {
    const blkStart = elem.left;
    const blkEnd = elem.right;
    this.preventEpsilonClosureBlocks.push([this.currentRule, blkStart, blkEnd]);

    const greedy = starAST.isGreedy();

    const entry = this.newState(StarLoopEntryState);
    entry.nonGreedy = !greedy;
    this.atn.defineDecisionState(entry);

    const end = this.newState(LoopEndState);
    const loop = this.newState(StarLoopbackState);
    entry.loopBackState = loop;
    end.loopBackState = loop;

    const blkAST = starAST.children[0];
    if (greedy) {
      if (this.expectNonGreedy(blkAST)) {
        this.g.tool.errorManager.grammarError(
          IssueCode.ExpectedNonGreedyWildcardBlock,
          this.g.fileName,
          starAST.token,
          starAST.token?.text
        );
      }

      // Greedy: entering the block is the first alternative, the bypass the second.
      this.epsilon(entry, blkStart);
      this.epsilon(entry, end);
    } else {
      // Non-greedy: the bypass is the first alternative.
      this.epsilon(entry, end);
      this.epsilon(entry, blkStart);
    }

    this.epsilon(blkEnd, loop);
    this.epsilon(loop, entry);
    starAST.atnState = entry;

    return { left: entry, right: end };
  }

  /**
   * For every basic state whose only transition is a rule transition, links the stop state of the invoked
   * rule to the follow state of that transition.
   */
  addRuleFollowLinks() {
    for (const p of this.atn.states) {
      const isRuleInvocation = p !== null
        && p.constructor.stateType === ATNState.BASIC
        && p.transitions.length === 1
        && p.transitions[0] instanceof RuleTransition;

      if (isRuleInvocation) {
        const rt = p.transitions[0];
        this.addFollowLink(rt.ruleIndex, rt.followState);
      }
    }
  }

  /**
   * Add an EOF transition to any rule end ATNState that points to nothing (i.e., for all those rules not invoked
   * by another rule). These are start symbols then.
   *
   * @returns The number of grammar entry points; i.e., how many rules are not invoked by another rule (they can
   *          only be invoked from outside). These are the start rules.
   */
  addEOFTransitionToStartRules() {
    let n = 0;

    // A single state serves as target of all EOF transitions.
    const eofTarget = this.newState(BasicState);
    for (const r of this.g.rules.values()) {
      const stop = this.atn.ruleToStopState[r.index];
      if (stop.transitions.length > 0) {
        continue;
      }

      ++n;
      stop.addTransition(new AtomTransition(eofTarget, Token.EOF));
    }

    return n;
  }

  /**
   * @param blkAST The block to check.
   *
   * @returns The result of {@link ParserATNFactory.blockHasWildcardAlt} for the block.
   */
  expectNonGreedy(blkAST) {
    return ParserATNFactory.blockHasWildcardAlt(blkAST);
  }

  /**
   * Creates a block end state and connects `start` with every alternative and every alternative with the
   * end state, using epsilon transitions. Each alternative is then visited by a {@link TailEpsilonRemover}.
   *
   * @param start The block start state; its `endState` is set to the new end state.
   * @param blkAST The block node; gets `start` assigned.
   * @param alts The state pairs of the alternatives.
   *
   * @returns The pair of `start` and the new block end state.
   */
  makeBlock(start, blkAST, alts) {
    const end = this.newState(BlockEndState);
    start.endState = end;

    for (const alt of alts) {
      this.epsilon(start, alt.left);
      this.epsilon(alt.right, end);

      const opt = new TailEpsilonRemover(this.atn);
      opt.visit(alt.left);
    }

    blkAST.atnState = start;

    return { left: start, right: end };
  }

  /** Define all the rule begin/end ATNStates to solve forward reference issues. */
  createRuleStartAndStopATNStates() {
    const ruleCount = this.g.rules.size;
    this.atn.ruleToStartState = new Array(ruleCount);
    this.atn.ruleToStopState = new Array(ruleCount);

    for (const r of this.g.rules.values()) {
      const start = this.newState(RuleStartState);
      const stop = this.newState(RuleStopState);

      start.stopState = stop;
      start.isLeftRecursiveRule = r instanceof LeftRecursiveRule;

      // newState() used the current rule (if any) for the index, so set the correct one explicitly.
      start.ruleIndex = r.index;
      stop.ruleIndex = r.index;

      this.atn.ruleToStartState[r.index] = start;
      this.atn.ruleToStopState[r.index] = stop;
    }
  }

  static {
    // Registers the creation function for this class with the class factory.
    ClassFactory.createParserATNFactory = (g) => {
      return new ParserATNFactory(g);
    };
  }
}

export { ParserATNFactory };