UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

374 lines (373 loc) 13.4 kB
const __defProp = Object.defineProperty;
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

import { fileURLToPath } from "node:url";
import { CommonToken, IntervalSet, Token } from "antlr4ng";
import { STGroupFile } from "stringtemplate4ts";
import { Constants } from "../Constants.js";
import { CodeGenerator } from "../codegen/CodeGenerator.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { OrderedHashMap } from "../misc/OrderedHashMap.js";
import { dupTree } from "../support/helpers.js";
import { IssueCode } from "../tool/Issues.js";
import { GrammarAST } from "../tool/ast/GrammarAST.js";
import { GrammarASTWithOptions } from "../tool/ast/GrammarASTWithOptions.js";
import { CommonTreeNodeStream } from "../tree/CommonTreeNodeStream.js";
import { LeftRecursiveRuleWalker } from "../tree/walkers/LeftRecursiveRuleWalker.js";

/** Operator associativity values accepted by the `assoc` element option. */
const Associativity = Object.freeze({
    Left: "left",
    Right: "right",
});

/**
 * Collects the alternatives of a left-recursive rule (binary, ternary, suffix, prefix and "other") while
 * walking the rule AST, and renders the equivalent precedence-climbing rule text from them.
 */
class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
    static {
        __name(this, "LeftRecursiveRuleAnalyzer");
    }

    /** Absolute path of the template group file, resolved relative to this module. */
    static templateGroupFile = fileURLToPath(new URL("../../templates/LeftRecursiveRules.stg", import.meta.url));

    /** Template group providing the `recRule` and `recRuleAlt` templates. */
    static recRuleTemplates = new STGroupFile(LeftRecursiveRuleAnalyzer.templateGroupFile);

    tool;

    /** Alt number -> alt info, filled by `binaryAlt()`. */
    binaryAlts = /* @__PURE__ */ new Map();

    /** Alt number -> alt info. Not written in this class; only read when rendering the rule. */
    ternaryAlts = /* @__PURE__ */ new Map();

    /** Alt number -> alt info, filled by `suffixAlt()`. */
    suffixAlts = /* @__PURE__ */ new Map();

    /** Alt infos filled by `prefixAlt()` and `otherAlt()`, in call order. */
    prefixAndOtherAlts = [];

    /** Pointer to ID node of ^(= ID element), stored as `[labelNode, altLabel]` pairs. */
    leftRecursiveRuleRefLabels = [];

    /** Tokens from which rule AST comes from */
    tokenStream;

    retvals;

    codegenTemplates;

    language;

    /** Alt number -> associativity, as recorded by `setAltAssoc()`. */
    altAssociativity = /* @__PURE__ */ new Map();

    /**
     * @param ruleAST The AST of the rule to analyze. Its grammar (`ruleAST.g`) provides the token stream.
     * @param tool The tool instance, used for error reporting and logging.
     * @param ruleName The name of the rule being analyzed.
     * @param language The target language, used to look up the code generation templates.
     */
    constructor(ruleAST, tool, ruleName, language) {
        super(new CommonTreeNodeStream(ruleAST), tool.errorManager);

        this.tool = tool;
        this.ruleName = ruleName;
        this.language = language;
        this.tokenStream = ruleAST.g.tokenStream;
        this.codegenTemplates = new CodeGenerator(language).templates;
    }

    /**
     * Match (RULE RULE_REF (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*)))
     * Match (RULE RULE_REF (BLOCK (ALT .*) (ALT (ASSIGN ID RULE_REF[self]) .*) (ALT .*)))
     *
     * @param t The rule AST to inspect.
     * @param ruleName The name of the rule `t` belongs to.
     *
     * @returns `true` if at least one alternative of the rule block starts with a (possibly labeled)
     *          reference to `ruleName`, otherwise `false` (also when the rule has no BLOCK child).
     */
    static hasImmediateRecursiveRuleRefs(t, ruleName) {
        const blk = t.getFirstChildWithType(ANTLRv4Parser.BLOCK);
        if (blk === null) {
            return false;
        }

        const isSelfRef = (node) => {
            return node.getType() === ANTLRv4Parser.RULE_REF && node.getText() === ruleName;
        };

        for (const alt of blk.children) {
            // Leading element options are not part of the alt content, skip them.
            let first = alt.children[0];
            if (first.getType() === ANTLRv4Parser.ELEMENT_OPTIONS) {
                first = alt.children[1];
            }

            if (isSelfRef(first)) {
                return true;
            }

            // Labeled form: the second child of the label node is the referenced element.
            if (first.children.length > 1 && isSelfRef(first.children[1])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Stores the user-defined return values of the rule for later use in the rule template.
     *
     * @param t The return values node.
     */
    setReturnValues(t) {
        this.retvals = t;
    }

    /**
     * Records the associativity of the given alternative, taken from its `assoc` option
     * (left if the option is absent).
     *
     * An unsupported option value is reported as a grammar error and the alternative falls back to left
     * associativity. A tool error is reported if the alternative was already registered with a different
     * associativity.
     *
     * @param t The alternative AST carrying the options.
     * @param alt The number of the alternative.
     */
    setAltAssoc(t, alt) {
        let assoc = Associativity.Left;

        const a = t.getOptionString("assoc");
        if (a) {
            if (a === Associativity.Right) {
                assoc = Associativity.Right;
            } else if (a === Associativity.Left) {
                assoc = Associativity.Left;
            } else {
                // Report the value found in the grammar, not the default we fall back to.
                this.tool.errorManager.grammarError(
                    IssueCode.IllegalOptionValue,
                    t.g.fileName,
                    t.getOptionAST("assoc").token,
                    "assoc",
                    a
                );
            }
        }

        const previous = this.altAssociativity.get(alt);
        if (previous && previous !== assoc) {
            this.tool.errorManager.toolError(
                IssueCode.InternalError,
                `all operators of alt ${alt} of left-recursive rule must have same associativity`
            );
        }

        this.altAssociativity.set(alt, assoc);
    }

    /**
     * Registers a binary operator alternative.
     *
     * Works on a copy of the alternative: the leading recursive reference and the alt label are stripped and
     * the rightmost recursive reference receives the next precedence as option, before the text is produced.
     *
     * @param originalAltTree The AST of the alternative (not modified).
     * @param altNum The number of the alternative.
     */
    binaryAlt(originalAltTree, altNum) {
        let altTree = dupTree(originalAltTree);
        const altLabel = altTree.altLabel?.getText();

        let label;
        let isListLabel = false;
        const lrLabel = this.stripLeftRecursion(altTree);
        if (lrLabel) {
            label = lrLabel.getText();
            isListLabel = lrLabel.parent?.getType() === ANTLRv4Parser.PLUS_ASSIGN;
            this.leftRecursiveRuleRefLabels.push([lrLabel, altLabel]);
        }

        this.stripAltLabel(altTree);

        const nextPrec = this.nextPrecedence(altNum);
        altTree = this.addPrecedenceArgToRules(altTree, nextPrec);

        this.stripAltLabel(altTree);
        const altText = this.text(altTree).trim();

        this.binaryAlts.set(altNum, {
            altNum,
            altText,
            leftRecursiveRuleRefLabel: label,
            altLabel,
            isListLabel,
            originalAltAST: originalAltTree,
            nextPrec,
        });
    }

    /**
     * Registers a prefix operator alternative. The rightmost recursive reference receives the precedence of
     * this alternative as option.
     *
     * @param originalAltTree The AST of the alternative (not modified).
     * @param altNum The number of the alternative.
     */
    prefixAlt(originalAltTree, altNum) {
        let altTree = dupTree(originalAltTree);
        this.stripAltLabel(altTree);

        const nextPrec = this.precedence(altNum);
        altTree = this.addPrecedenceArgToRules(altTree, nextPrec);

        const altText = this.text(altTree).trim();
        const altLabel = altTree.altLabel?.getText() ?? undefined;

        this.prefixAndOtherAlts.push({
            altNum,
            altText,
            altLabel,
            isListLabel: false,
            originalAltAST: originalAltTree,
            nextPrec,
        });
    }

    /**
     * Registers a suffix operator alternative. The leading recursive reference and the alt label are
     * stripped from a copy of the alternative before its text is produced.
     *
     * @param originalAltTree The AST of the alternative (not modified).
     * @param altNum The number of the alternative.
     */
    suffixAlt(originalAltTree, altNum) {
        const altTree = dupTree(originalAltTree);
        const altLabel = altTree.altLabel?.getText();

        let label;
        let isListLabel = false;
        const lrLabel = this.stripLeftRecursion(altTree);
        if (lrLabel) {
            label = lrLabel.getText();
            isListLabel = lrLabel.parent?.getType() === ANTLRv4Parser.PLUS_ASSIGN;
            this.leftRecursiveRuleRefLabels.push([lrLabel, altLabel]);
        }

        this.stripAltLabel(altTree);
        const altText = this.text(altTree).trim();

        this.suffixAlts.set(altNum, {
            altNum,
            altText,
            leftRecursiveRuleRefLabel: label,
            altLabel,
            isListLabel,
            originalAltAST: originalAltTree,
            nextPrec: 0,
        });
    }

    /**
     * Registers an alternative that is neither a binary, suffix nor prefix operator alternative.
     * Unlike the other kinds, the alt text is stored untrimmed.
     *
     * @param originalAltTree The AST of the alternative (not modified).
     * @param altNum The number of the alternative.
     */
    otherAlt(originalAltTree, altNum) {
        const altTree = dupTree(originalAltTree);
        this.stripAltLabel(altTree);

        const altText = this.text(altTree);
        const altLabel = altTree.altLabel?.getText() ?? undefined;

        this.prefixAndOtherAlts.push({
            altNum,
            altText,
            altLabel,
            isListLabel: false,
            originalAltAST: originalAltTree,
            nextPrec: 0,
        });
    }

    // --------- get transformed rules ----------------

    /**
     * Renders the `recRule` template with everything collected so far and logs the result.
     *
     * @returns The text of the transformed rule.
     */
    getArtificialOpPrecRule() {
        const ruleST = LeftRecursiveRuleAnalyzer.recRuleTemplates.getInstanceOf("recRule");
        ruleST.add("ruleName", this.ruleName);

        const ruleArgST = this.codegenTemplates.getInstanceOf("recRuleArg");
        ruleST.add("argName", ruleArgST);

        const setResultST = this.codegenTemplates.getInstanceOf("recRuleSetResultAction");
        ruleST.add("setResultAction", setResultST);
        ruleST.add("userRetvals", this.retvals);

        // Merge the operator alts: binary first, then ternary, then suffix.
        const opPrecRuleAlts = new OrderedHashMap();
        for (const alts of [this.binaryAlts, this.ternaryAlts, this.suffixAlts]) {
            alts.forEach((value, key) => {
                opPrecRuleAlts.set(key, value);
            });
        }

        for (const [alt, altInfo] of opPrecRuleAlts) {
            const opPrec = this.precedence(alt);

            const predST = this.codegenTemplates.getInstanceOf("recRuleAltPredicate");
            predST.add("opPrec", opPrec);
            predST.add("ruleName", this.ruleName);

            const altST = LeftRecursiveRuleAnalyzer.recRuleTemplates.getInstanceOf("recRuleAlt");
            altST.add("pred", predST);
            altST.add("alt", altInfo);
            altST.add("precOption", Constants.PrecedenceOptionName);
            altST.add("opPrec", opPrec);

            ruleST.add("opAlts", altST);
        }

        ruleST.add("primaryAlts", this.prefixAndOtherAlts);

        const result = ruleST.render();
        this.tool.logInfo({ component: "left-recursion", msg: result });

        return result;
    }

    /**
     * Sets the precedence option on the last rule reference found in `t`, provided that reference is a
     * reference to the analyzed rule itself. No other rule reference is touched.
     *
     * @param t The alternative AST to update (modified in place).
     * @param prec The precedence value to set.
     *
     * @returns The passed-in tree.
     */
    addPrecedenceArgToRules(t, prec) {
        const outerAltRuleRefs = t.getNodesWithTypePreorderDFS(
            IntervalSet.of(ANTLRv4Parser.RULE_REF, ANTLRv4Parser.RULE_REF)
        );
        const lastRuleRef = outerAltRuleRefs[outerAltRuleRefs.length - 1];

        for (const ruleRef of outerAltRuleRefs) {
            const recursive = ruleRef.getText() === this.ruleName;
            const rightmost = ruleRef === lastRuleRef;
            if (recursive && rightmost) {
                const dummyValueNode = new GrammarAST(CommonToken.fromType(ANTLRv4Parser.INT, String(prec)));
                ruleRef.setOption(Constants.PrecedenceOptionName, dummyValueNode);
            }
        }

        return t;
    }

    /**
     * Removes the leading (possibly labeled) recursive rule reference from the alternative and moves the
     * token start index of the alternative to the element that follows it.
     *
     * TODO: this strips the tree properly, but `text()` reads the token stream between the start and stop
     * token indexes instead of walking the tree, so only the adjusted start index takes effect there.
     *
     * @param altAST The alternative AST to update (modified in place).
     *
     * @returns The label node if the recursive reference was labeled (`x=e` or `x+=e`), otherwise `undefined`.
     */
    stripLeftRecursion(altAST) {
        let lrLabel;

        let first = altAST.children[0];
        let leftRecurRuleIndex = 0;
        if (first.getType() === ANTLRv4Parser.ELEMENT_OPTIONS) {
            first = altAST.children[1];
            leftRecurRuleIndex = 1;
        }

        // Only present if the first element has at least two children (label=rule form).
        const rRef = first.children[1];

        const isDirectRef = first.getType() === ANTLRv4Parser.RULE_REF && first.getText() === this.ruleName;
        const isLabeledRef = rRef?.getType() === ANTLRv4Parser.RULE_REF && rRef.getText() === this.ruleName;
        if (isDirectRef || isLabeledRef) {
            if (first.getType() === ANTLRv4Parser.ASSIGN || first.getType() === ANTLRv4Parser.PLUS_ASSIGN) {
                lrLabel = first.children[0];
            }

            // Remove the rule ref (first child unless options are present).
            altAST.deleteChild(leftRecurRuleIndex);

            // Make the token range of the alt start right of the reference that was just deleted.
            const newFirstChild = altAST.children[leftRecurRuleIndex];
            altAST.setTokenStartIndex(newFirstChild.getTokenStartIndex());
        }

        return lrLabel;
    }

    /**
     * Strip the trailing `# label` tokens, if present, by moving the stop token index of the alternative
     * in front of the last POUND token within its token range.
     *
     * @param altAST The alternative AST to update (modified in place).
     */
    stripAltLabel(altAST) {
        const start = altAST.getTokenStartIndex();
        const stop = altAST.getTokenStopIndex();

        // Search backwards, the label is at the end of the alt.
        for (let i = stop; i >= start; i--) {
            if (this.tokenStream.get(i).type === ANTLRv4Parser.POUND) {
                altAST.setTokenStopIndex(i - 1);

                return;
            }
        }
    }

    /**
     * Produces the text of a tree from the token stream, using the token range of the tree.
     *
     * Tokens belonging to element option subtrees are left out. For tokens with a node in the tree the
     * options are re-emitted as `<...>` after the token (and after an argument block directly following a
     * rule reference), with a `tokenIndex` entry added for token, string literal and rule references.
     * Label identifiers of `=` and `+=` elements never get options.
     *
     * @param t The tree to render.
     *
     * @returns The text of the tree.
     */
    text(t) {
        const tokenStartIndex = t.getTokenStartIndex();
        const tokenStopIndex = t.getTokenStopIndex();

        // Token indexes covered by element options: skipped when copying tokens.
        const ignore = new IntervalSet();
        const optionsSubTrees = t.getNodesWithType(ANTLRv4Parser.ELEMENT_OPTIONS);
        for (const sub of optionsSubTrees) {
            ignore.addRange(sub.getTokenStartIndex(), sub.getTokenStopIndex());
        }

        // Token indexes of label identifiers: copied, but without options.
        const noOptions = new IntervalSet();
        const typeSet = new IntervalSet();
        typeSet.addOne(ANTLRv4Parser.ASSIGN);
        typeSet.addOne(ANTLRv4Parser.PLUS_ASSIGN);
        const labeledSubTrees = t.getNodesWithType(typeSet);
        for (const sub of labeledSubTrees) {
            noOptions.addOne(sub.children[0].getTokenStartIndex());
        }

        let result = "";
        let i = tokenStartIndex;
        let skipNext = false;
        while (i <= tokenStopIndex) {
            if (skipNext || ignore.contains(i)) {
                skipNext = false;
                i++;
                continue;
            }

            const tok = this.tokenStream.get(i);
            let text = tok.text;

            let elementOptions = "";
            if (!noOptions.contains(i)) {
                const node = t.getNodeWithTokenIndex(tok.tokenIndex);
                if (node !== null && (tok.type === ANTLRv4Parser.TOKEN_REF
                    || tok.type === ANTLRv4Parser.STRING_LITERAL || tok.type === ANTLRv4Parser.RULE_REF)) {
                    elementOptions += `tokenIndex=${tok.tokenIndex}`;
                }

                if (node instanceof GrammarASTWithOptions) {
                    // The node text already includes a trailing `?` (presumably the following token in the
                    // stream - TODO confirm), so take the node text and skip the next token.
                    const newText = node.getText();
                    if (newText.endsWith("?")) {
                        text = newText;
                        skipNext = true;
                    }

                    for (const [key, value] of node.getOptions().entries()) {
                        if (elementOptions.length > 0) {
                            elementOptions += ",";
                        }

                        elementOptions += key;
                        elementOptions += "=";
                        elementOptions += value.getText();
                    }
                }
            }

            result += text;
            i++;

            if (tok.type === ANTLRv4Parser.RULE_REF) {
                // Skip off-channel tokens between the rule reference and a possible argument block.
                while (i <= tokenStopIndex && this.tokenStream.get(i).channel !== Token.DEFAULT_CHANNEL) {
                    ++i;
                }

                // Copy the complete argument block, so that the options follow it.
                if (this.tokenStream.get(i).type === ANTLRv4Parser.BEGIN_ARGUMENT) {
                    while (true) {
                        result += this.tokenStream.get(i).text;
                        if (this.tokenStream.get(i).type === ANTLRv4Parser.END_ARGUMENT) {
                            break;
                        }

                        i++;
                    }

                    i++;
                }
            }

            if (elementOptions.length > 0) {
                result += "<" + elementOptions + ">";
            }
        }

        return result;
    }

    /**
     * @param alt The number of the alternative.
     *
     * @returns The precedence of the alternative, computed as `numAlts - alt + 1`, so alternatives with a
     *          lower number get a higher precedence.
     */
    precedence(alt) {
        return this.numAlts - alt + 1;
    }

    /**
     * Assumes left associativity unless the alternative was recorded as right associative.
     *
     * @param alt The number of the alternative.
     *
     * @returns The precedence of the alternative if it is right associative, otherwise that precedence + 1.
     */
    nextPrecedence(alt) {
        const p = this.precedence(alt);
        if (this.altAssociativity.get(alt) === Associativity.Right) {
            return p;
        }

        return p + 1;
    }

    /** @returns A string with the JSON representation of all collected alternatives. */
    toString() {
        return "PrecRuleOperatorCollector{"
            + "binaryAlts=" + JSON.stringify(Object.fromEntries(this.binaryAlts))
            + ", ternaryAlts=" + JSON.stringify(Object.fromEntries(this.ternaryAlts))
            + ", suffixAlts=" + JSON.stringify(Object.fromEntries(this.suffixAlts))
            + ", prefixAndOtherAlts=" + JSON.stringify(this.prefixAndOtherAlts)
            + "}";
    }
}

export { LeftRecursiveRuleAnalyzer };