UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

408 lines (407 loc) 15.9 kB
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { CommonToken } from "antlr4ng";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { CommonTreeNodeStream } from "../tree/CommonTreeNodeStream.js";
import { TreeVisitor } from "../tree/TreeVisitor.js";
import { BlockSetTransformer } from "../tree/walkers/BlockSetTransformer.js";
import { Constants } from "../Constants.js";
import { GrammarToken } from "../parse/GrammarToken.js";
import { GrammarType } from "../support/GrammarType.js";
import { dupTree, isTokenName } from "../support/helpers.js";
import { Grammar } from "./Grammar.js";
import { IssueCode } from "./Issues.js";
import { AltAST } from "./ast/AltAST.js";
import { BlockAST } from "./ast/BlockAST.js";
import { GrammarAST } from "./ast/GrammarAST.js";
import { GrammarASTWithOptions } from "./ast/GrammarASTWithOptions.js";
import { GrammarRootAST } from "./ast/GrammarRootAST.js";
import { RuleAST } from "./ast/RuleAST.js";
import { TerminalAST } from "./ast/TerminalAST.js";

/**
 * Grammar AST transformations: merging imported grammars into the root grammar tree, reducing blocks to sets and
 * splitting the implicit lexer out of a combined grammar.
 */
class GrammarTransformPipeline {
    /**
     * @param g The grammar whose AST is transformed by {@link process}.
     * @param tool The tool instance used for logging and error reporting.
     */
    constructor(g, tool) {
        this.g = g;
        this.tool = tool;
    }

    static {
        __name(this, "GrammarTransformPipeline");
    }

    /**
     * Utility visitor that sets grammar ptr in each node
     *
     * @param g The grammar to set.
     * @param tree The tree to visit.
     */
    static setGrammarPtr(g, tree) {
        const v = new TreeVisitor();
        v.visit(tree, new class {
            pre(t) {
                t.g = g;

                return t;
            }

            post(t) {
                return t;
            }
        }());
    }

    /**
     * For every element in `tree` that carries the token index option, replaces the element's token with a
     * {@link GrammarToken} whose `originalTokenIndex` is the option value. The element's start/stop token indexes
     * are then taken from the node in `g.ast` that has this token index, or set to the token index itself when no
     * such node exists.
     *
     * @param g The grammar owning the original AST.
     * @param tree The tree whose element options are examined.
     */
    static augmentTokensWithOriginalPosition(g, tree) {
        const optionsSubTrees = tree.getNodesWithType(ANTLRv4Parser.ELEMENT_OPTIONS);
        for (const t of optionsSubTrees) {
            const elWithOpt = t.parent;
            if (!(elWithOpt instanceof GrammarASTWithOptions)) {
                continue;
            }

            const options = elWithOpt.getOptions();
            if (!options.has(Constants.TokenIndexOptionName)) {
                continue;
            }

            const newTok = new GrammarToken(g, elWithOpt.token);
            newTok.originalTokenIndex = Number.parseInt(options.get(Constants.TokenIndexOptionName).getText(), 10);
            elWithOpt.token = newTok;

            const originalNode = g.ast.getNodeWithTokenIndex(newTok.getTokenIndex());
            if (originalNode) {
                elWithOpt.setTokenStartIndex(originalNode.getTokenStartIndex());
                elWithOpt.setTokenStopIndex(originalNode.getTokenStopIndex());
            } else {
                // No matching node in the original tree: fall back to the token's own index for both ends.
                elWithOpt.setTokenStartIndex(newTok.getTokenIndex());
                elWithOpt.setTokenStopIndex(newTok.getTokenIndex());
            }
        }
    }

    /**
     * Runs the pipeline on the grammar given to the constructor: integrates imported grammars, then reduces blocks
     * to sets. The tree is logged before and after.
     */
    process() {
        const grammarRoot = this.g.ast;
        this.tool.logInfo({ component: "grammar", msg: `before: ${grammarRoot.toStringTree()}` });

        this.integrateImportedGrammars(this.g);
        this.reduceBlocksToSets(grammarRoot);

        this.tool.logInfo({ component: "grammar", msg: `after: ${grammarRoot.toStringTree()}` });
    }

    /**
     * Runs the {@link BlockSetTransformer} over the given tree.
     *
     * @param root The tree to transform.
     */
    reduceBlocksToSets(root) {
        const nodes = new CommonTreeNodeStream(root);
        const transformer = new BlockSetTransformer(this.tool.errorManager, nodes, this.g);
        transformer.downUp(root);
    }

    /**
     * Merges all the rules, token definitions, and named actions from imported grammars into the root grammar tree.
     * Perform:
     *
     *  (tokens { X (= Y 'y')) + (tokens { Z )  ->  (tokens { X (= Y 'y') Z)
     *  (@ members {foo}) + (@ members {bar})   ->  (@ members {foobar})
     *  (RULES (RULE x y)) + (RULES (RULE z))   ->  (RULES (RULE x y z))
     *
     * Rules in root prevent same rule from being appended to RULES node.
     *
     * The goal is a complete combined grammar so we can ignore subordinate grammars.
     *
     * @param rootGrammar The root grammar to integrate the imported grammars into.
     */
    integrateImportedGrammars(rootGrammar) {
        const imports = rootGrammar.getAllImportedGrammars();
        if (imports.length === 0) {
            return;
        }

        const root = rootGrammar.ast;
        let channelsRoot = root.getFirstChildWithType(ANTLRv4Parser.CHANNELS);
        let tokensRoot = root.getFirstChildWithType(ANTLRv4Parser.TOKENS);

        // Snapshot of the root's own actions; intentionally not refreshed while imports are merged.
        const actionRoots = root.getNodesWithType(ANTLRv4Parser.AT);

        const rootRulesRoot = root.getFirstChildWithType(ANTLRv4Parser.RULES);
        const rootRuleNames = new Set();
        for (const r of rootRulesRoot.getNodesWithType(ANTLRv4Parser.RULE)) {
            rootRuleNames.add(r.children[0].getText());
        }

        const rootModes = root.getNodesWithType(ANTLRv4Parser.MODE);
        const rootModeNames = new Set();
        for (const m of rootModes) {
            rootModeNames.add(m.children[0].getText());
        }

        for (const imp of imports) {
            // The order of these steps determines where nodes end up in the root tree; keep it.
            channelsRoot = this.#mergeChannels(rootGrammar, root, channelsRoot, imp);
            tokensRoot = this.#mergeTokens(rootGrammar, root, tokensRoot, imp);
            this.#mergeNamedActions(rootGrammar, root, actionRoots, imp);
            this.#mergeModes(rootGrammar, imp, rootModes, rootModeNames, rootRuleNames);
            this.#mergeRules(rootGrammar, imp, rootRulesRoot, rootRuleNames);
            this.#checkDelegateOptions(rootGrammar, imp);
        }

        rootGrammar.tool.logInfo({ component: "grammar", msg: `Grammar: ${rootGrammar.ast.toStringTree()}` });
    }

    /**
     * Build lexer grammar from combined grammar that looks like:
     *
     *  (COMBINED_GRAMMAR A
     *      (tokens { X (= Y 'y'))
     *      (OPTIONS (= x 'y'))
     *      (@ members {foo})
     *      (@ lexer header {package jj;})
     *      (RULES (RULE .+)))
     *
     * Move rules and actions to new tree, don't dup. Split AST apart. We'll have this grammar to share token symbols
     * later. Don't generate tokenVocab or tokens{} section. Copy over named actions.
     *
     * Side-effects: it removes children from GRAMMAR & RULES nodes in combined AST. Anything cut out is dup'd before
     * adding to lexer to avoid "who's ur daddy" issues.
     *
     * @param combinedGrammar The combined grammar to extract the implicit lexer from.
     *
     * @returns The lexer grammar AST. If the combined grammar has no RULES node the (rule-less) lexer AST is
     *          returned as is; if a RULES node exists but the extracted lexer ends up without any rule, `undefined`
     *          is returned.
     */
    extractImplicitLexer(combinedGrammar) {
        const combinedContext = combinedGrammar.ast;
        const elements = combinedContext.children;
        const lexerName = `${combinedContext.children[0].getText()}Lexer`;

        const lexerAST = new GrammarRootAST(
            CommonToken.fromType(ANTLRv4Parser.GRAMMAR, "LEXER_GRAMMAR"),
            combinedGrammar.ast.tokenStream
        );
        lexerAST.grammarType = GrammarType.Lexer;
        lexerAST.token.inputStream = combinedContext.token.inputStream;
        lexerAST.addChild(new GrammarAST(CommonToken.fromType(ANTLRv4Parser.ID, lexerName)));

        // Copy over the options that apply to lexers (except those explicitly excluded from copying).
        const optionsRoot = combinedContext.getFirstChildWithType(ANTLRv4Parser.OPTIONS);
        if (optionsRoot !== null && optionsRoot.children.length !== 0) {
            const lexerOptionsRoot = optionsRoot.dupNode();
            lexerAST.addChild(lexerOptionsRoot);
            for (const o of optionsRoot.children) {
                const optionName = o.children[0].getText();
                if (Grammar.lexerOptions.has(optionName) && !Grammar.doNotCopyOptionsToLexer.has(optionName)) {
                    const optionTree = dupTree(o);
                    lexerOptionsRoot.addChild(optionTree);
                    lexerAST.setOption(optionName, optionTree.children[1]);
                }
            }
        }

        // Every named action is copied to the lexer; those whose first child reads "lexer" are also removed from
        // the combined grammar. Removal is deferred so the child list is not modified while iterating it.
        const actionsWeMoved = [];
        for (const e of elements) {
            if (e.getType() === ANTLRv4Parser.AT) {
                lexerAST.addChild(dupTree(e));
                if (e.children[0].getText() === "lexer") {
                    actionsWeMoved.push(e);
                }
            }
        }

        for (const r of actionsWeMoved) {
            combinedContext.deleteChild(r);
        }

        const combinedRulesRoot = combinedContext.getFirstChildWithType(ANTLRv4Parser.RULES);
        if (combinedRulesRoot === null) {
            return lexerAST;
        }

        const lexerRulesRoot = new GrammarAST(CommonToken.fromType(ANTLRv4Parser.RULES, "RULES"));
        lexerAST.addChild(lexerRulesRoot);

        // Move the rules named like tokens into the lexer (again with deferred deletion).
        const rulesWeMoved = [];
        for (const r of combinedRulesRoot.children) {
            const ruleName = r.children[0].getText();
            if (isTokenName(ruleName)) {
                lexerRulesRoot.addChild(dupTree(r));
                rulesWeMoved.push(r);
            }
        }

        for (const r of rulesWeMoved) {
            combinedRulesRoot.deleteChild(r);
        }

        // Create a lexer rule for each string literal that is not already aliased by a lexer rule.
        const litAliases = Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
        const hasLexerAlias = (lit) => {
            if (litAliases === null) {
                return false;
            }

            for (const pair of litAliases) {
                if (lit === pair[1].getText()) {
                    return true;
                }
            }

            return false;
        };

        // The generated literal rules are placed in front of the existing rules, in literal order.
        let insertIndex = 0;
        for (const lit of combinedGrammar.getStringLiterals()) {
            if (hasLexerAlias(lit)) {
                continue;
            }

            // Builds: (RULE <ruleName> (BLOCK (ALT <lit>)))
            const ruleName = combinedGrammar.getStringLiteralLexerRuleName(lit);
            const litRule = new RuleAST(ANTLRv4Parser.RULE);
            const blk = new BlockAST(ANTLRv4Parser.BLOCK);
            const alt = new AltAST(ANTLRv4Parser.ALT);
            const slit = new TerminalAST(CommonToken.fromType(ANTLRv4Parser.STRING_LITERAL, lit));
            alt.addChild(slit);
            blk.addChild(alt);

            const idToken = CommonToken.fromType(ANTLRv4Parser.TOKEN_REF, ruleName);
            litRule.addChild(new TerminalAST(idToken));
            litRule.addChild(blk);
            lexerRulesRoot.insertChild(insertIndex, litRule);
            lexerRulesRoot.freshenParentAndChildIndexes();
            insertIndex++;
        }

        lexerAST.sanityCheckParentAndChildIndexes();
        combinedContext.sanityCheckParentAndChildIndexes();

        combinedGrammar.tool.logInfo({
            component: "grammar",
            msg: `after extract implicit lexer =${combinedContext.toStringTree()}`
        });
        combinedGrammar.tool.logInfo({ component: "grammar", msg: `lexer =${lexerAST.toStringTree()}` });

        if (lexerRulesRoot.children.length === 0) {
            return undefined;
        }

        return lexerAST;
    }

    /**
     * Merges the channels section of an imported grammar into the root grammar.
     *
     * @param rootGrammar The root grammar.
     * @param root The AST of the root grammar.
     * @param channelsRoot The current channels node of the root grammar or `null` if there is none yet.
     * @param imp The imported grammar.
     *
     * @returns The channels node of the root grammar after merging (still `null` if neither grammar has one).
     */
    #mergeChannels(rootGrammar, root, channelsRoot, imp) {
        const importedChannelRoot = imp.ast.getFirstChildWithType(ANTLRv4Parser.CHANNELS);
        if (importedChannelRoot === null) {
            return channelsRoot;
        }

        rootGrammar.tool.logInfo({
            component: "grammar",
            msg: `imported channels: ${importedChannelRoot.children}`
        });

        if (channelsRoot === null) {
            // The root has no channels section: take over a copy of the imported one as a whole.
            const copiedChannelsRoot = dupTree(importedChannelRoot);
            copiedChannelsRoot.g = rootGrammar;
            root.insertChild(1, copiedChannelsRoot);

            return copiedChannelsRoot;
        }

        for (const channel of importedChannelRoot.children) {
            const channelIsInRootGrammar = channelsRoot.children.some(
                (rootChannel) => rootChannel.getText() === channel.getText()
            );
            if (!channelIsInRootGrammar) {
                channelsRoot.addChild(channel.dupNode());
            }
        }

        return channelsRoot;
    }

    /**
     * Appends the token definitions of an imported grammar to the tokens section of the root grammar, creating
     * that section first if the root grammar has none.
     *
     * @param rootGrammar The root grammar.
     * @param root The AST of the root grammar.
     * @param tokensRoot The current tokens node of the root grammar or `null` if there is none yet.
     * @param imp The imported grammar.
     *
     * @returns The tokens node of the root grammar after merging (still `null` if neither grammar has one).
     */
    #mergeTokens(rootGrammar, root, tokensRoot, imp) {
        const importedTokensRoot = imp.ast.getFirstChildWithType(ANTLRv4Parser.TOKENS);
        if (importedTokensRoot === null) {
            return tokensRoot;
        }

        rootGrammar.tool.logInfo({ component: "grammar", msg: `imported tokens: ${importedTokensRoot.children}` });

        let destination = tokensRoot;
        if (destination === null) {
            destination = new GrammarAST(CommonToken.fromType(ANTLRv4Parser.TOKENS, "TOKENS"));
            destination.g = rootGrammar;
            root.insertChild(1, destination);
        }

        destination.addChildren(importedTokensRoot.children);

        return destination;
    }

    /**
     * Merges the named actions of an imported grammar into the root grammar. Actions with the same scope and name
     * coming from different grammars are concatenated into the first one seen; a duplicate within the same grammar
     * is reported as a redefinition. Actions that do not belong to the root grammar are inserted into the root AST.
     *
     * @param rootGrammar The root grammar.
     * @param root The AST of the root grammar.
     * @param actionRoots The action nodes collected from the root grammar.
     * @param imp The imported grammar.
     */
    #mergeNamedActions(rootGrammar, root, actionRoots, imp) {
        const importedActionRoots = imp.ast.getAllChildrenWithType(ANTLRv4Parser.AT);
        const allActionRoots = [...actionRoots, ...importedActionRoots];

        // Scope name -> (action name -> action node).
        const namedActions = new Map();

        rootGrammar.tool.logInfo({ component: "grammar", msg: `imported actions: ${importedActionRoots}` });

        for (const at of allActionRoots) {
            let scopeName = rootGrammar.getDefaultActionScope();
            let name;
            let action;
            if (at.children.length > 2) {
                // Explicit scope: (@ scope name {action})
                scopeName = at.children[0].getText();
                name = at.children[1];
                action = at.children[2];
            } else {
                name = at.children[0];
                action = at.children[1];
            }

            const prevAction = namedActions.get(scopeName)?.get(name.getText());
            if (!prevAction) {
                const mapping = namedActions.get(scopeName) ?? new Map();
                mapping.set(name.getText(), action);
                namedActions.set(scopeName, mapping);

                continue;
            }

            if (prevAction.g === at.g) {
                this.tool.errorManager.grammarError(
                    IssueCode.ActionRedefinition,
                    at.g.fileName,
                    name.token,
                    name.getText()
                );

                continue;
            }

            // Same action in two grammars: strip the first and last character of each text (the enclosing
            // braces) and join the bodies.
            let s1 = prevAction.getText();
            s1 = s1.substring(1, s1.length - 1);
            let s2 = action.getText();
            s2 = s2.substring(1, s2.length - 1);
            prevAction.token.text = `{${s1}\n${s2}}`;
        }

        for (const [scopeName, mapping] of namedActions) {
            for (const [name, action] of mapping) {
                rootGrammar.tool.logInfo({
                    component: "grammar",
                    msg: `${action.g.name} ${scopeName}:${name}=${action.getText()}`
                });

                if (action.g !== rootGrammar) {
                    root.insertChild(1, action.parent);
                }
            }
        }
    }

    /**
     * Merges the modes of an imported grammar into the root grammar. Rules of a mode that also exists in the root
     * are added to the root's mode; otherwise a new mode node is appended to the root AST, provided at least one
     * rule was added to it. Rules whose name is already known are skipped.
     *
     * @param rootGrammar The root grammar.
     * @param imp The imported grammar.
     * @param rootModes The mode nodes of the root grammar (extended in place).
     * @param rootModeNames The names of the modes in the root grammar (extended in place).
     * @param rootRuleNames The names of the rules in the root grammar (extended in place).
     */
    #mergeModes(rootGrammar, imp, rootModes, rootModeNames, rootRuleNames) {
        const modes = imp.ast.getNodesWithType(ANTLRv4Parser.MODE);
        for (const m of modes) {
            rootGrammar.tool.logInfo({ component: "grammar", msg: `imported mode: ${m.toStringTree()}` });

            const name = m.children[0].getText();
            const rootAlreadyHasMode = rootModeNames.has(name);

            let destinationAST;
            if (rootAlreadyHasMode) {
                destinationAST = rootModes.find((m2) => m2.children[0].getText() === name) ?? null;
            } else {
                destinationAST = m.dupNode();
                destinationAST.addChild(m.children[0].dupNode());
            }

            let addedRules = 0;
            const modeRules = m.getAllChildrenWithType(ANTLRv4Parser.RULE);
            for (const r of modeRules) {
                rootGrammar.tool.logInfo({ component: "grammar", msg: `imported rule: ${r.toStringTree()}` });

                const ruleName = r.children[0].getText();
                if (!rootRuleNames.has(ruleName)) {
                    destinationAST?.addChild(r);
                    addedRules++;
                    rootRuleNames.add(ruleName);
                }
            }

            if (!rootAlreadyHasMode && addedRules > 0 && destinationAST) {
                rootGrammar.ast.addChild(destinationAST);
                rootModeNames.add(name);
                rootModes.push(destinationAST);
            }
        }
    }

    /**
     * Appends the rules of an imported grammar to the rules node of the root grammar, skipping rules whose name is
     * already known.
     *
     * @param rootGrammar The root grammar.
     * @param imp The imported grammar.
     * @param rootRulesRoot The rules node of the root grammar.
     * @param rootRuleNames The names of the rules in the root grammar (extended in place).
     */
    #mergeRules(rootGrammar, imp, rootRulesRoot, rootRuleNames) {
        const rules = imp.ast.getNodesWithType(ANTLRv4Parser.RULE);
        for (const r of rules) {
            rootGrammar.tool.logInfo({ component: "grammar", msg: `imported rule: ${r.toStringTree()}` });

            const name = r.children[0].getText();
            if (!rootRuleNames.has(name)) {
                rootRulesRoot.addChild(r);
                rootRuleNames.add(name);
            }
        }
    }

    /**
     * Reports an error if the imported grammar has an options section containing an option with a non-empty value
     * that differs from the value of the same option in the root grammar.
     *
     * @param rootGrammar The root grammar.
     * @param imp The imported grammar.
     */
    #checkDelegateOptions(rootGrammar, imp) {
        const optionsRoot = imp.ast.getFirstChildWithType(ANTLRv4Parser.OPTIONS);
        if (optionsRoot === null) {
            return;
        }

        for (const [key] of imp.ast.getOptions()) {
            const importOption = imp.ast.getOptionString(key);
            if (!importOption) {
                continue;
            }

            const rootOption = rootGrammar.ast.getOptionString(key);
            if (importOption !== rootOption) {
                // One differing option is enough; the issue is reported once per imported grammar.
                this.tool.errorManager.grammarError(
                    IssueCode.OptionsInDelegate,
                    optionsRoot.g.fileName,
                    optionsRoot.token,
                    imp.name
                );

                return;
            }
        }
    }
}

export { GrammarTransformPipeline };