UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

1,028 lines (1,027 loc) 36.7 kB
// esbuild keep-names helpers (preserved from the build output so function/class
// `.name` properties stay explicitly pinned).
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });

import { basename } from "node:path";

import {
    ATNDeserializer, ATNSerializer, CharStream, IntervalSet, LexerInterpreter, ParserInterpreter, Token, Vocabulary
} from "antlr4ng";

import { ANTLRv4Parser } from "..//generated/ANTLRv4Parser.js";
import { TreeVisitor } from "../tree/TreeVisitor.js";
import { TreeWizard } from "../tree/TreeWizard.js";
import { GrammarTreeVisitor } from "../tree/walkers/GrammarTreeVisitor.js";
import { ClassFactory } from "../ClassFactory.js";
import { targetLanguages } from "../codegen/CodeGenerator.js";
import { Constants } from "../Constants.js";
import { CharSupport } from "../misc/CharSupport.js";
import { Utils } from "../misc/Utils.js";
import { TokenVocabParser } from "../parse/TokenVocabParser.js";
import { GrammarType } from "../support/GrammarType.js";
import { IssueCode } from "./Issues.js";

/**
 * In-memory model of a single ANTLR grammar: its AST, token/channel symbol tables, rules, imported
 * delegate grammars and (lazily) the ATN built from it.
 */
class Grammar {
    static {
        __name(this, "Grammar");
    }

    /**
     * Placeholder name for token types that have no referencable name in the grammar:
     * used for {@link Token.INVALID_TYPE} and for types without a named token.
     */
    static INVALID_TOKEN_NAME = "<INVALID>";

    static caseInsensitiveOptionName = "caseInsensitive";

    static parserOptions = new Set();

    // Lexer grammars accept the same grammar-level options as parser grammars (shared set instance).
    static lexerOptions = Grammar.parserOptions;

    static lexerRuleOptions = new Set([
        Grammar.caseInsensitiveOptionName,
        Constants.PrecedenceOptionName,
        Constants.TokenIndexOptionName
    ]);

    static parseRuleOptions = new Set();

    static parserBlockOptions = new Set();

    static lexerBlockOptions = new Set();

    /** Legal options for rule refs like id&lt;key=value&gt; */
    static ruleRefOptions = new Set([
        Constants.PrecedenceOptionName,
        Constants.TokenIndexOptionName
    ]);

    /** Legal options for terminal refs like ID&lt;assoc=right&gt; */
    static tokenOptions = new Set([
        "assoc",
        Constants.TokenIndexOptionName
    ]);

    static actionOptions = new Set();

    static semPredOptions = new Set();

    static doNotCopyOptionsToLexer = new Set();

    static grammarAndLabelRefTypeToScope = new Map();

    /** Prefix for rule names invented for implicit string-literal tokens (T__0, T__1, ...). */
    static AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";

    name = "<not set>";

    /** The ATN built from this grammar (edges labelled with tokens/epsilon); created on demand. */
    atn;

    ast;

    /** Token stream used to create this grammar. */
    tokenStream;

    /**
     * The original, unaltered token stream. Set alongside {@link tokenStream}; if the two differ,
     * the grammar has been transformed.
     */
    originalTokenStream;

    fileName;

    /** The lexer extracted from a COMBINED grammar, if this parser grammar came from one. */
    implicitLexer;

    /** If this is an extracted/implicit lexer, points back at the original grammar. */
    originalGrammar;

    /** Rules defined in this grammar only (no imports; no lexical rules if combined). */
    rules = new Map();

    decisionLookahead;

    tool;

    /** Maps token names like `ID` (not literals like `'while'`) to their token type. */
    tokenNameToTypeMap = new Map();

    /**
     * Maps token literals like `'while'` to their token type. With `WHILE="while"=35` both this map
     * and {@link tokenNameToTypeMap} carry an entry for 35.
     */
    stringLiteralToTypeMap = new Map();

    /** Reverse index for {@link stringLiteralToTypeMap}, indexed by raw token type; 0 is invalid. */
    typeToStringLiteralList = new Array();

    /** Maps user-defined channel names (e.g. `COMMENTS_CHANNEL`) to their constant value. */
    channelNameToValueMap = new Map();

    /**
     * Maps a constant channel value back to its name, indexed by raw channel value. The predefined
     * DEFAULT/HIDDEN channels are not stored, so those slots are `null`.
     */
    channelValueToNameList = new Array();

    /** Named actions (for code-gen holes), keyed by name; AST node kept for error line numbers. */
    namedActions = new Map();

    /** All user lexer actions across all rules/alts (no sempreds): tree node -> action index (1..n). */
    lexerActions = new Map();

    /** Maps a token type to its token name, indexed by raw token type; 0 is invalid. */
    typeToTokenList = [];

    /** All sempreds in the grammar: tree node -> sempred index (0..n-1). */
    sempreds = new Map();

    importedGrammars = [];

    /** Rules by index, for invented names like 'keyword', ';', ... (0..n-1). */
    indexToRule = new Array();

    decisionDFAs = new Map();

    /** Reverse of {@link sempreds}, built lazily on demand. */
    indexToPredMap = null;

    /** Used to assign rule indexes (0..n-1). */
    ruleNumber = 0;

    stringLiteralRuleNumber = 0;

    /**
     * Highest token type allocated so far. Token names/literals are uniquely indexed with -1 meaning
     * EOF; chars range -1 (EOF) to \uFFFE. Negative numbers are bookkeeping labels like EPSILON.
     */
    maxTokenType = Token.MIN_USER_TOKEN_TYPE - 1;

    /** Highest channel value assigned by this grammar; values below MIN_USER_CHANNEL_VALUE are predefined. */
    maxChannelType = Token.MIN_USER_CHANNEL_VALUE - 1;

    /** The grammar that imported us; `null` means this grammar is the root. */
    parent = null;

    /**
     * Two call shapes: `(tool, ast)` wraps an already-parsed grammar AST, while
     * `(grammarText[, tokenVocabSource])` parses the text with a freshly created (silenced) tool.
     */
    constructor(...args) {
        if (typeof args[0] !== "string") {
            // (tool, ast) form.
            [this.tool, this.ast] = args;
            this.name = this.ast.children[0].getText();
            this.tokenStream = this.ast.tokenStream;
            this.originalTokenStream = this.tokenStream;
            this.initTokenSymbolTables();

            return;
        }

        // (grammarText[, tokenVocabSource]) form.
        const grammarText = args[0];
        let tokenVocabSource;
        if (args.length > 1) {
            tokenVocabSource = args[1];
        }

        this.fileName = Constants.GrammarFromStringName;
        this.tool = ClassFactory.createTool();

        // Swallow all tool diagnostics for ad-hoc string grammars.
        const hush = {
            info: __name((msg) => { }, "info"),
            error: __name((msg) => { }, "error"),
            warning: __name((msg) => { }, "warning")
        };
        this.tool.errorManager.addListener(hush);

        const input = CharStream.fromString(grammarText);
        input.name = this.fileName;

        const root = this.tool.parse(input);
        if (!root) {
            throw new Error("Could not parse grammar");
        }

        this.ast = root;
        this.tokenStream = root.tokenStream;
        this.originalTokenStream = this.tokenStream;

        // Point every AST node back at this grammar.
        const v = new TreeVisitor();
        v.visit(this.ast, {
            pre: __name((t) => {
                t.g = this;

                return t;
            }, "pre"),
            post: __name((t) => {
                return t;
            }, "post")
        });

        this.initTokenSymbolTables();

        if (tokenVocabSource) {
            this.importVocab(tokenVocabSource);
        }
    }

    /**
     * Builds a grammar of class `c` from `grammarText`, then re-parses it under a fresh tool with
     * `fileName` attached (and an optional extra error `listener`).
     */
    static forFile(c, fileName, grammarText, tokenVocabSource, listener) {
        const grammar = new c(grammarText, tokenVocabSource);
        grammar.fileName = fileName;
        grammar.tool = ClassFactory.createTool();

        const hush = {
            info: __name((msg) => { }, "info"),
            error: __name((msg) => { }, "error"),
            warning: __name((msg) => { }, "warning")
        };
        grammar.tool.errorManager.addListener(hush);
        if (listener) {
            grammar.tool.errorManager.addListener(listener);
        }

        const input = CharStream.fromString(grammarText);
        input.name = basename(fileName);

        const root = grammar.tool.parse(input);
        if (!root) {
            throw new Error("Could not parse grammar");
        }

        grammar.ast = root;
        grammar.tokenStream = root.tokenStream;
        grammar.originalTokenStream = root.tokenStream;

        const v = new TreeVisitor();
        v.visit(grammar.ast, {
            pre: __name((t) => {
                t.g = grammar;

                return t;
            }, "pre"),
            post: __name((t) => {
                return t;
            }, "post")
        });

        grammar.initTokenSymbolTables();

        if (tokenVocabSource) {
            grammar.importVocab(tokenVocabSource);
        }

        return grammar;
    }

    /** Maps a grammar type to the generated-file-name suffix ("Lexer"/"Parser"). */
    static getGrammarTypeToFileNameSuffix(type) {
        switch (type) {
            case GrammarType.Lexer: {
                return "Lexer";
            }

            case GrammarType.Parser: {
                return "Parser";
            }

            // If combined grammar, gen Parser and Lexer will be done later.
            case GrammarType.Combined: {
                return "Parser";
            }

            default: {
                return "<invalid>";
            }
        }
    }

    /**
     * Given ^(TOKEN_REF ^(OPTIONS ^(ELEMENT_OPTIONS (= assoc right)))) sets option assoc=right
     * in TOKEN_REF.
     */
    static setNodeOptions(node, options) {
        const t = node;
        if (t.children.length === 0 || options.children.length === 0) {
            return;
        }

        for (const o of options.children) {
            const c = o;
            if (c.getType() === ANTLRv4Parser.ASSIGN) {
                t.setOption(c.children[0].getText(), c.children[1]);
            } else {
                t.setOption(c.getText(), null); // Implicit true.
            }
        }
    }

    /** @returns list of (TOKEN_NAME node, 'literal' node) pairs */
    static getStringLiteralAliasesFromLexerRules(ast) {
        // Every lexer-rule shape that amounts to a plain string-literal alias.
        const patterns = [
            "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL)))",
            "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL ACTION)))",
            "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL SEMPRED)))",
            "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) .)))",
            "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . .)))",
            "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .))))",
            "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . (LEXER_ACTION_CALL . .))))",
            "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .) .)))"
        ];

        const wiz = new TreeWizard(ANTLRv4Parser.symbolicNames);
        const lexerRuleToStringLiteral = new Array();

        const ruleNodes = ast.getNodesWithType(ANTLRv4Parser.RULE);
        if (ruleNodes.length === 0) {
            return null;
        }

        for (const r of ruleNodes) {
            const name = r.children[0];
            if (name.getType() === ANTLRv4Parser.TOKEN_REF) {
                let isLitRule;
                for (const pattern of patterns) {
                    isLitRule = Grammar.defAlias(r, pattern, wiz, lexerRuleToStringLiteral);
                    if (isLitRule) {
                        break;
                    }
                }
            }
        }

        return lexerRuleToStringLiteral;
    }

    /** Tries one wizard `pattern` against rule `r`; on match records the (name, literal) pair. */
    static defAlias(r, pattern, wiz, lexerRuleToStringLiteral) {
        const nodes = new Map();
        if (wiz.parse(r, pattern, nodes)) {
            const litNode = nodes.get("lit");
            const nameNode = nodes.get("name");
            lexerRuleToStringLiteral.push([nameNode, litNode]);

            return true;
        }

        return false;
    }

    /** Recursively loads all grammars referenced by this grammar's IMPORT statement. */
    loadImportedGrammars(visited) {
        const i = this.ast.getFirstChildWithType(ANTLRv4Parser.IMPORT);
        if (i === null) {
            return;
        }

        visited.add(this.name);
        for (const c of i.children) {
            let t = c;
            let importedGrammarName = null;
            if (t.getType() === ANTLRv4Parser.ASSIGN) {
                t = t.children[1];
                importedGrammarName = t.getText();
            } else {
                if (t.getType() === ANTLRv4Parser.ID) {
                    importedGrammarName = t.getText();
                }
            }

            // Ignore circular refs.
            if (!importedGrammarName || visited.has(importedGrammarName)) {
                continue;
            }

            let g;
            try {
                g = this.tool.loadImportedGrammar(this, t);
                if (!g) {
                    continue;
                }
            } catch {
                this.tool.errorManager.grammarError(IssueCode.ErrorReadingImportedGrammar,
                    importedGrammarName, t.token, importedGrammarName, this.name);

                continue;
            }

            g.parent = this;
            this.importedGrammars.push(g);
            g.loadImportedGrammars(visited);
        }
    }

    /** Registers a named `@...` action, honoring an optional scope prefix (`parser::name`, ...). */
    defineAction(atAST) {
        if (atAST.children.length === 2) {
            const name = atAST.children[0].getText();
            this.namedActions.set(name, atAST.children[1]);
        } else {
            const scope = atAST.children[0].getText();
            const grammarType = this.getTypeString();
            if (scope === grammarType || scope === "parser" && grammarType === "combined") {
                const name = atAST.children[1].getText();
                this.namedActions.set(name, atAST.children[2]);
            }
        }
    }

    /**
     * Defines the specified rule in the grammar: assigns {@link Rule.index} from {@link ruleNumber}
     * and records it in {@link rules} and {@link indexToRule}.
     *
     * @param r The rule to define in the grammar.
     * @returns `true` if the rule was added, `false` if a rule with this name already existed.
     */
    defineRule(r) {
        if (this.rules.has(r.name)) {
            return false;
        }

        this.rules.set(r.name, r);
        r.index = this.ruleNumber++;
        this.indexToRule.push(r);

        return true;
    }

    /** Looks a rule up by `(name)`, `(index)` or `(grammarName, ruleName)`. */
    getRule(...args) {
        switch (args.length) {
            case 1: {
                if (typeof args[0] === "string") {
                    const [name] = args;
                    const r = this.rules.get(name);
                    if (r) {
                        return r;
                    }

                    return null;
                } else {
                    const [index] = args;

                    return this.indexToRule[index];
                }
            }

            case 2: {
                const [grammarName, ruleName] = args;
                if (grammarName) {
                    // Qualified rule name: scan imported grammars.
                    const g = this.getImportedGrammar(grammarName);
                    if (g === null) {
                        return null;
                    }

                    return g.rules.get(ruleName) ?? null;
                }

                return this.getRule(ruleName);
            }

            default: {
                return null;
            }
        }
    }

    /** Needed for tests. */
    getATN() {
        if (!this.atn) {
            const factory = ClassFactory.createParserATNFactory(this);
            this.atn = factory.createATN();
        }

        return this.atn;
    }

    /**
     * Get list of all imports from all grammars in the delegate subtree of g, in import-tree
     * preorder. Does not include ourselves (we're not a delegate of ourselves).
     */
    getAllImportedGrammars() {
        const delegates = new Map();
        for (const d of this.importedGrammars) {
            delegates.set(d.fileName, d);
            const ds = d.getAllImportedGrammars();
            for (const imported of ds) {
                delegates.set(imported.fileName, imported);
            }
        }

        return Array.from(delegates.values());
    }

    getImportedGrammars() {
        return this.importedGrammars;
    }

    /**
     * Return list of imported grammars from root down to our parent, i.e.
     * [root, ..., this.parent] (us not included); `null` if we are the root.
     */
    getGrammarAncestors() {
        const root = this.getOutermostGrammar();
        if (this === root) {
            return null;
        }

        const grammars = new Array();

        // Walk backwards to root.
        let p = this.parent;
        while (p !== null) {
            grammars.unshift(p);
            p = p.parent;
        }

        return grammars;
    }

    /** @returns the grammar that imported us and our parents, or this if we're root. */
    getOutermostGrammar() {
        if (this.parent === null) {
            return this;
        }

        return this.parent.getOutermostGrammar();
    }

    /**
     * Gets the name of the generated recognizer; may or may not be same as grammar name. Recognizer
     * is TParser and TLexer from T if combined, else just use T regardless of grammar type.
     */
    getRecognizerName() {
        let suffix = "";
        const grammarsFromRootToMe = this.getOutermostGrammar().getGrammarAncestors();
        let qualifiedName = this.name;
        if (grammarsFromRootToMe !== null) {
            qualifiedName = "";
            for (const g of grammarsFromRootToMe) {
                qualifiedName += g.name;
                qualifiedName += "_";
            }
            qualifiedName += this.name;
        }

        if (this.isCombined()) {
            suffix = Grammar.getGrammarTypeToFileNameSuffix(this.type);
        }

        return qualifiedName + suffix;
    }

    /** Invents the next auto-generated lexer-rule name (T__0, T__1, ...) for a string literal. */
    getStringLiteralLexerRuleName(_literal) {
        return `${Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX}${this.stringLiteralRuleNumber++}`;
    }

    /** @returns grammar directly imported by this grammar. */
    getImportedGrammar(name) {
        for (const g of this.importedGrammars) {
            if (g.name === name) {
                return g;
            }
        }

        return null;
    }

    /** Resolves a token name or `'literal'` to its type, or {@link Token.INVALID_TYPE}. */
    getTokenType(token) {
        let index;
        if (token.startsWith("'")) {
            index = this.stringLiteralToTypeMap.get(token);
        } else {
            index = this.tokenNameToTypeMap.get(token);
        }

        return index ?? Token.INVALID_TYPE;
    }

    /**
     * Gets the name by which a token can be referenced in the generated code. For tokens defined in
     * a `tokens{}` block or via a lexer rule, this is the declared name. For types generated from a
     * string literal in a combined grammar's parser rule, this is the auto-generated name with the
     * {@link AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types with no associated token this
     * returns {@link INVALID_TOKEN_NAME}.
     *
     * @param literalOrTokenType The token literal (string) or token type (number).
     *
     * @returns The name of the token with the specified type.
     */
    getTokenName(literalOrTokenType) {
        if (typeof literalOrTokenType === "string") {
            // Walk up the import chain until a grammar knows this literal.
            let grammar = this;
            while (grammar !== null) {
                if (grammar.stringLiteralToTypeMap.has(literalOrTokenType)) {
                    return grammar.getTokenName(grammar.stringLiteralToTypeMap.get(literalOrTokenType));
                }

                grammar = grammar.parent;
            }

            return null;
        } else {
            if (this.isLexer()
                // TODO: make the min and max char values from the lexer options available here.
                && literalOrTokenType >= 0 && literalOrTokenType <= 131071) {
                return CharSupport.getANTLRCharLiteralForChar(literalOrTokenType);
            }

            if (literalOrTokenType === Token.EOF) {
                return "EOF";
            }

            if (literalOrTokenType >= 0 && literalOrTokenType < this.typeToTokenList.length
                && this.typeToTokenList[literalOrTokenType]) {
                return this.typeToTokenList[literalOrTokenType];
            }

            return Grammar.INVALID_TOKEN_NAME;
        }
    }

    /**
     * Given a token type, get a meaningful name for it such as the ID or string literal. If this is
     * a lexer and the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped)
     * char literal.
     */
    getTokenDisplayName(ttype) {
        if (this.isLexer() && ttype >= 0 && ttype <= 131071) {
            return CharSupport.getANTLRCharLiteralForChar(ttype);
        }

        if (ttype === Token.EOF) {
            return "EOF";
        }

        if (ttype === Token.INVALID_TYPE) {
            return Grammar.INVALID_TOKEN_NAME;
        }

        if (ttype >= 0 && ttype < this.typeToStringLiteralList.length
            && this.typeToStringLiteralList[ttype] != null) {
            return this.typeToStringLiteralList[ttype] ?? String(ttype);
        }

        if (ttype >= 0 && ttype < this.typeToTokenList.length && this.typeToTokenList[ttype] != null) {
            return this.typeToTokenList[ttype] ?? String(ttype);
        }

        return String(ttype);
    }

    /**
     * Gets the constant channel value for a user-defined channel.
     *
     * Only user-defined channels are considered: the predefined DEFAULT/HIDDEN channels and any
     * channel defined in code (e.g. in a `@members{}` block) are ignored.
     *
     * @param channel The channel name.
     *
     * @returns The channel value if `channel` names a known user-defined token channel, else -1.
     */
    getChannelValue(channel) {
        const index = this.channelNameToValueMap.get(channel);

        return index ?? -1;
    }

    /**
     * Gets an array of rule names for rules defined or imported by the grammar. The array index is
     * the rule index, and the value is the name of the rule with the corresponding
     * {@link Rule.index}.
     *
     * @returns The names of all rules defined in the grammar.
     */
    getRuleNames() {
        return [...this.rules.keys()];
    }

    /**
     * Gets an array of token names indexed by token type; each value is the result of
     * {@link getTokenName} for that type.
     *
     * @returns The token names of all tokens defined in the grammar.
     */
    getTokenNames() {
        const max = this.getMaxTokenType();
        const tokenNames = [];
        for (let i = 0; i <= max; ++i) {
            tokenNames.push(this.getTokenName(i));
        }

        return tokenNames;
    }

    /**
     * Gets an array of display names indexed by token type; each value is the result of
     * {@link getTokenDisplayName} for that type.
     *
     * @returns The display names of all tokens defined in the grammar.
     */
    getTokenDisplayNames() {
        const numTokens = this.getMaxTokenType();
        const tokenNames = new Array(numTokens + 1);
        tokenNames.fill(null);
        for (let i = 0; i < tokenNames.length; i++) {
            tokenNames[i] = this.getTokenDisplayName(i);
        }

        return tokenNames;
    }

    /** Gets the literal names assigned to tokens in the grammar, indexed by token type. */
    getTokenLiteralNames() {
        const numTokens = this.getMaxTokenType();
        const literalNames = new Array(numTokens + 1);
        literalNames.fill(null);

        for (let i = 0; i < Math.min(literalNames.length, this.typeToStringLiteralList.length); i++) {
            literalNames[i] = this.typeToStringLiteralList[i];
        }

        // Fill remaining holes from the literal -> type map.
        for (const [key, value] of this.stringLiteralToTypeMap) {
            if (value >= 0 && value < literalNames.length && !literalNames[value]) {
                literalNames[value] = key;
            }
        }

        return literalNames;
    }

    /** Gets the symbolic names assigned to tokens in the grammar, indexed by token type. */
    getTokenSymbolicNames() {
        const numTokens = this.getMaxTokenType();
        const symbolicNames = new Array(numTokens + 1);
        symbolicNames.fill(null);

        for (let i = 0; i < Math.min(symbolicNames.length, this.typeToTokenList.length); i++) {
            const name = this.typeToTokenList[i];

            // Auto-generated T__n names are not symbolic.
            if (!name || name.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
                continue;
            }

            symbolicNames[i] = name;
        }

        return symbolicNames;
    }

    /** Gets a {@link Vocabulary} instance describing the vocabulary used by the grammar. */
    getVocabulary() {
        return new Vocabulary(this.getTokenLiteralNames(), this.getTokenSymbolicNames());
    }

    /** Builds the sempred-index -> predicate-AST reverse map from all rules. */
    getIndexToPredicateMap() {
        const indexToPredMap = new Map();
        for (const r of this.rules.values()) {
            for (const a of r.actions) {
                if (a.astType === "PredAST") {
                    indexToPredMap.set(this.sempreds.get(a), a);
                }
            }
        }

        return indexToPredMap;
    }

    /** Returns the source text of the predicate with `pred.predIndex` (map built lazily). */
    getPredicateDisplayString(pred) {
        if (this.indexToPredMap === null) {
            this.indexToPredMap = this.getIndexToPredicateMap();
        }

        const actionAST = this.indexToPredMap.get(pred.predIndex);

        return actionAST.getText();
    }

    /**
     * What is the max char value possible for this grammar's target? Use unicode max if no
     * target defined.
     */
    getMaxCharValue() {
        return 131071;
    }

    /** @returns a set of all possible token or char types for this grammar. */
    getTokenTypes() {
        if (this.isLexer()) {
            return this.getAllCharValues();
        }

        return IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, this.getMaxTokenType());
    }

    /** @returns min to max char as defined by the target. If no target, use max unicode char value. */
    getAllCharValues() {
        return IntervalSet.of(0, this.getMaxCharValue());
    }

    /** How many token types have been allocated so far? */
    getMaxTokenType() {
        return this.typeToTokenList.length - 1;
    }

    /** @returns a new unique integer in the token type space. */
    getNewTokenType() {
        this.maxTokenType++;

        return this.maxTokenType;
    }

    /** @returns a new unique integer in the channel value space. */
    getNewChannelNumber() {
        this.maxChannelType++;

        return this.maxChannelType;
    }

    /** Imports tokens from the `.tokens` file named by the `tokenVocab` option, if set. */
    importTokensFromTokensFile() {
        const vocab = this.getOptionString("tokenVocab");
        if (vocab) {
            const vParser = new TokenVocabParser(this, this.tool.getOutputDirectory(this.fileName),
                this.tool.toolParameters.lib);
            const tokens = vParser.load();
            this.tool.logInfo({ component: "grammar", msg: `tokens=${String(tokens)}` });

            for (const t of tokens.keys()) {
                if (t.startsWith("'")) {
                    this.defineStringLiteral(t, tokens.get(t));
                } else {
                    this.defineTokenName(t, tokens.get(t));
                }
            }
        }
    }

    /** Copies token names, string literals and channels from `importG` into this grammar. */
    importVocab(importG) {
        for (const tokenName of importG.tokenNameToTypeMap.keys()) {
            this.defineTokenName(tokenName, importG.tokenNameToTypeMap.get(tokenName));
        }

        for (const tokenName of importG.stringLiteralToTypeMap.keys()) {
            this.defineStringLiteral(tokenName, importG.stringLiteralToTypeMap.get(tokenName));
        }

        for (const [key, value] of importG.channelNameToValueMap) {
            this.defineChannelName(key, value);
        }

        let max = Math.max(this.typeToTokenList.length, importG.typeToTokenList.length);
        Utils.setSize(this.typeToTokenList, max);
        for (let ttype = 0; ttype < importG.typeToTokenList.length; ttype++) {
            this.maxTokenType = Math.max(this.maxTokenType, ttype);
            this.typeToTokenList[ttype] = importG.typeToTokenList[ttype];
        }

        max = Math.max(this.channelValueToNameList.length, importG.channelValueToNameList.length);
        Utils.setSize(this.channelValueToNameList, max);
        for (let channelValue = 0; channelValue < importG.channelValueToNameList.length; channelValue++) {
            this.maxChannelType = Math.max(this.maxChannelType, channelValue);
            this.channelValueToNameList[channelValue] = importG.channelValueToNameList[channelValue];
        }
    }

    /** Defines `name` with the given (or a freshly allocated) type; keeps an existing mapping. */
    defineTokenName(name, ttype) {
        const prev = this.tokenNameToTypeMap.get(name);
        if (prev !== void 0) {
            return prev;
        }

        ttype ??= this.getNewTokenType();
        this.tokenNameToTypeMap.set(name, ttype);
        this.setTokenForType(ttype, name);
        this.maxTokenType = Math.max(this.maxTokenType, ttype);

        return ttype;
    }

    /**
     * Defines the literal `lit` with the given (or a freshly allocated) type. Returns the assigned
     * type, or {@link Token.INVALID_TYPE} when the literal was already mapped and an explicit
     * `ttype` was passed.
     */
    defineStringLiteral(lit, ttype) {
        if (ttype === void 0) {
            if (this.stringLiteralToTypeMap.has(lit)) {
                return this.stringLiteralToTypeMap.get(lit);
            }

            ttype = this.getNewTokenType();
        }

        if (!this.stringLiteralToTypeMap.has(lit)) {
            this.stringLiteralToTypeMap.set(lit, ttype);

            if (ttype >= this.typeToStringLiteralList.length) {
                Utils.setSize(this.typeToStringLiteralList, ttype + 1);
            }
            this.typeToStringLiteralList[ttype] = lit;

            this.setTokenForType(ttype, lit);

            return ttype;
        }

        return Token.INVALID_TYPE;
    }

    /** Defines `name` as a token and aliases the literal `lit` to the same type. */
    defineTokenAlias(name, lit) {
        const ttype = this.defineTokenName(name);
        this.stringLiteralToTypeMap.set(lit, ttype);
        this.setTokenForType(ttype, name);

        return ttype;
    }

    /** Records `text` as the token name for `ttype`; a name only replaces a literal, never a name. */
    setTokenForType(ttype, text) {
        if (ttype === Token.EOF) {
            // Ignore EOF, it will be reported as an error separately.
            return;
        }

        if (ttype >= this.typeToTokenList.length) {
            Utils.setSize(this.typeToTokenList, ttype + 1);
        }

        const prevToken = this.typeToTokenList[ttype];
        if (prevToken === null || prevToken.startsWith("'")) {
            this.typeToTokenList[ttype] = text;
        }
    }

    /**
     * Defines a token channel with a specified name. If a channel with the specified name already
     * exists, the previously assigned channel value is not altered.
     *
     * @param name The channel name.
     * @param value The channel value; allocated automatically when omitted.
     *
     * @returns The constant channel value assigned to the channel.
     */
    defineChannelName(name, value) {
        if (value === void 0) {
            const prev2 = this.channelNameToValueMap.get(name);
            if (prev2 === void 0) {
                return this.defineChannelName(name, this.getNewChannelNumber());
            }

            return prev2;
        }

        const prev = this.channelNameToValueMap.get(name);
        if (prev !== void 0) {
            return prev;
        }

        this.channelNameToValueMap.set(name, value);
        this.setChannelNameForValue(value, name);
        this.maxChannelType = Math.max(this.maxChannelType, value);

        return value;
    }

    /**
     * Sets the channel name associated with a particular channel value. If a name has already been
     * assigned to the channel with constant value `channelValue`, this method does nothing.
     *
     * @param channelValue The constant value for the channel.
     * @param name The channel name.
     */
    setChannelNameForValue(channelValue, name) {
        if (channelValue >= this.channelValueToNameList.length) {
            Utils.setSize(this.channelValueToNameList, channelValue + 1);
        }

        const prevChannel = this.channelValueToNameList[channelValue];
        if (!prevChannel) {
            this.channelValueToNameList[channelValue] = name;
        }
    }

    // Attribute/label resolution stubs. Subclasses or other tool components are expected to
    // provide real resolution; the base grammar resolves nothing.
    resolveToAttribute(...args) {
        return null;
    }

    resolvesToLabel(x, node) {
        return false;
    }

    resolvesToListLabel(x, node) {
        return false;
    }

    resolvesToToken(x, node) {
        return false;
    }

    resolvesToAttributeDict(x, node) {
        return false;
    }

    /**
     * Given a grammar type, what should be the default action scope? If I say @members in a
     * COMBINED grammar, for example, the default scope should be "parser".
     */
    getDefaultActionScope() {
        switch (this.type) {
            case GrammarType.Lexer: {
                return "lexer";
            }

            case GrammarType.Parser:
            case GrammarType.Combined: {
                return "parser";
            }

            default:
        }

        return null;
    }

    get type() {
        return this.ast.grammarType;
    }

    isLexer() {
        return this.type === GrammarType.Lexer;
    }

    isParser() {
        return this.type === GrammarType.Parser;
    }

    isCombined() {
        return this.type === GrammarType.Combined;
    }

    /** Maps the grammar type to its lowercase string form ("lexer" | "parser" | "combined"). */
    getTypeString() {
        if (this.isLexer()) {
            return "lexer";
        }

        if (this.isParser()) {
            return "parser";
        }

        return "combined";
    }

    /** Returns the target language from the `language` option, defaulting to "Java". */
    getLanguage() {
        const language = this.getOptionString("language");
        if (language && !targetLanguages.includes(language)) {
            this.tool.errorManager.toolError(IssueCode.CannotCreateTargetGenerator, language);
        }

        return language ?? "Java";
    }

    getOptionString(key) {
        return this.ast.getOptionString(key);
    }

    /** Collects every string literal referenced anywhere in the grammar AST. */
    getStringLiterals() {
        const strings = new Set();
        const collector = new class extends GrammarTreeVisitor {
            stringRef(ref) {
                strings.add(ref.getText());
            }
        }(this.tool.errorManager, {});
        collector.visitGrammar(this.ast);

        return strings;
    }

    /** Creates a {@link LexerInterpreter} for this (lexer or combined) grammar's ATN. */
    createLexerInterpreter(input) {
        if (!this.atn) {
            throw new Error("The ATN must be created before creating a lexer interpreter. " +
                "Have you called `Grammar.tool.process()`?");
        }

        if (this.isParser()) {
            throw new Error("A lexer interpreter can only be created for a lexer or combined grammar.");
        }

        if (this.isCombined()) {
            // Delegate to the lexer extracted from this combined grammar.
            return this.implicitLexer.createLexerInterpreter(input);
        }

        const allChannels = [];
        allChannels.push("DEFAULT_TOKEN_CHANNEL");
        allChannels.push("HIDDEN");
        allChannels.push(...this.channelValueToNameList);

        // Must run the ATN through serializer to set some state flags.
        const serialized = ATNSerializer.getSerialized(this.atn);
        const deserializedATN = new ATNDeserializer().deserialize(serialized);

        return new LexerInterpreter(this.fileName, this.getVocabulary(), this.getRuleNames(),
            allChannels, [...this.modes.keys()], deserializedATN, input);
    }

    /** Creates a grammar-aware parser interpreter for this (parser or combined) grammar. */
    createGrammarParserInterpreter(tokenStream) {
        if (!this.atn) {
            throw new Error("The ATN must be created before creating a lexer interpreter. Have you called `Grammar.tool.process()`?");
        }

        if (this.isLexer()) {
            throw new Error("A parser interpreter can only be created for a parser or combined grammar.");
        }

        const serialized = ATNSerializer.getSerialized(this.atn);
        const deserializedATN = new ATNDeserializer().deserialize(serialized);

        return ClassFactory.createGrammarParserInterpreter(this, deserializedATN, tokenStream);
    }

    /** For testing. */
    createParserInterpreter(tokenStream) {
        if (!this.atn) {
            throw new Error("The ATN must be created before creating a lexer interpreter. Have you called `Grammar.tool.process()`?");
        }

        if (this.isLexer()) {
            throw new Error("A parser interpreter can only be created for a parser or combined grammar.");
        }

        const serialized = ATNSerializer.getSerialized(this.atn);
        const deserializedATN = new ATNDeserializer().deserialize(serialized);

        return new ParserInterpreter(this.fileName, this.getVocabulary(), this.getRuleNames(),
            deserializedATN, tokenStream);
    }

    /**
     * Undefines the specified rule: removes `r` from {@link rules} and {@link indexToRule}, shifts
     * the {@link Rule.index} of every later rule down by one and decrements {@link ruleNumber}.
     *
     * Does nothing if this grammar does not contain `r` at index `r.index` in {@link indexToRule}.
     *
     * @returns `true` if the rule was removed, `false` if it was not defined in this grammar.
     */
    undefineRule(r) {
        if (r.index < 0 || r.index >= this.indexToRule.length || this.indexToRule[r.index] !== r) {
            return false;
        }

        this.rules.delete(r.name);
        this.indexToRule.splice(r.index, 1);
        for (let i = r.index; i < this.indexToRule.length; i++) {
            --this.indexToRule[i].index;
        }

        --this.ruleNumber;

        return true;
    }

    /** Seeds the token tables with EOF (type slot 0 is reserved/invalid). */
    initTokenSymbolTables() {
        this.tokenNameToTypeMap.set("EOF", Token.EOF);
        this.typeToTokenList.push(null);
    }

    static {
        ClassFactory.createGrammar = (tool, grammar) => {
            return new Grammar(tool, grammar);
        };

        Grammar.parserOptions.add("superClass");
        Grammar.parserOptions.add("contextSuperClass");
        Grammar.parserOptions.add("TokenLabelType");
        Grammar.parserOptions.add("tokenVocab");
        Grammar.parserOptions.add("language");
        Grammar.parserOptions.add("accessLevel");
        Grammar.parserOptions.add("exportMacro");
        Grammar.parserOptions.add(Grammar.caseInsensitiveOptionName);

        Grammar.tokenOptions.add("assoc");
        Grammar.tokenOptions.add(Constants.TokenIndexOptionName);

        Grammar.semPredOptions.add(Constants.PrecedenceOptionName);
        Grammar.semPredOptions.add("fail");

        Grammar.doNotCopyOptionsToLexer.add("superClass");
        Grammar.doNotCopyOptionsToLexer.add("TokenLabelType");
        Grammar.doNotCopyOptionsToLexer.add("tokenVocab");

        Grammar.grammarAndLabelRefTypeToScope.set("parser:RULE_LABEL", Constants.predefinedRulePropertiesDict);
        Grammar.grammarAndLabelRefTypeToScope.set("parser:TOKEN_LABEL", Constants.predefinedTokenDict);
        Grammar.grammarAndLabelRefTypeToScope.set("combined:RULE_LABEL", Constants.predefinedRulePropertiesDict);
        Grammar.grammarAndLabelRefTypeToScope.set("combined:TOKEN_LABEL", Constants.predefinedTokenDict);
    }
}

export { Grammar };