UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

323 lines (322 loc) 16.4 kB
import { ATN, CharStream, IntervalSet, LexerInterpreter, ParserInterpreter, SemanticContext, TokenStream, Vocabulary } from "antlr4ng";
import { TreeWizard } from "../tree/TreeWizard.js";
import { type SupportedLanguage } from "../codegen/CodeGenerator.js";
import { type Constructor } from "../misc/Utils.js";
import { GrammarType } from "../support/GrammarType.js";
import type { IGrammar, ITool } from "../types.js";
import type { ActionAST } from "./ast/ActionAST.js";
import type { GrammarAST } from "./ast/GrammarAST.js";
import type { GrammarRootAST } from "./ast/GrammarRootAST.js";
import type { PredAST } from "./ast/PredAST.js";
import type { AttributeDict } from "./AttributeDict.js";
import type { GrammarParserInterpreter } from "./GrammarParserInterpreter.js";
import type { IAttribute } from "./IAttribute.js";
import type { IAttributeResolver } from "./IAttributeResolver.js";
import type { LexerGrammar } from "./LexerGrammar.js";
import type { Rule } from "./Rule.js";
import { ToolListener } from "./ToolListener.js";

/**
 * In-memory model of a single ANTLR grammar: its AST, rules, token/channel symbol tables, imports,
 * and the ATN built from it. Implements {@link IAttributeResolver} so actions/predicates can resolve
 * attribute references (`$x`, `$x.y`) against this grammar's scopes.
 */
export declare class Grammar implements IGrammar, IAttributeResolver {
    /**
     * This value is used in the following situations to indicate that a token type does not have an associated
     * name which can be directly referenced in a grammar.
     *
     * - This value is the name and display name for the token with type {@link Token.INVALID_TYPE}.
     * - This value is the name for tokens with a type not represented by a named token. The display name for these
     *   tokens is simply the string representation of the token type as an integer.
     */
    static readonly INVALID_TOKEN_NAME = "<INVALID>";

    /** Name of the grammar-level option that switches a lexer to case-insensitive matching. */
    static readonly caseInsensitiveOptionName = "caseInsensitive";

    /** Legal grammar-level options for parser grammars. */
    static readonly parserOptions: Set<string>;

    /** Legal grammar-level options for lexer grammars. */
    static readonly lexerOptions: Set<string>;

    /** Legal options on individual lexer rules. */
    static readonly lexerRuleOptions: Set<string>;

    /** Legal options on individual parser rules. */
    static readonly parseRuleOptions: Set<string>;

    /** Legal options on blocks (subrules) within parser rules. */
    static readonly parserBlockOptions: Set<string>;

    /** Legal options on blocks (subrules) within lexer rules. */
    static readonly lexerBlockOptions: Set<string>;

    /** Legal options for rule refs like id&lt;key=value&gt; */
    static readonly ruleRefOptions: Set<string>;

    /** Legal options for terminal refs like ID&lt;assoc=right&gt; */
    static readonly tokenOptions: Set<string>;

    /** Legal options on embedded actions. */
    static readonly actionOptions: Set<string>;

    /** Legal options on semantic predicates. */
    static readonly semPredOptions: Set<string>;

    /** Options that must NOT be propagated to the lexer extracted from a combined grammar. */
    static readonly doNotCopyOptionsToLexer: Set<string>;

    /** Maps a grammar-type/label-ref-type key to the predefined attribute scope it resolves in. */
    static readonly grammarAndLabelRefTypeToScope: Map<string, AttributeDict>;

    /** Prefix for token names invented for string literals in parser rules (e.g. `T__3`). */
    static readonly AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";

    /** The grammar's declared name. */
    name: string;

    /**
     * The ATN that represents the grammar with edges labelled with tokens or epsilon. It is more suitable to
     * analysis than an AST representation.
     */
    atn?: ATN;

    /** Root of the grammar's parse tree (AST). */
    ast: GrammarRootAST;

    /** Track token stream used to create this grammar */
    tokenStream: TokenStream;

    /**
     * If we transform grammar, track original unaltered token stream. This is set to the same value as tokenStream
     * when tokenStream is initially set.
     *
     * If this field differs from tokenStream, then we have transformed the grammar.
     */
    originalTokenStream: TokenStream;

    /** Path of the file this grammar was loaded from. */
    fileName: string;

    /**
     * Was this parser grammar created from a COMBINED grammar? If so, this is what we extracted.
     */
    implicitLexer: LexerGrammar | undefined;

    /** If this is an extracted/implicit lexer, we point at original grammar. */
    originalGrammar?: Grammar;

    /** All rules defined in this specific grammar, not imported. Also does not include lexical rules if combined. */
    rules: Map<string, Rule>;

    /** Per-decision, per-alternative lookahead sets (indexed by decision, then alt). */
    decisionLookahead: IntervalSet[][];

    /** The tool instance that owns this grammar (error reporting, options, code generation). */
    tool: ITool;

    /** Map token like {@code ID} (but not literals like {@code 'while'}) to its token type. */
    readonly tokenNameToTypeMap: Map<string, number>;

    /**
     * Map token literals like {@code 'while'} to its token type. It may be that
     * {@code WHILE="while"=35}, in which case both {@link #tokenNameToTypeMap}
     * and this field will have entries both mapped to 35.
     */
    readonly stringLiteralToTypeMap: Map<string, number>;

    /** Reverse index for {@link stringLiteralToTypeMap}. Indexed with raw token type. 0 is invalid. */
    readonly typeToStringLiteralList: (string | null)[];

    /**
     * Map channel like `COMMENTS_CHANNEL` to its constant channel value. Only user-defined channels are
     * defined in this map.
     */
    readonly channelNameToValueMap: Map<string, number>;

    /**
     * Map a constant channel value to its name. Indexed with raw channel value. The predefined channels
     * {@link Token.DEFAULT_CHANNEL} and {@link Token.HIDDEN_CHANNEL} are not stored in this list, so the values
     * at the corresponding indexes is {@code null}.
     */
    readonly channelValueToNameList: string[];

    /**
     * Map a name to an action. The code generator will use this to fill holes in the output files. I track the AST
     * node for the action in case I need the line number for errors.
     */
    namedActions: Map<string, ActionAST>;

    /**
     * Tracks all user lexer actions in all alternatives of all rules. Doesn't track sempreds. Maps tree node to
     * action index (alt number 1..n).
     */
    lexerActions: Map<ActionAST, number>;

    /** Map a token type to its token name. Indexed with raw token type. 0 is invalid. */
    readonly typeToTokenList: Array<string | null>;

    /** All sempreds found in grammar; maps tree node to sempred index; sempred index is 0..n - 1. */
    sempreds: Map<PredAST, number>;

    // Grammars directly imported by this grammar.
    private importedGrammars;

    /** Used to invent rule names for 'keyword', ';', ... (0..n - 1). */
    private indexToRule;

    // DFAs cached per decision — presumably keyed by decision number; verify in implementation.
    private decisionDFAs;

    /** Map the other direction upon demand. */
    private indexToPredMap;

    /** used to get rule indexes (0..n-1) */
    private ruleNumber;

    // Counter used when inventing rule names for string literals.
    private stringLiteralRuleNumber;

    /**
     * Token names and literal tokens like "void" are uniquely indexed, with -1 implying EOF. Characters are
     * different. They go from -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you want to lexer. Labels
     * of DFA/ATN transitions can be both tokens and characters. I use negative numbers for bookkeeping labels
     * like EPSILON. Char/String literals and token types overlap in the same space, however.
     */
    private maxTokenType;

    /**
     * The maximum channel value which is assigned by this grammar. Values below {@link Token.MIN_USER_CHANNEL_VALUE}
     * are assumed to be predefined.
     */
    private maxChannelType;

    /** If we're imported, who imported us? If null, implies grammar is root. */
    private parent;

    constructor(tool: ITool, ast: GrammarRootAST);
    /** For testing */
    constructor(grammarText: string, tokenVocabSource?: LexerGrammar);

    /**
     * Constructs a grammar (of concrete subclass `c`) from grammar text, as if loaded from `fileName`.
     * Optional `tokenVocabSource` supplies an existing token vocabulary; `listener` receives tool messages.
     */
    static forFile<T extends Grammar>(c: Constructor<T>, fileName: string, grammarText: string, tokenVocabSource?: Grammar, listener?: ToolListener): T;

    /** Maps a grammar type to the suffix used for generated file names (e.g. parser vs. lexer output). */
    static getGrammarTypeToFileNameSuffix(type: GrammarType): string;

    /**
     * Given ^(TOKEN_REF ^(OPTIONS ^(ELEMENT_OPTIONS (= assoc right)))) sets option assoc=right in TOKEN_REF.
     */
    static setNodeOptions(node: GrammarAST, options: GrammarAST): void;

    /** @returns list of (TOKEN_NAME node, 'literal' node) pairs */
    static getStringLiteralAliasesFromLexerRules(ast: GrammarRootAST): Array<[GrammarAST, GrammarAST]> | null;

    // Helper for getStringLiteralAliasesFromLexerRules: matches `pattern` against rule `r` via the tree
    // wizard and, on success, records the (name, literal) pair. Returns whether the pattern matched.
    protected static defAlias(r: GrammarAST, pattern: string, wiz: TreeWizard, lexerRuleToStringLiteral: Array<[GrammarAST, GrammarAST]>): boolean;

    // Loads the grammars this one imports; `visited` guards against import cycles.
    loadImportedGrammars(visited: Set<string>): void;

    // Registers a named @action (e.g. @members, @header) from its AST node.
    defineAction(atAST: GrammarAST): void;

    /**
     * Defines the specified rule in the grammar. This method assigns the rule's {@link Rule.index} according to
     * the {@link ruleNumber} field, and adds the {@link Rule} instance to {@link rules} and {@link indexToRule}.
     *
     * @param r The rule to define in the grammar.
     * @returns `true` if the rule was added to the {@link Grammar} instance; otherwise, {@code false} if a rule with
     * this name already existed in the grammar instance.
     */
    defineRule(r: Rule): boolean;

    // Look up a rule by name or by index; the two-argument overload qualifies by grammar name.
    getRule(name: string | number): Rule | null;
    getRule(grammarName: string, ruleName: string): Rule | null;

    /** Needed for tests. */
    getATN(): ATN;

    /**
     * Get list of all imports from all grammars in the delegate subtree of g. The grammars are in import tree
     * preorder. Don't include ourselves in list as we're not a delegate of ourselves.
     */
    getAllImportedGrammars(): Grammar[];

    // Grammars directly imported by this grammar only (no transitive imports).
    getImportedGrammars(): Grammar[];

    /**
     * Return list of imported grammars from root down to our parent. Order is [root, ..., this.parent]
     * (us not included).
     */
    getGrammarAncestors(): Grammar[] | null;

    /**
     * @returns the grammar that imported us and our parents, or this if we're root.
     */
    getOutermostGrammar(): Grammar;

    /**
     * Gets the name of the generated recognizer; may or may not be same as grammar name. Recognizer is TParser and
     * TLexer from T if combined, else just use T regardless of grammar type.
     */
    getRecognizerName(): string;

    // Name of the lexer rule synthesized for a string literal. Parameter is unused in some
    // implementations (hence the underscore prefix).
    getStringLiteralLexerRuleName(_literal: string): string;

    /** @returns grammar directly imported by this grammar. */
    getImportedGrammar(name: string): Grammar | null;

    // Token type for a token name or literal; presumably Token.INVALID_TYPE when unknown — verify.
    getTokenType(token: string): number;

    /**
     * Gets the name by which a token can be referenced in the generated code. For tokens defined in a `tokens{}`
     * block or via a lexer rule, this is the declared name of the token. For token types generated by the use
     * of a string literal within a parser rule of a combined grammar, this is the automatically generated token
     * type which includes the {@link AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types which are not
     * associated with a defined token, this method returns {@link INVALID_TOKEN_NAME}.
     *
     * @param literalOrTokenType The token type.
     *
     * @returns The name of the token with the specified type.
     */
    getTokenName(literalOrTokenType: number | string): string | null;

    /**
     * Given a token type, get a meaningful name for it such as the ID or string literal. If this is a lexer and
     * the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
     */
    getTokenDisplayName(ttype: number): string;

    /**
     * Gets the constant channel value for a user-defined channel.
     *
     * This method only returns channel values for user-defined channels. All other channels, including the
     * predefined channels {@link Token.DEFAULT_CHANNEL} and {@link Token.HIDDEN_CHANNEL} along with
     * any channel defined in code (e.g. in a {@code @members{}} block), are ignored.
     *
     * @param channel The channel name.
     *
     * @returns The channel value, if `channel` is the name of a known user-defined token channel; otherwise, -1.
     */
    getChannelValue(channel: string): number;

    /**
     * Gets an array of rule names for rules defined or imported by the grammar. The array index is the rule index,
     * and the value is the name of the rule with the corresponding {@link Rule.index}.
     *
     * If no rule is defined with an index for an element of the resulting array, the value of that element is
     * {@link INVALID_RULE_NAME}.
     *
     * @returns The names of all rules defined in the grammar.
     */
    getRuleNames(): string[];

    /**
     * Gets an array of token names for tokens defined or imported by the grammar. The array index is the token type,
     * and the value is the result of {@link getTokenName} for the corresponding token type.
     *
     * @returns The token names of all tokens defined in the grammar.
     */
    getTokenNames(): Array<string | null>;

    /**
     * Gets an array of display names for tokens defined or imported by the grammar. The array index is the token
     * type, and the value is the result of {@link getTokenDisplayName} for the corresponding token type.
     *
     * @returns The display names of all tokens defined in the grammar.
     */
    getTokenDisplayNames(): Array<string | null>;

    /**
     * Gets the literal names assigned to tokens in the grammar.
     */
    getTokenLiteralNames(): Array<string | null>;

    /**
     * Gets the symbolic names assigned to tokens in the grammar.
     */
    getTokenSymbolicNames(): Array<string | null>;

    /**
     * Gets a {@link Vocabulary} instance describing the vocabulary used by the grammar.
     */
    getVocabulary(): Vocabulary;

    // Lazily-built reverse map of sempred index -> predicate AST (see indexToPredMap).
    getIndexToPredicateMap(): Map<number, PredAST>;

    // Renders a runtime semantic-context predicate in a human-readable form for diagnostics.
    getPredicateDisplayString(pred: SemanticContext.Predicate): string;

    /**
     * What is the max char value possible for this grammar's target? Use unicode max if no target defined.
     */
    getMaxCharValue(): number;

    /** @returns a set of all possible token or char types for this grammar. */
    getTokenTypes(): IntervalSet;

    /**
     * @returns min to max char as defined by the target. If no target, use max unicode char value.
     */
    getAllCharValues(): IntervalSet;

    /** How many token types have been allocated so far? */
    getMaxTokenType(): number;

    /** @returns a new unique integer in the token type space. */
    getNewTokenType(): number;

    /** @returns a new unique integer in the channel value space. */
    getNewChannelNumber(): number;

    // Imports token types from the companion .tokens file (tokenVocab option).
    importTokensFromTokensFile(): void;

    // Copies token and channel definitions from another grammar into this one.
    importVocab(importG: Grammar): void;

    // Registers a token name; allocates a new type when `ttype` is omitted. Returns the assigned type.
    defineTokenName(name: string, ttype?: number): number;

    // Registers a string literal; allocates a new type when `ttype` is omitted. Returns the assigned type.
    defineStringLiteral(lit: string, ttype?: number): number;

    // Defines name and literal as aliases for the same token type (e.g. WHILE='while'). Returns that type.
    defineTokenAlias(name: string, lit: string): number;

    // Records `text` as the name/literal for an existing token type.
    setTokenForType(ttype: number, text: string): void;

    /**
     * Defines a token channel with a specified name. If a channel with the specified name already exists, the
     * previously assigned channel value is not altered.
     *
     * @param name The channel name.
     *
     * @returns The constant channel value assigned to the channel.
     */
    defineChannelName(name: string, value?: number): number;

    /**
     * Sets the channel name associated with a particular channel value. If a name has already been assigned to the
     * channel with constant value `channelValue`, this method does nothing.
     *
     * @param channelValue The constant value for the channel.
     * @param name The channel name.
     */
    setChannelNameForValue(channelValue: number, name: string): void;

    // IAttributeResolver: resolve `$x` (one arg) or `$x.y` (two args) seen from `node`'s scope.
    resolveToAttribute(x: string, node: ActionAST): IAttribute;
    resolveToAttribute(x: string, y: string, node: ActionAST): IAttribute | null;

    // IAttributeResolver: classify what `$x` refers to in the scope visible from `node`.
    resolvesToLabel(x: string, node: ActionAST): boolean;
    resolvesToListLabel(x: string, node: ActionAST): boolean;
    resolvesToToken(x: string, node: ActionAST): boolean;
    resolvesToAttributeDict(x: string, node: ActionAST): boolean;

    /**
     * Given a grammar type, what should be the default action scope?
     * If I say @members in a COMBINED grammar, for example, the default scope should be "parser".
     */
    getDefaultActionScope(): string | null;

    /** The grammar's type: lexer, parser, or combined. */
    get type(): GrammarType;

    isLexer(): boolean;
    isParser(): boolean;
    isCombined(): boolean;

    // Human-readable form of the grammar type, or null if unknown.
    getTypeString(): string | null;

    // The target language selected via the `language` option.
    getLanguage(): SupportedLanguage;

    // Value of a grammar option, or undefined when not set.
    getOptionString(key: string): string | undefined;

    // All string literals referenced by this grammar.
    getStringLiterals(): Set<string>;

    // Builds a lexer interpreter over `input` using this grammar's ATN (grammar must be a lexer/combined).
    createLexerInterpreter(input: CharStream): LexerInterpreter;

    // Builds a parser interpreter that retains grammar information for interactive interpretation.
    createGrammarParserInterpreter(tokenStream: TokenStream): GrammarParserInterpreter;

    /** For testing. */
    createParserInterpreter(tokenStream: TokenStream): ParserInterpreter;

    /**
     * Undefines the specified rule from this {@link Grammar} instance. The instance `r` is removed from
     * {@link rules} and {@link indexToRule}. This method updates the {@link Rule.index} field for all rules defined
     * after `r`, and decrements {@link ruleNumber} in preparation for adding new rules.
     *
     * This method does nothing if the current {@link Grammar} does not contain the instance `r` at index
     * `r.index` in {@link indexToRule}.
     *
     * @returns `true` if the rule was removed from the {@link Grammar} instance; otherwise, `false` if the
     * specified rule was not defined in the grammar.
     */
    protected undefineRule(r: Rule): boolean;

    // Seeds the token/channel symbol tables with predefined entries (e.g. EOF).
    protected initTokenSymbolTables(): void;
}