antlr-ng
Next generation ANTLR Tool
import { ATN, CharStream, IntervalSet, LexerInterpreter, ParserInterpreter, SemanticContext, TokenStream, Vocabulary } from "antlr4ng";
import { TreeWizard } from "../tree/TreeWizard.js";
import { type SupportedLanguage } from "../codegen/CodeGenerator.js";
import { type Constructor } from "../misc/Utils.js";
import { GrammarType } from "../support/GrammarType.js";
import type { IGrammar, ITool } from "../types.js";
import type { ActionAST } from "./ast/ActionAST.js";
import type { GrammarAST } from "./ast/GrammarAST.js";
import type { GrammarRootAST } from "./ast/GrammarRootAST.js";
import type { PredAST } from "./ast/PredAST.js";
import type { AttributeDict } from "./AttributeDict.js";
import type { GrammarParserInterpreter } from "./GrammarParserInterpreter.js";
import type { IAttribute } from "./IAttribute.js";
import type { IAttributeResolver } from "./IAttributeResolver.js";
import type { LexerGrammar } from "./LexerGrammar.js";
import type { Rule } from "./Rule.js";
import { ToolListener } from "./ToolListener.js";
export declare class Grammar implements IGrammar, IAttributeResolver {
/**
* This value is used in the following situations to indicate that a token type does not have an associated
* name which can be directly referenced in a grammar.
*
* - This value is the name and display name for the token with type {@link Token.INVALID_TYPE}.
* - This value is the name for tokens with a type not represented by a named token. The display name for these
* tokens is simply the string representation of the token type as an integer.
*/
static readonly INVALID_TOKEN_NAME = "<INVALID>";
static readonly caseInsensitiveOptionName = "caseInsensitive";
static readonly parserOptions: Set<string>;
static readonly lexerOptions: Set<string>;
static readonly lexerRuleOptions: Set<string>;
static readonly parseRuleOptions: Set<string>;
static readonly parserBlockOptions: Set<string>;
static readonly lexerBlockOptions: Set<string>;
/** Legal options for rule refs like id<key=value> */
static readonly ruleRefOptions: Set<string>;
/** Legal options for terminal refs like ID<assoc=right> */
static readonly tokenOptions: Set<string>;
static readonly actionOptions: Set<string>;
static readonly semPredOptions: Set<string>;
static readonly doNotCopyOptionsToLexer: Set<string>;
static readonly grammarAndLabelRefTypeToScope: Map<string, AttributeDict>;
static readonly AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
name: string;
/**
* The ATN that represents the grammar, with edges labelled with tokens or epsilon. It is better suited to analysis
* than an AST representation.
*/
atn?: ATN;
ast: GrammarRootAST;
/** Track token stream used to create this grammar */
tokenStream: TokenStream;
/**
* If we transform the grammar, track the original unaltered token stream. This is set to the same value as tokenStream
* when tokenStream is initially set.
*
* If this field differs from tokenStream, then we have transformed the grammar.
*/
originalTokenStream: TokenStream;
fileName: string;
/**
* Was this parser grammar created from a COMBINED grammar? If so, this is what we extracted.
*/
implicitLexer: LexerGrammar | undefined;
/** If this is an extracted/implicit lexer, we point at original grammar. */
originalGrammar?: Grammar;
/** All rules defined in this specific grammar, not imported. Also does not include lexical rules if combined. */
rules: Map<string, Rule>;
decisionLookahead: IntervalSet[][];
tool: ITool;
/** Maps a token name like {@code ID} (but not a literal like {@code 'while'}) to its token type. */
readonly tokenNameToTypeMap: Map<string, number>;
/**
* Maps a token literal like {@code 'while'} to its token type. It may be that
* {@code WHILE="while"=35}, in which case both {@link tokenNameToTypeMap} and this field will have
* entries mapped to 35.
*/
readonly stringLiteralToTypeMap: Map<string, number>;
/** Reverse index for {@link stringLiteralToTypeMap}. Indexed with raw token type. 0 is invalid. */
readonly typeToStringLiteralList: (string | null)[];
/**
* Map channel like `COMMENTS_CHANNEL` to its constant channel value. Only user-defined channels are
* defined in this map.
*/
readonly channelNameToValueMap: Map<string, number>;
/**
* Map a constant channel value to its name. Indexed with raw channel value. The predefined channels
* {@link Token.DEFAULT_CHANNEL} and {@link Token.HIDDEN_CHANNEL} are not stored in this list, so the values
* at the corresponding indexes are {@code null}.
*/
readonly channelValueToNameList: string[];
/**
* Map a name to an action. The code generator will use this to fill holes in the output files. I track the AST
* node for the action in case I need the line number for errors.
*/
namedActions: Map<string, ActionAST>;
/**
* Tracks all user lexer actions in all alternatives of all rules. Doesn't track sempreds. Maps tree node to
* action index (alt number 1..n).
*/
lexerActions: Map<ActionAST, number>;
/** Map a token type to its token name. Indexed with raw token type. 0 is invalid. */
readonly typeToTokenList: Array<string | null>;
/** All sempreds found in grammar; maps tree node to sempred index; sempred index is 0..n - 1. */
sempreds: Map<PredAST, number>;
private importedGrammars;
/** Maps a rule index to its {@link Rule} object; see {@link defineRule}. */
private indexToRule;
private decisionDFAs;
/** Map the other direction upon demand. */
private indexToPredMap;
/** used to get rule indexes (0..n-1) */
private ruleNumber;
/** Used to invent rule names for string literals like 'keyword', ';', ... (0..n - 1). */
private stringLiteralRuleNumber;
/**
* Token names and literal tokens like "void" are uniquely indexed, with -1 implying EOF. Characters are
* different. They go from -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you want to lex. Labels
* of DFA/ATN transitions can be both tokens and characters. I use negative numbers for bookkeeping labels
* like EPSILON. Char/String literals and token types overlap in the same space, however.
*/
private maxTokenType;
/**
* The maximum channel value which is assigned by this grammar. Values below {@link Token.MIN_USER_CHANNEL_VALUE}
* are assumed to be predefined.
*/
private maxChannelType;
/** If we're imported, who imported us? If null, implies grammar is root. */
private parent;
constructor(tool: ITool, ast: GrammarRootAST);
/** For testing */
constructor(grammarText: string, tokenVocabSource?: LexerGrammar);
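// A minimal usage sketch for the test constructor above (a hypothetical example; it assumes the constructor
// parses and fully processes the grammar text, as the corresponding ANTLR tool constructor does):
//
//   const g = new Grammar("grammar T; s: ID ';' ; ID: [a-z]+ ; WS: [ \t\r\n]+ -> skip;");
//   g.getRuleNames();          // e.g. ["s"]
//   g.getTokenType("ID");      // the token type assigned to ID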
static forFile<T extends Grammar>(c: Constructor<T>, fileName: string, grammarText: string, tokenVocabSource?: Grammar, listener?: ToolListener): T;
static getGrammarTypeToFileNameSuffix(type: GrammarType): string;
/**
* Given ^(TOKEN_REF ^(OPTIONS ^(ELEMENT_OPTIONS (= assoc right)))) sets option assoc=right in TOKEN_REF.
*/
static setNodeOptions(node: GrammarAST, options: GrammarAST): void;
/** @returns list of (TOKEN_NAME node, 'literal' node) pairs */
static getStringLiteralAliasesFromLexerRules(ast: GrammarRootAST): Array<[GrammarAST, GrammarAST]> | null;
protected static defAlias(r: GrammarAST, pattern: string, wiz: TreeWizard, lexerRuleToStringLiteral: Array<[GrammarAST, GrammarAST]>): boolean;
loadImportedGrammars(visited: Set<string>): void;
defineAction(atAST: GrammarAST): void;
/**
* Defines the specified rule in the grammar. This method assigns the rule's {@link Rule.index} according to
* the {@link ruleNumber} field, and adds the {@link Rule} instance to {@link rules} and {@link indexToRule}.
*
* @param r The rule to define in the grammar.
* @returns `true` if the rule was added to the {@link Grammar} instance; otherwise, `false` if a rule with
* this name already existed in the grammar instance.
*/
defineRule(r: Rule): boolean;
getRule(name: string | number): Rule | null;
getRule(grammarName: string, ruleName: string): Rule | null;
/** Needed for tests. */
getATN(): ATN;
/**
* Get the list of all imports from all grammars in the delegate subtree of this grammar. The grammars are in
* import tree preorder. Don't include ourselves in the list as we're not a delegate of ourselves.
*/
getAllImportedGrammars(): Grammar[];
getImportedGrammars(): Grammar[];
/**
* Return the list of grammars that import us, from the root down to our parent. Order is [root, ..., this.parent]
* (us not included).
*/
getGrammarAncestors(): Grammar[] | null;
/**
* @returns the grammar that imported us and our parents, or this if we're root.
*/
getOutermostGrammar(): Grammar;
/**
* Gets the name of the generated recognizer; it may or may not be the same as the grammar name. The recognizer
* is TParser and TLexer from T if combined, else just T regardless of grammar type.
*/
getRecognizerName(): string;
getStringLiteralLexerRuleName(_literal: string): string;
/** @returns the grammar directly imported by this grammar with the given name, or `null` if none is found. */
getImportedGrammar(name: string): Grammar | null;
getTokenType(token: string): number;
/**
* Gets the name by which a token can be referenced in the generated code. For tokens defined in a `tokens{}`
* block or via a lexer rule, this is the declared name of the token. For token types generated by the use
* of a string literal within a parser rule of a combined grammar, this is the automatically generated token
* name, which includes the {@link AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types which are not
* associated with a defined token, this method returns {@link INVALID_TOKEN_NAME}.
*
* @param literalOrTokenType The token type or string literal.
*
* @returns The name of the token with the specified type.
*/
getTokenName(literalOrTokenType: number | string): string | null;
/**
* Given a token type, get a meaningful name for it such as the ID or string literal. If this is a lexer and
* the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
*/
getTokenDisplayName(ttype: number): string;
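// Sketch of how the token lookup methods above relate, assuming a grammar `g` that defines
// WHILE: 'while'; (see defineTokenAlias below). Both the name and the literal resolve to the same type,
// and the display name typically uses the literal form when one exists:
//
//   const t = g.getTokenType("WHILE");    // a valid (> 0) token type
//   g.getTokenType("'while'") === t;      // the literal maps to the same type
//   g.getTokenName(t);                    // "WHILE"
//   g.getTokenDisplayName(t);             // "'while'"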
/**
* Gets the constant channel value for a user-defined channel.
*
* This method only returns channel values for user-defined channels. All other channels, including the
* predefined channels {@link Token.DEFAULT_CHANNEL} and {@link Token.HIDDEN_CHANNEL} along with
* any channel defined in code (e.g. in a {@code @members{}} block), are ignored.
*
* @param channel The channel name.
*
* @returns The channel value, if `channel` is the name of a known user-defined token channel; otherwise, -1.
*/
getChannelValue(channel: string): number;
/**
* Gets an array of rule names for rules defined or imported by the grammar. The array index is the rule index,
* and the value is the name of the rule with the corresponding {@link Rule.index}.
*
* If no rule is defined with an index for an element of the resulting array, the value of that element is
* {@link INVALID_RULE_NAME}.
*
* @returns The names of all rules defined in the grammar.
*/
getRuleNames(): string[];
/**
* Gets an array of token names for tokens defined or imported by the grammar. The array index is the token type,
* and the value is the result of {@link getTokenName} for the corresponding token type.
*
* @returns The token names of all tokens defined in the grammar.
*/
getTokenNames(): Array<string | null>;
/**
* Gets an array of display names for tokens defined or imported by the grammar. The array index is the token
* type, and the value is the result of {@link getTokenDisplayName} for the corresponding token type.
*
* @returns The display names of all tokens defined in the grammar.
*/
getTokenDisplayNames(): Array<string | null>;
/**
* Gets the literal names assigned to tokens in the grammar.
*/
getTokenLiteralNames(): Array<string | null>;
/**
* Gets the symbolic names assigned to tokens in the grammar.
*/
getTokenSymbolicNames(): Array<string | null>;
/**
* Gets a {@link Vocabulary} instance describing the vocabulary used by the grammar.
*/
getVocabulary(): Vocabulary;
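// Sketch relating the bulk accessors above; the array indexes are the rule index and the token type
// respectively, and the vocabulary is presumably assembled from the literal and symbolic name lists:
//
//   g.getRuleNames()[r.index] === r.name;     // for a rule r defined in g
//   const vocab = g.getVocabulary();
//   vocab.getSymbolicName(t);                 // expected to match getTokenSymbolicNames()[t]
//   vocab.getLiteralName(t);                  // expected to match getTokenLiteralNames()[t]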
getIndexToPredicateMap(): Map<number, PredAST>;
getPredicateDisplayString(pred: SemanticContext.Predicate): string;
/**
* What is the max char value possible for this grammar's target? Use the Unicode max if no target is defined.
*/
getMaxCharValue(): number;
/** @returns a set of all possible token or char types for this grammar. */
getTokenTypes(): IntervalSet;
/**
* @returns min to max char as defined by the target. If no target, use the max Unicode char value.
*/
getAllCharValues(): IntervalSet;
/** How many token types have been allocated so far? */
getMaxTokenType(): number;
/** @returns a new unique integer in the token type space. */
getNewTokenType(): number;
/** @returns a new unique integer in the channel value space. */
getNewChannelNumber(): number;
importTokensFromTokensFile(): void;
importVocab(importG: Grammar): void;
defineTokenName(name: string, ttype?: number): number;
defineStringLiteral(lit: string, ttype?: number): number;
defineTokenAlias(name: string, lit: string): number;
setTokenForType(ttype: number, text: string): void;
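// The symbol-definition methods above are normally driven by the tool while it processes the grammar AST,
// not by user code. A sketch of the expected effect of defining an alias like WHILE: 'while'; :
//
//   const t = g.defineTokenAlias("WHILE", "'while'");
//   g.getTokenType("WHILE") === t;      // name and literal share the token type
//   g.getTokenType("'while'") === t;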
/**
* Defines a token channel with a specified name. If a channel with the specified name already exists, the
* previously assigned channel value is not altered.
*
* @param name The channel name.
*
* @returns The constant channel value assigned to the channel.
*/
defineChannelName(name: string, value?: number): number;
/**
* Sets the channel name associated with a particular channel value. If a name has already been assigned to the
* channel with constant value `channelValue`, this method does nothing.
*
* @param channelValue The constant value for the channel.
* @param name The channel name.
*/
setChannelNameForValue(channelValue: number, name: string): void;
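// Channel bookkeeping sketch (user-defined channels only, normally invoked while processing a
// channels {} block):
//
//   const v = g.defineChannelName("COMMENTS_CHANNEL");    // >= Token.MIN_USER_CHANNEL_VALUE
//   g.getChannelValue("COMMENTS_CHANNEL") === v;
//   g.getChannelValue("HIDDEN");                          // -1, predefined channels are not tracked here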
resolveToAttribute(x: string, node: ActionAST): IAttribute;
resolveToAttribute(x: string, y: string, node: ActionAST): IAttribute | null;
resolvesToLabel(x: string, node: ActionAST): boolean;
resolvesToListLabel(x: string, node: ActionAST): boolean;
resolvesToToken(x: string, node: ActionAST): boolean;
resolvesToAttributeDict(x: string, node: ActionAST): boolean;
/**
* Given a grammar type, what should be the default action scope?
* If I say @members in a COMBINED grammar, for example, the default scope should be "parser".
*/
getDefaultActionScope(): string | null;
get type(): GrammarType;
isLexer(): boolean;
isParser(): boolean;
isCombined(): boolean;
getTypeString(): string | null;
getLanguage(): SupportedLanguage;
getOptionString(key: string): string | undefined;
getStringLiterals(): Set<string>;
createLexerInterpreter(input: CharStream): LexerInterpreter;
createGrammarParserInterpreter(tokenStream: TokenStream): GrammarParserInterpreter;
/** For testing. */
createParserInterpreter(tokenStream: TokenStream): ParserInterpreter;
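// A minimal interpretation sketch, assuming `g` is a combined grammar built as in the construction example
// above and that, as in the Java tool, createLexerInterpreter delegates to the implicit lexer for combined
// grammars. CharStream and CommonTokenStream come from antlr4ng:
//
//   import { CharStream, CommonTokenStream } from "antlr4ng";
//
//   const lexer = g.createLexerInterpreter(CharStream.fromString("abc;"));
//   const tokens = new CommonTokenStream(lexer);
//   const parser = g.createParserInterpreter(tokens);
//   const tree = parser.parse(g.getRule("s")!.index);     // start parsing at rule s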
/**
* Undefines the specified rule from this {@link Grammar} instance. The instance `r` is removed from
* {@link rules} and {@link indexToRule}. This method updates the {@link Rule.index} field for all rules defined
* after `r`, and decrements {@link ruleNumber} in preparation for adding new rules.
*
* This method does nothing if the current {@link Grammar} does not contain the instance `r` at index
* `r.index` in {@link indexToRule}.
*
* @returns `true` if the rule was removed from the {@link Grammar} instance; otherwise, `false` if the
* specified rule was not defined in the grammar.
*/
protected undefineRule(r: Rule): boolean;
protected initTokenSymbolTables(): void;
}