antlr-ng
Version:
Next generation ANTLR Tool
166 lines (165 loc) • 8.45 kB
TypeScript
import { STGroup, type IST } from "stringtemplate4ts";
import { Character } from "../support/Character.js";
import { Grammar } from "../tool/Grammar.js";
import { Rule } from "../tool/Rule.js";
import { GrammarAST } from "../tool/ast/GrammarAST.js";
import { CodeGenerator } from "./CodeGenerator.js";
import { RuleFunction } from "./model/RuleFunction.js";
/** Represents a single Unicode code point, expressed as a plain number. */
export type CodePoint = number;
/**
* Abstract base class describing a code generation target language. It exposes the naming, escaping, file name
* and template related hooks declared below. Concrete targets must at least provide {@link reservedWords}.
*
* Only the declarations are visible here; notes about behavior that cannot be read from the signatures are
* marked as assumptions.
*/
export declare abstract class Target {
/** The code generator associated with this target (same type as the constructor parameter). */
protected gen: CodeGenerator;
/** Class-wide default escape table, keyed by numeric character value. Contents are not visible here. */
protected static readonly defaultCharValueEscape: Map<number, string>;
/** Type and contents are elided in this declaration. Presumably a per-language template cache - TODO confirm. */
private static readonly languageTemplates;
/**
* @param gen The code generator this target works for.
*/
constructor(gen: CodeGenerator);
/**
* Adds an escape entry for `key` to `map`.
*
* NOTE(review): `map` is declared as `Map<Character, string>` while `key` is a `number` and
* {@link defaultCharValueEscape} is a `Map<number, string>`. This looks inconsistent - confirm against the
* implementation before relying on the key type.
*
* @param map The escape table to extend.
* @param key The character value to register.
* @param representation Optional; presumably the character value to emit after the escape prefix, defaulting
*                       to `key` - TODO confirm.
*/
protected static addEscapedChar(map: Map<Character, string>, key: number, representation?: number): void;
/**
* For pure strings of Unicode chars, how can we display them in the target language as a literal? Useful for
* dumping predicates and such that may refer to chars that need to be escaped when represented as strings.
* Also, templates need to be escaped so that the target language can hold them as a string. Each target can
* have a different set in memory at the same time.
*
* @returns The escape table of this target, or `undefined` if it has none.
*/
getTargetCharValueEscape(): Map<CodePoint, string> | undefined;
/**
* @returns The code generator of this target (presumably {@link gen}).
*/
getCodeGenerator(): CodeGenerator;
/**
* The ANTLR tool should check that output templates / targets are compatible with tool code generation. For
* now, a simple string match is used on x.y of the x.y.z scheme. We use a method to avoid mismatches between
* a template called VERSION. This value is checked against Tool.VERSION during load of templates.
*
* This additional method forces all targets 4.3 and beyond to add this method.
*
* @returns The version string of this target.
*/
getVersion(): string;
/** The StringTemplate group of this target. Presumably loaded on demand via {@link loadTemplates} - TODO confirm. */
get templates(): STGroup;
/**
* Returns a form of `identifier` that is safe to use in generated code.
* Presumably applies {@link escapeWord} when the identifier is one of the {@link reservedWords} - TODO confirm.
*
* @param identifier The identifier to check.
*
* @returns The (possibly escaped) identifier.
*/
escapeIfNeeded(identifier: string): string;
/**
* Get a meaningful name for a token type useful during code generation. Literals without associated names
* are converted to the string equivalent of their integer values. Used to generate x==ID and x==34 type
* comparisons etc... Essentially we are looking for the most obvious way to refer to a token type in the
* generated code.
*
* @param g The grammar the token type belongs to.
* @param ttype The numeric token type.
*
* @returns The label to use for the token type in generated code.
*/
getTokenTypeAsTargetLabel(g: Grammar, ttype: number): string;
/**
* List variant of {@link getTokenTypeAsTargetLabel}.
*
* @param g The grammar the token types belong to.
* @param tokenTypes The numeric token types.
*
* @returns One label per given token type (presumably in the same order - TODO confirm).
*/
getTokenTypesAsTargetLabels(g: Grammar, tokenTypes: number[]): string[];
/**
* Given a random string of Unicode chars, return a new string with optionally appropriate quote characters for
* the target language and possibly with some escaped characters. For example, if the incoming string has actual
* newline characters, the output of this method would convert them to the two char sequence \n for Java, C,
* C++, ... The new string has double-quotes around it as well. Example string in memory:
*```
* a"[newlineChar]b'c[carriageReturnChar]d[tab]e\f
*```
* would be converted to the valid string:
*```
* "a\"\nb'c\rd\te\\f"
*```
* or
*```
* a\"\nb'c\rd\te\\f
*```
* depending on the `quoted` argument.
*
* @param s The raw string to convert.
* @param quoted Whether the result is wrapped in quotes (the default is not visible in this declaration).
*
* @returns The string literal for the target language.
*/
getTargetStringLiteralFromString(s: string, quoted?: boolean): string;
/**
* Convert from an ANTLR string literal found in a grammar file to an equivalent string literal in the target
* language.
*
* For Java, this is the translation `'a\n"'` -> `"a\n\""`. Expect single quotes around the incoming literal.
* Just flip the quotes and replace double quotes with `\"`.
*
* Note that we have decided to allow people to use '\"' without penalty, so we must build the target string in
* a loop as {@link String.replaceAll} cannot handle both `\"` and `"` without a lot of messing around.
*
* @param generator The code generator in use.
* @param literal The ANTLR string literal, including its surrounding single quotes.
* @param addQuotes Whether the result is to be wrapped in quotes.
* @param escapeSpecial Optional; presumably controls escaping of special characters - TODO confirm.
*
* @returns The string literal for the target language.
*/
getTargetStringLiteralFromANTLRStringLiteral(generator: CodeGenerator, literal: string, addQuotes: boolean, escapeSpecial?: boolean): string;
/** Assume 16-bit char. */
encodeInt16AsCharEscape(v: number): string;
/** @returns The name of the loop label to generate for the given AST node. */
getLoopLabel(ast: GrammarAST): string;
/** @returns The name of the loop counter to generate for the given AST node. */
getLoopCounter(ast: GrammarAST): string;
/** @returns The name to use for the list that belongs to the given label. */
getListLabel(label: string): string;
/**
* If we know which actual function, we can provide the actual ctx type. This will contain implicit labels etc...
* From outside, though, we see only ParserRuleContext unless there is externally visible stuff like args, locals,
* explicit labels, etc...
*
* @param ruleOrFunction The rule or the rule function to get the context struct name for.
*
* @returns The name of the context struct.
*/
getRuleFunctionContextStructName(ruleOrFunction: Rule | RuleFunction): string;
/** @returns The name of the context struct for the alternative with the given label. */
getAltLabelContextStructName(label: string): string;
/**
* Should be the same for all refs to the same token, like ctx.ID, within a single rule function. For literals
* like 'while', we gen _s<ttype>.
*/
getImplicitTokenLabel(tokenName: string): string;
/** x=(A|B) */
getImplicitSetLabel(id: string): string;
/** @returns The implicit label to use for a reference to the given rule. */
getImplicitRuleLabel(ruleName: string): string;
/** @returns The name to use for the list of elements with the given name. */
getElementListName(name: string): string;
/** @returns The name to use for a single element with the given name. */
getElementName(name: string): string;
/**
* Generate TParser.java and TLexer.java from T.g4 if combined, else just use T.java as output regardless of type.
*
* @param header Presumably selects the header file name for targets that need one (see {@link needsHeader}) -
*               TODO confirm.
*
* @returns The recognizer file name.
*/
getRecognizerFileName(header: boolean): string;
/**
* For a given grammar T, return the listener name such as TListener.java, if we're using the Java target.
*
* @param header See {@link getRecognizerFileName}.
*/
getListenerFileName(header: boolean): string;
/**
* For a given grammar T, return the visitor name such as TVisitor.java, if we're using the Java target.
*
* @param header See {@link getRecognizerFileName}.
*/
getVisitorFileName(header: boolean): string;
/**
* For a given grammar T, return a blank listener implementation such as TBaseListener.java, if we're using the
* Java target.
*
* @param header See {@link getRecognizerFileName}.
*/
getBaseListenerFileName(header: boolean): string;
/**
* For a given grammar T, return a blank visitor implementation such as TBaseVisitor.java, if we're using the
* Java target.
*
* @param header See {@link getRecognizerFileName}.
*/
getBaseVisitorFileName(header: boolean): string;
/**
* Gets the maximum number of 16-bit unsigned integers that can be encoded in a single segment (a declaration in
* target language) of the serialized ATN. E.g., in C++, a small segment length results in multiple decls like:
*
* static const int32_t serializedATNSegment1[] = {
* 0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
* 0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
* 0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
* 0x1b, 0x2, 0x1c, 0x7, 0x1c, 0x2, 0x1d, 0x7, 0x1d, 0x2, 0x1e, 0x7,
* 0x1e, 0x2, 0x1f, 0x7, 0x1f, 0x2, 0x20, 0x7, 0x20, 0x2, 0x21, 0x7,
* 0x21, 0x2, 0x22, 0x7, 0x22, 0x2, 0x23, 0x7, 0x23, 0x2, 0x24, 0x7,
* 0x24, 0x2, 0x25, 0x7, 0x25, 0x2, 0x26,
* };
*
* instead of one big one. Targets are free to ignore this like JavaScript does.
*
* This is primarily needed by the Java target to limit the size of any single ATN string to 65k length.
*
* {@link SerializedATN.getSegments}
*
* @returns the serialized ATN segment limit
*/
getSerializedATNSegmentLimit(): number;
/**
* How many bits should be used to do inline token type tests? Java assumes a 64-bit word for bitsets. Must be a
* valid word size for your target like 8, 16, 32, 64, etc...
*/
getInlineTestSetWordSize(): number;
/**
* @param idNode The AST node of the grammar symbol to check.
*
* @returns Whether the symbol would cause a problem in the generated code (the exact criteria are not
*          visible in this declaration).
*/
grammarSymbolCausesIssueInGeneratedCode(idNode: GrammarAST): boolean;
/** @returns Whether templates for this target exist. */
templatesExist(): boolean;
/** @returns Whether this target wants a base listener to be generated. */
wantsBaseListener(): boolean;
/** @returns Whether this target wants a base visitor to be generated. */
wantsBaseVisitor(): boolean;
/** @returns Whether the target language supports overloaded methods. */
supportsOverloadedMethods(): boolean;
/** @returns Whether the ATN is serialized as integers for this target. */
isATNSerializedAsInts(): boolean;
/** @returns Whether this target needs a header file. */
needsHeader(): boolean;
/**
* Generates an output file from the given template.
*
* @param g The grammar the file is generated for, if any.
* @param outputFileST The template to generate the file content from.
* @param fileName The name of the file to generate.
*/
genFile(g: Grammar | undefined, outputFileST: IST, fileName: string): void;
/** The reserved words of the target language. Must be provided by each concrete target. */
protected abstract get reservedWords(): Set<string>;
/**
* @param word The word to escape.
*
* @returns The escaped form of the given word (the escaping scheme is not visible in this declaration).
*/
protected escapeWord(word: string): string;
/**
* Escape the Unicode code point appropriately for this language and return the escaped value.
* It exists for flexibility and backward compatibility with external targets.
*
* NOTE(review): the original comment pointed to a static `UnicodeEscapes.appendEscapedCodePoint` helper as an
* alternative when the default (Java) escaping is used or the language is officially supported. That helper is
* not visible from this declaration - confirm it exists before referring to it.
*
* @param codePoint The code point to escape.
* @param escape Optional; its effect and default are not visible in this declaration.
*
* @returns The escaped code point.
*/
protected createUnicodeEscapedCodePoint(codePoint: number, escape?: boolean): string;
/**
* @param codePoint The code point to check.
*
* @returns Whether a Unicode escape should be used for the code point inside a double-quoted string.
*/
protected shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint: number): boolean;
/**
* @param v The character value to escape.
*
* @returns The escaped form of the given value.
*/
protected escapeChar(v: number): string;
/** @returns The loaded StringTemplate group for this target. */
protected loadTemplates(): STGroup;
/** Private helper; its signature is elided in this declaration. Presumably used by {@link loadTemplates}. */
private loadTemplatesHelper;
}