antlr-ng

Next generation ANTLR Tool

import { STGroup, type IST } from "stringtemplate4ts";

import { Character } from "../support/Character.js";
import { Grammar } from "../tool/Grammar.js";
import { Rule } from "../tool/Rule.js";
import { GrammarAST } from "../tool/ast/GrammarAST.js";
import { CodeGenerator } from "./CodeGenerator.js";
import { RuleFunction } from "./model/RuleFunction.js";

/** Represents a single code point in Unicode. */
export type CodePoint = number;

export declare abstract class Target {
    protected gen: CodeGenerator;
    protected static readonly defaultCharValueEscape: Map<number, string>;
    private static readonly languageTemplates;
    constructor(gen: CodeGenerator);
    protected static addEscapedChar(map: Map<Character, string>, key: number, representation?: number): void;
    /**
     * For pure strings of Unicode characters, how can we display them in the target language as a literal? Useful
     * for dumping predicates and such that may refer to chars that need to be escaped when represented as strings.
     * Also, templates need to be escaped so that the target language can hold them as a string. Each target can
     * have a different set in memory at the same time.
     */
    getTargetCharValueEscape(): Map<CodePoint, string> | undefined;
    getCodeGenerator(): CodeGenerator;
    /**
     * The ANTLR tool should check that the output templates / target are compatible with tool code generation. For
     * now, a simple string match is used on the x.y of the x.y.z scheme. We use a method to avoid mismatches with a
     * template called VERSION. This value is checked against Tool.VERSION during load of templates.
     *
     * This requirement forces all targets 4.3 and beyond to add this method.
     */
    getVersion(): string;
    get templates(): STGroup;
    escapeIfNeeded(identifier: string): string;
    /**
     * Get a meaningful name for a token type useful during code generation. Literals without associated names
     * are converted to the string equivalent of their integer values. Used to generate x==ID and x==34 type
     * comparisons etc... Essentially we are looking for the most obvious way to refer to a token type in the
     * generated code.
     */
    getTokenTypeAsTargetLabel(g: Grammar, ttype: number): string;
    getTokenTypesAsTargetLabels(g: Grammar, tokenTypes: number[]): string[];
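    /*
     * Illustrative sketch, not part of the original declaration file: based on the doc comment above, a named
     * token resolves to its name, while a nameless literal falls back to its integer type. Assuming `target` is a
     * concrete Target, `g` is a Grammar, token type 5 is the named token ID, and 34 is an unnamed literal:
     *
     *     target.getTokenTypeAsTargetLabel(g, 5);   // -> "ID"  (token with an associated name)
     *     target.getTokenTypeAsTargetLabel(g, 34);  // -> "34"  (literal without a name: integer value as string)
     */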
    /**
     * Given a random string of Unicode chars, return a new string with optionally appropriate quote characters for
     * the target language and possibly with some escaped characters. For example, if the incoming string has actual
     * newline characters, the output of this method would convert them to the two-char sequence \n for Java, C,
     * C++, ... The new string has double-quotes around it as well. Example string in memory:
     * ```
     * a"[newlineChar]b'c[carriageReturnChar]d[tab]e\f
     * ```
     * would be converted to the valid string:
     * ```
     * "a\"\nb'c\rd\te\\f"
     * ```
     * or
     * ```
     * a\"\nb'c\rd\te\\f
     * ```
     * depending on the quoted arg.
     */
    getTargetStringLiteralFromString(s: string, quoted?: boolean): string;
    /**
     * Convert from an ANTLR string literal found in a grammar file to an equivalent string literal in the target
     * language.
     *
     * For Java, this is the translation `'a\n"'` -> `"a\n\""`. Expect single quotes around the incoming literal.
     * Just flip the quotes and replace double quotes with `\"`.
     *
     * Note that we have decided to allow people to use '\"' without penalty, so we must build the target string in
     * a loop as {@link String.replaceAll} cannot handle both `\"` and `"` without a lot of messing around.
     */
    getTargetStringLiteralFromANTLRStringLiteral(generator: CodeGenerator, literal: string, addQuotes: boolean, escapeSpecial?: boolean): string;
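    /*
     * Illustrative sketch, not part of the original declaration file, restating the two doc comments above.
     * Assuming `target` is a concrete Target and `gen` is its CodeGenerator:
     *
     *     const s = "a\"\nb'c\rd\te\\f";                      // quote, newline, CR, tab, and backslash in memory
     *     target.getTargetStringLiteralFromString(s, true);   // -> "a\"\nb'c\rd\te\\f"   (surrounding quotes added)
     *     target.getTargetStringLiteralFromString(s, false);  // -> a\"\nb'c\rd\te\\f     (no surrounding quotes)
     *
     *     // ANTLR grammar literal 'a\n"' converted for a Java-like target (addQuotes = true):
     *     target.getTargetStringLiteralFromANTLRStringLiteral(gen, "'a\\n\"'", true);     // -> "a\n\""
     */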
    /** Assume 16-bit char. */
    encodeInt16AsCharEscape(v: number): string;
    getLoopLabel(ast: GrammarAST): string;
    getLoopCounter(ast: GrammarAST): string;
    getListLabel(label: string): string;
    /**
     * If we know which actual function, we can provide the actual ctx type. This will contain implicit labels etc...
     * From outside, though, we see only ParserRuleContext unless there is externally visible stuff like args, locals,
     * explicit labels, etc...
     */
    getRuleFunctionContextStructName(ruleOrFunction: Rule | RuleFunction): string;
    getAltLabelContextStructName(label: string): string;
    /**
     * Should be the same for all refs to the same token like ctx.ID within a single rule function. For literals
     * like 'while', we gen _s<ttype>.
     */
    getImplicitTokenLabel(tokenName: string): string;
    /** x=(A|B) */
    getImplicitSetLabel(id: string): string;
    getImplicitRuleLabel(ruleName: string): string;
    getElementListName(name: string): string;
    getElementName(name: string): string;
    /**
     * Generate TParser.java and TLexer.java from T.g4 if combined, else just use T.java as output regardless of
     * type.
     */
    getRecognizerFileName(header: boolean): string;
    /**
     * For a given grammar T, return the listener name such as TListener.java, if we're using the Java target.
     */
    getListenerFileName(header: boolean): string;
    /**
     * For a given grammar T, return the visitor name such as TVisitor.java, if we're using the Java target.
     */
    getVisitorFileName(header: boolean): string;
    /**
     * For a given grammar T, return a blank listener implementation such as TBaseListener.java, if we're using the
     * Java target.
     */
    getBaseListenerFileName(header: boolean): string;
    /**
     * For a given grammar T, return a blank visitor implementation such as TBaseVisitor.java, if we're using the
     * Java target.
     */
    getBaseVisitorFileName(header: boolean): string;
    /**
     * Gets the maximum number of 16-bit unsigned integers that can be encoded in a single segment (a declaration in
     * the target language) of the serialized ATN. E.g., in C++, a small segment length results in multiple decls
     * like:
     *
     *     static const int32_t serializedATNSegment1[] = {
     *         0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
     *         0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
     *         0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
     *         0x1b, 0x2, 0x1c, 0x7, 0x1c, 0x2, 0x1d, 0x7, 0x1d, 0x2, 0x1e, 0x7,
     *         0x1e, 0x2, 0x1f, 0x7, 0x1f, 0x2, 0x20, 0x7, 0x20, 0x2, 0x21, 0x7,
     *         0x21, 0x2, 0x22, 0x7, 0x22, 0x2, 0x23, 0x7, 0x23, 0x2, 0x24, 0x7,
     *         0x24, 0x2, 0x25, 0x7, 0x25, 0x2, 0x26,
     *     };
     *
     * instead of one big one. Targets are free to ignore this like JavaScript does.
     *
     * This is primarily needed by the Java target to limit the size of any single ATN string to 65k length.
     *
     * {@link SerializedATN.getSegments}
     *
     * @returns the serialized ATN segment limit
     */
    getSerializedATNSegmentLimit(): number;
    /**
     * How many bits should be used to do inline token type tests? Java assumes a 64-bit word for bitsets. Must be
     * a valid word size for your target like 8, 16, 32, 64, etc...
     */
    getInlineTestSetWordSize(): number;
    grammarSymbolCausesIssueInGeneratedCode(idNode: GrammarAST): boolean;
    templatesExist(): boolean;
    wantsBaseListener(): boolean;
    wantsBaseVisitor(): boolean;
    supportsOverloadedMethods(): boolean;
    isATNSerializedAsInts(): boolean;
    needsHeader(): boolean;
    genFile(g: Grammar | undefined, outputFileST: IST, fileName: string): void;
    protected abstract get reservedWords(): Set<string>;
    protected escapeWord(word: string): string;
    /**
     * Escape the Unicode code point appropriately for this language and append the escaped value to {@code sb}.
     * It exists for flexibility and backward compatibility with external targets. The static method
     * {@link UnicodeEscapes.appendEscapedCodePoint(StringBuilder, int, String)} can be used as well if the default
     * escaping method (Java) is used or the language is officially supported.
     */
    protected createUnicodeEscapedCodePoint(codePoint: number, escape?: boolean): string;
    protected shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint: number): boolean;
    protected escapeChar(v: number): string;
    protected loadTemplates(): STGroup;
    private loadTemplatesHelper;
}
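
The only abstract member in the declaration above is the reservedWords getter, so a concrete target mainly supplies its language's reserved-word set (the base class presumably consults it from escapeIfNeeded/escapeWord when an identifier collides with a reserved word, as the ANTLR 4 targets do) and overrides escaping or file-name behavior as needed. The following is a minimal sketch, not taken from the package; the class name, word list, and import paths are assumptions, and Target/CodeGenerator may not be re-exported from the package root.

import { CodeGenerator, Target } from "antlr-ng"; // import path is an assumption

// Hypothetical target for a fictional "demo" language: only the abstract
// `reservedWords` getter is required; everything else inherits the base behavior.
export class DemoTarget extends Target {
    // Assumed reserved-word list for the demo language.
    private static readonly demoReservedWords = new Set<string>([
        "class", "if", "else", "return", "while",
    ]);

    public constructor(gen: CodeGenerator) {
        super(gen);
    }

    protected override get reservedWords(): Set<string> {
        return DemoTarget.demoReservedWords;
    }
}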