// antlr-ng
// Version: (unspecified)
// Next generation ANTLR Tool
// 512 lines (511 loc) • 17.7 kB
// JavaScript
// Local alias for Object.defineProperty, used by the naming helper below.
const __defProp = Object.defineProperty;

/**
 * Redefines the `name` property of `target` as a configurable data property holding `value`.
 *
 * @param target The object (typically a class or function) to rename.
 * @param value The name to assign.
 *
 * @returns `target`, as returned by `Object.defineProperty`.
 */
function __name(target, value) {
    const descriptor = { value, configurable: true };

    return __defProp(target, "name", descriptor);
}
import { fileURLToPath } from "node:url";
import { RuntimeMetaData, Token } from "antlr4ng";
import {
NumberRenderer,
STGroup,
STGroupFile,
StringRenderer
} from "stringtemplate4ts";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { CharSupport } from "../misc/CharSupport.js";
import { Utils } from "../misc/Utils.js";
import { Character } from "../support/Character.js";
import { antlrVersion } from "../tool-parameters.js";
import { IssueCode } from "../tool/Issues.js";
import { Grammar } from "../tool/Grammar.js";
import { Rule } from "../tool/Rule.js";
import { UnicodeEscapes } from "./UnicodeEscapes.js";
/**
 * Base class for code generation targets. It bundles the helpers the code generator needs to produce output for one
 * target language: string/char escaping, naming of labels, context structs and output files, and loading (plus
 * caching per language) of the code generation templates.
 *
 * Note: several methods read `this.reservedWords`, which is not defined in this class; it is presumably supplied by
 * the concrete target subclasses.
 */
class Target {
    /**
     * @param gen The code generator this target belongs to. This class reads `gen.language` and `gen.g` from it.
     */
    constructor(gen) {
        this.gen = gen;
    }

    static {
        // Pin the class name explicitly. Same effect as the file-level `__name` helper, but without depending on it.
        Object.defineProperty(this, "name", { value: "Target", configurable: true });
    }

    /** Maps a code point to the escape sequence used for it in string literals of the default (Java-like) target. */
    static defaultCharValueEscape = /* @__PURE__ */ new Map([
        // The keys are written as escape sequences so that a tab can never be mistaken for (or mangled into) a space.
        ["\t".codePointAt(0), "\\t"],
        ["\b".codePointAt(0), "\\b"],
        ["\n".codePointAt(0), "\\n"],
        ["\r".codePointAt(0), "\\r"],
        ["\f".codePointAt(0), "\\f"],
        ["'".codePointAt(0), "\\'"],
        ['"'.codePointAt(0), '\\"'],
        ["\\".codePointAt(0), "\\\\"]
    ]);

    /** Cache of loaded template groups, keyed by target language name. */
    static languageTemplates = /* @__PURE__ */ new Map();

    /**
     * Registers an escape for `key` in `map`. The stored value is a backslash followed by `representation`
     * (or by `key` itself, if no representation is given).
     *
     * NOTE(review): the lookups in this class use numeric code points as keys. Confirm that callers pass a matching
     * key type and a character (not a number) as representation, as the value is built by plain concatenation.
     *
     * @param map The escape table to extend.
     * @param key The key under which the escape is stored.
     * @param representation Optional text to place after the backslash. Defaults to `key`.
     */
    static addEscapedChar(map, key, representation) {
        map.set(key, "\\" + (representation ?? key));
    }

    /**
     * For pure strings of Unicode char, how can we display it in the target language as a literal. Useful for dumping
     * predicates and such that may refer to chars that need to be escaped when represented as strings. Also,
     * templates need to be escaped so that the target language can hold them as a string. Each target can have
     * a different set in memory at same time.
     *
     * @returns The escape table to use. This base implementation returns {@link Target.defaultCharValueEscape}.
     */
    getTargetCharValueEscape() {
        return Target.defaultCharValueEscape;
    }

    /** @returns The code generator passed to the constructor. */
    getCodeGenerator() {
        return this.gen;
    }

    /**
     * ANTLR tool should check output templates / target are compatible with tool code generation. For now, a simple
     * string match used on x.y of x.y.z scheme. We use a method to avoid mismatches between a template called
     * VERSION. This value is checked against the tool version during load of templates.
     *
     * This additional method forces all targets 4.3 and beyond to add this method.
     *
     * @returns The version of this target. This base implementation returns the tool version.
     */
    getVersion() {
        return antlrVersion;
    }

    /**
     * The template group for the language of the code generator. It is loaded on first access and cached per
     * language in {@link Target.languageTemplates}. Before loading, the major.minor part of the target version is
     * compared with that of the tool and a tool error is reported if they differ (loading continues regardless).
     *
     * @returns The template group, or `undefined` if it could not be loaded (a failed load is not cached).
     */
    get templates() {
        const language = this.gen.language;
        const cached = Target.languageTemplates.get(language);
        if (cached) {
            return cached;
        }

        const version = this.getVersion();
        const theirVersion = RuntimeMetaData.getMajorMinorVersion(version);
        const ourVersion = RuntimeMetaData.getMajorMinorVersion(antlrVersion);
        if (theirVersion !== ourVersion) {
            this.gen.g.tool.errorManager.toolError(
                IssueCode.IncompatibleToolAndTemplates,
                version,
                antlrVersion,
                language
            );
        }

        const loaded = this.loadTemplates();
        if (loaded) {
            Target.languageTemplates.set(language, loaded);
        }

        return loaded;
    }

    /**
     * @param identifier The identifier to check.
     *
     * @returns The result of {@link escapeWord} if the identifier is listed in `reservedWords`, otherwise the
     *          identifier itself.
     */
    escapeIfNeeded(identifier) {
        return this.reservedWords.has(identifier) ? this.escapeWord(identifier) : identifier;
    }

    /**
     * Get a meaningful name for a token type useful during code generation. Literals without associated names
     * are converted to the string equivalent of their integer values. Used to generate x==ID and x==34 type
     * comparisons etc... Essentially we are looking for the most obvious way to refer to a token type in the
     * generated code.
     *
     * @param g The grammar to take the token name from.
     * @param ttype The token type.
     *
     * @returns The (escaped, if reserved) token name, or the token type as string if the grammar has no valid name.
     */
    getTokenTypeAsTargetLabel(g, ttype) {
        // Compare the raw name with the marker first, so that reserved word escaping cannot hide an invalid name.
        const name = g.getTokenName(ttype);
        if (name === Grammar.INVALID_TOKEN_NAME) {
            return String(ttype);
        }

        return this.escapeIfNeeded(name);
    }

    /**
     * @param g The grammar to take the token names from.
     * @param tokenTypes The token types to convert.
     *
     * @returns A new array with the result of {@link getTokenTypeAsTargetLabel} for each entry, in the same order.
     */
    getTokenTypesAsTargetLabels(g, tokenTypes) {
        return Array.from(tokenTypes, (ttype) => this.getTokenTypeAsTargetLabel(g, ttype));
    }

    /**
     * Given a random string of unicode chars, return a new string with optionally appropriate quote characters for
     * target language and possibly with some escaped characters. For example, if the incoming string has actual
     * newline characters, the output of this method would convert them to the two char sequence \n for Java, C,
     * C++, ... The new string has double-quotes around it as well. Example string in memory:
     *```
     *     a"[newlineChar]b'c[carriageReturnChar]d[tab]e\f
     *```
     * would be converted to the valid string:
     *```
     *     "a\"\nb'c\rd\te\\f"
     *```
     * or
     *```
     *     a\"\nb'c\rd\te\\f
     *```
     * depending on the quoted arg.
     *
     * @param s The string to convert.
     * @param quoted If true (the default when not given), the result is wrapped in double quotes.
     *
     * @returns The converted string.
     */
    getTargetStringLiteralFromString(s, quoted) {
        const addQuotes = quoted ?? true;
        const escapes = this.getTargetCharValueEscape();

        let result = addQuotes ? '"' : "";
        for (let i = 0; i < s.length; ) {
            const c = s.codePointAt(i);

            // The escape table only covers 16-bit values.
            const escaped = c <= Character.MAX_VALUE ? escapes?.get(c) : undefined;
            if (c !== 0x27 && escaped) { // Single quotes need no escape in a double-quoted string.
                result += escaped;
            } else if (this.shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(c)) {
                // Escape the code point itself (not its position in the string).
                result += this.createUnicodeEscapedCodePoint(c);
            } else {
                result += String.fromCodePoint(c);
            }

            i += Character.charCount(c);
        }

        if (addQuotes) {
            result += '"';
        }

        return result;
    }

    /**
     * Convert from an ANTLR string literal found in a grammar file to an equivalent string literal in the target
     * language.
     *
     * For Java, this is the translation `'a\n"'` -> `"a\n\""`. Expect single quotes around the incoming literal.
     * Just flip the quotes and replace double quotes with `\"`.
     *
     * Note that we have decided to allow people to use '\"' without penalty, so we must build the target string in
     * a loop as {@link String.replaceAll} cannot handle both `\"` and `"` without a lot of messing around.
     *
     * @param generator Not used by this implementation.
     * @param literal The grammar literal, including its surrounding single quotes.
     * @param addQuotes If truthy, the result is wrapped in double quotes.
     * @param escapeSpecial If true, an extra backslash is emitted in front of passed-through escapes (except `\\`)
     *                      and is requested for generated Unicode escapes. Defaults to false.
     *
     * @returns The converted literal. Incomplete `\u` escapes are dropped from the output.
     */
    getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial) {
        const escape = escapeSpecial ?? false;

        // The content lies between the opening quote (index 0) and the closing quote (last index).
        const contentEnd = literal.length - 1;

        let result = addQuotes ? '"' : "";
        for (let i = 1; i < contentEnd; ) {
            const codePoint = literal.codePointAt(i);
            let toAdvance = Character.charCount(codePoint);

            if (codePoint !== 0x5C) {
                if (codePoint === 0x22) {
                    result += '\\"';
                } else if (this.shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
                    result += this.createUnicodeEscapedCodePoint(codePoint, escape);
                } else {
                    result += String.fromCodePoint(codePoint);
                }

                i += toAdvance;
                continue;
            }

            // A backslash: read the escaped character as a full code point, so that characters outside the BMP
            // are not split into their surrogate halves.
            const escapedCodePoint = literal.codePointAt(i + toAdvance);
            const escapedChar = String.fromCodePoint(escapedCodePoint);
            toAdvance += Character.charCount(escapedCodePoint);

            switch (escapedChar) {
                // Pass through any escapes that Java also needs.
                case "n":
                case "r":
                case "t":
                case "b":
                case "f":
                case "\\": {
                    if (escape && escapedChar !== "\\") {
                        result += "\\";
                    }
                    result += "\\" + escapedChar;
                    break;
                }

                case "u": { // Either \unnnn or \u{nnnnnn}.
                    if (literal.charAt(i + toAdvance) === "{") {
                        // Find the closing brace, but never search beyond the end of the literal.
                        let end = i + toAdvance;
                        while (end < literal.length && literal.charAt(end) !== "}") {
                            ++end;
                        }

                        // Step over the brace. For an unterminated escape this moves past the end of the literal,
                        // which makes the bounds check below fail.
                        toAdvance = end - i + 1;
                    } else {
                        toAdvance += 4;
                    }

                    // We might have an incomplete escape like \uAB. It must end before the closing quote.
                    if (i + toAdvance <= contentEnd) {
                        const fullEscape = literal.substring(i, i + toAdvance);
                        result += this.createUnicodeEscapedCodePoint(
                            CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
                            escape
                        );
                    }
                    break;
                }

                default: {
                    // Any other escaped character is taken literally (without the backslash), unless it needs
                    // a Unicode escape in the target string.
                    if (this.shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
                        result += this.createUnicodeEscapedCodePoint(escapedCodePoint, escape);
                    } else {
                        result += escapedChar;
                    }
                    break;
                }
            }

            i += toAdvance;
        }

        if (addQuotes) {
            result += '"';
        }

        return result;
    }

    /**
     * Assume 16-bit char.
     *
     * @param v The value to encode.
     *
     * @returns The decimal string of `v` if the ATN is serialized as integers. Otherwise the escape from the
     *          target's escape table (if there is one for `v`), the character itself for values up to 127 which
     *          are neither control characters nor line/paragraph separators, or the result of {@link escapeChar}.
     *
     * @throws Error if `v` is outside the range `Character.MIN_VALUE` .. `Character.MAX_VALUE`.
     */
    encodeInt16AsCharEscape(v) {
        if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
            throw new Error(`Cannot encode the specified value: ${v}`);
        }

        if (this.isATNSerializedAsInts()) {
            return String(v);
        }

        const escaped = this.getTargetCharValueEscape()?.get(v);
        if (escaped) {
            return escaped;
        }

        switch (Character.getType(v)) {
            case Character.CONTROL:
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR: {
                return this.escapeChar(v);
            }

            default: {
                return v <= 127 ? String.fromCodePoint(v) : this.escapeChar(v);
            }
        }
    }

    /** @returns `"loop"` followed by the token index of the given AST node's token. */
    getLoopLabel(ast) {
        return `loop${ast.token.tokenIndex}`;
    }

    /** @returns `"cnt"` followed by the token index of the given AST node's token. */
    getLoopCounter(ast) {
        return `cnt${ast.token.tokenIndex}`;
    }

    /** @returns The rendered `ListLabelName` template for the given label. */
    getListLabel(label) {
        const st = this.templates.getInstanceOf("ListLabelName");
        st.add("label", label);

        return st.render();
    }

    /**
     * If we know which actual function, we can provide the actual ctx type. This will contain implicit labels etc...
     * From outside, though, we see only ParserRuleContext unless there are externally visible stuff like args, locals,
     * explicit labels, etc...
     *
     * @param ruleOrFunction Either a `Rule` or an object with a `rule` member.
     *
     * @returns The rendered `LexerRuleContext` template for rules of a lexer grammar, otherwise the capitalized
     *          rule name followed by the rendered `RuleContextNameSuffix` template.
     */
    getRuleFunctionContextStructName(ruleOrFunction) {
        const rule = ruleOrFunction instanceof Rule ? ruleOrFunction : ruleOrFunction.rule;
        if (rule.g.isLexer()) {
            return this.templates.getInstanceOf("LexerRuleContext").render();
        }

        return Utils.capitalize(rule.name) + this.templates.getInstanceOf("RuleContextNameSuffix").render();
    }

    /** @returns The capitalized label followed by the rendered `RuleContextNameSuffix` template. */
    getAltLabelContextStructName(label) {
        return Utils.capitalize(label) + this.templates.getInstanceOf("RuleContextNameSuffix").render();
    }

    /**
     * Should be same for all refs to same token like ctx.ID within single rule function for literals like 'while',
     * we gen s<ttype>.
     *
     * @param tokenName A token name or a literal (starting with a single quote).
     *
     * @returns `"s"` plus the token type for literals, otherwise the rendered `ImplicitTokenLabel` template.
     */
    getImplicitTokenLabel(tokenName) {
        const st = this.templates.getInstanceOf("ImplicitTokenLabel");
        const grammar = this.getCodeGenerator().g;
        const ttype = grammar.getTokenType(tokenName);
        if (tokenName.startsWith("'")) {
            return `s${ttype}`;
        }

        st.add("tokenName", this.getTokenTypeAsTargetLabel(grammar, ttype));

        return st.render();
    }

    /**
     * x=(A|B)
     *
     * @returns The rendered `ImplicitSetLabel` template for the given id.
     */
    getImplicitSetLabel(id) {
        const st = this.templates.getInstanceOf("ImplicitSetLabel");
        st.add("id", id);

        return st.render();
    }

    /** @returns The rendered `ImplicitRuleLabel` template for the given rule name. */
    getImplicitRuleLabel(ruleName) {
        const st = this.templates.getInstanceOf("ImplicitRuleLabel");
        st.add("ruleName", ruleName);

        return st.render();
    }

    /** @returns The rendered `ElementListName` template for the element name derived via {@link getElementName}. */
    getElementListName(name) {
        const st = this.templates.getInstanceOf("ElementListName");
        st.add("elemName", this.getElementName(name));

        return st.render();
    }

    /**
     * @param name A wildcard (`.`), a rule name or a token name.
     *
     * @returns `"_wild"` for the wildcard, the name itself if the grammar's `getRule` does not return `null` for it
     *          or if its token type is invalid, otherwise the target label of its token type.
     */
    getElementName(name) {
        if (name === ".") {
            return "_wild";
        }

        const grammar = this.getCodeGenerator().g;
        if (grammar.getRule(name) !== null) {
            return name;
        }

        const ttype = grammar.getTokenType(name);
        if (ttype === Token.INVALID_TYPE) {
            return name;
        }

        return this.getTokenTypeAsTargetLabel(grammar, ttype);
    }

    /**
     * Generate TParser.java and TLexer.java from T.g4 if combined, else just use T.java as output regardless of type.
     *
     * @param header Not used by this implementation.
     *
     * @returns The recognizer name of the grammar plus the rendered `codeFileExtension` template.
     */
    getRecognizerFileName(header) {
        const extension = this.templates.getInstanceOf("codeFileExtension").render();

        return this.gen.g.getRecognizerName() + extension;
    }

    /**
     * A given grammar T, return the listener name such as TListener.java, if we're using the Java target.
     *
     * @param header Not used by this implementation.
     *
     * @returns The grammar name plus `Listener` plus the rendered `codeFileExtension` template.
     */
    getListenerFileName(header) {
        const extension = this.templates.getInstanceOf("codeFileExtension").render();

        return `${this.gen.g.name}Listener${extension}`;
    }

    /**
     * A given grammar T, return the visitor name such as TVisitor.java, if we're using the Java target.
     *
     * @param header Not used by this implementation.
     *
     * @returns The grammar name plus `Visitor` plus the rendered `codeFileExtension` template.
     */
    getVisitorFileName(header) {
        const extension = this.templates.getInstanceOf("codeFileExtension").render();

        return `${this.gen.g.name}Visitor${extension}`;
    }

    /**
     * A given grammar T, return a blank listener implementation such as TBaseListener.java, if we're using the
     * Java target.
     *
     * @param header Not used by this implementation.
     *
     * @returns The grammar name plus `BaseListener` plus the rendered `codeFileExtension` template.
     */
    getBaseListenerFileName(header) {
        const extension = this.templates.getInstanceOf("codeFileExtension").render();

        return `${this.gen.g.name}BaseListener${extension}`;
    }

    /**
     * A given grammar T, return a blank visitor implementation such as TBaseVisitor.java, if we're using the
     * Java target.
     *
     * @param header Not used by this implementation.
     *
     * @returns The grammar name plus `BaseVisitor` plus the rendered `codeFileExtension` template.
     */
    getBaseVisitorFileName(header) {
        const extension = this.templates.getInstanceOf("codeFileExtension").render();

        return `${this.gen.g.name}BaseVisitor${extension}`;
    }

    /**
     * Gets the maximum number of 16-bit unsigned integers that can be encoded in a single segment (a declaration in
     * target language) of the serialized ATN. E.g., in C++, a small segment length results in multiple decls like:
     *
     *   static const int32_t serializedATNSegment1[] = {
     *     0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
     *        0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
     *        0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
     *        0x1b, 0x2, 0x1c, 0x7, 0x1c, 0x2, 0x1d, 0x7, 0x1d, 0x2, 0x1e, 0x7,
     *        0x1e, 0x2, 0x1f, 0x7, 0x1f, 0x2, 0x20, 0x7, 0x20, 0x2, 0x21, 0x7,
     *        0x21, 0x2, 0x22, 0x7, 0x22, 0x2, 0x23, 0x7, 0x23, 0x2, 0x24, 0x7,
     *        0x24, 0x2, 0x25, 0x7, 0x25, 0x2, 0x26,
     *   };
     *
     * instead of one big one. Targets are free to ignore this like JavaScript does.
     *
     * This is primarily needed by Java target to limit size of any single ATN string to 65k length.
     *
     * {@link SerializedATN.getSegments}
     *
     * @returns the serialized ATN segment limit (`Number.MAX_VALUE` in this base implementation).
     */
    getSerializedATNSegmentLimit() {
        return Number.MAX_VALUE;
    }

    /**
     * How many bits should be used to do inline token type tests? Java assumes a 64-bit word for bitsets. Must be a
     * valid word size for your target like 8, 16, 32, 64, etc...
     *
     * @returns 64 in this base implementation.
     */
    getInlineTestSetWordSize() {
        return 64;
    }

    /**
     * Determines if the text of the given id node is listed in `reservedWords`. Some tree positions are exempt
     * and always produce `false`, see the comments in the code.
     *
     * @param idNode The tree node to check.
     *
     * @returns True if the node is not in an exempt position and its text is a reserved word.
     */
    grammarSymbolCausesIssueInGeneratedCode(idNode) {
        switch (idNode.parent?.getType()) {
            case ANTLRv4Parser.ASSIGN: {
                // Assignments within (element) options are exempt.
                switch (idNode.parent.parent?.getType()) {
                    case ANTLRv4Parser.ELEMENT_OPTIONS:
                    case ANTLRv4Parser.OPTIONS: {
                        return false;
                    }

                    default:
                }
                break;
            }

            case ANTLRv4Parser.AT:
            case ANTLRv4Parser.ELEMENT_OPTIONS: {
                return false;
            }

            case ANTLRv4Parser.LEXER_ACTION_CALL: {
                // The first child of a lexer action call is exempt.
                if (idNode.childIndex === 0) {
                    return false;
                }
                break;
            }

            default:
        }

        return this.reservedWords.has(idNode.getText());
    }

    /** @returns True if the template group file for the generator's language can be loaded (errors not reported). */
    templatesExist() {
        return this.loadTemplatesHelper(false) !== undefined;
    }

    /** @returns True in this base implementation. */
    wantsBaseListener() {
        return true;
    }

    /** @returns True in this base implementation. */
    wantsBaseVisitor() {
        return true;
    }

    /** @returns True in this base implementation. */
    supportsOverloadedMethods() {
        return true;
    }

    /** @returns True in this base implementation. Makes {@link encodeInt16AsCharEscape} emit decimal numbers. */
    isATNSerializedAsInts() {
        return true;
    }

    /** @returns False in this base implementation. */
    needsHeader() {
        return false;
    }

    /**
     * Passes the given output template and file name on to the `write` method of the code generator.
     *
     * @param g Not used by this implementation.
     * @param outputFileST The template to write.
     * @param fileName The name of the file to write to.
     */
    genFile(g, outputFileST, fileName) {
        this.getCodeGenerator().write(outputFileST, fileName);
    }

    /** @returns The given word with an underscore appended. */
    escapeWord(word) {
        return `${word}_`;
    }

    /**
     * Escape the Unicode code point appropriately for this language. It exists for flexibility and backward
     * compatibility with external targets. This implementation delegates to `UnicodeEscapes.escapeCodePoint`,
     * using the language of the code generator.
     *
     * @param codePoint The code point to escape.
     * @param escape If truthy, a backslash is prepended to the escape sequence.
     *
     * @returns The escape sequence.
     */
    createUnicodeEscapedCodePoint(codePoint, escape) {
        const escaped = UnicodeEscapes.escapeCodePoint(codePoint, this.gen.language);

        return escape ? "\\" + escaped : escaped;
    }

    /**
     * @returns True for code points below 32 (space), for the backslash (92) and for everything from 127 upwards.
     */
    shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint) {
        return codePoint < 0x20 || codePoint === 0x5C || codePoint >= 0x7F;
    }

    /** @returns `\u` followed by the value as hexadecimal number, left-padded with zeros to at least 4 digits. */
    escapeChar(v) {
        return `\\u${v.toString(16).padStart(4, "0")}`;
    }

    /**
     * Loads the template group for the generator's language, registers number and string renderers with it and
     * installs an error listener which reports all template errors as `StringTemplateWarning` tool errors.
     *
     * @returns The prepared template group, or `undefined` if it could not be loaded (the failure has then already
     *          been reported by {@link loadTemplatesHelper}).
     */
    loadTemplates() {
        const result = this.loadTemplatesHelper(true);
        if (!result) {
            return undefined;
        }

        result.registerRenderer(Number, new NumberRenderer());
        result.registerRenderer(String, new StringRenderer());

        const reportError = (msg) => {
            this.gen.g.tool.errorManager.toolError(IssueCode.StringTemplateWarning, msg.toString());
        };

        result.setListener({
            compileTimeError: reportError,
            runTimeError: reportError,
            internalError: reportError,
            iOError: reportError,
            reportError
        });

        return result;
    }

    /**
     * Creates the template group from the group file `<language>/<language><group file extension>` in the
     * `templates/codegen` folder, which is resolved relative to this module.
     *
     * @param reportErrorIfFail If truthy, a failure is reported as `MissingCodeGenTemplates` tool error.
     *
     * @returns The template group, or `undefined` if creating it threw an error.
     */
    loadTemplatesHelper(reportErrorIfFail) {
        const language = this.gen.language;
        const relativePath = `../../templates/codegen/${language}/${language}${STGroup.GROUP_FILE_EXTENSION}`;
        const groupFileName = fileURLToPath(new URL(relativePath, import.meta.url));

        try {
            return new STGroupFile(groupFileName);
        } catch (e) {
            if (reportErrorIfFail) {
                this.gen.g.tool.errorManager.toolError(IssueCode.MissingCodeGenTemplates, e, language);
            }

            return undefined;
        }
    }
}
export {
Target
};