// antlr-ng
// Version:
// Next generation ANTLR Tool
// 243 lines (242 loc) • 8 kB
// JavaScript
// Local alias for Object.defineProperty, used by the helper below.
var __defProp = Object.defineProperty;

/**
 * Defines a configurable `name` property with the given value on `target`.
 *
 * @param target The object (typically a function or class) to label.
 * @param value The value to store in the `name` property.
 *
 * @returns whatever Object.defineProperty returns, i.e. `target` itself.
 */
var __name = (target, value) => {
    const descriptor = { value, configurable: true };

    return __defProp(target, "name", descriptor);
};
import { Character } from "../support/Character.js";
import { IssueCode } from "../tool/Issues.js";
/**
 * Static helpers for converting between code points and the char/string literal notation used in grammars
 * (plain chars, single-letter escapes such as \n, and the Unicode escapes \uXXXX and \u{XXXXXX}).
 */
class CharSupport {
    static {
        // Gives the class an explicit `name` property via the module-level `__name` helper.
        __name(this, "CharSupport");
    }

    /** When converting ANTLR char and string literals, here is the value set of escape chars. */
    static ANTLRLiteralEscapedCharValue = /* @__PURE__ */ new Map([
        ["n", "\n".codePointAt(0)],
        ["r", "\r".codePointAt(0)],
        // Written as an explicit escape: a raw whitespace character here is easily turned into a space.
        ["t", "\t".codePointAt(0)],
        ["b", "\b".codePointAt(0)],
        ["f", "\f".codePointAt(0)],
        ["\\", "\\".codePointAt(0)]
    ]);

    /** Given a char, we need to be able to show as an ANTLR literal. */
    static ANTLRLiteralCharValueEscape = /* @__PURE__ */ new Map([
        ["\n".codePointAt(0), "\\n"],
        ["\r".codePointAt(0), "\\r"],
        ["\t".codePointAt(0), "\\t"],
        ["\b".codePointAt(0), "\\b"],
        ["\f".codePointAt(0), "\\f"],
        ["\\".codePointAt(0), "\\\\"]
    ]);

    /** Matches a non-empty string that consists only of hexadecimal digits. */
    static hexRegex = /^[0-9A-Fa-f]+$/;

    /**
     * @param c The code point to convert to an ANTLR char literal.
     *
     * @returns a string representing the escaped char for code c, wrapped in single quotes. E.g., if c has
     *          value 0x100, you will get "'\\u0100'". Printable Basic Latin characters are emitted as
     *          themselves (with ' and \ escaped), characters from the escape table use their short escape,
     *          and everything else is written as \\uXXXX or \\u{XXXXXX}. Negative values yield "'<INVALID>'".
     */
    static getANTLRCharLiteralForChar(c) {
        let result;
        if (c < 0) {
            result = "<INVALID>";
        } else {
            const charValueEscape = CharSupport.ANTLRLiteralCharValueEscape.get(c);
            if (charValueEscape !== undefined) {
                result = charValueEscape;
            } else if (Character.UnicodeBlock.of(c) === Character.UnicodeBlock.BASIC_LATIN
                && !Character.isISOControl(c)) {
                if (c === 92) {
                    // Backslash. Normally already covered by the escape table lookup above.
                    result = "\\\\";
                } else if (c === 39) {
                    // A single quote must be escaped because it also delimits the literal.
                    result = "\\'";
                } else {
                    result = String.fromCodePoint(c);
                }
            } else if (c <= 65535) {
                // Fits into 4 hex digits.
                result = "\\u" + ("0000" + c.toString(16).toUpperCase()).slice(-4);
            } else {
                result = "\\u{" + ("000000" + c.toString(16).toUpperCase()).slice(-6) + "}";
            }
        }

        return "'" + result + "'";
    }

    /**
     * Given a literal like (the 3 char sequence with single quotes) 'a', returns the int value of 'a'. Converts
     * escape sequences here also.
     *
     * @param literal The char literal to convert (including the enclosing quotes).
     *
     * @returns the code point value of the char literal or -1 if not a single char literal.
     */
    static getCharValueFromGrammarCharLiteral(literal) {
        // Two delimiters plus at least one content character are required.
        if (literal.length < 3) {
            return -1;
        }

        return CharSupport.getCharValueFromCharInGrammarLiteral(literal.substring(1, literal.length - 1));
    }

    /**
     * Scans the given literal for escape sequences and returns the string.
     *
     * The first and the last character of the literal are taken to be its delimiters and are skipped. Scanning
     * continues after an invalid escape sequence, so that every problem in the literal can be reported.
     *
     * @param literal The string literal to examine.
     * @param grammar The grammar with details for error reporting. Errors are only reported if both the grammar
     *                and the position are given.
     * @param position The position of the literal in the input string (needed for error reporting).
     *
     * @returns the string value of the literal or null if the literal is invalid. An escape that denotes a
     *          value beyond the Unicode range (> 0x10FFFF) makes the literal invalid as well.
     */
    static getStringFromGrammarStringLiteral(literal, grammar, position) {
        // Set by reportError, reset for every scanned element. Prevents the generic checks at the end of an
        // iteration from reporting the same element again.
        let reported = false;
        const reportError = /* @__PURE__ */ __name((invalid, offset) => {
            reported = true;
            if (grammar && position) {
                grammar.tool.errorManager.grammarError(
                    IssueCode.InvalidEscapeSequence,
                    grammar.fileName,
                    { line: position.line, column: position.column + offset },
                    invalid
                );
            }
        }, "reportError");

        // True for the characters accepted inside \uXXXX and \u{...} escapes.
        const isHexDigit = (ch) => {
            return Character.isDigit(ch.codePointAt(0)) || (ch >= "a" && ch <= "f") || (ch >= "A" && ch <= "F");
        };

        let buffer = "";
        let i = 1; // Skip the opening delimiter.
        const n = literal.length - 1; // Index of the closing delimiter.
        let isInvalid = false;

        while (i < n) {
            reported = false;

            // `end` is the (exclusive) end index of the element that starts at `i`.
            let end = i + 1;
            if (literal.charAt(i) === "\\") {
                end = i + 2;
                if (i + 1 < n && literal.charAt(i + 1) === "u") {
                    if (i + 2 < n && literal.charAt(i + 2) === "{") {
                        // \u{XXXXXX}: consume hex digits up to and including the closing brace.
                        end = i + 3;
                        while (true) {
                            if (end + 1 > n) {
                                // Ran into the closing delimiter without seeing a "}".
                                reportError(literal.substring(i, end), i);
                                isInvalid = true;
                                break;
                            }

                            const charAt = literal.charAt(end++);
                            if (charAt === "}") {
                                break;
                            }

                            if (!isHexDigit(charAt)) {
                                reportError(literal.substring(i, end - 1), i);
                                isInvalid = true;
                                break;
                            }
                        }
                    } else {
                        // \uXXXX: exactly four hex digits have to follow.
                        for (end = i + 2; end < i + 6; end++) {
                            if (end > n) {
                                // Reported by the `end > n` check below.
                                isInvalid = true;
                                break;
                            } else {
                                const charAt = literal.charAt(end);
                                if (!isHexDigit(charAt)) {
                                    // Include the offending char in the message, but never the delimiter.
                                    const actualEnd = end >= n ? n : end + 1;
                                    reportError(literal.substring(i, actualEnd), i);
                                    isInvalid = true;
                                }
                            }
                        }
                    }
                }
            }

            if (end > n) {
                // The element extends beyond the closing delimiter.
                if (!reported) {
                    reportError(literal.substring(i, end), i);
                }
                isInvalid = true;
            } else {
                const esc = literal.substring(i, end);
                const c = CharSupport.getCharValueFromCharInGrammarLiteral(esc);

                // Values above 0x10FFFF are not code points. String.fromCodePoint would throw a RangeError
                // for them, so they are handled like any other invalid escape sequence.
                if (c === -1 || c > 0x10ffff) {
                    if (!reported) {
                        reportError(literal.substring(i, end), i);
                    }
                    isInvalid = true;
                } else {
                    buffer += String.fromCodePoint(c);
                }
            }

            i = end;
        }

        if (isInvalid) {
            return null;
        }

        return buffer;
    }

    /**
     * Given char x or \\t or \\u1234 returns the char value. Unnecessary escapes like '\{' yield -1.
     *
     * @param cstr The char to convert (without enclosing quotes).
     *
     * @returns the code point value of the char or -1 if not a single char literal.
     */
    static getCharValueFromCharInGrammarLiteral(cstr) {
        switch (cstr.length) {
            case 1: {
                // A plain character.
                return cstr.codePointAt(0);
            }

            case 2: {
                // A single-letter escape like \n.
                if (!cstr.startsWith("\\")) {
                    return -1;
                }

                const escapedChar = cstr[1];
                if (escapedChar === "'") {
                    return escapedChar.codePointAt(0);
                }

                return CharSupport.ANTLRLiteralEscapedCharValue.get(escapedChar) ?? -1;
            }

            case 6: {
                // Either \uXXXX or a short braced form like \u{XX}.
                if (!cstr.startsWith("\\u")) {
                    return -1;
                }

                let startOff;
                let endOff;
                if (cstr.charAt(2) === "{") {
                    startOff = 3;
                    endOff = cstr.indexOf("}");
                } else {
                    startOff = 2;
                    endOff = cstr.length;
                }

                return CharSupport.parseHexValue(cstr, startOff, endOff);
            }

            default: {
                // Any other length is only valid for the braced form \u{...}.
                if (cstr.startsWith("\\u{")) {
                    return CharSupport.parseHexValue(cstr, 3, cstr.indexOf("}"));
                }

                return -1;
            }
        }
    }

    /**
     * Parses the hexadecimal number located in a part of the given string.
     *
     * @param cstr The string containing the hex digits.
     * @param startOff The index of the first hex digit.
     * @param endOff The index after the last hex digit.
     *
     * @returns the parsed value, or -1 if an offset is negative or the addressed part is empty or contains
     *          anything but hex digits. The value is not checked against the Unicode range.
     */
    static parseHexValue(cstr, startOff, endOff) {
        if (startOff < 0 || endOff < 0) {
            return -1;
        }

        const hexString = cstr.substring(startOff, endOff);
        if (!CharSupport.hexRegex.test(hexString)) {
            return -1;
        }

        return parseInt(hexString, 16);
    }

    /**
     * @param s The string to convert.
     *
     * @returns the given string with its first character passed through `Character.toUpperCase`.
     */
    static capitalize(s) {
        return Character.toUpperCase(s.charAt(0)) + s.substring(1);
    }

    /**
     * @param intervalSet An iterable of intervals, each with a `start` and a `stop` code point.
     *
     * @returns the escaped form of all intervals (see getRangeEscapedString), joined with " | ".
     */
    static getIntervalSetEscapedString(intervalSet) {
        const parts = [];
        for (const interval of intervalSet) {
            parts.push(CharSupport.getRangeEscapedString(interval.start, interval.stop));
        }

        return parts.join(" | ");
    }

    /**
     * @param codePointStart The first code point of the range.
     * @param codePointEnd The last code point of the range.
     *
     * @returns a single char literal if both code points are equal, otherwise two char literals
     *          separated by "..".
     */
    static getRangeEscapedString(codePointStart, codePointEnd) {
        const start = CharSupport.getANTLRCharLiteralForChar(codePointStart);
        if (codePointStart === codePointEnd) {
            return start;
        }

        return start + ".." + CharSupport.getANTLRCharLiteralForChar(codePointEnd);
    }
}
;
export {
CharSupport
};