UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

243 lines (242 loc) 8 kB
import { Character } from "../support/Character.js";
import { IssueCode } from "../tool/Issues.js";

// Bundler-emitted helper that pins the `name` property of a class or function.
const __defProp = Object.defineProperty;
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

/** Largest value accepted by `String.fromCodePoint`; anything above it makes that call throw a RangeError. */
const MAX_CODE_POINT = 0x10FFFF;

/**
 * Tells whether a single character is accepted as a hex digit while scanning `\u` escapes in a string literal.
 *
 * @param ch A one-character string.
 *
 * @returns true if `Character.isDigit` accepts the character or it is one of a-f / A-F.
 */
const isHexDigitChar = (ch) => {
    return Character.isDigit(ch.codePointAt(0)) || (ch >= "a" && ch <= "f") || (ch >= "A" && ch <= "F");
};

/** Static helpers for converting between code points and ANTLR grammar char/string literals. */
class CharSupport {
    static {
        __name(this, "CharSupport");
    }

    /** When converting ANTLR char and string literals, here is the value set of escape chars. */
    static ANTLRLiteralEscapedCharValue = /* @__PURE__ */ new Map([
        ["n", "\n".codePointAt(0)],
        ["r", "\r".codePointAt(0)],
        // Written as an explicit escape so the entry cannot silently degrade into a space character.
        ["t", "\t".codePointAt(0)],
        ["b", "\b".codePointAt(0)],
        ["f", "\f".codePointAt(0)],
        ["\\", "\\".codePointAt(0)]
    ]);

    /** Given a char, we need to be able to show as an ANTLR literal. */
    static ANTLRLiteralCharValueEscape = /* @__PURE__ */ new Map([
        ["\n".codePointAt(0), "\\n"],
        ["\r".codePointAt(0), "\\r"],
        ["\t".codePointAt(0), "\\t"],
        ["\b".codePointAt(0), "\\b"],
        ["\f".codePointAt(0), "\\f"],
        ["\\".codePointAt(0), "\\\\"]
    ]);

    /** Matches a non-empty string that consists of hex digits only. */
    static hexRegex = /^[0-9A-Fa-f]+$/;

    /**
     * Converts a code point to an ANTLR char literal, including the surrounding single quotes.
     *
     * @param c The code point to convert to an ANTLR char literal.
     *
     * @returns a string representing the escaped char for code c. E.g., if c has value 0x100, you will get
     *          "'\\u0100'". Basic Latin characters that are not control characters are emitted as they are
     *          (the single quote is escaped). Everything else is emitted as \\uXXXX or \\u{XXXXXX} escape.
     *          Negative values yield "'<INVALID>'".
     */
    static getANTLRCharLiteralForChar(c) {
        if (c < 0) {
            return "'<INVALID>'";
        }

        let text;
        const knownEscape = CharSupport.ANTLRLiteralCharValueEscape.get(c);
        if (knownEscape !== undefined) {
            // Covers \n, \r, \t, \b, \f and the backslash itself, so no separate backslash check is needed below.
            text = knownEscape;
        } else if (Character.UnicodeBlock.of(c) === Character.UnicodeBlock.BASIC_LATIN && !Character.isISOControl(c)) {
            // 39 is the single quote, which would otherwise terminate the literal.
            text = c === 39 ? "\\'" : String.fromCodePoint(c);
        } else {
            const hex = c.toString(16).toUpperCase();
            text = c <= 0xFFFF ? "\\u" + hex.padStart(4, "0") : "\\u{" + hex.padStart(6, "0") + "}";
        }

        return `'${text}'`;
    }

    /**
     * Given a literal like (the 3 char sequence with single quotes) 'a', returns the int value of 'a'. Converts
     * escape sequences here also.
     *
     * @param literal The char literal to convert (including its quotes).
     *
     * @returns the code point value of the char literal or -1 if not a single char literal.
     */
    static getCharValueFromGrammarCharLiteral(literal) {
        if (literal.length < 3) {
            return -1;
        }

        return CharSupport.getCharValueFromCharInGrammarLiteral(literal.substring(1, literal.length - 1));
    }

    /**
     * Scans the given literal for escape sequences and returns the unescaped string. The first and the last
     * character of the literal (the quotes) are skipped.
     *
     * @param literal The string literal to examine.
     * @param grammar The grammar with details for error reporting. Errors are only reported if both the grammar
     *                and the position are given.
     * @param position The position (line/column) of the literal in the input (needed for error reporting).
     *
     * @returns the string value of the literal or null if the literal is invalid.
     */
    static getStringFromGrammarStringLiteral(literal, grammar, position) {
        // Tracks whether the sequence currently being scanned already produced an error, to avoid a second,
        // more generic report for it further down.
        let reported = false;
        const reportError = (invalid, offset) => {
            reported = true;
            if (grammar && position) {
                grammar.tool.errorManager.grammarError(
                    IssueCode.InvalidEscapeSequence,
                    grammar.fileName,
                    { line: position.line, column: position.column + offset },
                    invalid
                );
            }
        };

        let buffer = "";
        let isInvalid = false;

        // Index of the closing quote. Content lies in the range [1, n).
        const n = literal.length - 1;
        let i = 1;
        while (i < n) {
            reported = false;

            // `end` is the exclusive end index of the char or escape sequence that starts at `i`.
            let end = i + 1;
            if (literal.charAt(i) === "\\") {
                end = i + 2;
                if (i + 1 < n && literal.charAt(i + 1) === "u") {
                    if (i + 2 < n && literal.charAt(i + 2) === "{") {
                        // \u{X...}: consume hex digits up to and including the closing brace.
                        end = i + 3;
                        while (true) {
                            if (end + 1 > n) {
                                // Ran into the closing quote without seeing "}".
                                reportError(literal.substring(i, end), i);
                                isInvalid = true;
                                break;
                            }

                            const ch = literal.charAt(end++);
                            if (ch === "}") {
                                break;
                            }

                            if (!isHexDigitChar(ch)) {
                                reportError(literal.substring(i, end - 1), i);
                                isInvalid = true;
                                break;
                            }
                        }
                    } else {
                        // \uXXXX: exactly four hex digits. A non-hex char is reported, but the scan continues over
                        // the remaining digit positions, so `end` always lands after the fourth position (unless
                        // the literal ends before that).
                        for (end = i + 2; end < i + 6; end++) {
                            if (end > n) {
                                isInvalid = true;
                                break;
                            }

                            const ch = literal.charAt(end);
                            if (!isHexDigitChar(ch)) {
                                const actualEnd = end >= n ? n : end + 1;
                                reportError(literal.substring(i, actualEnd), i);
                                isInvalid = true;
                            }
                        }
                    }
                }
            }

            if (end > n) {
                // The sequence extends beyond the closing quote.
                if (!reported) {
                    reportError(literal.substring(i, end), i);
                }
                isInvalid = true;
            } else {
                const esc = literal.substring(i, end);
                const c = CharSupport.getCharValueFromCharInGrammarLiteral(esc);

                // Values above the Unicode range (e.g. \u{110000}) are well-formed hex, but would make
                // String.fromCodePoint throw. Treat them like any other invalid escape sequence instead.
                if (c === -1 || c > MAX_CODE_POINT) {
                    if (!reported) {
                        reportError(esc, i);
                    }
                    isInvalid = true;
                } else {
                    buffer += String.fromCodePoint(c);
                }
            }

            i = end;
        }

        return isInvalid ? null : buffer;
    }

    /**
     * Given char x or \\t or \\u1234 returns the char value. Unnecessary escapes like '\{' yield -1.
     *
     * @param cstr The char to convert (without quotes).
     *
     * @returns the code point value of the char or -1 if not a single char literal.
     */
    static getCharValueFromCharInGrammarLiteral(cstr) {
        switch (cstr.length) {
            case 1: {
                // Plain char, no escape.
                return cstr.codePointAt(0);
            }

            case 2: {
                // Single char escape like \n or \'.
                if (!cstr.startsWith("\\")) {
                    return -1;
                }

                const escapedChar = cstr[1];
                if (escapedChar === "'") {
                    return escapedChar.codePointAt(0);
                }

                return CharSupport.ANTLRLiteralEscapedCharValue.get(escapedChar) ?? -1;
            }

            case 6: {
                // Either \uXXXX or a short braced form like \u{XX}.
                if (!cstr.startsWith("\\u")) {
                    return -1;
                }

                if (cstr.charAt(2) === "{") {
                    return CharSupport.parseHexValue(cstr, 3, cstr.indexOf("}"));
                }

                return CharSupport.parseHexValue(cstr, 2, cstr.length);
            }

            default: {
                // Only the braced form can have any other length. A missing "}" gives -1 from indexOf, which
                // parseHexValue rejects.
                if (cstr.startsWith("\\u{")) {
                    return CharSupport.parseHexValue(cstr, 3, cstr.indexOf("}"));
                }

                return -1;
            }
        }
    }

    /**
     * Parses the hex number found in the given range of a string.
     *
     * @param cstr The string containing the hex digits.
     * @param startOff The index of the first hex digit.
     * @param endOff The index after the last hex digit.
     *
     * @returns the parsed value or -1 if an offset is negative or the range is empty or contains anything but
     *          hex digits.
     */
    static parseHexValue(cstr, startOff, endOff) {
        if (startOff < 0 || endOff < 0) {
            return -1;
        }

        const hexString = cstr.substring(startOff, endOff);
        if (!CharSupport.hexRegex.test(hexString)) {
            return -1;
        }

        return Number.parseInt(hexString, 16);
    }

    /**
     * @param s The string to convert.
     *
     * @returns the given string with its first char passed through `Character.toUpperCase`.
     */
    static capitalize(s) {
        return Character.toUpperCase(s.charAt(0)) + s.substring(1);
    }

    /**
     * @param intervalSet An iterable of intervals, each with a `start` and a `stop` code point.
     *
     * @returns the escaped representations of all intervals, joined by " | ".
     */
    static getIntervalSetEscapedString(intervalSet) {
        const parts = [];
        for (const interval of intervalSet) {
            parts.push(CharSupport.getRangeEscapedString(interval.start, interval.stop));
        }

        return parts.join(" | ");
    }

    /**
     * @param codePointStart The first code point of the range.
     * @param codePointEnd The last code point of the range.
     *
     * @returns a single char literal if both code points are equal, otherwise two char literals joined by "..".
     */
    static getRangeEscapedString(codePointStart, codePointEnd) {
        const first = CharSupport.getANTLRCharLiteralForChar(codePointStart);
        if (codePointStart === codePointEnd) {
            return first;
        }

        return first + ".." + CharSupport.getANTLRCharLiteralForChar(codePointEnd);
    }
}

export { CharSupport };