unraw
Version:
Convert raw escape sequences to their respective characters (undo String.raw).
190 lines (189 loc) • 7.64 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.unraw = exports.errorMessages = exports.ErrorType = void 0;
const errors_1 = require("./errors");
Object.defineProperty(exports, "ErrorType", { enumerable: true, get: function () { return errors_1.ErrorType; } });
Object.defineProperty(exports, "errorMessages", { enumerable: true, get: function () { return errors_1.errorMessages; } });
/**
* Parse a string as a base-16 number. This is more strict than `parseInt` as it
* will not allow any other characters, including (for example) "+", "-", and
* ".".
* @param hex A string containing a hexadecimal number.
* @returns The parsed integer, or `NaN` if the string is not a valid hex
* number.
*/
function parseHexToInt(hex) {
const isOnlyHexChars = !hex.match(/[^a-f0-9]/i);
return isOnlyHexChars ? parseInt(hex, 16) : NaN;
}
/**
* Check the validity and length of a hexadecimal code and optionally enforces
* a specific number of hex digits.
* @param hex The string to validate and parse.
* @param errorName The name of the error message to throw a `SyntaxError` with
* if `hex` is invalid. This is used to index `errorMessages`.
* @param enforcedLength If provided, will throw an error if `hex` is not
* exactly this many characters.
* @returns The parsed hex number as a normal number.
* @throws {SyntaxError} If the code is not valid.
*/
function validateAndParseHex(hex, errorName, enforcedLength) {
const parsedHex = parseHexToInt(hex);
if (Number.isNaN(parsedHex) ||
(enforcedLength !== undefined && enforcedLength !== hex.length)) {
throw new SyntaxError(errors_1.errorMessages.get(errorName));
}
return parsedHex;
}
/**
* Parse a two-digit hexadecimal character escape code.
* @param code The two-digit hexadecimal number that represents the character to
* output.
* @returns The single character represented by the code.
* @throws {SyntaxError} If the code is not valid hex or is not the right
* length.
*/
function parseHexadecimalCode(code) {
const parsedCode = validateAndParseHex(code, errors_1.ErrorType.MalformedHexadecimal, 2);
return String.fromCharCode(parsedCode);
}
/**
* Parse a four-digit Unicode character escape code.
* @param code The four-digit unicode number that represents the character to
* output.
* @param surrogateCode Optional four-digit unicode surrogate that represents
* the other half of the character to output.
* @returns The single character represented by the code.
* @throws {SyntaxError} If the codes are not valid hex or are not the right
* length.
*/
function parseUnicodeCode(code, surrogateCode) {
const parsedCode = validateAndParseHex(code, errors_1.ErrorType.MalformedUnicode, 4);
if (surrogateCode !== undefined) {
const parsedSurrogateCode = validateAndParseHex(surrogateCode, errors_1.ErrorType.MalformedUnicode, 4);
return String.fromCharCode(parsedCode, parsedSurrogateCode);
}
return String.fromCharCode(parsedCode);
}
/**
* Test if the text is surrounded by curly braces (`{}`).
* @param text Text to check.
* @returns `true` if the text is in the form `{*}`.
*/
function isCurlyBraced(text) {
return text.charAt(0) === "{" && text.charAt(text.length - 1) === "}";
}
/**
* Parse a Unicode code point character escape code.
* @param codePoint A unicode escape code point, including the surrounding curly
* braces.
* @returns The single character represented by the code.
* @throws {SyntaxError} If the code is not valid hex or does not have the
* surrounding curly braces.
*/
function parseUnicodeCodePointCode(codePoint) {
if (!isCurlyBraced(codePoint)) {
throw new SyntaxError(errors_1.errorMessages.get(errors_1.ErrorType.MalformedUnicode));
}
const withoutBraces = codePoint.slice(1, -1);
const parsedCode = validateAndParseHex(withoutBraces, errors_1.ErrorType.MalformedUnicode);
try {
return String.fromCodePoint(parsedCode);
}
catch (err) {
throw err instanceof RangeError
? new SyntaxError(errors_1.errorMessages.get(errors_1.ErrorType.CodePointLimit))
: err;
}
}
// Have to give overload that takes boolean for when compiler doesn't know if
// true or false
function parseOctalCode(code, error = false) {
if (error) {
throw new SyntaxError(errors_1.errorMessages.get(errors_1.ErrorType.OctalDeprecation));
}
// The original regex only allows digits so we don't need to have a strict
// octal parser like hexToInt. Length is not enforced for octals.
const parsedCode = parseInt(code, 8);
return String.fromCharCode(parsedCode);
}
/**
* Map of unescaped letters to their corresponding special JS escape characters.
* Intentionally does not include characters that map to themselves like "\'".
*/
const singleCharacterEscapes = new Map([
["b", "\b"],
["f", "\f"],
["n", "\n"],
["r", "\r"],
["t", "\t"],
["v", "\v"],
["0", "\0"]
]);
/**
* Parse a single character escape sequence and return the matching character.
* If none is matched, defaults to `code`.
* @param code A single character code.
*/
function parseSingleCharacterCode(code) {
return singleCharacterEscapes.get(code) || code;
}
/**
* Matches every escape sequence possible, including invalid ones.
*
* All capture groups (described below) are unique (only one will match), except
* for 4, which can only potentially match if 3 does.
*
* **Capture Groups:**
* 0. A single backslash
* 1. Hexadecimal code
* 2. Unicode code point code with surrounding curly braces
* 3. Unicode escape code with surrogate
* 4. Surrogate code
* 5. Unicode escape code without surrogate
* 6. Octal code _NOTE: includes "0"._
* 7. A single character (will never be \, x, u, or 0-3)
*/
const escapeMatch = /\\(?:(\\)|x([\s\S]{0,2})|u(\{[^}]*\}?)|u([\s\S]{4})\\u([^{][\s\S]{0,3})|u([\s\S]{0,4})|([0-3]?[0-7]{1,2})|([\s\S])|$)/g;
/**
* Replace raw escape character strings with their escape characters.
* @param raw A string where escape characters are represented as raw string
* values like `\'` rather than `'`.
* @param allowOctals If `true`, will process the now-deprecated octal escape
* sequences (ie, `\111`).
* @returns The processed string, with escape characters replaced by their
* respective actual Unicode characters.
*/
function unraw(raw, allowOctals = false) {
return raw.replace(escapeMatch, function (_, backslash, hex, codePoint, unicodeWithSurrogate, surrogate, unicode, octal, singleCharacter) {
// Compare groups to undefined because empty strings mean different errors
// Otherwise, `\u` would fail the same as `\` which is wrong.
if (backslash !== undefined) {
return "\\";
}
if (hex !== undefined) {
return parseHexadecimalCode(hex);
}
if (codePoint !== undefined) {
return parseUnicodeCodePointCode(codePoint);
}
if (unicodeWithSurrogate !== undefined) {
return parseUnicodeCode(unicodeWithSurrogate, surrogate);
}
if (unicode !== undefined) {
return parseUnicodeCode(unicode);
}
if (octal === "0") {
return "\0";
}
if (octal !== undefined) {
return parseOctalCode(octal, !allowOctals);
}
if (singleCharacter !== undefined) {
return parseSingleCharacterCode(singleCharacter);
}
throw new SyntaxError(errors_1.errorMessages.get(errors_1.ErrorType.EndOfString));
});
}
exports.unraw = unraw;
exports.default = unraw;
;