@firehammer/jexl
Version:
Javascript Expression Language: Powerful context-based expression parser and evaluator
259 lines (247 loc) • 9.47 kB
JavaScript
;
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
/*
* Jexl
* Copyright 2020 Tom Shawver
*/
var numericRegex = /^-?(?:(?:[0-9]*\.[0-9]+)|[0-9]+)$/;
var identRegex = /^[a-zA-Zа-яА-Я_\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF$][a-zA-Zа-яА-Я0-9_\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF$]*$/;
var escEscRegex = /\\\\/g;
var whitespaceRegex = /^\s*$/;
var preOpRegexElems = [
// Strings
'"(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"', "'(?:[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'",
// Whitespace
"\\s+",
// Booleans
"\\btrue\\b", "\\bfalse\\b"];
var postOpRegexElems = [
// Identifiers
"[a-zA-Z\u0430-\u044F\u0410-\u042F_\xC0-\xD6\xD8-\xF6\xF8-\xFF\\$][a-zA-Z0-9\u0430-\u044F\u0410-\u042F_\xC0-\xD6\xD8-\xF6\xF8-\xFF\\$]*",
// Numerics (without negative symbol)
"(?:(?:[0-9]*\\.[0-9]+)|[0-9]+)"];
var minusNegatesAfter = ["binaryOp", "unaryOp", "openParen", "openBracket", "question", "colon"];
/**
* Lexer is a collection of stateless, statically-accessed functions for the
* lexical parsing of a Jexl string. Its responsibility is to identify the
* "parts of speech" of a Jexl expression, and tokenize and label each, but
* to do only the most minimal syntax checking; the only errors the Lexer
* should be concerned with are if it's unable to identify the utility of
* any of its tokens. Errors stemming from these tokens not being in a
* sensible configuration should be left for the Parser to handle.
* @type {{}}
*/
var Lexer = /*#__PURE__*/function () {
function Lexer(grammar) {
(0, _classCallCheck2.default)(this, Lexer);
this._grammar = grammar;
}
/**
* Splits a Jexl expression string into an array of expression elements.
* @param {string} str A Jexl expression string
* @returns {Array<string>} An array of substrings defining the functional
* elements of the expression.
*/
(0, _createClass2.default)(Lexer, [{
key: "getElements",
value: function getElements(str) {
var regex = this._getSplitRegex();
return str.split(regex).filter(function (elem) {
// Remove empty strings
return elem;
});
}
/**
* Converts an array of expression elements into an array of tokens. Note that
* the resulting array may not equal the element array in length, as any
* elements that consist only of whitespace get appended to the previous
* token's "raw" property. For the structure of a token object, please see
* {@link Lexer#tokenize}.
* @param {Array<string>} elements An array of Jexl expression elements to be
* converted to tokens
* @returns {Array<{type, value, raw}>} an array of token objects.
*/
}, {
key: "getTokens",
value: function getTokens(elements) {
var tokens = [];
var negate = false;
for (var i = 0; i < elements.length; i++) {
if (this._isWhitespace(elements[i])) {
if (tokens.length) {
tokens[tokens.length - 1].raw += elements[i];
}
} else if (elements[i] === "-" && this._isNegative(tokens)) {
negate = true;
} else {
if (negate) {
elements[i] = "-" + elements[i];
negate = false;
}
tokens.push(this._createToken(elements[i]));
}
}
// Catch a - at the end of the string. Let the parser handle that issue.
if (negate) {
tokens.push(this._createToken("-"));
}
return tokens;
}
/**
* Converts a Jexl string into an array of tokens. Each token is an object
* in the following format:
*
* {
* type: <string>,
* [name]: <string>,
* value: <boolean|number|string>,
* raw: <string>
* }
*
* Type is one of the following:
*
* literal, identifier, binaryOp, unaryOp
*
* OR, if the token is a control character its type is the name of the element
* defined in the Grammar.
*
* Name appears only if the token is a control string found in
* {@link grammar#elements}, and is set to the name of the element.
*
* Value is the value of the token in the correct type (boolean or numeric as
* appropriate). Raw is the string representation of this value taken directly
* from the expression string, including any trailing spaces.
* @param {string} str The Jexl string to be tokenized
* @returns {Array<{type, value, raw}>} an array of token objects.
* @throws {Error} if the provided string contains an invalid token.
*/
}, {
key: "tokenize",
value: function tokenize(str) {
var elements = this.getElements(str);
return this.getTokens(elements);
}
/**
* Creates a new token object from an element of a Jexl string. See
* {@link Lexer#tokenize} for a description of the token object.
* @param {string} element The element from which a token should be made
* @returns {{value: number|boolean|string, [name]: string, type: string,
* raw: string}} a token object describing the provided element.
* @throws {Error} if the provided string is not a valid expression element.
* @private
*/
}, {
key: "_createToken",
value: function _createToken(element) {
var token = {
type: "literal",
value: element,
raw: element
};
if (element[0] === '"' || element[0] === "'") {
token.value = this._unquote(element);
} else if (element.match(numericRegex)) {
token.value = parseFloat(element);
} else if (element === "true" || element === "false") {
token.value = element === "true";
} else if (this._grammar.elements[element]) {
token.type = this._grammar.elements[element].type;
} else if (element.match(identRegex)) {
token.type = "identifier";
} else {
throw new Error("Invalid expression token: ".concat(element));
}
return token;
}
/**
* Escapes a string so that it can be treated as a string literal within a
* regular expression.
* @param {string} str The string to be escaped
* @returns {string} the RegExp-escaped string.
* @see https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
* @private
*/
}, {
key: "_escapeRegExp",
value: function _escapeRegExp(str) {
str = str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
if (str.match(identRegex)) {
str = "\\b" + str + "\\b";
}
return str;
}
/**
* Gets a RegEx object appropriate for splitting a Jexl string into its core
* elements.
* @returns {RegExp} An element-splitting RegExp object
* @private
*/
}, {
key: "_getSplitRegex",
value: function _getSplitRegex() {
var _this = this;
if (!this._splitRegex) {
// Sort by most characters to least, then regex escape each
var elemArray = Object.keys(this._grammar.elements).sort(function (a, b) {
return b.length - a.length;
}).map(function (elem) {
return _this._escapeRegExp(elem);
}, this);
this._splitRegex = new RegExp("(" + [preOpRegexElems.join("|"), elemArray.join("|"), postOpRegexElems.join("|")].join("|") + ")");
}
return this._splitRegex;
}
/**
* Determines whether the addition of a '-' token should be interpreted as a
* negative symbol for an upcoming number, given an array of tokens already
* processed.
* @param {Array<Object>} tokens An array of tokens already processed
* @returns {boolean} true if adding a '-' should be considered a negative
* symbol; false otherwise
* @private
*/
}, {
key: "_isNegative",
value: function _isNegative(tokens) {
if (!tokens.length) return true;
return minusNegatesAfter.some(function (type) {
return type === tokens[tokens.length - 1].type;
});
}
/**
* A utility function to determine if a string consists of only space
* characters.
* @param {string} str A string to be tested
* @returns {boolean} true if the string is empty or consists of only spaces;
* false otherwise.
* @private
*/
}, {
key: "_isWhitespace",
value: function _isWhitespace(str) {
return !!str.match(whitespaceRegex);
}
/**
* Removes the beginning and trailing quotes from a string, unescapes any
* escaped quotes on its interior, and unescapes any escaped escape
* characters. Note that this function is not defensive; it assumes that the
* provided string is not empty, and that its first and last characters are
* actually quotes.
* @param {string} str A string whose first and last characters are quotes
* @returns {string} a string with the surrounding quotes stripped and escapes
* properly processed.
* @private
*/
}, {
key: "_unquote",
value: function _unquote(str) {
var quote = str[0];
var escQuoteRegex = new RegExp("\\\\" + quote, "g");
return str.substr(1, str.length - 2).replace(escQuoteRegex, quote).replace(escEscRegex, "\\");
}
}]);
return Lexer;
}();
module.exports = Lexer;