UNPKG

@starptech/expression-parser

Version:

Framework agnostic template expression parser

github.com/StarpTech/prettyhtml

StarpTech/prettyhtml

404 lines (355 loc) • 11 kB

JavaScript

'use strict' const unclosedExpression = 'Unclosed expression.' const unclosedTemplateLiteral = 'Unclosed ES6 template literal.' const unexpectedCharInExpression = 'Unexpected character %1.' /** * Escape special characters in a given string, in preparation to create a regex. * * @param {string} str - Raw string * @returns {string} Escaped string. */ const escapeStr = str => str.replace(/(?=[-[\](){^*+?.$|\\])/g, '\\') const $_ES6_BQ = '`' /* * Mini-parser for expressions. * The main pourpose of this module is to find the end of an expression * and return its text without the enclosing brackets. * Does not works with comments, but supports ES6 template strings. */ /** * @exports exprExtr */ const S_SQ_STR = /'[^'\n\r\\]*(?:\\(?:\r\n?|[\S\s])[^'\n\r\\]*)*'/.source /** * Matches double quoted JS strings taking care about nested quotes * and EOLs (escaped EOLs are Ok). * * @const * @private */ const S_STRING = `${S_SQ_STR}|${S_SQ_STR.replace(/'/g, '"')}` /** * Regex cache * * @type {Object.<string, RegExp>} * @const * @private */ const reBr = {} /** * Makes an optimal regex that matches quoted strings, brackets, backquotes * and the closing brackets of an expression. * * @param {string} b - Closing brackets * @returns {RegExp} */ function _regex(b) { let re = reBr[b] if (!re) { let s = escapeStr(b) if (b.length > 1) { s = s + '|[' } else { s = /[{}[\]()]/.test(b) ? '[' : `[${s}` } reBr[b] = re = new RegExp(`${S_STRING}|${s}\`/\\{}[\\]()]`, 'g') } return re } /** * Searches the next backquote that signals the end of the ES6 Template Literal * or the "${" sequence that starts a JS expression, skipping any escaped * character. * * @param {string} code - Whole code * @param {number} pos - The start position of the template * @param {string[]} stack - To save nested ES6 TL count * @returns {number} The end of the string (-1 if not found) */ function skipES6TL(code, pos, stack) { // we are in the char following the backquote (`), // find the next unescaped backquote or the sequence "${" const re = /[`$\\]/g let c while (((re.lastIndex = pos), re.exec(code))) { pos = re.lastIndex c = code[pos - 1] if (c === '`') { return pos } if (c === '$' && code[pos++] === '{') { stack.push($_ES6_BQ, '}') return pos } // else this is an escaped char } throw formatError(code, unclosedTemplateLiteral, pos) } // safe characters to precced a regex (including `=>`, `**`, and `...`) const beforeReChars = '[{(,;:?=|&!^~>%*/' const beforeReSign = beforeReChars + '+-' // keyword that can preceed a regex (`in` is handled as special case) const beforeReWords = [ 'case', 'default', 'do', 'else', 'in', 'instanceof', 'prefix', 'return', 'typeof', 'void', 'yield' ] // Last chars of all the beforeReWords elements to speed up the process. const wordsEndChar = beforeReWords.reduce((s, w) => s + w.slice(-1), '') // Matches literal regex from the start of the buffer. // The buffer to search must not include line-endings. const RE_LIT_REGEX = /^\/(?=[^*>/])[^[/\\]*(?:(?:\\.|\[(?:\\.|[^\]\\]*)*\])[^[\\/]*)*?\/[gimuy]*/ // Valid characters for JavaScript variable names and literal numbers. const RE_JS_VCHAR = /[$\w]/ // Match dot characters that could be part of tricky regex const RE_DOT_CHAR = /.*/g /** * Searches the position of the previous non-blank character inside `code`, * starting with `pos - 1`. * * @param {string} code - Buffer to search * @param {number} pos - Starting position * @returns {number} Position of the first non-blank character to the left. * @private */ function _prev(code, pos) { while (--pos >= 0 && /\s/.test(code[pos])); return pos } /** * Check if the character in the `start` position within `code` can be a regex * and returns the position following this regex or `start+1` if this is not * one. * * NOTE: Ensure `start` points to a slash (this is not checked). * * @function skipRegex * @param {string} code - Buffer to test in * @param {number} start - Position the first slash inside `code` * @returns {number} Position of the char following the regex. * */ /* istanbul ignore next */ function skipRegex(code, start) { let pos = (RE_DOT_CHAR.lastIndex = start++) // `exec()` will extract from the slash to the end of the line // and the chained `match()` will match the possible regex. const match = (RE_DOT_CHAR.exec(code) || ' ')[0].match(RE_LIT_REGEX) if (match) { const next = pos + match[0].length // result comes from `re.match` pos = _prev(code, pos) let c = code[pos] // start of buffer or safe prefix? if (pos < 0 || beforeReChars.includes(c)) { return next } // from here, `pos` is >= 0 and `c` is code[pos] if (c === '.') { // can be `...` or something silly like 5./2 if (code[pos - 1] === '.') { start = next } } else { if (c === '+' || c === '-') { // tricky case if ( code[--pos] !== c || // if have a single operator or (pos = _prev(code, pos)) < 0 || // ...have `++` and no previous token beforeReSign.includes((c = code[pos])) ) { return next // ...this is a regex } } if (wordsEndChar.includes(c)) { // looks like a keyword? const end = pos + 1 // get the complete (previous) keyword while (--pos >= 0 && RE_JS_VCHAR.test(code[pos])); // it is in the allowed keywords list? if (beforeReWords.includes(code.slice(pos + 1, end))) { start = next } } } } return start } /** * Update the scopes stack removing or adding closures to it * @param {array} stack - array stacking the expression closures * @param {string} char - current char to add or remove from the stack * @param {string} idx - matching index * @param {string} code - expression code * @returns {object} result * @returns {object} result.char - either the char received or the closing braces * @returns {object} result.index - either a new index to skip part of the source code, * or 0 to keep from parsing from the old position */ function updateStack(stack, char, idx, code) { let index = 0 switch (char) { case '[': case '(': case '{': stack.push(char === '[' ? ']' : char === '(' ? ')' : '}') break case ')': case ']': case '}': if (char !== stack.pop()) { panic(code, unexpectedCharInExpression.replace('%1', char), index) } if (char === '}' && stack[stack.length - 1] === $_ES6_BQ) { char = stack.pop() } index = idx + 1 break case '/': index = skipRegex(code, idx) } return { char, index } } /** * Parses the code string searching the end of the expression. * It skips braces, quoted strings, regexes, and ES6 template literals. * * @function exprExtr * @param {string} code - Buffer to parse * @param {number} start - Position of the opening brace * @param {[string,string]} bp - Brackets pair * @returns {Object} Expression's end (after the closing brace) or -1 * if it is not an expr. */ function exprExtr(code, start, bp) { const [openingBraces, closingBraces] = bp const offset = start + openingBraces.length // skips the opening brace const stack = [] // expected closing braces ('`' for ES6 TL) const re = _regex(closingBraces) re.lastIndex = offset // begining of the expression let end let match while ((match = re.exec(code))) { const idx = match.index const str = match[0] end = re.lastIndex // end the iteration if (str === closingBraces && !stack.length) { return { text: code.slice(offset, idx), start, end } } const { char, index } = updateStack(stack, str[0], idx, code) // update the end value depending on the new index received end = index || end // update the regex last index re.lastIndex = char === $_ES6_BQ ? skipES6TL(code, end, stack) : end } if (stack.length) { panic(code, unclosedExpression, end) } } /** * Creates a regex for the given string and the left bracket. * The string is captured in $1. * * @param {ParserState} state - Parser state * @param {string} str - String to search * @returns {RegExp} Resulting regex. * @private */ function b0re(state, str) { const { brackets } = state const b0 = escapeStr(brackets[0]) const b1 = escapeStr(str) return new RegExp(`(${b1})|${b0}`, 'g') } /** * Find the end of the attribute value or text node * Extract expressions. * Detect if value have escaped brackets. * * @param {ParserState} state - Parser state * @returns {number} Ending position * @private */ function expr(state) { const re = b0re(state, state.brackets[1]) const node = {} const { unescape, expressions } = parseExpressions(state, re) if (node) { if (unescape) { node.unescape = unescape } if (expressions.length) { node.expressions = expressions } } return node } /** * Parse a text chunk finding all the expressions in it * @param {ParserState} state - Parser state * @param {RegExp} re - regex to match the expressions contents * @returns {object} result containing the expression found, the string to unescape and the end position */ function parseExpressions(state, re) { const { data, brackets } = state const expressions = [] let unescape, pos, match // Anything captured in $1 (closing quote or character) ends the loop... while ((match = re.exec(data))) { // ...else, we have an opening bracket and maybe an expression. pos = match.index if (data[pos - 1] === '\\') { unescape = match[0] // it is an escaped opening brace } else { const tmpExpr = exprExtr(data, pos, brackets) if (tmpExpr) { expressions.push(tmpExpr) re.lastIndex = tmpExpr.end } } } return { unescape, expressions } } function formatError(data, message, pos) { if (!pos) { pos = data.length } // count unix/mac/win eols const line = (data.slice(0, pos).match(/\r\n?|\n/g) || '').length + 1 let col = 0 while (--pos >= 0 && !/[\r\n]/.test(data[pos])) { ++col } return `[${line},${col}]: ${message}` } /** * Custom error handler can be implemented replacing this method. * The `state` object includes the buffer (`data`) * The error position (`loc`) contains line (base 1) and col (base 0). * * @param {string} msg - Error message * @param {pos} [number] - Position of the error */ function panic(data, msg, pos) { const message = formatError(data, msg, pos) throw new Error(message) } function parse(data, state) { return expr({ ...state, data }) } module.exports = parse