UNPKG

sql-where-parser

Version:

Parses an SQL-like WHERE string into various forms.

453 lines (385 loc) 14 kB
"use strict"; const Symbol = require('es6-symbol'); const TokenizeThis = require('tokenize-this'); /** * To distinguish between the binary minus and unary. * * @type {Symbol} */ const OPERATOR_UNARY_MINUS = Symbol('-'); /** * Number of operands in a unary operation. * * @type {number} */ const OPERATOR_TYPE_UNARY = 1; /** * Number of operands in a binary operation. * * @type {number} */ const OPERATOR_TYPE_BINARY = 2; /** * Number of operands in a ternary operation. * * @type {number} */ const OPERATOR_TYPE_TERNARY = 3; /** * Defining the use of the unary minus. * * @type {{operators: [{}], tokenizer: {shouldTokenize: string[], shouldMatch: string[], shouldDelimitBy: string[]}}} */ const unaryMinusDefinition = { [OPERATOR_UNARY_MINUS]: OPERATOR_TYPE_UNARY }; /** * A wrapper class around operators to distinguish them from regular tokens. */ class Operator { constructor(value, type, precedence) { this.value = value; this.type = type; this.precedence = precedence; } toJSON() { return this.value; } toString() { return `${this.value}`; } } /** * The main parser class. */ class SqlWhereParser { /** * * @param {{operators: [{}], tokenizer: {shouldTokenize: string[], shouldMatch: string[], shouldDelimitBy: string[]}}} [config] */ constructor(config) { if (!config) { config = {}; } /** * * @type {{operators: [{}], tokenizer: {shouldTokenize: string[], shouldMatch: string[], shouldDelimitBy: string[]}}} */ config = Object.assign({}, this.constructor.defaultConfig, config); /** * * @type {TokenizeThis} */ this.tokenizer = new TokenizeThis(config.tokenizer); /** * * @type {{}} */ this.operators = {}; /** * Flattens the operator definitions into a single object, * whose keys are the operators, and the values are the Operator class wrappers. */ config.operators.forEach((operators, precedence) => { Object.keys(operators).concat(Object.getOwnPropertySymbols(operators)).forEach((operator) => { this.operators[operator] = new Operator(operator, operators[operator], precedence); }); }); } /** * * @param {string} sql * @param {function} [evaluator] * @returns {{}} */ parse(sql, evaluator) { const operatorStack = []; const outputStream = []; let lastOperator = undefined; let tokenCount = 0; let lastTokenWasOperatorOrLeftParenthesis = false; if (!evaluator) { evaluator = this.defaultEvaluator; } /** * The following mess is an implementation of the Shunting-Yard Algorithm: http://wcipeg.com/wiki/Shunting_yard_algorithm * See also: https://en.wikipedia.org/wiki/Shunting-yard_algorithm */ this.tokenizer.tokenize(`(${sql})`, (token, surroundedBy) => { tokenCount++; /** * Read a token. */ if (typeof token === 'string' && !surroundedBy) { let normalizedToken = token.toUpperCase(); /** * If the token is an operator, o1, then: */ if (this.operators[normalizedToken]) { /** * Hard-coded rule for between to ignore the next AND. */ if (lastOperator === 'BETWEEN' && normalizedToken === 'AND') { lastOperator = 'AND'; return; } /** * If the conditions are right for unary minus, convert it. */ if (normalizedToken === '-' && (tokenCount === 1 || lastTokenWasOperatorOrLeftParenthesis)) { normalizedToken = OPERATOR_UNARY_MINUS; } /** * While there is an operator token o2 at the top of the operator stack, * and o1's precedence is less than or equal to that of o2, * pop o2 off the operator stack, onto the output queue: */ while (operatorStack[operatorStack.length - 1] && operatorStack[operatorStack.length - 1] !== '(' && this.operatorPrecedenceFromValues(normalizedToken, operatorStack[operatorStack.length - 1])) { const operator = this.operators[operatorStack.pop()]; const operands = []; let numOperands = operator.type; while (numOperands--) { operands.unshift(outputStream.pop()); } outputStream.push(evaluator(operator.value, operands)); } /** * At the end of iteration push o1 onto the operator stack. */ operatorStack.push(normalizedToken); lastOperator = normalizedToken; lastTokenWasOperatorOrLeftParenthesis = true; /** * If the token is a left parenthesis (i.e. "("), then push it onto the stack: */ } else if (token === '(') { operatorStack.push(token); lastTokenWasOperatorOrLeftParenthesis = true; /** * If the token is a right parenthesis (i.e. ")"): */ } else if (token === ')') { /** * Until the token at the top of the stack is a left parenthesis, * pop operators off the stack onto the output queue. */ while(operatorStack.length && operatorStack[operatorStack.length - 1] !== '(') { const operator = this.operators[operatorStack.pop()]; const operands = []; let numOperands = operator.type; while (numOperands--) { operands.unshift(outputStream.pop()); } outputStream.push(evaluator(operator.value, operands)); } if (!operatorStack.length) { throw new SyntaxError('Unmatched parenthesis.'); } /** * Pop the left parenthesis from the stack, but not onto the output queue. */ operatorStack.pop(); lastTokenWasOperatorOrLeftParenthesis = false; /** * Push everything else to the output queue. */ } else { outputStream.push(token); lastTokenWasOperatorOrLeftParenthesis = false; } /** * Push explicit strings to the output queue. */ } else { outputStream.push(token); lastTokenWasOperatorOrLeftParenthesis = false; } }); /** * While there are still operator tokens in the stack: */ while (operatorStack.length) { const operatorValue = operatorStack.pop(); /** * If the operator token on the top of the stack is a parenthesis, then there are mismatched parentheses. */ if (operatorValue === '(') { throw new SyntaxError('Unmatched parenthesis.'); } const operator = this.operators[operatorValue]; const operands = []; let numOperands = operator.type; while (numOperands--) { operands.unshift(outputStream.pop()); } /** * Pop the operator onto the output queue. */ outputStream.push(evaluator(operator.value, operands)); } if (outputStream.length > 1) { throw new SyntaxError('Could not reduce to a single expression.'); } return outputStream[0]; } /** * * @param {string} sql * @returns {[]} */ toArray(sql) { let expression = []; let tokenCount = 0; let lastToken = undefined; const expressionParentheses = []; this.tokenizer.tokenize(`(${sql})`, (token, surroundedBy) => { tokenCount++; switch (token) { case '(': expressionParentheses.push(expression.length); break; case ')': const precedenceParenthesisIndex = expressionParentheses.pop(); let expressionTokens = expression.splice(precedenceParenthesisIndex, expression.length); while(expressionTokens && expressionTokens.constructor === Array && expressionTokens.length === 1) { expressionTokens = expressionTokens[0]; } expression.push(expressionTokens); break; case '': break; case ',': break; default: let operator = null; if (!surroundedBy) { operator = this.getOperator(token); if (token === '-' && (tokenCount === 1 || (lastToken === '(' || (lastToken && lastToken.constructor === Operator)))) { operator = this.getOperator(OPERATOR_UNARY_MINUS); } } expression.push(operator ? operator : token); break; } lastToken = token; }); while(expression && expression.constructor === Array && expression.length === 1) { expression = expression[0]; } return expression; } /** * * @param {string|Symbol} operatorValue1 * @param {string|Symbol} operatorValue2 * @returns {boolean} */ operatorPrecedenceFromValues(operatorValue1, operatorValue2) { return this.operators[operatorValue2].precedence <= this.operators[operatorValue1].precedence; } /** * * @param {string|Symbol} operatorValue * @returns {*} */ getOperator(operatorValue) { if (typeof operatorValue === 'string') { return this.operators[operatorValue.toUpperCase()]; } if (typeof operatorValue === 'symbol') { return this.operators[operatorValue]; } return null; } /** * * @param {string|Symbol} operatorValue * @param {[]} operands * @returns {*} */ defaultEvaluator(operatorValue, operands) { /** * Convert back to regular minus, now that we have the proper number of operands. */ if (operatorValue === OPERATOR_UNARY_MINUS) { operatorValue = '-'; } /** * This is a trick to avoid the problem of inconsistent comma usage in SQL. */ if (operatorValue === ',') { return [].concat(operands[0], operands[1]); } return { [operatorValue]: operands }; } /** * * @returns {{operators: [{}], tokenizer: {shouldTokenize: string[], shouldMatch: string[], shouldDelimitBy: string[]}}} */ static get defaultConfig() { return { operators: [ // TODO: add more operator definitions { '!': OPERATOR_TYPE_UNARY }, unaryMinusDefinition, { '^': OPERATOR_TYPE_BINARY }, { '*': OPERATOR_TYPE_BINARY, '/': OPERATOR_TYPE_BINARY, '%': OPERATOR_TYPE_BINARY }, { '+': OPERATOR_TYPE_BINARY, '-': OPERATOR_TYPE_BINARY }, { '=': OPERATOR_TYPE_BINARY, '<': OPERATOR_TYPE_BINARY, '>': OPERATOR_TYPE_BINARY, '<=': OPERATOR_TYPE_BINARY, '>=': OPERATOR_TYPE_BINARY, '!=': OPERATOR_TYPE_BINARY }, { ',': OPERATOR_TYPE_BINARY // We treat commas as an operator, to aid in turning arbitrary numbers of comma-separated values into arrays. }, { 'NOT': OPERATOR_TYPE_UNARY }, { 'BETWEEN': OPERATOR_TYPE_TERNARY, 'IN': OPERATOR_TYPE_BINARY, 'IS': OPERATOR_TYPE_BINARY, 'LIKE': OPERATOR_TYPE_BINARY }, { 'AND': OPERATOR_TYPE_BINARY }, { 'OR': OPERATOR_TYPE_BINARY } ], tokenizer: { shouldTokenize: ['(', ')', ',', '*', '/', '%', '+', '-', '=', '!=','!', '<', '>', '<=', '>=', '^'], shouldMatch: ['"', "'", '`'], shouldDelimitBy: [' ', "\n", "\r", "\t"] } }; } static get Operator() { return Operator; } static get OPERATOR_UNARY_MINUS() { return OPERATOR_UNARY_MINUS; } } /** * * @type {SqlWhereParser} */ module.exports = SqlWhereParser;