UNPKG

bqpjs

Version:
579 lines (481 loc) 14.9 kB
/* Copyright 2019 Frederick C. Feibel <ffeibel@gmail.com> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Version: 0.1.1 - built on Sun Oct 13 2019 18:39:33 GMT-0500 (Central Daylight Time) */
'use strict';

/**
 * A tokenizer rule: a pattern to look for plus the operation/type that a
 * match of that pattern represents.
 */
class Rule {
  /**
   * @param {RegExp|string} pattern - Pattern handed to `String.prototype.search`.
   * @param {string} [operation] - Operation a match stands for ('AND', 'open', ...).
   * @param {string} [type='operator'] - Token type produced by a match.
   */
  constructor(pattern, operation, type = 'operator') {
    this.pattern = pattern;
    this.operation = operation;
    this.type = type;
  }

  /**
   * @param {string} str - Text to search.
   * @returns {number} Index of the first match in `str`, or -1 when there is none.
   */
  test(str) {
    return str.search(this.pattern);
  }
}

/**
 * A rule whose matches can be escaped by a directly preceding backslash.
 */
class EscapeableRule extends Rule {
  /**
   * @param {string} str - Text to search.
   * @returns {number} Index of the first match that is not preceded by a
   *   backslash, or -1 when every match is escaped (or there is none).
   */
  test(str) {
    let offset = 0;
    while (offset < str.length) {
      const found = super.test(str.slice(offset));
      if (found === -1) {
        return -1;
      }
      const matchIdx = offset + found;
      if (str.charAt(matchIdx - 1) !== '\\') {
        return matchIdx;
      }
      // Escaped match: keep scanning. Stopping here would hide every later
      // unescaped match in the same buffer (e.g. the closing quote of "a\"b").
      offset = matchIdx + 1;
    }
    return -1;
  }
}

// Every rule the tokenizer knows about, keyed by name.
const rules = {
  and: new Rule(/AND/g, 'AND'),
  plus: new Rule(/\+/g, 'AND'),
  or: new Rule(/OR/g, 'OR'),
  tilde: new Rule(/~/g, 'OR'),
  not: new Rule(/NOT/g, 'NOT'),
  minus: new Rule(/-/g, 'NOT'),
  openParen: new Rule(/\(/g, 'open', 'grouping'),
  closeParen: new Rule(/\)/g, 'close', 'grouping'),
  quote: new EscapeableRule(/"/g, undefined, 'quote'),
  space: new Rule(/\s/g, undefined, 'whitespace'),
};

/**
 * Builds a scanner that walks a search string one character at a time and
 * records every point at which the accumulated text matches one of the rules.
 *
 * @param {Iterable<Rule>} activeRules - Rules to test, in priority order.
 * @returns {function(string): Array<object>} Scanner returning match records
 *   of the shape `{ subStr, currentIdx, matchStart, type, operation }`.
 */
const findMatches = (activeRules) => (searchStr) => {
  // We can't make tokens yet because not all matches will be exactly a token.
  // For example, termAND will match the AND test.
  const matches = [];
  let subStr = '';

  for (let currentIdx = 0; currentIdx < searchStr.length; currentIdx++) {
    subStr += searchStr.charAt(currentIdx);
    for (const rule of activeRules) {
      const matchStart = rule.test(subStr);
      if (matchStart !== -1) {
        matches.push({
          subStr,
          currentIdx,
          matchStart,
          type: rule.type,
          operation: rule.operation,
        });
        subStr = '';
        break;
      }
    }
  }

  if (subStr !== '') {
    // We've iterated to the end of the search string but we have some
    // unmatched string remaining, which can only be a term.
    matches.push({
      subStr,
      currentIdx: searchStr.length,
      matchStart: -1,
      type: 'term',
      operation: undefined,
    });
  }

  return matches;
};

/**
 * A lexical token with its value, type, optional operation and zero-based
 * inclusive start/end position.
 */
class Token {
  constructor(value, type, operation, start = 0, end = 0) {
    this.value = value;
    this.type = type;
    // `operation` is only set when truthy, so terms carry no such property.
    if (operation) {
      this.operation = operation;
    }
    this.position = { start, end };
  }

  static isTerm(token) {
    return token.type === 'term';
  }

  static isOpenParen(token) {
    return token.type === 'grouping' && token.operation === 'open';
  }

  static isCloseParen(token) {
    return token.type === 'grouping' && token.operation === 'close';
  }

  static isOperator(token) {
    return token.type === 'operator';
  }

  static isBinaryOperator(token) {
    return (
      Token.isOperator(token) &&
      (token.operation === 'AND' || token.operation === 'OR')
    );
  }

  static isUnaryOperator(token) {
    return Token.isOperator(token) && token.operation === 'NOT';
  }

  /**
   * Creates a token whose position is derived from the index of its last
   * character.
   *
   * @param {string} value - Token text.
   * @param {string} type - Token type.
   * @param {number} currentPosition - Index of the last character of `value`.
   * @param {string} [operation] - Operation for operator/grouping tokens.
   * @returns {Token}
   */
  static create(value, type, currentPosition, operation) {
    const startPosition = calcStart(currentPosition, value.length);
    const endPosition = calcEnd(startPosition, value.length);
    return new Token(value, type, operation, startPosition, endPosition);
  }
}

// Assumes zero based index
function calcStart(position, length) {
  return position - (length - 1);
}

// Assumes zero based index
function calcEnd(position, length) {
  return position + (length - 1);
}

/**
 * Right-to-left function composition: `compose(f, g)(x)` is `f(g(x))`.
 * Arguments that are not functions are ignored.
 *
 * @param {...Function} functions - Functions to compose.
 * @returns {Function} The composed function.
 * @throws {Error} When no function is passed.
 */
const compose = (...functions) => {
  const funcs = functions.filter((fn) => typeof fn === 'function');
  if (funcs.length === 0) {
    throw new Error('No funcs passed');
  }
  return funcs.reduce((outer, inner) => (...args) => outer(inner(...args)));
};

// Left-to-right composition: the first function given runs first.
const composeLeft = (...args) => compose(...args.reverse());

// Small curried helpers used to assemble the pipelines below.
const isNot = (type) => (token) => token.type !== type;
const filter = (fn) => (array) => array.filter(fn);
const reduce = (fn) => (array) => array.reduce(fn, []);
const flatMap = (fn) => (array) => array.flatMap(fn);
const pluck = (idx) => (array) => array[idx];

/**
 * Converts one match record from `findMatches` into one or two tokens.
 *
 * @param {object} match - Record with subStr, matchStart, currentIdx, type, operation.
 * @returns {Token[]} The term preceding the match (if any) followed by the
 *   matched token, or a single term when the record is not a rule match.
 */
const matchToToken = (match) => {
  const { subStr, matchStart, currentIdx, type, operation } = match;
  const tokens = [];

  if (matchStart >= 0) {
    const nonTerm = subStr.slice(matchStart);
    if (matchStart > 0) {
      // We've found a match prefixed with a term
      // EX: termAND or term) or term" or term' ' (with a space at the end)
      const term = subStr.slice(0, matchStart);
      tokens.push(Token.create(term, 'term', currentIdx - nonTerm.length));
    }
    tokens.push(Token.create(nonTerm, type, currentIdx, operation));
  } else {
    // Anything not a match must be a term
    tokens.push(Token.create(subStr, 'term', currentIdx - 1));
  }

  return tokens;
};

const matchesToTokens = flatMap(matchToToken);

/**
 * Collapses everything between a pair of quote tokens into a single term and
 * drops the quote tokens themselves.
 *
 * @param {Token[]} tokens
 * @returns {Token[]}
 * @throws {Error} When a quote is never closed.
 */
const createTermsFromQuotes = (tokens) => {
  const newTokens = [];
  let currentValue = '';
  let unclosedQuoteToken = null;

  for (const currentToken of tokens) {
    const isQuote = currentToken.type === 'quote';
    if (unclosedQuoteToken === null) {
      if (isQuote) {
        unclosedQuoteToken = currentToken;
      } else {
        newTokens.push(currentToken);
      }
    } else if (isQuote) {
      // The quoted term ends on the character just before the closing quote.
      newTokens.push(
        Token.create(currentValue, 'term', currentToken.position.end - 1)
      );
      currentValue = '';
      unclosedQuoteToken = null;
    } else {
      currentValue += currentToken.value;
    }
  }

  if (unclosedQuoteToken !== null) {
    throw new Error(`Unmatched quote at ${unclosedQuoteToken.position.start}`);
  }

  return newTokens;
};

const removeToken = (type) => filter(isNot(type));

/**
 * Reducer that copies tokens into the accumulator and inserts an implicit
 * operator wherever one operand is directly followed by another.
 *
 * @param {string} operation - Operation of the inserted operator.
 * @returns {function(Token[], Token, number, Token[]): Token[]}
 */
const _insertDefaultOperator = (operation) => (accum, current, idx, tokens) => {
  const nextToken = idx + 1 === tokens.length ? { type: null } : tokens[idx + 1];
  accum.push(current);

  // An operand ends on a term or a close paren ...
  const endsOperand = current.type === 'term' || current.operation === 'close';
  // ... and the next one starts on a term, an open paren or a NOT.
  // Covers: A B, A (B C), (A B) C, (A B) (C D), A NOT B, (B OR C) NOT A
  const startsOperand =
    nextToken.type === 'term' ||
    nextToken.operation === 'open' ||
    nextToken.operation === 'NOT';

  if (endsOperand && startsOperand) {
    // This will be a token with a value of ' ', but a type and operation of
    // an operator. There will also be scenarios in which the default token
    // will share a position with the next token. EX: (A)(B) or "A""B"
    accum.push(
      Token.create(' ', 'operator', current.position.end + 1, operation)
    );
  }

  return accum;
};

const insertDefaultOperator = (operation) =>
  reduce(_insertDefaultOperator(operation));

/**
 * Given a token, returns the tests necessary to determine the next valid token.
 *
 * @param {Token} token
 * @returns {Array<function(Token): boolean>} A fresh array on every call.
 * @throws {Error} When the token type is not term, operator or grouping.
 */
const getDefaultTests = (token) => {
  const tests = {
    term: [Token.isBinaryOperator, Token.isUnaryOperator],
    NOT: [Token.isTerm, Token.isOpenParen],
    AND: [Token.isTerm, Token.isOpenParen, Token.isUnaryOperator],
    OR: [Token.isTerm, Token.isOpenParen, Token.isUnaryOperator],
    open: [Token.isTerm, Token.isUnaryOperator, Token.isOpenParen],
    close: [Token.isBinaryOperator, Token.isUnaryOperator],
  };

  switch (token.type) {
    case 'grouping':
    case 'operator':
      return tests[token.operation];
    case 'term':
      return tests.term;
    default:
      throw new Error('Unknown token type');
  }
};

const invalidTokenError = (token) => {
  throw new Error(
    `Invalid token "${token.value}" at position ${token.position.start}`
  );
};

/**
 * Checks that the token sequence forms a well-formed expression.
 *
 * @param {Token[]} tokens
 * @returns {Token[]} The same tokens, when valid.
 * @throws {Error} On a token that cannot follow its predecessor, an unclosed
 *   parenthesis, or a query that ends on an operator.
 */
const validate = (tokens) => {
  const openParenPositions = [];
  let tests = [Token.isTerm, Token.isOpenParen, Token.isUnaryOperator];

  if (tokens.length === 1) {
    if (Token.isTerm(tokens[0])) {
      // No need to continue validating a single term query
      return tokens;
    }
    invalidTokenError(tokens[0]);
  }

  for (const currentToken of tokens) {
    if (!tests.some((test) => test(currentToken))) {
      invalidTokenError(currentToken);
    }

    if (Token.isOpenParen(currentToken)) {
      openParenPositions.push(currentToken.position.start);
    }
    if (Token.isCloseParen(currentToken)) {
      openParenPositions.pop();
    }

    // Make new default rule based on current token and existence of open parens
    tests = getDefaultTests(currentToken);
    const canCloseGroup =
      openParenPositions.length > 0 &&
      (currentToken.type === 'term' || currentToken.operation === 'close');
    if (canCloseGroup) {
      tests = [...tests, Token.isCloseParen];
    }
  }

  if (openParenPositions.length > 0) {
    const lastIndex = openParenPositions.length - 1;
    throw new Error(`Expected ) to match ( at ${openParenPositions[lastIndex]}`);
  }

  // A query must end on an operand; a dangling operator would otherwise
  // produce a tree node with a missing child.
  const lastToken = tokens[tokens.length - 1];
  if (
    lastToken !== undefined &&
    !Token.isTerm(lastToken) &&
    !Token.isCloseParen(lastToken)
  ) {
    invalidTokenError(lastToken);
  }

  return tokens;
};

/**
 * Assembles the tokenizer pipeline.
 *
 * @param {Iterable<Rule>} userRules - Rules to tokenize with.
 * @param {string} defaultOperation - Operation of implicitly inserted operators.
 * @returns {function(string): Token[]}
 */
const getTokenize = (userRules, defaultOperation) =>
  composeLeft(
    findMatches(userRules),
    matchesToTokens,
    // createTermsFromQuotes must be done before whitespace is stripped
    createTermsFromQuotes,
    removeToken(rules.space.type),
    insertDefaultOperator(defaultOperation),
    validate
  );

// Higher precedence binds tighter; 'open' is lowest so it is never popped
// by an operator.
const operators = {
  NOT: { precedence: 3 },
  AND: { precedence: 2 },
  OR: { precedence: 1 },
  open: { precedence: 0 },
};

/**
 * Converts infix tokens to reverse Polish notation.
 * This is an implementation of Dijkstra's Shunting Yard.
 *
 * @param {Token[]} tokens - Validated infix tokens.
 * @returns {Token[]} The tokens in RPN order, without grouping tokens.
 * @throws {Error} On a token that is not a term, grouping or operator.
 */
const createRpn = (tokens) => {
  let output = [];
  const operatorStack = [];

  for (const token of tokens) {
    if (Token.isTerm(token)) {
      output.push(token);
    } else if (Token.isOpenParen(token)) {
      operatorStack.push(token);
    } else if (Token.isCloseParen(token)) {
      // Flush operators back to (and including) the matching open paren.
      while (operatorStack.length > 0) {
        const top = operatorStack.pop();
        if (Token.isOpenParen(top)) {
          break;
        }
        output.push(top);
      }
    } else if (Token.isOperator(token)) {
      const currentOperator = operators[token.operation];
      while (operatorStack.length > 0) {
        const top = operatorStack[operatorStack.length - 1];
        if (operators[top.operation].precedence < currentOperator.precedence) {
          break;
        }
        output.push(operatorStack.pop());
      }
      operatorStack.push(token);
    } else {
      throw new Error(`Unexpected token: ${token.value}`);
    }
  }

  // Affix any remaining operators
  if (operatorStack.length) {
    output = output.concat(operatorStack.reverse());
  }

  return output;
};

// Copies a token into a tree node with `left` and `right` children.
const node = (obj, left = null, right = null) =>
  Object.assign({}, obj, { left, right });

/**
 * Reducer that folds RPN symbols into a stack of tree nodes.
 */
const rpnToTree = (acc, symbol) => {
  if (symbol.type === 'term') {
    acc.push(node(symbol));
  }

  if (symbol.type === 'operator') {
    const right = acc.pop();
    if (symbol.operation === 'NOT') {
      // Unary: only a right child.
      acc.push(node(symbol, null, right));
    } else {
      const left = acc.pop();
      acc.push(node(symbol, left, right));
    }
  }

  return acc;
};

const createTree = compose(pluck(0), reduce(rpnToTree));

const ruleNames = [
  'and',
  'plus',
  'or',
  'tilde',
  'not',
  'minus',
  'openParen',
  'closeParen',
  'quote',
  'space',
];
const defaultOperation = 'AND';
const selectedRules = ruleNames
  .filter((name) => name in rules)
  .map((name) => rules[name]);
const tokenize = getTokenize(selectedRules, defaultOperation);

/**
 * Parses a boolean search string.
 *
 * @param {string} searchStr - Query such as `a AND (b OR c) NOT "d e"`.
 * @returns {{_tokens: Token[], rpn: Token[], tree: (object|undefined)}}
 *   `tree` is `undefined` when the query yields no tokens.
 * @throws {Error} When the query is not well formed.
 */
function bqpjs(searchStr) {
  const tokens = tokenize(searchStr);
  const rpn = createRpn(tokens);
  const tree = createTree(rpn);

  return {
    // tokens aren't really a part of the interface, but I'm exposing them
    // to make it easier to see what is happening
    _tokens: tokens,
    rpn,
    tree,
  };
}

// Guarded so the file can also be loaded where CommonJS globals are absent.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = bqpjs;
}