UNPKG

dataframe-js

Version:

Immutable and functional data structure for datascientists and developpers

325 lines (262 loc) 11 kB
"use strict"; require("core-js/modules/es.symbol"); require("core-js/modules/es.symbol.description"); require("core-js/modules/es.symbol.iterator"); require("core-js/modules/es.array.concat"); require("core-js/modules/es.array.filter"); require("core-js/modules/es.array.find"); require("core-js/modules/es.array.find-index"); require("core-js/modules/es.array.for-each"); require("core-js/modules/es.array.from"); require("core-js/modules/es.array.includes"); require("core-js/modules/es.array.iterator"); require("core-js/modules/es.array.join"); require("core-js/modules/es.array.map"); require("core-js/modules/es.array.reduce"); require("core-js/modules/es.array.slice"); require("core-js/modules/es.date.to-string"); require("core-js/modules/es.number.constructor"); require("core-js/modules/es.number.is-nan"); require("core-js/modules/es.object.keys"); require("core-js/modules/es.object.to-string"); require("core-js/modules/es.regexp.exec"); require("core-js/modules/es.regexp.to-string"); require("core-js/modules/es.string.includes"); require("core-js/modules/es.string.iterator"); require("core-js/modules/es.string.replace"); require("core-js/modules/es.string.split"); require("core-js/modules/es.string.trim"); require("core-js/modules/web.dom-collections.iterator"); Object.defineProperty(exports, "__esModule", { value: true }); exports.default = sqlParser; var _reusables = require("../../reusables"); var _errors = require("../../errors"); function _slicedToArray(arr, i) { return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _nonIterableRest(); } function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } function _iterableToArrayLimit(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"] != null) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } function _arrayWithHoles(arr) { if (Array.isArray(arr)) return arr; } function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _nonIterableSpread(); } function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance"); } function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.prototype.toString.call(iter) === "[object Arguments]") return Array.from(iter); } function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = new Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } } var REPLACMENTS = [["INNER JOIN", "INNERJOIN"], ["LEFT JOIN", "LEFTJOIN"], ["RIGHT JOIN", "RIGHTJOIN"], ["FULL JOIN", "FULLJOIN"], ["GROUP BY", "GROUPBY"]]; var WHERE_OPERATORS = { IN: function IN(a, b) { return b.includes(a); }, LIKE: function LIKE(a, b) { return b.includes(a) || a.includes(b); }, ">=": function _(a, b) { return a >= b; }, "<=": function _(a, b) { return a <= b; }, "!=": function _(a, b) { return a !== b; }, "<": function _(a, b) { return a < b; }, ">": function _(a, b) { return a > b; }, "=": function _(a, b) { return a === b; }, AND: function AND(a, b) { return a && b; }, OR: function OR(a, b) { return a || b; } }; var SELECT_FUNCTIONS = { COUNT: function COUNT(df) { return df.count(); }, SUM: function SUM(df, column) { return df.stat.sum(column); }, MAX: function MAX(df, column) { return df.stat.max(column); }, MIN: function MIN(df, column) { return df.stat.min(column); }, AVG: function AVG(df, column) { return df.stat.mean(column); } }; function match(value) { for (var _len = arguments.length, cases = new Array(_len > 1 ? _len - 1 : 0), _key = 1; _key < _len; _key++) { cases[_key - 1] = arguments[_key]; } var casesGen = (0, _reusables.makeGenerator)(cases); var checker = function checker(nextCase) { return nextCase[0](value) ? nextCase[1](value) : checker(casesGen.next().value); }; return checker(casesGen.next().value); } function sqlArgsToArray(args) { return (0, _reusables.xReplace)(args.join(" "), [" ", ""]).split(","); } function joinHandler(operation, tables, type) { var ONKeywordLocation = operation.findIndex(function (word) { return word.toUpperCase() === "ON"; }) + 1; return function (df) { return df.join(tables[operation[0]], sqlArgsToArray(operation.filter(function (word, loc) { return loc >= ONKeywordLocation; })), type); }; } var OPERATIONS_HANDLER = { WHERE: function WHERE(operation) { var operationalTerms = (0, _reusables.xSplit)(operation.join(" "), " AND ", " OR "); return function (df) { return df.filter(function (row) { var conditionalOperators = operation.filter(function (term) { return ["AND", "OR"].includes(term.toUpperCase()); }); return operationalTerms.map(function (operationalTerm) { var operatorToApply = _reusables.xContains.apply(void 0, [operationalTerm].concat(_toConsumableArray(Object.keys(WHERE_OPERATORS))))[0]; var terms = operationalTerm.split(operatorToApply).map(function (term) { return term.trim(); }); if (!row.has(terms[0]) && row.has(terms[1])) { return WHERE_OPERATORS[operatorToApply]((0, _reusables.xReplace)(terms[0].trim(), ['"', ""], ["'", ""], ["`", ""]), String(row.get(terms[1]))); } var lastTermAsNumber = Number(terms[1]); return WHERE_OPERATORS[operatorToApply](String(row.get(terms[0])), Number.isNaN(lastTermAsNumber) ? (0, _reusables.xReplace)(terms[1].trim(), ['"', ""], ["'", ""], ["`", ""]) : lastTermAsNumber); }).reduce(function (prev, next) { return WHERE_OPERATORS[conditionalOperators.shift()](prev, next); }); }); }; }, JOIN: function JOIN(operation, tables) { return joinHandler(operation, tables, "inner"); }, INNERJOIN: function INNERJOIN(operation, tables) { return joinHandler(operation, tables, "inner"); }, LEFTJOIN: function LEFTJOIN(operation, tables) { return joinHandler(operation, tables, "left"); }, RIGHTJOIN: function RIGHTJOIN(operation, tables) { return joinHandler(operation, tables, "right"); }, FULLJOIN: function FULLJOIN(operation, tables) { return joinHandler(operation, tables, "full"); }, UNION: function UNION(operation, tables) { return function (df) { return df.union(operation[0].toUpperCase().includes("SELECT") ? sqlParser(operation.join(" "), tables) : tables[operation[0]]); }; }, GROUPBY: function GROUPBY(operation) { return function (df) { return df.groupBy.apply(df, _toConsumableArray(sqlArgsToArray(operation))); }; } }; function replaceTermsInQuery(query) { var replacedQuery = query; REPLACMENTS.forEach(function (_ref) { var _ref2 = _slicedToArray(_ref, 2), joinType = _ref2[0], replacment = _ref2[1]; replacedQuery = replacedQuery.replace(joinType, replacment).replace(joinType.toLowerCase(), replacment); }); return replacedQuery; } function sqlSplitter(query) { var splittedQuery = replaceTermsInQuery(query).split(" "); var fromLoc = splittedQuery.findIndex(function (word) { return word.toUpperCase() === "FROM"; }); if (fromLoc === -1) { throw new _errors.SQLParseError("Your query should contains FROM keyword"); } return { selections: splittedQuery.slice(0, fromLoc), table: splittedQuery[fromLoc + 1], operations: splittedQuery.slice(fromLoc + 2, splittedQuery.length) }; } function parseOperations(operations, tables) { var operationsLoc = operations.map(function (word, index) { return Object.keys(OPERATIONS_HANDLER).includes(word.toUpperCase()) ? index : undefined; }).filter(function (loc) { return loc !== undefined; }); return operationsLoc.map(function (loc, index) { return OPERATIONS_HANDLER[operations[loc].toUpperCase()](operations.slice(loc + 1, operationsLoc[index + 1] ? operationsLoc[index + 1] : operations.length), tables); }).reduce(function (prev, next) { return function (df) { return next(prev(df)); }; }, function (df) { return df; }); } function parseSelections(selections) { if (selections[0].toUpperCase() !== "SELECT") { throw new _errors.SQLParseError("Your query should begin with SELECT keyword"); } selections.shift(); return match(selections.join(" ").split(",").map(function (selection) { return selection.trim(); }), [function (value) { return (0, _reusables.xReplace)(value[0], [" ", ""]) === "*"; }, function () { return function (df) { return df; }; }], [function (value) { return value[0].toUpperCase().includes("DISTINCT"); }, function (value) { var columnName = (0, _reusables.xReplace)(value[0].split(" AS ")[0], ["DISTINCT", ""], ["distinct", ""], [" ", ""]); return function (df) { return df.distinct(columnName).rename(columnName, value[0].includes("AS") ? value[0].split("AS")[1].replace(" ", "") : columnName); }; }], [function (value) { return _reusables.xContains.apply(void 0, [value[0].toUpperCase()].concat(_toConsumableArray(Object.keys(SELECT_FUNCTIONS))))[0]; }, function (value) { return function (df) { var functionToApply = Object.keys(SELECT_FUNCTIONS).find(function (func) { return value[0].toUpperCase().includes(func); }); var applyFunction = function applyFunction(dfToImpact) { return SELECT_FUNCTIONS[functionToApply](dfToImpact, (0, _reusables.xReplace)(value[0], ["".concat(functionToApply.toLowerCase(), "("), ""], ["".concat(functionToApply, "("), ""], ["(", ""], [")", ""])); }; return df.on && df.df ? df.aggregate(applyFunction) : applyFunction(df); }; }], [function () { return true; }, function (value) { return function (df) { return df.select.apply(df, _toConsumableArray(value.map(function (column) { return column.split(" AS ")[0].replace(" ", ""); }))).renameAll(value.map(function (column) { return column.includes("AS") ? column.split("AS")[1].replace(" ", "") : column; })); }; }]); } function sqlParser(query, tables) { var _sqlSplitter = sqlSplitter(query), selections = _sqlSplitter.selections, table = _sqlSplitter.table, operations = _sqlSplitter.operations; if (!table || !Object.keys(tables).includes(table)) { throw new _errors.SQLParseError("Wrong table name in your query: ".concat(table)); } var applyOperations = parseOperations(operations, tables); var applySelections = parseSelections(selections); return applySelections(applyOperations(tables[table])); }