UNPKG

sql-formatter-plus

Version:

Formats whitespace in a SQL query to make it more readable

390 lines (363 loc) 14.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports["default"] = void 0; var _isEmpty = _interopRequireDefault(require("lodash/isEmpty")); var _escapeRegExp = _interopRequireDefault(require("lodash/escapeRegExp")); var _tokenTypes = _interopRequireDefault(require("./tokenTypes")); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } var Tokenizer = /*#__PURE__*/ function () { /** * @param {Object} cfg * @param {String[]} cfg.reservedWords Reserved words in SQL * @param {String[]} cfg.reservedTopLevelWords Words that are set to new line separately * @param {String[]} cfg.reservedNewlineWords Words that are set to newline * @param {String[]} cfg.reservedTopLevelWordsNoIndent Words that are top level but have no indentation * @param {String[]} cfg.stringTypes String types to enable: "", '', ``, [], N'' * @param {String[]} cfg.openParens Opening parentheses to enable, like (, [ * @param {String[]} cfg.closeParens Closing parentheses to enable, like ), ] * @param {String[]} cfg.indexedPlaceholderTypes Prefixes for indexed placeholders, like ? * @param {String[]} cfg.namedPlaceholderTypes Prefixes for named placeholders, like @ and : * @param {String[]} cfg.lineCommentTypes Line comments to enable, like # and -- * @param {String[]} cfg.specialWordChars Special chars that can be found inside of words, like @ and # */ function Tokenizer(cfg) { _classCallCheck(this, Tokenizer); this.WHITESPACE_REGEX = /^([\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]+)/; this.NUMBER_REGEX = /^((\x2D[\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]*)?[0-9]+(\.[0-9]+)?|0x[0-9A-Fa-f]+|0b[01]+)\b/; this.OPERATOR_REGEX = /^(!=|<>|==|<=|>=|!<|!>|\|\||::|\x2D>>|\x2D>|~~\*|~~|!~~\*|!~~|~\*|!~\*|!~|:=|(?:[\0-\t\x0B\f\x0E-\u2027\u202A-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))/; this.BLOCK_COMMENT_REGEX = /^(\/\*(?:[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])*?(?:\*\/|$))/; this.LINE_COMMENT_REGEX = this.createLineCommentRegex(cfg.lineCommentTypes); this.RESERVED_TOP_LEVEL_REGEX = this.createReservedWordRegex(cfg.reservedTopLevelWords); this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX = this.createReservedWordRegex(cfg.reservedTopLevelWordsNoIndent); this.RESERVED_NEWLINE_REGEX = this.createReservedWordRegex(cfg.reservedNewlineWords); this.RESERVED_PLAIN_REGEX = this.createReservedWordRegex(cfg.reservedWords); this.WORD_REGEX = this.createWordRegex(cfg.specialWordChars); this.STRING_REGEX = this.createStringRegex(cfg.stringTypes); this.OPEN_PAREN_REGEX = this.createParenRegex(cfg.openParens); this.CLOSE_PAREN_REGEX = this.createParenRegex(cfg.closeParens); this.INDEXED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.indexedPlaceholderTypes, '[0-9]*'); this.IDENT_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.namedPlaceholderTypes, '[a-zA-Z0-9._$]+'); this.STRING_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.namedPlaceholderTypes, this.createStringPattern(cfg.stringTypes)); } _createClass(Tokenizer, [{ key: "createLineCommentRegex", value: function createLineCommentRegex(lineCommentTypes) { return new RegExp("^((?:".concat(lineCommentTypes.map(function (c) { return (0, _escapeRegExp["default"])(c); }).join('|'), ").*?(?:\r\n|\r|\n|$))"), 'u'); } }, { key: "createReservedWordRegex", value: function createReservedWordRegex(reservedWords) { var reservedWordsPattern = reservedWords.join('|').replace(/ /g, '\\s+'); return new RegExp("^(".concat(reservedWordsPattern, ")\\b"), 'iu'); } }, { key: "createWordRegex", value: function createWordRegex() { var specialChars = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : []; return new RegExp("^([\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}\\p{Connector_Punctuation}\\p{Join_Control}".concat(specialChars.join(''), "]+)"), 'u'); } }, { key: "createStringRegex", value: function createStringRegex(stringTypes) { return new RegExp('^(' + this.createStringPattern(stringTypes) + ')', 'u'); } // This enables the following string patterns: // 1. backtick quoted string using `` to escape // 2. square bracket quoted string (SQL Server) using ]] to escape // 3. double quoted string using "" or \" to escape // 4. single quoted string using '' or \' to escape // 5. national character quoted string using N'' or N\' to escape }, { key: "createStringPattern", value: function createStringPattern(stringTypes) { var patterns = { '``': '((`[^`]*($|`))+)', '[]': '((\\[[^\\]]*($|\\]))(\\][^\\]]*($|\\]))*)', '""': '(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)', "''": "(('[^'\\\\]*(?:\\\\.[^'\\\\]*)*('|$))+)", "N''": "((N'[^N'\\\\]*(?:\\\\.[^N'\\\\]*)*('|$))+)" }; return stringTypes.map(function (t) { return patterns[t]; }).join('|'); } }, { key: "createParenRegex", value: function createParenRegex(parens) { var _this = this; return new RegExp('^(' + parens.map(function (p) { return _this.escapeParen(p); }).join('|') + ')', 'iu'); } }, { key: "escapeParen", value: function escapeParen(paren) { if (paren.length === 1) { // A single punctuation character return (0, _escapeRegExp["default"])(paren); } else { // longer word return '\\b' + paren + '\\b'; } } }, { key: "createPlaceholderRegex", value: function createPlaceholderRegex(types, pattern) { if ((0, _isEmpty["default"])(types)) { return false; } var typesRegex = types.map(_escapeRegExp["default"]).join('|'); return new RegExp("^((?:".concat(typesRegex, ")(?:").concat(pattern, "))"), 'u'); } /** * Takes a SQL string and breaks it into tokens. * Each token is an object with type and value. * * @param {String} input The SQL string * @return {Object[]} tokens An array of tokens. * @return {String} token.type * @return {String} token.value */ }, { key: "tokenize", value: function tokenize(input) { if (!input) return []; var tokens = []; var token; // Keep processing the string until it is empty while (input.length) { // Get the next token and the token type token = this.getNextToken(input, token); // Advance the string input = input.substring(token.value.length); tokens.push(token); } return tokens; } }, { key: "getNextToken", value: function getNextToken(input, previousToken) { return this.getWhitespaceToken(input) || this.getCommentToken(input) || this.getStringToken(input) || this.getOpenParenToken(input) || this.getCloseParenToken(input) || this.getPlaceholderToken(input) || this.getNumberToken(input) || this.getReservedWordToken(input, previousToken) || this.getWordToken(input) || this.getOperatorToken(input); } }, { key: "getWhitespaceToken", value: function getWhitespaceToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].WHITESPACE, regex: this.WHITESPACE_REGEX }); } }, { key: "getCommentToken", value: function getCommentToken(input) { return this.getLineCommentToken(input) || this.getBlockCommentToken(input); } }, { key: "getLineCommentToken", value: function getLineCommentToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].LINE_COMMENT, regex: this.LINE_COMMENT_REGEX }); } }, { key: "getBlockCommentToken", value: function getBlockCommentToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].BLOCK_COMMENT, regex: this.BLOCK_COMMENT_REGEX }); } }, { key: "getStringToken", value: function getStringToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].STRING, regex: this.STRING_REGEX }); } }, { key: "getOpenParenToken", value: function getOpenParenToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].OPEN_PAREN, regex: this.OPEN_PAREN_REGEX }); } }, { key: "getCloseParenToken", value: function getCloseParenToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].CLOSE_PAREN, regex: this.CLOSE_PAREN_REGEX }); } }, { key: "getPlaceholderToken", value: function getPlaceholderToken(input) { return this.getIdentNamedPlaceholderToken(input) || this.getStringNamedPlaceholderToken(input) || this.getIndexedPlaceholderToken(input); } }, { key: "getIdentNamedPlaceholderToken", value: function getIdentNamedPlaceholderToken(input) { return this.getPlaceholderTokenWithKey({ input: input, regex: this.IDENT_NAMED_PLACEHOLDER_REGEX, parseKey: function parseKey(v) { return v.slice(1); } }); } }, { key: "getStringNamedPlaceholderToken", value: function getStringNamedPlaceholderToken(input) { var _this2 = this; return this.getPlaceholderTokenWithKey({ input: input, regex: this.STRING_NAMED_PLACEHOLDER_REGEX, parseKey: function parseKey(v) { return _this2.getEscapedPlaceholderKey({ key: v.slice(2, -1), quoteChar: v.slice(-1) }); } }); } }, { key: "getIndexedPlaceholderToken", value: function getIndexedPlaceholderToken(input) { return this.getPlaceholderTokenWithKey({ input: input, regex: this.INDEXED_PLACEHOLDER_REGEX, parseKey: function parseKey(v) { return v.slice(1); } }); } }, { key: "getPlaceholderTokenWithKey", value: function getPlaceholderTokenWithKey(_ref) { var input = _ref.input, regex = _ref.regex, parseKey = _ref.parseKey; var token = this.getTokenOnFirstMatch({ input: input, regex: regex, type: _tokenTypes["default"].PLACEHOLDER }); if (token) { token.key = parseKey(token.value); } return token; } }, { key: "getEscapedPlaceholderKey", value: function getEscapedPlaceholderKey(_ref2) { var key = _ref2.key, quoteChar = _ref2.quoteChar; return key.replace(new RegExp((0, _escapeRegExp["default"])('\\' + quoteChar), 'gu'), quoteChar); } // Decimal, binary, or hex numbers }, { key: "getNumberToken", value: function getNumberToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].NUMBER, regex: this.NUMBER_REGEX }); } // Punctuation and symbols }, { key: "getOperatorToken", value: function getOperatorToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].OPERATOR, regex: this.OPERATOR_REGEX }); } }, { key: "getReservedWordToken", value: function getReservedWordToken(input, previousToken) { // A reserved word cannot be preceded by a "." // this makes it so in "my_table.from", "from" is not considered a reserved word if (previousToken && previousToken.value && previousToken.value === '.') { return; } return this.getTopLevelReservedToken(input) || this.getNewlineReservedToken(input) || this.getTopLevelReservedTokenNoIndent(input) || this.getPlainReservedToken(input); } }, { key: "getTopLevelReservedToken", value: function getTopLevelReservedToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].RESERVED_TOP_LEVEL, regex: this.RESERVED_TOP_LEVEL_REGEX }); } }, { key: "getNewlineReservedToken", value: function getNewlineReservedToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].RESERVED_NEWLINE, regex: this.RESERVED_NEWLINE_REGEX }); } }, { key: "getTopLevelReservedTokenNoIndent", value: function getTopLevelReservedTokenNoIndent(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].RESERVED_TOP_LEVEL_NO_INDENT, regex: this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX }); } }, { key: "getPlainReservedToken", value: function getPlainReservedToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].RESERVED, regex: this.RESERVED_PLAIN_REGEX }); } }, { key: "getWordToken", value: function getWordToken(input) { return this.getTokenOnFirstMatch({ input: input, type: _tokenTypes["default"].WORD, regex: this.WORD_REGEX }); } }, { key: "getTokenOnFirstMatch", value: function getTokenOnFirstMatch(_ref3) { var input = _ref3.input, type = _ref3.type, regex = _ref3.regex; var matches = input.match(regex); if (matches) { return { type: type, value: matches[1] }; } } }]); return Tokenizer; }(); exports["default"] = Tokenizer; module.exports = exports.default;