/*
 * sql-formatter-plus
 * Version: (not recorded)
 * Formats whitespace in a SQL query to make it more readable.
 * 390 lines (363 loc) • 14.6 kB
 * JavaScript
 */
"use strict";
// Flag this CommonJS module as the output of a transpiled ES module; the
// `_interopRequireDefault` helper below checks the same `__esModule` flag on
// the modules it wraps.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare the default export up front; the real value (the Tokenizer class)
// is assigned at the end of the file.
exports["default"] = void 0;
var _isEmpty = _interopRequireDefault(require("lodash/isEmpty"));
var _escapeRegExp = _interopRequireDefault(require("lodash/escapeRegExp"));
var _tokenTypes = _interopRequireDefault(require("./tokenTypes"));
/**
 * Normalises a required module so its default export is always reachable
 * through the `default` property.
 *
 * @param {*} obj The value returned by `require()`
 * @return {Object} `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a new wrapper object `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return {
    "default": obj
  };
}
/**
 * Guards a transpiled class constructor against being invoked without `new`.
 *
 * @param {*} instance The `this` value the constructor was called with
 * @param {Function} Constructor The constructor being guarded
 * @throws {TypeError} When `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  var calledWithNew = instance instanceof Constructor;
  if (calledWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Installs a list of property descriptors on `target`.
 *
 * Each descriptor is mutated in place before being applied: `enumerable`
 * falls back to `false` when falsy, `configurable` is forced to `true`, and
 * descriptors carrying a `value` are made writable. The property name is read
 * from the descriptor's own `key` field.
 *
 * @param {Object} target Object that receives the properties
 * @param {Object[]} props Descriptors, each with a `key` plus either `value`
 *   or accessor fields
 */
function _defineProperties(target, props) {
  var index = 0;
  while (index < props.length) {
    var entry = props[index];
    entry.enumerable = entry.enumerable || false;
    entry.configurable = true;
    if ("value" in entry) {
      entry.writable = true;
    }
    Object.defineProperty(target, entry.key, entry);
    index += 1;
  }
}
/**
 * Attaches method descriptors to a transpiled class.
 *
 * @param {Function} Constructor The class constructor
 * @param {Object[]} [protoProps] Descriptors installed on `Constructor.prototype`
 * @param {Object[]} [staticProps] Descriptors installed on `Constructor` itself
 * @return {Function} The same `Constructor`
 */
function _createClass(Constructor, protoProps, staticProps) {
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }
  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }
  return Constructor;
}
var Tokenizer =
/*#__PURE__*/
function () {
  /**
   * Splits a SQL string into typed tokens using regexes that are built once,
   * in the constructor, from the dialect configuration.
   *
   * @param {Object} cfg
   *  @param {String[]} cfg.reservedWords Reserved words in SQL
   *  @param {String[]} cfg.reservedTopLevelWords Words that are set to new line separately
   *  @param {String[]} cfg.reservedNewlineWords Words that are set to newline
   *  @param {String[]} cfg.reservedTopLevelWordsNoIndent Words that are top level but have no indentation
   *  @param {String[]} cfg.stringTypes String types to enable: "", '', ``, [], N''
   *  @param {String[]} cfg.openParens Opening parentheses to enable, like (, [
   *  @param {String[]} cfg.closeParens Closing parentheses to enable, like ), ]
   *  @param {String[]} cfg.indexedPlaceholderTypes Prefixes for indexed placeholders, like ?
   *  @param {String[]} cfg.namedPlaceholderTypes Prefixes for named placeholders, like @ and :
   *  @param {String[]} cfg.lineCommentTypes Line comments to enable, like # and --
   *  @param {String[]} cfg.specialWordChars Special chars that can be found inside of words, like @ and #
   */
  function Tokenizer(cfg) {
    _classCallCheck(this, Tokenizer);

    // Fixed, dialect-independent patterns. Every pattern is anchored at the
    // start of the remaining input and exposes the token text as group 1.
    this.WHITESPACE_REGEX = /^([\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]+)/;
    this.NUMBER_REGEX = /^((\x2D[\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]*)?[0-9]+(\.[0-9]+)?|0x[0-9A-Fa-f]+|0b[01]+)\b/;
    this.OPERATOR_REGEX = /^(!=|<>|==|<=|>=|!<|!>|\|\||::|\x2D>>|\x2D>|~~\*|~~|!~~\*|!~~|~\*|!~\*|!~|:=|(?:[\0-\t\x0B\f\x0E-\u2027\u202A-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))/;
    this.BLOCK_COMMENT_REGEX = /^(\/\*(?:[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])*?(?:\*\/|$))/;

    // Dialect-specific patterns derived from the configuration.
    this.LINE_COMMENT_REGEX = this.createLineCommentRegex(cfg.lineCommentTypes);
    this.RESERVED_TOP_LEVEL_REGEX = this.createReservedWordRegex(cfg.reservedTopLevelWords);
    this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX = this.createReservedWordRegex(cfg.reservedTopLevelWordsNoIndent);
    this.RESERVED_NEWLINE_REGEX = this.createReservedWordRegex(cfg.reservedNewlineWords);
    this.RESERVED_PLAIN_REGEX = this.createReservedWordRegex(cfg.reservedWords);
    this.WORD_REGEX = this.createWordRegex(cfg.specialWordChars);
    this.STRING_REGEX = this.createStringRegex(cfg.stringTypes);
    this.OPEN_PAREN_REGEX = this.createParenRegex(cfg.openParens);
    this.CLOSE_PAREN_REGEX = this.createParenRegex(cfg.closeParens);
    this.INDEXED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.indexedPlaceholderTypes, '[0-9]*');
    this.IDENT_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.namedPlaceholderTypes, '[a-zA-Z0-9._$]+');
    this.STRING_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(cfg.namedPlaceholderTypes, this.createStringPattern(cfg.stringTypes));
  }

  // Returns a regex that can never match: an empty negative lookahead fails
  // at every position. Used when a configured list is empty.
  function createNeverMatchingRegex() {
    return new RegExp('(?!)', 'u');
  }

  _createClass(Tokenizer, [{
    key: "createLineCommentRegex",

    /**
     * Builds the regex for a line comment: one of the given prefixes followed
     * by everything up to and including the line break (or end of input).
     *
     * @param {String[]} lineCommentTypes Comment prefixes, like # and --
     * @return {RegExp} A never-matching regex when no prefixes are given
     */
    value: function createLineCommentRegex(lineCommentTypes) {
      if ((0, _isEmpty["default"])(lineCommentTypes)) {
        // With no prefixes the alternation would be empty and every line of
        // input would be consumed as a comment.
        return createNeverMatchingRegex();
      }

      return new RegExp("^((?:".concat(lineCommentTypes.map(function (c) {
        return (0, _escapeRegExp["default"])(c);
      }).join('|'), ").*?(?:\r\n|\r|\n|$))"), 'u');
    }
  }, {
    key: "createReservedWordRegex",

    /**
     * Builds a case-insensitive regex matching any of the given words at the
     * start of the input, followed by a word boundary. A space inside a
     * multi-word entry matches any run of whitespace. Entries are joined as-is
     * (not regex-escaped), in the order given.
     *
     * @param {String[]} reservedWords
     * @return {RegExp} A never-matching regex when the list is empty or missing
     */
    value: function createReservedWordRegex(reservedWords) {
      if ((0, _isEmpty["default"])(reservedWords)) {
        // "^()\b" would match the empty string and produce zero-length tokens.
        return createNeverMatchingRegex();
      }

      var reservedWordsPattern = reservedWords.join('|').replace(/ /g, '\\s+');
      return new RegExp("^(".concat(reservedWordsPattern, ")\\b"), 'iu');
    }
  }, {
    key: "createWordRegex",

    /**
     * Builds the regex for a plain word: a run of alphabetic, mark, decimal
     * digit, connector-punctuation or join-control characters, plus any of the
     * given special characters (inserted into the character class unescaped).
     *
     * @param {String[]} [specialChars=[]] Extra characters allowed inside words
     * @return {RegExp}
     */
    value: function createWordRegex() {
      var specialChars = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
      return new RegExp("^([\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}\\p{Connector_Punctuation}\\p{Join_Control}".concat(specialChars.join(''), "]+)"), 'u');
    }
  }, {
    key: "createStringRegex",

    /**
     * Builds the regex for a quoted string of any enabled type.
     *
     * @param {String[]} stringTypes Keys accepted by createStringPattern
     * @return {RegExp}
     */
    value: function createStringRegex(stringTypes) {
      return new RegExp('^(' + this.createStringPattern(stringTypes) + ')', 'u');
    }
  }, {
    key: "createStringPattern",

    /**
     * Returns the regex source (alternation) for the enabled string types.
     *
     * This enables the following string patterns:
     * 1. backtick quoted string using `` to escape
     * 2. square bracket quoted string (SQL Server) using ]] to escape
     * 3. double quoted string using "" or \" to escape
     * 4. single quoted string using '' or \' to escape
     * 5. national character string: a single N prefix, then a single quoted
     *    string using '' or \' to escape (the letter N may appear inside it)
     *
     * Every pattern also accepts an unterminated string that runs to the end
     * of the input.
     *
     * @param {String[]} stringTypes Any of "``", "[]", '""', "''", "N''"
     * @return {String}
     */
    value: function createStringPattern(stringTypes) {
      var patterns = {
        '``': '((`[^`]*($|`))+)',
        '[]': '((\\[[^\\]]*($|\\]))(\\][^\\]]*($|\\]))*)',
        '""': '(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)',
        "''": "(('[^'\\\\]*(?:\\\\.[^'\\\\]*)*('|$))+)",
        // The body must not exclude "N", otherwise N'North' is not a string;
        // the prefix is matched once so that N'it''s' stays a single token.
        "N''": "(N('[^'\\\\]*(?:\\\\.[^'\\\\]*)*('|$))+)"
      };
      return stringTypes.map(function (t) {
        return patterns[t];
      }).join('|');
    }
  }, {
    key: "createParenRegex",

    /**
     * Builds a case-insensitive regex matching any of the given parentheses.
     *
     * @param {String[]} parens Single characters or whole words
     * @return {RegExp}
     */
    value: function createParenRegex(parens) {
      var _this = this;

      return new RegExp('^(' + parens.map(function (p) {
        return _this.escapeParen(p);
      }).join('|') + ')', 'iu');
    }
  }, {
    key: "escapeParen",

    /**
     * Turns one configured parenthesis into regex source.
     *
     * @param {String} paren
     * @return {String} The escaped character for a one-character paren,
     *   otherwise the text wrapped in word boundaries (not escaped)
     */
    value: function escapeParen(paren) {
      if (paren.length === 1) {
        // A single punctuation character
        return (0, _escapeRegExp["default"])(paren);
      } else {
        // longer word
        return '\\b' + paren + '\\b';
      }
    }
  }, {
    key: "createPlaceholderRegex",

    /**
     * Builds the regex for a placeholder: one of the prefixes followed by the
     * given pattern.
     *
     * @param {String[]} types Placeholder prefixes
     * @param {String} pattern Regex source for the part after the prefix
     * @return {RegExp|Boolean} `false` when no prefixes are configured;
     *   getTokenOnFirstMatch treats that as "never matches"
     */
    value: function createPlaceholderRegex(types, pattern) {
      if ((0, _isEmpty["default"])(types)) {
        return false;
      }

      var typesRegex = types.map(_escapeRegExp["default"]).join('|');
      return new RegExp("^((?:".concat(typesRegex, ")(?:").concat(pattern, "))"), 'u');
    }
    /**
     * Takes a SQL string and breaks it into tokens.
     * Each token is an object with type and value.
     *
     * @param {String} input The SQL string
     * @return {Object[]} tokens An array of tokens.
     *  @return {String} token.type
     *  @return {String} token.value
     */

  }, {
    key: "tokenize",
    value: function tokenize(input) {
      if (!input) return [];
      var tokens = [];
      var token; // Keep processing the string until it is empty

      while (input.length) {
        // Get the next token and the token type
        token = this.getNextToken(input, token); // Advance the string

        input = input.substring(token.value.length);
        tokens.push(token);
      }

      return tokens;
    }
  }, {
    key: "getNextToken",

    /**
     * Tries each token kind in priority order and returns the first match.
     *
     * @param {String} input Remaining, not yet tokenized text
     * @param {Object} [previousToken] The token produced just before this one
     * @return {Object|undefined}
     */
    value: function getNextToken(input, previousToken) {
      return this.getWhitespaceToken(input) || this.getCommentToken(input) || this.getStringToken(input) || this.getOpenParenToken(input) || this.getCloseParenToken(input) || this.getPlaceholderToken(input) || this.getNumberToken(input) || this.getReservedWordToken(input, previousToken) || this.getWordToken(input) || this.getOperatorToken(input);
    }
  }, {
    key: "getWhitespaceToken",
    value: function getWhitespaceToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].WHITESPACE,
        regex: this.WHITESPACE_REGEX
      });
    }
  }, {
    key: "getCommentToken",
    // Line comments are tried before block comments
    value: function getCommentToken(input) {
      return this.getLineCommentToken(input) || this.getBlockCommentToken(input);
    }
  }, {
    key: "getLineCommentToken",
    value: function getLineCommentToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].LINE_COMMENT,
        regex: this.LINE_COMMENT_REGEX
      });
    }
  }, {
    key: "getBlockCommentToken",
    value: function getBlockCommentToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].BLOCK_COMMENT,
        regex: this.BLOCK_COMMENT_REGEX
      });
    }
  }, {
    key: "getStringToken",
    value: function getStringToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].STRING,
        regex: this.STRING_REGEX
      });
    }
  }, {
    key: "getOpenParenToken",
    value: function getOpenParenToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].OPEN_PAREN,
        regex: this.OPEN_PAREN_REGEX
      });
    }
  }, {
    key: "getCloseParenToken",
    value: function getCloseParenToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].CLOSE_PAREN,
        regex: this.CLOSE_PAREN_REGEX
      });
    }
  }, {
    key: "getPlaceholderToken",
    // Identifier-named, then string-named, then indexed placeholders
    value: function getPlaceholderToken(input) {
      return this.getIdentNamedPlaceholderToken(input) || this.getStringNamedPlaceholderToken(input) || this.getIndexedPlaceholderToken(input);
    }
  }, {
    key: "getIdentNamedPlaceholderToken",
    value: function getIdentNamedPlaceholderToken(input) {
      return this.getPlaceholderTokenWithKey({
        input: input,
        regex: this.IDENT_NAMED_PLACEHOLDER_REGEX,
        // The key is the token text without its first (prefix) character
        parseKey: function parseKey(v) {
          return v.slice(1);
        }
      });
    }
  }, {
    key: "getStringNamedPlaceholderToken",
    value: function getStringNamedPlaceholderToken(input) {
      var _this2 = this;

      return this.getPlaceholderTokenWithKey({
        input: input,
        regex: this.STRING_NAMED_PLACEHOLDER_REGEX,
        // Drop the first two characters and the last one, then unescape the
        // quote character (taken from the last character) inside the key
        parseKey: function parseKey(v) {
          return _this2.getEscapedPlaceholderKey({
            key: v.slice(2, -1),
            quoteChar: v.slice(-1)
          });
        }
      });
    }
  }, {
    key: "getIndexedPlaceholderToken",
    value: function getIndexedPlaceholderToken(input) {
      return this.getPlaceholderTokenWithKey({
        input: input,
        regex: this.INDEXED_PLACEHOLDER_REGEX,
        // The key is the token text without its first (prefix) character
        parseKey: function parseKey(v) {
          return v.slice(1);
        }
      });
    }
  }, {
    key: "getPlaceholderTokenWithKey",

    /**
     * Matches a placeholder and, on success, attaches its parsed `key`.
     *
     * @param {Object} _ref
     *  @param {String} _ref.input
     *  @param {RegExp|Boolean} _ref.regex
     *  @param {Function} _ref.parseKey Maps the token text to the key
     * @return {Object|undefined}
     */
    value: function getPlaceholderTokenWithKey(_ref) {
      var input = _ref.input,
          regex = _ref.regex,
          parseKey = _ref.parseKey;
      var token = this.getTokenOnFirstMatch({
        input: input,
        regex: regex,
        type: _tokenTypes["default"].PLACEHOLDER
      });

      if (token) {
        token.key = parseKey(token.value);
      }

      return token;
    }
  }, {
    key: "getEscapedPlaceholderKey",
    // Replaces every backslash-escaped quote character in the key with the
    // bare quote character
    value: function getEscapedPlaceholderKey(_ref2) {
      var key = _ref2.key,
          quoteChar = _ref2.quoteChar;
      return key.replace(new RegExp((0, _escapeRegExp["default"])('\\' + quoteChar), 'gu'), quoteChar);
    }
  }, {
    key: "getNumberToken",
    // Decimal, binary, or hex numbers
    value: function getNumberToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].NUMBER,
        regex: this.NUMBER_REGEX
      });
    }
  }, {
    key: "getOperatorToken",
    // Punctuation and symbols
    value: function getOperatorToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].OPERATOR,
        regex: this.OPERATOR_REGEX
      });
    }
  }, {
    key: "getReservedWordToken",
    value: function getReservedWordToken(input, previousToken) {
      // A reserved word cannot be preceded by a "."
      // this makes it so in "my_table.from", "from" is not considered a reserved word
      if (previousToken && previousToken.value && previousToken.value === '.') {
        return;
      }

      return this.getTopLevelReservedToken(input) || this.getNewlineReservedToken(input) || this.getTopLevelReservedTokenNoIndent(input) || this.getPlainReservedToken(input);
    }
  }, {
    key: "getTopLevelReservedToken",
    value: function getTopLevelReservedToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].RESERVED_TOP_LEVEL,
        regex: this.RESERVED_TOP_LEVEL_REGEX
      });
    }
  }, {
    key: "getNewlineReservedToken",
    value: function getNewlineReservedToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].RESERVED_NEWLINE,
        regex: this.RESERVED_NEWLINE_REGEX
      });
    }
  }, {
    key: "getTopLevelReservedTokenNoIndent",
    value: function getTopLevelReservedTokenNoIndent(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].RESERVED_TOP_LEVEL_NO_INDENT,
        regex: this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX
      });
    }
  }, {
    key: "getPlainReservedToken",
    value: function getPlainReservedToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].RESERVED,
        regex: this.RESERVED_PLAIN_REGEX
      });
    }
  }, {
    key: "getWordToken",
    value: function getWordToken(input) {
      return this.getTokenOnFirstMatch({
        input: input,
        type: _tokenTypes["default"].WORD,
        regex: this.WORD_REGEX
      });
    }
  }, {
    key: "getTokenOnFirstMatch",

    /**
     * Applies `regex` to `input` and wraps capture group 1 in a token.
     *
     * @param {Object} _ref3
     *  @param {String} _ref3.input
     *  @param {String} _ref3.type Token type to assign
     *  @param {RegExp|Boolean} _ref3.regex Pattern, or a falsy value for a
     *    token kind that is disabled in the configuration
     * @return {Object|undefined} `{ type, value }`, or undefined when there is
     *   no match or the match is empty
     */
    value: function getTokenOnFirstMatch(_ref3) {
      var input = _ref3.input,
          type = _ref3.type,
          regex = _ref3.regex;

      // String.prototype.match would coerce `false` into the pattern /false/
      // and match the literal text "false" anywhere in the input.
      if (!regex) {
        return undefined;
      }

      var matches = input.match(regex);

      // An empty match is not a token: accepting it would keep tokenize()
      // from ever advancing through the input.
      if (matches && matches[1]) {
        return {
          type: type,
          value: matches[1]
        };
      }

      return undefined;
    }
  }]);

  return Tokenizer;
}();
// Publish the Tokenizer class as the module's default export.
exports["default"] = Tokenizer;
// Replace the exports object with the class itself, so a plain CommonJS
// `require()` of this file returns Tokenizer directly.
module.exports = exports.default;