/*
 * prettier-sql
 * Version:
 * Format whitespace in a SQL query to make it more readable
 * 260 lines (211 loc) • 16.1 kB
 * JavaScript
 */
;
/**
 * Babel helper: `typeof` that also reports "symbol" for polyfilled Symbols.
 *
 * On first use the function inspects the runtime once and replaces itself
 * with the variant that fits, so later calls skip the feature detection.
 *
 * @param {*} obj - value to inspect
 * @returns {string} the type name of `obj`
 */
function _typeof(obj) {
  "@babel/helpers - typeof";

  var symbolsAreNative = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";

  _typeof = symbolsAreNative
    ? function _typeof(obj) {
        // Native symbols: the built-in operator already answers correctly.
        return typeof obj;
      }
    : function _typeof(obj) {
        // Polyfilled symbols are plain objects constructed by `Symbol`.
        return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj;
      };

  return _typeof(obj);
}
// Flag the exports object with `__esModule`; the interop helper in this file
// returns such objects unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare the default export up front; it is assigned the Tokenizer class at the end of the file.
exports["default"] = void 0;
var regexFactory = _interopRequireWildcard(require("./regexFactory"));
var _utils = require("../utils");
var _token = require("./token");
/**
 * Babel helper: returns the WeakMap used to memoise wildcard-import wrappers.
 *
 * Two caches exist, one per interop mode. They are created on the first call,
 * after which the function replaces itself with a plain selector.
 *
 * @param {boolean} [nodeInterop] - pick the node-interop cache when truthy
 * @returns {WeakMap|null} the matching cache, or null when WeakMap is unavailable
 */
function _getRequireWildcardCache(nodeInterop) {
  if (typeof WeakMap !== "function") {
    return null;
  }

  var babelModeCache = new WeakMap();
  var nodeModeCache = new WeakMap();

  _getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) {
    if (nodeInterop) {
      return nodeModeCache;
    }
    return babelModeCache;
  };

  return _getRequireWildcardCache(nodeInterop);
}
/**
 * Babel helper: adapts the result of `require()` to the shape expected by
 * `import * as ns from '...'`.
 *
 * - Objects flagged `__esModule` are returned untouched (unless `nodeInterop`).
 * - Primitives and null are wrapped as `{ default: value }`.
 * - Any other object/function gets a namespace copy of its own properties
 *   (accessors preserved) plus `default` pointing at the original; the copy is
 *   memoised per source object when a cache is available.
 *
 * @param {*} obj - the required module value
 * @param {boolean} [nodeInterop] - selects the interop mode / cache
 * @returns {Object} namespace-like object
 */
function _interopRequireWildcard(obj, nodeInterop) {
  if (!nodeInterop) {
    if (obj && obj.__esModule) {
      return obj;
    }
  }

  var isObjectLike = obj !== null && (_typeof(obj) === "object" || typeof obj === "function");
  if (!isObjectLike) {
    return { "default": obj };
  }

  var cache = _getRequireWildcardCache(nodeInterop);
  if (cache && cache.has(obj)) {
    return cache.get(obj);
  }

  var namespace = {};
  var canUseDescriptors = Object.defineProperty && Object.getOwnPropertyDescriptor;

  for (var name in obj) {
    if (name === "default" || !Object.prototype.hasOwnProperty.call(obj, name)) {
      continue;
    }
    var descriptor = canUseDescriptors ? Object.getOwnPropertyDescriptor(obj, name) : null;
    var isAccessor = descriptor && (descriptor.get || descriptor.set);
    if (isAccessor) {
      // keep getters/setters live instead of snapshotting their value
      Object.defineProperty(namespace, name, descriptor);
    } else {
      namespace[name] = obj[name];
    }
  }

  namespace["default"] = obj;
  if (cache) {
    cache.set(obj, namespace);
  }
  return namespace;
}
/**
 * Babel helper: lists an object's own enumerable string keys followed by its
 * own symbol keys.
 *
 * @param {Object} object - object to inspect
 * @param {boolean} [enumerableOnly] - when truthy, non-enumerable symbols are left out
 * @returns {Array<string|symbol>} the collected keys
 */
function ownKeys(object, enumerableOnly) {
  var collected = Object.keys(object);

  if (!Object.getOwnPropertySymbols) {
    return collected;
  }

  var symbolKeys = Object.getOwnPropertySymbols(object);
  for (var idx = 0; idx < symbolKeys.length; idx++) {
    var sym = symbolKeys[idx];
    if (!enumerableOnly || Object.getOwnPropertyDescriptor(object, sym).enumerable) {
      collected.push(sym);
    }
  }

  return collected;
}
/**
 * Babel helper behind object spread (`{ ...a, ...b }`).
 *
 * Arguments after `target` are merged into it in order. Sources at odd
 * argument positions (1st, 3rd, ...) have their own enumerable properties
 * copied by value; sources at even positions are copied by property
 * descriptor. Null/undefined sources count as empty objects.
 *
 * @param {Object} target - object that receives the properties (mutated)
 * @returns {Object} the same `target`
 */
function _objectSpread(target) {
  var sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (candidate, offset) {
    var source = candidate != null ? candidate : {};
    // offset 0 is argument position 1, so even offsets are the odd positions
    var copyByValue = offset % 2 === 0;

    if (copyByValue) {
      ownKeys(Object(source), true).forEach(function (key) {
        _defineProperty(target, key, source[key]);
      });
      return;
    }

    if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
      return;
    }

    ownKeys(Object(source)).forEach(function (key) {
      Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
    });
  });

  return target;
}
/**
 * Babel helper behind array spread (`[...arr]`): turns `arr` into a fresh
 * array, trying each conversion strategy in turn and throwing when none fits.
 *
 * @param {*} arr - array, iterable or supported array-like value
 * @returns {Array} a new array with the elements of `arr`
 */
function _toConsumableArray(arr) {
  var converted = _arrayWithoutHoles(arr);
  if (converted) {
    return converted;
  }

  converted = _iterableToArray(arr);
  if (converted) {
    return converted;
  }

  converted = _unsupportedIterableToArray(arr);
  if (converted) {
    return converted;
  }

  return _nonIterableSpread();
}
/**
 * Babel helper: raises the error for spreading a value that is not iterable.
 *
 * @throws {TypeError} always
 */
function _nonIterableSpread() {
  var failure = new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  throw failure;
}
/**
 * Babel helper: array conversion for values recognised by their
 * `Object.prototype.toString` tag (or constructor name for plain objects):
 * strings, Map, Set, Arguments and the Int/Uint typed arrays.
 *
 * @param {*} o - value to convert
 * @param {number} [minLen] - forwarded to `_arrayLikeToArray` as the length cap
 * @returns {Array|undefined} the converted array, or undefined when `o` is
 *   falsy or not one of the recognised kinds
 */
function _unsupportedIterableToArray(o, minLen) {
  if (!o) {
    return undefined;
  }
  if (typeof o === "string") {
    return _arrayLikeToArray(o, minLen);
  }

  // "[object Xyz]" -> "Xyz"
  var tag = Object.prototype.toString.call(o).slice(8, -1);
  if (tag === "Object" && o.constructor) {
    tag = o.constructor.name;
  }

  var isCollection = tag === "Map" || tag === "Set";
  if (isCollection) {
    return Array.from(o);
  }

  var isIndexed = tag === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag);
  return isIndexed ? _arrayLikeToArray(o, minLen) : undefined;
}
/**
 * Babel helper: converts a value to an array with `Array.from` when it
 * exposes an iterator, either `Symbol.iterator` or the legacy "@@iterator" key.
 *
 * @param {*} iter - candidate iterable
 * @returns {Array|undefined} the array, or undefined when `iter` has no iterator
 */
function _iterableToArray(iter) {
  var hasSymbolIterator = typeof Symbol !== "undefined" && iter[Symbol.iterator] != null;

  if (hasSymbolIterator || iter["@@iterator"] != null) {
    return Array.from(iter);
  }

  return undefined;
}
/**
 * Babel helper: copies a real array into a new dense array.
 *
 * @param {*} arr - candidate array
 * @returns {Array|undefined} the copy, or undefined when `arr` is not an array
 */
function _arrayWithoutHoles(arr) {
  return Array.isArray(arr) ? _arrayLikeToArray(arr) : undefined;
}
/**
 * Babel helper: copies the first `len` indexed entries of an array-like value
 * into a new array.
 *
 * @param {ArrayLike} arr - source with `length` and indexed entries
 * @param {number} [len] - number of entries to copy; defaults to (and is
 *   capped at) `arr.length`
 * @returns {Array} the new array
 */
function _arrayLikeToArray(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var copy = new Array(count);
  var idx = 0;

  while (idx < count) {
    copy[idx] = arr[idx];
    idx += 1;
  }

  return copy;
}
/**
 * Babel helper: guards a transpiled class constructor against being called
 * without `new`.
 *
 * @param {*} instance - the `this` value inside the constructor
 * @param {Function} Constructor - the class being constructed
 * @throws {TypeError} when `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  var constructedWithNew = instance instanceof Constructor;
  if (constructedWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Babel helper: installs a list of class members on `target`.
 *
 * Each entry is a property descriptor carrying an extra `key` field. The
 * descriptor objects are normalised in place: non-enumerable unless stated
 * otherwise, always configurable, and writable when they hold a `value`.
 *
 * @param {Object} target - object receiving the properties
 * @param {Array<Object>} props - descriptors with a `key` field (mutated)
 */
function _defineProperties(target, props) {
  var position = 0;

  while (position < props.length) {
    var member = props[position];

    member.enumerable = member.enumerable || false;
    member.configurable = true;
    if ("value" in member) {
      member.writable = true;
    }

    Object.defineProperty(target, member.key, member);
    position += 1;
  }
}
/**
 * Babel helper: attaches prototype and static members to a transpiled class.
 *
 * @param {Function} Constructor - the class constructor
 * @param {Array<Object>} [protoProps] - members for `Constructor.prototype`
 * @param {Array<Object>} [staticProps] - members for `Constructor` itself
 * @returns {Function} the same `Constructor`
 */
function _createClass(Constructor, protoProps, staticProps) {
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }

  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }

  return Constructor;
}
/**
 * Babel helper: sets `obj[key]` to `value` with class-field semantics.
 *
 * A key that is not yet reachable on the object is set by plain assignment.
 * A key that already exists (own or inherited) is (re)defined as an own,
 * enumerable, configurable, writable data property, bypassing any setter.
 *
 * @param {Object} obj - object to modify
 * @param {string|symbol} key - property name
 * @param {*} value - property value
 * @returns {Object} the same `obj`
 */
function _defineProperty(obj, key, value) {
  var alreadyPresent = key in obj;

  if (!alreadyPresent) {
    obj[key] = value;
    return obj;
  }

  Object.defineProperty(obj, key, {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  });
  return obj;
}
// TODO (carried over from the original source): convert to partial type import in TS 4.5
// NOTE(review): presumably refers to one of the require() calls above — confirm against the TypeScript source.

// Regex that never matches: `(?!)` is an empty negative lookahead, which fails at every position.
// It serves as the PLACEHOLDER entry of the tokenizer's regex map and as the fallback
// when one of the placeholder regexes is missing.
var NULL_REGEX = /(?!)/; // zero-width negative lookahead, matches nothing
/**
 * Converts SQL language string into a token stream.
 *
 * The `cfg` options object passed to the constructor is the struct that
 * defines how a SQL language can be broken into tokens.
 */
var Tokenizer = /*#__PURE__*/function () {
  /**
   * @param {TokenizerOptions} cfg
   *  @param {string[]} cfg.reservedKeywords - Reserved words in SQL
   *  @param {string[]} cfg.reservedDependentClauses - Words that follow a specific Statement and must have data attached
   *  @param {string[]} cfg.reservedLogicalOperators - Words that are set to newline
   *  @param {string[]} cfg.reservedCommands - Words that are set to new line separately
   *  @param {string[]} cfg.reservedBinaryCommands - Words that are top level but have no indentation
   *  @param {string[]} cfg.stringTypes - string types to enable - "", '', ``, [], N''
   *  @param {string[]} cfg.blockStart - Opening parentheses to enable, like (, [
   *  @param {string[]} cfg.blockEnd - Closing parentheses to enable, like ), ]
   *  @param {string[]} cfg.indexedPlaceholderTypes - Prefixes for indexed placeholders, like ?
   *  @param {string[]} cfg.namedPlaceholderTypes - Prefixes for named placeholders, like @ and :
   *  @param {string[]} cfg.lineCommentTypes - Line comments to enable, like # and --
   *  @param {string[]} cfg.specialWordChars - Special chars that can be found inside of words, like @ and #
   *    (NOTE(review): read through Object.values() below, so an object of char groups looks to be accepted too — confirm against TokenizerOptions)
   *  @param {string[]} cfg.operators - Additional operators to recognize
   */
  function Tokenizer(cfg) {
    var _this = this,
        _cfg$specialWordChars,
        _cfg$reservedDependen,
        _cfg$operators,
        _this$REGEX_MAP,
        _cfg$indexedPlacehold;

    _classCallCheck(this, Tokenizer);

    // Declare the instance fields up front (class-field semantics).
    _defineProperty(this, "WHITESPACE_REGEX", void 0);
    _defineProperty(this, "REGEX_MAP", void 0);
    _defineProperty(this, "INDEXED_PLACEHOLDER_REGEX", void 0);
    _defineProperty(this, "IDENT_NAMED_PLACEHOLDER_REGEX", void 0);
    _defineProperty(this, "STRING_NAMED_PLACEHOLDER_REGEX", void 0);

    /** Curried function of `getTokenOnFirstMatch` that allows token type to be passed first */
    _defineProperty(this, "matchToken", function (tokenType) {
      return function (input) {
        return _this.getTokenOnFirstMatch({
          input: input,
          type: tokenType,
          regex: _this.REGEX_MAP[tokenType]
        });
      };
    });

    // Leading run of whitespace characters, captured in group 1.
    this.WHITESPACE_REGEX = /^([\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]+)/;

    // All special word characters joined into one string, handed to every reserved-word regex.
    var specialWordCharsAll = Object.values((_cfg$specialWordChars = cfg.specialWordChars) !== null && _cfg$specialWordChars !== void 0 ? _cfg$specialWordChars : {}).join('');

    // One regex per token type; PLACEHOLDER is matched separately by getPlaceholderToken,
    // so its slot holds the never-matching NULL_REGEX.
    this.REGEX_MAP = (_this$REGEX_MAP = {},
      _defineProperty(_this$REGEX_MAP, _token.TokenType.WORD, regexFactory.createWordRegex(cfg.specialWordChars)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.STRING, regexFactory.createStringRegex(cfg.stringTypes)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.RESERVED_KEYWORD, regexFactory.createReservedWordRegex(cfg.reservedKeywords, specialWordCharsAll)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.RESERVED_DEPENDENT_CLAUSE, regexFactory.createReservedWordRegex((_cfg$reservedDependen = cfg.reservedDependentClauses) !== null && _cfg$reservedDependen !== void 0 ? _cfg$reservedDependen : [], specialWordCharsAll)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.RESERVED_LOGICAL_OPERATOR, regexFactory.createReservedWordRegex(cfg.reservedLogicalOperators, specialWordCharsAll)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.RESERVED_COMMAND, regexFactory.createReservedWordRegex(cfg.reservedCommands, specialWordCharsAll)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.RESERVED_BINARY_COMMAND, regexFactory.createReservedWordRegex(cfg.reservedBinaryCommands, specialWordCharsAll)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.OPERATOR, regexFactory.createOperatorRegex('+-/*%&|^><=.,;[]{}`:$', ['<>', '<=', '>=', '!='].concat(_toConsumableArray((_cfg$operators = cfg.operators) !== null && _cfg$operators !== void 0 ? _cfg$operators : [])))),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.BLOCK_START, regexFactory.createParenRegex(cfg.blockStart)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.BLOCK_END, regexFactory.createParenRegex(cfg.blockEnd)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.LINE_COMMENT, regexFactory.createLineCommentRegex(cfg.lineCommentTypes)),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.BLOCK_COMMENT, /^(\/\*(?:(?![])[\s\S])*?(?:\*\/|$))/),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.NUMBER, /^((\x2D[\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]*)?[0-9]+(\.[0-9]*)?([Ee][\+\x2D]?[0-9]+(\.[0-9]+)?)?|0x[0-9A-Fa-f]+|0b[01]+)/),
      _defineProperty(_this$REGEX_MAP, _token.TokenType.PLACEHOLDER, NULL_REGEX),
      _this$REGEX_MAP);

    this.INDEXED_PLACEHOLDER_REGEX = regexFactory.createPlaceholderRegex((_cfg$indexedPlacehold = cfg.indexedPlaceholderTypes) !== null && _cfg$indexedPlacehold !== void 0 ? _cfg$indexedPlacehold : [], '[0-9]*');
    this.IDENT_NAMED_PLACEHOLDER_REGEX = regexFactory.createPlaceholderRegex(cfg.namedPlaceholderTypes, '[a-zA-Z0-9._$]+');
    this.STRING_NAMED_PLACEHOLDER_REGEX = regexFactory.createPlaceholderRegex(cfg.namedPlaceholderTypes, regexFactory.createStringPattern(cfg.stringTypes));
  }

  _createClass(Tokenizer, [{
    key: "tokenize",
    value:
    /**
     * Takes a SQL string and breaks it into tokens.
     * Each token is an object with type and value, plus the whitespace that preceded it.
     *
     * @param {string} input - The SQL string
     * @returns {Token[]} output token stream
     * @throws {Error} when the remaining input cannot be matched by any token pattern
     */
    function tokenize(input) {
      var tokens = [];
      var token; // most recent token, handed to getNextToken as context

      // Keep processing the string until it is empty
      while (input.length) {
        // grab any preceding whitespace
        var whitespaceBefore = this.getWhitespace(input);
        input = input.substring(whitespaceBefore.length);

        if (input.length) {
          // Get the next token and the token type
          token = this.getNextToken(input, token);

          // No pattern matched, or the match was empty: fail with a readable message
          // instead of a bare property-access TypeError or an endless loop.
          if (!token || !token.value) {
            throw new Error('Parse error: Unexpected "' + input.slice(0, 20) + '"');
          }

          // Advance the string
          input = input.substring(token.value.length);
          tokens.push(_objectSpread(_objectSpread({}, token), {}, {
            whitespaceBefore: whitespaceBefore
          }));
        }
      }

      return tokens;
    }
  }, {
    key: "getWhitespace",
    value:
    /**
     * Matches preceding whitespace if present.
     *
     * @param {string} input - The remaining SQL string
     * @returns {string} the leading whitespace, or '' when there is none
     */
    function getWhitespace(input) {
      var matches = input.match(this.WHITESPACE_REGEX);
      return matches ? matches[1] : '';
    }
  }, {
    key: "getNextToken",
    value:
    /**
     * Attempts to match next token from input string, tests RegExp patterns in decreasing priority.
     *
     * @param {string} input - The remaining SQL string (leading whitespace already removed)
     * @param {Token} [previousToken] - The token matched just before, if any
     * @returns {Token | undefined} - The first token that matches, otherwise undefined
     */
    function getNextToken(input, previousToken) {
      return this.matchToken(_token.TokenType.LINE_COMMENT)(input) ||
        this.matchToken(_token.TokenType.BLOCK_COMMENT)(input) ||
        this.matchToken(_token.TokenType.STRING)(input) ||
        this.matchToken(_token.TokenType.BLOCK_START)(input) ||
        this.matchToken(_token.TokenType.BLOCK_END)(input) ||
        this.getPlaceholderToken(input) ||
        this.matchToken(_token.TokenType.NUMBER)(input) ||
        this.getReservedWordToken(input, previousToken) ||
        this.matchToken(_token.TokenType.WORD)(input) ||
        this.matchToken(_token.TokenType.OPERATOR)(input);
    }
  }, {
    key: "getPlaceholderToken",
    value:
    /**
     * Attempts to match a placeholder token pattern.
     *
     * Every pattern in the list is tried; when more than one matches, the
     * one latest in the list is returned.
     *
     * @param {string} input - The remaining SQL string
     * @return {Token | undefined} - The placeholder token (with an extra `key` field) if found, otherwise undefined
     */
    function getPlaceholderToken(input) {
      var _this$IDENT_NAMED_PLA,
          _this$STRING_NAMED_PL,
          _this2 = this,
          _this$INDEXED_PLACEHO;

      var placeholderTokenRegexMap = [
        // pattern for placeholder with identifier name
        {
          regex: (_this$IDENT_NAMED_PLA = this.IDENT_NAMED_PLACEHOLDER_REGEX) !== null && _this$IDENT_NAMED_PLA !== void 0 ? _this$IDENT_NAMED_PLA : NULL_REGEX,
          parseKey: function parseKey(v) {
            // drop the first character (presumably the placeholder prefix)
            return v.slice(1);
          }
        },
        // pattern for placeholder with string name
        {
          regex: (_this$STRING_NAMED_PL = this.STRING_NAMED_PLACEHOLDER_REGEX) !== null && _this$STRING_NAMED_PL !== void 0 ? _this$STRING_NAMED_PL : NULL_REGEX,
          parseKey: function parseKey(v) {
            // drop the first two characters and the last one (presumably prefix plus
            // the surrounding quotes); the last character is taken as the quote char
            return _this2.getEscapedPlaceholderKey({
              key: v.slice(2, -1),
              quoteChar: v.slice(-1)
            });
          }
        },
        // pattern for placeholder with numeric index
        {
          regex: (_this$INDEXED_PLACEHO = this.INDEXED_PLACEHOLDER_REGEX) !== null && _this$INDEXED_PLACEHO !== void 0 ? _this$INDEXED_PLACEHO : NULL_REGEX,
          parseKey: function parseKey(v) {
            return v.slice(1);
          }
        }
      ];

      return placeholderTokenRegexMap.reduce(function (acc, _ref) {
        var regex = _ref.regex,
            parseKey = _ref.parseKey;

        var token = _this2.getTokenOnFirstMatch({
          input: input,
          regex: regex,
          type: _token.TokenType.PLACEHOLDER
        });

        return token ? _objectSpread(_objectSpread({}, token), {}, {
          key: parseKey(token.value)
        }) : acc;
      }, undefined);
    }
  }, {
    key: "getEscapedPlaceholderKey",
    value:
    /**
     * Replaces every backslash-escaped quote character in a placeholder key
     * with the bare quote character.
     *
     * @param {string} _.key - The placeholder key without its surrounding quotes
     * @param {string} _.quoteChar - The quote character that may appear escaped in the key
     * @returns {string} the key with the escapes removed
     */
    function getEscapedPlaceholderKey(_ref2) {
      var key = _ref2.key,
          quoteChar = _ref2.quoteChar;
      return key.replace(new RegExp((0, _utils.escapeRegExp)('\\' + quoteChar), 'gu'), quoteChar);
    }
  }, {
    key: "getReservedWordToken",
    value:
    /**
     * Attempts to match a Reserved word token pattern, avoiding edge cases of Reserved words within string tokens
     *
     * @param {string} input - The remaining SQL string
     * @param {Token} [previousToken] - The token matched just before, if any
     * @return {Token | undefined} - The Reserved word token if found, otherwise undefined
     */
    function getReservedWordToken(input, previousToken) {
      var _this3 = this;

      // A reserved word cannot be preceded by a '.', '[', '`', or '"'
      // this makes it so for "mytable.from", [from], `from`, "from" - from is not considered a Reserved word
      if (previousToken && ['.', '[', '`', '"'].includes(previousToken.value)) {
        return undefined;
      }

      // prioritised list of Reserved token types
      var reservedTokenList = [
        _token.TokenType.RESERVED_COMMAND,
        _token.TokenType.RESERVED_BINARY_COMMAND,
        _token.TokenType.RESERVED_DEPENDENT_CLAUSE,
        _token.TokenType.RESERVED_LOGICAL_OPERATOR,
        _token.TokenType.RESERVED_KEYWORD
      ];

      // first type that matches wins
      return reservedTokenList.reduce(function (matchedToken, tokenType) {
        return matchedToken || _this3.matchToken(tokenType)(input);
      }, undefined);
    }
  }, {
    key: "getTokenOnFirstMatch",
    value:
    /**
     * Attempts to match RegExp from head of input, returning undefined if not found
     *
     * @param {string} _.input - The string to match
     * @param {TokenType} _.type - The type of token to match against
     * @param {RegExp} _.regex - The regex to match
     * @return {Token | undefined} - The matched token (value taken from capture group 1) if found, otherwise undefined
     */
    function getTokenOnFirstMatch(_ref3) {
      var input = _ref3.input,
          type = _ref3.type,
          regex = _ref3.regex;
      var matches = input.match(regex);
      return matches ? {
        type: type,
        value: matches[1]
      } : undefined;
    }
  }]);

  return Tokenizer;
}();
// Publish the Tokenizer class as the module's default export.
exports["default"] = Tokenizer;
// Replace the exports object with the class itself, so a plain require() of this module returns Tokenizer directly.
module.exports = exports.default;
//# sourceMappingURL=Tokenizer.js.map