UNPKG

sql-formatter-plus

Version:

Formats whitespace in a SQL query to make it more readable

339 lines (294 loc) 10.4 kB
import isEmpty from 'lodash/isEmpty';
import escapeRegExp from 'lodash/escapeRegExp';
import tokenTypes from './tokenTypes';

/**
 * Splits a SQL string into a flat list of typed tokens.
 *
 * Every matcher is a regex anchored at the start of the remaining input;
 * `tokenize` repeatedly takes the first matcher that succeeds and chops the
 * matched text off the front of the input.
 */
export default class Tokenizer {
  /**
   * @param {Object} cfg
   *  @param {String[]} cfg.reservedWords Reserved words in SQL
   *  @param {String[]} cfg.reservedTopLevelWords Words that are set to new line separately
   *  @param {String[]} cfg.reservedNewlineWords Words that are set to newline
   *  @param {String[]} cfg.reservedTopLevelWordsNoIndent Words that are top level but have no indentation
   *  @param {String[]} cfg.stringTypes String types to enable: "", '', ``, [], N''
   *  @param {String[]} cfg.openParens Opening parentheses to enable, like (, [
   *  @param {String[]} cfg.closeParens Closing parentheses to enable, like ), ]
   *  @param {String[]} cfg.indexedPlaceholderTypes Prefixes for indexed placeholders, like ?
   *  @param {String[]} cfg.namedPlaceholderTypes Prefixes for named placeholders, like @ and :
   *  @param {String[]} cfg.lineCommentTypes Line comments to enable, like # and --
   *  @param {String[]} cfg.specialWordChars Special chars that can be found inside of words, like @ and #
   */
  constructor(cfg) {
    this.WHITESPACE_REGEX = /^(\s+)/u;
    this.NUMBER_REGEX = /^((-\s*)?[0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\b/u;
    // Multi-character operators are listed first; the trailing "." is the
    // catch-all that turns any other single character into an operator token.
    this.OPERATOR_REGEX = /^(!=|<>|==|<=|>=|!<|!>|\|\||::|->>|->|~~\*|~~|!~~\*|!~~|~\*|!~\*|!~|:=|.)/u;

    // An unterminated block comment runs to the end of the input.
    this.BLOCK_COMMENT_REGEX = /^(\/\*[^]*?(?:\*\/|$))/u;
    this.LINE_COMMENT_REGEX = this.createLineCommentRegex(cfg.lineCommentTypes);

    this.RESERVED_TOP_LEVEL_REGEX = this.createReservedWordRegex(cfg.reservedTopLevelWords);
    this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX = this.createReservedWordRegex(
      cfg.reservedTopLevelWordsNoIndent
    );
    this.RESERVED_NEWLINE_REGEX = this.createReservedWordRegex(cfg.reservedNewlineWords);
    this.RESERVED_PLAIN_REGEX = this.createReservedWordRegex(cfg.reservedWords);

    this.WORD_REGEX = this.createWordRegex(cfg.specialWordChars);
    this.STRING_REGEX = this.createStringRegex(cfg.stringTypes);

    this.OPEN_PAREN_REGEX = this.createParenRegex(cfg.openParens);
    this.CLOSE_PAREN_REGEX = this.createParenRegex(cfg.closeParens);

    // Each of these is `false` when the corresponding prefix list is empty.
    this.INDEXED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(
      cfg.indexedPlaceholderTypes,
      '[0-9]*'
    );
    this.IDENT_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(
      cfg.namedPlaceholderTypes,
      '[a-zA-Z0-9._$]+'
    );
    this.STRING_NAMED_PLACEHOLDER_REGEX = this.createPlaceholderRegex(
      cfg.namedPlaceholderTypes,
      this.createStringPattern(cfg.stringTypes)
    );
  }

  /**
   * Builds the regex matching a line comment: one of the given prefixes
   * followed by everything up to and including the line terminator (or the
   * end of the input).
   *
   * @param {String[]} lineCommentTypes Comment prefixes, like # and --
   * @return {RegExp}
   */
  createLineCommentRegex(lineCommentTypes) {
    if (isEmpty(lineCommentTypes)) {
      // With no prefixes the alternation would be empty and every line would
      // be consumed as a comment, so hand back a regex that never matches.
      return /(?!)/u;
    }
    return new RegExp(
      `^((?:${lineCommentTypes.map(c => escapeRegExp(c)).join('|')}).*?(?:\r\n|\r|\n|$))`,
      'u'
    );
  }

  /**
   * Builds a case-insensitive regex matching any of the given reserved words
   * at the start of the input, followed by a word boundary. A space inside a
   * multi-word entry matches any run of whitespace.
   *
   * @param {String[]} reservedWords
   * @return {RegExp}
   */
  createReservedWordRegex(reservedWords) {
    const reservedWordsPattern = reservedWords.join('|').replace(/ /gu, '\\s+');
    return new RegExp(`^(${reservedWordsPattern})\\b`, 'iu');
  }

  /**
   * Builds the regex matching a plain word: Unicode letters, marks, decimal
   * digits, connector punctuation and join controls, plus any extra
   * characters supplied by the caller.
   *
   * @param {String[]} [specialChars] Extra characters allowed inside words;
   *   they are inserted into a character class verbatim (not escaped)
   * @return {RegExp}
   */
  createWordRegex(specialChars = []) {
    return new RegExp(
      `^([\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}\\p{Connector_Punctuation}\\p{Join_Control}${specialChars.join(
        ''
      )}]+)`,
      'u'
    );
  }

  /**
   * Builds the anchored regex matching any of the enabled string types.
   *
   * @param {String[]} stringTypes Keys of the patterns in createStringPattern
   * @return {RegExp}
   */
  createStringRegex(stringTypes) {
    return new RegExp(`^(${this.createStringPattern(stringTypes)})`, 'u');
  }

  /**
   * Builds the (unanchored) alternation of string patterns for the given
   * string types. This enables the following string patterns:
   * 1. backtick quoted string using `` to escape
   * 2. square bracket quoted string (SQL Server) using ]] to escape
   * 3. double quoted string using "" or \" to escape
   * 4. single quoted string using '' or \' to escape
   * 5. national character quoted string (N'...') using \' to escape
   *
   * In every pattern an unterminated string runs to the end of the input.
   *
   * @param {String[]} stringTypes
   * @return {String} Regex source text
   */
  createStringPattern(stringTypes) {
    const patterns = {
      '``': '((`[^`]*($|`))+)',
      '[]': '((\\[[^\\]]*($|\\]))(\\][^\\]]*($|\\]))*)',
      '""': '(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)',
      "''": "(('[^'\\\\]*(?:\\\\.[^'\\\\]*)*('|$))+)",
      // The string body only stops at a quote or a backslash. Excluding the
      // letter N as well would make N'NAME' fail to match as a string.
      "N''": "((N'[^'\\\\]*(?:\\\\.[^'\\\\]*)*('|$))+)"
    };

    return stringTypes.map(t => patterns[t]).join('|');
  }

  /**
   * Builds the case-insensitive regex matching any of the given parens.
   *
   * @param {String[]} parens Single characters like ( or words like CASE
   * @return {RegExp}
   */
  createParenRegex(parens) {
    return new RegExp(`^(${parens.map(p => this.escapeParen(p)).join('|')})`, 'iu');
  }

  /**
   * Turns one paren entry into regex source text.
   *
   * @param {String} paren
   * @return {String}
   */
  escapeParen(paren) {
    if (paren.length === 1) {
      // A single punctuation character
      return escapeRegExp(paren);
    }
    // A longer word: used verbatim, but must stand alone as a whole word
    return `\\b${paren}\\b`;
  }

  /**
   * Builds the regex matching a placeholder: one of the prefixes in `types`
   * followed by `pattern`.
   *
   * @param {String[]} types Placeholder prefixes
   * @param {String} pattern Regex source for the part after the prefix
   * @return {RegExp|Boolean} `false` when no prefixes are configured
   */
  createPlaceholderRegex(types, pattern) {
    if (isEmpty(types)) {
      return false;
    }
    const typesRegex = types.map(escapeRegExp).join('|');

    return new RegExp(`^((?:${typesRegex})(?:${pattern}))`, 'u');
  }

  /**
   * Takes a SQL string and breaks it into tokens.
   * Each token is an object with type and value.
   *
   * @param {String} input The SQL string
   * @return {Object[]} tokens An array of tokens.
   *  @return {String} token.type
   *  @return {String} token.value
   */
  tokenize(input) {
    if (!input) return [];

    const tokens = [];
    let token;

    // Keep processing the string until it is empty
    while (input.length) {
      // Get the next token; the previous one is passed along because the
      // reserved-word matcher depends on it
      token = this.getNextToken(input, token);
      // Advance the string
      input = input.substring(token.value.length);

      tokens.push(token);
    }
    return tokens;
  }

  /**
   * Tries every matcher in priority order and returns the first token found.
   *
   * @param {String} input Remaining, non-empty input
   * @param {Object} [previousToken] Token returned by the previous call
   * @return {Object} token
   */
  getNextToken(input, previousToken) {
    return (
      this.getWhitespaceToken(input) ||
      this.getCommentToken(input) ||
      this.getStringToken(input) ||
      this.getOpenParenToken(input) ||
      this.getCloseParenToken(input) ||
      this.getPlaceholderToken(input) ||
      this.getNumberToken(input) ||
      this.getReservedWordToken(input, previousToken) ||
      this.getWordToken(input) ||
      this.getOperatorToken(input)
    );
  }

  getWhitespaceToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.WHITESPACE,
      regex: this.WHITESPACE_REGEX
    });
  }

  getCommentToken(input) {
    return this.getLineCommentToken(input) || this.getBlockCommentToken(input);
  }

  getLineCommentToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.LINE_COMMENT,
      regex: this.LINE_COMMENT_REGEX
    });
  }

  getBlockCommentToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.BLOCK_COMMENT,
      regex: this.BLOCK_COMMENT_REGEX
    });
  }

  getStringToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.STRING,
      regex: this.STRING_REGEX
    });
  }

  getOpenParenToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.OPEN_PAREN,
      regex: this.OPEN_PAREN_REGEX
    });
  }

  getCloseParenToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.CLOSE_PAREN,
      regex: this.CLOSE_PAREN_REGEX
    });
  }

  /**
   * Matches a placeholder, trying identifier-named, then string-named, then
   * indexed placeholders.
   */
  getPlaceholderToken(input) {
    return (
      this.getIdentNamedPlaceholderToken(input) ||
      this.getStringNamedPlaceholderToken(input) ||
      this.getIndexedPlaceholderToken(input)
    );
  }

  getIdentNamedPlaceholderToken(input) {
    return this.getPlaceholderTokenWithKey({
      input,
      regex: this.IDENT_NAMED_PLACEHOLDER_REGEX,
      // Drop the first character (the placeholder prefix)
      parseKey: v => v.slice(1)
    });
  }

  getStringNamedPlaceholderToken(input) {
    return this.getPlaceholderTokenWithKey({
      input,
      regex: this.STRING_NAMED_PLACEHOLDER_REGEX,
      // Drop the prefix character plus the opening and closing quotes, then
      // unescape backslash-escaped closing quotes inside the key
      parseKey: v =>
        this.getEscapedPlaceholderKey({
          key: v.slice(2, -1),
          quoteChar: v.slice(-1)
        })
    });
  }

  getIndexedPlaceholderToken(input) {
    return this.getPlaceholderTokenWithKey({
      input,
      regex: this.INDEXED_PLACEHOLDER_REGEX,
      // Drop the first character (the placeholder prefix)
      parseKey: v => v.slice(1)
    });
  }

  /**
   * Matches a placeholder token and attaches its parsed `key`.
   *
   * @param {Object} args
   *  @param {String} args.input
   *  @param {RegExp|Boolean} args.regex `false` when this kind is disabled
   *  @param {Function} args.parseKey Maps the token value to its key
   * @return {Object|undefined} token with an extra `key` property
   */
  getPlaceholderTokenWithKey({ input, regex, parseKey }) {
    const token = this.getTokenOnFirstMatch({ input, regex, type: tokenTypes.PLACEHOLDER });
    if (token) {
      token.key = parseKey(token.value);
    }
    return token;
  }

  /**
   * Replaces every backslash-escaped `quoteChar` in `key` with the bare
   * `quoteChar`.
   */
  getEscapedPlaceholderKey({ key, quoteChar }) {
    return key.replace(new RegExp(escapeRegExp('\\' + quoteChar), 'gu'), quoteChar);
  }

  // Decimal, binary, or hex numbers
  getNumberToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.NUMBER,
      regex: this.NUMBER_REGEX
    });
  }

  // Punctuation and symbols
  getOperatorToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.OPERATOR,
      regex: this.OPERATOR_REGEX
    });
  }

  /**
   * Matches a reserved word, trying top-level, newline, top-level-no-indent
   * and plain reserved words in that order.
   *
   * @param {String} input
   * @param {Object} [previousToken]
   * @return {Object|undefined} token
   */
  getReservedWordToken(input, previousToken) {
    // A reserved word cannot be preceded by a "."
    // this makes it so in "my_table.from", "from" is not considered a reserved word
    if (previousToken && previousToken.value === '.') {
      return undefined;
    }
    return (
      this.getTopLevelReservedToken(input) ||
      this.getNewlineReservedToken(input) ||
      this.getTopLevelReservedTokenNoIndent(input) ||
      this.getPlainReservedToken(input)
    );
  }

  getTopLevelReservedToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.RESERVED_TOP_LEVEL,
      regex: this.RESERVED_TOP_LEVEL_REGEX
    });
  }

  getNewlineReservedToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.RESERVED_NEWLINE,
      regex: this.RESERVED_NEWLINE_REGEX
    });
  }

  getTopLevelReservedTokenNoIndent(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.RESERVED_TOP_LEVEL_NO_INDENT,
      regex: this.RESERVED_TOP_LEVEL_NO_INDENT_REGEX
    });
  }

  getPlainReservedToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.RESERVED,
      regex: this.RESERVED_PLAIN_REGEX
    });
  }

  getWordToken(input) {
    return this.getTokenOnFirstMatch({
      input,
      type: tokenTypes.WORD,
      regex: this.WORD_REGEX
    });
  }

  /**
   * Applies `regex` to the start of `input` and wraps the first capture
   * group in a token.
   *
   * @param {Object} args
   *  @param {String} args.input
   *  @param {String} args.type Token type to assign
   *  @param {RegExp|Boolean} args.regex Anchored regex, or `false` when the
   *    matcher is disabled
   * @return {Object|undefined} `{ type, value }`, or undefined when nothing
   *   (or only the empty string) matched
   */
  getTokenOnFirstMatch({ input, type, regex }) {
    // A disabled placeholder matcher is `false`. String#match would coerce
    // that into the pattern /false/ and "match" any input containing the
    // text "false", so bail out before calling it.
    if (!regex) {
      return undefined;
    }

    const matches = input.match(regex);

    // A zero-length capture (e.g. from an empty word/paren/string list that
    // compiles to "^()") would never advance tokenize() and loop forever.
    if (!matches || !matches[1]) {
      return undefined;
    }
    return { type, value: matches[1] };
  }
}