sql-formatter
Version:
Format whitespace in a SQL query to make it more readable
140 lines • 6.54 kB
JavaScript
import { sortByLengthDesc } from '../utils.js';
import { escapeRegExp, patternToRegex, prefixesPattern, withDashes } from './regexUtil.js';
/**
* Builds a RegExp for valid line comments in a SQL dialect
* @param {string[]} lineCommentTypes - list of character strings that denote line comments
*/
export const lineComment = (lineCommentTypes) => new RegExp(`(?:${lineCommentTypes.map(escapeRegExp).join('|')}).*?(?=\r\n|\r|\n|$)`, 'uy');
/**
* Builds a RegExp for matching either open- or close-parenthesis patterns
*/
export const parenthesis = (kind, extraParens = []) => {
const index = kind === 'open' ? 0 : 1;
const parens = ['()', ...extraParens].map(pair => pair[index]);
return patternToRegex(parens.map(escapeRegExp).join('|'));
};
/**
* Builds a RegExp containing all operators for a SQL dialect
*/
export const operator = (operators) => patternToRegex(`${sortByLengthDesc(operators).map(escapeRegExp).join('|')}`);
// Negative lookahead to avoid matching a keyword that's actually part of identifier,
// which can happen when identifier allows word-boundary characters inside it.
//
// For example "SELECT$ME" should be tokenized as:
// - ["SELECT$ME"] when $ is allowed inside identifiers
// - ["SELECT", "$", "ME"] when $ can't be part of identifiers.
const rejectIdentCharsPattern = ({ rest, dashes }) => rest || dashes ? `(?![${rest || ''}${dashes ? '-' : ''}])` : '';
/**
* Builds a RegExp for all Reserved Keywords in a SQL dialect
*/
export const reservedWord = (reservedKeywords, identChars = {}) => {
if (reservedKeywords.length === 0) {
return /^\b$/u;
}
const avoidIdentChars = rejectIdentCharsPattern(identChars);
const reservedKeywordsPattern = sortByLengthDesc(reservedKeywords)
.map(escapeRegExp)
.join('|')
.replace(/ /gu, '\\s+');
return new RegExp(`(?:${reservedKeywordsPattern})${avoidIdentChars}\\b`, 'iuy');
};
/**
* Builds a RegExp for parameter placeholder patterns
* @param {string[]} paramTypes - list of strings that denote placeholder types
* @param {string} pattern - string that denotes placeholder pattern
*/
export const parameter = (paramTypes, pattern) => {
if (!paramTypes.length) {
return undefined;
}
const typesRegex = paramTypes.map(escapeRegExp).join('|');
return patternToRegex(`(?:${typesRegex})(?:${pattern})`);
};
const buildQStringPatterns = () => {
const specialDelimiterMap = {
'<': '>',
'[': ']',
'(': ')',
'{': '}',
};
// base pattern for special delimiters, left must correspond with right
const singlePattern = "{left}(?:(?!{right}').)*?{right}";
// replace {left} and {right} with delimiters, collect as array
const patternList = Object.entries(specialDelimiterMap).map(([left, right]) => singlePattern.replace(/{left}/g, escapeRegExp(left)).replace(/{right}/g, escapeRegExp(right)));
const specialDelimiters = escapeRegExp(Object.keys(specialDelimiterMap).join(''));
// standard pattern for common delimiters, ignores special delimiters
const standardDelimiterPattern = String.raw `(?<tag>[^\s${specialDelimiters}])(?:(?!\k<tag>').)*?\k<tag>`;
// constructs final pattern by joining all cases
const qStringPattern = `[Qq]'(?:${standardDelimiterPattern}|${patternList.join('|')})'`;
return qStringPattern;
};
// Regex patterns for all supported quote styles.
//
// Most of them have a single escaping-style built in,
// but "" and '' support multiple versions of escapes,
// which must be selected with suffixes: -qq, -bs, -qq-bs, -raw
export const quotePatterns = {
// - backtick quoted (using `` to escape)
'``': '(?:`[^`]*`)+',
// - Transact-SQL square bracket quoted (using ]] to escape)
'[]': String.raw `(?:\[[^\]]*\])(?:\][^\]]*\])*`,
// double-quoted
'""-qq': String.raw `(?:"[^"]*")+`,
'""-bs': String.raw `(?:"[^"\\]*(?:\\.[^"\\]*)*")`,
'""-qq-bs': String.raw `(?:"[^"\\]*(?:\\.[^"\\]*)*")+`,
'""-raw': String.raw `(?:"[^"]*")`,
// single-quoted
"''-qq": String.raw `(?:'[^']*')+`,
"''-bs": String.raw `(?:'[^'\\]*(?:\\.[^'\\]*)*')`,
"''-qq-bs": String.raw `(?:'[^'\\]*(?:\\.[^'\\]*)*')+`,
"''-raw": String.raw `(?:'[^']*')`,
// PostgreSQL dollar-quoted
'$$': String.raw `(?<tag>\$\w*\$)[\s\S]*?\k<tag>`,
// BigQuery '''triple-quoted''' (using \' to escape)
"'''..'''": String.raw `'''[^\\]*?(?:\\.[^\\]*?)*?'''`,
// BigQuery """triple-quoted""" (using \" to escape)
'""".."""': String.raw `"""[^\\]*?(?:\\.[^\\]*?)*?"""`,
// Hive and Spark variables: ${name}
'{}': String.raw `(?:\{[^\}]*\})`,
// Oracle q'' strings: q'<text>' q'|text|' ...
"q''": buildQStringPatterns(),
};
const singleQuotePattern = (quoteTypes) => {
if (typeof quoteTypes === 'string') {
return quotePatterns[quoteTypes];
}
else if ('regex' in quoteTypes) {
return quoteTypes.regex;
}
else {
return prefixesPattern(quoteTypes) + quotePatterns[quoteTypes.quote];
}
};
/** Builds a RegExp for matching variables */
export const variable = (varTypes) => patternToRegex(varTypes
.map(varType => ('regex' in varType ? varType.regex : singleQuotePattern(varType)))
.join('|'));
/** Builds a quote-delimited pattern for matching all given quote types */
export const stringPattern = (quoteTypes) => quoteTypes.map(singleQuotePattern).join('|');
/** Builds a RegExp for matching quote-delimited patterns */
export const string = (quoteTypes) => patternToRegex(stringPattern(quoteTypes));
/**
* Builds a RegExp for valid identifiers in a SQL dialect
*/
export const identifier = (specialChars = {}) => patternToRegex(identifierPattern(specialChars));
/**
* Builds a RegExp string for valid identifiers in a SQL dialect
*/
export const identifierPattern = ({ first, rest, dashes, allowFirstCharNumber, } = {}) => {
// Unicode letters, diacritical marks and underscore
const letter = '\\p{Alphabetic}\\p{Mark}_';
// Numbers 0..9, plus various unicode numbers
const number = '\\p{Decimal_Number}';
const firstChars = escapeRegExp(first !== null && first !== void 0 ? first : '');
const restChars = escapeRegExp(rest !== null && rest !== void 0 ? rest : '');
const pattern = allowFirstCharNumber
? `[${letter}${number}${firstChars}][${letter}${number}${restChars}]*`
: `[${letter}${firstChars}][${letter}${number}${restChars}]*`;
return dashes ? withDashes(pattern) : pattern;
};
//# sourceMappingURL=regexFactory.js.map