/*
 * prettier-sql
 * Version:
 * Format whitespace in a SQL query to make it more readable
 * 91 lines (90 loc) • 4.28 kB
 * TypeScript
 */
import * as regexFactory from './regexFactory';
import { Token, TokenType } from './token';
/**
* Struct that defines how a SQL language can be broken into tokens.
* Passed to the `Tokenizer` constructor; each field configures one category of token.
*/
interface TokenizerOptions {
/** Reserved words in SQL */
reservedKeywords: string[];
/** Words that are set to new line separately */
reservedCommands: string[];
/** Words that are set to newline */
reservedLogicalOperators: string[];
/** Words that follow a specific statement and must have data attached */
reservedDependentClauses: string[];
/** Words that are top level but have no indentation */
reservedBinaryCommands: string[];
/** String types to enable - "", '', ``, [], N'' */
stringTypes: regexFactory.StringPatternType[];
/** Opening parentheses to enable, like (, [ */
blockStart: string[];
/** Closing parentheses to enable, like ), ] */
blockEnd: string[];
/** Prefixes for indexed placeholders, like ? (optional) */
indexedPlaceholderTypes?: string[];
/** Prefixes for named placeholders, like @ and : */
namedPlaceholderTypes: string[];
/** Line comments to enable, like # and -- */
lineCommentTypes: string[];
/**
* Special chars that can be found inside of words, like @ and # (optional).
* NOTE(review): `prefix` / `any` / `suffix` presumably select where in a word
* the chars are allowed (start / anywhere / end) - confirm against regexFactory.
*/
specialWordChars?: {
prefix?: string;
any?: string;
suffix?: string;
};
/** Additional operators to recognize (optional) */
operators?: string[];
}
/**
* Converts SQL language string into a token stream.
* Configured once through `TokenizerOptions`; `tokenize` is the main entry point.
*/
export default class Tokenizer {
/** NOTE(review): presumably the pattern used by `getWhitespace` - confirm against implementation */
WHITESPACE_REGEX: RegExp;
/** Holds one RegExp for every `TokenType` value */
REGEX_MAP: {
[tokenType in TokenType]: RegExp;
};
/**
* Optional placeholder patterns; may be undefined.
* NOTE(review): presumably only built when the corresponding placeholder types
* are configured - confirm against implementation.
*/
INDEXED_PLACEHOLDER_REGEX?: RegExp;
IDENT_NAMED_PLACEHOLDER_REGEX?: RegExp;
STRING_NAMED_PLACEHOLDER_REGEX?: RegExp;
/**
* @param {TokenizerOptions} cfg
* @param {string[]} cfg.reservedKeywords - Reserved words in SQL
* @param {string[]} cfg.reservedDependentClauses - Words that follow a specific statement and must have data attached
* @param {string[]} cfg.reservedLogicalOperators - Words that are set to newline
* @param {string[]} cfg.reservedCommands - Words that are set to new line separately
* @param {string[]} cfg.reservedBinaryCommands - Words that are top level but have no indentation
* @param {regexFactory.StringPatternType[]} cfg.stringTypes - String types to enable - "", '', ``, [], N''
* @param {string[]} cfg.blockStart - Opening parentheses to enable, like (, [
* @param {string[]} cfg.blockEnd - Closing parentheses to enable, like ), ]
* @param {string[]} [cfg.indexedPlaceholderTypes] - Prefixes for indexed placeholders, like ?
* @param {string[]} cfg.namedPlaceholderTypes - Prefixes for named placeholders, like @ and :
* @param {string[]} cfg.lineCommentTypes - Line comments to enable, like # and --
* @param {object} [cfg.specialWordChars] - Special chars that can be found inside of words, like @ and #;
*   an object with optional `prefix`, `any` and `suffix` strings
* @param {string[]} [cfg.operators] - Additional operators to recognize
*/
constructor(cfg: TokenizerOptions);
/**
* Takes a SQL string and breaks it into tokens.
* Each token is an object with type and value.
*
* @param {string} input - The SQL string
* @returns {Token[]} output token stream
*/
tokenize(input: string): Token[];
/**
* Matches preceding whitespace if present
* @param {string} input - The string to inspect
* @return {string} - The matched whitespace (presumably an empty string when there is none - confirm)
*/
getWhitespace(input: string): string;
/** Curried function of `getTokenOnFirstMatch` that allows token type to be passed first */
matchToken: (tokenType: TokenType) => (input: string) => Token | undefined;
/**
* Attempts to match next token from input string, tests RegExp patterns in decreasing priority
* @param {string} input - The string to match from
* @param {Token} [previousToken] - Optional previously matched token
*   (presumably used to disambiguate reserved words - confirm against implementation)
* @return {Token} - The matched token
*/
getNextToken(input: string, previousToken?: Token): Token;
/**
* Attempts to match a placeholder token pattern
* @return {Token | undefined} - The placeholder token if found, otherwise undefined
*/
getPlaceholderToken(input: string): Token | undefined;
/**
* Builds a placeholder key string from a key and its quote character.
* NOTE(review): presumably strips escaping of `quoteChar` inside `key` - confirm against implementation.
* @param {string} _.key - The placeholder key
* @param {string} _.quoteChar - The quote character associated with the key
* @return {string} - The resulting key
*/
getEscapedPlaceholderKey({ key, quoteChar }: {
key: string;
quoteChar: string;
}): string;
/**
* Attempts to match a Reserved word token pattern, avoiding edge cases of Reserved words within string tokens
* @param {string} input - The string to match from
* @param {Token} [previousToken] - Optional previously matched token
* @return {Token | undefined} - The Reserved word token if found, otherwise undefined
*/
getReservedWordToken(input: string, previousToken?: Token): Token | undefined;
/**
* Attempts to match RegExp from head of input, returning undefined if not found
* @param {string} _.input - The string to match
* @param {TokenType} _.type - The type of token to match against
* @param {RegExp} _.regex - The regex to match
* @return {Token | undefined} - The matched token if found, otherwise undefined
*/
getTokenOnFirstMatch({ input, type, regex, }: {
input: string;
type: TokenType;
regex: RegExp;
}): Token | undefined;
}
/** Empty export list: adds no further exports beyond the default-exported class above. */
export {};