@kuindji/sql-type-parser
Version:
Type-level SQL parser for TypeScript
108 lines • 7.55 kB
TypeScript
/**
* Type-level SQL tokenization utilities
*
* These utilities are shared across all query type parsers.
*/
import type { Trim, ToUpperCase, Increment, Decrement } from "./utils.js";
/**
 * Every word the tokenizer treats as a SQL keyword.
 *
 * Members are spelled in uppercase; a word whose uppercase form appears in
 * this union is rewritten to that uppercase form during normalization.
 */
type SQLKeyword =
    // Statements and core clauses
    | "SELECT" | "INSERT" | "UPDATE" | "DELETE" | "FROM" | "INTO" | "VALUES" | "SET" | "WHERE"
    // Boolean operators and aliasing
    | "AND" | "OR" | "NOT" | "AS"
    // Joins
    | "JOIN" | "INNER" | "LEFT" | "RIGHT" | "FULL" | "OUTER" | "CROSS" | "ON"
    // Ordering, grouping, paging
    | "ORDER" | "BY" | "ASC" | "DESC" | "GROUP" | "HAVING" | "LIMIT" | "OFFSET" | "DISTINCT"
    // Literals and predicates
    | "NULL" | "TRUE" | "FALSE" | "IS" | "IN" | "LIKE" | "ILIKE" | "BETWEEN" | "EXISTS"
    // Aggregate functions
    | "COUNT" | "SUM" | "AVG" | "MIN" | "MAX"
    // CTEs and set operations
    | "WITH" | "UNION" | "INTERSECT" | "EXCEPT" | "ALL"
    // DML modifiers
    | "RETURNING" | "DEFAULT" | "CONFLICT" | "DO" | "NOTHING" | "CASCADE" | "RESTRICT" | "USING"
    // Interval expressions
    | "INTERVAL" | "YEAR" | "MONTH" | "DAY" | "HOUR" | "MINUTE" | "SECOND" | "WEEK" | "TO"
    // NULL ordering
    | "NULLS" | "FIRST" | "LAST"
    // Parameterless special values
    | "CURRENT_DATE" | "CURRENT_TIME" | "CURRENT_TIMESTAMP" | "LOCALTIME" | "LOCALTIMESTAMP"
    | "CURRENT_USER" | "SESSION_USER" | "CURRENT_CATALOG" | "CURRENT_SCHEMA" | "CURRENT_ROLE";
/**
 * Keywords that mark the end of a FROM clause.
 */
export type FromTerminators =
    | "WHERE"
    | "JOIN"
    | "INNER"
    | "LEFT"
    | "RIGHT"
    | "FULL"
    | "CROSS"
    | "ORDER"
    | "GROUP"
    | "HAVING"
    | "LIMIT"
    | "OFFSET"
    | "UNION"
    | "INTERSECT"
    | "EXCEPT"
    | "RETURNING";
/**
 * Keywords that mark the end of a WHERE clause.
 */
export type WhereTerminators =
    | "ORDER"
    | "GROUP"
    | "HAVING"
    | "LIMIT"
    | "OFFSET"
    | "UNION"
    | "INTERSECT"
    | "EXCEPT"
    | "RETURNING";
/**
 * Keywords that mark the end of an ORDER BY clause.
 */
export type OrderByTerminators =
    | "LIMIT"
    | "OFFSET"
    | "UNION"
    | "INTERSECT"
    | "EXCEPT"
    | "RETURNING";
/**
 * Set operators that combine two queries, each with an optional ALL suffix.
 */
export type UnionOperator =
    | "UNION"
    | "UNION ALL"
    | "INTERSECT"
    | "INTERSECT ALL"
    | "EXCEPT"
    | "EXCEPT ALL";
/**
 * Canonicalize a single word.
 *
 * The word is uppercased via `ToUpperCase`; if that uppercase form is a
 * member of `SQLKeyword` the uppercase form is returned, otherwise the word
 * is returned exactly as given.
 */
type NormalizeWord<W extends string> =
    ToUpperCase<W> extends infer Candidate extends string
        ? Candidate extends SQLKeyword
            ? Candidate
            : W
        : W;
/**
 * Append `Word` to the space-separated accumulator `Acc`; an empty
 * accumulator yields just `Word` (no leading space).
 */
type AppendWord<Acc extends string, Word extends string> =
    Acc extends "" ? Word : `${Acc} ${Word}`;

/**
 * Walk a space-separated string one word at a time, normalizing each word
 * and re-joining the result with single spaces as it goes. Doing the split,
 * normalize and join in one recursion keeps the instantiation depth down.
 *
 * - `T` is the text still to be processed (trimmed at every step).
 * - `Acc` is the output built so far.
 * - `PrevWasAS` is true when the word just emitted was the keyword AS; the
 *   word that follows is then copied through untouched, because it is an
 *   alias and must keep its original casing.
 */
type ProcessWords<T extends string, Acc extends string = "", PrevWasAS extends boolean = false> =
    Trim<T> extends ""
        ? Acc
        : Trim<T> extends `${infer Head} ${infer Tail}`
            ? PrevWasAS extends true
                // Alias position: emit verbatim and clear the flag.
                ? ProcessWords<Tail, AppendWord<Acc, Head>, false>
                : NormalizeWord<Head> extends infer Canon extends string
                    ? ProcessWords<Tail, AppendWord<Acc, Canon>, Canon extends "AS" ? true : false>
                    : never
            // No space left: Trim<T> is the final word.
            : PrevWasAS extends true
                ? AppendWord<Acc, Trim<T>>
                : AppendWord<Acc, NormalizeWord<Trim<T>>>;
/**
 * Surround every occurrence of the delimiter `D` in `T` with one space on
 * each side.
 */
type PadDelimiter<T extends string, D extends string> =
    T extends `${infer Before}${D}${infer After}`
        ? `${Before} ${D} ${PadDelimiter<After, D>}`
        : T;

/**
 * Put a space on both sides of every comma, opening parenthesis and closing
 * parenthesis, so that each of them becomes its own space-delimited token.
 * Existing whitespace is left alone; runs of spaces this creates are
 * expected to be collapsed by a later step.
 */
type SplitSpecial<T extends string> =
    PadDelimiter<PadDelimiter<PadDelimiter<T, ",">, "(">, ")">;
/**
 * Strip block comments (the slash-star ... star-slash form).
 *
 * Each pass replaces the span from the first comment opener through the
 * first closer that follows it with a single space, then repeats on the
 * result. Nesting is not tracked, and an opener with no closer is left in
 * place.
 */
type RemoveBlockComments<T extends string> =
    T extends `${infer Before}/*${infer _Body}*/${infer After}`
        ? RemoveBlockComments<`${Before} ${After}`>
        : T;
/**
 * Strip single-line comments (from a double dash to the end of the line).
 *
 * Three cases are tried in order:
 * 1. comment terminated by a line feed - the comment is dropped, the line
 *    feed is kept;
 * 2. comment terminated by a carriage return - same, keeping the carriage
 *    return;
 * 3. comment running to the end of the string - everything from the double
 *    dash onwards is dropped.
 */
type RemoveLineComments<T extends string> =
    T extends `${infer Before}--${infer _Text}\n${infer After}`
        ? RemoveLineComments<`${Before}\n${After}`>
        : T extends `${infer Before}--${infer _Text}\r${infer After}`
            ? RemoveLineComments<`${Before}\r${After}`>
            : T extends `${infer Before}--${infer _Text}`
                ? Before
                : T;
/**
 * Strip every SQL comment from the input.
 *
 * Block comments are removed first; line comments are then removed from
 * that result.
 */
type RemoveComments<T extends string> =
    RemoveLineComments<
        RemoveBlockComments<T>
    >;
/**
 * Replace every occurrence of the character `C` in `T` with a single space.
 */
type BlankOut<T extends string, C extends string> =
    T extends `${infer Before}${C}${infer After}`
        ? BlankOut<`${Before} ${After}`, C>
        : T;

/**
 * Turn each tab, line feed and carriage return into a space.
 * Characters are replaced one-for-one; runs of spaces are not collapsed here.
 */
type ReplaceWhitespace<T extends string> =
    BlankOut<BlankOut<BlankOut<T, "\t">, "\n">, "\r">;
/**
 * Collapse every run of consecutive spaces into a single space.
 *
 * Each step finds the first pair of adjacent spaces and replaces the pair
 * with one space, repeating until no pair remains. Leading or trailing
 * spaces are reduced to at most one but are not removed.
 *
 * NOTE: the pattern below must contain TWO spaces between the inferred
 * parts. With a single space the rebuilt string equals the input, so any
 * text containing a space would recurse forever and nothing would ever be
 * collapsed.
 */
type CollapseSpaces<T extends string> =
    T extends `${infer L}  ${infer R}`
        ? CollapseSpaces<`${L} ${R}`>
        : T;
/**
 * Normalize a SQL query string into a canonical, single-space-separated
 * token stream.
 *
 * Stages, applied innermost first:
 * 1. RemoveComments    - strip block and line comments
 * 2. ReplaceWhitespace - turn tabs and newlines into spaces
 * 3. SplitSpecial      - pad commas and parentheses with spaces
 * 4. CollapseSpaces    - squeeze runs of spaces down to one
 * 5. ProcessWords      - uppercase SQL keywords (split, normalize and join
 *                        in a single pass)
 */
export type NormalizeSQL<T extends string> =
    ProcessWords<
        CollapseSpaces<
            SplitSpecial<
                ReplaceWhitespace<
                    RemoveComments<T>
                >
            >
        >
    >;
/**
 * Split off the first space-delimited token of the trimmed input.
 *
 * Produces a `[token, rest]` pair. When the trimmed input contains no
 * space, the whole trimmed input is the token and the rest is the empty
 * string.
 */
export type NextToken<T extends string> =
    Trim<T> extends `${infer Head} ${infer Tail}`
        ? [Head, Tail]
        : [Trim<T>, ""];
/**
 * Resolve to `true` when the first token of `T` (as produced by
 * `NextToken`) is assignable to `Token`, and to `false` otherwise.
 * `Token` may be a union, in which case any member matches.
 */
export type StartsWith<T extends string, Token extends string> =
    NextToken<T> extends [infer Head extends string, infer _Tail]
        ? Head extends Token
            ? true
            : false
        : false;
/**
 * Collect tokens until one of `Terminators` is met outside any parentheses.
 *
 * Produces `[content, remainder]`:
 * - if a terminator is found at depth 0, `content` is everything before it
 *   and `remainder` starts with the terminator itself;
 * - if the input runs out first, `content` is the whole input and
 *   `remainder` is the empty string.
 *
 * `Depth` is bumped with `Increment` on "(" and lowered with `Decrement`
 * on ")"; a terminator seen while `Depth` is not 0 is treated as ordinary
 * content. `Acc` carries the content gathered so far. Tokens are compared
 * to `Terminators` as-is, so the input is presumably already normalized
 * (keywords uppercased, parentheses space-separated) - confirm at call
 * sites.
 */
export type ExtractUntil<T extends string, Terminators extends string, Depth extends number = 0, Acc extends string = ""> =
    NextToken<T> extends [infer Tok extends string, infer Tail extends string]
        ? Tok extends "("
            ? ExtractUntil<Tail, Terminators, Increment<Depth>, `${Acc} ${Tok}`>
            : Tok extends ")"
                ? ExtractUntil<Tail, Terminators, Decrement<Depth>, `${Acc} ${Tok}`>
                : Tok extends Terminators
                    ? Depth extends 0
                        // Top-level terminator: stop and hand it back with the remainder.
                        ? [Trim<Acc>, Trim<`${Tok} ${Tail}`>]
                        : ExtractUntil<Tail, Terminators, Depth, `${Acc} ${Tok}`>
                    : Tail extends ""
                        // Last token and no terminator seen.
                        ? [Trim<`${Acc} ${Tok}`>, ""]
                        : ExtractUntil<Tail, Terminators, Depth, `${Acc} ${Tok}`>
        : [Trim<Acc>, ""];
/**
 * Split a token stream on commas that sit outside any parentheses.
 *
 * Produces a tuple of trimmed segments. `Depth` tracks parenthesis nesting
 * (via `Increment` / `Decrement`); a "," token only ends the current
 * segment when `Depth` is 0. `Current` accumulates the segment being built.
 * An empty input with nothing accumulated yields an empty tuple. Commas
 * and parentheses are recognized only as standalone tokens.
 */
export type SplitByComma<T extends string, Depth extends number = 0, Current extends string = ""> =
    Trim<T> extends ""
        ? Current extends ""
            ? []
            : [Trim<Current>]
        : NextToken<T> extends [infer Tok extends string, infer Tail extends string]
            ? Tok extends "("
                ? SplitByComma<Tail, Increment<Depth>, `${Current} ${Tok}`>
                : Tok extends ")"
                    ? SplitByComma<Tail, Decrement<Depth>, `${Current} ${Tok}`>
                    : Tok extends ","
                        ? Depth extends 0
                            // Top-level comma: close this segment and start a fresh one.
                            ? [Trim<Current>, ...SplitByComma<Tail, 0, "">]
                            : SplitByComma<Tail, Depth, `${Current} ${Tok}`>
                        : SplitByComma<Tail, Depth, `${Current} ${Tok}`>
            : Current extends ""
                ? []
                : [Trim<Current>];
/**
 * Count the "(" characters in `T`.
 * `N` is the running total, advanced with `Increment` once per match.
 */
export type CountOpen<T extends string, N extends number = 0> =
    T extends `${infer _Before}(${infer After}`
        ? CountOpen<After, Increment<N>>
        : N;
/**
 * Count the ")" characters in `T`.
 * `N` is the running total, advanced with `Increment` once per match.
 */
export type CountClose<T extends string, N extends number = 0> =
    T extends `${infer _Before})${infer After}`
        ? CountClose<After, Increment<N>>
        : N;
/**
 * Resolve to `true` when `T` has as many "(" as ")" characters.
 *
 * Only the two counts are compared; the order of the parentheses is not
 * checked, so a string such as ")(" also resolves to `true`.
 */
export type ParensBalanced<T extends string> =
    CountOpen<T> extends CountClose<T>
        ? true
        : false;
export {};
//# sourceMappingURL=tokenizer.d.ts.map