rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
319 lines • 12.8 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.LiteralTokenReader = exports.literalKeywordParser = void 0;
const BaseTokenReader_1 = require("./BaseTokenReader");
const Lexeme_1 = require("../models/Lexeme");
const charLookupTable_1 = require("../utils/charLookupTable");
const KeywordParser_1 = require("../parsers/KeywordParser");
const KeywordTrie_1 = require("../models/KeywordTrie");
/**
 * Keyword phrases that this module tokenizes as literals.
 *
 * Each phrase is an array of its words (presumably the shape KeywordTrie
 * expects - confirm against KeywordTrie). The resulting list keeps a fixed
 * order: single-word keywords first, then "<form> normalized" phrases, then
 * the bare normalization-form words.
 */
const normalizationForms = ["nfc", "nfd", "nfkc", "nfkd"];
const singleWordLiterals = [
    "null",
    "true",
    "false",
    "current_date",
    "current_time",
    "current_timestamp",
    "localtime",
    "localtimestamp",
    "unbounded",
    "normalized",
];
const literalKeywordPhrases = [
    ...singleWordLiterals.map((word) => [word]),
    ...normalizationForms.map((form) => [form, "normalized"]),
    ...normalizationForms.map((form) => [form]),
];
// Shared parser, exported so that other modules can reuse the same keyword set.
exports.literalKeywordParser = new KeywordParser_1.KeywordParser(new KeywordTrie_1.KeywordTrie(literalKeywordPhrases));
/**
 * Reads SQL literal tokens: single-quoted strings, literal keywords
 * (null, true, current_date, ...), numbers (decimal, exponent, 0x/0b/0o),
 * PostgreSQL dollar-quoted strings, SQL Server MONEY literals and signed numbers.
 *
 * Cursor state (`input`, `position`) and the helpers `isEndOfInput`, `canRead`,
 * `read`, `createLexeme` and `getDebugPositionInfo` are not defined in this class;
 * they are presumably inherited from BaseTokenReader.
 */
class LiteralTokenReader extends BaseTokenReader_1.BaseTokenReader {
    /**
     * Try to read a literal token at the current position.
     *
     * Checks are attempted in this order: single-quoted string, literal keyword,
     * dot-leading decimal, number, dollar-quoted string, MONEY literal, signed number.
     *
     * @param previous The previous lexeme (or null at the start of input); only used
     *                 to decide whether a leading '+' / '-' is a sign or an operator.
     * @returns A Literal lexeme, or null when no literal starts here. When null is
     *          returned from the sign branch the position is restored to the sign.
     */
    tryRead(previous) {
        if (this.isEndOfInput()) {
            return null;
        }
        const char = this.input[this.position];
        // String literal - checked before keywords so that the text 'null' is not treated as null
        if (char === '\'') {
            const value = this.readSingleQuotedString();
            return this.createLexeme(Lexeme_1.TokenType.Literal, value);
        }
        // Keyword literals
        const keyword = this.tryReadKeyword();
        if (keyword) {
            return keyword;
        }
        // Decimal token starting with a dot
        if (char === '.' && this.canRead(1) && charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1])) {
            return this.createLexeme(Lexeme_1.TokenType.Literal, this.readDigit());
        }
        // Digit tokens
        if (charLookupTable_1.CharLookupTable.isDigit(char)) {
            return this.createLexeme(Lexeme_1.TokenType.Literal, this.readDigit());
        }
        // PostgreSQL dollar-quoted string ($$content$$ or $tag$content$tag$)
        if (char === '$' && this.isDollarQuotedString()) {
            return this.createLexeme(Lexeme_1.TokenType.Literal, this.readDollarQuotedString());
        }
        // SQL Server MONEY literal ($123.45 or $1,234.56)
        // Only treated as MONEY when the digits are followed by a decimal point or by a
        // thousands separator, to avoid conflict with PostgreSQL $1 parameters.
        if (char === '$' && this.canRead(1) && charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1])) {
            // Skip the leading run of digits and inspect the first character after it
            let pos = this.position + 1;
            while (pos < this.input.length && charLookupTable_1.CharLookupTable.isDigit(this.input[pos])) {
                pos++;
            }
            const separator = this.input[pos];
            // A comma only counts as a thousands separator when a digit follows it;
            // otherwise "$1, $2" (a parameter followed by a list comma) would be read as MONEY.
            const commaBeforeDigit = separator === ',' &&
                pos + 1 < this.input.length &&
                charLookupTable_1.CharLookupTable.isDigit(this.input[pos + 1]);
            if (separator === '.' || commaBeforeDigit) {
                this.position++; // Skip $
                const numberPart = this.readMoneyDigit();
                return this.createLexeme(Lexeme_1.TokenType.Literal, '$' + numberPart);
            }
        }
        // Signed number
        if ((char === '+' || char === '-') && this.determineSignOrOperator(previous) === "sign") {
            const signPosition = this.position;
            this.position++;
            // Skip whitespace between the sign and the number
            while (this.canRead() && charLookupTable_1.CharLookupTable.isWhitespace(this.input[this.position])) {
                this.position++;
            }
            const startsNumber = this.canRead() && (charLookupTable_1.CharLookupTable.isDigit(this.input[this.position]) ||
                (this.input[this.position] === '.' &&
                    this.canRead(1) &&
                    charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1])));
            if (startsNumber) {
                const digits = this.readDigit();
                // A '+' sign is dropped from the literal value; a '-' sign is kept
                return this.createLexeme(Lexeme_1.TokenType.Literal, char === '-' ? char + digits : digits);
            }
            // Not a number: rewind to the sign so another reader can handle it
            this.position = signPosition;
        }
        return null;
    }
    /**
     * Try to read a literal keyword using the shared literalKeywordParser.
     *
     * @returns A Literal lexeme holding the matched keyword (position is advanced to
     *          the parser's newPosition), or null when no keyword matches.
     */
    tryReadKeyword() {
        const result = exports.literalKeywordParser.parse(this.input, this.position);
        if (result) {
            this.position = result.newPosition;
            return this.createLexeme(Lexeme_1.TokenType.Literal, result.keyword);
        }
        return null;
    }
    /**
     * Determines if the current context treats '+' or '-' as a numeric sign or an operator.
     *
     * For example:
     * - In `1-1`, the '-' is treated as an operator, so the expression is split into `1`, `-`, and `1`.
     * - In `-1`, the '-' is treated as a sign, making `-1` a single, indivisible literal.
     *
     * Rules:
     * - If there is no previous lexeme (null or undefined), the sign is valid.
     * - If the previous lexeme is a literal, identifier, parameter, or closing parenthesis,
     *   the character is an operator.
     * - Anything else allows a sign.
     *
     * @param previous The previous lexeme in the input stream, or null/undefined if none.
     * @returns "sign" if the context allows for a numeric sign, otherwise "operator".
     */
    determineSignOrOperator(previous) {
        // `== null` also covers undefined, so a missing argument does not throw below
        if (previous == null) {
            return "sign";
        }
        // TokenType values are tested with bitwise AND, i.e. treated as bit flags
        const isOperatorContext = (previous.type & Lexeme_1.TokenType.Literal) ||
            (previous.type & Lexeme_1.TokenType.Identifier) ||
            (previous.type & Lexeme_1.TokenType.Parameter) ||
            (previous.type & Lexeme_1.TokenType.CloseParen);
        return isOperatorContext ? "operator" : "sign";
    }
    /**
     * Read a numeric value starting at the current position.
     *
     * Supports `0x` / `0b` / `0o` prefixed numbers, an optional single decimal point,
     * and a single exponent (`e` / `E`) with an optional sign.
     *
     * @returns The text of the number; a number starting with '.' gets a leading '0'.
     * @throws Error if no character could be consumed.
     */
    readDigit() {
        const start = this.position;
        let hasDot = false;
        let hasExponent = false;
        // Consider 0x, 0b, 0o
        if (this.canRead(1) &&
            this.input[this.position] === '0' &&
            "xbo".includes(this.input[this.position + 1].toLowerCase())) {
            const prefixType = this.input[this.position + 1].toLowerCase();
            this.position += 2;
            // Hex characters are only accepted after 0x; 0b and 0o accept decimal digits
            const isHex = prefixType === 'x';
            while (this.canRead()) {
                const c = this.input[this.position];
                if (charLookupTable_1.CharLookupTable.isDigit(c) || (isHex && charLookupTable_1.CharLookupTable.isHexChar(c))) {
                    this.position++;
                }
                else {
                    break;
                }
            }
            return this.input.slice(start, this.position);
        }
        // If starting with dot, note it
        if (this.input[start] === '.') {
            hasDot = true;
            this.position++;
        }
        // Consider decimal point and exponential notation
        while (this.canRead()) {
            const char = this.input[this.position];
            if (char === '.' && !hasDot) {
                hasDot = true;
            }
            else if ((char === 'e' || char === 'E') && !hasExponent) {
                hasExponent = true;
                // Consume an explicit exponent sign together with the 'e'
                if (this.canRead(1) && (this.input[this.position + 1] === '+' || this.input[this.position + 1] === '-')) {
                    this.position++;
                }
            }
            else if (!charLookupTable_1.CharLookupTable.isDigit(char)) {
                break;
            }
            this.position++;
        }
        if (start === this.position) {
            throw new Error(`Unexpected character. position: ${start}\n${this.getDebugPositionInfo(start)}`);
        }
        if (this.input[start] === '.') {
            // If the number starts with a dot, add 0 to the front
            return '0' + this.input.slice(start, this.position);
        }
        return this.input.slice(start, this.position);
    }
    /**
     * Read the numeric part of a MONEY value (the caller has already skipped the '$').
     *
     * Accepts digits, one decimal point, and commas as thousands separators before
     * the decimal point. A comma is only consumed when a digit follows it, so a list
     * separator after the value (as in `$1,234, x`) is left for the next token.
     *
     * @returns The consumed text, e.g. "1,234.56".
     * @throws Error if no character could be consumed.
     */
    readMoneyDigit() {
        const start = this.position;
        let hasDot = false;
        while (this.canRead()) {
            const char = this.input[this.position];
            if (char === '.' && !hasDot) {
                hasDot = true;
            }
            else if (char === ',' && !hasDot) {
                // Thousands separator: must sit between digits
                const digitFollows = this.canRead(1) &&
                    charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1]);
                if (!digitFollows) {
                    break;
                }
            }
            else if (!charLookupTable_1.CharLookupTable.isDigit(char)) {
                break;
            }
            this.position++;
        }
        if (start === this.position) {
            throw new Error(`Unexpected character. position: ${start}\n${this.getDebugPositionInfo(start)}`);
        }
        return this.input.slice(start, this.position);
    }
    /**
     * Read a single-quoted string literal starting at the current position.
     *
     * A backslash causes the following character to be skipped, so an escaped quote
     * does not terminate the string.
     *
     * @returns The input text from the starting position up to and including the closing quote.
     * @throws Error if the end of input is reached before a closing quote.
     */
    readSingleQuotedString() {
        const start = this.position;
        let closed = false;
        this.read("'"); // presumably consumes the opening quote - see BaseTokenReader
        while (this.canRead()) {
            const char = this.input[this.position];
            this.position++;
            // escape character check
            if (char === "\\" && this.canRead(1)) {
                this.position++;
                continue;
            }
            else if (char === '\'') {
                closed = true;
                break;
            }
        }
        if (closed === false) {
            throw new Error(`Single quote is not closed. position: ${start}\n${this.getDebugPositionInfo(start)}`);
        }
        return this.input.slice(start, this.position);
    }
    /**
     * Check if the current position (expected to be at a '$') starts a PostgreSQL
     * dollar-quoted string, i.e. `$$` or `$tag$` where the tag consists only of
     * ASCII letters, digits and underscores. Does not move the position.
     *
     * @returns true when an opening `$$` / `$tag$` delimiter is found.
     */
    isDollarQuotedString() {
        if (!this.canRead(1)) {
            return false;
        }
        // Check for $$ pattern
        if (this.input[this.position + 1] === '$') {
            return true;
        }
        // Check for $tag$ pattern
        let pos = this.position + 1;
        while (pos < this.input.length) {
            const char = this.input[pos];
            if (char === '$') {
                return true;
            }
            if (!this.isAlphanumeric(char) && char !== '_') {
                return false;
            }
            pos++;
        }
        return false;
    }
    /**
     * Read a PostgreSQL dollar-quoted string (`$$content$$` or `$tag$content$tag$`).
     *
     * @returns The full text including the opening and closing delimiters.
     * @throws Error if the opening tag is not terminated by '$', or if the closing
     *         delimiter is never found.
     */
    readDollarQuotedString() {
        const start = this.position;
        this.position++; // Skip initial $
        // Read tag characters until the closing $ of the opening delimiter
        const tagStart = this.position;
        while (this.canRead() && this.input[this.position] !== '$') {
            this.position++;
        }
        if (!this.canRead()) {
            throw new Error(`Unexpected end of input while reading dollar-quoted string tag at position ${start}`);
        }
        const tag = this.input.slice(tagStart, this.position);
        this.position++; // Skip closing $ of opening tag
        // The closing delimiter is identical to the opening one
        const closingTag = '$' + tag + '$';
        // Locate the closing delimiter in one search instead of comparing a
        // substring at every character position
        const closingIndex = this.input.indexOf(closingTag, this.position);
        if (closingIndex === -1) {
            // No closing delimiter: the rest of the input has been scanned
            this.position = this.input.length;
            throw new Error(`Unclosed dollar-quoted string starting at position ${start}. Expected closing tag: ${closingTag}`);
        }
        this.position = closingIndex + closingTag.length;
        return this.input.slice(start, this.position);
    }
    /**
     * Check if character is an ASCII letter or digit.
     *
     * @param char A string; anything that is not exactly one UTF-16 code unit yields false.
     * @returns true for 0-9, A-Z and a-z.
     */
    isAlphanumeric(char) {
        if (char.length !== 1) {
            return false;
        }
        const code = char.charCodeAt(0);
        return (code >= 48 && code <= 57) || // 0-9
            (code >= 65 && code <= 90) || // A-Z
            (code >= 97 && code <= 122); // a-z
    }
}
exports.LiteralTokenReader = LiteralTokenReader;
//# sourceMappingURL=LiteralTokenReader.js.map