UNPKG

rawsql-ts

Version:

[beta]High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.

206 lines 8.28 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.LiteralTokenReader = exports.literalKeywordParser = void 0; const BaseTokenReader_1 = require("./BaseTokenReader"); const Lexeme_1 = require("../models/Lexeme"); const charLookupTable_1 = require("../utils/charLookupTable"); const KeywordParser_1 = require("../parsers/KeywordParser"); const KeywordTrie_1 = require("../models/KeywordTrie"); /** * Reads SQL literal tokens (numbers, strings) */ const keywords = [ ["null"], ["true"], ["false"], ["current_date"], ["current_time"], ["current_timestamp"], ["localtime"], ["localtimestamp"], ["unbounded"], ["normalized"], ["nfc", "normalized"], ["nfd", "normalized"], ["nfkc", "normalized"], ["nfkd", "normalized"], ["nfc"], ["nfd"], ["nfkc"], ["nfkd"], ]; const trie = new KeywordTrie_1.KeywordTrie(keywords); exports.literalKeywordParser = new KeywordParser_1.KeywordParser(trie); class LiteralTokenReader extends BaseTokenReader_1.BaseTokenReader { /** * Try to read a literal token */ tryRead(previous) { if (this.isEndOfInput()) { return null; } const char = this.input[this.position]; // Check for keyword literals const keyword = this.tryReadKeyword(); if (keyword) { return keyword; } // Decimal token starting with a dot if (char === '.' && this.canRead(1) && charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1])) { return this.createLexeme(Lexeme_1.TokenType.Literal, this.readDigit()); } // String literal if (char === '\'') { const value = this.readSingleQuotedString(false); return this.createLexeme(Lexeme_1.TokenType.Literal, value); } // Digit tokens if (charLookupTable_1.CharLookupTable.isDigit(char)) { return this.createLexeme(Lexeme_1.TokenType.Literal, this.readDigit()); } // Signed number if ((char === '+' || char === '-') && this.determineSignOrOperator(previous) === "sign") { const sign = char; this.position++; // Skip whitespace after sign const pos = this.position; while (this.canRead() && charLookupTable_1.CharLookupTable.isWhitespace(this.input[this.position])) { this.position++; } if (this.canRead() && (charLookupTable_1.CharLookupTable.isDigit(this.input[this.position]) || (this.input[this.position] === '.' && this.canRead(1) && charLookupTable_1.CharLookupTable.isDigit(this.input[this.position + 1])))) { return this.createLexeme(Lexeme_1.TokenType.Literal, sign === '-' ? sign + this.readDigit() : this.readDigit()); } // Not a number, restore position this.position = pos - 1; // Adjust for the increment at the beginning } return null; } tryReadKeyword() { // Check for keyword literals const result = exports.literalKeywordParser.parse(this.input, this.position); if (result) { this.position = result.newPosition; return this.createLexeme(Lexeme_1.TokenType.Literal, result.keyword); } return null; } /** * Determines if the current context treats '+' or '-' as a numeric sign or an operator. * This method is used to differentiate between operators and numeric signs (e.g., '+' or '-'). * * For example: * - In `1-1`, the '-' is treated as an operator, so the expression is split into `1`, `-`, and `1`. * - In `-1`, the '-' is treated as a sign, making `-1` a single, indivisible literal. * * The logic for determining whether '+' or '-' is a sign or an operator is as follows: * - If there is no previous lexeme, it is considered the start of the input, so the sign is valid. * - If the previous lexeme is a literal or an identifier (e.g., `a.id`), the sign is treated as an operator. * - If the previous lexeme is a closing parenthesis (e.g., `count(*)`), the sign is also treated as an operator. * * @param previous The previous lexeme in the input stream. * @returns "sign" if the context allows for a numeric sign, otherwise "operator". */ determineSignOrOperator(previous) { // If there is no previous lexeme, treat as a sign if (previous === null) { return "sign"; } // If the previous lexeme is a literal, identifier, or closing parenthesis, treat as an operator const isOperatorContext = previous.type === Lexeme_1.TokenType.Literal || previous.type === Lexeme_1.TokenType.Identifier || previous.type === Lexeme_1.TokenType.CloseParen; return isOperatorContext ? "operator" : "sign"; } /** * Read a numeric value */ readDigit() { const start = this.position; let hasDot = false; let hasExponent = false; // Consider 0x, 0b, 0o if (this.canRead(1) && this.input[this.position] === '0' && "xbo".includes(this.input[this.position + 1].toLowerCase())) { const prefixType = this.input[this.position + 1].toLowerCase(); this.position += 2; // Continue to get numeric and hexadecimal notation strings const isHex = prefixType === 'x'; while (this.canRead()) { const c = this.input[this.position]; if (charLookupTable_1.CharLookupTable.isDigit(c) || (isHex && charLookupTable_1.CharLookupTable.isHexChar(c))) { this.position++; } else { break; } } return this.input.slice(start, this.position); } // If starting with dot, note it if (this.input[start] === '.') { hasDot = true; this.position++; } // Consider decimal point and exponential notation while (this.canRead()) { const char = this.input[this.position]; if (char === '.' && !hasDot) { hasDot = true; } else if ((char === 'e' || char === 'E') && !hasExponent) { hasExponent = true; if (this.canRead(1) && (this.input[this.position + 1] === '+' || this.input[this.position + 1] === '-')) { this.position++; } } else if (!charLookupTable_1.CharLookupTable.isDigit(char)) { break; } this.position++; } if (start === this.position) { throw new Error(`Unexpected character. position: ${start}\n${this.getDebugPositionInfo(start)}`); } if (this.input[start] === '.') { // If the number starts with a dot, add 0 to the front return '0' + this.input.slice(start, this.position); } return this.input.slice(start, this.position); } /** * Read a string literal */ readSingleQuotedString(includeSingleQuote) { const start = this.position; let closed = false; this.read("'"); while (this.canRead()) { const char = this.input[this.position]; this.position++; // escape character check if (char === "\\" && this.canRead(1)) { this.position++; continue; } else if (char === '\'') { closed = true; break; } } if (closed === false) { throw new Error(`Single quote is not closed. position: ${start}\n${this.getDebugPositionInfo(start)}`); } if (includeSingleQuote) { const value = this.input.slice(start, this.position); return value; } else { const value = this.input.slice(start + 1, this.position - 1); return value; } } } exports.LiteralTokenReader = LiteralTokenReader; //# sourceMappingURL=LiteralTokenReader.js.map