rawsql-ts
Version:
High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
562 lines • 30.1 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ValueParser = void 0;
const Lexeme_1 = require("../models/Lexeme");
const ValueComponent_1 = require("../models/ValueComponent");
const SqlTokenizer_1 = require("./SqlTokenizer");
const IdentifierParser_1 = require("./IdentifierParser");
const LiteralParser_1 = require("./LiteralParser");
const ParenExpressionParser_1 = require("./ParenExpressionParser");
const UnaryExpressionParser_1 = require("./UnaryExpressionParser");
const ParameterExpressionParser_1 = require("./ParameterExpressionParser");
const StringSpecifierExpressionParser_1 = require("./StringSpecifierExpressionParser");
const CommandExpressionParser_1 = require("./CommandExpressionParser");
const FunctionExpressionParser_1 = require("./FunctionExpressionParser");
const FullNameParser_1 = require("./FullNameParser");
const ParseError_1 = require("./ParseError");
const OperatorPrecedence_1 = require("../utils/OperatorPrecedence");
/**
 * Parser for SQL value expressions.
 *
 * Turns a lexeme array (or a raw string, via `parse`) into value components
 * such as column references, literals, function calls, casts, BETWEEN
 * expressions, binary expressions and array index/slice expressions.
 * Every `parseXxx` helper returns `{ value, newIndex }`, where `newIndex` is
 * the index of the first lexeme that was not consumed.
 */
class ValueParser {
    /**
     * Tokenize and parse a complete value expression.
     *
     * @param query SQL text containing a single value expression
     * @returns The parsed value component
     * @throws ParseError when lexemes remain after the expression was parsed
     */
    static parse(query) {
        const tokenizer = new SqlTokenizer_1.SqlTokenizer(query);
        const lexemes = tokenizer.readLexmes();
        const result = this.parseFromLexeme(lexemes, 0);
        // The whole input must be consumed; leftovers are reported as unparsed lexemes.
        if (result.newIndex < lexemes.length) {
            throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, result.newIndex, `[ValueParser]`);
        }
        return result.value;
    }
    /**
     * Parse a value expression from a lexeme array, starting at `index`.
     *
     * @param lexemes Array of lexemes
     * @param index Index of the first lexeme of the expression
     * @param allowAndOperator When false, parsing stops in front of an `and` operator
     * @param allowOrOperator When false, parsing stops in front of an `or` operator
     * @returns `{ value, newIndex }`
     */
    static parseFromLexeme(lexemes, index, allowAndOperator = true, allowOrOperator = true) {
        return this.parseExpressionWithPrecedence(lexemes, index, 0, allowAndOperator, allowOrOperator);
    }
    /**
     * Parse an expression using the precedence climbing algorithm.
     *
     * A primary item is parsed first (plus any postfix array access), then
     * operators are consumed for as long as their precedence is at least
     * `minPrecedence`. BETWEEN and `::` are handled as special forms; every
     * other operator produces a left-associative BinaryExpression.
     *
     * @param lexemes Array of lexemes
     * @param index Index of the first lexeme of the expression
     * @param minPrecedence Operators with a lower precedence end the expression
     * @param allowAndOperator When false, parsing stops in front of an `and` operator
     * @param allowOrOperator When false, parsing stops in front of an `or` operator
     * @returns `{ value, newIndex }`
     * @throws Error when `index` is at or past the end of `lexemes`
     */
    static parseExpressionWithPrecedence(lexemes, index, minPrecedence, allowAndOperator = true, allowOrOperator = true) {
        let idx = index;
        // Guard before touching lexemes[idx]: an empty input or an expression that
        // ends on a binary operator would otherwise fail with an opaque TypeError.
        // The message matches the one parseItem raises for the same condition.
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }
        // Capture the comments of the leading lexeme before parseItem consumes it.
        const comment = lexemes[idx].comments;
        const positionedComments = lexemes[idx].positionedComments;
        const left = this.parseItem(lexemes, idx);
        // Transfer positioned comments if they exist and the component doesn't handle its own comments
        if (positionedComments && positionedComments.length > 0 && !left.value.positionedComments) {
            left.value.positionedComments = positionedComments;
        }
        // Fall back to legacy comments if positioned comments aren't available
        else if (left.value.comments === null && comment && comment.length > 0) {
            left.value.comments = comment;
        }
        idx = left.newIndex;
        let result = left.value;
        // Handle postfix array access ([...])
        const arrayAccessResult = this.parseArrayAccess(lexemes, idx, result);
        result = arrayAccessResult.value;
        idx = arrayAccessResult.newIndex;
        // Process operators with precedence
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Operator)) {
            const operatorToken = lexemes[idx];
            const operator = operatorToken.value;
            // Logical operators can be disabled by the caller; leave them unconsumed.
            if (!allowAndOperator && operator.toLowerCase() === "and") {
                break;
            }
            if (!allowOrOperator && operator.toLowerCase() === "or") {
                break;
            }
            const precedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(operator);
            // An operator that binds less tightly than the minimum belongs to an outer call.
            if (precedence < minPrecedence) {
                break;
            }
            idx++; // consume operator
            // BETWEEN has its own syntax (lower AND upper), so it is delegated.
            if (OperatorPrecedence_1.OperatorPrecedence.isBetweenOperator(operator)) {
                const betweenResult = FunctionExpressionParser_1.FunctionExpressionParser.parseBetweenExpression(lexemes, idx, result, operator.toLowerCase().includes('not'));
                result = betweenResult.value;
                idx = betweenResult.newIndex;
                continue;
            }
            // The right-hand side of :: (cast) is a type, not a value expression.
            if (operator === "::") {
                const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
                result = new ValueComponent_1.CastExpression(result, typeValue.value);
                idx = typeValue.newIndex;
                continue;
            }
            // For left-associative operators, use precedence + 1
            const nextMinPrecedence = precedence + 1;
            const rightResult = this.parseExpressionWithPrecedence(lexemes, idx, nextMinPrecedence, allowAndOperator, allowOrOperator);
            idx = rightResult.newIndex;
            const binaryExpr = new ValueComponent_1.BinaryExpression(result, operator, rightResult.value);
            // Comments attached to the operator token are kept on the expression's operator.
            if (operatorToken.comments && operatorToken.comments.length > 0) {
                binaryExpr.operator.comments = operatorToken.comments;
            }
            if (operatorToken.positionedComments && operatorToken.positionedComments.length > 0) {
                binaryExpr.operator.positionedComments = operatorToken.positionedComments;
            }
            result = binaryExpr;
        }
        return { value: result, newIndex: idx };
    }
    /**
     * Copy the comments of a lexeme onto a value component.
     *
     * Positioned comments are cloned: 'before' entries are prepended to and
     * 'after' entries are appended to the component's existing positioned
     * comments. When the lexeme has positioned comments but none of them is
     * 'before' or 'after', and the component has none yet, all of them are
     * cloned over. Legacy `comments` are used only when the lexeme has no
     * positioned comments and the component's `comments` is exactly `null`.
     *
     * @param lexeme Source lexeme
     * @param value Value component to receive the comments (mutated in place)
     */
    static transferPositionedComments(lexeme, value) {
        const positioned = lexeme.positionedComments;
        if (positioned && positioned.length > 0) {
            // Clone so the component never shares comment arrays with the lexeme.
            const clone = (comment) => ({
                position: comment.position,
                comments: [...comment.comments],
            });
            const clonedBefore = positioned.filter(comment => comment.position === 'before').map(clone);
            const clonedAfter = positioned.filter(comment => comment.position === 'after').map(clone);
            if (clonedBefore.length > 0) {
                value.positionedComments = value.positionedComments
                    ? [...clonedBefore, ...value.positionedComments]
                    : clonedBefore;
            }
            if (clonedAfter.length > 0) {
                value.positionedComments = value.positionedComments
                    ? [...value.positionedComments, ...clonedAfter]
                    : clonedAfter;
            }
            // Preserve other comment positions when no before/after segments were processed.
            if (!clonedBefore.length && !clonedAfter.length && !value.positionedComments) {
                value.positionedComments = positioned.map(clone);
            }
            return;
        }
        // Fall back to legacy comments if positioned comments aren't available
        if (value.comments === null && lexeme.comments && lexeme.comments.length > 0) {
            value.comments = lexeme.comments;
        }
    }
    /**
     * Parse a single primary item (no binary operators) at `index`.
     *
     * The lexeme's type flags are tested in a fixed order and the matching
     * specialised parser is invoked; the leading lexeme's comments are then
     * transferred to the produced component.
     *
     * @param lexemes Array of lexemes
     * @param index Index of the lexeme to parse
     * @returns `{ value, newIndex }`
     * @throws Error when `index` is past the end of `lexemes`
     * @throws ParseError when the lexeme type is not handled
     */
    static parseItem(lexemes, index) {
        let idx = index;
        // Range check
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }
        const current = lexemes[idx];
        // Lexeme that carries the Identifier, Operator and Type flags all at once.
        if (current.type & Lexeme_1.TokenType.Identifier && current.type & Lexeme_1.TokenType.Operator && current.type & Lexeme_1.TokenType.Type) {
            // Check if this is followed by parentheses (function call)
            if (idx + 1 < lexemes.length && (lexemes[idx + 1].type & Lexeme_1.TokenType.OpenParen)) {
                // Determine if this is a type constructor or function call
                if (this.isTypeConstructor(lexemes, idx + 1, current.value)) {
                    // Type constructor
                    const result = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
                    this.transferPositionedComments(current, result.value);
                    return { value: result.value, newIndex: result.newIndex };
                }
                else {
                    // Function call
                    const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                    this.transferPositionedComments(current, result.value);
                    return result;
                }
            }
            // Typed literal format pattern
            // e.g., `interval '2 days'`
            const first = IdentifierParser_1.IdentifierParser.parseFromLexeme(lexemes, idx);
            if (first.newIndex >= lexemes.length) {
                this.transferPositionedComments(current, first.value);
                return first;
            }
            const next = lexemes[first.newIndex];
            if (next.type & Lexeme_1.TokenType.Literal) {
                // Typed literal format
                const literalIndex = first.newIndex;
                const literalLexeme = lexemes[literalIndex];
                const second = LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, literalIndex);
                // Preserve comments that belong to the literal part of typed literal expressions.
                this.transferPositionedComments(literalLexeme, second.value);
                const result = new ValueComponent_1.UnaryExpression(lexemes[idx].value, second.value);
                this.transferPositionedComments(current, result);
                return { value: result, newIndex: second.newIndex };
            }
            this.transferPositionedComments(current, first.value);
            return first;
        }
        else if (current.type & Lexeme_1.TokenType.Identifier) {
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            // Namespace is also recognized as Identifier.
            // Since functions and types, as well as columns (tables), can have namespaces,
            // it is necessary to determine by the last element of the identifier.
            if (lexemes[newIndex - 1].type & Lexeme_1.TokenType.Function) {
                const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                this.transferPositionedComments(current, result.value);
                return result;
            }
            else if (lexemes[newIndex - 1].type & Lexeme_1.TokenType.Type) {
                // Handle Type tokens that also have Identifier flag
                if (newIndex < lexemes.length && (lexemes[newIndex].type & Lexeme_1.TokenType.OpenParen)) {
                    // Determine if this is a type constructor or function call
                    if (this.isTypeConstructor(lexemes, newIndex, name.name)) {
                        // Type constructor (NUMERIC(10,2), VARCHAR(50), etc.)
                        const result = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
                        this.transferPositionedComments(current, result.value);
                        return { value: result.value, newIndex: result.newIndex };
                    }
                    else {
                        // Function call (DATE('2025-01-01'), etc.)
                        const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                        this.transferPositionedComments(current, result.value);
                        return result;
                    }
                }
                else {
                    // Handle standalone type tokens
                    const value = new ValueComponent_1.TypeValue(namespaces, name);
                    this.transferPositionedComments(current, value);
                    return { value, newIndex };
                }
            }
            const value = new ValueComponent_1.ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }
        else if (current.type & Lexeme_1.TokenType.Literal) {
            const result = LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.OpenParen) {
            const result = ParenExpressionParser_1.ParenExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.Function) {
            const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.Operator) {
            // An operator in primary position is handed to the unary expression parser.
            const result = UnaryExpressionParser_1.UnaryExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.Parameter) {
            const result = ParameterExpressionParser_1.ParameterExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.StringSpecifier) {
            const result = StringSpecifierExpressionParser_1.StringSpecifierExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.Command) {
            const result = CommandExpressionParser_1.CommandExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        else if (current.type & Lexeme_1.TokenType.OpenBracket) {
            // SQLServer escape identifier format. e.g. [dbo] or [dbo].[table]
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            const value = new ValueComponent_1.ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }
        else if (current.type & Lexeme_1.TokenType.Type) {
            // Check if this type token is followed by an opening parenthesis
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            if (newIndex < lexemes.length && (lexemes[newIndex].type & Lexeme_1.TokenType.OpenParen)) {
                // Determine if this is a type constructor or function call
                if (this.isTypeConstructor(lexemes, newIndex, name.name)) {
                    // Type constructor (NUMERIC(10,2), VARCHAR(50), etc.)
                    const result = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
                    this.transferPositionedComments(current, result.value);
                    return { value: result.value, newIndex: result.newIndex };
                }
                else {
                    // Function call (DATE('2025-01-01'), etc.)
                    const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                    this.transferPositionedComments(current, result.value);
                    return result;
                }
            }
            else {
                // Handle standalone type tokens
                const value = new ValueComponent_1.TypeValue(namespaces, name);
                this.transferPositionedComments(current, value);
                return { value, newIndex };
            }
        }
        throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `[ValueParser] Invalid lexeme.`);
    }
    /**
     * Parse a delimited, comma-separated argument list.
     *
     * Results by shape of the input:
     * - empty list: an empty ValueList
     * - a leading `*` followed by the closing token: a `*` ColumnReference
     * - one argument: that argument's value component itself
     * - several arguments: a ValueList of them
     *
     * Comments on the opening token are moved onto the first argument
     * ('after' positioned comments become 'before' comments).
     *
     * @param openToken Token type that opens the list (compared with ===)
     * @param closeToken Token type that closes the list (compared with ===)
     * @param lexemes Array of lexemes
     * @param index Index of the expected opening token
     * @returns `{ value, newIndex }`
     * @throws ParseError when the opening or closing token is missing
     */
    static parseArgument(openToken, closeToken, lexemes, index) {
        let idx = index;
        const args = [];
        // Check for opening parenthesis
        if (idx < lexemes.length && lexemes[idx].type === openToken) {
            // Keep the opening token so its comments can be attached to the first argument.
            const openParenToken = lexemes[idx];
            idx++;
            if (idx < lexemes.length && lexemes[idx].type === closeToken) {
                // If there are no arguments, return an empty ValueList
                idx++;
                return { value: new ValueComponent_1.ValueList([]), newIndex: idx };
            }
            // If the next element is `*`, treat `*` as an Identifier
            if (idx < lexemes.length && lexemes[idx].value === "*") {
                const wildcard = new ValueComponent_1.ColumnReference(null, "*");
                // Transfer opening paren comments to wildcard
                if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
                    // Convert "after" positioned comments from opening paren to "before" comments for the argument
                    const beforeComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
                    if (beforeComments.length > 0) {
                        wildcard.positionedComments = beforeComments.map(pc => ({
                            position: 'before',
                            comments: pc.comments
                        }));
                    }
                }
                else if (openParenToken.comments && openParenToken.comments.length > 0) {
                    wildcard.comments = openParenToken.comments;
                }
                idx++;
                // The next element must be closeToken
                if (idx < lexemes.length && lexemes[idx].type === closeToken) {
                    idx++;
                    return { value: wildcard, newIndex: idx };
                }
                else {
                    throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Expected closing parenthesis after wildcard '*'.`);
                }
            }
            // Parse the value inside
            const result = this.parseFromLexeme(lexemes, idx);
            idx = result.newIndex;
            // Transfer opening paren comments to the first argument
            if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
                // Convert "after" positioned comments from opening paren to "before" comments for the argument
                const afterComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
                if (afterComments.length > 0) {
                    const beforeComments = afterComments.map(pc => ({
                        position: 'before',
                        comments: pc.comments
                    }));
                    // Merge with existing positioned comments
                    if (result.value.positionedComments) {
                        result.value.positionedComments = [...beforeComments, ...result.value.positionedComments];
                    }
                    else {
                        result.value.positionedComments = beforeComments;
                    }
                }
            }
            else if (openParenToken.comments && openParenToken.comments.length > 0) {
                // Fall back to legacy comments
                if (result.value.comments) {
                    result.value.comments = openParenToken.comments.concat(result.value.comments);
                }
                else {
                    result.value.comments = openParenToken.comments;
                }
            }
            args.push(result.value);
            // Continue reading if the next element is a comma
            while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Comma)) {
                idx++;
                const argResult = this.parseFromLexeme(lexemes, idx);
                idx = argResult.newIndex;
                args.push(argResult.value);
            }
            // Check for closing parenthesis
            if (idx < lexemes.length && lexemes[idx].type === closeToken) {
                idx++;
                if (args.length === 1) {
                    // Return as is if there is only one argument
                    return { value: args[0], newIndex: idx };
                }
                // Wrap multiple arguments in a ValueList
                const value = new ValueComponent_1.ValueList(args);
                return { value, newIndex: idx };
            }
            else {
                throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Missing closing parenthesis.`);
            }
        }
        throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, index, `Expected opening parenthesis.`);
    }
    /**
     * Parse postfix array access operations [index] or [start:end]
     *
     * Consecutive bracket groups are applied one after another to the base
     * expression. A bracket group that `isSqlServerBracketIdentifier` classifies
     * as a bracket identifier ends the loop and is left unconsumed.
     *
     * @param lexemes Array of lexemes
     * @param index Current index
     * @param baseExpression The base expression to apply array access to
     * @returns Result with potentially modified expression and new index
     * @throws Error when the bracket content is missing or the closing bracket is absent
     */
    static parseArrayAccess(lexemes, index, baseExpression) {
        let idx = index;
        let result = baseExpression;
        // Check for array access syntax [...]
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.OpenBracket)) {
            // Check if this is SQL Server bracket identifier by looking ahead
            if (this.isSqlServerBracketIdentifier(lexemes, idx)) {
                break; // This is SQL Server bracket syntax, not array access
            }
            idx++; // consume opening bracket
            if (idx >= lexemes.length) {
                throw new Error(`Expected array index or slice after '[' at index ${idx - 1}`);
            }
            // Check for empty brackets []
            if (lexemes[idx].type & Lexeme_1.TokenType.CloseBracket) {
                throw new Error(`Empty array access brackets not supported at index ${idx}`);
            }
            let startExpr = null;
            let isSlice = false;
            if (lexemes[idx].type & Lexeme_1.TokenType.Operator && lexemes[idx].value === ":") {
                // Starts with colon [:end] - start is null
                isSlice = true;
                idx++; // consume colon
            }
            else {
                // Parse above the colon's precedence so the ':' itself is not
                // swallowed as a binary operator of the start expression.
                const colonPrecedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(":");
                const firstResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1);
                startExpr = firstResult.value;
                idx = firstResult.newIndex;
                // Check if next token is colon
                if (idx < lexemes.length && lexemes[idx].type & Lexeme_1.TokenType.Operator && lexemes[idx].value === ":") {
                    isSlice = true;
                    idx++; // consume colon
                }
            }
            if (isSlice) {
                // This is a slice expression [start:end]
                let endExpr = null;
                // Check if there's an end expression or if it's an open slice like [1:]
                if (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    const colonPrecedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(":");
                    const endResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1);
                    endExpr = endResult.value;
                    idx = endResult.newIndex;
                }
                // Expect closing bracket
                if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array slice at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ValueComponent_1.ArraySliceExpression(result, startExpr, endExpr);
            }
            else {
                // Single index access [index]. startExpr is always set here: the only
                // path that leaves it null also sets isSlice.
                if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array index at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ValueComponent_1.ArrayIndexExpression(result, startExpr);
            }
        }
        return { value: result, newIndex: idx };
    }
    /**
     * Check if the bracket at the given index represents SQL Server bracket identifier syntax
     *
     * Returns true when a closing bracket exists and every lexeme between the
     * brackets is an identifier or a `.` operator, e.g. [identifier] or
     * [schema.table]. Note that an empty pair `[]` also satisfies this and
     * yields true. Anything else (numbers, expressions, colons, a missing
     * closing bracket) yields false and is treated as array access.
     *
     * @param lexemes Array of lexemes
     * @param bracketIndex Index of the opening bracket
     * @returns True if the bracket group looks like a bracket identifier
     */
    static isSqlServerBracketIdentifier(lexemes, bracketIndex) {
        let idx = bracketIndex + 1; // Start after opening bracket
        while (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
            const token = lexemes[idx];
            const isIdentifier = token.type & Lexeme_1.TokenType.Identifier;
            const isDot = token.type & Lexeme_1.TokenType.Operator && token.value === ".";
            // If we find anything else (numbers, expressions, colons), it's array access
            if (!isIdentifier && !isDot) {
                return false;
            }
            idx++;
        }
        // Running off the end means there was no closing bracket: malformed, not an identifier.
        // Otherwise the content consists solely of identifiers and dots.
        return idx < lexemes.length;
    }
    /**
     * Determines if a type token followed by parentheses is a type constructor or function call
     *
     * @param lexemes Array of lexemes
     * @param openParenIndex Index of the opening parenthesis
     * @param typeName Name of the type/function (compared case-insensitively)
     * @returns True if this is a type constructor, false if it's a function call
     */
    static isTypeConstructor(lexemes, openParenIndex, typeName) {
        // These are always type constructors regardless of content
        const alwaysTypeConstructors = [
            'NUMERIC', 'DECIMAL', 'VARCHAR', 'CHAR', 'CHARACTER',
            'TIMESTAMP', 'TIME', 'INTERVAL'
        ];
        const upperTypeName = typeName.toUpperCase();
        if (alwaysTypeConstructors.includes(upperTypeName)) {
            return true;
        }
        // For DATE, check if the first argument is a string literal (function) or not (type)
        if (upperTypeName === 'DATE') {
            const firstArgIndex = openParenIndex + 1;
            if (firstArgIndex < lexemes.length) {
                const firstArg = lexemes[firstArgIndex];
                // A literal whose text does not convert to a number counts as a string literal.
                const isStringLiteral = (firstArg.type & Lexeme_1.TokenType.Literal) &&
                    typeof firstArg.value === 'string' &&
                    Number.isNaN(Number(firstArg.value));
                // If first argument is a string literal, it's a function call
                // DATE('2025-01-01') -> function
                // DATE(6) -> type constructor
                return !isStringLiteral;
            }
        }
        // Default: assume it's a function call for ambiguous cases
        return false;
    }
}
exports.ValueParser = ValueParser;
//# sourceMappingURL=ValueParser.js.map