UNPKG

rawsql-ts

Version:

High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.

558 lines 28.9 kB
import { TokenType } from "../models/Lexeme";
import { ColumnReference, TypeValue, UnaryExpression, ValueList, BinaryExpression, CastExpression, ArraySliceExpression, ArrayIndexExpression } from "../models/ValueComponent";
import { SqlTokenizer } from "./SqlTokenizer";
import { IdentifierParser } from "./IdentifierParser";
import { LiteralParser } from "./LiteralParser";
import { ParenExpressionParser } from "./ParenExpressionParser";
import { UnaryExpressionParser } from "./UnaryExpressionParser";
import { ParameterExpressionParser } from "./ParameterExpressionParser";
import { StringSpecifierExpressionParser } from "./StringSpecifierExpressionParser";
import { CommandExpressionParser } from "./CommandExpressionParser";
import { FunctionExpressionParser } from "./FunctionExpressionParser";
import { FullNameParser } from "./FullNameParser";
import { ParseError } from "./ParseError";
import { OperatorPrecedence } from "../utils/OperatorPrecedence";

/**
 * Parses SQL value expressions (columns, literals, function calls, casts,
 * binary/unary expressions, array access, ...) from a lexeme array into
 * value components.
 */
export class ValueParser {
    /**
     * Tokenize a SQL value expression and parse it completely.
     *
     * @param {string} query SQL text containing a single value expression.
     * @returns The parsed value component.
     * @throws ParseError (via `ParseError.fromUnparsedLexemes`) when lexemes
     *         remain after the expression has been parsed.
     */
    static parse(query) {
        const tokenizer = new SqlTokenizer(query);
        const lexemes = tokenizer.readLexmes();

        const result = this.parseFromLexeme(lexemes, 0);

        // Anything left over means the input was not one single expression.
        if (result.newIndex < lexemes.length) {
            throw ParseError.fromUnparsedLexemes(lexemes, result.newIndex, `[ValueParser]`);
        }
        return result.value;
    }

    /**
     * Parse a value expression starting at `index`, optionally refusing to
     * consume the logical operators AND / OR.
     *
     * @param lexemes Array of lexemes.
     * @param index Index of the first lexeme of the expression.
     * @param allowAndOperator When false, parsing stops in front of an `and` operator.
     * @param allowOrOperator When false, parsing stops in front of an `or` operator.
     * @returns `{ value, newIndex }` where `newIndex` is the first unconsumed lexeme.
     */
    static parseFromLexeme(lexemes, index, allowAndOperator = true, allowOrOperator = true) {
        return this.parseExpressionWithPrecedence(lexemes, index, 0, allowAndOperator, allowOrOperator);
    }

    /**
     * Parse an expression with operator precedence handling (precedence
     * climbing): a primary item, optional postfix array access, then a run of
     * operators whose precedence is at least `minPrecedence`.
     *
     * @param lexemes Array of lexemes.
     * @param index Index of the first lexeme of the expression.
     * @param minPrecedence Operators with a lower precedence end the expression.
     * @param allowAndOperator When false, parsing stops in front of an `and` operator.
     * @param allowOrOperator When false, parsing stops in front of an `or` operator.
     * @returns `{ value, newIndex }` where `newIndex` is the first unconsumed lexeme.
     * @throws Error when `index` is past the end of `lexemes` (for example
     *         after a trailing operator or comma).
     */
    static parseExpressionWithPrecedence(lexemes, index, minPrecedence, allowAndOperator = true, allowOrOperator = true) {
        let idx = index;

        // Range check BEFORE touching lexemes[idx]: truncated input such as
        // "1 +" would otherwise fail with an opaque TypeError on `.comments`.
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }

        // Parse the primary expression (left side).
        const comment = lexemes[idx].comments;
        const positionedComments = lexemes[idx].positionedComments;
        const left = this.parseItem(lexemes, idx);

        if (positionedComments && positionedComments.length > 0 && !left.value.positionedComments) {
            // The component did not pick up positioned comments itself.
            left.value.positionedComments = positionedComments;
        } else if (left.value.comments === null && comment && comment.length > 0) {
            // Fall back to legacy comments when positioned comments are unavailable.
            left.value.comments = comment;
        }
        idx = left.newIndex;
        let result = left.value;

        // Handle postfix array access ([...]).
        const arrayAccessResult = this.parseArrayAccess(lexemes, idx, result);
        result = arrayAccessResult.value;
        idx = arrayAccessResult.newIndex;

        // Process operators with precedence.
        while (idx < lexemes.length && (lexemes[idx].type & TokenType.Operator)) {
            const operatorToken = lexemes[idx];
            const operator = operatorToken.value;

            // Stop in front of logical operators the caller has disallowed.
            if (!allowAndOperator && operator.toLowerCase() === "and") {
                break;
            }
            if (!allowOrOperator && operator.toLowerCase() === "or") {
                break;
            }

            // An operator that binds less tightly than the minimum belongs to an outer call.
            const precedence = OperatorPrecedence.getPrecedence(operator);
            if (precedence < minPrecedence) {
                break;
            }
            idx++; // consume operator

            // BETWEEN has its own `x BETWEEN a AND b` syntax.
            if (OperatorPrecedence.isBetweenOperator(operator)) {
                const betweenResult = FunctionExpressionParser.parseBetweenExpression(lexemes, idx, result, operator.toLowerCase().includes('not'));
                result = betweenResult.value;
                idx = betweenResult.newIndex;
                continue;
            }

            // `::` is followed by a type, not by a value expression.
            if (operator === "::") {
                const typeValue = FunctionExpressionParser.parseTypeValue(lexemes, idx);
                result = new CastExpression(result, typeValue.value);
                idx = typeValue.newIndex;
                continue;
            }

            // For left-associative operators the right side must bind strictly tighter.
            const nextMinPrecedence = precedence + 1;
            const rightResult = this.parseExpressionWithPrecedence(lexemes, idx, nextMinPrecedence, allowAndOperator, allowOrOperator);
            idx = rightResult.newIndex;

            // Create the binary expression and keep comments attached to the operator token.
            const binaryExpr = new BinaryExpression(result, operator, rightResult.value);
            if (operatorToken.comments && operatorToken.comments.length > 0) {
                binaryExpr.operator.comments = operatorToken.comments;
            }
            if (operatorToken.positionedComments && operatorToken.positionedComments.length > 0) {
                binaryExpr.operator.positionedComments = operatorToken.positionedComments;
            }
            result = binaryExpr;
        }
        return { value: result, newIndex: idx };
    }

    /**
     * Copy comments from a lexeme onto a value component.
     *
     * Positioned comments are cloned: 'before' entries are placed in front of
     * the component's existing positioned comments, 'after' entries behind
     * them. Entries with any other position are copied only when the
     * component has no positioned comments at all. When the lexeme has no
     * positioned comments, its legacy `comments` array is assigned, but only
     * if the component's `comments` is `null`.
     *
     * @param lexeme Lexeme that may carry `positionedComments` / `comments`.
     * @param value Value component that receives the comments (mutated in place).
     */
    static transferPositionedComments(lexeme, value) {
        if (lexeme.positionedComments && lexeme.positionedComments.length > 0) {
            const beforeComments = lexeme.positionedComments.filter(comment => comment.position === 'before');
            const afterComments = lexeme.positionedComments.filter(comment => comment.position === 'after');

            if (beforeComments.length > 0) {
                const clonedBefore = beforeComments.map(comment => ({
                    position: comment.position,
                    comments: [...comment.comments],
                }));
                value.positionedComments = value.positionedComments
                    ? [...clonedBefore, ...value.positionedComments]
                    : clonedBefore;
            }
            if (afterComments.length > 0) {
                const clonedAfter = afterComments.map(comment => ({
                    position: comment.position,
                    comments: [...comment.comments],
                }));
                value.positionedComments = value.positionedComments
                    ? [...value.positionedComments, ...clonedAfter]
                    : clonedAfter;
            }
            // Preserve other comment positions when no before/after segments were processed.
            if (!beforeComments.length && !afterComments.length && !value.positionedComments) {
                value.positionedComments = lexeme.positionedComments.map(comment => ({
                    position: comment.position,
                    comments: [...comment.comments],
                }));
            }
            return;
        }

        // Fall back to legacy comments if positioned comments aren't available.
        if (value.comments === null && lexeme.comments && lexeme.comments.length > 0) {
            value.comments = lexeme.comments;
        }
    }

    /**
     * Parse a type token that is directly followed by an opening parenthesis,
     * either as a type constructor or as a function call (decided by
     * `isTypeConstructor`), and attach the comments of `commentSource`.
     *
     * @param lexemes Array of lexemes.
     * @param index Index where the (possibly namespaced) name starts.
     * @param openParenIndex Index of the opening parenthesis.
     * @param typeName Name used to classify the construct.
     * @param commentSource Lexeme whose comments are transferred to the result.
     * @returns `{ value, newIndex }`.
     */
    static parseTypeConstructorOrFunctionCall(lexemes, index, openParenIndex, typeName, commentSource) {
        if (this.isTypeConstructor(lexemes, openParenIndex, typeName)) {
            // Type constructor (NUMERIC(10,2), VARCHAR(50), etc.)
            const typeResult = FunctionExpressionParser.parseTypeValue(lexemes, index);
            this.transferPositionedComments(commentSource, typeResult.value);
            return { value: typeResult.value, newIndex: typeResult.newIndex };
        }
        // Function call (DATE('2025-01-01'), etc.)
        const callResult = FunctionExpressionParser.parseFromLexeme(lexemes, index);
        this.transferPositionedComments(commentSource, callResult.value);
        return callResult;
    }

    /**
     * Parse one primary item (no binary operators) starting at `index`,
     * dispatching on the token type flags of the current lexeme.
     *
     * @param lexemes Array of lexemes.
     * @param index Index of the lexeme to parse.
     * @returns `{ value, newIndex }`.
     * @throws Error when `index` is past the end of `lexemes`.
     * @throws ParseError (via `ParseError.fromUnparsedLexemes`) when the
     *         lexeme matches none of the handled token types.
     */
    static parseItem(lexemes, index) {
        const idx = index;

        // Range check
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }
        const current = lexemes[idx];

        if (current.type & TokenType.Identifier && current.type & TokenType.Operator && current.type & TokenType.Type) {
            // Followed by parentheses: type constructor or function call.
            if (idx + 1 < lexemes.length && (lexemes[idx + 1].type & TokenType.OpenParen)) {
                return this.parseTypeConstructorOrFunctionCall(lexemes, idx, idx + 1, current.value, current);
            }

            // Typed literal format pattern
            // e.g., `interval '2 days'`
            const first = IdentifierParser.parseFromLexeme(lexemes, idx);
            if (first.newIndex >= lexemes.length) {
                this.transferPositionedComments(current, first.value);
                return first;
            }
            const next = lexemes[first.newIndex];
            if (next.type & TokenType.Literal) {
                // Typed literal format
                const literalIndex = first.newIndex;
                const literalLexeme = lexemes[literalIndex];
                const second = LiteralParser.parseFromLexeme(lexemes, literalIndex);
                // Preserve comments that belong to the literal part of typed literal expressions.
                this.transferPositionedComments(literalLexeme, second.value);
                const result = new UnaryExpression(lexemes[idx].value, second.value);
                this.transferPositionedComments(current, result);
                return { value: result, newIndex: second.newIndex };
            }
            this.transferPositionedComments(current, first.value);
            return first;
        }

        if (current.type & TokenType.Identifier) {
            const { namespaces, name, newIndex } = FullNameParser.parseFromLexeme(lexemes, idx);
            // Namespace is also recognized as Identifier.
            // Since functions and types, as well as columns (tables), can have namespaces,
            // it is necessary to determine by the last element of the identifier.
            const lastNameLexeme = lexemes[newIndex - 1];
            if (lastNameLexeme.type & TokenType.Function) {
                const result = FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                this.transferPositionedComments(current, result.value);
                return result;
            }
            if (lastNameLexeme.type & TokenType.Type) {
                // Handle Type tokens that also have Identifier flag
                if (newIndex < lexemes.length && (lexemes[newIndex].type & TokenType.OpenParen)) {
                    return this.parseTypeConstructorOrFunctionCall(lexemes, idx, newIndex, name.name, current);
                }
                // Handle standalone type tokens
                const typeValue = new TypeValue(namespaces, name);
                this.transferPositionedComments(current, typeValue);
                return { value: typeValue, newIndex };
            }
            const value = new ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }

        if (current.type & TokenType.Literal) {
            const result = LiteralParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.OpenParen) {
            const result = ParenExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.Function) {
            const result = FunctionExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.Operator) {
            const result = UnaryExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.Parameter) {
            const result = ParameterExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.StringSpecifier) {
            const result = StringSpecifierExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.Command) {
            const result = CommandExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }

        if (current.type & TokenType.OpenBracket) {
            // SQLServer escape identifier format. e.g. [dbo] or [dbo].[table]
            const { namespaces, name, newIndex } = FullNameParser.parseFromLexeme(lexemes, idx);
            const value = new ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }

        if (current.type & TokenType.Type) {
            const { namespaces, name, newIndex } = FullNameParser.parseFromLexeme(lexemes, idx);
            // Check if this type token is followed by an opening parenthesis
            if (newIndex < lexemes.length && (lexemes[newIndex].type & TokenType.OpenParen)) {
                return this.parseTypeConstructorOrFunctionCall(lexemes, idx, newIndex, name.name, current);
            }
            // Handle standalone type tokens
            const value = new TypeValue(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }

        throw ParseError.fromUnparsedLexemes(lexemes, idx, `[ValueParser] Invalid lexeme.`);
    }

    /**
     * Parse a delimited argument list such as `(a, b)`.
     *
     * Returns an empty `ValueList` for `()`, a `*` column reference for `(*)`,
     * the single argument itself when there is exactly one, and a `ValueList`
     * otherwise. Comments attached to the opening token are moved onto the
     * first argument.
     *
     * @param openToken Token type that opens the list (compared with `===`).
     * @param closeToken Token type that closes the list (compared with `===`).
     * @param lexemes Array of lexemes.
     * @param index Index of the opening token.
     * @returns `{ value, newIndex }`.
     * @throws ParseError (via `ParseError.fromUnparsedLexemes`) when the
     *         opening or closing token is missing.
     */
    static parseArgument(openToken, closeToken, lexemes, index) {
        let idx = index;
        const args = [];

        // Check for opening parenthesis
        if (!(idx < lexemes.length && lexemes[idx].type === openToken)) {
            throw ParseError.fromUnparsedLexemes(lexemes, index, `Expected opening parenthesis.`);
        }

        // Keep the opening token: its comments are attached to the first argument.
        const openParenToken = lexemes[idx];
        idx++;

        if (idx < lexemes.length && lexemes[idx].type === closeToken) {
            // If there are no arguments, return an empty ValueList
            idx++;
            return { value: new ValueList([]), newIndex: idx };
        }

        // If the next element is `*`, treat `*` as an Identifier
        if (idx < lexemes.length && lexemes[idx].value === "*") {
            const wildcard = new ColumnReference(null, "*");
            if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
                // Convert "after" positioned comments from opening paren to "before" comments for the argument
                const beforeComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
                if (beforeComments.length > 0) {
                    wildcard.positionedComments = beforeComments.map(pc => ({
                        position: 'before',
                        comments: pc.comments
                    }));
                }
            } else if (openParenToken.comments && openParenToken.comments.length > 0) {
                wildcard.comments = openParenToken.comments;
            }
            idx++;
            // The next element must be closeToken
            if (idx < lexemes.length && lexemes[idx].type === closeToken) {
                idx++;
                return { value: wildcard, newIndex: idx };
            }
            throw ParseError.fromUnparsedLexemes(lexemes, idx, `Expected closing parenthesis after wildcard '*'.`);
        }

        // Parse the value inside
        const result = this.parseFromLexeme(lexemes, idx);
        idx = result.newIndex;

        // Transfer opening paren comments to the first argument
        if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
            // Convert "after" positioned comments from opening paren to "before" comments for the argument
            const afterComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
            if (afterComments.length > 0) {
                const beforeComments = afterComments.map(pc => ({
                    position: 'before',
                    comments: pc.comments
                }));
                // Merge with existing positioned comments
                if (result.value.positionedComments) {
                    result.value.positionedComments = [...beforeComments, ...result.value.positionedComments];
                } else {
                    result.value.positionedComments = beforeComments;
                }
            }
        } else if (openParenToken.comments && openParenToken.comments.length > 0) {
            // Fall back to legacy comments
            if (result.value.comments) {
                result.value.comments = openParenToken.comments.concat(result.value.comments);
            } else {
                result.value.comments = openParenToken.comments;
            }
        }
        args.push(result.value);

        // Continue reading if the next element is a comma
        while (idx < lexemes.length && (lexemes[idx].type & TokenType.Comma)) {
            idx++;
            const argResult = this.parseFromLexeme(lexemes, idx);
            idx = argResult.newIndex;
            args.push(argResult.value);
        }

        // Check for closing parenthesis
        if (idx < lexemes.length && lexemes[idx].type === closeToken) {
            idx++;
            if (args.length === 1) {
                // Return as is if there is only one argument
                return { value: args[0], newIndex: idx };
            }
            // Create ValueList if there are multiple arguments
            return { value: new ValueList(args), newIndex: idx };
        }
        throw ParseError.fromUnparsedLexemes(lexemes, idx, `Missing closing parenthesis.`);
    }

    /**
     * Parse postfix array access operations [index] or [start:end]
     * @param lexemes Array of lexemes
     * @param index Current index
     * @param baseExpression The base expression to apply array access to
     * @returns Result with potentially modified expression and new index
     * @throws Error when the bracket content is missing, empty, or not
     *         terminated by `]`.
     */
    static parseArrayAccess(lexemes, index, baseExpression) {
        let idx = index;
        let result = baseExpression;

        // Check for array access syntax [...]
        while (idx < lexemes.length && (lexemes[idx].type & TokenType.OpenBracket)) {
            // SQL Server bracket identifier syntax is not array access; leave it to the caller.
            if (this.isSqlServerBracketIdentifier(lexemes, idx)) {
                break;
            }
            idx++; // consume opening bracket
            if (idx >= lexemes.length) {
                throw new Error(`Expected array index or slice after '[' at index ${idx - 1}`);
            }
            // Check for empty brackets []
            if (lexemes[idx].type & TokenType.CloseBracket) {
                throw new Error(`Empty array access brackets not supported at index ${idx}`);
            }

            let startExpr = null;
            let isSlice = false;

            // Parse the first part (could be start of slice or single index)
            if (lexemes[idx].type & TokenType.Operator && lexemes[idx].value === ":") {
                // Starts with colon [:end] - start is null
                isSlice = true;
                idx++; // consume colon
            } else {
                // Parse the first expression with higher precedence than colon so
                // that a following ':' is left for the slice check below.
                const colonPrecedence = OperatorPrecedence.getPrecedence(":");
                const firstResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1);
                startExpr = firstResult.value;
                idx = firstResult.newIndex;
                // Check if next token is colon
                if (idx < lexemes.length && lexemes[idx].type & TokenType.Operator && lexemes[idx].value === ":") {
                    isSlice = true;
                    idx++; // consume colon
                }
            }

            if (isSlice) {
                // This is a slice expression [start:end]
                let endExpr = null;
                // Check if there's an end expression or if it's an open slice like [1:]
                if (idx < lexemes.length && !(lexemes[idx].type & TokenType.CloseBracket)) {
                    const colonPrecedence = OperatorPrecedence.getPrecedence(":");
                    const endResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1);
                    endExpr = endResult.value;
                    idx = endResult.newIndex;
                }
                // Expect closing bracket
                if (idx >= lexemes.length || !(lexemes[idx].type & TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array slice at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ArraySliceExpression(result, startExpr, endExpr);
            } else {
                // This is a single index access [index]
                // Need to parse the full expression if it wasn't already parsed
                if (!startExpr) {
                    const indexResult = this.parseFromLexeme(lexemes, idx);
                    startExpr = indexResult.value;
                    idx = indexResult.newIndex;
                }
                // Expect closing bracket
                if (idx >= lexemes.length || !(lexemes[idx].type & TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array index at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ArrayIndexExpression(result, startExpr);
            }
        }
        return { value: result, newIndex: idx };
    }

    /**
     * Check if the bracket at the given index represents SQL Server bracket identifier syntax.
     * Returns true when a closing bracket exists and every token between the
     * brackets is an identifier or a `.` operator (e.g. [identifier] or
     * [schema].[table]); returns false otherwise.
     *
     * NOTE(review): empty brackets `[]` also return true here, so the
     * "Empty array access brackets" error in parseArrayAccess cannot be
     * reached through this check — confirm whether that is intended.
     *
     * @param lexemes Array of lexemes
     * @param bracketIndex Index of the opening bracket
     * @returns True if the bracket content looks like a bracket identifier
     */
    static isSqlServerBracketIdentifier(lexemes, bracketIndex) {
        let idx = bracketIndex + 1; // Start after opening bracket
        if (idx >= lexemes.length) {
            return false;
        }

        // SQL Server bracket identifiers should contain only identifiers and dots
        while (idx < lexemes.length && !(lexemes[idx].type & TokenType.CloseBracket)) {
            const token = lexemes[idx];
            const isIdentifierOrDot = (token.type & TokenType.Identifier) ||
                (token.type & TokenType.Operator && token.value === ".");
            if (!isIdentifierOrDot) {
                // Anything else (numbers, expressions, colons) means array access
                return false;
            }
            idx++;
        }

        // If we reached the end without finding a closing bracket, it's malformed
        if (idx >= lexemes.length) {
            return false;
        }

        // The scan above already proved that the content consists solely of
        // identifiers and dots, so this is treated as bracket identifier
        // syntax whether or not a '.' follows the closing bracket.
        return true;
    }

    /**
     * Determines if a type token followed by parentheses is a type constructor or function call
     * @param lexemes Array of lexemes
     * @param openParenIndex Index of the opening parenthesis
     * @param typeName Name of the type/function
     * @returns True if this is a type constructor, false if it's a function call
     */
    static isTypeConstructor(lexemes, openParenIndex, typeName) {
        // These are always type constructors regardless of content
        const alwaysTypeConstructors = [
            'NUMERIC', 'DECIMAL', 'VARCHAR', 'CHAR', 'CHARACTER',
            'TIMESTAMP', 'TIME', 'INTERVAL'
        ];
        const upperTypeName = typeName.toUpperCase();
        if (alwaysTypeConstructors.includes(upperTypeName)) {
            return true;
        }

        // For DATE, check if the first argument is a string literal (function) or not (type)
        if (upperTypeName === 'DATE') {
            const firstArgIndex = openParenIndex + 1;
            if (firstArgIndex < lexemes.length) {
                const firstArg = lexemes[firstArgIndex];
                const isStringLiteral = (firstArg.type & TokenType.Literal) &&
                    typeof firstArg.value === 'string' &&
                    Number.isNaN(Number(firstArg.value));
                // If first argument is a string literal, it's a function call
                // DATE('2025-01-01') -> function
                // DATE(6) -> type constructor
                return !isStringLiteral;
            }
        }

        // Default: assume it's a function call for ambiguous cases
        return false;
    }
}
//# sourceMappingURL=ValueParser.js.map