UNPKG

rawsql-ts

Version:

High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.

github.com/mk3008/rawsql-ts/tree/main/packages/core

mk3008/rawsql-ts

562 lines • 30.1 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ValueParser = void 0;
const Lexeme_1 = require("../models/Lexeme");
const ValueComponent_1 = require("../models/ValueComponent");
const SqlTokenizer_1 = require("./SqlTokenizer");
const IdentifierParser_1 = require("./IdentifierParser");
const LiteralParser_1 = require("./LiteralParser");
const ParenExpressionParser_1 = require("./ParenExpressionParser");
const UnaryExpressionParser_1 = require("./UnaryExpressionParser");
const ParameterExpressionParser_1 = require("./ParameterExpressionParser");
const StringSpecifierExpressionParser_1 = require("./StringSpecifierExpressionParser");
const CommandExpressionParser_1 = require("./CommandExpressionParser");
const FunctionExpressionParser_1 = require("./FunctionExpressionParser");
const FullNameParser_1 = require("./FullNameParser");
const ParseError_1 = require("./ParseError");
const OperatorPrecedence_1 = require("../utils/OperatorPrecedence");
/**
 * Parser for SQL value expressions (columns, literals, function calls,
 * operators, casts, array access, ...). All members are static; every
 * `parse*FromLexeme`-style method returns `{ value, newIndex }` where
 * `newIndex` is the index of the first lexeme that was not consumed.
 */
class ValueParser {
    /**
     * Tokenize a SQL value expression and parse it into a value component.
     *
     * @param query SQL text containing a single value expression
     * @returns The parsed value component
     * @throws ParseError when lexemes remain after the expression
     * @throws Error when the input ends where an operand is expected
     */
    static parse(query) {
        const tokenizer = new SqlTokenizer_1.SqlTokenizer(query);
        const lexemes = tokenizer.readLexmes();
        const result = this.parseFromLexeme(lexemes, 0);
        // Anything left over means the input was not a single expression.
        if (result.newIndex < lexemes.length) {
            throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, result.newIndex, `[ValueParser]`);
        }
        return result.value;
    }
    /**
     * Parse an expression from a lexeme array, optionally refusing to consume
     * the logical operators AND / OR (parsing stops in front of them instead).
     *
     * @param lexemes Array of lexemes
     * @param index Index of the first lexeme of the expression
     * @param allowAndOperator When false, stop before an `and` operator
     * @param allowOrOperator When false, stop before an `or` operator
     * @returns `{ value, newIndex }`
     */
    static parseFromLexeme(lexemes, index, allowAndOperator = true, allowOrOperator = true) {
        return this.parseExpressionWithPrecedence(lexemes, index, 0, allowAndOperator, allowOrOperator);
    }
    /**
     * Parse an expression using precedence climbing.
     *
     * A primary item is parsed first (plus any postfix array access), then
     * operator lexemes are consumed for as long as their precedence is at
     * least `minPrecedence`. BETWEEN and the `::` cast have dedicated
     * handling; every other operator produces a BinaryExpression whose right
     * operand is parsed with `precedence + 1` (left-associative).
     *
     * @param lexemes Array of lexemes
     * @param index Index of the first lexeme of the expression
     * @param minPrecedence Operators below this precedence end the expression
     * @param allowAndOperator When false, stop before an `and` operator
     * @param allowOrOperator When false, stop before an `or` operator
     * @returns `{ value, newIndex }`
     * @throws Error when `index` is past the end of `lexemes`
     */
    static parseExpressionWithPrecedence(lexemes, index, minPrecedence, allowAndOperator = true, allowOrOperator = true) {
        let idx = index;
        // Guard before touching lexemes[idx]: without it, truncated input
        // (e.g. a trailing operator or comma) fails with an opaque TypeError
        // on the property reads below instead of a descriptive parse error.
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }
        // Capture the leading lexeme's comments before parsing the operand.
        const comment = lexemes[idx].comments;
        const positionedComments = lexemes[idx].positionedComments;
        const left = this.parseItem(lexemes, idx);
        if (positionedComments && positionedComments.length > 0 && !left.value.positionedComments) {
            // The component did not pick up positioned comments itself.
            left.value.positionedComments = positionedComments;
        }
        else if (left.value.comments === null && comment && comment.length > 0) {
            // Fall back to legacy comments.
            left.value.comments = comment;
        }
        idx = left.newIndex;
        let result = left.value;
        // Postfix array access ([...]) binds to the primary item.
        const arrayAccessResult = this.parseArrayAccess(lexemes, idx, result);
        result = arrayAccessResult.value;
        idx = arrayAccessResult.newIndex;
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Operator)) {
            const operatorToken = lexemes[idx];
            const operator = operatorToken.value;
            // Leave disallowed logical operators for the caller.
            if (!allowAndOperator && operator.toLowerCase() === "and") {
                break;
            }
            if (!allowOrOperator && operator.toLowerCase() === "or") {
                break;
            }
            const precedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(operator);
            if (precedence < minPrecedence) {
                break;
            }
            idx++; // consume operator
            // BETWEEN has its own `x BETWEEN a AND b` syntax.
            if (OperatorPrecedence_1.OperatorPrecedence.isBetweenOperator(operator)) {
                const betweenResult = FunctionExpressionParser_1.FunctionExpressionParser.parseBetweenExpression(lexemes, idx, result, operator.toLowerCase().includes('not'));
                result = betweenResult.value;
                idx = betweenResult.newIndex;
                continue;
            }
            // `::` is followed by a type, not by a value expression.
            if (operator === "::") {
                const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
                result = new ValueComponent_1.CastExpression(result, typeValue.value);
                idx = typeValue.newIndex;
                continue;
            }
            // Left-associative: the right operand only absorbs tighter operators.
            const nextMinPrecedence = precedence + 1;
            const rightResult = this.parseExpressionWithPrecedence(lexemes, idx, nextMinPrecedence, allowAndOperator, allowOrOperator);
            idx = rightResult.newIndex;
            const binaryExpr = new ValueComponent_1.BinaryExpression(result, operator, rightResult.value);
            // Keep comments that were attached to the operator token itself.
            if (operatorToken.comments && operatorToken.comments.length > 0) {
                binaryExpr.operator.comments = operatorToken.comments;
            }
            if (operatorToken.positionedComments && operatorToken.positionedComments.length > 0) {
                binaryExpr.operator.positionedComments = operatorToken.positionedComments;
            }
            result = binaryExpr;
        }
        return { value: result, newIndex: idx };
    }
    /**
     * Copy comments from a lexeme onto a value component.
     *
     * Positioned comments are cloned (the `comments` arrays are copied):
     * 'before' entries are prepended to the component's existing positioned
     * comments and 'after' entries are appended. If the lexeme has positioned
     * comments but none are 'before'/'after', they are copied only when the
     * component has none yet. Legacy `comments` are used only when the lexeme
     * has no positioned comments and the component's `comments` is null.
     *
     * @param lexeme Source lexeme
     * @param value Target value component (mutated)
     */
    static transferPositionedComments(lexeme, value) {
        const positioned = lexeme.positionedComments;
        if (!positioned || positioned.length === 0) {
            // Fall back to legacy comments if positioned comments aren't available.
            if (value.comments === null && lexeme.comments && lexeme.comments.length > 0) {
                value.comments = lexeme.comments;
            }
            return;
        }
        const cloneComment = (comment) => ({
            position: comment.position,
            comments: [...comment.comments],
        });
        const beforeComments = positioned.filter(comment => comment.position === 'before');
        const afterComments = positioned.filter(comment => comment.position === 'after');
        if (beforeComments.length > 0) {
            const clonedBefore = beforeComments.map(cloneComment);
            value.positionedComments = value.positionedComments
                ? [...clonedBefore, ...value.positionedComments]
                : clonedBefore;
        }
        if (afterComments.length > 0) {
            const clonedAfter = afterComments.map(cloneComment);
            value.positionedComments = value.positionedComments
                ? [...value.positionedComments, ...clonedAfter]
                : clonedAfter;
        }
        // Preserve other comment positions when no before/after segments were processed.
        if (!beforeComments.length && !afterComments.length && !value.positionedComments) {
            value.positionedComments = positioned.map(cloneComment);
        }
    }
    /**
     * Parse a single primary item (no binary operators) starting at `index`,
     * dispatching on the token-type flags of the current lexeme.
     *
     * @param lexemes Array of lexemes
     * @param index Index of the lexeme to parse
     * @returns `{ value, newIndex }`
     * @throws Error when `index` is past the end of `lexemes`
     * @throws ParseError when the lexeme type is not handled
     */
    static parseItem(lexemes, index) {
        const idx = index;
        if (idx >= lexemes.length) {
            throw new Error(`Unexpected end of lexemes at index ${index}`);
        }
        const current = lexemes[idx];
        // Tokens flagged as Identifier, Operator and Type at once,
        // e.g. the keyword in a typed literal such as `interval '2 days'`.
        if ((current.type & Lexeme_1.TokenType.Identifier) && (current.type & Lexeme_1.TokenType.Operator) && (current.type & Lexeme_1.TokenType.Type)) {
            // Followed by parentheses: type constructor or function call.
            if (idx + 1 < lexemes.length && (lexemes[idx + 1].type & Lexeme_1.TokenType.OpenParen)) {
                return this.parseTypeConstructorOrFunction(lexemes, idx, idx + 1, current.value);
            }
            const first = IdentifierParser_1.IdentifierParser.parseFromLexeme(lexemes, idx);
            if (first.newIndex >= lexemes.length) {
                this.transferPositionedComments(current, first.value);
                return first;
            }
            const literalLexeme = lexemes[first.newIndex];
            if (literalLexeme.type & Lexeme_1.TokenType.Literal) {
                // Typed literal format: keyword followed by a literal.
                const second = LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, first.newIndex);
                // Preserve comments that belong to the literal part.
                this.transferPositionedComments(literalLexeme, second.value);
                const result = new ValueComponent_1.UnaryExpression(current.value, second.value);
                this.transferPositionedComments(current, result);
                return { value: result, newIndex: second.newIndex };
            }
            this.transferPositionedComments(current, first.value);
            return first;
        }
        if (current.type & Lexeme_1.TokenType.Identifier) {
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            // Namespace parts are also Identifiers, and functions, types and
            // columns can all be namespace-qualified, so the kind is decided
            // by the last element of the full name.
            const lastType = lexemes[newIndex - 1].type;
            if (lastType & Lexeme_1.TokenType.Function) {
                const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
                this.transferPositionedComments(current, result.value);
                return result;
            }
            if (lastType & Lexeme_1.TokenType.Type) {
                return this.parseTypeToken(lexemes, idx, namespaces, name, newIndex);
            }
            const value = new ValueComponent_1.ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }
        if (current.type & Lexeme_1.TokenType.Literal) {
            const result = LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.OpenParen) {
            const result = ParenExpressionParser_1.ParenExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.Function) {
            const result = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.Operator) {
            const result = UnaryExpressionParser_1.UnaryExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.Parameter) {
            const result = ParameterExpressionParser_1.ParameterExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.StringSpecifier) {
            const result = StringSpecifierExpressionParser_1.StringSpecifierExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.Command) {
            const result = CommandExpressionParser_1.CommandExpressionParser.parseFromLexeme(lexemes, idx);
            this.transferPositionedComments(current, result.value);
            return result;
        }
        if (current.type & Lexeme_1.TokenType.OpenBracket) {
            // SQLServer escape identifier format. e.g. [dbo] or [dbo].[table]
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            const value = new ValueComponent_1.ColumnReference(namespaces, name);
            this.transferPositionedComments(current, value);
            return { value, newIndex };
        }
        if (current.type & Lexeme_1.TokenType.Type) {
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            return this.parseTypeToken(lexemes, idx, namespaces, name, newIndex);
        }
        throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `[ValueParser] Invalid lexeme.`);
    }
    /**
     * Internal helper for `parseItem`: parse a name that is directly followed
     * by an opening parenthesis, as either a type constructor
     * (NUMERIC(10,2), VARCHAR(50), ...) or a function call
     * (DATE('2025-01-01'), ...), as decided by `isTypeConstructor`.
     *
     * @param lexemes Array of lexemes
     * @param idx Index of the first lexeme of the name
     * @param openParenIndex Index of the opening parenthesis
     * @param typeName Name passed to `isTypeConstructor`
     * @returns `{ value, newIndex }`
     */
    static parseTypeConstructorOrFunction(lexemes, idx, openParenIndex, typeName) {
        const current = lexemes[idx];
        if (this.isTypeConstructor(lexemes, openParenIndex, typeName)) {
            const typeResult = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx);
            this.transferPositionedComments(current, typeResult.value);
            return { value: typeResult.value, newIndex: typeResult.newIndex };
        }
        const callResult = FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx);
        this.transferPositionedComments(current, callResult.value);
        return callResult;
    }
    /**
     * Internal helper for `parseItem`: finish parsing a full name whose last
     * element is a Type token. With a following `(` it becomes a type
     * constructor or function call; otherwise a standalone TypeValue.
     *
     * @param lexemes Array of lexemes
     * @param idx Index of the first lexeme of the name
     * @param namespaces Namespaces returned by FullNameParser
     * @param name Name returned by FullNameParser
     * @param newIndex Index just past the full name
     * @returns `{ value, newIndex }`
     */
    static parseTypeToken(lexemes, idx, namespaces, name, newIndex) {
        if (newIndex < lexemes.length && (lexemes[newIndex].type & Lexeme_1.TokenType.OpenParen)) {
            return this.parseTypeConstructorOrFunction(lexemes, idx, newIndex, name.name);
        }
        // Standalone type token.
        const value = new ValueComponent_1.TypeValue(namespaces, name);
        this.transferPositionedComments(lexemes[idx], value);
        return { value, newIndex };
    }
    /**
     * Parse a delimited, comma-separated argument list.
     *
     * Note that the delimiters are matched with strict equality against the
     * lexeme type (not a bit test).
     *
     * @param openToken Token type of the opening delimiter
     * @param closeToken Token type of the closing delimiter
     * @param lexemes Array of lexemes
     * @param index Index where the opening delimiter is expected
     * @returns `{ value, newIndex }` where `value` is an empty ValueList for
     *   `()`, a `*` ColumnReference for `(*)`, the argument itself when there
     *   is exactly one, or a ValueList of all arguments otherwise
     * @throws ParseError when a delimiter is missing or `*` is not directly
     *   followed by the closing delimiter
     */
    static parseArgument(openToken, closeToken, lexemes, index) {
        let idx = index;
        const args = [];
        if (!(idx < lexemes.length && lexemes[idx].type === openToken)) {
            throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, index, `Expected opening parenthesis.`);
        }
        // Keep the opening token: its comments are moved onto the first argument.
        const openParenToken = lexemes[idx];
        idx++;
        if (idx < lexemes.length && lexemes[idx].type === closeToken) {
            // No arguments: return an empty ValueList.
            idx++;
            return { value: new ValueComponent_1.ValueList([]), newIndex: idx };
        }
        // If the next element is `*`, treat `*` as an Identifier.
        if (idx < lexemes.length && lexemes[idx].value === "*") {
            const wildcard = new ValueComponent_1.ColumnReference(null, "*");
            if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
                // "after" comments of the opening token become "before" comments of the argument.
                const beforeComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
                if (beforeComments.length > 0) {
                    wildcard.positionedComments = beforeComments.map(pc => ({
                        position: 'before',
                        comments: pc.comments
                    }));
                }
            }
            else if (openParenToken.comments && openParenToken.comments.length > 0) {
                wildcard.comments = openParenToken.comments;
            }
            idx++;
            // The next element must be closeToken.
            if (idx < lexemes.length && lexemes[idx].type === closeToken) {
                idx++;
                return { value: wildcard, newIndex: idx };
            }
            throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Expected closing parenthesis after wildcard '*'.`);
        }
        // First argument.
        const result = this.parseFromLexeme(lexemes, idx);
        idx = result.newIndex;
        if (openParenToken.positionedComments && openParenToken.positionedComments.length > 0) {
            // "after" comments of the opening token become "before" comments of the argument.
            const afterComments = openParenToken.positionedComments.filter(pc => pc.position === 'after');
            if (afterComments.length > 0) {
                const beforeComments = afterComments.map(pc => ({
                    position: 'before',
                    comments: pc.comments
                }));
                // Merge in front of whatever the argument already carries.
                if (result.value.positionedComments) {
                    result.value.positionedComments = [...beforeComments, ...result.value.positionedComments];
                }
                else {
                    result.value.positionedComments = beforeComments;
                }
            }
        }
        else if (openParenToken.comments && openParenToken.comments.length > 0) {
            // Fall back to legacy comments.
            if (result.value.comments) {
                result.value.comments = openParenToken.comments.concat(result.value.comments);
            }
            else {
                result.value.comments = openParenToken.comments;
            }
        }
        args.push(result.value);
        // Remaining arguments, one per comma.
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Comma)) {
            idx++;
            const argResult = this.parseFromLexeme(lexemes, idx);
            idx = argResult.newIndex;
            args.push(argResult.value);
        }
        if (!(idx < lexemes.length && lexemes[idx].type === closeToken)) {
            throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Missing closing parenthesis.`);
        }
        idx++;
        if (args.length === 1) {
            // A single argument is returned as is, not wrapped in a list.
            return { value: args[0], newIndex: idx };
        }
        return { value: new ValueComponent_1.ValueList(args), newIndex: idx };
    }
    /**
     * Parse postfix array access operations `[index]` or `[start:end]`,
     * repeatedly, wrapping `baseExpression` once per bracket pair. Either
     * slice bound may be omitted (`[:end]`, `[start:]`).
     *
     * Parsing stops without consuming anything when the bracket looks like a
     * SQL Server bracket identifier (see `isSqlServerBracketIdentifier`).
     *
     * @param lexemes Array of lexemes
     * @param index Current index
     * @param baseExpression The base expression to apply array access to
     * @returns Result with potentially modified expression and new index
     * @throws Error on `[` at end of input, empty brackets, or a missing `]`
     */
    static parseArrayAccess(lexemes, index, baseExpression) {
        let idx = index;
        let result = baseExpression;
        const isColonAt = (at) => at < lexemes.length
            && (lexemes[at].type & Lexeme_1.TokenType.Operator)
            && lexemes[at].value === ":";
        // Slice bounds must not swallow the `:` separator, so they are parsed
        // with a minimum precedence just above that of the colon.
        const parseBound = (at) => {
            const colonPrecedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(":");
            return this.parseExpressionWithPrecedence(lexemes, at, colonPrecedence + 1);
        };
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.OpenBracket)) {
            if (this.isSqlServerBracketIdentifier(lexemes, idx)) {
                break; // SQL Server bracket syntax, not array access
            }
            idx++; // consume opening bracket
            if (idx >= lexemes.length) {
                throw new Error(`Expected array index or slice after '[' at index ${idx - 1}`);
            }
            if (lexemes[idx].type & Lexeme_1.TokenType.CloseBracket) {
                throw new Error(`Empty array access brackets not supported at index ${idx}`);
            }
            let startExpr = null;
            let isSlice = false;
            if (isColonAt(idx)) {
                // Starts with colon [:end] - start is null.
                isSlice = true;
                idx++; // consume colon
            }
            else {
                const firstResult = parseBound(idx);
                startExpr = firstResult.value;
                idx = firstResult.newIndex;
                if (isColonAt(idx)) {
                    isSlice = true;
                    idx++; // consume colon
                }
            }
            if (isSlice) {
                // The end bound is optional: [1:] is an open slice.
                let endExpr = null;
                if (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    const endResult = parseBound(idx);
                    endExpr = endResult.value;
                    idx = endResult.newIndex;
                }
                if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array slice at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ValueComponent_1.ArraySliceExpression(result, startExpr, endExpr);
            }
            else {
                // Single index access [index]; startExpr is always set on this path.
                if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
                    throw new Error(`Expected ']' after array index at index ${idx}`);
                }
                idx++; // consume closing bracket
                result = new ValueComponent_1.ArrayIndexExpression(result, startExpr);
            }
        }
        return { value: result, newIndex: idx };
    }
    /**
     * Check if the bracket at the given index represents SQL Server bracket
     * identifier syntax such as `[identifier]` or `[schema].[table]`.
     *
     * Returns true exactly when a closing bracket exists and every lexeme
     * between the brackets is an Identifier or a `.` operator (this includes
     * an empty pair `[]`). Any other content (numbers, expressions, colons)
     * means array access.
     *
     * NOTE(review): by this rule an index made only of identifiers, e.g.
     * `arr[i]`, is also classified as a bracket identifier.
     *
     * @param lexemes Array of lexemes
     * @param bracketIndex Index of the opening bracket
     * @returns True when the bracket content looks like an identifier
     */
    static isSqlServerBracketIdentifier(lexemes, bracketIndex) {
        let idx = bracketIndex + 1; // start after opening bracket
        while (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) {
            const token = lexemes[idx];
            const isIdentifierOrDot = (token.type & Lexeme_1.TokenType.Identifier)
                || ((token.type & Lexeme_1.TokenType.Operator) && token.value === ".");
            if (!isIdentifierOrDot) {
                return false;
            }
            idx++;
        }
        // Running off the end means there is no closing bracket (malformed).
        return idx < lexemes.length;
    }
    /**
     * Determines if a type token followed by parentheses is a type constructor or function call
     * @param lexemes Array of lexemes
     * @param openParenIndex Index of the opening parenthesis
     * @param typeName Name of the type/function (compared case-insensitively)
     * @returns True if this is a type constructor, false if it's a function call
     */
    static isTypeConstructor(lexemes, openParenIndex, typeName) {
        // These are always type constructors regardless of content.
        const alwaysTypeConstructors = [
            'NUMERIC', 'DECIMAL', 'VARCHAR', 'CHAR', 'CHARACTER',
            'TIMESTAMP', 'TIME', 'INTERVAL'
        ];
        const upperTypeName = typeName.toUpperCase();
        if (alwaysTypeConstructors.includes(upperTypeName)) {
            return true;
        }
        // DATE is ambiguous and is decided by its first argument:
        //   DATE('2025-01-01') -> function call
        //   DATE(6)            -> type constructor
        if (upperTypeName === 'DATE') {
            const firstArgIndex = openParenIndex + 1;
            if (firstArgIndex < lexemes.length) {
                const firstArg = lexemes[firstArgIndex];
                // A literal whose text is not numeric counts as a string literal.
                const isStringLiteral = (firstArg.type & Lexeme_1.TokenType.Literal) &&
                    typeof firstArg.value === 'string' &&
                    Number.isNaN(Number(firstArg.value));
                return !isStringLiteral;
            }
        }
        // Default: assume it's a function call for ambiguous cases.
        return false;
    }
}
exports.ValueParser = ValueParser;
//# sourceMappingURL=ValueParser.js.map