UNPKG

rawsql-ts

Version:

[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.

github.com/mk3008/rawsql-ts/tree/main/packages/core

mk3008/rawsql-ts

428 lines • 22.5 kB

JavaScript

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ValueParser = void 0; const Lexeme_1 = require("../models/Lexeme"); const ValueComponent_1 = require("../models/ValueComponent"); const SqlTokenizer_1 = require("./SqlTokenizer"); const IdentifierParser_1 = require("./IdentifierParser"); const LiteralParser_1 = require("./LiteralParser"); const ParenExpressionParser_1 = require("./ParenExpressionParser"); const UnaryExpressionParser_1 = require("./UnaryExpressionParser"); const ParameterExpressionParser_1 = require("./ParameterExpressionParser"); const StringSpecifierExpressionParser_1 = require("./StringSpecifierExpressionParser"); const CommandExpressionParser_1 = require("./CommandExpressionParser"); const FunctionExpressionParser_1 = require("./FunctionExpressionParser"); const FullNameParser_1 = require("./FullNameParser"); const ParseError_1 = require("./ParseError"); const OperatorPrecedence_1 = require("../utils/OperatorPrecedence"); class ValueParser { // Parse SQL string to AST (was: parse) static parse(query) { const tokenizer = new SqlTokenizer_1.SqlTokenizer(query); // Initialize tokenizer const lexemes = tokenizer.readLexmes(); // Get tokens // Parse const result = this.parseFromLexeme(lexemes, 0); // Error if there are remaining tokens if (result.newIndex < lexemes.length) { throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, result.newIndex, `[ValueParser]`); } return result.value; } /** * Parse from lexeme array with logical operator controls */ static parseFromLexeme(lexemes, index, allowAndOperator = true, allowOrOperator = true) { return this.parseExpressionWithPrecedence(lexemes, index, 0, allowAndOperator, allowOrOperator); } /** * Parse expressions with operator precedence handling * Uses precedence climbing algorithm */ static parseExpressionWithPrecedence(lexemes, index, minPrecedence, allowAndOperator = true, allowOrOperator = true) { let idx = index; // Parse the primary expression (left side) 
const comment = lexemes[idx].comments; const left = this.parseItem(lexemes, idx); left.value.comments = comment; idx = left.newIndex; let result = left.value; // Handle postfix array access ([...]) const arrayAccessResult = this.parseArrayAccess(lexemes, idx, result); result = arrayAccessResult.value; idx = arrayAccessResult.newIndex; // Process operators with precedence while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Operator)) { const operatorToken = lexemes[idx]; const operator = operatorToken.value; // Check if this operator is allowed if (!allowAndOperator && operator.toLowerCase() === "and") { break; } if (!allowOrOperator && operator.toLowerCase() === "or") { break; } // Get operator precedence const precedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(operator); // If this operator has lower precedence than minimum, stop if (precedence < minPrecedence) { break; } idx++; // consume operator // Handle BETWEEN specially as it has different syntax if (OperatorPrecedence_1.OperatorPrecedence.isBetweenOperator(operator)) { const betweenResult = FunctionExpressionParser_1.FunctionExpressionParser.parseBetweenExpression(lexemes, idx, result, operator.toLowerCase().includes('not')); result = betweenResult.value; idx = betweenResult.newIndex; continue; } // Handle :: (cast) operator specially if (operator === "::") { const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx); result = new ValueComponent_1.CastExpression(result, typeValue.value); idx = typeValue.newIndex; continue; } // For left-associative operators, use precedence + 1 const nextMinPrecedence = precedence + 1; // Parse the right-hand side with higher precedence const rightResult = this.parseExpressionWithPrecedence(lexemes, idx, nextMinPrecedence, allowAndOperator, allowOrOperator); idx = rightResult.newIndex; // Create binary expression directly result = new ValueComponent_1.BinaryExpression(result, operator, 
rightResult.value); } return { value: result, newIndex: idx }; } static parseItem(lexemes, index) { let idx = index; // Range check if (idx >= lexemes.length) { throw new Error(`Unexpected end of lexemes at index ${index}`); } const current = lexemes[idx]; if (current.type & Lexeme_1.TokenType.Identifier && current.type & Lexeme_1.TokenType.Operator && current.type & Lexeme_1.TokenType.Type) { // Check if this is followed by parentheses (function call) if (idx + 1 < lexemes.length && (lexemes[idx + 1].type & Lexeme_1.TokenType.OpenParen)) { // Determine if this is a type constructor or function call if (this.isTypeConstructor(lexemes, idx + 1, current.value)) { // Type constructor const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx); return { value: typeValue.value, newIndex: typeValue.newIndex }; } else { // Function call return FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx); } } // Typed literal format pattern // e.g., `interval '2 days'` const first = IdentifierParser_1.IdentifierParser.parseFromLexeme(lexemes, idx); if (first.newIndex >= lexemes.length) { return first; } const next = lexemes[first.newIndex]; if (next.type & Lexeme_1.TokenType.Literal) { // Typed literal format const second = LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, first.newIndex); const result = new ValueComponent_1.UnaryExpression(lexemes[idx].value, second.value); return { value: result, newIndex: second.newIndex }; } return first; } else if (current.type & Lexeme_1.TokenType.Identifier) { const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx); // Namespace is also recognized as Identifier. // Since functions and types, as well as columns (tables), can have namespaces, // it is necessary to determine by the last element of the identifier. 
if (lexemes[newIndex - 1].type & Lexeme_1.TokenType.Function) { return FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx); } else if (lexemes[newIndex - 1].type & Lexeme_1.TokenType.Type) { // Handle Type tokens that also have Identifier flag if (newIndex < lexemes.length && (lexemes[newIndex].type & Lexeme_1.TokenType.OpenParen)) { // Determine if this is a type constructor or function call if (this.isTypeConstructor(lexemes, newIndex, name.name)) { // Type constructor (NUMERIC(10,2), VARCHAR(50), etc.) const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx); return { value: typeValue.value, newIndex: typeValue.newIndex }; } else { // Function call (DATE('2025-01-01'), etc.) return FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx); } } else { // Handle standalone type tokens const value = new ValueComponent_1.TypeValue(namespaces, name); return { value, newIndex }; } } const value = new ValueComponent_1.ColumnReference(namespaces, name); return { value, newIndex }; } else if (current.type & Lexeme_1.TokenType.Literal) { return LiteralParser_1.LiteralParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.OpenParen) { return ParenExpressionParser_1.ParenExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.Function) { return FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.Operator) { return UnaryExpressionParser_1.UnaryExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.Parameter) { return ParameterExpressionParser_1.ParameterExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.StringSpecifier) { return StringSpecifierExpressionParser_1.StringSpecifierExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & 
Lexeme_1.TokenType.Command) { return CommandExpressionParser_1.CommandExpressionParser.parseFromLexeme(lexemes, idx); } else if (current.type & Lexeme_1.TokenType.OpenBracket) { // SQLServer escape identifier format. e.g. [dbo] or [dbo].[table] const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx); const value = new ValueComponent_1.ColumnReference(namespaces, name); return { value, newIndex }; } else if (current.type & Lexeme_1.TokenType.Type) { // Check if this type token is followed by an opening parenthesis const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx); if (newIndex < lexemes.length && (lexemes[newIndex].type & Lexeme_1.TokenType.OpenParen)) { // Determine if this is a type constructor or function call if (this.isTypeConstructor(lexemes, newIndex, name.name)) { // Type constructor (NUMERIC(10,2), VARCHAR(50), etc.) const typeValue = FunctionExpressionParser_1.FunctionExpressionParser.parseTypeValue(lexemes, idx); return { value: typeValue.value, newIndex: typeValue.newIndex }; } else { // Function call (DATE('2025-01-01'), etc.) 
return FunctionExpressionParser_1.FunctionExpressionParser.parseFromLexeme(lexemes, idx); } } else { // Handle standalone type tokens const value = new ValueComponent_1.TypeValue(namespaces, name); return { value, newIndex }; } } throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `[ValueParser] Invalid lexeme.`); } static parseArgument(openToken, closeToken, lexemes, index) { let idx = index; const args = []; // Check for opening parenthesis if (idx < lexemes.length && lexemes[idx].type === openToken) { idx++; if (idx < lexemes.length && lexemes[idx].type === closeToken) { // If there are no arguments, return an empty ValueList idx++; return { value: new ValueComponent_1.ValueList([]), newIndex: idx }; } // If the next element is `*`, treat `*` as an Identifier if (idx < lexemes.length && lexemes[idx].value === "*") { const wildcard = new ValueComponent_1.ColumnReference(null, "*"); idx++; // The next element must be closeToken if (idx < lexemes.length && lexemes[idx].type === closeToken) { idx++; return { value: wildcard, newIndex: idx }; } else { throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Expected closing parenthesis after wildcard '*'.`); } } // Parse the value inside const result = this.parseFromLexeme(lexemes, idx); idx = result.newIndex; args.push(result.value); // Continue reading if the next element is a comma while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Comma)) { idx++; const argResult = this.parseFromLexeme(lexemes, idx); idx = argResult.newIndex; args.push(argResult.value); } // Check for closing parenthesis if (idx < lexemes.length && lexemes[idx].type === closeToken) { idx++; if (args.length === 1) { // Return as is if there is only one argument return { value: args[0], newIndex: idx }; } // Create ValueCollection if there are multiple arguments const value = new ValueComponent_1.ValueList(args); return { value, newIndex: idx }; } else { throw 
ParseError_1.ParseError.fromUnparsedLexemes(lexemes, idx, `Missing closing parenthesis.`); } } throw ParseError_1.ParseError.fromUnparsedLexemes(lexemes, index, `Expected opening parenthesis.`); } /** * Parse postfix array access operations [index] or [start:end] * @param lexemes Array of lexemes * @param index Current index * @param baseExpression The base expression to apply array access to * @returns Result with potentially modified expression and new index */ static parseArrayAccess(lexemes, index, baseExpression) { let idx = index; let result = baseExpression; // Check for array access syntax [...] while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.OpenBracket)) { // Check if this is SQL Server bracket identifier by looking ahead if (this.isSqlServerBracketIdentifier(lexemes, idx)) { break; // This is SQL Server bracket syntax, not array access } idx++; // consume opening bracket if (idx >= lexemes.length) { throw new Error(`Expected array index or slice after '[' at index ${idx - 1}`); } // Check for empty brackets [] if (lexemes[idx].type & Lexeme_1.TokenType.CloseBracket) { throw new Error(`Empty array access brackets not supported at index ${idx}`); } // First, check if this is a slice by looking for colon pattern let startExpr = null; let isSlice = false; // Parse the first part (could be start of slice or single index) if (lexemes[idx].type & Lexeme_1.TokenType.Operator && lexemes[idx].value === ":") { // Starts with colon [:end] - start is null isSlice = true; idx++; // consume colon } else { // Parse the first expression (but with higher precedence than colon) const colonPrecedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(":"); const firstResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1); startExpr = firstResult.value; idx = firstResult.newIndex; // Check if next token is colon if (idx < lexemes.length && lexemes[idx].type & Lexeme_1.TokenType.Operator && lexemes[idx].value === ":") { 
isSlice = true; idx++; // consume colon } } if (isSlice) { // This is a slice expression [start:end] let endExpr = null; // Check if there's an end expression or if it's an open slice like [1:] if (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) { const colonPrecedence = OperatorPrecedence_1.OperatorPrecedence.getPrecedence(":"); const endResult = this.parseExpressionWithPrecedence(lexemes, idx, colonPrecedence + 1); endExpr = endResult.value; idx = endResult.newIndex; } // Expect closing bracket if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) { throw new Error(`Expected ']' after array slice at index ${idx}`); } idx++; // consume closing bracket // Create ArraySliceExpression result = new ValueComponent_1.ArraySliceExpression(result, startExpr, endExpr); } else { // This is a single index access [index] // Need to parse the full expression if it wasn't already parsed if (!startExpr) { const indexResult = this.parseFromLexeme(lexemes, idx); startExpr = indexResult.value; idx = indexResult.newIndex; } // Expect closing bracket if (idx >= lexemes.length || !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) { throw new Error(`Expected ']' after array index at index ${idx}`); } idx++; // consume closing bracket // Create ArrayIndexExpression result = new ValueComponent_1.ArrayIndexExpression(result, startExpr); } } return { value: result, newIndex: idx }; } /** * Check if the bracket at the given index represents SQL Server bracket identifier syntax * Returns true if this looks like [identifier] or [schema].[table] syntax */ static isSqlServerBracketIdentifier(lexemes, bracketIndex) { let idx = bracketIndex + 1; // Start after opening bracket if (idx >= lexemes.length) return false; // SQL Server bracket identifiers should contain only identifiers and dots while (idx < lexemes.length && !(lexemes[idx].type & Lexeme_1.TokenType.CloseBracket)) { const token = lexemes[idx]; // Allow identifiers and 
dots in SQL Server bracket syntax if ((token.type & Lexeme_1.TokenType.Identifier) || (token.type & Lexeme_1.TokenType.Operator && token.value === ".")) { idx++; continue; } // If we find anything else (numbers, expressions, colons), it's array access return false; } // If we reached the end without finding a closing bracket, it's malformed if (idx >= lexemes.length) return false; // If the closing bracket is immediately followed by a dot, it's likely SQL Server syntax // like [dbo].[table] const closingBracketIndex = idx; if (closingBracketIndex + 1 < lexemes.length) { const nextToken = lexemes[closingBracketIndex + 1]; if (nextToken.type & Lexeme_1.TokenType.Operator && nextToken.value === ".") { return true; } } // Check if the content looks like a simple identifier (no colons, expressions, etc.) idx = bracketIndex + 1; let hasOnlyIdentifiersAndDots = true; while (idx < closingBracketIndex) { const token = lexemes[idx]; if (!((token.type & Lexeme_1.TokenType.Identifier) || (token.type & Lexeme_1.TokenType.Operator && token.value === "."))) { hasOnlyIdentifiersAndDots = false; break; } idx++; } // If it contains only identifiers and dots, it's likely SQL Server syntax return hasOnlyIdentifiersAndDots; } /** * Determines if a type token followed by parentheses is a type constructor or function call * @param lexemes Array of lexemes * @param openParenIndex Index of the opening parenthesis * @param typeName Name of the type/function * @returns True if this is a type constructor, false if it's a function call */ static isTypeConstructor(lexemes, openParenIndex, typeName) { // These are always type constructors regardless of content const alwaysTypeConstructors = [ 'NUMERIC', 'DECIMAL', 'VARCHAR', 'CHAR', 'CHARACTER', 'TIMESTAMP', 'TIME', 'INTERVAL' ]; const upperTypeName = typeName.toUpperCase(); if (alwaysTypeConstructors.includes(upperTypeName)) { return true; } // For DATE, check if the first argument is a string literal (function) or not (type) if (upperTypeName 
=== 'DATE') { const firstArgIndex = openParenIndex + 1; if (firstArgIndex < lexemes.length) { const firstArg = lexemes[firstArgIndex]; const isStringLiteral = (firstArg.type & Lexeme_1.TokenType.Literal) && typeof firstArg.value === 'string' && isNaN(Number(firstArg.value)); // If first argument is a string literal, it's a function call // DATE('2025-01-01') -> function // DATE(6) -> type constructor return !isStringLiteral; } } // Default: assume it's a function call for ambiguous cases return false; } } exports.ValueParser = ValueParser; //# sourceMappingURL=ValueParser.js.map