UNPKG

rawsql-ts

Version:

[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.

483 lines 26.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.MergeQueryParser = void 0; const MergeQuery_1 = require("../models/MergeQuery"); const Clause_1 = require("../models/Clause"); const ValueComponent_1 = require("../models/ValueComponent"); const Lexeme_1 = require("../models/Lexeme"); const SqlTokenizer_1 = require("./SqlTokenizer"); const WithClauseParser_1 = require("./WithClauseParser"); const SourceExpressionParser_1 = require("./SourceExpressionParser"); const ValueParser_1 = require("./ValueParser"); const WhereClauseParser_1 = require("./WhereClauseParser"); const FullNameParser_1 = require("./FullNameParser"); const LexemeCommentUtils_1 = require("./utils/LexemeCommentUtils"); class MergeQueryParser { /** * Parse SQL string to MergeQuery AST. * @param query SQL string */ static parse(query) { const tokenizer = new SqlTokenizer_1.SqlTokenizer(query); const lexemes = tokenizer.readLexemes(); const result = this.parseFromLexeme(lexemes, 0); if (result.newIndex < lexemes.length) { throw new Error(`Syntax error: Unexpected token "${lexemes[result.newIndex].value}" at position ${result.newIndex}. The MERGE statement is complete but there are additional tokens.`); } return result.value; } /** * Parse from lexeme array (for internal use and tests). */ static parseFromLexeme(lexemes, index) { var _a, _b, _c, _d, _e, _f, _g, _h, _j; let idx = index; // Parse optional WITH clause wrapping the MERGE statement for CTE support. let withClause = null; if (((_a = lexemes[idx]) === null || _a === void 0 ? void 0 : _a.value) === "with") { const withResult = WithClauseParser_1.WithClauseParser.parseFromLexeme(lexemes, idx); withClause = withResult.value; idx = withResult.newIndex; } // Ensure the statement begins with MERGE INTO. const mergeKeywordLexeme = lexemes[idx]; const mergeKeywordComments = (0, LexemeCommentUtils_1.extractLexemeComments)(mergeKeywordLexeme); if ((mergeKeywordLexeme === null || mergeKeywordLexeme === void 0 ? 
void 0 : mergeKeywordLexeme.value) !== "merge into") { const actual = (_c = (_b = lexemes[idx]) === null || _b === void 0 ? void 0 : _b.value) !== null && _c !== void 0 ? _c : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'MERGE INTO' but found '${actual}'.`); } idx++; // Parse target source expression (table or alias assignment). const targetResult = SourceExpressionParser_1.SourceExpressionParser.parseFromLexeme(lexemes, idx); const target = targetResult.value; idx = targetResult.newIndex; // Attach inline comments following MERGE INTO to the target source. this.addUniquePositionedComments(target.datasource, "before", mergeKeywordComments.after); // Consume USING clause introducing the source relation. if (((_d = lexemes[idx]) === null || _d === void 0 ? void 0 : _d.value) !== "using") { const actual = (_f = (_e = lexemes[idx]) === null || _e === void 0 ? void 0 : _e.value) !== null && _f !== void 0 ? _f : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'USING' but found '${actual}'.`); } idx++; // Parse source expression providing the dataset to merge with target. const sourceResult = SourceExpressionParser_1.SourceExpressionParser.parseFromLexeme(lexemes, idx); const source = sourceResult.value; idx = sourceResult.newIndex; // Require ON clause defining the match predicate. if (((_g = lexemes[idx]) === null || _g === void 0 ? void 0 : _g.value) !== "on") { const actual = (_j = (_h = lexemes[idx]) === null || _h === void 0 ? void 0 : _h.value) !== null && _j !== void 0 ? _j : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'ON' but found '${actual}'.`); } idx++; // Parse ON condition allowing any valid boolean expression. 
const onConditionResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx); const onCondition = onConditionResult.value; idx = onConditionResult.newIndex; // Accumulate WHEN clauses that describe matched and unmatched behaviors. const whenResult = this.parseWhenClauses(lexemes, idx); if (whenResult.clauses.length === 0) { throw new Error("[MergeQueryParser] MERGE statement must contain at least one WHEN clause."); } const mergeQuery = new MergeQuery_1.MergeQuery({ withClause, target, source, onCondition, whenClauses: whenResult.clauses }); // Preserve leading comments that precede the MERGE keyword. this.addUniquePositionedComments(mergeQuery, "before", mergeKeywordComments.before); return { value: mergeQuery, newIndex: whenResult.newIndex }; } static parseWhenClauses(lexemes, index) { var _a, _b; const clauses = []; let idx = index; // Iterate until no further WHEN keyword is found. while (this.getLowerValue(lexemes[idx]) === "when") { idx++; // Determine the match type (matched, not matched, not matched by ...) const { matchType, newIndex: matchIndex } = this.parseMatchType(lexemes, idx); idx = matchIndex; // Parse optional AND condition that narrows the clause applicability. let additionalCondition = null; if (this.getLowerValue(lexemes[idx]) === "and") { idx++; const conditionResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx); additionalCondition = conditionResult.value; idx = conditionResult.newIndex; } // Expect THEN before capturing the action body. const thenLexeme = lexemes[idx]; if (this.getLowerValue(thenLexeme) !== "then") { const actual = (_a = thenLexeme === null || thenLexeme === void 0 ? void 0 : thenLexeme.value) !== null && _a !== void 0 ? 
_a : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'THEN' but found '${actual}'.`); } const thenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(thenLexeme); const commentsBeforeThen = []; const precedingLexeme = lexemes[idx - 1]; if (precedingLexeme) { const precedingComments = (0, LexemeCommentUtils_1.extractLexemeComments)(precedingLexeme); this.mergeUnique(commentsBeforeThen, precedingComments.after); } this.mergeUnique(commentsBeforeThen, thenComments.before); idx++; // Dispatch to clause-specific action parser with comments that follow THEN. const actionResult = this.parseAction(lexemes, idx, (_b = thenComments.after) !== null && _b !== void 0 ? _b : []); idx = actionResult.newIndex; const whenClause = new MergeQuery_1.MergeWhenClause(matchType, actionResult.action, additionalCondition); whenClause.addThenLeadingComments(commentsBeforeThen); clauses.push(whenClause); } return { clauses, newIndex: idx }; } static parseMatchType(lexemes, index) { var _a, _b; let idx = index; const value = this.getLowerValue(lexemes[idx]); // Handle WHEN MATCHED scenario directly. if (value === "matched") { idx++; return { matchType: "matched", newIndex: idx }; } // Handle the different NOT MATCHED variants (tokenized as atomic commands). if (value === "not matched") { idx++; let matchType = "not_matched"; return { matchType, newIndex: idx }; } if (value === "not matched by source") { idx++; return { matchType: "not_matched_by_source", newIndex: idx }; } if (value === "not matched by target") { idx++; return { matchType: "not_matched_by_target", newIndex: idx }; } const actual = (_b = (_a = lexemes[idx]) === null || _a === void 0 ? void 0 : _a.value) !== null && _b !== void 0 ? 
_b : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'MATCHED' or 'NOT MATCHED' but found '${actual}'.`); } static parseAction(lexemes, index, leadingComments = []) { var _a; let idx = index; const token = lexemes[idx]; if (!token) { throw new Error("[MergeQueryParser] Unexpected end of input while parsing WHEN clause action."); } const tokenValue = token.value.toLowerCase(); const tokenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(token); const actionLeadingComments = []; this.mergeUnique(actionLeadingComments, leadingComments); this.mergeUnique(actionLeadingComments, tokenComments.before); // Handle UPDATE branches (accepting 'update' or 'update set'). if (tokenValue === "update" || tokenValue === "update set") { const expectSetKeyword = tokenValue === "update"; idx++; const pendingSetClauseComments = tokenComments.after; const setResult = this.parseSetClause(lexemes, idx, expectSetKeyword, pendingSetClauseComments); idx = setResult.newIndex; // Allow optional WHERE predicate to further limit updated rows. let whereClause = null; if (((_a = lexemes[idx]) === null || _a === void 0 ? void 0 : _a.value) === "where") { const whereResult = WhereClauseParser_1.WhereClauseParser.parseFromLexeme(lexemes, idx); whereClause = whereResult.value; idx = whereResult.newIndex; } const action = new MergeQuery_1.MergeUpdateAction(setResult.setClause, whereClause); this.addUniquePositionedComments(action, "before", actionLeadingComments); return { action, newIndex: idx }; } // Handle DELETE (optional WHERE clause mirrors UPDATE behavior). 
if (tokenValue === "delete") { idx++; let whereClause = null; if (this.getLowerValue(lexemes[idx]) === "where") { const whereResult = WhereClauseParser_1.WhereClauseParser.parseFromLexeme(lexemes, idx); whereClause = whereResult.value; idx = whereResult.newIndex; } const action = new MergeQuery_1.MergeDeleteAction(whereClause); this.addUniquePositionedComments(action, "before", actionLeadingComments); this.addUniquePositionedComments(action, "after", tokenComments.after); return { action, newIndex: idx }; } // Interpret DO NOTHING keyword sequence. if (tokenValue === "do nothing") { idx++; const action = new MergeQuery_1.MergeDoNothingAction(); this.addUniquePositionedComments(action, "before", actionLeadingComments); this.addUniquePositionedComments(action, "after", tokenComments.after); return { action, newIndex: idx }; } if (tokenValue === "insert default values") { idx++; const columnResult = this.parseInsertColumnProjection(lexemes, idx, tokenComments.after); idx = columnResult.newIndex; const action = new MergeQuery_1.MergeInsertAction({ columns: columnResult.columns, defaultValues: true }); this.addUniquePositionedComments(action, "before", actionLeadingComments); this.addUniquePositionedComments(action, "after", columnResult.trailingComments); return { action, newIndex: idx }; } // Parse INSERT clauses including column projection and VALUES/default values. if (tokenValue === "insert") { idx++; const insertResult = this.parseInsertAction(lexemes, idx, { pendingCommentsAfterInsert: tokenComments.after }); this.addUniquePositionedComments(insertResult.action, "before", actionLeadingComments); return insertResult; } const actual = token.value; throw new Error(`[MergeQueryParser] Unsupported action '${actual}'. 
Only UPDATE, DELETE, INSERT, and DO NOTHING are supported within MERGE WHEN clauses.`); } static parseSetClause(lexemes, index, expectSetKeyword, pendingCommentsAfterUpdate = []) { var _a, _b, _c, _d, _e, _f; let idx = index; // Capture comments that accompany the SET keyword so they can be reapplied later. let setKeywordComments = (0, LexemeCommentUtils_1.extractLexemeComments)(lexemes[idx]); if (expectSetKeyword) { if (this.getLowerValue(lexemes[idx]) !== "set") { const actual = (_b = (_a = lexemes[idx]) === null || _a === void 0 ? void 0 : _a.value) !== null && _b !== void 0 ? _b : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected 'SET' but found '${actual}'.`); } idx++; } else if (this.getLowerValue(lexemes[idx]) === "set") { setKeywordComments = (0, LexemeCommentUtils_1.extractLexemeComments)(lexemes[idx]); idx++; } else { setKeywordComments = { before: [], after: [] }; } const items = []; let pendingBeforeForNext = []; // Comments trailing UPDATE or SET precede the first assignment. this.mergeUnique(pendingBeforeForNext, pendingCommentsAfterUpdate); this.mergeUnique(pendingBeforeForNext, setKeywordComments.after); // Parse comma-separated column assignments. while (idx < lexemes.length) { const currentLexeme = lexemes[idx]; if (!currentLexeme) { break; } // Stop when we encounter tokens that belong to the next clause (e.g., WHERE or WHEN). if (this.isSetClauseTerminator(currentLexeme)) { break; } if (!(currentLexeme.type & (Lexeme_1.TokenType.Identifier | Lexeme_1.TokenType.Function | Lexeme_1.TokenType.Type | Lexeme_1.TokenType.OpenBracket))) { break; } const columnComments = (0, LexemeCommentUtils_1.extractLexemeComments)(currentLexeme); const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx); idx = newIndex; if (((_c = lexemes[idx]) === null || _c === void 0 ? 
void 0 : _c.type) !== Lexeme_1.TokenType.Operator || lexemes[idx].value !== "=") { const actual = (_e = (_d = lexemes[idx]) === null || _d === void 0 ? void 0 : _d.value) !== null && _e !== void 0 ? _e : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected '=' in SET clause but found '${actual}'.`); } const equalsLexeme = lexemes[idx]; const equalsComments = (0, LexemeCommentUtils_1.extractLexemeComments)(equalsLexeme); idx++; const valueResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx); idx = valueResult.newIndex; const setItem = new Clause_1.SetClauseItem({ namespaces, column: name }, valueResult.value); // Move buffered comments to the column identifier before processing the assignment. const beforeComments = []; this.mergeUnique(beforeComments, pendingBeforeForNext); this.mergeUnique(beforeComments, columnComments.before); this.addUniquePositionedComments(name, "before", beforeComments); pendingBeforeForNext = []; // Keep trailing identifier comments attached to the column token. this.addUniquePositionedComments(name, "after", columnComments.after); // Preserve comments embedded around the '=' operator. this.addUniquePositionedComments(setItem, "after", equalsComments.before); this.addUniquePositionedComments(valueResult.value, "before", equalsComments.after); items.push(setItem); if (((_f = lexemes[idx]) === null || _f === void 0 ? void 0 : _f.type) === Lexeme_1.TokenType.Comma) { const commaLexeme = lexemes[idx]; const commaComments = (0, LexemeCommentUtils_1.extractLexemeComments)(commaLexeme); idx++; // Comments before the comma belong to the current assignment. this.addUniquePositionedComments(setItem, "after", commaComments.before); // Comments after the comma should precede the following assignment. 
pendingBeforeForNext = []; this.mergeUnique(pendingBeforeForNext, commaComments.after); continue; } break; } if (pendingBeforeForNext.length > 0 && items.length > 0) { this.addUniquePositionedComments(items[items.length - 1], "after", pendingBeforeForNext); } if (items.length === 0) { throw new Error("[MergeQueryParser] SET clause must contain at least one column assignment."); } const setClause = new Clause_1.SetClause(items); // Any comments before SET belong in front of the entire clause. this.addUniquePositionedComments(setClause, "before", setKeywordComments.before); return { setClause, newIndex: idx }; } static parseInsertAction(lexemes, index, options) { var _a, _b, _c; let idx = index; const pendingAfterInsert = (_a = options === null || options === void 0 ? void 0 : options.pendingCommentsAfterInsert) !== null && _a !== void 0 ? _a : []; // Parse column projection and capture any comments that should precede VALUES. const columnResult = this.parseInsertColumnProjection(lexemes, idx, pendingAfterInsert); let columns = columnResult.columns; idx = columnResult.newIndex; let pendingBeforeValues = columnResult.trailingComments; // Parse VALUES (...) payload referencing source columns. if (this.getLowerValue(lexemes[idx]) === "values") { const valuesLexeme = lexemes[idx]; const valuesComments = (0, LexemeCommentUtils_1.extractLexemeComments)(valuesLexeme); idx++; // Comments carried forward should appear before the tuple list. const beforeValuesComments = []; this.mergeUnique(beforeValuesComments, pendingBeforeValues); this.mergeUnique(beforeValuesComments, valuesComments.before); const valuesResult = ValueParser_1.ValueParser.parseArgument(Lexeme_1.TokenType.OpenParen, Lexeme_1.TokenType.CloseParen, lexemes, idx); idx = valuesResult.newIndex; if (!(valuesResult.value instanceof ValueComponent_1.ValueList)) { throw new Error("[MergeQueryParser] Unexpected VALUES payload. 
Expected a parenthesized value list."); } const valueList = valuesResult.value; const closingParenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(lexemes[idx - 1]); this.addUniquePositionedComments(valueList, "after", closingParenComments.after); this.addUniquePositionedComments(valueList, "after", valuesComments.after); const action = new MergeQuery_1.MergeInsertAction({ columns, values: valueList }); action.addValuesLeadingComments(beforeValuesComments); return { action, newIndex: idx }; } const actual = (_c = (_b = lexemes[idx]) === null || _b === void 0 ? void 0 : _b.value) !== null && _c !== void 0 ? _c : "end of input"; throw new Error(`[MergeQueryParser] Unsupported INSERT payload '${actual}'. Use VALUES (...) or DEFAULT VALUES.`); } static parseInsertColumnProjection(lexemes, index, pendingBeforeFirstColumn) { var _a, _b, _c, _d, _e; let idx = index; // Without parentheses there is no column projection, so pass comments forward to VALUES. if (((_a = lexemes[idx]) === null || _a === void 0 ? void 0 : _a.type) !== Lexeme_1.TokenType.OpenParen) { return { columns: null, newIndex: idx, trailingComments: [...pendingBeforeFirstColumn] }; } const openParenLexeme = lexemes[idx]; const parenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(openParenLexeme); idx++; const columns = []; let pendingBeforeForNext = []; // Seed the first identifier with comments that trail INSERT or the opening parenthesis. this.mergeUnique(pendingBeforeForNext, pendingBeforeFirstColumn); this.mergeUnique(pendingBeforeForNext, parenComments.before); this.mergeUnique(pendingBeforeForNext, parenComments.after); while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Identifier)) { const columnLexeme = lexemes[idx]; const columnComments = (0, LexemeCommentUtils_1.extractLexemeComments)(columnLexeme); const column = new ValueComponent_1.IdentifierString(columnLexeme.value); // Move buffered comments so they precede the current identifier. 
const beforeComments = []; this.mergeUnique(beforeComments, pendingBeforeForNext); this.mergeUnique(beforeComments, columnComments.before); this.addUniquePositionedComments(column, "before", beforeComments); pendingBeforeForNext = []; // Preserve comments that trail the identifier itself. this.addUniquePositionedComments(column, "after", columnComments.after); columns.push(column); idx++; if (((_b = lexemes[idx]) === null || _b === void 0 ? void 0 : _b.type) === Lexeme_1.TokenType.Comma) { const commaLexeme = lexemes[idx]; const commaComments = (0, LexemeCommentUtils_1.extractLexemeComments)(commaLexeme); idx++; // Attach comma-leading comments to the current column. this.addUniquePositionedComments(column, "after", commaComments.before); // Comments after the comma prepare the next identifier. pendingBeforeForNext = []; this.mergeUnique(pendingBeforeForNext, commaComments.after); continue; } break; } if (pendingBeforeForNext.length > 0 && columns.length > 0) { this.addUniquePositionedComments(columns[columns.length - 1], "after", pendingBeforeForNext); pendingBeforeForNext = []; } if (((_c = lexemes[idx]) === null || _c === void 0 ? void 0 : _c.type) !== Lexeme_1.TokenType.CloseParen) { const actual = (_e = (_d = lexemes[idx]) === null || _d === void 0 ? void 0 : _d.value) !== null && _e !== void 0 ? _e : "end of input"; throw new Error(`[MergeQueryParser] Syntax error at position ${idx}: expected ')' after column list but found '${actual}'.`); } const closeParenLexeme = lexemes[idx]; const closeParenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(closeParenLexeme); idx++; if (closeParenComments.before.length > 0 && columns.length > 0) { this.addUniquePositionedComments(columns[columns.length - 1], "after", closeParenComments.before); } const trailingComments = []; this.mergeUnique(trailingComments, closeParenComments.after); this.mergeUnique(trailingComments, pendingBeforeForNext); return { columns: columns.length > 0 ? 
columns : [], newIndex: idx, trailingComments }; } static isSetClauseTerminator(lexeme) { if (!lexeme) { return false; } // Normalize to lowercase so we can compare mixed-case keywords safely. const value = this.getLowerValue(lexeme); if (!value) { return false; } return value === "where" || value === "from" || value === "returning" || value === "when"; } static mergeUnique(target, source) { if (!source || source.length === 0) { return; } for (const comment of source) { if (!target.includes(comment)) { target.push(comment); } } } static addUniquePositionedComments(component, position, comments) { if (!component || !comments || comments.length === 0) { return; } const existing = component.getPositionedComments(position); const newOnes = comments.filter(comment => !existing.includes(comment)); if (newOnes.length > 0) { component.addPositionedComments(position, newOnes); } } static getLowerValue(lexeme) { if (!lexeme) { return null; } return typeof lexeme.value === "string" ? lexeme.value.toLowerCase() : null; } } exports.MergeQueryParser = MergeQueryParser; //# sourceMappingURL=MergeQueryParser.js.map