rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
483 lines • 26.1 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MergeQueryParser = void 0;
const MergeQuery_1 = require("../models/MergeQuery");
const Clause_1 = require("../models/Clause");
const ValueComponent_1 = require("../models/ValueComponent");
const Lexeme_1 = require("../models/Lexeme");
const SqlTokenizer_1 = require("./SqlTokenizer");
const WithClauseParser_1 = require("./WithClauseParser");
const SourceExpressionParser_1 = require("./SourceExpressionParser");
const ValueParser_1 = require("./ValueParser");
const WhereClauseParser_1 = require("./WhereClauseParser");
const FullNameParser_1 = require("./FullNameParser");
const LexemeCommentUtils_1 = require("./utils/LexemeCommentUtils");
/**
 * Parser that turns a MERGE statement (optionally prefixed by a WITH clause)
 * into a MergeQuery AST, carrying lexeme comments over to the produced nodes.
 *
 * All keyword comparisons go through getLowerValue so that every clause is
 * matched with the same, case-insensitive rule.
 */
class MergeQueryParser {
    /**
     * Parse SQL string to MergeQuery AST.
     * @param query SQL string
     * @returns The parsed MergeQuery.
     * @throws Error when tokens remain after the MERGE statement, or when any
     *         nested parsing step rejects the input.
     */
    static parse(query) {
        const tokenizer = new SqlTokenizer_1.SqlTokenizer(query);
        const lexemes = tokenizer.readLexemes();
        const result = this.parseFromLexeme(lexemes, 0);
        if (result.newIndex < lexemes.length) {
            throw new Error(`Syntax error: Unexpected token "${lexemes[result.newIndex].value}" at position ${result.newIndex}. The MERGE statement is complete but there are additional tokens.`);
        }
        return result.value;
    }
    /**
     * Parse from lexeme array (for internal use and tests).
     * @param lexemes Lexeme array to read from.
     * @param index Position of the first lexeme of the statement.
     * @returns `{ value, newIndex }` where `value` is the MergeQuery and
     *          `newIndex` is the position just past the last consumed lexeme.
     * @throws Error when MERGE INTO, USING, ON or at least one WHEN clause is missing.
     */
    static parseFromLexeme(lexemes, index) {
        let idx = index;
        // Parse optional WITH clause wrapping the MERGE statement for CTE support.
        let withClause = null;
        if (this.getLowerValue(lexemes[idx]) === "with") {
            const withResult = WithClauseParser_1.WithClauseParser.parseFromLexeme(lexemes, idx);
            withClause = withResult.value;
            idx = withResult.newIndex;
        }
        // Ensure the statement begins with MERGE INTO. The keyword is validated
        // first so that comment extraction only ever sees a real lexeme.
        const mergeKeywordLexeme = lexemes[idx];
        if (this.getLowerValue(mergeKeywordLexeme) !== "merge into") {
            throw this.createSyntaxError(idx, "'MERGE INTO'", mergeKeywordLexeme);
        }
        const mergeKeywordComments = (0, LexemeCommentUtils_1.extractLexemeComments)(mergeKeywordLexeme);
        idx++;
        // Parse target source expression (table or alias assignment).
        const targetResult = SourceExpressionParser_1.SourceExpressionParser.parseFromLexeme(lexemes, idx);
        const target = targetResult.value;
        idx = targetResult.newIndex;
        // Attach inline comments following MERGE INTO to the target source.
        this.addUniquePositionedComments(target.datasource, "before", mergeKeywordComments.after);
        // Consume USING clause introducing the source relation.
        if (this.getLowerValue(lexemes[idx]) !== "using") {
            throw this.createSyntaxError(idx, "'USING'", lexemes[idx]);
        }
        idx++;
        // Parse source expression providing the dataset to merge with target.
        const sourceResult = SourceExpressionParser_1.SourceExpressionParser.parseFromLexeme(lexemes, idx);
        const source = sourceResult.value;
        idx = sourceResult.newIndex;
        // Require ON clause defining the match predicate.
        if (this.getLowerValue(lexemes[idx]) !== "on") {
            throw this.createSyntaxError(idx, "'ON'", lexemes[idx]);
        }
        idx++;
        // Parse ON condition allowing any expression ValueParser accepts.
        const onConditionResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx);
        const onCondition = onConditionResult.value;
        idx = onConditionResult.newIndex;
        // Accumulate WHEN clauses that describe matched and unmatched behaviors.
        const whenResult = this.parseWhenClauses(lexemes, idx);
        if (whenResult.clauses.length === 0) {
            throw new Error("[MergeQueryParser] MERGE statement must contain at least one WHEN clause.");
        }
        const mergeQuery = new MergeQuery_1.MergeQuery({
            withClause,
            target,
            source,
            onCondition,
            whenClauses: whenResult.clauses
        });
        // Preserve leading comments that precede the MERGE keyword.
        this.addUniquePositionedComments(mergeQuery, "before", mergeKeywordComments.before);
        return {
            value: mergeQuery,
            newIndex: whenResult.newIndex
        };
    }
    /**
     * Parse zero or more consecutive `WHEN ... THEN <action>` clauses.
     * @param lexemes Lexeme array to read from.
     * @param index Position where the first WHEN keyword is expected.
     * @returns `{ clauses, newIndex }`; `clauses` is empty when the lexeme at
     *          `index` is not WHEN.
     * @throws Error when THEN is missing or a nested step rejects the input.
     */
    static parseWhenClauses(lexemes, index) {
        const clauses = [];
        let idx = index;
        // Iterate until no further WHEN keyword is found.
        while (this.getLowerValue(lexemes[idx]) === "when") {
            idx++;
            // Determine the match type (matched, not matched, not matched by ...)
            const { matchType, newIndex: matchIndex } = this.parseMatchType(lexemes, idx);
            idx = matchIndex;
            // Parse optional AND condition that narrows the clause applicability.
            let additionalCondition = null;
            if (this.getLowerValue(lexemes[idx]) === "and") {
                idx++;
                const conditionResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx);
                additionalCondition = conditionResult.value;
                idx = conditionResult.newIndex;
            }
            // Expect THEN before capturing the action body.
            const thenLexeme = lexemes[idx];
            if (this.getLowerValue(thenLexeme) !== "then") {
                throw this.createSyntaxError(idx, "'THEN'", thenLexeme);
            }
            const thenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(thenLexeme);
            // Comments trailing the previous lexeme and comments leading THEN
            // are both recorded as "before THEN" on the clause.
            const commentsBeforeThen = [];
            const precedingLexeme = lexemes[idx - 1];
            if (precedingLexeme) {
                const precedingComments = (0, LexemeCommentUtils_1.extractLexemeComments)(precedingLexeme);
                this.mergeUnique(commentsBeforeThen, precedingComments.after);
            }
            this.mergeUnique(commentsBeforeThen, thenComments.before);
            idx++;
            // Dispatch to clause-specific action parser with comments that follow THEN.
            const commentsAfterThen = thenComments.after != null ? thenComments.after : [];
            const actionResult = this.parseAction(lexemes, idx, commentsAfterThen);
            idx = actionResult.newIndex;
            const whenClause = new MergeQuery_1.MergeWhenClause(matchType, actionResult.action, additionalCondition);
            whenClause.addThenLeadingComments(commentsBeforeThen);
            clauses.push(whenClause);
        }
        return { clauses, newIndex: idx };
    }
    /**
     * Read the match-type lexeme that follows WHEN.
     * @param lexemes Lexeme array to read from.
     * @param index Position of the match-type lexeme.
     * @returns `{ matchType, newIndex }` with `matchType` one of "matched",
     *          "not_matched", "not_matched_by_source", "not_matched_by_target".
     * @throws Error when the lexeme is none of the recognised variants.
     */
    static parseMatchType(lexemes, index) {
        // Each variant is expected as a single lexeme value (e.g. "not matched by source").
        switch (this.getLowerValue(lexemes[index])) {
            case "matched":
                return { matchType: "matched", newIndex: index + 1 };
            case "not matched":
                return { matchType: "not_matched", newIndex: index + 1 };
            case "not matched by source":
                return { matchType: "not_matched_by_source", newIndex: index + 1 };
            case "not matched by target":
                return { matchType: "not_matched_by_target", newIndex: index + 1 };
            default:
                throw this.createSyntaxError(index, "'MATCHED' or 'NOT MATCHED'", lexemes[index]);
        }
    }
    /**
     * Parse the action that follows THEN: UPDATE, DELETE, DO NOTHING,
     * INSERT DEFAULT VALUES or INSERT ... VALUES.
     * @param lexemes Lexeme array to read from.
     * @param index Position of the action keyword.
     * @param leadingComments Comments that followed THEN; attached before the action.
     * @returns `{ action, newIndex }`.
     * @throws Error at end of input or when the keyword is not a supported action.
     */
    static parseAction(lexemes, index, leadingComments = []) {
        let idx = index;
        const token = lexemes[idx];
        if (!token) {
            throw new Error("[MergeQueryParser] Unexpected end of input while parsing WHEN clause action.");
        }
        // getLowerValue yields null for a non-string value, which then falls
        // through to the "Unsupported action" error instead of a TypeError.
        const tokenValue = this.getLowerValue(token);
        const tokenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(token);
        const actionLeadingComments = [];
        this.mergeUnique(actionLeadingComments, leadingComments);
        this.mergeUnique(actionLeadingComments, tokenComments.before);
        // Handle UPDATE branches (accepting 'update' or 'update set').
        if (tokenValue === "update" || tokenValue === "update set") {
            const expectSetKeyword = tokenValue === "update";
            idx++;
            // Comments trailing the UPDATE keyword are handed to the SET clause parser.
            const setResult = this.parseSetClause(lexemes, idx, expectSetKeyword, tokenComments.after);
            idx = setResult.newIndex;
            // Allow optional WHERE predicate to further limit updated rows.
            let whereClause = null;
            if (this.getLowerValue(lexemes[idx]) === "where") {
                const whereResult = WhereClauseParser_1.WhereClauseParser.parseFromLexeme(lexemes, idx);
                whereClause = whereResult.value;
                idx = whereResult.newIndex;
            }
            const action = new MergeQuery_1.MergeUpdateAction(setResult.setClause, whereClause);
            this.addUniquePositionedComments(action, "before", actionLeadingComments);
            return { action, newIndex: idx };
        }
        // Handle DELETE (optional WHERE clause mirrors UPDATE behavior).
        if (tokenValue === "delete") {
            idx++;
            let whereClause = null;
            if (this.getLowerValue(lexemes[idx]) === "where") {
                const whereResult = WhereClauseParser_1.WhereClauseParser.parseFromLexeme(lexemes, idx);
                whereClause = whereResult.value;
                idx = whereResult.newIndex;
            }
            const action = new MergeQuery_1.MergeDeleteAction(whereClause);
            this.addUniquePositionedComments(action, "before", actionLeadingComments);
            this.addUniquePositionedComments(action, "after", tokenComments.after);
            return { action, newIndex: idx };
        }
        // Interpret DO NOTHING keyword sequence.
        if (tokenValue === "do nothing") {
            idx++;
            const action = new MergeQuery_1.MergeDoNothingAction();
            this.addUniquePositionedComments(action, "before", actionLeadingComments);
            this.addUniquePositionedComments(action, "after", tokenComments.after);
            return { action, newIndex: idx };
        }
        // INSERT DEFAULT VALUES: a parenthesised column list, if present, is still read.
        if (tokenValue === "insert default values") {
            idx++;
            const columnResult = this.parseInsertColumnProjection(lexemes, idx, tokenComments.after);
            idx = columnResult.newIndex;
            const action = new MergeQuery_1.MergeInsertAction({
                columns: columnResult.columns,
                defaultValues: true
            });
            this.addUniquePositionedComments(action, "before", actionLeadingComments);
            this.addUniquePositionedComments(action, "after", columnResult.trailingComments);
            return { action, newIndex: idx };
        }
        // Parse INSERT clauses including column projection and VALUES.
        if (tokenValue === "insert") {
            idx++;
            const insertResult = this.parseInsertAction(lexemes, idx, {
                pendingCommentsAfterInsert: tokenComments.after
            });
            this.addUniquePositionedComments(insertResult.action, "before", actionLeadingComments);
            return insertResult;
        }
        throw new Error(`[MergeQueryParser] Unsupported action '${token.value}'. Only UPDATE, DELETE, INSERT, and DO NOTHING are supported within MERGE WHEN clauses.`);
    }
    /**
     * Parse the comma-separated `column = value` assignments of an UPDATE action.
     * @param lexemes Lexeme array to read from.
     * @param index Position of SET (or of the first assignment).
     * @param expectSetKeyword When true, a SET keyword is mandatory at `index`;
     *        when false, a SET keyword is consumed only if present.
     * @param pendingCommentsAfterUpdate Comments that trailed the UPDATE keyword;
     *        they are placed before the first assigned column.
     * @returns `{ setClause, newIndex }`.
     * @throws Error when SET is required but missing, when '=' is missing after
     *         a column, or when no assignment was parsed.
     */
    static parseSetClause(lexemes, index, expectSetKeyword, pendingCommentsAfterUpdate = []) {
        let idx = index;
        const hasSetKeyword = this.getLowerValue(lexemes[idx]) === "set";
        if (expectSetKeyword && !hasSetKeyword) {
            throw this.createSyntaxError(idx, "'SET'", lexemes[idx]);
        }
        // Capture comments that accompany the SET keyword so they can be reapplied later.
        let setKeywordComments = { before: [], after: [] };
        if (hasSetKeyword) {
            setKeywordComments = (0, LexemeCommentUtils_1.extractLexemeComments)(lexemes[idx]);
            idx++;
        }
        const items = [];
        let pendingBeforeForNext = [];
        // Comments trailing UPDATE or SET precede the first assignment.
        this.mergeUnique(pendingBeforeForNext, pendingCommentsAfterUpdate);
        this.mergeUnique(pendingBeforeForNext, setKeywordComments.after);
        const columnStartMask = Lexeme_1.TokenType.Identifier
            | Lexeme_1.TokenType.Function
            | Lexeme_1.TokenType.Type
            | Lexeme_1.TokenType.OpenBracket;
        // Parse comma-separated column assignments.
        while (idx < lexemes.length) {
            const currentLexeme = lexemes[idx];
            if (!currentLexeme) {
                break;
            }
            // Stop when we encounter tokens that belong to the next clause (e.g., WHERE or WHEN).
            if (this.isSetClauseTerminator(currentLexeme)) {
                break;
            }
            // Stop on any lexeme whose type cannot start a column name.
            if (!(currentLexeme.type & columnStartMask)) {
                break;
            }
            const columnComments = (0, LexemeCommentUtils_1.extractLexemeComments)(currentLexeme);
            const { namespaces, name, newIndex } = FullNameParser_1.FullNameParser.parseFromLexeme(lexemes, idx);
            idx = newIndex;
            const equalsLexeme = lexemes[idx];
            if (!equalsLexeme || equalsLexeme.type !== Lexeme_1.TokenType.Operator || equalsLexeme.value !== "=") {
                throw this.createSyntaxError(idx, "'=' in SET clause", equalsLexeme);
            }
            const equalsComments = (0, LexemeCommentUtils_1.extractLexemeComments)(equalsLexeme);
            idx++;
            const valueResult = ValueParser_1.ValueParser.parseFromLexeme(lexemes, idx);
            idx = valueResult.newIndex;
            const setItem = new Clause_1.SetClauseItem({ namespaces, column: name }, valueResult.value);
            // Move buffered comments to the column identifier before processing the assignment.
            const beforeComments = [];
            this.mergeUnique(beforeComments, pendingBeforeForNext);
            this.mergeUnique(beforeComments, columnComments.before);
            this.addUniquePositionedComments(name, "before", beforeComments);
            pendingBeforeForNext = [];
            // Keep trailing identifier comments attached to the column token.
            this.addUniquePositionedComments(name, "after", columnComments.after);
            // Preserve comments embedded around the '=' operator.
            this.addUniquePositionedComments(setItem, "after", equalsComments.before);
            this.addUniquePositionedComments(valueResult.value, "before", equalsComments.after);
            items.push(setItem);
            const commaLexeme = lexemes[idx];
            if (!commaLexeme || commaLexeme.type !== Lexeme_1.TokenType.Comma) {
                break;
            }
            const commaComments = (0, LexemeCommentUtils_1.extractLexemeComments)(commaLexeme);
            idx++;
            // Comments before the comma belong to the current assignment.
            this.addUniquePositionedComments(setItem, "after", commaComments.before);
            // Comments after the comma should precede the following assignment.
            pendingBeforeForNext = [];
            this.mergeUnique(pendingBeforeForNext, commaComments.after);
        }
        // Comments buffered for an assignment that never came stay with the last item.
        if (pendingBeforeForNext.length > 0 && items.length > 0) {
            this.addUniquePositionedComments(items[items.length - 1], "after", pendingBeforeForNext);
        }
        if (items.length === 0) {
            throw new Error("[MergeQueryParser] SET clause must contain at least one column assignment.");
        }
        const setClause = new Clause_1.SetClause(items);
        // Any comments before SET belong in front of the entire clause.
        this.addUniquePositionedComments(setClause, "before", setKeywordComments.before);
        return { setClause, newIndex: idx };
    }
    /**
     * Parse the remainder of an INSERT action after the INSERT keyword:
     * an optional column list followed by `VALUES (...)`.
     * @param lexemes Lexeme array to read from.
     * @param index Position just after the INSERT keyword.
     * @param options Optional bag; `pendingCommentsAfterInsert` holds comments
     *        that trailed the INSERT keyword.
     * @returns `{ action, newIndex }`.
     * @throws Error when VALUES is missing or its payload is not a ValueList.
     */
    static parseInsertAction(lexemes, index, options) {
        let idx = index;
        const pendingAfterInsert = options != null && options.pendingCommentsAfterInsert != null
            ? options.pendingCommentsAfterInsert
            : [];
        // Parse column projection and capture any comments that should precede VALUES.
        const columnResult = this.parseInsertColumnProjection(lexemes, idx, pendingAfterInsert);
        const columns = columnResult.columns;
        idx = columnResult.newIndex;
        const pendingBeforeValues = columnResult.trailingComments;
        const valuesLexeme = lexemes[idx];
        if (this.getLowerValue(valuesLexeme) !== "values") {
            throw new Error(`[MergeQueryParser] Unsupported INSERT payload '${this.describeLexeme(valuesLexeme)}'. Use VALUES (...) or DEFAULT VALUES.`);
        }
        // Parse VALUES (...) payload.
        const valuesComments = (0, LexemeCommentUtils_1.extractLexemeComments)(valuesLexeme);
        idx++;
        // Comments carried forward should appear before the tuple list.
        const beforeValuesComments = [];
        this.mergeUnique(beforeValuesComments, pendingBeforeValues);
        this.mergeUnique(beforeValuesComments, valuesComments.before);
        const valuesResult = ValueParser_1.ValueParser.parseArgument(Lexeme_1.TokenType.OpenParen, Lexeme_1.TokenType.CloseParen, lexemes, idx);
        idx = valuesResult.newIndex;
        if (!(valuesResult.value instanceof ValueComponent_1.ValueList)) {
            throw new Error("[MergeQueryParser] Unexpected VALUES payload. Expected a parenthesized value list.");
        }
        const valueList = valuesResult.value;
        // The lexeme just before the new index is the last one consumed by parseArgument.
        const closingParenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(lexemes[idx - 1]);
        this.addUniquePositionedComments(valueList, "after", closingParenComments.after);
        this.addUniquePositionedComments(valueList, "after", valuesComments.after);
        const action = new MergeQuery_1.MergeInsertAction({
            columns,
            values: valueList
        });
        action.addValuesLeadingComments(beforeValuesComments);
        return { action, newIndex: idx };
    }
    /**
     * Parse an optional parenthesised column list of an INSERT action.
     * @param lexemes Lexeme array to read from.
     * @param index Position where the opening parenthesis may appear.
     * @param pendingBeforeFirstColumn Comments to place before the first column,
     *        or to pass through unchanged when there is no column list.
     * @returns `{ columns, newIndex, trailingComments }`; `columns` is null when
     *          there is no opening parenthesis, otherwise an array (possibly empty).
     * @throws Error when the column list is not closed by ')'.
     */
    static parseInsertColumnProjection(lexemes, index, pendingBeforeFirstColumn) {
        let idx = index;
        const openParenLexeme = lexemes[idx];
        // Without parentheses there is no column projection, so pass comments forward to VALUES.
        if (!openParenLexeme || openParenLexeme.type !== Lexeme_1.TokenType.OpenParen) {
            return {
                columns: null,
                newIndex: idx,
                trailingComments: [...pendingBeforeFirstColumn]
            };
        }
        const parenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(openParenLexeme);
        idx++;
        const columns = [];
        let pendingBeforeForNext = [];
        // Seed the first identifier with comments that trail INSERT or the opening parenthesis.
        this.mergeUnique(pendingBeforeForNext, pendingBeforeFirstColumn);
        this.mergeUnique(pendingBeforeForNext, parenComments.before);
        this.mergeUnique(pendingBeforeForNext, parenComments.after);
        while (idx < lexemes.length && (lexemes[idx].type & Lexeme_1.TokenType.Identifier)) {
            const columnLexeme = lexemes[idx];
            const columnComments = (0, LexemeCommentUtils_1.extractLexemeComments)(columnLexeme);
            const column = new ValueComponent_1.IdentifierString(columnLexeme.value);
            // Move buffered comments so they precede the current identifier.
            const beforeComments = [];
            this.mergeUnique(beforeComments, pendingBeforeForNext);
            this.mergeUnique(beforeComments, columnComments.before);
            this.addUniquePositionedComments(column, "before", beforeComments);
            pendingBeforeForNext = [];
            // Preserve comments that trail the identifier itself.
            this.addUniquePositionedComments(column, "after", columnComments.after);
            columns.push(column);
            idx++;
            const commaLexeme = lexemes[idx];
            if (!commaLexeme || commaLexeme.type !== Lexeme_1.TokenType.Comma) {
                break;
            }
            const commaComments = (0, LexemeCommentUtils_1.extractLexemeComments)(commaLexeme);
            idx++;
            // Attach comma-leading comments to the current column.
            this.addUniquePositionedComments(column, "after", commaComments.before);
            // Comments after the comma prepare the next identifier.
            pendingBeforeForNext = [];
            this.mergeUnique(pendingBeforeForNext, commaComments.after);
        }
        // Comments buffered for a column that never came stay with the last column.
        if (pendingBeforeForNext.length > 0 && columns.length > 0) {
            this.addUniquePositionedComments(columns[columns.length - 1], "after", pendingBeforeForNext);
            pendingBeforeForNext = [];
        }
        const closeParenLexeme = lexemes[idx];
        if (!closeParenLexeme || closeParenLexeme.type !== Lexeme_1.TokenType.CloseParen) {
            throw this.createSyntaxError(idx, "')' after column list", closeParenLexeme);
        }
        const closeParenComments = (0, LexemeCommentUtils_1.extractLexemeComments)(closeParenLexeme);
        idx++;
        if (closeParenComments.before.length > 0 && columns.length > 0) {
            this.addUniquePositionedComments(columns[columns.length - 1], "after", closeParenComments.before);
        }
        // With an empty column list the still-buffered comments are forwarded too.
        const trailingComments = [];
        this.mergeUnique(trailingComments, closeParenComments.after);
        this.mergeUnique(trailingComments, pendingBeforeForNext);
        return {
            columns,
            newIndex: idx,
            trailingComments
        };
    }
    /**
     * Tell whether a lexeme ends the SET assignment list.
     * @param lexeme Lexeme to inspect; may be null or undefined.
     * @returns true for WHERE, FROM, RETURNING or WHEN (case-insensitive), false otherwise.
     */
    static isSetClauseTerminator(lexeme) {
        // getLowerValue yields null for a missing lexeme or non-string value,
        // which matches none of the keywords below.
        const value = this.getLowerValue(lexeme);
        return value === "where" || value === "from" || value === "returning" || value === "when";
    }
    /**
     * Append to `target` every entry of `source` that `target` does not already contain.
     * @param target Array mutated in place.
     * @param source Entries to add; null, undefined or empty is a no-op.
     */
    static mergeUnique(target, source) {
        if (!source || source.length === 0) {
            return;
        }
        for (const comment of source) {
            if (!target.includes(comment)) {
                target.push(comment);
            }
        }
    }
    /**
     * Add comments to a component at the given position, skipping comments the
     * component already reports for that position.
     * @param component Object exposing getPositionedComments / addPositionedComments; falsy is a no-op.
     * @param position Position key passed through to the component (e.g. "before", "after").
     * @param comments Comments to add; null, undefined or empty is a no-op.
     */
    static addUniquePositionedComments(component, position, comments) {
        if (!component || !comments || comments.length === 0) {
            return;
        }
        const existing = component.getPositionedComments(position);
        const newOnes = comments.filter(comment => !existing.includes(comment));
        if (newOnes.length > 0) {
            component.addPositionedComments(position, newOnes);
        }
    }
    /**
     * Lower-cased value of a lexeme.
     * @param lexeme Lexeme to read; may be null or undefined.
     * @returns The lower-cased string value, or null when the lexeme is missing
     *          or its value is not a string.
     */
    static getLowerValue(lexeme) {
        if (!lexeme) {
            return null;
        }
        return typeof lexeme.value === "string" ? lexeme.value.toLowerCase() : null;
    }
    /**
     * Text used in error messages for the lexeme that was actually found.
     * @param lexeme Lexeme to describe; may be null or undefined.
     * @returns The lexeme's value, or "end of input" when the lexeme or its value is missing.
     */
    static describeLexeme(lexeme) {
        return lexeme == null || lexeme.value == null ? "end of input" : lexeme.value;
    }
    /**
     * Build the parser's standard "expected X but found Y" syntax error.
     * @param position Lexeme index reported in the message.
     * @param expectation Already-quoted description of what was expected, e.g. "'USING'".
     * @param lexeme Lexeme found at `position`; may be null or undefined.
     * @returns An Error instance for the caller to throw.
     */
    static createSyntaxError(position, expectation, lexeme) {
        return new Error(`[MergeQueryParser] Syntax error at position ${position}: expected ${expectation} but found '${this.describeLexeme(lexeme)}'.`);
    }
}
exports.MergeQueryParser = MergeQueryParser;
//# sourceMappingURL=MergeQueryParser.js.map