rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
560 lines • 27.3 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SelectQueryParser = void 0;
const SelectQuery_1 = require("../models/SelectQuery");
const SelectClauseParser_1 = require("./SelectClauseParser");
const FromClauseParser_1 = require("./FromClauseParser");
const WhereClauseParser_1 = require("./WhereClauseParser");
const GroupByParser_1 = require("./GroupByParser");
const HavingParser_1 = require("./HavingParser");
const OrderByClauseParser_1 = require("./OrderByClauseParser");
const WindowClauseParser_1 = require("./WindowClauseParser");
const LimitClauseParser_1 = require("./LimitClauseParser");
const ForClauseParser_1 = require("./ForClauseParser");
const SqlTokenizer_1 = require("./SqlTokenizer");
const WithClauseParser_1 = require("./WithClauseParser");
const ValuesQueryParser_1 = require("./ValuesQueryParser");
const FetchClauseParser_1 = require("./FetchClauseParser");
const OffsetClauseParser_1 = require("./OffsetClauseParser");
const CTERegionDetector_1 = require("../utils/CTERegionDetector");
class SelectQueryParser {
    /**
     * Parse a SQL string into a query AST.
     *
     * @param query SQL string to parse
     * @returns The parsed query (the `value` produced by parseFromLexeme)
     * @throws Error when tokens remain after a complete query has been parsed,
     *         or when tokenizing/parsing itself throws
     */
    static parse(query) {
        const tokenizer = new SqlTokenizer_1.SqlTokenizer(query);
        const lexemes = tokenizer.readLexmes();
        const result = this.parseFromLexeme(lexemes, 0);
        // Anything left over after a complete query is a syntax error.
        if (result.newIndex < lexemes.length) {
            throw new Error(`[SelectQueryParser] Syntax error: Unexpected token "${lexemes[result.newIndex].value}" at position ${result.newIndex}. The SELECT query is complete but there are additional tokens.`);
        }
        return result.value;
    }
    /**
     * Calculate the character position of a token in the original query text.
     *
     * Uses the lexeme's own `position.startPosition` when it is present.
     * Otherwise the position is estimated by walking the preceding lexeme
     * values through the query text. That search is case-insensitive because
     * this parser compares keyword lexemes against lowercase literals, so a
     * lexeme value may differ in case from the text the user actually typed.
     *
     * @param query Original SQL string
     * @param lexemes Lexemes produced for the query
     * @param tokenIndex Index of the lexeme to locate
     * @returns Character offset of the token; `query.length` when tokenIndex is
     *          past the last lexeme; the end of the last located token when the
     *          target itself cannot be found
     */
    static calculateCharacterPosition(query, lexemes, tokenIndex) {
        if (tokenIndex >= lexemes.length) {
            return query.length;
        }
        const lexeme = lexemes[tokenIndex];
        if (lexeme.position != null && lexeme.position.startPosition !== undefined) {
            return lexeme.position.startPosition;
        }
        // Lower-casing can change the length of some Unicode strings, which
        // would shift every offset; only search case-insensitively when the
        // lowered text still lines up with the original.
        const loweredQuery = query.toLowerCase();
        const caseInsensitive = loweredQuery.length === query.length;
        const haystack = caseInsensitive ? loweredQuery : query;
        const toNeedle = (value) => (caseInsensitive ? value.toLowerCase() : value);
        let searchStart = 0;
        for (let i = 0; i < tokenIndex; i++) {
            const needle = toNeedle(lexemes[i].value);
            const found = haystack.indexOf(needle, searchStart);
            // Tokens that cannot be located are skipped without moving the cursor.
            if (found !== -1) {
                searchStart = found + needle.length;
            }
        }
        const targetPos = haystack.indexOf(toNeedle(lexeme.value), searchStart);
        return targetPos !== -1 ? targetPos : searchStart;
    }
    /**
     * Analyzes SQL string for parsing without throwing errors.
     * Returns a result object containing the parsed query on success,
     * or error information if parsing fails.
     *
     * @param query SQL string to analyze
     * @returns Analysis result: `{ success: true, query }` on success;
     *          `{ success: false, query, error, errorPosition, remainingTokens }`
     *          when tokens remain after a complete query; or
     *          `{ success: false, error, errorPosition }` when parsing throws
     */
    static analyze(query) {
        // Declared outside the try block so the catch handler can still map a
        // token index to a character position.
        let lexemes = [];
        try {
            const tokenizer = new SqlTokenizer_1.SqlTokenizer(query);
            lexemes = tokenizer.readLexmes();
            const result = this.parseFromLexeme(lexemes, 0);
            if (result.newIndex < lexemes.length) {
                const remainingTokens = lexemes.slice(result.newIndex).map((lex) => lex.value);
                const errorLexeme = lexemes[result.newIndex];
                const errorPosition = this.calculateCharacterPosition(query, lexemes, result.newIndex);
                return {
                    success: false,
                    query: result.value,
                    error: `Syntax error: Unexpected token "${errorLexeme.value}" at character position ${errorPosition}. The SELECT query is complete but there are additional tokens.`,
                    errorPosition: errorPosition,
                    remainingTokens: remainingTokens
                };
            }
            return {
                success: true,
                query: result.value
            };
        }
        catch (error) {
            let errorPosition;
            const errorMessage = error instanceof Error ? error.message : String(error);
            // The number following "position" in the message is treated as a
            // token index and converted to a character position.
            const positionMatch = errorMessage.match(/position (\d+)/);
            if (positionMatch) {
                const tokenIndex = parseInt(positionMatch[1], 10);
                errorPosition = this.calculateCharacterPosition(query, lexemes, tokenIndex);
            }
            return {
                success: false,
                error: errorMessage,
                errorPosition: errorPosition
            };
        }
    }
    /**
     * Asynchronously parse SQL string to AST.
     * The work is the synchronous parse; being an async function, the result is
     * delivered through a Promise and a parse error becomes a rejection.
     *
     * @param query SQL string to parse
     * @returns Promise resolving to the parsed query
     */
    static async parseAsync(query) {
        return this.parse(query);
    }
    /**
     * Transfer headerComments from source query to target query and clear from source
     * @param source Source query to transfer headerComments from
     * @param target Target query to receive headerComments
     */
    static transferHeaderComments(source, target) {
        if (source.headerComments) {
            target.headerComments = source.headerComments;
            // Clear headerComments from the source query to avoid duplication
            source.headerComments = null;
        }
    }
    /**
     * Collect every comment attached to a set-operator lexeme and clear them
     * from the lexeme.
     *
     * @param unionLexeme Lexeme to read from (mutated: comment fields are cleared)
     * @returns Positioned comments followed by plain comments, or null if none
     */
    static extractUnionTokenComments(unionLexeme) {
        const comments = [];
        if (unionLexeme.positionedComments && unionLexeme.positionedComments.length > 0) {
            for (const posComment of unionLexeme.positionedComments) {
                if (posComment.comments && posComment.comments.length > 0) {
                    comments.push(...posComment.comments);
                }
            }
            unionLexeme.positionedComments = undefined;
        }
        if (unionLexeme.comments && unionLexeme.comments.length > 0) {
            comments.push(...unionLexeme.comments);
            unionLexeme.comments = null;
        }
        return comments.length > 0 ? comments : null;
    }
    /**
     * Parse a query, including any chain of set operators, from a lexeme array.
     *
     * @param lexemes Lexeme array
     * @param index Index of the first lexeme of the query
     * @returns `{ value, newIndex }`: the parsed query and the index of the
     *          first lexeme that was not consumed
     * @throws Error on end of input, or when a query does not start with a
     *         keyword in selectCommandSet or with VALUES
     */
    static parseFromLexeme(lexemes, index) {
        let idx = index;
        if (idx >= lexemes.length) {
            throw new Error(`Syntax error: Unexpected end of input at position ${index}.`);
        }
        // Keywords are compared case-insensitively, like the operator check below.
        const firstToken = lexemes[idx].value.toLowerCase();
        const startsWithSelect = this.selectCommandSet.has(firstToken);
        if (!startsWithSelect && firstToken !== 'values') {
            throw new Error(`Syntax error at position ${idx}: Expected 'SELECT' or 'VALUES' keyword but found "${lexemes[idx].value}".`);
        }
        const firstResult = startsWithSelect
            ? this.parseSimpleSelectQuery(lexemes, idx)
            : this.parseValuesQuery(lexemes, idx);
        let query = firstResult.value;
        idx = firstResult.newIndex;
        // Fold each "<operator> <query>" pair into a left-nested binary query.
        while (idx < lexemes.length && this.unionCommandSet.has(lexemes[idx].value.toLowerCase())) {
            const operatorLexeme = lexemes[idx];
            const operator = operatorLexeme.value.toLowerCase();
            const unionComments = this.extractUnionTokenComments(operatorLexeme);
            idx++;
            if (idx >= lexemes.length) {
                throw new Error(`Syntax error at position ${idx}: Expected a query after '${operator.toUpperCase()}' but found end of input.`);
            }
            const nextToken = lexemes[idx].value.toLowerCase();
            let result;
            if (this.selectCommandSet.has(nextToken)) {
                result = this.parseSimpleSelectQuery(lexemes, idx);
            }
            else if (nextToken === 'values') {
                result = this.parseValuesQuery(lexemes, idx);
            }
            else {
                throw new Error(`Syntax error at position ${idx}: Expected 'SELECT' or 'VALUES' after '${operator.toUpperCase()}' but found "${lexemes[idx].value}".`);
            }
            const binaryQuery = new SelectQuery_1.BinarySelectQuery(query, operator, result.value);
            // Transfer headerComments from the left query to the BinarySelectQuery
            this.transferHeaderComments(query, binaryQuery);
            // Comments on the operator token go in front of the right-hand
            // query's own header comments.
            if (unionComments && unionComments.length > 0) {
                result.value.headerComments = result.value.headerComments
                    ? [...unionComments, ...result.value.headerComments]
                    : unionComments;
            }
            query = binaryQuery;
            idx = result.newIndex;
        }
        return { value: query, newIndex: idx };
    }
    /**
     * Parse a single SELECT query (optionally preceded by a WITH clause) and
     * attach the comments collected around it.
     *
     * @param lexemes Lexeme array
     * @param index Index at which the query starts
     * @returns `{ value, newIndex }` with the SimpleSelectQuery and the index
     *          after its last clause
     */
    static parseSimpleSelectQuery(lexemes, index) {
        let idx = index;
        // 1. Parse optional WITH clause and collect header comments
        const { withClauseResult, newIndex: withEndIndex, selectQuery: queryTemplate } = this.parseWithClauseAndComments(lexemes, idx);
        idx = withEndIndex;
        // 2. Parse all SQL clauses sequentially
        const { clauses, newIndex: clausesEndIndex, selectTokenComments } = this.parseAllClauses(lexemes, idx, withClauseResult);
        idx = clausesEndIndex;
        // Comments on the SELECT token are between-clause comments when a WITH
        // clause precedes it, and header comments otherwise.
        if (selectTokenComments && selectTokenComments.length > 0) {
            if (withClauseResult) {
                const existingBetween = queryTemplate.betweenClauseComments != null
                    ? queryTemplate.betweenClauseComments
                    : [];
                const merged = [...existingBetween];
                for (const comment of selectTokenComments) {
                    if (!merged.includes(comment)) {
                        merged.push(comment);
                    }
                }
                queryTemplate.betweenClauseComments = merged;
                // In this case the separately collected prefix comments are dropped.
                queryTemplate.mainSelectPrefixComments = undefined;
            }
            else {
                const existingHeader = queryTemplate.headerComments != null
                    ? queryTemplate.headerComments
                    : [];
                queryTemplate.headerComments = [
                    ...existingHeader,
                    ...selectTokenComments
                ];
            }
        }
        // 3. Create final query with parsed clauses
        const selectQuery = new SelectQuery_1.SimpleSelectQuery({
            withClause: withClauseResult ? withClauseResult.value : null,
            ...clauses
        });
        // 4. Apply collected comments directly to the query
        this.applyCommentsToQuery(selectQuery, queryTemplate, withClauseResult);
        return { value: selectQuery, newIndex: idx };
    }
    /**
     * Parse the optional WITH clause and gather the comments around it.
     *
     * @param lexemes Lexeme array
     * @param index Index at which the query starts
     * @returns `{ withClauseResult, newIndex, selectQuery }`: withClauseResult
     *          is null when there is no WITH clause; selectQuery is a plain
     *          object carrying the collected comment arrays
     */
    static parseWithClauseAndComments(lexemes, index) {
        let idx = index;
        let withClauseResult = null;
        const queryTemplate = {};
        // Collect header comments before WITH or SELECT
        queryTemplate.headerComments = this.collectHeaderComments(lexemes, idx);
        // Skip to WITH or SELECT token
        while (idx < lexemes.length &&
            lexemes[idx].value.toLowerCase() !== 'with' &&
            lexemes[idx].value.toLowerCase() !== 'select') {
            idx++;
        }
        if (idx < lexemes.length && lexemes[idx].value.toLowerCase() === 'with') {
            // 'before' comments on the WITH token belong to the query header
            this.collectWithTokenHeaderComments(lexemes[idx], queryTemplate);
            withClauseResult = WithClauseParser_1.WithClauseParser.parseFromLexeme(lexemes, idx);
            idx = withClauseResult.newIndex;
            // Collect comments between WITH clause and SELECT
            queryTemplate.mainSelectPrefixComments = this.collectMainSelectPrefixComments(lexemes, withClauseResult, idx);
            queryTemplate.betweenClauseComments = this.collectBetweenClauseComments(lexemes, withClauseResult, idx);
        }
        return { withClauseResult, newIndex: idx, selectQuery: queryTemplate };
    }
    /**
     * Parse the SELECT clause followed by each optional clause, in the fixed
     * order: FROM, WHERE, GROUP BY, HAVING, WINDOW, ORDER BY, LIMIT, OFFSET,
     * FETCH, FOR.
     *
     * @param lexemes Lexeme array
     * @param index Index to start from
     * @param withClauseResult Result of the WITH clause parse, or null
     * @returns `{ clauses, newIndex, selectTokenComments }`; a clause that is
     *          absent has the value null
     */
    static parseAllClauses(lexemes, index, withClauseResult) {
        let idx = this.findMainSelectToken(lexemes, index, withClauseResult);
        // Must run before the SELECT clause parser: it clears the token's comments.
        const selectTokenComments = this.collectSelectTokenComments(lexemes, idx);
        const selectClauseResult = SelectClauseParser_1.SelectClauseParser.parseFromLexeme(lexemes, idx);
        idx = selectClauseResult.newIndex;
        const fromClauseResult = this.parseOptionalClause(lexemes, idx, 'from', FromClauseParser_1.FromClauseParser);
        idx = fromClauseResult.newIndex;
        const whereClauseResult = this.parseOptionalClause(lexemes, idx, 'where', WhereClauseParser_1.WhereClauseParser);
        idx = whereClauseResult.newIndex;
        const groupByClauseResult = this.parseOptionalClause(lexemes, idx, 'group by', GroupByParser_1.GroupByClauseParser);
        idx = groupByClauseResult.newIndex;
        const havingClauseResult = this.parseOptionalClause(lexemes, idx, 'having', HavingParser_1.HavingClauseParser);
        idx = havingClauseResult.newIndex;
        const windowClauseResult = this.parseOptionalClause(lexemes, idx, 'window', WindowClauseParser_1.WindowClauseParser);
        idx = windowClauseResult.newIndex;
        const orderByClauseResult = this.parseOptionalClause(lexemes, idx, 'order by', OrderByClauseParser_1.OrderByClauseParser);
        idx = orderByClauseResult.newIndex;
        const limitClauseResult = this.parseOptionalClause(lexemes, idx, 'limit', LimitClauseParser_1.LimitClauseParser);
        idx = limitClauseResult.newIndex;
        const offsetClauseResult = this.parseOptionalClause(lexemes, idx, 'offset', OffsetClauseParser_1.OffsetClauseParser);
        idx = offsetClauseResult.newIndex;
        const fetchClauseResult = this.parseOptionalClause(lexemes, idx, 'fetch', FetchClauseParser_1.FetchClauseParser);
        idx = fetchClauseResult.newIndex;
        const forClauseResult = this.parseOptionalClause(lexemes, idx, 'for', ForClauseParser_1.ForClauseParser);
        idx = forClauseResult.newIndex;
        const clauses = {
            selectClause: selectClauseResult.value,
            fromClause: fromClauseResult.value,
            whereClause: whereClauseResult.value,
            groupByClause: groupByClauseResult.value,
            havingClause: havingClauseResult.value,
            orderByClause: orderByClauseResult.value,
            windowClause: windowClauseResult.value,
            limitClause: limitClauseResult.value,
            offsetClause: offsetClauseResult.value,
            fetchClause: fetchClauseResult.value,
            forClause: forClauseResult.value
        };
        return { clauses, newIndex: idx, selectTokenComments };
    }
    /**
     * Run `parser` only when the lexeme at `index` is `keyword`
     * (compared case-insensitively).
     *
     * @param lexemes Lexeme array
     * @param index Index of the candidate keyword
     * @param keyword Lowercase keyword that introduces the clause
     * @param parser Object exposing `parseFromLexeme(lexemes, index)`
     * @returns The parser's result, or `{ value: null, newIndex: index }` when
     *          the clause is absent
     */
    static parseOptionalClause(lexemes, index, keyword, parser) {
        if (index < lexemes.length && lexemes[index].value.toLowerCase() === keyword) {
            return parser.parseFromLexeme(lexemes, index);
        }
        return { value: null, newIndex: index };
    }
    /**
     * Collect the comments attached to every lexeme that precedes the first
     * WITH or SELECT token. The lexemes are not modified.
     *
     * @param lexemes Lexeme array
     * @param startIndex Index to start scanning from
     * @returns Collected comments (possibly empty)
     */
    static collectHeaderComments(lexemes, startIndex) {
        const headerComments = [];
        let idx = startIndex;
        while (idx < lexemes.length &&
            lexemes[idx].value.toLowerCase() !== 'with' &&
            lexemes[idx].value.toLowerCase() !== 'select') {
            const token = lexemes[idx];
            if (token.positionedComments) {
                for (const posComment of token.positionedComments) {
                    if (posComment.comments) {
                        headerComments.push(...posComment.comments);
                    }
                }
            }
            if (token.comments && token.comments.length > 0) {
                headerComments.push(...token.comments);
            }
            idx++;
        }
        return headerComments;
    }
    /**
     * Move the 'before' positioned comments of the WITH token into
     * `queryTemplate.headerComments`; other positioned comments stay on the token.
     *
     * @param withToken WITH lexeme (mutated)
     * @param queryTemplate Comment holder (mutated)
     */
    static collectWithTokenHeaderComments(withToken, queryTemplate) {
        if (!withToken.positionedComments) {
            return;
        }
        if (!queryTemplate.headerComments) {
            queryTemplate.headerComments = [];
        }
        const remainingPositioned = [];
        for (const posComment of withToken.positionedComments) {
            if (posComment.position === 'before' && posComment.comments) {
                queryTemplate.headerComments.push(...posComment.comments);
            }
            else {
                remainingPositioned.push(posComment);
            }
        }
        withToken.positionedComments = remainingPositioned.length > 0 ? remainingPositioned : undefined;
    }
    /**
     * Collect the WITH clause's trailing comments plus the comments on every
     * lexeme between the end of the WITH clause and the main SELECT token.
     * The lexemes are not modified.
     *
     * @param lexemes Lexeme array
     * @param withClauseResult Result of the WITH clause parse, or null
     * @param currentIndex Fallback index for the main SELECT lookup
     * @returns Collected comments (possibly empty)
     */
    static collectMainSelectPrefixComments(lexemes, withClauseResult, currentIndex) {
        const mainSelectPrefixComments = [];
        if (withClauseResult != null && withClauseResult.value.trailingComments) {
            mainSelectPrefixComments.push(...withClauseResult.value.trailingComments);
        }
        const mainSelectIdx = this.findMainSelectIndex(lexemes, withClauseResult, currentIndex);
        if (withClauseResult && mainSelectIdx > withClauseResult.newIndex) {
            for (let tempIdx = withClauseResult.newIndex; tempIdx < mainSelectIdx; tempIdx++) {
                const token = lexemes[tempIdx];
                if (token.positionedComments) {
                    for (const posComment of token.positionedComments) {
                        if (posComment.comments) {
                            mainSelectPrefixComments.push(...posComment.comments);
                        }
                    }
                }
                if (token.comments && token.comments.length > 0) {
                    mainSelectPrefixComments.push(...token.comments);
                }
            }
        }
        return mainSelectPrefixComments;
    }
    /**
     * Collect, and clear from the lexemes, the comments on the lexemes from
     * the last one of the WITH clause up to (not including) `currentIndex`.
     *
     * @param lexemes Lexeme array (scanned lexemes are mutated)
     * @param withClauseResult Result of the WITH clause parse, or null
     * @param currentIndex Exclusive end index of the scan
     * @returns Collected comments; empty when there is no WITH clause
     */
    static collectBetweenClauseComments(lexemes, withClauseResult, currentIndex) {
        if (!withClauseResult) {
            return [];
        }
        const betweenClauseComments = [];
        const withEndIndex = withClauseResult.newIndex;
        // Start one lexeme early so the final lexeme of the WITH clause is included.
        const scanStartIndex = Math.max(0, withEndIndex - 1);
        for (let i = scanStartIndex; i < currentIndex; i++) {
            const token = lexemes[i];
            if (token.positionedComments && token.positionedComments.length > 0) {
                for (const posComment of token.positionedComments) {
                    if (posComment.comments) {
                        betweenClauseComments.push(...posComment.comments);
                    }
                }
                token.positionedComments = undefined;
            }
            if (token.comments && token.comments.length > 0) {
                betweenClauseComments.push(...token.comments);
                token.comments = null;
            }
        }
        return betweenClauseComments;
    }
    /**
     * Locate the main SELECT token.
     *
     * @param lexemes Lexeme array
     * @param withClauseResult Result of the WITH clause parse, or null
     * @param fallbackIndex Index returned when there is no WITH clause or no
     *        SELECT token follows it
     * @returns Index of the first SELECT token at or after the end of the WITH
     *          clause, or fallbackIndex
     */
    static findMainSelectIndex(lexemes, withClauseResult, fallbackIndex) {
        if (withClauseResult) {
            for (let i = withClauseResult.newIndex; i < lexemes.length; i++) {
                if (lexemes[i].value.toLowerCase() === 'select') {
                    return i;
                }
            }
        }
        return fallbackIndex;
    }
    /**
     * Locate the main SELECT token and verify that it really is SELECT.
     *
     * @param lexemes Lexeme array
     * @param index Fallback index for the lookup
     * @param withClauseResult Result of the WITH clause parse, or null
     * @returns Index of the main SELECT token
     * @throws Error when the located lexeme is missing or is not SELECT
     */
    static findMainSelectToken(lexemes, index, withClauseResult) {
        const mainSelectIdx = this.findMainSelectIndex(lexemes, withClauseResult, index);
        // Compared case-insensitively, matching findMainSelectIndex.
        if (mainSelectIdx >= lexemes.length || lexemes[mainSelectIdx].value.toLowerCase() !== 'select') {
            throw new Error(`Syntax error at position ${mainSelectIdx}: Expected 'SELECT' keyword but found "${mainSelectIdx < lexemes.length ? lexemes[mainSelectIdx].value : 'end of input'}". SELECT queries must start with the SELECT keyword.`);
        }
        return mainSelectIdx;
    }
    /**
     * Collect the plain comments and the 'before' positioned comments of the
     * SELECT token, then clear both comment fields on the token. Positioned
     * comments with any other position are cleared without being collected.
     *
     * @param lexemes Lexeme array
     * @param selectIndex Index of the SELECT token (mutated)
     * @returns Collected comments (possibly empty)
     */
    static collectSelectTokenComments(lexemes, selectIndex) {
        const selectToken = lexemes[selectIndex];
        const selectComments = [];
        if (selectToken.comments && selectToken.comments.length > 0) {
            selectComments.push(...selectToken.comments);
            selectToken.comments = null;
        }
        if (selectToken.positionedComments && selectToken.positionedComments.length > 0) {
            for (const posComment of selectToken.positionedComments) {
                if (posComment.position === 'before' && posComment.comments) {
                    selectComments.push(...posComment.comments);
                }
            }
            selectToken.positionedComments = undefined;
        }
        return selectComments;
    }
    /**
     * Copy the collected comments onto the finished query.
     *
     * headerComments is assigned only when non-empty. mainSelectPrefixComments
     * and then betweenClauseComments are each merged into `selectQuery.comments`,
     * new comments first, with duplicate strings dropped.
     *
     * @param selectQuery Query to annotate (mutated)
     * @param queryTemplate Holder of the collected comment arrays
     * @param withClauseResult Not used by this method; kept for interface stability
     */
    static applyCommentsToQuery(selectQuery, queryTemplate, withClauseResult) {
        if (queryTemplate.headerComments && queryTemplate.headerComments.length > 0) {
            selectQuery.headerComments = queryTemplate.headerComments;
        }
        // Merge helper to avoid duplicate between-clause comments
        const mergeBetweenComments = (source) => {
            if (!source || source.length === 0) {
                return;
            }
            const existing = selectQuery.comments != null ? selectQuery.comments : [];
            const merged = [];
            for (const comment of source) {
                if (!merged.includes(comment)) {
                    merged.push(comment);
                }
            }
            for (const comment of existing) {
                if (!merged.includes(comment)) {
                    merged.push(comment);
                }
            }
            selectQuery.comments = merged;
        };
        mergeBetweenComments(queryTemplate.mainSelectPrefixComments);
        mergeBetweenComments(queryTemplate.betweenClauseComments);
    }
    /**
     * Parse a VALUES query by delegating to ValuesQueryParser.
     *
     * @param lexemes Lexeme array
     * @param index Index of the VALUES keyword
     * @returns `{ value, newIndex }` as produced by ValuesQueryParser
     */
    static parseValuesQuery(lexemes, index) {
        const result = ValuesQueryParser_1.ValuesQueryParser.parseFromLexeme(lexemes, index);
        return { value: result.value, newIndex: result.newIndex };
    }
    /**
     * Get the CTE name at the specified cursor position.
     *
     * This method provides a simple interface for retrieving the CTE name
     * based on a 1D cursor position in the SQL text.
     *
     * @deprecated Use CTERegionDetector.getCursorCte() instead for better API consistency
     * @param sql - The SQL string to analyze
     * @param cursorPosition - The cursor position (0-based character offset)
     * @returns The CTE name if cursor is in a CTE, null otherwise
     *
     * @example
     * ```typescript
     * const sql = `WITH users AS (SELECT * FROM table) SELECT * FROM users`;
     * const cteName = SelectQueryParser.getCursorCte(sql, 25);
     * console.log(cteName); // "users"
     * ```
     */
    static getCursorCte(sql, cursorPosition) {
        return CTERegionDetector_1.CTERegionDetector.getCursorCte(sql, cursorPosition);
    }
    /**
     * Get the CTE name at the specified 2D coordinates (line, column).
     *
     * This method provides a convenient interface for editor integrations
     * that work with line/column coordinates instead of character positions.
     *
     * @deprecated Use CTERegionDetector.getCursorCteAt() instead for better API consistency
     * @param sql - The SQL string to analyze
     * @param line - The line number (1-based)
     * @param column - The column number (1-based)
     * @returns The CTE name if cursor is in a CTE, null otherwise
     *
     * @example
     * ```typescript
     * const sql = `WITH users AS (\n SELECT * FROM table\n) SELECT * FROM users`;
     * const cteName = SelectQueryParser.getCursorCteAt(sql, 2, 5);
     * console.log(cteName); // "users"
     * ```
     */
    static getCursorCteAt(sql, line, column) {
        return CTERegionDetector_1.CTERegionDetector.getCursorCteAt(sql, line, column);
    }
    /**
     * Convert character position to line/column coordinates.
     *
     * @deprecated Use CTERegionDetector.positionToLineColumn() instead for better API consistency
     * @param text - The text to analyze
     * @param position - The character position (0-based)
     * @returns Object with line and column (1-based), or null if invalid position
     */
    static positionToLineColumn(text, position) {
        return CTERegionDetector_1.CTERegionDetector.positionToLineColumn(text, position);
    }
}
exports.SelectQueryParser = SelectQueryParser;
// Set operators that may join two queries. Each base operator is registered
// together with its "ALL" form, in lowercase to match how lexeme values are
// compared.
SelectQueryParser.unionCommandSet = new Set();
for (const setOperator of ["union", "intersect", "except"]) {
    SelectQueryParser.unionCommandSet.add(setOperator);
    SelectQueryParser.unionCommandSet.add(`${setOperator} all`);
}
// Keywords that may begin a simple SELECT query.
SelectQueryParser.selectCommandSet = new Set(["with", "select"]);
//# sourceMappingURL=SelectQueryParser.js.map