rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
464 lines • 17.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CTERegionDetector = void 0;
const Lexeme_1 = require("../models/Lexeme");
const LexemeCursor_1 = require("./LexemeCursor");
/**
* Utility class for detecting CTE (Common Table Expression) regions and extracting executable SQL.
*
* Designed for SQL editor features where users want to execute specific CTE parts based on cursor position.
* This enables editors to provide "run current section" functionality that intelligently executes
* either the CTE the cursor is in, or the main query.
*
* @example Basic usage - Analyze cursor position
* ```typescript
* const sql = `
* WITH users_cte AS (
* SELECT id, name FROM users WHERE active = true
* )
* SELECT * FROM users_cte ORDER BY name
* `;
*
* const cursorPosition = 50; // Inside the CTE
* const analysis = CTERegionDetector.analyzeCursorPosition(sql, cursorPosition);
*
* if (analysis.isInCTE) {
* console.log(`Execute CTE: ${analysis.cteRegion.name}`);
* executeSQL(analysis.executableSQL); // Runs just the CTE SELECT
* }
* ```
*
* @example Get all executable sections
* ```typescript
* const positions = CTERegionDetector.getCTEPositions(sql);
* // Returns: [
* // { name: 'users_cte', startPosition: 17, type: 'CTE' },
* // { name: 'MAIN_QUERY', startPosition: 120, type: 'MAIN_QUERY' }
* // ]
* ```
*/
class CTERegionDetector {
    /**
     * Analyze a cursor position and report which executable section it belongs to.
     *
     * A cursor is considered "inside" a CTE from the CTE name up to (but not
     * including) the start of the next CTE, or the main SELECT keyword for the
     * last CTE. Anything else is treated as the main query.
     *
     * @param sql - The complete SQL string to analyze
     * @param cursorPosition - The cursor position (0-based character offset)
     * @returns `{ isInCTE, cteRegion, executableSQL }`; `cteRegion` is null when
     *          the cursor is not inside a CTE
     *
     * @example
     * ```typescript
     * const sql = `WITH users AS (SELECT * FROM table) SELECT * FROM users`;
     * const analysis = CTERegionDetector.analyzeCursorPosition(sql, 25);
     * executeSQL(analysis.executableSQL); // CTE body or main query
     * ```
     */
    static analyzeCursorPosition(sql, cursorPosition) {
        const cteRegions = this.extractCTERegions(sql);
        const extendedRegions = this.calculateExtendedCTEBoundaries(sql, cteRegions);
        // Extended boundaries are half-open: [startPosition, extendedEndPosition)
        const currentCTE = extendedRegions.find(region => cursorPosition >= region.startPosition &&
            cursorPosition < region.extendedEndPosition);
        if (currentCTE) {
            return {
                isInCTE: true,
                // Expose only the public region fields (not extendedEndPosition)
                cteRegion: {
                    name: currentCTE.name,
                    startPosition: currentCTE.startPosition,
                    endPosition: currentCTE.endPosition,
                    sqlContent: currentCTE.sqlContent
                },
                executableSQL: currentCTE.sqlContent
            };
        }
        // Cursor is outside every CTE: hand back the main query text
        return {
            isInCTE: false,
            cteRegion: null,
            executableSQL: this.extractMainQuery(sql, cteRegions)
        };
    }
    /**
     * Get the CTE name at the specified cursor position (simplified interface).
     *
     * Any error raised while analyzing the SQL is deliberately swallowed and
     * reported as "no CTE" (null), so callers never need a try/catch.
     *
     * @param sql - The SQL string to analyze
     * @param cursorPosition - The cursor position (0-based character offset)
     * @returns The CTE name if the cursor is in a CTE, null otherwise
     */
    static getCursorCte(sql, cursorPosition) {
        try {
            const analysis = this.analyzeCursorPosition(sql, cursorPosition);
            if (!analysis.isInCTE || !analysis.cteRegion) {
                return null;
            }
            // An empty name is reported as null, same as "not in a CTE"
            return analysis.cteRegion.name || null;
        }
        catch (error) {
            return null;
        }
    }
    /**
     * Get the CTE name at the specified 2D coordinates (line, column).
     *
     * @param sql - The SQL string to analyze
     * @param line - The line number (1-based)
     * @param column - The column number (1-based)
     * @returns The CTE name if the cursor is in a CTE; null when it is not,
     *          when the coordinates are invalid, or when analysis fails
     */
    static getCursorCteAt(sql, line, column) {
        try {
            const position = this.lineColumnToPosition(sql, line, column);
            if (position === -1) {
                return null;
            }
            return this.getCursorCte(sql, position);
        }
        catch (error) {
            return null;
        }
    }
    /**
     * Convert line/column coordinates to a character position.
     *
     * Lines are separated by '\n'. The column may point one past the last
     * character of the line (i.e. at the end of the line).
     *
     * @param text - The text to analyze
     * @param line - The line number (1-based)
     * @param column - The column number (1-based)
     * @returns The character position (0-based), or -1 if the coordinates are
     *          invalid (non-integer, below 1, or outside the text)
     */
    static lineColumnToPosition(text, line, column) {
        // Non-integer / NaN coordinates cannot address a character; without this
        // guard they would index `lines` with a fractional key and throw.
        if (!Number.isInteger(line) || !Number.isInteger(column)) {
            return -1;
        }
        if (line < 1 || column < 1) {
            return -1;
        }
        const lines = text.split('\n');
        if (line > lines.length) {
            return -1;
        }
        const targetLine = lines[line - 1];
        if (column > targetLine.length + 1) { // +1 to allow position at end of line
            return -1;
        }
        // Sum the lengths of all preceding lines, each followed by one '\n'
        let position = 0;
        for (let i = 0; i < line - 1; i++) {
            position += lines[i].length + 1;
        }
        return position + (column - 1); // column is 1-based, position is 0-based
    }
    /**
     * Convert a character position to line/column coordinates.
     *
     * @param text - The text to analyze
     * @param position - The character position (0-based); `text.length` is allowed
     * @returns Object with `line` and `column` (both 1-based), or null if the
     *          position is negative or beyond the end of the text
     */
    static positionToLineColumn(text, position) {
        if (position < 0 || position > text.length) {
            return null;
        }
        const linesBefore = text.substring(0, position).split('\n');
        return {
            line: linesBefore.length,
            column: linesBefore[linesBefore.length - 1].length + 1
        };
    }
    /**
     * Extract all CTE regions from SQL text with their boundaries and content.
     *
     * Scans the lexemes for a WITH keyword, then for every `identifier AS ( ... )`
     * pattern records the name, the start of the name, the end of the closing
     * parenthesis and the text between the parentheses. Scanning stops at the
     * first top-level SELECT after the WITH clause.
     *
     * @param sql - The SQL string to analyze
     * @returns Array of `{ name, startPosition, endPosition, sqlContent }`
     *
     * @example
     * ```typescript
     * const regions = CTERegionDetector.extractCTERegions(
     *   `WITH users AS (SELECT * FROM people) SELECT * FROM users`
     * );
     * // regions[0].name === 'users'
     * // regions[0].sqlContent === 'SELECT * FROM people'
     * ```
     */
    static extractCTERegions(sql) {
        const lexemes = LexemeCursor_1.LexemeCursor.getAllLexemesWithPosition(sql);
        const cteRegions = [];
        let i = 0;
        let inWithClause = false;
        while (i < lexemes.length) {
            const lexeme = lexemes[i];
            const keyword = lexeme.value.toLowerCase();
            // Detect WITH clause start
            if (keyword === 'with' && !inWithClause) {
                inWithClause = true;
                i++;
                continue;
            }
            // Skip RECURSIVE keyword if present
            if (inWithClause && keyword === 'recursive') {
                i++;
                continue;
            }
            // Detect CTE definition (identifier followed by AS)
            if (inWithClause &&
                lexeme.type === Lexeme_1.TokenType.Identifier &&
                i + 1 < lexemes.length &&
                lexemes[i + 1].value.toLowerCase() === 'as') {
                // Find the opening parenthesis after AS (skips any tokens in between)
                let parenIndex = i + 2;
                while (parenIndex < lexemes.length && lexemes[parenIndex].value !== '(') {
                    parenIndex++;
                }
                if (parenIndex < lexemes.length) {
                    const cteEndInfo = this.findMatchingParen(lexemes, parenIndex);
                    if (cteEndInfo) {
                        cteRegions.push({
                            name: lexeme.value,
                            startPosition: lexeme.position.startPosition,
                            endPosition: cteEndInfo.endPosition,
                            sqlContent: this.extractCTESQL(sql, lexemes, parenIndex, cteEndInfo.index)
                        });
                        // Resume right after the closing parenthesis of this CTE
                        i = cteEndInfo.index + 1;
                        continue;
                    }
                }
            }
            // A top-level SELECT marks the main query, i.e. the end of the WITH clause
            if (inWithClause && keyword === 'select' && this.isMainQuerySelect(lexemes, i)) {
                break;
            }
            i++;
        }
        return cteRegions;
    }
    /**
     * Find the closing parenthesis matching the one at `openParenIndex`.
     *
     * @param lexemes - Lexemes with `value` and `position.endPosition`
     * @param openParenIndex - Index of the opening '(' lexeme
     * @returns `{ index, endPosition }` of the matching ')', or null if unbalanced
     */
    static findMatchingParen(lexemes, openParenIndex) {
        let depth = 1;
        for (let i = openParenIndex + 1; i < lexemes.length; i++) {
            const value = lexemes[i].value;
            if (value === '(') {
                depth++;
            }
            else if (value === ')') {
                depth--;
                if (depth === 0) {
                    return {
                        index: i,
                        endPosition: lexemes[i].position.endPosition
                    };
                }
            }
        }
        return null;
    }
    /**
     * Extract the SQL content of a CTE (the text between its parentheses).
     *
     * @param sql - The full SQL text the lexeme positions refer to
     * @param lexemes - Lexemes with `position.startPosition` / `endPosition`
     * @param openParenIndex - Index of the CTE's opening '(' lexeme
     * @param closeParenIndex - Index of the matching ')' lexeme
     * @returns The trimmed text between the parentheses ('' when there are no
     *          lexemes between them)
     */
    static extractCTESQL(sql, lexemes, openParenIndex, closeParenIndex) {
        // `()` has no inner lexeme; avoid reading positions of the parens themselves
        if (closeParenIndex <= openParenIndex + 1) {
            return '';
        }
        const startPos = lexemes[openParenIndex + 1].position.startPosition;
        const endPos = lexemes[closeParenIndex - 1].position.endPosition;
        return sql.substring(startPos, endPos).trim();
    }
    /**
     * Check if a SELECT lexeme is the main query SELECT (not inside parentheses).
     *
     * @param lexemes - Lexemes with a `value` property
     * @param selectIndex - Index of the SELECT lexeme to test
     * @returns true when every '(' before the SELECT has been closed
     */
    static isMainQuerySelect(lexemes, selectIndex) {
        // Walking backwards, ')' opens a level and '(' closes one; a non-zero
        // balance means the SELECT sits inside an unclosed parenthesis.
        let depth = 0;
        for (let i = selectIndex - 1; i >= 0; i--) {
            const value = lexemes[i].value;
            if (value === ')') {
                depth++;
            }
            else if (value === '(') {
                depth--;
            }
        }
        return depth === 0;
    }
    /**
     * Calculate extended CTE boundaries for better cursor position detection.
     *
     * Each region gains an `extendedEndPosition`: the start of the next CTE, or
     * for the last CTE the start of the main query. This makes the gaps between
     * sections belong to the preceding CTE.
     *
     * @param sql - The SQL string the regions were extracted from
     * @param cteRegions - Regions as returned by `extractCTERegions`
     * @returns New region objects with an added `extendedEndPosition`
     */
    static calculateExtendedCTEBoundaries(sql, cteRegions) {
        if (cteRegions.length === 0) {
            return [];
        }
        const lastIndex = cteRegions.length - 1;
        return cteRegions.map((region, index) => {
            const extendedEndPosition = index < lastIndex
                ? cteRegions[index + 1].startPosition
                : this.findMainQueryStart(sql, region.endPosition);
            return {
                ...region,
                extendedEndPosition
            };
        });
    }
    /**
     * Locate the first standalone SELECT keyword at or after `fromPosition`.
     *
     * The search runs on the original string with a case-insensitive regex, so
     * the returned index is always valid for `sql` (lowercasing a copy can change
     * its length for some characters). Word boundaries keep identifiers that
     * merely contain "select" (e.g. `selections`) from matching. This is a plain
     * text search: it does not skip comments or string literals.
     *
     * @param sql - The SQL text to search
     * @param fromPosition - Offset to start searching from (0-based)
     * @returns Index of the keyword in `sql`, or -1 if there is none
     */
    static findSelectKeywordIndex(sql, fromPosition) {
        const selectPattern = /\bselect\b/gi;
        selectPattern.lastIndex = Math.max(0, fromPosition);
        const match = selectPattern.exec(sql);
        return match ? match.index : -1;
    }
    /**
     * Find the start position of the main query after the last CTE.
     *
     * @param sql - The SQL text to search
     * @param afterPosition - Offset just past the last CTE
     * @returns Index of the main SELECT keyword, or `sql.length` if none is found
     */
    static findMainQueryStart(sql, afterPosition) {
        const selectIndex = this.findSelectKeywordIndex(sql, afterPosition);
        return selectIndex !== -1 ? selectIndex : sql.length;
    }
    /**
     * Extract the main query part (non-CTE SQL).
     *
     * @param sql - The complete SQL text
     * @param cteRegions - Regions as returned by `extractCTERegions`
     * @returns The whole trimmed SQL when there are no CTEs; otherwise the
     *          trimmed text from the first SELECT after the last CTE ('' when
     *          no SELECT follows the last CTE)
     */
    static extractMainQuery(sql, cteRegions) {
        if (cteRegions.length === 0) {
            return sql.trim();
        }
        const lastCTE = cteRegions[cteRegions.length - 1];
        const selectIndex = this.findSelectKeywordIndex(sql, lastCTE.endPosition);
        return selectIndex === -1 ? '' : sql.substring(selectIndex).trim();
    }
    /**
     * Get a list of all executable sections (CTEs and main query) with their
     * start positions, e.g. for section dropdowns or "jump to" navigation.
     *
     * The MAIN_QUERY entry points at the SELECT keyword itself, so a cursor
     * placed at that position is classified as main query by
     * `analyzeCursorPosition`. When CTEs exist but no SELECT follows them, no
     * MAIN_QUERY entry is produced.
     *
     * @param sql - The SQL string to analyze
     * @returns Array of `{ name, startPosition, type }` where `type` is
     *          'CTE' or 'MAIN_QUERY'
     *
     * @example
     * ```typescript
     * const positions = CTERegionDetector.getCTEPositions(sql);
     * positions.forEach(section => {
     *     addMenuItem(`${section.type}: ${section.name}`, () => {
     *         jumpToPosition(section.startPosition);
     *     });
     * });
     * ```
     */
    static getCTEPositions(sql) {
        const cteRegions = this.extractCTERegions(sql);
        if (cteRegions.length === 0) {
            // No CTEs, entire SQL is main query
            return [{
                    name: 'MAIN_QUERY',
                    startPosition: 0,
                    type: 'MAIN_QUERY'
                }];
        }
        const results = cteRegions.map(region => ({
            name: region.name,
            startPosition: region.startPosition,
            type: 'CTE'
        }));
        const lastCTE = cteRegions[cteRegions.length - 1];
        const mainQueryPos = this.findSelectKeywordIndex(sql, lastCTE.endPosition);
        if (mainQueryPos !== -1) {
            results.push({
                name: 'MAIN_QUERY',
                startPosition: mainQueryPos,
                type: 'MAIN_QUERY'
            });
        }
        return results;
    }
}
exports.CTERegionDetector = CTERegionDetector;
//# sourceMappingURL=CTERegionDetector.js.map