rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
314 lines • 11.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.LexemeCursor = void 0;
const Lexeme_1 = require("../models/Lexeme");
const stringUtils_1 = require("./stringUtils");
/**
 * Utility class for cursor-to-lexeme mapping in SQL text.
 *
 * Tokenizes a SQL string into lexemes that carry their start/end character
 * offsets, and resolves a cursor position (either a 0-based character offset
 * or a 1-based line/column pair) to the lexeme under it. Whitespace and
 * comments are skipped by delegating to `StringUtils.readWhiteSpaceAndComment`.
 *
 * All methods are static; the class holds no instance state. The keyword set
 * `LexemeCursor.SQL_COMMANDS` is attached to the class after its declaration.
 *
 * @example Basic usage
 * ```typescript
 * const sql = "SELECT id FROM users WHERE active = true";
 * const lexeme = LexemeCursor.findLexemeAtPosition(sql, 7); // position at 'id'
 * console.log(lexeme?.value); // 'id'
 * ```
 */
class LexemeCursor {
    /**
     * Find the lexeme at the specified line and column position.
     *
     * Uses 1-based line and column indexing to match editor conventions.
     *
     * @param sql - The SQL string to analyze
     * @param position - Line and column position (1-based)
     * @returns The lexeme at the position, or null if not found
     *
     * @example
     * ```typescript
     * const sql = "SELECT user_id FROM orders";
     * const lexeme = LexemeCursor.findLexemeAtLineColumn(sql, { line: 1, column: 8 });
     * console.log(lexeme?.value); // 'user_id'
     * ```
     */
    static findLexemeAtLineColumn(sql, position) {
        const charOffset = this.lineColumnToCharOffset(sql, position);
        if (charOffset === -1) {
            return null;
        }
        return this.findLexemeAtPosition(sql, charOffset);
    }
    /**
     * Find the lexeme at the specified cursor position.
     *
     * A lexeme matches when `startPosition <= cursorPosition < endPosition`.
     * Returns null when the cursor is outside the text, when `sql` is not a
     * string, or when no lexeme covers the position (e.g. skipped whitespace).
     *
     * @param sql - The SQL string to analyze
     * @param cursorPosition - The cursor position (0-based character offset)
     * @returns The lexeme at the position, or null if not found
     *
     * @example
     * ```typescript
     * const sql = "SELECT user_id FROM orders";
     * const lexeme = LexemeCursor.findLexemeAtPosition(sql, 7);
     * console.log(lexeme?.value); // 'user_id'
     * ```
     */
    static findLexemeAtPosition(sql, cursorPosition) {
        // Mirror getAllLexemesWithPosition: a missing SQL string means "nothing found",
        // not a TypeError.
        if (typeof sql !== 'string') {
            return null;
        }
        if (cursorPosition < 0 || cursorPosition >= sql.length) {
            return null;
        }
        const lexemes = this.getAllLexemesWithPosition(sql);
        for (const lexeme of lexemes) {
            if (lexeme.position &&
                cursorPosition >= lexeme.position.startPosition &&
                cursorPosition < lexeme.position.endPosition) {
                return lexeme;
            }
        }
        return null;
    }
    /**
     * Get all lexemes with position information from SQL text.
     *
     * Walks the string left to right: skips whitespace/comments, parses one
     * token, and continues from that token's end. Characters that do not start
     * any known token are skipped one at a time.
     *
     * @param sql - The SQL string to tokenize
     * @returns Array of lexemes with position information; an empty array for
     *          blank or non-string input, or if tokenizing throws
     *
     * @example
     * ```typescript
     * const sql = "SELECT id FROM users";
     * const lexemes = LexemeCursor.getAllLexemesWithPosition(sql);
     * lexemes.forEach(l => console.log(`${l.value} at ${l.position.startPosition}`));
     * ```
     */
    static getAllLexemesWithPosition(sql) {
        if (typeof sql !== 'string' || sql.trim() === '') {
            return [];
        }
        try {
            const lexemes = [];
            let position = 0;
            while (position < sql.length) {
                position = this.skipWhitespaceAndComments(sql, position);
                if (position >= sql.length) {
                    break;
                }
                const lexeme = this.parseNextToken(sql, position);
                if (lexeme) {
                    lexemes.push(lexeme);
                    position = lexeme.position.endPosition;
                }
                else {
                    position++; // Skip unknown character
                }
            }
            return lexemes;
        }
        catch (error) {
            // Deliberate best-effort: editor features should degrade to
            // "no lexemes" rather than crash on text that cannot be tokenized.
            return [];
        }
    }
    /**
     * Skip whitespace and comments, returning the new position.
     *
     * @param sql - The SQL string
     * @param position - Offset to start skipping from
     * @returns The `position` reported by `StringUtils.readWhiteSpaceAndComment`
     */
    static skipWhitespaceAndComments(sql, position) {
        const result = stringUtils_1.StringUtils.readWhiteSpaceAndComment(sql, position);
        return result.position;
    }
    /**
     * Parse the next token starting at the given position.
     *
     * Dispatches on the first character: quotes start a literal, operator
     * characters (including parentheses, `.` and `*`) go to the operator
     * parser, `,` is a comma, and ASCII word characters start a word token.
     *
     * @param sql - The SQL string
     * @param startPos - Offset of the token's first character
     * @returns The parsed lexeme, or null if the character starts no known token
     */
    static parseNextToken(sql, startPos) {
        const char = sql[startPos];
        // String literals
        if (char === "'" || char === '"') {
            return this.parseStringLiteral(sql, startPos);
        }
        // Operators and special characters
        if (/[=<>!+\-*/%().]/.test(char)) {
            return this.parseOperator(sql, startPos);
        }
        // Comma
        if (char === ',') {
            return this.createLexeme(Lexeme_1.TokenType.Comma, ',', startPos, startPos + 1);
        }
        // Word tokens (identifiers, commands, functions)
        if (/[a-zA-Z0-9_]/.test(char)) {
            return this.parseWordToken(sql, startPos);
        }
        return null;
    }
    /**
     * Parse a quoted token (single- or double-quoted).
     *
     * A doubled quote character inside the token (`''` or `""`) is the SQL
     * escape for one quote and does not terminate the token, so `'it''s'` is a
     * single lexeme. An unterminated token extends to the end of the text.
     *
     * @param sql - The SQL string
     * @param startPos - Offset of the opening quote
     * @returns A Literal lexeme whose value includes the surrounding quotes
     */
    static parseStringLiteral(sql, startPos) {
        const quote = sql[startPos];
        let position = startPos + 1;
        while (position < sql.length) {
            if (sql[position] !== quote) {
                position++;
                continue;
            }
            if (sql[position + 1] === quote) {
                position += 2; // doubled quote = escaped quote, stay inside the literal
                continue;
            }
            position++; // closing quote
            break;
        }
        const token = sql.slice(startPos, position);
        return this.createLexeme(Lexeme_1.TokenType.Literal, token, startPos, position);
    }
    /**
     * Parse operator tokens.
     *
     * Consumes one character, plus a second one when both are drawn from
     * `= < > !` (e.g. `<=`, `>=`, `!=`, `<>`).
     *
     * @param sql - The SQL string
     * @param startPos - Offset of the operator's first character
     * @returns The operator (or parenthesis / `*`) lexeme
     */
    static parseOperator(sql, startPos) {
        let token = sql[startPos];
        let position = startPos + 1;
        // Handle compound operators (<=, >=, !=, etc.)
        if (position < sql.length && /[=<>!]/.test(sql[position]) && /[=<>!]/.test(token)) {
            token += sql[position++];
        }
        const tokenType = this.getOperatorTokenType(token);
        return this.createLexeme(tokenType, token, startPos, position);
    }
    /**
     * Parse word tokens (identifiers, commands, functions).
     *
     * A word is a maximal run of ASCII letters, digits and underscores.
     * Command and function names are lower-cased; identifiers keep their case.
     *
     * @param sql - The SQL string
     * @param startPos - Offset of the word's first character
     * @returns The word lexeme
     */
    static parseWordToken(sql, startPos) {
        let position = startPos;
        while (position < sql.length && /[a-zA-Z0-9_]/.test(sql[position])) {
            position++;
        }
        const token = sql.slice(startPos, position);
        const tokenType = this.getWordTokenType(token, sql, position);
        const value = this.shouldLowercase(tokenType) ? token.toLowerCase() : token;
        return this.createLexeme(tokenType, value, startPos, position);
    }
    /**
     * Determine the token type for operators.
     *
     * @param token - The operator text
     * @returns OpenParen / CloseParen for parentheses, Identifier for `*`,
     *          Operator otherwise
     */
    static getOperatorTokenType(token) {
        switch (token) {
            case '(': return Lexeme_1.TokenType.OpenParen;
            case ')': return Lexeme_1.TokenType.CloseParen;
            case '*': return Lexeme_1.TokenType.Identifier; // Treat * as identifier for SELECT *
            default: return Lexeme_1.TokenType.Operator;
        }
    }
    /**
     * Determine the token type for word tokens.
     *
     * @param token - The word text as written in the SQL
     * @param sql - The SQL string
     * @param position - Offset just past the end of the word
     * @returns Command if the lower-cased word is in SQL_COMMANDS, Function if
     *          the next non-skipped character is `(`, Identifier otherwise
     */
    static getWordTokenType(token, sql, position) {
        const lowerToken = token.toLowerCase();
        // Check if it's a command
        if (this.SQL_COMMANDS.has(lowerToken)) {
            return Lexeme_1.TokenType.Command;
        }
        // Check if it's followed by parentheses (function)
        const nextNonWhitespacePos = this.skipWhitespaceAndComments(sql, position);
        if (nextNonWhitespacePos < sql.length && sql[nextNonWhitespacePos] === '(') {
            return Lexeme_1.TokenType.Function;
        }
        return Lexeme_1.TokenType.Identifier;
    }
    /**
     * Check if token value should be lowercased.
     *
     * @param tokenType - Token type, tested with bitwise AND against the
     *                    Command, Operator and Function flags
     * @returns true when any of those flags is set
     */
    static shouldLowercase(tokenType) {
        return !!(tokenType & Lexeme_1.TokenType.Command) ||
            !!(tokenType & Lexeme_1.TokenType.Operator) ||
            !!(tokenType & Lexeme_1.TokenType.Function);
    }
    /**
     * Create a lexeme with position information.
     *
     * @param type - Token type
     * @param value - Token text
     * @param startPos - Offset of the first character (inclusive)
     * @param endPos - Offset just past the last character (exclusive)
     * @returns A lexeme object with `comments` set to null
     */
    static createLexeme(type, value, startPos, endPos) {
        return {
            type,
            value,
            comments: null,
            position: {
                startPosition: startPos,
                endPosition: endPos
            }
        };
    }
    /**
     * Convert line and column position to character offset.
     *
     * Lines are separated by `\n`. A column one past the end of the line is
     * accepted and maps to the offset of that line's newline (or the end of
     * the text on the last line).
     *
     * @param sql - The SQL string
     * @param position - Line and column position (1-based)
     * @returns Character offset (0-based), or -1 if position is out of bounds
     *          or the arguments are missing
     */
    static lineColumnToCharOffset(sql, position) {
        if (typeof sql !== 'string' || !position) {
            return -1;
        }
        if (position.line < 1 || position.column < 1) {
            return -1;
        }
        const lines = sql.split('\n');
        if (position.line > lines.length) {
            return -1; // Line out of bounds
        }
        const targetLine = lines[position.line - 1];
        if (position.column > targetLine.length + 1) {
            return -1; // Column out of bounds
        }
        // Sum the lengths of all preceding lines, each followed by one newline.
        let offset = 0;
        for (let i = 0; i < position.line - 1; i++) {
            offset += lines[i].length + 1; // +1 for newline character
        }
        offset += position.column - 1;
        return offset;
    }
    /**
     * Convert character offset to line and column position.
     *
     * Note: an offset that points at a newline character is reported as the
     * start of the following line (column 1), not as the end of its own line.
     *
     * @param sql - The SQL string
     * @param charOffset - Character offset (0-based); `sql.length` is allowed
     * @returns Line and column position (1-based), or null if offset is out of
     *          bounds or `sql` is not a string
     */
    static charOffsetToLineColumn(sql, charOffset) {
        if (typeof sql !== 'string') {
            return null;
        }
        if (charOffset < 0 || charOffset > sql.length) {
            return null;
        }
        const lines = sql.split('\n');
        let currentOffset = 0;
        for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
            const lineLength = lines[lineIndex].length;
            // Check if the offset is within this line
            if (charOffset < currentOffset + lineLength) {
                return {
                    line: lineIndex + 1,
                    column: charOffset - currentOffset + 1
                };
            }
            // Check if the offset is exactly at the end of this line (newline position)
            if (charOffset === currentOffset + lineLength && lineIndex < lines.length - 1) {
                // Position at newline - return start of next line
                return {
                    line: lineIndex + 2,
                    column: 1
                };
            }
            currentOffset += lineLength + 1; // +1 for newline character
        }
        // Handle position at the very end of the text
        if (charOffset === sql.length) {
            const lastLine = lines[lines.length - 1];
            return {
                line: lines.length,
                column: lastLine.length + 1
            };
        }
        return null;
    }
}
exports.LexemeCursor = LexemeCursor;
/**
 * Lower-cased words that LexemeCursor.getWordTokenType classifies as
 * TokenType.Command. Lookup is done on the lower-cased token, so matching is
 * case-insensitive.
 */
LexemeCursor.SQL_COMMANDS = new Set([
    'select',
    'from',
    'where',
    'and',
    'or',
    'order',
    'by',
    'group',
    'having',
    'limit',
    'offset',
    'as',
    'on',
    'inner',
    'left',
    'right',
    'join',
    'union',
    'insert',
    'update',
    'delete',
    'into',
    'values',
    'set'
]);
//# sourceMappingURL=LexemeCursor.js.map