rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
191 lines • 7.88 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.StringUtils = void 0;
const charLookupTable_1 = require("./charLookupTable");
/**
* Utilities for string operations during tokenization
*/
class StringUtils {
    /**
     * Creates a visual representation of an error position in text.
     *
     * The result has two lines: up to 5 characters on either side of the error
     * position, followed by a line holding a caret (`^`) under the error position.
     *
     * @param {string} input The input text
     * @param {number} errPosition The error position (index into `input`)
     * @returns {string} The text excerpt and a caret line, joined by `\n`
     */
    static getDebugPositionInfo(input, errPosition) {
        // Clamp the position into [0, input.length]. Without this, a negative position
        // makes String.prototype.repeat throw a RangeError and turns the slice end into
        // a negative (from-the-end) index.
        const anchor = Math.min(Math.max(0, errPosition), input.length);
        const start = Math.max(0, anchor - 5);
        const end = Math.min(input.length, anchor + 5);
        const excerpt = input.slice(start, end);
        // The caret is indented by the number of excerpt characters before the error.
        const caret = ' '.repeat(anchor - start) + '^';
        return `${excerpt}\n${caret}`;
    }

    /**
     * Skips white space characters (space, tab, line feed, carriage return).
     *
     * @param {string} input The input text
     * @param {number} position Index to start scanning from
     * @returns {number} Index of the first non-whitespace character at or after
     *     `position`, or `input.length` if only whitespace remains
     */
    static skipWhiteSpace(input, position) {
        const length = input.length;
        let cursor = position;
        // Fast path: consume runs of 4 spaces at once (intended for 4-space indents).
        // Character codes are compared directly so that no substring is allocated.
        // Anything that is not an exact run of four spaces falls through to the
        // per-character loop below, so the result does not depend on this path.
        while (cursor + 4 <= length &&
            input.charCodeAt(cursor) === 32 &&
            input.charCodeAt(cursor + 1) === 32 &&
            input.charCodeAt(cursor + 2) === 32 &&
            input.charCodeAt(cursor + 3) === 32) {
            cursor += 4;
        }
        // Then skip the remaining whitespace one character at a time.
        while (cursor < length) {
            const charCode = input.charCodeAt(cursor);
            // ' '=32, '\t'=9, '\n'=10, '\r'=13
            const isWhiteSpace = charCode === 32 || charCode === 9 || charCode === 10 || charCode === 13;
            if (!isWhiteSpace) {
                break;
            }
            cursor++;
        }
        return cursor;
    }

    /**
     * Reads a line comment (`-- ...`) starting exactly at `position`.
     *
     * @param {string} input The input text
     * @param {number} position Index where the comment is expected to start
     * @returns {{newPosition: number, comment: (string|null)}} When a comment starts at
     *     `position`: `newPosition` is the index of the terminating line feed (or
     *     `input.length`) and `comment` is the trimmed text after `--`. Otherwise
     *     `newPosition` equals `position` and `comment` is `null`.
     */
    static readLineComment(input, position) {
        // '-'=45. charCodeAt returns NaN past the end of the string, so a truncated
        // opener simply fails this comparison.
        const startsWithDashes = input.charCodeAt(position) === 45 && input.charCodeAt(position + 1) === 45;
        if (!startsWithDashes) {
            return { newPosition: position, comment: null };
        }
        const contentStart = position + 2;
        // The comment runs up to, but not including, the next line feed.
        const lineFeedIndex = input.indexOf('\n', contentStart);
        const contentEnd = lineFeedIndex === -1 ? input.length : lineFeedIndex;
        // Return the trimmed comment content (excluding the -- token).
        return { newPosition: contentEnd, comment: input.slice(contentStart, contentEnd).trim() };
    }

    /**
     * Reads a block comment starting exactly at `position`.
     *
     * A block that opens with the three characters slash, asterisk, plus is NOT
     * treated as a comment here; the position is returned unchanged for it.
     *
     * @param {string} input The input text
     * @param {number} position Index where the comment is expected to start
     * @returns {{newPosition: number, comments: (string[]|null)}} When a comment starts at
     *     `position`: `newPosition` is the index just past the closing delimiter and
     *     `comments` holds the trimmed content lines, with empty lines removed from the
     *     beginning and end only. Otherwise `newPosition` equals `position` and
     *     `comments` is `null`.
     * @throws {Error} If a block comment is opened at `position` but never closed
     */
    static readBlockComment(input, position) {
        // '/'=47, '*'=42, '+'=43. charCodeAt returns NaN past the end of the string,
        // so no explicit length guard is needed.
        const isOpener = input.charCodeAt(position) === 47 && input.charCodeAt(position + 1) === 42;
        const isPlusBlock = input.charCodeAt(position + 2) === 43;
        if (!isOpener || isPlusBlock) {
            return { newPosition: position, comments: null };
        }
        const contentStart = position + 2;
        const closeIndex = input.indexOf('*/', contentStart);
        if (closeIndex === -1) {
            // Every unterminated opener is reported, including one with fewer than
            // four characters left in the input. The reported position is where the
            // scan for the closing delimiter gave up.
            const scanEnd = Math.max(contentStart, input.length - 1);
            throw new Error(`Block comment is not closed. position: ${scanEnd}`);
        }
        // Normalize line endings, split into lines and trim each line.
        const lines = input
            .slice(contentStart, closeIndex)
            .replace(/\r/g, '')
            .split('\n')
            .map((line) => line.trim());
        // Remove empty lines, but only at the beginning and end.
        let first = 0;
        let last = lines.length;
        while (first < last && lines[first] === '') {
            first++;
        }
        while (last > first && lines[last - 1] === '') {
            last--;
        }
        return { newPosition: closeIndex + 2, comments: lines.slice(first, last) };
    }

    /**
     * Skips white space characters and SQL comments, collecting the comment text.
     *
     * @param {string} input The input text
     * @param {number} position Index to start scanning from
     * @returns {{position: number, lines: string[]}} The index of the first character
     *     that is neither whitespace nor part of a comment, and the comment lines
     *     encountered, in order (empty line comments are not recorded)
     * @throws {Error} If a block comment is opened but never closed
     */
    static readWhiteSpaceAndComment(input, position) {
        const lines = [];
        const length = input.length;
        let cursor = position;
        while (cursor < length) {
            // Skip whitespace first; if any was consumed, start the cycle again.
            const afterWhiteSpace = StringUtils.skipWhiteSpace(input, cursor);
            if (afterWhiteSpace !== cursor) {
                cursor = afterWhiteSpace;
                continue;
            }
            // Fast character code check before calling a comment reader.
            const charCode = input.charCodeAt(cursor);
            if (charCode === 45) {
                // '-'=45 (possible line comment)
                const { newPosition, comment } = StringUtils.readLineComment(input, cursor);
                if (newPosition !== cursor) {
                    cursor = newPosition;
                    // The comment is already trimmed; empty comments are dropped.
                    if (comment) {
                        lines.push(comment);
                    }
                    continue;
                }
            }
            else if (charCode === 47) {
                // '/'=47 (possible block comment)
                const { newPosition, comments } = StringUtils.readBlockComment(input, cursor);
                if (newPosition !== cursor) {
                    cursor = newPosition;
                    if (comments) {
                        // Push one by one: spreading a very large array into push()
                        // can exceed the engine's argument-count limit.
                        for (const line of comments) {
                            lines.push(line);
                        }
                    }
                    continue;
                }
            }
            // No more whitespace or comments found.
            break;
        }
        return { position: cursor, lines };
    }

    /**
     * Reads a regular identifier, failing if there is none at `position`.
     *
     * @param {string} input The input text
     * @param {number} position Index where the identifier is expected to start
     * @returns {{identifier: string, newPosition: number}} The identifier text and the
     *     index just past it
     * @throws {Error} If no identifier character is found at `position`
     */
    static readRegularIdentifier(input, position) {
        // Called through the class name, like the other intra-class calls, so the
        // method keeps working when it is passed around detached from the class.
        const result = StringUtils.tryReadRegularIdentifier(input, position);
        if (!result) {
            throw new Error(`Unexpected character. position: ${position}\n${StringUtils.getDebugPositionInfo(input, position)}`);
        }
        return result;
    }

    /**
     * Tries to read a regular identifier starting at `position`.
     *
     * Characters are consumed until `CharLookupTable.isDelimiter` reports a delimiter
     * (which characters count as delimiters is defined by that helper, not here).
     * Any number of immediately following `[]` pairs are included in the identifier.
     *
     * @param {string} input The input text
     * @param {number} position Index where the identifier is expected to start
     * @returns {({identifier: string, newPosition: number}|null)} The identifier text and
     *     the index just past it, or `null` if no character was consumed
     */
    static tryReadRegularIdentifier(input, position) {
        const start = position;
        let cursor = position;
        while (cursor < input.length && !charLookupTable_1.CharLookupTable.isDelimiter(input[cursor])) {
            cursor++;
        }
        if (cursor === start) {
            return null;
        }
        // Check index range before checking for [] (array type).
        while (cursor + 1 < input.length &&
            input[cursor] === '[' &&
            input[cursor + 1] === ']') {
            cursor += 2; // Skip the []
        }
        return {
            identifier: input.slice(start, cursor),
            newPosition: cursor
        };
    }
}
exports.StringUtils = StringUtils;
//# sourceMappingURL=stringUtils.js.map
;