crapifyme
Version:
Ultra-fast developer productivity CLI tools - remove comments, logs, and more
403 lines • 15 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.EnhancedTokenizer = void 0;
const error_handler_1 = require("./error-handler");
const logger_1 = require("./logger");
const performance_monitor_1 = require("./performance-monitor");
/**
 * Punctuation after which a `/` is treated as the start of a regex literal
 * rather than a division operator. `>` covers arrow functions (`x => /re/`).
 */
const REGEX_PRECEDING_CHARS = new Set([
    '=', '(', '[', ',', ':', ';', '!', '&', '|', '?',
    '+', '-', '*', '/', '%', '{', '}', '>'
]);
/** Keywords after which an expression (and therefore a regex literal) may start. */
const REGEX_PRECEDING_KEYWORDS = new Set([
    'return', 'typeof', 'instanceof', 'in', 'of', 'new', 'delete',
    'void', 'throw', 'case', 'do', 'else', 'yield', 'await'
]);
/** Length of the longest entry in REGEX_PRECEDING_KEYWORDS; bounds the backwards word scan. */
const MAX_REGEX_KEYWORD_LENGTH = 10;
/** Characters that may appear in an identifier or keyword (ASCII approximation). */
const IDENTIFIER_CHAR = /[A-Za-z0-9_$]/;
/** Regex literal flag letters, including `s`, `d` and `v`. */
const REGEX_FLAG_CHAR = /[dgimsuvy]/;
/**
 * Splits source text into `code`, `string`, `regex` and `comment` tokens.
 *
 * Every token has the shape `{ type, value, context, startPos, endPos }`.
 * Problems found while scanning are reported to the error handler instead of
 * aborting the scan, and recovery paths emit the skipped character as a code
 * token so the characters consumed by the scan are all present in the output.
 */
class EnhancedTokenizer {
    /**
     * @param {object} [logger] Logger passed to the error handler and the
     *   performance monitor; a `Logger(false, false, false)` is created when omitted.
     */
    constructor(logger) {
        this.content = '';
        this.position = 0;
        this.contextStack = [];
        this.logger = logger || new logger_1.Logger(false, false, false);
        this.errorHandler = new error_handler_1.ErrorHandler(this.logger, true);
        this.performanceMonitor = new performance_monitor_1.PerformanceMonitor(this.logger);
    }
    /**
     * Tokenizes `content` from the beginning, resetting all scanner state first.
     *
     * @param {string} content Source text to scan.
     * @returns {Array<object>} Tokens in source order.
     * @throws Rethrows any error raised outside per-token recovery, after recording it.
     */
    tokenize(content) {
        this.content = content;
        this.position = 0;
        this.contextStack = [];
        this.errorHandler.clear();
        const tokens = [];
        const originalLength = content.length;
        // Every iteration advances by at least one character, so twice the
        // input length is a generous safety net against a runaway loop.
        const maxIterations = originalLength * 2;
        let iterations = 0;
        try {
            while (this.position < this.content.length && iterations < maxIterations) {
                const startPos = this.position;
                try {
                    const token = this.nextTokenOptimized();
                    if (!token) {
                        break;
                    }
                    tokens.push(token);
                    if (this.position <= startPos) {
                        this.errorHandler.recordError({
                            category: error_handler_1.ErrorCategory.TOKENIZATION,
                            severity: error_handler_1.ErrorSeverity.HIGH,
                            message: `Forced position advancement to prevent infinite loop`,
                            position: this.position
                        });
                        // The stalled token did not consume this character; keep it in the stream.
                        tokens.push(this.skipCharacterAsCode(startPos));
                    }
                }
                catch (tokenError) {
                    this.errorHandler.recordError({
                        category: error_handler_1.ErrorCategory.TOKENIZATION,
                        severity: error_handler_1.ErrorSeverity.MEDIUM,
                        message: `Token parsing error: ${tokenError instanceof Error ? tokenError.message : 'Unknown error'}`,
                        position: this.position,
                        originalError: tokenError instanceof Error ? tokenError : undefined
                    });
                    // Resume one character after the failed token start, emitting that
                    // character as code so it is not dropped from the output.
                    tokens.push(this.skipCharacterAsCode(startPos));
                }
                iterations++;
            }
            if (iterations >= maxIterations) {
                this.errorHandler.recordError({
                    category: error_handler_1.ErrorCategory.TOKENIZATION,
                    severity: error_handler_1.ErrorSeverity.CRITICAL,
                    message: `Maximum iteration limit reached (${maxIterations}), possible infinite loop`,
                    position: this.position
                });
            }
            const processedLength = tokens.reduce((sum, token) => sum + token.value.length, 0);
            this.errorHandler.validateParsingCompletion(originalLength, processedLength, tokens.length);
            return tokens;
        }
        catch (error) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.TOKENIZATION,
                severity: error_handler_1.ErrorSeverity.CRITICAL,
                message: `Critical tokenization error: ${error instanceof Error ? error.message : 'Unknown error'}`,
                position: this.position,
                originalError: error instanceof Error ? error : undefined
            });
            throw error;
        }
    }
    /**
     * Recovery helper: moves the scanner to just past `startPos` and returns the
     * skipped character as a code token.
     *
     * @param {number} startPos Offset of the character to skip.
     * @returns {object} Code token covering the skipped character.
     */
    skipCharacterAsCode(startPos) {
        const endPos = Math.min(startPos + 1, this.content.length);
        this.position = endPos;
        return this.createCodeToken(startPos, endPos);
    }
    /**
     * Reads the token that starts at the current position.
     *
     * @returns {object|null} The next token, or `null` at end of input.
     */
    nextTokenOptimized() {
        if (this.position >= this.content.length) {
            return null;
        }
        const startPos = this.position;
        const char = this.content[startPos];
        const next = startPos + 1 < this.content.length ? this.content[startPos + 1] : '';
        const currentContext = this.getCurrentContext();
        if (currentContext &&
            (currentContext.type === 'string' || currentContext.type === 'template')) {
            return this.parseCodeSequenceOptimized(startPos);
        }
        switch (char) {
            case "'":
            case '"':
                return this.parseStringTokenWithRecovery(startPos, char);
            case '`':
                return this.parseTemplateStringTokenWithRecovery(startPos);
            case '/':
                if (next === '*') {
                    return this.parseBlockCommentToken(startPos);
                }
                if (next === '/') {
                    return this.parseLineCommentToken(startPos);
                }
                if (this.couldBeRegexOptimized()) {
                    return this.parseRegexTokenWithRecovery(startPos);
                }
                break;
            case '<':
                if (this.content.startsWith('<!--', startPos)) {
                    return this.parseHtmlCommentToken(startPos);
                }
                break;
        }
        return this.parseCodeSequenceOptimized(startPos);
    }
    /**
     * Consumes plain code up to and including the first whitespace character,
     * or up to (excluding) the first character that starts a string, comment,
     * regex or HTML comment.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} Code token; its value is empty when the very first
     *   character already starts another token kind.
     */
    parseCodeSequenceOptimized(startPos) {
        const source = this.content;
        let endPos = this.position;
        while (endPos < source.length) {
            const char = source[endPos];
            const following = source[endPos + 1];
            const startsOtherToken = char === "'" || char === '"' || char === '`' ||
                (char === '/' &&
                    (following === '/' || following === '*' || this.couldBeRegexAtPosition(endPos))) ||
                (char === '<' && source.startsWith('<!--', endPos));
            if (startsOtherToken) {
                break;
            }
            endPos++;
            // A whitespace character closes the token and is included in it.
            if (EnhancedTokenizer.WHITESPACE.test(char)) {
                break;
            }
        }
        this.position = endPos;
        return this.createCodeToken(startPos, endPos);
    }
    /**
     * Builds a code token for the half-open range `[startPos, endPos)`.
     *
     * @param {number} startPos Inclusive start offset.
     * @param {number} endPos Exclusive end offset.
     * @returns {object} Code token.
     */
    createCodeToken(startPos, endPos) {
        return {
            type: 'code',
            value: this.content.substring(startPos, endPos),
            context: { type: 'code' },
            startPos,
            endPos
        };
    }
    /**
     * Runs the regex-start heuristic for a `/` at `pos` without moving the scanner.
     *
     * @param {number} pos Offset of the `/` to test.
     * @returns {boolean} Result of `couldBeRegexOptimized` evaluated at `pos`.
     */
    couldBeRegexAtPosition(pos) {
        const savedPosition = this.position;
        this.position = pos;
        const result = this.couldBeRegexOptimized();
        this.position = savedPosition;
        return result;
    }
    /**
     * Parses a quoted string starting at the current position.
     *
     * A raw newline before the closing quote ends the token early and records an
     * "unterminated string" error; the newline itself is not consumed.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @param {string} quote The opening quote character.
     * @returns {object} String token.
     */
    parseStringTokenWithRecovery(startPos, quote) {
        const source = this.content;
        const begin = this.position;
        this.position++; // opening quote
        while (this.position < source.length) {
            const char = source[this.position];
            if (char === '\\') {
                // Escape sequence: take the backslash and the escaped character together.
                this.position = Math.min(this.position + 2, source.length);
                continue;
            }
            if (char === quote) {
                this.position++;
                break;
            }
            if (char === '\n' && quote !== '`') {
                const { line, column } = this.errorHandler.calculateLineColumn(source, startPos);
                this.errorHandler.recordError({
                    category: error_handler_1.ErrorCategory.STRING_HANDLING,
                    severity: error_handler_1.ErrorSeverity.MEDIUM,
                    message: `Unterminated string literal at line ${line}, column ${column}`,
                    position: startPos
                });
                break;
            }
            this.position++;
        }
        return {
            type: 'string',
            value: source.substring(begin, this.position),
            context: { type: 'string', quote },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Parses a template literal starting at the current position.
     *
     * `${` increments and `}` decrements an interpolation depth; a backtick only
     * closes the literal while that depth is zero. An unterminated literal runs
     * to the end of input.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} String token whose context carries the final interpolation depth.
     */
    parseTemplateStringTokenWithRecovery(startPos) {
        const source = this.content;
        const begin = this.position;
        let interpolationDepth = 0;
        this.position++; // opening backtick
        while (this.position < source.length) {
            const char = source[this.position];
            if (char === '\\') {
                this.position = Math.min(this.position + 2, source.length);
            }
            else if (char === '$' && source[this.position + 1] === '{') {
                this.position += 2;
                interpolationDepth++;
            }
            else if (char === '}' && interpolationDepth > 0) {
                this.position++;
                interpolationDepth--;
            }
            else if (char === '`' && interpolationDepth === 0) {
                this.position++;
                break;
            }
            else {
                this.position++;
            }
        }
        return {
            type: 'string',
            value: source.substring(begin, this.position),
            context: { type: 'template', interpolationDepth },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Parses a regex literal, including trailing flags, starting at the current position.
     *
     * A `/` inside a character class (`[...]`) does not end the literal. A raw
     * newline ends the token early and records an "unterminated regex" error;
     * the newline itself is not consumed.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} Regex token.
     */
    parseRegexTokenWithRecovery(startPos) {
        const source = this.content;
        const begin = this.position;
        let inCharacterClass = false;
        this.position++; // opening slash
        while (this.position < source.length) {
            const char = source[this.position];
            if (char === '\\') {
                this.position = Math.min(this.position + 2, source.length);
                continue;
            }
            if (char === '\n') {
                const { line, column } = this.errorHandler.calculateLineColumn(source, startPos);
                this.errorHandler.recordError({
                    category: error_handler_1.ErrorCategory.REGEX,
                    severity: error_handler_1.ErrorSeverity.MEDIUM,
                    message: `Unterminated regex literal at line ${line}, column ${column}`,
                    position: startPos
                });
                break;
            }
            this.position++;
            if (char === '[') {
                inCharacterClass = true;
            }
            else if (char === ']') {
                inCharacterClass = false;
            }
            else if (char === '/' && !inCharacterClass) {
                while (this.position < source.length && REGEX_FLAG_CHAR.test(source[this.position])) {
                    this.position++;
                }
                break;
            }
        }
        return {
            type: 'regex',
            value: source.substring(begin, this.position),
            context: { type: 'regex' },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Parses a `//` comment from the current position up to, but not including,
     * the next newline (or the end of input).
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} Comment token.
     */
    parseLineCommentToken(startPos) {
        const source = this.content;
        const begin = this.position;
        const newlineAt = source.indexOf('\n', begin);
        this.position = newlineAt === -1 ? source.length : Math.max(newlineAt, begin);
        return {
            type: 'comment',
            value: source.substring(begin, this.position),
            context: { type: 'comment' },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Parses a block comment starting at the current position. An unterminated
     * comment extends to the end of input.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} Comment token.
     */
    parseBlockCommentToken(startPos) {
        const source = this.content;
        const begin = this.position;
        // Search after the opener so that `/*/` is not mistaken for a closed comment.
        const closeAt = source.indexOf('*/', begin + 2);
        this.position = closeAt === -1 ? source.length : closeAt + 2;
        return {
            type: 'comment',
            value: source.substring(begin, this.position),
            context: { type: 'comment' },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Parses an HTML comment (`<!-- ... -->`) starting at the current position.
     * An unterminated comment extends to the end of input.
     *
     * @param {number} startPos Offset recorded as the token start.
     * @returns {object} Comment token.
     */
    parseHtmlCommentToken(startPos) {
        const source = this.content;
        const begin = this.position;
        const closeAt = source.indexOf('-->', begin + 4);
        this.position = closeAt === -1 ? source.length : closeAt + 3;
        return {
            type: 'comment',
            value: source.substring(begin, this.position),
            context: { type: 'comment' },
            startPos,
            endPos: this.position
        };
    }
    /**
     * Heuristic: could a `/` at the current position start a regex literal?
     *
     * Looks at the nearest non-whitespace text before the slash. Returns true at
     * the start of input, after operator/punctuation characters that expect an
     * operand, and after keywords such as `return` or `typeof`. A keyword
     * directly preceded by `.` is treated as a property name, not a keyword.
     *
     * @returns {boolean} True when the slash is likely a regex start.
     */
    couldBeRegexOptimized() {
        const source = this.content;
        let i = this.position - 1;
        while (i >= 0 && EnhancedTokenizer.WHITESPACE.test(source[i])) {
            i--;
        }
        if (i < 0) {
            return true;
        }
        const prevChar = source[i];
        if (REGEX_PRECEDING_CHARS.has(prevChar)) {
            return true;
        }
        if (!IDENTIFIER_CHAR.test(prevChar)) {
            return false;
        }
        // Walk back over the preceding word, giving up once it is longer than any keyword.
        let wordStart = i;
        while (wordStart > 0 &&
            i - wordStart < MAX_REGEX_KEYWORD_LENGTH &&
            IDENTIFIER_CHAR.test(source[wordStart - 1])) {
            wordStart--;
        }
        if (wordStart > 0 && source[wordStart - 1] === '.') {
            return false;
        }
        return REGEX_PRECEDING_KEYWORDS.has(source.substring(wordStart, i + 1));
    }
    /**
     * @returns {object|undefined} The innermost entry of the context stack, or
     *   `undefined` when the stack is empty.
     */
    getCurrentContext() {
        return this.contextStack[this.contextStack.length - 1];
    }
    /** @returns {object} The error handler that collects tokenization problems. */
    getErrorHandler() {
        return this.errorHandler;
    }
    /** @returns {*} Whatever the error handler's `getErrorSummary()` reports. */
    getErrorSummary() {
        return this.errorHandler.getErrorSummary();
    }
}
// Publish the class on the CommonJS exports object.
exports.EnhancedTokenizer = EnhancedTokenizer;
// Static whitespace matcher; parseCodeSequenceOptimized reads it to end a code
// token after a whitespace character. It has no `g` flag, so `.test()` is stateless.
EnhancedTokenizer.WHITESPACE = /\s/;
//# sourceMappingURL=enhanced-tokenizer.js.map