/*
 * crapifyme
 * Version:
 * Ultra-fast developer productivity CLI tools - remove comments, logs, and more
 * 607 lines • 25.1 kB
 * JavaScript
 */
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AdvancedCommentRemover = void 0;
const path_1 = __importDefault(require("path"));
const enhanced_tokenizer_1 = require("../../shared/enhanced-tokenizer");
const error_handler_1 = require("../../shared/error-handler");
const logger_1 = require("../../shared/logger");
const performance_monitor_1 = require("../../shared/performance-monitor");
const rule_manager_1 = require("../../shared/rule-manager");
const types_1 = require("../../shared/types");
/**
 * Removes comments from source text while keeping the ones that match a
 * preservation rule or a user-supplied keep pattern.
 *
 * Two tokenizers are available: the external `EnhancedTokenizer` (default) and
 * the built-in "legacy" character scanner implemented by `tokenize()`. When the
 * enhanced path throws, `removeComments()` retries with the legacy scanner and,
 * if that fails too, returns the input untouched.
 */
class AdvancedCommentRemover {
    /**
     * @param {string[]} [keepPatterns] Literal, case-insensitive substrings; a
     *   comment containing any of them is preserved. Blank and non-string
     *   entries are ignored.
     * @param {object} [options]
     * @param {object} [options.logger] Logger to use instead of the default one.
     * @param {boolean} [options.useEnhancedTokenizer] Pass `false` to force the
     *   legacy scanner.
     * @param {boolean} [options.preserveFramework] Pass `false` to drop the
     *   FRAMEWORK rules (same for `preserveDevelopment`, `preserveTooling`,
     *   `preserveDocumentation`).
     * @param {string[]} [options.customRules] Extra patterns registered with the
     *   rule manager.
     * @param {number} [options.rulePriority] Priority for `customRules`
     *   (defaults to 100).
     */
    constructor(keepPatterns, options = {}) {
        // Tolerate a missing list and stray non-string entries instead of throwing.
        this.keepPatterns = (keepPatterns || []).filter(p => typeof p === 'string' && p.trim().length > 0);
        this.logger = options.logger || new logger_1.Logger(false, false, false);
        this.errorHandler = new error_handler_1.ErrorHandler(this.logger, true);
        this.enhancedTokenizer = new enhanced_tokenizer_1.EnhancedTokenizer(this.logger);
        this.ruleManager = new rule_manager_1.PreservationRuleManager();
        this.useEnhancedTokenizer = options.useEnhancedTokenizer !== false;
        this.performanceMonitor = new performance_monitor_1.PerformanceMonitor(this.logger);
        this.configurePreservationRules(options);
        this.addCustomPatterns();
    }
    /**
     * Removes comments from `content`.
     *
     * @param {string} content Source text.
     * @param {string} filePath Used for the extension lookup and in messages.
     * @returns {object} `{ content, modified, removed, preserved, errors,
     *   warnings, hasErrors, hasCriticalErrors }` (plus `performanceMetrics` on
     *   the enhanced path). On total failure `content` is the unmodified input.
     */
    removeComments(content, filePath) {
        const extension = this.getFileExtension(filePath);
        this.errorHandler.clear();
        // Snapshot the flag so the catch block knows which path actually failed.
        const usedEnhanced = this.useEnhancedTokenizer;
        try {
            const result = usedEnhanced
                ? this.removeCommentsWithEnhancedTokenizer(content, filePath)
                : this.removeCommentsWithLegacyTokenizer(content, extension);
            return this.enhanceResultWithErrorInfo(result, filePath);
        }
        catch (error) {
            this.recordFailure(error_handler_1.ErrorCategory.FILE_PROCESSING, error_handler_1.ErrorSeverity.CRITICAL, `Critical error processing file ${filePath}`, error);
            if (!usedEnhanced) {
                // The legacy scanner is what just failed; re-running the same
                // code on the same input would only fail again.
                return this.createFailsafeResult(content, filePath);
            }
            this.logger.warn(`Enhanced tokenizer failed for ${filePath}, falling back to legacy tokenizer`);
            try {
                const result = this.removeCommentsWithLegacyTokenizer(content, extension);
                return this.enhanceResultWithErrorInfo(result, filePath);
            }
            catch (fallbackError) {
                this.recordFailure(error_handler_1.ErrorCategory.FILE_PROCESSING, error_handler_1.ErrorSeverity.CRITICAL, `Fallback tokenizer also failed for ${filePath}`, fallbackError);
                return this.createFailsafeResult(content, filePath);
            }
        }
    }
    /**
     * Enhanced-tokenizer pass. Records a HIGH error and rethrows on failure so
     * the caller can fall back.
     *
     * @param {string} content
     * @param {string} filePath Context label for validation messages.
     * @returns {{content: string, modified: boolean, removed: number, preserved: number, performanceMetrics: *}}
     */
    removeCommentsWithEnhancedTokenizer(content, filePath) {
        try {
            this.performanceMonitor.startMonitoring();
            const tokens = this.enhancedTokenizer.tokenize(content);
            // Inputs above 100000 characters go through OptimizedStringBuilder;
            // smaller ones are collected in an array and joined.
            const useOptimizedBuilder = content.length > 100000;
            const builder = useOptimizedBuilder ? new performance_monitor_1.OptimizedStringBuilder() : null;
            const parts = [];
            const emit = useOptimizedBuilder
                ? (text) => { builder.append(text); }
                : (text) => { parts.push(text); };
            let removed = 0;
            let preserved = 0;
            for (const token of tokens) {
                if (token.type !== 'comment') {
                    emit(token.value);
                    continue;
                }
                let keep;
                try {
                    keep = this.shouldPreserveCommentEnhanced(token.value);
                }
                catch (preservationError) {
                    this.recordFailure(error_handler_1.ErrorCategory.PRESERVATION, error_handler_1.ErrorSeverity.MEDIUM, 'Error in comment preservation logic', preservationError, { position: token.startPos });
                    // When in doubt, keep the comment rather than lose content.
                    keep = true;
                }
                if (keep) {
                    emit(token.value);
                    preserved++;
                }
                else {
                    removed++;
                }
            }
            const processedContent = useOptimizedBuilder ? builder.toString() : parts.join('');
            const metrics = this.performanceMonitor.stopMonitoring(tokens.length, content.length);
            this.validateProcessingResult(content, processedContent, tokens.length, filePath);
            return {
                content: processedContent,
                modified: content !== processedContent,
                removed,
                preserved,
                performanceMetrics: metrics
            };
        }
        catch (error) {
            this.recordFailure(error_handler_1.ErrorCategory.FILE_PROCESSING, error_handler_1.ErrorSeverity.HIGH, 'Enhanced tokenizer processing failed', error);
            throw error;
        }
    }
    /**
     * Legacy-scanner pass. Only the user keep patterns decide preservation here
     * (the rule manager is not consulted). Records a HIGH error and rethrows on
     * failure.
     *
     * @param {string} content
     * @param {string} extension Lower-case extension without the dot; also used
     *   as the context label in validation messages.
     * @returns {{content: string, modified: boolean, removed: number, preserved: number}}
     */
    removeCommentsWithLegacyTokenizer(content, extension) {
        try {
            const tokens = this.tokenizeWithErrorHandling(content, extension);
            const parts = [];
            let removed = 0;
            let preserved = 0;
            for (const token of tokens) {
                if (token.type !== 'comment') {
                    parts.push(token.value);
                    continue;
                }
                let keep;
                try {
                    keep = this.shouldPreserveComment(token.value);
                }
                catch (preservationError) {
                    this.recordFailure(error_handler_1.ErrorCategory.PRESERVATION, error_handler_1.ErrorSeverity.MEDIUM, 'Error in legacy comment preservation logic', preservationError);
                    keep = true;
                }
                if (keep) {
                    parts.push(token.value);
                    preserved++;
                }
                else {
                    removed++;
                }
            }
            const processedContent = parts.join('');
            this.validateProcessingResult(content, processedContent, tokens.length, extension);
            return {
                content: processedContent,
                modified: content !== processedContent,
                removed,
                preserved
            };
        }
        catch (error) {
            this.recordFailure(error_handler_1.ErrorCategory.FILE_PROCESSING, error_handler_1.ErrorSeverity.HIGH, 'Legacy tokenizer processing failed', error);
            throw error;
        }
    }
    /**
     * @param {string} filePath
     * @returns {string} Extension of `filePath`, lower-cased, without the dot.
     */
    getFileExtension(filePath) {
        return path_1.default.extname(filePath).slice(1).toLowerCase();
    }
    /**
     * Legacy scanner: splits `content` into `string`, `comment` and `code`
     * tokens. Joining every token value reproduces `content` exactly.
     * Consecutive code characters are merged into a single token.
     *
     * NOTE(review): the scanner is language-unaware; `//` and block-comment
     * openers are treated as comments for every extension, including the
     * hash-comment ones. Confirm whether that is intended.
     *
     * @param {string} content
     * @param {string} extension Enables `#` comments when listed in
     *   `isHashCommentFile`.
     * @returns {Array<{type: string, value: string}>}
     */
    tokenize(content, extension) {
        const tokens = [];
        const hashComments = this.isHashCommentFile(extension);
        const pushCode = (text) => {
            const last = tokens[tokens.length - 1];
            if (last && last.type === 'code') {
                last.value += text;
            }
            else {
                tokens.push({ type: 'code', value: text });
            }
        };
        let i = 0;
        while (i < content.length) {
            const char = content[i];
            const next = content[i + 1];
            if (char === "'" || char === '"' || char === '`') {
                const str = char === '`'
                    ? this.parseTemplateString(content, i)
                    : this.parseString(content, i, char);
                tokens.push({ type: 'string', value: str.value });
                i = str.end;
                continue;
            }
            let comment = null;
            if (char === '/' && next === '*') {
                comment = this.parseBlockComment(content, i);
            }
            else if (char === '/' && next === '/') {
                if (this.isPartOfUrl(content, i)) {
                    // Consume both slashes of the scheme separator. Emitting
                    // only the first would let the second one pair with a
                    // following '*' or '/' and open a bogus comment.
                    pushCode('//');
                    i += 2;
                    continue;
                }
                comment = this.parseLineComment(content, i);
            }
            else if (char === '<' && content.startsWith('<!--', i)) {
                comment = this.parseHtmlComment(content, i);
            }
            else if (char === '#' && hashComments) {
                comment = this.parseHashComment(content, i);
            }
            if (comment) {
                tokens.push({ type: 'comment', value: comment.value });
                i = comment.end;
                continue;
            }
            pushCode(char);
            i++;
        }
        return tokens;
    }
    /**
     * Reads a quoted literal that opens at `start`. A backslash skips the
     * character after it, so escaped quotes do not close the literal. An
     * unterminated literal runs to the end of `content`.
     *
     * @param {string} content
     * @param {number} start Index of the opening quote.
     * @param {string} quote The quote character.
     * @returns {{value: string, end: number}} Literal text and the index just
     *   past it (never beyond `content.length`).
     */
    parseString(content, start, quote) {
        let i = start + 1;
        while (i < content.length) {
            const char = content[i];
            if (char === '\\') {
                i += 2;
                continue;
            }
            i++;
            if (char === quote) {
                return { value: quote + content.slice(start + 1, i), end: i };
            }
        }
        // A trailing backslash would otherwise push the index one past the end.
        const end = Math.min(i, content.length);
        return { value: quote + content.slice(start + 1, end), end };
    }
    /**
     * Reads a backtick literal; same rules as `parseString`. `${...}`
     * substitutions get no special handling.
     *
     * @param {string} content
     * @param {number} start Index of the opening backtick.
     * @returns {{value: string, end: number}}
     */
    parseTemplateString(content, start) {
        return this.parseString(content, start, '`');
    }
    /**
     * Reads a `//` comment up to, but not including, the line terminator.
     *
     * @param {string} content
     * @param {number} start Index of the first `/`.
     * @returns {{value: string, end: number}}
     */
    parseLineComment(content, start) {
        return this.scanToLineEnd(content, start);
    }
    /**
     * Reads a block comment that opens at `start`. The closing marker is
     * searched from `start + 2`, so the opener's own `*` cannot close it. An
     * unterminated comment runs to the end of `content`.
     *
     * @param {string} content
     * @param {number} start Index of the opening `/`.
     * @returns {{value: string, end: number}}
     */
    parseBlockComment(content, start) {
        const close = content.indexOf('*/', start + 2);
        const end = close === -1 ? content.length : close + 2;
        return { value: '/*' + content.slice(start + 2, end), end };
    }
    /**
     * Reads an HTML comment that opens at `start`. An unterminated comment runs
     * to the end of `content`.
     *
     * @param {string} content
     * @param {number} start Index of the `<`.
     * @returns {{value: string, end: number}}
     */
    parseHtmlComment(content, start) {
        const close = content.indexOf('-->', start + 4);
        const end = close === -1 ? content.length : close + 3;
        return { value: '<!--' + content.slice(start + 4, end), end };
    }
    /**
     * Reads a `#` comment up to, but not including, the line terminator.
     *
     * @param {string} content
     * @param {number} start Index of the `#`.
     * @returns {{value: string, end: number}}
     */
    parseHashComment(content, start) {
        return this.scanToLineEnd(content, start);
    }
    /**
     * Shared scanner for single-line comments: takes everything from `start` up
     * to the next `\n` (or the end of `content`). The `\r` of a `\r\n` pair is
     * left out so removing the comment keeps the CRLF line ending intact.
     *
     * @param {string} content
     * @param {number} start
     * @returns {{value: string, end: number}}
     */
    scanToLineEnd(content, start) {
        const newline = content.indexOf('\n', start);
        let end = newline === -1 ? content.length : newline;
        if (newline !== -1 && end > start && content[end - 1] === '\r') {
            end--;
        }
        return { value: content.slice(start, end), end };
    }
    /**
     * @param {string} extension Lower-case extension without the dot.
     * @returns {boolean} Whether `#` starts a comment for this extension.
     */
    isHashCommentFile(extension) {
        const hashCommentExtensions = [
            'py',
            'sh',
            'bash',
            'zsh',
            'fish',
            'rb',
            'pl',
            'yaml',
            'yml',
            'toml',
            'conf',
            'env'
        ];
        return hashCommentExtensions.includes(extension);
    }
    /**
     * Tells whether the `//` at `position` is the separator of an `http:`,
     * `https:` or `ftp:` URL, judged by the characters right before it.
     *
     * @param {string} content
     * @param {number} position Index of the first `/`.
     * @returns {boolean}
     */
    isPartOfUrl(content, position) {
        const schemes = ['http:', 'https:', 'ftp:'];
        // The explicit length guard matters: startsWith clamps a negative
        // position to 0 and would then match a scheme at the wrong offset.
        return schemes.some(scheme => position >= scheme.length &&
            content.startsWith(scheme, position - scheme.length));
    }
    /**
     * Preservation test for the enhanced path: the rule manager is asked first,
     * then the keep patterns.
     *
     * @param {string} comment
     * @returns {boolean}
     */
    shouldPreserveCommentEnhanced(comment) {
        if (this.ruleManager.shouldPreserveComment(comment)) {
            return true;
        }
        return this.shouldPreserveComment(comment);
    }
    /**
     * @param {string} comment
     * @returns {boolean} `true` when `comment` contains any keep pattern,
     *   matched literally (regex metacharacters are escaped) and
     *   case-insensitively.
     */
    shouldPreserveComment(comment) {
        if (this.keepPatterns.length === 0) {
            return false;
        }
        return this.keepPatterns.some(pattern => {
            const literal = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            return new RegExp(literal, 'i').test(comment);
        });
    }
    /**
     * Applies `options` to the rule manager: removes every rule of a category
     * whose `preserve*` flag is exactly `false`, then registers
     * `options.customRules`.
     *
     * @param {object} options See the constructor.
     */
    configurePreservationRules(options) {
        const categoryFlags = [
            ['preserveFramework', 'FRAMEWORK'],
            ['preserveDevelopment', 'DEVELOPMENT'],
            ['preserveTooling', 'TOOLING'],
            ['preserveDocumentation', 'DOCUMENTATION']
        ];
        for (const [flag, categoryKey] of categoryFlags) {
            if (options[flag] !== false) {
                continue;
            }
            this.ruleManager.getRulesByCategory(types_1.CommentCategory[categoryKey]).forEach(rule => {
                this.ruleManager.removeRule(rule.name);
            });
        }
        if (options.customRules && options.customRules.length > 0) {
            // `||` is kept on purpose: any falsy priority (0, NaN, '') falls
            // back to 100, as callers may rely on.
            const priority = options.rulePriority || 100;
            options.customRules.forEach((pattern, index) => {
                try {
                    this.ruleManager.addCustomPattern(`cli-custom-pattern-${index}`, pattern, priority);
                }
                catch (error) {
                    console.warn(`Invalid custom regex pattern ignored: ${pattern}`);
                }
            });
        }
    }
    /**
     * Registers every keep pattern with the rule manager at priority 50.
     * Patterns the rule manager rejects are reported and skipped.
     */
    addCustomPatterns() {
        this.keepPatterns.forEach((pattern, index) => {
            try {
                this.ruleManager.addCustomPattern(`custom-pattern-${index}`, pattern, 50);
            }
            catch (error) {
                console.warn(`Invalid regex pattern ignored: ${pattern}`);
            }
        });
    }
    /** @returns {object} The preservation rule manager used by this instance. */
    getRuleManager() {
        return this.ruleManager;
    }
    /** @param {boolean} enabled Whether `removeComments` tries the enhanced tokenizer first. */
    setUseEnhancedTokenizer(enabled) {
        this.useEnhancedTokenizer = enabled;
    }
    /**
     * Adds the errors and warnings collected by this instance and by the
     * enhanced tokenizer to `result`, and logs a one-line summary when there
     * are errors.
     *
     * @param {object} result Output of one of the `removeCommentsWith*` methods.
     * @param {string} filePath Used in the log line.
     * @returns {object} Copy of `result` with `errors`, `warnings`, `hasErrors`
     *   and `hasCriticalErrors`.
     */
    enhanceResultWithErrorInfo(result, filePath) {
        const tokenizerHandler = this.enhancedTokenizer.getErrorHandler();
        const allErrors = [...this.errorHandler.getErrors(), ...tokenizerHandler.getErrors()];
        const enhancedResult = {
            ...result,
            errors: allErrors,
            warnings: [
                ...this.errorHandler.getWarnings(),
                ...tokenizerHandler.getWarnings()
            ],
            hasErrors: allErrors.length > 0,
            hasCriticalErrors: allErrors.some(error => error.severity === error_handler_1.ErrorSeverity.CRITICAL)
        };
        if (allErrors.length > 0) {
            const errorSummary = this.getErrorSummary(allErrors);
            this.logger.warn(`File ${filePath} processed with ${errorSummary.total} errors: ${JSON.stringify(errorSummary.bySeverity)}`);
        }
        return enhancedResult;
    }
    /**
     * Last-resort result: the input is returned unmodified and flagged as
     * failed.
     *
     * @param {string} content
     * @param {string} filePath Used in the log line.
     * @returns {object}
     */
    createFailsafeResult(content, filePath) {
        this.logger.error(`All parsing methods failed for ${filePath}, returning original content`);
        return {
            content,
            modified: false,
            removed: 0,
            preserved: 0,
            errors: this.errorHandler.getErrors(),
            warnings: this.errorHandler.getWarnings(),
            hasErrors: true,
            hasCriticalErrors: this.errorHandler.hasCriticalErrors()
        };
    }
    /**
     * Runs `tokenize`; if it throws, records a HIGH error and returns the whole
     * input as a single code token so that nothing gets removed.
     *
     * @param {string} content
     * @param {string} extension
     * @returns {Array<{type: string, value: string}>}
     */
    tokenizeWithErrorHandling(content, extension) {
        try {
            return this.tokenize(content, extension);
        }
        catch (error) {
            this.recordFailure(error_handler_1.ErrorCategory.TOKENIZATION, error_handler_1.ErrorSeverity.HIGH, 'Legacy tokenization failed', error);
            return [{ type: 'code', value: content }];
        }
    }
    /**
     * Sanity checks on a finished pass. Problems are recorded on the error
     * handler; nothing is thrown here. Checks: more than half of the characters
     * removed, URL damage, zero tokens for non-empty input, and U+FFFD in the
     * output.
     *
     * @param {string} originalContent
     * @param {string} processedContent
     * @param {number} tokensProcessed
     * @param {string} context Label used in the messages (file path or extension).
     */
    validateProcessingResult(originalContent, processedContent, tokensProcessed, context) {
        const originalLength = originalContent.length;
        const lengthDifference = originalLength - processedContent.length;
        const maxAllowedRemoval = originalLength * 0.5;
        if (lengthDifference > maxAllowedRemoval) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.FILE_PROCESSING,
                severity: error_handler_1.ErrorSeverity.HIGH,
                message: `Excessive content removal detected in ${context}: removed ${lengthDifference}/${originalLength} characters (${Math.round((lengthDifference / originalLength) * 100)}%)`
            });
        }
        this.validateUrlIntegrity(originalContent, processedContent, context);
        if (tokensProcessed === 0 && originalLength > 0) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.TOKENIZATION,
                severity: error_handler_1.ErrorSeverity.HIGH,
                message: `No tokens processed for non-empty content in ${context}`
            });
        }
        if (processedContent.includes('\uFFFD')) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.FILE_PROCESSING,
                severity: error_handler_1.ErrorSeverity.MEDIUM,
                message: `Potential encoding issues detected in processed content for ${context}`
            });
        }
    }
    /**
     * Records CRITICAL errors when processing appears to have damaged URLs.
     *
     * @param {string} originalContent
     * @param {string} processedContent
     * @param {string} context Label used in the messages.
     */
    validateUrlIntegrity(originalContent, processedContent, context) {
        // A scheme that is no longer followed by `//` suggests the rest of the
        // URL was stripped as a line comment. Only an increase over the
        // original counts; text such as the string 'http:' that was already
        // present is not corruption.
        const brokenUrlPattern = /(https?:)(?!\s*\/\/)/g;
        const brokenAfter = processedContent.match(brokenUrlPattern) || [];
        const brokenBefore = originalContent.match(brokenUrlPattern) || [];
        if (brokenAfter.length > brokenBefore.length) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.FILE_PROCESSING,
                severity: error_handler_1.ErrorSeverity.CRITICAL,
                message: `URL corruption detected in ${context}: found broken URLs ${brokenAfter.join(', ')}`
            });
        }
        // NOTE(review): without the `m` flag `$` only matches at the very end
        // of the content - confirm whether per-line matching was intended.
        const incompleteLinks = processedContent.match(/\[.*?\]\([^)]*(?:https?:)$/g);
        if (incompleteLinks) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.FILE_PROCESSING,
                severity: error_handler_1.ErrorSeverity.CRITICAL,
                message: `Markdown link corruption detected in ${context}: found incomplete links`
            });
        }
        // NOTE(review): URLs inside comments that were removed on purpose also
        // lower the processed count and therefore trigger this check.
        const urlPattern = /https?:\/\/[^\s)]+/g;
        const originalUrls = originalContent.match(urlPattern);
        const processedUrls = processedContent.match(urlPattern);
        if (originalUrls && (!processedUrls || processedUrls.length < originalUrls.length)) {
            this.errorHandler.recordError({
                category: error_handler_1.ErrorCategory.FILE_PROCESSING,
                severity: error_handler_1.ErrorSeverity.CRITICAL,
                message: `URL removal detected in ${context}: ${originalUrls.length - (processedUrls?.length || 0)} URLs were corrupted or removed`
            });
        }
    }
    /**
     * @param {Array<{severity: *}>} errors
     * @returns {{total: number, bySeverity: object}} Error count overall and per
     *   severity.
     */
    getErrorSummary(errors) {
        return {
            total: errors.length,
            bySeverity: this.countErrorsBy(errors, 'severity', {
                [error_handler_1.ErrorSeverity.LOW]: 0,
                [error_handler_1.ErrorSeverity.MEDIUM]: 0,
                [error_handler_1.ErrorSeverity.HIGH]: 0,
                [error_handler_1.ErrorSeverity.CRITICAL]: 0
            })
        };
    }
    /** @returns {object} The error handler owned by this instance. */
    getErrorHandler() {
        return this.errorHandler;
    }
    /**
     * @returns {object} Errors and warnings currently held by this instance and
     *   by the enhanced tokenizer, with a per-severity / per-category summary.
     */
    getProcessingStats() {
        const tokenizerHandler = this.enhancedTokenizer.getErrorHandler();
        const allErrors = [...this.errorHandler.getErrors(), ...tokenizerHandler.getErrors()];
        const allWarnings = [
            ...this.errorHandler.getWarnings(),
            ...tokenizerHandler.getWarnings()
        ];
        return {
            errors: allErrors,
            warnings: allWarnings,
            errorSummary: this.getErrorSummaryDetailed(allErrors),
            hasErrors: allErrors.length > 0,
            hasCriticalErrors: allErrors.some(error => error.severity === error_handler_1.ErrorSeverity.CRITICAL)
        };
    }
    /**
     * @param {Array<{severity: *, category: *}>} errors
     * @returns {{total: number, bySeverity: object, byCategory: object}}
     */
    getErrorSummaryDetailed(errors) {
        const { total, bySeverity } = this.getErrorSummary(errors);
        const byCategory = this.countErrorsBy(errors, 'category', {
            [error_handler_1.ErrorCategory.PARSING]: 0,
            [error_handler_1.ErrorCategory.TOKENIZATION]: 0,
            [error_handler_1.ErrorCategory.REGEX]: 0,
            [error_handler_1.ErrorCategory.STRING_HANDLING]: 0,
            [error_handler_1.ErrorCategory.TEMPLATE_LITERAL]: 0,
            [error_handler_1.ErrorCategory.COMMENT_DETECTION]: 0,
            [error_handler_1.ErrorCategory.PRESERVATION]: 0,
            [error_handler_1.ErrorCategory.FILE_PROCESSING]: 0
        });
        return { total, bySeverity, byCategory };
    }
    /**
     * Tallies `errors` into `buckets` by `error[field]`. Values without a
     * matching bucket are skipped. The check is `!= null` rather than
     * truthiness so that a bucket whose key is `0` is still counted.
     *
     * @param {object[]} errors
     * @param {string} field Property to group by.
     * @param {object} buckets Zero-initialised counters; mutated and returned.
     * @returns {object} `buckets`.
     */
    countErrorsBy(errors, field, buckets) {
        for (const error of errors) {
            const key = error[field];
            if (key != null && Object.prototype.hasOwnProperty.call(buckets, key)) {
                buckets[key]++;
            }
        }
        return buckets;
    }
    /**
     * Records an error built from a caught value. The message is
     * `"<prefix>: <error.message>"`, or `"<prefix>: Unknown error"` when the
     * value is not an `Error`.
     *
     * @param {*} category
     * @param {*} severity
     * @param {string} prefix
     * @param {*} error The caught value.
     * @param {object} [extra] Additional fields for the record (e.g. `position`).
     */
    recordFailure(category, severity, prefix, error, extra = {}) {
        const isError = error instanceof Error;
        this.errorHandler.recordError({
            category,
            severity,
            message: `${prefix}: ${isError ? error.message : 'Unknown error'}`,
            ...extra,
            originalError: isError ? error : undefined
        });
    }
}
exports.AdvancedCommentRemover = AdvancedCommentRemover;
//# sourceMappingURL=advanced-logic.js.map