UNPKG

crapifyme

Version:

Ultra-fast developer productivity CLI tools - remove comments, logs, and more

607 lines 25.1 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.AdvancedCommentRemover = void 0; const path_1 = __importDefault(require("path")); const enhanced_tokenizer_1 = require("../../shared/enhanced-tokenizer"); const error_handler_1 = require("../../shared/error-handler"); const logger_1 = require("../../shared/logger"); const performance_monitor_1 = require("../../shared/performance-monitor"); const rule_manager_1 = require("../../shared/rule-manager"); const types_1 = require("../../shared/types"); class AdvancedCommentRemover { constructor(keepPatterns, options = {}) { this.keepPatterns = keepPatterns.filter(p => p.trim().length > 0); this.logger = options.logger || new logger_1.Logger(false, false, false); this.errorHandler = new error_handler_1.ErrorHandler(this.logger, true); this.enhancedTokenizer = new enhanced_tokenizer_1.EnhancedTokenizer(this.logger); this.ruleManager = new rule_manager_1.PreservationRuleManager(); this.useEnhancedTokenizer = options.useEnhancedTokenizer !== false; this.performanceMonitor = new performance_monitor_1.PerformanceMonitor(this.logger); this.configurePreservationRules(options); this.addCustomPatterns(); } removeComments(content, filePath) { const extension = this.getFileExtension(filePath); this.errorHandler.clear(); try { if (this.useEnhancedTokenizer) { const result = this.removeCommentsWithEnhancedTokenizer(content, filePath); return this.enhanceResultWithErrorInfo(result, filePath); } else { const result = this.removeCommentsWithLegacyTokenizer(content, extension); return this.enhanceResultWithErrorInfo(result, filePath); } } catch (error) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.CRITICAL, message: `Critical error processing file ${filePath}: ${error instanceof Error ? error.message : 'Unknown error'}`, originalError: error instanceof Error ? error : undefined }); this.logger.warn(`Enhanced tokenizer failed for ${filePath}, falling back to legacy tokenizer`); try { const result = this.removeCommentsWithLegacyTokenizer(content, extension); return this.enhanceResultWithErrorInfo(result, filePath); } catch (fallbackError) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.CRITICAL, message: `Fallback tokenizer also failed for ${filePath}: ${fallbackError instanceof Error ? fallbackError.message : 'Unknown error'}`, originalError: fallbackError instanceof Error ? fallbackError : undefined }); return this.createFailsafeResult(content, filePath); } } } removeCommentsWithEnhancedTokenizer(content, filePath) { try { this.performanceMonitor.startMonitoring(); const tokens = this.enhancedTokenizer.tokenize(content); const useOptimizedBuilder = content.length > 100000; const result = useOptimizedBuilder ? new performance_monitor_1.OptimizedStringBuilder() : []; let removed = 0; let preserved = 0; for (const token of tokens) { if (token.type === 'comment') { try { if (this.shouldPreserveCommentEnhanced(token.value)) { if (useOptimizedBuilder) { result.append(token.value); } else { result.push(token.value); } preserved++; } else { removed++; } } catch (preservationError) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.PRESERVATION, severity: error_handler_1.ErrorSeverity.MEDIUM, message: `Error in comment preservation logic: ${preservationError instanceof Error ? preservationError.message : 'Unknown error'}`, position: token.startPos, originalError: preservationError instanceof Error ? preservationError : undefined }); if (useOptimizedBuilder) { result.append(token.value); } else { result.push(token.value); } preserved++; } } else { if (useOptimizedBuilder) { result.append(token.value); } else { result.push(token.value); } } } const processedContent = useOptimizedBuilder ? result.toString() : result.join(''); const metrics = this.performanceMonitor.stopMonitoring(tokens.length, content.length); this.validateProcessingResult(content, processedContent, tokens.length, filePath); return { content: processedContent, modified: content !== processedContent, removed, preserved, performanceMetrics: metrics }; } catch (error) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.HIGH, message: `Enhanced tokenizer processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`, originalError: error instanceof Error ? error : undefined }); throw error; } } removeCommentsWithLegacyTokenizer(content, extension) { try { const tokens = this.tokenizeWithErrorHandling(content, extension); const result = []; let removed = 0; let preserved = 0; for (const token of tokens) { if (token.type === 'comment') { try { if (this.shouldPreserveComment(token.value)) { result.push(token.value); preserved++; } else { removed++; } } catch (preservationError) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.PRESERVATION, severity: error_handler_1.ErrorSeverity.MEDIUM, message: `Error in legacy comment preservation logic: ${preservationError instanceof Error ? preservationError.message : 'Unknown error'}`, originalError: preservationError instanceof Error ? preservationError : undefined }); result.push(token.value); preserved++; } } else { result.push(token.value); } } const processedContent = result.join(''); this.validateProcessingResult(content, processedContent, tokens.length, extension); return { content: processedContent, modified: content !== processedContent, removed, preserved }; } catch (error) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.HIGH, message: `Legacy tokenizer processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`, originalError: error instanceof Error ? error : undefined }); throw error; } } getFileExtension(filePath) { return path_1.default.extname(filePath).slice(1).toLowerCase(); } tokenize(content, extension) { const tokens = []; let i = 0; while (i < content.length) { const char = content[i]; const next = content[i + 1]; if (char === "'") { const str = this.parseString(content, i, "'"); tokens.push({ type: 'string', value: str.value }); i = str.end; continue; } if (char === '"') { const str = this.parseString(content, i, '"'); tokens.push({ type: 'string', value: str.value }); i = str.end; continue; } if (char === '`') { const str = this.parseTemplateString(content, i); tokens.push({ type: 'string', value: str.value }); i = str.end; continue; } if (char === '/' && next === '*') { const comment = this.parseBlockComment(content, i); tokens.push({ type: 'comment', value: comment.value }); i = comment.end; continue; } if (char === '/' && next === '/') { if (this.isPartOfUrl(content, i)) { tokens.push({ type: 'code', value: char }); i++; continue; } const comment = this.parseLineComment(content, i); tokens.push({ type: 'comment', value: comment.value }); i = comment.end; continue; } if (char === '<' && content.substr(i, 4) === '<!--') { const comment = this.parseHtmlComment(content, i); tokens.push({ type: 'comment', value: comment.value }); i = comment.end; continue; } if (char === '#' && this.isHashCommentFile(extension)) { const comment = this.parseHashComment(content, i); tokens.push({ type: 'comment', value: comment.value }); i = comment.end; continue; } tokens.push({ type: 'code', value: char }); i++; } return tokens; } parseString(content, start, quote) { let i = start + 1; let value = quote; while (i < content.length) { const char = content[i]; value += char; if (char === '\\') { i++; if (i < content.length) { value += content[i]; } } else if (char === quote) { return { value, end: i + 1 }; } i++; } return { value, end: i }; } parseTemplateString(content, start) { let i = start + 1; let value = '`'; while (i < content.length) { const char = content[i]; value += char; if (char === '\\') { i++; if (i < content.length) { value += content[i]; } } else if (char === '`') { return { value, end: i + 1 }; } i++; } return { value, end: i }; } parseLineComment(content, start) { let i = start; let value = ''; while (i < content.length && content[i] !== '\n') { value += content[i]; i++; } return { value, end: i }; } parseBlockComment(content, start) { let i = start + 2; let value = '/*'; while (i < content.length - 1) { value += content[i]; if (content[i] === '*' && content[i + 1] === '/') { value += '/'; return { value, end: i + 2 }; } i++; } return { value, end: i }; } parseHtmlComment(content, start) { let i = start + 4; let value = '<!--'; while (i < content.length - 2) { value += content[i]; if (content.substr(i, 3) === '-->') { value += '-->'; return { value, end: i + 3 }; } i++; } return { value, end: i }; } parseHashComment(content, start) { let i = start; let value = ''; while (i < content.length && content[i] !== '\n') { value += content[i]; i++; } return { value, end: i }; } isHashCommentFile(extension) { const hashCommentExtensions = [ 'py', 'sh', 'bash', 'zsh', 'fish', 'rb', 'pl', 'yaml', 'yml', 'toml', 'conf', 'env' ]; return hashCommentExtensions.includes(extension); } isPartOfUrl(content, position) { if (position < 5) return false; if (position >= 5) { const beforeHttp = content.substring(position - 5, position); if (beforeHttp === 'http:') { return true; } } if (position >= 6) { const beforeHttps = content.substring(position - 6, position); if (beforeHttps === 'https:') { return true; } } if (position >= 4) { const beforeFtp = content.substring(position - 4, position); if (beforeFtp === 'ftp:') { return true; } } return false; } shouldPreserveCommentEnhanced(comment) { if (this.ruleManager.shouldPreserveComment(comment)) { return true; } return this.shouldPreserveComment(comment); } shouldPreserveComment(comment) { if (this.keepPatterns.length === 0) return false; return this.keepPatterns.some(pattern => { const regex = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i'); return regex.test(comment); }); } configurePreservationRules(options) { if (options.preserveFramework === false) { this.ruleManager.getRulesByCategory(types_1.CommentCategory.FRAMEWORK).forEach(rule => { this.ruleManager.removeRule(rule.name); }); } if (options.preserveDevelopment === false) { this.ruleManager.getRulesByCategory(types_1.CommentCategory.DEVELOPMENT).forEach(rule => { this.ruleManager.removeRule(rule.name); }); } if (options.preserveTooling === false) { this.ruleManager.getRulesByCategory(types_1.CommentCategory.TOOLING).forEach(rule => { this.ruleManager.removeRule(rule.name); }); } if (options.preserveDocumentation === false) { this.ruleManager.getRulesByCategory(types_1.CommentCategory.DOCUMENTATION).forEach(rule => { this.ruleManager.removeRule(rule.name); }); } if (options.customRules && options.customRules.length > 0) { const priority = options.rulePriority || 100; options.customRules.forEach((pattern, index) => { try { this.ruleManager.addCustomPattern(`cli-custom-pattern-${index}`, pattern, priority); } catch (error) { console.warn(`Invalid custom regex pattern ignored: ${pattern}`); } }); } } addCustomPatterns() { this.keepPatterns.forEach((pattern, index) => { try { this.ruleManager.addCustomPattern(`custom-pattern-${index}`, pattern, 50); } catch (error) { console.warn(`Invalid regex pattern ignored: ${pattern}`); } }); } getRuleManager() { return this.ruleManager; } setUseEnhancedTokenizer(enabled) { this.useEnhancedTokenizer = enabled; } enhanceResultWithErrorInfo(result, filePath) { const tokenizerErrors = this.enhancedTokenizer.getErrorHandler().getErrors(); const allErrors = [...this.errorHandler.getErrors(), ...tokenizerErrors]; const enhancedResult = { ...result, errors: allErrors, warnings: [ ...this.errorHandler.getWarnings(), ...this.enhancedTokenizer.getErrorHandler().getWarnings() ], hasErrors: allErrors.length > 0, hasCriticalErrors: allErrors.some(error => error.severity === error_handler_1.ErrorSeverity.CRITICAL) }; if (allErrors.length > 0) { const errorSummary = this.getErrorSummary(allErrors); this.logger.warn(`File ${filePath} processed with ${errorSummary.total} errors: ${JSON.stringify(errorSummary.bySeverity)}`); } return enhancedResult; } createFailsafeResult(content, filePath) { this.logger.error(`All parsing methods failed for ${filePath}, returning original content`); return { content, modified: false, removed: 0, preserved: 0, errors: this.errorHandler.getErrors(), warnings: this.errorHandler.getWarnings(), hasErrors: true, hasCriticalErrors: this.errorHandler.hasCriticalErrors() }; } tokenizeWithErrorHandling(content, extension) { try { return this.tokenize(content, extension); } catch (error) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.TOKENIZATION, severity: error_handler_1.ErrorSeverity.HIGH, message: `Legacy tokenization failed: ${error instanceof Error ? error.message : 'Unknown error'}`, originalError: error instanceof Error ? error : undefined }); return [ { type: 'code', value: content } ]; } } validateProcessingResult(originalContent, processedContent, tokensProcessed, context) { const originalLength = originalContent.length; const processedLength = processedContent.length; const lengthDifference = originalLength - processedLength; const maxAllowedRemoval = originalLength * 0.5; if (lengthDifference > maxAllowedRemoval) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.HIGH, message: `Excessive content removal detected in ${context}: removed ${lengthDifference}/${originalLength} characters (${Math.round((lengthDifference / originalLength) * 100)}%)` }); } this.validateUrlIntegrity(originalContent, processedContent, context); if (tokensProcessed === 0 && originalLength > 0) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.TOKENIZATION, severity: error_handler_1.ErrorSeverity.HIGH, message: `No tokens processed for non-empty content in ${context}` }); } if (processedContent.includes('\uFFFD')) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.MEDIUM, message: `Potential encoding issues detected in processed content for ${context}` }); } } validateUrlIntegrity(originalContent, processedContent, context) { const brokenUrlPattern = /(https?:)(?!\s*\/\/)/g; const brokenUrls = processedContent.match(brokenUrlPattern); if (brokenUrls) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.CRITICAL, message: `URL corruption detected in ${context}: found broken URLs ${brokenUrls.join(', ')}` }); } const incompleteLinks = processedContent.match(/\[.*?\]\([^)]*(?:https?:)$/g); if (incompleteLinks) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.CRITICAL, message: `Markdown link corruption detected in ${context}: found incomplete links` }); } const originalUrls = originalContent.match(/https?:\/\/[^\s)]+/g); const processedUrls = processedContent.match(/https?:\/\/[^\s)]+/g); if (originalUrls && (!processedUrls || processedUrls.length < originalUrls.length)) { this.errorHandler.recordError({ category: error_handler_1.ErrorCategory.FILE_PROCESSING, severity: error_handler_1.ErrorSeverity.CRITICAL, message: `URL removal detected in ${context}: ${originalUrls.length - (processedUrls?.length || 0)} URLs were corrupted or removed` }); } } getErrorSummary(errors) { const bySeverity = { [error_handler_1.ErrorSeverity.LOW]: 0, [error_handler_1.ErrorSeverity.MEDIUM]: 0, [error_handler_1.ErrorSeverity.HIGH]: 0, [error_handler_1.ErrorSeverity.CRITICAL]: 0 }; errors.forEach(error => { if (error.severity && bySeverity.hasOwnProperty(error.severity)) { bySeverity[error.severity]++; } }); return { total: errors.length, bySeverity }; } getErrorHandler() { return this.errorHandler; } getProcessingStats() { const tokenizerErrors = this.enhancedTokenizer.getErrorHandler().getErrors(); const allErrors = [...this.errorHandler.getErrors(), ...tokenizerErrors]; const allWarnings = [ ...this.errorHandler.getWarnings(), ...this.enhancedTokenizer.getErrorHandler().getWarnings() ]; return { errors: allErrors, warnings: allWarnings, errorSummary: this.getErrorSummaryDetailed(allErrors), hasErrors: allErrors.length > 0, hasCriticalErrors: allErrors.some(error => error.severity === error_handler_1.ErrorSeverity.CRITICAL) }; } getErrorSummaryDetailed(errors) { const bySeverity = { [error_handler_1.ErrorSeverity.LOW]: 0, [error_handler_1.ErrorSeverity.MEDIUM]: 0, [error_handler_1.ErrorSeverity.HIGH]: 0, [error_handler_1.ErrorSeverity.CRITICAL]: 0 }; const byCategory = { [error_handler_1.ErrorCategory.PARSING]: 0, [error_handler_1.ErrorCategory.TOKENIZATION]: 0, [error_handler_1.ErrorCategory.REGEX]: 0, [error_handler_1.ErrorCategory.STRING_HANDLING]: 0, [error_handler_1.ErrorCategory.TEMPLATE_LITERAL]: 0, [error_handler_1.ErrorCategory.COMMENT_DETECTION]: 0, [error_handler_1.ErrorCategory.PRESERVATION]: 0, [error_handler_1.ErrorCategory.FILE_PROCESSING]: 0 }; errors.forEach(error => { if (error.severity && bySeverity.hasOwnProperty(error.severity)) { bySeverity[error.severity]++; } if (error.category && byCategory.hasOwnProperty(error.category)) { byCategory[error.category]++; } }); return { total: errors.length, bySeverity, byCategory }; } } exports.AdvancedCommentRemover = AdvancedCommentRemover; //# sourceMappingURL=advanced-logic.js.map