UNPKG

@drmhse/remove-comments

Version:

A robust, zero-dependency CLI tool for removing comments from source code using token-based state machine parsing. Supports JavaScript, TypeScript, Java, Kotlin, Python, Vue, React, and Dart with advanced template literal processing.

553 lines (489 loc) 19.5 kB
#!/usr/bin/env node

const fs = require('fs').promises;
const path = require('path');

// Zero-dependency, superior comment removal implementation
// Based on token-based state machine with enhanced template literal support

/**
 * Lexer states shared by the comment-stripping state machines in this file.
 */
const STATES = {
  CODE: 'code',
  STRING_SINGLE: 'string_single',
  STRING_DOUBLE: 'string_double',
  STRING_TEMPLATE: 'string_template',
  TEMPLATE_EXPR: 'template_expr',
  REGEX: 'regex',
  LINE_COMMENT: 'line_comment',
  BLOCK_COMMENT: 'block_comment',
  ESCAPE: 'escape'
};

// Punctuators after which a `/` is taken to start a regex literal.
const REGEX_PRECEDING_PUNCTUATORS = /[=,({\[;:!&|?+\-*%^~<>]/;

// Keywords after which a `/` is taken to start a regex literal.
const REGEX_PRECEDING_KEYWORDS = new Set([
  'return', 'new', 'typeof', 'instanceof', 'in',
  'void', 'delete', 'throw', 'case', 'do', 'else'
]);

/**
 * Tells whether `ch` can be part of an identifier (letters, digits, `_`, `$`).
 * `undefined` (reading past either end of a string) is never a word character.
 */
function isWordChar(ch) {
  return ch !== undefined && /[\w$]/.test(ch);
}

/**
 * Removes line comments and block comments from JavaScript-family source
 * while leaving string literals, template literals and regex literals intact.
 *
 * Line breaks that were inside removed comments are kept, so line numbers of
 * the remaining code do not shift.
 *
 * @param {string} content - Source text to clean.
 * @returns {string} The source text without comments.
 */
function processJavaScriptFamily(content) {
  const length = content.length;
  let result = '';
  let i = 0;

  // A hashbang is only legal on the very first line and is not code; copy it
  // verbatim so its slashes are never mistaken for a regex or a comment.
  if (content.startsWith('#!')) {
    while (i < length && content[i] !== '\n' && content[i] !== '\r') {
      result += content[i];
      i++;
    }
  }

  let state = STATES.CODE;
  // Code-like state (CODE or TEMPLATE_EXPR) to resume once the current
  // string, regex or comment ends. These constructs cannot nest in each
  // other, so one slot is enough.
  let codeState = STATES.CODE;
  // State to resume after the single character following a backslash. Kept
  // separate from `codeState` so an escape inside a string cannot clobber
  // where that string returns to.
  let escapeReturnState = STATES.CODE;
  // Unbalanced `{` count inside the innermost `${ ... }`.
  let braceDepth = 0;
  // True while inside a `[...]` class of a regex literal, where `/` is literal.
  let inRegexClass = false;
  // One frame per open template literal, recording the context the opening
  // backtick appeared in, so nested templates unwind correctly.
  const templateStack = [];

  while (i < length) {
    const char = content[i];
    const next = content[i + 1];

    // The character right after a backslash is copied blindly.
    if (state === STATES.ESCAPE) {
      result += char;
      state = escapeReturnState;
      i++;
      continue;
    }

    const canEscape =
      state === STATES.STRING_SINGLE ||
      state === STATES.STRING_DOUBLE ||
      state === STATES.STRING_TEMPLATE ||
      state === STATES.REGEX;
    if (char === '\\' && canEscape) {
      result += char;
      escapeReturnState = state;
      state = STATES.ESCAPE;
      i++;
      continue;
    }

    switch (state) {
      // A `${ ... }` expression is ordinary code that additionally tracks braces.
      case STATES.CODE:
      case STATES.TEMPLATE_EXPR:
        if (char === "'" || char === '"') {
          codeState = state;
          state = char === "'" ? STATES.STRING_SINGLE : STATES.STRING_DOUBLE;
          result += char;
        } else if (char === '`') {
          templateStack.push({ outerState: state, outerDepth: braceDepth });
          state = STATES.STRING_TEMPLATE;
          result += char;
        } else if (char === '/' && next === '/') {
          codeState = state;
          state = STATES.LINE_COMMENT;
          i++; // Skip the second /
        } else if (char === '/' && next === '*') {
          codeState = state;
          state = STATES.BLOCK_COMMENT;
          i++; // Skip the *
        } else if (char === '/' && isRegexContext(content, i)) {
          codeState = state;
          state = STATES.REGEX;
          inRegexClass = false;
          result += char;
        } else {
          result += char;
          if (state === STATES.TEMPLATE_EXPR) {
            if (char === '{') {
              braceDepth++;
            } else if (char === '}') {
              braceDepth--;
              if (braceDepth === 0) {
                state = STATES.STRING_TEMPLATE;
              }
            }
          }
        }
        break;

      case STATES.STRING_SINGLE:
        result += char;
        if (char === "'") {
          state = codeState;
        }
        break;

      case STATES.STRING_DOUBLE:
        result += char;
        if (char === '"') {
          state = codeState;
        }
        break;

      case STATES.STRING_TEMPLATE:
        result += char;
        if (char === '`') {
          // Resume whatever surrounded this template literal.
          const frame = templateStack.pop();
          state = frame ? frame.outerState : STATES.CODE;
          braceDepth = frame ? frame.outerDepth : 0;
        } else if (char === '$' && next === '{') {
          result += next;
          i++; // Skip the {
          state = STATES.TEMPLATE_EXPR;
          braceDepth = 1;
        }
        break;

      case STATES.REGEX:
        result += char;
        if (char === '\n' || char === '\r') {
          // A regex literal cannot span lines, so the `/` was misdetected
          // (e.g. a division or a JSX closing tag). Bail out instead of
          // swallowing the following lines.
          state = codeState;
        } else if (char === '[') {
          inRegexClass = true;
        } else if (char === ']') {
          inRegexClass = false;
        } else if (char === '/' && !inRegexClass) {
          // Flags that follow are plain identifier characters and are copied
          // by the code state.
          state = codeState;
        }
        break;

      case STATES.LINE_COMMENT:
        if (char === '\n' || char === '\r') {
          result += char;
          state = codeState;
        }
        // Skip comment content
        break;

      case STATES.BLOCK_COMMENT:
        if (char === '*' && next === '/') {
          state = codeState;
          i++; // Skip the /
          // Keep `a/**/b` from collapsing into the single identifier `ab`.
          if (isWordChar(result[result.length - 1]) && isWordChar(content[i + 1])) {
            result += ' ';
          }
        } else if (char === '\n' || char === '\r') {
          result += char; // Preserve line breaks (including CR of CRLF)
        }
        // Skip comment content
        break;
    }

    i++;
  }

  return result;
}

// Helper functions

/**
 * Placeholder that always reports the CODE state.
 * Not called by any code in this file.
 */
function getPreviousState(currentState) {
  // Return to appropriate string state after escape
  return STATES.CODE; // Simplified for now
}

/**
 * Heuristically decides whether the `/` at `pos` starts a regex literal by
 * inspecting the nearest non-whitespace text before it.
 *
 * @param {string} content - Full source text.
 * @param {number} pos - Index of the `/` being classified.
 * @returns {boolean} True when a regex literal is plausible at `pos`.
 */
function isRegexContext(content, pos) {
  let j = pos - 1;
  while (j >= 0 && /\s/.test(content[j])) j--; // Skip whitespace

  // Nothing but whitespace before the slash: an expression starts here.
  if (j < 0) return true;

  if (REGEX_PRECEDING_PUNCTUATORS.test(content[j])) return true;

  // Otherwise a regex is only plausible right after a keyword. Extract the
  // whole preceding word so that e.g. `noreturn / 2` is not read as `return`.
  let wordStart = j;
  while (wordStart >= 0 && isWordChar(content[wordStart])) wordStart--;
  if (wordStart === j) return false; // Preceded by `)`, `]`, `}`, a quote, ...
  if (content[wordStart] === '.') return false; // Property access such as `obj.return`

  return REGEX_PRECEDING_KEYWORDS.has(content.slice(wordStart + 1, j + 1));
}

/**
 * Computes whether a regex literal may follow `char`, keeping `current` when
 * `char` gives no information. Not called by any code in this file.
 */
function updateRegexAllowed(char, current) {
  // Update whether regex is allowed based on previous character
  if (/[)\]}\w]/.test(char)) return false; // After identifier, ), ], }
  if (/[=,({\[;:!&|?+\-*%^~<>]/.test(char)) return true;
  return current;
}

// Processors using the new implementation
const JS_TS_PROCESSOR = {
  process: processJavaScriptFamily
};

const DART_PROCESSOR = {
  process: processJavaScriptFamily
};

const REACT_PROCESSOR = {
  process: (content) => {
    // First blank out JSX comment expressions (a block comment wrapped in
    // braces), keeping one newline per newline they contained.
    const withoutJsxComments = content.replace(/\{\s*\/\*[\s\S]*?\*\/\s*\}/g, (match) => {
      return '\n'.repeat((match.match(/\n/g) || []).length);
    });

    // Then process regular JavaScript
    return processJavaScriptFamily(withoutJsxComments);
  }
};

// Simple Java/Kotlin processor (C-style comments)
/**
 * Removes line comments and block comments from Java/Kotlin source while
 * leaving character literals, string literals and triple-quoted literals
 * (Java text blocks, Kotlin raw strings) intact.
 *
 * Line breaks inside removed comments are kept so line numbers do not shift.
 *
 * Limitation: a backslash escapes the next character inside triple-quoted
 * literals too. That matches Java text blocks; a Kotlin raw string whose last
 * character is a backslash is therefore not closed where Kotlin closes it.
 *
 * @param {string} content - Source text to clean.
 * @returns {string} The source text without comments.
 */
function processJavaFamily(content) {
  // State for triple-quoted literals. Local because only this state machine
  // needs it.
  const TEXT_BLOCK = 'text_block';
  const isWordChar = (ch) => ch !== undefined && /[\w$]/.test(ch);

  const length = content.length;
  let result = '';
  let state = STATES.CODE;
  // Literal state to resume after the character following a backslash.
  let escapeReturnState = STATES.CODE;
  let i = 0;

  while (i < length) {
    const char = content[i];
    const next = content[i + 1];

    // The character right after a backslash is copied blindly.
    if (state === STATES.ESCAPE) {
      result += char;
      state = escapeReturnState;
      i++;
      continue;
    }

    const canEscape =
      state === STATES.STRING_SINGLE ||
      state === STATES.STRING_DOUBLE ||
      state === TEXT_BLOCK;
    if (char === '\\' && canEscape) {
      result += char;
      escapeReturnState = state;
      state = STATES.ESCAPE;
      i++;
      continue;
    }

    switch (state) {
      case STATES.CODE:
        if (content.startsWith('"""', i)) {
          // Must be checked before the plain `"` case: treating the
          // delimiter as `""` + `"` lets a lone quote inside the literal
          // expose its content to comment stripping.
          state = TEXT_BLOCK;
          result += '"""';
          i += 2;
        } else if (char === "'") {
          state = STATES.STRING_SINGLE;
          result += char;
        } else if (char === '"') {
          state = STATES.STRING_DOUBLE;
          result += char;
        } else if (char === '/' && next === '/') {
          state = STATES.LINE_COMMENT;
          i++; // Skip second /
        } else if (char === '/' && next === '*') {
          state = STATES.BLOCK_COMMENT;
          i++; // Skip *
        } else {
          result += char;
        }
        break;

      case STATES.STRING_SINGLE:
        result += char;
        if (char === "'") state = STATES.CODE;
        break;

      case STATES.STRING_DOUBLE:
        result += char;
        if (char === '"') state = STATES.CODE;
        break;

      case TEXT_BLOCK:
        if (char === '"') {
          // Copy the whole run of quotes. A run of three or more ends the
          // literal; extra leading quotes in the run are literal content.
          let runEnd = i;
          while (content[runEnd + 1] === '"') runEnd++;
          result += content.slice(i, runEnd + 1);
          if (runEnd - i + 1 >= 3) state = STATES.CODE;
          i = runEnd;
        } else {
          result += char;
        }
        break;

      case STATES.LINE_COMMENT:
        if (char === '\n' || char === '\r') {
          result += char;
          state = STATES.CODE;
        }
        break;

      case STATES.BLOCK_COMMENT:
        if (char === '*' && next === '/') {
          state = STATES.CODE;
          i++; // Skip /
          // Keep `a/**/b` from collapsing into the single identifier `ab`.
          if (isWordChar(result[result.length - 1]) && isWordChar(content[i + 1])) {
            result += ' ';
          }
        } else if (char === '\n' || char === '\r') {
          result += char; // Preserve line breaks (including CR of CRLF)
        }
        break;
    }

    i++;
  }

  return result;
}

const JAVA_KOTLIN_PROCESSOR = {
  process: processJavaFamily
};

// Python tokens that matter for comment removal. String alternatives come
// first so that a `#` inside a string is consumed as part of the string.
// Built once; String.prototype.replace resets lastIndex on every call.
const PYTHON_TOKEN_REGEX = new RegExp([
  /("""[\s\S]*?""")/,    // Triple double quotes
  /('''[\s\S]*?''')/,    // Triple single quotes
  /("(?:[^"\\]|\\.)*")/, // Double quoted strings
  /('(?:[^'\\]|\\.)*')/, // Single quoted strings
  /(#[^\r\n]*)/          // Hash comments
].map(r => r.source).join('|'), 'g');

// Shape of a PEP 263 source-encoding declaration comment.
const PYTHON_ENCODING_DECLARATION = /^#.*?coding[:=][ \t]*[-_.a-zA-Z0-9]+/;

/**
 * Tells whether the hash comment at `offset` is a line the interpreter
 * itself reads: a hashbang at the very start of the file, or an encoding
 * declaration standing alone on the first or second line.
 *
 * @param {string} content - Full source text.
 * @param {string} comment - The matched comment text, starting at `#`.
 * @param {number} offset - Index of `comment` within `content`.
 * @returns {boolean} True when the comment must be kept.
 */
function isPythonHeaderComment(content, comment, offset) {
  if (offset === 0 && comment.startsWith('#!')) return true;
  if (!PYTHON_ENCODING_DECLARATION.test(comment)) return false;

  const firstBreak = content.indexOf('\n');
  const secondBreak = firstBreak === -1 ? -1 : content.indexOf('\n', firstBreak + 1);
  if (secondBreak !== -1 && offset > secondBreak) return false; // Line 3 or later

  // Only whitespace may precede the `#` on its line.
  const lineStart = firstBreak !== -1 && offset > firstBreak ? firstBreak + 1 : 0;
  return content.slice(lineStart, offset).trim() === '';
}

// Python processor - keep our working regex-based approach as strip-comments is too aggressive
const PYTHON_PROCESSOR = {
  process: (content) => {
    return content.replace(
      PYTHON_TOKEN_REGEX,
      (match, tripleDouble, tripleSingle, doubleQuote, singleQuote, hashComment, offset) => {
        // Keep all string literals (including docstrings)
        if (hashComment === undefined) return match;
        // Removing the hashbang or encoding line would change how the file runs or decodes
        if (isPythonHeaderComment(content, match, offset)) return match;
        // Remove hash comments
        return '';
      }
    );
  }
};

// Vue processor - only processes <script> sections
const VUE_PROCESSOR = {
  process: (content) => {
    return content.replace(/(<script[^>]*>)([\s\S]*?)(<\/script>)/g, (match, open, script, close) => {
      const cleanedScript = JS_TS_PROCESSOR.process(script);
      return `${open}${cleanedScript}${close}`;
    });
  }
};

// Maps a file extension to the processor that cleans it. '.vue' points at
// VUE_PROCESSOR so that consumers of this exported map never run the plain
// JavaScript state machine over template or style sections.
const LANGUAGE_PROCESSORS = {
  '.js': JS_TS_PROCESSOR,
  '.ts': JS_TS_PROCESSOR,
  '.dart': DART_PROCESSOR,
  '.vue': VUE_PROCESSOR,
  '.jsx': REACT_PROCESSOR,
  '.tsx': REACT_PROCESSOR,
  '.java': JAVA_KOTLIN_PROCESSOR,
  '.kt': JAVA_KOTLIN_PROCESSOR,
  '.kts': JAVA_KOTLIN_PROCESSOR,
  '.py': PYTHON_PROCESSOR,
};

const TARGET_EXTENSIONS = new Set(Object.keys(LANGUAGE_PROCESSORS));

/**
 * Removes comments from `content` using the processor registered for the
 * extension of `filePath`.
 *
 * @param {string} content - File contents.
 * @param {string} filePath - Path whose extension selects the processor.
 * @returns {string} Cleaned contents, or `content` unchanged when the
 *   extension has no processor.
 */
function cleanFileContent(content, filePath) {
  const processor = LANGUAGE_PROCESSORS[path.extname(filePath)];
  return processor ? processor.process(content) : content;
}

/**
 * Compiles one .gitignore line into a matcher for `/`-separated relative paths.
 *
 * Supported syntax: `#` comments, `!` negation, a leading `/` (anchor to the
 * directory of the .gitignore), a trailing `/`, `*`, `?` and `**`. Bracket
 * expressions are matched literally.
 *
 * @param {string} pattern - A single line from a .gitignore file.
 * @returns {{regex: RegExp, isNegated: boolean} | null} The compiled rule, or
 *   null for blank lines, comments and patterns that are empty once their
 *   markers are stripped.
 */
function gitignorePatternToRegex(pattern) {
  let glob = pattern.trim();
  if (glob === '' || glob.startsWith('#')) return null;

  const isNegated = glob.startsWith('!');
  if (isNegated) glob = glob.slice(1);

  const isAnchored = glob.startsWith('/');
  if (isAnchored) glob = glob.slice(1);

  // A trailing slash restricts the pattern to directories. Paths carry no
  // type information here, so it is only stripped.
  if (glob.endsWith('/')) glob = glob.slice(0, -1);

  // Lines such as "!" or "/" would otherwise compile to a rule that matches
  // almost every path.
  if (glob === '') return null;

  // Translate in a single pass. Doing `**` and `*` in two successive
  // replaces rewrites the `*` of the generated `.*` a second time.
  const body = glob.replace(/\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g, (token) => {
    switch (token) {
      case '**/': return '(?:.*/)?'; // Zero or more whole directories
      case '**': return '.*';
      case '*': return '[^/]*';
      case '?': return '[^/]';
      default: return `\\${token}`;
    }
  });

  // A pattern containing a slash is relative to the .gitignore directory;
  // otherwise it may match at any depth.
  const prefix = (isAnchored || glob.includes('/')) ? '^' : '(^|/)';

  return { regex: new RegExp(`${prefix}${body}($|/)`), isNegated };
}

/**
 * Builds the ignore predicate for `directory` by layering its own .gitignore
 * rules (if the file exists) on top of the predicate inherited from its parent.
 *
 * @param {string} directory - Directory whose .gitignore is read.
 * @param {string} rootDir - Root that the predicate's argument is relative to.
 * @param {function(string): boolean} parentIsIgnored - Predicate of the
 *   enclosing directory; a path it ignores stays ignored.
 * @returns {Promise<function(string): boolean>} Predicate taking a path
 *   relative to `rootDir`.
 */
async function createHierarchicalFilter(directory, rootDir, parentIsIgnored) {
  const gitignorePath = path.join(directory, '.gitignore');
  let localRules = [];

  try {
    const gitignoreContent = await fs.readFile(gitignorePath, 'utf8');
    localRules = gitignoreContent
      .split(/\r?\n/)
      .map(gitignorePatternToRegex)
      .filter(rule => rule !== null);
  } catch (error) {
    // A missing .gitignore is the normal case; anything else is reported.
    if (error.code !== 'ENOENT') {
      console.error(`[ERROR] Could not read .gitignore in ${directory}:`, error);
    }
  }

  return (relativePathFromRoot) => {
    if (parentIsIgnored(relativePathFromRoot)) {
      return true;
    }

    const fullPath = path.join(rootDir, relativePathFromRoot);
    const pathRelativeToHere = path.relative(directory, fullPath);
    if (pathRelativeToHere === '') {
      return false; // The directory itself
    }

    const normalizedPath = pathRelativeToHere.replace(/\\/g, '/');

    // The last matching rule wins, so a later `!pattern` can re-include.
    let isLocallyIgnored = false;
    for (const rule of localRules) {
      if (rule.regex.test(normalizedPath)) {
        isLocallyIgnored = !rule.isNegated;
      }
    }
    return isLocallyIgnored;
  };
}

// --- Core File System & Execution Logic ---

const BASE_EXCLUSIONS = ['node_modules/', 'dist/', '.git/', '.vscode/'];

/**
 * Cleans a single file in place, or only reports it in dry-run mode. Files
 * whose content would not change are left untouched. Failures are logged,
 * not thrown.
 *
 * @param {string} filePath - File to clean.
 * @param {boolean} isDryRun - When true, nothing is written.
 * @returns {Promise<void>}
 */
async function processFile(filePath, isDryRun) {
  try {
    const originalContent = await fs.readFile(filePath, 'utf8');
    const cleanedContent = cleanFileContent(originalContent, filePath);

    if (originalContent === cleanedContent) {
      return;
    }

    if (isDryRun) {
      console.log(`[DRY RUN] Would clean: ${filePath}`);
    } else {
      await fs.writeFile(filePath, cleanedContent, 'utf8');
      console.log(`[CLEANED] ${filePath}`);
    }
  } catch (error) {
    console.error(`[ERROR] Failed to process ${filePath}:`, error);
  }
}

/**
 * Recursively cleans every supported file under `directory`, skipping paths
 * ignored by the inherited predicate or by .gitignore files found on the way.
 *
 * @param {string} directory - Directory to scan.
 * @param {string} rootDir - Root of the whole scan.
 * @param {function(string): boolean} parentIsIgnored - Predicate inherited
 *   from the enclosing directory.
 * @param {boolean} isDryRun - When true, nothing is written.
 * @returns {Promise<void>}
 */
async function walkAndClean(directory, rootDir, parentIsIgnored, isDryRun) {
  const isIgnored = await createHierarchicalFilter(directory, rootDir, parentIsIgnored);

  try {
    const entries = await fs.readdir(directory, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = path.join(directory, entry.name);
      const relativePathFromRoot = path.relative(rootDir, fullPath);

      if (isIgnored(relativePathFromRoot)) {
        continue;
      }

      if (entry.isDirectory()) {
        await walkAndClean(fullPath, rootDir, isIgnored, isDryRun);
      } else if (TARGET_EXTENSIONS.has(path.extname(entry.name))) {
        await processFile(fullPath, isDryRun);
      }
    }
  } catch (error) {
    if (error.code !== 'ENOENT') {
      console.error(`[ERROR] Could not read directory ${directory}:`, error);
    }
  }
}

/**
 * CLI entry point. Usage: `[--dry-run] [path]`, where `path` defaults to the
 * current directory. Sets a non-zero exit code when the target is missing or
 * is neither a file nor a directory.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Starting comment cleanup...');

  const args = process.argv.slice(2);
  const isDryRun = args.includes('--dry-run');
  const targetPath = path.resolve(args.find(arg => !arg.startsWith('--')) || '.');

  console.log(`Target: ${targetPath}`);
  if (isDryRun) {
    console.log('--- DRY RUN MODE: No files will be changed. ---');
  }

  try {
    const stats = await fs.stat(targetPath);

    if (stats.isDirectory()) {
      console.log('Target is a directory. Starting recursive scan...');

      const baseRules = BASE_EXCLUSIONS.map(gitignorePatternToRegex).filter(r => r !== null);
      const baseIsIgnored = (relativePath) => {
        const normalizedPath = relativePath.replace(/\\/g, '/');
        for (const rule of baseRules) {
          if (rule.regex.test(normalizedPath)) {
            return true;
          }
        }
        return false;
      };

      await walkAndClean(targetPath, targetPath, baseIsIgnored, isDryRun);
    } else if (stats.isFile()) {
      console.log('Target is a single file.');
      if (TARGET_EXTENSIONS.has(path.extname(targetPath))) {
        await processFile(targetPath, isDryRun);
      } else {
        console.log(`[SKIPPED] File type (${path.extname(targetPath)}) is not supported.`);
      }
    } else {
      console.error(`[ERROR] Unsupported path type for: ${targetPath}`);
      process.exitCode = 1;
    }
  } catch (error) {
    if (error.code === 'ENOENT') {
      console.error(`[ERROR] Path not found: ${targetPath}`);
      process.exitCode = 1; // Let calling scripts detect the failure
    } else {
      throw error;
    }
  }

  console.log('Cleanup complete.');
}

// Only run main if this file is executed directly (not imported)
if (require.main === module) {
  main().catch(err => {
    console.error('An unexpected error occurred:', err);
    process.exitCode = 1; // Do not report success after an unexpected failure
  });
}

// Export for testing
module.exports = { cleanFileContent, LANGUAGE_PROCESSORS, VUE_PROCESSOR };