// comment-strip-cli
// Version:
// A powerful CLI tool to strip comments from source code files while preserving strings and important metadata.
// 455 lines (377 loc) • 11.6 kB
// JavaScript
const { getLanguageConfig, getLanguageConfigByFilename } = require('./languages.js');
/**
 * Strip comments from source text using the language configuration resolved
 * from the file extension or, failing that, from the filename.
 *
 * Processing order is: block comments, single-line comments, hash comments,
 * semicolon comments, then auto-formatting. For Solidity ('sol') the
 * `// SPDX-License-Identifier:` line is captured up front and put back at the
 * top only if it no longer appears anywhere in the stripped text.
 *
 * @param {string} content - Source text to process.
 * @param {string} fileExtension - Extension key passed to the language lookup (compared against 'sol' here).
 * @param {string[]} [commentTypes=['all']] - Either 'all' or any of the markers
 *   '//', '#', ';', '--' and the five-character slash-star / star-slash block marker.
 * @param {string|null} [filename=null] - Fallback key for the language lookup.
 * @returns {string} The processed text, or `content` unchanged when no
 *   language configuration is found (a warning is logged in that case).
 */
function stripComments(content, fileExtension, commentTypes = ['all'], filename = null) {
  let langConfig = getLanguageConfig(fileExtension);
  if (!langConfig && filename) {
    langConfig = getLanguageConfigByFilename(filename);
  }
  if (!langConfig) {
    console.warn(`Warning: Unsupported file '${filename || fileExtension}', skipping...`);
    return content;
  }

  // A Set keeps 'single' from being queued twice when both '//' and '--' are requested.
  const typesToProcess = new Set();
  if (commentTypes.includes('all')) {
    for (const type of ['single', 'block', 'hash', 'semicolon']) {
      if (langConfig[type]) {
        typesToProcess.add(type);
      }
    }
  } else {
    const requested = (marker) => commentTypes.includes(marker);
    if (requested('//') && langConfig.single) typesToProcess.add('single');
    if (requested('/* */') && langConfig.block) typesToProcess.add('block');
    if (requested('#') && langConfig.hash) typesToProcess.add('hash');
    if (requested(';') && langConfig.semicolon) typesToProcess.add('semicolon');
    if (requested('--') && langConfig.single && langConfig.single.pattern === '--') {
      typesToProcess.add('single');
    }
  }

  // Special handling for Solidity - remember the SPDX license line.
  const isSolidity = fileExtension === 'sol';
  const spdxLine = isSolidity
    ? content.split('\n').find((line) => line.trim().startsWith('// SPDX-License-Identifier:'))
    : undefined;

  let result = content;

  // Block comments go first because they can span several lines.
  if (typesToProcess.has('block')) {
    result = stripBlockComments(result, langConfig.block.pattern, isSolidity);
  }
  if (typesToProcess.has('single')) {
    result = stripSingleLineComments(result, langConfig.single.pattern, isSolidity);
  }
  if (typesToProcess.has('hash')) {
    result = stripHashComments(result);
  }
  // Semicolon comments (Assembly, INI, etc.)
  if (typesToProcess.has('semicolon')) {
    result = stripSemicolonComments(result);
  }

  // Restore the SPDX line only when it is really gone. Checking every line
  // (not just the first) avoids duplicating a license line that survived
  // below a leading blank line or another preserved comment.
  if (spdxLine) {
    const wanted = spdxLine.trim();
    const stillPresent = result.split('\n').some((line) => line.trim() === wanted);
    if (!stillPresent) {
      result = `${spdxLine}\n${result}`;
    }
  }

  return autoFormat(result, fileExtension);
}
/**
 * Remove block comments from source text while leaving string literals alone.
 *
 * Text between a quote character (double quote, single quote or backtick) and
 * its matching closer is copied verbatim, with backslash escapes honoured, so
 * comment markers inside strings survive. An unterminated comment swallows the
 * rest of the input.
 *
 * When a removed comment was the only thing separating two identifier
 * characters or two operator characters, a single space is emitted in its
 * place so the neighbouring tokens do not merge into one.
 *
 * @param {string} content - Source text to scan.
 * @param {string} pattern - Key naming the delimiter pair; the supported keys
 *   are the ones in the lookup table at the top of the body. Any other value
 *   returns `content` unchanged.
 * @param {boolean} [isSolidity=false] - When true, slash-star comments that
 *   contain `SPDX-License-Identifier:` are kept verbatim.
 * @returns {string} The text with block comments removed.
 */
function stripBlockComments(content, pattern, isSolidity = false) {
  const delimiters = new Map([
    ['/* */', ['/*', '*/']],
    ['--[[ ]]', ['--[[', ']]']],
    ['{- -}', ['{-', '-}']],
  ]).get(pattern);
  if (!delimiters) {
    return content; // Unsupported pattern
  }
  const [open, close] = delimiters;

  const identifierChar = /[A-Za-z0-9_$]/;
  const operatorChar = /[-+*/&|<>=!]/;
  // True when dropping the comment would glue `before` and `after` into one token.
  const wouldFuse = (before, after) =>
    before !== undefined &&
    after !== undefined &&
    ((identifierChar.test(before) && identifierChar.test(after)) ||
      (operatorChar.test(before) && operatorChar.test(after)));

  let result = '';
  let stringChar = null; // quote that opened the current string literal, or null
  let i = 0;

  while (i < content.length) {
    const char = content[i];

    if (stringChar !== null) {
      // Copy an escaped character as a pair so an escaped quote cannot close the string.
      if (char === '\\' && i + 1 < content.length) {
        result += char + content[i + 1];
        i += 2;
        continue;
      }
      if (char === stringChar) {
        stringChar = null;
      }
      result += char;
      i++;
      continue;
    }

    if (char === '"' || char === "'" || char === '`') {
      stringChar = char;
      result += char;
      i++;
      continue;
    }

    if (content.startsWith(open, i)) {
      // Search after the opener so the opener's own characters are never
      // mistaken for part of the terminator.
      const closeAt = content.indexOf(close, i + open.length);
      if (closeAt === -1) {
        break; // unterminated comment: everything that follows is comment text
      }
      const afterComment = closeAt + close.length;

      // Special case for Solidity: preserve SPDX if it's in a block comment
      if (isSolidity && open === '/*') {
        const commentText = content.slice(i, afterComment);
        if (commentText.includes('SPDX-License-Identifier:')) {
          result += commentText;
          i = afterComment;
          continue;
        }
      }

      if (wouldFuse(result[result.length - 1], content[afterComment])) {
        result += ' ';
      }
      i = afterComment;
      continue;
    }

    result += char;
    i++;
  }

  return result;
}
/**
 * Remove `//` or `--` end-of-line comments, line by line, without touching
 * comment markers that sit inside string literals.
 *
 * Double- and single-quoted strings are tracked within one line only. A
 * backtick-delimited literal may span lines, so its open/closed state is
 * carried from one line to the next; otherwise a `//` inside a multi-line
 * template (a URL, for instance) would be cut off and the string corrupted.
 * Every emitted line has its trailing whitespace removed.
 *
 * @param {string} content - Source text to process.
 * @param {string} pattern - Comment marker; only '//' and '--' are stripped,
 *   any other value leaves comment text in place.
 * @param {boolean} [isSolidity=false] - When true, lines starting with
 *   `// SPDX-License-Identifier:` are kept verbatim.
 * @returns {string} The processed text, joined with '\n'.
 */
function stripSingleLineComments(content, pattern, isSolidity = false) {
  const stripsPattern = pattern === '//' || pattern === '--';
  const processedLines = [];
  let inBacktickLiteral = false; // a backtick literal opened on an earlier line is still open

  for (const line of content.split('\n')) {
    // Special case for Solidity: preserve SPDX license
    if (isSolidity && line.trim().startsWith('// SPDX-License-Identifier:')) {
      processedLines.push(line);
      continue;
    }

    let result = '';
    let stringChar = inBacktickLiteral ? '`' : null;
    let i = 0;

    while (i < line.length) {
      const char = line[i];

      if (stringChar !== null) {
        // Copy an escaped character as a pair so an escaped quote cannot close the string.
        if (char === '\\' && i + 1 < line.length) {
          result += char + line[i + 1];
          i += 2;
          continue;
        }
        if (char === stringChar) {
          stringChar = null;
        }
        result += char;
        i++;
        continue;
      }

      if (char === '"' || char === "'" || char === '`') {
        stringChar = char;
        result += char;
        i++;
        continue;
      }

      if (stripsPattern && line.startsWith(pattern, i)) {
        break; // Remove everything from here to end of line
      }

      result += char;
      i++;
    }

    // Only backtick literals continue onto the next line; an unclosed
    // ordinary quote is forgotten at the line break, as before.
    inBacktickLiteral = stringChar === '`';
    processedLines.push(result.trimEnd());
  }

  return processedLines.join('\n');
}
/**
 * Remove `#` end-of-line comments, line by line, without touching `#`
 * characters inside string literals.
 *
 * Ordinary quoted strings (double quote, single quote, backtick) are tracked
 * within one line. Triple-quoted strings (three double quotes or three single
 * quotes) are tracked across lines, so a `#` inside a multi-line docstring is
 * kept instead of being treated as a comment. Lines starting with `#!` are
 * kept verbatim unless they fall inside an open triple-quoted string. Every
 * other emitted line has its trailing whitespace removed.
 *
 * @param {string} content - Source text to process.
 * @returns {string} The processed text, joined with '\n'.
 */
function stripHashComments(content) {
  const processedLines = [];
  let openTriple = null; // triple-quote delimiter still open from an earlier line, or null

  for (const line of content.split('\n')) {
    if (openTriple === null && line.startsWith('#!')) {
      processedLines.push(line);
      continue;
    }

    let result = '';
    let closer = openTriple; // text that ends the current string, or null outside strings
    let i = 0;

    while (i < line.length) {
      const char = line[i];

      if (closer !== null) {
        // Copy an escaped character as a pair so an escaped quote cannot close the string.
        if (char === '\\' && i + 1 < line.length) {
          result += char + line[i + 1];
          i += 2;
          continue;
        }
        if (line.startsWith(closer, i)) {
          result += closer;
          i += closer.length;
          closer = null;
          continue;
        }
        result += char;
        i++;
        continue;
      }

      if (char === '"' || char === "'" || char === '`') {
        const triple = char.repeat(3);
        closer = char !== '`' && line.startsWith(triple, i) ? triple : char;
        result += closer;
        i += closer.length;
        continue;
      }

      // Check for # comment outside strings
      if (char === '#') {
        break; // Remove everything from here to end of line
      }

      result += char;
      i++;
    }

    // Only triple-quoted strings continue onto the next line; an unclosed
    // ordinary quote is forgotten at the line break, as before.
    openTriple = closer !== null && closer.length === 3 ? closer : null;
    processedLines.push(result.trimEnd());
  }

  return processedLines.join('\n');
}
/**
 * Remove `;` end-of-line comments (Assembly, INI, etc.), line by line.
 *
 * A `;` between a double or single quote and its matching closer on the same
 * line is kept. Backslashes get no special treatment here. Each emitted line
 * has its trailing whitespace removed.
 *
 * @param {string} content - Source text to process.
 * @returns {string} The processed text, joined with '\n'.
 */
function stripSemicolonComments(content) {
  // Returns the part of one line that precedes its first unquoted ';'.
  const keepCode = (line) => {
    let openQuote = null;
    for (let pos = 0; pos < line.length; pos += 1) {
      const ch = line[pos];
      if (openQuote !== null) {
        if (ch === openQuote) {
          openQuote = null;
        }
      } else if (ch === '"' || ch === "'") {
        openQuote = ch;
      } else if (ch === ';') {
        return line.slice(0, pos).trimEnd();
      }
    }
    return line.trimEnd();
  };

  return content
    .split('\n')
    .map((line) => keepCode(line))
    .join('\n');
}
/**
 * Tidy stripped source text.
 *
 * Trailing whitespace is removed from every line, runs of blank lines are
 * capped at two, each non-blank line goes through the formatter selected by
 * `fileExtension` (none for unlisted extensions), trailing blank lines are
 * dropped, and non-empty output ends with exactly one newline.
 *
 * @param {string} content - Text to format.
 * @param {string} fileExtension - Extension key used to choose a line formatter.
 * @returns {string} The formatted text ('' when nothing but blank lines remain).
 */
function autoFormat(content, fileExtension) {
  // Language-specific formatting for a single non-blank line.
  const formatLine = (text) => {
    switch (fileExtension) {
      case 'js':
      case 'mjs':
      case 'jsx':
      case 'ts':
      case 'tsx':
        return formatJavaScript(text);
      case 'c':
      case 'cpp':
      case 'cxx':
      case 'cc':
      case 'h':
      case 'hpp':
      case 'hxx':
      case 'java':
      case 'kt':
      case 'kts':
      case 'scala':
      case 'sc':
      case 'swift':
      case 'dart':
        return formatCFamily(text);
      case 'py':
      case 'pyw':
        return formatPython(text);
      case 'sol':
        return formatSolidity(text);
      case 'rs':
        return formatRust(text);
      case 'go':
        return formatGo(text);
      default:
        return text;
    }
  };

  const kept = [];
  let blankRun = 0;

  for (const rawLine of content.split('\n')) {
    const trimmed = rawLine.trimEnd();

    if (trimmed.length > 0) {
      blankRun = 0;
      kept.push(formatLine(trimmed));
    } else {
      // Keep at most two consecutive empty lines.
      blankRun += 1;
      if (blankRun <= 2) {
        kept.push('');
      }
    }
  }

  // Remove trailing empty lines
  while (kept.length > 0 && kept[kept.length - 1].trim() === '') {
    kept.pop();
  }

  return kept.length > 0 ? `${kept.join('\n')}\n` : '';
}
/**
 * Apply light brace/keyword spacing to one line of JavaScript or TypeScript.
 *
 * - A `{` that ends the line gets exactly one space before it, but only when
 *   it follows ordinary code. A line holding nothing but `{` keeps its
 *   indentation, and a `{` directly after `(`, `[`, `{` or `$` is left alone
 *   so `foo({` and a template `${` are not split.
 * - `}else{` style joins become `} else {`.
 * - `if(`, `for(` and `while(` get a space before the parenthesis.
 *
 * The substitutions are purely textual, so matching text inside string
 * literals is rewritten too.
 *
 * @param {string} line - A single source line.
 * @returns {string} The reformatted line.
 */
function formatJavaScript(line) {
  return line
    .replace(/([^\s(\[{$])\s*{\s*$/, '$1 {')
    .replace(/}\s*else\s*{/g, '} else {')
    .replace(/\bif\(/g, 'if (')
    .replace(/\bfor\(/g, 'for (')
    .replace(/\bwhile\(/g, 'while (');
}
/**
 * Apply light brace/keyword spacing to one line of C-style source.
 *
 * - A `{` that ends the line gets exactly one space before it, but only when
 *   it follows ordinary code. A line holding nothing but `{` (Allman style)
 *   keeps its indentation, and a `{` directly after `(`, `[`, `{` or `$` is
 *   left alone so nested initialisers and `${` string templates are not split.
 * - `}else{` style joins become `} else {`.
 * - `if(`, `for(` and `while(` get a space before the parenthesis.
 *
 * The substitutions are purely textual, so matching text inside string
 * literals is rewritten too.
 *
 * @param {string} line - A single source line.
 * @returns {string} The reformatted line.
 */
function formatCFamily(line) {
  return line
    .replace(/([^\s(\[{$])\s*{\s*$/, '$1 {')
    .replace(/}\s*else\s*{/g, '} else {')
    .replace(/\bif\(/g, 'if (')
    .replace(/\bfor\(/g, 'for (')
    .replace(/\bwhile\(/g, 'while (');
}
/**
 * Format one line of Python: only trailing whitespace is removed, leading
 * indentation is left exactly as it is.
 *
 * @param {string} line - A single source line.
 * @returns {string} The line without trailing whitespace.
 */
function formatPython(line) {
  const withoutTrailingSpace = line.trimEnd();
  return withoutTrailingSpace;
}
/**
 * Format one line of Solidity: run the C-family formatter, then collapse any
 * run of whitespace after the `function` and `modifier` keywords to one space.
 *
 * @param {string} line - A single source line.
 * @returns {string} The reformatted line.
 */
function formatSolidity(line) {
  const cStyled = formatCFamily(line);
  // Both keywords are handled in one pass; the captured keyword is written back unchanged.
  return cStyled.replace(/\b(function|modifier)\s+/g, '$1 ');
}
/**
 * Apply light spacing rules to one line of Rust.
 *
 * - A `{` that ends the line gets exactly one space before it, but only when
 *   it follows ordinary code. A line holding nothing but `{` keeps its
 *   indentation, and a `{` directly after `(`, `[` or `{` is left alone.
 * - `}else{` style joins become `} else {`.
 * - Whitespace after the `fn` and `let` keywords collapses to one space.
 *
 * The substitutions are purely textual, so matching text inside string
 * literals is rewritten too.
 *
 * @param {string} line - A single source line.
 * @returns {string} The reformatted line.
 */
function formatRust(line) {
  return line
    .replace(/([^\s(\[{])\s*{\s*$/, '$1 {')
    .replace(/}\s*else\s*{/g, '} else {')
    .replace(/\bfn\s+/g, 'fn ')
    .replace(/\blet\s+/g, 'let ');
}
/**
 * Apply light spacing rules to one line of Go.
 *
 * - A `{` that ends the line gets exactly one space before it, but only when
 *   it follows ordinary code. A line holding nothing but `{` (an element of a
 *   nested composite literal, for example) keeps its indentation, and a `{`
 *   directly after `(`, `[` or `{` is left alone.
 * - Whitespace after the `if` and `for` keywords collapses to one space.
 *
 * The substitutions are purely textual, so matching text inside string
 * literals is rewritten too.
 *
 * @param {string} line - A single source line.
 * @returns {string} The reformatted line.
 */
function formatGo(line) {
  return line
    .replace(/([^\s(\[{])\s*{\s*$/, '$1 {')
    .replace(/\bif\s+/g, 'if ')
    .replace(/\bfor\s+/g, 'for ');
}
// Public API: stripComments is the only function exported from this module.
module.exports = { stripComments };