unqommented
Version:
A Node.js utility that quickly identifies files containing uncommented code in your codebase, designed for developers who want to tell LLMs exactly which files need comments added.
448 lines (407 loc) • 17.8 kB
JavaScript
/**
* @file This file contains utility functions for common operations such as file system interaction,
* data validation, and string manipulation. Each function is designed to be robust, with clear
* error handling and documentation.
* @module utils
*/
const fs = require('fs');
const path = require('path');
const validator = require('validator');
const crypto = require('crypto'); // Used for secure random ID generation
const _ = require('lodash');
const fastGlob = require('fast-glob');
const readline = require('readline');
/**
 * @function createLimiter
 * @description Builds a concurrency limiter: wrap any async function and at most
 * `max` wrapped calls will run at the same time; the rest wait in a FIFO queue.
 * @rationale Implemented in-house to avoid ESM-only dependencies such as
 * 'p-limit', keeping the library fully CommonJS compatible with a small
 * dependency tree.
 * @scalability Capping concurrent file operations prevents resource exhaustion
 * (e.g. file-handle limits) during large scans; `CONCURRENCY_LIMIT` trades raw
 * speed for system stability.
 * @param {number} max - Maximum number of tasks allowed to run concurrently.
 * @returns {Function} A wrapper that accepts an async function and returns a
 * Promise settling with that function's result, subject to the limit.
 */
function createLimiter(max) {
  // Reject non-integers and non-positive limits up front.
  if (!Number.isInteger(max) || max <= 0) {
    throw new Error(localVars.ERROR_MESSAGES.LIMIT_NOT_POSITIVE_INT);
  }
  const pending = []; // FIFO queue of tasks waiting for a free slot.
  let running = 0;    // Number of tasks currently executing.
  // Attempts to start the next queued task whenever a slot may be free.
  const drain = () => {
    if (running >= max) return;
    const entry = pending.shift();
    if (!entry) return;
    running += 1;
    // Frees the slot, settles the caller's promise, and pulls the next task.
    const settle = (handler) => (value) => {
      running -= 1;
      handler(value);
      drain();
    };
    // Promise.resolve().then(fn) also captures synchronous throws from fn.
    Promise.resolve().then(entry.fn).then(settle(entry.resolve), settle(entry.reject));
  };
  // The wrapper enqueues the task and kicks the scheduler.
  return (fn) =>
    new Promise((resolve, reject) => {
      pending.push({ fn, resolve, reject });
      drain();
    });
}
const localVars = require('../config/localVars');
const { qerrors } = require('qerrors');
/**
 * @function normalizePath
 * @description Rewrites every slash (forward or back) in a path string to the
 * platform's native separator, keeping paths consistent across operating systems.
 * @rationale Prevents errors caused by mixed path formats when input comes from
 * different OSes or user input; non-string values pass through untouched.
 * @param {string} dirPath - The file path to normalize.
 * @returns {string} The normalized path, or the input unchanged if not a string.
 */
function normalizePath(dirPath) {
  if (typeof dirPath !== 'string') {
    return dirPath;
  }
  return dirPath.replace(localVars.REGEX_PATTERNS.PATH_ALL_SLASHES, path.sep);
}
/**
 * @function removeQuotedStrings
 * @description Strips complete string literals (single-, double-, or
 * backtick-quoted, including escaped quote characters) from a line of code.
 * @rationale Essential for accurate comment detection: it stops comment markers
 * (e.g. `//`, `/*`) that appear inside string literals from being misread as
 * real comments.
 * @scalability The regex can be CPU-intensive on extremely long lines — an
 * accepted trade-off for detection accuracy.
 * @param {string} text - The line of code to sanitize.
 * @returns {string} The line with every complete string literal removed.
 */
function removeQuotedStrings(text) {
  // Opening quote, then any mix of escaped characters or non-matching
  // characters (lazily), up to the first unescaped matching closing quote.
  const quotedLiteral = /(["'`])(?:\\.|(?!\1)[^\\])*?\1/g;
  return text.replace(quotedLiteral, '');
}
/**
 * @function formatString
 * @description Formats a string by trimming surrounding whitespace, upper-casing
 * the first character, and lower-casing the remainder (e.g. "  fOO " -> "Foo").
 * @rationale Standardizes string inputs for data consistency, especially with
 * user-provided data. Uses native string methods rather than lodash's
 * `_.capitalize` — the behavior is identical for string inputs (guaranteed by
 * the type check) and the function no longer depends on lodash.
 * @param {string} input - The string to format.
 * @returns {string} The trimmed, capitalized string.
 * @throws {Error} If the input is not a string or is empty/whitespace-only.
 */
function formatString(input) {
  // Type validation ensures the function operates on the expected data type.
  if (typeof input !== 'string') {
    throw new Error(localVars.ERROR_MESSAGES.INPUT_NOT_STRING);
  }
  const trimmed = input.trim();
  // Reject empty strings after trimming to catch whitespace-only inputs.
  if (trimmed.length === 0) {
    throw new Error(localVars.ERROR_MESSAGES.INPUT_EMPTY);
  }
  // Equivalent to _.capitalize: first character upper, remainder lower.
  return trimmed.charAt(0).toUpperCase() + trimmed.slice(1).toLowerCase();
}
/**
 * @function validateEmail
 * @description Checks whether a value is a syntactically valid email address.
 * @rationale Delegates to the battle-tested 'validator' library — a security
 * best practice that guards against invalid or malicious input.
 * @param {string} email - The email address to validate.
 * @returns {boolean} True if the email is valid, false otherwise.
 * @throws {Error} If the email parameter is not a string.
 */
function validateEmail(email) {
  const isString = typeof email === 'string';
  if (!isString) {
    throw new Error(localVars.ERROR_MESSAGES.EMAIL_NOT_STRING);
  }
  return validator.isEmail(email);
}
/**
 * @function generateId
 * @description Produces a random lowercase-hex identifier of the requested length.
 * @rationale `crypto.randomBytes` is a secure randomness source and avoids
 * ESM-only dependencies that could complicate building or running the CLI.
 * @param {number} [length=8] - Desired length of the ID in characters.
 * @returns {string} A random hexadecimal ID of exactly `length` characters.
 * @throws {Error} If length is not a positive integer.
 */
function generateId(length = 8) {
  const isValidLength = Number.isInteger(length) && length > 0;
  if (!isValidLength) {
    throw new Error(localVars.ERROR_MESSAGES.LENGTH_NOT_POSITIVE);
  }
  // Each random byte yields two hex characters; slice trims odd lengths.
  const bytesNeeded = Math.ceil(length / 2);
  const hexString = crypto.randomBytes(bytesNeeded).toString('hex');
  return hexString.slice(0, length);
}
/**
 * @function validateDirectory
 * @description Asserts that a given path exists and is a directory.
 * @rationale A prerequisite for all file-system operations here: failing fast
 * with a precise message beats surfacing obscure downstream errors.
 * @param {string} dirPath - The path to validate.
 * @throws {Error} If the path is not a string, does not exist, or is not a directory.
 */
async function validateDirectory(dirPath) {
  if (typeof dirPath !== 'string') {
    throw new Error(localVars.ERROR_MESSAGES.BASEDIR_NOT_STRING);
  }
  const target = normalizePath(dirPath);
  try {
    const info = await fs.promises.stat(target);
    if (!info.isDirectory()) {
      // Thrown inside the try so the catch below logs it via qerrors,
      // exactly like any stat failure.
      throw new Error(`${localVars.ERROR_MESSAGES.PATH_NOT_DIR_PREFIX}${target}`);
    }
  } catch (error) {
    // Every failure is logged once, then mapped to a caller-facing error.
    qerrors(error, 'validateDirectory', { dirPath: target });
    if (error.code === 'ENOENT') {
      // Missing path gets a more specific message than a raw ENOENT.
      throw new Error(`${localVars.ERROR_MESSAGES.DIR_NOT_EXIST_PREFIX}${target}`);
    }
    throw error;
  }
}
/**
 * @function findUncommentedFiles
 * @description Recursively scans a directory to find files with uncommented code. This is the core
 * function of the utility, orchestrating the file discovery, processing, and result aggregation.
 * @workflow
 * 1. Validate the base directory.
 * 2. Build glob patterns and ignore rules from `localVars`.
 * 3. Use `fast-glob` to stream file paths efficiently.
 * 4. For each file, create a task managed by the `createLimiter`.
 * 5. The task checks if the file has uncommented code using `hasUncommentedCode`.
 * 6. Results are collected (or streamed to `outputStream`), and errors are logged.
 * 7. A single EMFILE retry (100 ms backoff) handles "too many open files".
 * @param {string} baseDir - The directory to scan.
 * @param {stream.Writable} [outputStream=null] - An optional stream to write results to.
 * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`.
 * @throws {Error} If `baseDir` is not a valid directory or `outputStream` is not writable.
 */
async function findUncommentedFiles(baseDir, outputStream = null) {
  const normalizedDir = normalizePath(baseDir);
  await validateDirectory(normalizedDir);
  if (outputStream && typeof outputStream.write !== 'function') {
    throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID);
  }
  const absoluteBaseDir = path.resolve(normalizedDir);
  const uncommentedFiles = [];
  const errors = [];
  // Emits one positive result. With an output stream, the relative path is
  // written directly (waiting for 'drain' on backpressure) and null is
  // returned; otherwise the relative path is returned for collection.
  // Extracted so the primary path and the EMFILE retry path share one
  // implementation instead of duplicating the write/backpressure logic.
  const emitResult = async (file) => {
    const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
    if (outputStream) {
      const ok = outputStream.write(`${relative}\n`);
      if (!ok) {
        // Handle backpressure by waiting for the stream to drain.
        await new Promise(resolve => outputStream.once('drain', resolve));
      }
      return null;
    }
    return relative;
  };
  // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern
  // and ignore rules are dynamically constructed from `localVars` for configurability.
  const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`;
  const otherIgnores = localVars.IGNORED_DIRECTORIES
    .filter(dir => dir !== 'node_modules')
    .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]);
  const globOptions = {
    cwd: absoluteBaseDir,
    ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores],
    onlyFiles: true,
    absolute: true,
  };
  // The limiter ensures that file processing is done in concurrent batches, optimizing
  // for speed while respecting system resource limits.
  const limit = createLimiter(localVars.CONCURRENCY_LIMIT);
  let tasks = [];
  // Awaits the current batch and, when not streaming, collects hits.
  const flushTasks = async () => {
    const results = await Promise.all(tasks);
    if (!outputStream) {
      results.forEach(result => { if (result) { uncommentedFiles.push(result); } });
    }
    tasks = [];
  };
  try {
    // Process files as a stream to avoid loading the entire file list into memory.
    for await (const file of fastGlob.stream(globPattern, globOptions)) {
      const task = limit(async () => {
        const fileName = path.basename(file);
        if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) {
          return null;
        }
        try {
          // Resolved via module.exports at call time, so a replaced export
          // (e.g. a test stub) is honored.
          if (await module.exports.hasUncommentedCode(file)) {
            return await emitResult(file);
          }
        } catch (error) {
          // EMFILE (too many open files): back off briefly and retry once.
          // The fixed 100 ms setTimeout is a simplicity-over-sophistication
          // trade-off versus a full exponential backoff.
          if (error.code === 'EMFILE') {
            await new Promise(resolve => setTimeout(resolve, 100));
            try {
              if (await module.exports.hasUncommentedCode(file)) {
                return await emitResult(file);
              }
              return null;
            } catch (err) {
              errors.push({ file, error: err.message });
              qerrors(err, 'findUncommentedFiles', { file });
              return null;
            }
          }
          errors.push({ file, error: error.message });
          qerrors(error, 'findUncommentedFiles', { file });
        }
        return null;
      });
      tasks.push(task);
      // Flush in batches so the pending-task array stays bounded on huge trees.
      if (tasks.length >= localVars.CONCURRENCY_LIMIT) {
        await flushTasks();
      }
    }
    // Process any remaining tasks.
    if (tasks.length > 0) {
      await flushTasks();
    }
  } catch (error) {
    errors.push({ error: error.message });
    qerrors(error, 'findUncommentedFiles', { baseDir });
  }
  return { uncommentedFiles, errors };
}
/**
 * @function hasUncommentedCode
 * @description Checks if a single file contains uncommented executable code.
 * @rationale This function streams the file line-by-line using `readline` to minimize memory
 * consumption, making it suitable for analyzing very large files without high RAM usage.
 * @scalability The line-by-line processing and regex operations are CPU-bound. While this
 * avoids memory bottlenecks, it can impact performance on a large number of files. This is a
 * trade-off for memory efficiency.
 * @param {string} filePath - The absolute path of the file to inspect.
 * @returns {Promise<boolean>} Resolves true as soon as a line with uncommented code is found
 * (reading stops early via cleanup); resolves false if the whole file is comments/whitespace;
 * rejects on stream or processing errors.
 */
async function hasUncommentedCode(filePath) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING });
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity, // Treat \r\n as a single line break so Windows files parse correctly.
    });
    const ext = path.extname(filePath).toLowerCase();
    // Use different comment patterns for different file types (e.g., Python uses #).
    // NOTE(review): assumes the two config regexes differ only in whether '#'
    // starts a comment — confirm against config/localVars.
    const commentRegex = ext === '.py'
      ? localVars.REGEX_PATTERNS.ALL_COMMENTS
      : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH;
    let inBlockComment = false; // State to track if we are inside a multi-line comment.
    let resolved = false; // Guards against settling the promise more than once.
    // The cleanup function is crucial for resource management. It ensures that file streams
    // and readline interfaces are properly closed to prevent resource leaks, especially in
    // error conditions or when uncommented code is found early.
    const cleanup = () => {
      if (!resolved) {
        resolved = true;
        rl.close();
        fileStream.destroy();
      }
    };
    rl.on('line', (line) => {
      try {
        let originalLine = line;
        // Strip string literals first so comment markers inside quotes are ignored.
        let currentLine = removeQuotedStrings(originalLine).trim();
        // Handle lines that are part of a multi-line block comment.
        if (inBlockComment) {
          const endCommentIndex = currentLine.indexOf('*/');
          if (endCommentIndex !== -1) {
            inBlockComment = false;
            // Re-locate the terminator in the raw line: indexes differ once
            // quoted strings have been stripped from currentLine.
            const originalEndIndex = originalLine.indexOf('*/');
            if (originalEndIndex !== -1) {
              // Keep only the text after the closing '*/' and re-sanitize it.
              originalLine = originalLine.substring(originalEndIndex + 2);
              currentLine = removeQuotedStrings(originalLine).trim();
            } else {
              // '*/' visible only in the sanitized line (edge case); skip the line.
              return;
            }
          } else {
            return; // Still inside a block comment.
          }
        }
        // Handle single-line and multi-line block comments within the same line.
        // NOTE(review): startCommentIndex/endCommentIndex are computed on the
        // sanitized currentLine but applied to originalLine below; if a quoted
        // string precedes the '/*', the excised span is offset from the real
        // comment — potential mis-slicing to verify.
        while (currentLine.includes('/*')) {
          const startCommentIndex = currentLine.indexOf('/*');
          const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2);
          if (endCommentIndex !== -1) {
            // Excise the inline /* ... */ span and re-sanitize the remainder.
            originalLine = originalLine.substring(0, startCommentIndex) + originalLine.substring(endCommentIndex + 2);
            currentLine = removeQuotedStrings(originalLine).trim();
          } else {
            // Unterminated '/*': the block comment continues on later lines.
            inBlockComment = true;
            originalLine = originalLine.substring(0, startCommentIndex);
            currentLine = removeQuotedStrings(originalLine).trim();
            break;
          }
        }
        // A line that only opens a block comment (no code before it) is skipped.
        if (inBlockComment && currentLine.length === 0) return;
        // Remove single-line comments.
        currentLine = currentLine.replace(commentRegex, '').trim();
        // Check if the remaining line contains executable code. Lines that are
        // only closing brackets, strict-mode directives, or shebangs don't count.
        if (currentLine.length > 0 &&
        !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) &&
        !localVars.STRICT_MODES.includes(currentLine) &&
        !currentLine.startsWith('#!')) {
          // Found uncommented code, so we can stop processing and resolve.
          cleanup();
          resolve(true);
        }
      } catch (error) {
        cleanup();
        reject(error);
      }
    });
    // Handle errors from the readline interface.
    rl.on('error', (error) => { if (!resolved) { cleanup(); reject(error); } });
    // If the end of the file is reached without finding uncommented code, resolve to false.
    rl.on('close', () => {
      if (!resolved) {
        cleanup();
        resolve(false);
      }
    });
    // Handle errors from the file stream itself (e.g. permission or read failures).
    fileStream.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });
  });
}
// Public API of the utils module.
// NOTE: findUncommentedFiles looks up hasUncommentedCode through this object at
// call time, so replacing `hasUncommentedCode` here (e.g. in tests) changes
// which implementation the scanner uses.
module.exports = {
  findUncommentedFiles,
  validateDirectory,
  hasUncommentedCode,
  normalizePath,
  formatString,
  validateEmail,
  generateId,
  createLimiter,
};