UNPKG

unqommented

Version:

A Node.js utility that quickly identifies files with uncommented code in your codebase. Designed for developers who want to efficiently tell LLMs exactly which files need comments added.

448 lines (407 loc) 17.8 kB
/** * @file This file contains utility functions for common operations such as file system interaction, * data validation, and string manipulation. Each function is designed to be robust, with clear * error handling and documentation. * @module utils */ const fs = require('fs'); const path = require('path'); const validator = require('validator'); const crypto = require('crypto'); // Used for secure random ID generation const _ = require('lodash'); const fastGlob = require('fast-glob'); const readline = require('readline'); /** * @function createLimiter * @description Creates a concurrency limiter to manage the number of parallel async tasks. * @rationale This in-house limiter was implemented to avoid introducing ESM-only dependencies * like 'p-limit', ensuring the library remains fully CommonJS compatible. This is a strategic * choice to maintain broader compatibility and a smaller dependency tree. * @scalability By controlling the number of concurrent file operations, this function is critical * for preventing resource exhaustion (e.g., file handle limits) when scanning a large number of * files. The `CONCURRENCY_LIMIT` offers a trade-off between execution speed and system stability. * @param {number} max - The maximum number of concurrent tasks allowed. * @returns {Function} A function that takes an async function as input and returns a new * function that respects the concurrency limit. */ function createLimiter(max) { // Enforce that the limit is a positive integer, preventing invalid states. if (!Number.isInteger(max) || max <= 0) { throw new Error(localVars.ERROR_MESSAGES.LIMIT_NOT_POSITIVE_INT); } const queue = []; // Stores pending tasks let activeCount = 0; // Tracks the number of currently running tasks // The next function is the core of the limiter. It checks if a new task can be started // and, if so, dequeues and executes it. const next = () => { // Do not start a new task if the limit is reached or the queue is empty. if (activeCount >= max || queue.length === 0) return; const { fn, resolve, reject } = queue.shift(); activeCount += 1; // Execute the task and handle its completion, ensuring the next task is triggered. Promise.resolve() .then(fn) .then(result => { activeCount -= 1; resolve(result); next(); }) .catch(error => { activeCount -= 1; reject(error); next(); }); }; // The returned function wraps the user's async function in a Promise, adding it to the // queue and starting the execution loop. return fn => { return new Promise((resolve, reject) => { queue.push({ fn, resolve, reject }); next(); }); }; } const localVars = require('../config/localVars'); const { qerrors } = require('qerrors'); /** * @function normalizePath * @description Ensures that file paths are consistent across different operating systems. * @rationale This function is crucial for cross-platform compatibility. It replaces both * forward slashes and backslashes with the operating system's default separator (`path.sep`), * preventing errors that can arise from inconsistent path formats. * @param {string} dirPath - The file path to normalize. * @returns {string} The normalized file path. */ function normalizePath(dirPath) { return typeof dirPath === 'string' ? dirPath.replace(localVars.REGEX_PATTERNS.PATH_ALL_SLASHES, path.sep) : dirPath; } /** * @function removeQuotedStrings * @description Removes quoted strings from a line of code. * @rationale This is a key utility for accurately detecting comments. It prevents the code * from misinterpreting comment markers (e.g., `//`, `/*`) that appear inside of string literals. * @scalability While effective, this regex can be CPU-intensive on files with extremely long * lines. This is a trade-off for improved accuracy in comment detection. * @param {string} text - The line of code to sanitize. * @returns {string} The line of code without quoted strings. */ function removeQuotedStrings(text) { // This regex matches single, double, or backtick-quoted strings, handling escaped characters. return text.replace(/(["'`])(?:\\.|(?!\1)[^\\])*?\1/g, ''); } /** * @function formatString * @description Formats a string by capitalizing the first letter and trimming whitespace. * @rationale This function is designed to standardize string inputs, which is crucial for * maintaining data consistency, especially when dealing with user-provided data. * @param {string} input - The string to format. * @returns {string} The formatted string. * @throws {Error} If the input is not a string or is empty. */ function formatString(input) { // Type validation ensures that the function operates on the expected data type. if (typeof input !== 'string') { throw new Error(localVars.ERROR_MESSAGES.INPUT_NOT_STRING); } // Check for empty strings after trimming to handle whitespace-only inputs. if (input.trim().length === 0) { throw new Error(localVars.ERROR_MESSAGES.INPUT_EMPTY); } // Use lodash for robust capitalization and trimming. return _.capitalize(input.trim()); } /** * @function validateEmail * @description Validates an email address using the 'validator' library. * @rationale Leveraging a well-tested library like 'validator' is a security best practice. * It provides robust email validation, protecting the system from invalid or malicious data. * @param {string} email - The email address to validate. * @returns {boolean} True if the email is valid, false otherwise. * @throws {Error} If the email parameter is not a string. */ function validateEmail(email) { if (typeof email !== 'string') { throw new Error(localVars.ERROR_MESSAGES.EMAIL_NOT_STRING); } return validator.isEmail(email); } /** * @function generateId * @description Generates a unique identifier using Node.js's crypto module. * @rationale Using `crypto.randomBytes` is a secure method for generating random IDs and * avoids ESM-only dependencies that could complicate the build or execution of the CLI. * @param {number} [length=8] - The desired length of the ID. * @returns {string} A random alphanumeric ID. * @throws {Error} If the length is not a positive number. */ function generateId(length = 8) { if (!Number.isInteger(length) || length <= 0) { throw new Error(localVars.ERROR_MESSAGES.LENGTH_NOT_POSITIVE); } // Generate random bytes and convert to a hex string, then slice to the desired length. return crypto.randomBytes(Math.ceil(length / 2)).toString('hex').slice(0, length); } /** * @function validateDirectory * @description Validates if a given path is a valid, existing directory. * @rationale This is a critical prerequisite for any file system operations. It prevents * errors and unexpected behavior by ensuring that the target directory exists and is accessible. * @param {string} dirPath - The path to validate. * @throws {Error} If the path is not a string, does not exist, or is not a directory. */ async function validateDirectory(dirPath) { if (typeof dirPath !== 'string') { throw new Error(localVars.ERROR_MESSAGES.BASEDIR_NOT_STRING); } const normalized = normalizePath(dirPath); try { const stats = await fs.promises.stat(normalized); if (!stats.isDirectory()) { throw new Error(`${localVars.ERROR_MESSAGES.PATH_NOT_DIR_PREFIX}${normalized}`); } } catch (error) { // Provide a more specific error message for non-existent directories. if (error.code === 'ENOENT') { qerrors(error, 'validateDirectory', { dirPath: normalized }); throw new Error(`${localVars.ERROR_MESSAGES.DIR_NOT_EXIST_PREFIX}${normalized}`); } qerrors(error, 'validateDirectory', { dirPath: normalized }); throw error; } } /** * @function findUncommentedFiles * @description Recursively scans a directory to find files with uncommented code. This is the core * function of the utility, orchestrating the file discovery, processing, and result aggregation. * @workflow * 1. Validate the base directory. * 2. Build glob patterns and ignore rules from `localVars`. * 3. Use `fast-glob` to stream file paths efficiently. * 4. For each file, create a task managed by the `createLimiter`. * 5. The task checks if the file has uncommented code using `hasUncommentedCode`. * 6. Results are collected, and errors are logged. * 7. A specific retry mechanism is implemented for `EMFILE` errors. * @param {string} baseDir - The directory to scan. * @param {stream.Writable} [outputStream=null] - An optional stream to write results to. * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`. * @throws {Error} If `baseDir` is not a valid directory. */ async function findUncommentedFiles(baseDir, outputStream = null) { const normalizedDir = normalizePath(baseDir); await validateDirectory(normalizedDir); if (outputStream && typeof outputStream.write !== 'function') { throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID); } const absoluteBaseDir = path.resolve(normalizedDir); const uncommentedFiles = []; const errors = []; // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern // and ignore rules are dynamically constructed from `localVars` for configurability. const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`; const otherIgnores = localVars.IGNORED_DIRECTORIES .filter(dir => dir !== 'node_modules') .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]); const globOptions = { cwd: absoluteBaseDir, ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores], onlyFiles: true, absolute: true, }; // The limiter ensures that file processing is done in concurrent batches, optimizing // for speed while respecting system resource limits. const limit = createLimiter(localVars.CONCURRENCY_LIMIT); let tasks = []; try { // Process files as a stream to avoid loading the entire file list into memory. for await (const file of fastGlob.stream(globPattern, globOptions)) { const task = limit(async () => { const fileName = path.basename(file); if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) { return null; } try { const hasUncommented = await module.exports.hasUncommentedCode(file); if (hasUncommented) { const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/'); // If an output stream is provided, write results directly to it. This is more // memory-efficient than collecting results in an array. if (outputStream) { const ok = outputStream.write(`${relative}\n`); if (!ok) { // Handle backpressure by waiting for the stream to drain. await new Promise(resolve => outputStream.once('drain', resolve)); } return null; } return relative; } } catch (error) { // Implement a specific retry mechanism for EMFILE errors (too many open files). // This is a reactive strategy for robustness. The setTimeout provides a simple // backoff, which is a trade-off for simplicity over a more complex implementation. if (error.code === 'EMFILE') { await new Promise(resolve => setTimeout(resolve, 100)); try { const retry = await module.exports.hasUncommentedCode(file); if (retry) { const rel = path.relative(absoluteBaseDir, file).replace(/\\/g, '/'); if (outputStream) { const ok = outputStream.write(`${rel}\n`); if (!ok) { await new Promise(resolve => outputStream.once('drain', resolve)); } return null; } return rel; } return null; } catch (err) { errors.push({ file, error: err.message }); qerrors(err, 'findUncommentedFiles', { file }); return null; } } errors.push({ file, error: error.message }); qerrors(error, 'findUncommentedFiles', { file }); } return null; }); tasks.push(task); // Process tasks in batches to manage concurrency. if (tasks.length >= localVars.CONCURRENCY_LIMIT) { const results = await Promise.all(tasks); if (!outputStream) { results.forEach(result => { if (result) { uncommentedFiles.push(result); } }); } tasks = []; } } // Process any remaining tasks. if (tasks.length > 0) { const results = await Promise.all(tasks); if (!outputStream) { results.forEach(result => { if (result) { uncommentedFiles.push(result); } }); } } } catch (error) { errors.push({ error: error.message }); qerrors(error, 'findUncommentedFiles', { baseDir }); } return { uncommentedFiles, errors }; } /** * @function hasUncommentedCode * @description Checks if a single file contains uncommented executable code. * @rationale This function streams the file line-by-line using `readline` to minimize memory * consumption, making it suitable for analyzing very large files without high RAM usage. * @scalability The line-by-line processing and regex operations are CPU-bound. While this * avoids memory bottlenecks, it can impact performance on a large number of files. This is a * trade-off for memory efficiency. * @param {string} filePath - The absolute path of the file to inspect. * @returns {Promise<boolean>} True if uncommented code is found, otherwise false. */ async function hasUncommentedCode(filePath) { return new Promise((resolve, reject) => { const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING }); const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity, }); const ext = path.extname(filePath).toLowerCase(); // Use different comment patterns for different file types (e.g., Python uses #). const commentRegex = ext === '.py' ? localVars.REGEX_PATTERNS.ALL_COMMENTS : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH; let inBlockComment = false; // State to track if we are inside a multi-line comment. let resolved = false; // The cleanup function is crucial for resource management. It ensures that file streams // and readline interfaces are properly closed to prevent resource leaks, especially in // error conditions or when uncommented code is found early. const cleanup = () => { if (!resolved) { resolved = true; rl.close(); fileStream.destroy(); } }; rl.on('line', (line) => { try { let originalLine = line; let currentLine = removeQuotedStrings(originalLine).trim(); // Handle lines that are part of a multi-line block comment. if (inBlockComment) { const endCommentIndex = currentLine.indexOf('*/'); if (endCommentIndex !== -1) { inBlockComment = false; const originalEndIndex = originalLine.indexOf('*/'); if (originalEndIndex !== -1) { originalLine = originalLine.substring(originalEndIndex + 2); currentLine = removeQuotedStrings(originalLine).trim(); } else { return; } } else { return; // Still inside a block comment. } } // Handle single-line and multi-line block comments within the same line. while (currentLine.includes('/*')) { const startCommentIndex = currentLine.indexOf('/*'); const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2); if (endCommentIndex !== -1) { originalLine = originalLine.substring(0, startCommentIndex) + originalLine.substring(endCommentIndex + 2); currentLine = removeQuotedStrings(originalLine).trim(); } else { inBlockComment = true; originalLine = originalLine.substring(0, startCommentIndex); currentLine = removeQuotedStrings(originalLine).trim(); break; } } if (inBlockComment && currentLine.length === 0) return; // Remove single-line comments. currentLine = currentLine.replace(commentRegex, '').trim(); // Check if the remaining line contains executable code. if (currentLine.length > 0 && !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) && !localVars.STRICT_MODES.includes(currentLine) && !currentLine.startsWith('#!')) { // Found uncommented code, so we can stop processing and resolve. cleanup(); resolve(true); } } catch (error) { cleanup(); reject(error); } }); // Handle errors from the readline interface. rl.on('error', (error) => { if (!resolved) { cleanup(); reject(error); } }); // If the end of the file is reached without finding uncommented code, resolve to false. rl.on('close', () => { if (!resolved) { cleanup(); resolve(false); } }); // Handle errors from the file stream itself. fileStream.on('error', (error) => { if (!resolved) { cleanup(); reject(error); } }); }); } module.exports = { findUncommentedFiles, validateDirectory, hasUncommentedCode, normalizePath, formatString, validateEmail, generateId, createLimiter, };