unqommented
Version:
A Node.js utility that quickly identifies files containing uncommented code in your codebase, designed for developers who want to tell LLMs exactly which files need comments added.
448 lines (407 loc) • 17.8 kB
JavaScript
/**
* @file This file contains utility functions for common operations such as file system interaction,
* data validation, and string manipulation. Each function is designed to be robust, with clear
* error handling and documentation.
* @module utils
*/
const fs = require('fs');
const path = require('path');
const validator = require('validator');
const crypto = require('crypto'); // Used for secure random ID generation
const _ = require('lodash');
const fastGlob = require('fast-glob');
const readline = require('readline');
/**
 * @function createLimiter
 * @description Builds a concurrency limiter: wrap any async function and at most
 * `max` wrapped calls will run at the same time; the rest wait in a FIFO queue.
 * @rationale Implemented in-house to avoid ESM-only dependencies such as
 * 'p-limit', keeping the library fully CommonJS compatible with a small
 * dependency tree.
 * @scalability Capping concurrent file operations prevents resource exhaustion
 * (e.g. file-handle limits) during large scans; `CONCURRENCY_LIMIT` trades raw
 * speed for system stability.
 * @param {number} max - Maximum number of tasks allowed to run concurrently.
 * @returns {Function} A wrapper that accepts an async function and returns a
 * Promise settling with that function's result, subject to the limit.
 */
function createLimiter(max) {
  // Reject non-integers and non-positive limits up front.
  if (!Number.isInteger(max) || max <= 0) {
    throw new Error(localVars.ERROR_MESSAGES.LIMIT_NOT_POSITIVE_INT);
  }
  const pending = []; // FIFO queue of tasks waiting for a free slot.
  let running = 0;    // Number of tasks currently executing.
  // Attempts to start the next queued task whenever a slot may be free.
  const drain = () => {
    if (running >= max) return;
    const entry = pending.shift();
    if (!entry) return;
    running += 1;
    // Frees the slot, settles the caller's promise, and pulls the next task.
    const settle = (handler) => (value) => {
      running -= 1;
      handler(value);
      drain();
    };
    // Promise.resolve().then(fn) also captures synchronous throws from fn.
    Promise.resolve().then(entry.fn).then(settle(entry.resolve), settle(entry.reject));
  };
  // The wrapper enqueues the task and kicks the scheduler.
  return (fn) =>
    new Promise((resolve, reject) => {
      pending.push({ fn, resolve, reject });
      drain();
    });
}
const localVars = require('../config/localVars');
const { qerrors } = require('qerrors');
/**
 * @function normalizePath
 * @description Rewrites every slash (forward or back) in a path string to the
 * platform's native separator, keeping paths consistent across operating systems.
 * @rationale Prevents errors caused by mixed path formats when input comes from
 * different OSes or user input; non-string values pass through untouched.
 * @param {string} dirPath - The file path to normalize.
 * @returns {string} The normalized path, or the input unchanged if not a string.
 */
function normalizePath(dirPath) {
  if (typeof dirPath !== 'string') {
    return dirPath;
  }
  return dirPath.replace(localVars.REGEX_PATTERNS.PATH_ALL_SLASHES, path.sep);
}
/**
 * @function removeQuotedStrings
 * @description Strips complete string literals (single-, double-, or
 * backtick-quoted, including escaped quote characters) from a line of code.
 * @rationale Essential for accurate comment detection: it stops comment markers
 * (e.g. `//`, `/*`) that appear inside string literals from being misread as
 * real comments.
 * @scalability The regex can be CPU-intensive on extremely long lines — an
 * accepted trade-off for detection accuracy.
 * @param {string} text - The line of code to sanitize.
 * @returns {string} The line with every complete string literal removed.
 */
function removeQuotedStrings(text) {
  // Opening quote, then any mix of escaped characters or non-matching
  // characters (lazily), up to the first unescaped matching closing quote.
  const quotedLiteral = /(["'`])(?:\\.|(?!\1)[^\\])*?\1/g;
  return text.replace(quotedLiteral, '');
}
/**
 * @function formatString
 * @description Formats a string by trimming surrounding whitespace, upper-casing
 * the first character, and lower-casing the remainder (e.g. "  fOO " -> "Foo").
 * @rationale Standardizes string inputs for data consistency, especially with
 * user-provided data. Uses native string methods rather than lodash's
 * `_.capitalize` — the behavior is identical for string inputs (guaranteed by
 * the type check) and the function no longer depends on lodash.
 * @param {string} input - The string to format.
 * @returns {string} The trimmed, capitalized string.
 * @throws {Error} If the input is not a string or is empty/whitespace-only.
 */
function formatString(input) {
  // Type validation ensures the function operates on the expected data type.
  if (typeof input !== 'string') {
    throw new Error(localVars.ERROR_MESSAGES.INPUT_NOT_STRING);
  }
  const trimmed = input.trim();
  // Reject empty strings after trimming to catch whitespace-only inputs.
  if (trimmed.length === 0) {
    throw new Error(localVars.ERROR_MESSAGES.INPUT_EMPTY);
  }
  // Equivalent to _.capitalize: first character upper, remainder lower.
  return trimmed.charAt(0).toUpperCase() + trimmed.slice(1).toLowerCase();
}
/**
 * @function validateEmail
 * @description Checks whether a value is a syntactically valid email address.
 * @rationale Delegates to the battle-tested 'validator' library — a security
 * best practice that guards against invalid or malicious input.
 * @param {string} email - The email address to validate.
 * @returns {boolean} True if the email is valid, false otherwise.
 * @throws {Error} If the email parameter is not a string.
 */
function validateEmail(email) {
  const isString = typeof email === 'string';
  if (!isString) {
    throw new Error(localVars.ERROR_MESSAGES.EMAIL_NOT_STRING);
  }
  return validator.isEmail(email);
}
/**
 * @function generateId
 * @description Produces a random lowercase-hex identifier of the requested length.
 * @rationale `crypto.randomBytes` is a secure randomness source and avoids
 * ESM-only dependencies that could complicate building or running the CLI.
 * @param {number} [length=8] - Desired length of the ID in characters.
 * @returns {string} A random hexadecimal ID of exactly `length` characters.
 * @throws {Error} If length is not a positive integer.
 */
function generateId(length = 8) {
  const isValidLength = Number.isInteger(length) && length > 0;
  if (!isValidLength) {
    throw new Error(localVars.ERROR_MESSAGES.LENGTH_NOT_POSITIVE);
  }
  // Each random byte yields two hex characters; slice trims odd lengths.
  const bytesNeeded = Math.ceil(length / 2);
  const hexString = crypto.randomBytes(bytesNeeded).toString('hex');
  return hexString.slice(0, length);
}
/**
 * @function validateDirectory
 * @description Asserts that a given path exists and is a directory.
 * @rationale A prerequisite for all file-system operations here: failing fast
 * with a precise message beats surfacing obscure downstream errors.
 * @param {string} dirPath - The path to validate.
 * @throws {Error} If the path is not a string, does not exist, or is not a directory.
 */
async function validateDirectory(dirPath) {
  if (typeof dirPath !== 'string') {
    throw new Error(localVars.ERROR_MESSAGES.BASEDIR_NOT_STRING);
  }
  const target = normalizePath(dirPath);
  try {
    const info = await fs.promises.stat(target);
    if (!info.isDirectory()) {
      // Thrown inside the try so the catch below logs it via qerrors,
      // exactly like any stat failure.
      throw new Error(`${localVars.ERROR_MESSAGES.PATH_NOT_DIR_PREFIX}${target}`);
    }
  } catch (error) {
    // Every failure is logged once, then mapped to a caller-facing error.
    qerrors(error, 'validateDirectory', { dirPath: target });
    if (error.code === 'ENOENT') {
      // Missing path gets a more specific message than a raw ENOENT.
      throw new Error(`${localVars.ERROR_MESSAGES.DIR_NOT_EXIST_PREFIX}${target}`);
    }
    throw error;
  }
}
/**
 * @function findUncommentedFiles
 * @description Recursively scans a directory to find files with uncommented code. This is the core
 * function of the utility, orchestrating the file discovery, processing, and result aggregation.
 * @workflow
 * 1. Validate the base directory.
 * 2. Build glob patterns and ignore rules from `localVars`.
 * 3. Use `fast-glob` to stream file paths efficiently.
 * 4. For each file, create a task managed by the `createLimiter`.
 * 5. The task checks if the file has uncommented code using `hasUncommentedCode`.
 * 6. Results are collected (or streamed to `outputStream`), and errors are logged.
 * 7. A single EMFILE retry (100 ms backoff) handles "too many open files".
 * @param {string} baseDir - The directory to scan.
 * @param {stream.Writable} [outputStream=null] - An optional stream to write results to.
 * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`.
 * @throws {Error} If `baseDir` is not a valid directory or `outputStream` is not writable.
 */
async function findUncommentedFiles(baseDir, outputStream = null) {
  const normalizedDir = normalizePath(baseDir);
  await validateDirectory(normalizedDir);
  if (outputStream && typeof outputStream.write !== 'function') {
    throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID);
  }
  const absoluteBaseDir = path.resolve(normalizedDir);
  const uncommentedFiles = [];
  const errors = [];
  // Emits one positive result. With an output stream, the relative path is
  // written directly (waiting for 'drain' on backpressure) and null is
  // returned; otherwise the relative path is returned for collection.
  // Extracted so the primary path and the EMFILE retry path share one
  // implementation instead of duplicating the write/backpressure logic.
  const emitResult = async (file) => {
    const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
    if (outputStream) {
      const ok = outputStream.write(`${relative}\n`);
      if (!ok) {
        // Handle backpressure by waiting for the stream to drain.
        await new Promise(resolve => outputStream.once('drain', resolve));
      }
      return null;
    }
    return relative;
  };
  // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern
  // and ignore rules are dynamically constructed from `localVars` for configurability.
  const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`;
  const otherIgnores = localVars.IGNORED_DIRECTORIES
    .filter(dir => dir !== 'node_modules')
    .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]);
  const globOptions = {
    cwd: absoluteBaseDir,
    ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores],
    onlyFiles: true,
    absolute: true,
  };
  // The limiter ensures that file processing is done in concurrent batches, optimizing
  // for speed while respecting system resource limits.
  const limit = createLimiter(localVars.CONCURRENCY_LIMIT);
  let tasks = [];
  // Awaits the current batch and, when not streaming, collects hits.
  const flushTasks = async () => {
    const results = await Promise.all(tasks);
    if (!outputStream) {
      results.forEach(result => { if (result) { uncommentedFiles.push(result); } });
    }
    tasks = [];
  };
  try {
    // Process files as a stream to avoid loading the entire file list into memory.
    for await (const file of fastGlob.stream(globPattern, globOptions)) {
      const task = limit(async () => {
        const fileName = path.basename(file);
        if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) {
          return null;
        }
        try {
          // Resolved via module.exports at call time, so a replaced export
          // (e.g. a test stub) is honored.
          if (await module.exports.hasUncommentedCode(file)) {
            return await emitResult(file);
          }
        } catch (error) {
          // EMFILE (too many open files): back off briefly and retry once.
          // The fixed 100 ms setTimeout is a simplicity-over-sophistication
          // trade-off versus a full exponential backoff.
          if (error.code === 'EMFILE') {
            await new Promise(resolve => setTimeout(resolve, 100));
            try {
              if (await module.exports.hasUncommentedCode(file)) {
                return await emitResult(file);
              }
              return null;
            } catch (err) {
              errors.push({ file, error: err.message });
              qerrors(err, 'findUncommentedFiles', { file });
              return null;
            }
          }
          errors.push({ file, error: error.message });
          qerrors(error, 'findUncommentedFiles', { file });
        }
        return null;
      });
      tasks.push(task);
      // Flush in batches so the pending-task array stays bounded on huge trees.
      if (tasks.length >= localVars.CONCURRENCY_LIMIT) {
        await flushTasks();
      }
    }
    // Process any remaining tasks.
    if (tasks.length > 0) {
      await flushTasks();
    }
  } catch (error) {
    errors.push({ error: error.message });
    qerrors(error, 'findUncommentedFiles', { baseDir });
  }
  return { uncommentedFiles, errors };
}
/**
 * @function hasUncommentedCode
 * @description Checks if a single file contains uncommented executable code.
 * @rationale This function streams the file line-by-line using `readline` to minimize memory
 * consumption, making it suitable for analyzing very large files without high RAM usage.
 * @scalability The line-by-line processing and regex operations are CPU-bound. While this
 * avoids memory bottlenecks, it can impact performance on a large number of files. This is a
 * trade-off for memory efficiency.
 * @param {string} filePath - The absolute path of the file to inspect.
 * @returns {Promise<boolean>} Resolves true as soon as a line with uncommented code is found
 * (reading stops early via cleanup); resolves false if the whole file is comments/whitespace;
 * rejects on stream or processing errors.
 */
async function hasUncommentedCode(filePath) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING });
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity, // Treat \r\n as a single line break so Windows files parse correctly.
    });
    const ext = path.extname(filePath).toLowerCase();
    // Use different comment patterns for different file types (e.g., Python uses #).
    // NOTE(review): assumes the two config regexes differ only in whether '#'
    // starts a comment — confirm against config/localVars.
    const commentRegex = ext === '.py'
      ? localVars.REGEX_PATTERNS.ALL_COMMENTS
      : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH;
    let inBlockComment = false; // State to track if we are inside a multi-line comment.
    let resolved = false; // Guards against settling the promise more than once.
    // The cleanup function is crucial for resource management. It ensures that file streams
    // and readline interfaces are properly closed to prevent resource leaks, especially in
    // error conditions or when uncommented code is found early.
    const cleanup = () => {
      if (!resolved) {
        resolved = true;
        rl.close();
        fileStream.destroy();
      }
    };
    rl.on('line', (line) => {
      try {
        let originalLine = line;
        // Strip string literals first so comment markers inside quotes are ignored.
        let currentLine = removeQuotedStrings(originalLine).trim();
        // Handle lines that are part of a multi-line block comment.
        if (inBlockComment) {
          const endCommentIndex = currentLine.indexOf('*/');
          if (endCommentIndex !== -1) {
            inBlockComment = false;
            // Re-locate the terminator in the raw line: indexes differ once
            // quoted strings have been stripped from currentLine.
            const originalEndIndex = originalLine.indexOf('*/');
            if (originalEndIndex !== -1) {
              // Keep only the text after the closing '*/' and re-sanitize it.
              originalLine = originalLine.substring(originalEndIndex + 2);
              currentLine = removeQuotedStrings(originalLine).trim();
            } else {
              // '*/' visible only in the sanitized line (edge case); skip the line.
              return;
            }
          } else {
            return; // Still inside a block comment.
          }
        }
        // Handle single-line and multi-line block comments within the same line.
        // NOTE(review): startCommentIndex/endCommentIndex are computed on the
        // sanitized currentLine but applied to originalLine below; if a quoted
        // string precedes the '/*', the excised span is offset from the real
        // comment — potential mis-slicing to verify.
        while (currentLine.includes('/*')) {
          const startCommentIndex = currentLine.indexOf('/*');
          const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2);
          if (endCommentIndex !== -1) {
            // Excise the inline /* ... */ span and re-sanitize the remainder.
            originalLine = originalLine.substring(0, startCommentIndex) + originalLine.substring(endCommentIndex + 2);
            currentLine = removeQuotedStrings(originalLine).trim();
          } else {
            // Unterminated '/*': the block comment continues on later lines.
            inBlockComment = true;
            originalLine = originalLine.substring(0, startCommentIndex);
            currentLine = removeQuotedStrings(originalLine).trim();
            break;
          }
        }
        // A line that only opens a block comment (no code before it) is skipped.
        if (inBlockComment && currentLine.length === 0) return;
        // Remove single-line comments.
        currentLine = currentLine.replace(commentRegex, '').trim();
        // Check if the remaining line contains executable code. Lines that are
        // only closing brackets, strict-mode directives, or shebangs don't count.
        if (currentLine.length > 0 &&
        !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) &&
        !localVars.STRICT_MODES.includes(currentLine) &&
        !currentLine.startsWith('#!')) {
          // Found uncommented code, so we can stop processing and resolve.
          cleanup();
          resolve(true);
        }
      } catch (error) {
        cleanup();
        reject(error);
      }
    });
    // Handle errors from the readline interface.
    rl.on('error', (error) => { if (!resolved) { cleanup(); reject(error); } });
    // If the end of the file is reached without finding uncommented code, resolve to false.
    rl.on('close', () => {
      if (!resolved) {
        cleanup();
        resolve(false);
      }
    });
    // Handle errors from the file stream itself (e.g. permission or read failures).
    fileStream.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });
  });
}
// Public API of the utils module.
// NOTE: findUncommentedFiles looks up hasUncommentedCode through this object at
// call time, so replacing `hasUncommentedCode` here (e.g. in tests) changes
// which implementation the scanner uses.
module.exports = {
  findUncommentedFiles,
  validateDirectory,
  hasUncommentedCode,
  normalizePath,
  formatString,
  validateEmail,
  generateId,
  createLimiter,
};