
unqommented

A Node.js utility that quickly identifies files containing uncommented code in a codebase. It is designed for developers who want an efficient way to tell LLMs exactly which files need comments added.

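A minimal usage sketch, assuming the package's main entry re-exports this module's functions (the scanned directory below is hypothetical):

const { findUncommentedFiles } = require('unqommented');

(async () => {
  // Scan a source tree and print each file that still contains uncommented code.
  const { uncommentedFiles, errors } = await findUncommentedFiles('./src');
  uncommentedFiles.forEach(file => console.log(file)); // paths relative to './src'
  errors.forEach(e => console.error(e));
})();
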
/**
 * @file Core code analysis engine for uncommented code detection
 * @description This module implements the heart of the unqommented tool - sophisticated
 * static analysis to identify executable code lines that lack documentation comments.
 * @rationale The analysis engine uses streaming file processing and regex-based parsing
 * to handle large codebases efficiently while maintaining accuracy across multiple
 * programming languages and comment styles.
 * @performance_considerations Uses readline streaming to minimize memory footprint,
 * making it suitable for analyzing very large files without consuming excessive RAM.
 * @module code-analyzer
 */

const fs = require('fs');
const path = require('path');
const readline = require('readline');
const fastGlob = require('fast-glob');

const localVars = require('../config/localVars');
const { qerrors } = require('qerrors');
const { normalizePath, validateDirectory } = require('./file-utils');
const { removeQuotedStrings } = require('./string-utils');
const { createLimiter } = require('./concurrency-utils');

/**
 * @function hasUncommentedCode
 * @description Checks if a single file contains uncommented executable code.
 * @rationale This function streams the file line-by-line using `readline` to minimize memory
 * consumption, making it suitable for analyzing very large files without high RAM usage.
 * @scalability The line-by-line processing and regex operations are CPU-bound. While this
 * avoids memory bottlenecks, it can impact performance on a large number of files. This is a
 * trade-off for memory efficiency.
 * @param {string} filePath - The absolute path of the file to inspect.
 * @returns {Promise<boolean>} True if uncommented code is found, otherwise false.
 */
async function hasUncommentedCode(filePath) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING });
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity,
    });

    const ext = path.extname(filePath).toLowerCase();
    // Use different comment patterns for different file types (e.g., Python uses #).
    const commentRegex = ext === '.py'
      ? localVars.REGEX_PATTERNS.ALL_COMMENTS
      : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH;

    let inBlockComment = false; // State to track if we are inside a multi-line comment.
    let resolved = false;

    // The cleanup function is crucial for resource management. It ensures that file streams
    // and readline interfaces are properly closed to prevent resource leaks, especially in
    // error conditions or when uncommented code is found early.
    const cleanup = () => {
      if (!resolved) {
        resolved = true;
        rl.close();
        fileStream.destroy();
      }
    };

    rl.on('line', (line) => {
      try {
        let originalLine = line;
        let currentLine = removeQuotedStrings(originalLine).trim();

        // Handle lines that are part of a multi-line block comment.
        if (inBlockComment) {
          const endCommentIndex = currentLine.indexOf('*/');
          if (endCommentIndex !== -1) {
            inBlockComment = false;
            const originalEndIndex = originalLine.indexOf('*/');
            if (originalEndIndex !== -1) {
              originalLine = originalLine.substring(originalEndIndex + 2);
              currentLine = removeQuotedStrings(originalLine).trim();
            } else {
              return;
            }
          } else {
            return; // Still inside a block comment.
          }
        }

        // Handle single-line and multi-line block comments within the same line.
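        // For example, a hypothetical input line such as:
        //   const x = 1; /* temp */ run();
        // is reduced to `const x = 1;  run();` by the loop below, while an
        // unterminated `/*` drops the rest of the line and sets `inBlockComment`.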
        while (currentLine.includes('/*')) {
          const startCommentIndex = currentLine.indexOf('/*');
          const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2);
          if (endCommentIndex !== -1) {
            originalLine = originalLine.substring(0, startCommentIndex) +
              originalLine.substring(endCommentIndex + 2);
            currentLine = removeQuotedStrings(originalLine).trim();
          } else {
            inBlockComment = true;
            originalLine = originalLine.substring(0, startCommentIndex);
            currentLine = removeQuotedStrings(originalLine).trim();
            break;
          }
        }

        if (inBlockComment && currentLine.length === 0) return;

        // Remove single-line comments.
        currentLine = currentLine.replace(commentRegex, '').trim();

        // Check if the remaining line contains executable code.
        if (currentLine.length > 0 &&
            !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) &&
            !localVars.STRICT_MODES.includes(currentLine) &&
            !currentLine.startsWith('#!')) {
          // Found uncommented code, so we can stop processing and resolve.
          cleanup();
          resolve(true);
        }
      } catch (error) {
        cleanup();
        reject(error);
      }
    });

    // Handle errors from the readline interface.
    rl.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });

    // If the end of the file is reached without finding uncommented code, resolve to false.
    rl.on('close', () => {
      if (!resolved) {
        cleanup();
        resolve(false);
      }
    });

    // Handle errors from the file stream itself.
    fileStream.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });
  });
}

/**
 * @function findUncommentedFiles
 * @description Recursively scans a directory to find files with uncommented code. This is the core
 * function of the utility, orchestrating the file discovery, processing, and result aggregation.
 * @workflow
 * 1. Validate the base directory.
 * 2. Build glob patterns and ignore rules from `localVars`.
 * 3. Use `fast-glob` to stream file paths efficiently.
 * 4. For each file, create a task managed by the `createLimiter`.
 * 5. The task checks if the file has uncommented code using `hasUncommentedCode`.
 * 6. Results are collected, and errors are logged.
 * 7. A specific retry mechanism is implemented for `EMFILE` errors.
 * @param {string} baseDir - The directory to scan.
 * @param {stream.Writable} [outputStream=null] - An optional stream to write results to.
 * @param {Function} [hasUncommentedCodeFn=null] - Optional dependency injection for testing.
 * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`.
 * @throws {Error} If `baseDir` is not a valid directory.
 */
async function findUncommentedFiles(baseDir, outputStream = null, hasUncommentedCodeFn = null) {
  const normalizedDir = normalizePath(baseDir);
  await validateDirectory(normalizedDir);

  if (outputStream && typeof outputStream.write !== 'function') {
    throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID);
  }

  // Use dependency injection for testing, otherwise use the local function.
  const checkFunction = hasUncommentedCodeFn || hasUncommentedCode;

  const absoluteBaseDir = path.resolve(normalizedDir);
  const uncommentedFiles = [];
  const errors = [];

  // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern
  // and ignore rules are dynamically constructed from `localVars` for configurability.
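  // For example, a hypothetical SUPPORTED_FILE_EXT_REGEX of 'js,ts,py' yields
  // the pattern '**/*.{js,ts,py}'.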
  const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`;
  const otherIgnores = localVars.IGNORED_DIRECTORIES
    .filter(dir => dir !== 'node_modules')
    .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]);
  const globOptions = {
    cwd: absoluteBaseDir,
    ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores],
    onlyFiles: true,
    absolute: true,
  };

  // The limiter ensures that file processing is done in concurrent batches, optimizing
  // for speed while respecting system resource limits.
  const limit = createLimiter(localVars.CONCURRENCY_LIMIT);
  let tasks = [];

  try {
    // Process files as a stream to avoid loading the entire file list into memory.
    for await (const file of fastGlob.stream(globPattern, globOptions)) {
      const task = limit(async () => {
        const fileName = path.basename(file);
        if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) {
          return null;
        }

        try {
          const hasUncommented = await checkFunction(file);
          if (hasUncommented) {
            const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
            // If an output stream is provided, write results directly to it. This is more
            // memory-efficient than collecting results in an array.
            if (outputStream) {
              const ok = outputStream.write(`${relative}\n`);
              if (!ok) {
                // Handle backpressure by waiting for the stream to drain.
                await new Promise(resolve => outputStream.once('drain', resolve));
              }
              return null;
            }
            return relative;
          }
        } catch (error) {
          // Implement a specific retry mechanism for EMFILE errors (too many open files).
          // This is a reactive strategy for robustness. The setTimeout provides a simple
          // backoff, which is a trade-off for simplicity over a more complex implementation.
          if (error.code === 'EMFILE') {
            await new Promise(resolve => setTimeout(resolve, 100));
            try {
              const retry = await checkFunction(file);
              if (retry) {
                const rel = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
                if (outputStream) {
                  const ok = outputStream.write(`${rel}\n`);
                  if (!ok) {
                    await new Promise(resolve => outputStream.once('drain', resolve));
                  }
                  return null;
                }
                return rel;
              }
              return null;
            } catch (err) {
              errors.push({ file, error: err.message });
              qerrors(err, 'findUncommentedFiles', { file });
              return null;
            }
          }
          errors.push({ file, error: error.message });
          qerrors(error, 'findUncommentedFiles', { file });
        }
        return null;
      });
      tasks.push(task);

      // Process tasks in batches to manage concurrency.
      if (tasks.length >= localVars.CONCURRENCY_LIMIT) {
        const results = await Promise.all(tasks);
        if (!outputStream) {
          results.forEach(result => {
            if (result) {
              uncommentedFiles.push(result);
            }
          });
        }
        tasks = [];
      }
    }

    // Process any remaining tasks.
    if (tasks.length > 0) {
      const results = await Promise.all(tasks);
      if (!outputStream) {
        results.forEach(result => {
          if (result) {
            uncommentedFiles.push(result);
          }
        });
      }
    }
  } catch (error) {
    errors.push({ error: error.message });
    qerrors(error, 'findUncommentedFiles', { baseDir });
  }

  return { uncommentedFiles, errors };
}

module.exports = {
  hasUncommentedCode,
  findUncommentedFiles,
};
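
The `hasUncommentedCodeFn` parameter makes the traversal logic testable in isolation. A minimal sketch, assuming this file is required directly (the fixture path and stub are hypothetical):

const { findUncommentedFiles } = require('./code-analyzer');

(async () => {
  // Stub that reports every file as uncommented, exercising the traversal,
  // batching, and result aggregation without reading real file contents.
  const stub = async () => true;
  const { uncommentedFiles, errors } = await findUncommentedFiles('./fixtures', null, stub);
  console.log(`${uncommentedFiles.length} files flagged, ${errors.length} errors`);
})();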