
unqommented

A Node.js utility that quickly identifies files containing uncommented code in a codebase. It is designed for developers who want an efficient way to tell LLMs exactly which files need comments added.

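A minimal usage sketch, assuming the package's main entry re-exports this module's functions (the scanned directory below is hypothetical):

const { findUncommentedFiles } = require('unqommented');

(async () => {
  // Scan a source tree and print each file that still contains uncommented code.
  const { uncommentedFiles, errors } = await findUncommentedFiles('./src');
  uncommentedFiles.forEach(file => console.log(file)); // paths relative to './src'
  errors.forEach(e => console.error(e));
})();
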
/**
 * @file Core code analysis engine for uncommented code detection
 * @description This module implements the heart of the unqommented tool - sophisticated
 * static analysis to identify executable code lines that lack documentation comments.
 * @rationale The analysis engine uses streaming file processing and regex-based parsing
 * to handle large codebases efficiently while maintaining accuracy across multiple
 * programming languages and comment styles.
 * @performance_considerations Uses readline streaming to minimize memory footprint,
 * making it suitable for analyzing very large files without consuming excessive RAM.
 * @module code-analyzer
 */

const fs = require('fs');
const path = require('path');
const readline = require('readline');
const fastGlob = require('fast-glob');

const localVars = require('../config/localVars');
const { qerrors } = require('qerrors');
const { normalizePath, validateDirectory } = require('./file-utils');
const { removeQuotedStrings } = require('./string-utils');
const { createLimiter } = require('./concurrency-utils');

/**
 * @function hasUncommentedCode
 * @description Checks if a single file contains uncommented executable code.
 * @rationale This function streams the file line-by-line using `readline` to minimize memory
 * consumption, making it suitable for analyzing very large files without high RAM usage.
 * @scalability The line-by-line processing and regex operations are CPU-bound. While this
 * avoids memory bottlenecks, it can impact performance on a large number of files. This is a
 * trade-off for memory efficiency.
 * @param {string} filePath - The absolute path of the file to inspect.
 * @returns {Promise<boolean>} True if uncommented code is found, otherwise false.
 */
async function hasUncommentedCode(filePath) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING });
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity,
    });

    const ext = path.extname(filePath).toLowerCase();
    // Use different comment patterns for different file types (e.g., Python uses #).
    const commentRegex = ext === '.py'
      ? localVars.REGEX_PATTERNS.ALL_COMMENTS
      : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH;

    let inBlockComment = false; // State to track if we are inside a multi-line comment.
    let resolved = false;

    // The cleanup function is crucial for resource management. It ensures that file streams
    // and readline interfaces are properly closed to prevent resource leaks, especially in
    // error conditions or when uncommented code is found early.
    const cleanup = () => {
      if (!resolved) {
        resolved = true;
        rl.close();
        fileStream.destroy();
      }
    };

    rl.on('line', (line) => {
      try {
        let originalLine = line;
        let currentLine = removeQuotedStrings(originalLine).trim();

        // Handle lines that are part of a multi-line block comment.
        if (inBlockComment) {
          const endCommentIndex = currentLine.indexOf('*/');
          if (endCommentIndex !== -1) {
            inBlockComment = false;
            const originalEndIndex = originalLine.indexOf('*/');
            if (originalEndIndex !== -1) {
              originalLine = originalLine.substring(originalEndIndex + 2);
              currentLine = removeQuotedStrings(originalLine).trim();
            } else {
              return;
            }
          } else {
            return; // Still inside a block comment.
          }
        }

        // Handle single-line and multi-line block comments within the same line.
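        // For example, a hypothetical input line such as:
        //   const x = 1; /* temp */ run();
        // is reduced to `const x = 1;  run();` by the loop below, while an
        // unterminated `/*` drops the rest of the line and sets `inBlockComment`.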
        while (currentLine.includes('/*')) {
          const startCommentIndex = currentLine.indexOf('/*');
          const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2);
          if (endCommentIndex !== -1) {
            originalLine = originalLine.substring(0, startCommentIndex) +
              originalLine.substring(endCommentIndex + 2);
            currentLine = removeQuotedStrings(originalLine).trim();
          } else {
            inBlockComment = true;
            originalLine = originalLine.substring(0, startCommentIndex);
            currentLine = removeQuotedStrings(originalLine).trim();
            break;
          }
        }

        if (inBlockComment && currentLine.length === 0) return;

        // Remove single-line comments.
        currentLine = currentLine.replace(commentRegex, '').trim();

        // Check if the remaining line contains executable code.
        if (currentLine.length > 0 &&
            !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) &&
            !localVars.STRICT_MODES.includes(currentLine) &&
            !currentLine.startsWith('#!')) {
          // Found uncommented code, so we can stop processing and resolve.
          cleanup();
          resolve(true);
        }
      } catch (error) {
        cleanup();
        reject(error);
      }
    });

    // Handle errors from the readline interface.
    rl.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });

    // If the end of the file is reached without finding uncommented code, resolve to false.
    rl.on('close', () => {
      if (!resolved) {
        cleanup();
        resolve(false);
      }
    });

    // Handle errors from the file stream itself.
    fileStream.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });
  });
}

/**
 * @function findUncommentedFiles
 * @description Recursively scans a directory to find files with uncommented code. This is the core
 * function of the utility, orchestrating the file discovery, processing, and result aggregation.
 * @workflow
 * 1. Validate the base directory.
 * 2. Build glob patterns and ignore rules from `localVars`.
 * 3. Use `fast-glob` to stream file paths efficiently.
 * 4. For each file, create a task managed by the `createLimiter`.
 * 5. The task checks if the file has uncommented code using `hasUncommentedCode`.
 * 6. Results are collected, and errors are logged.
 * 7. A specific retry mechanism is implemented for `EMFILE` errors.
 * @param {string} baseDir - The directory to scan.
 * @param {stream.Writable} [outputStream=null] - An optional stream to write results to.
 * @param {Function} [hasUncommentedCodeFn=null] - Optional dependency injection for testing.
 * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`.
 * @throws {Error} If `baseDir` is not a valid directory.
 */
async function findUncommentedFiles(baseDir, outputStream = null, hasUncommentedCodeFn = null) {
  const normalizedDir = normalizePath(baseDir);
  await validateDirectory(normalizedDir);

  if (outputStream && typeof outputStream.write !== 'function') {
    throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID);
  }

  // Use dependency injection for testing, otherwise use the local function.
  const checkFunction = hasUncommentedCodeFn || hasUncommentedCode;

  const absoluteBaseDir = path.resolve(normalizedDir);
  const uncommentedFiles = [];
  const errors = [];

  // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern
  // and ignore rules are dynamically constructed from `localVars` for configurability.
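  // For example, a hypothetical SUPPORTED_FILE_EXT_REGEX of 'js,ts,py' yields
  // the pattern '**/*.{js,ts,py}'.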
  const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`;
  const otherIgnores = localVars.IGNORED_DIRECTORIES
    .filter(dir => dir !== 'node_modules')
    .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]);
  const globOptions = {
    cwd: absoluteBaseDir,
    ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores],
    onlyFiles: true,
    absolute: true,
  };

  // The limiter ensures that file processing is done in concurrent batches, optimizing
  // for speed while respecting system resource limits.
  const limit = createLimiter(localVars.CONCURRENCY_LIMIT);
  let tasks = [];

  try {
    // Process files as a stream to avoid loading the entire file list into memory.
    for await (const file of fastGlob.stream(globPattern, globOptions)) {
      const task = limit(async () => {
        const fileName = path.basename(file);
        if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) {
          return null;
        }

        try {
          const hasUncommented = await checkFunction(file);
          if (hasUncommented) {
            const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
            // If an output stream is provided, write results directly to it. This is more
            // memory-efficient than collecting results in an array.
            if (outputStream) {
              const ok = outputStream.write(`${relative}\n`);
              if (!ok) {
                // Handle backpressure by waiting for the stream to drain.
                await new Promise(resolve => outputStream.once('drain', resolve));
              }
              return null;
            }
            return relative;
          }
        } catch (error) {
          // Implement a specific retry mechanism for EMFILE errors (too many open files).
          // This is a reactive strategy for robustness. The setTimeout provides a simple
          // backoff, which is a trade-off for simplicity over a more complex implementation.
          if (error.code === 'EMFILE') {
            await new Promise(resolve => setTimeout(resolve, 100));
            try {
              const retry = await checkFunction(file);
              if (retry) {
                const rel = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
                if (outputStream) {
                  const ok = outputStream.write(`${rel}\n`);
                  if (!ok) {
                    await new Promise(resolve => outputStream.once('drain', resolve));
                  }
                  return null;
                }
                return rel;
              }
              return null;
            } catch (err) {
              errors.push({ file, error: err.message });
              qerrors(err, 'findUncommentedFiles', { file });
              return null;
            }
          }
          errors.push({ file, error: error.message });
          qerrors(error, 'findUncommentedFiles', { file });
        }
        return null;
      });
      tasks.push(task);

      // Process tasks in batches to manage concurrency.
      if (tasks.length >= localVars.CONCURRENCY_LIMIT) {
        const results = await Promise.all(tasks);
        if (!outputStream) {
          results.forEach(result => {
            if (result) {
              uncommentedFiles.push(result);
            }
          });
        }
        tasks = [];
      }
    }

    // Process any remaining tasks.
    if (tasks.length > 0) {
      const results = await Promise.all(tasks);
      if (!outputStream) {
        results.forEach(result => {
          if (result) {
            uncommentedFiles.push(result);
          }
        });
      }
    }
  } catch (error) {
    errors.push({ error: error.message });
    qerrors(error, 'findUncommentedFiles', { baseDir });
  }

  return { uncommentedFiles, errors };
}

module.exports = {
  hasUncommentedCode,
  findUncommentedFiles,
};
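
The `hasUncommentedCodeFn` parameter makes the traversal logic testable in isolation. A minimal sketch, assuming this file is required directly (the fixture path and stub are hypothetical):

const { findUncommentedFiles } = require('./code-analyzer');

(async () => {
  // Stub that reports every file as uncommented, exercising the traversal,
  // batching, and result aggregation without reading real file contents.
  const stub = async () => true;
  const { uncommentedFiles, errors } = await findUncommentedFiles('./fixtures', null, stub);
  console.log(`${uncommentedFiles.length} files flagged, ${errors.length} errors`);
})();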