unqommented
Version: (not captured in this snapshot)
A Node.js utility that quickly identifies files with uncommented code in your codebase. Designed for developers who want to efficiently tell LLMs exactly which files need comments added.
279 lines (251 loc) • 11.2 kB
JavaScript
/**
* @file Core code analysis engine for uncommented code detection
* @description This module implements the heart of the unqommented tool - sophisticated
* static analysis to identify executable code lines that lack documentation comments.
* @rationale The analysis engine uses streaming file processing and regex-based parsing
* to handle large codebases efficiently while maintaining accuracy across multiple
* programming languages and comment styles.
* @performance_considerations Uses readline streaming to minimize memory footprint,
* making it suitable for analyzing very large files without consuming excessive RAM.
* @module code-analyzer
*/
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const fastGlob = require('fast-glob');
const localVars = require('../config/localVars');
const { qerrors } = require('qerrors');
const { normalizePath, validateDirectory } = require('./file-utils');
const { removeQuotedStrings } = require('./string-utils');
const { createLimiter } = require('./concurrency-utils');
/**
 * @function hasUncommentedCode
 * @description Checks if a single file contains uncommented executable code. A line counts as
 * code when, after stripping quoted strings, block comments, and line comments, non-empty text
 * remains that is not a closing bracket, an entry in `localVars.STRICT_MODES`, or a shebang.
 * @rationale This function streams the file line-by-line using `readline` to minimize memory
 * consumption, making it suitable for analyzing very large files without high RAM usage.
 * @scalability The line-by-line processing and regex operations are CPU-bound. While this
 * avoids memory bottlenecks, it can impact performance on a large number of files. This is a
 * trade-off for memory efficiency.
 * @param {string} filePath - The absolute path of the file to inspect.
 * @returns {Promise<boolean>} True if uncommented code is found, otherwise false.
 */
async function hasUncommentedCode(filePath) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(filePath, { encoding: localVars.FILE_ENCODING });
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity, // Treat \r\n as a single line break.
    });
    const ext = path.extname(filePath).toLowerCase();
    // Use different comment patterns for different file types: only .py files treat '#'
    // as a comment marker; every other extension uses the no-hash variant.
    const commentRegex = ext === '.py'
      ? localVars.REGEX_PATTERNS.ALL_COMMENTS
      : localVars.REGEX_PATTERNS.ALL_COMMENTS_NO_HASH;
    let inBlockComment = false; // State to track if we are inside a multi-line /* ... */ comment.
    let resolved = false; // Guards against settling the promise (or running cleanup) twice.
    // The cleanup function is crucial for resource management. It ensures that file streams
    // and readline interfaces are properly closed to prevent resource leaks, especially in
    // error conditions or when uncommented code is found early.
    const cleanup = () => {
      if (!resolved) {
        resolved = true;
        rl.close();
        fileStream.destroy();
      }
    };
    rl.on('line', (line) => {
      try {
        let originalLine = line;
        // currentLine is the analysis view of the line: quoted strings removed (so comment
        // markers inside string literals are ignored) and surrounding whitespace trimmed.
        let currentLine = removeQuotedStrings(originalLine).trim();
        // Handle lines that are part of a multi-line block comment.
        if (inBlockComment) {
          const endCommentIndex = currentLine.indexOf('*/');
          if (endCommentIndex !== -1) {
            inBlockComment = false;
            // Re-locate the terminator in the untouched line so any remainder after '*/'
            // can be re-analyzed as potential code.
            const originalEndIndex = originalLine.indexOf('*/');
            if (originalEndIndex !== -1) {
              originalLine = originalLine.substring(originalEndIndex + 2);
              currentLine = removeQuotedStrings(originalLine).trim();
            } else {
              // '*/' was visible only in the quote-stripped view (i.e. it sat inside a
              // quoted string in the raw line): nothing executable remains here.
              return;
            }
          } else {
            return; // Still inside a block comment.
          }
        }
        // Handle single-line and multi-line block comments within the same line.
        // NOTE(review): the indices below are located in currentLine (quote-stripped AND
        // trimmed) but applied to originalLine. If removeQuotedStrings is not
        // length-preserving, or the raw line had leading whitespace, the offsets can drift
        // and leave '*/' residue in the remainder — confirm removeQuotedStrings' contract.
        while (currentLine.includes('/*')) {
          const startCommentIndex = currentLine.indexOf('/*');
          const endCommentIndex = currentLine.indexOf('*/', startCommentIndex + 2);
          if (endCommentIndex !== -1) {
            // Complete /* ... */ on one line: splice it out and re-derive the analysis view.
            originalLine = originalLine.substring(0, startCommentIndex) + originalLine.substring(endCommentIndex + 2);
            currentLine = removeQuotedStrings(originalLine).trim();
          } else {
            // Unterminated opener: everything after '/*' is comment. Keep any code that
            // precedes it and carry the block-comment state into subsequent lines.
            inBlockComment = true;
            originalLine = originalLine.substring(0, startCommentIndex);
            currentLine = removeQuotedStrings(originalLine).trim();
            break;
          }
        }
        // A line that only opened a block comment contributes no code.
        if (inBlockComment && currentLine.length === 0) return;
        // Remove single-line comments.
        currentLine = currentLine.replace(commentRegex, '').trim();
        // Check if the remaining line contains executable code. Pure closing brackets,
        // lines listed in STRICT_MODES, and '#!' shebang lines do not count as code.
        if (currentLine.length > 0 &&
          !localVars.REGEX_PATTERNS.CLOSING_BRACKETS.test(currentLine) &&
          !localVars.STRICT_MODES.includes(currentLine) &&
          !currentLine.startsWith('#!')) {
          // Found uncommented code, so we can stop processing and resolve.
          cleanup();
          resolve(true);
        }
      } catch (error) {
        cleanup();
        reject(error);
      }
    });
    // Handle errors from the readline interface.
    rl.on('error', (error) => { if (!resolved) { cleanup(); reject(error); } });
    // If the end of the file is reached without finding uncommented code, resolve to false.
    rl.on('close', () => {
      if (!resolved) {
        cleanup();
        resolve(false);
      }
    });
    // Handle errors from the file stream itself.
    fileStream.on('error', (error) => {
      if (!resolved) {
        cleanup();
        reject(error);
      }
    });
  });
}
/**
 * @function findUncommentedFiles
 * @description Recursively scans a directory to find files with uncommented code. This is the core
 * function of the utility, orchestrating the file discovery, processing, and result aggregation.
 * @workflow
 * 1. Validate the base directory.
 * 2. Build glob patterns and ignore rules from `localVars`.
 * 3. Use `fast-glob` to stream file paths efficiently.
 * 4. For each file, create a task managed by the `createLimiter`.
 * 5. The task checks if the file has uncommented code using `hasUncommentedCode`.
 * 6. Results are collected, and errors are logged.
 * 7. A specific retry mechanism is implemented for `EMFILE` errors.
 * @param {string} baseDir - The directory to scan.
 * @param {stream.Writable} [outputStream=null] - An optional stream to write results to.
 * @param {Function} [hasUncommentedCodeFn=null] - Optional dependency injection for testing
 * @returns {Promise<Object>} An object containing `uncommentedFiles` and `errors`.
 * @throws {Error} If `baseDir` is not a valid directory.
 */
async function findUncommentedFiles(baseDir, outputStream = null, hasUncommentedCodeFn = null) {
  const normalizedDir = normalizePath(baseDir);
  await validateDirectory(normalizedDir);
  if (outputStream && typeof outputStream.write !== 'function') {
    throw new Error(localVars.ERROR_MESSAGES.OUTPUT_STREAM_INVALID);
  }
  // Use dependency injection for testing, otherwise use the local function.
  const checkFunction = hasUncommentedCodeFn || hasUncommentedCode;
  const absoluteBaseDir = path.resolve(normalizedDir);
  const uncommentedFiles = [];
  const errors = [];
  // emitResult: single home for the "report a hit" logic that was previously duplicated
  // between the first attempt and the EMFILE retry. Writes the forward-slash relative
  // path to the output stream (honoring backpressure via 'drain') when one is provided —
  // more memory-efficient than array collection — otherwise returns it to the caller.
  const emitResult = async (file) => {
    const relative = path.relative(absoluteBaseDir, file).replace(/\\/g, '/');
    if (outputStream) {
      const ok = outputStream.write(`${relative}\n`);
      if (!ok) {
        // Handle backpressure by waiting for the stream to drain.
        await new Promise(resolve => outputStream.once('drain', resolve));
      }
      return null; // Streamed out; nothing to collect.
    }
    return relative;
  };
  // recordError: uniform per-file error capture plus structured logging.
  const recordError = (err, file) => {
    errors.push({ file, error: err.message });
    qerrors(err, 'findUncommentedFiles', { file });
  };
  // Use fast-glob for efficient, asynchronous file system traversal. The glob pattern
  // and ignore rules are dynamically constructed from `localVars` for configurability.
  const globPattern = `**/*.{${localVars.SUPPORTED_FILE_EXT_REGEX}}`;
  const otherIgnores = localVars.IGNORED_DIRECTORIES
    .filter(dir => dir !== 'node_modules')
    .flatMap(dir => [`${dir}/**`, `**/${dir}/**`]);
  const globOptions = {
    cwd: absoluteBaseDir,
    ignore: ['node_modules/**', '**/node_modules/**', ...otherIgnores],
    onlyFiles: true,
    absolute: true,
  };
  // The limiter ensures that file processing is done in concurrent batches, optimizing
  // for speed while respecting system resource limits.
  const limit = createLimiter(localVars.CONCURRENCY_LIMIT);
  let tasks = [];
  // flushTasks: await the current batch and, when not streaming, fold non-null relative
  // paths into the result array. Keeping batches bounded caps pending-promise memory.
  const flushTasks = async () => {
    const results = await Promise.all(tasks);
    if (!outputStream) {
      results.forEach(result => { if (result) { uncommentedFiles.push(result); } });
    }
    tasks = [];
  };
  try {
    // Process files as a stream to avoid loading the entire file list into memory.
    for await (const file of fastGlob.stream(globPattern, globOptions)) {
      const task = limit(async () => {
        const fileName = path.basename(file);
        if (localVars.IGNORED_FILENAMES_REGEX.test(fileName)) {
          return null;
        }
        try {
          if (await checkFunction(file)) {
            return await emitResult(file);
          }
        } catch (error) {
          // EMFILE (too many open files) is transient under high concurrency: back off
          // briefly and retry once. The fixed 100ms delay is a simplicity trade-off
          // versus a full exponential-backoff implementation.
          if (error.code === 'EMFILE') {
            await new Promise(resolve => setTimeout(resolve, 100));
            try {
              if (await checkFunction(file)) {
                return await emitResult(file);
              }
            } catch (err) {
              recordError(err, file);
            }
            return null;
          }
          recordError(error, file);
        }
        return null;
      });
      tasks.push(task);
      // Process tasks in batches to manage concurrency.
      if (tasks.length >= localVars.CONCURRENCY_LIMIT) {
        await flushTasks();
      }
    }
    // Process any remaining tasks.
    if (tasks.length > 0) {
      await flushTasks();
    }
  } catch (error) {
    errors.push({ error: error.message });
    qerrors(error, 'findUncommentedFiles', { baseDir });
  }
  return { uncommentedFiles, errors };
}
// Public API surface: the single-file probe and the directory-level scanner.
module.exports = { hasUncommentedCode, findUncommentedFiles };