UNPKG

codesummary

Version:

Cross-platform CLI tool that generates professional PDF documentation and RAG-optimized JSON outputs from project source code. Perfect for code reviews, audits, documentation, and AI/ML applications with semantic chunking and precision offsets.

468 lines (409 loc) 16.1 kB
import fs from 'fs-extra';
import path from 'path';
import chalk from 'chalk';
import ErrorHandler from './errorHandler.js';

/** Files larger than this many bytes (100MB) are skipped during a scan. */
const MAX_INDIVIDUAL_FILE_SIZE = 100 * 1024 * 1024;

/**
 * Directory names (compared lower-cased) that are always skipped, even when
 * they are missing from the configured exclude list.
 */
const COMMON_SKIP_DIRS = new Set([
  'tmp',
  'temp',
  'cache',
  '.cache',
  'logs',
  '.logs',
  'bower_components',
  'vendor',
  '.vendor'
]);

/** Hidden (dot-prefixed) file names that are not rejected by the hidden-file rule. */
const ALLOWED_HIDDEN_FILES = new Set([
  '.gitignore',
  '.gitattributes',
  '.editorconfig',
  '.eslintrc.js',
  '.eslintrc.json',
  '.prettierrc',
  '.env.example',
  '.htaccess'
]);

/** Hidden (dot-prefixed) directory names that are still traversed. */
const ALLOWED_HIDDEN_DIRS = new Set([
  '.github',
  '.gitlab',
  '.circleci'
]);

/** Human-readable labels for known file extensions. */
const EXTENSION_DESCRIPTIONS = Object.freeze({
  '.js': 'JavaScript',
  '.ts': 'TypeScript',
  '.jsx': 'React JSX',
  '.tsx': 'TypeScript JSX',
  '.json': 'JSON',
  '.xml': 'XML',
  '.html': 'HTML',
  '.css': 'CSS',
  '.scss': 'SCSS',
  '.sass': 'Sass',
  '.md': 'Markdown',
  '.txt': 'Text',
  '.py': 'Python',
  '.java': 'Java',
  '.cs': 'C#',
  '.cpp': 'C++',
  '.c': 'C',
  '.h': 'Header',
  '.yaml': 'YAML',
  '.yml': 'YAML',
  '.sh': 'Shell Script',
  '.bat': 'Batch File',
  '.ps1': 'PowerShell',
  '.php': 'PHP',
  '.rb': 'Ruby',
  '.go': 'Go',
  '.rs': 'Rust',
  '.swift': 'Swift',
  '.kt': 'Kotlin',
  '.scala': 'Scala',
  '.vue': 'Vue.js',
  '.svelte': 'Svelte',
  '.dockerfile': 'Dockerfile',
  '.sql': 'SQL',
  '.graphql': 'GraphQL'
});

/**
 * File Scanner for CodeSummary
 * Handles recursive directory traversal and file filtering
 */
export class Scanner {
  /**
   * @param {object} config - Scanner configuration
   * @param {string[]} config.allowedExtensions - Extensions (with leading dot) to include; matched case-insensitively
   * @param {Iterable<string>} [config.excludeDirs] - Directory names to skip
   * @param {string[]} [config.excludeFiles] - File-name glob patterns (`*` wildcard) to skip
   */
  constructor(config) {
    this.config = config;
    this.allowedExtensions = new Set(config.allowedExtensions.map(ext => ext.toLowerCase()));
    this.excludeDirs = new Set(config.excludeDirs);
    this.excludeFiles = config.excludeFiles || [];
  }

  /**
   * Scan a directory recursively and return files grouped by extension.
   * Prints the directory being scanned and a summary of any scan issues.
   * @param {string} rootPath - Root directory to scan
   * @returns {Promise<object>} Object with extensions as keys and file arrays as values;
   *   each file is `{ relativePath, absolutePath, size, modified }`
   * @throws {Error} If rootPath is not a non-empty string, is not a directory,
   *   does not exist, or cannot be accessed
   */
  async scanDirectory(rootPath) {
    const scanErrors = [];
    const scanWarnings = [];

    try {
      // For scanner paths, we only need basic validation (no aggressive sanitization)
      if (!rootPath || typeof rootPath !== 'string') {
        throw new Error('Invalid root path: must be a non-empty string');
      }

      // Just resolve the path and validate it exists
      const resolvedRoot = path.resolve(rootPath);
      const stats = await fs.stat(resolvedRoot);
      if (!stats.isDirectory()) {
        throw new Error(`Path is not a directory: ${resolvedRoot}`);
      }

      console.log(chalk.gray(`Scanning directory: ${resolvedRoot}`));

      const filesByExtension = {};
      const scannedFiles = new Set(); // Prevent duplicates
      const scanContext = {
        errors: scanErrors,
        warnings: scanWarnings,
        skippedDirectories: 0,
        skippedFiles: 0,
        processedFiles: 0
      };

      await this.walkDirectory(resolvedRoot, resolvedRoot, filesByExtension, scannedFiles, scanContext);

      // Sort files within each extension group
      for (const files of Object.values(filesByExtension)) {
        files.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
      }

      // Report scan summary with warnings/errors
      this.reportScanIssues(scanContext);

      return filesByExtension;
    } catch (error) {
      // Translate the most common filesystem failures into friendlier messages,
      // keeping the original error reachable through `cause`.
      if (error.code === 'ENOENT') {
        throw new Error(`Directory does not exist: ${rootPath}`, { cause: error });
      } else if (error.code === 'EACCES') {
        throw new Error(`Permission denied accessing directory: ${rootPath}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Recursively walk through directory structure.
   * Read failures are recorded in scanContext instead of being thrown, so one
   * unreadable directory does not abort the whole scan.
   * @param {string} currentPath - Current directory being processed
   * @param {string} rootPath - Original root path for relative path calculation
   * @param {object} filesByExtension - Accumulator object for results
   * @param {Set} scannedFiles - Set to track processed files and avoid duplicates
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>}
   */
  async walkDirectory(currentPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    try {
      const entries = await fs.readdir(currentPath, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(currentPath, entry.name);
        const relativePath = path.relative(rootPath, fullPath);

        if (entry.isDirectory()) {
          // Skip excluded directories and hidden directories (unless explicitly allowed)
          if (this.shouldSkipDirectory(entry.name, relativePath)) {
            scanContext.skippedDirectories++;
            continue;
          }

          // Recursively scan subdirectory
          await this.walkDirectory(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isFile()) {
          // Process file if it matches criteria
          await this.processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isSymbolicLink()) {
          // Symbolic links are never followed; they are only reported as warnings
          scanContext.warnings.push(`Skipped symbolic link: ${relativePath}`);
        }
        // Skip other special files (devices, sockets, etc.)
      }
    } catch (error) {
      // Track errors in context for better reporting
      const relativePath = path.relative(rootPath, currentPath);

      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        scanContext.warnings.push(`Directory no longer exists: ${relativePath}`);
      } else if (error.code === 'ENOTDIR') {
        scanContext.warnings.push(`Path is not a directory: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot read directory ${relativePath}: ${error.message}`);
      }
    }
  }

  /**
   * Process a single file and add it to results if it matches criteria.
   * Failures are recorded in scanContext rather than thrown.
   * @param {string} fullPath - Absolute path to the file
   * @param {string} rootPath - Root path for relative calculation
   * @param {object} filesByExtension - Results accumulator
   * @param {Set} scannedFiles - Set of already processed files
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>}
   */
  async processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    try {
      const relativePath = path.relative(rootPath, fullPath);

      // Avoid processing the same file twice
      if (scannedFiles.has(fullPath)) {
        return;
      }
      scannedFiles.add(fullPath);

      const extension = path.extname(relativePath).toLowerCase();

      // Skip files without extensions or not in allowed list.
      // NOTE(review): path.extname() returns '' for names such as '.gitignore',
      // so extension-less dotfiles are rejected here before the hidden-file
      // allow-list below is consulted — confirm this is intended.
      if (!extension || !this.allowedExtensions.has(extension)) {
        scanContext.skippedFiles++;
        return;
      }

      // Skip hidden files (starting with .) unless explicitly needed
      const fileName = path.basename(relativePath);
      if (fileName.startsWith('.') && !this.isAllowedHiddenFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Check if file should be excluded by pattern (e.g., *-lock.json)
      if (this.shouldExcludeFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Verify file is readable
      const stats = await fs.stat(fullPath);
      if (!stats.isFile()) {
        scanContext.warnings.push(`Skipped non-regular file: ${relativePath}`);
        // Count it like every other skipped file so the statistics stay consistent
        scanContext.skippedFiles++;
        return;
      }

      // Check file size limits
      if (stats.size > MAX_INDIVIDUAL_FILE_SIZE) {
        scanContext.warnings.push(`Skipped large file (${Math.round(stats.size / 1024 / 1024)}MB): ${relativePath}`);
        scanContext.skippedFiles++;
        return;
      }

      // Add to results
      if (!filesByExtension[extension]) {
        filesByExtension[extension] = [];
      }

      filesByExtension[extension].push({
        relativePath: relativePath.replace(/\\/g, '/'), // Normalize path separators
        absolutePath: fullPath,
        size: stats.size,
        modified: stats.mtime
      });

      scanContext.processedFiles++;
    } catch (error) {
      // Handle file processing errors with appropriate context
      const relativePath = path.relative(rootPath, fullPath);

      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        // File might have been deleted during scan
        scanContext.warnings.push(`File no longer exists: ${relativePath}`);
      } else if (error.code === 'EISDIR') {
        scanContext.warnings.push(`Path is a directory, not a file: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot process file ${relativePath}: ${error.message}`);
      }

      scanContext.skippedFiles++;
    }
  }

  /**
   * Determine if a directory should be skipped
   * @param {string} dirName - Directory name
   * @param {string} relativePath - Relative path from root (currently unused)
   * @returns {boolean} True if directory should be skipped
   */
  shouldSkipDirectory(dirName, relativePath) {
    // Skip directories in exclude list
    if (this.excludeDirs.has(dirName)) {
      return true;
    }

    // Skip hidden directories (starting with .) unless explicitly allowed
    if (dirName.startsWith('.') && !this.isAllowedHiddenDirectory(dirName)) {
      return true;
    }

    // Skip common build/cache directories that might not be in exclude list
    return COMMON_SKIP_DIRS.has(dirName.toLowerCase());
  }

  /**
   * Check if a file should be excluded based on patterns
   * @param {string} fileName - File name to check
   * @returns {boolean} True if file should be excluded
   */
  shouldExcludeFile(fileName) {
    return this.excludeFiles.some(pattern => this.matchesPattern(fileName, pattern));
  }

  /**
   * Simple glob pattern matching.
   * Only `*` is a wildcard (any run of characters, including none); every
   * other character is matched literally and case-insensitively.
   * @param {string} fileName - File name to test
   * @param {string} pattern - Pattern to match (supports * wildcards)
   * @returns {boolean} True if pattern matches; false for non-string patterns
   */
  matchesPattern(fileName, pattern) {
    // A malformed (non-string) config entry can never match anything
    if (typeof pattern !== 'string') {
      return false;
    }

    // Exact match
    if (pattern === fileName) {
      return true;
    }

    // Convert glob pattern to regex: escape every regex metacharacter in the
    // literal segments so names like "c++.h" or "file(1).txt" neither throw
    // nor match unintended files, then join the segments with ".*".
    const regexPattern = pattern
      .split('*')
      .map(segment => segment.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
      .join('.*');

    const regex = new RegExp(`^${regexPattern}$`, 'i');
    return regex.test(fileName);
  }

  /**
   * Check if a hidden file should be included
   * @param {string} fileName - File name
   * @returns {boolean} True if file should be included
   */
  isAllowedHiddenFile(fileName) {
    return ALLOWED_HIDDEN_FILES.has(fileName);
  }

  /**
   * Check if a hidden directory should be included
   * @param {string} dirName - Directory name
   * @returns {boolean} True if directory should be included
   */
  isAllowedHiddenDirectory(dirName) {
    return ALLOWED_HIDDEN_DIRS.has(dirName);
  }

  /**
   * Get file extension descriptions for user display
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {Array} Array of `{ extension, description, count, files }` objects,
   *   sorted by extension; unknown extensions are described as 'Unknown'
   */
  getExtensionInfo(filesByExtension) {
    return Object.keys(filesByExtension)
      .sort()
      .map(ext => ({
        extension: ext,
        description: EXTENSION_DESCRIPTIONS[ext] || 'Unknown',
        count: filesByExtension[ext].length,
        files: filesByExtension[ext]
      }));
  }

  /**
   * Calculate total statistics for scanned files
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {object} `{ totalFiles, totalSize, extensionCount, averageFileSize, totalSizeFormatted }`
   */
  calculateStatistics(filesByExtension) {
    let totalFiles = 0;
    let totalSize = 0;
    const extensionCount = Object.keys(filesByExtension).length;

    for (const files of Object.values(filesByExtension)) {
      totalFiles += files.length;
      totalSize += files.reduce((sum, file) => sum + file.size, 0);
    }

    return {
      totalFiles,
      totalSize,
      extensionCount,
      // Guard against division by zero when nothing was scanned
      averageFileSize: totalFiles > 0 ? Math.round(totalSize / totalFiles) : 0,
      totalSizeFormatted: this.formatFileSize(totalSize)
    };
  }

  /**
   * Format file size in human readable format
   * @param {number} bytes - Size in bytes
   * @returns {string} Formatted size string with one decimal (e.g. "1.5 KB"); GB is the largest unit
   */
  formatFileSize(bytes) {
    const units = ['B', 'KB', 'MB', 'GB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1) {
      size /= 1024;
      unitIndex++;
    }

    return `${size.toFixed(1)} ${units[unitIndex]}`;
  }

  /**
   * Report scan issues and statistics to the console
   * @param {object} scanContext - Context object with scan statistics
   */
  reportScanIssues(scanContext) {
    const { errors, warnings, skippedDirectories, skippedFiles, processedFiles } = scanContext;

    // Report critical errors (first five only)
    if (errors.length > 0) {
      console.log(chalk.red(`\n⚠️ ${errors.length} scan error(s):`));
      errors.slice(0, 5).forEach(error => {
        console.log(chalk.red(` • ${error}`));
      });

      if (errors.length > 5) {
        console.log(chalk.gray(` ... and ${errors.length - 5} more errors`));
      }
    }

    // Report warnings (less critical); only shown when NODE_ENV is 'development'
    if (warnings.length > 0 && process.env.NODE_ENV === 'development') {
      console.log(chalk.yellow(`\n⚠️ ${warnings.length} scan warning(s):`));
      warnings.slice(0, 3).forEach(warning => {
        console.log(chalk.yellow(` • ${warning}`));
      });

      if (warnings.length > 3) {
        console.log(chalk.gray(` ... and ${warnings.length - 3} more warnings`));
      }
    }

    // Report summary statistics
    const totalIssues = errors.length + warnings.length;
    if (skippedFiles > 0 || skippedDirectories > 0 || totalIssues > 0) {
      console.log(chalk.gray(`\n📊 Scan Statistics:`));
      console.log(chalk.gray(` Processed: ${processedFiles} files`));

      if (skippedFiles > 0) {
        console.log(chalk.gray(` Skipped: ${skippedFiles} files`));
      }

      if (skippedDirectories > 0) {
        console.log(chalk.gray(` Skipped: ${skippedDirectories} directories`));
      }

      if (totalIssues > 0) {
        console.log(chalk.gray(` Issues: ${errors.length} errors, ${warnings.length} warnings`));
      }
    }

    // Warn if scan completeness is compromised
    if (errors.length > 0) {
      console.log(chalk.yellow(`\n⚠️ WARNING: Scan may be incomplete due to ${errors.length} access errors.`));
      console.log(chalk.gray(' Some files or directories could not be accessed.'));
    }
  }

  /**
   * Display scan results summary on the console
   * @param {object} filesByExtension - Files grouped by extension
   */
  displayScanSummary(filesByExtension) {
    const stats = this.calculateStatistics(filesByExtension);
    const extensions = Object.keys(filesByExtension).sort();

    console.log(chalk.cyan('\n📊 Scan Summary:'));
    console.log(chalk.gray(` Extensions found: ${extensions.join(', ')}`));
    console.log(chalk.gray(` Total files: ${stats.totalFiles}`));
    console.log(chalk.gray(` Total size: ${stats.totalSizeFormatted}`));
    console.log();
  }
}

export default Scanner;