agentsqripts
Version:
Comprehensive static code analysis toolkit for identifying technical debt, security vulnerabilities, performance issues, and code quality problems
204 lines (196 loc) • 8.48 kB
JavaScript
/**
* @file Core file analysis utilities for content processing and metadata extraction
* @description Single responsibility: Provide fundamental file analysis operations across AgentSqripts
*
* This utility module provides essential file analysis operations including extension validation,
* content extraction with metadata, line counting, and code block extraction. It serves as
* the foundation for more specialized analyzers while maintaining consistent file handling
* patterns across the platform.
*
* Design rationale:
* - Simple, focused functions enable easy composition in complex analyzers
* - Promise-based operations with concurrent execution for performance
* - Consistent return patterns facilitate reliable error handling
* - Building-block approach supports diverse analysis requirements
*/
const fs = require('fs').promises;
/**
 * Check whether a file path ends in one of the given extensions, ignoring case.
 *
 * The extension is the final dot-prefixed segment of the last path component,
 * e.g. `.js` for `src/app.test.js` and `.env` for the dotfile `.env`. Both the
 * path and every candidate extension are lower-cased before comparison, so
 * `config.JSON` matches `['.json']` and `app.js` matches `['.JS']`.
 *
 * The function always returns a strict boolean. A non-string path, a non-array
 * extension list, or a path without an extension yields `false` rather than
 * throwing or returning `null`. Non-string entries in `extensions` are ignored.
 *
 * @param {string} filePath - File path to check for a valid extension
 * @param {Array<string>} extensions - Accepted extensions including the leading dot (e.g., ['.js', '.ts'])
 * @returns {boolean} True if the path's extension matches any entry in `extensions`
 * @example
 * hasValidExtension('app.js', ['.js', '.ts']) // true
 * hasValidExtension('config.JSON', ['.json']) // true (case insensitive)
 * hasValidExtension('README.md', ['.js']) // false
 * hasValidExtension('Makefile', ['.js']) // false (no extension)
 */
function hasValidExtension(filePath, extensions) {
  if (typeof filePath !== 'string' || !Array.isArray(extensions)) {
    return false;
  }

  // Path separators are excluded from the extension so that a dot inside a
  // directory name ("v1.2/Makefile") is not mistaken for a file extension.
  const match = filePath.toLowerCase().match(/\.[^./\\]+$/);
  if (match === null) {
    return false;
  }

  const [ext] = match;
  return extensions.some(
    (candidate) => typeof candidate === 'string' && candidate.toLowerCase() === ext
  );
}
/**
 * Read a file's text content and its filesystem stats in one call.
 *
 * Both `fs.readFile` (decoded as UTF-8) and `fs.stat` are started before
 * either is awaited, so the two requests overlap instead of running back to
 * back. They are combined with `Promise.all`, which means the returned promise
 * rejects as soon as either operation fails; no partial result is returned.
 *
 * Note that the read and the stat are two independent operations on the same
 * path. If the file is modified between them, `stats` may not describe exactly
 * the bytes held in `content`.
 *
 * Errors such as ENOENT or EACCES are not caught here and propagate to the
 * caller as a rejected promise.
 *
 * @param {string} filePath - Path of the file to read and stat
 * @returns {Promise<{content: string, stats: import('fs').Stats}>} Resolves with the file text and its stats
 * @example
 * const {content, stats} = await getFileData('app.js');
 * console.log(`File size: ${stats.size} bytes`);
 * console.log(`Lines: ${content.split('\n').length}`);
 */
async function getFileData(filePath) {
  // Start both requests up front; Promise.all attaches handlers to both
  // immediately, so a failure in either one is always observed.
  const readTask = fs.readFile(filePath, 'utf-8');
  const statTask = fs.stat(filePath);
  const results = await Promise.all([readTask, statTask]);

  return { content: results[0], stats: results[1] };
}
/**
 * Count the lines in a string, recognising all common line endings.
 *
 * A line break is any of: CRLF (`\r\n`, Windows), a lone LF (`\n`, Unix), or a
 * lone CR (`\r`, classic Mac). CRLF is treated as a single break, so LF and
 * CRLF files with the same text give the same count.
 *
 * Counting rules:
 * - The result is the number of line breaks plus one.
 * - An empty string therefore counts as 1 line.
 * - A trailing line break adds a final empty line ('a\n' counts as 2).
 *
 * The content is split into an array of lines to obtain the count, so time
 * and temporary memory are both proportional to the content length.
 *
 * @param {string} content - File content string to count lines in
 * @returns {number} Number of lines in content (always >= 1)
 * @throws {TypeError} If `content` is null, undefined or has no `split` method
 * @example
 * countLines('hello\nworld') // 2
 * countLines('single line') // 1
 * countLines('') // 1
 * countLines('a\r\nb\rc') // 3
 */
function countLines(content) {
  // `\r\n?` consumes CRLF as one break and also matches a lone CR;
  // the alternative `\n` covers plain LF.
  return content.split(/\r\n?|\n/).length;
}
/**
 * Extract every window of `minLines` consecutive lines from the content.
 *
 * The content is split on '\n' and a fixed-size window is slid over the lines
 * one line at a time, so consecutive blocks overlap by `minLines - 1` lines.
 * Each block records its 1-based, inclusive `start` and `end` line numbers and
 * the window's lines joined with '\n'. Windows consisting only of whitespace
 * are skipped.
 *
 * Window size handling:
 * - `minLines` is converted to a number and rounded down to an integer.
 * - If the result is not at least 1 (zero, negative, NaN), no window can be
 *   formed and an empty array is returned.
 * - If the content has fewer lines than the window size, the result is empty.
 *
 * Lines are split on '\n' only; in CRLF content each line keeps its trailing
 * '\r', which is preserved in `content` of the returned blocks.
 *
 * @param {string} content - File content to extract blocks from
 * @param {number} minLines - Number of lines per block (default: 5)
 * @returns {Array<{start: number, end: number, content: string}>} Array of code blocks with positions
 * @example
 * const blocks = extractCodeBlocks('line1\nline2\nline3\nline4\nline5\nline6', 3);
 * // Returns 4 blocks: [1-3], [2-4], [3-5], [4-6]
 */
function extractCodeBlocks(content, minLines = 5) {
  // Normalise the window size once. Without this, a negative value reaches
  // slice() as a negative end index, a fractional value produces fractional
  // `end` positions, and a numeric string turns `i + minLines` into string
  // concatenation.
  const windowSize = Math.floor(Number(minLines));
  if (!(windowSize >= 1)) {
    return [];
  }

  const lines = content.split('\n');
  const lastStart = lines.length - windowSize;
  const blocks = [];

  for (let i = 0; i <= lastStart; i++) {
    const block = lines.slice(i, i + windowSize).join('\n');
    // Skip windows that contain nothing but whitespace.
    if (block.trim()) {
      blocks.push({
        start: i + 1,
        end: i + windowSize,
        content: block
      });
    }
  }

  return blocks;
}
// Public API (CommonJS): the four file-analysis helpers defined in this module.
module.exports = {
hasValidExtension,
getFileData,
countLines,
extractCodeBlocks
};