UNPKG

codedetective

Version:

AI-powered tool to analyze codebases, reconstruct development timelines, and suggest where to resume work

385 lines 11.9 kB
import fs from 'fs';
import path from 'path';
// NOTE: `glob` is loaded lazily inside analyzeRepository() because it is only
// needed when an `includePattern` option is supplied.

// Map of file extensions to human-readable type names
const FILE_TYPES = {
    // Code
    js: 'JavaScript',
    ts: 'TypeScript',
    jsx: 'React JSX',
    tsx: 'React TSX',
    py: 'Python',
    rb: 'Ruby',
    php: 'PHP',
    java: 'Java',
    c: 'C',
    cpp: 'C++',
    cs: 'C#',
    go: 'Go',
    rs: 'Rust',
    swift: 'Swift',
    kt: 'Kotlin',
    scala: 'Scala',
    r: 'R',
    jl: 'Julia',
    // Web
    html: 'HTML',
    css: 'CSS',
    scss: 'SCSS',
    sass: 'Sass',
    less: 'Less',
    // Data
    json: 'JSON',
    yaml: 'YAML',
    yml: 'YAML',
    xml: 'XML',
    csv: 'CSV',
    tsv: 'TSV',
    md: 'Markdown',
    // Configuration
    toml: 'TOML',
    ini: 'INI',
    conf: 'Config',
    config: 'Config',
    // Shell
    sh: 'Shell',
    bash: 'Bash',
    zsh: 'Zsh',
    bat: 'Batch',
    ps1: 'PowerShell'
};

// File categories for grouping similar file types
export const FileCategory = {
    CODE: 'code',
    FIGURE: 'figure',
    DATA_TABLE: 'data_table',
    DOCUMENT: 'document',
    ARCHIVE: 'archive',
    BINARY: 'binary',
    MEDIA: 'media',
    OTHER: 'other'
};

// Map extensions to categories
const FILE_CATEGORIES = {
    // Code files
    'js': FileCategory.CODE,
    'ts': FileCategory.CODE,
    'jsx': FileCategory.CODE,
    'tsx': FileCategory.CODE,
    'py': FileCategory.CODE,
    'rb': FileCategory.CODE,
    'php': FileCategory.CODE,
    'java': FileCategory.CODE,
    'c': FileCategory.CODE,
    'cpp': FileCategory.CODE,
    'cs': FileCategory.CODE,
    'go': FileCategory.CODE,
    'rs': FileCategory.CODE,
    'swift': FileCategory.CODE,
    'kt': FileCategory.CODE,
    'scala': FileCategory.CODE,
    'r': FileCategory.CODE,
    'jl': FileCategory.CODE,
    'ipynb': FileCategory.CODE,
    'html': FileCategory.CODE,
    'css': FileCategory.CODE,
    'scss': FileCategory.CODE,
    'sass': FileCategory.CODE,
    'less': FileCategory.CODE,
    'sh': FileCategory.CODE,
    'bash': FileCategory.CODE,
    'zsh': FileCategory.CODE,
    'bat': FileCategory.CODE,
    'ps1': FileCategory.CODE,
    // Figure files
    'png': FileCategory.FIGURE,
    'jpg': FileCategory.FIGURE,
    'jpeg': FileCategory.FIGURE,
    'gif': FileCategory.FIGURE,
    'bmp': FileCategory.FIGURE,
    'svg': FileCategory.FIGURE,
    'webp': FileCategory.FIGURE,
    'ico': FileCategory.FIGURE,
    'pdf': FileCategory.FIGURE,
    // Data table files
    'csv': FileCategory.DATA_TABLE,
    'tsv': FileCategory.DATA_TABLE,
    'xlsx': FileCategory.DATA_TABLE,
    'xls': FileCategory.DATA_TABLE,
    'parquet': FileCategory.DATA_TABLE,
    'RData': FileCategory.DATA_TABLE,
    'Rdata': FileCategory.DATA_TABLE,
    'rda': FileCategory.DATA_TABLE,
    // Document files
    'doc': FileCategory.DOCUMENT,
    'docx': FileCategory.DOCUMENT,
    'ppt': FileCategory.DOCUMENT,
    'pptx': FileCategory.DOCUMENT,
    'md': FileCategory.DOCUMENT,
    'txt': FileCategory.DOCUMENT,
    // Archive files
    'zip': FileCategory.ARCHIVE,
    'tar': FileCategory.ARCHIVE,
    'gz': FileCategory.ARCHIVE,
    'bz2': FileCategory.ARCHIVE,
    '7z': FileCategory.ARCHIVE,
    'rar': FileCategory.ARCHIVE,
    // Binary files
    'exe': FileCategory.BINARY,
    'dll': FileCategory.BINARY,
    'so': FileCategory.BINARY,
    'dylib': FileCategory.BINARY,
    'bin': FileCategory.BINARY,
    // Media files
    'mp3': FileCategory.MEDIA,
    'mp4': FileCategory.MEDIA,
    'wav': FileCategory.MEDIA,
    'avi': FileCategory.MEDIA,
    'mov': FileCategory.MEDIA,
    'mkv': FileCategory.MEDIA,
    // Others
    'ttf': FileCategory.OTHER,
    'woff': FileCategory.OTHER,
    'woff2': FileCategory.OTHER,
    'eot': FileCategory.OTHER
};

// File extensions that should be considered binary/non-text
const BINARY_EXTENSIONS = new Set([
    // Images
    'png', 'jpg', 'jpeg', 'gif', 'bmp', 'ico', 'svg', 'webp',
    // Documents
    'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
    // Archives
    'zip', 'tar', 'gz', 'bz2', '7z', 'rar',
    // Binaries
    'exe', 'dll', 'so', 'dylib', 'bin',
    // Media
    'mp3', 'mp4', 'wav', 'avi', 'mov', 'mkv',
    // Others
    'ttf', 'woff', 'woff2', 'eot'
]);

// Patterns ignored in every analysis (gitignore syntax).
// node_modules and .git are skipped at any depth; the build-output
// directories are only skipped directly under the analyzed root.
const DEFAULT_IGNORE_PATTERNS = [
    'node_modules/',
    '.git/',
    '/dist/',
    '/build/',
    '/coverage/'
];

// Largest preview length (in characters) served by the bounded head read;
// longer previews fall back to reading the whole file.
const MAX_HEAD_PREVIEW_CHARS = 262144;

/**
 * Look up `key` among the table's OWN properties only, so extensions such as
 * "constructor" or "toString" never resolve to members of Object.prototype.
 *
 * @param {Object} table - Lookup table.
 * @param {string} key - Key to resolve.
 * @returns {*} The stored value, or undefined when the key is not an own property.
 */
function lookupOwn(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Escape every regular-expression metacharacter in `text`.
 *
 * @param {string} text - Literal text.
 * @returns {string} Text that is safe to embed in a RegExp source.
 */
function escapeRegExp(text) {
    return text.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&');
}

/**
 * Compile one gitignore-style pattern into a matcher rule.
 *
 * Supported syntax: `*` and `?` (never crossing "/"), `**`, `[...]` character
 * classes, backslash escapes, a leading "!" for negation, a leading or inner
 * "/" to anchor the pattern to the root, and a trailing "/". A compiled
 * pattern matches the named path itself and everything beneath it.
 *
 * @param {string} rawPattern - Pattern text.
 * @returns {{regex: RegExp, negated: boolean}|null} The rule, or null for an empty pattern.
 */
function compileIgnorePattern(rawPattern) {
    let pattern = rawPattern.trim();
    let negated = false;
    if (pattern.startsWith('!')) {
        negated = true;
        pattern = pattern.slice(1);
    }
    // A trailing slash marks a directory; matching "path or anything below it" covers it.
    pattern = pattern.replace(/\/+$/, '');
    // Per gitignore rules, a slash at the start or in the middle anchors the pattern.
    const anchored = pattern.includes('/');
    pattern = pattern.replace(/^\/+/, '');
    if (!pattern) {
        return null;
    }
    let source = '';
    for (let i = 0; i < pattern.length; i += 1) {
        const ch = pattern[i];
        if (ch === '\\' && i + 1 < pattern.length) {
            // Backslash escapes the next character.
            source += escapeRegExp(pattern[i + 1]);
            i += 1;
            continue;
        }
        if (ch === '*') {
            if (pattern[i + 1] === '*') {
                let end = i;
                while (pattern[end] === '*') {
                    end += 1;
                }
                const startsSegment = i === 0 || pattern[i - 1] === '/';
                if (startsSegment && pattern[end] === '/') {
                    // "**/" matches zero or more whole directories.
                    source += '(?:.*/)?';
                    i = end;
                }
                else {
                    source += '.*';
                    i = end - 1;
                }
            }
            else {
                source += '[^/]*';
            }
            continue;
        }
        if (ch === '?') {
            source += '[^/]';
            continue;
        }
        if (ch === '[') {
            // Start searching at i + 2 so a class always has at least one member.
            const close = pattern.indexOf(']', i + 2);
            if (close !== -1) {
                let members = pattern.slice(i + 1, close);
                let negate = '';
                if (members[0] === '!' || members[0] === '^') {
                    negate = '^';
                    members = members.slice(1);
                }
                if (members) {
                    source += `[${negate}${members.replace(/[\\\]^[]/g, '\\$&')}]`;
                    i = close;
                    continue;
                }
            }
            source += '\\[';
            continue;
        }
        source += escapeRegExp(ch);
    }
    const prefix = anchored ? '^' : '^(?:.*/)?';
    const suffix = '(?:/.*)?$';
    let regex;
    try {
        regex = new RegExp(prefix + source + suffix);
    }
    catch (error) {
        // e.g. a reversed range such as "[z-a]": fall back to a literal match
        // instead of aborting the whole analysis.
        regex = new RegExp(prefix + escapeRegExp(pattern) + suffix);
    }
    return { regex, negated };
}

/**
 * Checks if a file can be read as text (decided by extension only).
 *
 * @param {string} filename - File name or path.
 * @returns {boolean} False when the extension is listed in BINARY_EXTENSIONS.
 */
function isTextFile(filename) {
    const extension = path.extname(filename).toLowerCase().slice(1);
    return !BINARY_EXTENSIONS.has(extension);
}

/**
 * Simple class to handle ignoring files based on gitignore-style patterns.
 * Patterns are compiled once, when they are added.
 */
class IgnoreMatcher {
    /** Raw pattern strings, in the order they were added. */
    patterns = [];
    /** Compiled rules, parallel in order to the non-empty patterns. */
    rules = [];

    /**
     * @param {string} directory - Root directory whose `.gitignore` may be read.
     * @param {boolean} [respectGitignore=true] - When false, only the default
     *   patterns are installed and `.gitignore` is not read.
     */
    constructor(directory, respectGitignore = true) {
        this.add(DEFAULT_IGNORE_PATTERNS);
        if (!respectGitignore) {
            return;
        }
        // Add patterns from .gitignore if it exists
        const gitignorePath = path.join(directory, '.gitignore');
        if (!fs.existsSync(gitignorePath)) {
            return;
        }
        try {
            const gitignorePatterns = fs.readFileSync(gitignorePath, 'utf8')
                .split(/\r?\n/)
                .map(line => line.trim())
                .filter(line => line && !line.startsWith('#'));
            this.add(gitignorePatterns);
        }
        catch (error) {
            console.warn('Error reading .gitignore file:', error);
        }
    }

    /**
     * Register one or more additional ignore patterns.
     *
     * @param {string|string[]} pattern - Pattern or list of patterns; non-string
     *   entries are skipped.
     */
    add(pattern) {
        const list = Array.isArray(pattern) ? pattern : [pattern];
        for (const entry of list) {
            if (typeof entry !== 'string') {
                continue;
            }
            this.patterns.push(entry);
            const rule = compileIgnorePattern(entry);
            if (rule) {
                this.rules.push(rule);
            }
        }
    }

    /**
     * Check if a path should be ignored.
     *
     * @param {string} filePath - Path relative to the analyzed root.
     * @returns {boolean} True when the last matching rule is not a negation.
     */
    ignores(filePath) {
        // Patterns use "/", so normalize platform separators (Windows) first.
        const normalized = path.sep === '/' ? filePath : filePath.split(path.sep).join('/');
        let ignored = false;
        for (const rule of this.rules) {
            if (rule.regex.test(normalized)) {
                // Later rules override earlier ones, so "!pattern" can re-include.
                ignored = !rule.negated;
            }
        }
        return ignored;
    }
}

/**
 * Get file extension and map it to a type.
 *
 * @param {string} filePath - File name or path.
 * @returns {string} Known type name, else the lower-cased extension, else 'Unknown'.
 */
function getFileType(filePath) {
    const extension = path.extname(filePath).toLowerCase().slice(1);
    return lookupOwn(FILE_TYPES, extension) || extension || 'Unknown';
}

/**
 * Get file extension and map it to a category.
 *
 * @param {string} filePath - File name or path.
 * @returns {string} One of the FileCategory values (OTHER when unmapped).
 */
function getFileCategory(filePath) {
    const rawExtension = path.extname(filePath).slice(1);
    // The raw spelling is tried second for case-sensitive entries like RData.
    return lookupOwn(FILE_CATEGORIES, rawExtension.toLowerCase())
        || lookupOwn(FILE_CATEGORIES, rawExtension)
        || FileCategory.OTHER;
}

/**
 * Read at most `byteLimit` bytes from the start of a file, decoded as UTF-8.
 *
 * @param {string} filePath - File to read.
 * @param {number} byteLimit - Maximum number of bytes to read.
 * @returns {string} Decoded head of the file.
 */
function readFileHead(filePath, byteLimit) {
    const fd = fs.openSync(filePath, 'r');
    try {
        const buffer = Buffer.alloc(byteLimit);
        let total = 0;
        while (total < byteLimit) {
            const bytesRead = fs.readSync(fd, buffer, total, byteLimit - total, null);
            if (bytesRead === 0) {
                break;
            }
            total += bytesRead;
        }
        return buffer.toString('utf8', 0, total);
    }
    finally {
        fs.closeSync(fd);
    }
}

/**
 * Get a preview of the file content (its first `maxLength` characters).
 *
 * @param {string} filePath - File to preview.
 * @param {number} [maxLength=1000] - Maximum preview length in UTF-16 code units.
 * @returns {string|undefined} The content, truncated with a trailing '...' when
 *   longer than `maxLength`; undefined for binary extensions or unreadable files.
 */
function getFilePreview(filePath, maxLength = 1000) {
    if (!isTextFile(filePath)) {
        return undefined;
    }
    try {
        // A UTF-8 sequence is at most 4 bytes, so 4 * maxLength + 4 bytes always
        // decode to more than maxLength characters when the file is that long;
        // the preview never requires reading a large file in full.
        const useHeadRead = Number.isInteger(maxLength)
            && maxLength > 0
            && maxLength <= MAX_HEAD_PREVIEW_CHARS;
        const content = useHeadRead
            ? readFileHead(filePath, maxLength * 4 + 4)
            : fs.readFileSync(filePath, 'utf8');
        if (content.length <= maxLength) {
            return content;
        }
        return content.slice(0, maxLength) + '...';
    }
    catch (error) {
        // Previews are best-effort: an unreadable file simply has no preview.
        return undefined;
    }
}

/**
 * Analyze a single file.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} rootPath - Root used to compute the relative `path`.
 * @param {fs.Stats} [stats] - Already-fetched stats; fetched here when omitted.
 * @returns {Object} File record: path, name, extension, size, lastModified,
 *   preview, type and category.
 */
function analyzeFile(filePath, rootPath, stats = fs.statSync(filePath)) {
    return {
        path: path.relative(rootPath, filePath),
        name: path.basename(filePath),
        extension: path.extname(filePath).slice(1),
        size: stats.size,
        lastModified: stats.mtime,
        preview: getFilePreview(filePath),
        type: getFileType(filePath),
        category: getFileCategory(filePath)
    };
}

/**
 * Recursively analyze a directory.
 *
 * @param {string} directoryPath - Directory to scan.
 * @param {string} rootPath - Root used to compute relative paths.
 * @param {IgnoreMatcher} ignoreMatcher - Decides which entries are skipped.
 * @param {number} [currentDepth=0] - Depth of `directoryPath` below the root.
 * @param {number} [maxDepth=3] - Directories deeper than this are not descended into.
 * @returns {Object} Directory record: path, name, files, subdirectories, depth.
 * @throws {Error} When the root directory (depth 0) itself cannot be read.
 */
function analyzeDirectory(directoryPath, rootPath, ignoreMatcher, currentDepth = 0, maxDepth = 3) {
    const relativePath = path.relative(rootPath, directoryPath);
    const files = [];
    const subdirectories = [];
    let items;
    try {
        items = fs.readdirSync(directoryPath);
    }
    catch (error) {
        if (currentDepth === 0) {
            throw error;
        }
        // One unreadable subdirectory should not abort the whole analysis.
        console.warn(`Skipping unreadable directory ${relativePath}:`, error.message);
        items = [];
    }
    for (const item of items) {
        const itemPath = path.join(directoryPath, item);
        const relativeItemPath = path.relative(rootPath, itemPath);
        // Skip if item is ignored
        if (ignoreMatcher.ignores(relativeItemPath)) {
            continue;
        }
        let stats;
        try {
            stats = fs.statSync(itemPath);
        }
        catch (error) {
            // Typically a dangling symlink, or an entry removed while scanning.
            console.warn(`Skipping unreadable entry ${relativeItemPath}:`, error.message);
            continue;
        }
        if (stats.isDirectory()) {
            if (currentDepth < maxDepth) {
                subdirectories.push(analyzeDirectory(itemPath, rootPath, ignoreMatcher, currentDepth + 1, maxDepth));
            }
        }
        else if (stats.isFile()) {
            files.push(analyzeFile(itemPath, rootPath, stats));
        }
    }
    return {
        path: relativePath || '.',
        name: path.basename(directoryPath),
        files,
        subdirectories,
        depth: currentDepth
    };
}

/**
 * Collect all files recursively from the directory structure.
 *
 * @param {Object} structure - Directory record produced by analyzeDirectory.
 * @returns {Object[]} The directory's files followed by those of each subdirectory.
 */
function collectAllFiles(structure) {
    const allFiles = [...structure.files];
    for (const subdir of structure.subdirectories) {
        allFiles.push(...collectAllFiles(subdir));
    }
    return allFiles;
}

/**
 * Append `value` to the list stored under `key`, creating the list on first use.
 *
 * @param {Map<string, Array>} groups - Grouping map.
 * @param {string} key - Group key.
 * @param {*} value - Item to append.
 */
function addToGroup(groups, key, value) {
    const group = groups.get(key);
    if (group) {
        group.push(value);
    }
    else {
        groups.set(key, [value]);
    }
}

/**
 * Analyze repository and generate a comprehensive data structure.
 *
 * @param {Object} options
 * @param {string} options.directory - Root directory to analyze.
 * @param {number} [options.maxDepth=3] - Maximum directory depth to descend.
 * @param {boolean} [options.respectGitignore=true] - Apply the root `.gitignore`.
 * @param {string|string[]} [options.includePattern] - Glob(s) (relative to
 *   `directory`) that a file must match to be counted.
 * @param {string|string[]} [options.excludePattern] - Extra gitignore-style
 *   pattern(s) to skip.
 * @param {string} [options.category] - Keep only files of this FileCategory.
 * @returns {Promise<Object>} rootPath, name, structure, filesByType,
 *   filesByCategory, totalFiles, totalSize and statistics. `structure` is not
 *   affected by the include/category filters.
 * @throws {Error} When `directory` does not exist.
 */
export async function analyzeRepository(options) {
    const { directory, maxDepth = 3, respectGitignore = true, includePattern, excludePattern, category } = options;
    // Ensure directory exists
    if (!fs.existsSync(directory)) {
        throw new Error(`Directory not found: ${directory}`);
    }
    // Create ignore matcher
    const ignoreMatcher = new IgnoreMatcher(directory, respectGitignore);
    // Add custom exclude pattern if provided
    if (excludePattern) {
        ignoreMatcher.add(excludePattern);
    }
    // Analyze directory structure
    const structure = analyzeDirectory(directory, directory, ignoreMatcher, 0, maxDepth);
    // Get all files from structure
    let allFiles = collectAllFiles(structure);
    // Apply include pattern if provided
    if (includePattern) {
        const { glob } = await import('glob');
        const matches = await glob(includePattern, { cwd: directory });
        // A Set keeps the filter linear; normalizing aligns separator style.
        const included = new Set(matches.map(match => path.normalize(match)));
        allFiles = allFiles.filter(file => included.has(path.normalize(file.path)));
    }
    // Filter by category if provided
    if (category) {
        allFiles = allFiles.filter(file => file.category === category);
    }
    // Group in Maps so keys such as "constructor" cannot collide with
    // Object.prototype members, then convert to plain objects for the result.
    const typeGroups = new Map();
    const categoryGroups = new Map();
    for (const file of allFiles) {
        addToGroup(typeGroups, file.type, file);
        addToGroup(categoryGroups, file.category, file);
    }
    const filesByType = Object.fromEntries(typeGroups);
    const filesByCategory = Object.fromEntries(categoryGroups);
    const fileTypeDistribution = Object.fromEntries([...typeGroups].map(([key, group]) => [key, group.length]));
    const fileCategoryDistribution = Object.fromEntries([...categoryGroups].map(([key, group]) => [key, group.length]));
    // Calculate total size
    const totalSize = allFiles.reduce((sum, file) => sum + file.size, 0);
    // Get largest files
    const largestFiles = [...allFiles].sort((a, b) => b.size - a.size).slice(0, 10);
    // Get recently modified files
    const recentlyModifiedFiles = [...allFiles]
        .sort((a, b) => b.lastModified.getTime() - a.lastModified.getTime())
        .slice(0, 10);
    return {
        rootPath: directory,
        name: path.basename(directory),
        structure,
        filesByType,
        filesByCategory,
        totalFiles: allFiles.length,
        totalSize,
        statistics: {
            fileTypeDistribution,
            fileCategoryDistribution,
            largestFiles,
            recentlyModifiedFiles
        }
    };
}
//# sourceMappingURL=analyzer.js.map