UNPKG

erosolar-cli

Version:

Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning

1,297 lines 76 kB
/** * Learn Tools - Codebase exploration and learning tools for understanding codebases. * * These tools enable deep codebase exploration without requiring external API calls * for the core analysis. The AI can use these tools to build a comprehensive * understanding of any codebase's architecture, patterns, and conventions. * * Features: * - Codebase structure analysis * - Pattern detection and learning * - Architecture understanding * - File relationship mapping * - Topic-based exploration */ import { readFileSync, existsSync, readdirSync, statSync } from 'node:fs'; import { join, relative, extname, basename } from 'node:path'; import { buildError } from '../core/errors.js'; // ===================================================== // Constants // ===================================================== const IGNORED_DIRS = new Set([ 'node_modules', '.git', '.svn', '.hg', 'dist', 'build', 'out', '.next', '.nuxt', '.output', 'coverage', '.nyc_output', '.cache', '.turbo', '.vercel', '.netlify', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache', 'venv', '.venv', 'env', '.env', 'target', 'vendor', '.idea', '.vscode', ]); const LANGUAGE_MAP = { '.ts': 'TypeScript', '.tsx': 'TypeScript React', '.js': 'JavaScript', '.jsx': 'JavaScript React', '.mjs': 'JavaScript (ESM)', '.cjs': 'JavaScript (CJS)', '.py': 'Python', '.pyw': 'Python', '.pyi': 'Python Stub', '.rs': 'Rust', '.go': 'Go', '.java': 'Java', '.kt': 'Kotlin', '.kts': 'Kotlin Script', '.scala': 'Scala', '.rb': 'Ruby', '.php': 'PHP', '.cs': 'C#', '.fs': 'F#', '.cpp': 'C++', '.cc': 'C++', '.cxx': 'C++', '.c': 'C', '.h': 'C/C++ Header', '.hpp': 'C++ Header', '.swift': 'Swift', '.m': 'Objective-C', '.mm': 'Objective-C++', '.vue': 'Vue', '.svelte': 'Svelte', '.elm': 'Elm', '.ex': 'Elixir', '.exs': 'Elixir Script', '.erl': 'Erlang', '.hs': 'Haskell', '.ml': 'OCaml', '.mli': 'OCaml Interface', '.lua': 'Lua', '.pl': 'Perl', '.pm': 'Perl Module', '.sh': 'Shell', '.bash': 'Bash', '.zsh': 'Zsh', '.fish': 'Fish', '.ps1': 'PowerShell', '.sql': 'SQL', '.json': 'JSON', '.yaml': 'YAML', '.yml': 'YAML', '.toml': 'TOML', '.xml': 'XML', '.md': 'Markdown', '.mdx': 'MDX', '.html': 'HTML', '.htm': 'HTML', '.css': 'CSS', '.scss': 'SCSS', '.sass': 'Sass', '.less': 'Less', '.styl': 'Stylus', }; const CONFIG_FILES = { 'package.json': { type: 'npm', purpose: 'Node.js package configuration and dependencies' }, 'tsconfig.json': { type: 'typescript', purpose: 'TypeScript compiler configuration' }, 'pyproject.toml': { type: 'python', purpose: 'Python project configuration (PEP 517/518)' }, 'setup.py': { type: 'python', purpose: 'Python package setup (legacy)' }, 'requirements.txt': { type: 'python', purpose: 'Python dependencies' }, 'Cargo.toml': { type: 'rust', purpose: 'Rust package manifest' }, 'go.mod': { type: 'go', purpose: 'Go module definition' }, 'pom.xml': { type: 'maven', purpose: 'Maven project configuration' }, 'build.gradle': { type: 'gradle', purpose: 'Gradle build configuration' }, 'build.gradle.kts': { type: 'gradle', purpose: 'Gradle Kotlin build configuration' }, 'Gemfile': { type: 'ruby', purpose: 'Ruby dependencies (Bundler)' }, 'composer.json': { type: 'php', purpose: 'PHP Composer dependencies' }, '.eslintrc.json': { type: 'linting', purpose: 'ESLint configuration' }, '.eslintrc.js': { type: 'linting', purpose: 'ESLint configuration' }, '.prettierrc': { type: 'formatting', purpose: 'Prettier configuration' }, 'prettier.config.js': { type: 'formatting', purpose: 'Prettier configuration' }, '.gitignore': { type: 'git', purpose: 'Git ignore patterns' }, '.dockerignore': { type: 'docker', purpose: 'Docker ignore patterns' }, 'Dockerfile': { type: 'docker', purpose: 'Docker image definition' }, 'docker-compose.yml': { type: 'docker', purpose: 'Docker Compose services' }, 'docker-compose.yaml': { type: 'docker', purpose: 'Docker Compose services' }, 'Makefile': { type: 'build', purpose: 'Make build automation' }, '.env.example': { type: 'config', purpose: 'Environment variable template' }, 'jest.config.js': { type: 'testing', purpose: 'Jest test configuration' }, 'vitest.config.ts': { type: 'testing', purpose: 'Vitest test configuration' }, 'webpack.config.js': { type: 'bundler', purpose: 'Webpack bundler configuration' }, 'vite.config.ts': { type: 'bundler', purpose: 'Vite build tool configuration' }, 'rollup.config.js': { type: 'bundler', purpose: 'Rollup bundler configuration' }, 'next.config.js': { type: 'framework', purpose: 'Next.js configuration' }, 'nuxt.config.ts': { type: 'framework', purpose: 'Nuxt.js configuration' }, 'tailwind.config.js': { type: 'css', purpose: 'Tailwind CSS configuration' }, '.github/workflows': { type: 'ci', purpose: 'GitHub Actions workflows' }, '.gitlab-ci.yml': { type: 'ci', purpose: 'GitLab CI/CD configuration' }, 'Jenkinsfile': { type: 'ci', purpose: 'Jenkins pipeline definition' }, }; const ARCHITECTURE_PATTERNS = [ { name: 'MVC (Model-View-Controller)', indicators: ['models', 'views', 'controllers', 'routes'], type: 'architectural', }, { name: 'Clean Architecture', indicators: ['domain', 'application', 'infrastructure', 'presentation', 'entities', 'use-cases', 'usecases'], type: 'architectural', }, { name: 'Hexagonal Architecture', indicators: ['ports', 'adapters', 'domain', 'application'], type: 'architectural', }, { name: 'Feature-based Structure', indicators: ['features', 'modules'], type: 'structural', }, { name: 'Component-based', indicators: ['components', 'shared', 'common'], type: 'structural', }, { name: 'Layered Architecture', indicators: ['api', 'services', 'repositories', 'data', 'business'], type: 'architectural', }, { name: 'Microservices', indicators: ['services', 'gateway', 'docker-compose'], type: 'architectural', }, { name: 'Monorepo', indicators: ['packages', 'apps', 'libs', 'workspace'], type: 'structural', }, { name: 'Plugin Architecture', indicators: ['plugins', 'extensions', 'addons'], type: 'architectural', }, { name: 'Event-Driven', indicators: ['events', 'handlers', 'listeners', 'subscribers', 'publishers'], type: 'architectural', }, ]; // ===================================================== // Tool Creation // ===================================================== export function createLearnTools(workingDir) { return [ createLearnCodebaseTool(workingDir), createLearnFileTool(workingDir), createLearnTopicTool(workingDir), createLearnSummaryTool(workingDir), ]; } // ===================================================== // learn_codebase Tool // ===================================================== function createLearnCodebaseTool(workingDir) { return { name: 'learn_codebase', description: `Analyze and learn the entire codebase structure, architecture, patterns, and conventions. This tool provides a comprehensive overview of the project without requiring external API calls. Use this to understand: - Project structure and organization - Primary programming languages used - Architectural patterns detected - Configuration and build setup - Entry points and key files - Dependencies and package management`, parameters: { type: 'object', properties: { depth: { type: 'number', description: 'Maximum directory depth to analyze (default: 5)', }, includeHidden: { type: 'boolean', description: 'Include hidden files/directories in analysis (default: false)', }, focusPath: { type: 'string', description: 'Focus analysis on a specific subdirectory', }, }, additionalProperties: false, }, cacheable: true, handler: async (args) => { try { const depth = typeof args['depth'] === 'number' ? args['depth'] : 5; const includeHidden = args['includeHidden'] === true; const focusPath = args['focusPath']; const targetDir = focusPath ? resolveFilePath(workingDir, focusPath) : workingDir; if (!existsSync(targetDir)) { return `Error: Directory not found: ${targetDir}`; } // Deep analysis with progressive output const analysis = await analyzeCodebaseDeep(targetDir, workingDir, depth, includeHidden); return formatCodebaseAnalysis(analysis); } catch (error) { return buildError('analyzing codebase', error, { workingDir }); } }, }; } // ===================================================== // learn_file Tool // ===================================================== function createLearnFileTool(workingDir) { return { name: 'learn_file', description: `Deep-learn a specific file's purpose, structure, patterns, and relationships. This tool provides detailed analysis of a single file including: - File purpose and responsibilities - Imports and dependencies - Exports and public interface - Functions and classes with their purposes - Complexity metrics - Relationships to other files`, parameters: { type: 'object', properties: { path: { type: 'string', description: 'Path to the file to analyze', }, includeRelationships: { type: 'boolean', description: 'Analyze relationships to other files (default: true)', }, }, required: ['path'], additionalProperties: false, }, cacheable: true, handler: async (args) => { try { const filePath = resolveFilePath(workingDir, args['path']); const includeRelationships = args['includeRelationships'] !== false; if (!existsSync(filePath)) { return `Error: File not found: ${filePath}`; } const stat = statSync(filePath); if (stat.isDirectory()) { return `Error: Path is a directory, not a file: ${filePath}`; } const analysis = analyzeFile(filePath, workingDir, includeRelationships); return formatFileAnalysis(analysis); } catch (error) { return buildError('analyzing file', error, { path: String(args['path']) }); } }, }; } // ===================================================== // learn_topic Tool // ===================================================== function createLearnTopicTool(workingDir) { return { name: 'learn_topic', description: `Learn about a specific topic, pattern, or concept within the codebase. Use this to understand how specific patterns are implemented, such as: - Authentication/authorization patterns - Error handling conventions - Data validation approaches - API design patterns - State management - Testing patterns - Any custom pattern or convention`, parameters: { type: 'object', properties: { topic: { type: 'string', description: 'The topic or pattern to learn about (e.g., "authentication", "error handling", "api routes")', }, maxFiles: { type: 'number', description: 'Maximum number of relevant files to analyze (default: 10)', }, maxExamples: { type: 'number', description: 'Maximum number of code examples to include (default: 5)', }, }, required: ['topic'], additionalProperties: false, }, cacheable: true, handler: async (args) => { try { const topic = args['topic']; const maxFiles = typeof args['maxFiles'] === 'number' ? args['maxFiles'] : 10; const maxExamples = typeof args['maxExamples'] === 'number' ? args['maxExamples'] : 5; if (!topic || !topic.trim()) { return 'Error: topic must be a non-empty string'; } const analysis = analyzeTopic(workingDir, topic.trim(), maxFiles, maxExamples); return formatTopicAnalysis(analysis); } catch (error) { return buildError('analyzing topic', error, { topic: String(args['topic']) }); } }, }; } // ===================================================== // learn_summary Tool // ===================================================== function createLearnSummaryTool(workingDir) { return { name: 'learn_summary', description: `Generate a learning summary for the codebase suitable for onboarding. This creates a comprehensive summary including: - Quick start guide - Key concepts and terminology - Architecture overview - Important files and their purposes - Common patterns and conventions - Development workflow suggestions`, parameters: { type: 'object', properties: { format: { type: 'string', description: 'Output format: "markdown" (default) or "text"', enum: ['markdown', 'text'], }, focus: { type: 'string', description: 'Focus area for the summary (e.g., "frontend", "backend", "api")', }, }, additionalProperties: false, }, cacheable: true, handler: async (args) => { try { const format = args['format'] || 'markdown'; const focus = args['focus']; const analysis = analyzeCodebase(workingDir, workingDir, 4, false); return formatLearningSummary(analysis, format, focus); } catch (error) { return buildError('generating learning summary', error, { workingDir }); } }, }; } // ===================================================== // Analysis Functions // ===================================================== /** * Deep codebase analysis with thorough exploration. * Does actual file content analysis for better insights. */ async function analyzeCodebaseDeep(targetDir, workingDir, maxDepth, includeHidden) { const files = []; const directories = []; const configFiles = []; // Phase 1: Directory structure traversal const structure = buildDirectoryTree(targetDir, workingDir, 0, maxDepth, includeHidden, files, directories, configFiles); // Phase 2: File type detection and language breakdown const languageCounts = new Map(); for (const file of files) { const lang = LANGUAGE_MAP[file.ext] || 'Other'; const existing = languageCounts.get(lang) || { ext: file.ext, count: 0 }; existing.count++; languageCounts.set(lang, existing); } const totalFiles = files.length; const languages = Array.from(languageCounts.entries()) .map(([language, data]) => ({ language, extension: data.ext, fileCount: data.count, percentage: totalFiles > 0 ? (data.count / totalFiles) * 100 : 0, })) .sort((a, b) => b.fileCount - a.fileCount); // Phase 3: Architecture pattern detection const dirNames = directories.map((d) => basename(d).toLowerCase()); const patterns = detectPatterns(dirNames, files.map((f) => f.path)); // Phase 4: Configuration file analysis with deep inspection const configDetails = analyzeConfigFilesDeep(configFiles, targetDir); // Phase 5: Dependency analysis const dependencies = analyzeDependencies(targetDir); // Phase 6: Entry point identification const entryPoints = findEntryPoints(files.map((f) => f.path), configFiles); // Phase 7: Component and layer mapping const architecture = buildArchitectureInsights(patterns, dirNames, targetDir, workingDir); // Phase 8: Deep source file analysis for patterns and complexity const keyFiles = files .filter(f => LANGUAGE_MAP[f.ext] && f.size < 100000) .slice(0, 50); const codePatterns = new Map(); let totalComplexity = 0; let analyzedFiles = 0; for (const file of keyFiles) { try { const fullPath = join(workingDir, file.path); const content = readFileSync(fullPath, 'utf-8'); const filePatterns = detectCodePatterns(content, file.ext); // Accumulate pattern counts for (const pattern of filePatterns) { codePatterns.set(pattern, (codePatterns.get(pattern) || 0) + 1); } // Calculate complexity metrics const complexity = calculateComplexity(content); totalComplexity += complexity.cyclomaticComplexity; analyzedFiles++; } catch { // Skip unreadable files } } // Add detected code patterns to the patterns list for (const [patternName, count] of codePatterns.entries()) { if (count >= 3) { // Only include patterns found in multiple files patterns.push({ name: patternName, type: 'design', description: `Found ${patternName} pattern in ${count} files`, evidence: [`Detected in ${count} source files`], confidence: count >= 10 ? 'high' : count >= 5 ? 'medium' : 'low', }); } } return { rootDir: relative(workingDir, targetDir) || '.', totalFiles, totalDirectories: directories.length, languages, structure, patterns, architecture, entryPoints, configFiles: configDetails, dependencies, }; } function analyzeConfigFilesDeep(configs, targetDir) { // Enhance config info with actual content analysis where useful return configs.map(config => { try { const fullPath = join(targetDir, config.path); if (config.name === 'package.json' && existsSync(fullPath)) { const content = JSON.parse(readFileSync(fullPath, 'utf-8')); const scripts = Object.keys(content.scripts || {}).slice(0, 5); if (scripts.length > 0) { return { ...config, purpose: `${config.purpose} (scripts: ${scripts.join(', ')})`, }; } } } catch { // Keep original } return config; }); } function analyzeCodebase(targetDir, workingDir, maxDepth, includeHidden) { const files = []; const directories = []; const configFiles = []; // Build directory tree and collect files const structure = buildDirectoryTree(targetDir, workingDir, 0, maxDepth, includeHidden, files, directories, configFiles); // Calculate language breakdown const languageCounts = new Map(); for (const file of files) { const lang = LANGUAGE_MAP[file.ext] || 'Other'; const existing = languageCounts.get(lang) || { ext: file.ext, count: 0 }; existing.count++; languageCounts.set(lang, existing); } const totalFiles = files.length; const languages = Array.from(languageCounts.entries()) .map(([language, data]) => ({ language, extension: data.ext, fileCount: data.count, percentage: totalFiles > 0 ? (data.count / totalFiles) * 100 : 0, })) .sort((a, b) => b.fileCount - a.fileCount); // Detect architecture patterns const dirNames = directories.map((d) => basename(d).toLowerCase()); const patterns = detectPatterns(dirNames, files.map((f) => f.path)); // Build architecture insights const architecture = buildArchitectureInsights(patterns, dirNames, targetDir, workingDir); // Find entry points const entryPoints = findEntryPoints(files.map((f) => f.path), configFiles); // Analyze dependencies const dependencies = analyzeDependencies(targetDir); return { rootDir: relative(workingDir, targetDir) || '.', totalFiles, totalDirectories: directories.length, languages, structure, patterns, architecture, entryPoints, configFiles, dependencies, }; } function buildDirectoryTree(dir, workingDir, depth, maxDepth, includeHidden, files, directories, configFiles) { const name = basename(dir) || dir; const relPath = relative(workingDir, dir) || '.'; const node = { name, path: relPath, type: 'directory', children: [], }; if (depth >= maxDepth) { return node; } try { const entries = readdirSync(dir, { withFileTypes: true }); for (const entry of entries) { // Skip hidden files/dirs if not requested if (!includeHidden && entry.name.startsWith('.')) { continue; } // Skip ignored directories if (IGNORED_DIRS.has(entry.name)) { continue; } const fullPath = join(dir, entry.name); const entryRelPath = relative(workingDir, fullPath); if (entry.isDirectory()) { directories.push(fullPath); const childNode = buildDirectoryTree(fullPath, workingDir, depth + 1, maxDepth, includeHidden, files, directories, configFiles); node.children.push(childNode); } else if (entry.isFile()) { try { const stat = statSync(fullPath); const ext = extname(entry.name).toLowerCase(); const language = LANGUAGE_MAP[ext]; files.push({ path: entryRelPath, ext, size: stat.size }); // Check if it's a config file const configInfo = CONFIG_FILES[entry.name]; if (configInfo) { configFiles.push({ name: entry.name, path: entryRelPath, type: configInfo.type, purpose: configInfo.purpose, }); } node.children.push({ name: entry.name, path: entryRelPath, type: 'file', size: stat.size, language, }); } catch { // Skip files we can't stat } } } } catch { // Skip directories we can't read } return node; } function detectPatterns(dirNames, filePaths) { const patterns = []; const dirNameSet = new Set(dirNames); const filePathsLower = filePaths.map((p) => p.toLowerCase()); for (const pattern of ARCHITECTURE_PATTERNS) { const matches = pattern.indicators.filter((ind) => dirNameSet.has(ind)); if (matches.length >= 2 || (matches.length >= 1 && pattern.indicators.length <= 2)) { const confidence = matches.length >= 3 ? 'high' : matches.length >= 2 ? 'medium' : 'low'; patterns.push({ name: pattern.name, type: pattern.type, description: `Detected ${pattern.name} pattern based on directory structure`, evidence: matches.map((m) => `Found "${m}" directory`), confidence, }); } } // Detect naming conventions const hasKebabCase = filePathsLower.some((p) => /[a-z]+-[a-z]+/.test(basename(p))); const hasCamelCase = filePaths.some((p) => /[a-z]+[A-Z][a-z]+/.test(basename(p))); const hasPascalCase = filePaths.some((p) => /^[A-Z][a-z]+[A-Z]/.test(basename(p))); const hasSnakeCase = filePathsLower.some((p) => /[a-z]+_[a-z]+/.test(basename(p))); const namingConventions = []; if (hasKebabCase) namingConventions.push('kebab-case'); if (hasCamelCase) namingConventions.push('camelCase'); if (hasPascalCase) namingConventions.push('PascalCase'); if (hasSnakeCase) namingConventions.push('snake_case'); if (namingConventions.length > 0) { patterns.push({ name: 'File Naming Convention', type: 'naming', description: `Uses ${namingConventions.join(', ')} naming convention(s)`, evidence: namingConventions.map((n) => `Detected ${n} pattern in filenames`), confidence: 'medium', }); } // Detect test patterns const hasTestDir = dirNameSet.has('test') || dirNameSet.has('tests') || dirNameSet.has('__tests__'); const hasSpecFiles = filePathsLower.some((p) => p.includes('.spec.') || p.includes('.test.')); if (hasTestDir || hasSpecFiles) { patterns.push({ name: 'Testing Structure', type: 'structural', description: hasTestDir ? 'Uses dedicated test directory' : 'Uses co-located test files (.spec/.test)', evidence: hasTestDir ? ['Found test/tests/__tests__ directory'] : ['Found .spec or .test files alongside source'], confidence: 'high', }); } return patterns; } function buildArchitectureInsights(patterns, dirNames, targetDir, workingDir) { const archPattern = patterns.find((p) => p.type === 'architectural'); const type = archPattern?.name || 'Custom/Unknown'; const layers = []; const components = []; // Identify layers based on common directory names const layerDirs = ['api', 'routes', 'controllers', 'services', 'models', 'views', 'components', 'utils', 'lib', 'core']; for (const layer of layerDirs) { if (dirNames.includes(layer)) { layers.push(layer); } } // Build component info from top-level directories try { const entries = readdirSync(targetDir, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory() && !IGNORED_DIRS.has(entry.name) && !entry.name.startsWith('.')) { const componentPath = relative(workingDir, join(targetDir, entry.name)); components.push({ name: entry.name, type: inferComponentType(entry.name), path: componentPath, responsibilities: inferResponsibilities(entry.name), }); } } } catch { // Ignore errors } const dataFlow = inferDataFlow(layers, type); return { type, layers, components, dataFlow, }; } function inferComponentType(name) { const lower = name.toLowerCase(); if (['api', 'routes', 'controllers', 'handlers'].includes(lower)) return 'API Layer'; if (['services', 'business', 'domain'].includes(lower)) return 'Business Logic'; if (['models', 'entities', 'schemas'].includes(lower)) return 'Data Models'; if (['views', 'pages', 'screens'].includes(lower)) return 'Presentation'; if (['components', 'ui'].includes(lower)) return 'UI Components'; if (['utils', 'helpers', 'lib', 'common', 'shared'].includes(lower)) return 'Utilities'; if (['config', 'configs', 'settings'].includes(lower)) return 'Configuration'; if (['test', 'tests', '__tests__', 'spec'].includes(lower)) return 'Testing'; if (['types', 'interfaces', 'contracts'].includes(lower)) return 'Type Definitions'; if (['middleware', 'middlewares'].includes(lower)) return 'Middleware'; if (['plugins', 'extensions', 'addons'].includes(lower)) return 'Extensions'; return 'Module'; } function inferResponsibilities(name) { const lower = name.toLowerCase(); const responsibilities = []; if (['api', 'routes'].includes(lower)) { responsibilities.push('HTTP request handling', 'Route definitions', 'Request/response processing'); } else if (lower === 'controllers') { responsibilities.push('Request handling', 'Input validation', 'Response formatting'); } else if (lower === 'services') { responsibilities.push('Business logic', 'Data orchestration', 'External integrations'); } else if (lower === 'models') { responsibilities.push('Data structures', 'Database schemas', 'Data validation'); } else if (['views', 'pages'].includes(lower)) { responsibilities.push('UI rendering', 'Page composition', 'Layout management'); } else if (lower === 'components') { responsibilities.push('Reusable UI elements', 'Component logic', 'State management'); } else if (['utils', 'helpers'].includes(lower)) { responsibilities.push('Utility functions', 'Common helpers', 'Shared logic'); } return responsibilities.length > 0 ? responsibilities : ['Module functionality']; } function inferDataFlow(layers, archType) { if (archType.includes('MVC')) { return ['Request → Controller → Model → View → Response']; } if (archType.includes('Clean')) { return [ 'External → Controllers → Use Cases → Entities', 'Entities → Use Cases → Presenters → External', ]; } if (archType.includes('Layered')) { return ['API → Services → Repositories → Database']; } if (layers.length > 0) { return [`Request → ${layers.join(' → ')} → Response`]; } return ['Standard request/response flow']; } function findEntryPoints(filePaths, configFiles) { const entryPoints = []; // Check for common entry point patterns const entryPatterns = [ 'index.ts', 'index.js', 'main.ts', 'main.js', 'app.ts', 'app.js', 'server.ts', 'server.js', 'cli.ts', 'cli.js', '__main__.py', 'main.py', 'app.py', 'manage.py', 'main.go', 'main.rs', 'lib.rs', ]; for (const pattern of entryPatterns) { const match = filePaths.find((p) => basename(p) === pattern || p.endsWith(`/src/${pattern}`) || p.endsWith(`/bin/${pattern}`)); if (match) { entryPoints.push(match); } } // Check package.json for main/bin const pkgJson = configFiles.find((c) => c.name === 'package.json'); if (pkgJson) { entryPoints.push(`${pkgJson.path} (see "main" or "bin" fields)`); } return [...new Set(entryPoints)]; } function analyzeDependencies(dir) { const result = { dependencies: [], devDependencies: [], hasDependencyFile: false, }; // Check package.json const pkgPath = join(dir, 'package.json'); if (existsSync(pkgPath)) { try { const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); result.packageManager = 'npm'; result.hasDependencyFile = true; result.dependencies = Object.keys(pkg.dependencies || {}).slice(0, 20); result.devDependencies = Object.keys(pkg.devDependencies || {}).slice(0, 20); } catch { // Ignore parse errors } } // Check pyproject.toml const pyprojectPath = join(dir, 'pyproject.toml'); if (existsSync(pyprojectPath)) { result.packageManager = result.packageManager || 'pip'; result.hasDependencyFile = true; // Basic TOML parsing for dependencies try { const content = readFileSync(pyprojectPath, 'utf-8'); const depMatch = content.match(/dependencies\s*=\s*\[([\s\S]*?)\]/); if (depMatch && depMatch[1]) { const deps = depMatch[1].match(/"([^"]+)"/g) || []; result.dependencies = deps.map((d) => d.replace(/"/g, '').split(/[<>=!]/)[0]?.trim() ?? '').slice(0, 20); } } catch { // Ignore parse errors } } // Check Cargo.toml const cargoPath = join(dir, 'Cargo.toml'); if (existsSync(cargoPath)) { result.packageManager = 'cargo'; result.hasDependencyFile = true; } // Check go.mod const goModPath = join(dir, 'go.mod'); if (existsSync(goModPath)) { result.packageManager = 'go modules'; result.hasDependencyFile = true; } return result; } function analyzeFile(filePath, workingDir, includeRelationships) { const content = readFileSync(filePath, 'utf-8'); const lines = content.split('\n'); const ext = extname(filePath).toLowerCase(); const language = LANGUAGE_MAP[ext] || 'Unknown'; const relPath = relative(workingDir, filePath); const imports = extractImports(content, ext); const exports = extractExports(content, ext); const functions = extractFunctions(content, ext); const classes = extractClasses(content, ext); const patterns = detectCodePatterns(content, ext); const complexity = calculateComplexity(content); const purpose = inferFilePurpose(basename(filePath), content, imports, exports, functions, classes); const relationships = []; if (includeRelationships) { // Build relationships from imports for (const imp of imports) { if (imp.isRelative) { relationships.push({ targetFile: imp.resolvedPath || imp.source, type: 'imports', symbols: imp.specifiers, }); } } } return { path: relPath, language, size: content.length, lineCount: lines.length, purpose, imports, exports, functions, classes, patterns, relationships, complexity, }; } function extractImports(content, ext) { const imports = []; if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) { // ES6 imports const importRegex = /import\s+(?:(\*\s+as\s+\w+)|(\{[^}]+\})|(\w+)(?:\s*,\s*\{([^}]+)\})?)\s+from\s+['"]([^'"]+)['"]/g; let match; while ((match = importRegex.exec(content)) !== null) { const source = match[5] || ''; let specifiers = []; if (match[1]) { // namespace import specifiers = [match[1].trim()]; } else if (match[2]) { // named imports specifiers = match[2] .replace(/[{}]/g, '') .split(',') .map((s) => s.trim()) .filter(Boolean); } else if (match[3]) { // default import specifiers = [match[3]]; if (match[4]) { // additional named imports specifiers.push(...match[4] .split(',') .map((s) => s.trim()) .filter(Boolean)); } } imports.push({ source, specifiers, isRelative: source.startsWith('.') || source.startsWith('/'), }); } // CommonJS requires const requireRegex = /(?:const|let|var)\s+(?:(\{[^}]+\})|(\w+))\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/g; while ((match = requireRegex.exec(content)) !== null) { const source = match[3] || ''; let specifiers = []; if (match[1]) { specifiers = match[1] .replace(/[{}]/g, '') .split(',') .map((s) => s.trim()) .filter(Boolean); } else if (match[2]) { specifiers = [match[2]]; } imports.push({ source, specifiers, isRelative: source.startsWith('.') || source.startsWith('/'), }); } } else if (ext === '.py') { // Python imports const fromImportRegex = /from\s+([^\s]+)\s+import\s+(.+)/g; let match; while ((match = fromImportRegex.exec(content)) !== null) { const source = match[1] ?? ''; const specifiers = (match[2] ?? '') .split(',') .map((s) => s.trim().split(' as ')[0]?.trim() ?? '') .filter(Boolean); imports.push({ source, specifiers, isRelative: source.startsWith('.'), }); } const importRegex = /^import\s+([^\s,]+(?:\s*,\s*[^\s,]+)*)/gm; while ((match = importRegex.exec(content)) !== null) { const modules = (match[1] ?? '').split(',').map((s) => s.trim().split(' as ')[0]?.trim() ?? ''); for (const mod of modules) { if (mod) { imports.push({ source: mod, specifiers: [mod], isRelative: mod.startsWith('.'), }); } } } } return imports; } function extractExports(content, ext) { const exports = []; if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) { // Export default const defaultMatch = content.match(/export\s+default\s+(?:class|function)?\s*(\w+)?/); if (defaultMatch) { exports.push({ name: defaultMatch[1] || 'default', type: 'default', }); } // Named exports const namedExportRegex = /export\s+(?:const|let|var|function|class|interface|type|enum)\s+(\w+)/g; let match; while ((match = namedExportRegex.exec(content)) !== null) { const name = match[1] || ''; const line = content.substring(0, match.index).split('\n').length; const lineContent = content.split('\n')[line - 1] || ''; let type = 'named'; if (lineContent.includes('interface')) type = 'interface'; else if (lineContent.includes('type')) type = 'type'; else if (lineContent.includes('class')) type = 'class'; else if (lineContent.includes('function')) type = 'function'; exports.push({ name, type }); } // Re-exports const reExportRegex = /export\s+\{([^}]+)\}\s+from/g; while ((match = reExportRegex.exec(content)) !== null) { const names = (match[1] ?? '').split(',').map((s) => s.trim().split(' as ')[0]?.trim() ?? ''); for (const name of names) { if (name) { exports.push({ name, type: 'named' }); } } } } else if (ext === '.py') { // Python __all__ const allMatch = content.match(/__all__\s*=\s*\[([\s\S]*?)\]/); if (allMatch && allMatch[1]) { const names = (allMatch[1].match(/['"]([^'"]+)['"]/g) || []).map((s) => s.replace(/['"]/g, '')); for (const name of names) { exports.push({ name, type: 'named' }); } } // Public functions/classes (not starting with _) const defRegex = /^(?:def|class)\s+([a-zA-Z][a-zA-Z0-9_]*)/gm; let match; while ((match = defRegex.exec(content)) !== null) { const name = match[1] || ''; if (!name.startsWith('_')) { const lineContent = content.split('\n')[content.substring(0, match.index).split('\n').length - 1] || ''; exports.push({ name, type: lineContent.startsWith('class') ? 'class' : 'function', }); } } } return exports; } function extractFunctions(content, ext) { const functions = []; if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) { // Regular functions const funcRegex = /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*([^{]+))?\s*\{/g; let match; while ((match = funcRegex.exec(content)) !== null) { const name = match[1] || ''; const params = (match[2] || '').split(',').map((p) => p.trim().split(':')[0]?.trim() ?? '').filter(Boolean); const returnType = (match[3] || '').trim() || undefined; const line = content.substring(0, match.index).split('\n').length; const isAsync = content.substring(match.index - 20, match.index).includes('async'); const isExported = content.substring(match.index - 20, match.index).includes('export'); functions.push({ name, line, parameters: params, returnType, isAsync, isExported, complexity: 1, }); } // Arrow functions const arrowRegex = /(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(?([^)=]*)\)?\s*(?::\s*([^=]+))?\s*=>/g; while ((match = arrowRegex.exec(content)) !== null) { const name = match[1] || ''; const params = (match[2] || '').split(',').map((p) => p.trim().split(':')[0]?.trim() ?? '').filter(Boolean); const returnType = (match[3] || '').trim() || undefined; const line = content.substring(0, match.index).split('\n').length; const isAsync = content.substring(match.index, match.index + 50).includes('async'); const isExported = content.substring(match.index - 20, match.index).includes('export'); functions.push({ name, line, parameters: params, returnType, isAsync, isExported, complexity: 1, }); } } else if (ext === '.py') { const defRegex = /(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?\s*:/g; let match; while ((match = defRegex.exec(content)) !== null) { const name = match[1] ?? ''; const params = (match[2] ?? '') .split(',') .map((p) => (p.trim().split(':')[0]?.split('=')[0]?.trim()) ?? '') .filter((p) => p && p !== 'self' && p !== 'cls'); const returnType = (match[3] || '').trim() || undefined; const line = content.substring(0, match.index).split('\n').length; const isAsync = content.substring(match.index - 10, match.index).includes('async'); functions.push({ name, line, parameters: params, returnType, isAsync, isExported: !name.startsWith('_'), complexity: 1, }); } } return functions; } function extractClasses(content, ext) { const classes = []; if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) { const classRegex = /(?:export\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([^{]+))?\s*\{/g; let match; while ((match = classRegex.exec(content)) !== null) { const name = match[1] || ''; const line = content.substring(0, match.index).split('\n').length; const extendsClass = match[2]; const implementsList = match[3] ? match[3] .split(',') .map((s) => s.trim()) .filter(Boolean) : undefined; const isExported = content.substring(match.index - 20, match.index).includes('export'); // Find class body and extract methods/properties const classStart = match.index + match[0].length; let braceCount = 1; let classEnd = classStart; for (let i = classStart; i < content.length && braceCount > 0; i++) { if (content[i] === '{') braceCount++; if (content[i] === '}') braceCount--; classEnd = i; } const classBody = content.substring(classStart, classEnd); const methods = []; const properties = []; // Extract methods const methodRegex = /(?:async\s+)?(?:public|private|protected)?\s*(\w+)\s*\([^)]*\)/g; let methodMatch; while ((methodMatch = methodRegex.exec(classBody)) !== null) { if (methodMatch[1] && methodMatch[1] !== 'constructor') { methods.push(methodMatch[1]); } } // Extract properties const propRegex = /(?:public|private|protected|readonly)?\s+(\w+)\s*[:=]/g; let propMatch; while ((propMatch = propRegex.exec(classBody)) !== null) { if (propMatch[1]) { properties.push(propMatch[1]); } } classes.push({ name, line, methods, properties, extends: extendsClass, implements: implementsList, isExported, }); } } else if (ext === '.py') { const classRegex = /class\s+(\w+)(?:\s*\(([^)]*)\))?\s*:/g; let match; while ((match = classRegex.exec(content)) !== null) { const name = match[1] || ''; const line = content.substring(0, match.index).split('\n').length; const parentClasses = match[2] ? match[2] .split(',') .map((s) => s.trim()) .filter(Boolean) : []; classes.push({ name, line, methods: [], properties: [], extends: parentClasses[0], implements: parentClasses.slice(1), isExported: !name.startsWith('_'), }); } } return classes; } function detectCodePatterns(content, _ext) { const patterns = []; // Common patterns detection if (/async\s+function|async\s+\(|await\s+/.test(content)) { patterns.push('Async/Await'); } if (/Promise\.all|Promise\.race|Promise\.allSettled/.test(content)) { patterns.push('Promise Combinators'); } if (/try\s*\{[\s\S]*?\}\s*catch/.test(content)) { patterns.push('Try-Catch Error Handling'); } if (/\.map\s*\(|\.filter\s*\(|\.reduce\s*\(/.test(content)) { patterns.push('Functional Array Methods'); } if (/Object\.freeze|Object\.seal|readonly\s+/.test(content)) { patterns.push('Immutability'); } if (/interface\s+\w+|type\s+\w+\s*=/.test(content)) { patterns.push('TypeScript Types'); } if (/\bclass\s+\w+/.test(content)) { patterns.push('Object-Oriented'); } if (/export\s+default|export\s+\{|module\.exports/.test(content)) { patterns.push('Module Pattern'); } if (/\.test\(|\.spec\.|describe\s*\(|it\s*\(|expect\s*\(/.test(content)) { patterns.push('Testing'); } if (/console\.(log|error|warn|debug)/.test(content)) { patterns.push('Console Logging'); } if (/@decorator|@\w+\s*\(|@\w+\s*\n/.test(content)) { patterns.push('Decorators'); } if (/useEffect|useState|useCallback|useMemo/.test(content)) { patterns.push('React Hooks'); } if (/createSlice|createReducer|createAction/.test(content)) { patterns.push('Redux Toolkit'); } return patterns; } function calculateComplexity(content) { const lines = content.split('\n'); const linesOfCode = lines.filter((l) => l.trim() && !l.trim().startsWith('//')).length; const linesOfComments = lines.filter((l) => l.trim().startsWith('//')).length; // Simple cyclomatic complexity estimation let cyclomaticComplexity = 1; const controlFlowPatterns = /\bif\b|\belse\b|\bfor\b|\bwhile\b|\bcase\b|\bcatch\b|\b\?\s*:/g; const matches = content.match(controlFlowPatterns); if (matches) { cyclomaticComplexity += matches.length; } // Cognitive complexity (simplified) let cognitiveComplexity = cyclomaticComplexity; const nestedPatterns = /\{\s*\{|\bif\b.*\bif\b/g; const nestedMatches = content.match(nestedPatterns); if (nestedMatches) { cognitiveComplexity += nestedMatches.length * 2; } // Maintainability index (simplified, 0-100 scale) cons