erosolar-cli
Version:
Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning
1,297 lines • 76 kB
JavaScript
/**
* Learn Tools - Codebase exploration and learning tools for understanding codebases.
*
* These tools enable deep codebase exploration without requiring external API calls
* for the core analysis. The AI can use these tools to build a comprehensive
* understanding of any codebase's architecture, patterns, and conventions.
*
* Features:
* - Codebase structure analysis
* - Pattern detection and learning
* - Architecture understanding
* - File relationship mapping
* - Topic-based exploration
*/
import { readFileSync, existsSync, readdirSync, statSync } from 'node:fs';
import { join, relative, extname, basename } from 'node:path';
import { buildError } from '../core/errors.js';
// =====================================================
// Constants
// =====================================================
/**
 * Entry names excluded from codebase traversal.
 *
 * Membership is tested against the bare entry name, never a path.
 * buildDirectoryTree checks every directory entry against this set before it
 * looks at the entry type, so a regular file that happens to carry one of
 * these names is skipped as well. buildArchitectureInsights uses the set to
 * drop top-level directories from the component list.
 */
const IGNORED_DIRS = new Set([
'node_modules',
'.git',
'.svn',
'.hg',
'dist',
'build',
'out',
'.next',
'.nuxt',
'.output',
'coverage',
'.nyc_output',
'.cache',
'.turbo',
'.vercel',
'.netlify',
'__pycache__',
'.pytest_cache',
'.mypy_cache',
'.ruff_cache',
'venv',
'.venv',
'env',
'.env',
'target',
'vendor',
'.idea',
'.vscode',
]);
/**
 * Display name for each recognised file extension.
 *
 * Keys are lower-case extensions including the leading dot; callers in this
 * module look them up with `extname(name).toLowerCase()`. An extension that is
 * missing here is reported as 'Other' in the language breakdown and as
 * 'Unknown' by analyzeFile, and such files are left out of the deep
 * source scan in analyzeCodebaseDeep.
 */
const LANGUAGE_MAP = {
'.ts': 'TypeScript',
'.tsx': 'TypeScript React',
'.js': 'JavaScript',
'.jsx': 'JavaScript React',
'.mjs': 'JavaScript (ESM)',
'.cjs': 'JavaScript (CJS)',
'.py': 'Python',
'.pyw': 'Python',
'.pyi': 'Python Stub',
'.rs': 'Rust',
'.go': 'Go',
'.java': 'Java',
'.kt': 'Kotlin',
'.kts': 'Kotlin Script',
'.scala': 'Scala',
'.rb': 'Ruby',
'.php': 'PHP',
'.cs': 'C#',
'.fs': 'F#',
'.cpp': 'C++',
'.cc': 'C++',
'.cxx': 'C++',
'.c': 'C',
'.h': 'C/C++ Header',
'.hpp': 'C++ Header',
'.swift': 'Swift',
'.m': 'Objective-C',
'.mm': 'Objective-C++',
'.vue': 'Vue',
'.svelte': 'Svelte',
'.elm': 'Elm',
'.ex': 'Elixir',
'.exs': 'Elixir Script',
'.erl': 'Erlang',
'.hs': 'Haskell',
'.ml': 'OCaml',
'.mli': 'OCaml Interface',
'.lua': 'Lua',
'.pl': 'Perl',
'.pm': 'Perl Module',
'.sh': 'Shell',
'.bash': 'Bash',
'.zsh': 'Zsh',
'.fish': 'Fish',
'.ps1': 'PowerShell',
'.sql': 'SQL',
'.json': 'JSON',
'.yaml': 'YAML',
'.yml': 'YAML',
'.toml': 'TOML',
'.xml': 'XML',
'.md': 'Markdown',
'.mdx': 'MDX',
'.html': 'HTML',
'.htm': 'HTML',
'.css': 'CSS',
'.scss': 'SCSS',
'.sass': 'Sass',
'.less': 'Less',
'.styl': 'Stylus',
};
/**
 * Well-known configuration files, keyed by file name, with a coarse `type`
 * tag and a human-readable `purpose`.
 *
 * buildDirectoryTree looks entries up by their bare file name
 * (`CONFIG_FILES[entry.name]`), which has two consequences:
 * - dot-files listed here are only seen when hidden entries are included,
 *   because hidden names are filtered out before the lookup;
 * - NOTE(review): the '.github/workflows' key contains a path separator and
 *   is compared against single file names, so it cannot match there.
 */
const CONFIG_FILES = {
'package.json': { type: 'npm', purpose: 'Node.js package configuration and dependencies' },
'tsconfig.json': { type: 'typescript', purpose: 'TypeScript compiler configuration' },
'pyproject.toml': { type: 'python', purpose: 'Python project configuration (PEP 517/518)' },
'setup.py': { type: 'python', purpose: 'Python package setup (legacy)' },
'requirements.txt': { type: 'python', purpose: 'Python dependencies' },
'Cargo.toml': { type: 'rust', purpose: 'Rust package manifest' },
'go.mod': { type: 'go', purpose: 'Go module definition' },
'pom.xml': { type: 'maven', purpose: 'Maven project configuration' },
'build.gradle': { type: 'gradle', purpose: 'Gradle build configuration' },
'build.gradle.kts': { type: 'gradle', purpose: 'Gradle Kotlin build configuration' },
'Gemfile': { type: 'ruby', purpose: 'Ruby dependencies (Bundler)' },
'composer.json': { type: 'php', purpose: 'PHP Composer dependencies' },
'.eslintrc.json': { type: 'linting', purpose: 'ESLint configuration' },
'.eslintrc.js': { type: 'linting', purpose: 'ESLint configuration' },
'.prettierrc': { type: 'formatting', purpose: 'Prettier configuration' },
'prettier.config.js': { type: 'formatting', purpose: 'Prettier configuration' },
'.gitignore': { type: 'git', purpose: 'Git ignore patterns' },
'.dockerignore': { type: 'docker', purpose: 'Docker ignore patterns' },
'Dockerfile': { type: 'docker', purpose: 'Docker image definition' },
'docker-compose.yml': { type: 'docker', purpose: 'Docker Compose services' },
'docker-compose.yaml': { type: 'docker', purpose: 'Docker Compose services' },
'Makefile': { type: 'build', purpose: 'Make build automation' },
'.env.example': { type: 'config', purpose: 'Environment variable template' },
'jest.config.js': { type: 'testing', purpose: 'Jest test configuration' },
'vitest.config.ts': { type: 'testing', purpose: 'Vitest test configuration' },
'webpack.config.js': { type: 'bundler', purpose: 'Webpack bundler configuration' },
'vite.config.ts': { type: 'bundler', purpose: 'Vite build tool configuration' },
'rollup.config.js': { type: 'bundler', purpose: 'Rollup bundler configuration' },
'next.config.js': { type: 'framework', purpose: 'Next.js configuration' },
'nuxt.config.ts': { type: 'framework', purpose: 'Nuxt.js configuration' },
'tailwind.config.js': { type: 'css', purpose: 'Tailwind CSS configuration' },
'.github/workflows': { type: 'ci', purpose: 'GitHub Actions workflows' },
'.gitlab-ci.yml': { type: 'ci', purpose: 'GitLab CI/CD configuration' },
'Jenkinsfile': { type: 'ci', purpose: 'Jenkins pipeline definition' },
};
/**
 * Directory-name heuristics used by detectPatterns.
 *
 * Each entry lists `indicators` that are compared for exact equality against
 * the lower-cased base names of the directories found during traversal. A
 * pattern is reported when at least two indicators are present, or when a
 * single indicator is present and the pattern lists no more than two.
 * `type` is copied onto the reported pattern; buildArchitectureInsights takes
 * the first reported pattern whose type is 'architectural' as the overall
 * architecture, so the order of this array matters.
 */
const ARCHITECTURE_PATTERNS = [
{
name: 'MVC (Model-View-Controller)',
indicators: ['models', 'views', 'controllers', 'routes'],
type: 'architectural',
},
{
name: 'Clean Architecture',
indicators: ['domain', 'application', 'infrastructure', 'presentation', 'entities', 'use-cases', 'usecases'],
type: 'architectural',
},
{
name: 'Hexagonal Architecture',
indicators: ['ports', 'adapters', 'domain', 'application'],
type: 'architectural',
},
{
name: 'Feature-based Structure',
indicators: ['features', 'modules'],
type: 'structural',
},
{
name: 'Component-based',
indicators: ['components', 'shared', 'common'],
type: 'structural',
},
{
name: 'Layered Architecture',
indicators: ['api', 'services', 'repositories', 'data', 'business'],
type: 'architectural',
},
{
name: 'Microservices',
indicators: ['services', 'gateway', 'docker-compose'],
type: 'architectural',
},
{
name: 'Monorepo',
indicators: ['packages', 'apps', 'libs', 'workspace'],
type: 'structural',
},
{
name: 'Plugin Architecture',
indicators: ['plugins', 'extensions', 'addons'],
type: 'architectural',
},
{
name: 'Event-Driven',
indicators: ['events', 'handlers', 'listeners', 'subscribers', 'publishers'],
type: 'architectural',
},
];
// =====================================================
// Tool Creation
// =====================================================
/**
 * Build every learn tool, each bound to the same working directory.
 *
 * @param {string} workingDir - Directory the tools operate on.
 * @returns {object[]} Tool definitions in the order: learn_codebase,
 *   learn_file, learn_topic, learn_summary.
 */
export function createLearnTools(workingDir) {
  const factories = [
    createLearnCodebaseTool,
    createLearnFileTool,
    createLearnTopicTool,
    createLearnSummaryTool,
  ];
  return factories.map((createTool) => createTool(workingDir));
}
// =====================================================
// learn_codebase Tool
// =====================================================
/**
 * Create the `learn_codebase` tool definition.
 *
 * The handler analyses `workingDir` (or the `focusPath` subdirectory resolved
 * against it) and returns the formatted analysis. A missing directory yields
 * an "Error: Directory not found" string; any thrown error is turned into a
 * message by buildError.
 *
 * @param {string} workingDir - Root directory the tool is bound to.
 * @returns {object} Tool definition (name, description, parameters, cacheable, handler).
 */
function createLearnCodebaseTool(workingDir) {
  // Argument schema advertised to the caller.
  const parameters = {
    type: 'object',
    properties: {
      depth: {
        type: 'number',
        description: 'Maximum directory depth to analyze (default: 5)',
      },
      includeHidden: {
        type: 'boolean',
        description: 'Include hidden files/directories in analysis (default: false)',
      },
      focusPath: {
        type: 'string',
        description: 'Focus analysis on a specific subdirectory',
      },
    },
    additionalProperties: false,
  };
  const handler = async (args) => {
    try {
      const maxDepth = typeof args.depth === 'number' ? args.depth : 5;
      const showHidden = args.includeHidden === true;
      const subdirectory = args.focusPath;
      let targetDir = workingDir;
      if (subdirectory) {
        targetDir = resolveFilePath(workingDir, subdirectory);
      }
      if (existsSync(targetDir)) {
        const analysis = await analyzeCodebaseDeep(targetDir, workingDir, maxDepth, showHidden);
        return formatCodebaseAnalysis(analysis);
      }
      return `Error: Directory not found: ${targetDir}`;
    }
    catch (error) {
      return buildError('analyzing codebase', error, { workingDir });
    }
  };
  return {
    name: 'learn_codebase',
    // Continuation lines stay at column 0: they are part of the runtime string.
    description: `Analyze and learn the entire codebase structure, architecture, patterns, and conventions.
This tool provides a comprehensive overview of the project without requiring external API calls.
Use this to understand:
- Project structure and organization
- Primary programming languages used
- Architectural patterns detected
- Configuration and build setup
- Entry points and key files
- Dependencies and package management`,
    parameters,
    cacheable: true,
    handler,
  };
}
// =====================================================
// learn_file Tool
// =====================================================
/**
 * Create the `learn_file` tool definition.
 *
 * The handler resolves `path` against `workingDir`, rejects missing paths and
 * directories with an "Error: ..." string, and otherwise returns the formatted
 * single-file analysis. Relationship analysis is on unless
 * `includeRelationships` is exactly `false`.
 *
 * @param {string} workingDir - Root directory the tool is bound to.
 * @returns {object} Tool definition (name, description, parameters, cacheable, handler).
 */
function createLearnFileTool(workingDir) {
  const parameters = {
    type: 'object',
    properties: {
      path: {
        type: 'string',
        description: 'Path to the file to analyze',
      },
      includeRelationships: {
        type: 'boolean',
        description: 'Analyze relationships to other files (default: true)',
      },
    },
    required: ['path'],
    additionalProperties: false,
  };
  const handler = async (args) => {
    try {
      const filePath = resolveFilePath(workingDir, args.path);
      const withRelationships = args.includeRelationships !== false;
      if (!existsSync(filePath)) {
        return `Error: File not found: ${filePath}`;
      }
      if (statSync(filePath).isDirectory()) {
        return `Error: Path is a directory, not a file: ${filePath}`;
      }
      return formatFileAnalysis(analyzeFile(filePath, workingDir, withRelationships));
    }
    catch (error) {
      return buildError('analyzing file', error, { path: String(args.path) });
    }
  };
  return {
    name: 'learn_file',
    // Continuation lines stay at column 0: they are part of the runtime string.
    description: `Deep-learn a specific file's purpose, structure, patterns, and relationships.
This tool provides detailed analysis of a single file including:
- File purpose and responsibilities
- Imports and dependencies
- Exports and public interface
- Functions and classes with their purposes
- Complexity metrics
- Relationships to other files`,
    parameters,
    cacheable: true,
    handler,
  };
}
// =====================================================
// learn_topic Tool
// =====================================================
/**
 * Create the `learn_topic` tool definition.
 *
 * The handler searches the working directory for material related to `topic`
 * and returns the formatted result. `maxFiles` defaults to 10 and
 * `maxExamples` to 5 when they are not numbers. A topic that is missing, not
 * a string, or blank is rejected with a validation message before any
 * analysis runs; thrown errors are turned into a message by buildError.
 *
 * @param {string} workingDir - Root directory the tool is bound to.
 * @returns {object} Tool definition (name, description, parameters, cacheable, handler).
 */
function createLearnTopicTool(workingDir) {
  return {
    name: 'learn_topic',
    // Continuation lines stay at column 0: they are part of the runtime string.
    description: `Learn about a specific topic, pattern, or concept within the codebase.
Use this to understand how specific patterns are implemented, such as:
- Authentication/authorization patterns
- Error handling conventions
- Data validation approaches
- API design patterns
- State management
- Testing patterns
- Any custom pattern or convention`,
    parameters: {
      type: 'object',
      properties: {
        topic: {
          type: 'string',
          description: 'The topic or pattern to learn about (e.g., "authentication", "error handling", "api routes")',
        },
        maxFiles: {
          type: 'number',
          description: 'Maximum number of relevant files to analyze (default: 10)',
        },
        maxExamples: {
          type: 'number',
          description: 'Maximum number of code examples to include (default: 5)',
        },
      },
      required: ['topic'],
      additionalProperties: false,
    },
    cacheable: true,
    handler: async (args) => {
      try {
        const topic = args['topic'];
        const maxFiles = typeof args['maxFiles'] === 'number' ? args['maxFiles'] : 10;
        const maxExamples = typeof args['maxExamples'] === 'number' ? args['maxExamples'] : 5;
        // Check the type first: a truthy non-string (number, object, ...) has
        // no trim() and would otherwise surface as an unrelated TypeError.
        if (typeof topic !== 'string' || topic.trim() === '') {
          return 'Error: topic must be a non-empty string';
        }
        const analysis = analyzeTopic(workingDir, topic.trim(), maxFiles, maxExamples);
        return formatTopicAnalysis(analysis);
      }
      catch (error) {
        return buildError('analyzing topic', error, { topic: String(args['topic']) });
      }
    },
  };
}
// =====================================================
// learn_summary Tool
// =====================================================
/**
 * Create the `learn_summary` tool definition.
 *
 * The handler analyses the whole working directory (depth 4, hidden entries
 * excluded) and formats the result as a learning summary. `format` falls back
 * to 'markdown' when it is falsy; `focus` is passed through unchanged.
 *
 * @param {string} workingDir - Root directory the tool is bound to.
 * @returns {object} Tool definition (name, description, parameters, cacheable, handler).
 */
function createLearnSummaryTool(workingDir) {
  const parameters = {
    type: 'object',
    properties: {
      format: {
        type: 'string',
        description: 'Output format: "markdown" (default) or "text"',
        enum: ['markdown', 'text'],
      },
      focus: {
        type: 'string',
        description: 'Focus area for the summary (e.g., "frontend", "backend", "api")',
      },
    },
    additionalProperties: false,
  };
  const handler = async (args) => {
    try {
      const outputFormat = args.format || 'markdown';
      const focusArea = args.focus;
      const overview = analyzeCodebase(workingDir, workingDir, 4, false);
      return formatLearningSummary(overview, outputFormat, focusArea);
    }
    catch (error) {
      return buildError('generating learning summary', error, { workingDir });
    }
  };
  return {
    name: 'learn_summary',
    // Continuation lines stay at column 0: they are part of the runtime string.
    description: `Generate a learning summary for the codebase suitable for onboarding.
This creates a comprehensive summary including:
- Quick start guide
- Key concepts and terminology
- Architecture overview
- Important files and their purposes
- Common patterns and conventions
- Development workflow suggestions`,
    parameters,
    cacheable: true,
    handler,
  };
}
// =====================================================
// Analysis Functions
// =====================================================
/**
 * Deep codebase analysis: directory traversal plus a content scan of a
 * bounded sample of source files.
 *
 * On top of the structural result (languages, directory-based patterns,
 * architecture, entry points, dependencies) this reads up to 50 source files
 * under 100000 bytes with a known language and reports every code pattern
 * that appears in at least three of them.
 *
 * @param {string} targetDir - Directory to analyse.
 * @param {string} workingDir - Base directory; all reported paths are relative to it.
 * @param {number} maxDepth - Maximum directory depth to traverse.
 * @param {boolean} includeHidden - Whether dot-entries are traversed.
 * @returns {Promise<object>} Analysis with rootDir, totalFiles, totalDirectories,
 *   languages, structure, patterns, architecture, entryPoints, configFiles
 *   and dependencies.
 */
async function analyzeCodebaseDeep(targetDir, workingDir, maxDepth, includeHidden) {
  const MAX_SCANNED_FILE_BYTES = 100000;
  const MAX_SCANNED_FILES = 50;
  const MIN_FILES_FOR_PATTERN = 3;
  const files = [];
  const directories = [];
  const configFiles = [];
  // Phase 1: Directory structure traversal (fills the three arrays above).
  const structure = buildDirectoryTree(targetDir, workingDir, 0, maxDepth, includeHidden, files, directories, configFiles);
  // Phase 2: File type detection and language breakdown.
  const languageCounts = new Map();
  for (const file of files) {
    const lang = LANGUAGE_MAP[file.ext] || 'Other';
    // The first extension seen for a language is the one reported.
    const existing = languageCounts.get(lang) || { ext: file.ext, count: 0 };
    existing.count++;
    languageCounts.set(lang, existing);
  }
  const totalFiles = files.length;
  const languages = Array.from(languageCounts.entries())
    .map(([language, data]) => ({
      language,
      extension: data.ext,
      fileCount: data.count,
      percentage: totalFiles > 0 ? (data.count / totalFiles) * 100 : 0,
    }))
    .sort((a, b) => b.fileCount - a.fileCount);
  // Phase 3: Architecture pattern detection from directory and file names.
  const dirNames = directories.map((d) => basename(d).toLowerCase());
  const filePaths = files.map((f) => f.path);
  const patterns = detectPatterns(dirNames, filePaths);
  // Phase 4: Configuration file analysis with content inspection.
  // Config paths were recorded relative to workingDir, so they must be
  // resolved against workingDir; resolving against targetDir pointed at
  // non-existent files whenever a focus subdirectory was analysed.
  const configDetails = analyzeConfigFilesDeep(configFiles, workingDir);
  // Phase 5: Dependency analysis.
  const dependencies = analyzeDependencies(targetDir);
  // Phase 6: Entry point identification.
  const entryPoints = findEntryPoints(filePaths, configFiles);
  // Phase 7: Component and layer mapping.
  const architecture = buildArchitectureInsights(patterns, dirNames, targetDir, workingDir);
  // Phase 8: Content scan of a bounded sample of source files.
  const keyFiles = files
    .filter((f) => LANGUAGE_MAP[f.ext] && f.size < MAX_SCANNED_FILE_BYTES)
    .slice(0, MAX_SCANNED_FILES);
  const codePatterns = new Map();
  for (const file of keyFiles) {
    try {
      const content = readFileSync(join(workingDir, file.path), 'utf-8');
      for (const pattern of detectCodePatterns(content, file.ext)) {
        codePatterns.set(pattern, (codePatterns.get(pattern) || 0) + 1);
      }
    }
    catch {
      // Best effort: a file that cannot be read or scanned is left out.
    }
  }
  // Only patterns seen in several files are reported.
  for (const [patternName, count] of codePatterns.entries()) {
    if (count < MIN_FILES_FOR_PATTERN) {
      continue;
    }
    patterns.push({
      name: patternName,
      type: 'design',
      description: `Found ${patternName} pattern in ${count} files`,
      evidence: [`Detected in ${count} source files`],
      confidence: count >= 10 ? 'high' : count >= 5 ? 'medium' : 'low',
    });
  }
  return {
    rootDir: relative(workingDir, targetDir) || '.',
    totalFiles,
    totalDirectories: directories.length,
    languages,
    structure,
    patterns,
    architecture,
    entryPoints,
    configFiles: configDetails,
    dependencies,
  };
}
/**
 * Enrich config entries with details read from the files themselves.
 *
 * Only `package.json` is inspected: when it exists under `targetDir` and
 * declares scripts, the first five script names are appended to the entry's
 * purpose. Every other entry, and any entry whose file is missing or
 * unparsable, is returned as the same object that was passed in.
 *
 * @param {object[]} configs - Config entries ({ name, path, type, purpose }).
 * @param {string} targetDir - Directory that each `config.path` is joined onto.
 * @returns {object[]} One entry per input, in the same order.
 */
function analyzeConfigFilesDeep(configs, targetDir) {
  const MAX_SCRIPTS_SHOWN = 5;
  return configs.map((entry) => {
    let scriptNames = [];
    try {
      const manifestPath = join(targetDir, entry.path);
      const isReadableManifest = entry.name === 'package.json' && existsSync(manifestPath);
      if (isReadableManifest) {
        const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8'));
        scriptNames = Object.keys(manifest.scripts || {}).slice(0, MAX_SCRIPTS_SHOWN);
      }
    }
    catch {
      // Any failure means "no extra detail"; the entry is kept untouched.
      scriptNames = [];
    }
    if (scriptNames.length === 0) {
      return entry;
    }
    return {
      ...entry,
      purpose: `${entry.purpose} (scripts: ${scriptNames.join(', ')})`,
    };
  });
}
/**
 * Structural codebase analysis based on names and layout only.
 *
 * Walks the tree once, then derives the language breakdown, directory-based
 * patterns, architecture insights, entry points and dependency information.
 * Config entries are returned exactly as collected during the walk.
 *
 * @param {string} targetDir - Directory to analyse.
 * @param {string} workingDir - Base directory; reported paths are relative to it.
 * @param {number} maxDepth - Maximum directory depth to traverse.
 * @param {boolean} includeHidden - Whether dot-entries are traversed.
 * @returns {object} Analysis with rootDir, totalFiles, totalDirectories,
 *   languages, structure, patterns, architecture, entryPoints, configFiles
 *   and dependencies.
 */
function analyzeCodebase(targetDir, workingDir, maxDepth, includeHidden) {
  // Output collectors filled in by the tree walk.
  const files = [];
  const directories = [];
  const configFiles = [];
  const structure = buildDirectoryTree(targetDir, workingDir, 0, maxDepth, includeHidden, files, directories, configFiles);

  // Tally files per language; the first extension seen represents the language.
  const tallies = new Map();
  for (const { ext } of files) {
    const language = LANGUAGE_MAP[ext] || 'Other';
    const tally = tallies.get(language);
    if (tally) {
      tally.count += 1;
    }
    else {
      tallies.set(language, { ext, count: 1 });
    }
  }
  const totalFiles = files.length;
  const languages = [...tallies].map(([language, { ext, count }]) => ({
    language,
    extension: ext,
    fileCount: count,
    percentage: totalFiles > 0 ? (count / totalFiles) * 100 : 0,
  }));
  languages.sort((left, right) => right.fileCount - left.fileCount);

  const dirNames = directories.map((dirPath) => basename(dirPath).toLowerCase());
  const patterns = detectPatterns(dirNames, files.map((file) => file.path));
  const architecture = buildArchitectureInsights(patterns, dirNames, targetDir, workingDir);
  const entryPoints = findEntryPoints(files.map((file) => file.path), configFiles);
  const dependencies = analyzeDependencies(targetDir);

  return {
    rootDir: relative(workingDir, targetDir) || '.',
    totalFiles,
    totalDirectories: directories.length,
    languages,
    structure,
    patterns,
    architecture,
    entryPoints,
    configFiles,
    dependencies,
  };
}
/**
 * Recursively build a tree node for `dir` while filling the collector arrays.
 *
 * Entries are skipped when they are hidden (unless `includeHidden`) or when
 * their name is in IGNORED_DIRS; both checks run on the bare name before the
 * entry type is examined. Entries that are neither directories nor regular
 * files are ignored. Unreadable directories and files that cannot be stat'ed
 * are skipped silently.
 *
 * @param {string} dir - Directory to read.
 * @param {string} workingDir - Base for all relative paths that are recorded.
 * @param {number} depth - Depth of `dir`; children are not read once it reaches `maxDepth`.
 * @param {number} maxDepth - Traversal limit.
 * @param {boolean} includeHidden - Whether dot-entries are included.
 * @param {object[]} files - Receives { path, ext, size } per file.
 * @param {string[]} directories - Receives the full path of each directory visited.
 * @param {object[]} configFiles - Receives { name, path, type, purpose } per known config file.
 * @returns {object} Directory node { name, path, type: 'directory', children }.
 */
function buildDirectoryTree(dir, workingDir, depth, maxDepth, includeHidden, files, directories, configFiles) {
  const node = {
    name: basename(dir) || dir,
    path: relative(workingDir, dir) || '.',
    type: 'directory',
    children: [],
  };
  if (depth >= maxDepth) {
    return node;
  }
  try {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const entryName = entry.name;
      const hiddenAndExcluded = !includeHidden && entryName.startsWith('.');
      if (hiddenAndExcluded || IGNORED_DIRS.has(entryName)) {
        continue;
      }
      const fullPath = join(dir, entryName);
      const entryRelPath = relative(workingDir, fullPath);
      if (entry.isDirectory()) {
        directories.push(fullPath);
        node.children.push(buildDirectoryTree(fullPath, workingDir, depth + 1, maxDepth, includeHidden, files, directories, configFiles));
        continue;
      }
      if (!entry.isFile()) {
        continue;
      }
      try {
        const { size } = statSync(fullPath);
        const ext = extname(entryName).toLowerCase();
        const language = LANGUAGE_MAP[ext];
        files.push({ path: entryRelPath, ext, size });
        // Known config files are recorded in addition to the plain file entry.
        const configInfo = CONFIG_FILES[entryName];
        if (configInfo) {
          const { type, purpose } = configInfo;
          configFiles.push({ name: entryName, path: entryRelPath, type, purpose });
        }
        node.children.push({
          name: entryName,
          path: entryRelPath,
          type: 'file',
          size,
          language,
        });
      }
      catch {
        // File could not be stat'ed: leave it out.
      }
    }
  }
  catch {
    // Directory could not be read: return what was collected so far.
  }
  return node;
}
/**
 * Derive patterns from directory names and file paths alone.
 *
 * Reports, in this order: architecture/structure patterns whose indicators
 * match directory names, one combined file-naming-convention entry, and one
 * testing-structure entry.
 *
 * @param {string[]} dirNames - Lower-cased directory base names.
 * @param {string[]} filePaths - File paths as collected during traversal.
 * @returns {object[]} Patterns ({ name, type, description, evidence, confidence }).
 */
function detectPatterns(dirNames, filePaths) {
  const knownDirs = new Set(dirNames);
  const lowerPaths = filePaths.map((p) => p.toLowerCase());
  const detected = [];

  // Architecture patterns: two matching indicators, or one for short lists.
  for (const { name, type, indicators } of ARCHITECTURE_PATTERNS) {
    const hits = indicators.filter((indicator) => knownDirs.has(indicator));
    const enoughEvidence = hits.length >= 2 || (hits.length >= 1 && indicators.length <= 2);
    if (!enoughEvidence) {
      continue;
    }
    let confidence = 'low';
    if (hits.length >= 3) {
      confidence = 'high';
    }
    else if (hits.length >= 2) {
      confidence = 'medium';
    }
    detected.push({
      name,
      type,
      description: `Detected ${name} pattern based on directory structure`,
      evidence: hits.map((hit) => `Found "${hit}" directory`),
      confidence,
    });
  }

  // Naming conventions: each check runs over file base names; kebab and
  // snake case are tested on the lower-cased paths.
  const namingChecks = [
    ['kebab-case', lowerPaths, /[a-z]+-[a-z]+/],
    ['camelCase', filePaths, /[a-z]+[A-Z][a-z]+/],
    ['PascalCase', filePaths, /^[A-Z][a-z]+[A-Z]/],
    ['snake_case', lowerPaths, /[a-z]+_[a-z]+/],
  ];
  const namingConventions = namingChecks
    .filter(([, paths, shape]) => paths.some((p) => shape.test(basename(p))))
    .map(([label]) => label);
  if (namingConventions.length > 0) {
    detected.push({
      name: 'File Naming Convention',
      type: 'naming',
      description: `Uses ${namingConventions.join(', ')} naming convention(s)`,
      evidence: namingConventions.map((label) => `Detected ${label} pattern in filenames`),
      confidence: 'medium',
    });
  }

  // Testing layout: a dedicated directory takes precedence over co-located files.
  const hasTestDir = ['test', 'tests', '__tests__'].some((dirName) => knownDirs.has(dirName));
  const hasSpecFiles = lowerPaths.some((p) => p.includes('.spec.') || p.includes('.test.'));
  if (hasTestDir) {
    detected.push({
      name: 'Testing Structure',
      type: 'structural',
      description: 'Uses dedicated test directory',
      evidence: ['Found test/tests/__tests__ directory'],
      confidence: 'high',
    });
  }
  else if (hasSpecFiles) {
    detected.push({
      name: 'Testing Structure',
      type: 'structural',
      description: 'Uses co-located test files (.spec/.test)',
      evidence: ['Found .spec or .test files alongside source'],
      confidence: 'high',
    });
  }
  return detected;
}
/**
 * Summarise the architecture: overall type, layers, components and data flow.
 *
 * The type is the name of the first 'architectural' pattern, or
 * 'Custom/Unknown'. Layers are the well-known layer names present in
 * `dirNames`. Components are the visible, non-ignored directories directly
 * inside `targetDir`; if that directory cannot be read the list stays empty.
 *
 * @param {object[]} patterns - Patterns detected so far.
 * @param {string[]} dirNames - Lower-cased directory base names.
 * @param {string} targetDir - Directory whose immediate children become components.
 * @param {string} workingDir - Base for the component paths.
 * @returns {object} { type, layers, components, dataFlow }.
 */
function buildArchitectureInsights(patterns, dirNames, targetDir, workingDir) {
  const LAYER_NAMES = ['api', 'routes', 'controllers', 'services', 'models', 'views', 'components', 'utils', 'lib', 'core'];
  const architectural = patterns.find((candidate) => candidate.type === 'architectural');
  const type = architectural?.name || 'Custom/Unknown';
  const layers = LAYER_NAMES.filter((layerName) => dirNames.includes(layerName));
  const components = [];
  try {
    for (const entry of readdirSync(targetDir, { withFileTypes: true })) {
      const entryName = entry.name;
      const isComponentDir = entry.isDirectory() && !IGNORED_DIRS.has(entryName) && !entryName.startsWith('.');
      if (!isComponentDir) {
        continue;
      }
      components.push({
        name: entryName,
        type: inferComponentType(entryName),
        path: relative(workingDir, join(targetDir, entryName)),
        responsibilities: inferResponsibilities(entryName),
      });
    }
  }
  catch {
    // Unreadable target directory: report whatever was gathered.
  }
  return {
    type,
    layers,
    components,
    dataFlow: inferDataFlow(layers, type),
  };
}
/**
 * Classify a directory by its name (case-insensitive).
 *
 * @param {string} name - Directory name.
 * @returns {string} A category label, or 'Module' when the name is not recognised.
 */
function inferComponentType(name) {
  // Checked top to bottom; the first group containing the name wins.
  const CATEGORIES = [
    ['API Layer', ['api', 'routes', 'controllers', 'handlers']],
    ['Business Logic', ['services', 'business', 'domain']],
    ['Data Models', ['models', 'entities', 'schemas']],
    ['Presentation', ['views', 'pages', 'screens']],
    ['UI Components', ['components', 'ui']],
    ['Utilities', ['utils', 'helpers', 'lib', 'common', 'shared']],
    ['Configuration', ['config', 'configs', 'settings']],
    ['Testing', ['test', 'tests', '__tests__', 'spec']],
    ['Type Definitions', ['types', 'interfaces', 'contracts']],
    ['Middleware', ['middleware', 'middlewares']],
    ['Extensions', ['plugins', 'extensions', 'addons']],
  ];
  const key = name.toLowerCase();
  const category = CATEGORIES.find(([, aliases]) => aliases.includes(key));
  return category ? category[0] : 'Module';
}
/**
 * Suggest typical responsibilities for a directory, based on its name
 * (case-insensitive).
 *
 * @param {string} name - Directory name.
 * @returns {string[]} A fresh array of responsibility labels;
 *   ['Module functionality'] when the name is not recognised.
 */
function inferResponsibilities(name) {
  const httpDuties = ['HTTP request handling', 'Route definitions', 'Request/response processing'];
  const pageDuties = ['UI rendering', 'Page composition', 'Layout management'];
  const helperDuties = ['Utility functions', 'Common helpers', 'Shared logic'];
  // A Map keeps names such as "constructor" from hitting inherited properties.
  const dutiesByName = new Map([
    ['api', httpDuties],
    ['routes', httpDuties],
    ['controllers', ['Request handling', 'Input validation', 'Response formatting']],
    ['services', ['Business logic', 'Data orchestration', 'External integrations']],
    ['models', ['Data structures', 'Database schemas', 'Data validation']],
    ['views', pageDuties],
    ['pages', pageDuties],
    ['components', ['Reusable UI elements', 'Component logic', 'State management']],
    ['utils', helperDuties],
    ['helpers', helperDuties],
  ]);
  const duties = dutiesByName.get(name.toLowerCase());
  return duties ? [...duties] : ['Module functionality'];
}
/**
 * Describe the likely request/data flow for an architecture.
 *
 * A recognised architecture name (containing 'MVC', 'Clean' or 'Layered',
 * checked in that order) selects a canned description. Otherwise the detected
 * layers are chained in the order given, and with no layers a generic
 * description is returned.
 *
 * @param {string[]} layers - Detected layer names.
 * @param {string} archType - Architecture name.
 * @returns {string[]} One or more flow descriptions (a fresh array on each call).
 */
function inferDataFlow(layers, archType) {
  const KNOWN_FLOWS = [
    ['MVC', ['Request → Controller → Model → View → Response']],
    ['Clean', [
      'External → Controllers → Use Cases → Entities',
      'Entities → Use Cases → Presenters → External',
    ]],
    ['Layered', ['API → Services → Repositories → Database']],
  ];
  const known = KNOWN_FLOWS.find(([marker]) => archType.includes(marker));
  if (known) {
    return [...known[1]];
  }
  return layers.length > 0
    ? [`Request → ${layers.join(' → ')} → Response`]
    : ['Standard request/response flow'];
}
/**
 * Identify likely entry-point files.
 *
 * For each well-known entry file name at most one path is reported. When
 * several files share that name, the best candidate is chosen instead of the
 * first one encountered: files at the root or directly inside a `src` or
 * `bin` directory are preferred, then the shallowest path wins, and remaining
 * ties keep the original order. This prevents a nested file such as
 * `examples/index.js` from shadowing the real `index.js`.
 *
 * If a package.json config entry is present, a pointer to its "main"/"bin"
 * fields is appended.
 *
 * @param {string[]} filePaths - Relative file paths ('/' or '\\' separated).
 * @param {object[]} configFiles - Config entries ({ name, path, ... }).
 * @returns {string[]} De-duplicated entry points.
 */
function findEntryPoints(filePaths, configFiles) {
  const entryPatterns = [
    'index.ts',
    'index.js',
    'main.ts',
    'main.js',
    'app.ts',
    'app.js',
    'server.ts',
    'server.js',
    'cli.ts',
    'cli.js',
    '__main__.py',
    'main.py',
    'app.py',
    'manage.py',
    'main.go',
    'main.rs',
    'lib.rs',
  ];
  const preferredParents = new Set(['src', 'bin']);
  // Rank a path: preferred locations first, then fewer path segments.
  const rank = (filePath) => {
    const segments = filePath.split(/[\\/]/).filter(Boolean);
    const depth = segments.length;
    const preferred = depth <= 1 || preferredParents.has(segments[depth - 2]);
    return { preferred, depth };
  };
  const isBetter = (candidate, current) => {
    if (candidate.preferred !== current.preferred) {
      return candidate.preferred;
    }
    return candidate.depth < current.depth;
  };
  const entryPoints = [];
  for (const pattern of entryPatterns) {
    let best;
    let bestRank;
    for (const filePath of filePaths) {
      if (basename(filePath) !== pattern) {
        continue;
      }
      const candidateRank = rank(filePath);
      if (best === undefined || isBetter(candidateRank, bestRank)) {
        best = filePath;
        bestRank = candidateRank;
      }
    }
    if (best) {
      entryPoints.push(best);
    }
  }
  // package.json may name the real entry point in its "main"/"bin" fields.
  const pkgJson = configFiles.find((c) => c.name === 'package.json');
  if (pkgJson) {
    entryPoints.push(`${pkgJson.path} (see "main" or "bin" fields)`);
  }
  return [...new Set(entryPoints)];
}
/**
 * Inspect `dir` for dependency manifests and summarise what they declare.
 *
 * Manifests are checked in the order package.json, pyproject.toml,
 * Cargo.toml, go.mod. The first manifest found determines `packageManager`,
 * and the dependency lists always come from that same manifest, so the two
 * never disagree in a mixed-language directory. Lists are capped at 20 names.
 * Dependency names are only extracted for package.json and pyproject.toml.
 * Unreadable or unparsable manifests are tolerated.
 *
 * @param {string} dir - Directory to inspect (not searched recursively).
 * @returns {object} { dependencies, devDependencies, hasDependencyFile } plus
 *   `packageManager` when a manifest was found.
 */
function analyzeDependencies(dir) {
  const MAX_LISTED = 20;
  const result = {
    dependencies: [],
    devDependencies: [],
    hasDependencyFile: false,
  };
  // package.json
  const pkgPath = join(dir, 'package.json');
  if (existsSync(pkgPath)) {
    try {
      const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
      result.packageManager = 'npm';
      result.hasDependencyFile = true;
      result.dependencies = Object.keys(pkg.dependencies || {}).slice(0, MAX_LISTED);
      result.devDependencies = Object.keys(pkg.devDependencies || {}).slice(0, MAX_LISTED);
    }
    catch {
      // Ignore read/parse errors; later manifests may still apply.
    }
  }
  // pyproject.toml
  const pyprojectPath = join(dir, 'pyproject.toml');
  if (existsSync(pyprojectPath)) {
    // Only take the Python dependency list when no earlier manifest has
    // already claimed the package manager.
    const isPrimaryManifest = result.packageManager === undefined;
    result.packageManager = result.packageManager || 'pip';
    result.hasDependencyFile = true;
    if (isPrimaryManifest) {
      try {
        const content = readFileSync(pyprojectPath, 'utf-8');
        // Anchored to a line start so keys like "dev-dependencies" do not
        // match. Quoted strings are consumed whole, so a "]" inside an
        // extras specifier ("pkg[extra]") does not end the array early.
        const arrayMatch = content.match(/^[ \t]*dependencies\s*=\s*\[((?:"[^"]*"|'[^']*'|[^\]"'])*)\]/m);
        if (arrayMatch && arrayMatch[1]) {
          const names = [];
          for (const item of arrayMatch[1].matchAll(/"([^"]+)"|'([^']+)'/g)) {
            const requirement = item[1] ?? item[2] ?? '';
            // Keep the distribution name only: cut at the first version
            // operator, extras bracket, marker, URL reference or whitespace.
            const name = requirement.trim().split(/[\s<>=!~;\[(@]/)[0];
            if (name) {
              names.push(name);
            }
          }
          result.dependencies = names.slice(0, MAX_LISTED);
        }
      }
      catch {
        // Ignore read/parse errors.
      }
    }
  }
  // Cargo.toml
  if (existsSync(join(dir, 'Cargo.toml'))) {
    result.packageManager = result.packageManager || 'cargo';
    result.hasDependencyFile = true;
  }
  // go.mod
  if (existsSync(join(dir, 'go.mod'))) {
    result.packageManager = result.packageManager || 'go modules';
    result.hasDependencyFile = true;
  }
  return result;
}
/**
 * Analyse a single source file.
 *
 * Reads the file as UTF-8 and extracts imports, exports, functions, classes,
 * code patterns and complexity, then infers the file's purpose. When
 * `includeRelationships` is set, each relative import becomes an 'imports'
 * relationship to the other file.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string} workingDir - Base used for the reported relative path.
 * @param {boolean} includeRelationships - Whether to derive relationships from imports.
 * @returns {object} File analysis; `size` is the decoded text length and
 *   `lineCount` the number of '\n'-separated segments.
 * @throws If the file cannot be read.
 */
function analyzeFile(filePath, workingDir, includeRelationships) {
  const text = readFileSync(filePath, 'utf-8');
  const lineCount = text.split('\n').length;
  const ext = extname(filePath).toLowerCase();
  const language = LANGUAGE_MAP[ext] || 'Unknown';
  const path = relative(workingDir, filePath);

  const imports = extractImports(text, ext);
  const exports = extractExports(text, ext);
  const functions = extractFunctions(text, ext);
  const classes = extractClasses(text, ext);
  const patterns = detectCodePatterns(text, ext);
  const complexity = calculateComplexity(text);
  const purpose = inferFilePurpose(basename(filePath), text, imports, exports, functions, classes);

  // Only relative imports point at files of this codebase.
  const relationships = includeRelationships
    ? imports
      .filter((imp) => imp.isRelative)
      .map((imp) => ({
        targetFile: imp.resolvedPath || imp.source,
        type: 'imports',
        symbols: imp.specifiers,
      }))
    : [];

  return {
    path,
    language,
    size: text.length,
    lineCount,
    purpose,
    imports,
    exports,
    functions,
    classes,
    patterns,
    relationships,
    complexity,
  };
}
/**
 * Extract import statements from source text using regular expressions.
 *
 * JavaScript/TypeScript files yield ES `import ... from` statements first,
 * then `const/let/var x = require(...)` calls. Python files yield
 * `from x import y` statements first, then plain `import x` statements (one
 * entry per module). Other extensions yield an empty list.
 *
 * @param {string} content - Source text.
 * @param {string} ext - Lower-case file extension including the dot.
 * @returns {object[]} Entries of { source, specifiers, isRelative }.
 */
function extractImports(content, ext) {
  const SCRIPT_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'];
  // Iterate over every match of a global regex.
  function* eachMatch(pattern, text) {
    let found = pattern.exec(text);
    while (found !== null) {
      yield found;
      found = pattern.exec(text);
    }
  }
  // "a, b , c" -> ['a', 'b', 'c'], dropping empty items.
  const splitList = (text) => text.split(',').map((item) => item.trim()).filter(Boolean);
  // "name as alias" -> "name"
  const withoutAlias = (item) => item.trim().split(' as ')[0]?.trim() ?? '';
  const isRelativeScriptPath = (source) => source.startsWith('.') || source.startsWith('/');

  const imports = [];
  if (SCRIPT_EXTENSIONS.includes(ext)) {
    // ES module imports: namespace, named, or default (+ optional named).
    const esImport = /import\s+(?:(\*\s+as\s+\w+)|(\{[^}]+\})|(\w+)(?:\s*,\s*\{([^}]+)\})?)\s+from\s+['"]([^'"]+)['"]/g;
    for (const [, namespaceClause, namedClause, defaultName, extraNamed, modulePath] of eachMatch(esImport, content)) {
      const source = modulePath || '';
      let specifiers = [];
      if (namespaceClause) {
        specifiers = [namespaceClause.trim()];
      }
      else if (namedClause) {
        specifiers = splitList(namedClause.replace(/[{}]/g, ''));
      }
      else if (defaultName) {
        specifiers = extraNamed ? [defaultName, ...splitList(extraNamed)] : [defaultName];
      }
      imports.push({ source, specifiers, isRelative: isRelativeScriptPath(source) });
    }
    // CommonJS: const x = require('...') / const { a, b } = require('...')
    const cjsRequire = /(?:const|let|var)\s+(?:(\{[^}]+\})|(\w+))\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
    for (const [, destructured, binding, modulePath] of eachMatch(cjsRequire, content)) {
      const source = modulePath || '';
      let specifiers = [];
      if (destructured) {
        specifiers = splitList(destructured.replace(/[{}]/g, ''));
      }
      else if (binding) {
        specifiers = [binding];
      }
      imports.push({ source, specifiers, isRelative: isRelativeScriptPath(source) });
    }
    return imports;
  }
  if (ext === '.py') {
    // from module import a, b as c
    const fromImport = /from\s+([^\s]+)\s+import\s+(.+)/g;
    for (const [, moduleName, importedNames] of eachMatch(fromImport, content)) {
      const source = moduleName ?? '';
      const specifiers = (importedNames ?? '').split(',').map(withoutAlias).filter(Boolean);
      imports.push({ source, specifiers, isRelative: source.startsWith('.') });
    }
    // import a, b  (one entry per module)
    const plainImport = /^import\s+([^\s,]+(?:\s*,\s*[^\s,]+)*)/gm;
    for (const [, moduleList] of eachMatch(plainImport, content)) {
      const moduleNames = (moduleList ?? '').split(',').map(withoutAlias).filter(Boolean);
      for (const moduleName of moduleNames) {
        imports.push({
          source: moduleName,
          specifiers: [moduleName],
          isRelative: moduleName.startsWith('.'),
        });
      }
    }
  }
  return imports;
}
/**
 * Extract exported symbols from source text using regular expressions.
 *
 * JavaScript/TypeScript: reports the default export (named after the
 * exported class/function/identifier, or 'default'), then every
 * `export <keyword> name` declaration, then every name in
 * `export { ... } from` re-exports. The reported type of a declaration is
 * taken from its own keyword (interface, type, class, function; anything else
 * is 'named'), never from other words on the same line.
 *
 * Python: reports the names listed in `__all__`, then every top-level
 * `def`, `async def` and `class` whose name starts with a letter.
 *
 * @param {string} content - Source text.
 * @param {string} ext - Lower-case file extension including the dot.
 * @returns {object[]} Entries of { name, type }.
 */
function extractExports(content, ext) {
  const exports = [];
  if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) {
    // Default export. "async" and the class/function keyword are skipped so
    // the captured word is the declared name; the \b keeps identifiers that
    // merely start with "class"/"function" intact.
    const defaultMatch = content.match(/export\s+default\s+(?:async\s+)?(?:(?:class|function)\b\s*\*?)?\s*(\w+)?/);
    if (defaultMatch) {
      exports.push({
        name: defaultMatch[1] || 'default',
        type: 'default',
      });
    }
    // Named declarations. The keyword is captured so the export type comes
    // from the declaration itself.
    const typeByKeyword = new Map([
      ['interface', 'interface'],
      ['type', 'type'],
      ['class', 'class'],
      ['function', 'function'],
    ]);
    const namedExportRegex = /export\s+(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(const|let|var|function\*?|class|interface|type|enum)\s+(\w+)/g;
    let match;
    while ((match = namedExportRegex.exec(content)) !== null) {
      const keyword = (match[1] || '').replace('*', '');
      const name = match[2] || '';
      exports.push({ name, type: typeByKeyword.get(keyword) ?? 'named' });
    }
    // Re-exports: export { a, b as c } from '...'; the original name is reported.
    const reExportRegex = /export\s+\{([^}]+)\}\s+from/g;
    while ((match = reExportRegex.exec(content)) !== null) {
      const names = (match[1] ?? '').split(',').map((s) => s.trim().split(' as ')[0]?.trim() ?? '');
      for (const name of names) {
        if (name) {
          exports.push({ name, type: 'named' });
        }
      }
    }
  }
  else if (ext === '.py') {
    // Python __all__
    const allMatch = content.match(/__all__\s*=\s*\[([\s\S]*?)\]/);
    if (allMatch && allMatch[1]) {
      const names = (allMatch[1].match(/['"]([^'"]+)['"]/g) || []).map((s) => s.replace(/['"]/g, ''));
      for (const name of names) {
        exports.push({ name, type: 'named' });
      }
    }
    // Top-level public definitions. The name must start with a letter, which
    // already excludes underscore-prefixed (private) names.
    const defRegex = /^(?:async\s+def|def|class)\s+([a-zA-Z][a-zA-Z0-9_]*)/gm;
    let match;
    while ((match = defRegex.exec(content)) !== null) {
      exports.push({
        name: match[1] || '',
        type: match[0].startsWith('class') ? 'class' : 'function',
      });
    }
  }
  return exports;
}
/**
 * Extract function definitions from a source file using regex heuristics.
 *
 * JavaScript/TypeScript: named `function` declarations first, then arrow
 * functions assigned to `const`/`let`/`var`. Python: `def` / `async def`.
 * Any other extension yields an empty list.
 *
 * @param {string} content - Full text of the file.
 * @param {string} ext - File extension including the leading dot.
 * @returns {Array<{name: string, line: number, parameters: string[],
 *   returnType: (string|undefined), isAsync: boolean, isExported: boolean,
 *   complexity: number}>} One entry per match; `line` is 1-based and
 *   `complexity` is always 1.
 */
function extractFunctions(content, ext) {
    const functions = [];
    // 1-based line number of the character at `index`.
    const lineAt = (index) => content.substring(0, index).split('\n').length;
    // Parameter names only: type annotations (`: T`) and defaults (`= v`) are dropped.
    const parseParams = (raw) => (raw ?? '')
        .split(',')
        .map((p) => p.trim().split(':')[0].split('=')[0].trim())
        .filter(Boolean);
    const parseReturnType = (raw) => (raw || '').trim() || undefined;
    if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) {
        // Regular functions. `export`/`async` are captured by the regex itself:
        // the match already starts at those modifiers, so inspecting the text
        // *before* match.index (as was done previously) could never see them.
        const funcRegex = /(export\s+(?:default\s+)?)?(async\s+)?function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*([^{]+))?\s*\{/g;
        for (const match of content.matchAll(funcRegex)) {
            functions.push({
                name: match[3],
                line: lineAt(match.index),
                parameters: parseParams(match[4]),
                returnType: parseReturnType(match[5]),
                isAsync: Boolean(match[2]),
                isExported: Boolean(match[1]),
                complexity: 1,
            });
        }
        // Arrow functions bound to a variable.
        const arrowRegex = /(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(?([^)=]*)\)?\s*(?::\s*([^=]+))?\s*=>/g;
        for (const match of content.matchAll(arrowRegex)) {
            functions.push({
                name: match[2],
                line: lineAt(match.index),
                parameters: parseParams(match[4]),
                returnType: parseReturnType(match[5]),
                isAsync: Boolean(match[3]),
                isExported: Boolean(match[1]),
                complexity: 1,
            });
        }
    }
    else if (ext === '.py') {
        const defRegex = /(async\s+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?\s*:/g;
        for (const match of content.matchAll(defRegex)) {
            const name = match[2];
            functions.push({
                name,
                line: lineAt(match.index),
                // The implicit receiver arguments are not reported.
                parameters: parseParams(match[3]).filter((p) => p !== 'self' && p !== 'cls'),
                returnType: parseReturnType(match[4]),
                isAsync: Boolean(match[1]),
                // A leading underscore marks the function as non-exported.
                isExported: !name.startsWith('_'),
                complexity: 1,
            });
        }
    }
    return functions;
}
/**
 * Extract class definitions from a source file using regex heuristics.
 *
 * JavaScript/TypeScript: each `class Name [extends X] [implements A, B] {`
 * header is matched, the class body is located by brace counting, and member
 * names are collected from the top level of that body only.
 * Python: each `class Name(Parents):` header is matched; the first parent is
 * reported as `extends`, the remaining ones as `implements`, and no members
 * are collected. Any other extension yields an empty list.
 *
 * Brace counting does not understand strings or comments, so braces inside
 * them can shift the detected end of a class body.
 *
 * @param {string} content - Full text of the file.
 * @param {string} ext - File extension including the leading dot.
 * @returns {Array<{name: string, line: number, methods: string[],
 *   properties: string[], extends: (string|undefined),
 *   implements: (string[]|undefined), isExported: boolean}>} One entry per
 *   class header found; `line` is 1-based.
 */
function extractClasses(content, ext) {
    const classes = [];
    // 1-based line number of the character at `index`.
    const lineAt = (index) => content.substring(0, index).split('\n').length;
    const splitList = (raw) => raw
        .split(',')
        .map((s) => s.trim())
        .filter(Boolean);
    // Keep only the text at brace depth 0, collapsing every nested block to
    // `{}`. This prevents statements inside method bodies (`if (...)`, calls,
    // local assignments) from being reported as class members.
    const topLevelText = (body) => {
        let depth = 0;
        let text = '';
        for (const ch of body) {
            if (ch === '{') {
                if (depth === 0) {
                    text += ch;
                }
                depth++;
            }
            else if (ch === '}') {
                if (depth > 0) {
                    depth--;
                }
                if (depth === 0) {
                    text += ch;
                }
            }
            else if (depth === 0) {
                text += ch;
            }
        }
        return text;
    };
    if (['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'].includes(ext)) {
        // The export prefix is captured by the regex: the match starts at
        // `export`, so looking at the text before match.index cannot see it.
        const classRegex = /(export\s+(?:default\s+|declare\s+)?(?:abstract\s+)?)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([^{]+))?\s*\{/g;
        for (const match of content.matchAll(classRegex)) {
            const name = match[2];
            const extendsClass = match[3];
            const implementsList = match[4] ? splitList(match[4]) : undefined;
            // Find the class body: scan forward until the opening brace of
            // the header is balanced. `bodyEnd` ends on the closing brace,
            // which substring() then excludes.
            const bodyStart = match.index + match[0].length;
            let depth = 1;
            let bodyEnd = bodyStart;
            for (let i = bodyStart; i < content.length && depth > 0; i++) {
                if (content[i] === '{') {
                    depth++;
                }
                if (content[i] === '}') {
                    depth--;
                }
                bodyEnd = i;
            }
            const members = topLevelText(content.substring(bodyStart, bodyEnd));
            // Methods: any `name(...)` at the top level, except the constructor.
            const methods = [];
            const methodRegex = /(?:async\s+)?(?:public|private|protected)?\s*(\w+)\s*\([^)]*\)/g;
            for (const methodMatch of members.matchAll(methodRegex)) {
                if (methodMatch[1] && methodMatch[1] !== 'constructor') {
                    methods.push(methodMatch[1]);
                }
            }
            // Properties: any whitespace-preceded `name:` or `name =` at the top level.
            const properties = [];
            const propRegex = /(?:public|private|protected|readonly)?\s+(\w+)\s*[:=]/g;
            for (const propMatch of members.matchAll(propRegex)) {
                if (propMatch[1]) {
                    properties.push(propMatch[1]);
                }
            }
            classes.push({
                name,
                line: lineAt(match.index),
                methods,
                properties,
                extends: extendsClass,
                implements: implementsList,
                isExported: Boolean(match[1]),
            });
        }
    }
    else if (ext === '.py') {
        const classRegex = /class\s+(\w+)(?:\s*\(([^)]*)\))?\s*:/g;
        for (const match of content.matchAll(classRegex)) {
            const name = match[1];
            const parentClasses = match[2] ? splitList(match[2]) : [];
            classes.push({
                name,
                line: lineAt(match.index),
                methods: [],
                properties: [],
                extends: parentClasses[0],
                implements: parentClasses.slice(1),
                // A leading underscore marks the class as non-exported.
                isExported: !name.startsWith('_'),
            });
        }
    }
    return classes;
}
/**
 * Detect coarse coding patterns in a file by testing its text against a fixed
 * list of regular expressions.
 *
 * Keyword alternatives are anchored with `\b` so they only match whole words;
 * without it `it\s*\(` matched every `.split(` / `emit(` call and flagged
 * ordinary files as 'Testing'.
 *
 * @param {string} content - Full text of the file.
 * @param {string} _ext - File extension; accepted for interface compatibility, not used.
 * @returns {string[]} Labels of the patterns found, in the fixed order of the
 *   detector table below.
 */
function detectCodePatterns(content, _ext) {
    // [label, detector] pairs. None of the regexes is global/sticky, so
    // `.test()` keeps no state between calls.
    const detectors = [
        ['Async/Await', /\basync\s+function|\basync\s+\(|\bawait\s+/],
        ['Promise Combinators', /Promise\.all|Promise\.race|Promise\.allSettled/],
        ['Try-Catch Error Handling', /try\s*\{[\s\S]*?\}\s*catch/],
        ['Functional Array Methods', /\.map\s*\(|\.filter\s*\(|\.reduce\s*\(/],
        ['Immutability', /Object\.freeze|Object\.seal|\breadonly\s+/],
        ['TypeScript Types', /\binterface\s+\w+|\btype\s+\w+\s*=/],
        ['Object-Oriented', /\bclass\s+\w+/],
        ['Module Pattern', /export\s+default|export\s+\{|module\.exports/],
        // NOTE: `\.test\(` also matches RegExp `.test(` calls in non-test code.
        ['Testing', /\.test\(|\.spec\.|\bdescribe\s*\(|\bit\s*\(|\bexpect\s*\(/],
        ['Console Logging', /console\.(log|error|warn|debug)/],
        ['Decorators', /@decorator|@\w+\s*\(|@\w+\s*\n/],
        ['React Hooks', /useEffect|useState|useCallback|useMemo/],
        ['Redux Toolkit', /createSlice|createReducer|createAction/],
    ];
    const patterns = [];
    for (const [label, detector] of detectors) {
        if (detector.test(content)) {
            patterns.push(label);
        }
    }
    return patterns;
}
function calculateComplexity(content) {
const lines = content.split('\n');
const linesOfCode = lines.filter((l) => l.trim() && !l.trim().startsWith('//')).length;
const linesOfComments = lines.filter((l) => l.trim().startsWith('//')).length;
// Simple cyclomatic complexity estimation
let cyclomaticComplexity = 1;
const controlFlowPatterns = /\bif\b|\belse\b|\bfor\b|\bwhile\b|\bcase\b|\bcatch\b|\b\?\s*:/g;
const matches = content.match(controlFlowPatterns);
if (matches) {
cyclomaticComplexity += matches.length;
}
// Cognitive complexity (simplified)
let cognitiveComplexity = cyclomaticComplexity;
const nestedPatterns = /\{\s*\{|\bif\b.*\bif\b/g;
const nestedMatches = content.match(nestedPatterns);
if (nestedMatches) {
cognitiveComplexity += nestedMatches.length * 2;
}
// Maintainability index (simplified, 0-100 scale)
cons