UNPKG

aiwg

Version:

Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.

785 lines (675 loc) 22.8 kB
/**
 * @file codebase-analyzer.ts
 * @description Brownfield codebase analysis for generating intake documentation
 *
 * Implements F-003/UC-003: Codebase Intake Generation
 * - Scans existing codebases to extract structure, dependencies, and metrics
 * - Generates intake forms automatically from code analysis
 * - Identifies technologies, frameworks, and architectural patterns
 * - Estimates technical debt and modernization opportunities
 *
 * @implements NFR-ACC-002: >85% accurate technology detection
 * @implements NFR-PERF-003: <2min analysis for medium codebases (<50k LOC)
 */

import { promises as fs } from 'fs';
import path from 'path';

// ============================================================================
// Types and Interfaces
// ============================================================================

/** Aggregate file and line counts collected while walking the codebase. */
export interface CodebaseMetrics {
  totalFiles: number;
  totalLines: number;
  codeLines: number;
  commentLines: number;
  blankLines: number;
  filesByLanguage: Record<string, number>;
  linesByLanguage: Record<string, number>;
  /**
   * Number of counted source files that look like tests (by file name or by
   * living in a test directory). Optional so that metrics objects built
   * without it remain valid.
   */
  testFiles?: number;
}

/** A single dependency declared by the analyzed project. */
export interface DependencyInfo {
  name: string;
  version: string;
  type: 'production' | 'development' | 'peer';
  vulnerabilities?: number;
  lastUpdated?: string;
}

/** Technologies detected in the analyzed project. */
export interface TechnologyStack {
  languages: Array<{ language: string; percentage: number; files: number }>;
  frameworks: Array<{ name: string; version?: string; confidence: number }>;
  databases: Array<{ type: string; confidence: number }>;
  buildTools: string[];
  testFrameworks: string[];
  cicd: string[];
}

/** An architectural pattern guess together with the evidence behind it. */
export interface ArchitecturePattern {
  pattern: string; // MVC, microservices, monolith, etc.
  confidence: number; // 0-1
  indicators: string[];
}

/** One technical-debt finding. */
export interface TechnicalDebt {
  category: 'deprecated' | 'outdated' | 'security' | 'complexity' | 'duplication';
  severity: 'critical' | 'high' | 'medium' | 'low';
  description: string;
  location: string;
  estimatedEffort: string; // hours or story points
}

/** Full result returned by {@link CodebaseAnalyzer.analyze}. */
export interface CodebaseAnalysisResult {
  projectName: string;
  projectPath: string;
  analyzedAt: Date;
  metrics: CodebaseMetrics;
  technologies: TechnologyStack;
  dependencies: DependencyInfo[];
  architecture: ArchitecturePattern[];
  technicalDebt: TechnicalDebt[];
  recommendations: string[];
  estimatedComplexity: 'simple' | 'moderate' | 'complex' | 'enterprise';
}

/** Options accepted by {@link CodebaseAnalyzer.analyze}. */
export interface AnalysisOptions {
  path: string;
  excludePaths?: string[]; // node_modules, .git, etc. (replaces the defaults when given)
  maxFiles?: number; // Performance limit
  detectFrameworks?: boolean; // Default: true
  detectDebt?: boolean; // Default: true
  scanDependencies?: boolean; // Default: true
}

// ============================================================================
// Internal helpers
// ============================================================================

/** Validated subset of package.json that the analyzer reads. */
interface PackageManifest {
  dependencies: Record<string, string>;
  devDependencies: Record<string, string>;
  peerDependencies: Record<string, string>;
}

/** File limit applied when `maxFiles` is missing or not a positive number. */
const DEFAULT_MAX_FILES = 10000;

/** Narrow an unknown value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Convert an untrusted `dependencies`-style value into a name -> version map,
 * dropping entries whose version is not a string.
 */
function toVersionMap(value: unknown): Record<string, string> {
  const versions: Record<string, string> = {};
  if (!isRecord(value)) {
    return versions;
  }
  for (const [name, version] of Object.entries(value)) {
    if (typeof version === 'string') {
      versions[name] = version;
    }
  }
  return versions;
}

// ============================================================================
// Codebase Analyzer Class
// ============================================================================

export class CodebaseAnalyzer {
  private readonly defaultExcludePaths = [
    'node_modules',
    '.git',
    '.next',
    '.nuxt',
    'dist',
    'build',
    'coverage',
    '.venv',
    'venv',
    '__pycache__',
    '.pytest_cache',
    'target', // Java
    'bin', // Various
    'obj' // .NET
  ];

  private readonly languageExtensions: Record<string, string> = {
    '.js': 'JavaScript',
    '.jsx': 'JavaScript',
    '.ts': 'TypeScript',
    '.tsx': 'TypeScript',
    '.py': 'Python',
    '.java': 'Java',
    '.kt': 'Kotlin',
    '.go': 'Go',
    '.rb': 'Ruby',
    '.php': 'PHP',
    '.cs': 'C#',
    '.cpp': 'C++',
    '.c': 'C',
    '.rs': 'Rust',
    '.swift': 'Swift',
    '.m': 'Objective-C',
    '.sh': 'Shell',
    '.sql': 'SQL',
    '.html': 'HTML',
    '.css': 'CSS',
    '.scss': 'SCSS',
    '.sass': 'Sass',
    '.json': 'JSON',
    '.yaml': 'YAML',
    '.yml': 'YAML',
    '.xml': 'XML',
    '.md': 'Markdown'
  };

  /** Markup/data languages; files in these languages are never counted as tests. */
  private readonly nonCodeLanguages = new Set([
    'HTML',
    'CSS',
    'SCSS',
    'Sass',
    'JSON',
    'YAML',
    'XML',
    'Markdown'
  ]);

  /** Directory names (compared lower-cased) whose contents are treated as tests. */
  private readonly testDirectoryNames = new Set(['__tests__', 'test', 'tests', 'spec', 'specs']);

  /** File-name patterns that mark a source file as a test. */
  private readonly testFilePatterns: readonly RegExp[] = [
    /\.(test|spec)\.[^.]+$/i, // foo.test.ts, foo.spec.js
    /^test_.+\.py$/, // test_foo.py
    /_test\.(py|go|rb)$/, // foo_test.go
    /_spec\.rb$/, // foo_spec.rb
    /Tests?\.(java|kt|cs)$/ // FooTest.java, FooTests.cs
  ];

  /**
   * Analyze a codebase and generate comprehensive analysis.
   *
   * @param options - Root path plus optional exclusions, file limit and feature toggles.
   * @returns Metrics, detected technologies, dependencies, architecture guesses,
   *          technical-debt findings, recommendations and a complexity estimate.
   * @throws Error when `options.path` cannot be stat'ed or is not a directory.
   */
  public async analyze(options: AnalysisOptions): Promise<CodebaseAnalysisResult> {
    const startTime = Date.now();

    // Validate path exists
    await this.validatePath(options.path);

    // Gather metrics (everything below depends on them)
    const metrics = await this.gatherMetrics(options);

    // Technology, dependency and architecture detection are independent of
    // each other, so run them concurrently.
    const technologiesTask: Promise<TechnologyStack> =
      options.detectFrameworks !== false
        ? this.detectTechnologies(options.path, metrics)
        : Promise.resolve(this.getEmptyTechStack());
    const dependenciesTask: Promise<DependencyInfo[]> =
      options.scanDependencies !== false
        ? this.scanDependencies(options.path)
        : Promise.resolve([]);
    const architectureTask = this.detectArchitecture(options.path, metrics);

    const [technologies, dependencies, architecture] = await Promise.all([
      technologiesTask,
      dependenciesTask,
      architectureTask
    ]);

    // Detect technical debt
    const technicalDebt =
      options.detectDebt !== false
        ? await this.detectTechnicalDebt(options.path, metrics, dependencies)
        : [];

    // Generate recommendations
    const recommendations = this.generateRecommendations(
      metrics,
      technologies,
      dependencies,
      technicalDebt
    );

    // Estimate complexity
    const estimatedComplexity = this.estimateComplexity(metrics, technologies, architecture);

    const duration = Date.now() - startTime;

    // Log performance (NFR-PERF-003: <2min for medium codebases)
    if (metrics.totalLines > 10000 && metrics.totalLines < 50000) {
      console.log(`Analysis duration: ${duration}ms for ${metrics.totalLines} lines`);
    }

    return {
      // Resolve first so that a path such as "." yields the directory's real name.
      projectName: path.basename(path.resolve(options.path)),
      projectPath: options.path,
      analyzedAt: new Date(),
      metrics,
      technologies,
      dependencies,
      architecture,
      technicalDebt,
      recommendations,
      estimatedComplexity
    };
  }

  /**
   * Validate analysis path exists and is a directory.
   *
   * @throws Error prefixed with `Invalid path:` when the path cannot be
   *         stat'ed or does not refer to a directory.
   */
  private async validatePath(targetPath: string): Promise<void> {
    let isDirectory: boolean;
    try {
      isDirectory = (await fs.stat(targetPath)).isDirectory();
    } catch (error: unknown) {
      throw new Error(`Invalid path: ${targetPath} - ${String(error)}`);
    }

    if (!isDirectory) {
      // Message text kept identical to what earlier versions produced.
      throw new Error(
        `Invalid path: ${targetPath} - Error: Path is not a directory: ${targetPath}`
      );
    }
  }

  /**
   * Gather codebase metrics (lines, files, languages).
   *
   * Only files whose extension appears in `languageExtensions` are counted.
   * Excluded directories are pruned from the walk, and the walk stops
   * descending once `maxFiles` files have been counted.
   */
  private async gatherMetrics(options: AnalysisOptions): Promise<CodebaseMetrics> {
    const metrics: CodebaseMetrics = {
      totalFiles: 0,
      totalLines: 0,
      codeLines: 0,
      commentLines: 0,
      blankLines: 0,
      filesByLanguage: {},
      linesByLanguage: {},
      testFiles: 0
    };

    const root = options.path;
    const excludePaths = options.excludePaths ?? this.defaultExcludePaths;
    const maxFiles =
      options.maxFiles !== undefined && options.maxFiles > 0
        ? options.maxFiles
        : DEFAULT_MAX_FILES;

    // Exclusions are matched against the path relative to the analysis root so
    // that the location of the project itself (e.g. /ci/build/app) never
    // excludes the whole tree.
    const isExcluded = (target: string): boolean =>
      this.shouldExclude(path.relative(root, target), excludePaths);
    const limitReached = (): boolean => metrics.totalFiles >= maxFiles;

    await this.walkDirectory(
      root,
      async (filePath) => {
        if (limitReached() || isExcluded(filePath)) {
          return;
        }

        const language = this.languageExtensions[path.extname(filePath)];
        if (!language) {
          return; // Skip unknown file types
        }

        metrics.totalFiles++;
        metrics.filesByLanguage[language] = (metrics.filesByLanguage[language] ?? 0) + 1;

        if (
          !this.nonCodeLanguages.has(language) &&
          this.isTestFile(path.relative(root, filePath))
        ) {
          metrics.testFiles = (metrics.testFiles ?? 0) + 1;
        }

        let content: string;
        try {
          content = await fs.readFile(filePath, 'utf-8');
        } catch {
          return; // Unreadable file: it stays in the file counts, contributes no lines
        }

        const lines = this.splitLines(content);
        metrics.totalLines += lines.length;
        metrics.linesByLanguage[language] =
          (metrics.linesByLanguage[language] ?? 0) + lines.length;

        // Classify lines (simple prefix heuristic; not language-aware)
        for (const line of lines) {
          const trimmed = line.trim();
          if (trimmed === '') {
            metrics.blankLines++;
          } else if (
            trimmed.startsWith('//') ||
            trimmed.startsWith('#') ||
            trimmed.startsWith('/*') ||
            trimmed.startsWith('*')
          ) {
            metrics.commentLines++;
          } else {
            metrics.codeLines++;
          }
        }
      },
      (dirPath) => limitReached() || isExcluded(dirPath)
    );

    return metrics;
  }

  /**
   * Split file content into lines without counting the empty string that
   * follows a trailing newline. Empty content yields no lines.
   */
  private splitLines(content: string): string[] {
    if (content === '') {
      return [];
    }
    const lines = content.split('\n');
    if (lines[lines.length - 1] === '') {
      lines.pop();
    }
    return lines;
  }

  /**
   * Detect technologies, frameworks, and tools.
   *
   * Language shares are derived from `metrics`; everything else is probed
   * from well-known files under `basePath`.
   */
  private async detectTechnologies(
    basePath: string,
    metrics: CodebaseMetrics
  ): Promise<TechnologyStack> {
    // Calculate language percentages (guard against division by zero)
    const totalLines = metrics.totalLines || 1;
    const languages = Object.entries(metrics.linesByLanguage)
      .map(([language, lines]) => ({
        language,
        percentage: (lines / totalLines) * 100,
        files: metrics.filesByLanguage[language] ?? 0
      }))
      .sort((a, b) => b.percentage - a.percentage);

    // The individual probes read different files and do not depend on each other.
    const [frameworks, databases, buildTools, testFrameworks, cicd] = await Promise.all([
      this.detectFrameworks(basePath),
      this.detectDatabases(basePath),
      this.detectBuildTools(basePath),
      this.detectTestFrameworks(basePath),
      this.detectCICD(basePath)
    ]);

    return { languages, frameworks, databases, buildTools, testFrameworks, cicd };
  }

  /**
   * Detect frameworks from package.json, requirements.txt, pom.xml and Gemfile.
   */
  private async detectFrameworks(
    basePath: string
  ): Promise<Array<{ name: string; version?: string; confidence: number }>> {
    const frameworks: Array<{ name: string; version?: string; confidence: number }> = [];

    // Check package.json (Node.js)
    const manifest = await this.readPackageManifest(basePath);
    if (manifest) {
      const deps = this.mergeDependencies(manifest);
      const nodeFrameworks = [
        { name: 'React', packages: ['react'] },
        { name: 'Vue.js', packages: ['vue'] },
        { name: 'Angular', packages: ['@angular/core', 'angular'] },
        { name: 'Express.js', packages: ['express'] },
        { name: 'Next.js', packages: ['next'] },
        { name: 'Nuxt.js', packages: ['nuxt'] }
      ];

      for (const { name, packages } of nodeFrameworks) {
        // First declared package with a non-empty version wins.
        const version = packages.map((pkg) => deps[pkg]).find((candidate) => candidate);
        if (version) {
          frameworks.push({ name, version, confidence: 1.0 });
        }
      }
    }

    // Check requirements.txt (Python). Compared lower-cased because
    // requirement names are commonly written capitalised (e.g. "Django==4.2").
    const requirements = await this.readTextIfExists(path.join(basePath, 'requirements.txt'));
    if (requirements !== null) {
      const normalized = requirements.toLowerCase();
      if (normalized.includes('django')) {
        frameworks.push({ name: 'Django', confidence: 0.9 });
      }
      if (normalized.includes('flask')) {
        frameworks.push({ name: 'Flask', confidence: 0.9 });
      }
      if (normalized.includes('fastapi')) {
        frameworks.push({ name: 'FastAPI', confidence: 0.9 });
      }
    }

    // Check pom.xml (Java/Maven)
    const pom = await this.readTextIfExists(path.join(basePath, 'pom.xml'));
    if (pom !== null && pom.includes('spring-boot')) {
      frameworks.push({ name: 'Spring Boot', confidence: 0.95 });
    }

    // Check Gemfile (Ruby)
    const gemfile = await this.readTextIfExists(path.join(basePath, 'Gemfile'));
    if (gemfile !== null && gemfile.includes('rails')) {
      frameworks.push({ name: 'Ruby on Rails', confidence: 0.95 });
    }

    return frameworks;
  }

  /**
   * Detect databases from configuration files.
   *
   * Each database type is reported at most once, with the highest confidence
   * found across the inspected files (rounded to two decimals, capped at 1).
   */
  private async detectDatabases(
    basePath: string
  ): Promise<Array<{ type: string; confidence: number }>> {
    const configFiles = [
      { file: 'database.yml', confidence: 0.7 },
      { file: 'config/database.yml', confidence: 0.7 },
      { file: '.env', confidence: 0.5 },
      { file: 'docker-compose.yml', confidence: 0.6 }
    ];
    const signatures = [
      { type: 'PostgreSQL', keyword: 'postgres' },
      { type: 'MySQL', keyword: 'mysql' },
      { type: 'MongoDB', keyword: 'mongo' },
      { type: 'Redis', keyword: 'redis' },
      { type: 'SQLite', keyword: 'sqlite' }
    ];

    const bestByType = new Map<string, number>();

    for (const { file, confidence } of configFiles) {
      const content = await this.readTextIfExists(path.join(basePath, file));
      if (content === null) {
        continue; // File doesn't exist
      }

      // Lower-cased so that e.g. POSTGRES_USER or MYSQL_DATABASE also match.
      const haystack = content.toLowerCase();
      // Rounding removes floating-point noise such as 0.8999999999999999.
      const score = Math.min(1, Math.round((confidence + 0.3) * 100) / 100);

      for (const { type, keyword } of signatures) {
        if (haystack.includes(keyword)) {
          bestByType.set(type, Math.max(bestByType.get(type) ?? 0, score));
        }
      }
    }

    return Array.from(bestByType, ([type, confidence]) => ({ type, confidence }));
  }

  /**
   * Detect build tools from the presence of their configuration files.
   */
  private async detectBuildTools(basePath: string): Promise<string[]> {
    const candidates = [
      { tool: 'npm', files: ['package.json'] },
      { tool: 'yarn', files: ['yarn.lock'] },
      { tool: 'pnpm', files: ['pnpm-lock.yaml'] },
      { tool: 'make', files: ['Makefile'] },
      { tool: 'webpack', files: ['webpack.config.js', 'webpack.config.ts'] },
      { tool: 'Vite', files: ['vite.config.ts', 'vite.config.js'] },
      { tool: 'Rollup', files: ['rollup.config.js', 'rollup.config.mjs'] },
      { tool: 'Maven', files: ['pom.xml'] },
      { tool: 'Gradle', files: ['build.gradle', 'build.gradle.kts'] },
      { tool: 'CMake', files: ['CMakeLists.txt'] }
    ];

    const present = await Promise.all(
      candidates.map(({ files }) => this.anyPathExists(basePath, files))
    );

    return candidates.filter((_, index) => present[index]).map(({ tool }) => tool);
  }

  /**
   * Detect test frameworks from package.json and requirements.txt.
   */
  private async detectTestFrameworks(basePath: string): Promise<string[]> {
    const frameworks: string[] = [];

    const manifest = await this.readPackageManifest(basePath);
    if (manifest) {
      const deps = this.mergeDependencies(manifest);
      const nodeTestFrameworks = [
        { name: 'Jest', packages: ['jest'] },
        { name: 'Vitest', packages: ['vitest'] },
        { name: 'Mocha', packages: ['mocha'] },
        { name: 'React Testing Library', packages: ['@testing-library/react'] },
        { name: 'Cypress', packages: ['cypress'] },
        { name: 'Playwright', packages: ['playwright', '@playwright/test'] }
      ];

      for (const { name, packages } of nodeTestFrameworks) {
        if (packages.some((pkg) => deps[pkg])) {
          frameworks.push(name);
        }
      }
    }

    const requirements = await this.readTextIfExists(path.join(basePath, 'requirements.txt'));
    if (requirements !== null) {
      const normalized = requirements.toLowerCase();
      if (normalized.includes('pytest')) frameworks.push('pytest');
      if (normalized.includes('unittest')) frameworks.push('unittest');
    }

    return frameworks;
  }

  /**
   * Detect CI/CD configuration from well-known files and directories.
   */
  private async detectCICD(basePath: string): Promise<string[]> {
    const checks = [
      { path: '.github/workflows', name: 'GitHub Actions' },
      { path: '.gitlab-ci.yml', name: 'GitLab CI' },
      { path: '.travis.yml', name: 'Travis CI' },
      { path: '.circleci/config.yml', name: 'CircleCI' },
      { path: 'Jenkinsfile', name: 'Jenkins' },
      { path: 'azure-pipelines.yml', name: 'Azure Pipelines' }
    ];

    const present = await Promise.all(
      checks.map((check) => this.anyPathExists(basePath, [check.path]))
    );

    return checks.filter((_, index) => present[index]).map(({ name }) => name);
  }

  /**
   * Scan project dependencies declared in package.json.
   *
   * Entries whose version is not a string are skipped. Returns an empty list
   * when package.json is missing or unparsable.
   */
  private async scanDependencies(basePath: string): Promise<DependencyInfo[]> {
    const manifest = await this.readPackageManifest(basePath);
    if (!manifest) {
      return []; // No package.json or parse error
    }

    const groups: Array<[Record<string, string>, DependencyInfo['type']]> = [
      [manifest.dependencies, 'production'],
      [manifest.devDependencies, 'development'],
      [manifest.peerDependencies, 'peer']
    ];

    return groups.flatMap(([versions, type]) =>
      Object.entries(versions).map(([name, version]) => ({ name, version, type }))
    );
  }

  /**
   * Detect architecture patterns from the names of top-level entries.
   *
   * Returns an empty list when the directory cannot be read.
   */
  private async detectArchitecture(
    basePath: string,
    _metrics: CodebaseMetrics
  ): Promise<ArchitecturePattern[]> {
    let entryNames: Set<string>;
    try {
      entryNames = new Set(await fs.readdir(basePath));
    } catch {
      return []; // Error reading directory
    }

    const patterns: ArchitecturePattern[] = [];

    // MVC pattern
    if (['models', 'views', 'controllers'].every((name) => entryNames.has(name))) {
      patterns.push({
        pattern: 'MVC (Model-View-Controller)',
        confidence: 0.9,
        indicators: ['models/', 'views/', 'controllers/ directories present']
      });
    }

    // Microservices
    if (entryNames.has('services') || entryNames.has('microservices')) {
      patterns.push({
        pattern: 'Microservices',
        confidence: 0.7,
        indicators: ['services/ directory, possibly multiple service subdirectories']
      });
    }

    // Layered architecture
    if (['src', 'lib', 'app'].some((name) => entryNames.has(name))) {
      patterns.push({
        pattern: 'Layered Architecture',
        confidence: 0.6,
        indicators: ['Standard src/lib/app structure']
      });
    }

    // Monolith (default if no specific pattern detected)
    if (patterns.length === 0) {
      patterns.push({
        pattern: 'Monolithic',
        confidence: 0.5,
        indicators: ['No clear microservices or modular structure']
      });
    }

    return patterns;
  }

  /**
   * Detect technical debt from dependency versions and codebase metrics.
   */
  private async detectTechnicalDebt(
    _basePath: string,
    metrics: CodebaseMetrics,
    dependencies: DependencyInfo[]
  ): Promise<TechnicalDebt[]> {
    const debt: TechnicalDebt[] = [];

    // Check for pre-1.0 dependencies: "^0.x", "~0.x" or a bare "0.x" pin
    const preReleasePattern = /^[\^~]?0\.\d/;
    const outdatedDeps = dependencies.filter((dep) => preReleasePattern.test(dep.version));

    if (outdatedDeps.length > 0) {
      debt.push({
        category: 'outdated',
        severity: 'medium',
        description: `${outdatedDeps.length} dependencies on pre-1.0 versions`,
        location: 'package.json',
        estimatedEffort: `${outdatedDeps.length * 2}h`
      });
    }

    // Check for high complexity (many files)
    if (metrics.totalFiles > 1000) {
      debt.push({
        category: 'complexity',
        severity: 'high',
        description: `High file count (${metrics.totalFiles} files) indicates potential refactoring needs`,
        location: 'Codebase-wide',
        estimatedEffort: '80-160h'
      });
    }

    // Check for missing tests (heuristic: no test-like source files counted).
    // Metrics without `testFiles` are treated as having none.
    if ((metrics.testFiles ?? 0) === 0) {
      debt.push({
        category: 'complexity',
        severity: 'high',
        description: 'No test files detected - missing test coverage',
        location: 'Codebase-wide',
        estimatedEffort: '40-80h'
      });
    }

    return debt;
  }

  /**
   * Generate recommendations based on analysis.
   */
  private generateRecommendations(
    metrics: CodebaseMetrics,
    technologies: TechnologyStack,
    _dependencies: DependencyInfo[],
    technicalDebt: TechnicalDebt[]
  ): string[] {
    const recommendations: string[] = [];

    // High-level recommendations based on analysis
    if (metrics.totalLines > 100000) {
      recommendations.push(
        'Consider breaking down monolith into smaller modules or microservices'
      );
    }

    if (technicalDebt.some((item) => item.category === 'outdated')) {
      recommendations.push('Update outdated dependencies to improve security and performance');
    }

    if (technologies.testFrameworks.length === 0) {
      recommendations.push('Add automated testing framework (Jest, Vitest, pytest, etc.)');
    }

    if (technologies.cicd.length === 0) {
      recommendations.push('Implement CI/CD pipeline for automated testing and deployment');
    }

    if (technicalDebt.length > 5) {
      recommendations.push('Schedule technical debt sprint to address accumulated issues');
    }

    return recommendations;
  }

  /**
   * Estimate project complexity from size, technology spread and architecture.
   */
  private estimateComplexity(
    metrics: CodebaseMetrics,
    technologies: TechnologyStack,
    architecture: ArchitecturePattern[]
  ): 'simple' | 'moderate' | 'complex' | 'enterprise' {
    let score = 0;

    // Lines of code
    if (metrics.totalLines < 10000) score += 1;
    else if (metrics.totalLines < 50000) score += 2;
    else if (metrics.totalLines < 200000) score += 3;
    else score += 4;

    // Number of languages
    score += technologies.languages.length;

    // Number of frameworks
    score += technologies.frameworks.length;

    // Architecture complexity
    if (architecture.some((item) => item.pattern.includes('Microservices'))) {
      score += 3;
    }

    // Classify
    if (score <= 4) return 'simple';
    if (score <= 8) return 'moderate';
    if (score <= 12) return 'complex';
    return 'enterprise';
  }

  /**
   * Walk directory recursively, invoking `callback` for every regular file.
   *
   * @param dir - Directory to walk.
   * @param callback - Invoked with the full path of each regular file.
   * @param shouldSkipDirectory - Optional predicate; when it returns true for
   *        a sub-directory, that directory is not entered.
   */
  private async walkDirectory(
    dir: string,
    callback: (filePath: string) => Promise<void>,
    shouldSkipDirectory: (dirPath: string) => boolean = () => false
  ): Promise<void> {
    const entries = await this.readDirectoryEntries(dir);

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!shouldSkipDirectory(fullPath)) {
          await this.walkDirectory(fullPath, callback, shouldSkipDirectory);
        }
      } else if (entry.isFile()) {
        await callback(fullPath);
      }
    }
  }

  /**
   * List a directory's entries sorted by name, so that results truncated by
   * `maxFiles` do not depend on the order the OS returns entries in.
   * Returns an empty list for directories that can't be read.
   */
  private async readDirectoryEntries(dir: string) {
    try {
      const entries = await fs.readdir(dir, { withFileTypes: true });
      return entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
    } catch {
      return []; // Skip directories that can't be read
    }
  }

  /**
   * Check if path should be excluded.
   *
   * A path is excluded when any entry of `excludePaths` appears in it as one
   * or more complete, consecutive path segments. Platform separators are
   * normalised to `/` before matching.
   */
  private shouldExclude(filePath: string, excludePaths: string[]): boolean {
    // Wrapping in slashes lets the first and last segments match as well.
    const haystack = `/${this.toPosixPath(filePath)}/`;

    return excludePaths.some((exclude) => {
      const needle = this.toPosixPath(exclude).replace(/^\/+|\/+$/g, '');
      return needle !== '' && haystack.includes(`/${needle}/`);
    });
  }

  /**
   * Decide whether a root-relative path looks like a test file: either one of
   * its directories is a known test directory, or its file name matches a
   * known test naming pattern.
   */
  private isTestFile(relativePath: string): boolean {
    const segments = this.toPosixPath(relativePath).split('/');
    const fileName = segments.pop() ?? '';

    if (segments.some((segment) => this.testDirectoryNames.has(segment.toLowerCase()))) {
      return true;
    }
    return this.testFilePatterns.some((pattern) => pattern.test(fileName));
  }

  /** Replace the platform path separator with `/`. */
  private toPosixPath(target: string): string {
    return target.split(path.sep).join('/');
  }

  /** Read a UTF-8 text file, returning null when it is missing or unreadable. */
  private async readTextIfExists(filePath: string): Promise<string | null> {
    try {
      return await fs.readFile(filePath, 'utf-8');
    } catch {
      return null;
    }
  }

  /** Resolve to true when at least one of `relativePaths` exists under `basePath`. */
  private async anyPathExists(basePath: string, relativePaths: string[]): Promise<boolean> {
    const hits = await Promise.all(
      relativePaths.map(async (relativePath) => {
        try {
          await fs.access(path.join(basePath, relativePath));
          return true;
        } catch {
          return false;
        }
      })
    );
    return hits.some(Boolean);
  }

  /**
   * Read and validate `package.json` under `basePath`.
   * Returns null when the file is missing, unparsable, or not a JSON object.
   */
  private async readPackageManifest(basePath: string): Promise<PackageManifest | null> {
    const raw = await this.readTextIfExists(path.join(basePath, 'package.json'));
    if (raw === null) {
      return null;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      return null;
    }
    if (!isRecord(parsed)) {
      return null;
    }

    return {
      dependencies: toVersionMap(parsed.dependencies),
      devDependencies: toVersionMap(parsed.devDependencies),
      peerDependencies: toVersionMap(parsed.peerDependencies)
    };
  }

  /**
   * Merge all dependency groups into one lookup map. On name clashes,
   * devDependencies win over dependencies, which win over peerDependencies.
   */
  private mergeDependencies(manifest: PackageManifest): Record<string, string> {
    return {
      ...manifest.peerDependencies,
      ...manifest.dependencies,
      ...manifest.devDependencies
    };
  }

  /**
   * Get empty tech stack
   */
  private getEmptyTechStack(): TechnologyStack {
    return {
      languages: [],
      frameworks: [],
      databases: [],
      buildTools: [],
      testFrameworks: [],
      cicd: []
    };
  }
}

export default CodebaseAnalyzer;