UNPKG

mnemos-coder

Version:

CLI-based coding agent with graph-based execution loop and terminal UI

504 lines (490 loc) 19.4 kB
/**
 * End-to-end validation system for testing agent functionality
 */
import { EventEmitter } from 'events';
import { promises as fs } from 'fs';
import path from 'path';
import { SandboxManager } from './SandboxManager.js';
import { ExecutionLogger } from './ExecutionLogger.js';

/** Characters that must be replaced before text is embedded in HTML. */
const HTML_ESCAPES = Object.freeze({
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
});

/**
 * Escape a value for safe interpolation into HTML text or attribute content.
 * @param {unknown} value - Any value; it is stringified first.
 * @returns {string} The escaped string.
 */
function escapeHtml(value) {
    return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Split a dependency spec such as `name@version` into its parts.
 * The split happens on the LAST `@` so that scoped names (`@scope/pkg@1.2.3`)
 * keep their leading `@`. A missing or empty version falls back to `latest`.
 * @param {string} spec - Dependency spec as listed in `scenario.setup.dependencies`.
 * @returns {[string, string]} `[name, version]`.
 */
function parseDependencySpec(spec) {
    const at = spec.lastIndexOf('@');
    if (at <= 0) {
        return [spec, 'latest'];
    }
    return [spec.slice(0, at), spec.slice(at + 1) || 'latest'];
}

/**
 * Test `text` against a string (substring match) or a RegExp.
 * `lastIndex` is reset first because `.test` on a `/g` or `/y` regex is stateful
 * and a reused expectation would otherwise give alternating results.
 * @param {string} text - Text to search.
 * @param {string|RegExp} pattern - Expected substring or pattern.
 * @returns {boolean} Whether the text satisfies the pattern.
 */
function matchesPattern(text, pattern) {
    if (typeof pattern === 'string') {
        return text.includes(pattern);
    }
    pattern.lastIndex = 0;
    return pattern.test(text);
}

/**
 * Runs test scenarios against the CLI inside a sandbox and reports the results.
 *
 * Events emitted: `scenarioStart`, `scenarioPhase`, `scenarioError`,
 * `scenarioComplete`, `suiteStart`, `suiteComplete`.
 */
export class E2EValidator extends EventEmitter {
    sandboxManager;
    customValidators = new Map();
    testResults = [];

    constructor() {
        super();
        this.sandboxManager = new SandboxManager();
        this.registerBuiltinValidators();
    }

    /**
     * Register a custom validator function
     * @param {string} id - Key that scenarios reference in `expectations.custom_validators`.
     * @param {Function} validator - Called as `validator(executionResult, scenario)`;
     *   expected to resolve to an array of error strings.
     */
    registerValidator(id, validator) {
        this.customValidators.set(id, validator);
    }

    /**
     * Run a single test scenario
     *
     * Phases: setup, execution, validation, metrics collection, cleanup. Errors
     * thrown in any phase before cleanup are recorded in `result.errors` instead
     * of being rethrown; cleanup failures are recorded in `result.warnings`.
     * @param {object} scenario - Scenario definition (`id`, `command`, optional
     *   `setup`, `expectations`, `cleanup`).
     * @returns {Promise<object>} The result record, also appended to `this.testResults`.
     */
    async runScenario(scenario) {
        const startTime = Date.now();
        const logger = new ExecutionLogger();
        this.emit('scenarioStart', { scenario });
        const result = {
            scenario_id: scenario.id,
            success: false,
            duration_ms: 0,
            errors: [],
            warnings: [],
            details: {
                setup_success: false,
                execution_success: false,
                validation_success: false,
                cleanup_success: false
            },
            metrics: {
                files_created: 0,
                files_modified: 0,
                files_deleted: 0,
                peak_memory_mb: 0,
                output_length: 0
            },
            artifacts: {
                sandbox_path: '',
                logs_path: '',
                output: '',
                error_output: ''
            }
        };
        let sandboxPath;
        try {
            // 1. Setup phase
            this.emit('scenarioPhase', { scenario, phase: 'setup' });
            sandboxPath = await this.setupScenario(scenario);
            result.artifacts.sandbox_path = sandboxPath;
            result.details.setup_success = true;

            // 2. Execution phase
            this.emit('scenarioPhase', { scenario, phase: 'execution' });
            logger.startCapture();
            let executionResult;
            let capturedOutput;
            try {
                executionResult = await this.executeScenario(scenario, sandboxPath);
            } finally {
                // Always end the capture, even when execution throws, so a
                // started capture is never left dangling.
                capturedOutput = logger.stopCapture();
            }
            result.artifacts.output = capturedOutput;
            result.artifacts.error_output = executionResult.error || '';
            result.details.execution_success = executionResult.success;
            result.metrics.output_length = capturedOutput.length;

            // 3. Validation phase
            this.emit('scenarioPhase', { scenario, phase: 'validation' });
            const validationErrors = await this.validateResults(scenario, sandboxPath, executionResult);
            result.errors.push(...validationErrors);
            result.details.validation_success = validationErrors.length === 0;

            // 4. Metrics collection
            await this.collectMetrics(scenario, sandboxPath, result);

            // 5. Overall success determination
            result.success = result.details.setup_success &&
                result.details.execution_success &&
                result.details.validation_success;
        } catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            result.errors.push(errorMsg);
            this.emit('scenarioError', { scenario, error: errorMsg });
        } finally {
            // 6. Cleanup phase
            this.emit('scenarioPhase', { scenario, phase: 'cleanup' });
            try {
                if (sandboxPath) {
                    // Export logs before cleanup
                    // NOTE(review): the log file is written inside the sandbox, so it
                    // presumably disappears when the sandbox is cleaned up - confirm.
                    const logsPath = path.join(sandboxPath, 'execution.log');
                    await logger.exportLogs('txt', logsPath);
                    result.artifacts.logs_path = logsPath;
                    // Cleanup sandbox unless preserving files
                    if (!scenario.cleanup?.preserve_files?.length) {
                        await this.sandboxManager.cleanup();
                    }
                }
                result.details.cleanup_success = true;
            } catch (cleanupError) {
                result.warnings.push(`Cleanup failed: ${cleanupError}`);
            }
            result.duration_ms = Date.now() - startTime;
            this.testResults.push(result);
            this.emit('scenarioComplete', { scenario, result });
        }
        return result;
    }

    /**
     * Run a complete test suite
     *
     * Scenarios run one after another. `suite.global_cleanup` runs even when a
     * scenario throws; its own failure is only logged as a warning.
     * @param {object} suite - `{ name, scenarios, global_setup?, global_cleanup? }`.
     * @returns {Promise<{name: string, results: object[], summary: object}>}
     */
    async runSuite(suite) {
        const startTime = Date.now();
        this.emit('suiteStart', { suite });
        // Run global setup if provided
        if (suite.global_setup) {
            await suite.global_setup();
        }
        const results = [];
        try {
            // Run scenarios sequentially to avoid resource conflicts
            for (const scenario of suite.scenarios) {
                const result = await this.runScenario(scenario);
                results.push(result);
            }
        } finally {
            // Run global cleanup if provided
            if (suite.global_cleanup) {
                try {
                    await suite.global_cleanup();
                } catch (error) {
                    console.warn('Global cleanup failed:', error);
                }
            }
        }
        const duration = Date.now() - startTime;
        const passed = results.filter((r) => r.success).length;
        const failed = results.length - passed;
        const summary = {
            total: results.length,
            passed,
            failed,
            duration_ms: duration,
            // An empty suite reports 0 rather than NaN (0 / 0).
            success_rate: results.length > 0 ? passed / results.length : 0
        };
        this.emit('suiteComplete', { suite, results, summary });
        return {
            name: suite.name,
            results,
            summary
        };
    }

    /**
     * Generate comprehensive test report
     * @param {object[]} results - Scenario results as produced by `runScenario`.
     * @param {string} outputPath - File to write.
     * @param {'html'|'json'|'markdown'} [format='html'] - Report format.
     * @throws {Error} When `format` is not one of the supported values.
     */
    async generateReport(results, outputPath, format = 'html') {
        switch (format) {
            case 'json':
                await this.generateJSONReport(results, outputPath);
                break;
            case 'markdown':
                await this.generateMarkdownReport(results, outputPath);
                break;
            case 'html':
                await this.generateHTMLReport(results, outputPath);
                break;
            default:
                // Previously an unknown format silently produced no file.
                throw new Error(`Unsupported report format: ${format}`);
        }
    }

    /**
     * Get test statistics
     *
     * All averages, rates and extremes are 0 for an empty `results` array
     * (instead of NaN / Infinity).
     * @param {object[]} results - Scenario results as produced by `runScenario`.
     * @returns {object} `success_rate`, `average_duration`, the ten most frequent
     *   errors, performance metrics, and an (unimplemented) empty `category_breakdown`.
     */
    getStatistics(results) {
        const total = results.length;
        const passed = results.filter((r) => r.success).length;
        const durations = results.map((r) => r.duration_ms);
        const memoryUsages = results.map((r) => r.metrics.peak_memory_mb);
        const sum = (values) => values.reduce((acc, v) => acc + v, 0);
        const hasResults = total > 0;

        // Collect all errors and count occurrences
        const errorCounts = new Map();
        for (const r of results) {
            for (const error of r.errors) {
                errorCounts.set(error, (errorCounts.get(error) || 0) + 1);
            }
        }
        const commonErrors = Array.from(errorCounts.entries())
            .map(([error, count]) => ({ error, count }))
            .sort((a, b) => b.count - a.count)
            .slice(0, 10);

        return {
            success_rate: hasResults ? passed / total : 0,
            average_duration: hasResults ? sum(durations) / total : 0,
            common_errors: commonErrors,
            performance_metrics: {
                fastest_test: hasResults ? Math.min(...durations) : 0,
                slowest_test: hasResults ? Math.max(...durations) : 0,
                average_memory: hasResults ? sum(memoryUsages) / total : 0,
                peak_memory: hasResults ? Math.max(...memoryUsages) : 0
            },
            category_breakdown: {} // Would need scenario metadata to implement
        };
    }

    /**
     * Create a sandbox and populate it with the scenario's files and, when
     * dependencies are listed, a generated `package.json`.
     * @param {object} scenario - Scenario whose optional `setup` is applied.
     * @returns {Promise<string>} Path of the created sandbox.
     */
    async setupScenario(scenario) {
        const sandboxPath = await this.sandboxManager.createSandbox();
        if (scenario.setup) {
            // Create files (a setup without `files` is treated as "no files")
            for (const [filename, content] of Object.entries(scenario.setup.files ?? {})) {
                const filePath = path.join(sandboxPath, filename);
                await fs.mkdir(path.dirname(filePath), { recursive: true });
                await fs.writeFile(filePath, content, 'utf-8');
            }
            // Install dependencies if specified
            if (scenario.setup.dependencies?.length) {
                const packageJson = {
                    name: 'test-project',
                    version: '1.0.0',
                    dependencies: Object.fromEntries(scenario.setup.dependencies.map(parseDependencySpec))
                };
                await fs.writeFile(path.join(sandboxPath, 'package.json'), JSON.stringify(packageJson, null, 2));
            }
        }
        return sandboxPath;
    }

    /**
     * Spawn the CLI for the scenario and collect its output.
     * The returned promise never rejects because of the child process: spawn
     * errors are reported as an unsuccessful result instead.
     * @param {object} scenario - Provides `command` and optional `expectations.duration_max_ms`.
     * @param {string} sandboxPath - Passed to the CLI as `--workspace`.
     * @returns {Promise<{success: boolean, output: string, error: string, exit_code: number, duration: number}>}
     */
    async executeScenario(scenario, sandboxPath) {
        const { spawn } = await import('child_process');
        const startTime = Date.now();
        return new Promise((resolve) => {
            // Build command: node ./dist/cli.js --run "command" --workspace sandboxPath --output json
            const cliPath = path.resolve('./dist/cli.js');
            const args = [
                cliPath,
                '--run', scenario.command,
                '--workspace', sandboxPath,
                '--output', 'json'
            ];
            const childProcess = spawn('node', args, {
                cwd: process.cwd(),
                stdio: ['pipe', 'pipe', 'pipe'],
                // `||` is deliberate: a max duration of 0 also falls back to 60 s.
                timeout: scenario.expectations?.duration_max_ms || 60000
            });
            let stdout = '';
            let stderr = '';
            childProcess.stdout.on('data', (data) => {
                stdout += data.toString();
            });
            childProcess.stderr.on('data', (data) => {
                stderr += data.toString();
            });
            childProcess.on('close', (code) => {
                const duration = Date.now() - startTime;
                resolve({
                    success: code === 0,
                    output: stdout,
                    error: stderr,
                    // `code` is null when the child was terminated by a signal
                    // (e.g. the timeout above); report that as a failure code, not 0.
                    exit_code: code ?? 1,
                    duration
                });
            });
            childProcess.on('error', (error) => {
                resolve({
                    success: false,
                    output: '',
                    error: error.message,
                    exit_code: 1,
                    duration: Date.now() - startTime
                });
            });
        });
    }

    /**
     * Compare an execution result and the sandbox contents with the scenario's
     * expectations.
     * @param {object} scenario - Scenario whose `expectations` are checked.
     * @param {string} sandboxPath - Root used to resolve expected file paths.
     * @param {object} executionResult - Result of `executeScenario`.
     * @returns {Promise<string[]>} One message per failed expectation; empty when
     *   everything matched or the scenario declares no expectations.
     */
    async validateResults(scenario, sandboxPath, executionResult) {
        const errors = [];
        const expectations = scenario.expectations;
        // `expectations` is treated as optional elsewhere (executeScenario), so a
        // scenario without it has nothing to validate.
        if (expectations == null) {
            return errors;
        }

        // Validate success expectation
        if (expectations.success !== executionResult.success) {
            errors.push(`Expected success: ${expectations.success}, got: ${executionResult.success}`);
        }

        // Validate duration
        if (expectations.duration_max_ms && executionResult.duration > expectations.duration_max_ms) {
            errors.push(`Execution took too long: ${executionResult.duration}ms > ${expectations.duration_max_ms}ms`);
        }

        // Validate file operations
        if (expectations.files_created) {
            for (const file of expectations.files_created) {
                const filePath = path.join(sandboxPath, file);
                try {
                    await fs.access(filePath);
                } catch {
                    errors.push(`Expected file not created: ${file}`);
                }
            }
        }

        // Validate file content
        if (expectations.content_contains) {
            for (const check of expectations.content_contains) {
                const filePath = path.join(sandboxPath, check.file);
                let content;
                try {
                    content = await fs.readFile(filePath, 'utf-8');
                } catch {
                    errors.push(`Could not read file for content validation: ${check.file}`);
                    continue;
                }
                const pattern = check.text;
                if (!matchesPattern(content, pattern)) {
                    errors.push(typeof pattern === 'string'
                        ? `File ${check.file} does not contain expected text: ${pattern}`
                        : `File ${check.file} does not match expected pattern: ${pattern}`);
                }
            }
        }

        // Validate output
        if (expectations.output_contains) {
            const pattern = expectations.output_contains;
            if (!matchesPattern(executionResult.output, pattern)) {
                errors.push(typeof pattern === 'string'
                    ? `Output does not contain expected text: ${pattern}`
                    : `Output does not match expected pattern: ${pattern}`);
            }
        }

        // Run custom validators
        if (expectations.custom_validators) {
            for (const validatorId of expectations.custom_validators) {
                const validator = this.customValidators.get(validatorId);
                if (validator) {
                    const validatorErrors = await validator(executionResult, scenario);
                    // Tolerate validators that return nothing when they find no problems.
                    errors.push(...(validatorErrors ?? []));
                } else {
                    errors.push(`Custom validator not found: ${validatorId}`);
                }
            }
        }
        return errors;
    }

    /**
     * Fill `result.metrics` with a file count and a memory figure. Failures are
     * recorded as warnings rather than thrown.
     * @param {object} scenario - Unused; kept for signature compatibility.
     * @param {string} sandboxPath - Directory whose entries are counted.
     * @param {object} result - Scenario result mutated in place.
     */
    async collectMetrics(scenario, sandboxPath, result) {
        try {
            // Count files
            // NOTE(review): this counts every entry in the sandbox (including files
            // written during setup and directories), not only files the run created.
            const files = await fs.readdir(sandboxPath, { recursive: true });
            result.metrics.files_created = files.length;
            // Calculate memory usage (simplified): current heap of THIS process,
            // not a true peak and not the spawned child's memory.
            const memoryUsage = process.memoryUsage();
            result.metrics.peak_memory_mb = memoryUsage.heapUsed / (1024 * 1024);
        } catch (error) {
            result.warnings.push(`Failed to collect metrics: ${error}`);
        }
    }

    /**
     * Register the built-in validators: `code_quality`, `performance`, `security`.
     * Each receives the execution result (`output`, `duration`, ...) and returns
     * an array of error strings.
     */
    registerBuiltinValidators() {
        // Code quality validator
        this.registerValidator('code_quality', async (result, scenario) => {
            const errors = [];
            // Check for basic code quality indicators
            if (result.output.includes('syntax error')) {
                errors.push('Syntax errors detected in output');
            }
            if (result.output.includes('TODO') || result.output.includes('FIXME')) {
                errors.push('TODO/FIXME comments left in code');
            }
            return errors;
        });

        // Performance validator
        this.registerValidator('performance', async (result, scenario) => {
            const errors = [];
            // Validators are invoked with the execution result, whose elapsed time
            // is stored in `duration`; `duration_ms` is kept as a fallback for
            // callers that pass a scenario result instead.
            const elapsedMs = result.duration ?? result.duration_ms;
            if (elapsedMs > 30000) { // 30 seconds
                errors.push('Execution took longer than 30 seconds');
            }
            return errors;
        });

        // Security validator
        this.registerValidator('security', async (result, scenario) => {
            const errors = [];
            // Check for potential security issues
            if (result.output.includes('password') || result.output.includes('secret')) {
                errors.push('Potential sensitive information in output');
            }
            return errors;
        });
    }

    /**
     * Write the results and their statistics as pretty-printed JSON.
     * @param {object[]} results - Scenario results.
     * @param {string} outputPath - File to write.
     */
    async generateJSONReport(results, outputPath) {
        const report = {
            generated_at: new Date().toISOString(),
            summary: this.getStatistics(results),
            results
        };
        await fs.writeFile(outputPath, JSON.stringify(report, null, 2));
    }

    /**
     * Write a Markdown report with a summary and one section per scenario.
     * @param {object[]} results - Scenario results.
     * @param {string} outputPath - File to write.
     */
    async generateMarkdownReport(results, outputPath) {
        const stats = this.getStatistics(results);
        let markdown = `# E2E Test Report

Generated: ${new Date().toISOString()}

## Summary

- **Total Tests**: ${results.length}
- **Passed**: ${results.filter((r) => r.success).length}
- **Failed**: ${results.filter((r) => !r.success).length}
- **Success Rate**: ${(stats.success_rate * 100).toFixed(1)}%
- **Average Duration**: ${stats.average_duration.toFixed(0)}ms

## Results

`;
        for (const result of results) {
            const status = result.success ? '✅ PASS' : '❌ FAIL';
            markdown += `### ${status} - ${result.scenario_id}

- **Duration**: ${result.duration_ms}ms
- **Errors**: ${result.errors.length}
- **Warnings**: ${result.warnings.length}

`;
            if (result.errors.length > 0) {
                markdown += `**Errors**:
${result.errors.map((e) => `- ${e}`).join('\n')}

`;
            }
        }
        await fs.writeFile(outputPath, markdown);
    }

    /**
     * Write a standalone HTML report. Scenario ids, errors and warnings are
     * HTML-escaped because they can contain arbitrary text (file names,
     * patterns, error messages).
     * @param {object[]} results - Scenario results.
     * @param {string} outputPath - File to write.
     */
    async generateHTMLReport(results, outputPath) {
        const stats = this.getStatistics(results);
        const renderMessages = (cssClass, messages) => messages
            .map((message) => `<div class="${cssClass}">• ${escapeHtml(message)}</div>`)
            .join('');
        const html = `<!DOCTYPE html>
<html>
<head>
    <title>E2E Test Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        .summary { background: #f5f5f5; padding: 15px; border-radius: 5px; }
        .test { border: 1px solid #ddd; margin: 10px 0; padding: 15px; }
        .pass { border-left: 5px solid #4CAF50; }
        .fail { border-left: 5px solid #f44336; }
        .error { color: #d32f2f; margin: 5px 0; }
        .warning { color: #ff9800; margin: 5px 0; }
    </style>
</head>
<body>
    <h1>E2E Test Report</h1>
    <div class="summary">
        <h2>Summary</h2>
        <p><strong>Generated:</strong> ${new Date().toISOString()}</p>
        <p><strong>Total Tests:</strong> ${results.length}</p>
        <p><strong>Passed:</strong> ${results.filter((r) => r.success).length}</p>
        <p><strong>Failed:</strong> ${results.filter((r) => !r.success).length}</p>
        <p><strong>Success Rate:</strong> ${(stats.success_rate * 100).toFixed(1)}%</p>
        <p><strong>Average Duration:</strong> ${stats.average_duration.toFixed(0)}ms</p>
    </div>
    <h2>Test Results</h2>
    ${results.map((result) => `
    <div class="test ${result.success ? 'pass' : 'fail'}">
        <h3>${result.success ? '✅' : '❌'} ${escapeHtml(result.scenario_id)}</h3>
        <p><strong>Duration:</strong> ${result.duration_ms}ms</p>
        ${result.errors.length > 0 ? `
        <h4>Errors</h4>
        ${renderMessages('error', result.errors)}
        ` : ''}
        ${result.warnings.length > 0 ? `
        <h4>Warnings</h4>
        ${renderMessages('warning', result.warnings)}
        ` : ''}
    </div>
    `).join('')}
</body>
</html>`;
        await fs.writeFile(outputPath, html);
    }
}

/**
 * Convenience factory.
 * @returns {E2EValidator} A new validator with the built-in validators registered.
 */
export function createE2EValidator() {
    return new E2EValidator();
}
//# sourceMappingURL=E2EValidator.js.map