mnemos-coder
Version:
CLI-based coding agent with graph-based execution loop and terminal UI
504 lines (490 loc) • 19.4 kB
JavaScript
/**
* End-to-end validation system for testing agent functionality
*/
import { EventEmitter } from 'events';
import { promises as fs } from 'fs';
import path from 'path';
import { SandboxManager } from './SandboxManager.js';
import { ExecutionLogger } from './ExecutionLogger.js';
/** Replacement entities for the characters that are significant in HTML text and attributes. */
const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
};
/**
 * Escape a value for safe interpolation into HTML.
 * @param {unknown} value - Anything; it is stringified first.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
    return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}
/**
 * Split a dependency spec such as `lodash@4.17.21` or `@scope/pkg@1.2.3`
 * into `[name, version]`.
 *
 * The version separator is the LAST `@`; a leading `@` belongs to a scoped
 * package name and must not be treated as a separator.
 * @param {string} spec
 * @returns {[string, string]} Name and version (`'latest'` when no version is given).
 */
function parseDependencySpec(spec) {
    const separatorIndex = spec.lastIndexOf('@');
    if (separatorIndex <= 0) {
        return [spec, 'latest'];
    }
    return [spec.slice(0, separatorIndex), spec.slice(separatorIndex + 1) || 'latest'];
}
/**
 * Check text against a string (substring match) or a RegExp.
 *
 * `lastIndex` is reset first because `.test()` on a `/g` or `/y` regex is
 * stateful, and the same pattern object may be reused across scenarios.
 * @param {string|RegExp} pattern
 * @param {string} text
 * @returns {boolean}
 */
function matchesExpectation(pattern, text) {
    if (typeof pattern === 'string') {
        return text.includes(pattern);
    }
    pattern.lastIndex = 0;
    return pattern.test(text);
}
/**
 * End-to-end validator: prepares a sandbox for a scenario, runs the CLI
 * against it in a child process, validates the outcome against the
 * scenario's expectations and produces reports.
 *
 * Events emitted: `scenarioStart`, `scenarioPhase`, `scenarioError`,
 * `scenarioComplete`, `suiteStart`, `suiteComplete`.
 */
export class E2EValidator extends EventEmitter {
    /** Sandbox provider used by setup and cleanup. */
    sandboxManager;
    /** Validator id -> async function `(executionResult, scenario) => string[]`. */
    customValidators = new Map();
    /** Every result produced by `runScenario` on this instance, in order. */
    testResults = [];
    constructor() {
        super();
        this.sandboxManager = new SandboxManager();
        this.registerBuiltinValidators();
    }
    /**
     * Register a custom validator function.
     * @param {string} id - Key that scenarios reference in `expectations.custom_validators`.
     * @param {Function} validator - Called as `validator(executionResult, scenario)`;
     *   must resolve to an iterable of error strings (empty when valid).
     */
    registerValidator(id, validator) {
        this.customValidators.set(id, validator);
    }
    /**
     * Run a single test scenario through setup, execution, validation,
     * metrics collection and cleanup.
     *
     * Failures in the first four phases are recorded in `result.errors`
     * rather than thrown; cleanup failures are recorded as warnings.
     * @param {object} scenario - Scenario description (`id`, `command`, `setup`, `expectations`, `cleanup`).
     * @returns {Promise<object>} The result record (also appended to `testResults`).
     */
    async runScenario(scenario) {
        const startTime = Date.now();
        const logger = new ExecutionLogger();
        this.emit('scenarioStart', { scenario });
        const result = {
            scenario_id: scenario.id,
            success: false,
            duration_ms: 0,
            errors: [],
            warnings: [],
            details: {
                setup_success: false,
                execution_success: false,
                validation_success: false,
                cleanup_success: false
            },
            metrics: {
                files_created: 0,
                files_modified: 0,
                files_deleted: 0,
                peak_memory_mb: 0,
                output_length: 0
            },
            artifacts: {
                sandbox_path: '',
                logs_path: '',
                output: '',
                error_output: ''
            }
        };
        let sandboxPath;
        try {
            // 1. Setup phase
            this.emit('scenarioPhase', { scenario, phase: 'setup' });
            sandboxPath = await this.setupScenario(scenario);
            result.artifacts.sandbox_path = sandboxPath;
            result.details.setup_success = true;
            // 2. Execution phase
            this.emit('scenarioPhase', { scenario, phase: 'execution' });
            logger.startCapture();
            let executionResult;
            let capturedOutput;
            try {
                executionResult = await this.executeScenario(scenario, sandboxPath);
            }
            finally {
                // Always pair startCapture with stopCapture, even when execution throws.
                capturedOutput = logger.stopCapture();
            }
            result.artifacts.output = capturedOutput;
            result.artifacts.error_output = executionResult.error || '';
            result.details.execution_success = executionResult.success;
            result.metrics.output_length = capturedOutput.length;
            // 3. Validation phase
            this.emit('scenarioPhase', { scenario, phase: 'validation' });
            const validationErrors = await this.validateResults(scenario, sandboxPath, executionResult);
            result.errors.push(...validationErrors);
            result.details.validation_success = validationErrors.length === 0;
            // 4. Metrics collection
            await this.collectMetrics(scenario, sandboxPath, result);
            // 5. Overall success determination
            result.success = result.details.setup_success &&
                result.details.execution_success &&
                result.details.validation_success;
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            result.errors.push(errorMsg);
            this.emit('scenarioError', { scenario, error: errorMsg });
        }
        finally {
            // 6. Cleanup phase
            this.emit('scenarioPhase', { scenario, phase: 'cleanup' });
            try {
                if (sandboxPath) {
                    // Export logs before cleanup.
                    // NOTE(review): the log file lives inside the sandbox directory, so it
                    // presumably disappears when the sandbox is cleaned up — confirm against
                    // SandboxManager.cleanup().
                    const logsPath = path.join(sandboxPath, 'execution.log');
                    await logger.exportLogs('txt', logsPath);
                    result.artifacts.logs_path = logsPath;
                    // Cleanup sandbox unless preserving files
                    if (!scenario.cleanup?.preserve_files?.length) {
                        await this.sandboxManager.cleanup();
                    }
                }
                result.details.cleanup_success = true;
            }
            catch (cleanupError) {
                result.warnings.push(`Cleanup failed: ${cleanupError}`);
            }
            result.duration_ms = Date.now() - startTime;
            this.testResults.push(result);
            this.emit('scenarioComplete', { scenario, result });
        }
        return result;
    }
    /**
     * Run a complete test suite, one scenario at a time.
     *
     * `global_setup` runs before the scenarios (its errors propagate);
     * `global_cleanup` always runs afterwards and its errors are only logged.
     * @param {object} suite - `{ name, scenarios, global_setup?, global_cleanup? }`.
     * @returns {Promise<{name: string, results: object[], summary: object}>}
     */
    async runSuite(suite) {
        const startTime = Date.now();
        this.emit('suiteStart', { suite });
        // Run global setup if provided
        if (suite.global_setup) {
            await suite.global_setup();
        }
        const results = [];
        try {
            // Run scenarios sequentially to avoid resource conflicts
            for (const scenario of suite.scenarios) {
                const result = await this.runScenario(scenario);
                results.push(result);
            }
        }
        finally {
            // Run global cleanup if provided
            if (suite.global_cleanup) {
                try {
                    await suite.global_cleanup();
                }
                catch (error) {
                    console.warn('Global cleanup failed:', error);
                }
            }
        }
        const duration = Date.now() - startTime;
        const passed = results.filter(r => r.success).length;
        const failed = results.length - passed;
        const summary = {
            total: results.length,
            passed,
            failed,
            duration_ms: duration,
            // An empty suite reports 0 rather than NaN (0 / 0).
            success_rate: results.length === 0 ? 0 : passed / results.length
        };
        this.emit('suiteComplete', { suite, results, summary });
        return {
            name: suite.name,
            results,
            summary
        };
    }
    /**
     * Generate a test report file.
     * @param {object[]} results - Scenario results as produced by `runScenario`.
     * @param {string} outputPath - Destination file path.
     * @param {'json'|'markdown'|'html'} [format='html']
     * @throws {Error} When `format` is not one of the supported values.
     */
    async generateReport(results, outputPath, format = 'html') {
        switch (format) {
            case 'json':
                await this.generateJSONReport(results, outputPath);
                break;
            case 'markdown':
                await this.generateMarkdownReport(results, outputPath);
                break;
            case 'html':
                await this.generateHTMLReport(results, outputPath);
                break;
            default:
                // Fail loudly instead of silently writing no report.
                throw new Error(`Unsupported report format: ${format}`);
        }
    }
    /**
     * Aggregate statistics over a list of scenario results.
     *
     * An empty list yields all-zero numbers (never NaN or ±Infinity).
     * @param {object[]} results
     * @returns {object} `success_rate`, `average_duration`, `common_errors`
     *   (top 10 by count), `performance_metrics`, `category_breakdown`.
     */
    getStatistics(results) {
        const total = results.length;
        if (total === 0) {
            return {
                success_rate: 0,
                average_duration: 0,
                common_errors: [],
                performance_metrics: {
                    fastest_test: 0,
                    slowest_test: 0,
                    average_memory: 0,
                    peak_memory: 0
                },
                category_breakdown: {}
            };
        }
        const passed = results.filter(r => r.success).length;
        const durations = results.map(r => r.duration_ms);
        const memoryUsages = results.map(r => r.metrics.peak_memory_mb);
        const sum = (values) => values.reduce((acc, v) => acc + v, 0);
        // Reduce instead of Math.min(...values) so very large arrays cannot
        // exceed the engine's argument-count limit.
        const smallest = (values) => values.reduce((acc, v) => Math.min(acc, v), Infinity);
        const largest = (values) => values.reduce((acc, v) => Math.max(acc, v), -Infinity);
        // Collect all errors and count occurrences
        const errorCounts = new Map();
        for (const r of results) {
            for (const error of r.errors) {
                errorCounts.set(error, (errorCounts.get(error) || 0) + 1);
            }
        }
        const commonErrors = Array.from(errorCounts.entries())
            .map(([error, count]) => ({ error, count }))
            .sort((a, b) => b.count - a.count)
            .slice(0, 10);
        return {
            success_rate: passed / total,
            average_duration: sum(durations) / total,
            common_errors: commonErrors,
            performance_metrics: {
                fastest_test: smallest(durations),
                slowest_test: largest(durations),
                average_memory: sum(memoryUsages) / total,
                peak_memory: largest(memoryUsages)
            },
            category_breakdown: {} // Would need scenario metadata to implement
        };
    }
    /**
     * Create a sandbox and populate it from `scenario.setup`.
     *
     * Writes each entry of `setup.files` (if any) relative to the sandbox and,
     * when `setup.dependencies` is non-empty, writes a `package.json` listing them.
     * @param {object} scenario
     * @returns {Promise<string>} The sandbox directory path.
     */
    async setupScenario(scenario) {
        const sandboxPath = await this.sandboxManager.createSandbox();
        if (scenario.setup) {
            // Create files (setup.files is optional)
            for (const [filename, content] of Object.entries(scenario.setup.files ?? {})) {
                const filePath = path.join(sandboxPath, filename);
                await fs.mkdir(path.dirname(filePath), { recursive: true });
                await fs.writeFile(filePath, content, 'utf-8');
            }
            // Install dependencies if specified
            if (scenario.setup.dependencies?.length) {
                const dependencies = {};
                for (const dep of scenario.setup.dependencies) {
                    const [name, version] = parseDependencySpec(dep);
                    dependencies[name] = version;
                }
                const packageJson = {
                    name: 'test-project',
                    version: '1.0.0',
                    dependencies
                };
                await fs.writeFile(path.join(sandboxPath, 'package.json'), JSON.stringify(packageJson, null, 2));
            }
        }
        return sandboxPath;
    }
    /**
     * Run the CLI for a scenario in a child process.
     *
     * Never rejects because of the child: spawn errors and non-zero exits are
     * reported through the resolved object.
     * @param {object} scenario - Uses `command` and `expectations.duration_max_ms` (timeout, default 60000 ms).
     * @param {string} sandboxPath - Passed to the CLI as `--workspace`.
     * @returns {Promise<{success: boolean, output: string, error: string, exit_code: number, duration: number}>}
     */
    async executeScenario(scenario, sandboxPath) {
        const { spawn } = await import('child_process');
        const startTime = Date.now();
        return new Promise((resolve) => {
            // Build command: node ./dist/cli.js --run "command" --workspace sandboxPath --output json
            const cliPath = path.resolve('./dist/cli.js');
            const args = [
                cliPath,
                '--run',
                scenario.command,
                '--workspace',
                sandboxPath,
                '--output',
                'json'
            ];
            const childProcess = spawn('node', args, {
                cwd: process.cwd(),
                stdio: ['pipe', 'pipe', 'pipe'],
                timeout: scenario.expectations?.duration_max_ms || 60000
            });
            let stdout = '';
            let stderr = '';
            childProcess.stdout.on('data', (data) => {
                stdout += data.toString();
            });
            childProcess.stderr.on('data', (data) => {
                stderr += data.toString();
            });
            childProcess.on('close', (code) => {
                resolve({
                    success: code === 0,
                    output: stdout,
                    error: stderr,
                    // `code` is null when the child was terminated by a signal.
                    exit_code: code ?? 0,
                    duration: Date.now() - startTime
                });
            });
            childProcess.on('error', (error) => {
                resolve({
                    success: false,
                    output: '',
                    error: error.message,
                    exit_code: 1,
                    duration: Date.now() - startTime
                });
            });
        });
    }
    /**
     * Compare an execution result and the sandbox contents with the
     * scenario's expectations.
     * @param {object} scenario - Must carry `expectations`.
     * @param {string} sandboxPath
     * @param {object} executionResult - As returned by `executeScenario`.
     * @returns {Promise<string[]>} Human-readable validation errors (empty when valid).
     */
    async validateResults(scenario, sandboxPath, executionResult) {
        const errors = [];
        const expectations = scenario.expectations;
        // Validate success expectation
        if (expectations.success !== executionResult.success) {
            errors.push(`Expected success: ${expectations.success}, got: ${executionResult.success}`);
        }
        // Validate duration
        if (expectations.duration_max_ms && executionResult.duration > expectations.duration_max_ms) {
            errors.push(`Execution took too long: ${executionResult.duration}ms > ${expectations.duration_max_ms}ms`);
        }
        // Validate file operations
        if (expectations.files_created) {
            for (const file of expectations.files_created) {
                try {
                    await fs.access(path.join(sandboxPath, file));
                }
                catch {
                    errors.push(`Expected file not created: ${file}`);
                }
            }
        }
        // Validate file content
        if (expectations.content_contains) {
            for (const check of expectations.content_contains) {
                try {
                    const content = await fs.readFile(path.join(sandboxPath, check.file), 'utf-8');
                    const pattern = check.text;
                    if (!matchesExpectation(pattern, content)) {
                        errors.push(typeof pattern === 'string'
                            ? `File ${check.file} does not contain expected text: ${pattern}`
                            : `File ${check.file} does not match expected pattern: ${pattern}`);
                    }
                }
                catch {
                    errors.push(`Could not read file for content validation: ${check.file}`);
                }
            }
        }
        // Validate output
        if (expectations.output_contains) {
            const pattern = expectations.output_contains;
            if (!matchesExpectation(pattern, executionResult.output)) {
                errors.push(typeof pattern === 'string'
                    ? `Output does not contain expected text: ${pattern}`
                    : `Output does not match expected pattern: ${pattern}`);
            }
        }
        // Run custom validators
        if (expectations.custom_validators) {
            for (const validatorId of expectations.custom_validators) {
                const validator = this.customValidators.get(validatorId);
                if (validator) {
                    const validatorErrors = await validator(executionResult, scenario);
                    errors.push(...validatorErrors);
                }
                else {
                    errors.push(`Custom validator not found: ${validatorId}`);
                }
            }
        }
        return errors;
    }
    /**
     * Fill in `result.metrics` after a run; failures become warnings.
     *
     * `files_created` is the number of entries found in the sandbox (recursive
     * listing), and `peak_memory_mb` is this process's current `heapUsed` in MiB.
     * @param {object} scenario - Unused.
     * @param {string} sandboxPath
     * @param {object} result - Mutated in place.
     */
    async collectMetrics(scenario, sandboxPath, result) {
        try {
            // Count files
            const files = await fs.readdir(sandboxPath, { recursive: true });
            result.metrics.files_created = files.length;
            // Calculate memory usage (simplified)
            const memoryUsage = process.memoryUsage();
            result.metrics.peak_memory_mb = memoryUsage.heapUsed / (1024 * 1024);
        }
        catch (error) {
            result.warnings.push(`Failed to collect metrics: ${error}`);
        }
    }
    /**
     * Register the built-in `code_quality`, `performance` and `security`
     * validators. Each receives the execution result and returns error strings.
     */
    registerBuiltinValidators() {
        // Code quality validator
        this.registerValidator('code_quality', async (result, scenario) => {
            const errors = [];
            // Check for basic code quality indicators
            if (result.output.includes('syntax error')) {
                errors.push('Syntax errors detected in output');
            }
            if (result.output.includes('TODO') || result.output.includes('FIXME')) {
                errors.push('TODO/FIXME comments left in code');
            }
            return errors;
        });
        // Performance validator
        this.registerValidator('performance', async (result, scenario) => {
            const errors = [];
            // Execution results carry `duration`; `duration_ms` is accepted as a
            // fallback for callers that pass a scenario-result-shaped object.
            const elapsedMs = result.duration ?? result.duration_ms;
            if (elapsedMs > 30000) { // 30 seconds
                errors.push('Execution took longer than 30 seconds');
            }
            return errors;
        });
        // Security validator
        this.registerValidator('security', async (result, scenario) => {
            const errors = [];
            // Check for potential security issues
            if (result.output.includes('password') || result.output.includes('secret')) {
                errors.push('Potential sensitive information in output');
            }
            return errors;
        });
    }
    /**
     * Write a JSON report containing a timestamp, the statistics summary and the raw results.
     * @param {object[]} results
     * @param {string} outputPath
     */
    async generateJSONReport(results, outputPath) {
        const report = {
            generated_at: new Date().toISOString(),
            summary: this.getStatistics(results),
            results
        };
        await fs.writeFile(outputPath, JSON.stringify(report, null, 2));
    }
    /**
     * Write a Markdown report: a summary section followed by one section per result.
     * @param {object[]} results
     * @param {string} outputPath
     */
    async generateMarkdownReport(results, outputPath) {
        const stats = this.getStatistics(results);
        const passed = results.filter(r => r.success).length;
        const lines = [
            '# E2E Test Report',
            `Generated: ${new Date().toISOString()}`,
            '## Summary',
            `- **Total Tests**: ${results.length}`,
            `- **Passed**: ${passed}`,
            `- **Failed**: ${results.length - passed}`,
            `- **Success Rate**: ${(stats.success_rate * 100).toFixed(1)}%`,
            `- **Average Duration**: ${stats.average_duration.toFixed(0)}ms`,
            '## Results'
        ];
        for (const result of results) {
            const status = result.success ? '✅ PASS' : '❌ FAIL';
            lines.push(`### ${status} - ${result.scenario_id}`, `- **Duration**: ${result.duration_ms}ms`, `- **Errors**: ${result.errors.length}`, `- **Warnings**: ${result.warnings.length}`);
            if (result.errors.length > 0) {
                lines.push('**Errors**:', ...result.errors.map(e => `- ${e}`));
            }
        }
        await fs.writeFile(outputPath, `${lines.join('\n')}\n`);
    }
    /**
     * Write a standalone HTML report.
     *
     * Scenario ids, errors and warnings are HTML-escaped because they can
     * contain arbitrary text (expected patterns, tool output, file names).
     * @param {object[]} results
     * @param {string} outputPath
     */
    async generateHTMLReport(results, outputPath) {
        const stats = this.getStatistics(results);
        const passed = results.filter(r => r.success).length;
        // Renders an optional "<h4>heading</h4>" section with one div per message.
        const renderMessages = (heading, cssClass, messages) => {
            if (messages.length === 0) {
                return '';
            }
            const items = messages
                .map(message => `<div class="${cssClass}">• ${escapeHtml(message)}</div>`)
                .join('');
            return `\n<h4>${heading}</h4>\n${items}\n`;
        };
        const renderResult = (result) => [
            '',
            `<div class="test ${result.success ? 'pass' : 'fail'}">`,
            `<h3>${result.success ? '✅' : '❌'} ${escapeHtml(result.scenario_id)}</h3>`,
            `<p><strong>Duration:</strong> ${result.duration_ms}ms</p>`,
            renderMessages('Errors', 'error', result.errors),
            renderMessages('Warnings', 'warning', result.warnings),
            '</div>',
            ''
        ].join('\n');
        const html = [
            '<!DOCTYPE html>',
            '<html>',
            '<head>',
            // Declared so the status glyphs and bullets render correctly.
            '<meta charset="utf-8">',
            '<title>E2E Test Report</title>',
            '<style>',
            'body { font-family: Arial, sans-serif; margin: 20px; }',
            '.summary { background: #f5f5f5; padding: 15px; border-radius: 5px; }',
            '.test { border: 1px solid #ddd; margin: 10px 0; padding: 15px; }',
            '.pass { border-left: 5px solid #4CAF50; }',
            '.fail { border-left: 5px solid #f44336; }',
            '.error { color: #d32f2f; margin: 5px 0; }',
            '.warning { color: #ff9800; margin: 5px 0; }',
            '</style>',
            '</head>',
            '<body>',
            '<h1>E2E Test Report</h1>',
            '<div class="summary">',
            '<h2>Summary</h2>',
            `<p><strong>Generated:</strong> ${new Date().toISOString()}</p>`,
            `<p><strong>Total Tests:</strong> ${results.length}</p>`,
            `<p><strong>Passed:</strong> ${passed}</p>`,
            `<p><strong>Failed:</strong> ${results.length - passed}</p>`,
            `<p><strong>Success Rate:</strong> ${(stats.success_rate * 100).toFixed(1)}%</p>`,
            `<p><strong>Average Duration:</strong> ${stats.average_duration.toFixed(0)}ms</p>`,
            '</div>',
            '<h2>Test Results</h2>',
            results.map(renderResult).join(''),
            '</body>',
            '</html>'
        ].join('\n');
        await fs.writeFile(outputPath, html);
    }
}
/**
 * Factory for a fresh {@link E2EValidator}.
 * @returns {E2EValidator} A new validator instance on every call.
 */
export function createE2EValidator() {
    const validator = new E2EValidator();
    return validator;
}
//# sourceMappingURL=E2EValidator.js.map