vibe-coder-mcp
Version:
Production-ready MCP server with complete agent integration, multi-transport support, and comprehensive development automation tools for AI-assisted workflows.
434 lines (433 loc) • 17.2 kB
JavaScript
import { AgentOrchestrator } from './agent-orchestrator.js';
import { TaskStreamer } from './task-streamer.js';
import { ExecutionWatchdog } from './execution-watchdog.js';
import { FeedbackProcessor } from './feedback-processor.js';
import { AppError, ValidationError } from '../../../utils/errors.js';
import logger from '../../../logger.js';
/**
 * Singleton that records errors reported for tasks/agents, groups them into
 * recurring patterns, and drives automatic recovery attempts (retry,
 * reassignment, restart, ...) with backoff and escalation.
 *
 * All state (errors, attempts, patterns, stats) is kept in memory only.
 */
export class ErrorRecovery {
    /** Lazily created shared instance; see getInstance(). */
    static instance = null;
    agentOrchestrator;
    taskStreamer;
    executionWatchdog;
    feedbackProcessor;
    config;
    /** errorId -> error record created by reportError(). */
    errors = new Map();
    /** errorId -> ordered list of recovery attempts for that error. */
    recoveryAttempts = new Map();
    /** patternKey -> aggregated pattern record (see analyzeErrorPattern()). */
    errorPatterns = new Map();
    /** Handles of scheduled follow-up retries, tracked so destroy() can cancel them. */
    retryTimers = new Set();
    stats;

    /**
     * @param {object} [config] Overrides merged over the defaults below.
     *   NOTE(review): `patternAnalysisWindowHours` and `escalationThreshold`
     *   are accepted but not read anywhere in this class.
     */
    constructor(config) {
        this.config = {
            maxRetryAttempts: 3,
            retryDelayMinutes: 2,
            exponentialBackoff: true,
            maxBackoffMinutes: 30,
            enablePatternAnalysis: true,
            patternAnalysisWindowHours: 24,
            autoRecoveryEnabled: true,
            escalationThreshold: 5,
            ...config
        };
        this.agentOrchestrator = AgentOrchestrator.getInstance();
        this.taskStreamer = TaskStreamer.getInstance();
        this.executionWatchdog = ExecutionWatchdog.getInstance();
        this.feedbackProcessor = FeedbackProcessor.getInstance();
        this.stats = {
            totalErrors: 0,
            recoveredErrors: 0,
            failedRecoveries: 0,
            averageRecoveryTime: 0,
            mostCommonErrorType: 'unknown_error',
            mostEffectiveStrategy: 'retry_same_agent',
            patternsPrevented: 0,
            lastStatsUpdate: new Date()
        };
        logger.info({ config: this.config }, 'Error recovery system initialized');
    }

    /**
     * Returns the shared instance, creating it on first use.
     * `config` is only applied when the instance is created; later calls ignore it.
     * @param {object} [config]
     * @returns {ErrorRecovery}
     */
    static getInstance(config) {
        if (!ErrorRecovery.instance) {
            ErrorRecovery.instance = new ErrorRecovery(config);
        }
        return ErrorRecovery.instance;
    }

    /**
     * Builds an identifier of the form `<prefix>_<epoch ms>_<random base36>`.
     * Not cryptographically unique; only used as an in-memory key.
     * @param {string} prefix
     * @returns {string}
     */
    createId(prefix) {
        // slice(2, 11) drops the leading "0." and keeps at most 9 characters,
        // matching the previous substr(2, 9) without the deprecated API.
        return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }

    /**
     * Records an error, updates pattern statistics and, when enabled and the
     * error type is recoverable, awaits a first recovery attempt.
     *
     * @param {string} taskId
     * @param {string} errorType
     * @param {string} message
     * @param {string} [agentId]
     * @param {object} [context] Stored as-is; defaults to `{}` when falsy.
     * @param {string} [stackTrace]
     * @returns {Promise<string>} The generated error id.
     * @throws {AppError} When recording or the recovery attempt throws.
     */
    async reportError(taskId, errorType, message, agentId, context, stackTrace) {
        try {
            const errorId = this.createId('error');
            const errorInfo = {
                id: errorId,
                taskId,
                agentId,
                errorType,
                message,
                stackTrace,
                timestamp: new Date(),
                context: context || {},
                severity: this.determineSeverity(errorType, message),
                recoverable: this.isRecoverable(errorType)
            };
            this.errors.set(errorId, errorInfo);
            this.stats.totalErrors++;
            logger.error({
                errorId,
                taskId,
                agentId,
                errorType,
                severity: errorInfo.severity
            }, `Error reported: ${message}`);
            if (this.config.enablePatternAnalysis) {
                await this.analyzeErrorPattern(errorInfo);
            }
            if (this.config.autoRecoveryEnabled && errorInfo.recoverable) {
                await this.attemptRecovery(errorId);
            }
            return errorId;
        }
        catch (error) {
            logger.error({ err: error, taskId, errorType }, 'Failed to report error');
            throw new AppError('Error reporting failed', { cause: error });
        }
    }

    /**
     * Runs one recovery attempt for a previously reported error.
     *
     * Attempts after the first wait for calculateRetryDelay() before
     * executing. On failure a follow-up attempt is scheduled while attempts
     * remain; once they are exhausted the error is escalated.
     *
     * @param {string} errorId
     * @returns {Promise<boolean>} Whether this attempt succeeded.
     * @throws {AppError} When the error id is unknown or the attempt throws.
     */
    async attemptRecovery(errorId) {
        try {
            const errorInfo = this.errors.get(errorId);
            if (!errorInfo) {
                throw new ValidationError(`Error not found: ${errorId}`);
            }
            const attempts = this.recoveryAttempts.get(errorId) ?? [];
            if (attempts.length >= this.config.maxRetryAttempts) {
                logger.warn({ errorId, attempts: attempts.length }, 'Max recovery attempts exceeded');
                await this.escalateError(errorId);
                return false;
            }
            // Chosen before the new attempt is appended, so the selector only
            // sees attempts that have already been tried.
            const strategy = this.selectRecoveryStrategy(errorInfo, attempts);
            const attemptId = this.createId('attempt');
            const attempt = {
                id: attemptId,
                errorId,
                strategy,
                attemptNumber: attempts.length + 1,
                startedAt: new Date(),
                success: false
            };
            if (!this.recoveryAttempts.has(errorId)) {
                this.recoveryAttempts.set(errorId, attempts);
            }
            attempts.push(attempt);
            logger.info({
                errorId,
                attemptId,
                strategy,
                attemptNumber: attempt.attemptNumber
            }, 'Starting recovery attempt');
            // Only retries back off; the first attempt runs immediately.
            if (attempt.attemptNumber > 1) {
                const delay = this.calculateRetryDelay(attempt.attemptNumber);
                await this.sleep(delay);
            }
            const success = await this.executeRecoveryStrategy(errorInfo, attempt);
            attempt.completedAt = new Date();
            attempt.success = success;
            if (success) {
                this.stats.recoveredErrors++;
                const recoveryTime = attempt.completedAt.getTime() - attempt.startedAt.getTime();
                // Incremental arithmetic mean (ms) over all successful recoveries.
                this.stats.averageRecoveryTime +=
                    (recoveryTime - this.stats.averageRecoveryTime) / this.stats.recoveredErrors;
                logger.info({
                    errorId,
                    attemptId,
                    strategy,
                    recoveryTime: Math.round(recoveryTime / 1000)
                }, 'Recovery successful');
            }
            else {
                this.stats.failedRecoveries++;
                logger.warn({ errorId, attemptId, strategy }, 'Recovery attempt failed');
                if (attempt.attemptNumber < this.config.maxRetryAttempts) {
                    this.scheduleRetry(errorId);
                }
                else {
                    // No attempts left: escalate now instead of silently stopping.
                    await this.escalateError(errorId);
                }
            }
            return success;
        }
        catch (error) {
            logger.error({ err: error, errorId }, 'Failed to attempt recovery');
            throw new AppError('Recovery attempt failed', { cause: error });
        }
    }

    /**
     * Schedules a follow-up attemptRecovery() call one second from now and
     * remembers the timer so destroy() can cancel it.
     * @param {string} errorId
     */
    scheduleRetry(errorId) {
        const timer = setTimeout(() => {
            this.retryTimers.delete(timer);
            this.attemptRecovery(errorId).catch(error => {
                logger.error({ err: error, errorId }, 'Failed to retry recovery');
            });
        }, 1000);
        this.retryTimers.add(timer);
    }

    /**
     * @param {string} errorId
     * @returns {object|null} The stored error record, or null when unknown.
     */
    getError(errorId) {
        return this.errors.get(errorId) || null;
    }

    /**
     * @param {string} errorId
     * @returns {object[]} Attempts recorded for the error (empty when none).
     */
    getRecoveryAttempts(errorId) {
        return this.recoveryAttempts.get(errorId) || [];
    }

    /** @returns {object[]} Snapshot array of all known pattern records. */
    getErrorPatterns() {
        return Array.from(this.errorPatterns.values());
    }

    /** @returns {object} Shallow copy of the statistics after refreshing derived fields. */
    getStats() {
        this.updateStats();
        return { ...this.stats };
    }

    /**
     * Drops errors (with their attempts) and patterns older than the cutoff.
     * @param {number} [olderThanHours=168] Age threshold in hours.
     */
    cleanup(olderThanHours = 168) {
        const cutoffTime = new Date(Date.now() - olderThanHours * 60 * 60 * 1000);
        for (const [errorId, errorInfo] of this.errors.entries()) {
            if (errorInfo.timestamp < cutoffTime) {
                this.errors.delete(errorId);
                this.recoveryAttempts.delete(errorId);
            }
        }
        for (const [patternKey, pattern] of this.errorPatterns.entries()) {
            if (pattern.lastOccurrence < cutoffTime) {
                this.errorPatterns.delete(patternKey);
            }
        }
        logger.debug({ cutoffTime, olderThanHours }, 'Error recovery cleanup completed');
    }

    /**
     * Maps an error to 'critical' | 'high' | 'medium' | 'low'. Known error
     * types win; otherwise keywords in the message decide.
     * @param {string} errorType
     * @param {string} [message] Non-string or missing values are treated as text/empty.
     * @returns {string}
     */
    determineSeverity(errorType, message) {
        const criticalTypes = ['system_error', 'resource_exhaustion'];
        const highTypes = ['agent_failure', 'dependency_failure'];
        const mediumTypes = ['task_timeout', 'communication_failure'];
        if (criticalTypes.includes(errorType))
            return 'critical';
        if (highTypes.includes(errorType))
            return 'high';
        if (mediumTypes.includes(errorType))
            return 'medium';
        // Coerce so a missing message falls through to 'low' instead of throwing.
        const lowerMessage = String(message ?? '').toLowerCase();
        if (lowerMessage.includes('critical') || lowerMessage.includes('fatal'))
            return 'critical';
        if (lowerMessage.includes('error') || lowerMessage.includes('failed'))
            return 'high';
        if (lowerMessage.includes('warning') || lowerMessage.includes('timeout'))
            return 'medium';
        return 'low';
    }

    /**
     * @param {string} errorType
     * @returns {boolean} False for 'validation_error' and 'system_error', true otherwise.
     */
    isRecoverable(errorType) {
        const unrecoverableTypes = ['validation_error', 'system_error'];
        return !unrecoverableTypes.includes(errorType);
    }

    /**
     * Picks the strategy for the next attempt.
     *
     * A matching pattern's suggestion is used only while it has not been
     * tried for this error yet; otherwise the per-type progression below
     * applies, so retries can move on to a different strategy.
     *
     * @param {object} errorInfo Error record (reads `errorType`, `message`).
     * @param {object[]} previousAttempts Attempts already made (reads `strategy`).
     * @returns {string} Strategy name understood by executeRecoveryStrategy().
     */
    selectRecoveryStrategy(errorInfo, previousAttempts) {
        const pattern = this.findMatchingPattern(errorInfo);
        if (pattern) {
            const alreadyTried = previousAttempts.some(previous => previous.strategy === pattern.suggestedStrategy);
            if (!alreadyTried) {
                return pattern.suggestedStrategy;
            }
        }
        const isFirstAttempt = previousAttempts.length === 0;
        switch (errorInfo.errorType) {
            case 'agent_failure':
                return isFirstAttempt ? 'restart_agent' : 'reassign_different_agent';
            case 'task_timeout':
                return isFirstAttempt ? 'retry_same_agent' : 'reassign_different_agent';
            case 'dependency_failure':
                return 'retry_same_agent';
            case 'communication_failure':
                return 'retry_same_agent';
            case 'resource_exhaustion':
                return 'reassign_different_agent';
            default:
                return isFirstAttempt ? 'retry_same_agent' : 'reassign_different_agent';
        }
    }

    /**
     * Dispatches to the handler for `attempt.strategy`.
     * @param {object} errorInfo
     * @param {object} attempt Handlers may set `attempt.message`.
     * @returns {Promise<boolean>} Handler result; false for unknown strategies or thrown errors.
     */
    async executeRecoveryStrategy(errorInfo, attempt) {
        try {
            switch (attempt.strategy) {
                case 'retry_same_agent':
                    return await this.retryWithSameAgent(errorInfo, attempt);
                case 'reassign_different_agent':
                    return await this.reassignToDifferentAgent(errorInfo, attempt);
                case 'restart_agent':
                    return await this.restartAgent(errorInfo, attempt);
                case 'split_task':
                    return await this.splitTask(errorInfo, attempt);
                case 'escalate_human':
                    return await this.escalateToHuman(errorInfo, attempt);
                case 'skip_task':
                    return await this.skipTask(errorInfo, attempt);
                default:
                    logger.warn({ strategy: attempt.strategy }, 'Unknown recovery strategy');
                    return false;
            }
        }
        catch (error) {
            logger.error({ err: error, strategy: attempt.strategy }, 'Recovery strategy execution failed');
            return false;
        }
    }

    /**
     * Releases the task via the task streamer so it can be retried.
     * NOTE(review): presumably the streamer re-queues the released task — confirm.
     * @returns {Promise<boolean>} False when no agent is known or the release throws.
     */
    async retryWithSameAgent(errorInfo, attempt) {
        if (!errorInfo.agentId)
            return false;
        try {
            await this.taskStreamer.releaseTask(errorInfo.taskId, errorInfo.agentId);
            attempt.message = 'Task released for retry with same agent';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to retry with same agent: ${error}`;
            return false;
        }
    }

    /**
     * Releases the task from its current agent (if any). Selecting the new
     * agent is not done here.
     * @returns {Promise<boolean>} False only when the release throws.
     */
    async reassignToDifferentAgent(errorInfo, attempt) {
        try {
            if (errorInfo.agentId) {
                await this.taskStreamer.releaseTask(errorInfo.taskId, errorInfo.agentId);
            }
            attempt.message = 'Task released for reassignment to different agent';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to reassign task: ${error}`;
            return false;
        }
    }

    /**
     * Logs a restart request for the failing agent; no restart is performed here.
     * @returns {Promise<boolean>} False when no agent is known.
     */
    async restartAgent(errorInfo, attempt) {
        if (!errorInfo.agentId)
            return false;
        try {
            logger.warn({ agentId: errorInfo.agentId }, 'Agent restart requested');
            attempt.message = 'Agent restart requested';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to restart agent: ${error}`;
            return false;
        }
    }

    /** Logs a task-splitting request; the split itself is not performed here. */
    async splitTask(errorInfo, attempt) {
        try {
            logger.info({ taskId: errorInfo.taskId }, 'Task splitting requested');
            attempt.message = 'Task splitting initiated';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to split task: ${error}`;
            return false;
        }
    }

    /** Logs an error-level entry requesting human intervention. */
    async escalateToHuman(errorInfo, attempt) {
        try {
            logger.error({
                errorId: errorInfo.id,
                taskId: errorInfo.taskId,
                agentId: errorInfo.agentId
            }, 'HUMAN INTERVENTION REQUIRED: Error escalated');
            attempt.message = 'Escalated to human intervention';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to escalate: ${error}`;
            return false;
        }
    }

    /** Logs that the task is being skipped. */
    async skipTask(errorInfo, attempt) {
        try {
            logger.warn({ taskId: errorInfo.taskId }, 'Task skipped due to recovery failure');
            attempt.message = 'Task skipped';
            return true;
        }
        catch (error) {
            attempt.message = `Failed to skip task: ${error}`;
            return false;
        }
    }

    /**
     * Creates or updates the pattern record for this error. From the third
     * occurrence on, prevention measures are (re)generated.
     * Failures are logged and swallowed so analysis never blocks reporting.
     * @param {object} errorInfo
     */
    async analyzeErrorPattern(errorInfo) {
        try {
            const patternKey = `${errorInfo.errorType}_${this.extractPatternFromMessage(errorInfo.message)}`;
            let pattern = this.errorPatterns.get(patternKey);
            if (!pattern) {
                pattern = {
                    pattern: patternKey,
                    errorType: errorInfo.errorType,
                    frequency: 0,
                    lastOccurrence: errorInfo.timestamp,
                    affectedAgents: new Set(),
                    affectedTasks: new Set(),
                    // The pattern is not registered yet, so this resolves to
                    // the first-attempt default for the error type.
                    suggestedStrategy: this.selectRecoveryStrategy(errorInfo, []),
                    preventionMeasures: []
                };
                this.errorPatterns.set(patternKey, pattern);
            }
            pattern.frequency++;
            pattern.lastOccurrence = errorInfo.timestamp;
            if (errorInfo.agentId)
                pattern.affectedAgents.add(errorInfo.agentId);
            pattern.affectedTasks.add(errorInfo.taskId);
            if (pattern.frequency >= 3) {
                pattern.preventionMeasures = this.generatePreventionMeasures(pattern);
            }
            logger.debug({ patternKey, frequency: pattern.frequency }, 'Error pattern updated');
        }
        catch (error) {
            logger.error({ err: error }, 'Failed to analyze error pattern');
        }
    }

    /**
     * Reduces a message to its first three lower-cased words longer than
     * three characters (minus a few stop words), joined with '_'.
     * @param {string} [message] Non-string or missing values are treated as text/empty.
     * @returns {string}
     */
    extractPatternFromMessage(message) {
        const stopWords = ['the', 'and', 'for', 'with', 'from', 'this', 'that'];
        const words = String(message ?? '').toLowerCase().split(/\s+/);
        const significantWords = words.filter(word => word.length > 3 && !stopWords.includes(word));
        return significantWords.slice(0, 3).join('_');
    }

    /**
     * @param {object} errorInfo
     * @returns {object|null} Pattern with the same type + message key, if any.
     */
    findMatchingPattern(errorInfo) {
        const patternKey = `${errorInfo.errorType}_${this.extractPatternFromMessage(errorInfo.message)}`;
        return this.errorPatterns.get(patternKey) || null;
    }

    /**
     * @param {object} pattern Reads `pattern.errorType`.
     * @returns {string[]} Canned suggestions; empty for types without any.
     */
    generatePreventionMeasures(pattern) {
        const measures = [];
        switch (pattern.errorType) {
            case 'agent_failure':
                measures.push('Implement agent health monitoring');
                measures.push('Add agent restart automation');
                break;
            case 'task_timeout':
                measures.push('Adjust timeout thresholds');
                measures.push('Implement task complexity analysis');
                break;
            case 'communication_failure':
                measures.push('Add communication retry logic');
                measures.push('Implement connection pooling');
                break;
        }
        return measures;
    }

    /**
     * @param {number} attemptNumber 1-based attempt number.
     * @returns {number} Delay in milliseconds: the fixed retry delay, or
     *   retryDelayMinutes * 2^(attemptNumber - 1) capped at maxBackoffMinutes.
     */
    calculateRetryDelay(attemptNumber) {
        if (!this.config.exponentialBackoff) {
            return this.config.retryDelayMinutes * 60000;
        }
        const delay = this.config.retryDelayMinutes * Math.pow(2, attemptNumber - 1);
        return Math.min(delay, this.config.maxBackoffMinutes) * 60000;
    }

    /**
     * @param {number} ms
     * @returns {Promise<void>} Resolves after `ms` milliseconds.
     */
    sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    /**
     * Logs that recovery attempts for the error are exhausted. No-op for unknown ids.
     * @param {string} errorId
     */
    async escalateError(errorId) {
        const errorInfo = this.errors.get(errorId);
        if (errorInfo) {
            logger.error({
                errorId,
                taskId: errorInfo.taskId,
                errorType: errorInfo.errorType
            }, 'Error escalated after max recovery attempts');
        }
    }

    /**
     * Recomputes the derived statistics: the most frequent error type among
     * stored errors and the strategy with the most successful attempts.
     */
    updateStats() {
        const errorTypeCounts = new Map();
        for (const error of this.errors.values()) {
            errorTypeCounts.set(error.errorType, (errorTypeCounts.get(error.errorType) || 0) + 1);
        }
        let maxCount = 0;
        let mostCommonType = 'unknown_error';
        for (const [type, count] of errorTypeCounts.entries()) {
            if (count > maxCount) {
                maxCount = count;
                mostCommonType = type;
            }
        }
        this.stats.mostCommonErrorType = mostCommonType;

        const successesByStrategy = new Map();
        for (const attempts of this.recoveryAttempts.values()) {
            for (const attempt of attempts) {
                if (attempt.success) {
                    successesByStrategy.set(attempt.strategy, (successesByStrategy.get(attempt.strategy) || 0) + 1);
                }
            }
        }
        // Keep the current value when nothing has succeeded yet.
        let bestSuccesses = 0;
        let bestStrategy = this.stats.mostEffectiveStrategy;
        for (const [strategy, successes] of successesByStrategy.entries()) {
            if (successes > bestSuccesses) {
                bestSuccesses = successes;
                bestStrategy = strategy;
            }
        }
        this.stats.mostEffectiveStrategy = bestStrategy;
        this.stats.lastStatsUpdate = new Date();
    }

    /**
     * Cancels scheduled retries, clears all in-memory state and resets the
     * singleton so the next getInstance() builds a fresh instance.
     */
    destroy() {
        for (const timer of this.retryTimers) {
            clearTimeout(timer);
        }
        this.retryTimers.clear();
        this.errors.clear();
        this.recoveryAttempts.clear();
        this.errorPatterns.clear();
        ErrorRecovery.instance = null;
        logger.info('Error recovery system destroyed');
    }
}