UNPKG

@sethdouglasford/claude-flow

Version:

Claude Code Flow - Advanced AI-powered development workflows with SPARC methodology

964 lines 41 kB
/** * Main orchestrator for Claude-Flow */ import { SystemEvents, } from "../utils/types.js"; import { SystemError, InitializationError, ShutdownError } from "../utils/errors.js"; import { delay, retry, circuitBreaker } from "../utils/helpers.js"; import { mkdir, writeFile, readFile } from "fs/promises"; import { join, dirname } from "path"; /** * Session manager implementation with persistence */ class SessionManager { terminalManager; memoryManager; eventBus; logger; config; sessions = new Map(); sessionProfiles = new Map(); persistencePath; persistenceCircuitBreaker; constructor(terminalManager, memoryManager, eventBus, logger, config) { this.terminalManager = terminalManager; this.memoryManager = memoryManager; this.eventBus = eventBus; this.logger = logger; this.config = config; this.persistencePath = join(config.orchestrator.dataDir ?? "./data", "sessions.json"); // Circuit breaker for persistence operations this.persistenceCircuitBreaker = circuitBreaker("SessionPersistence", { threshold: 5, timeout: 30000, resetTimeout: 60000 }); } async createSession(profile) { try { // Create terminal with retry logic const terminalId = await retry(() => this.terminalManager.spawnTerminal(profile), { maxAttempts: 3, initialDelay: 1000 }); // Create memory bank with retry logic const memoryBankId = await retry(() => this.memoryManager.createBank(profile.id), { maxAttempts: 3, initialDelay: 1000 }); // Create session const session = { id: `session_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`, agentId: profile.id, terminalId, startTime: new Date(), status: "active", lastActivity: new Date(), memoryBankId, }; this.sessions.set(session.id, session); this.sessionProfiles.set(session.id, profile); this.logger.info("Session created", { sessionId: session.id, agentId: profile.id, terminalId, memoryBankId, }); // Persist sessions asynchronously this.persistSessions().catch(error => this.logger.error("Failed to persist sessions", error)); return session; } catch (error) { this.logger.error("Failed to create session", { agentId: profile.id, error: error instanceof Error ? error.message : String(error) }); throw new SystemError(`Failed to create session for agent ${profile.id}`, { error: error instanceof Error ? error.message : String(error) }); } } getSession(sessionId) { return this.sessions.get(sessionId); } getActiveSessions() { return Array.from(this.sessions.values()).filter((session) => session.status === "active" || session.status === "idle"); } async terminateSession(sessionId) { const session = this.sessions.get(sessionId); if (!session) { throw new Error(`Session not found: ${sessionId}`); } try { // Update session status first session.status = "terminated"; session.endTime = new Date(); // Terminate terminal with timeout await Promise.race([ this.terminalManager.terminateTerminal(session.terminalId), delay(5000).then(() => { throw new Error("Terminal termination timeout"); }), ]).catch(error => { this.logger.error("Error terminating terminal", { sessionId, error: error instanceof Error ? error.message : String(error) }); }); // Close memory bank with timeout await Promise.race([ this.memoryManager.closeBank(session.memoryBankId), delay(5000).then(() => { throw new Error("Memory bank close timeout"); }), ]).catch(error => { this.logger.error("Error closing memory bank", { sessionId, error: error instanceof Error ? error.message : String(error) }); }); // Clean up this.sessionProfiles.delete(sessionId); this.logger.info("Session terminated", { sessionId, duration: session.endTime.getTime() - session.startTime.getTime() }); // Persist sessions asynchronously this.persistSessions().catch(error => this.logger.error("Failed to persist sessions", error)); } catch (error) { this.logger.error("Error during session termination", { sessionId, error: error instanceof Error ? error.message : String(error) }); throw error; } } async terminateAllSessions() { const sessions = this.getActiveSessions(); // Terminate sessions in batches to avoid overwhelming the system const batchSize = 5; for (let i = 0; i < sessions.length; i += batchSize) { const batch = sessions.slice(i, i + batchSize); await Promise.allSettled(batch.map((session) => this.terminateSession(session.id))); } } removeSession(sessionId) { this.sessions.delete(sessionId); this.sessionProfiles.delete(sessionId); } async persistSessions() { if (!this.config.orchestrator.persistSessions) { return; } try { await this.persistenceCircuitBreaker.execute(async () => { const data = { sessions: Array.from(this.sessions.values()).map(session => ({ ...session, profile: this.sessionProfiles.get(session.id), })).filter(s => s.profile), taskQueue: [], metrics: { completedTasks: 0, failedTasks: 0, totalTaskDuration: 0, }, savedAt: new Date(), }; await mkdir(dirname(this.persistencePath), { recursive: true }); await writeFile(this.persistencePath, JSON.stringify(data, null, 2), "utf8"); this.logger.debug("Sessions persisted", { count: data.sessions.length }); }); } catch (error) { this.logger.error("Failed to persist sessions", error); } } async restoreSessions() { if (!this.config.orchestrator.persistSessions) { return; } try { const data = await readFile(this.persistencePath, "utf8"); const persistence = JSON.parse(data); // Restore only active/idle sessions const sessionsToRestore = persistence.sessions.filter(s => s.status === "active" || s.status === "idle"); for (const sessionData of sessionsToRestore) { try { // Recreate session const session = await this.createSession(sessionData.profile); // Update with persisted data Object.assign(session, { id: sessionData.id, startTime: new Date(sessionData.startTime), lastActivity: new Date(sessionData.lastActivity), }); this.logger.info("Session restored", { sessionId: session.id }); } catch (error) { this.logger.error("Failed to restore session", { sessionId: sessionData.id, error, }); } } } catch (error) { if (error.code !== "ENOENT") { this.logger.error("Failed to restore sessions", error); } } } } /** * Main orchestrator implementation with enhanced features */ export class Orchestrator { config; terminalManager; memoryManager; coordinationManager; mcpServer; eventBus; logger; initialized = false; shutdownInProgress = false; sessionManager; healthCheckInterval; maintenanceInterval; metricsInterval; agents = new Map(); taskQueue = []; taskHistory = new Map(); startTime = Date.now(); // Metrics tracking metrics = { completedTasks: 0, failedTasks: 0, totalTaskDuration: 0, }; // Circuit breakers for critical operations healthCheckCircuitBreaker; taskAssignmentCircuitBreaker; constructor(config, terminalManager, memoryManager, coordinationManager, mcpServer, eventBus, logger) { this.config = config; this.terminalManager = terminalManager; this.memoryManager = memoryManager; this.coordinationManager = coordinationManager; this.mcpServer = mcpServer; this.eventBus = eventBus; this.logger = logger; this.sessionManager = new SessionManager(terminalManager, memoryManager, eventBus, logger, config); // Initialize circuit breakers this.healthCheckCircuitBreaker = circuitBreaker("HealthCheck", { threshold: 3, timeout: 10000, resetTimeout: 30000 }); this.taskAssignmentCircuitBreaker = circuitBreaker("TaskAssignment", { threshold: 5, timeout: 5000, resetTimeout: 20000 }); } async initialize() { if (this.initialized) { throw new InitializationError("Orchestrator already initialized"); } this.logger.info("Initializing orchestrator..."); const startTime = Date.now(); try { // Initialize components in parallel where possible await Promise.all([ this.initializeComponent("Terminal Manager", () => this.terminalManager.initialize()), this.initializeComponent("Memory Manager", () => this.memoryManager.initialize()), this.initializeComponent("Coordination Manager", () => this.coordinationManager.initialize()), ]); // MCP server needs to be started after other components await this.initializeComponent("MCP Server", () => this.mcpServer.start()); // Restore persisted sessions await this.sessionManager.restoreSessions(); // Set up event handlers this.setupEventHandlers(); // Start background tasks this.startHealthChecks(); this.startMaintenanceTasks(); this.startMetricsCollection(); this.initialized = true; const initDuration = Date.now() - startTime; this.eventBus.emit(SystemEvents.SYSTEM_READY, { timestamp: new Date() }); this.logger.info("Orchestrator initialized successfully", { duration: initDuration }); } catch (error) { this.logger.error("Failed to initialize orchestrator", error); // Attempt cleanup on initialization failure await this.emergencyShutdown(); throw new InitializationError("Orchestrator", { error }); } } async shutdown() { if (!this.initialized || this.shutdownInProgress) { return; } this.shutdownInProgress = true; this.logger.info("Shutting down orchestrator..."); const shutdownStart = Date.now(); try { // Stop background tasks this.stopBackgroundTasks(); // Save current state await this.sessionManager.persistSessions(); // Process any remaining critical tasks await this.processShutdownTasks(); // Terminate all sessions await this.sessionManager.terminateAllSessions(); // Shutdown components with timeout await Promise.race([ this.shutdownComponents(), delay(this.config.orchestrator.shutdownTimeout), ]); const shutdownDuration = Date.now() - shutdownStart; this.eventBus.emit(SystemEvents.SYSTEM_SHUTDOWN, { reason: "Graceful shutdown" }); this.logger.info("Orchestrator shutdown complete", { duration: shutdownDuration }); } catch (error) { this.logger.error("Error during shutdown", error); // Force shutdown if graceful shutdown fails await this.emergencyShutdown(); throw new ShutdownError("Failed to shutdown gracefully", { error }); } finally { this.initialized = false; this.shutdownInProgress = false; } } async spawnAgent(profile) { if (!this.initialized) { throw new SystemError("Orchestrator not initialized"); } // Check agent limit if (this.agents.size >= this.config.orchestrator.maxConcurrentAgents) { throw new SystemError("Maximum concurrent agents reached"); } // Validate agent profile this.validateAgentProfile(profile); this.logger.info("Spawning agent", { agentId: profile.id, type: profile.type }); try { // Create session with retry const session = await retry(() => this.sessionManager.createSession(profile), { maxAttempts: 3, initialDelay: 2000 }); // Store agent profile this.agents.set(profile.id, profile); // Emit event this.eventBus.emit(SystemEvents.AGENT_SPAWNED, { agentId: profile.id, profile, sessionId: session.id, }); // Start agent health monitoring this.startAgentHealthMonitoring(profile.id); return session.id; } catch (error) { this.logger.error("Failed to spawn agent", { agentId: profile.id, error }); throw error; } } async terminateAgent(agentId) { if (!this.initialized) { throw new SystemError("Orchestrator not initialized"); } const profile = this.agents.get(agentId); if (!profile) { throw new SystemError(`Agent not found: ${agentId}`); } this.logger.info("Terminating agent", { agentId }); try { // Cancel any assigned tasks await this.cancelAgentTasks(agentId); // Find and terminate all sessions for this agent const sessions = this.sessionManager.getActiveSessions().filter((session) => session.agentId === agentId); await Promise.allSettled(sessions.map((session) => this.sessionManager.terminateSession(session.id))); // Remove agent this.agents.delete(agentId); // Emit event this.eventBus.emit(SystemEvents.AGENT_TERMINATED, { agentId, reason: "User requested", }); } catch (error) { this.logger.error("Failed to terminate agent", { agentId, error }); throw error; } } async assignTask(task) { if (!this.initialized) { throw new SystemError("Orchestrator not initialized"); } // Validate task this.validateTask(task); // Store task in history this.taskHistory.set(task.id, task); try { await this.taskAssignmentCircuitBreaker.execute(async () => { // Add to queue if no agent assigned if (!task.assignedAgent) { if (this.taskQueue.length >= this.config.orchestrator.taskQueueSize) { throw new SystemError("Task queue is full"); } this.taskQueue.push(task); this.eventBus.emit(SystemEvents.TASK_CREATED, { task }); // Try to assign immediately await this.processTaskQueue(); return; } // Assign to specific agent const agent = this.agents.get(task.assignedAgent); if (!agent) { throw new SystemError(`Agent not found: ${task.assignedAgent}`); } await this.coordinationManager.assignTask(task, task.assignedAgent); this.eventBus.emit(SystemEvents.TASK_ASSIGNED, { taskId: task.id, agentId: task.assignedAgent, }); }); } catch (error) { this.logger.error("Failed to assign task", { taskId: task.id, error }); throw error; } } async getHealthStatus() { try { return await this.healthCheckCircuitBreaker.execute(async () => { const components = {}; // Check all components in parallel const [terminal, memory, coordination, mcp] = await Promise.allSettled([ this.getComponentHealth("Terminal Manager", async () => await this.terminalManager.getHealthStatus()), this.getComponentHealth("Memory Manager", async () => await this.memoryManager.getHealthStatus()), this.getComponentHealth("Coordination Manager", async () => await this.coordinationManager.getHealthStatus()), this.getComponentHealth("MCP Server", async () => await this.mcpServer.getHealthStatus()), ]); // Process results components.terminal = this.processHealthResult(terminal, "Terminal Manager"); components.memory = this.processHealthResult(memory, "Memory Manager"); components.coordination = this.processHealthResult(coordination, "Coordination Manager"); components.mcp = this.processHealthResult(mcp, "MCP Server"); // Add orchestrator self-check components.orchestrator = { name: "Orchestrator", status: "healthy", lastCheck: new Date(), metrics: { uptime: Date.now() - this.startTime, activeAgents: this.agents.size, queuedTasks: this.taskQueue.length, memoryUsage: process.memoryUsage().heapUsed / 1024 / 1024, // MB }, }; // Determine overall status const statuses = Object.values(components).map((c) => c.status); let overallStatus = "healthy"; if (statuses.some((s) => s === "unhealthy")) { overallStatus = "unhealthy"; } else if (statuses.some((s) => s === "degraded")) { overallStatus = "degraded"; } return { status: overallStatus, components, timestamp: new Date(), }; }); } catch (error) { this.logger.error("Health check failed", error); // Return degraded status if health check fails return { status: "degraded", components: { orchestrator: { name: "Orchestrator", status: "degraded", lastCheck: new Date(), error: "Health check circuit breaker open", }, }, timestamp: new Date(), }; } } async getMetrics() { const memUsage = process.memoryUsage(); const cpuUsage = process.cpuUsage(); const avgTaskDuration = this.metrics.completedTasks > 0 ? this.metrics.totalTaskDuration / this.metrics.completedTasks : 0; return { uptime: Date.now() - this.startTime, totalAgents: this.agents.size, activeAgents: this.sessionManager.getActiveSessions().length, totalTasks: this.taskHistory.size, completedTasks: this.metrics.completedTasks, failedTasks: this.metrics.failedTasks, queuedTasks: this.taskQueue.length, avgTaskDuration, memoryUsage: memUsage, cpuUsage, timestamp: new Date(), }; } async performMaintenance() { this.logger.debug("Performing maintenance tasks"); try { // Clean up terminated sessions await this.cleanupTerminatedSessions(); // Clean up old task history await this.cleanupTaskHistory(); // Perform component maintenance await Promise.allSettled([ this.terminalManager.performMaintenance(), this.memoryManager.performMaintenance(), this.coordinationManager.performMaintenance(), ]); // Persist current state await this.sessionManager.persistSessions(); // Force garbage collection if available if (global.gc) { global.gc(); } this.logger.debug("Maintenance tasks completed"); } catch (error) { this.logger.error("Error during maintenance", error); } } setupEventHandlers() { // Handle task lifecycle events this.eventBus.on(SystemEvents.TASK_STARTED, (data) => { const { taskId, agentId } = data; const task = this.taskHistory.get(taskId); if (task) { task.status = "running"; task.startedAt = new Date(); } }); this.eventBus.on(SystemEvents.TASK_COMPLETED, async (data) => { const { taskId, result } = data; const task = this.taskHistory.get(taskId); if (task) { task.status = "completed"; task.completedAt = new Date(); if (result !== undefined) { task.output = result; } // Update metrics this.metrics.completedTasks++; if (task.startedAt) { this.metrics.totalTaskDuration += task.completedAt.getTime() - task.startedAt.getTime(); } } await this.processTaskQueue(); }); this.eventBus.on(SystemEvents.TASK_FAILED, async (data) => { const { taskId, error } = data; const task = this.taskHistory.get(taskId); if (task) { task.status = "failed"; task.completedAt = new Date(); task.error = error; // Update metrics this.metrics.failedTasks++; } // Retry or requeue based on configuration await this.handleTaskFailure(taskId, error); }); // Handle agent events this.eventBus.on(SystemEvents.AGENT_ERROR, async (data) => { const { agentId, error } = data; this.logger.error("Agent error", { agentId, error }); // Implement agent recovery await this.handleAgentError(agentId, error); }); this.eventBus.on(SystemEvents.AGENT_IDLE, async (data) => { const { agentId } = data; // Update session status const sessions = this.sessionManager.getActiveSessions().filter(s => s.agentId === agentId); sessions.forEach(s => s.status = "idle"); // Try to assign queued tasks await this.processTaskQueue(); }); // Handle system events this.eventBus.on(SystemEvents.SYSTEM_ERROR, (data) => { const { error, component } = data; this.logger.error("System error", { component, error }); // Implement system-level error recovery this.handleSystemError(component, error); }); // Handle resource events this.eventBus.on(SystemEvents.DEADLOCK_DETECTED, (data) => { const { agents, resources } = data; this.logger.error("Deadlock detected", { agents, resources }); // Implement deadlock resolution void this.resolveDeadlock(agents, resources); }); } startHealthChecks() { this.healthCheckInterval = setInterval(async () => { try { const health = await this.getHealthStatus(); this.eventBus.emit(SystemEvents.SYSTEM_HEALTHCHECK, { status: health }); if (health.status === "unhealthy") { this.logger.warn("System health check failed", health); // Attempt recovery for unhealthy components await this.recoverUnhealthyComponents(health); } } catch (error) { this.logger.error("Health check error", error); } }, this.config.orchestrator.healthCheckInterval); } startMaintenanceTasks() { this.maintenanceInterval = setInterval(async () => { await this.performMaintenance(); }, this.config.orchestrator.maintenanceInterval ?? 300000); // 5 minutes default } startMetricsCollection() { this.metricsInterval = setInterval(async () => { try { const metrics = await this.getMetrics(); this.logger.debug("Metrics collected", metrics); // Emit metrics event for monitoring systems this.eventBus.emit("metrics:collected", metrics); } catch (error) { this.logger.error("Metrics collection error", error); } }, this.config.orchestrator.metricsInterval ?? 60000); // 1 minute default } stopBackgroundTasks() { if (this.healthCheckInterval) { clearInterval(this.healthCheckInterval); } if (this.maintenanceInterval) { clearInterval(this.maintenanceInterval); } if (this.metricsInterval) { clearInterval(this.metricsInterval); } } async shutdownComponents() { const shutdownTasks = [ this.shutdownComponent("Terminal Manager", () => this.terminalManager.shutdown()), this.shutdownComponent("Memory Manager", () => this.memoryManager.shutdown()), this.shutdownComponent("Coordination Manager", () => this.coordinationManager.shutdown()), this.shutdownComponent("MCP Server", () => this.mcpServer.stop()), ]; const results = await Promise.allSettled(shutdownTasks); // Log any shutdown failures results.forEach((result, index) => { if (result.status === "rejected") { const componentName = ["Terminal Manager", "Memory Manager", "Coordination Manager", "MCP Server"][index]; this.logger.error(`Failed to shutdown ${componentName}`, result.reason); } }); } async emergencyShutdown() { this.logger.warn("Performing emergency shutdown"); try { // Force stop all components await Promise.allSettled([ this.terminalManager.shutdown().catch(() => { }), this.memoryManager.shutdown().catch(() => { }), this.coordinationManager.shutdown().catch(() => { }), this.mcpServer.stop().catch(() => { }), ]); } catch (error) { this.logger.error("Emergency shutdown error", error); } } async processTaskQueue() { if (this.taskQueue.length === 0) { return; } const availableAgents = await this.getAvailableAgents(); while (this.taskQueue.length > 0 && availableAgents.length > 0) { const task = this.taskQueue.shift(); const agent = this.selectAgentForTask(task, availableAgents); if (agent) { task.assignedAgent = agent.id; task.status = "assigned"; try { await this.coordinationManager.assignTask(task, agent.id); this.eventBus.emit(SystemEvents.TASK_ASSIGNED, { taskId: task.id, agentId: agent.id, }); // Remove agent from available list const index = availableAgents.indexOf(agent); availableAgents.splice(index, 1); } catch (error) { // Put task back in queue this.taskQueue.unshift(task); this.logger.error("Failed to assign task", { taskId: task.id, error }); break; } } else { // No suitable agent, put task back this.taskQueue.unshift(task); break; } } } async getAvailableAgents() { const sessions = this.sessionManager.getActiveSessions(); const available = []; for (const session of sessions) { if (session.status === "idle" || session.status === "active") { const profile = this.agents.get(session.agentId); if (profile) { try { const taskCount = await this.coordinationManager.getAgentTaskCount(profile.id); if (taskCount < profile.maxConcurrentTasks) { available.push(profile); } } catch (error) { this.logger.error("Failed to get agent task count", { agentId: profile.id, error }); } } } } return available.sort((a, b) => b.priority - a.priority); } selectAgentForTask(task, agents) { // Score agents based on capabilities, load, and priority const scoredAgents = agents.map(agent => { let score = agent.priority * 10; // Check capability match const requiredCapabilities = task.metadata?.requiredCapabilities ?? []; const matchedCapabilities = requiredCapabilities.filter(cap => agent.capabilities.includes(cap)).length; if (requiredCapabilities.length > 0 && matchedCapabilities === 0) { return { agent, score: -1 }; // Can't handle task } score += matchedCapabilities * 5; // Prefer agents with matching type if (task.type === agent.type) { score += 20; } return { agent, score }; }); // Filter out agents that can't handle the task const eligibleAgents = scoredAgents.filter(({ score }) => score >= 0); if (eligibleAgents.length === 0) { return undefined; } // Select agent with highest score eligibleAgents.sort((a, b) => b.score - a.score); return eligibleAgents[0].agent; } async getComponentHealth(name, check) { try { const result = await Promise.race([ check(), delay(5000).then(() => ({ healthy: false, error: "Health check timeout" })), ]); const health = { name, status: result.healthy ? "healthy" : "unhealthy", lastCheck: new Date(), }; if (result.error !== undefined) { health.error = result.error; } if ("metrics" in result && result.metrics !== undefined) { health.metrics = result.metrics; } return health; } catch (error) { return { name, status: "unhealthy", lastCheck: new Date(), error: error instanceof Error ? error.message : "Unknown error", }; } } processHealthResult(result, componentName) { if (result.status === "fulfilled") { return result.value; } else { return { name: componentName, status: "unhealthy", lastCheck: new Date(), error: result.reason?.message ?? "Health check failed", }; } } async initializeComponent(name, init) { try { await retry(init, { maxAttempts: 3, initialDelay: 2000 }); this.logger.info(`${name} initialized`); } catch (error) { this.logger.error(`Failed to initialize ${name}`, error); throw new InitializationError(name, { error }); } } async shutdownComponent(name, shutdown) { try { await Promise.race([ shutdown(), delay(10000), // 10 second timeout per component ]); this.logger.info(`${name} shut down`); } catch (error) { this.logger.error(`Failed to shutdown ${name}`, error); throw error; } } validateAgentProfile(profile) { if (!profile.id || !profile.name || !profile.type) { throw new Error("Invalid agent profile: missing required fields"); } if (profile.maxConcurrentTasks < 1) { throw new Error("Invalid agent profile: maxConcurrentTasks must be at least 1"); } if (this.agents.has(profile.id)) { throw new Error(`Agent with ID ${profile.id} already exists`); } } validateTask(task) { if (!task.id || !task.type || !task.description) { throw new Error("Invalid task: missing required fields"); } if (task.priority < 0 || task.priority > 100) { throw new Error("Invalid task: priority must be between 0 and 100"); } if (this.taskHistory.has(task.id)) { throw new Error(`Task with ID ${task.id} already exists`); } } async handleAgentError(agentId, error) { const profile = this.agents.get(agentId); if (!profile) { return; } // Log error details this.logger.error("Handling agent error", { agentId, error }); // Check if agent should be restarted const errorCount = profile.metadata?.errorCount ?? 0; profile.metadata = { ...profile.metadata, errorCount: errorCount + 1 }; if (errorCount < 3) { // Attempt to restart agent try { await this.terminateAgent(agentId); await delay(2000); // Wait before restart await this.spawnAgent({ ...profile, metadata: { ...profile.metadata, errorCount: 0 } }); this.logger.info("Agent restarted after error", { agentId }); } catch (restartError) { this.logger.error("Failed to restart agent", { agentId, error: restartError }); } } else { // Too many errors, terminate agent this.logger.error("Agent exceeded error threshold, terminating", { agentId, errorCount }); await this.terminateAgent(agentId); } } async handleTaskFailure(taskId, error) { const task = this.taskHistory.get(taskId); if (!task) { return; } const retryCount = task.metadata?.retryCount ?? 0; const maxRetries = this.config.orchestrator.taskMaxRetries ?? 3; if (retryCount < maxRetries) { // Retry task task.metadata = { ...task.metadata, retryCount: retryCount + 1 }; task.status = "queued"; delete task.assignedAgent; // Add back to queue with delay setTimeout(() => { this.taskQueue.push(task); void this.processTaskQueue(); }, Math.pow(2, retryCount) * 1000); // Exponential backoff this.logger.info("Task queued for retry", { taskId, retryCount: retryCount + 1 }); } else { this.logger.error("Task exceeded retry limit", { taskId, retryCount }); } } handleSystemError(component, error) { // Implement system-level error recovery strategies this.logger.error("Handling system error", { component, error }); // TODO: Implement specific recovery strategies based on component and error type } async resolveDeadlock(agents, resources) { this.logger.warn("Resolving deadlock", { agents, resources }); // Simple deadlock resolution: cancel lowest priority agent's tasks const agentProfiles = agents .map(id => this.agents.get(id)) .filter(Boolean); if (agentProfiles.length === 0) { return; } // Sort by priority (lowest first) agentProfiles.sort((a, b) => a.priority - b.priority); // Cancel tasks for lowest priority agent const targetAgent = agentProfiles[0]; await this.cancelAgentTasks(targetAgent.id); this.logger.info("Deadlock resolved by cancelling tasks", { agentId: targetAgent.id }); } async cancelAgentTasks(agentId) { try { const tasks = await this.coordinationManager.getAgentTasks(agentId); for (const task of tasks) { await this.coordinationManager.cancelTask(task.id); // Update task status const trackedTask = this.taskHistory.get(task.id); if (trackedTask) { trackedTask.status = "cancelled"; trackedTask.completedAt = new Date(); } this.eventBus.emit(SystemEvents.TASK_CANCELLED, { taskId: task.id, reason: "Agent termination", }); } } catch (error) { this.logger.error("Failed to cancel agent tasks", { agentId, error }); } } startAgentHealthMonitoring(agentId) { // TODO: Implement periodic health checks for individual agents } async recoverUnhealthyComponents(health) { for (const [name, component] of Object.entries(health.components)) { if (component.status === "unhealthy") { this.logger.warn("Attempting to recover unhealthy component", { name }); // TODO: Implement component-specific recovery strategies switch (name) { case "Terminal Manager": // Restart terminal pools, etc. break; case "Memory Manager": // Clear cache, reconnect to backends, etc. break; case "Coordination Manager": // Reset locks, clear message queues, etc. break; case "MCP Server": // Restart server, reset connections, etc. break; } } } } async cleanupTerminatedSessions() { const allSessions = this.sessionManager.getActiveSessions(); const terminatedSessions = allSessions.filter(s => s.status === "terminated"); const cutoffTime = Date.now() - (this.config.orchestrator.sessionRetentionMs ?? 3600000); // 1 hour default for (const session of terminatedSessions) { if (session.endTime && session.endTime.getTime() < cutoffTime) { await this.sessionManager.terminateSession(session.id); this.logger.debug("Cleaned up old session", { sessionId: session.id }); } } } async cleanupTaskHistory() { const cutoffTime = Date.now() - (this.config.orchestrator.taskHistoryRetentionMs ?? 86400000); // 24 hours default for (const [taskId, task] of this.taskHistory.entries()) { if (task.completedAt && task.completedAt.getTime() < cutoffTime) { this.taskHistory.delete(taskId); this.logger.debug("Cleaned up old task", { taskId }); } } } async processShutdownTasks() { // Process any critical tasks before shutdown const criticalTasks = this.taskQueue.filter(t => t.priority >= 90 || t.metadata?.critical === true); if (criticalTasks.length > 0) { this.logger.info("Processing critical tasks before shutdown", { count: criticalTasks.length }); // TODO: Implement critical task processing } } } //# sourceMappingURL=orchestrator.js.map