/*
 * Package: claude-flow-multilang
 * Version: (blank in the captured listing)
 * Description: Revolutionary multilingual AI orchestration framework with cultural awareness and DDD architecture
 * Listing metadata: 1,314 lines (1,110 loc), 41.6 kB, text/typescript
 */
/**
* Main orchestrator for Claude-Flow
*/
import {
Config,
SystemEvents,
AgentProfile,
AgentSession,
Task,
HealthStatus,
ComponentHealth,
TaskStatus,
OrchestratorMetrics,
} from '../utils/types.js';
import { IEventBus } from './event-bus.js';
import type { ILogger } from './logger.js';
import type { ITerminalManager } from '../terminal/manager.js';
import type { IMemoryManager } from '../memory/manager.js';
import type { ICoordinationManager } from '../coordination/manager.js';
import type { IMCPServer } from '../mcp/server.js';
import { SystemError, InitializationError, ShutdownError } from '../utils/errors.js';
import { delay, retry, circuitBreaker, CircuitBreaker } from '../utils/helpers.js';
import { mkdir, writeFile, readFile } from 'fs/promises';
import { join, dirname } from 'path';
import { ClaudeAPIClient } from '../api/claude-client.js';
import { ConfigManager } from '../config/config-manager.js';
/**
 * Contract for tracking agent sessions; implemented in this file by SessionManager.
 * The per-method notes below describe what that implementation does.
 */
export interface ISessionManager {
/** Creates and registers a session for the given agent profile, resolving with the new session. */
createSession(profile: AgentProfile): Promise<AgentSession>;
/** Looks up a tracked session by ID; yields `undefined` when the ID is not tracked. */
getSession(sessionId: string): AgentSession | undefined;
/** Lists tracked sessions whose status is 'active' or 'idle' (terminated sessions are excluded). */
getActiveSessions(): AgentSession[];
/** Terminates one session; SessionManager throws an Error when the ID is not tracked. */
terminateSession(sessionId: string): Promise<void>;
/** Terminates every session currently returned by getActiveSessions(). */
terminateAllSessions(): Promise<void>;
/** Saves session state; SessionManager is a no-op unless `config.orchestrator.persistSessions` is truthy. */
persistSessions(): Promise<void>;
/** Reloads previously persisted sessions; also gated on `config.orchestrator.persistSessions`. */
restoreSessions(): Promise<void>;
/** Drops a session from tracking; SessionManager only deletes its map entries and does not terminate anything. */
removeSession(sessionId: string): void;
}
/**
 * Public contract of the orchestrator; implemented in this file by Orchestrator.
 * The per-method notes below describe what that implementation does.
 */
export interface IOrchestrator {
/** Starts the orchestrator; Orchestrator throws InitializationError when it is already initialized. */
initialize(): Promise<void>;
/** Stops the orchestrator; Orchestrator returns immediately when not initialized or already shutting down. */
shutdown(): Promise<void>;
/** Registers an agent and creates its session, resolving with the new session's ID. */
spawnAgent(profile: AgentProfile): Promise<string>;
/** Cancels the agent's tasks, terminates its sessions, and unregisters it. */
terminateAgent(agentId: string): Promise<void>;
/** Queues the task when it has no `assignedAgent`; otherwise assigns it to that agent directly. */
assignTask(task: Task): Promise<void>;
/** Reports per-component health plus an overall status. */
getHealthStatus(): Promise<HealthStatus>;
/** Reports uptime, agent/task counters, and process resource usage. */
getMetrics(): Promise<OrchestratorMetrics>;
/** Runs housekeeping: session and task-history cleanup, component maintenance, and a session persist. */
performMaintenance(): Promise<void>;
}
/**
 * Shape of the snapshot that SessionManager.persistSessions serializes with
 * JSON.stringify and SessionManager.restoreSessions reads back with JSON.parse.
 */
export interface SessionPersistence {
// Each session together with the profile of the agent that owns it.
sessions: Array<AgentSession & { profile: AgentProfile }>;
// Queued tasks; persistSessions in this file always writes an empty array here.
taskQueue: Task[];
// Task counters; persistSessions in this file always writes zeros here.
metrics: {
completedTasks: number;
failedTasks: number;
totalTaskDuration: number;
};
// Time the snapshot was taken.
// NOTE(review): after a JSON round-trip this is a string rather than a Date — confirm readers convert it.
savedAt: Date;
}
/**
* Session manager implementation with persistence
*/
class SessionManager implements ISessionManager {
private sessions = new Map<string, AgentSession>();
private sessionProfiles = new Map<string, AgentProfile>();
private persistencePath: string;
private persistenceCircuitBreaker: CircuitBreaker;
/**
 * Stores the collaborators and prepares persistence.
 *
 * The snapshot file is `sessions.json` inside `config.orchestrator.dataDir`
 * (or `./data` when that setting is falsy). A circuit breaker named
 * 'SessionPersistence' is created for the persistence operations.
 */
constructor(
  private terminalManager: ITerminalManager,
  private memoryManager: IMemoryManager,
  private eventBus: IEventBus,
  private logger: ILogger,
  private config: Config,
) {
  const dataDirectory = config.orchestrator.dataDir || './data';
  this.persistencePath = join(dataDirectory, 'sessions.json');

  const breakerSettings = { threshold: 5, timeout: 30000, resetTimeout: 60000 };
  this.persistenceCircuitBreaker = circuitBreaker('SessionPersistence', breakerSettings);
}
/**
 * Creates a session for an agent: spawns a terminal, creates a memory bank,
 * registers the session, and kicks off a background persist.
 *
 * If the memory bank cannot be created, the terminal that was already spawned
 * is released (best effort, bounded to 5 seconds) so it does not leak.
 *
 * @param profile - Profile of the agent the session belongs to.
 * @returns The newly registered session, with status `'active'`.
 * @throws SystemError when the terminal or the memory bank cannot be created;
 *   the underlying failure is attached to the error details.
 */
async createSession(profile: AgentProfile): Promise<AgentSession> {
  try {
    // Create terminal with retry logic
    const terminalId = await retry(() => this.terminalManager.spawnTerminal(profile), {
      maxAttempts: 3,
      initialDelay: 1000,
    });

    // Create memory bank with retry logic
    let memoryBankId: AgentSession['memoryBankId'];
    try {
      memoryBankId = await retry(() => this.memoryManager.createBank(profile.id), {
        maxAttempts: 3,
        initialDelay: 1000,
      });
    } catch (bankError) {
      // The terminal exists but no session will own it: release it before failing.
      try {
        await Promise.race([this.terminalManager.terminateTerminal(terminalId), delay(5000)]);
      } catch (cleanupError) {
        this.logger.error('Failed to release terminal after session creation failure', {
          agentId: profile.id,
          terminalId,
          error: cleanupError,
        });
      }
      throw bankError;
    }

    // `slice(2, 11)` yields the same nine characters as the deprecated `substr(2, 9)`.
    const session: AgentSession = {
      id: `session_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
      agentId: profile.id,
      terminalId,
      startTime: new Date(),
      status: 'active',
      lastActivity: new Date(),
      memoryBankId,
    };
    this.sessions.set(session.id, session);
    this.sessionProfiles.set(session.id, profile);
    this.logger.info('Session created', {
      sessionId: session.id,
      agentId: profile.id,
      terminalId,
      memoryBankId,
    });

    // Persist in the background; a failed persist must not fail session creation.
    this.persistSessions().catch((error) =>
      this.logger.error('Failed to persist sessions', error),
    );
    return session;
  } catch (error) {
    this.logger.error('Failed to create session', { agentId: profile.id, error });
    throw new SystemError(`Failed to create session for agent ${profile.id}`, { error });
  }
}
/**
 * Returns the tracked session with the given ID, or `undefined` if none is tracked.
 */
getSession(sessionId: string): AgentSession | undefined {
  const tracked = this.sessions.get(sessionId);
  return tracked;
}
/**
 * Collects the tracked sessions whose status is `'active'` or `'idle'`,
 * in the iteration order of the internal session map.
 */
getActiveSessions(): AgentSession[] {
  const live: AgentSession[] = [];
  for (const candidate of this.sessions.values()) {
    if (candidate.status === 'active' || candidate.status === 'idle') {
      live.push(candidate);
    }
  }
  return live;
}
/**
 * Marks a session as terminated, then releases its terminal and memory bank.
 *
 * Each release is raced against a 5-second timeout. A failed or timed-out
 * release is logged and does not abort the termination. The session's profile
 * entry is dropped and a background persist is started afterwards.
 *
 * @param sessionId - ID of the session to terminate.
 * @throws Error when no session with that ID is tracked.
 */
async terminateSession(sessionId: string): Promise<void> {
  const target = this.sessions.get(sessionId);
  if (!target) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  // Produces a promise that rejects with `message` once `ms` have elapsed.
  const rejectAfter = (ms: number, message: string) =>
    delay(ms).then(() => {
      throw new Error(message);
    });

  try {
    // Flip the status before any awaiting so the session stops counting as active.
    target.status = 'terminated';
    target.endTime = new Date();

    await Promise.race([
      this.terminalManager.terminateTerminal(target.terminalId),
      rejectAfter(5000, 'Terminal termination timeout'),
    ]).catch((terminalError) => {
      this.logger.error('Error terminating terminal', { sessionId, error: terminalError });
    });

    await Promise.race([
      this.memoryManager.closeBank(target.memoryBankId),
      rejectAfter(5000, 'Memory bank close timeout'),
    ]).catch((bankError) => {
      this.logger.error('Error closing memory bank', { sessionId, error: bankError });
    });

    this.sessionProfiles.delete(sessionId);

    const duration = target.endTime.getTime() - target.startTime.getTime();
    this.logger.info('Session terminated', { sessionId, duration });

    // Background persist; failures are only logged.
    this.persistSessions().catch((persistError) =>
      this.logger.error('Failed to persist sessions', persistError),
    );
  } catch (failure) {
    this.logger.error('Error during session termination', { sessionId, error: failure });
    throw failure;
  }
}
/**
 * Terminates every active or idle session, five at a time.
 *
 * Each group is awaited with `Promise.allSettled`, so one failing termination
 * does not stop the remaining ones.
 */
async terminateAllSessions(): Promise<void> {
  const pending = this.getActiveSessions();
  const chunkSize = 5;

  let offset = 0;
  while (offset < pending.length) {
    const chunk = pending.slice(offset, offset + chunkSize);
    await Promise.allSettled(chunk.map((entry) => this.terminateSession(entry.id)));
    offset += chunkSize;
  }
}
/**
 * Forgets a session and its profile. Only the bookkeeping entries are deleted;
 * the terminal and memory bank are not touched.
 */
removeSession(sessionId: string): void {
  for (const registry of [this.sessions, this.sessionProfiles]) {
    registry.delete(sessionId);
  }
}
/**
 * Writes a JSON snapshot of all sessions that still have a profile to
 * `persistencePath`, creating the parent directory if needed.
 *
 * Does nothing unless `config.orchestrator.persistSessions` is truthy. The
 * write runs through the persistence circuit breaker. Failures are logged
 * and never propagated to the caller.
 */
async persistSessions(): Promise<void> {
  if (!this.config.orchestrator.persistSessions) {
    return;
  }

  const writeSnapshot = async (): Promise<void> => {
    // Sessions without a profile entry (e.g. terminated ones) are left out.
    const persisted: SessionPersistence['sessions'] = [];
    for (const session of this.sessions.values()) {
      const profile = this.sessionProfiles.get(session.id);
      if (profile) {
        persisted.push({ ...session, profile });
      }
    }

    const snapshot: SessionPersistence = {
      sessions: persisted,
      taskQueue: [],
      metrics: {
        completedTasks: 0,
        failedTasks: 0,
        totalTaskDuration: 0,
      },
      savedAt: new Date(),
    };

    await mkdir(dirname(this.persistencePath), { recursive: true });
    await writeFile(this.persistencePath, JSON.stringify(snapshot, null, 2), 'utf8');
    this.logger.debug('Sessions persisted', { count: snapshot.sessions.length });
  };

  try {
    await this.persistenceCircuitBreaker.execute(writeSnapshot);
  } catch (failure) {
    this.logger.error('Failed to persist sessions', failure);
  }
}
/**
 * Recreates the active/idle sessions recorded in the snapshot file.
 *
 * Does nothing unless `config.orchestrator.persistSessions` is truthy. Each
 * session is recreated through `createSession` (new terminal and memory bank)
 * and then given back its persisted ID, start time, and last-activity time.
 * The session is re-registered under the persisted ID so that lookups,
 * termination, and later persists by that ID all work.
 *
 * A missing snapshot file is ignored silently. Any other failure is logged
 * and never propagated. A failure on one session does not stop the others.
 */
async restoreSessions(): Promise<void> {
  if (!this.config.orchestrator.persistSessions) {
    return;
  }
  try {
    const data = await readFile(this.persistencePath, 'utf8');
    const persistence: SessionPersistence = JSON.parse(data);

    // Restore only active/idle sessions
    const sessionsToRestore = persistence.sessions.filter(
      (s) => s.status === 'active' || s.status === 'idle',
    );

    for (const sessionData of sessionsToRestore) {
      try {
        // createSession registers the session under a freshly generated ID.
        const session = await this.createSession(sessionData.profile);
        const generatedId = session.id;

        // Dates come back from JSON as strings, so rebuild them.
        Object.assign(session, {
          id: sessionData.id,
          startTime: new Date(sessionData.startTime),
          lastActivity: new Date(sessionData.lastActivity),
        });

        // Move the bookkeeping to the persisted ID; otherwise the maps would
        // still be keyed by the generated ID while the session reports another.
        if (generatedId !== session.id) {
          this.sessions.delete(generatedId);
          this.sessionProfiles.delete(generatedId);
          this.sessions.set(session.id, session);
          this.sessionProfiles.set(session.id, sessionData.profile);
        }

        this.logger.info('Session restored', { sessionId: session.id });
      } catch (error) {
        this.logger.error('Failed to restore session', {
          sessionId: sessionData.id,
          error,
        });
      }
    }
  } catch (error) {
    // A missing file simply means nothing was persisted yet.
    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
      this.logger.error('Failed to restore sessions', error);
    }
  }
}
}
/**
* Main orchestrator implementation with enhanced features
*/
export class Orchestrator implements IOrchestrator {
private initialized = false;
private shutdownInProgress = false;
private sessionManager: ISessionManager;
private healthCheckInterval?: number;
private maintenanceInterval?: number;
private metricsInterval?: number;
private agents = new Map<string, AgentProfile>();
private taskQueue: Task[] = [];
private taskHistory = new Map<string, Task>();
private startTime = Date.now();
private claudeClient?: ClaudeAPIClient;
private configManager: ConfigManager;
// Metrics tracking
private metrics = {
completedTasks: 0,
failedTasks: 0,
totalTaskDuration: 0,
};
// Circuit breakers for critical operations
private healthCheckCircuitBreaker: CircuitBreaker;
private taskAssignmentCircuitBreaker: CircuitBreaker;
/**
 * Stores the injected components, builds the internal SessionManager, fetches
 * the ConfigManager singleton, and creates the circuit breakers used by
 * health checks and task assignment.
 */
constructor(
  private config: Config,
  private terminalManager: ITerminalManager,
  private memoryManager: IMemoryManager,
  private coordinationManager: ICoordinationManager,
  private mcpServer: IMCPServer,
  private eventBus: IEventBus,
  private logger: ILogger,
) {
  this.sessionManager = new SessionManager(
    terminalManager,
    memoryManager,
    eventBus,
    logger,
    config,
  );
  this.configManager = ConfigManager.getInstance();

  const healthBreakerSettings = { threshold: 3, timeout: 10000, resetTimeout: 30000 };
  this.healthCheckCircuitBreaker = circuitBreaker('HealthCheck', healthBreakerSettings);

  const assignmentBreakerSettings = { threshold: 5, timeout: 5000, resetTimeout: 20000 };
  this.taskAssignmentCircuitBreaker = circuitBreaker(
    'TaskAssignment',
    assignmentBreakerSettings,
  );
}
/**
 * Brings the orchestrator up.
 *
 * Initializes the terminal, memory, and coordination managers in parallel,
 * then starts the MCP server, optionally creates the Claude API client,
 * restores persisted sessions, registers event handlers, and starts the
 * background timers.
 *
 * On any failure the background timers are stopped, the `initialized` flag is
 * cleared, and the components are force-stopped before the error is rethrown.
 * This way a failed start leaves no timers running and can be retried.
 *
 * @throws InitializationError when already initialized or when any step fails.
 */
async initialize(): Promise<void> {
  if (this.initialized) {
    throw new InitializationError('Orchestrator already initialized');
  }
  this.logger.info('Initializing orchestrator...');
  const startTime = Date.now();
  try {
    // Initialize components in parallel where possible
    await Promise.all([
      this.initializeComponent('Terminal Manager', () => this.terminalManager.initialize()),
      this.initializeComponent('Memory Manager', () => this.memoryManager.initialize()),
      this.initializeComponent('Coordination Manager', () =>
        this.coordinationManager.initialize(),
      ),
    ]);

    // MCP server needs to be started after other components
    await this.initializeComponent('MCP Server', () => this.mcpServer.start());

    // The Claude API client is optional; a failure here is only a warning.
    if (this.configManager.isClaudeAPIConfigured()) {
      try {
        this.claudeClient = new ClaudeAPIClient(this.logger, this.configManager);
        this.logger.info('Claude API client initialized', {
          model: this.claudeClient.getConfig().model,
          temperature: this.claudeClient.getConfig().temperature,
        });
      } catch (error) {
        this.logger.warn('Failed to initialize Claude API client', error);
      }
    }

    // Restore persisted sessions
    await this.sessionManager.restoreSessions();

    // Set up event handlers
    this.setupEventHandlers();

    // Start background tasks
    this.startHealthChecks();
    this.startMaintenanceTasks();
    this.startMetricsCollection();

    this.initialized = true;
    const initDuration = Date.now() - startTime;
    this.eventBus.emit(SystemEvents.SYSTEM_READY, { timestamp: new Date() });
    this.logger.info('Orchestrator initialized successfully', { duration: initDuration });
  } catch (error) {
    this.logger.error('Failed to initialize orchestrator', error);
    // Undo whatever part of the start-up already happened: timers first, so
    // they cannot fire against components that are about to be stopped.
    this.stopBackgroundTasks();
    this.initialized = false;
    await this.emergencyShutdown();
    throw new InitializationError('Orchestrator', { error });
  }
}
/**
 * Gracefully stops the orchestrator.
 *
 * Order: stop timers, persist sessions, handle critical queued tasks,
 * terminate sessions, then shut the components down, bounded by
 * `config.orchestrator.shutdownTimeout`. Returns immediately when the
 * orchestrator is not initialized or a shutdown is already running.
 *
 * @throws ShutdownError when the graceful path fails; an emergency shutdown is
 *   attempted first.
 */
async shutdown(): Promise<void> {
  if (this.shutdownInProgress || !this.initialized) {
    return;
  }
  this.shutdownInProgress = true;
  this.logger.info('Shutting down orchestrator...');
  const beganAt = Date.now();

  try {
    this.stopBackgroundTasks();
    await this.sessionManager.persistSessions();
    await this.processShutdownTasks();
    await this.sessionManager.terminateAllSessions();

    // Whichever settles first wins: the component shutdown or the deadline.
    const componentsStopped = this.shutdownComponents();
    const deadline = delay(this.config.orchestrator.shutdownTimeout);
    await Promise.race([componentsStopped, deadline]);

    const elapsed = Date.now() - beganAt;
    this.eventBus.emit(SystemEvents.SYSTEM_SHUTDOWN, { reason: 'Graceful shutdown' });
    this.logger.info('Orchestrator shutdown complete', { duration: elapsed });
  } catch (failure) {
    this.logger.error('Error during shutdown', failure);
    // Graceful path failed: force the components down before reporting.
    await this.emergencyShutdown();
    throw new ShutdownError('Failed to shutdown gracefully', { error: failure });
  } finally {
    this.initialized = false;
    this.shutdownInProgress = false;
  }
}
/**
 * Registers a new agent and creates its session.
 *
 * @param profile - Profile of the agent to spawn; validated before use.
 * @returns ID of the session created for the agent.
 * @throws SystemError when the orchestrator is not initialized or the
 *   `maxConcurrentAgents` limit is reached.
 * @throws Error when the profile is invalid or session creation fails.
 */
async spawnAgent(profile: AgentProfile): Promise<string> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }

  const agentLimit = this.config.orchestrator.maxConcurrentAgents;
  if (this.agents.size >= agentLimit) {
    throw new SystemError('Maximum concurrent agents reached');
  }

  this.validateAgentProfile(profile);

  const { id: agentId } = profile;
  this.logger.info('Spawning agent', { agentId, type: profile.type });

  try {
    const createSession = () => this.sessionManager.createSession(profile);
    const session = await retry(createSession, { maxAttempts: 3, initialDelay: 2000 });

    this.agents.set(agentId, profile);
    this.eventBus.emit(SystemEvents.AGENT_SPAWNED, {
      agentId,
      profile,
      sessionId: session.id,
    });
    this.startAgentHealthMonitoring(agentId);

    return session.id;
  } catch (failure) {
    this.logger.error('Failed to spawn agent', { agentId, error: failure });
    throw failure;
  }
}
/**
 * Cancels an agent's tasks, terminates all of its active sessions, and
 * unregisters it.
 *
 * @param agentId - ID of the agent to terminate.
 * @throws SystemError when the orchestrator is not initialized or the agent is
 *   unknown.
 */
async terminateAgent(agentId: string): Promise<void> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }
  if (!this.agents.get(agentId)) {
    throw new SystemError(`Agent not found: ${agentId}`);
  }

  this.logger.info('Terminating agent', { agentId });

  try {
    await this.cancelAgentTasks(agentId);

    // Individual session failures are tolerated (allSettled).
    const ownedSessions = this.sessionManager
      .getActiveSessions()
      .filter((candidate) => candidate.agentId === agentId);
    const terminations = ownedSessions.map((owned) =>
      this.sessionManager.terminateSession(owned.id),
    );
    await Promise.allSettled(terminations);

    this.agents.delete(agentId);
    this.eventBus.emit(SystemEvents.AGENT_TERMINATED, {
      agentId,
      reason: 'User requested',
    });
  } catch (failure) {
    this.logger.error('Failed to terminate agent', { agentId, error: failure });
    throw failure;
  }
}
/**
 * Accepts a task. Without an `assignedAgent` the task is queued and the queue
 * is processed immediately; otherwise it is assigned to that agent directly.
 *
 * If the task is rejected before it was queued or handed to the coordination
 * manager (queue full, unknown agent, assignment failure), its entry is
 * removed from the task history again. Otherwise the duplicate-ID check in
 * `validateTask` would refuse every later attempt to resubmit the same task.
 *
 * @param task - Task to accept; validated before use.
 * @throws SystemError when the orchestrator is not initialized, the queue is
 *   full, or the requested agent is unknown.
 * @throws Error when the task is invalid or the assignment fails.
 */
async assignTask(task: Task): Promise<void> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }
  // Validate task
  this.validateTask(task);
  // Store task in history
  this.taskHistory.set(task.id, task);

  // Becomes true once the task is queued or handed to the coordination manager.
  let accepted = false;
  try {
    await this.taskAssignmentCircuitBreaker.execute(async () => {
      // Add to queue if no agent assigned
      if (!task.assignedAgent) {
        if (this.taskQueue.length >= this.config.orchestrator.taskQueueSize) {
          throw new SystemError('Task queue is full');
        }
        this.taskQueue.push(task);
        accepted = true;
        this.eventBus.emit(SystemEvents.TASK_CREATED, { task });
        // Try to assign immediately
        await this.processTaskQueue();
        return;
      }

      // Assign to specific agent
      const agent = this.agents.get(task.assignedAgent);
      if (!agent) {
        throw new SystemError(`Agent not found: ${task.assignedAgent}`);
      }
      await this.coordinationManager.assignTask(task, task.assignedAgent);
      accepted = true;
      this.eventBus.emit(SystemEvents.TASK_ASSIGNED, {
        taskId: task.id,
        agentId: task.assignedAgent,
      });
    });
  } catch (error) {
    // Roll back the history entry for a task that was never accepted.
    if (!accepted && this.taskHistory.get(task.id) === task) {
      this.taskHistory.delete(task.id);
    }
    this.logger.error('Failed to assign task', { taskId: task.id, error });
    throw error;
  }
}
/**
 * Checks the four managed components in parallel, adds a self-report for the
 * orchestrator, and derives an overall status: `'unhealthy'` if any component
 * is unhealthy, else `'degraded'` if any is degraded, else `'healthy'`.
 *
 * The whole check runs through the health-check circuit breaker. If that
 * throws, a `'degraded'` report containing only the orchestrator is returned.
 */
async getHealthStatus(): Promise<HealthStatus> {
  const runChecks = async (): Promise<HealthStatus> => {
    const [terminal, memory, coordination, mcp] = await Promise.allSettled([
      this.getComponentHealth(
        'Terminal Manager',
        async () => await this.terminalManager.getHealthStatus(),
      ),
      this.getComponentHealth(
        'Memory Manager',
        async () => await this.memoryManager.getHealthStatus(),
      ),
      this.getComponentHealth(
        'Coordination Manager',
        async () => await this.coordinationManager.getHealthStatus(),
      ),
      this.getComponentHealth('MCP Server', async () => await this.mcpServer.getHealthStatus()),
    ]);

    const components: Record<string, ComponentHealth> = {
      terminal: this.processHealthResult(terminal, 'Terminal Manager'),
      memory: this.processHealthResult(memory, 'Memory Manager'),
      coordination: this.processHealthResult(coordination, 'Coordination Manager'),
      mcp: this.processHealthResult(mcp, 'MCP Server'),
      // The orchestrator always reports itself healthy, with a few live gauges.
      orchestrator: {
        name: 'Orchestrator',
        status: 'healthy',
        lastCheck: new Date(),
        metrics: {
          uptime: Date.now() - this.startTime,
          activeAgents: this.agents.size,
          queuedTasks: this.taskQueue.length,
          memoryUsage: process.memoryUsage().heapUsed / 1024 / 1024, // MB
        },
      },
    };

    // Worst status wins.
    const reported = Object.values(components).map((entry) => entry.status);
    let overall: HealthStatus['status'];
    if (reported.includes('unhealthy')) {
      overall = 'unhealthy';
    } else if (reported.includes('degraded')) {
      overall = 'degraded';
    } else {
      overall = 'healthy';
    }

    return { status: overall, components, timestamp: new Date() };
  };

  try {
    return await this.healthCheckCircuitBreaker.execute(runChecks);
  } catch (failure) {
    this.logger.error('Health check failed', failure);
    return {
      status: 'degraded',
      components: {
        orchestrator: {
          name: 'Orchestrator',
          status: 'degraded',
          lastCheck: new Date(),
          error: 'Health check circuit breaker open',
        },
      },
      timestamp: new Date(),
    };
  }
}
/**
 * Snapshots orchestrator metrics: uptime, agent and task counters, average
 * task duration (0 when nothing has completed yet), and the process memory
 * and CPU usage.
 */
async getMetrics(): Promise<OrchestratorMetrics> {
  const memoryUsage = process.memoryUsage();
  const cpuUsage = process.cpuUsage();

  const { completedTasks, failedTasks, totalTaskDuration } = this.metrics;
  let avgTaskDuration = 0;
  if (completedTasks > 0) {
    avgTaskDuration = totalTaskDuration / completedTasks;
  }

  return {
    uptime: Date.now() - this.startTime,
    totalAgents: this.agents.size,
    activeAgents: this.sessionManager.getActiveSessions().length,
    totalTasks: this.taskHistory.size,
    completedTasks,
    failedTasks,
    queuedTasks: this.taskQueue.length,
    avgTaskDuration,
    memoryUsage,
    cpuUsage,
    timestamp: new Date(),
  };
}
/**
 * Runs one housekeeping pass: session cleanup, task-history cleanup,
 * component maintenance (failures tolerated), a session persist, and a forced
 * garbage collection when the runtime exposes `global.gc`.
 *
 * Errors are logged and never propagated.
 */
async performMaintenance(): Promise<void> {
  this.logger.debug('Performing maintenance tasks');
  try {
    await this.cleanupTerminatedSessions();
    await this.cleanupTaskHistory();

    const componentPasses = [
      this.terminalManager.performMaintenance(),
      this.memoryManager.performMaintenance(),
      this.coordinationManager.performMaintenance(),
    ];
    await Promise.allSettled(componentPasses);

    await this.sessionManager.persistSessions();

    // `global.gc` is only defined when the runtime exposes it.
    global.gc?.();

    this.logger.debug('Maintenance tasks completed');
  } catch (failure) {
    this.logger.error('Error during maintenance', failure);
  }
}
/**
 * Subscribes to the event bus for task lifecycle, agent, system, and deadlock
 * events, and keeps `taskHistory`, session status, and metrics in step with
 * them.
 *
 * The deadlock handler is synchronous, so the promise returned by
 * `resolveDeadlock` is given an explicit rejection handler instead of being
 * left floating.
 */
private setupEventHandlers(): void {
  // Handle task lifecycle events
  this.eventBus.on(SystemEvents.TASK_STARTED, (data: unknown) => {
    const { taskId } = data as { taskId: string; agentId: string };
    const task = this.taskHistory.get(taskId);
    if (task) {
      task.status = 'running';
      task.startedAt = new Date();
    }
  });

  this.eventBus.on(SystemEvents.TASK_COMPLETED, async (data: unknown) => {
    const { taskId, result } = data as { taskId: string; result: unknown };
    const task = this.taskHistory.get(taskId);
    if (task) {
      task.status = 'completed';
      task.completedAt = new Date();
      if (result !== undefined) {
        task.output = result as Record<string, unknown>;
      }
      // Update metrics; duration is only known when the start was observed.
      this.metrics.completedTasks++;
      if (task.startedAt) {
        this.metrics.totalTaskDuration += task.completedAt.getTime() - task.startedAt.getTime();
      }
    }
    // A completion may have freed an agent for queued work.
    await this.processTaskQueue();
  });

  this.eventBus.on(SystemEvents.TASK_FAILED, async (data: unknown) => {
    const { taskId, error } = data as { taskId: string; error: Error };
    const task = this.taskHistory.get(taskId);
    if (task) {
      task.status = 'failed';
      task.completedAt = new Date();
      task.error = error;
      // Update metrics
      this.metrics.failedTasks++;
    }
    // Retry or requeue based on configuration
    await this.handleTaskFailure(taskId, error);
  });

  // Handle agent events
  this.eventBus.on(SystemEvents.AGENT_ERROR, async (data: unknown) => {
    const { agentId, error } = data as { agentId: string; error: Error };
    this.logger.error('Agent error', { agentId, error });
    // Implement agent recovery
    await this.handleAgentError(agentId, error);
  });

  this.eventBus.on(SystemEvents.AGENT_IDLE, async (data: unknown) => {
    const { agentId } = data as { agentId: string };
    // Update session status
    const sessions = this.sessionManager.getActiveSessions().filter((s) => s.agentId === agentId);
    sessions.forEach((s) => (s.status = 'idle'));
    // Try to assign queued tasks
    await this.processTaskQueue();
  });

  // Handle system events
  this.eventBus.on(SystemEvents.SYSTEM_ERROR, (data: unknown) => {
    const { error, component } = data as { error: Error; component: string };
    this.logger.error('System error', { component, error });
    // Implement system-level error recovery
    this.handleSystemError(component, error);
  });

  // Handle resource events
  this.eventBus.on(SystemEvents.DEADLOCK_DETECTED, (data: unknown) => {
    const { agents, resources } = data as { agents: string[]; resources: string[] };
    this.logger.error('Deadlock detected', { agents, resources });
    // Fire-and-forget, but never let a rejection go unhandled.
    void this.resolveDeadlock(agents, resources).catch((resolutionError) => {
      this.logger.error('Failed to resolve deadlock', {
        agents,
        resources,
        error: resolutionError,
      });
    });
  });
}
/**
 * Starts the periodic health check, repeating every
 * `config.orchestrator.healthCheckInterval`.
 *
 * Each run emits the health report on the event bus and, when the overall
 * status is `'unhealthy'`, triggers component recovery. Errors inside a run
 * are logged and do not stop the timer.
 */
private startHealthChecks(): void {
  const runCheck = async (): Promise<void> => {
    try {
      const health = await this.getHealthStatus();
      this.eventBus.emit(SystemEvents.SYSTEM_HEALTHCHECK, { status: health });
      if (health.status !== 'unhealthy') {
        return;
      }
      this.logger.warn('System health check failed', health);
      await this.recoverUnhealthyComponents(health);
    } catch (failure) {
      this.logger.error('Health check error', failure);
    }
  };

  this.healthCheckInterval = setInterval(
    runCheck,
    this.config.orchestrator.healthCheckInterval,
  );
}
/**
 * Starts the periodic maintenance pass, repeating every
 * `config.orchestrator.maintenanceInterval` (5 minutes when that is falsy).
 */
private startMaintenanceTasks(): void {
  const period = this.config.orchestrator.maintenanceInterval || 300000;
  const runMaintenance = async (): Promise<void> => {
    await this.performMaintenance();
  };
  this.maintenanceInterval = setInterval(runMaintenance, period);
}
/**
 * Starts periodic metrics collection, repeating every
 * `config.orchestrator.metricsInterval` (1 minute when that is falsy).
 *
 * Each run logs the metrics and emits them as `'metrics:collected'`. Errors
 * inside a run are logged and do not stop the timer.
 */
private startMetricsCollection(): void {
  const period = this.config.orchestrator.metricsInterval || 60000;

  const collect = async (): Promise<void> => {
    try {
      const snapshot = await this.getMetrics();
      this.logger.debug('Metrics collected', snapshot);
      this.eventBus.emit('metrics:collected', snapshot);
    } catch (failure) {
      this.logger.error('Metrics collection error', failure);
    }
  };

  this.metricsInterval = setInterval(collect, period);
}
/**
 * Cancels the health-check, maintenance, and metrics timers that are set.
 */
private stopBackgroundTasks(): void {
  const timers = [this.healthCheckInterval, this.maintenanceInterval, this.metricsInterval];
  for (const timer of timers) {
    if (timer) {
      clearInterval(timer);
    }
  }
}
/**
 * Shuts down the terminal, memory, and coordination managers and stops the
 * MCP server, all in parallel. Every failure is logged with the component's
 * name. This method itself does not reject because of a component failure.
 */
private async shutdownComponents(): Promise<void> {
  const steps: Array<[string, () => Promise<void>]> = [
    ['Terminal Manager', () => this.terminalManager.shutdown()],
    ['Memory Manager', () => this.memoryManager.shutdown()],
    ['Coordination Manager', () => this.coordinationManager.shutdown()],
    ['MCP Server', () => this.mcpServer.stop()],
  ];
  const labels = steps.map(([label]) => label);

  const outcomes = await Promise.allSettled(
    steps.map(([label, stop]) => this.shutdownComponent(label, stop)),
  );

  outcomes.forEach((outcome, position) => {
    if (outcome.status === 'rejected') {
      const componentName = labels[position];
      this.logger.error(`Failed to shutdown ${componentName}`, outcome.reason);
    }
  });
}
/**
 * Last-resort stop: asks every component to shut down at once and ignores
 * their individual failures. Never rejects. An unexpected error is logged.
 */
private async emergencyShutdown(): Promise<void> {
  this.logger.warn('Performing emergency shutdown');
  try {
    // Each rejection is swallowed on purpose: this path is best effort.
    const forcedStops = [
      this.terminalManager.shutdown().catch(() => {}),
      this.memoryManager.shutdown().catch(() => {}),
      this.coordinationManager.shutdown().catch(() => {}),
      this.mcpServer.stop().catch(() => {}),
    ];
    await Promise.allSettled(forcedStops);
  } catch (failure) {
    this.logger.error('Emergency shutdown error', failure);
  }
}
/**
 * Hands queued tasks to available agents, front of the queue first.
 *
 * Processing stops as soon as the head task has no suitable agent or an
 * assignment fails. In both cases the task goes back to the front of the
 * queue. After a failed assignment the task's `status` and `assignedAgent`
 * are restored, so a queued task is never left looking assigned.
 *
 * Each agent receives at most one task per call.
 */
private async processTaskQueue(): Promise<void> {
  if (this.taskQueue.length === 0) {
    return;
  }
  const availableAgents = await this.getAvailableAgents();

  while (this.taskQueue.length > 0 && availableAgents.length > 0) {
    const task = this.taskQueue.shift();
    if (!task) {
      break;
    }

    const agent = this.selectAgentForTask(task, availableAgents);
    if (!agent) {
      // No suitable agent, put task back
      this.taskQueue.unshift(task);
      break;
    }

    const previousStatus = task.status;
    task.assignedAgent = agent.id;
    task.status = 'assigned';
    try {
      await this.coordinationManager.assignTask(task, agent.id);
      this.eventBus.emit(SystemEvents.TASK_ASSIGNED, {
        taskId: task.id,
        agentId: agent.id,
      });
      // Remove agent from available list
      const index = availableAgents.indexOf(agent);
      if (index !== -1) {
        availableAgents.splice(index, 1);
      }
    } catch (error) {
      // Undo the optimistic bookkeeping before re-queueing the task.
      delete task.assignedAgent;
      task.status = previousStatus;
      this.taskQueue.unshift(task);
      this.logger.error('Failed to assign task', { taskId: task.id, error });
      break;
    }
  }
}
/**
 * Lists the registered agents that have an active or idle session and are
 * below their `maxConcurrentTasks`, sorted by descending priority.
 *
 * Agents whose task count cannot be read are skipped, and the failure is logged.
 */
private async getAvailableAgents(): Promise<AgentProfile[]> {
  const candidates: AgentProfile[] = [];

  for (const session of this.sessionManager.getActiveSessions()) {
    // Re-checked on every iteration: the status may change while awaiting below.
    const usable = session.status === 'idle' || session.status === 'active';
    if (!usable) {
      continue;
    }
    const profile = this.agents.get(session.agentId);
    if (!profile) {
      continue;
    }
    try {
      const load = await this.coordinationManager.getAgentTaskCount(profile.id);
      if (load < profile.maxConcurrentTasks) {
        candidates.push(profile);
      }
    } catch (failure) {
      this.logger.error('Failed to get agent task count', {
        agentId: profile.id,
        error: failure,
      });
    }
  }

  candidates.sort((left, right) => right.priority - left.priority);
  return candidates;
}
/**
 * Picks the best agent for a task.
 *
 * Score = priority × 10, plus 5 per matched required capability, plus 20
 * when the agent type equals the task type. An agent is ineligible when the
 * task requires capabilities and the agent matches none of them, or when its
 * score is not >= 0. Ties go to the agent that appears first in `agents`.
 *
 * @returns The highest-scoring eligible agent, or `undefined` if none qualifies.
 */
private selectAgentForTask(task: Task, agents: AgentProfile[]): AgentProfile | undefined {
  const required = (task.metadata?.requiredCapabilities as string[]) || [];

  let winner: AgentProfile | undefined;
  let winningScore = -1;

  for (const candidate of agents) {
    const matched = required.filter((cap) => candidate.capabilities.includes(cap)).length;

    // Sentinel -1 marks an agent that cannot take the task at all.
    let score = -1;
    if (required.length === 0 || matched !== 0) {
      score = candidate.priority * 10 + matched * 5;
      if (task.type === candidate.type) {
        score += 20;
      }
    }

    if (!(score >= 0)) {
      continue;
    }
    // Strict comparison keeps the earliest candidate on equal scores.
    if (winner === undefined || score > winningScore) {
      winner = candidate;
      winningScore = score;
    }
  }

  return winner;
}
/**
 * Runs one component's health probe with a 5-second timeout and converts the
 * result into a ComponentHealth record.
 *
 * A timeout or a thrown error yields an `'unhealthy'` record carrying the
 * error message. This method never rejects.
 *
 * @param name - Display name stored in the record.
 * @param check - Probe to run.
 */
private async getComponentHealth(
  name: string,
  check: () => Promise<{ healthy: boolean; error?: string; metrics?: Record<string, number> }>,
): Promise<ComponentHealth> {
  try {
    const probe = check();
    const timeoutFallback = delay(5000).then(() => ({
      healthy: false,
      error: 'Health check timeout',
    }));
    const outcome = await Promise.race([probe, timeoutFallback]);

    const report: ComponentHealth = {
      name,
      status: outcome.healthy ? 'healthy' : 'unhealthy',
      lastCheck: new Date(),
    };
    // Optional fields are only set when present, never assigned `undefined`.
    if (outcome.error !== undefined) {
      report.error = outcome.error;
    }
    if ('metrics' in outcome && outcome.metrics !== undefined) {
      report.metrics = outcome.metrics;
    }
    return report;
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Unknown error';
    return {
      name,
      status: 'unhealthy',
      lastCheck: new Date(),
      error: message,
    };
  }
}
/**
 * Unwraps a settled health check: the fulfilled value is passed through, and a
 * rejection becomes an `'unhealthy'` record for `componentName`.
 */
private processHealthResult(
  result: PromiseSettledResult<ComponentHealth>,
  componentName: string,
): ComponentHealth {
  if (result.status === 'fulfilled') {
    return result.value;
  }
  return {
    name: componentName,
    status: 'unhealthy',
    lastCheck: new Date(),
    error: result.reason?.message || 'Health check failed',
  };
}
/**
 * Initializes one component through `retry` (3 attempts, 2000 ms initial delay).
 *
 * @param name - Display name used in log messages and in the thrown error.
 * @param init - Initialization routine of the component.
 * @throws InitializationError when initialization ultimately fails.
 */
private async initializeComponent(name: string, init: () => Promise<void>): Promise<void> {
  const retryPolicy = { maxAttempts: 3, initialDelay: 2000 };
  try {
    await retry(init, retryPolicy);
    this.logger.info(`${name} initialized`);
  } catch (cause) {
    this.logger.error(`Failed to initialize ${name}`, cause);
    throw new InitializationError(name, { error: cause });
  }
}
/**
 * Shuts down one component, waiting at most 10 seconds for it.
 *
 * @param name - Display name used in log messages.
 * @param shutdown - Shutdown routine of the component.
 * @throws Whatever the shutdown routine rejects with, after logging it.
 */
private async shutdownComponent(name: string, shutdown: () => Promise<void>): Promise<void> {
  try {
    const stopping = shutdown();
    const perComponentDeadline = delay(10000);
    await Promise.race([stopping, perComponentDeadline]);
    this.logger.info(`${name} shut down`);
  } catch (failure) {
    this.logger.error(`Failed to shutdown ${name}`, failure);
    throw failure;
  }
}
/**
 * Rejects agent profiles that cannot be spawned.
 *
 * @throws Error when `id`, `name`, or `type` is missing, when
 *   `maxConcurrentTasks` is below 1, or when the ID is already registered.
 */
private validateAgentProfile(profile: AgentProfile): void {
  const hasRequiredFields = Boolean(profile.id && profile.name && profile.type);
  if (!hasRequiredFields) {
    throw new Error('Invalid agent profile: missing required fields');
  }

  if (profile.maxConcurrentTasks < 1) {
    throw new Error('Invalid agent profile: maxConcurrentTasks must be at least 1');
  }

  const alreadyRegistered = this.agents.has(profile.id);
  if (alreadyRegistered) {
    throw new Error(`Agent with ID ${profile.id} already exists`);
  }
}
/**
 * Rejects tasks that cannot be accepted.
 *
 * @throws Error when `id`, `type`, or `description` is missing, when
 *   `priority` is outside 0–100, or when the ID is already in the task history.
 */
private validateTask(task: Task): void {
  const hasRequiredFields = Boolean(task.id && task.type && task.description);
  if (!hasRequiredFields) {
    throw new Error('Invalid task: missing required fields');
  }

  const { priority } = task;
  if (priority < 0 || priority > 100) {
    throw new Error('Invalid task: priority must be between 0 and 100');
  }

  const alreadyTracked = this.taskHistory.has(task.id);
  if (alreadyTracked) {
    throw new Error(`Task with ID ${task.id} already exists`);
  }
}
/**
 * Reacts to an agent error: restarts the agent for its first three errors and
 * terminates it for good afterwards.
 *
 * The running error count lives in `profile.metadata.errorCount` and is
 * carried over to the restarted agent. Resetting it on restart would mean the
 * threshold is never reached and a failing agent is restarted forever.
 *
 * This method never rejects. Restart and termination failures are logged.
 *
 * @param agentId - ID of the agent that reported the error.
 * @param error - The reported error (logged only).
 */
private async handleAgentError(agentId: string, error: Error): Promise<void> {
  const profile = this.agents.get(agentId);
  if (!profile) {
    return;
  }
  // Log error details
  this.logger.error('Handling agent error', { agentId, error });

  const previousErrors = (profile.metadata?.errorCount as number) || 0;
  const errorCount = previousErrors + 1;
  profile.metadata = { ...profile.metadata, errorCount };

  if (previousErrors < 3) {
    // Attempt to restart agent
    try {
      await this.terminateAgent(agentId);
      await delay(2000); // Wait before restart
      // Keep the accumulated count so repeated failures eventually hit the limit.
      await this.spawnAgent({ ...profile, metadata: { ...profile.metadata, errorCount } });
      this.logger.info('Agent restarted after error', { agentId });
    } catch (restartError) {
      this.logger.error('Failed to restart agent', { agentId, error: restartError });
    }
    return;
  }

  // Too many errors, terminate agent
  this.logger.error('Agent exceeded error threshold, terminating', { agentId, errorCount });
  try {
    await this.terminateAgent(agentId);
  } catch (terminateError) {
    this.logger.error('Failed to terminate agent after repeated errors', {
      agentId,
      error: terminateError,
    });
  }
}
/**
 * Schedules a failed task for retry with exponential backoff (1 s, 2 s, 4 s, …)
 * until `config.orchestrator.taskMaxRetries` (default 3) is exhausted.
 *
 * When a retry is scheduled the task goes back to `'queued'`, loses its agent
 * assignment, and has its `completedAt` cleared. This stops history cleanup
 * from treating a task that is still being retried as finished.
 *
 * @param taskId - ID of the failed task; unknown IDs are ignored.
 * @param error - The failure that triggered this call (not used here).
 */
private async handleTaskFailure(taskId: string, error: Error): Promise<void> {
  const task = this.taskHistory.get(taskId);
  if (!task) {
    return;
  }
  const retryCount = (task.metadata?.retryCount as number) || 0;
  const maxRetries = this.config.orchestrator.taskMaxRetries || 3;

  if (retryCount >= maxRetries) {
    this.logger.error('Task exceeded retry limit', { taskId, retryCount });
    return;
  }

  // Retry task
  task.metadata = { ...task.metadata, retryCount: retryCount + 1 };
  task.status = 'queued';
  delete task.assignedAgent;
  delete task.completedAt;

  // Add back to queue with delay (exponential backoff)
  setTimeout(
    () => {
      this.taskQueue.push(task);
      // The timer callback cannot await, so handle a rejection explicitly.
      this.processTaskQueue().catch((queueError) => {
        this.logger.error('Failed to process task queue for retried task', {
          taskId,
          error: queueError,
        });
      });
    },
    Math.pow(2, retryCount) * 1000,
  );
  this.logger.info('Task queued for retry', { taskId, retryCount: retryCount + 1 });
}
/**
 * Records a system-level error. Currently this only logs it.
 */
private handleSystemError(component: string, error: Error): void {
  const details = { component, error };
  this.logger.error('Handling system error', details);
  // TODO: Implement specific recovery strategies based on component and error type
}
/**
 * Breaks a deadlock by cancelling the tasks of the lowest-priority registered
 * agent among those involved. Does nothing if none of the agents is registered.
 *
 * @param agents - IDs of the agents involved in the deadlock.
 * @param resources - Contended resources (logged only).
 */
private async resolveDeadlock(agents: string[], resources: string[]): Promise<void> {
  this.logger.warn('Resolving deadlock', { agents, resources });

  const involved: AgentProfile[] = [];
  for (const id of agents) {
    const known = this.agents.get(id);
    if (known) {
      involved.push(known);
    }
  }
  if (involved.length === 0) {
    return;
  }

  // Ascending priority: the first entry is the one to sacrifice.
  involved.sort((left, right) => left.priority - right.priority);
  const victim = involved[0];

  await this.cancelAgentTasks(victim.id);
  this.logger.info('Deadlock resolved by cancelling tasks', { agentId: victim.id });
}
/**
 * Cancels every task the coordination manager reports for an agent, one after
 * another. Tracked tasks are marked `'cancelled'`, and a TASK_CANCELLED event
 * is emitted per task.
 *
 * Failures are logged and never propagated. The first failure stops the loop.
 */
private async cancelAgentTasks(agentId: string): Promise<void> {
  try {
    const assigned = await this.coordinationManager.getAgentTasks(agentId);

    for (const { id: taskId } of assigned) {
      await this.coordinationManager.cancelTask(taskId);

      const tracked = this.taskHistory.get(taskId);
      if (tracked) {
        tracked.status = 'cancelled';
        tracked.completedAt = new Date();
      }

      this.eventBus.emit(SystemEvents.TASK_CANCELLED, {
        taskId,
        reason: 'Agent termination',
      });
    }
  } catch (failure) {
    this.logger.error('Failed to cancel agent tasks', { agentId, error: failure });
  }
}
/**
 * Placeholder for per-agent health monitoring. It is called by spawnAgent
 * after an agent is registered, but currently does nothing.
 */
private startAgentHealthMonitoring(agentId: string): void {
// TODO: Implement periodic health checks for individual agents
}
/**
 * Walks a health report and dispatches recovery for every unhealthy
 * component. The recovery strategies themselves are not implemented yet.
 *
 * Dispatch uses the component's display name (`component.name`, e.g.
 * 'Terminal Manager'). The record keys produced by `getHealthStatus` are short
 * identifiers ('terminal', 'memory', …) and would never match these cases.
 *
 * @param health - Report whose `components` are inspected.
 */
private async recoverUnhealthyComponents(health: HealthStatus): Promise<void> {
  for (const [name, component] of Object.entries(health.components)) {
    if (component.status !== 'unhealthy') {
      continue;
    }
    this.logger.warn('Attempting to recover unhealthy component', { name });
    // TODO: Implement component-specific recovery strategies
    switch (component.name) {
      case 'Terminal Manager':
        // Restart terminal pools, etc.
        break;
      case 'Memory Manager':
        // Clear cache, reconnect to backends, etc.
        break;
      case 'Coordination Manager':
        // Reset locks, clear message queues, etc.
        break;
      case 'MCP Server':
        // Restart server, reset connections, etc.
        break;
    }
  }
}
/**
 * Removes terminated sessions whose end time is older than
 * `config.orchestrator.sessionRetentionMs` (1 hour when that is falsy).
 *
 * Expired sessions are dropped with `removeSession`. They are already
 * terminated, so terminating them again would only redo the teardown and
 * reset their end time without ever freeing the entry.
 *
 * NOTE(review): the SessionManager in this file returns only 'active' and
 * 'idle' sessions from getActiveSessions(), so the terminated list below is
 * empty with that implementation. ISessionManager needs a way to enumerate
 * terminated sessions for this cleanup to have any effect.
 */
private async cleanupTerminatedSessions(): Promise<void> {
  const terminatedSessions = this.sessionManager
    .getActiveSessions()
    .filter((session) => session.status === 'terminated');
  const retentionMs = this.config.orchestrator.sessionRetentionMs || 3600000; // 1 hour default
  const cutoffTime = Date.now() - retentionMs;

  for (const session of terminatedSessions) {
    if (session.endTime && session.endTime.getTime() < cutoffTime) {
      this.sessionManager.removeSession(session.id);
      this.logger.debug('Cleaned up old session', { sessionId: session.id });
    }
  }
}
/**
 * Evicts tasks from the history once their `completedAt` is older than
 * `config.orchestrator.taskHistoryRetentionMs` (24 hours when that is falsy).
 */
private async cleanupTaskHistory(): Promise<void> {
  const retentionMs = this.config.orchestrator.taskHistoryRetentionMs || 86400000;
  const cutoff = Date.now() - retentionMs;

  // Deleting from a Map while iterating over it is safe.
  this.taskHistory.forEach((task, taskId) => {
    const finishedAt = task.completedAt;
    if (finishedAt && finishedAt.getTime() < cutoff) {
      this.taskHistory.delete(taskId);
      this.logger.debug('Cleaned up old task', { taskId });
    }
  });
}
/**
 * Counts the queued tasks that are critical (priority >= 90 or
 * `metadata.critical === true`) and logs that count before shutdown. The
 * tasks are not actually processed yet.
 */
private async processShutdownTasks(): Promise<void> {
  const isCritical = (queued: Task): boolean =>
    queued.priority >= 90 || queued.metadata?.critical === true;

  const criticalCount = this.taskQueue.filter(isCritical).length;
  if (criticalCount === 0) {
    return;
  }

  this.logger.info('Processing critical tasks before shutdown', {
    count: criticalCount,
  });
  // TODO: Implement critical task processing
}
/**
 * Exposes the Claude API client.
 *
 * @returns The client, or `undefined` when none has been created.
 */
getClaudeClient(): ClaudeAPIClient | undefined {
  const client = this.claudeClient;
  return client;
}
/**
 * Applies new Claude API settings at runtime.
 *
 * The settings are stored in the config manager first. An existing client is
 * then updated in place. If there is no client yet and the API is now
 * configured, a client is created. A creation failure is logged and not thrown.
 */
updateClaudeConfig(config: Partial<Config['claude']>): void {
  this.configManager.setClaudeConfig(config);

  if (this.claudeClient) {
    this.claudeClient.updateConfig(config);
    return;
  }
  if (!this.configManager.isClaudeAPIConfigured()) {
    return;
  }

  // No client yet, but the new settings make one possible.
  try {
    this.claudeClient = new ClaudeAPIClient(this.logger, this.configManager);
    this.logger.info('Claude API client initialized with new configuration');
  } catch (failure) {
    this.logger.error('Failed to initialize Claude API client', failure);
  }
}
/**
 * Sends a prompt to the Claude API through the client.
 *
 * @param prompt - Prompt text to complete.
 * @param options - Optional per-request overrides, forwarded to the client.
 * @returns The client's response, or `null` when there is no client or the
 *   request fails (both cases are logged).
 */
async executeClaudeRequest(
  prompt: string,
  options?: {
    model?: string;
    temperature?: number;
    maxTokens?: number;
    systemPrompt?: string;
  },
): Promise<string | null> {
  const client = this.claudeClient;
  if (!client) {
    this.logger.error('Claude API client not initialized');
    return null;
  }

  try {
    // `await` is required here so a rejection lands in the catch below.
    return await client.complete(prompt, options as any);
  } catch (failure) {
    this.logger.error('Claude API request failed', failure);
    return null;
  }
}
}