UNPKG

jay-code

Version:

Streamlined AI CLI orchestration engine with mathematical rigor and enterprise-grade reliability

642 lines (552 loc) 17.4 kB
/**
 * Advanced task executor with timeout handling, retry logic, and resource management
 */
import { getErrorMessage, getErrorStack } from '../utils/type-guards.js';
import { EventEmitter } from 'node:events';
import { spawn, ChildProcess } from 'node:child_process';
import type {
  TaskDefinition,
  TaskResult,
  TaskStatus,
  AgentState,
  TaskError,
} from '../swarm/types.js';
import type { ILogger } from '../core/logger.js';
import type { IEventBus } from '../core/event-bus.js';
import { CircuitBreaker, CircuitBreakerManager } from './circuit-breaker.js';
import { generateId } from '../utils/helpers.js';

/** Tunable settings of the executor; the constructor fills in defaults for omitted keys. */
export interface TaskExecutorConfig {
  maxConcurrentTasks: number;
  /** Per-attempt timeout in milliseconds, used when the caller passes none. */
  defaultTimeout: number;
  /** Number of retries after the first failed attempt. */
  retryAttempts: number;
  /** Base delay (ms) of the exponential retry backoff. */
  retryBackoffBase: number;
  /** Upper bound (ms) of the retry backoff. */
  retryBackoffMax: number;
  resourceLimits: {
    memory: number;
    cpu: number;
    disk: number;
  };
  enableCircuitBreaker: boolean;
  enableResourceMonitoring: boolean;
  /** Grace period (ms) between SIGTERM and SIGKILL when stopping a child process. */
  killTimeout: number;
}

/** Book-keeping for one running attempt of a task. */
export interface ExecutionContext {
  taskId: string;
  agentId: string;
  process?: ChildProcess;
  startTime: Date;
  timeout?: NodeJS.Timeout;
  resources: ResourceUsage;
  circuitBreaker?: CircuitBreaker;
}

/** Snapshot of the resources attributed to a task. */
export interface ResourceUsage {
  memory: number;
  cpu: number;
  disk: number;
  network: number;
  lastUpdated: Date;
}

/** Outcome of `executeTask`, covering all attempts. */
export interface TaskExecutionResult {
  success: boolean;
  result?: TaskResult;
  error?: TaskError;
  executionTime: number;
  resourcesUsed: ResourceUsage;
  retryCount: number;
}

/**
 * Advanced task executor with comprehensive timeout and resource management.
 *
 * Each task attempt is run as a child process. Attempts are raced against a
 * timeout, optionally wrapped in a per-agent circuit breaker, and retried with
 * exponential backoff. Emits: `executor-initialized`, `executor-shutdown`,
 * `circuit-breaker-changed`, `task-cancelled`, `task-dequeued`.
 */
export class AdvancedTaskExecutor extends EventEmitter {
  private logger: ILogger;
  private eventBus: IEventBus;
  private config: TaskExecutorConfig;
  private runningTasks = new Map<string, ExecutionContext>();
  private circuitBreakerManager: CircuitBreakerManager;
  private resourceMonitor?: NodeJS.Timeout;
  private queuedTasks: TaskDefinition[] = [];
  private isShuttingDown = false;

  /** Task id -> cancellation reason, for tasks cancelled while an `executeTask` call is active. */
  private cancelledTasks = new Map<string, string>();

  /** Single listener instance so it can be removed from `process` again on shutdown. */
  private readonly signalHandler = (): void => {
    void this.gracefulShutdown();
  };

  /**
   * @param config - Overrides for the default configuration.
   * @param logger - Logger used for all diagnostics.
   * @param eventBus - Bus on which circuit breaker state changes are observed.
   */
  constructor(config: Partial<TaskExecutorConfig>, logger: ILogger, eventBus: IEventBus) {
    super();
    this.logger = logger;
    this.eventBus = eventBus;
    this.config = {
      maxConcurrentTasks: 10,
      defaultTimeout: 300000, // 5 minutes
      retryAttempts: 3,
      retryBackoffBase: 1000,
      retryBackoffMax: 30000,
      resourceLimits: {
        memory: 512 * 1024 * 1024, // 512MB
        cpu: 1.0, // 1 CPU core
        disk: 1024 * 1024 * 1024, // 1GB
      },
      enableCircuitBreaker: true,
      enableResourceMonitoring: true,
      killTimeout: 5000,
      ...config,
    };

    // Initialize circuit breaker manager
    this.circuitBreakerManager = new CircuitBreakerManager(
      {
        failureThreshold: 5,
        successThreshold: 3,
        timeout: 60000, // 1 minute
        halfOpenLimit: 2,
      },
      this.logger,
      this.eventBus,
    );

    this.setupEventHandlers();
  }

  /** Registers process signal handlers and forwards circuit breaker state changes. */
  private setupEventHandlers(): void {
    // Handle process events
    process.on('SIGTERM', this.signalHandler);
    process.on('SIGINT', this.signalHandler);

    // Handle circuit breaker events
    this.eventBus.on('circuitbreaker:state-change', (event) => {
      this.logger.info('Circuit breaker state changed', event);
      this.emit('circuit-breaker-changed', event);
    });
  }

  /** Starts resource monitoring (when enabled) and emits `executor-initialized`. */
  async initialize(): Promise<void> {
    this.logger.info('Initializing advanced task executor', {
      maxConcurrentTasks: this.config.maxConcurrentTasks,
      defaultTimeout: this.config.defaultTimeout,
      resourceLimits: this.config.resourceLimits,
    });

    if (this.config.enableResourceMonitoring) {
      this.startResourceMonitoring();
    }

    this.emit('executor-initialized');
  }

  /**
   * Stops monitoring, cancels every running task and waits (bounded by
   * `killTimeout`) for their processes to stop, then emits `executor-shutdown`.
   */
  async shutdown(): Promise<void> {
    this.logger.info('Shutting down task executor');
    this.isShuttingDown = true;

    // Stop resource monitoring
    if (this.resourceMonitor) {
      clearInterval(this.resourceMonitor);
      this.resourceMonitor = undefined;
    }

    // Cancel all running tasks
    const cancelPromises = Array.from(this.runningTasks.values()).map((ctx) =>
      this.cancelTask(ctx.taskId, 'Shutdown requested'),
    );

    await Promise.all(cancelPromises);

    // The listeners were added per instance; remove them so they do not accumulate.
    process.removeListener('SIGTERM', this.signalHandler);
    process.removeListener('SIGINT', this.signalHandler);

    this.emit('executor-shutdown');
  }

  /**
   * Execute a task with comprehensive error handling and resource management.
   *
   * Failures are retried with exponential backoff up to `retryAttempts` times.
   * A task that is cancelled, or that fails while the executor is shutting
   * down, is not retried.
   *
   * @param task - Task to run.
   * @param agent - Agent on whose behalf the task runs.
   * @param options - Per-call overrides for timeout (ms) and retry count.
   * @returns The outcome; failures are reported via `success: false`, not thrown.
   */
  async executeTask(
    task: TaskDefinition,
    agent: AgentState,
    options: {
      timeout?: number;
      retryAttempts?: number;
      priority?: number;
    } = {},
  ): Promise<TaskExecutionResult> {
    const startTime = Date.now();
    const taskId = task.id.id;
    let retryCount = 0;
    const maxRetries = options.retryAttempts ?? this.config.retryAttempts;
    const timeout = options.timeout ?? this.config.defaultTimeout;

    this.logger.info('Starting task execution', {
      taskId,
      agentId: agent.id.id,
      type: task.type,
      timeout,
      maxRetries,
    });

    // Check if we have capacity
    if (this.runningTasks.size >= this.config.maxConcurrentTasks) {
      this.queuedTasks.push(task);
      this.logger.info('Task queued due to capacity limits', {
        taskId,
        queueSize: this.queuedTasks.length,
      });

      // Wait for capacity
      await this.waitForCapacity();
    }

    try {
      while (retryCount <= maxRetries) {
        // Never spawn new work once shutdown has begun (covers tasks that were
        // waiting for capacity or sleeping in a retry backoff).
        if (this.isShuttingDown) {
          return this.buildFailureResult(
            new Error('Task executor is shutting down'),
            task,
            retryCount,
            maxRetries,
            startTime,
          );
        }

        try {
          const result = await this.executeSingleAttempt(task, agent, timeout, retryCount);

          this.logger.info('Task completed successfully', {
            taskId,
            executionTime: Date.now() - startTime,
            retryCount,
          });

          return {
            success: true,
            result: result.result,
            executionTime: Date.now() - startTime,
            resourcesUsed: result.resourcesUsed,
            retryCount,
          };
        } catch (error) {
          retryCount++;

          this.logger.warn('Task attempt failed', {
            taskId,
            attempt: retryCount,
            maxRetries,
            error: getErrorMessage(error),
          });

          // A deliberately cancelled task must not be started again.
          const cancelReason = this.cancelledTasks.get(taskId);
          if (cancelReason !== undefined) {
            return this.buildFailureResult(
              new Error(`Task cancelled: ${cancelReason}`),
              task,
              retryCount,
              maxRetries,
              startTime,
            );
          }

          // Check if we should retry
          if (retryCount > maxRetries || this.isShuttingDown) {
            return this.buildFailureResult(error, task, retryCount, maxRetries, startTime);
          }

          // Calculate backoff delay
          const backoffDelay = Math.min(
            this.config.retryBackoffBase * Math.pow(2, retryCount - 1),
            this.config.retryBackoffMax,
          );

          this.logger.info('Retrying task after backoff', {
            taskId,
            backoffDelay,
            attempt: retryCount + 1,
          });

          await this.delay(backoffDelay);
        }
      }

      // This should never be reached, but TypeScript requires it
      throw new Error('Unexpected end of retry loop');
    } finally {
      this.cancelledTasks.delete(taskId);
    }
  }

  /** Builds the non-retryable failure result returned by `executeTask`. */
  private buildFailureResult(
    error: unknown,
    task: TaskDefinition,
    retryCount: number,
    maxRetries: number,
    startTime: number,
  ): TaskExecutionResult {
    const taskError: TaskError = {
      type: 'execution_failed',
      message: getErrorMessage(error),
      stack: getErrorStack(error),
      context: {
        retryCount,
        maxRetries,
        taskType: task.type,
      },
      recoverable: false,
      retryable: false,
    };

    return {
      success: false,
      error: taskError,
      executionTime: Date.now() - startTime,
      resourcesUsed: this.getDefaultResourceUsage(),
      retryCount,
    };
  }

  /**
   * Runs one attempt of a task, racing it against a timeout.
   *
   * On timeout the child process (if any) is terminated so it does not keep
   * running in the background. The timer is always cleared and the task is
   * always removed from the running set, whatever the outcome.
   *
   * @throws Error when the attempt fails or exceeds `timeout` milliseconds.
   */
  private async executeSingleAttempt(
    task: TaskDefinition,
    agent: AgentState,
    timeout: number,
    retryCount: number,
  ): Promise<{ result: TaskResult; resourcesUsed: ResourceUsage }> {
    const executionContext: ExecutionContext = {
      taskId: task.id.id,
      agentId: agent.id.id,
      startTime: new Date(),
      resources: this.getDefaultResourceUsage(),
    };

    this.runningTasks.set(task.id.id, executionContext);

    try {
      // Set up timeout
      const timeoutPromise = new Promise<never>((_, reject) => {
        executionContext.timeout = setTimeout(() => {
          if (executionContext.process) {
            void this.terminateProcess(executionContext.process);
          }
          reject(new Error(`Task timeout after ${timeout}ms`));
        }, timeout);
      });

      // Set up circuit breaker if enabled
      if (this.config.enableCircuitBreaker) {
        executionContext.circuitBreaker = this.circuitBreakerManager.getBreaker(
          `agent-${agent.id.id}`,
        );
      }

      // Execute task with circuit breaker protection
      const executionPromise =
        this.config.enableCircuitBreaker && executionContext.circuitBreaker
          ? executionContext.circuitBreaker.execute(() =>
              this.performTaskExecution(task, agent, executionContext),
            )
          : this.performTaskExecution(task, agent, executionContext);

      // Race between execution and timeout
      return await Promise.race([executionPromise, timeoutPromise]);
    } finally {
      // Clear the timer on every path so it cannot fire after the attempt ended.
      if (executionContext.timeout) {
        clearTimeout(executionContext.timeout);
      }

      // Cleanup
      this.runningTasks.delete(task.id.id);

      // Process queued tasks
      this.processQueuedTasks();
    }
  }

  /**
   * Spawns the task's child process, feeds it the task as JSON on stdin and
   * turns its output into a `TaskResult`.
   *
   * stdout is parsed as JSON; when that fails the raw text is returned with
   * lower quality scores.
   *
   * @throws Error when the process cannot be started, exits with a non-zero
   *   code, or is terminated by a signal.
   */
  private async performTaskExecution(
    task: TaskDefinition,
    agent: AgentState,
    context: ExecutionContext,
  ): Promise<{ result: TaskResult; resourcesUsed: ResourceUsage }> {
    if (this.cancelledTasks.has(task.id.id)) {
      throw new Error('Task was cancelled before its process started');
    }

    const startTime = Date.now();

    // Create task execution command
    const command = this.buildExecutionCommand(task, agent);

    this.logger.debug('Executing task command', {
      taskId: task.id.id,
      command: command.cmd,
      args: command.args,
    });

    // Spawn process
    const childProcess = spawn(command.cmd, command.args, {
      stdio: ['pipe', 'pipe', 'pipe'],
      env: {
        ...process.env,
        ...command.env,
        TASK_ID: task.id.id,
        AGENT_ID: agent.id.id,
        TASK_TYPE: task.type,
      },
    });

    context.process = childProcess;

    // Collect output
    let stdout = '';
    let stderr = '';

    childProcess.stdout?.on('data', (chunk) => {
      stdout += chunk.toString();
    });

    childProcess.stderr?.on('data', (chunk) => {
      stderr += chunk.toString();
    });

    if (childProcess.stdin) {
      // Without a listener, a write error (e.g. EPIPE when the child exits
      // early) would be thrown as an unhandled 'error' event.
      childProcess.stdin.on('error', (error) => {
        this.logger.debug('Failed to write task input to process', {
          taskId: task.id.id,
          error: getErrorMessage(error),
        });
      });

      // Send input if provided
      if (task.input) {
        childProcess.stdin.write(
          JSON.stringify({
            task: task,
            agent: agent,
            input: task.input,
          }),
        );
      }

      // Always close stdin so a child that reads until EOF cannot hang.
      childProcess.stdin.end();
    }

    // Wait for process completion. 'close' (unlike 'exit') fires only after the
    // stdio streams have ended, so stdout/stderr are complete when we parse them.
    const { code, signal } = await new Promise<{
      code: number | null;
      signal: NodeJS.Signals | null;
    }>((resolve, reject) => {
      childProcess.on('close', (exitCode, exitSignal) => {
        resolve({ code: exitCode, signal: exitSignal });
      });

      childProcess.on('error', (error) => {
        reject(new Error(`Process error: ${getErrorMessage(error)}`));
      });
    });

    // A null exit code means the process was ended by a signal, which is a failure.
    if (code === null) {
      throw new Error(`Task process terminated by signal ${signal ?? 'unknown'}: ${stderr}`);
    }

    if (code !== 0) {
      throw new Error(`Task execution failed with exit code ${code}: ${stderr}`);
    }

    const executionTime = Date.now() - startTime;

    // Parse result
    let taskResult: TaskResult;
    try {
      const output = JSON.parse(stdout);
      // `??` keeps legitimate falsy values such as a score of 0.
      taskResult = {
        output: output.result ?? output,
        artifacts: output.artifacts ?? {},
        metadata: output.metadata ?? {},
        quality: output.quality ?? 0.8,
        completeness: output.completeness ?? 1.0,
        accuracy: output.accuracy ?? 0.9,
        executionTime,
        resourcesUsed: context.resources,
        validated: false,
      };
    } catch {
      // Output is not usable JSON: hand back the raw text instead.
      taskResult = {
        output: stdout,
        artifacts: {},
        metadata: { stderr },
        quality: 0.5,
        completeness: 1.0,
        accuracy: 0.7,
        executionTime,
        resourcesUsed: context.resources,
        validated: false,
      };
    }

    return {
      result: taskResult,
      resourcesUsed: context.resources,
    };
  }

  /** Builds the command line and extra environment for a task's child process. */
  private buildExecutionCommand(
    task: TaskDefinition,
    agent: AgentState,
  ): {
    cmd: string;
    args: string[];
    env: Record<string, string>;
  } {
    // This would be customized based on task type and agent capabilities
    // For now, return a default Claude execution command
    const cmd = 'deno';
    const args = [
      'run',
      '--allow-all',
      '--no-check',
      './src/cli/commands/task-executor.ts',
      '--task-type',
      task.type,
      '--agent-type',
      agent.type,
    ];

    const env = {
      TASK_TIMEOUT: (task.constraints.timeoutAfter || this.config.defaultTimeout).toString(),
      MEMORY_LIMIT: this.config.resourceLimits.memory.toString(),
      CPU_LIMIT: this.config.resourceLimits.cpu.toString(),
    };

    return { cmd, args, env };
  }

  /**
   * Cancels a running task: clears its timeout, removes it from the running
   * set, stops its process and emits `task-cancelled`. Does nothing when the
   * task is not running.
   *
   * Resolves once the process has exited or `killTimeout` has elapsed.
   */
  private async cancelTask(taskId: string, reason: string): Promise<void> {
    const context = this.runningTasks.get(taskId);
    if (!context) {
      return;
    }

    this.logger.info('Cancelling task', { taskId, reason });

    // Remember the cancellation so the retry loop does not restart the task.
    this.cancelledTasks.set(taskId, reason);

    // Clear timeout
    if (context.timeout) {
      clearTimeout(context.timeout);
    }

    // Remove from running tasks
    this.runningTasks.delete(taskId);

    // Kill process if running
    if (context.process) {
      await this.terminateProcess(context.process);
    }

    this.emit('task-cancelled', { taskId, reason });
  }

  /**
   * True while the child has a pid and has neither exited nor been ended by a
   * signal. `ChildProcess.killed` is not used: it only records that a signal
   * was sent, not that the process stopped.
   */
  private isProcessRunning(child: ChildProcess): boolean {
    return child.pid !== undefined && child.exitCode === null && child.signalCode === null;
  }

  /**
   * Sends SIGTERM and, if the process is still running after `killTimeout`
   * milliseconds, SIGKILL. Never rejects; resolves when the process exits or
   * right after SIGKILL has been sent.
   */
  private terminateProcess(child: ChildProcess): Promise<void> {
    if (!this.isProcessRunning(child)) {
      return Promise.resolve();
    }

    return new Promise<void>((resolve) => {
      const onExit = (): void => {
        clearTimeout(forceKillTimer);
        resolve();
      };

      // Force kill after timeout
      const forceKillTimer = setTimeout(() => {
        child.removeListener('exit', onExit);
        if (this.isProcessRunning(child)) {
          child.kill('SIGKILL');
        }
        resolve();
      }, this.config.killTimeout);

      child.once('exit', onExit);
      child.kill('SIGTERM');
    });
  }

  /** Starts the periodic resource usage refresh. */
  private startResourceMonitoring(): void {
    this.resourceMonitor = setInterval(() => {
      void this.updateResourceUsage();
    }, 5000); // Update every 5 seconds
  }

  /** Refreshes the resource snapshot of every running task and enforces limits. */
  private async updateResourceUsage(): Promise<void> {
    for (const [taskId, context] of this.runningTasks) {
      if (context.process) {
        try {
          const usage = await this.getProcessResourceUsage(context.process.pid);
          context.resources = {
            ...usage,
            lastUpdated: new Date(),
          };

          // Check resource limits
          this.checkResourceLimits(taskId, context);
        } catch (error) {
          this.logger.warn('Failed to get resource usage', {
            taskId,
            error: getErrorMessage(error),
          });
        }
      }
    }
  }

  /**
   * Returns the resource usage of a process.
   *
   * @throws Error when `pid` is undefined (the process never started).
   */
  private async getProcessResourceUsage(pid: number | undefined): Promise<ResourceUsage> {
    if (!pid) {
      throw new Error('Process ID is undefined');
    }

    // In a real implementation, this would use system APIs
    // For now, return mock data
    return {
      memory: Math.random() * this.config.resourceLimits.memory,
      cpu: Math.random() * 100,
      disk: Math.random() * this.config.resourceLimits.disk,
      network: Math.random() * 1024 * 1024,
      lastUpdated: new Date(),
    };
  }

  /**
   * Compares a task's usage with the configured limits. Exceeding the memory
   * limit cancels the task; exceeding the CPU limit only logs a warning.
   */
  private checkResourceLimits(taskId: string, context: ExecutionContext): void {
    const { memory, cpu } = context.resources;
    const limits = this.config.resourceLimits;

    if (memory > limits.memory) {
      this.logger.warn('Task exceeding memory limit', {
        taskId,
        current: memory,
        limit: limits.memory,
      });

      // Fire-and-forget, but never leave the rejection unhandled.
      this.cancelTask(taskId, 'Memory limit exceeded').catch((error: unknown) => {
        this.logger.warn('Failed to cancel task', {
          taskId,
          error: getErrorMessage(error),
        });
      });
    }

    if (cpu > limits.cpu * 100) {
      // CPU is in percentage
      this.logger.warn('Task exceeding CPU limit', {
        taskId,
        current: cpu,
        limit: limits.cpu * 100,
      });
    }
  }

  /** Returns a zeroed resource snapshot stamped with the current time. */
  private getDefaultResourceUsage(): ResourceUsage {
    return {
      memory: 0,
      cpu: 0,
      disk: 0,
      network: 0,
      lastUpdated: new Date(),
    };
  }

  /** Resolves once fewer than `maxConcurrentTasks` tasks are running (polled every second). */
  private async waitForCapacity(): Promise<void> {
    return new Promise((resolve) => {
      const check = () => {
        if (this.runningTasks.size < this.config.maxConcurrentTasks) {
          resolve();
        } else {
          setTimeout(check, 1000);
        }
      };
      check();
    });
  }

  /**
   * Removes waiting tasks from the queue and emits `task-dequeued` for each.
   *
   * NOTE(review): nothing in the loop changes `runningTasks.size`, so whenever
   * capacity is free the whole queue is drained at once, and the entries
   * removed here are not tied to the callers blocked in `waitForCapacity`.
   * Left as is; confirm what `task-dequeued` consumers expect before changing.
   */
  private processQueuedTasks(): void {
    while (this.queuedTasks.length > 0 && this.runningTasks.size < this.config.maxConcurrentTasks) {
      const task = this.queuedTasks.shift();
      if (task) {
        this.emit('task-dequeued', { taskId: task.id.id });
      }
    }
  }

  /** Resolves after `ms` milliseconds. */
  private async delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Signal handler: shuts the executor down, then exits the process with
   * code 0, or 1 when the shutdown itself failed.
   */
  private async gracefulShutdown(): Promise<void> {
    this.logger.info('Received shutdown signal, initiating graceful shutdown');
    try {
      await this.shutdown();
    } catch (error) {
      this.logger.warn('Graceful shutdown failed', { error: getErrorMessage(error) });
      process.exit(1);
    }
    process.exit(0);
  }

  // Public API methods

  /** Ids of the tasks that currently have a running attempt. */
  getRunningTasks(): string[] {
    return Array.from(this.runningTasks.keys());
  }

  /** Execution context of a running task, or `undefined` when it is not running. */
  getTaskContext(taskId: string): ExecutionContext | undefined {
    return this.runningTasks.get(taskId);
  }

  /** Copy of the queue of tasks waiting for capacity. */
  getQueuedTasks(): TaskDefinition[] {
    return [...this.queuedTasks];
  }

  /** Current load, limits and circuit breaker metrics. */
  getExecutorStats(): {
    runningTasks: number;
    queuedTasks: number;
    maxConcurrentTasks: number;
    totalCapacity: number;
    resourceLimits: typeof this.config.resourceLimits;
    circuitBreakers: Record<string, any>;
  } {
    return {
      runningTasks: this.runningTasks.size,
      queuedTasks: this.queuedTasks.length,
      maxConcurrentTasks: this.config.maxConcurrentTasks,
      totalCapacity: this.config.maxConcurrentTasks,
      resourceLimits: this.config.resourceLimits,
      circuitBreakers: this.circuitBreakerManager.getAllMetrics(),
    };
  }

  /** Cancels a running task on behalf of the user. */
  async forceKillTask(taskId: string): Promise<void> {
    await this.cancelTask(taskId, 'Force killed by user');
  }

  /** Shallow-merges `newConfig` into the current configuration. */
  updateConfig(newConfig: Partial<TaskExecutorConfig>): void {
    this.config = { ...this.config, ...newConfig };
    this.logger.info('Task executor configuration updated', { newConfig });
  }
}