claude-flow-tbowman01
Version:
Enterprise-grade AI agent orchestration with ruv-swarm integration (Alpha Release)
449 lines (373 loc) • 14.5 kB
text/typescript
/**
* Coordination manager for task scheduling and resource management
*/
import { Task, CoordinationConfig, SystemEvents } from '../utils/types.js';
import type { IEventBus } from '../core/event-bus.js';
import type { ILogger } from '../core/logger.js';
import { CoordinationError, DeadlockError } from '../utils/errors.js';
import { TaskScheduler } from './scheduler.js';
import { ResourceManager } from './resources.js';
import { MessageRouter } from './messaging.js';
import { AdvancedTaskScheduler } from './advanced-scheduler.js';
import { ConflictResolver } from './conflict-resolution.js';
import { CoordinationMetricsCollector } from './metrics.js';
export interface ICoordinationManager {
initialize(): Promise<void>;
shutdown(): Promise<void>;
assignTask(task: Task, agentId: string): Promise<void>;
getAgentTaskCount(agentId: string): Promise<number>;
getAgentTasks(agentId: string): Promise<Task[]>;
cancelTask(taskId: string, reason?: string): Promise<void>;
acquireResource(resourceId: string, agentId: string): Promise<void>;
releaseResource(resourceId: string, agentId: string): Promise<void>;
sendMessage(from: string, to: string, message: unknown): Promise<void>;
getHealthStatus(): Promise<{
healthy: boolean;
error?: string;
metrics?: Record<string, number>;
}>;
performMaintenance(): Promise<void>;
getCoordinationMetrics(): Promise<Record<string, unknown>>;
enableAdvancedScheduling(): void;
reportConflict(type: 'resource' | 'task', id: string, agents: string[]): Promise<void>;
}
/**
* Coordination manager implementation
*/
export class CoordinationManager implements ICoordinationManager {
private scheduler: TaskScheduler;
private resourceManager: ResourceManager;
private messageRouter: MessageRouter;
private conflictResolver: ConflictResolver;
private metricsCollector: CoordinationMetricsCollector;
private initialized = false;
private deadlockCheckInterval?: ReturnType<typeof setInterval>;
private advancedSchedulingEnabled = false;
constructor(
private config: CoordinationConfig,
private eventBus: IEventBus,
private logger: ILogger,
) {
this.scheduler = new TaskScheduler(config, eventBus, logger);
this.resourceManager = new ResourceManager(config, eventBus, logger);
this.messageRouter = new MessageRouter(config, eventBus, logger);
this.conflictResolver = new ConflictResolver(logger, eventBus);
this.metricsCollector = new CoordinationMetricsCollector(logger, eventBus);
}
async initialize(): Promise<void> {
if (this.initialized) {
return;
}
this.logger.info('Initializing coordination manager...');
try {
// Initialize components
await this.scheduler.initialize();
await this.resourceManager.initialize();
await this.messageRouter.initialize();
// Start metrics collection
this.metricsCollector.start();
// Start deadlock detection if enabled
if (this.config.deadlockDetection) {
this.startDeadlockDetection();
}
// Set up event handlers
this.setupEventHandlers();
this.initialized = true;
this.logger.info('Coordination manager initialized');
} catch (error) {
this.logger.error('Failed to initialize coordination manager', error);
throw new CoordinationError('Coordination manager initialization failed', { error });
}
}
async shutdown(): Promise<void> {
if (!this.initialized) {
return;
}
this.logger.info('Shutting down coordination manager...');
try {
// Stop deadlock detection
if (this.deadlockCheckInterval) {
clearInterval(this.deadlockCheckInterval);
}
// Stop metrics collection
this.metricsCollector.stop();
// Shutdown components
await Promise.all([
this.scheduler.shutdown(),
this.resourceManager.shutdown(),
this.messageRouter.shutdown(),
]);
this.initialized = false;
this.logger.info('Coordination manager shutdown complete');
} catch (error) {
this.logger.error('Error during coordination manager shutdown', error);
throw error;
}
}
async assignTask(task: Task, agentId: string): Promise<void> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
await this.scheduler.assignTask(task, agentId);
}
async getAgentTaskCount(agentId: string): Promise<number> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
return this.scheduler.getAgentTaskCount(agentId);
}
async acquireResource(resourceId: string, agentId: string): Promise<void> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
await this.resourceManager.acquire(resourceId, agentId);
}
async releaseResource(resourceId: string, agentId: string): Promise<void> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
await this.resourceManager.release(resourceId, agentId);
}
async sendMessage(from: string, to: string, message: unknown): Promise<void> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
await this.messageRouter.send(from, to, message);
}
async getHealthStatus(): Promise<{
healthy: boolean;
error?: string;
metrics?: Record<string, number>;
}> {
try {
const [schedulerHealth, resourceHealth, messageHealth] = await Promise.all([
this.scheduler.getHealthStatus(),
this.resourceManager.getHealthStatus(),
this.messageRouter.getHealthStatus(),
]);
const metrics = {
...schedulerHealth.metrics,
...resourceHealth.metrics,
...messageHealth.metrics,
};
const healthy = schedulerHealth.healthy && resourceHealth.healthy && messageHealth.healthy;
const errors = [schedulerHealth.error, resourceHealth.error, messageHealth.error].filter(
Boolean,
);
const status: { healthy: boolean; error?: string; metrics?: Record<string, number> } = {
healthy,
metrics,
};
if (errors.length > 0) {
status.error = errors.join('; ');
}
return status;
} catch (error) {
return {
healthy: false,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
}
private setupEventHandlers(): void {
// Handle task events
this.eventBus.on(SystemEvents.TASK_COMPLETED, async (data: unknown) => {
const { taskId, result } = data as { taskId: string; result: unknown };
try {
await this.scheduler.completeTask(taskId, result);
} catch (error) {
this.logger.error('Error handling task completion', { taskId, error });
}
});
this.eventBus.on(SystemEvents.TASK_FAILED, async (data: unknown) => {
const { taskId, error } = data as { taskId: string; error: Error };
try {
await this.scheduler.failTask(taskId, error);
} catch (err) {
this.logger.error('Error handling task failure', { taskId, error: err });
}
});
// Handle agent termination
this.eventBus.on(SystemEvents.AGENT_TERMINATED, async (data: unknown) => {
const { agentId } = data as { agentId: string };
try {
// Release all resources held by the agent
await this.resourceManager.releaseAllForAgent(agentId);
// Cancel all tasks assigned to the agent
await this.scheduler.cancelAgentTasks(agentId);
} catch (error) {
this.logger.error('Error handling agent termination', { agentId, error });
}
});
}
private startDeadlockDetection(): void {
this.deadlockCheckInterval = setInterval(async () => {
try {
const deadlock = await this.detectDeadlock();
if (deadlock) {
this.logger.error('Deadlock detected', deadlock);
// Emit deadlock event
this.eventBus.emit(SystemEvents.DEADLOCK_DETECTED, deadlock);
// Attempt to resolve deadlock
await this.resolveDeadlock(deadlock);
}
} catch (error) {
this.logger.error('Error during deadlock detection', error);
}
}, 10000); // Check every 10 seconds
}
private async detectDeadlock(): Promise<{
agents: string[];
resources: string[];
} | null> {
// Get resource allocation graph
const allocations = await this.resourceManager.getAllocations();
const waitingFor = await this.resourceManager.getWaitingRequests();
// Build dependency graph
const graph = new Map<string, Set<string>>();
// Add edges for resources agents are waiting for
for (const [agentId, resources] of waitingFor) {
if (!graph.has(agentId)) {
graph.set(agentId, new Set());
}
// Find who owns these resources
for (const resource of resources) {
const owner = allocations.get(resource);
if (owner && owner !== agentId) {
graph.get(agentId)!.add(owner);
}
}
}
// Detect cycles using DFS
const visited = new Set<string>();
const recursionStack = new Set<string>();
const cycle: string[] = [];
const hasCycle = (node: string): boolean => {
visited.add(node);
recursionStack.add(node);
const neighbors = graph.get(node) || new Set();
for (const neighbor of neighbors) {
if (!visited.has(neighbor)) {
if (hasCycle(neighbor)) {
cycle.unshift(node);
return true;
}
} else if (recursionStack.has(neighbor)) {
cycle.unshift(node);
cycle.unshift(neighbor);
return true;
}
}
recursionStack.delete(node);
return false;
};
// Check for cycles
for (const node of graph.keys()) {
if (!visited.has(node) && hasCycle(node)) {
// Extract unique agents in cycle
const agents = Array.from(new Set(cycle));
// Find resources involved
const resources: string[] = [];
for (const agent of agents) {
const waiting = waitingFor.get(agent) || [];
resources.push(...waiting);
}
return {
agents,
resources: Array.from(new Set(resources)),
};
}
}
return null;
}
private async resolveDeadlock(deadlock: {
agents: string[];
resources: string[];
}): Promise<void> {
this.logger.warn('Attempting to resolve deadlock', deadlock);
// Simple resolution: release resources from the lowest priority agent
// In a real implementation, use more sophisticated strategies
try {
// Find the agent with the lowest priority or least work done
const agentToPreempt = deadlock.agents[0]; // Simplified
// Release all resources held by this agent
await this.resourceManager.releaseAllForAgent(agentToPreempt);
// Reschedule the agent's tasks
await this.scheduler.rescheduleAgentTasks(agentToPreempt);
this.logger.info('Deadlock resolved by preempting agent', {
agentId: agentToPreempt,
});
} catch (error) {
throw new DeadlockError('Failed to resolve deadlock', deadlock.agents, deadlock.resources);
}
}
async getAgentTasks(agentId: string): Promise<Task[]> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
return this.scheduler.getAgentTasks(agentId);
}
async cancelTask(taskId: string, reason?: string): Promise<void> {
if (!this.initialized) {
throw new CoordinationError('Coordination manager not initialized');
}
await this.scheduler.cancelTask(taskId, reason || 'User requested cancellation');
}
async performMaintenance(): Promise<void> {
if (!this.initialized) {
return;
}
this.logger.debug('Performing coordination manager maintenance');
try {
await Promise.all([
this.scheduler.performMaintenance(),
this.resourceManager.performMaintenance(),
this.messageRouter.performMaintenance(),
]);
// Clean up old conflicts
this.conflictResolver.cleanupOldConflicts(24 * 60 * 60 * 1000); // 24 hours
} catch (error) {
this.logger.error('Error during coordination manager maintenance', error);
}
}
async getCoordinationMetrics(): Promise<Record<string, unknown>> {
const baseMetrics = await this.getHealthStatus();
const coordinationMetrics = this.metricsCollector.getCurrentMetrics();
const conflictStats = this.conflictResolver.getStats();
return {
...baseMetrics.metrics,
coordination: coordinationMetrics,
conflicts: conflictStats,
advancedScheduling: this.advancedSchedulingEnabled,
};
}
enableAdvancedScheduling(): void {
if (this.advancedSchedulingEnabled) {
return;
}
this.logger.info('Enabling advanced scheduling features');
// Replace basic scheduler with advanced one
const advancedScheduler = new AdvancedTaskScheduler(this.config, this.eventBus, this.logger);
// Transfer state if needed (in a real implementation)
this.scheduler = advancedScheduler;
this.advancedSchedulingEnabled = true;
}
async reportConflict(type: 'resource' | 'task', id: string, agents: string[]): Promise<void> {
this.logger.warn('Conflict reported', { type, id, agents });
let conflict;
if (type === 'resource') {
conflict = await this.conflictResolver.reportResourceConflict(id, agents);
} else {
conflict = await this.conflictResolver.reportTaskConflict(id, agents, 'assignment');
}
// Auto-resolve using default strategy
try {
await this.conflictResolver.autoResolve(conflict.id);
} catch (error) {
this.logger.error('Failed to auto-resolve conflict', {
conflictId: conflict.id,
error,
});
}
}
}