claude-flow-tbowman01
Version:
Enterprise-grade AI agent orchestration with ruv-swarm integration (Alpha Release)
370 lines • 13 kB
JavaScript
/**
* Health Check System for Claude Flow v2.0.0
*/
import { SystemIntegration } from '../integration/system-integration.js';
import { getErrorMessage } from '../utils/error-handler.js';
/**
 * Periodically polls system and component health, keeps a bounded per-component
 * history, samples process metrics, and publishes results and alerts on the
 * supplied event bus.
 *
 * Events emitted: `health:monitor:started`, `health:monitor:stopped`,
 * `health:check:completed`, `health:check:failed`, `health:alert`.
 */
export class HealthCheckManager {
  eventBus;
  logger;
  systemIntegration;
  config;
  intervalId = null;
  healthHistory = new Map();
  isRunning = false;
  lastMetrics = null;
  // Previous CPU sample (cumulative CPU microseconds + process uptime in seconds).
  // Starts at zero so the first reading is the average since process start.
  lastCpuSample = { cpuMicros: 0, uptimeSeconds: 0 };

  /**
   * @param {object} eventBus - Object exposing `emit(name, payload)` and `on(name, handler)`.
   * @param {object} logger - Object exposing `debug`, `info`, `warn` and `error`.
   * @param {object} [config] - Optional overrides.
   * @param {number} [config.interval=30000] - Milliseconds between scheduled checks.
   * @param {number} [config.timeout=5000] - Milliseconds allowed for one component health check.
   * @param {number} [config.retries=3] - Retry count; stored in the config only (not read by this class).
   * @param {boolean} [config.enableMetrics=true] - Collect process/component metrics on each check.
   * @param {boolean} [config.enableAlerts=true] - Evaluate alert rules on each check.
   */
  constructor(eventBus, logger, config = {}) {
    this.eventBus = eventBus;
    this.logger = logger;
    this.systemIntegration = SystemIntegration.getInstance();
    this.config = {
      // `||` is kept for interval/timeout: a value of 0 is not a usable period.
      interval: config.interval || 30000, // 30 seconds
      timeout: config.timeout || 5000, // 5 seconds
      // `??` so that an explicit `retries: 0` is respected instead of becoming 3.
      retries: config.retries ?? 3,
      enableMetrics: config.enableMetrics !== false,
      enableAlerts: config.enableAlerts !== false,
    };
    this.setupEventHandlers();
  }

  /**
   * Start health monitoring: runs one check immediately, then one every
   * `config.interval` milliseconds. Does nothing (besides a warning) when
   * monitoring is already running.
   */
  start() {
    if (this.isRunning) {
      this.logger.warn('Health check manager already running');
      return;
    }
    this.logger.info('Starting health check monitoring');
    this.isRunning = true;

    // performHealthCheck() rethrows after logging and emitting
    // 'health:check:failed'. Nobody awaits the scheduled runs, so the rejection
    // is consumed here to avoid an unhandled promise rejection.
    const runScheduledCheck = () => {
      this.performHealthCheck().catch(() => {
        // Failure already reported by performHealthCheck().
      });
    };

    // Perform initial health check
    runScheduledCheck();
    // Set up periodic health checks
    this.intervalId = setInterval(runScheduledCheck, this.config.interval);

    this.eventBus.emit('health:monitor:started', {
      interval: this.config.interval,
      timestamp: Date.now(),
    });
  }

  /**
   * Stop health monitoring and cancel the periodic timer. No-op when not running.
   */
  stop() {
    if (!this.isRunning) {
      return;
    }
    this.logger.info('Stopping health check monitoring');
    this.isRunning = false;
    if (this.intervalId) {
      clearInterval(this.intervalId);
      this.intervalId = null;
    }
    this.eventBus.emit('health:monitor:stopped', {
      timestamp: Date.now(),
    });
  }

  /**
   * Perform a full health check: system health, per-component checks, optional
   * metrics collection, history recording and optional alert evaluation.
   *
   * @returns {Promise<object>} The value returned by `systemIntegration.getSystemHealth()`.
   * @throws Rethrows any error raised during the check, after logging it and
   *   emitting `health:check:failed`.
   */
  async performHealthCheck() {
    const startTime = Date.now();
    try {
      this.logger.debug('Performing system health check');
      // Get system health from integration manager
      const systemHealth = await this.systemIntegration.getSystemHealth();
      // Perform individual component checks
      const componentChecks = await this.checkAllComponents();
      // Collect system metrics if enabled
      if (this.config.enableMetrics) {
        this.lastMetrics = await this.collectSystemMetrics();
      }
      // Store health history
      this.storeHealthHistory(componentChecks);
      // Check for alerts
      if (this.config.enableAlerts) {
        await this.checkForAlerts(systemHealth);
      }
      const duration = Date.now() - startTime;
      this.logger.debug(`Health check completed in ${duration}ms`);
      // Emit health check event
      this.eventBus.emit('health:check:completed', {
        health: systemHealth,
        metrics: this.lastMetrics,
        duration,
        timestamp: Date.now(),
      });
      return systemHealth;
    } catch (error) {
      const duration = Date.now() - startTime;
      this.logger.error('Health check failed:', getErrorMessage(error));
      this.eventBus.emit('health:check:failed', {
        error: getErrorMessage(error),
        duration,
        timestamp: Date.now(),
      });
      throw error;
    }
  }

  /**
   * Check every known component concurrently.
   *
   * @returns {Promise<object[]>} One result per component, in the order of the
   *   internal component list. A rejected check is converted into an unhealthy
   *   result carrying the rejection message.
   */
  async checkAllComponents() {
    const components = [
      'orchestrator',
      'configManager',
      'memoryManager',
      'agentManager',
      'swarmCoordinator',
      'taskEngine',
      'monitor',
      'mcpServer',
    ];
    const checks = await Promise.allSettled(
      components.map((component) => this.checkComponent(component)),
    );
    return checks.map((result, index) => {
      if (result.status === 'fulfilled') {
        return result.value;
      }
      return {
        component: components[index],
        healthy: false,
        message: getErrorMessage(result.reason),
        timestamp: Date.now(),
      };
    });
  }

  /**
   * Check a single component.
   *
   * When the component exposes `healthCheck()`, its result is returned
   * unchanged, bounded by `config.timeout`. Other methods of this class read
   * `component`, `healthy` and `timestamp` from stored results, so
   * implementations are expected to supply those fields. Components without a
   * `healthCheck()` method are reported healthy simply because they exist.
   *
   * @param {string} componentName - Name passed to `systemIntegration.getComponent()`.
   * @returns {Promise<object>} Health result; never rejects (errors and timeouts
   *   become an unhealthy result).
   */
  async checkComponent(componentName) {
    const startTime = Date.now();
    let timeoutId = null;
    try {
      const component = this.systemIntegration.getComponent(componentName);
      if (!component) {
        return {
          component: componentName,
          healthy: false,
          message: 'Component not found',
          timestamp: Date.now(),
        };
      }
      // Try to call health check method if available
      if (typeof component.healthCheck === 'function') {
        const pendingCheck = component.healthCheck();
        const timeout = new Promise((_, reject) => {
          timeoutId = setTimeout(
            () => reject(new Error('Health check timeout')),
            this.config.timeout,
          );
        });
        return await Promise.race([pendingCheck, timeout]);
      }
      // Basic availability check
      const duration = Date.now() - startTime;
      return {
        component: componentName,
        healthy: true,
        message: 'Component available',
        metrics: { responseTime: duration },
        timestamp: Date.now(),
      };
    } catch (error) {
      return {
        component: componentName,
        healthy: false,
        message: getErrorMessage(error),
        timestamp: Date.now(),
      };
    } finally {
      // Always cancel the timeout timer; otherwise every check would leave a
      // live timer behind for `config.timeout` ms and keep the event loop busy.
      if (timeoutId !== null) {
        clearTimeout(timeoutId);
      }
    }
  }

  /**
   * Sample CPU utilisation since the previous call (internal helper).
   *
   * `process.cpuUsage()` reports cumulative microseconds, so utilisation is
   * derived from the difference between two samples divided by the elapsed
   * process uptime. The first call measures the average since process start.
   *
   * @returns {number} CPU time used as a percentage of elapsed wall time
   *   (100 means one fully busy core; may exceed 100 on multi-core machines).
   */
  sampleCpuPercent() {
    const { user, system } = process.cpuUsage();
    const cpuMicros = user + system;
    const uptimeSeconds = process.uptime();
    const previous = this.lastCpuSample;
    this.lastCpuSample = { cpuMicros, uptimeSeconds };

    const elapsedMicros = (uptimeSeconds - previous.uptimeSeconds) * 1000000;
    if (elapsedMicros <= 0) {
      return 0;
    }
    return ((cpuMicros - previous.cpuMicros) / elapsedMicros) * 100;
  }

  /**
   * Collect process and component metrics.
   *
   * @returns {Promise<object>} Metrics snapshot. On any failure the error is
   *   logged and a zeroed snapshot (with real uptime/timestamp) is returned.
   */
  async collectSystemMetrics() {
    try {
      // Get system resource usage
      const memoryUsage = process.memoryUsage();
      const cpu = this.sampleCpuPercent();
      // Get component-specific metrics
      const agentManager = this.systemIntegration.getComponent('agentManager');
      const taskEngine = this.systemIntegration.getComponent('taskEngine');
      let activeAgents = 0;
      let activeTasks = 0;
      let queuedTasks = 0;
      let completedTasks = 0;
      if (agentManager && typeof agentManager.getMetrics === 'function') {
        const agentMetrics = await agentManager.getMetrics();
        activeAgents = agentMetrics.activeAgents || 0;
      }
      if (taskEngine && typeof taskEngine.getMetrics === 'function') {
        const taskMetrics = await taskEngine.getMetrics();
        activeTasks = taskMetrics.activeTasks || 0;
        queuedTasks = taskMetrics.queuedTasks || 0;
        completedTasks = taskMetrics.completedTasks || 0;
      }
      return {
        cpu,
        memory: (memoryUsage.heapUsed / memoryUsage.heapTotal) * 100,
        network: 0, // Placeholder - would need additional monitoring
        disk: 0, // Placeholder - would need additional monitoring
        activeAgents,
        activeTasks,
        queuedTasks,
        completedTasks,
        errorCount: this.getErrorCount(),
        uptime: process.uptime() * 1000,
        timestamp: Date.now(),
      };
    } catch (error) {
      this.logger.error('Failed to collect system metrics:', getErrorMessage(error));
      return {
        cpu: 0,
        memory: 0,
        network: 0,
        disk: 0,
        activeAgents: 0,
        activeTasks: 0,
        queuedTasks: 0,
        completedTasks: 0,
        errorCount: 0,
        uptime: process.uptime() * 1000,
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Append check results to the per-component history, keeping at most the
   * 100 most recent entries for each component.
   *
   * @param {object[]} results - Results keyed by their `component` property.
   */
  storeHealthHistory(results) {
    const maxHistorySize = 100; // Keep last 100 health checks per component
    results.forEach((result) => {
      if (!this.healthHistory.has(result.component)) {
        this.healthHistory.set(result.component, []);
      }
      const history = this.healthHistory.get(result.component);
      history.push(result);
      // Trim history if too large
      if (history.length > maxHistorySize) {
        history.splice(0, history.length - maxHistorySize);
      }
    });
  }

  /**
   * Emit `health:alert` events for unhealthy components and for metric
   * thresholds (CPU > 90, memory > 90, error count > 10) based on the most
   * recently collected metrics.
   *
   * @param {object} health - System health report; its `components` map is
   *   scanned for entries whose `status` is `'unhealthy'`.
   */
  async checkForAlerts(health) {
    // A report without a `components` map is treated as having no components
    // rather than crashing the whole health check.
    const componentStatuses = Object.values(health?.components ?? {});
    const unhealthyComponents = componentStatuses.filter(
      (component) => component.status === 'unhealthy',
    );
    if (unhealthyComponents.length > 0) {
      const alert = {
        type: 'component_failure',
        severity: 'high',
        message: `${unhealthyComponents.length} component(s) are unhealthy`,
        components: unhealthyComponents.map((c) => c.component),
        timestamp: Date.now(),
      };
      this.eventBus.emit('health:alert', alert);
      this.logger.warn('Health alert triggered:', alert.message);
    }
    // Check system metrics for anomalies
    if (this.lastMetrics) {
      const alerts = [];
      if (this.lastMetrics.cpu > 90) {
        alerts.push({
          type: 'high_cpu',
          severity: 'medium',
          message: `High CPU usage: ${this.lastMetrics.cpu.toFixed(1)}%`,
          value: this.lastMetrics.cpu,
        });
      }
      if (this.lastMetrics.memory > 90) {
        alerts.push({
          type: 'high_memory',
          severity: 'medium',
          message: `High memory usage: ${this.lastMetrics.memory.toFixed(1)}%`,
          value: this.lastMetrics.memory,
        });
      }
      if (this.lastMetrics.errorCount > 10) {
        alerts.push({
          type: 'high_errors',
          severity: 'high',
          message: `High error count: ${this.lastMetrics.errorCount}`,
          value: this.lastMetrics.errorCount,
        });
      }
      alerts.forEach((alert) => {
        this.eventBus.emit('health:alert', {
          ...alert,
          timestamp: Date.now(),
        });
      });
    }
  }

  /**
   * Get recorded health history.
   *
   * @param {string} [component] - When given, return that component's history
   *   (oldest first, empty array if none). When omitted, return all entries
   *   from every component sorted newest first.
   * @returns {object[]}
   */
  getHealthHistory(component) {
    if (component) {
      return this.healthHistory.get(component) || [];
    }
    // Return all history
    const allHistory = [];
    for (const history of this.healthHistory.values()) {
      allHistory.push(...history);
    }
    return allHistory.sort((a, b) => b.timestamp - a.timestamp);
  }

  /**
   * Get the metrics snapshot from the most recent health check.
   *
   * @returns {object|null} `null` until metrics have been collected.
   */
  getCurrentMetrics() {
    return this.lastMetrics;
  }

  /**
   * Get the current system health straight from the system integration.
   *
   * @returns {Promise<object>}
   */
  async getSystemHealth() {
    return await this.systemIntegration.getSystemHealth();
  }

  /**
   * Count unhealthy results recorded in the last 5 minutes across all components.
   *
   * @returns {number}
   */
  getErrorCount() {
    const recentTime = Date.now() - 300000; // Last 5 minutes
    let errorCount = 0;
    for (const history of this.healthHistory.values()) {
      errorCount += history.filter(
        (check) => check.timestamp > recentTime && !check.healthy,
      ).length;
    }
    return errorCount;
  }

  /**
   * Subscribe to bus events that are only logged: component status changes to
   * `'unhealthy'` and `system:error` notifications.
   */
  setupEventHandlers() {
    // Listen for component status changes
    this.eventBus.on('component:status:updated', (status) => {
      if (status.status === 'unhealthy') {
        this.logger.warn(`Component ${status.component} became unhealthy: ${status.message}`);
      }
    });
    // Listen for system errors
    this.eventBus.on('system:error', (error) => {
      this.logger.error('System error detected:', error);
    });
  }

  /**
   * Check if monitoring is running.
   *
   * @returns {boolean}
   */
  isMonitoring() {
    return this.isRunning;
  }

  /**
   * Get a shallow copy of the monitoring configuration.
   *
   * @returns {object}
   */
  getConfig() {
    return { ...this.config };
  }
}
//# sourceMappingURL=health-check.js.map