UNPKG

mega-minds

Version:

Enhanced multi-agent workflow system for Claude Code projects with automated handoff management and Claude Code hooks integration

824 lines (699 loc) 28.7 kB
// lib/enterprise/EnterpriseMonitoring.js
// Advanced monitoring system with performance alerts and enterprise analytics
// Phase 3.3: Enterprise Features - Production monitoring and alerting

const fs = require('fs-extra');
const path = require('path');
const os = require('os');
const crypto = require('crypto');

// Collection-time target (ms); collections at or above this raise a warning.
const COLLECTION_TIME_TARGET_MS = 500;
// Maximum number of alerts retained in memory.
const MAX_ALERT_HISTORY = 100;
// Number of buffered samples that triggers a flush (12 samples = 1 minute at the default 5s interval).
const METRICS_FLUSH_THRESHOLD = 12;
// Matches the leading `YYYY-MM-DD` of a metrics (`YYYY-MM-DD-HH.json`) or alert (`YYYY-MM-DD.json`) file name.
const DATE_PREFIX_PATTERN = /^(\d{4}-\d{2}-\d{2})/;

/**
 * Convert a byte count to whole megabytes.
 * @param {number} bytes
 * @returns {number}
 */
function toMegabytes(bytes) {
  return Math.round(bytes / 1024 / 1024);
}

/**
 * Rounded arithmetic mean of a non-empty list of numbers.
 * @param {number[]} values
 * @returns {number}
 */
function roundedAverage(values) {
  const total = values.reduce((sum, value) => sum + value, 0);
  return Math.round(total / values.length);
}

/**
 * Derive the `YYYY-MM-DD` day key from a record timestamp.
 * String timestamps are split at the `T` separator; any other value is
 * parsed as a Date and formatted in UTC.
 * @param {*} timestamp
 * @returns {string|null} The day key, or null when no valid date can be derived.
 */
function dayKeyOf(timestamp) {
  if (timestamp == null) {
    return null;
  }
  if (typeof timestamp === 'string') {
    return timestamp.split('T')[0];
  }
  const parsed = new Date(timestamp);
  return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString().split('T')[0];
}

/**
 * Read every `.json` file in a directory (in sorted name order) and return
 * the records whose `timestamp` falls within [startDate, endDate].
 * A file that cannot be read, or that does not contain an array, is reported
 * and skipped so it cannot hide the data held in the remaining files.
 * @param {string} directory - Directory containing JSON array files.
 * @param {Date} startDate - Inclusive lower bound.
 * @param {Date} endDate - Inclusive upper bound.
 * @param {string} label - Noun used in warning messages ("metrics" / "alerts").
 * @returns {Promise<object[]>}
 */
async function readRecordsInRange(directory, startDate, endDate, label) {
  const records = [];

  let fileNames;
  try {
    fileNames = await fs.readdir(directory);
  } catch (error) {
    console.warn(`⚠️ Error loading ${label}:`, error.message);
    return records;
  }

  const jsonFiles = fileNames.filter(name => name.endsWith('.json')).sort();
  for (const fileName of jsonFiles) {
    try {
      const contents = await fs.readJSON(path.join(directory, fileName));
      if (!Array.isArray(contents)) {
        throw new Error(`${fileName} does not contain an array`);
      }
      for (const record of contents) {
        // An invalid or missing timestamp compares false on both sides and is excluded.
        const recordedAt = new Date(record?.timestamp);
        if (recordedAt >= startDate && recordedAt <= endDate) {
          records.push(record);
        }
      }
    } catch (error) {
      console.warn(`⚠️ Error loading ${label}:`, error.message);
    }
  }

  return records;
}

/**
 * Remove `.json` files whose name starts with a `YYYY-MM-DD` date earlier
 * than the cutoff. Files without a parsable date prefix are left alone.
 * @param {string} directory
 * @param {Date} cutoffDate
 * @returns {Promise<void>}
 */
async function removeDatedFilesBefore(directory, cutoffDate) {
  const fileNames = await fs.readdir(directory);
  for (const fileName of fileNames) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    // Use the full date prefix: splitting on '-' would keep only the year and
    // make every file of the current year look as old as January 1st.
    const match = DATE_PREFIX_PATTERN.exec(fileName);
    if (match && new Date(match[1]) < cutoffDate) {
      await fs.remove(path.join(directory, fileName));
    }
  }
}

/**
 * EnterpriseMonitoring provides advanced monitoring, alerting, and analytics
 * PRD Requirements: Performance monitoring, structured logging, enterprise analytics
 * Targets: <500ms metrics collection (slower collections are reported as warnings)
 */
class EnterpriseMonitoring {
  /**
   * @param {string} projectPath - Project root; data is stored under
   *   `<projectPath>/.mega-minds/enterprise/monitoring`.
   * @param {object} [options]
   * @param {boolean} [options.enableMonitoring=false] - Start advanced monitoring on initialize().
   * @param {number} [options.metricsRetention=30] - Retention in days used by cleanup().
   * @param {number} [options.monitoringInterval=5000] - Collection interval in ms.
   * @param {boolean} [options.analyticsEnabled=false] - Allow generateAnalyticsReport().
   * @param {object} [options.alertThresholds] - Threshold overrides, merged over the defaults.
   *   Individual thresholds may also be given as flat options (e.g. `options.memoryWarning`).
   */
  constructor(projectPath, options = {}) {
    this.projectPath = projectPath;

    // Flat threshold options keep their original `||` semantics (a falsy value
    // falls back to the default). A partial `options.alertThresholds` is merged
    // over these defaults instead of replacing the whole object, which used to
    // leave the remaining thresholds undefined and silently disable their alerts.
    const alertThresholds = {
      memoryWarning: options.memoryWarning || 2048, // MB
      memoryCritical: options.memoryCritical || 3072, // MB
      responseTimeWarning: options.responseTimeWarning || 2000, // ms
      responseTimeCritical: options.responseTimeCritical || 5000, // ms
      errorRateWarning: options.errorRateWarning || 5, // %
      errorRateCritical: options.errorRateCritical || 10, // %
      diskSpaceWarning: options.diskSpaceWarning || 85, // %
      diskSpaceCritical: options.diskSpaceCritical || 95, // %
      ...options.alertThresholds
    };

    this.options = {
      ...options,
      enableMonitoring: options.enableMonitoring ?? false, // Default: basic monitoring only (backward compatible)
      metricsRetention: options.metricsRetention ?? 30, // days
      monitoringInterval: options.monitoringInterval ?? 5000, // 5 seconds
      analyticsEnabled: options.analyticsEnabled ?? false,
      alertThresholds
    };

    // Enterprise monitoring directory structure
    this.enterpriseDir = path.join(projectPath, '.mega-minds', 'enterprise');
    this.monitoringDir = path.join(this.enterpriseDir, 'monitoring');
    this.metricsDir = path.join(this.monitoringDir, 'metrics');
    this.alertsDir = path.join(this.monitoringDir, 'alerts');
    this.analyticsDir = path.join(this.monitoringDir, 'analytics');

    // Monitoring state
    this.monitoringActive = false;
    this.monitoringInterval = null;
    this.metricsBuffer = [];
    this.alertHistory = [];
    this.performanceBaseline = null;

    // Real-time metrics cache
    this.currentMetrics = {
      system: {},
      application: {},
      agents: {},
      quality: {},
      timestamp: null
    };

    // Alert callbacks, keyed by alert type
    this.alertCallbacks = new Map();

    this.initialized = false;
  }

  /**
   * Initialize enterprise monitoring system.
   * Creates the monitoring directories, loads (or creates) the performance
   * baseline and starts periodic collection when monitoring is enabled.
   * @param {boolean} [enableAdvancedMonitoring=false] - Force-enable advanced monitoring.
   * @returns {Promise<boolean>} true on success; false when initialization failed
   *   (advanced monitoring is then disabled).
   */
  async initialize(enableAdvancedMonitoring = false) {
    try {
      // Ensure monitoring directories exist
      await fs.ensureDir(this.monitoringDir);
      await fs.ensureDir(this.metricsDir);
      await fs.ensureDir(this.alertsDir);
      await fs.ensureDir(this.analyticsDir);

      // Enable advanced monitoring if requested
      if (enableAdvancedMonitoring) {
        this.options.enableMonitoring = true;
        console.log('📊 Enterprise monitoring enabled');
      }

      // Load performance baseline
      await this.loadPerformanceBaseline();

      // Start monitoring if enabled
      if (this.options.enableMonitoring) {
        await this.startMonitoring();
      }

      this.initialized = true;
      return true;
    } catch (error) {
      console.warn('⚠️ EnterpriseMonitoring initialization warning:', error.message);
      // Graceful degradation - continue with basic monitoring
      this.options.enableMonitoring = false;
      return false;
    }
  }

  /**
   * Check if advanced monitoring is enabled
   * @returns {boolean}
   */
  isAdvancedMonitoringEnabled() {
    return this.options.enableMonitoring;
  }

  /**
   * Start periodic metrics collection and run one collection immediately.
   * Does nothing when monitoring is already active.
   * @returns {Promise<void>}
   */
  async startMonitoring() {
    if (this.monitoringActive) {
      return;
    }

    console.log('📊 Starting enterprise monitoring system...');
    this.monitoringActive = true;

    // Start periodic metrics collection
    this.monitoringInterval = setInterval(async () => {
      await this.collectMetrics();
    }, this.options.monitoringInterval);

    // Initial metrics collection
    await this.collectMetrics();

    console.log(`✅ Enterprise monitoring active (${this.options.monitoringInterval}ms interval)`);
  }

  /**
   * Stop periodic metrics collection.
   */
  stopMonitoring() {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = null;
    }
    this.monitoringActive = false;
    console.log('📊 Enterprise monitoring stopped');
  }

  /**
   * Collect one sample of system, application, agent and quality metrics,
   * buffer it, evaluate alerts and flush the buffer once it is large enough.
   * Errors are logged, never thrown.
   * @returns {Promise<void>}
   */
  async collectMetrics() {
    const startTime = Date.now();

    try {
      // The collectors are independent of each other, so run them concurrently
      // to stay within the collection-time target.
      const [systemMetrics, appMetrics, agentMetrics, qualityMetrics] = await Promise.all([
        this.collectSystemMetrics(),
        this.collectApplicationMetrics(),
        this.collectAgentMetrics(),
        this.collectQualityMetrics()
      ]);

      // Compile current metrics
      this.currentMetrics = {
        system: systemMetrics,
        application: appMetrics,
        agents: agentMetrics,
        quality: qualityMetrics,
        timestamp: new Date().toISOString(),
        collectionTime: Date.now() - startTime
      };

      // Add to buffer for batch processing
      this.metricsBuffer.push(this.currentMetrics);

      // Process alerts
      await this.processAlerts(this.currentMetrics);

      // Flush buffer if needed (every 12 collections = 1 minute at 5s interval)
      if (this.metricsBuffer.length >= METRICS_FLUSH_THRESHOLD) {
        await this.flushMetricsBuffer();
      }

      // Report when the whole cycle (including alert handling and flushing) is slow
      const collectionTime = Date.now() - startTime;
      if (collectionTime > COLLECTION_TIME_TARGET_MS) {
        console.warn(`⚠️ Metrics collection slow: ${collectionTime}ms (target: <500ms)`);
      }
    } catch (error) {
      console.warn('⚠️ Error collecting metrics:', error.message);
    }
  }

  /**
   * Collect process- and host-level metrics (memory in MB, CPU time, uptime,
   * load average, platform).
   * @returns {Promise<object>}
   */
  async collectSystemMetrics() {
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage();

    // Disk space check.
    // NOTE: this is a placeholder - it reports the stat size of the project
    // directory entry, not real filesystem capacity, and usedPercent is always 0,
    // so the disk alerts in processAlerts() never fire from this data.
    let diskSpace = null;
    try {
      const stats = await fs.stat(this.projectPath);
      diskSpace = {
        total: stats.size || 0,
        available: stats.size || 0, // Simplified - would need OS-specific calls for accurate data
        used: 0,
        usedPercent: 0
      };
    } catch (error) {
      // Disk space check failed - not critical; diskSpace stays null
    }

    return {
      memory: {
        heapUsed: toMegabytes(memUsage.heapUsed), // MB
        heapTotal: toMegabytes(memUsage.heapTotal), // MB
        external: toMegabytes(memUsage.external), // MB
        rss: toMegabytes(memUsage.rss), // MB
        usagePercent: Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)
      },
      cpu: {
        user: cpuUsage.user,
        system: cpuUsage.system
      },
      uptime: Math.round(process.uptime()),
      loadAverage: os.loadavg(),
      platform: os.platform(),
      nodeVersion: process.version,
      diskSpace: diskSpace
    };
  }

  /**
   * Collect application-specific metrics: counts of entries in the
   * `.mega-minds/state` and `.mega-minds/sessions` directories.
   * @returns {Promise<object>} On failure, zero counts plus an `error` message.
   */
  async collectApplicationMetrics() {
    try {
      // Check for active mega-minds state files
      const stateDir = path.join(this.projectPath, '.mega-minds', 'state');
      let stateFiles = 0;
      let sessionFiles = 0;

      if (await fs.pathExists(stateDir)) {
        const files = await fs.readdir(stateDir);
        stateFiles = files.length;
      }

      const sessionsDir = path.join(this.projectPath, '.mega-minds', 'sessions');
      if (await fs.pathExists(sessionsDir)) {
        const files = await fs.readdir(sessionsDir);
        sessionFiles = files.length;
      }

      return {
        stateFiles: stateFiles,
        sessionFiles: sessionFiles,
        projectPath: this.projectPath,
        megamindsVersion: '2.0', // Would typically read from package.json
        // Timestamp of the previous collection (currentMetrics is replaced afterwards)
        lastActivity: this.currentMetrics.timestamp
      };
    } catch (error) {
      return {
        stateFiles: 0,
        sessionFiles: 0,
        error: error.message
      };
    }
  }

  /**
   * Collect agent-specific metrics from `.mega-minds/state/active-agents.json`.
   * @returns {Promise<object>} Empty defaults when the file is missing; on
   *   failure, empty defaults plus an `error` message.
   */
  async collectAgentMetrics() {
    try {
      const agentStateFile = path.join(this.projectPath, '.mega-minds', 'state', 'active-agents.json');

      if (await fs.pathExists(agentStateFile)) {
        const agentData = await fs.readJSON(agentStateFile);
        return {
          activeCount: agentData.totalActiveCount || 0,
          agents: agentData.activeAgents || {},
          lastUpdate: agentData.lastUpdate
        };
      }

      return {
        activeCount: 0,
        agents: {},
        lastUpdate: null
      };
    } catch (error) {
      return {
        activeCount: 0,
        agents: {},
        error: error.message
      };
    }
  }

  /**
   * Collect quality gate metrics from the report in
   * `.mega-minds/quality/reports` whose file name sorts last.
   * @returns {Promise<object>} A passing default (score 100) when no report
   *   exists; on failure, a failing result (score 0) plus an `error` message.
   */
  async collectQualityMetrics() {
    try {
      const qualityDir = path.join(this.projectPath, '.mega-minds', 'quality', 'reports');

      if (await fs.pathExists(qualityDir)) {
        const files = await fs.readdir(qualityDir);
        const jsonFiles = files.filter(f => f.endsWith('.json')).sort().reverse();

        if (jsonFiles.length > 0) {
          const latestReport = path.join(qualityDir, jsonFiles[0]);
          const qualityData = await fs.readJSON(latestReport);

          return {
            overallScore: qualityData.overall?.score || 0,
            passed: qualityData.overall?.passed || false,
            gatesCount: Object.keys(qualityData.gates || {}).length,
            lastRun: qualityData.timestamp,
            reportCount: jsonFiles.length
          };
        }
      }

      return {
        overallScore: 100,
        passed: true,
        gatesCount: 0,
        lastRun: null,
        reportCount: 0
      };
    } catch (error) {
      return {
        overallScore: 0,
        passed: false,
        error: error.message
      };
    }
  }

  /**
   * Compare a metrics sample against the configured thresholds and hand
   * every resulting alert to handleAlert().
   * @param {object} metrics - A sample as produced by collectMetrics().
   * @returns {Promise<void>}
   */
  async processAlerts(metrics) {
    const alerts = [];
    const thresholds = this.options.alertThresholds;

    // Memory alerts
    const memoryMB = metrics.system.memory.heapUsed;
    if (memoryMB >= thresholds.memoryCritical) {
      alerts.push({
        type: 'memory',
        level: 'critical',
        message: `Memory usage critical: ${memoryMB}MB (threshold: ${thresholds.memoryCritical}MB)`,
        value: memoryMB,
        threshold: thresholds.memoryCritical
      });
    } else if (memoryMB >= thresholds.memoryWarning) {
      alerts.push({
        type: 'memory',
        level: 'warning',
        message: `Memory usage high: ${memoryMB}MB (threshold: ${thresholds.memoryWarning}MB)`,
        value: memoryMB,
        threshold: thresholds.memoryWarning
      });
    }

    // Collection time alerts (performance requirement: <500ms)
    if (metrics.collectionTime >= COLLECTION_TIME_TARGET_MS) {
      alerts.push({
        type: 'performance',
        level: 'warning',
        message: `Metrics collection slow: ${metrics.collectionTime}ms (target: <500ms)`,
        value: metrics.collectionTime,
        threshold: COLLECTION_TIME_TARGET_MS
      });
    }

    // Quality gate alerts
    if (!metrics.quality.passed) {
      alerts.push({
        type: 'quality',
        level: 'warning',
        message: `Quality gates failing (score: ${metrics.quality.overallScore})`,
        value: metrics.quality.overallScore,
        threshold: 85
      });
    }

    // Disk space alerts (if available)
    if (metrics.system.diskSpace && metrics.system.diskSpace.usedPercent) {
      const diskUsed = metrics.system.diskSpace.usedPercent;
      if (diskUsed >= thresholds.diskSpaceCritical) {
        alerts.push({
          type: 'disk',
          level: 'critical',
          message: `Disk space critical: ${diskUsed}% used`,
          value: diskUsed,
          threshold: thresholds.diskSpaceCritical
        });
      } else if (diskUsed >= thresholds.diskSpaceWarning) {
        alerts.push({
          type: 'disk',
          level: 'warning',
          message: `Disk space low: ${diskUsed}% used`,
          value: diskUsed,
          threshold: thresholds.diskSpaceWarning
        });
      }
    }

    // Process and store alerts one at a time (saveAlert does a read-modify-write)
    for (const alert of alerts) {
      await this.handleAlert(alert);
    }
  }

  /**
   * Record an alert: add metadata, keep it in the in-memory history, persist
   * it, notify callbacks registered for its type and log it to the console.
   * @param {object} alert - Alert with `type`, `level`, `message`, `value`, `threshold`.
   * @returns {Promise<void>}
   */
  async handleAlert(alert) {
    const alertWithMetadata = {
      ...alert,
      id: crypto.randomUUID(),
      timestamp: new Date().toISOString(),
      projectPath: this.projectPath,
      acknowledged: false
    };

    // Add to history, keeping only the last 100 alerts in memory
    this.alertHistory.push(alertWithMetadata);
    if (this.alertHistory.length > MAX_ALERT_HISTORY) {
      this.alertHistory = this.alertHistory.slice(-MAX_ALERT_HISTORY);
    }

    // Save alert to disk
    await this.saveAlert(alertWithMetadata);

    // Call registered callbacks; a throwing callback must not block the others
    const callbacks = this.alertCallbacks.get(alert.type) || [];
    callbacks.forEach(callback => {
      try {
        callback(alertWithMetadata);
      } catch (error) {
        console.warn('⚠️ Alert callback error:', error.message);
      }
    });

    // Log to console for immediate visibility
    const emoji = alert.level === 'critical' ? '🚨' : '⚠️';
    console.log(`${emoji} ALERT [${alert.level.toUpperCase()}]: ${alert.message}`);
  }

  /**
   * Register a callback invoked with every alert of the given type.
   * @param {string} alertType - e.g. 'memory', 'performance', 'quality', 'disk'.
   * @param {function(object): void} callback
   */
  onAlert(alertType, callback) {
    if (!this.alertCallbacks.has(alertType)) {
      this.alertCallbacks.set(alertType, []);
    }
    this.alertCallbacks.get(alertType).push(callback);
  }

  /**
   * Get current metrics (for dashboard integration)
   * @returns {object} The most recent sample (empty sections before the first collection).
   */
  getCurrentMetrics() {
    return this.currentMetrics;
  }

  /**
   * Get the most recent alerts, newest first.
   * @param {number} [limit=10] - Maximum number of alerts to return.
   * @returns {object[]} Empty when `limit` is zero, negative or not a number.
   */
  getRecentAlerts(limit = 10) {
    // slice(-0) is slice(0) and would return the whole history, so a
    // non-positive limit is handled explicitly.
    const count = Math.floor(Number(limit));
    if (!(count > 0)) {
      return [];
    }
    return this.alertHistory
      .slice(-count)
      .sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
  }

  /**
   * Summarize system health from the most recent metrics sample.
   * @returns {{status: string, issues: string[], uptime: number, memoryUsage: number,
   *   qualityScore: number, lastUpdate: (string|null), monitoring: boolean}}
   *   `status` is 'healthy', 'warning' or 'critical'.
   */
  getHealthStatus() {
    const metrics = this.currentMetrics;
    const thresholds = this.options.alertThresholds;
    const issues = [];
    let status = 'healthy';

    // Escalate to 'warning' without ever downgrading a 'critical' status
    const raiseWarning = (issue) => {
      if (status === 'healthy') {
        status = 'warning';
      }
      issues.push(issue);
    };

    const heapUsed = metrics.system?.memory?.heapUsed;
    if (heapUsed >= thresholds.memoryCritical) {
      status = 'critical';
      issues.push('Memory usage critical');
    } else if (heapUsed >= thresholds.memoryWarning) {
      raiseWarning('Memory usage high');
    }

    if (metrics.collectionTime >= COLLECTION_TIME_TARGET_MS) {
      raiseWarning('Performance degraded');
    }

    // Only an explicit failure counts: before the first collection the quality
    // section is empty and must not be reported as failing gates.
    if (metrics.quality?.passed === false) {
      raiseWarning('Quality gates failing');
    }

    return {
      status: status,
      issues: issues,
      uptime: metrics.system?.uptime || 0,
      memoryUsage: heapUsed || 0,
      qualityScore: metrics.quality?.overallScore || 0,
      lastUpdate: metrics.timestamp,
      monitoring: this.monitoringActive
    };
  }

  /**
   * Append an alert to today's alert file (`<alertsDir>/YYYY-MM-DD.json`, UTC date).
   * Failures are logged, never thrown.
   * @param {object} alert
   * @returns {Promise<void>}
   */
  async saveAlert(alert) {
    try {
      const today = new Date().toISOString().split('T')[0];
      const alertFile = path.join(this.alertsDir, `${today}.json`);

      let alerts = [];
      if (await fs.pathExists(alertFile)) {
        alerts = await fs.readJSON(alertFile);
      }

      alerts.push(alert);
      await fs.writeJSON(alertFile, alerts, { spaces: 2 });
    } catch (error) {
      console.warn('⚠️ Failed to save alert:', error.message);
    }
  }

  /**
   * Append the buffered samples to the current hourly metrics file
   * (`<metricsDir>/YYYY-MM-DD-HH.json`, date and hour both in UTC).
   * On failure the samples are put back in the buffer and a warning is logged.
   * @returns {Promise<void>}
   */
  async flushMetricsBuffer() {
    if (this.metricsBuffer.length === 0) {
      return;
    }

    // Take ownership of the buffered samples before awaiting anything, so
    // samples collected while the write is in flight land in the fresh buffer
    // instead of being discarded when the buffer is cleared.
    const pending = this.metricsBuffer;
    this.metricsBuffer = [];

    try {
      const now = new Date();
      const dateStr = now.toISOString().split('T')[0];
      // Use the UTC hour so it agrees with the UTC date above
      const hourStr = now.getUTCHours().toString().padStart(2, '0');
      const metricsFile = path.join(this.metricsDir, `${dateStr}-${hourStr}.json`);

      let existingMetrics = [];
      if (await fs.pathExists(metricsFile)) {
        existingMetrics = await fs.readJSON(metricsFile);
      }

      existingMetrics.push(...pending);
      await fs.writeJSON(metricsFile, existingMetrics, { spaces: 2 });
    } catch (error) {
      // Keep the unsaved samples (ahead of any newer ones) for the next attempt
      this.metricsBuffer = pending.concat(this.metricsBuffer);
      console.warn('⚠️ Failed to flush metrics buffer:', error.message);
    }
  }

  /**
   * Load the performance baseline from `performance-baseline.json`, creating
   * it with default values when it does not exist. Failures are logged.
   * @returns {Promise<void>}
   */
  async loadPerformanceBaseline() {
    try {
      const baselineFile = path.join(this.monitoringDir, 'performance-baseline.json');

      if (await fs.pathExists(baselineFile)) {
        this.performanceBaseline = await fs.readJSON(baselineFile);
      } else {
        // Create initial baseline
        this.performanceBaseline = {
          memoryBaseline: 512, // MB
          responseTimeBaseline: 1000, // ms
          qualityScoreBaseline: 90,
          createdAt: new Date().toISOString()
        };
        await fs.writeJSON(baselineFile, this.performanceBaseline, { spaces: 2 });
      }
    } catch (error) {
      console.warn('⚠️ Error loading performance baseline:', error.message);
    }
  }

  /**
   * Build an analytics report over the last `days` days from the stored
   * metrics and alerts, and save it to
   * `<analyticsDir>/analytics-report-YYYY-MM-DD.json`.
   * @param {number} [days=7]
   * @returns {Promise<object>} The generated report.
   * @throws {Error} When analytics are not enabled, or when the report cannot be written.
   */
  async generateAnalyticsReport(days = 7) {
    if (!this.options.analyticsEnabled) {
      throw new Error('Analytics not enabled');
    }

    try {
      const endDate = new Date();
      const startDate = new Date(endDate.getTime() - (days * 24 * 60 * 60 * 1000));

      const report = {
        period: {
          start: startDate.toISOString(),
          end: endDate.toISOString(),
          days: days
        },
        summary: {
          averageMemoryUsage: 0,
          peakMemoryUsage: 0,
          averageQualityScore: 0,
          totalAlerts: 0,
          uptime: 0
        },
        trends: {
          memory: [],
          quality: [],
          alerts: []
        },
        generated: new Date().toISOString()
      };

      // Collect metrics from files
      const metricsData = await this.loadMetricsForPeriod(startDate, endDate);

      if (metricsData.length > 0) {
        // Calculate averages
        const memoryValues = metricsData.map(m => m.system?.memory?.heapUsed || 0);
        const qualityValues = metricsData.map(m => m.quality?.overallScore || 0);

        report.summary.averageMemoryUsage = roundedAverage(memoryValues);
        // Fold instead of Math.max(...values): a week of 5s samples is large
        // enough to exceed the engine's argument-count limit.
        report.summary.peakMemoryUsage = memoryValues.reduce(
          (peak, value) => Math.max(peak, value),
          -Infinity
        );
        report.summary.averageQualityScore = roundedAverage(qualityValues);

        // Generate trends (daily aggregates)
        const dailyData = this.aggregateMetricsByDay(metricsData);
        report.trends.memory = dailyData.map(d => ({ date: d.date, value: d.avgMemory }));
        report.trends.quality = dailyData.map(d => ({ date: d.date, value: d.avgQuality }));
      }

      // Count alerts
      const alertsData = await this.loadAlertsForPeriod(startDate, endDate);
      report.summary.totalAlerts = alertsData.length;

      // Group alerts by day
      const alertsByDay = {};
      for (const alert of alertsData) {
        const day = dayKeyOf(alert.timestamp);
        if (day !== null) {
          alertsByDay[day] = (alertsByDay[day] || 0) + 1;
        }
      }
      report.trends.alerts = Object.entries(alertsByDay).map(([date, count]) => ({ date, value: count }));

      // Save report
      const reportFile = path.join(
        this.analyticsDir,
        `analytics-report-${endDate.toISOString().split('T')[0]}.json`
      );
      await fs.writeJSON(reportFile, report, { spaces: 2 });

      return report;
    } catch (error) {
      console.error('❌ Failed to generate analytics report:', error.message);
      throw error;
    }
  }

  /**
   * Load stored metrics samples whose timestamp lies within the period.
   * Unreadable files are reported and skipped.
   * @param {Date} startDate - Inclusive.
   * @param {Date} endDate - Inclusive.
   * @returns {Promise<object[]>}
   */
  async loadMetricsForPeriod(startDate, endDate) {
    return readRecordsInRange(this.metricsDir, startDate, endDate, 'metrics');
  }

  /**
   * Load stored alerts whose timestamp lies within the period.
   * Unreadable files are reported and skipped.
   * @param {Date} startDate - Inclusive.
   * @param {Date} endDate - Inclusive.
   * @returns {Promise<object[]>}
   */
  async loadAlertsForPeriod(startDate, endDate) {
    return readRecordsInRange(this.alertsDir, startDate, endDate, 'alerts');
  }

  /**
   * Aggregate metrics samples into per-day averages.
   * Samples without a usable timestamp are ignored. Numeric values of 0 are
   * included in the averages (a failing quality score of 0 is real data).
   * @param {object[]} metrics
   * @returns {{date: string, avgMemory: number, avgQuality: number}[]}
   *   One entry per day, in first-seen order; an average is 0 when the day
   *   has no values for it.
   */
  aggregateMetricsByDay(metrics) {
    const dailyData = {};

    for (const metric of metrics) {
      const day = dayKeyOf(metric?.timestamp);
      if (day === null) {
        continue;
      }

      if (!dailyData[day]) {
        dailyData[day] = { date: day, memoryValues: [], qualityValues: [] };
      }

      const heapUsed = metric.system?.memory?.heapUsed;
      if (Number.isFinite(heapUsed)) {
        dailyData[day].memoryValues.push(heapUsed);
      }

      const qualityScore = metric.quality?.overallScore;
      if (Number.isFinite(qualityScore)) {
        dailyData[day].qualityValues.push(qualityScore);
      }
    }

    // Calculate averages
    return Object.values(dailyData).map(day => ({
      date: day.date,
      avgMemory: day.memoryValues.length > 0 ? roundedAverage(day.memoryValues) : 0,
      avgQuality: day.qualityValues.length > 0 ? roundedAverage(day.qualityValues) : 0
    }));
  }

  /**
   * Delete metrics and alert files older than the retention period
   * (`options.metricsRetention` days). Failures are logged, never thrown.
   * @returns {Promise<void>}
   */
  async cleanup() {
    try {
      const cutoffDate = new Date();
      cutoffDate.setDate(cutoffDate.getDate() - this.options.metricsRetention);

      // Cleanup old metrics files (YYYY-MM-DD-HH.json)
      await removeDatedFilesBefore(this.metricsDir, cutoffDate);

      // Cleanup old alert files (YYYY-MM-DD.json)
      await removeDatedFilesBefore(this.alertsDir, cutoffDate);
    } catch (error) {
      console.warn('⚠️ Error during monitoring cleanup:', error.message);
    }
  }

  /**
   * Shutdown monitoring system: stop collection, flush buffered metrics and
   * remove data older than the retention period.
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.stopMonitoring();

    // Flush any remaining metrics
    await this.flushMetricsBuffer();

    // Cleanup old data
    await this.cleanup();

    console.log('📊 EnterpriseMonitoring shutdown complete');
  }
}

module.exports = EnterpriseMonitoring;