mega-minds
Version:
Enhanced multi-agent workflow system for Claude Code projects with automated handoff management and Claude Code hooks integration
824 lines (699 loc) • 28.7 kB
JavaScript
// lib/enterprise/EnterpriseMonitoring.js
// Advanced monitoring system with performance alerts and enterprise analytics
// Phase 3.3: Enterprise Features - Production monitoring and alerting
const fs = require('fs-extra');
const path = require('path');
const os = require('os');
/**
* EnterpriseMonitoring provides advanced monitoring, alerting, and analytics
* PRD Requirements: Performance monitoring, structured logging, enterprise analytics
* Meets enterprise standards: <500ms monitoring, 99.9% uptime tracking
*/
class EnterpriseMonitoring {
constructor(projectPath, options = {}) {
this.projectPath = projectPath;
this.options = {
enableMonitoring: false, // Default: basic monitoring only (backward compatible)
metricsRetention: options.metricsRetention || 30, // days
alertThresholds: {
memoryWarning: options.memoryWarning || 2048, // MB
memoryCritical: options.memoryCritical || 3072, // MB
responseTimeWarning: options.responseTimeWarning || 2000, // ms
responseTimeCritical: options.responseTimeCritical || 5000, // ms
errorRateWarning: options.errorRateWarning || 5, // %
errorRateCritical: options.errorRateCritical || 10, // %
diskSpaceWarning: options.diskSpaceWarning || 85, // %
diskSpaceCritical: options.diskSpaceCritical || 95 // %
},
monitoringInterval: options.monitoringInterval || 5000, // 5 seconds
analyticsEnabled: options.analyticsEnabled || false,
...options
};
// Enterprise monitoring directory structure
this.enterpriseDir = path.join(projectPath, '.mega-minds', 'enterprise');
this.monitoringDir = path.join(this.enterpriseDir, 'monitoring');
this.metricsDir = path.join(this.monitoringDir, 'metrics');
this.alertsDir = path.join(this.monitoringDir, 'alerts');
this.analyticsDir = path.join(this.monitoringDir, 'analytics');
// Monitoring state
this.monitoringActive = false;
this.monitoringInterval = null;
this.metricsBuffer = [];
this.alertHistory = [];
this.performanceBaseline = null;
// Real-time metrics cache
this.currentMetrics = {
system: {},
application: {},
agents: {},
quality: {},
timestamp: null
};
// Alert callbacks
this.alertCallbacks = new Map();
this.initialized = false;
}
/**
* Initialize enterprise monitoring system
* Only enables advanced features if explicitly requested
*/
async initialize(enableAdvancedMonitoring = false) {
try {
// Ensure monitoring directories exist
await fs.ensureDir(this.monitoringDir);
await fs.ensureDir(this.metricsDir);
await fs.ensureDir(this.alertsDir);
await fs.ensureDir(this.analyticsDir);
// Enable advanced monitoring if requested
if (enableAdvancedMonitoring) {
this.options.enableMonitoring = true;
console.log('📊 Enterprise monitoring enabled');
}
// Load performance baseline
await this.loadPerformanceBaseline();
// Start monitoring if enabled
if (this.options.enableMonitoring) {
await this.startMonitoring();
}
this.initialized = true;
return true;
} catch (error) {
console.warn('⚠️ EnterpriseMonitoring initialization warning:', error.message);
// Graceful degradation - continue with basic monitoring
this.options.enableMonitoring = false;
return false;
}
}
/**
* Check if advanced monitoring is enabled
*/
isAdvancedMonitoringEnabled() {
return this.options.enableMonitoring;
}
/**
* Start monitoring system
*/
async startMonitoring() {
if (this.monitoringActive) {
return;
}
console.log('📊 Starting enterprise monitoring system...');
this.monitoringActive = true;
// Start periodic metrics collection
this.monitoringInterval = setInterval(async () => {
await this.collectMetrics();
}, this.options.monitoringInterval);
// Initial metrics collection
await this.collectMetrics();
console.log(`✅ Enterprise monitoring active (${this.options.monitoringInterval}ms interval)`);
}
/**
* Stop monitoring system
*/
stopMonitoring() {
if (this.monitoringInterval) {
clearInterval(this.monitoringInterval);
this.monitoringInterval = null;
}
this.monitoringActive = false;
console.log('📊 Enterprise monitoring stopped');
}
/**
* Collect comprehensive metrics
*/
async collectMetrics() {
const startTime = Date.now();
try {
// System metrics
const systemMetrics = await this.collectSystemMetrics();
// Application metrics
const appMetrics = await this.collectApplicationMetrics();
// Agent metrics
const agentMetrics = await this.collectAgentMetrics();
// Quality metrics
const qualityMetrics = await this.collectQualityMetrics();
// Compile current metrics
this.currentMetrics = {
system: systemMetrics,
application: appMetrics,
agents: agentMetrics,
quality: qualityMetrics,
timestamp: new Date().toISOString(),
collectionTime: Date.now() - startTime
};
// Add to buffer for batch processing
this.metricsBuffer.push(this.currentMetrics);
// Process alerts
await this.processAlerts(this.currentMetrics);
// Flush buffer if needed (every 12 collections = 1 minute at 5s interval)
if (this.metricsBuffer.length >= 12) {
await this.flushMetricsBuffer();
}
// Ensure collection time meets performance requirement (<500ms)
const collectionTime = Date.now() - startTime;
if (collectionTime > 500) {
console.warn(`⚠️ Metrics collection slow: ${collectionTime}ms (target: <500ms)`);
}
} catch (error) {
console.warn('⚠️ Error collecting metrics:', error.message);
}
}
/**
* Collect system-level metrics
*/
async collectSystemMetrics() {
const memUsage = process.memoryUsage();
const cpuUsage = process.cpuUsage();
// Disk space check
let diskSpace = null;
try {
const stats = await fs.stat(this.projectPath);
diskSpace = {
total: stats.size || 0,
available: stats.size || 0, // Simplified - would need OS-specific calls for accurate data
used: 0,
usedPercent: 0
};
} catch (error) {
// Disk space check failed - not critical
}
return {
memory: {
heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024), // MB
heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024), // MB
external: Math.round(memUsage.external / 1024 / 1024), // MB
rss: Math.round(memUsage.rss / 1024 / 1024), // MB
usagePercent: Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)
},
cpu: {
user: cpuUsage.user,
system: cpuUsage.system
},
uptime: Math.round(process.uptime()),
loadAverage: os.loadavg(),
platform: os.platform(),
nodeVersion: process.version,
diskSpace: diskSpace
};
}
/**
* Collect application-specific metrics
*/
async collectApplicationMetrics() {
try {
// Check for active mega-minds state files
const stateDir = path.join(this.projectPath, '.mega-minds', 'state');
let stateFiles = 0;
let sessionFiles = 0;
if (await fs.pathExists(stateDir)) {
const files = await fs.readdir(stateDir);
stateFiles = files.length;
}
const sessionsDir = path.join(this.projectPath, '.mega-minds', 'sessions');
if (await fs.pathExists(sessionsDir)) {
const files = await fs.readdir(sessionsDir);
sessionFiles = files.length;
}
return {
stateFiles: stateFiles,
sessionFiles: sessionFiles,
projectPath: this.projectPath,
megamindsVersion: '2.0', // Would typically read from package.json
lastActivity: this.currentMetrics.timestamp
};
} catch (error) {
return {
stateFiles: 0,
sessionFiles: 0,
error: error.message
};
}
}
/**
* Collect agent-specific metrics
*/
async collectAgentMetrics() {
try {
const agentStateFile = path.join(this.projectPath, '.mega-minds', 'state', 'active-agents.json');
if (await fs.pathExists(agentStateFile)) {
const agentData = await fs.readJSON(agentStateFile);
return {
activeCount: agentData.totalActiveCount || 0,
agents: agentData.activeAgents || {},
lastUpdate: agentData.lastUpdate
};
}
return {
activeCount: 0,
agents: {},
lastUpdate: null
};
} catch (error) {
return {
activeCount: 0,
agents: {},
error: error.message
};
}
}
/**
* Collect quality gate metrics
*/
async collectQualityMetrics() {
try {
const qualityDir = path.join(this.projectPath, '.mega-minds', 'quality', 'reports');
if (await fs.pathExists(qualityDir)) {
const files = await fs.readdir(qualityDir);
const jsonFiles = files.filter(f => f.endsWith('.json')).sort().reverse();
if (jsonFiles.length > 0) {
const latestReport = path.join(qualityDir, jsonFiles[0]);
const qualityData = await fs.readJSON(latestReport);
return {
overallScore: qualityData.overall?.score || 0,
passed: qualityData.overall?.passed || false,
gatesCount: Object.keys(qualityData.gates || {}).length,
lastRun: qualityData.timestamp,
reportCount: jsonFiles.length
};
}
}
return {
overallScore: 100,
passed: true,
gatesCount: 0,
lastRun: null,
reportCount: 0
};
} catch (error) {
return {
overallScore: 0,
passed: false,
error: error.message
};
}
}
/**
* Process alerts based on current metrics
*/
async processAlerts(metrics) {
const alerts = [];
const thresholds = this.options.alertThresholds;
// Memory alerts
const memoryMB = metrics.system.memory.heapUsed;
if (memoryMB >= thresholds.memoryCritical) {
alerts.push({
type: 'memory',
level: 'critical',
message: `Memory usage critical: ${memoryMB}MB (threshold: ${thresholds.memoryCritical}MB)`,
value: memoryMB,
threshold: thresholds.memoryCritical
});
} else if (memoryMB >= thresholds.memoryWarning) {
alerts.push({
type: 'memory',
level: 'warning',
message: `Memory usage high: ${memoryMB}MB (threshold: ${thresholds.memoryWarning}MB)`,
value: memoryMB,
threshold: thresholds.memoryWarning
});
}
// Collection time alerts (performance requirement: <500ms)
if (metrics.collectionTime >= 500) {
alerts.push({
type: 'performance',
level: 'warning',
message: `Metrics collection slow: ${metrics.collectionTime}ms (target: <500ms)`,
value: metrics.collectionTime,
threshold: 500
});
}
// Quality gate alerts
if (!metrics.quality.passed) {
alerts.push({
type: 'quality',
level: 'warning',
message: `Quality gates failing (score: ${metrics.quality.overallScore})`,
value: metrics.quality.overallScore,
threshold: 85
});
}
// Disk space alerts (if available)
if (metrics.system.diskSpace && metrics.system.diskSpace.usedPercent) {
const diskUsed = metrics.system.diskSpace.usedPercent;
if (diskUsed >= thresholds.diskSpaceCritical) {
alerts.push({
type: 'disk',
level: 'critical',
message: `Disk space critical: ${diskUsed}% used`,
value: diskUsed,
threshold: thresholds.diskSpaceCritical
});
} else if (diskUsed >= thresholds.diskSpaceWarning) {
alerts.push({
type: 'disk',
level: 'warning',
message: `Disk space low: ${diskUsed}% used`,
value: diskUsed,
threshold: thresholds.diskSpaceWarning
});
}
}
// Process and store alerts
for (const alert of alerts) {
await this.handleAlert(alert);
}
}
/**
* Handle alert processing and callbacks
*/
async handleAlert(alert) {
const alertWithMetadata = {
...alert,
id: require('crypto').randomUUID(),
timestamp: new Date().toISOString(),
projectPath: this.projectPath,
acknowledged: false
};
// Add to history
this.alertHistory.push(alertWithMetadata);
// Keep only last 100 alerts in memory
if (this.alertHistory.length > 100) {
this.alertHistory = this.alertHistory.slice(-100);
}
// Save alert to disk
await this.saveAlert(alertWithMetadata);
// Call registered callbacks
const callbacks = this.alertCallbacks.get(alert.type) || [];
callbacks.forEach(callback => {
try {
callback(alertWithMetadata);
} catch (error) {
console.warn('⚠️ Alert callback error:', error.message);
}
});
// Log to console for immediate visibility
const emoji = alert.level === 'critical' ? '🚨' : '⚠️';
console.log(`${emoji} ALERT [${alert.level.toUpperCase()}]: ${alert.message}`);
}
/**
* Register alert callback
*/
onAlert(alertType, callback) {
if (!this.alertCallbacks.has(alertType)) {
this.alertCallbacks.set(alertType, []);
}
this.alertCallbacks.get(alertType).push(callback);
}
/**
* Get current metrics (for dashboard integration)
*/
getCurrentMetrics() {
return this.currentMetrics;
}
/**
* Get recent alerts
*/
getRecentAlerts(limit = 10) {
return this.alertHistory
.slice(-limit)
.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
}
/**
* Get system health status
*/
getHealthStatus() {
const metrics = this.currentMetrics;
const thresholds = this.options.alertThresholds;
let status = 'healthy';
const issues = [];
if (metrics.system?.memory?.heapUsed >= thresholds.memoryCritical) {
status = 'critical';
issues.push('Memory usage critical');
} else if (metrics.system?.memory?.heapUsed >= thresholds.memoryWarning) {
status = status === 'healthy' ? 'warning' : status;
issues.push('Memory usage high');
}
if (metrics.collectionTime >= 500) {
status = status === 'healthy' ? 'warning' : status;
issues.push('Performance degraded');
}
if (!metrics.quality?.passed) {
status = status === 'healthy' ? 'warning' : status;
issues.push('Quality gates failing');
}
return {
status: status,
issues: issues,
uptime: metrics.system?.uptime || 0,
memoryUsage: metrics.system?.memory?.heapUsed || 0,
qualityScore: metrics.quality?.overallScore || 0,
lastUpdate: metrics.timestamp,
monitoring: this.monitoringActive
};
}
/**
* Save alert to disk
*/
async saveAlert(alert) {
try {
const today = new Date().toISOString().split('T')[0];
const alertFile = path.join(this.alertsDir, `${today}.json`);
let alerts = [];
if (await fs.pathExists(alertFile)) {
alerts = await fs.readJSON(alertFile);
}
alerts.push(alert);
await fs.writeJSON(alertFile, alerts, { spaces: 2 });
} catch (error) {
console.warn('⚠️ Failed to save alert:', error.message);
}
}
/**
* Flush metrics buffer to disk
*/
async flushMetricsBuffer() {
if (this.metricsBuffer.length === 0) {
return;
}
try {
const timestamp = new Date();
const dateStr = timestamp.toISOString().split('T')[0];
const hourStr = timestamp.getHours().toString().padStart(2, '0');
const metricsFile = path.join(this.metricsDir, `${dateStr}-${hourStr}.json`);
let existingMetrics = [];
if (await fs.pathExists(metricsFile)) {
existingMetrics = await fs.readJSON(metricsFile);
}
existingMetrics.push(...this.metricsBuffer);
await fs.writeJSON(metricsFile, existingMetrics, { spaces: 2 });
// Clear buffer
this.metricsBuffer = [];
} catch (error) {
console.warn('⚠️ Failed to flush metrics buffer:', error.message);
}
}
/**
* Load performance baseline
*/
async loadPerformanceBaseline() {
try {
const baselineFile = path.join(this.monitoringDir, 'performance-baseline.json');
if (await fs.pathExists(baselineFile)) {
this.performanceBaseline = await fs.readJSON(baselineFile);
} else {
// Create initial baseline
this.performanceBaseline = {
memoryBaseline: 512, // MB
responseTimeBaseline: 1000, // ms
qualityScoreBaseline: 90,
createdAt: new Date().toISOString()
};
await fs.writeJSON(baselineFile, this.performanceBaseline, { spaces: 2 });
}
} catch (error) {
console.warn('⚠️ Error loading performance baseline:', error.message);
}
}
/**
* Generate analytics report
*/
async generateAnalyticsReport(days = 7) {
if (!this.options.analyticsEnabled) {
throw new Error('Analytics not enabled');
}
try {
const endDate = new Date();
const startDate = new Date(endDate.getTime() - (days * 24 * 60 * 60 * 1000));
const report = {
period: {
start: startDate.toISOString(),
end: endDate.toISOString(),
days: days
},
summary: {
averageMemoryUsage: 0,
peakMemoryUsage: 0,
averageQualityScore: 0,
totalAlerts: 0,
uptime: 0
},
trends: {
memory: [],
quality: [],
alerts: []
},
generated: new Date().toISOString()
};
// Collect metrics from files
const metricsData = await this.loadMetricsForPeriod(startDate, endDate);
if (metricsData.length > 0) {
// Calculate averages
const memoryValues = metricsData.map(m => m.system?.memory?.heapUsed || 0);
const qualityValues = metricsData.map(m => m.quality?.overallScore || 0);
report.summary.averageMemoryUsage = Math.round(memoryValues.reduce((a, b) => a + b, 0) / memoryValues.length);
report.summary.peakMemoryUsage = Math.max(...memoryValues);
report.summary.averageQualityScore = Math.round(qualityValues.reduce((a, b) => a + b, 0) / qualityValues.length);
// Generate trends (daily aggregates)
const dailyData = this.aggregateMetricsByDay(metricsData);
report.trends.memory = dailyData.map(d => ({ date: d.date, value: d.avgMemory }));
report.trends.quality = dailyData.map(d => ({ date: d.date, value: d.avgQuality }));
}
// Count alerts
const alertsData = await this.loadAlertsForPeriod(startDate, endDate);
report.summary.totalAlerts = alertsData.length;
// Group alerts by day
const alertsByDay = {};
alertsData.forEach(alert => {
const day = alert.timestamp.split('T')[0];
alertsByDay[day] = (alertsByDay[day] || 0) + 1;
});
report.trends.alerts = Object.entries(alertsByDay).map(([date, count]) => ({ date, value: count }));
// Save report
const reportFile = path.join(this.analyticsDir, `analytics-report-${endDate.toISOString().split('T')[0]}.json`);
await fs.writeJSON(reportFile, report, { spaces: 2 });
return report;
} catch (error) {
console.error('❌ Failed to generate analytics report:', error.message);
throw error;
}
}
/**
* Load metrics for a specific time period
*/
async loadMetricsForPeriod(startDate, endDate) {
const metrics = [];
try {
const files = await fs.readdir(this.metricsDir);
const jsonFiles = files.filter(f => f.endsWith('.json')).sort();
for (const file of jsonFiles) {
const filePath = path.join(this.metricsDir, file);
const fileMetrics = await fs.readJSON(filePath);
// Filter by date range
const filtered = fileMetrics.filter(m => {
const timestamp = new Date(m.timestamp);
return timestamp >= startDate && timestamp <= endDate;
});
metrics.push(...filtered);
}
} catch (error) {
console.warn('⚠️ Error loading metrics:', error.message);
}
return metrics;
}
/**
* Load alerts for a specific time period
*/
async loadAlertsForPeriod(startDate, endDate) {
const alerts = [];
try {
const files = await fs.readdir(this.alertsDir);
const jsonFiles = files.filter(f => f.endsWith('.json')).sort();
for (const file of jsonFiles) {
const filePath = path.join(this.alertsDir, file);
const fileAlerts = await fs.readJSON(filePath);
// Filter by date range
const filtered = fileAlerts.filter(a => {
const timestamp = new Date(a.timestamp);
return timestamp >= startDate && timestamp <= endDate;
});
alerts.push(...filtered);
}
} catch (error) {
console.warn('⚠️ Error loading alerts:', error.message);
}
return alerts;
}
/**
* Aggregate metrics by day
*/
aggregateMetricsByDay(metrics) {
const dailyData = {};
metrics.forEach(metric => {
const day = metric.timestamp.split('T')[0];
if (!dailyData[day]) {
dailyData[day] = {
date: day,
memoryValues: [],
qualityValues: []
};
}
if (metric.system?.memory?.heapUsed) {
dailyData[day].memoryValues.push(metric.system.memory.heapUsed);
}
if (metric.quality?.overallScore) {
dailyData[day].qualityValues.push(metric.quality.overallScore);
}
});
// Calculate averages
return Object.values(dailyData).map(day => ({
date: day.date,
avgMemory: day.memoryValues.length > 0 ?
Math.round(day.memoryValues.reduce((a, b) => a + b, 0) / day.memoryValues.length) : 0,
avgQuality: day.qualityValues.length > 0 ?
Math.round(day.qualityValues.reduce((a, b) => a + b, 0) / day.qualityValues.length) : 0
}));
}
/**
* Cleanup old monitoring data
*/
async cleanup() {
try {
const cutoffDate = new Date();
cutoffDate.setDate(cutoffDate.getDate() - this.options.metricsRetention);
// Cleanup old metrics files
const metricsFiles = await fs.readdir(this.metricsDir);
for (const file of metricsFiles) {
if (file.endsWith('.json')) {
const fileDate = file.split('-')[0];
if (new Date(fileDate) < cutoffDate) {
await fs.remove(path.join(this.metricsDir, file));
}
}
}
// Cleanup old alert files
const alertFiles = await fs.readdir(this.alertsDir);
for (const file of alertFiles) {
if (file.endsWith('.json')) {
const fileDate = file.replace('.json', '');
if (new Date(fileDate) < cutoffDate) {
await fs.remove(path.join(this.alertsDir, file));
}
}
}
} catch (error) {
console.warn('⚠️ Error during monitoring cleanup:', error.message);
}
}
/**
* Shutdown monitoring system
*/
async shutdown() {
this.stopMonitoring();
// Flush any remaining metrics
await this.flushMetricsBuffer();
// Cleanup old data
await this.cleanup();
console.log('📊 EnterpriseMonitoring shutdown complete');
}
}
module.exports = EnterpriseMonitoring;