UNPKG

codecrucible-synth

Version:

Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability

601 lines 22 kB
/**
 * Enterprise Performance Monitoring and Optimization System
 * Integrates with existing PerformanceMonitor to provide enterprise-grade capabilities
 */
import { EventEmitter } from 'events';
import { PerformanceMonitor, } from './performance-monitor.js';
import { AuditEventType, AuditSeverity, AuditOutcome, } from '../security/security-audit-logger.js';
import { logger } from '../logger.js';

/** Window (ms) used for "recent" alert counts and for pruning stored history. */
const ONE_HOUR_MS = 3600000;
/** Window (ms) of HTTP samples considered when computing the error rate. */
const ERROR_RATE_WINDOW_MS = 300000;
/** Maximum number of raw values retained per metric baseline. */
const MAX_BASELINE_POINTS = 1000;
/** Maximum number of samples retained per business metric. */
const MAX_BUSINESS_METRIC_POINTS = 100;

/**
 * Tell whether a metric's `tags.status` begins with the given prefix.
 * Tolerates a missing `tags` object and non-string (e.g. numeric) status values,
 * both of which would otherwise throw on `.startsWith`.
 *
 * @param {object} metric - Metric record; `tags.status` is read if present.
 * @param {string} prefix - Status prefix to test for, e.g. '5'.
 * @returns {boolean} True when a status is present and starts with `prefix`.
 */
function hasStatusPrefix(metric, prefix) {
    const status = metric?.tags?.status;
    return status != null && String(status).startsWith(prefix);
}

/**
 * Layers SLO tracking, capacity prediction, anomaly detection, optimization
 * hints and business-metric bookkeeping on top of a PerformanceMonitor.
 *
 * Events emitted by this class: 'enterprise-snapshot', 'slo-violation',
 * 'slo-compliance-low', 'anomaly-detected', 'anomaly-summary',
 * 'capacity-warning', 'optimization-opportunity', 'business-metric', 'alert',
 * 'scaling-trigger' and 'enterprise-stop'.
 */
export class EnterprisePerformanceSystem extends EventEmitter {
    config;
    performanceMonitor;
    auditLogger;
    sloViolations = [];
    capacityPredictions = new Map();
    businessMetrics = new Map();
    anomalies = [];
    optimizations = [];
    baselineData = new Map();
    monitoringInterval;
    predictionInterval;
    anomalyInterval;

    /**
     * @param {object} performanceMonitor - Source of 'metric-recorded',
     *   'performance-snapshot' and 'threshold-critical' events; also queried via
     *   getMetricStats(), getAllMetrics() and getPerformanceSummary().
     * @param {object} [auditLogger] - Optional audit sink; when present its
     *   logEvent() is called for every SLO violation.
     * @param {object} [config] - Overrides for the defaults below. The nested
     *   `alerting` and `slo` objects are merged key by key, so a partial override
     *   keeps the remaining defaults.
     */
    constructor(performanceMonitor, auditLogger, config = {}) {
        super();
        this.performanceMonitor = performanceMonitor;
        this.auditLogger = auditLogger;
        const defaults = {
            enableSLOMonitoring: true,
            enableCapacityPlanning: true,
            enableAnomalyDetection: true,
            enablePredictiveScaling: true,
            enableCostOptimization: true,
            enableBusinessMetrics: true,
            alerting: {
                enabled: true,
                channels: ['log', 'console'],
                escalationRules: [
                    {
                        condition: 'slo_violation',
                        delay: 300000, // 5 minutes
                        channels: ['slack', 'email'],
                        severity: 'critical',
                    },
                ],
            },
            slo: {
                availability: 99.9,
                latencyP99: 1000,
                errorRate: 0.1,
                throughput: 1000,
            },
        };
        this.config = {
            ...defaults,
            ...config,
            // A plain spread would replace these nested objects wholesale and drop
            // any default key the caller did not repeat, so merge them explicitly.
            alerting: { ...defaults.alerting, ...config?.alerting },
            slo: { ...defaults.slo, ...config?.slo },
        };
        this.initialize();
    }

    /**
     * Initialize enterprise performance monitoring: subscribe to the base
     * monitor's events and start the periodic jobs enabled in the config.
     */
    initialize() {
        // Listen to performance monitor events
        this.performanceMonitor.on('metric-recorded', metric => {
            this.processMetric(metric);
        });
        this.performanceMonitor.on('performance-snapshot', snapshot => {
            this.processSnapshot(snapshot);
        });
        this.performanceMonitor.on('threshold-critical', event => {
            this.handleCriticalThreshold(event);
        });
        // Start enterprise monitoring
        if (this.config.enableSLOMonitoring) {
            this.startSLOMonitoring();
        }
        if (this.config.enableCapacityPlanning) {
            this.startCapacityPlanning();
        }
        if (this.config.enableAnomalyDetection) {
            this.startAnomalyDetection();
        }
        logger.info('Enterprise Performance System initialized', {
            sloMonitoring: this.config.enableSLOMonitoring,
            capacityPlanning: this.config.enableCapacityPlanning,
            anomalyDetection: this.config.enableAnomalyDetection,
            businessMetrics: this.config.enableBusinessMetrics,
        });
    }

    /**
     * Process individual metrics for enterprise analysis.
     *
     * @param {object} metric - Metric record; `name` and `value` are always read,
     *   `tags` is read by the SLO and anomaly steps.
     */
    processMetric(metric) {
        // Update baseline data
        this.updateBaseline(metric.name, metric.value);
        // Check SLO compliance
        if (this.config.enableSLOMonitoring) {
            this.checkSLOCompliance(metric);
        }
        // Detect anomalies
        if (this.config.enableAnomalyDetection) {
            this.detectAnomaly(metric);
        }
        // Update business metrics correlation
        if (this.config.enableBusinessMetrics) {
            this.updateBusinessMetricCorrelations(metric);
        }
    }

    /**
     * Process performance snapshots for trend analysis and emit an
     * 'enterprise-snapshot' event summarising the current state.
     *
     * @param {object} snapshot - Snapshot from the base monitor;
     *   `metrics.memory.heapUsed` / `heapTotal` are read downstream.
     */
    processSnapshot(snapshot) {
        // Capacity planning analysis
        if (this.config.enableCapacityPlanning) {
            this.analyzeCapacity(snapshot);
        }
        // Performance optimization opportunities
        this.identifyOptimizations(snapshot);
        // Emit enterprise snapshot event
        this.emit('enterprise-snapshot', {
            snapshot,
            sloCompliance: this.calculateSLOCompliance(),
            capacityStatus: this.getCapacityStatus(),
            anomalyCount: this.anomalies.length,
            optimizationOpportunities: this.optimizations.length,
        });
    }

    /**
     * Start SLO monitoring
     */
    startSLOMonitoring() {
        this.monitoringInterval = setInterval(() => {
            this.checkAllSLOs();
        }, 60000); // Check every minute
    }

    /**
     * Start capacity planning
     */
    startCapacityPlanning() {
        this.predictionInterval = setInterval(() => {
            this.updateCapacityPredictions();
        }, 300000); // Update every 5 minutes
    }

    /**
     * Start anomaly detection
     */
    startAnomalyDetection() {
        // Keep the handle so stop() can clear this timer as well.
        this.anomalyInterval = setInterval(() => {
            this.performAnomalyAnalysis();
        }, 120000); // Analyze every 2 minutes
    }

    /**
     * Check SLO compliance for a metric. Only 'http_request_duration' metrics
     * are evaluated; each detected violation is stored and passed to
     * handleSLOViolation().
     *
     * @param {object} metric - Metric record (`name`, optional `tags.status`).
     */
    checkSLOCompliance(metric) {
        if (metric.name !== 'http_request_duration') {
            return;
        }
        const violations = [];
        // Availability SLO (only re-evaluated when a 5xx response is recorded)
        if (hasStatusPrefix(metric, '5')) {
            const errorRate = this.calculateErrorRate();
            const errorBudget = 100 - this.config.slo.availability;
            if (errorRate > errorBudget) {
                // Express the breach as a multiple of the allowed error budget so the
                // impact scale matches the latency branch (actual / target).
                const budgetRatio = errorBudget > 0 ? errorRate / errorBudget : Infinity;
                violations.push({
                    metric: 'availability',
                    target: this.config.slo.availability,
                    actual: 100 - errorRate,
                    duration: 0,
                    impact: this.determineSLOImpact(budgetRatio),
                    timestamp: Date.now(),
                });
            }
        }
        // Latency SLO
        const latencyStats = this.performanceMonitor.getMetricStats('http_request_duration');
        if (latencyStats && latencyStats.p99 > this.config.slo.latencyP99) {
            violations.push({
                metric: 'latency_p99',
                target: this.config.slo.latencyP99,
                actual: latencyStats.p99,
                duration: 0,
                impact: this.determineSLOImpact(latencyStats.p99 / this.config.slo.latencyP99),
                timestamp: Date.now(),
            });
        }
        // Process violations
        violations.forEach(violation => {
            this.sloViolations.push(violation);
            this.handleSLOViolation(violation);
        });
    }

    /**
     * Calculate current error rate over the last 5 minutes of
     * 'http_request_duration' samples. Both 4xx and 5xx statuses count as errors.
     *
     * @returns {number} Error percentage (0-100); 0 when there are no recent samples.
     */
    calculateErrorRate() {
        const httpMetrics = this.performanceMonitor.getAllMetrics()['http_request_duration'] || [];
        const now = Date.now();
        const recentMetrics = httpMetrics.filter(m => now - m.timestamp < ERROR_RATE_WINDOW_MS); // Last 5 minutes
        if (recentMetrics.length === 0) {
            return 0;
        }
        const errorCount = recentMetrics.filter(m => hasStatusPrefix(m, '4') || hasStatusPrefix(m, '5')).length;
        return (errorCount / recentMetrics.length) * 100;
    }

    /**
     * Determine SLO impact level.
     *
     * @param {number} ratio - How far the observed value exceeds its allowance,
     *   as a multiple (1 = exactly at the limit).
     * @returns {'critical'|'high'|'medium'|'low'} Impact label.
     */
    determineSLOImpact(ratio) {
        if (ratio > 2) {
            return 'critical';
        }
        if (ratio > 1.5) {
            return 'high';
        }
        if (ratio > 1.2) {
            return 'medium';
        }
        return 'low';
    }

    /**
     * Handle SLO violation: log it, write an audit record when an audit logger
     * was supplied, emit 'slo-violation' and raise an alert when alerting is on.
     *
     * @param {object} violation - Violation record built by checkSLOCompliance().
     */
    handleSLOViolation(violation) {
        logger.error('SLO violation detected', violation);
        // Audit log
        if (this.auditLogger) {
            this.auditLogger.logEvent(AuditEventType.SYSTEM_EVENT, AuditSeverity.HIGH, AuditOutcome.ERROR, 'enterprise-performance-system', 'slo_violation', violation.metric, `SLO violation: ${violation.metric} target=${violation.target} actual=${violation.actual}`, {}, {
                violation,
                impact: violation.impact,
            });
        }
        // Emit violation event
        this.emit('slo-violation', violation);
        // Trigger alerting
        if (this.config.alerting.enabled) {
            this.triggerAlert('SLO Violation', violation);
        }
    }

    /**
     * Update baseline data for anomaly detection.
     *
     * @param {string} metricName - Baseline key.
     * @param {number} value - Latest observed value.
     */
    updateBaseline(metricName, value) {
        if (!this.baselineData.has(metricName)) {
            this.baselineData.set(metricName, []);
        }
        const baseline = this.baselineData.get(metricName);
        baseline.push(value);
        // Keep only last 1000 data points
        if (baseline.length > MAX_BASELINE_POINTS) {
            baseline.shift();
        }
    }

    /**
     * Detect anomalies in metrics. Requires at least 10 baseline points; an
     * alert is recorded, emitted as 'anomaly-detected' and logged when the
     * computed severity exceeds 0.8.
     *
     * @param {object} metric - Metric record (`name`, `value`, `tags`).
     */
    detectAnomaly(metric) {
        const baseline = this.baselineData.get(metric.name);
        if (!baseline || baseline.length < 10) {
            return;
        }
        const anomaly = this.calculateAnomalyScore(metric.value, baseline);
        if (anomaly.severity > 0.8) {
            const alert = {
                metric: metric.name,
                anomalyType: anomaly.type,
                severity: anomaly.severity,
                confidence: anomaly.confidence,
                description: `Anomaly detected in ${metric.name}: ${anomaly.description}`,
                timestamp: Date.now(),
                context: {
                    value: metric.value,
                    baseline: anomaly.baseline,
                    tags: metric.tags,
                },
            };
            this.anomalies.push(alert);
            this.emit('anomaly-detected', alert);
            logger.warn('Performance anomaly detected', alert);
        }
    }

    /**
     * Calculate anomaly score using statistical methods (z-score against the
     * baseline's mean and population standard deviation).
     *
     * @param {number} value - Observed value.
     * @param {number[]} baseline - Historical values; must be non-empty.
     * @returns {{severity: number, confidence: number, type: string,
     *   description: string, baseline: {mean: number, std: number}}}
     */
    calculateAnomalyScore(value, baseline) {
        const mean = baseline.reduce((sum, v) => sum + v, 0) / baseline.length;
        const variance = baseline.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / baseline.length;
        const std = Math.sqrt(variance);
        // With zero spread the division is 0/0 (NaN) for a matching value; treat
        // that as "no deviation" while any differing value stays infinitely far.
        let zScore;
        if (std === 0) {
            zScore = value === mean ? 0 : Infinity;
        }
        else {
            zScore = Math.abs((value - mean) / std);
        }
        const severity = Math.min(zScore / 3, 1); // Normalize to 0-1
        let type = 'outlier';
        let description = '';
        if (value > mean + 2 * std) {
            type = 'spike';
            description = `Value ${value.toFixed(2)} is ${zScore.toFixed(2)} standard deviations above baseline`;
        }
        else if (value < mean - 2 * std) {
            type = 'drop';
            description = `Value ${value.toFixed(2)} is ${zScore.toFixed(2)} standard deviations below baseline`;
        }
        else {
            type = 'outlier';
            description = `Unusual value detected: ${value.toFixed(2)}`;
        }
        return {
            severity,
            confidence: Math.min(baseline.length / 100, 1), // More data = higher confidence
            type,
            description,
            baseline: { mean, std },
        };
    }

    /**
     * Analyze capacity trends and predictions for memory, cpu and connections.
     * Stores each prediction and emits 'capacity-warning' when the predicted
     * time to capacity is below 24.
     *
     * @param {object} snapshot - Forwarded to predictCapacity().
     */
    analyzeCapacity(snapshot) {
        const resources = ['memory', 'cpu', 'connections'];
        resources.forEach(resource => {
            const prediction = this.predictCapacity(resource, snapshot);
            if (prediction) {
                this.capacityPredictions.set(resource, prediction);
                // Alert if capacity will be reached soon
                if (prediction.timeToCapacity < 24) {
                    // Less than 24 hours
                    this.emit('capacity-warning', prediction);
                    logger.warn('Capacity warning', prediction);
                }
            }
        });
    }

    /**
     * Predict capacity requirements from the last 10 baseline values of
     * `<resource>_usage_percent`.
     *
     * NOTE(review): the trend is measured per baseline sample, so reading
     * `timeToCapacity` as hours presumes roughly hourly samples — confirm.
     *
     * @param {string} resource - Resource name, e.g. 'memory'.
     * @param {object} snapshot - Currently unused; kept for interface stability.
     * @returns {object|null} Prediction record, or null when there are fewer than
     *   10 baseline points or no upward trend.
     */
    predictCapacity(resource, snapshot) {
        // Simplified linear trend prediction
        const metricName = `${resource}_usage_percent`;
        const baseline = this.baselineData.get(metricName);
        if (!baseline || baseline.length < 10) {
            return null;
        }
        const recent = baseline.slice(-10);
        const trend = this.calculateTrend(recent);
        const currentUsage = recent[recent.length - 1];
        if (trend <= 0) {
            return null; // No growth trend
        }
        // Time to reach 95%; clamped so usage already above 95% reports 0 rather
        // than a negative duration.
        const timeToCapacity = Math.max(0, (95 - currentUsage) / trend);
        const predictedUsage = currentUsage + trend * 24; // 24 hours ahead
        return {
            resource,
            currentUsage,
            predictedUsage,
            timeToCapacity,
            confidence: Math.min(recent.length / 20, 1),
            recommendations: this.generateCapacityRecommendations(resource, timeToCapacity, trend),
        };
    }

    /**
     * Calculate linear trend: the least-squares slope of `values` against their
     * indices 0..n-1.
     *
     * @param {number[]} values - Samples in chronological order.
     * @returns {number} Slope per sample; 0 when fewer than two samples.
     */
    calculateTrend(values) {
        if (values.length < 2) {
            return 0;
        }
        const n = values.length;
        const sumX = (n * (n - 1)) / 2;
        const sumY = values.reduce((sum, val) => sum + val, 0);
        const sumXY = values.reduce((sum, val, idx) => sum + val * idx, 0);
        const sumX2 = values.reduce((sum, _, idx) => sum + idx * idx, 0);
        return (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
    }

    /**
     * Generate capacity recommendations.
     *
     * @param {string} resource - Resource name used in the messages.
     * @param {number} timeToCapacity - Predicted time until capacity is reached.
     * @param {number} trend - Growth rate from calculateTrend().
     * @returns {string[]} Human-readable recommendations (possibly empty).
     */
    generateCapacityRecommendations(resource, timeToCapacity, trend) {
        const recommendations = [];
        if (timeToCapacity < 24) {
            recommendations.push(`Immediate action required: ${resource} capacity will be reached in ${timeToCapacity.toFixed(1)} hours`);
            recommendations.push(`Consider scaling up ${resource} immediately`);
        }
        else if (timeToCapacity < 168) {
            // 1 week
            recommendations.push(`Plan ${resource} scaling within the next week`);
            recommendations.push(`Monitor ${resource} usage closely`);
        }
        if (trend > 5) {
            recommendations.push(`High growth rate detected for ${resource} (${trend.toFixed(2)}%/hour)`);
            recommendations.push(`Investigate cause of rapid ${resource} growth`);
        }
        return recommendations;
    }

    /**
     * Identify performance optimization opportunities. Replaces
     * `this.optimizations` with the freshly computed list and emits one
     * 'optimization-opportunity' event per entry.
     *
     * @param {object} snapshot - Must expose `metrics.memory.heapUsed` and
     *   `metrics.memory.heapTotal`.
     */
    identifyOptimizations(snapshot) {
        const optimizations = [];
        // Memory optimization
        if (snapshot.metrics.memory.heapUsed / snapshot.metrics.memory.heapTotal > 0.8) {
            optimizations.push({
                area: 'memory',
                currentValue: snapshot.metrics.memory.heapUsed,
                optimizedValue: snapshot.metrics.memory.heapUsed * 0.7,
                improvement: 30,
                confidence: 0.8,
                effort: 'medium',
                risk: 'low',
                recommendations: [
                    'Implement memory pooling',
                    'Review object retention',
                    'Optimize garbage collection',
                ],
            });
        }
        // Response time optimization
        const latencyStats = this.performanceMonitor.getMetricStats('http_request_duration');
        if (latencyStats && latencyStats.p95 > 500) {
            optimizations.push({
                area: 'response_time',
                currentValue: latencyStats.p95,
                optimizedValue: latencyStats.p95 * 0.6,
                improvement: 40,
                confidence: 0.7,
                effort: 'high',
                risk: 'medium',
                recommendations: [
                    'Implement response caching',
                    'Optimize database queries',
                    'Add CDN for static assets',
                ],
            });
        }
        this.optimizations = optimizations;
        optimizations.forEach(optimization => {
            this.emit('optimization-opportunity', optimization);
        });
    }

    /**
     * Update business metric correlations.
     *
     * @param {object} metric - Metric record (`name`, `value`).
     */
    updateBusinessMetricCorrelations(metric) {
        // This would correlate technical metrics with business metrics
        // For now, we'll track some basic correlations
        if (metric.name === 'http_request_duration') {
            this.recordBusinessMetric('user_experience_score', 100 - metric.value / 10, 'score');
        }
        if (metric.name === 'error_rate') {
            this.recordBusinessMetric('system_reliability', 100 - metric.value, 'percent');
        }
    }

    /**
     * Record business metric and emit it as a 'business-metric' event.
     *
     * @param {string} name - Business metric name.
     * @param {number} value - Metric value.
     * @param {string} unit - Unit label stored with the sample.
     */
    recordBusinessMetric(name, value, unit) {
        const metric = {
            name,
            value,
            unit,
            timestamp: Date.now(),
            correlations: {},
        };
        if (!this.businessMetrics.has(name)) {
            this.businessMetrics.set(name, []);
        }
        const metrics = this.businessMetrics.get(name);
        metrics.push(metric);
        // Keep only last 100 metrics
        if (metrics.length > MAX_BUSINESS_METRIC_POINTS) {
            metrics.shift();
        }
        this.emit('business-metric', metric);
    }

    /**
     * Check all SLOs. Also drops stored violations older than one hour so the
     * history cannot grow without bound; the dashboard only counts that window.
     */
    checkAllSLOs() {
        const cutoff = Date.now() - ONE_HOUR_MS;
        this.sloViolations = this.sloViolations.filter(v => v.timestamp > cutoff);
        const compliance = this.calculateSLOCompliance();
        if (compliance.overall < this.config.slo.availability) {
            logger.warn('Overall SLO compliance below target', compliance);
            this.emit('slo-compliance-low', compliance);
        }
    }

    /**
     * Calculate SLO compliance.
     *
     * @returns {{overall: number, availability: number, latency: number,
     *   errorRate: number, throughput: number}} Percentages; `overall` is the
     *   plain average of availability, latency and throughput compliance.
     */
    calculateSLOCompliance() {
        const errorRate = this.calculateErrorRate();
        const availability = 100 - errorRate;
        const latencyStats = this.performanceMonitor.getMetricStats('http_request_duration');
        let latencyCompliance = 100;
        if (latencyStats) {
            const overshootPercent = ((latencyStats.p99 - this.config.slo.latencyP99) / this.config.slo.latencyP99) * 100;
            // Clamp to 0-100: beating the target must not score above 100 and
            // hide shortfalls in the other components of the average.
            latencyCompliance = Math.min(100, Math.max(0, 100 - overshootPercent));
        }
        const throughputStats = this.performanceMonitor.getMetricStats('http_requests_total');
        const throughputCompliance = throughputStats && throughputStats.count > 0
            ? Math.min(100, (throughputStats.count / this.config.slo.throughput) * 100)
            : 0;
        const overall = (availability + latencyCompliance + throughputCompliance) / 3;
        return {
            overall,
            availability,
            latency: latencyCompliance,
            errorRate,
            throughput: throughputCompliance,
        };
    }

    /**
     * Get capacity status.
     *
     * @returns {object} Plain object keyed by resource name with the latest predictions.
     */
    getCapacityStatus() {
        return Object.fromEntries(this.capacityPredictions);
    }

    /**
     * Update capacity predictions
     */
    updateCapacityPredictions() {
        // This would be called periodically to update predictions
        const snapshot = this.performanceMonitor.getPerformanceSummary().lastSnapshot;
        if (snapshot) {
            this.analyzeCapacity(snapshot);
        }
    }

    /**
     * Perform comprehensive anomaly analysis: prune anomalies older than one
     * hour and, if any remain, emit an 'anomaly-summary' event.
     */
    performAnomalyAnalysis() {
        // Clean old anomalies (older than 1 hour)
        const cutoff = Date.now() - ONE_HOUR_MS;
        this.anomalies = this.anomalies.filter(a => a.timestamp > cutoff);
        // Emit anomaly summary
        if (this.anomalies.length > 0) {
            this.emit('anomaly-summary', {
                count: this.anomalies.length,
                highSeverity: this.anomalies.filter(a => a.severity > 0.8).length,
                recentTrends: this.analyzeAnomalyTrends(),
            });
        }
    }

    /**
     * Analyze anomaly trends.
     *
     * @returns {object} Count of stored anomalies per `anomalyType`.
     */
    analyzeAnomalyTrends() {
        const trends = {};
        this.anomalies.forEach(anomaly => {
            trends[anomaly.anomalyType] = (trends[anomaly.anomalyType] || 0) + 1;
        });
        return trends;
    }

    /**
     * Trigger alert: emit an 'alert' event and log it at error level.
     *
     * @param {string} type - Alert title.
     * @param {object} data - Payload attached to the alert.
     */
    triggerAlert(type, data) {
        const alert = {
            type,
            timestamp: Date.now(),
            data,
            channels: this.config.alerting.channels,
        };
        this.emit('alert', alert);
        // Log alert
        logger.error(`Performance Alert: ${type}`, alert);
        // In a real system, this would integrate with:
        // - Slack/Teams webhooks
        // - Email services
        // - PagerDuty/OpsGenie
        // - SMS services
    }

    /**
     * Handle critical threshold events from base monitor.
     *
     * @param {object} event - Threshold event; `metric`, `value` and `threshold`
     *   are forwarded in the 'scaling-trigger' payload.
     */
    handleCriticalThreshold(event) {
        logger.error('Critical performance threshold exceeded', event);
        // Enhanced enterprise handling
        this.triggerAlert('Critical Threshold', event);
        // Auto-scaling trigger (if enabled)
        if (this.config.enablePredictiveScaling) {
            this.emit('scaling-trigger', {
                metric: event.metric,
                value: event.value,
                threshold: event.threshold,
                action: 'scale-up',
            });
        }
    }

    /**
     * Get enterprise performance dashboard data.
     *
     * @returns {object} SLO compliance, capacity predictions, the last 10
     *   anomalies, current optimizations, business metrics and alert counts
     *   (violations/anomalies in the last hour, capacity predictions under 168).
     */
    getEnterpriseMetrics() {
        const now = Date.now();
        return {
            slo: this.calculateSLOCompliance(),
            capacity: this.getCapacityStatus(),
            anomalies: this.anomalies.slice(-10), // Last 10 anomalies
            optimizations: this.optimizations,
            businessMetrics: Object.fromEntries(this.businessMetrics),
            alerts: {
                violations: this.sloViolations.filter(v => now - v.timestamp < ONE_HOUR_MS).length,
                anomalies: this.anomalies.filter(a => now - a.timestamp < ONE_HOUR_MS).length,
                capacity: Array.from(this.capacityPredictions.values()).filter(p => p.timeToCapacity < 168)
                    .length,
            },
        };
    }

    /**
     * Stop enterprise monitoring: clear every periodic timer started by this
     * instance, then log and emit 'enterprise-stop'. Safe to call repeatedly.
     */
    stop() {
        if (this.monitoringInterval) {
            clearInterval(this.monitoringInterval);
            this.monitoringInterval = undefined;
        }
        if (this.predictionInterval) {
            clearInterval(this.predictionInterval);
            this.predictionInterval = undefined;
        }
        if (this.anomalyInterval) {
            clearInterval(this.anomalyInterval);
            this.anomalyInterval = undefined;
        }
        logger.info('Enterprise Performance System stopped');
        this.emit('enterprise-stop');
    }
}

// Export default instance
export const enterprisePerformanceSystem = new EnterprisePerformanceSystem(new PerformanceMonitor());
//# sourceMappingURL=enterprise-performance-system.js.map