UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

722 lines 25.8 kB
/**
 * Advanced Alerting System - Expert-level monitoring and alerting
 *
 * Provides intelligent alerting with smart thresholds, anomaly detection,
 * and customizable alert rules for production monitoring.
 */
import { structuredLogger } from './structured-logger.js';

/** How often the background monitor evaluates all rules. */
const CHECK_INTERVAL_MS = 30000;
/** Baseline size below which anomaly rules only collect data and never fire. */
const MIN_BASELINE_POINTS = 10;
/** Upper bound on stored baseline data points per metric. */
const MAX_BASELINE_POINTS = 1000;
/** Upper bound on stored alert timestamps per rule in the throttle tracker. */
const MAX_THROTTLE_TIMESTAMPS = 100;
/** Standard-deviation multipliers for each anomaly sensitivity level. */
const SENSITIVITY_MULTIPLIERS = Object.freeze({ low: 3, medium: 2, high: 1.5 });
/** Config sections whose keys are all read by the rules; merged key-by-key. */
const MERGED_CONFIG_SECTIONS = ['defaultThresholds', 'anomalyDetection'];

/**
 * Merge configuration overrides onto a base configuration.
 *
 * Top-level keys are replaced, except the sections in
 * MERGED_CONFIG_SECTIONS, which are merged one level deep so a partial
 * override (e.g. only `responseTime`) keeps the remaining base values.
 *
 * @param {object} base - Complete configuration to start from.
 * @param {object} [overrides] - Partial configuration to apply.
 * @returns {object} New configuration object; inputs are not mutated.
 */
function mergeAlertingConfig(base, overrides) {
    const merged = { ...base, ...overrides };
    for (const section of MERGED_CONFIG_SECTIONS) {
        merged[section] = { ...base[section], ...overrides?.[section] };
    }
    return merged;
}

/**
 * Build an identifier of the form `<prefix>_<epoch ms>_<9 random base-36 chars>`.
 *
 * @param {string} prefix - Identifier prefix (`rule` or `alert`).
 * @returns {string} Generated identifier.
 */
function generateId(prefix) {
    return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}

/**
 * Coerce a condition value to a number.
 *
 * @param {number|string} value - Numeric value or numeric string.
 * @returns {number} The number, or NaN when a string cannot be parsed.
 */
function toNumber(value) {
    return typeof value === 'number' ? value : parseFloat(value);
}

export class AdvancedAlerting {
    rules = new Map();
    alerts = [];
    config;
    anomalyBaselines = new Map();
    throttleTracker = new Map();
    /** Handle of the background monitoring interval, or null when stopped. */
    monitorTimer = null;

    /**
     * Create the alerting system, register the default rules and start the
     * background monitor.
     *
     * @param {object} [config] - Partial configuration; missing values fall
     *   back to the built-in defaults, including inside `defaultThresholds`
     *   and `anomalyDetection`.
     */
    constructor(config) {
        const defaults = {
            enabled: true,
            defaultThresholds: {
                responseTime: 30000, // 30 seconds
                errorRate: 0.05, // 5%
                costSpike: 2.0, // 2x normal cost
                qualityDrop: 0.8 // Below 8/10 quality score
            },
            anomalyDetection: {
                enabled: true,
                sensitivity: 'medium',
                learningPeriodDays: 7
            },
            notifications: {}
        };
        this.config = mergeAlertingConfig(defaults, config);
        this.initializeDefaultRules();
        this.startMonitoring();
    }

    /**
     * Initialize default alert rules
     *
     * Thresholds are read from `this.config.defaultThresholds` at the time
     * this method runs.
     */
    initializeDefaultRules() {
        const thresholds = this.config.defaultThresholds;
        const defaultRules = [
            {
                name: 'High Response Time',
                description: 'Response time exceeds acceptable threshold',
                enabled: true,
                category: 'performance',
                severity: 'high',
                condition: {
                    type: 'threshold',
                    metric: 'pipeline.totalDuration',
                    operator: '>',
                    value: thresholds.responseTime,
                    windowMinutes: 5,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true },
                    { type: 'webhook', config: {}, enabled: false }
                ],
                throttle: { enabled: true, windowMinutes: 15, maxAlerts: 3 }
            },
            {
                name: 'OpenRouter High Error Rate',
                description: 'OpenRouter API error rate is too high',
                enabled: true,
                category: 'availability',
                severity: 'critical',
                condition: {
                    type: 'threshold',
                    metric: 'openRouter.successRate',
                    operator: '<',
                    value: 1 - thresholds.errorRate,
                    windowMinutes: 10,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'error' }, enabled: true },
                    { type: 'circuit_breaker', config: { action: 'trip' }, enabled: true }
                ],
                throttle: { enabled: true, windowMinutes: 30, maxAlerts: 2 }
            },
            {
                name: 'Cost Spike Detection',
                description: 'Unusual increase in costs detected',
                enabled: true,
                category: 'cost',
                severity: 'medium',
                condition: {
                    type: 'anomaly',
                    metric: 'cost.costPerRequest',
                    operator: '>',
                    value: thresholds.costSpike,
                    windowMinutes: 60
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true }
                ],
                throttle: { enabled: true, windowMinutes: 120, maxAlerts: 1 }
            },
            {
                name: 'Quality Score Drop',
                description: 'Quality scores have dropped significantly',
                enabled: true,
                category: 'quality',
                severity: 'medium',
                condition: {
                    type: 'threshold',
                    metric: 'quality.averageQualityScore',
                    operator: '<',
                    value: thresholds.qualityDrop,
                    windowMinutes: 30,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true }
                ],
                throttle: { enabled: true, windowMinutes: 60, maxAlerts: 2 }
            },
            {
                name: 'Memory Usage Critical',
                description: 'Memory usage is critically high',
                enabled: true,
                category: 'performance',
                severity: 'critical',
                condition: {
                    type: 'threshold',
                    metric: 'resources.memoryUsage',
                    operator: '>',
                    value: 1024 * 1024 * 1024, // 1GB
                    windowMinutes: 5,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'error' }, enabled: true },
                    { type: 'auto_scale', config: { action: 'restart' }, enabled: false }
                ],
                throttle: { enabled: true, windowMinutes: 10, maxAlerts: 1 }
            }
        ];
        defaultRules.forEach(rule => {
            this.addRule(rule);
        });
    }

    /**
     * Add new alert rule
     *
     * @param {object} rule - Rule definition without id or bookkeeping fields.
     * @returns {string} The generated rule id.
     */
    addRule(rule) {
        const id = generateId('rule');
        const now = new Date().toISOString();
        const fullRule = {
            ...rule,
            id,
            createdAt: now,
            updatedAt: now,
            triggeredCount: 0
        };
        this.rules.set(id, fullRule);
        structuredLogger.info('Alert rule added', { ruleId: id, ruleName: rule.name });
        return id;
    }

    /**
     * Update existing alert rule
     *
     * @param {string} id - Id of the rule to update.
     * @param {object} updates - Fields to overwrite on the rule.
     * @returns {boolean} False when no rule with that id exists.
     */
    updateRule(id, updates) {
        const rule = this.rules.get(id);
        if (!rule) {
            return false;
        }
        const updatedRule = {
            ...rule,
            ...updates,
            id: rule.id, // Preserve original ID
            createdAt: rule.createdAt, // Preserve creation time
            updatedAt: new Date().toISOString()
        };
        this.rules.set(id, updatedRule);
        structuredLogger.info('Alert rule updated', { ruleId: id });
        return true;
    }

    /**
     * Remove alert rule
     *
     * @param {string} id - Id of the rule to remove.
     * @returns {boolean} True when a rule was removed.
     */
    removeRule(id) {
        const removed = this.rules.delete(id);
        if (removed) {
            structuredLogger.info('Alert rule removed', { ruleId: id });
        }
        return removed;
    }

    /**
     * Get all alert rules
     *
     * @returns {object[]} Array of the stored rule objects.
     */
    getRules() {
        return Array.from(this.rules.values());
    }

    /**
     * Start monitoring for alerts
     *
     * Schedules `checkAlerts` every 30 seconds. Calling this while the
     * monitor is already running is a no-op. In Node.js the timer is
     * unref'd so that merely importing this module (which creates the
     * global instance) does not keep the process alive.
     */
    startMonitoring() {
        if (this.monitorTimer) {
            return;
        }
        this.monitorTimer = setInterval(() => {
            // checkAlerts catches its own errors, so the promise cannot reject.
            void this.checkAlerts();
        }, CHECK_INTERVAL_MS); // Check every 30 seconds
        // Browser timers are plain numbers without unref(); skip it there.
        this.monitorTimer.unref?.();
    }

    /**
     * Stop the background monitor started by `startMonitoring`.
     * Safe to call when the monitor is not running.
     */
    stopMonitoring() {
        if (!this.monitorTimer) {
            return;
        }
        clearInterval(this.monitorTimer);
        this.monitorTimer = null;
    }

    /**
     * Check all rules against current metrics
     *
     * Does nothing when alerting is disabled; errors are logged, not thrown.
     */
    async checkAlerts() {
        if (!this.config.enabled) {
            return;
        }
        try {
            for (const rule of this.rules.values()) {
                if (!rule.enabled) {
                    continue;
                }
                await this.evaluateRule(rule);
            }
        }
        catch (error) {
            structuredLogger.error('Alert checking failed', {}, error);
        }
    }

    /**
     * Evaluate a single alert rule
     *
     * Fires an alert when the rule's condition is met and the rule is not
     * throttled. Errors are logged with the rule id, not thrown.
     *
     * @param {object} rule - Rule to evaluate.
     */
    async evaluateRule(rule) {
        try {
            // Get recent metrics for evaluation
            const metrics = await this.getRecentMetrics(rule.condition.windowMinutes || 5);
            if (metrics.length === 0) {
                return;
            }
            // Check if rule condition is met
            const triggered = await this.evaluateCondition(rule.condition, metrics);
            if (!triggered) {
                return;
            }
            // Check throttling
            if (this.isThrottled(rule)) {
                return;
            }
            // Create and fire alert
            await this.fireAlert(rule, metrics);
        }
        catch (error) {
            structuredLogger.error('Rule evaluation failed', { ruleId: rule.id }, error);
        }
    }

    /**
     * Evaluate alert condition against metrics
     *
     * @param {object} condition - Condition with a `type` of `threshold`,
     *   `anomaly`, `trend` or `composite`.
     * @param {object[]} metrics - Metric samples to evaluate.
     * @returns {Promise<boolean>} Whether the condition is met; false for
     *   unknown condition types.
     */
    async evaluateCondition(condition, metrics) {
        switch (condition.type) {
            case 'threshold':
                return this.evaluateThresholdCondition(condition, metrics);
            case 'anomaly':
                return this.evaluateAnomalyCondition(condition, metrics);
            case 'trend':
                return this.evaluateTrendCondition(condition, metrics);
            case 'composite':
                return this.evaluateCompositeCondition(condition, metrics);
            default:
                return false;
        }
    }

    /**
     * Evaluate threshold-based condition
     *
     * Aggregates the metric values (`avg`, `sum`, `min`, `max`, `count`, or
     * the latest value when no aggregation is given) and compares the
     * result with `condition.value` using `condition.operator`.
     *
     * @param {object} condition - Threshold condition.
     * @param {object[]} metrics - Metric samples to evaluate.
     * @returns {boolean} Whether the comparison holds; false when no sample
     *   carries the metric or the operator is unknown.
     */
    evaluateThresholdCondition(condition, metrics) {
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length === 0) {
            return false;
        }
        let aggregatedValue;
        switch (condition.aggregation) {
            case 'avg':
                aggregatedValue = values.reduce((sum, val) => sum + val, 0) / values.length;
                break;
            case 'sum':
                aggregatedValue = values.reduce((sum, val) => sum + val, 0);
                break;
            case 'min':
                aggregatedValue = Math.min(...values);
                break;
            case 'max':
                aggregatedValue = Math.max(...values);
                break;
            case 'count':
                aggregatedValue = values.length;
                break;
            default:
                aggregatedValue = values[values.length - 1]; // Latest value
        }
        const threshold = toNumber(condition.value);
        switch (condition.operator) {
            case '>':
                return aggregatedValue > threshold;
            case '<':
                return aggregatedValue < threshold;
            case '>=':
                return aggregatedValue >= threshold;
            case '<=':
                return aggregatedValue <= threshold;
            // Equality is tolerance-based because the values are floats.
            case '=':
                return Math.abs(aggregatedValue - threshold) < 0.001;
            case '!=':
                return Math.abs(aggregatedValue - threshold) >= 0.001;
            default:
                return false;
        }
    }

    /**
     * Evaluate anomaly-based condition
     *
     * Compares the latest metric value against
     * `(baseline mean + k * baseline std dev) * condition.value`, where `k`
     * depends on the configured sensitivity (unknown sensitivities fall
     * back to `medium`). While the baseline has fewer than 10 points the
     * current values are added to it and the condition does not fire.
     *
     * NOTE(review): the baseline is only extended while it is below the
     * minimum size, so it stops learning afterwards - confirm this is
     * intended.
     *
     * @param {object} condition - Anomaly condition.
     * @param {object[]} metrics - Metric samples to evaluate.
     * @returns {boolean} Whether the latest value exceeds the anomaly bound.
     */
    evaluateAnomalyCondition(condition, metrics) {
        if (!this.config.anomalyDetection.enabled) {
            return false;
        }
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length === 0) {
            return false;
        }
        const baseline = this.getAnomalyBaseline(condition.metric);
        if (baseline.length < MIN_BASELINE_POINTS) {
            // Not enough historical data
            this.updateAnomalyBaseline(condition.metric, values);
            return false;
        }
        const currentValue = values[values.length - 1];
        const baselineAvg = baseline.reduce((sum, val) => sum + val, 0) / baseline.length;
        const baselineStd = this.calculateStandardDeviation(baseline);
        // Determine sensitivity threshold; an unrecognised sensitivity would
        // otherwise yield NaN and silently disable the rule.
        const sensitivityMultiplier = SENSITIVITY_MULTIPLIERS[this.config.anomalyDetection.sensitivity]
            ?? SENSITIVITY_MULTIPLIERS.medium;
        const threshold = baselineAvg + (sensitivityMultiplier * baselineStd);
        const multiplier = toNumber(condition.value);
        return currentValue > (threshold * multiplier);
    }

    /**
     * Evaluate trend-based condition
     *
     * Computes the percentage change between the average of the first three
     * and the last three metric values and compares it with
     * `condition.value` (operators `>` and `<` only).
     *
     * @param {object} condition - Trend condition.
     * @param {object[]} metrics - Metric samples to evaluate.
     * @returns {boolean} Whether the trend comparison holds; false with
     *   fewer than three values or when the older average is zero (the
     *   percentage change is undefined in that case).
     */
    evaluateTrendCondition(condition, metrics) {
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length < 3) {
            return false;
        }
        // Simple trend detection: compare recent values with older values
        const recentValues = values.slice(-3);
        const olderValues = values.slice(0, 3);
        const recentAvg = recentValues.reduce((sum, val) => sum + val, 0) / recentValues.length;
        const olderAvg = olderValues.reduce((sum, val) => sum + val, 0) / olderValues.length;
        if (olderAvg === 0) {
            // Avoid dividing by zero, which would produce +/-Infinity or NaN.
            return false;
        }
        const changePercent = ((recentAvg - olderAvg) / olderAvg) * 100;
        const threshold = toNumber(condition.value);
        switch (condition.operator) {
            case '>':
                return changePercent > threshold;
            case '<':
                return changePercent < threshold;
            default:
                return false;
        }
    }

    /**
     * Evaluate composite condition
     *
     * @param {object} condition - Composite condition with `conditions`
     *   (sub-conditions) and `logic` (`AND` or `OR`).
     * @param {object[]} metrics - Metric samples to evaluate.
     * @returns {Promise<boolean>} Combined result; false when there are no
     *   sub-conditions or the logic is unknown.
     */
    async evaluateCompositeCondition(condition, metrics) {
        if (!condition.conditions || condition.conditions.length === 0) {
            return false;
        }
        const results = await Promise.all(condition.conditions.map(subCondition => this.evaluateCondition(subCondition, metrics)));
        switch (condition.logic) {
            case 'AND':
                return results.every(result => result);
            case 'OR':
                return results.some(result => result);
            default:
                return false;
        }
    }

    /**
     * Extract metric values from performance metrics
     *
     * Samples where the metric is missing, non-numeric or NaN are skipped.
     * They are not substituted with 0, because a fabricated zero would
     * falsely trip `<` rules and distort averages.
     *
     * @param {string} metricPath - Dot-separated path, e.g. `cost.costPerRequest`.
     * @param {object[]} metrics - Metric samples to read from.
     * @returns {number[]} Numeric values found, in sample order.
     */
    extractMetricValues(metricPath, metrics) {
        return metrics
            .map(metric => this.getNestedValue(metric, metricPath))
            .filter(value => typeof value === 'number' && !Number.isNaN(value));
    }

    /**
     * Get nested value from object using dot notation
     *
     * @param {object} obj - Object to read from.
     * @param {string} path - Dot-separated property path.
     * @returns {*} The value at the path, or undefined when any segment is missing.
     */
    getNestedValue(obj, path) {
        return path.split('.').reduce((current, key) => current?.[key], obj);
    }

    /**
     * Fire an alert
     *
     * Updates the rule's trigger statistics, records the alert, runs the
     * rule's actions, registers the alert for throttling and logs it.
     *
     * @param {object} rule - Rule that triggered.
     * @param {object[]} metrics - Metric samples that triggered the rule.
     */
    async fireAlert(rule, metrics) {
        const alertId = generateId('alert');
        const now = new Date().toISOString();
        // Update rule statistics
        rule.triggeredCount++;
        rule.lastTriggered = now;
        rule.updatedAt = now;
        // Create alert
        const alert = {
            id: alertId,
            ruleId: rule.id,
            ruleName: rule.name,
            severity: rule.severity,
            message: this.generateAlertMessage(rule, metrics),
            details: {
                condition: rule.condition,
                metricValues: this.extractMetricValues(rule.condition.metric, metrics),
                timestamp: now
            },
            timestamp: now,
            resolved: false,
            acknowledged: false
        };
        this.alerts.push(alert);
        // Execute alert actions
        await this.executeAlertActions(rule.actions, alert);
        // Update throttling tracker
        this.updateThrottleTracker(rule.id);
        structuredLogger.warn('Alert fired', {
            alertId: alert.id,
            ruleId: rule.id,
            ruleName: rule.name,
            severity: rule.severity,
            message: alert.message
        });
    }

    /**
     * Execute alert actions
     *
     * Runs every enabled action in order. A failing action is logged and
     * does not prevent the remaining actions from running.
     *
     * @param {object[]} [actions] - Actions of the rule; a missing list is
     *   treated as empty.
     * @param {object} alert - Alert being processed.
     */
    async executeAlertActions(actions, alert) {
        for (const action of actions ?? []) {
            if (!action.enabled) {
                continue;
            }
            try {
                await this.executeAction(action, alert);
            }
            catch (error) {
                structuredLogger.error('Alert action failed', {
                    alertId: alert.id,
                    actionType: action.type
                }, error);
            }
        }
    }

    /**
     * Execute individual alert action
     *
     * Supported types: `log`, `webhook`, `slack`, `circuit_breaker` and
     * `auto_scale`; other types are ignored.
     *
     * @param {object} action - Action with `type` and optional `config`.
     * @param {object} alert - Alert being processed.
     */
    async executeAction(action, alert) {
        switch (action.type) {
            case 'log': {
                const level = action.config?.level || 'warn';
                const payload = {
                    alertId: alert.id,
                    severity: alert.severity,
                    message: alert.message
                };
                if (level === 'error') {
                    structuredLogger.error('Alert triggered', payload);
                }
                else if (level === 'warn') {
                    structuredLogger.warn('Alert triggered', payload);
                }
                else if (level === 'info') {
                    structuredLogger.info('Alert triggered', payload);
                }
                else {
                    structuredLogger.debug('Alert triggered', payload);
                }
                break;
            }
            case 'webhook':
                if (this.config.notifications.webhook) {
                    await this.sendWebhook(this.config.notifications.webhook, alert);
                }
                break;
            case 'slack':
                if (this.config.notifications.slack) {
                    await this.sendSlackNotification(this.config.notifications.slack, alert);
                }
                break;
            case 'circuit_breaker':
                if (action.config?.action === 'trip') {
                    // The module is still loaded so that a missing health
                    // monitor keeps surfacing as an action failure.
                    // TODO: the health monitor is not notified yet; only the
                    // log entry below is produced.
                    await import('./health-monitor.js');
                    structuredLogger.warn('Circuit breaker tripped by alert', { alertId: alert.id });
                }
                break;
            case 'auto_scale':
                // Placeholder for auto-scaling actions
                structuredLogger.info('Auto-scale action triggered', {
                    alertId: alert.id,
                    action: action.config?.action
                });
                break;
        }
    }

    /**
     * Send webhook notification
     *
     * @param {object} webhookConfig - Webhook settings (`url`, optional `headers`).
     * @param {object} alert - Alert to deliver.
     * @throws {Error} When the endpoint responds with a non-2xx status.
     */
    async sendWebhook(webhookConfig, alert) {
        const payload = {
            alertId: alert.id,
            severity: alert.severity,
            message: alert.message,
            timestamp: alert.timestamp,
            details: alert.details
        };
        const response = await fetch(webhookConfig.url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                ...webhookConfig.headers
            },
            body: JSON.stringify(payload)
        });
        if (!response.ok) {
            throw new Error(`Webhook failed: ${response.status} ${response.statusText}`);
        }
    }

    /**
     * Send Slack notification
     *
     * @param {object} slackConfig - Slack settings (`webhookUrl`, `channel`).
     * @param {object} alert - Alert to deliver.
     * @throws {Error} When Slack responds with a non-2xx status.
     */
    async sendSlackNotification(slackConfig, alert) {
        const color = {
            low: '#36a64f',
            medium: '#ff9500',
            high: '#ff0000',
            critical: '#8b0000'
        }[alert.severity];
        const payload = {
            channel: slackConfig.channel,
            username: 'Hive AI Monitoring',
            icon_emoji: ':warning:',
            attachments: [{
                    color,
                    title: `Alert: ${alert.ruleName}`,
                    text: alert.message,
                    fields: [
                        { title: 'Severity', value: alert.severity.toUpperCase(), short: true },
                        { title: 'Time', value: new Date(alert.timestamp).toLocaleString(), short: true }
                    ],
                    footer: 'Hive AI',
                    // Slack expects the attachment timestamp in epoch seconds.
                    ts: Math.floor(new Date(alert.timestamp).getTime() / 1000)
                }]
        };
        const response = await fetch(slackConfig.webhookUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        if (!response.ok) {
            throw new Error(`Slack notification failed: ${response.status} ${response.statusText}`);
        }
    }

    /**
     * Check if rule is throttled
     *
     * @param {object} rule - Rule to check; a rule without throttle
     *   settings is never throttled.
     * @returns {boolean} True when the rule already fired `maxAlerts` times
     *   within its throttle window.
     */
    isThrottled(rule) {
        if (!rule.throttle?.enabled) {
            return false;
        }
        const now = Date.now();
        const windowMs = rule.throttle.windowMinutes * 60 * 1000;
        const windowStart = now - windowMs;
        const recentAlerts = this.throttleTracker.get(rule.id) || [];
        const alertsInWindow = recentAlerts.filter(timestamp => timestamp > windowStart);
        return alertsInWindow.length >= rule.throttle.maxAlerts;
    }

    /**
     * Update throttle tracker
     *
     * @param {string} ruleId - Id of the rule that just fired.
     */
    updateThrottleTracker(ruleId) {
        const now = Date.now();
        const current = this.throttleTracker.get(ruleId) || [];
        current.push(now);
        // Keep only last 100 timestamps to prevent memory leaks
        if (current.length > MAX_THROTTLE_TIMESTAMPS) {
            current.splice(0, current.length - MAX_THROTTLE_TIMESTAMPS);
        }
        this.throttleTracker.set(ruleId, current);
    }

    /**
     * Get recent metrics for evaluation
     *
     * NOTE(review): this is a stub that always returns an empty array, so
     * no rule can currently trigger until a metrics source is wired in.
     *
     * @param {number} windowMinutes - Look-back window (currently unused).
     * @returns {Promise<object[]>} Metric samples within the window.
     */
    async getRecentMetrics(windowMinutes) {
        // This would fetch from the performance monitor or database
        // For now, return empty array
        return [];
    }

    /**
     * Get anomaly baseline for metric
     *
     * @param {string} metric - Metric path.
     * @returns {number[]} Stored baseline values, or an empty array.
     */
    getAnomalyBaseline(metric) {
        return this.anomalyBaselines.get(metric) || [];
    }

    /**
     * Update anomaly baseline
     *
     * @param {string} metric - Metric path.
     * @param {number[]} newValues - Values to append to the baseline.
     */
    updateAnomalyBaseline(metric, newValues) {
        const current = this.anomalyBaselines.get(metric) || [];
        const updated = [...current, ...newValues];
        // Keep only recent values for baseline (last 1000 data points)
        if (updated.length > MAX_BASELINE_POINTS) {
            updated.splice(0, updated.length - MAX_BASELINE_POINTS);
        }
        this.anomalyBaselines.set(metric, updated);
    }

    /**
     * Calculate standard deviation
     *
     * @param {number[]} values - Non-empty list of numbers.
     * @returns {number} Population standard deviation (divides by N).
     */
    calculateStandardDeviation(values) {
        const avg = values.reduce((sum, val) => sum + val, 0) / values.length;
        const variance = values.reduce((sum, val) => sum + Math.pow(val - avg, 2), 0) / values.length;
        return Math.sqrt(variance);
    }

    /**
     * Generate alert message
     *
     * @param {object} rule - Rule that triggered.
     * @param {object[]} metrics - Metric samples that triggered the rule.
     * @returns {string} Message with the rule description, the latest
     *   metric value (or `N/A`) and the configured threshold.
     */
    generateAlertMessage(rule, metrics) {
        const values = this.extractMetricValues(rule.condition.metric, metrics);
        const currentValue = values[values.length - 1];
        return `${rule.description}. Current value: ${currentValue?.toFixed(2) || 'N/A'}, Threshold: ${rule.condition.value}`;
    }

    /**
     * Public API methods
     */

    /**
     * Acknowledge alert
     *
     * @param {string} alertId - Id of the alert to acknowledge.
     * @param {string} acknowledgedBy - Who acknowledged the alert.
     * @returns {boolean} False when no alert with that id exists.
     */
    acknowledgeAlert(alertId, acknowledgedBy) {
        const alert = this.alerts.find(a => a.id === alertId);
        if (!alert) {
            return false;
        }
        alert.acknowledged = true;
        alert.acknowledgedAt = new Date().toISOString();
        alert.acknowledgedBy = acknowledgedBy;
        structuredLogger.info('Alert acknowledged', { alertId, acknowledgedBy });
        return true;
    }

    /**
     * Resolve alert
     *
     * @param {string} alertId - Id of the alert to resolve.
     * @returns {boolean} False when no alert with that id exists.
     */
    resolveAlert(alertId) {
        const alert = this.alerts.find(a => a.id === alertId);
        if (!alert) {
            return false;
        }
        alert.resolved = true;
        alert.resolvedAt = new Date().toISOString();
        structuredLogger.info('Alert resolved', { alertId });
        return true;
    }

    /**
     * Get all alerts
     *
     * @param {object} [filter] - Optional filter with `severity`,
     *   `resolved`, `acknowledged` and/or `since` (date or date string).
     * @returns {object[]} Matching alerts, newest first.
     */
    getAlerts(filter) {
        let filtered = [...this.alerts];
        if (filter) {
            if (filter.severity) {
                filtered = filtered.filter(a => a.severity === filter.severity);
            }
            if (filter.resolved !== undefined) {
                filtered = filtered.filter(a => a.resolved === filter.resolved);
            }
            if (filter.acknowledged !== undefined) {
                filtered = filtered.filter(a => a.acknowledged === filter.acknowledged);
            }
            if (filter.since) {
                const since = new Date(filter.since);
                filtered = filtered.filter(a => new Date(a.timestamp) >= since);
            }
        }
        return filtered.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
    }

    /**
     * Get alerting statistics
     *
     * @returns {object} Rule and alert counts, including per-severity
     *   totals, unresolved alerts and alerts fired in the last 24 hours.
     */
    getAlertingStats() {
        const enabledRules = Array.from(this.rules.values()).filter(r => r.enabled).length;
        const unresolvedAlerts = this.alerts.filter(a => !a.resolved).length;
        const last24h = new Date(Date.now() - 24 * 60 * 60 * 1000);
        const alertsLast24h = this.alerts.filter(a => new Date(a.timestamp) >= last24h).length;
        const alertsBySeverity = this.alerts.reduce((acc, alert) => {
            acc[alert.severity] = (acc[alert.severity] || 0) + 1;
            return acc;
        }, {});
        return {
            totalRules: this.rules.size,
            enabledRules,
            totalAlerts: this.alerts.length,
            alertsBySeverity,
            unresolvedAlerts,
            alertsLast24h
        };
    }

    /**
     * Update alerting configuration
     *
     * `defaultThresholds` and `anomalyDetection` are merged key-by-key so a
     * partial update keeps the other values; every other top-level key is
     * replaced. Existing rules keep the thresholds they were created with.
     *
     * @param {object} newConfig - Partial configuration to apply.
     */
    updateConfig(newConfig) {
        this.config = mergeAlertingConfig(this.config, newConfig);
        structuredLogger.info('Alerting configuration updated');
    }

    /**
     * Get current configuration
     *
     * @returns {object} Shallow copy of the configuration.
     */
    getConfig() {
        return { ...this.config };
    }
}

/**
 * Global advanced alerting instance
 */
export const globalAdvancedAlerting = new AdvancedAlerting();
//# sourceMappingURL=advanced-alerting.js.map