@hivetechs/hive-ai
Version:
Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
722 lines • 25.8 kB
JavaScript
/**
* Advanced Alerting System - Expert-level monitoring and alerting
*
* Provides intelligent alerting with smart thresholds, anomaly detection,
* and customizable alert rules for production monitoring.
*/
import { structuredLogger } from './structured-logger.js';
/**
 * Rule-driven alerting engine.
 *
 * Holds a set of alert rules, periodically evaluates each enabled rule against
 * recent metrics, and fires alerts (log / webhook / Slack / circuit-breaker /
 * auto-scale actions) subject to per-rule throttling.
 */
export class AdvancedAlerting {
    /** Alert rules keyed by their generated rule ID. */
    rules = new Map();
    /** Every alert fired by this instance, in firing order. */
    alerts = [];
    /** Effective configuration: built-in defaults merged with caller overrides. */
    config;
    /** Baseline samples per metric path, consumed by anomaly conditions. */
    anomalyBaselines = new Map();
    /** Firing timestamps (epoch milliseconds) per rule ID, used for throttling. */
    throttleTracker = new Map();
    /** Handle of the periodic check timer, or null while monitoring is stopped. */
    monitorTimer = null;

    /**
     * @param {object} [config] - Partial configuration. Nested sections
     *   (`defaultThresholds`, `anomalyDetection`, `notifications`) are merged
     *   key-by-key with the defaults, so a partial section keeps the remaining
     *   default values instead of dropping them.
     */
    constructor(config) {
        this.config = AdvancedAlerting.mergeConfig(AdvancedAlerting.createDefaultConfig(), config);
        this.initializeDefaultRules();
        this.startMonitoring();
    }

    /**
     * Build a fresh copy of the built-in default configuration.
     * @returns {object} Default configuration object.
     */
    static createDefaultConfig() {
        return {
            enabled: true,
            defaultThresholds: {
                responseTime: 30000, // 30 seconds
                errorRate: 0.05, // 5%
                costSpike: 2.0, // 2x normal cost
                qualityDrop: 0.8 // Below 8/10 quality score
            },
            anomalyDetection: {
                enabled: true,
                sensitivity: 'medium',
                learningPeriodDays: 7
            },
            notifications: {}
        };
    }

    /**
     * Merge configuration overrides onto a base configuration.
     *
     * Top-level keys are overridden as a whole, except for the three nested
     * sections, which are merged one level deep into fresh objects. Neither
     * input is mutated.
     *
     * @param {object} base - Configuration to start from.
     * @param {object} [overrides] - Partial configuration; may be null/undefined.
     * @returns {object} New merged configuration.
     */
    static mergeConfig(base, overrides = {}) {
        const merged = { ...base, ...overrides };
        for (const section of ['defaultThresholds', 'anomalyDetection', 'notifications']) {
            merged[section] = { ...base?.[section], ...overrides?.[section] };
        }
        return merged;
    }

    /**
     * Initialize default alert rules
     *
     * Thresholds are read from `config.defaultThresholds` once, at the time
     * this method runs.
     */
    initializeDefaultRules() {
        const { responseTime, errorRate, costSpike, qualityDrop } = this.config.defaultThresholds;
        const defaultRules = [
            {
                name: 'High Response Time',
                description: 'Response time exceeds acceptable threshold',
                enabled: true,
                category: 'performance',
                severity: 'high',
                condition: {
                    type: 'threshold',
                    metric: 'pipeline.totalDuration',
                    operator: '>',
                    value: responseTime,
                    windowMinutes: 5,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true },
                    { type: 'webhook', config: {}, enabled: false }
                ],
                throttle: {
                    enabled: true,
                    windowMinutes: 15,
                    maxAlerts: 3
                }
            },
            {
                name: 'OpenRouter High Error Rate',
                description: 'OpenRouter API error rate is too high',
                enabled: true,
                category: 'availability',
                severity: 'critical',
                condition: {
                    type: 'threshold',
                    metric: 'openRouter.successRate',
                    operator: '<',
                    // The rule watches the success rate, so invert the error-rate threshold.
                    value: 1 - errorRate,
                    windowMinutes: 10,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'error' }, enabled: true },
                    { type: 'circuit_breaker', config: { action: 'trip' }, enabled: true }
                ],
                throttle: {
                    enabled: true,
                    windowMinutes: 30,
                    maxAlerts: 2
                }
            },
            {
                name: 'Cost Spike Detection',
                description: 'Unusual increase in costs detected',
                enabled: true,
                category: 'cost',
                severity: 'medium',
                condition: {
                    type: 'anomaly',
                    metric: 'cost.costPerRequest',
                    operator: '>',
                    value: costSpike,
                    windowMinutes: 60
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true }
                ],
                throttle: {
                    enabled: true,
                    windowMinutes: 120,
                    maxAlerts: 1
                }
            },
            {
                name: 'Quality Score Drop',
                description: 'Quality scores have dropped significantly',
                enabled: true,
                category: 'quality',
                severity: 'medium',
                condition: {
                    type: 'threshold',
                    metric: 'quality.averageQualityScore',
                    operator: '<',
                    value: qualityDrop,
                    windowMinutes: 30,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'warn' }, enabled: true }
                ],
                throttle: {
                    enabled: true,
                    windowMinutes: 60,
                    maxAlerts: 2
                }
            },
            {
                name: 'Memory Usage Critical',
                description: 'Memory usage is critically high',
                enabled: true,
                category: 'performance',
                severity: 'critical',
                condition: {
                    type: 'threshold',
                    metric: 'resources.memoryUsage',
                    operator: '>',
                    value: 1024 * 1024 * 1024, // 1GB
                    windowMinutes: 5,
                    aggregation: 'avg'
                },
                actions: [
                    { type: 'log', config: { level: 'error' }, enabled: true },
                    { type: 'auto_scale', config: { action: 'restart' }, enabled: false }
                ],
                throttle: {
                    enabled: true,
                    windowMinutes: 10,
                    maxAlerts: 1
                }
            }
        ];
        for (const rule of defaultRules) {
            this.addRule(rule);
        }
    }

    /**
     * Add new alert rule
     *
     * @param {object} rule - Rule definition (name, condition, actions, throttle, ...).
     * @returns {string} Generated ID under which the rule was stored.
     */
    addRule(rule) {
        const id = `rule_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
        const now = new Date().toISOString();
        const fullRule = {
            ...rule,
            id,
            createdAt: now,
            updatedAt: now,
            triggeredCount: 0
        };
        this.rules.set(id, fullRule);
        structuredLogger.info('Alert rule added', { ruleId: id, ruleName: rule.name });
        return id;
    }

    /**
     * Update existing alert rule
     *
     * @param {string} id - ID returned by `addRule`.
     * @param {object} updates - Fields to overwrite; `id` and `createdAt` are preserved.
     * @returns {boolean} False when no rule has this ID, true otherwise.
     */
    updateRule(id, updates) {
        const rule = this.rules.get(id);
        if (!rule) {
            return false;
        }
        const updatedRule = {
            ...rule,
            ...updates,
            id: rule.id, // Preserve original ID
            createdAt: rule.createdAt, // Preserve creation time
            updatedAt: new Date().toISOString()
        };
        this.rules.set(id, updatedRule);
        structuredLogger.info('Alert rule updated', { ruleId: id });
        return true;
    }

    /**
     * Remove alert rule
     *
     * @param {string} id - Rule ID.
     * @returns {boolean} True when a rule was removed.
     */
    removeRule(id) {
        const removed = this.rules.delete(id);
        if (removed) {
            structuredLogger.info('Alert rule removed', { ruleId: id });
        }
        return removed;
    }

    /**
     * Get all alert rules
     *
     * @returns {object[]} New array holding the stored rule objects.
     */
    getRules() {
        return Array.from(this.rules.values());
    }

    /**
     * Start monitoring for alerts
     *
     * Schedules `checkAlerts` every 30 seconds. Calling this while a timer is
     * already active is a no-op, so the rules are never evaluated twice per tick.
     */
    startMonitoring() {
        if (this.monitorTimer) {
            return;
        }
        const checkIntervalMs = 30000; // Check every 30 seconds
        this.monitorTimer = setInterval(() => {
            // checkAlerts handles its own errors; the promise is intentionally not awaited.
            void this.checkAlerts();
        }, checkIntervalMs);
        // A background monitoring timer must not be the only thing keeping the
        // process alive (otherwise importing this module prevents a clean exit).
        this.monitorTimer.unref?.();
    }

    /**
     * Stop the periodic alert check started by `startMonitoring`.
     * Safe to call when monitoring is not running.
     */
    stopMonitoring() {
        if (!this.monitorTimer) {
            return;
        }
        clearInterval(this.monitorTimer);
        this.monitorTimer = null;
    }

    /**
     * Check all rules against current metrics
     *
     * Does nothing when `config.enabled` is falsy. Enabled rules are evaluated
     * one after another; failures are logged rather than thrown.
     */
    async checkAlerts() {
        if (!this.config.enabled) {
            return;
        }
        try {
            for (const rule of this.rules.values()) {
                if (!rule.enabled) {
                    continue;
                }
                await this.evaluateRule(rule);
            }
        }
        catch (error) {
            structuredLogger.error('Alert checking failed', {}, error);
        }
    }

    /**
     * Evaluate a single alert rule
     *
     * Fetches metrics for the rule's window (5 minutes when unset), and fires
     * an alert when the condition holds and the rule is not throttled.
     *
     * @param {object} rule - Stored rule object.
     */
    async evaluateRule(rule) {
        try {
            // Get recent metrics for evaluation
            const metrics = await this.getRecentMetrics(rule.condition.windowMinutes || 5);
            if (metrics.length === 0) {
                return;
            }
            // Check if rule condition is met
            const triggered = await this.evaluateCondition(rule.condition, metrics);
            if (!triggered || this.isThrottled(rule)) {
                return;
            }
            // Create and fire alert
            await this.fireAlert(rule, metrics);
        }
        catch (error) {
            structuredLogger.error('Rule evaluation failed', { ruleId: rule.id }, error);
        }
    }

    /**
     * Evaluate alert condition against metrics
     *
     * @param {object} condition - Condition with a `type` of 'threshold',
     *   'anomaly', 'trend' or 'composite'.
     * @param {object[]} metrics - Metric samples.
     * @returns {Promise<boolean>} Whether the condition holds; false for unknown types.
     */
    async evaluateCondition(condition, metrics) {
        switch (condition.type) {
            case 'threshold':
                return this.evaluateThresholdCondition(condition, metrics);
            case 'anomaly':
                return this.evaluateAnomalyCondition(condition, metrics);
            case 'trend':
                return this.evaluateTrendCondition(condition, metrics);
            case 'composite':
                return this.evaluateCompositeCondition(condition, metrics);
            default:
                return false;
        }
    }

    /**
     * Convert a condition's `value` to a number.
     *
     * @param {number|string} value - Numeric value or numeric string.
     * @returns {number} The number itself, or `parseFloat(value)` for non-numbers.
     */
    resolveNumericValue(value) {
        return typeof value === 'number' ? value : parseFloat(value);
    }

    /**
     * Evaluate threshold-based condition
     *
     * Aggregates the metric's values ('avg', 'sum', 'min', 'max', 'count', or
     * the latest value for any other aggregation) and compares the result with
     * the condition's value using its operator.
     *
     * @param {object} condition - Threshold condition.
     * @param {object[]} metrics - Metric samples.
     * @returns {boolean} Comparison result; false when no sample carries the
     *   metric or the operator is unknown.
     */
    evaluateThresholdCondition(condition, metrics) {
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length === 0) {
            return false;
        }
        let aggregatedValue;
        switch (condition.aggregation) {
            case 'avg':
                aggregatedValue = values.reduce((sum, val) => sum + val, 0) / values.length;
                break;
            case 'sum':
                aggregatedValue = values.reduce((sum, val) => sum + val, 0);
                break;
            case 'min':
                // Folded instead of spread so large windows cannot exceed the argument limit.
                aggregatedValue = values.reduce((lowest, val) => Math.min(lowest, val), Infinity);
                break;
            case 'max':
                aggregatedValue = values.reduce((highest, val) => Math.max(highest, val), -Infinity);
                break;
            case 'count':
                aggregatedValue = values.length;
                break;
            default:
                aggregatedValue = values[values.length - 1]; // Latest value
        }
        const threshold = this.resolveNumericValue(condition.value);
        switch (condition.operator) {
            case '>':
                return aggregatedValue > threshold;
            case '<':
                return aggregatedValue < threshold;
            case '>=':
                return aggregatedValue >= threshold;
            case '<=':
                return aggregatedValue <= threshold;
            case '=':
                // Equality is tested with a tolerance because the values are floats.
                return Math.abs(aggregatedValue - threshold) < 0.001;
            case '!=':
                return Math.abs(aggregatedValue - threshold) >= 0.001;
            default:
                return false;
        }
    }

    /**
     * Evaluate anomaly-based condition
     *
     * While fewer than 10 baseline samples exist for the metric, the current
     * values are appended to the baseline and the condition reports false.
     * Afterwards the latest value is compared against
     * `(baseline mean + k * baseline std-dev) * condition.value`, where k
     * depends on the configured sensitivity (low 3, medium 2, high 1.5).
     *
     * @param {object} condition - Anomaly condition.
     * @param {object[]} metrics - Metric samples.
     * @returns {boolean} True when the latest value exceeds the anomaly bound.
     */
    evaluateAnomalyCondition(condition, metrics) {
        if (!this.config.anomalyDetection.enabled) {
            return false;
        }
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length === 0) {
            return false;
        }
        const baseline = this.getAnomalyBaseline(condition.metric);
        if (baseline.length < 10) {
            // Not enough historical data
            this.updateAnomalyBaseline(condition.metric, values);
            return false;
        }
        const currentValue = values[values.length - 1];
        const baselineAvg = baseline.reduce((sum, val) => sum + val, 0) / baseline.length;
        const baselineStd = this.calculateStandardDeviation(baseline);
        // Determine sensitivity threshold
        const sensitivityMultipliers = { low: 3, medium: 2, high: 1.5 };
        // An unrecognised sensitivity falls back to 'medium'; otherwise the bound
        // would be NaN and the rule could never fire.
        const sensitivityMultiplier = sensitivityMultipliers[this.config.anomalyDetection.sensitivity]
            ?? sensitivityMultipliers.medium;
        const threshold = baselineAvg + (sensitivityMultiplier * baselineStd);
        const multiplier = this.resolveNumericValue(condition.value);
        return currentValue > (threshold * multiplier);
    }

    /**
     * Evaluate trend-based condition
     *
     * Compares the mean of the last three values with the mean of the first
     * three values and expresses the difference as a percentage of the older
     * mean. Only the '>' and '<' operators are supported.
     *
     * @param {object} condition - Trend condition; `value` is a percentage.
     * @param {object[]} metrics - Metric samples.
     * @returns {boolean} Comparison result; false with fewer than three values.
     */
    evaluateTrendCondition(condition, metrics) {
        const values = this.extractMetricValues(condition.metric, metrics);
        if (values.length < 3) {
            return false;
        }
        // Simple trend detection: compare recent values with older values.
        // With fewer than six values the two slices overlap.
        const recentValues = values.slice(-3);
        const olderValues = values.slice(0, 3);
        const recentAvg = recentValues.reduce((sum, val) => sum + val, 0) / recentValues.length;
        const olderAvg = olderValues.reduce((sum, val) => sum + val, 0) / olderValues.length;
        // An older mean of 0 yields Infinity or NaN here; NaN compares false below.
        const changePercent = ((recentAvg - olderAvg) / olderAvg) * 100;
        const threshold = this.resolveNumericValue(condition.value);
        switch (condition.operator) {
            case '>':
                return changePercent > threshold;
            case '<':
                return changePercent < threshold;
            default:
                return false;
        }
    }

    /**
     * Evaluate composite condition
     *
     * Evaluates every sub-condition and combines the results with the
     * condition's `logic` ('AND' or 'OR').
     *
     * @param {object} condition - Composite condition with `conditions` and `logic`.
     * @param {object[]} metrics - Metric samples.
     * @returns {Promise<boolean>} Combined result; false when there are no
     *   sub-conditions or the logic is unknown.
     */
    async evaluateCompositeCondition(condition, metrics) {
        if (!condition.conditions || condition.conditions.length === 0) {
            return false;
        }
        const results = await Promise.all(condition.conditions.map(subCondition => this.evaluateCondition(subCondition, metrics)));
        switch (condition.logic) {
            case 'AND':
                return results.every(result => result);
            case 'OR':
                return results.some(result => result);
            default:
                return false;
        }
    }

    /**
     * Extract metric values from performance metrics
     *
     * Samples in which the metric is missing, non-numeric or NaN are skipped.
     * They are deliberately NOT reported as 0: a fabricated zero would drag
     * averages down and make every '<' rule fire whenever a sample simply
     * lacks the metric.
     *
     * @param {string} metricPath - Dot-separated path, e.g. 'pipeline.totalDuration'.
     * @param {object[]} metrics - Metric samples.
     * @returns {number[]} Numeric values found, in sample order.
     */
    extractMetricValues(metricPath, metrics) {
        return metrics
            .map(metric => this.getNestedValue(metric, metricPath))
            .filter(value => typeof value === 'number' && !Number.isNaN(value));
    }

    /**
     * Get nested value from object using dot notation
     *
     * @param {object} obj - Object to read from.
     * @param {string} path - Dot-separated property path.
     * @returns {*} The value at the path, or undefined when any segment is missing.
     */
    getNestedValue(obj, path) {
        return path.split('.').reduce((current, key) => current?.[key], obj);
    }

    /**
     * Fire an alert
     *
     * Updates the rule's statistics, records the alert, registers the firing
     * for throttling, runs the rule's actions and logs the event.
     *
     * @param {object} rule - Rule whose condition was met.
     * @param {object[]} metrics - Metric samples that triggered the rule.
     */
    async fireAlert(rule, metrics) {
        const alertId = `alert_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
        const now = new Date().toISOString();
        // Update rule statistics
        rule.triggeredCount++;
        rule.lastTriggered = now;
        rule.updatedAt = now;
        // Create alert
        const alert = {
            id: alertId,
            ruleId: rule.id,
            ruleName: rule.name,
            severity: rule.severity,
            message: this.generateAlertMessage(rule, metrics),
            details: {
                condition: rule.condition,
                metricValues: this.extractMetricValues(rule.condition.metric, metrics),
                timestamp: now
            },
            timestamp: now,
            resolved: false,
            acknowledged: false
        };
        this.alerts.push(alert);
        // Register the firing BEFORE awaiting the actions: a slow action must not
        // let an overlapping check see the rule as un-throttled and fire again.
        this.updateThrottleTracker(rule.id);
        // Execute alert actions
        await this.executeAlertActions(rule.actions, alert);
        structuredLogger.warn('Alert fired', {
            alertId: alert.id,
            ruleId: rule.id,
            ruleName: rule.name,
            severity: rule.severity,
            message: alert.message
        });
    }

    /**
     * Execute alert actions
     *
     * Runs each enabled action in order. A failing action is logged and does
     * not prevent the remaining actions from running.
     *
     * @param {object[]} [actions] - Actions of the rule; a missing list is treated as empty.
     * @param {object} alert - Alert being dispatched.
     */
    async executeAlertActions(actions, alert) {
        for (const action of actions ?? []) {
            if (!action.enabled) {
                continue;
            }
            try {
                await this.executeAction(action, alert);
            }
            catch (error) {
                structuredLogger.error('Alert action failed', {
                    alertId: alert.id,
                    actionType: action.type
                }, error);
            }
        }
    }

    /**
     * Execute individual alert action
     *
     * Supported action types: 'log', 'webhook', 'slack', 'circuit_breaker' and
     * 'auto_scale'. Unknown types are ignored.
     *
     * @param {object} action - Action with `type` and optional `config`.
     * @param {object} alert - Alert being dispatched.
     */
    async executeAction(action, alert) {
        switch (action.type) {
            case 'log': {
                const level = action.config?.level || 'warn';
                // Any level other than error/warn/info is logged at debug level.
                const method = ['error', 'warn', 'info'].includes(level) ? level : 'debug';
                structuredLogger[method]('Alert triggered', { alertId: alert.id, severity: alert.severity, message: alert.message });
                break;
            }
            case 'webhook':
                if (this.config.notifications.webhook) {
                    await this.sendWebhook(this.config.notifications.webhook, alert);
                }
                break;
            case 'slack':
                if (this.config.notifications.slack) {
                    await this.sendSlackNotification(this.config.notifications.slack, alert);
                }
                break;
            case 'circuit_breaker':
                if (action.config?.action === 'trip') {
                    // NOTE(review): the health monitor module is loaded but nothing is
                    // called on it yet, so this branch only logs.
                    // TODO: report the failure to the health monitor once its API is confirmed.
                    await import('./health-monitor.js');
                    structuredLogger.warn('Circuit breaker tripped by alert', { alertId: alert.id });
                }
                break;
            case 'auto_scale':
                // Placeholder for auto-scaling actions
                structuredLogger.info('Auto-scale action triggered', { alertId: alert.id, action: action.config?.action });
                break;
        }
    }

    /**
     * Send webhook notification
     *
     * @param {object} webhookConfig - Uses `url`, optional `headers`, and an
     *   optional `timeoutMs` (default 10000).
     * @param {object} alert - Alert to deliver.
     * @throws {Error} When the response status is not OK, or the request
     *   fails or times out.
     */
    async sendWebhook(webhookConfig, alert) {
        const payload = {
            alertId: alert.id,
            severity: alert.severity,
            message: alert.message,
            timestamp: alert.timestamp,
            details: alert.details
        };
        const response = await fetch(webhookConfig.url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                ...webhookConfig.headers
            },
            body: JSON.stringify(payload),
            // Bound the request so an unresponsive endpoint cannot stall rule evaluation.
            signal: AbortSignal.timeout(webhookConfig.timeoutMs ?? 10000)
        });
        if (!response.ok) {
            throw new Error(`Webhook failed: ${response.status} ${response.statusText}`);
        }
    }

    /**
     * Send Slack notification
     *
     * @param {object} slackConfig - Uses `webhookUrl`, `channel`, and an
     *   optional `timeoutMs` (default 10000).
     * @param {object} alert - Alert to deliver.
     * @throws {Error} When the response status is not OK, or the request
     *   fails or times out.
     */
    async sendSlackNotification(slackConfig, alert) {
        const severityColors = {
            low: '#36a64f',
            medium: '#ff9500',
            high: '#ff0000',
            critical: '#8b0000'
        };
        const payload = {
            channel: slackConfig.channel,
            username: 'Hive AI Monitoring',
            icon_emoji: ':warning:',
            attachments: [{
                    color: severityColors[alert.severity],
                    title: `Alert: ${alert.ruleName}`,
                    text: alert.message,
                    fields: [
                        { title: 'Severity', value: alert.severity.toUpperCase(), short: true },
                        { title: 'Time', value: new Date(alert.timestamp).toLocaleString(), short: true }
                    ],
                    footer: 'Hive AI',
                    // Timestamp in whole seconds since the epoch.
                    ts: Math.floor(new Date(alert.timestamp).getTime() / 1000)
                }]
        };
        const response = await fetch(slackConfig.webhookUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload),
            // Bound the request so an unresponsive endpoint cannot stall rule evaluation.
            signal: AbortSignal.timeout(slackConfig.timeoutMs ?? 10000)
        });
        if (!response.ok) {
            throw new Error(`Slack notification failed: ${response.status} ${response.statusText}`);
        }
    }

    /**
     * Check if rule is throttled
     *
     * @param {object} rule - Rule to check; a rule without a `throttle`
     *   section is never throttled.
     * @returns {boolean} True when the rule already fired `maxAlerts` times
     *   within the last `windowMinutes`.
     */
    isThrottled(rule) {
        const throttle = rule.throttle;
        if (!throttle?.enabled) {
            return false;
        }
        const windowStart = Date.now() - throttle.windowMinutes * 60 * 1000;
        const recentAlerts = this.throttleTracker.get(rule.id) || [];
        const alertsInWindow = recentAlerts.filter(timestamp => timestamp > windowStart);
        return alertsInWindow.length >= throttle.maxAlerts;
    }

    /**
     * Update throttle tracker
     *
     * Records the current time as a firing of the given rule.
     *
     * @param {string} ruleId - Rule ID.
     */
    updateThrottleTracker(ruleId) {
        const current = this.throttleTracker.get(ruleId) || [];
        current.push(Date.now());
        // Keep only last 100 timestamps to prevent memory leaks
        if (current.length > 100) {
            current.splice(0, current.length - 100);
        }
        this.throttleTracker.set(ruleId, current);
    }

    /**
     * Get recent metrics for evaluation
     *
     * Currently a stub that always resolves to an empty array, so no rule is
     * evaluated until this is wired to a metrics source.
     *
     * @param {number} windowMinutes - Size of the look-back window (unused by the stub).
     * @returns {Promise<object[]>} Metric samples.
     */
    async getRecentMetrics(windowMinutes) {
        // This would fetch from the performance monitor or database
        // For now, return empty array
        return [];
    }

    /**
     * Get anomaly baseline for metric
     *
     * @param {string} metric - Metric path.
     * @returns {number[]} Stored baseline samples, or an empty array.
     */
    getAnomalyBaseline(metric) {
        return this.anomalyBaselines.get(metric) || [];
    }

    /**
     * Update anomaly baseline
     *
     * Appends the new values and keeps only the most recent 1000 samples.
     *
     * @param {string} metric - Metric path.
     * @param {number[]} newValues - Samples to append.
     */
    updateAnomalyBaseline(metric, newValues) {
        const current = this.anomalyBaselines.get(metric) || [];
        const updated = [...current, ...newValues];
        // Keep only recent values for baseline (last 1000 data points)
        const maxBaseline = 1000;
        if (updated.length > maxBaseline) {
            updated.splice(0, updated.length - maxBaseline);
        }
        this.anomalyBaselines.set(metric, updated);
    }

    /**
     * Calculate standard deviation
     *
     * Uses the population formula (variance divided by the number of values).
     *
     * @param {number[]} values - Samples.
     * @returns {number} Standard deviation; NaN for an empty array.
     */
    calculateStandardDeviation(values) {
        const avg = values.reduce((sum, val) => sum + val, 0) / values.length;
        const variance = values.reduce((sum, val) => sum + Math.pow(val - avg, 2), 0) / values.length;
        return Math.sqrt(variance);
    }

    /**
     * Generate alert message
     *
     * @param {object} rule - Rule that fired.
     * @param {object[]} metrics - Metric samples that triggered the rule.
     * @returns {string} Rule description followed by the latest metric value
     *   ('N/A' when none is available) and the condition's value.
     */
    generateAlertMessage(rule, metrics) {
        const values = this.extractMetricValues(rule.condition.metric, metrics);
        const currentValue = values[values.length - 1];
        return `${rule.description}. Current value: ${currentValue?.toFixed(2) ?? 'N/A'}, Threshold: ${rule.condition.value}`;
    }

    /**
     * Public API methods
     */
    /**
     * Acknowledge alert
     *
     * @param {string} alertId - Alert ID.
     * @param {string} acknowledgedBy - Stored on the alert as `acknowledgedBy`.
     * @returns {boolean} False when no alert has this ID.
     */
    acknowledgeAlert(alertId, acknowledgedBy) {
        const alert = this.alerts.find(a => a.id === alertId);
        if (!alert) {
            return false;
        }
        alert.acknowledged = true;
        alert.acknowledgedAt = new Date().toISOString();
        alert.acknowledgedBy = acknowledgedBy;
        structuredLogger.info('Alert acknowledged', { alertId, acknowledgedBy });
        return true;
    }

    /**
     * Resolve alert
     *
     * @param {string} alertId - Alert ID.
     * @returns {boolean} False when no alert has this ID.
     */
    resolveAlert(alertId) {
        const alert = this.alerts.find(a => a.id === alertId);
        if (!alert) {
            return false;
        }
        alert.resolved = true;
        alert.resolvedAt = new Date().toISOString();
        structuredLogger.info('Alert resolved', { alertId });
        return true;
    }

    /**
     * Get all alerts
     *
     * @param {object} [filter] - Optional `severity`, `resolved`,
     *   `acknowledged` and `since` (anything `new Date()` accepts) criteria.
     * @returns {object[]} Matching alerts, newest first.
     */
    getAlerts(filter) {
        let filtered = [...this.alerts];
        if (filter) {
            if (filter.severity) {
                filtered = filtered.filter(a => a.severity === filter.severity);
            }
            if (filter.resolved !== undefined) {
                filtered = filtered.filter(a => a.resolved === filter.resolved);
            }
            if (filter.acknowledged !== undefined) {
                filtered = filtered.filter(a => a.acknowledged === filter.acknowledged);
            }
            if (filter.since) {
                const since = new Date(filter.since);
                filtered = filtered.filter(a => new Date(a.timestamp) >= since);
            }
        }
        return filtered.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
    }

    /**
     * Get alerting statistics
     *
     * @returns {object} Counts of rules, enabled rules, alerts in total, per
     *   severity, unresolved, and fired within the last 24 hours.
     */
    getAlertingStats() {
        const enabledRules = Array.from(this.rules.values()).filter(r => r.enabled).length;
        const unresolvedAlerts = this.alerts.filter(a => !a.resolved).length;
        const last24h = new Date(Date.now() - 24 * 60 * 60 * 1000);
        const alertsLast24h = this.alerts.filter(a => new Date(a.timestamp) >= last24h).length;
        const alertsBySeverity = this.alerts.reduce((acc, alert) => {
            acc[alert.severity] = (acc[alert.severity] || 0) + 1;
            return acc;
        }, {});
        return {
            totalRules: this.rules.size,
            enabledRules,
            totalAlerts: this.alerts.length,
            alertsBySeverity,
            unresolvedAlerts,
            alertsLast24h
        };
    }

    /**
     * Update alerting configuration
     *
     * Nested sections are merged key-by-key, so a partial section does not
     * discard the settings it leaves out. Existing rules keep the threshold
     * values they were created with.
     *
     * @param {object} newConfig - Partial configuration.
     */
    updateConfig(newConfig) {
        this.config = AdvancedAlerting.mergeConfig(this.config, newConfig);
        structuredLogger.info('Alerting configuration updated');
    }

    /**
     * Get current configuration
     *
     * @returns {object} Copy of the configuration. The top level and the three
     *   nested sections are fresh objects; anything nested deeper is shared.
     */
    getConfig() {
        return AdvancedAlerting.mergeConfig(this.config, {});
    }
}
/**
 * Shared module-level AdvancedAlerting instance, built with the default
 * configuration (no overrides are passed). It is constructed when this module
 * is first evaluated; the constructor registers the default rules and starts
 * the periodic alert check.
 */
export const globalAdvancedAlerting = new AdvancedAlerting();
//# sourceMappingURL=advanced-alerting.js.map