devflow-ai
Version:
Enterprise-grade AI agent orchestration with swarm management UI dashboard
1,345 lines (1,104 loc) • 42.1 kB
text/typescript
/**
* Truth Alert Manager - Advanced alerting and notification system
*
* Provides comprehensive threshold monitoring, intelligent alert routing,
* escalation management, and automated remediation capabilities.
*/
import type { ILogger } from '../core/logger.js';
import type { IEventBus } from '../core/event-bus.js';
import type {
TruthMetric,
TruthAlert,
AlertThreshold,
AlertAction,
EscalationLevel,
TruthAlertType,
TruthTelemetryConfig,
} from './telemetry.js';
export interface AlertRule {
id: string;
name: string;
description: string;
enabled: boolean;
// Condition definition
metric: string;
operator: 'gt' | 'lt' | 'eq' | 'gte' | 'lte' | 'ne' | 'change' | 'rate';
threshold: number;
duration: number; // How long condition must persist (ms)
// Alert configuration
severity: 'info' | 'warning' | 'critical' | 'emergency';
category: TruthAlertType;
priority: number; // 1-10, higher is more urgent
// Context and filtering
filters: Record<string, any>;
conditions: AlertCondition[];
// Actions and escalation
actions: AlertAction[];
escalationPath: EscalationLevel[];
suppressions: AlertSuppression[];
// Metadata
tags: Record<string, string>;
createdBy: string;
createdAt: Date;
lastModified: Date;
}
export interface AlertCondition {
field: string;
operator: string;
value: any;
logicalOperator?: 'AND' | 'OR';
}
export interface AlertSuppression {
id: string;
condition: string;
startTime: Date;
endTime: Date;
reason: string;
createdBy: string;
}
export interface AlertChannel {
id: string;
name: string;
type: 'email' | 'slack' | 'webhook' | 'sms' | 'teams' | 'discord' | 'pagerduty';
config: Record<string, any>;
enabled: boolean;
filters: AlertFilter[];
rateLimits: RateLimit[];
}
export interface AlertFilter {
field: string;
operator: string;
values: string[];
action: 'include' | 'exclude';
}
export interface RateLimit {
window: number; // time window in milliseconds
maxAlerts: number;
resetTime?: Date;
currentCount: number;
}
export interface AlertHistory {
alertId: string;
timestamp: Date;
action: 'created' | 'acknowledged' | 'resolved' | 'escalated' | 'suppressed';
actor: string;
details: Record<string, any>;
}
export interface AlertStatistics {
totalAlerts: number;
activeAlerts: number;
resolvedAlerts: number;
averageResolutionTime: number;
alertsByType: Record<string, number>;
alertsBySeverity: Record<string, number>;
topAlertSources: Array<{ source: string; count: number }>;
escalationRate: number;
falsePositiveRate: number;
}
export interface ThresholdGroup {
name: string;
thresholds: AlertThreshold[];
enabled: boolean;
scope: 'system' | 'agent' | 'task' | 'custom';
}
export class TruthAlertManager {
private config: TruthTelemetryConfig;
private logger: ILogger;
private eventBus: IEventBus;
// Alert management state
private alertRules = new Map<string, AlertRule>();
private activeAlerts = new Map<string, TruthAlert>();
private alertHistory: AlertHistory[] = [];
private alertChannels = new Map<string, AlertChannel>();
private suppressions = new Map<string, AlertSuppression>();
// Threshold monitoring
private thresholdGroups = new Map<string, ThresholdGroup>();
private metricStates = new Map<string, MetricState>();
// Processing state
private processingInterval?: NodeJS.Timeout;
private escalationInterval?: NodeJS.Timeout;
private cleanupInterval?: NodeJS.Timeout;
// Statistics
private statistics: AlertStatistics;
constructor(
config: TruthTelemetryConfig,
logger: ILogger,
eventBus: IEventBus
) {
this.config = config;
this.logger = logger;
this.eventBus = eventBus;
this.statistics = this.initializeStatistics();
this.initializeDefaultRules();
this.initializeDefaultChannels();
}
async initialize(): Promise<void> {
this.logger.info('Initializing Truth Alert Manager', {
alertEnabled: this.config.alertEnabled,
thresholds: this.config.alertThresholds,
});
// Start processing loops
this.startAlertProcessing();
this.startEscalationProcessing();
this.startCleanupProcessing();
// Load configuration
await this.loadAlertConfiguration();
this.logger.info('Truth Alert Manager initialized successfully', {
rules: this.alertRules.size,
channels: this.alertChannels.size,
});
}
async shutdown(): Promise<void> {
this.logger.info('Shutting down Truth Alert Manager');
// Stop processing intervals
if (this.processingInterval) clearInterval(this.processingInterval);
if (this.escalationInterval) clearInterval(this.escalationInterval);
if (this.cleanupInterval) clearInterval(this.cleanupInterval);
// Save state
await this.saveAlertConfiguration();
this.logger.info('Truth Alert Manager shutdown complete');
}
// ========================================================================================
// Alert Processing
// ========================================================================================
async checkThresholds(metric: TruthMetric): Promise<void> {
if (!this.config.alertEnabled) return;
try {
// Update metric state
await this.updateMetricState(metric);
// Check all applicable rules
const applicableRules = this.getApplicableRules(metric);
for (const rule of applicableRules) {
await this.evaluateRule(rule, metric);
}
} catch (error) {
this.logger.error('Error checking thresholds', { metricId: metric.id, error });
}
}
private async updateMetricState(metric: TruthMetric): Promise<void> {
const key = `${metric.agentId}:${metric.metricType}`;
let state = this.metricStates.get(key);
if (!state) {
state = {
key,
currentValue: metric.value,
previousValue: metric.value,
lastUpdate: metric.timestamp,
changeRate: 0,
samples: [metric.value],
thresholdViolations: new Map(),
};
this.metricStates.set(key, state);
} else {
// Update state
state.previousValue = state.currentValue;
state.currentValue = metric.value;
state.lastUpdate = metric.timestamp;
// Calculate change rate (per hour)
const timeDiff = metric.timestamp.getTime() - state.lastUpdate.getTime();
if (timeDiff > 0) {
const valueDiff = metric.value - state.previousValue;
state.changeRate = (valueDiff / timeDiff) * (60 * 60 * 1000); // per hour
}
// Update samples (keep last 100)
state.samples.push(metric.value);
if (state.samples.length > 100) {
state.samples = state.samples.slice(-100);
}
}
}
private getApplicableRules(metric: TruthMetric): AlertRule[] {
return Array.from(this.alertRules.values()).filter(rule => {
if (!rule.enabled) return false;
// Check if rule applies to this metric
if (rule.metric !== metric.metricType && rule.metric !== '*') return false;
// Check filters
if (!this.matchesFilters(metric, rule.filters)) return false;
// Check conditions
if (!this.matchesConditions(metric, rule.conditions)) return false;
return true;
});
}
private async evaluateRule(rule: AlertRule, metric: TruthMetric): Promise<void> {
const key = `${metric.agentId}:${metric.metricType}`;
const state = this.metricStates.get(key);
if (!state) return;
// Evaluate threshold condition
const conditionMet = this.evaluateCondition(rule, metric, state);
if (conditionMet) {
await this.handleThresholdViolation(rule, metric, state);
} else {
await this.handleThresholdClearance(rule, metric, state);
}
}
private evaluateCondition(rule: AlertRule, metric: TruthMetric, state: MetricState): boolean {
const value = metric.value;
const threshold = rule.threshold;
switch (rule.operator) {
case 'gt': return value > threshold;
case 'gte': return value >= threshold;
case 'lt': return value < threshold;
case 'lte': return value <= threshold;
case 'eq': return value === threshold;
case 'ne': return value !== threshold;
case 'change': return Math.abs(value - state.previousValue) > threshold;
case 'rate': return Math.abs(state.changeRate) > threshold;
default: return false;
}
}
private async handleThresholdViolation(
rule: AlertRule,
metric: TruthMetric,
state: MetricState
): Promise<void> {
const violationKey = `${rule.id}:${state.key}`;
let violation = state.thresholdViolations.get(violationKey);
if (!violation) {
// New violation
violation = {
ruleId: rule.id,
startTime: metric.timestamp,
lastSeen: metric.timestamp,
count: 1,
alertId: null,
};
state.thresholdViolations.set(violationKey, violation);
} else {
// Existing violation
violation.lastSeen = metric.timestamp;
violation.count++;
}
// Check if violation has persisted long enough
const duration = metric.timestamp.getTime() - violation.startTime.getTime();
if (duration >= rule.duration && !violation.alertId) {
// Create alert
const alertId = await this.createAlert(rule, metric, violation);
violation.alertId = alertId;
}
}
private async handleThresholdClearance(
rule: AlertRule,
metric: TruthMetric,
state: MetricState
): Promise<void> {
const violationKey = `${rule.id}:${state.key}`;
const violation = state.thresholdViolations.get(violationKey);
if (violation && violation.alertId) {
// Resolve the alert
await this.resolveAlert(violation.alertId, 'threshold_cleared');
}
// Clear violation
state.thresholdViolations.delete(violationKey);
}
private async createAlert(
rule: AlertRule,
metric: TruthMetric,
violation: ThresholdViolation
): Promise<string> {
const alertId = `alert-${Date.now()}-${Math.random().toString(36).slice(2)}`;
const alert: TruthAlert = {
id: alertId,
timestamp: new Date(),
severity: rule.severity,
type: rule.category,
message: this.generateAlertMessage(rule, metric),
source: `agent:${metric.agentId}`,
context: {
ruleId: rule.id,
ruleName: rule.name,
metricType: metric.metricType,
metricValue: metric.value,
threshold: rule.threshold,
operator: rule.operator,
duration: violation.lastSeen.getTime() - violation.startTime.getTime(),
violationCount: violation.count,
agentId: metric.agentId,
taskId: metric.taskId,
...metric.context,
},
thresholds: [
{
metric: rule.metric,
operator: rule.operator,
value: rule.threshold,
duration: rule.duration,
severity: rule.severity,
},
],
actions: [...rule.actions],
escalationPath: [...rule.escalationPath],
resolved: false,
};
this.activeAlerts.set(alertId, alert);
// Update statistics
this.statistics.totalAlerts++;
this.statistics.activeAlerts++;
this.statistics.alertsByType[alert.type] = (this.statistics.alertsByType[alert.type] || 0) + 1;
this.statistics.alertsBySeverity[alert.severity] = (this.statistics.alertsBySeverity[alert.severity] || 0) + 1;
// Log alert creation
this.logger.warn('Truth alert created', {
alertId,
rule: rule.name,
metric: metric.metricType,
value: metric.value,
threshold: rule.threshold,
agent: metric.agentId,
});
// Add to history
this.addToHistory(alertId, 'created', 'system', { rule: rule.name });
// Emit event
this.eventBus.emit('truth-alert:created', { alert, rule, metric });
// Execute alert actions
await this.executeAlertActions(alert);
return alertId;
}
async executeAlertActions(alert: TruthAlert): Promise<void> {
for (const action of alert.actions) {
if (!action.enabled) continue;
try {
await this.executeAction(alert, action);
} catch (error) {
this.logger.error('Failed to execute alert action', {
alertId: alert.id,
actionType: action.type,
error,
});
}
}
}
private async executeAction(alert: TruthAlert, action: AlertAction): Promise<void> {
switch (action.type) {
case 'notify':
await this.sendNotification(alert, action);
break;
case 'escalate':
await this.escalateAlert(alert, action);
break;
case 'auto-remediate':
await this.autoRemediate(alert, action);
break;
case 'suspend':
await this.suspendAgent(alert, action);
break;
case 'restart':
await this.restartAgent(alert, action);
break;
default:
this.logger.warn('Unknown alert action type', { type: action.type });
}
}
// ========================================================================================
// Notification System
// ========================================================================================
private async sendNotification(alert: TruthAlert, action: AlertAction): Promise<void> {
const channelIds = action.config.channels || ['default'];
for (const channelId of channelIds) {
const channel = this.alertChannels.get(channelId);
if (!channel || !channel.enabled) continue;
// Check filters
if (!this.alertMatchesChannelFilters(alert, channel)) continue;
// Check rate limits
if (!this.checkRateLimit(channel, alert)) continue;
await this.sendToChannel(alert, channel);
}
}
private async sendToChannel(alert: TruthAlert, channel: AlertChannel): Promise<void> {
try {
switch (channel.type) {
case 'email':
await this.sendEmailNotification(alert, channel);
break;
case 'slack':
await this.sendSlackNotification(alert, channel);
break;
case 'webhook':
await this.sendWebhookNotification(alert, channel);
break;
case 'teams':
await this.sendTeamsNotification(alert, channel);
break;
case 'discord':
await this.sendDiscordNotification(alert, channel);
break;
case 'pagerduty':
await this.sendPagerDutyNotification(alert, channel);
break;
default:
this.logger.warn('Unknown channel type', { type: channel.type });
}
this.logger.info('Alert notification sent', {
alertId: alert.id,
channel: channel.name,
type: channel.type,
});
} catch (error) {
this.logger.error('Failed to send notification', {
alertId: alert.id,
channel: channel.name,
error,
});
}
}
// ========================================================================================
// Escalation Management
// ========================================================================================
private async escalateAlert(alert: TruthAlert, action: AlertAction): Promise<void> {
const escalationLevel = action.config.level || 1;
if (escalationLevel <= alert.escalationLevel) return; // Already escalated
alert.escalationLevel = escalationLevel;
// Find escalation configuration
const escalation = alert.escalationPath.find(e => e.level === escalationLevel);
if (!escalation) return;
// Check escalation conditions
if (!this.checkEscalationConditions(alert, escalation)) return;
// Update alert severity if needed
if (escalationLevel > 1) {
alert.severity = this.getEscalatedSeverity(alert.severity);
}
// Send escalation notifications
await this.sendEscalationNotifications(alert, escalation);
// Add to history
this.addToHistory(alert.id, 'escalated', 'system', {
level: escalationLevel,
targets: escalation.targets,
});
// Update statistics
this.statistics.escalationRate = this.calculateEscalationRate();
this.logger.warn('Alert escalated', {
alertId: alert.id,
level: escalationLevel,
severity: alert.severity,
});
this.eventBus.emit('truth-alert:escalated', { alert, escalationLevel });
}
private async sendEscalationNotifications(
alert: TruthAlert,
escalation: EscalationLevel
): Promise<void> {
for (const target of escalation.targets) {
const channel = this.alertChannels.get(target);
if (channel && channel.enabled) {
await this.sendToChannel(alert, channel);
}
}
}
// ========================================================================================
// Auto-Remediation
// ========================================================================================
private async autoRemediate(alert: TruthAlert, action: AlertAction): Promise<void> {
const remediationType = action.config.type;
try {
switch (remediationType) {
case 'restart_agent':
await this.restartAgent(alert, action);
break;
case 'scale_resources':
await this.scaleResources(alert, action);
break;
case 'adjust_thresholds':
await this.adjustThresholds(alert, action);
break;
case 'redistribute_load':
await this.redistributeLoad(alert, action);
break;
case 'failover':
await this.initiateFailover(alert, action);
break;
default:
this.logger.warn('Unknown remediation type', { type: remediationType });
}
this.logger.info('Auto-remediation executed', {
alertId: alert.id,
type: remediationType,
});
} catch (error) {
this.logger.error('Auto-remediation failed', {
alertId: alert.id,
type: remediationType,
error,
});
}
}
private async restartAgent(alert: TruthAlert, action: AlertAction): Promise<void> {
const agentId = alert.context.agentId;
if (!agentId) return;
this.eventBus.emit('agent:restart-requested', {
agentId,
reason: 'automated_remediation',
alertId: alert.id,
});
}
private async suspendAgent(alert: TruthAlert, action: AlertAction): Promise<void> {
const agentId = alert.context.agentId;
if (!agentId) return;
this.eventBus.emit('agent:suspend-requested', {
agentId,
duration: action.config.duration || 300000, // 5 minutes default
reason: 'automated_remediation',
alertId: alert.id,
});
}
private async scaleResources(alert: TruthAlert, action: AlertAction): Promise<void> {
this.eventBus.emit('system:scale-requested', {
direction: action.config.direction || 'up',
factor: action.config.factor || 1.5,
reason: 'automated_remediation',
alertId: alert.id,
});
}
private async adjustThresholds(alert: TruthAlert, action: AlertAction): Promise<void> {
const ruleId = alert.context.ruleId;
const rule = this.alertRules.get(ruleId);
if (rule) {
const adjustment = action.config.adjustment || 0.1;
rule.threshold = rule.threshold + (rule.threshold * adjustment);
this.logger.info('Alert threshold adjusted', {
ruleId,
oldThreshold: rule.threshold - (rule.threshold * adjustment),
newThreshold: rule.threshold,
});
}
}
private async redistributeLoad(alert: TruthAlert, action: AlertAction): Promise<void> {
this.eventBus.emit('load-balancer:redistribute', {
reason: 'automated_remediation',
alertId: alert.id,
excludeAgents: [alert.context.agentId],
});
}
private async initiateFailover(alert: TruthAlert, action: AlertAction): Promise<void> {
this.eventBus.emit('system:failover-requested', {
primaryAgent: alert.context.agentId,
reason: 'automated_remediation',
alertId: alert.id,
});
}
// ========================================================================================
// Alert Resolution
// ========================================================================================
async resolveAlert(alertId: string, reason: string, resolvedBy?: string): Promise<boolean> {
const alert = this.activeAlerts.get(alertId);
if (!alert || alert.resolved) return false;
alert.resolved = true;
alert.resolvedAt = new Date();
alert.resolvedBy = resolvedBy || 'system';
alert.context.resolutionReason = reason;
// Update statistics
this.statistics.activeAlerts--;
this.statistics.resolvedAlerts++;
// Calculate resolution time
const resolutionTime = alert.resolvedAt.getTime() - alert.timestamp.getTime();
this.updateAverageResolutionTime(resolutionTime);
// Add to history
this.addToHistory(alertId, 'resolved', resolvedBy || 'system', { reason });
this.logger.info('Alert resolved', {
alertId,
reason,
resolvedBy: resolvedBy || 'system',
duration: resolutionTime,
});
this.eventBus.emit('truth-alert:resolved', { alert, reason, resolvedBy });
return true;
}
async acknowledgeAlert(alertId: string, acknowledgedBy: string, comment?: string): Promise<boolean> {
const alert = this.activeAlerts.get(alertId);
if (!alert || alert.resolved) return false;
alert.acknowledged = true;
alert.acknowledgedAt = new Date();
alert.acknowledgedBy = acknowledgedBy;
if (comment) {
alert.context.acknowledgmentComment = comment;
}
// Add to history
this.addToHistory(alertId, 'acknowledged', acknowledgedBy, { comment });
this.logger.info('Alert acknowledged', {
alertId,
acknowledgedBy,
comment,
});
this.eventBus.emit('truth-alert:acknowledged', { alert, acknowledgedBy, comment });
return true;
}
// ========================================================================================
// Rule Management
// ========================================================================================
createAlertRule(rule: Omit<AlertRule, 'id' | 'createdAt' | 'lastModified'>): string {
const ruleId = `rule-${Date.now()}-${Math.random().toString(36).slice(2)}`;
const fullRule: AlertRule = {
...rule,
id: ruleId,
createdAt: new Date(),
lastModified: new Date(),
};
this.alertRules.set(ruleId, fullRule);
this.logger.info('Alert rule created', {
ruleId,
name: rule.name,
metric: rule.metric,
threshold: rule.threshold,
});
return ruleId;
}
updateAlertRule(ruleId: string, updates: Partial<AlertRule>): boolean {
const rule = this.alertRules.get(ruleId);
if (!rule) return false;
const updatedRule = {
...rule,
...updates,
id: ruleId, // Prevent ID changes
lastModified: new Date(),
};
this.alertRules.set(ruleId, updatedRule);
this.logger.info('Alert rule updated', { ruleId, updates });
return true;
}
deleteAlertRule(ruleId: string): boolean {
const deleted = this.alertRules.delete(ruleId);
if (deleted) {
this.logger.info('Alert rule deleted', { ruleId });
}
return deleted;
}
// ========================================================================================
// Processing Loops
// ========================================================================================
private startAlertProcessing(): void {
this.processingInterval = setInterval(() => {
this.processActiveAlerts();
}, 30000); // Every 30 seconds
this.logger.info('Started alert processing');
}
private startEscalationProcessing(): void {
this.escalationInterval = setInterval(() => {
this.processEscalations();
}, 60000); // Every minute
this.logger.info('Started escalation processing');
}
private startCleanupProcessing(): void {
this.cleanupInterval = setInterval(() => {
this.cleanupResolvedAlerts();
this.cleanupOldHistory();
this.resetRateLimits();
}, 300000); // Every 5 minutes
this.logger.info('Started cleanup processing');
}
async processAlert(alert: TruthAlert): Promise<void> {
// Check for auto-resolution conditions
await this.checkAutoResolution(alert);
// Check for escalation conditions
await this.checkEscalation(alert);
// Update alert statistics
this.updateAlertStatistics(alert);
}
private async processActiveAlerts(): Promise<void> {
for (const alert of this.activeAlerts.values()) {
if (!alert.resolved) {
await this.processAlert(alert);
}
}
}
private async processEscalations(): Promise<void> {
for (const alert of this.activeAlerts.values()) {
if (!alert.resolved && !alert.acknowledged) {
await this.checkEscalation(alert);
}
}
}
private async checkAutoResolution(alert: TruthAlert): Promise<void> {
// Check if the underlying condition has been resolved
const currentValue = await this.getCurrentMetricValue(alert);
if (currentValue === null) return;
const rule = this.alertRules.get(alert.context.ruleId);
if (!rule) return;
// Check if condition is no longer met
const conditionMet = this.evaluateConditionValue(rule, currentValue);
if (!conditionMet) {
await this.resolveAlert(alert.id, 'condition_resolved');
}
}
private async checkEscalation(alert: TruthAlert): Promise<void> {
const now = Date.now();
const alertAge = now - alert.timestamp.getTime();
// Find next escalation level
const nextEscalation = alert.escalationPath.find(
e => e.level > alert.escalationLevel
);
if (nextEscalation && alertAge >= nextEscalation.delay) {
await this.escalateAlert(alert, {
type: 'escalate',
target: 'escalation',
config: { level: nextEscalation.level },
enabled: true,
});
}
}
// ========================================================================================
// Utility Methods
// ========================================================================================
private generateAlertMessage(rule: AlertRule, metric: TruthMetric): string {
return `${rule.name}: ${metric.metricType} ${rule.operator} ${rule.threshold} ` +
`(current: ${metric.value.toFixed(3)}) for agent ${metric.agentId}`;
}
private matchesFilters(metric: TruthMetric, filters: Record<string, any>): boolean {
for (const [key, value] of Object.entries(filters)) {
const metricValue = this.getMetricProperty(metric, key);
if (metricValue !== value) return false;
}
return true;
}
private matchesConditions(metric: TruthMetric, conditions: AlertCondition[]): boolean {
if (conditions.length === 0) return true;
// Simple AND/OR logic evaluation
let result = true;
let currentOperator = 'AND';
for (const condition of conditions) {
const metricValue = this.getMetricProperty(metric, condition.field);
const conditionResult = this.evaluateConditionValue(condition, metricValue);
if (currentOperator === 'AND') {
result = result && conditionResult;
} else {
result = result || conditionResult;
}
currentOperator = condition.logicalOperator || 'AND';
}
return result;
}
private getMetricProperty(metric: TruthMetric, path: string): any {
const parts = path.split('.');
let value: any = metric;
for (const part of parts) {
value = value?.[part];
}
return value;
}
private evaluateConditionValue(condition: any, value: any): boolean {
switch (condition.operator) {
case 'gt': return value > condition.value;
case 'gte': return value >= condition.value;
case 'lt': return value < condition.value;
case 'lte': return value <= condition.value;
case 'eq': return value === condition.value;
case 'ne': return value !== condition.value;
case 'contains': return String(value).includes(condition.value);
case 'startsWith': return String(value).startsWith(condition.value);
case 'endsWith': return String(value).endsWith(condition.value);
default: return false;
}
}
private alertMatchesChannelFilters(alert: TruthAlert, channel: AlertChannel): boolean {
for (const filter of channel.filters) {
const alertValue = this.getMetricProperty(alert, filter.field);
const matches = filter.values.includes(String(alertValue));
if (filter.action === 'include' && !matches) return false;
if (filter.action === 'exclude' && matches) return false;
}
return true;
}
private checkRateLimit(channel: AlertChannel, alert: TruthAlert): boolean {
for (const rateLimit of channel.rateLimits) {
const now = Date.now();
// Reset if window has passed
if (rateLimit.resetTime && now > rateLimit.resetTime.getTime()) {
rateLimit.currentCount = 0;
rateLimit.resetTime = new Date(now + rateLimit.window);
}
// Initialize if needed
if (!rateLimit.resetTime) {
rateLimit.resetTime = new Date(now + rateLimit.window);
rateLimit.currentCount = 0;
}
// Check limit
if (rateLimit.currentCount >= rateLimit.maxAlerts) {
return false;
}
// Increment counter
rateLimit.currentCount++;
}
return true;
}
private checkEscalationConditions(alert: TruthAlert, escalation: EscalationLevel): boolean {
// Simple condition evaluation
for (const condition of escalation.conditions) {
// This would typically parse and evaluate complex conditions
// For now, using simple checks
if (condition.includes('unacknowledged') && alert.acknowledged) return false;
if (condition.includes('duration') && !this.checkDurationCondition(alert, condition)) return false;
}
return true;
}
private checkDurationCondition(alert: TruthAlert, condition: string): boolean {
// Parse condition like "duration > 30m"
const match = condition.match(/duration\s*([><=]+)\s*(\d+)([mhs])/);
if (!match) return true;
const operator = match[1];
const value = parseInt(match[2]);
const unit = match[3];
const multiplier = unit === 's' ? 1000 : unit === 'm' ? 60000 : 3600000; // hours
const thresholdMs = value * multiplier;
const durationMs = Date.now() - alert.timestamp.getTime();
switch (operator) {
case '>': return durationMs > thresholdMs;
case '>=': return durationMs >= thresholdMs;
case '<': return durationMs < thresholdMs;
case '<=': return durationMs <= thresholdMs;
case '=': return Math.abs(durationMs - thresholdMs) < 1000;
default: return true;
}
}
private getEscalatedSeverity(currentSeverity: string): 'info' | 'warning' | 'critical' | 'emergency' {
switch (currentSeverity) {
case 'info': return 'warning';
case 'warning': return 'critical';
case 'critical': return 'emergency';
default: return 'emergency';
}
}
private async getCurrentMetricValue(alert: TruthAlert): Promise<number | null> {
// This would typically query the current metric value
// For now, returning null to indicate unavailable
return null;
}
private addToHistory(alertId: string, action: string, actor: string, details: Record<string, any>): void {
this.alertHistory.push({
alertId,
timestamp: new Date(),
action: action as any,
actor,
details,
});
// Keep history size manageable
if (this.alertHistory.length > 10000) {
this.alertHistory = this.alertHistory.slice(-5000);
}
}
private cleanupResolvedAlerts(): void {
const cutoff = new Date(Date.now() - 24 * 60 * 60 * 1000); // 24 hours
for (const [alertId, alert] of this.activeAlerts) {
if (alert.resolved && alert.resolvedAt && alert.resolvedAt < cutoff) {
this.activeAlerts.delete(alertId);
}
}
}
private cleanupOldHistory(): void {
const cutoff = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // 7 days
this.alertHistory = this.alertHistory.filter(h => h.timestamp >= cutoff);
}
private resetRateLimits(): void {
const now = Date.now();
for (const channel of this.alertChannels.values()) {
for (const rateLimit of channel.rateLimits) {
if (rateLimit.resetTime && now > rateLimit.resetTime.getTime()) {
rateLimit.currentCount = 0;
rateLimit.resetTime = new Date(now + rateLimit.window);
}
}
}
}
private updateAlertStatistics(alert: TruthAlert): void {
// Update top alert sources
const source = alert.source;
const existing = this.statistics.topAlertSources.find(s => s.source === source);
if (existing) {
existing.count++;
} else {
this.statistics.topAlertSources.push({ source, count: 1 });
}
// Sort and limit
this.statistics.topAlertSources.sort((a, b) => b.count - a.count);
this.statistics.topAlertSources = this.statistics.topAlertSources.slice(0, 10);
}
private updateAverageResolutionTime(resolutionTime: number): void {
const count = this.statistics.resolvedAlerts;
const currentAvg = this.statistics.averageResolutionTime;
this.statistics.averageResolutionTime =
((currentAvg * (count - 1)) + resolutionTime) / count;
}
private calculateEscalationRate(): number {
const escalatedAlerts = this.alertHistory.filter(h => h.action === 'escalated').length;
return this.statistics.totalAlerts > 0 ? escalatedAlerts / this.statistics.totalAlerts : 0;
}
// ========================================================================================
// Notification Implementations (Placeholders)
// ========================================================================================
private async sendEmailNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would send actual email
this.logger.info('Email notification sent', { alertId: alert.id, to: channel.config.to });
}
private async sendSlackNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would send to Slack webhook
this.logger.info('Slack notification sent', { alertId: alert.id, webhook: channel.config.webhook });
}
private async sendWebhookNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would POST to webhook URL
this.logger.info('Webhook notification sent', { alertId: alert.id, url: channel.config.url });
}
private async sendTeamsNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would send to Teams webhook
this.logger.info('Teams notification sent', { alertId: alert.id, webhook: channel.config.webhook });
}
private async sendDiscordNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would send to Discord webhook
this.logger.info('Discord notification sent', { alertId: alert.id, webhook: channel.config.webhook });
}
private async sendPagerDutyNotification(alert: TruthAlert, channel: AlertChannel): Promise<void> {
// Implementation would create PagerDuty incident
this.logger.info('PagerDuty notification sent', { alertId: alert.id, serviceKey: channel.config.serviceKey });
}
// ========================================================================================
// Configuration Management
// ========================================================================================
private async loadAlertConfiguration(): Promise<void> {
// Placeholder for loading configuration from storage
this.logger.debug('Loading alert configuration');
}
private async saveAlertConfiguration(): Promise<void> {
// Placeholder for saving configuration to storage
this.logger.debug('Saving alert configuration');
}
private initializeDefaultRules(): void {
// Create default alert rules
const defaultRules: Omit<AlertRule, 'id' | 'createdAt' | 'lastModified'>[] = [
{
name: 'Low Truth Accuracy',
description: 'Alert when agent accuracy falls below threshold',
enabled: true,
metric: 'accuracy',
operator: 'lt',
threshold: this.config.alertThresholds.accuracyThreshold,
duration: 300000, // 5 minutes
severity: 'critical',
category: 'accuracy_degradation',
priority: 8,
filters: {},
conditions: [],
actions: [
{ type: 'notify', target: 'default', config: { channels: ['default'] }, enabled: true },
],
escalationPath: [
{ level: 1, delay: 900000, targets: ['critical'], conditions: ['unacknowledged'] }, // 15 minutes
],
suppressions: [],
tags: { category: 'accuracy', auto_created: 'true' },
createdBy: 'system',
},
{
name: 'High Human Intervention Rate',
description: 'Alert when human intervention rate exceeds threshold',
enabled: true,
metric: '*',
operator: 'gt',
threshold: this.config.alertThresholds.interventionRateThreshold,
duration: 600000, // 10 minutes
severity: 'warning',
category: 'high_intervention_rate',
priority: 6,
filters: { 'context.verificationMethod': 'human' },
conditions: [],
actions: [
{ type: 'notify', target: 'default', config: { channels: ['default'] }, enabled: true },
],
escalationPath: [],
suppressions: [],
tags: { category: 'efficiency', auto_created: 'true' },
createdBy: 'system',
},
];
defaultRules.forEach(rule => this.createAlertRule(rule));
}
private initializeDefaultChannels(): void {
// Create default notification channels
const defaultChannels: Omit<AlertChannel, 'id'>[] = [
{
name: 'Default Log Channel',
type: 'email',
config: { to: 'admin@example.com' },
enabled: true,
filters: [],
rateLimits: [
{ window: 300000, maxAlerts: 10, currentCount: 0 }, // 10 alerts per 5 minutes
],
},
{
name: 'Critical Alerts',
type: 'slack',
config: { webhook: 'https://hooks.slack.com/services/...' },
enabled: false, // Disabled until configured
filters: [
{ field: 'severity', operator: 'eq', values: ['critical', 'emergency'], action: 'include' },
],
rateLimits: [
{ window: 60000, maxAlerts: 5, currentCount: 0 }, // 5 alerts per minute
],
},
];
defaultChannels.forEach(channel => {
const channelId = `channel-${Date.now()}-${Math.random().toString(36).slice(2)}`;
this.alertChannels.set(channelId, { ...channel, id: channelId });
});
}
private initializeStatistics(): AlertStatistics {
return {
totalAlerts: 0,
activeAlerts: 0,
resolvedAlerts: 0,
averageResolutionTime: 0,
alertsByType: {},
alertsBySeverity: {},
topAlertSources: [],
escalationRate: 0,
falsePositiveRate: 0,
};
}
// ========================================================================================
// Public API
// ========================================================================================
getActiveAlerts(): TruthAlert[] {
return Array.from(this.activeAlerts.values()).filter(a => !a.resolved);
}
getAlert(alertId: string): TruthAlert | undefined {
return this.activeAlerts.get(alertId);
}
getAlertHistory(alertId?: string, limit: number = 100): AlertHistory[] {
let history = this.alertHistory;
if (alertId) {
history = history.filter(h => h.alertId === alertId);
}
return history.slice(-limit);
}
getAlertRules(): AlertRule[] {
return Array.from(this.alertRules.values());
}
getAlertChannels(): AlertChannel[] {
return Array.from(this.alertChannels.values());
}
getStatistics(): AlertStatistics {
return { ...this.statistics };
}
createSuppression(alertId: string, suppression: Omit<AlertSuppression, 'id'>): string {
const suppressionId = `suppression-${Date.now()}`;
this.suppressions.set(suppressionId, { ...suppression, id: suppressionId });
return suppressionId;
}
removeSuppression(suppressionId: string): boolean {
return this.suppressions.delete(suppressionId);
}
}
// ========================================================================================
// Supporting Interfaces
// ========================================================================================
interface MetricState {
key: string;
currentValue: number;
previousValue: number;
lastUpdate: Date;
changeRate: number;
samples: number[];
thresholdViolations: Map<string, ThresholdViolation>;
}
interface ThresholdViolation {
ruleId: string;
startTime: Date;
lastSeen: Date;
count: number;
alertId: string | null;
}