UNPKG

recoder-analytics

Version:

Comprehensive analytics and monitoring for the Recoder.xyz ecosystem

606 lines 23.6 kB
"use strict";
/**
 * Alert Escalation Policies & Management
 *
 * Manages alert escalation with configurable policies, time-based escalation,
 * on-call rotations, and automatic escalation based on severity and response times.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.escalationPolicies = exports.EscalationPolicies = void 0;
const shared_1 = require("@recoder/shared");
const events_1 = require("events");
/**
 * In-memory registry of escalation policies, on-call schedules and the
 * escalations currently in flight (keyed by alert id).
 *
 * Events emitted:
 *  - 'escalationStarted'       from start() (no payload) AND from startEscalation() (escalation payload)
 *  - 'escalationStopped'       from stop() (no payload) AND from stopEscalation() (escalation payload)
 *  - 'escalationAcknowledged'  (escalation)
 *  - 'escalationLevelExecuted' (escalation, level, success)
 *
 * NOTE(review): the started/stopped event names are shared between the
 * manager lifecycle and per-alert escalations; listeners have to tell them
 * apart by the presence of a payload. Kept as-is for backward compatibility.
 */
class EscalationPolicies extends events_1.EventEmitter {
    constructor() {
        super();
        this.policies = new Map();
        this.onCallSchedules = new Map();
        this.activeEscalations = new Map();
        this.config = {
            checkInterval: 30000, // Check escalations every 30 seconds
            maxEscalationLevel: 5, // Maximum escalation levels
            defaultEscalationDelay: 15, // Default delay in minutes
            acknowledgmentTimeout: 300, // 5 minutes to acknowledge
        };
        this.escalationTimer = null;
        this.isRunning = false;
        this.initializeDefaultPolicies();
        this.initializeOnCallSchedules();
    }
    /**
     * Register the built-in escalation policies (critical, high, budget).
     */
    initializeDefaultPolicies() {
        const defaultPolicies = [
            {
                id: 'default_critical',
                name: 'Critical Alert Escalation',
                description: 'Standard escalation for critical alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['slack_alerts', 'email_oncall'],
                        recipients: [
                            {
                                type: 'oncall',
                                id: 'primary_oncall',
                                name: 'Primary On-Call',
                                contact: { email: 'oncall@recoder.xyz', slack: '@oncall' }
                            }
                        ],
                        actions: [
                            {
                                type: 'notify',
                                config: { urgency: 'high', method: ['email', 'slack'] }
                            }
                        ],
                        conditions: {
                            onlyIfNotAcknowledged: true
                        }
                    },
                    {
                        level: 2,
                        delayMinutes: 15,
                        notificationChannels: ['slack_alerts', 'email_oncall'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'sre_team',
                                name: 'SRE Team',
                                contact: { email: 'sre@recoder.xyz', slack: '@sre-team' }
                            }
                        ],
                        actions: [
                            {
                                type: 'page',
                                config: { service: 'pagerduty', urgency: 'high' }
                            }
                        ],
                        conditions: {
                            onlyIfNotAcknowledged: true,
                            onlyIfNotResolved: true
                        }
                    },
                    {
                        level: 3,
                        delayMinutes: 30,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'role',
                                id: 'engineering_manager',
                                name: 'Engineering Manager',
                                contact: { email: 'manager@recoder.xyz' }
                            }
                        ],
                        actions: [
                            {
                                type: 'escalate_externally',
                                config: { service: 'incident_management', severity: 'critical' }
                            }
                        ],
                        conditions: {
                            onlyIfNotAcknowledged: true,
                            onlyIfNotResolved: true
                        }
                    }
                ],
                conditions: {
                    severities: ['critical']
                },
                createdAt: new Date(),
                updatedAt: new Date()
            },
            {
                id: 'default_high',
                name: 'High Priority Escalation',
                description: 'Standard escalation for high priority alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['slack_alerts'],
                        recipients: [
                            {
                                type: 'oncall',
                                id: 'primary_oncall',
                                name: 'Primary On-Call',
                                contact: { slack: '@oncall' }
                            }
                        ],
                        actions: [
                            {
                                type: 'notify',
                                config: { urgency: 'medium' }
                            }
                        ]
                    },
                    {
                        level: 2,
                        delayMinutes: 30,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'sre_team',
                                name: 'SRE Team',
                                contact: { email: 'sre@recoder.xyz' }
                            }
                        ],
                        actions: [
                            {
                                type: 'notify',
                                config: { urgency: 'medium' }
                            }
                        ],
                        conditions: {
                            onlyIfNotAcknowledged: true
                        }
                    }
                ],
                conditions: {
                    severities: ['high']
                },
                createdAt: new Date(),
                updatedAt: new Date()
            },
            {
                id: 'default_budget',
                name: 'Budget Alert Escalation',
                description: 'Escalation for budget and cost alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['email_billing'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'billing_team',
                                name: 'Billing Team',
                                contact: { email: 'billing@recoder.xyz' }
                            }
                        ],
                        actions: [
                            {
                                type: 'notify',
                                config: { urgency: 'medium' }
                            }
                        ]
                    },
                    {
                        level: 2,
                        delayMinutes: 60,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'role',
                                id: 'finance_manager',
                                name: 'Finance Manager',
                                contact: { email: 'finance@recoder.xyz' }
                            }
                        ],
                        actions: [
                            {
                                type: 'ticket',
                                config: { system: 'jira', project: 'BILLING', priority: 'high' }
                            }
                        ],
                        conditions: {
                            onlyIfNotAcknowledged: true
                        }
                    }
                ],
                conditions: {
                    severities: ['high', 'critical'],
                    tags: ['budget', 'cost']
                },
                createdAt: new Date(),
                updatedAt: new Date()
            }
        ];
        defaultPolicies.forEach(policy => {
            this.policies.set(policy.id, policy);
        });
        shared_1.Logger.info(`Initialized ${defaultPolicies.length} default escalation policies`);
    }
    /**
     * Register the built-in on-call schedules.
     */
    initializeOnCallSchedules() {
        const defaultSchedules = [
            {
                id: 'primary_oncall',
                name: 'Primary On-Call Rotation',
                description: 'Primary on-call rotation for critical alerts',
                rotation: {
                    type: 'weekly',
                    duration: 168, // 1 week in hours
                    participants: [
                        {
                            type: 'user',
                            id: 'user1',
                            name: 'Alice Smith',
                            contact: { email: 'alice@recoder.xyz', phone: '+1234567890', slack: '@alice' }
                        },
                        {
                            type: 'user',
                            id: 'user2',
                            name: 'Bob Johnson',
                            contact: { email: 'bob@recoder.xyz', phone: '+1234567891', slack: '@bob' }
                        },
                        {
                            type: 'user',
                            id: 'user3',
                            name: 'Carol Williams',
                            contact: { email: 'carol@recoder.xyz', phone: '+1234567892', slack: '@carol' }
                        }
                    ],
                    startDate: new Date('2024-01-01T00:00:00Z'),
                    currentIndex: 0
                },
                overrides: [],
                escalationPolicyId: 'default_critical'
            }
        ];
        defaultSchedules.forEach(schedule => {
            this.onCallSchedules.set(schedule.id, schedule);
        });
        shared_1.Logger.info(`Initialized ${defaultSchedules.length} on-call schedules`);
    }
    /**
     * Start escalation policy management
     *
     * Installs the periodic timer that drives processEscalations(). Calling
     * it while already running only logs a warning.
     */
    async start() {
        if (this.isRunning) {
            shared_1.Logger.warn('Escalation policies are already running');
            return;
        }
        shared_1.Logger.info('Starting escalation policy management...');
        // Start escalation check timer
        this.escalationTimer = setInterval(() => {
            // The timer callback cannot await, so surface failures here
            // instead of leaving a floating (unhandled) rejection.
            this.processEscalations().catch(error => {
                shared_1.Logger.error('Failed to process escalations:', error);
            });
        }, this.config.checkInterval);
        this.isRunning = true;
        this.emit('escalationStarted');
        shared_1.Logger.info('Escalation policy management started successfully');
    }
    /**
     * Stop escalation policy management
     *
     * Clears the periodic timer. Active escalations stay registered; they
     * are simply no longer advanced until start() is called again.
     */
    async stop() {
        if (!this.isRunning) {
            return;
        }
        shared_1.Logger.info('Stopping escalation policy management...');
        if (this.escalationTimer) {
            clearInterval(this.escalationTimer);
            this.escalationTimer = null;
        }
        this.isRunning = false;
        this.emit('escalationStopped');
        shared_1.Logger.info('Escalation policy management stopped');
    }
    /**
     * Start escalation for an alert
     *
     * No-op (with a log line) when the policy is unknown or disabled, or when
     * the alert already has an active escalation. Otherwise level 1 of the
     * policy is executed immediately.
     *
     * @param alertId  Identifier of the alert being escalated.
     * @param policyId Identifier of the escalation policy to apply.
     */
    async startEscalation(alertId, policyId) {
        const policy = this.policies.get(policyId);
        if (!policy || !policy.enabled) {
            shared_1.Logger.warn(`Escalation policy ${policyId} not found or disabled`);
            return;
        }
        // Check if escalation already exists
        if (this.activeEscalations.has(alertId)) {
            shared_1.Logger.debug(`Escalation already active for alert ${alertId}`);
            return;
        }
        const escalation = {
            // slice(2, 11) yields the same 9 characters as the deprecated substr(2, 9)
            id: `escalation_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
            alertId,
            policyId,
            currentLevel: 0,
            startTime: new Date(),
            acknowledged: false,
            escalationHistory: []
        };
        this.activeEscalations.set(alertId, escalation);
        // Start with level 1 immediately
        await this.executeEscalationLevel(escalation, 1);
        this.emit('escalationStarted', escalation);
        shared_1.Logger.info(`Started escalation for alert ${alertId} with policy ${policyId}`);
    }
    /**
     * Stop escalation for an alert
     *
     * Removes the escalation from the active set; does nothing when the
     * alert has no active escalation.
     *
     * @param alertId Identifier of the alert whose escalation should end.
     */
    async stopEscalation(alertId) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation) {
            return;
        }
        this.activeEscalations.delete(alertId);
        this.emit('escalationStopped', escalation);
        shared_1.Logger.info(`Stopped escalation for alert ${alertId}`);
    }
    /**
     * Check and process due escalations
     *
     * Executes the next level of the alert's policy once its scheduled time
     * has been reached. Acknowledged or unknown escalations are skipped.
     *
     * @param alertId Identifier of the alert to check.
     */
    async checkEscalation(alertId) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation || escalation.acknowledged) {
            return;
        }
        const now = new Date();
        // Check if it's time for next escalation level
        if (escalation.nextEscalationTime && now >= escalation.nextEscalationTime) {
            const policy = this.policies.get(escalation.policyId);
            if (!policy) {
                return;
            }
            const nextLevel = escalation.currentLevel + 1;
            const nextRule = policy.rules.find(rule => rule.level === nextLevel);
            if (nextRule) {
                await this.executeEscalationLevel(escalation, nextLevel);
            }
        }
    }
    /**
     * Acknowledge an escalation
     *
     * Marks the escalation as acknowledged, which stops further levels from
     * being executed by the periodic check.
     *
     * @param alertId        Identifier of the alert being acknowledged.
     * @param acknowledgedBy Who acknowledged it (stored and logged as given).
     */
    async acknowledgeEscalation(alertId, acknowledgedBy) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation) {
            return;
        }
        escalation.acknowledged = true;
        escalation.acknowledgedBy = acknowledgedBy;
        escalation.acknowledgedAt = new Date();
        this.emit('escalationAcknowledged', escalation);
        shared_1.Logger.info(`Escalation acknowledged for alert ${alertId} by ${acknowledgedBy}`);
    }
    /**
     * Get current on-call person for a schedule
     *
     * An override whose [startTime, endTime] window contains the current
     * time wins; otherwise the participant is picked from the rotation by
     * counting whole rotation periods since rotation.startDate.
     *
     * @param scheduleId Identifier of the on-call schedule.
     * @returns The on-call recipient, or null when the schedule is unknown
     *          or nobody can be determined (e.g. no participants).
     */
    getCurrentOnCall(scheduleId) {
        const schedule = this.onCallSchedules.get(scheduleId);
        if (!schedule) {
            return null;
        }
        // Check for active overrides first
        const now = new Date();
        // Schedules registered via setOnCallSchedule() may omit `overrides`.
        const overrides = schedule.overrides ?? [];
        const activeOverride = overrides.find(override => now >= override.startTime && now <= override.endTime);
        if (activeOverride) {
            return activeOverride.recipient;
        }
        // Calculate current person in rotation
        const rotation = schedule.rotation;
        const participantCount = rotation.participants.length;
        if (participantCount === 0) {
            return null;
        }
        const timeSinceStart = now.getTime() - rotation.startDate.getTime();
        const rotationDurationMs = rotation.duration * 60 * 60 * 1000; // Convert hours to ms
        const cyclesSinceStart = Math.floor(timeSinceStart / rotationDurationMs);
        // JavaScript's % keeps the sign of the dividend, so a start date in
        // the future would give a negative index; normalise into [0, count).
        const currentIndex = ((cyclesSinceStart % participantCount) + participantCount) % participantCount;
        // A non-finite cycle count (e.g. duration 0) indexes to undefined.
        return rotation.participants[currentIndex] ?? null;
    }
    /**
     * Add or update escalation policy
     *
     * Stores a shallow copy of the policy with `updatedAt` refreshed.
     *
     * @param policy Policy object; `policy.id` is the storage key.
     */
    async setEscalationPolicy(policy) {
        // Must be evaluated before set(), otherwise it is always true.
        const alreadyExists = this.policies.has(policy.id);
        this.policies.set(policy.id, { ...policy, updatedAt: new Date() });
        shared_1.Logger.info(`${alreadyExists ? 'Updated' : 'Created'} escalation policy: ${policy.name}`);
    }
    /**
     * Get all escalation policies
     *
     * @returns A new array holding every registered policy.
     */
    getEscalationPolicies() {
        return Array.from(this.policies.values());
    }
    /**
     * Get active escalations
     *
     * @returns A new array holding every escalation currently tracked.
     */
    getActiveEscalations() {
        return Array.from(this.activeEscalations.values());
    }
    /**
     * Add or update on-call schedule
     *
     * @param schedule Schedule object; `schedule.id` is the storage key.
     */
    async setOnCallSchedule(schedule) {
        // Must be evaluated before set(), otherwise it is always true.
        const alreadyExists = this.onCallSchedules.has(schedule.id);
        this.onCallSchedules.set(schedule.id, schedule);
        shared_1.Logger.info(`${alreadyExists ? 'Updated' : 'Created'} on-call schedule: ${schedule.name}`);
    }
    /**
     * Get all on-call schedules
     *
     * @returns A new array holding every registered schedule.
     */
    getOnCallSchedules() {
        return Array.from(this.onCallSchedules.values());
    }
    /**
     * Add on-call override
     *
     * @param scheduleId Identifier of the schedule to attach the override to.
     * @param override   Override data; getCurrentOnCall() reads its
     *                   `startTime`, `endTime` and `recipient`, and `reason`
     *                   is logged here.
     * @returns The id of the stored override.
     * @throws Error when the schedule does not exist.
     */
    async addOnCallOverride(scheduleId, override) {
        const schedule = this.onCallSchedules.get(scheduleId);
        if (!schedule) {
            throw new Error(`Schedule ${scheduleId} not found`);
        }
        const overrideWithId = {
            id: `override_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
            ...override
        };
        // Schedules registered via setOnCallSchedule() may omit `overrides`.
        if (!Array.isArray(schedule.overrides)) {
            schedule.overrides = [];
        }
        schedule.overrides.push(overrideWithId);
        shared_1.Logger.info(`Added on-call override for schedule ${scheduleId}: ${override.reason}`);
        return overrideWithId.id;
    }
    // Private helper methods
    /**
     * Run checkEscalation() for every escalation that is not acknowledged.
     * Invoked by the timer installed in start().
     */
    async processEscalations() {
        for (const escalation of this.activeEscalations.values()) {
            if (!escalation.acknowledged) {
                await this.checkEscalation(escalation.alertId);
            }
        }
    }
    /**
     * Execute one level of an escalation's policy: send notifications, run
     * the rule's actions, record the outcome in the escalation history and
     * schedule the following level.
     *
     * NOTE(review): `conditions.onlyIfNotResolved` is present on some rules
     * but is not evaluated here; no resolved state is tracked in this class.
     *
     * @param escalation Active escalation record (mutated in place).
     * @param level      Policy level to execute.
     */
    async executeEscalationLevel(escalation, level) {
        const policy = this.policies.get(escalation.policyId);
        if (!policy) {
            return;
        }
        const rule = policy.rules.find(r => r.level === level);
        if (!rule) {
            return;
        }
        // Check rule conditions
        if (rule.conditions?.onlyIfNotAcknowledged && escalation.acknowledged) {
            return;
        }
        escalation.currentLevel = level;
        const step = {
            level,
            timestamp: new Date(),
            recipients: rule.recipients.map(r => r.name),
            channels: rule.notificationChannels,
            actions: rule.actions.map(a => a.type),
            success: false
        };
        try {
            // Execute notifications
            await this.executeNotifications(rule, escalation.alertId);
            // Execute actions
            await this.executeActions(rule.actions, escalation.alertId);
            step.success = true;
        }
        catch (error) {
            step.error = error instanceof Error ? error.message : 'Unknown error';
            shared_1.Logger.error(`Escalation level ${level} failed for alert ${escalation.alertId}:`, error);
        }
        // Schedule next escalation level. This happens whether or not this
        // level succeeded: a failed notification must not stall the chain,
        // and a stale timestamp must not let the next level skip its delay.
        const nextRule = policy.rules.find(r => r.level === level + 1);
        if (nextRule) {
            // Rules without an explicit delay fall back to the configured default.
            const delayMinutes = nextRule.delayMinutes ?? this.config.defaultEscalationDelay;
            escalation.nextEscalationTime = new Date(Date.now() + delayMinutes * 60 * 1000);
        }
        else {
            // Last level reached: nothing further is due.
            escalation.nextEscalationTime = undefined;
        }
        escalation.escalationHistory.push(step);
        this.emit('escalationLevelExecuted', escalation, level, step.success);
        shared_1.Logger.info(`Executed escalation level ${level} for alert ${escalation.alertId}`);
    }
    /**
     * Resolve the rule's recipients and notify them. Currently a stub that
     * only logs and waits 100 ms.
     *
     * @param rule    Escalation rule whose recipients should be notified.
     * @param alertId Identifier of the alert being escalated.
     */
    async executeNotifications(rule, alertId) {
        // Resolve actual recipients
        const recipients = await this.resolveRecipients(rule.recipients);
        // This would integrate with the notification service
        shared_1.Logger.info(`Would notify ${recipients.length} recipients for alert ${alertId}`);
        // Simulate notification success
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    /**
     * Run each action of a rule in order. A failing action is logged and
     * does not prevent the remaining actions from running.
     *
     * @param actions Actions of the rule being executed.
     * @param alertId Identifier of the alert being escalated.
     */
    async executeActions(actions, alertId) {
        for (const action of actions) {
            try {
                switch (action.type) {
                    case 'notify':
                        // Recipient notification is performed by
                        // executeNotifications(); nothing extra to do here.
                        break;
                    case 'page':
                        await this.executePagingAction(action, alertId);
                        break;
                    case 'ticket':
                        await this.executeTicketAction(action, alertId);
                        break;
                    case 'webhook':
                        await this.executeWebhookAction(action, alertId);
                        break;
                    case 'escalate_externally':
                        await this.executeExternalEscalation(action, alertId);
                        break;
                    default:
                        shared_1.Logger.debug(`Unknown action type: ${action.type}`);
                }
            }
            catch (error) {
                shared_1.Logger.error(`Failed to execute action ${action.type} for alert ${alertId}:`, error);
            }
        }
    }
    /**
     * Expand 'oncall' recipients into the person currently on call for the
     * schedule named by the recipient id; other recipients pass through.
     * On-call recipients that cannot be resolved are dropped.
     *
     * @param recipients Recipients as declared on the escalation rule.
     * @returns The concrete recipients to notify.
     */
    async resolveRecipients(recipients) {
        const resolved = [];
        for (const recipient of recipients) {
            if (recipient.type === 'oncall') {
                const onCallPerson = this.getCurrentOnCall(recipient.id);
                if (onCallPerson) {
                    resolved.push(onCallPerson);
                }
            }
            else {
                resolved.push(recipient);
            }
        }
        return resolved;
    }
    /** Stub: logs the page that would be sent via action.config.service. */
    async executePagingAction(action, alertId) {
        shared_1.Logger.info(`Would send page for alert ${alertId} via ${action.config.service}`);
    }
    /** Stub: logs the ticket that would be created in action.config.system. */
    async executeTicketAction(action, alertId) {
        shared_1.Logger.info(`Would create ticket for alert ${alertId} in ${action.config.system}`);
    }
    /** Stub: logs the webhook call that would be made. */
    async executeWebhookAction(action, alertId) {
        shared_1.Logger.info(`Would call webhook for alert ${alertId}`);
    }
    /** Stub: logs the external escalation that would go to action.config.service. */
    async executeExternalEscalation(action, alertId) {
        shared_1.Logger.info(`Would escalate externally for alert ${alertId} to ${action.config.service}`);
    }
}
exports.EscalationPolicies = EscalationPolicies;
// Export singleton instance
exports.escalationPolicies = new EscalationPolicies();
//# sourceMappingURL=escalation-policies.js.map