// recoder-analytics
// Version:
// Comprehensive analytics and monitoring for the Recoder.xyz ecosystem
// 606 lines • 23.6 kB
// JavaScript
;
/**
* Alert Escalation Policies & Management
*
* Manages alert escalation with configurable policies, time-based escalation,
* on-call rotations, and automatic escalation based on severity and response times.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.escalationPolicies = exports.EscalationPolicies = void 0;
const shared_1 = require("@recoder/shared");
const events_1 = require("events");
/**
 * In-memory manager for alert escalation policies, on-call schedules and the
 * escalations that are currently in flight.
 *
 * Events emitted:
 *  - `escalationStarted`       — with no arguments from `start()`, and with the
 *                                escalation object from `startEscalation()`.
 *  - `escalationStopped`       — with no arguments from `stop()`, and with the
 *                                escalation object from `stopEscalation()`.
 *  - `escalationAcknowledged`  — with the escalation object.
 *  - `escalationLevelExecuted` — with (escalation, level, success).
 *
 * NOTE(review): the manager-level and alert-level start/stop events share a
 * name; listeners have to tell them apart by the presence of an argument.
 */
class EscalationPolicies extends events_1.EventEmitter {
    constructor() {
        super();
        this.policies = new Map(); // policy id -> policy
        this.onCallSchedules = new Map(); // schedule id -> schedule
        this.activeEscalations = new Map(); // alert id -> escalation state
        this.config = {
            checkInterval: 30000, // Check escalations every 30 seconds
            maxEscalationLevel: 5, // Maximum escalation levels (not read by this class)
            defaultEscalationDelay: 15, // Default delay in minutes, used when a rule has no delayMinutes
            acknowledgmentTimeout: 300, // 5 minutes to acknowledge (not read by this class)
        };
        this.escalationTimer = null;
        this.isRunning = false;
        this.initializeDefaultPolicies();
        this.initializeOnCallSchedules();
    }

    /**
     * Registers the three built-in policies (`default_critical`,
     * `default_high`, `default_budget`) in `this.policies`.
     */
    initializeDefaultPolicies() {
        const defaultPolicies = [
            {
                id: 'default_critical',
                name: 'Critical Alert Escalation',
                description: 'Standard escalation for critical alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['slack_alerts', 'email_oncall'],
                        recipients: [
                            {
                                type: 'oncall',
                                id: 'primary_oncall',
                                name: 'Primary On-Call',
                                contact: { email: 'oncall@recoder.xyz', slack: '@oncall' },
                            },
                        ],
                        actions: [
                            { type: 'notify', config: { urgency: 'high', method: ['email', 'slack'] } },
                        ],
                        conditions: { onlyIfNotAcknowledged: true },
                    },
                    {
                        level: 2,
                        delayMinutes: 15,
                        notificationChannels: ['slack_alerts', 'email_oncall'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'sre_team',
                                name: 'SRE Team',
                                contact: { email: 'sre@recoder.xyz', slack: '@sre-team' },
                            },
                        ],
                        actions: [
                            { type: 'page', config: { service: 'pagerduty', urgency: 'high' } },
                        ],
                        conditions: { onlyIfNotAcknowledged: true, onlyIfNotResolved: true },
                    },
                    {
                        level: 3,
                        delayMinutes: 30,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'role',
                                id: 'engineering_manager',
                                name: 'Engineering Manager',
                                contact: { email: 'manager@recoder.xyz' },
                            },
                        ],
                        actions: [
                            {
                                type: 'escalate_externally',
                                config: { service: 'incident_management', severity: 'critical' },
                            },
                        ],
                        conditions: { onlyIfNotAcknowledged: true, onlyIfNotResolved: true },
                    },
                ],
                conditions: { severities: ['critical'] },
                createdAt: new Date(),
                updatedAt: new Date(),
            },
            {
                id: 'default_high',
                name: 'High Priority Escalation',
                description: 'Standard escalation for high priority alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['slack_alerts'],
                        recipients: [
                            {
                                type: 'oncall',
                                id: 'primary_oncall',
                                name: 'Primary On-Call',
                                contact: { slack: '@oncall' },
                            },
                        ],
                        actions: [{ type: 'notify', config: { urgency: 'medium' } }],
                    },
                    {
                        level: 2,
                        delayMinutes: 30,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'sre_team',
                                name: 'SRE Team',
                                contact: { email: 'sre@recoder.xyz' },
                            },
                        ],
                        actions: [{ type: 'notify', config: { urgency: 'medium' } }],
                        conditions: { onlyIfNotAcknowledged: true },
                    },
                ],
                conditions: { severities: ['high'] },
                createdAt: new Date(),
                updatedAt: new Date(),
            },
            {
                id: 'default_budget',
                name: 'Budget Alert Escalation',
                description: 'Escalation for budget and cost alerts',
                enabled: true,
                rules: [
                    {
                        level: 1,
                        delayMinutes: 0,
                        notificationChannels: ['email_billing'],
                        recipients: [
                            {
                                type: 'team',
                                id: 'billing_team',
                                name: 'Billing Team',
                                contact: { email: 'billing@recoder.xyz' },
                            },
                        ],
                        actions: [{ type: 'notify', config: { urgency: 'medium' } }],
                    },
                    {
                        level: 2,
                        delayMinutes: 60,
                        notificationChannels: ['email_oncall'],
                        recipients: [
                            {
                                type: 'role',
                                id: 'finance_manager',
                                name: 'Finance Manager',
                                contact: { email: 'finance@recoder.xyz' },
                            },
                        ],
                        actions: [
                            {
                                type: 'ticket',
                                config: { system: 'jira', project: 'BILLING', priority: 'high' },
                            },
                        ],
                        conditions: { onlyIfNotAcknowledged: true },
                    },
                ],
                conditions: { severities: ['high', 'critical'], tags: ['budget', 'cost'] },
                createdAt: new Date(),
                updatedAt: new Date(),
            },
        ];
        for (const policy of defaultPolicies) {
            this.policies.set(policy.id, policy);
        }
        shared_1.Logger.info(`Initialized ${defaultPolicies.length} default escalation policies`);
    }

    /**
     * Registers the built-in `primary_oncall` weekly rotation in
     * `this.onCallSchedules`.
     */
    initializeOnCallSchedules() {
        const defaultSchedules = [
            {
                id: 'primary_oncall',
                name: 'Primary On-Call Rotation',
                description: 'Primary on-call rotation for critical alerts',
                rotation: {
                    type: 'weekly',
                    duration: 168, // 1 week in hours
                    participants: [
                        {
                            type: 'user',
                            id: 'user1',
                            name: 'Alice Smith',
                            contact: { email: 'alice@recoder.xyz', phone: '+1234567890', slack: '@alice' },
                        },
                        {
                            type: 'user',
                            id: 'user2',
                            name: 'Bob Johnson',
                            contact: { email: 'bob@recoder.xyz', phone: '+1234567891', slack: '@bob' },
                        },
                        {
                            type: 'user',
                            id: 'user3',
                            name: 'Carol Williams',
                            contact: { email: 'carol@recoder.xyz', phone: '+1234567892', slack: '@carol' },
                        },
                    ],
                    startDate: new Date('2024-01-01T00:00:00Z'),
                    currentIndex: 0,
                },
                overrides: [],
                escalationPolicyId: 'default_critical',
            },
        ];
        for (const schedule of defaultSchedules) {
            this.onCallSchedules.set(schedule.id, schedule);
        }
        shared_1.Logger.info(`Initialized ${defaultSchedules.length} on-call schedules`);
    }

    /**
     * Builds an identifier of the form `<prefix>_<epoch ms>_<random base36>`.
     * Uses Math.random, so the result is not suitable as a secret.
     *
     * @param {string} prefix - Leading label, e.g. `escalation` or `override`.
     * @returns {string}
     */
    generateId(prefix) {
        // slice(2, 11) yields the same nine characters the deprecated substr(2, 9) did.
        return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }

    /**
     * Start escalation policy management.
     *
     * Installs the periodic timer that calls `processEscalations()` every
     * `config.checkInterval` milliseconds. Calling it while already running
     * only logs a warning.
     */
    async start() {
        if (this.isRunning) {
            shared_1.Logger.warn('Escalation policies are already running');
            return;
        }
        shared_1.Logger.info('Starting escalation policy management...');
        // Start escalation check timer. The async pass is not awaited by the
        // timer, so a rejection has to be handled here or it would surface as
        // an unhandled promise rejection.
        this.escalationTimer = setInterval(() => {
            this.processEscalations().catch((error) => {
                shared_1.Logger.error('Escalation check failed:', error);
            });
        }, this.config.checkInterval);
        this.isRunning = true;
        this.emit('escalationStarted');
        shared_1.Logger.info('Escalation policy management started successfully');
    }

    /**
     * Stop escalation policy management.
     *
     * Clears the periodic timer. Active escalations stay in memory; they are
     * simply no longer checked. A no-op when not running.
     */
    async stop() {
        if (!this.isRunning) {
            return;
        }
        shared_1.Logger.info('Stopping escalation policy management...');
        if (this.escalationTimer) {
            clearInterval(this.escalationTimer);
            this.escalationTimer = null;
        }
        this.isRunning = false;
        this.emit('escalationStopped');
        shared_1.Logger.info('Escalation policy management stopped');
    }

    /**
     * Start escalation for an alert.
     *
     * Creates the escalation record, executes level 1 right away and emits
     * `escalationStarted` with the record. Does nothing (beyond logging) when
     * the policy is unknown or disabled, or when the alert already has an
     * active escalation.
     *
     * @param {string} alertId - Alert to escalate; also the key of the record.
     * @param {string} policyId - Id of the policy to follow.
     */
    async startEscalation(alertId, policyId) {
        const policy = this.policies.get(policyId);
        if (!policy || !policy.enabled) {
            shared_1.Logger.warn(`Escalation policy ${policyId} not found or disabled`);
            return;
        }
        // Check if escalation already exists
        if (this.activeEscalations.has(alertId)) {
            shared_1.Logger.debug(`Escalation already active for alert ${alertId}`);
            return;
        }
        const escalation = {
            id: this.generateId('escalation'),
            alertId,
            policyId,
            currentLevel: 0,
            startTime: new Date(),
            acknowledged: false,
            escalationHistory: [],
        };
        // Registered before the first await so a concurrent call for the same
        // alert sees the record and backs off.
        this.activeEscalations.set(alertId, escalation);
        // Start with level 1 immediately
        await this.executeEscalationLevel(escalation, 1);
        this.emit('escalationStarted', escalation);
        shared_1.Logger.info(`Started escalation for alert ${alertId} with policy ${policyId}`);
    }

    /**
     * Stop escalation for an alert: removes the record and emits
     * `escalationStopped` with it. A no-op for unknown alerts.
     *
     * @param {string} alertId
     */
    async stopEscalation(alertId) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation) {
            return;
        }
        this.activeEscalations.delete(alertId);
        this.emit('escalationStopped', escalation);
        shared_1.Logger.info(`Stopped escalation for alert ${alertId}`);
    }

    /**
     * Executes the next escalation level for one alert if its scheduled time
     * has been reached. Acknowledged or unknown escalations are skipped.
     *
     * @param {string} alertId
     */
    async checkEscalation(alertId) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation || escalation.acknowledged) {
            return;
        }
        const dueAt = escalation.nextEscalationTime;
        if (!dueAt || new Date() < dueAt) {
            return;
        }
        const policy = this.policies.get(escalation.policyId);
        if (!policy) {
            return;
        }
        const nextLevel = escalation.currentLevel + 1;
        if (policy.rules.some((rule) => rule.level === nextLevel)) {
            await this.executeEscalationLevel(escalation, nextLevel);
        }
    }

    /**
     * Acknowledge an escalation. Records who acknowledged it and when, and
     * emits `escalationAcknowledged`. The record stays in the active set.
     *
     * @param {string} alertId
     * @param {string} acknowledgedBy - Stored on the record and written to the log.
     */
    async acknowledgeEscalation(alertId, acknowledgedBy) {
        const escalation = this.activeEscalations.get(alertId);
        if (!escalation) {
            return;
        }
        escalation.acknowledged = true;
        escalation.acknowledgedBy = acknowledgedBy;
        escalation.acknowledgedAt = new Date();
        this.emit('escalationAcknowledged', escalation);
        shared_1.Logger.info(`Escalation acknowledged for alert ${alertId} by ${acknowledgedBy}`);
    }

    /**
     * Get current on-call person for a schedule.
     *
     * An override whose [startTime, endTime] window contains the current time
     * wins; otherwise the participant is derived from how many whole rotation
     * periods lie between `rotation.startDate` and now.
     *
     * @param {string} scheduleId
     * @returns {object|null} The recipient, or null when the schedule is
     *   unknown or has no participant to offer.
     */
    getCurrentOnCall(scheduleId) {
        const schedule = this.onCallSchedules.get(scheduleId);
        if (!schedule) {
            return null;
        }
        // Check for active overrides first. Schedules added through
        // setOnCallSchedule are not guaranteed to carry an overrides array.
        const now = new Date();
        const overrides = schedule.overrides ?? [];
        const activeOverride = overrides.find((override) => now >= override.startTime && now <= override.endTime);
        if (activeOverride) {
            return activeOverride.recipient;
        }
        // Calculate current person in rotation
        const { participants, startDate, duration } = schedule.rotation;
        if (!Array.isArray(participants) || participants.length === 0) {
            return null;
        }
        const rotationDurationMs = duration * 60 * 60 * 1000; // Convert hours to ms
        const cyclesSinceStart = Math.floor((now.getTime() - startDate.getTime()) / rotationDurationMs);
        const count = participants.length;
        // `%` keeps the sign of the dividend, so a start date in the future
        // would produce a negative index; fold it back into [0, count).
        const currentIndex = ((cyclesSinceStart % count) + count) % count;
        // A zero or invalid duration makes the index NaN; report "nobody".
        return participants[currentIndex] ?? null;
    }

    /**
     * Add or update escalation policy.
     *
     * Stores a shallow copy with a fresh `updatedAt`. `createdAt` is taken
     * from the argument, else from the policy being replaced, else set to now.
     *
     * @param {object} policy - Must carry `id` and `name`.
     */
    async setEscalationPolicy(policy) {
        // Look the policy up BEFORE writing, otherwise it always looks like an update.
        const existing = this.policies.get(policy.id);
        this.policies.set(policy.id, {
            ...policy,
            createdAt: policy.createdAt ?? existing?.createdAt ?? new Date(),
            updatedAt: new Date(),
        });
        shared_1.Logger.info(`${existing ? 'Updated' : 'Created'} escalation policy: ${policy.name}`);
    }

    /**
     * Get all escalation policies.
     * @returns {object[]} A new array holding the stored policy objects.
     */
    getEscalationPolicies() {
        return Array.from(this.policies.values());
    }

    /**
     * Get active escalations.
     * @returns {object[]} A new array holding the live escalation records.
     */
    getActiveEscalations() {
        return Array.from(this.activeEscalations.values());
    }

    /**
     * Add or update on-call schedule. The object is stored by reference.
     *
     * @param {object} schedule - Must carry `id` and `name`.
     */
    async setOnCallSchedule(schedule) {
        // Same ordering concern as setEscalationPolicy: test before writing.
        const isUpdate = this.onCallSchedules.has(schedule.id);
        this.onCallSchedules.set(schedule.id, schedule);
        shared_1.Logger.info(`${isUpdate ? 'Updated' : 'Created'} on-call schedule: ${schedule.name}`);
    }

    /**
     * Get all on-call schedules.
     * @returns {object[]} A new array holding the stored schedule objects.
     */
    getOnCallSchedules() {
        return Array.from(this.onCallSchedules.values());
    }

    /**
     * Add on-call override.
     *
     * @param {string} scheduleId
     * @param {object} override - Fields read elsewhere in this class are
     *   `startTime`, `endTime`, `recipient` and `reason`. An `id` supplied
     *   here replaces the generated one.
     * @returns {Promise<string>} Id of the stored override.
     * @throws {Error} When the schedule does not exist.
     */
    async addOnCallOverride(scheduleId, override) {
        const schedule = this.onCallSchedules.get(scheduleId);
        if (!schedule) {
            throw new Error(`Schedule ${scheduleId} not found`);
        }
        const overrideWithId = {
            id: this.generateId('override'),
            ...override,
        };
        // Schedules registered without an overrides list get one on demand.
        if (!Array.isArray(schedule.overrides)) {
            schedule.overrides = [];
        }
        schedule.overrides.push(overrideWithId);
        shared_1.Logger.info(`Added on-call override for schedule ${scheduleId}: ${override.reason}`);
        return overrideWithId.id;
    }

    // Private helper methods

    /**
     * One pass of the periodic check: visits every unacknowledged active
     * escalation, one after another.
     */
    async processEscalations() {
        for (const escalation of this.activeEscalations.values()) {
            if (!escalation.acknowledged) {
                await this.checkEscalation(escalation.alertId);
            }
        }
    }

    /**
     * Runs one level of an escalation: notifications, then actions, then a
     * history entry and an `escalationLevelExecuted` event.
     *
     * The time of the following level is fixed before anything is sent, so a
     * failing notification can neither stall the chain nor leave a stale
     * timestamp that would fire the next level on the very next check.
     *
     * NOTE(review): `conditions.onlyIfNotResolved` is present in the default
     * policies but is not evaluated here — this class tracks no resolved state.
     *
     * @param {object} escalation - Live record from `activeEscalations`.
     * @param {number} level - Rule level to execute.
     */
    async executeEscalationLevel(escalation, level) {
        const policy = this.policies.get(escalation.policyId);
        if (!policy) {
            return;
        }
        const rule = policy.rules.find((r) => r.level === level);
        if (!rule) {
            return;
        }
        // Check rule conditions
        if (rule.conditions?.onlyIfNotAcknowledged && escalation.acknowledged) {
            return;
        }
        escalation.currentLevel = level;
        // Schedule next escalation level (or clear the marker on the last one).
        const nextRule = policy.rules.find((r) => r.level === level + 1);
        if (nextRule) {
            const delayMinutes = Number.isFinite(nextRule.delayMinutes)
                ? nextRule.delayMinutes
                : this.config.defaultEscalationDelay;
            escalation.nextEscalationTime = new Date(Date.now() + delayMinutes * 60 * 1000);
        }
        else {
            escalation.nextEscalationTime = undefined;
        }
        const step = {
            level,
            timestamp: new Date(),
            recipients: rule.recipients.map((r) => r.name),
            channels: rule.notificationChannels,
            actions: rule.actions.map((a) => a.type),
            success: false,
        };
        try {
            // Execute notifications
            await this.executeNotifications(rule, escalation.alertId);
            // Execute actions
            await this.executeActions(rule.actions, escalation.alertId);
            step.success = true;
        }
        catch (error) {
            step.error = error instanceof Error ? error.message : 'Unknown error';
            shared_1.Logger.error(`Escalation level ${level} failed for alert ${escalation.alertId}:`, error);
        }
        escalation.escalationHistory.push(step);
        this.emit('escalationLevelExecuted', escalation, level, step.success);
        shared_1.Logger.info(`Executed escalation level ${level} for alert ${escalation.alertId}`);
    }

    /**
     * Placeholder notification step: resolves the recipients, logs how many
     * there are and waits 100 ms. No notification is actually sent.
     *
     * @param {object} rule - Escalation rule whose `recipients` are resolved.
     * @param {string} alertId
     */
    async executeNotifications(rule, alertId) {
        // Resolve actual recipients
        const recipients = await this.resolveRecipients(rule.recipients);
        // This would integrate with the notification service
        shared_1.Logger.info(`Would notify ${recipients.length} recipients for alert ${alertId}`);
        // Simulate notification success
        await new Promise((resolve) => setTimeout(resolve, 100));
    }

    /**
     * Runs every action of a rule in order. A failing action is logged and
     * does not prevent the remaining ones from running.
     *
     * @param {object[]} actions - Each with a `type` and a `config`.
     * @param {string} alertId
     */
    async executeActions(actions, alertId) {
        for (const action of actions) {
            try {
                switch (action.type) {
                    case 'notify':
                        // Covered by executeNotifications; nothing further to do,
                        // and it should not be reported as an unknown type.
                        break;
                    case 'page':
                        await this.executePagingAction(action, alertId);
                        break;
                    case 'ticket':
                        await this.executeTicketAction(action, alertId);
                        break;
                    case 'webhook':
                        await this.executeWebhookAction(action, alertId);
                        break;
                    case 'escalate_externally':
                        await this.executeExternalEscalation(action, alertId);
                        break;
                    default:
                        shared_1.Logger.debug(`Unknown action type: ${action.type}`);
                }
            }
            catch (error) {
                shared_1.Logger.error(`Failed to execute action ${action.type} for alert ${alertId}:`, error);
            }
        }
    }

    /**
     * Replaces every `oncall` recipient with whoever `getCurrentOnCall`
     * returns for the schedule named by the recipient's `id` (dropping it if
     * nobody is on call); all other recipients pass through unchanged.
     *
     * @param {object[]} recipients
     * @returns {Promise<object[]>}
     */
    async resolveRecipients(recipients) {
        const resolved = [];
        for (const recipient of recipients) {
            if (recipient.type !== 'oncall') {
                resolved.push(recipient);
                continue;
            }
            const onCallPerson = this.getCurrentOnCall(recipient.id);
            if (onCallPerson) {
                resolved.push(onCallPerson);
            }
        }
        return resolved;
    }

    /** Placeholder: only logs the page that would be sent. */
    async executePagingAction(action, alertId) {
        shared_1.Logger.info(`Would send page for alert ${alertId} via ${action.config.service}`);
    }

    /** Placeholder: only logs the ticket that would be created. */
    async executeTicketAction(action, alertId) {
        shared_1.Logger.info(`Would create ticket for alert ${alertId} in ${action.config.system}`);
    }

    /** Placeholder: only logs the webhook that would be called. */
    async executeWebhookAction(action, alertId) {
        shared_1.Logger.info(`Would call webhook for alert ${alertId}`);
    }

    /** Placeholder: only logs the external escalation that would happen. */
    async executeExternalEscalation(action, alertId) {
        shared_1.Logger.info(`Would escalate externally for alert ${alertId} to ${action.config.service}`);
    }
}
// Export the class.
exports.EscalationPolicies = EscalationPolicies;
// Export singleton instance
// It is constructed when this module is loaded; the constructor registers the
// default policies and on-call schedules. No timer runs until start() is called.
exports.escalationPolicies = new EscalationPolicies();
//# sourceMappingURL=escalation-policies.js.map