UNPKG

tlnt

Version:

TLNT - HMS-Powered Multi-Agent Platform with Government Agency Analysis, Deep Research, and Enterprise-Ready Deployment. Self-optimizing multi-domain AI agent with continuous learning and enterprise-grade performance monitoring.

531 lines 19.2 kB
import { EventEmitter } from 'events';
import * as fs from 'fs/promises';
import * as path from 'path';
import { createWriteStream } from 'fs';
import { createGzip } from 'zlib';

/**
 * Watchdog Monitor for event fan-out and persistence.
 * Provides real-time monitoring and alerting for HMS Dev system.
 *
 * The monitor subscribes to a set of message-bus patterns, converts every
 * incoming message into a "watch event", keeps the events in an in-memory
 * buffer, periodically appends them as JSON lines to an hourly log file and
 * republishes them on `monitor.*` channels.
 *
 * Events emitted by this class: `started`, `stopped`, `error`,
 * `eventProcessed`, `eventProcessingError`, `eventsFlushed`, `flushError`,
 * `alertCreated`, `alertAcknowledged`, `logFileDeleted`, `cleanupError`.
 *
 * The message bus is expected to expose `subscribePattern(pattern, handler)`,
 * `unsubscribe(pattern)` and `publish(channel, message)`; those are the only
 * bus methods this class calls.
 */
export class WatchdogMonitor extends EventEmitter {
    config;
    messageBus;
    running = false;

    // Event storage and processing
    eventBuffer = [];
    channelStats = new Map();
    logStreams = new Map();
    currentLogFile;
    flushTimer;
    cleanupTimer;

    // Monitoring and alerting
    alerts = new Map();
    // Alert type -> id of the most recent alert of that type (used for de-duplication)
    activeAlertIds = new Map();
    eventRateCounter = 0;
    errorRateCounter = 0;
    lastRateReset = Date.now();

    // Metrics
    totalEventsProcessed = 0;
    totalErrorsDetected = 0;
    startTime = Date.now();

    /**
     * @param {object} [config] - Partial configuration; every missing key gets a default.
     *   Note: `redisUrl` and `maxFileSize` are stored on `this.config` but are not
     *   read anywhere else in this class.
     * @param {object} messageBus - Bus used for subscriptions and fan-out publishing.
     */
    constructor(config, messageBus) {
        super();
        this.messageBus = messageBus;
        const options = config ?? {};
        const thresholds = options.alertThresholds;
        this.config = {
            redisUrl: options.redisUrl || 'redis://localhost:6379',
            monitoredPatterns: options.monitoredPatterns || [
                'delegation.*',
                'agent.*',
                'task.*',
                'deal.*',
                'error.*',
                'escalation.*',
                'control.*',
            ],
            persistEvents: options.persistEvents ?? true,
            logDirectory: options.logDirectory || './logs/watchdog',
            compressionEnabled: options.compressionEnabled ?? true,
            maxFileSize: options.maxFileSize || 100 * 1024 * 1024, // 100MB
            logRetentionHours: options.logRetentionHours || 168, // 7 days
            maxMemoryEvents: options.maxMemoryEvents || 10000,
            flushInterval: options.flushInterval || 5000, // 5 seconds
            alertThresholds: {
                // Caller-supplied extra keys are kept; the three known keys fall back
                // to their defaults when absent, undefined or null (0 is honoured).
                ...thresholds,
                errorRate: thresholds?.errorRate ?? 10,
                eventRate: thresholds?.eventRate ?? 1000,
                memoryUsage: thresholds?.memoryUsage ?? 85,
            },
            excludePatterns: options.excludePatterns || ['debug.*', 'heartbeat.*'],
            minimumSeverity: options.minimumSeverity || 'info',
        };
        this.setupEventHandlers();
    }

    /**
     * Start monitoring: create the log directory, subscribe to every monitored
     * pattern and start the periodic flush/cleanup timers. No-op when already running.
     *
     * @throws Re-throws any error from directory creation or subscription after
     *   emitting it as an `error` event.
     */
    async start() {
        if (this.running) {
            return;
        }
        try {
            // Create log directory
            await this.ensureLogDirectory();
            // Subscribe to monitored patterns
            for (const pattern of this.config.monitoredPatterns) {
                await this.messageBus.subscribePattern(pattern, this.handleEvent.bind(this));
            }
            // Start periodic tasks
            this.startPeriodicTasks();
            this.running = true;
            this.emit('started');
            console.log(`🐕 Watchdog monitor started, monitoring ${this.config.monitoredPatterns.length} patterns`);
        } catch (error) {
            this.emit('error', error);
            throw error;
        }
    }

    /**
     * Stop monitoring: clear timers, flush buffered events, close log streams and
     * unsubscribe from every monitored pattern. No-op when not running.
     */
    async stop() {
        if (!this.running) {
            return;
        }
        this.running = false;
        // Stop periodic tasks (both the flush and the cleanup timer)
        this.stopPeriodicTasks();
        // Flush remaining events
        await this.flushEvents();
        // Close log streams
        for (const stream of this.logStreams.values()) {
            stream.end();
        }
        this.logStreams.clear();
        // Forget the current file so a later start() opens a fresh stream instead of
        // looking up a stream that was just removed from the map.
        this.currentLogFile = undefined;
        // Unsubscribe from patterns
        for (const pattern of this.config.monitoredPatterns) {
            await this.messageBus.unsubscribe(pattern);
        }
        this.emit('stopped');
        console.log('🐕 Watchdog monitor stopped');
    }

    /**
     * Handle an incoming bus message: filter, convert, count, buffer, evaluate
     * alert conditions and fan out. Any failure is reported through the
     * `eventProcessingError` event instead of being thrown.
     *
     * @param {object} message - Bus message; `type`, `source`, `timestamp` and `data` are read.
     */
    async handleEvent(message) {
        try {
            // Check if event should be excluded
            if (this.shouldExcludeEvent(message)) {
                return;
            }
            // Create watch event
            const watchEvent = this.createWatchEvent(message);
            // Check minimum severity
            if (!this.meetsSeverityThreshold(watchEvent)) {
                return;
            }
            // Update counters
            this.totalEventsProcessed++;
            this.eventRateCounter++;
            if (watchEvent.severity === 'error' || watchEvent.severity === 'critical') {
                this.totalErrorsDetected++;
                this.errorRateCounter++;
            }
            // Update channel stats
            this.updateChannelStats(message, watchEvent);
            // Add to buffer
            this.addToBuffer(watchEvent);
            // Check for alerts
            await this.checkAlertConditions(watchEvent);
            // Emit event for real-time monitoring
            this.emit('eventProcessed', watchEvent);
            // Fan out to monitor channels
            await this.fanOutEvent(watchEvent);
        } catch (error) {
            this.emit('eventProcessingError', { error, message });
        }
    }

    /**
     * Create watch event from message.
     *
     * @param {object} message - Bus message.
     * @returns {object} Watch event with id, timestamp, source, type, channel, data,
     *   severity, tags, correlationId and sessionId.
     */
    createWatchEvent(message) {
        // A missing or unparseable timestamp would yield an Invalid Date (serialised
        // as null); fall back to the time of receipt instead.
        const parsedTimestamp = new Date(message.timestamp);
        const timestamp = Number.isNaN(parsedTimestamp.getTime()) ? new Date() : parsedTimestamp;
        return {
            eventId: `evt_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`,
            timestamp,
            source: message.source,
            eventType: message.type,
            channel: this.extractChannelFromMessage(message),
            data: message.data,
            severity: this.determineSeverity(message),
            tags: this.extractTags(message),
            correlationId: this.extractCorrelationId(message),
            sessionId: this.extractSessionId(message),
        };
    }

    /**
     * Add event to buffer, keeping only the newest `maxMemoryEvents` entries.
     *
     * @param {object} event - Watch event.
     */
    addToBuffer(event) {
        this.eventBuffer.push(event);
        // Trim buffer if too large
        if (this.eventBuffer.length > this.config.maxMemoryEvents) {
            this.eventBuffer = this.eventBuffer.slice(-this.config.maxMemoryEvents);
        }
    }

    /**
     * Fan out event to monitor channels: `monitor.<eventType>`, `monitor.all` and,
     * for error/critical events, `monitor.alerts`.
     *
     * @param {object} event - Watch event.
     */
    async fanOutEvent(event) {
        const monitorChannel = `monitor.${event.eventType}`;
        const globalMonitorChannel = 'monitor.all';
        const fanOutMessage = {
            type: 'watch_event',
            source: 'watchdog',
            data: event,
            priority: this.getSeverityPriority(event.severity),
        };
        // Send to specific event type monitor
        await this.messageBus.publish(monitorChannel, fanOutMessage);
        // Send to global monitor
        await this.messageBus.publish(globalMonitorChannel, fanOutMessage);
        // Send high-severity events to alert channel
        if (event.severity === 'error' || event.severity === 'critical') {
            await this.messageBus.publish('monitor.alerts', {
                ...fanOutMessage,
                data: event,
                priority: 3,
            });
        }
    }

    /**
     * Flush buffered events to persistent storage as one JSON document per line.
     * Does nothing when persistence is disabled or the buffer is empty. Failures
     * are reported through the `flushError` event; the buffer is kept in that case.
     */
    async flushEvents() {
        if (!this.config.persistEvents || this.eventBuffer.length === 0) {
            return;
        }
        try {
            const logStream = await this.getLogStream();
            // Capture the size before the buffer is reset so the emitted count is
            // the number of events actually written.
            const flushedCount = this.eventBuffer.length;
            for (const event of this.eventBuffer) {
                logStream.write(`${JSON.stringify(event)}\n`);
            }
            this.eventBuffer = [];
            this.emit('eventsFlushed', { count: flushedCount });
        } catch (error) {
            this.emit('flushError', error);
        }
    }

    /**
     * Get or create the log stream for the current hourly log file. When the file
     * name rolls over, the previous stream is ended and removed.
     *
     * @returns {Promise<object>} Writable stream (a gzip stream piped into the file
     *   when compression is enabled).
     */
    async getLogStream() {
        const logFileName = this.generateLogFileName();
        // Also (re)create the stream when the map no longer holds one for this file,
        // e.g. after stop() or after a stream error.
        const needsNewStream = this.currentLogFile !== logFileName || !this.logStreams.has(logFileName);
        if (needsNewStream) {
            // Close current stream
            const previousKey = this.currentLogFile || '';
            const previousStream = this.logStreams.get(previousKey);
            if (previousStream) {
                previousStream.end();
                this.logStreams.delete(previousKey);
            }
            // Create new stream
            const logPath = path.join(this.config.logDirectory, logFileName);
            const fileStream = createWriteStream(logPath, { flags: 'a' });
            let stream = fileStream;
            // Add compression if enabled
            if (this.config.compressionEnabled && logFileName.endsWith('.gz')) {
                const gzipStream = createGzip();
                gzipStream.pipe(fileStream);
                stream = gzipStream;
            }
            // Without an 'error' listener a stream failure would be thrown as an
            // uncaught exception. Report it and drop the broken stream so the next
            // flush opens a new one.
            const registeredStream = stream;
            const onStreamError = (error) => {
                if (this.logStreams.get(logFileName) === registeredStream) {
                    this.logStreams.delete(logFileName);
                }
                this.emit('flushError', error);
            };
            fileStream.on('error', onStreamError);
            if (registeredStream !== fileStream) {
                registeredStream.on('error', onStreamError);
            }
            this.logStreams.set(logFileName, registeredStream);
            this.currentLogFile = logFileName;
        }
        return this.logStreams.get(logFileName);
    }

    /**
     * Generate the log file name `watchdog-YYYY-MM-DD-HH.log[.gz]`.
     * Both the date and the hour are taken in UTC so the two parts always agree.
     *
     * @param {Date} [now] - Instant to name the file for; defaults to the current time.
     * @returns {string} Log file name.
     */
    generateLogFileName(now = new Date()) {
        const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD (UTC)
        const hourStr = now.getUTCHours().toString().padStart(2, '0');
        const extension = this.config.compressionEnabled ? '.log.gz' : '.log';
        return `watchdog-${dateStr}-${hourStr}${extension}`;
    }

    /**
     * Check alert conditions (error rate, event rate, heap usage) after an event.
     * Rate counters are reset once more than a minute has passed since the last
     * reset. For each alert type at most one unacknowledged alert exists at a time;
     * a new one is raised only after the previous one has been acknowledged.
     *
     * @param {object} event - Watch event that triggered the check (not inspected).
     */
    async checkAlertConditions(event) {
        const now = Date.now();
        // Reset counters every minute
        if (now - this.lastRateReset > 60000) {
            this.eventRateCounter = 0;
            this.errorRateCounter = 0;
            this.lastRateReset = now;
        }
        const thresholds = this.config.alertThresholds;
        // Check error rate threshold
        if (this.errorRateCounter > thresholds.errorRate && !this.hasActiveAlert('error_threshold')) {
            await this.createAlert('error_threshold', 'error', 'High Error Rate Detected', `Error rate of ${this.errorRateCounter} errors/minute exceeds threshold of ${thresholds.errorRate}`, { errorRate: this.errorRateCounter, threshold: thresholds.errorRate });
        }
        // Check event rate threshold
        const eventRate = this.eventRateCounter / 60; // events per second over last minute
        if (eventRate > thresholds.eventRate && !this.hasActiveAlert('rate_limit')) {
            await this.createAlert('rate_limit', 'warning', 'High Event Rate Detected', `Event rate of ${eventRate.toFixed(1)} events/second exceeds threshold of ${thresholds.eventRate}`, { eventRate, threshold: thresholds.eventRate });
        }
        // Check memory usage
        const memoryUsage = process.memoryUsage();
        const memoryPercent = (memoryUsage.heapUsed / memoryUsage.heapTotal) * 100;
        if (memoryPercent > thresholds.memoryUsage && !this.hasActiveAlert('memory_usage')) {
            await this.createAlert('memory_usage', 'warning', 'High Memory Usage', `Memory usage at ${memoryPercent.toFixed(1)}% exceeds threshold of ${thresholds.memoryUsage}%`, { memoryPercent, threshold: thresholds.memoryUsage, memoryUsage });
        }
    }

    /**
     * Tell whether the most recently created alert of a type is still unacknowledged.
     *
     * @param {string} type - Alert type, e.g. `error_threshold`.
     * @returns {boolean} True when such an alert exists and has not been acknowledged.
     */
    hasActiveAlert(type) {
        const alertId = this.activeAlertIds.get(type);
        if (alertId === undefined) {
            return false;
        }
        const alert = this.alerts.get(alertId);
        return alert !== undefined && !alert.acknowledged;
    }

    /**
     * Create alert, store it, emit `alertCreated` and publish it on `monitor.alerts`.
     *
     * @param {string} type - Alert type.
     * @param {string} severity - Alert severity; `critical` is published with priority 3, anything else with 2.
     * @param {string} title - Short title.
     * @param {string} message - Human-readable description.
     * @param {object} metadata - Extra data attached to the alert.
     * @returns {Promise<object>} The stored alert.
     */
    async createAlert(type, severity, title, message, metadata) {
        const alertId = `alert_${type}_${Date.now()}`;
        const alert = {
            alertId,
            type,
            severity,
            title,
            message,
            timestamp: new Date(),
            acknowledged: false,
            metadata,
        };
        this.alerts.set(alertId, alert);
        this.activeAlertIds.set(type, alertId);
        // Emit alert event
        this.emit('alertCreated', alert);
        // Send alert to monitoring channels
        await this.messageBus.publish('monitor.alerts', {
            type: 'alert',
            source: 'watchdog',
            data: alert,
            priority: severity === 'critical' ? 3 : 2,
        });
        return alert;
    }

    /**
     * Update channel statistics (counts, last event time, per-type counts and a
     * running average of the serialised event size).
     *
     * @param {object} message - Original bus message (unused; kept for interface compatibility).
     * @param {object} event - Watch event derived from the message.
     */
    updateChannelStats(message, event) {
        const channel = event.channel;
        if (!this.channelStats.has(channel)) {
            this.channelStats.set(channel, {
                eventCount: 0,
                lastEventTime: event.timestamp,
                errorCount: 0,
                averageEventSize: 0,
                eventTypes: {},
            });
        }
        const stats = this.channelStats.get(channel);
        stats.eventCount++;
        stats.lastEventTime = event.timestamp;
        if (event.severity === 'error' || event.severity === 'critical') {
            stats.errorCount++;
        }
        // Update event types
        stats.eventTypes[event.eventType] = (stats.eventTypes[event.eventType] || 0) + 1;
        // Update average event size (approximate: length of the JSON string)
        const eventSize = JSON.stringify(event).length;
        stats.averageEventSize = (stats.averageEventSize * (stats.eventCount - 1) + eventSize) / stats.eventCount;
    }

    /**
     * Get monitoring statistics.
     *
     * @returns {object} Snapshot of running state, uptime (ms since construction),
     *   counters, buffer size, channel count, unacknowledged alert count and
     *   `process.memoryUsage()`.
     */
    getStats() {
        return {
            running: this.running,
            uptime: Date.now() - this.startTime,
            totalEventsProcessed: this.totalEventsProcessed,
            totalErrorsDetected: this.totalErrorsDetected,
            bufferedEvents: this.eventBuffer.length,
            channelCount: this.channelStats.size,
            activeAlerts: this.getActiveAlerts().length,
            memoryUsage: process.memoryUsage(),
        };
    }

    /**
     * Get channel statistics.
     *
     * @returns {Map} Shallow copy of the channel-stats map (the stats objects are shared).
     */
    getChannelStats() {
        return new Map(this.channelStats);
    }

    /**
     * Get recent events from the in-memory buffer, newest first.
     *
     * @param {number} [limit=100] - Maximum number of events to return.
     * @param {object} [filter] - Optional filter; `severity`, `eventType` and `source`
     *   are each tested with `.includes(value)`.
     * @returns {object[]} Matching events, newest first.
     */
    getRecentEvents(limit = 100, filter) {
        let events = [...this.eventBuffer];
        if (filter) {
            if (filter.severity) {
                events = events.filter((e) => filter.severity.includes(e.severity));
            }
            if (filter.eventType) {
                events = events.filter((e) => filter.eventType.includes(e.eventType));
            }
            if (filter.source) {
                events = events.filter((e) => filter.source.includes(e.source));
            }
        }
        return events.slice(-limit).reverse();
    }

    /**
     * Get active alerts.
     *
     * @returns {object[]} Alerts that have not been acknowledged.
     */
    getActiveAlerts() {
        return Array.from(this.alerts.values()).filter((a) => !a.acknowledged);
    }

    /**
     * Acknowledge alert.
     *
     * @param {string} alertId - Id returned in the alert's `alertId` field.
     * @returns {boolean} True when an unacknowledged alert with that id was found and acknowledged.
     */
    acknowledgeAlert(alertId) {
        const alert = this.alerts.get(alertId);
        if (alert && !alert.acknowledged) {
            alert.acknowledged = true;
            this.emit('alertAcknowledged', alert);
            return true;
        }
        return false;
    }

    // Helper methods

    /** Register the default `error` listener so emitting `error` never throws. */
    setupEventHandlers() {
        this.on('error', (error) => {
            console.error('Watchdog monitor error:', error);
        });
    }

    /** Start the periodic flush timer and the hourly log-cleanup timer. */
    startPeriodicTasks() {
        // Make sure a repeated call cannot orphan timers that are already running.
        this.stopPeriodicTasks();
        // Flush events periodically
        this.flushTimer = setInterval(() => {
            this.flushEvents().catch((error) => {
                this.emit('flushError', error);
            });
        }, this.config.flushInterval);
        // Cleanup old log files periodically (every hour)
        this.cleanupTimer = setInterval(() => {
            this.cleanupOldLogs().catch((error) => {
                this.emit('cleanupError', error);
            });
        }, 3600000);
    }

    /** Clear both periodic timers, if set. */
    stopPeriodicTasks() {
        if (this.flushTimer) {
            clearInterval(this.flushTimer);
            this.flushTimer = undefined;
        }
        if (this.cleanupTimer) {
            clearInterval(this.cleanupTimer);
            this.cleanupTimer = undefined;
        }
    }

    /** Create the log directory (recursively); an `EEXIST` error is ignored. */
    async ensureLogDirectory() {
        try {
            await fs.mkdir(this.config.logDirectory, { recursive: true });
        } catch (error) {
            if (error.code !== 'EEXIST') {
                throw error;
            }
        }
    }

    /**
     * Delete `watchdog-*.log` / `watchdog-*.log.gz` files in the log directory whose
     * modification time is older than `logRetentionHours`. Emits `logFileDeleted`
     * per removed file and `cleanupError` on failure.
     */
    async cleanupOldLogs() {
        if (!this.config.persistEvents) {
            return;
        }
        try {
            const files = await fs.readdir(this.config.logDirectory);
            const cutoffTime = Date.now() - (this.config.logRetentionHours * 60 * 60 * 1000);
            for (const file of files) {
                const isWatchdogLog = file.startsWith('watchdog-') && (file.endsWith('.log') || file.endsWith('.log.gz'));
                if (!isWatchdogLog) {
                    continue;
                }
                const filePath = path.join(this.config.logDirectory, file);
                const stats = await fs.stat(filePath);
                if (stats.mtime.getTime() < cutoffTime) {
                    await fs.unlink(filePath);
                    this.emit('logFileDeleted', { file, age: Date.now() - stats.mtime.getTime() });
                }
            }
        } catch (error) {
            this.emit('cleanupError', error);
        }
    }

    /**
     * @param {object} message - Bus message.
     * @returns {boolean} True when `message.type` matches any configured exclude pattern.
     */
    shouldExcludeEvent(message) {
        return this.config.excludePatterns.some((pattern) => this.matchesPattern(message.type, pattern));
    }

    /**
     * @param {object} event - Watch event.
     * @returns {boolean} True when the event severity is at or above `minimumSeverity`.
     */
    meetsSeverityThreshold(event) {
        const severityLevels = ['debug', 'info', 'warning', 'error', 'critical'];
        const eventLevel = severityLevels.indexOf(event.severity);
        const thresholdLevel = severityLevels.indexOf(this.config.minimumSeverity);
        return eventLevel >= thresholdLevel;
    }

    /**
     * Match text against a glob-like pattern in which `*` stands for any run of
     * characters. Every other character, including `.`, is matched literally.
     *
     * @param {string} text - Text to test (a missing value is treated as an empty string).
     * @param {string} pattern - Pattern such as `task.*`.
     * @returns {boolean} True when the whole text matches the pattern.
     */
    matchesPattern(text, pattern) {
        // Escape regex metacharacters first so that only `*` acts as a wildcard.
        const escapedPattern = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
        const regexPattern = escapedPattern.replace(/\*/g, '.*');
        return new RegExp(`^${regexPattern}$`).test(String(text ?? ''));
    }

    /**
     * @param {object} message - Bus message.
     * @returns {string} `data.channel`, else `source`, else `'unknown'`.
     */
    extractChannelFromMessage(message) {
        // Try to extract channel from message metadata or use source
        return message.data?.channel || message.source || 'unknown';
    }

    /**
     * Derive a severity from substrings of the message type. Checks run in this
     * order and the first hit wins: error/fail, warning/warn, critical/alert, debug;
     * anything else is `info`.
     *
     * @param {object} message - Bus message.
     * @returns {string} One of `error`, `warning`, `critical`, `debug`, `info`.
     */
    determineSeverity(message) {
        const type = String(message.type ?? '');
        if (type.includes('error') || type.includes('fail')) {
            return 'error';
        }
        if (type.includes('warning') || type.includes('warn')) {
            return 'warning';
        }
        if (type.includes('critical') || type.includes('alert')) {
            return 'critical';
        }
        if (type.includes('debug')) {
            return 'debug';
        }
        return 'info';
    }

    /**
     * @param {object} message - Bus message.
     * @returns {string[]} `source:<source>` (when present), `type:<type>` and any
     *   entries of the `data.tags` array.
     */
    extractTags(message) {
        const tags = [];
        // Add source as tag
        if (message.source) {
            tags.push(`source:${message.source}`);
        }
        // Add type as tag
        tags.push(`type:${message.type}`);
        // Extract custom tags from data
        const customTags = message.data?.tags;
        if (Array.isArray(customTags)) {
            tags.push(...customTags);
        }
        return tags;
    }

    /**
     * @param {object} message - Bus message.
     * @returns {*} First truthy value of `data.correlationId`, `data.traceId`, `data.requestId`.
     */
    extractCorrelationId(message) {
        const data = message.data;
        return data?.correlationId || data?.traceId || data?.requestId;
    }

    /**
     * @param {object} message - Bus message.
     * @returns {*} `data.sessionId`, else `data.userId`.
     */
    extractSessionId(message) {
        const data = message.data;
        return data?.sessionId || data?.userId;
    }

    /**
     * @param {string} severity - Severity name.
     * @returns {number} 3 for critical, 2 for error, 1 for warning, otherwise 0.
     */
    getSeverityPriority(severity) {
        switch (severity) {
            case 'critical': return 3;
            case 'error': return 2;
            case 'warning': return 1;
            default: return 0;
        }
    }

    /**
     * Health check: healthy only while running with no unacknowledged alerts.
     *
     * @returns {Promise<object>} `{ status, details }` where details holds the stats
     *   snapshot plus a short config summary.
     */
    async healthCheck() {
        const stats = this.getStats();
        const isHealthy = this.running && stats.activeAlerts === 0;
        return {
            status: isHealthy ? 'healthy' : 'unhealthy',
            details: {
                ...stats,
                config: {
                    monitoredPatterns: this.config.monitoredPatterns.length,
                    persistEvents: this.config.persistEvents,
                    logDirectory: this.config.logDirectory,
                },
            },
        };
    }
}
//# sourceMappingURL=watchdogMonitor.js.map