tlnt
Version:
TLNT - HMS-Powered Multi-Agent Platform with Government Agency Analysis, Deep Research, and Enterprise-Ready Deployment. Self-optimizing multi-domain AI agent with continuous learning and enterprise-grade performance monitoring.
531 lines • 19.2 kB
JavaScript
import { EventEmitter } from 'events';
import * as fs from 'fs/promises';
import * as path from 'path';
import { createWriteStream } from 'fs';
import { createGzip } from 'zlib';
/**
* Watchdog Monitor for event fan-out and persistence
* Provides real-time monitoring and alerting for HMS Dev system
*/
export class WatchdogMonitor extends EventEmitter {
  config;
  messageBus;
  running = false;
  // Event storage and processing
  eventBuffer = [];
  channelStats = new Map();
  // Writable end per log file name: the gzip stream when compressing, otherwise the file stream.
  logStreams = new Map();
  // Underlying file stream per log file name; its 'close' event marks the data as fully written.
  logSinks = new Map();
  currentLogFile;
  flushTimer;
  cleanupTimer;
  // Monitoring and alerting
  alerts = new Map();
  // Time (ms since epoch) the last alert of each type was raised by a threshold check.
  lastAlertTimes = new Map();
  alertSequence = 0;
  eventRateCounter = 0;
  errorRateCounter = 0;
  lastRateReset = Date.now();
  // Metrics
  totalEventsProcessed = 0;
  totalErrorsDetected = 0;
  startTime = Date.now();

  /**
   * Build a monitor with defaults applied to every missing option.
   *
   * Note: `redisUrl` and `maxFileSize` are stored on `this.config` but are not
   * read anywhere in this class.
   *
   * @param {object} [config] - Partial configuration; a nullish value is treated as `{}`.
   * @param {number} [config.alertCooldownMs=60000] - Minimum delay between two
   *   threshold alerts of the same type; `0` disables the cooldown.
   * @param {object} messageBus - Object providing `subscribePattern`, `unsubscribe`
   *   and `publish`, all of which are awaited by this class.
   */
  constructor(config, messageBus) {
    super();
    const options = config ?? {};
    const thresholds = options.alertThresholds ?? {};
    this.messageBus = messageBus;
    this.config = {
      redisUrl: options.redisUrl || 'redis://localhost:6379',
      monitoredPatterns: options.monitoredPatterns || [
        'delegation.*',
        'agent.*',
        'task.*',
        'deal.*',
        'error.*',
        'escalation.*',
        'control.*'
      ],
      persistEvents: options.persistEvents ?? true,
      logDirectory: options.logDirectory || './logs/watchdog',
      compressionEnabled: options.compressionEnabled ?? true,
      maxFileSize: options.maxFileSize || 100 * 1024 * 1024, // 100MB
      logRetentionHours: options.logRetentionHours || 168, // 7 days
      maxMemoryEvents: options.maxMemoryEvents || 10000,
      flushInterval: options.flushInterval || 5000, // 5 seconds
      alertCooldownMs: options.alertCooldownMs ?? 60000,
      alertThresholds: {
        // Spread first so custom keys survive, then pin the three known keys so
        // an explicit `undefined` cannot erase a default.
        ...thresholds,
        errorRate: thresholds.errorRate ?? 10,
        eventRate: thresholds.eventRate ?? 1000,
        memoryUsage: thresholds.memoryUsage ?? 85
      },
      excludePatterns: options.excludePatterns || ['debug.*', 'heartbeat.*'],
      minimumSeverity: options.minimumSeverity || 'info'
    };
    this.setupEventHandlers();
  }

  /**
   * Start monitoring: create the log directory, subscribe to every monitored
   * pattern and start the periodic flush/cleanup timers. No-op when already running.
   *
   * @throws Re-throws any setup failure after emitting it as an `'error'` event.
   */
  async start() {
    if (this.running) {
      return;
    }
    try {
      // Create log directory
      await this.ensureLogDirectory();
      // Subscribe to monitored patterns
      for (const pattern of this.config.monitoredPatterns) {
        await this.messageBus.subscribePattern(pattern, this.handleEvent.bind(this));
      }
      // Start periodic tasks
      this.startPeriodicTasks();
      this.running = true;
      this.emit('started');
      console.log(`🐕 Watchdog monitor started, monitoring ${this.config.monitoredPatterns.length} patterns`);
    }
    catch (error) {
      this.emit('error', error);
      throw error;
    }
  }

  /**
   * Stop monitoring. No-op when not running.
   *
   * Order matters: timers are cleared, subscriptions are dropped first so no new
   * events arrive, then the buffer is flushed and every log stream is closed.
   * The returned promise resolves only after the log files have been closed.
   * An unsubscribe failure still propagates, but only after flush and close ran.
   */
  async stop() {
    if (!this.running) {
      return;
    }
    this.running = false;
    // Stop periodic tasks (both timers, otherwise the process is kept alive)
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = undefined;
    }
    if (this.cleanupTimer) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = undefined;
    }
    try {
      // Unsubscribe from patterns
      for (const pattern of this.config.monitoredPatterns) {
        await this.messageBus.unsubscribe(pattern);
      }
    }
    finally {
      // Flush remaining events
      await this.flushEvents();
      // Close log streams and wait until the files are closed
      const openFiles = [...this.logStreams.keys()];
      await Promise.all(openFiles.map((fileName) => this.closeLogStream(fileName)));
      // Forget the current file so a later start() opens a fresh stream
      this.currentLogFile = undefined;
    }
    this.emit('stopped');
    console.log('🐕 Watchdog monitor stopped');
  }

  /**
   * Handle one incoming bus message: filter it, convert it to a watch event,
   * update counters and statistics, buffer it, evaluate alert thresholds and
   * fan it out. Any failure is reported as an `'eventProcessingError'` event
   * instead of being thrown.
   *
   * @param {object} message - Bus message; `type`, `source`, `timestamp` and `data` are read.
   */
  async handleEvent(message) {
    try {
      // Check if event should be excluded
      if (this.shouldExcludeEvent(message)) {
        return;
      }
      // Create watch event
      const watchEvent = this.createWatchEvent(message);
      // Check minimum severity
      if (!this.meetsSeverityThreshold(watchEvent)) {
        return;
      }
      // Update counters
      this.totalEventsProcessed++;
      this.eventRateCounter++;
      if (watchEvent.severity === 'error' || watchEvent.severity === 'critical') {
        this.totalErrorsDetected++;
        this.errorRateCounter++;
      }
      // Update channel stats
      this.updateChannelStats(message, watchEvent);
      // Add to buffer
      this.addToBuffer(watchEvent);
      // Check for alerts
      await this.checkAlertConditions(watchEvent);
      // Emit event for real-time monitoring
      this.emit('eventProcessed', watchEvent);
      // Fan out to monitor channels
      await this.fanOutEvent(watchEvent);
    }
    catch (error) {
      this.emit('eventProcessingError', { error, message });
    }
  }

  /**
   * Create a watch event from a bus message.
   *
   * @param {object} message - Bus message.
   * @returns {object} Watch event. `timestamp` falls back to the current time
   *   when the message timestamp is missing or cannot be parsed as a date.
   */
  createWatchEvent(message) {
    const parsedTime = message.timestamp == null ? new Date() : new Date(message.timestamp);
    return {
      eventId: `evt_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`,
      timestamp: Number.isNaN(parsedTime.getTime()) ? new Date() : parsedTime,
      source: message.source,
      eventType: message.type,
      channel: this.extractChannelFromMessage(message),
      data: message.data,
      severity: this.determineSeverity(message),
      tags: this.extractTags(message),
      correlationId: this.extractCorrelationId(message),
      sessionId: this.extractSessionId(message)
    };
  }

  /**
   * Append an event to the in-memory buffer, keeping only the newest
   * `config.maxMemoryEvents` entries.
   */
  addToBuffer(event) {
    this.eventBuffer.push(event);
    // Trim buffer if too large
    if (this.eventBuffer.length > this.config.maxMemoryEvents) {
      this.eventBuffer = this.eventBuffer.slice(-this.config.maxMemoryEvents);
    }
  }

  /**
   * Republish an event on `monitor.<eventType>` and `monitor.all`; error and
   * critical events are additionally published on `monitor.alerts` with priority 3.
   */
  async fanOutEvent(event) {
    const monitorChannel = `monitor.${event.eventType}`;
    const globalMonitorChannel = 'monitor.all';
    const fanOutMessage = {
      type: 'watch_event',
      source: 'watchdog',
      data: event,
      priority: this.getSeverityPriority(event.severity)
    };
    // Send to specific event type monitor
    await this.messageBus.publish(monitorChannel, fanOutMessage);
    // Send to global monitor
    await this.messageBus.publish(globalMonitorChannel, fanOutMessage);
    // Send high-severity events to alert channel
    if (event.severity === 'error' || event.severity === 'critical') {
      await this.messageBus.publish('monitor.alerts', {
        ...fanOutMessage,
        data: event,
        priority: 3
      });
    }
  }

  /**
   * Write all buffered events to the current log file as JSON lines and empty
   * the buffer. Does nothing when persistence is disabled or the buffer is empty.
   * Emits `'eventsFlushed'` with the number of events written, or `'flushError'`
   * on failure (the promise itself does not reject for write failures).
   */
  async flushEvents() {
    if (!this.config.persistEvents || this.eventBuffer.length === 0) {
      return;
    }
    try {
      const logStream = await this.getLogStream();
      // Detach the batch before writing so the reported count is the number
      // actually written, not the length of the already-cleared buffer.
      const pending = this.eventBuffer;
      this.eventBuffer = [];
      for (const event of pending) {
        logStream.write(`${JSON.stringify(event)}\n`);
      }
      this.emit('eventsFlushed', { count: pending.length });
    }
    catch (error) {
      this.emit('flushError', error);
    }
  }

  /**
   * Get the stream for the current log file, creating it (and closing the
   * previous file's stream) when the file name has changed or no stream is open.
   *
   * This method intentionally contains no `await`, so two overlapping flushes
   * cannot both create a stream for the same file.
   *
   * @returns {Promise<object>} Writable stream to write log lines to.
   */
  async getLogStream() {
    const logFileName = this.generateLogFileName();
    const openStream = this.logStreams.get(logFileName);
    if (openStream) {
      return openStream;
    }
    // Close the stream of the previous file, if any. Not awaited on purpose;
    // closeLogStream never rejects.
    if (this.currentLogFile && this.currentLogFile !== logFileName) {
      void this.closeLogStream(this.currentLogFile);
    }
    // Create new stream
    const logPath = path.join(this.config.logDirectory, logFileName);
    const fileStream = createWriteStream(logPath, { flags: 'a' });
    let stream = fileStream;
    // Add compression if enabled
    if (this.config.compressionEnabled && logFileName.endsWith('.gz')) {
      const gzipStream = createGzip();
      gzipStream.pipe(fileStream);
      stream = gzipStream;
    }
    // Without an 'error' listener a failing stream would raise an unhandled
    // 'error' event. Report it and drop the stream so the next flush reopens the file.
    const onStreamError = (error) => {
      if (this.logStreams.get(logFileName) === stream) {
        this.logStreams.delete(logFileName);
        this.logSinks.delete(logFileName);
      }
      stream.destroy();
      fileStream.destroy();
      this.emit('flushError', error);
    };
    fileStream.on('error', onStreamError);
    if (stream !== fileStream) {
      stream.on('error', onStreamError);
    }
    this.logStreams.set(logFileName, stream);
    this.logSinks.set(logFileName, fileStream);
    this.currentLogFile = logFileName;
    return stream;
  }

  /**
   * End the stream registered for `fileName` and forget it.
   *
   * @param {string} fileName - Key used in `logStreams`.
   * @returns {Promise<void>} Resolves once the underlying file stream has emitted
   *   `'close'` (immediately when there is no such stream or it is already destroyed).
   */
  closeLogStream(fileName) {
    const stream = this.logStreams.get(fileName);
    const sink = this.logSinks.get(fileName) ?? stream;
    this.logStreams.delete(fileName);
    this.logSinks.delete(fileName);
    if (!stream || sink.destroyed) {
      return Promise.resolve();
    }
    return new Promise((resolve) => {
      sink.once('close', resolve);
      stream.end();
    });
  }

  /**
   * Generate the log file name for the current hour.
   * Both the date and the hour are taken in UTC so they always agree.
   *
   * @returns {string} `watchdog-YYYY-MM-DD-HH.log` or `.log.gz` when compression is enabled.
   */
  generateLogFileName() {
    const now = new Date();
    const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD (UTC)
    const hourStr = now.getUTCHours().toString().padStart(2, '0');
    const extension = this.config.compressionEnabled ? '.log.gz' : '.log';
    return `watchdog-${dateStr}-${hourStr}${extension}`;
  }

  /**
   * Evaluate the error-rate, event-rate and memory thresholds and raise an
   * alert for each one that is exceeded. Rate counters are reset once more than
   * a minute has passed since the last reset. Repeated alerts of the same type
   * are suppressed for `config.alertCooldownMs`.
   */
  async checkAlertConditions(event) {
    const now = Date.now();
    // Reset counters every minute
    if (now - this.lastRateReset > 60000) {
      this.eventRateCounter = 0;
      this.errorRateCounter = 0;
      this.lastRateReset = now;
    }
    const thresholds = this.config.alertThresholds;
    // Check error rate threshold
    if (this.errorRateCounter > thresholds.errorRate) {
      await this.raiseAlertWithCooldown('error_threshold', 'error', 'High Error Rate Detected', `Error rate of ${this.errorRateCounter} errors/minute exceeds threshold of ${thresholds.errorRate}`, { errorRate: this.errorRateCounter, threshold: thresholds.errorRate });
    }
    // Check event rate threshold
    const eventRate = this.eventRateCounter / 60; // events per second over last minute
    if (eventRate > thresholds.eventRate) {
      await this.raiseAlertWithCooldown('rate_limit', 'warning', 'High Event Rate Detected', `Event rate of ${eventRate.toFixed(1)} events/second exceeds threshold of ${thresholds.eventRate}`, { eventRate, threshold: thresholds.eventRate });
    }
    // Check memory usage
    const memoryUsage = process.memoryUsage();
    const memoryPercent = (memoryUsage.heapUsed / memoryUsage.heapTotal) * 100;
    if (memoryPercent > thresholds.memoryUsage) {
      await this.raiseAlertWithCooldown('memory_usage', 'warning', 'High Memory Usage', `Memory usage at ${memoryPercent.toFixed(1)}% exceeds threshold of ${thresholds.memoryUsage}%`, { memoryPercent, threshold: thresholds.memoryUsage, memoryUsage });
    }
  }

  /**
   * Create an alert unless one of the same type was raised through this method
   * less than `config.alertCooldownMs` ago. Without this, every event processed
   * while a threshold is exceeded would create and publish another alert.
   *
   * @returns {Promise<object|undefined>} The alert, or `undefined` when suppressed.
   */
  async raiseAlertWithCooldown(type, severity, title, message, metadata) {
    const now = Date.now();
    const lastRaised = this.lastAlertTimes.get(type);
    if (lastRaised !== undefined && now - lastRaised < this.config.alertCooldownMs) {
      return undefined;
    }
    this.lastAlertTimes.set(type, now);
    return this.createAlert(type, severity, title, message, metadata);
  }

  /**
   * Create, store, emit (`'alertCreated'`) and publish (`monitor.alerts`) an alert.
   *
   * @returns {Promise<object>} The stored alert object.
   */
  async createAlert(type, severity, title, message, metadata) {
    // The sequence suffix keeps ids unique when two alerts of one type are
    // created within the same millisecond.
    this.alertSequence += 1;
    const alertId = `alert_${type}_${Date.now()}_${this.alertSequence}`;
    const alert = {
      alertId,
      type,
      severity,
      title,
      message,
      timestamp: new Date(),
      acknowledged: false,
      metadata
    };
    this.alerts.set(alertId, alert);
    // Emit alert event
    this.emit('alertCreated', alert);
    // Send alert to monitoring channels
    await this.messageBus.publish('monitor.alerts', {
      type: 'alert',
      source: 'watchdog',
      data: alert,
      priority: severity === 'critical' ? 3 : 2
    });
    return alert;
  }

  /**
   * Update the per-channel statistics (counts, last event time, per-type counts
   * and running average of the serialized event size) for `event.channel`.
   */
  updateChannelStats(message, event) {
    const channel = event.channel;
    if (!this.channelStats.has(channel)) {
      this.channelStats.set(channel, {
        eventCount: 0,
        lastEventTime: event.timestamp,
        errorCount: 0,
        averageEventSize: 0,
        eventTypes: {}
      });
    }
    const stats = this.channelStats.get(channel);
    stats.eventCount++;
    stats.lastEventTime = event.timestamp;
    if (event.severity === 'error' || event.severity === 'critical') {
      stats.errorCount++;
    }
    // Update event types
    stats.eventTypes[event.eventType] = (stats.eventTypes[event.eventType] || 0) + 1;
    // Update average event size (approximate: length of the JSON text)
    const eventSize = JSON.stringify(event).length;
    stats.averageEventSize = (stats.averageEventSize * (stats.eventCount - 1) + eventSize) / stats.eventCount;
  }

  /**
   * Get monitoring statistics.
   *
   * @returns {object} Snapshot of counters, buffer size, unacknowledged alert
   *   count and `process.memoryUsage()`. `uptime` is measured from construction.
   */
  getStats() {
    return {
      running: this.running,
      uptime: Date.now() - this.startTime,
      totalEventsProcessed: this.totalEventsProcessed,
      totalErrorsDetected: this.totalErrorsDetected,
      bufferedEvents: this.eventBuffer.length,
      channelCount: this.channelStats.size,
      activeAlerts: this.getActiveAlerts().length,
      memoryUsage: process.memoryUsage()
    };
  }

  /**
   * Get channel statistics.
   *
   * @returns {Map} Shallow copy of the channel map; the stats objects are shared.
   */
  getChannelStats() {
    return new Map(this.channelStats);
  }

  /**
   * Get the most recent buffered events, newest first. Only events still in the
   * in-memory buffer are visible; flushing empties that buffer.
   *
   * @param {number} [limit=100] - Maximum number of events; values that are not
   *   greater than zero yield an empty list.
   * @param {object} [filter] - Optional `severity`, `eventType` and `source`
   *   criteria; each may be a single value or an array of accepted values.
   * @returns {object[]} Matching events.
   */
  getRecentEvents(limit = 100, filter) {
    // slice(-0) would return the whole array, so handle non-positive limits first.
    if (!(limit > 0)) {
      return [];
    }
    // Wrap scalars so a string criterion is compared exactly instead of by substring.
    const asList = (value) => (Array.isArray(value) ? value : [value]);
    let events = [...this.eventBuffer];
    if (filter) {
      if (filter.severity) {
        const accepted = asList(filter.severity);
        events = events.filter((e) => accepted.includes(e.severity));
      }
      if (filter.eventType) {
        const accepted = asList(filter.eventType);
        events = events.filter((e) => accepted.includes(e.eventType));
      }
      if (filter.source) {
        const accepted = asList(filter.source);
        events = events.filter((e) => accepted.includes(e.source));
      }
    }
    return events.slice(-limit).reverse();
  }

  /**
   * Get active alerts.
   *
   * @returns {object[]} Alerts that have not been acknowledged.
   */
  getActiveAlerts() {
    return Array.from(this.alerts.values()).filter((a) => !a.acknowledged);
  }

  /**
   * Acknowledge an alert and emit `'alertAcknowledged'`.
   *
   * @returns {boolean} `true` when the alert existed and was not yet acknowledged.
   */
  acknowledgeAlert(alertId) {
    const alert = this.alerts.get(alertId);
    if (alert && !alert.acknowledged) {
      alert.acknowledged = true;
      this.emit('alertAcknowledged', alert);
      return true;
    }
    return false;
  }

  // Helper methods

  /** Register a default `'error'` listener so emitting `'error'` logs instead of throwing. */
  setupEventHandlers() {
    this.on('error', (error) => {
      console.error('Watchdog monitor error:', error);
    });
  }

  /** Start the periodic flush timer and the hourly log cleanup timer. */
  startPeriodicTasks() {
    // Flush events periodically
    this.flushTimer = setInterval(() => {
      this.flushEvents().catch((error) => {
        this.emit('flushError', error);
      });
    }, this.config.flushInterval);
    // Cleanup old log files periodically (every hour); the handle is kept so stop() can clear it
    this.cleanupTimer = setInterval(() => {
      this.cleanupOldLogs().catch((error) => {
        this.emit('cleanupError', error);
      });
    }, 3600000);
  }

  /** Create the log directory (and missing parents) if it does not exist. */
  async ensureLogDirectory() {
    try {
      await fs.mkdir(this.config.logDirectory, { recursive: true });
    }
    catch (error) {
      if (error.code !== 'EEXIST') {
        throw error;
      }
    }
  }

  /**
   * Delete watchdog log files whose modification time is older than
   * `config.logRetentionHours`. Failures are emitted as `'cleanupError'`;
   * a failure on one file does not stop the remaining files from being processed.
   */
  async cleanupOldLogs() {
    if (!this.config.persistEvents) {
      return;
    }
    try {
      const files = await fs.readdir(this.config.logDirectory);
      const cutoffTime = Date.now() - (this.config.logRetentionHours * 60 * 60 * 1000);
      for (const file of files) {
        const isWatchdogLog = file.startsWith('watchdog-') && (file.endsWith('.log') || file.endsWith('.log.gz'));
        if (!isWatchdogLog) {
          continue;
        }
        try {
          const filePath = path.join(this.config.logDirectory, file);
          const stats = await fs.stat(filePath);
          if (stats.mtime.getTime() < cutoffTime) {
            await fs.unlink(filePath);
            this.emit('logFileDeleted', { file, age: Date.now() - stats.mtime.getTime() });
          }
        }
        catch (error) {
          this.emit('cleanupError', error);
        }
      }
    }
    catch (error) {
      this.emit('cleanupError', error);
    }
  }

  /** @returns {boolean} `true` when the message type matches any exclude pattern. */
  shouldExcludeEvent(message) {
    return this.config.excludePatterns.some((pattern) => this.matchesPattern(message.type, pattern));
  }

  /**
   * @returns {boolean} `true` when the event severity is at or above
   *   `config.minimumSeverity` in the order debug < info < warning < error < critical.
   */
  meetsSeverityThreshold(event) {
    const severityLevels = ['debug', 'info', 'warning', 'error', 'critical'];
    const eventLevel = severityLevels.indexOf(event.severity);
    const thresholdLevel = severityLevels.indexOf(this.config.minimumSeverity);
    return eventLevel >= thresholdLevel;
  }

  /**
   * Match `text` against a glob-like pattern in which `*` stands for any run of
   * characters and every other character is literal.
   *
   * @returns {boolean} `true` when the whole text matches.
   */
  matchesPattern(text, pattern) {
    // Escape regex metacharacters (all except '*') so that, for example, the
    // dot in 'debug.*' only matches a literal dot.
    const escaped = String(pattern).replace(/[.+?^${}()|[\]\\]/g, '\\$&');
    const regexPattern = escaped.replace(/\*/g, '.*');
    return new RegExp(`^${regexPattern}$`).test(text);
  }

  /** @returns {string} `data.channel`, else the message source, else `'unknown'`. */
  extractChannelFromMessage(message) {
    // Try to extract channel from message metadata or use source
    return message.data?.channel || message.source || 'unknown';
  }

  /**
   * Derive a severity from keywords in the message type.
   * `critical` is tested first so a type such as `critical_error` is not
   * downgraded to `error`.
   *
   * @returns {'critical'|'error'|'warning'|'debug'|'info'}
   */
  determineSeverity(message) {
    const type = String(message.type ?? '');
    if (type.includes('critical')) {
      return 'critical';
    }
    if (type.includes('error') || type.includes('fail')) {
      return 'error';
    }
    // 'warn' also covers 'warning'
    if (type.includes('warn')) {
      return 'warning';
    }
    if (type.includes('alert')) {
      return 'critical';
    }
    if (type.includes('debug')) {
      return 'debug';
    }
    return 'info';
  }

  /** @returns {string[]} `source:` and `type:` tags followed by any `data.tags` entries. */
  extractTags(message) {
    const tags = [];
    // Add source as tag
    if (message.source) {
      tags.push(`source:${message.source}`);
    }
    // Add type as tag
    tags.push(`type:${message.type}`);
    // Extract custom tags from data
    const customTags = message.data?.tags;
    if (Array.isArray(customTags)) {
      tags.push(...customTags);
    }
    return tags;
  }

  /** @returns {*} First truthy value of `data.correlationId`, `data.traceId`, `data.requestId`. */
  extractCorrelationId(message) {
    const data = message.data;
    return data?.correlationId || data?.traceId || data?.requestId;
  }

  /** @returns {*} `data.sessionId` if truthy, otherwise `data.userId`. */
  extractSessionId(message) {
    const data = message.data;
    return data?.sessionId || data?.userId;
  }

  /** @returns {number} 3 for critical, 2 for error, 1 for warning, otherwise 0. */
  getSeverityPriority(severity) {
    switch (severity) {
      case 'critical': return 3;
      case 'error': return 2;
      case 'warning': return 1;
      default: return 0;
    }
  }

  /**
   * Health check.
   *
   * @returns {Promise<object>} `status` is `'healthy'` only while the monitor is
   *   running and has no unacknowledged alerts; `details` carries the stats and
   *   a short configuration summary.
   */
  async healthCheck() {
    const stats = this.getStats();
    const isHealthy = this.running && stats.activeAlerts === 0;
    return {
      status: isHealthy ? 'healthy' : 'unhealthy',
      details: {
        ...stats,
        config: {
          monitoredPatterns: this.config.monitoredPatterns.length,
          persistEvents: this.config.persistEvents,
          logDirectory: this.config.logDirectory
        }
      }
    };
  }
}
//# sourceMappingURL=watchdogMonitor.js.map