UNPKG

@restnfeel/agentc-starter-kit

Version:

한국어 기업용 CMS 모듈 - Task Master AI와 함께 빠르게 웹사이트를 구현할 수 있는 재사용 가능한 컴포넌트 시스템

883 lines (780 loc) 23.5 kB
import {
  DeploymentExecution,
  DeploymentMonitoring,
  EnvironmentType,
  PerformanceMetric,
  ResourceMetric,
  ActiveAlert,
  AlertConfig,
  MonitoringConfig,
  LogEntry,
  DeploymentStatus,
} from "./types";

/** Aggregated view of one deployment, as returned by `DeploymentMonitor.getDashboard`. */
export interface MonitoringDashboard {
  deploymentId: string;
  environment: EnvironmentType;
  status: DeploymentStatus;
  overview: MonitoringOverview;
  realTimeMetrics: RealTimeMetrics;
  alerts: AlertSummary;
  logs: LogSummary;
  lastUpdated: Date;
}

/** Headline figures shown at the top of a dashboard. */
export interface MonitoringOverview {
  uptime: number;
  totalRequests: number;
  successRate: number;
  errorRate: number;
  averageResponseTime: number;
  peakResponseTime: number;
  activeUsers: number;
  dataTransfer: number; // MB
}

/** Current performance, resource, database and external-service readings. */
export interface RealTimeMetrics {
  performance: {
    responseTime: PerformanceMetric;
    throughput: PerformanceMetric;
    errorRate: PerformanceMetric;
    availability: PerformanceMetric;
  };
  resources: {
    cpu: ResourceMetric;
    memory: ResourceMetric;
    disk: ResourceMetric;
    network: ResourceMetric;
  };
  database: {
    connections: number;
    queryTime: number;
    lockWaitTime: number;
    cacheHitRate: number;
  };
  external: {
    apiLatency: Record<string, number>;
    serviceHealth: Record<string, "healthy" | "degraded" | "unhealthy">;
  };
}

/** Alert counts for one deployment, broken down by severity and state. */
export interface AlertSummary {
  active: number;
  critical: number;
  high: number;
  medium: number;
  low: number;
  resolved: number;
  acknowledged: number;
  totalToday: number;
}

/** Log counts per level plus a sample of the most recent errors. */
export interface LogSummary {
  total: number;
  errors: number;
  warnings: number;
  info: number;
  debug: number;
  recentErrors: LogEntry[];
  errorTrends: { timestamp: Date; count: number }[];
}

/** One timestamped sample of a metric. */
export interface MetricDataPoint {
  timestamp: Date;
  value: number;
  metadata?: Record<string, unknown>;
}

/** A named series of samples together with its unit and aggregation settings. */
export interface MetricTimeSeries {
  name: string;
  unit: string;
  dataPoints: MetricDataPoint[];
  aggregation: "avg" | "sum" | "min" | "max" | "count";
  interval: "1m" | "5m" | "15m" | "1h" | "1d";
}

/** Report produced by `DeploymentMonitor.generateReport` for a time period. */
export interface MonitoringReport {
  deploymentId: string;
  environment: EnvironmentType;
  period: {
    start: Date;
    end: Date;
  };
  summary: {
    totalDeployments: number;
    successfulDeployments: number;
    failedDeployments: number;
    averageDeploymentTime: number;
    averageRecoveryTime: number;
    uptimePercentage: number;
  };
  metrics: MetricTimeSeries[];
  incidents: IncidentSummary[];
  recommendations: string[];
  generatedAt: Date;
}

/** Summary of a single incident attached to a deployment. */
export interface IncidentSummary {
  id: string;
  title: string;
  severity: "low" | "medium" | "high" | "critical";
  status: "open" | "investigating" | "resolved";
  startTime: Date;
  endTime?: Date;
  duration?: number; // minutes
  affectedServices: string[];
  rootCause?: string;
  resolution?: string;
}

type MetricAggregation = MetricTimeSeries["aggregation"];

/**
 * Calls `unref()` on a timer handle when the runtime provides it (Node.js),
 * so a background timer does not keep the process alive on its own.
 * Does nothing for numeric handles such as those returned in browsers.
 */
function unrefTimer(timer: unknown): void {
  if (typeof timer === "object" && timer !== null && "unref" in timer) {
    const { unref } = timer as { unref: unknown };
    if (typeof unref === "function") {
      unref.call(timer);
    }
  }
}

/**
 * In-memory monitor for deployments: keeps per-deployment health snapshots,
 * metric time series, active alerts, alert configurations, log entries and
 * incidents, all keyed by deployment id.
 *
 * Constructing an instance starts a background timer that refreshes every
 * monitored deployment once a minute. Call {@link DeploymentMonitor.dispose}
 * to stop that timer when the instance is no longer needed.
 *
 * Metric collection itself is mocked (random values); see
 * `collectRealTimeMetrics` and `runCollectionCycle`.
 */
export class DeploymentMonitor {
  /** Maximum number of data points retained per metric time series. */
  private static readonly MAX_DATA_POINTS = 1000;
  /** Maximum number of log entries retained per deployment. */
  private static readonly MAX_LOG_ENTRIES = 10000;
  /** Number of error entries exposed through `LogSummary.recentErrors`. */
  private static readonly RECENT_ERROR_COUNT = 10;
  /** Delay between two background collection cycles. */
  private static readonly COLLECTION_INTERVAL_MS = 60000; // 1 minute

  // Maps instead of object literals so that metric names such as
  // "constructor" or "toString" cannot resolve to inherited Object members.
  private static readonly METRIC_UNITS: ReadonlyMap<string, string> = new Map<
    string,
    string
  >([
    ["response_time", "ms"],
    ["throughput", "req/s"],
    ["error_rate", "%"],
    ["cpu_usage", "%"],
    ["memory_usage", "%"],
    ["disk_usage", "%"],
    ["network_usage", "%"],
  ]);

  private static readonly METRIC_AGGREGATIONS: ReadonlyMap<
    string,
    MetricAggregation
  > = new Map<string, MetricAggregation>([
    ["response_time", "avg"],
    ["throughput", "sum"],
    ["error_rate", "avg"],
    ["cpu_usage", "avg"],
    ["memory_usage", "avg"],
    ["disk_usage", "avg"],
    ["network_usage", "avg"],
  ]);

  private readonly monitoring = new Map<string, DeploymentMonitoring>();
  private readonly metrics = new Map<string, MetricTimeSeries[]>();
  private readonly alerts = new Map<string, ActiveAlert[]>();
  private readonly alertConfigs = new Map<string, AlertConfig[]>();
  private readonly logEntries = new Map<string, LogEntry[]>();
  private readonly incidents = new Map<string, IncidentSummary[]>();

  /**
   * Time (epoch ms) at which each alert config last fired automatically,
   * keyed by deployment id and then by alert name. Used to enforce
   * `AlertConfig.cooldown`.
   */
  private readonly lastAlertTriggeredAt = new Map<
    string,
    Map<string, number>
  >();

  /** Handle of the background collection timer, if it is running. */
  private collectionTimer: ReturnType<typeof setInterval> | undefined;

  constructor() {
    this.initializeDefaultAlertConfigs();
    this.startPeriodicCollection();
  }

  /**
   * Stops the background collection timer started by the constructor.
   * Stored monitoring data is left untouched and all other methods keep
   * working; only the once-a-minute automatic refresh stops. Safe to call
   * more than once.
   */
  dispose(): void {
    if (this.collectionTimer !== undefined) {
      clearInterval(this.collectionTimer);
      this.collectionTimer = undefined;
    }
  }

  // Main monitoring methods

  /**
   * Registers a deployment and initialises empty state for it.
   * Does nothing except log a message when `config.enabled` is falsy.
   * Calling it again for the same id resets that deployment's state.
   *
   * @param deploymentId - Key under which all data for the deployment is stored.
   * @param environment - Environment recorded on the monitoring snapshot.
   * @param config - Monitoring configuration; `config.alerts` (if any) become
   *   the alert configurations evaluated by `collectMetric`.
   */
  async startMonitoring(
    deploymentId: string,
    environment: EnvironmentType,
    config: MonitoringConfig
  ): Promise<void> {
    if (!config.enabled) {
      console.log(`Monitoring disabled for deployment ${deploymentId}`);
      return;
    }

    const monitoring: DeploymentMonitoring = {
      deploymentId,
      environment,
      healthStatus: "healthy",
      uptime: 100,
      responseTime: this.createEmptyPerformanceMetric(),
      throughput: this.createEmptyPerformanceMetric(),
      errorRate: this.createEmptyPerformanceMetric(),
      cpuUsage: this.createEmptyResourceMetric(),
      memoryUsage: this.createEmptyResourceMetric(),
      diskUsage: this.createEmptyResourceMetric(),
      networkUsage: this.createEmptyResourceMetric(),
      activeAlerts: [],
      lastUpdated: new Date(),
    };

    this.monitoring.set(deploymentId, monitoring);
    this.alertConfigs.set(deploymentId, config.alerts ?? []);
    this.metrics.set(deploymentId, []);
    this.alerts.set(deploymentId, []);
    this.logEntries.set(deploymentId, []);
    this.incidents.set(deploymentId, []);
    this.lastAlertTriggeredAt.delete(deploymentId);

    console.log(
      `Started monitoring for deployment ${deploymentId} in ${environment}`
    );
  }

  /** Drops every piece of state held for `deploymentId`. */
  async stopMonitoring(deploymentId: string): Promise<void> {
    this.monitoring.delete(deploymentId);
    this.alertConfigs.delete(deploymentId);
    this.metrics.delete(deploymentId);
    this.alerts.delete(deploymentId);
    this.logEntries.delete(deploymentId);
    this.incidents.delete(deploymentId);
    this.lastAlertTriggeredAt.delete(deploymentId);

    console.log(`Stopped monitoring for deployment ${deploymentId}`);
  }

  /**
   * Refreshes and returns the monitoring snapshot of a deployment.
   *
   * @returns The refreshed snapshot, or `null` when the deployment is not
   *   being monitored.
   */
  async getMonitoringData(
    deploymentId: string
  ): Promise<DeploymentMonitoring | null> {
    if (!this.monitoring.has(deploymentId)) {
      return null;
    }

    // Update with latest data
    await this.updateMonitoringData(deploymentId);
    return this.monitoring.get(deploymentId) ?? null;
  }

  /**
   * Builds the dashboard view for a deployment from its refreshed snapshot,
   * alerts and logs.
   *
   * @returns The dashboard, or `null` when the deployment is not being monitored.
   */
  async getDashboard(
    deploymentId: string
  ): Promise<MonitoringDashboard | null> {
    const monitoring = await this.getMonitoringData(deploymentId);
    if (!monitoring) {
      return null;
    }

    const overview = await this.generateOverview(deploymentId);
    const realTimeMetrics = await this.getRealTimeMetrics(deploymentId);

    return {
      deploymentId,
      environment: monitoring.environment,
      status: this.determineDeploymentStatus(monitoring),
      overview,
      realTimeMetrics,
      alerts: this.getAlertSummary(deploymentId),
      logs: this.getLogSummary(deploymentId),
      lastUpdated: new Date(),
    };
  }

  // Metrics collection

  /**
   * Appends a sample to the named metric series of a deployment (creating
   * the series on first use), trims the series to the newest
   * 1000 samples, and then evaluates the deployment's alert configurations
   * against the new value.
   *
   * @param deploymentId - Deployment the sample belongs to.
   * @param metricName - Series name, e.g. `"response_time"`.
   * @param value - Sample value.
   * @param metadata - Optional extra data stored with the sample.
   */
  async collectMetric(
    deploymentId: string,
    metricName: string,
    value: number,
    metadata?: Record<string, unknown>
  ): Promise<void> {
    const dataPoint: MetricDataPoint = {
      timestamp: new Date(),
      value,
      metadata,
    };

    let timeSeries = this.metrics
      .get(deploymentId)
      ?.find((m) => m.name === metricName);

    if (!timeSeries) {
      timeSeries = {
        name: metricName,
        unit: this.getMetricUnit(metricName),
        dataPoints: [],
        aggregation: this.getMetricAggregation(metricName),
        interval: "1m",
      };

      const deploymentMetrics = this.metrics.get(deploymentId) ?? [];
      deploymentMetrics.push(timeSeries);
      this.metrics.set(deploymentId, deploymentMetrics);
    }

    timeSeries.dataPoints.push(dataPoint);

    // Keep only the newest data points to prevent memory issues
    if (timeSeries.dataPoints.length > DeploymentMonitor.MAX_DATA_POINTS) {
      timeSeries.dataPoints = timeSeries.dataPoints.slice(
        -DeploymentMonitor.MAX_DATA_POINTS
      );
    }

    // Check for alert conditions
    await this.checkAlertConditions(deploymentId, metricName, value);
  }

  /**
   * Looks up a metric series.
   *
   * @param timeRange - When given, only samples whose timestamp lies within
   *   `[start, end]` (inclusive) are returned, in a copy of the series.
   *   When omitted, the stored series object itself is returned.
   * @returns The series, or `null` when the deployment or metric is unknown.
   */
  async getMetricTimeSeries(
    deploymentId: string,
    metricName: string,
    timeRange?: { start: Date; end: Date }
  ): Promise<MetricTimeSeries | null> {
    const timeSeries = this.metrics
      .get(deploymentId)
      ?.find((m) => m.name === metricName);

    if (!timeSeries) {
      return null;
    }

    if (!timeRange) {
      return timeSeries;
    }

    // Filter data points by time range
    return {
      ...timeSeries,
      dataPoints: timeSeries.dataPoints.filter(
        (dp) => dp.timestamp >= timeRange.start && dp.timestamp <= timeRange.end
      ),
    };
  }

  // Alert management

  /**
   * Unconditionally records a new active alert for the deployment, mirrors
   * the alert list onto the monitoring snapshot, and sends (mock)
   * notifications. Cooldowns are NOT applied here; they only apply to alerts
   * raised automatically from `collectMetric`.
   *
   * @param deploymentId - Deployment the alert belongs to.
   * @param alertConfig - Configuration the alert is derived from.
   * @param currentValue - Metric value reported in the alert message.
   */
  async triggerAlert(
    deploymentId: string,
    alertConfig: AlertConfig,
    currentValue: number
  ): Promise<void> {
    const alert: ActiveAlert = {
      id: this.generateId(),
      name: alertConfig.name,
      severity: alertConfig.severity,
      message: `${
        alertConfig.name
      }: Current value ${currentValue} ${this.getThresholdComparison(
        currentValue,
        alertConfig.threshold
      )} threshold ${alertConfig.threshold}`,
      triggeredAt: new Date(),
      acknowledged: false,
    };

    const deploymentAlerts = this.alerts.get(deploymentId) ?? [];
    deploymentAlerts.push(alert);
    this.alerts.set(deploymentId, deploymentAlerts);

    // Update monitoring data
    this.syncActiveAlerts(deploymentId, deploymentAlerts);

    // Send notifications (mock implementation)
    await this.sendAlertNotifications(deploymentId, alert, alertConfig);

    console.log(`Alert triggered for ${deploymentId}: ${alert.name}`);
  }

  /**
   * Marks an active alert as acknowledged.
   *
   * @returns `false` when the deployment or alert id is unknown, else `true`.
   */
  async acknowledgeAlert(
    deploymentId: string,
    alertId: string,
    acknowledgedBy: string
  ): Promise<boolean> {
    const deploymentAlerts = this.alerts.get(deploymentId);
    const alert = deploymentAlerts?.find((a) => a.id === alertId);
    if (!deploymentAlerts || !alert) {
      return false;
    }

    alert.acknowledged = true;
    alert.acknowledgedBy = acknowledgedBy;

    // Update monitoring data
    this.syncActiveAlerts(deploymentId, deploymentAlerts);

    return true;
  }

  /**
   * Removes an alert from the deployment's active alerts.
   *
   * @returns `false` when the deployment or alert id is unknown, else `true`.
   */
  async resolveAlert(deploymentId: string, alertId: string): Promise<boolean> {
    const deploymentAlerts = this.alerts.get(deploymentId);
    if (!deploymentAlerts) {
      return false;
    }

    const alertIndex = deploymentAlerts.findIndex((a) => a.id === alertId);
    if (alertIndex === -1) {
      return false;
    }

    deploymentAlerts.splice(alertIndex, 1);
    this.alerts.set(deploymentId, deploymentAlerts);

    // Update monitoring data
    this.syncActiveAlerts(deploymentId, deploymentAlerts);

    return true;
  }

  // Log management

  /**
   * Stores a log entry stamped with the current time, keeping only the
   * newest 10000 entries per deployment. Entries are stored in append
   * (oldest-first) order.
   */
  async addLogEntry(
    deploymentId: string,
    entry: Omit<LogEntry, "timestamp">
  ): Promise<void> {
    const logEntry: LogEntry = {
      ...entry,
      timestamp: new Date(),
    };

    const deploymentLogs = this.logEntries.get(deploymentId) ?? [];
    deploymentLogs.push(logEntry);

    // Keep only the newest log entries to prevent memory issues
    const retained =
      deploymentLogs.length > DeploymentMonitor.MAX_LOG_ENTRIES
        ? deploymentLogs.slice(-DeploymentMonitor.MAX_LOG_ENTRIES)
        : deploymentLogs;
    this.logEntries.set(deploymentId, retained);
  }

  /**
   * Returns a deployment's log entries, newest first.
   *
   * The result is always a fresh array: the stored entries are never
   * reordered, because trimming in `addLogEntry` and the "recent errors"
   * summary both rely on oldest-first storage order.
   *
   * @param options.level - Only return entries of this level.
   * @param options.startTime - Only return entries at or after this time.
   * @param options.endTime - Only return entries at or before this time.
   * @param options.limit - Maximum number of entries to return. `0` yields an
   *   empty result; negative or non-numeric values are ignored.
   */
  async getLogs(
    deploymentId: string,
    options?: {
      level?: "debug" | "info" | "warn" | "error";
      startTime?: Date;
      endTime?: Date;
      limit?: number;
    }
  ): Promise<LogEntry[]> {
    const { level, startTime, endTime, limit } = options ?? {};
    const stored = this.logEntries.get(deploymentId) ?? [];

    // `filter` always yields a copy, so the in-place sort below cannot
    // disturb the stored array even when no filter is active.
    const logs = stored.filter(
      (log) =>
        (level == null || log.level === level) &&
        (startTime == null || log.timestamp >= startTime) &&
        (endTime == null || log.timestamp <= endTime)
    );

    // Sort by timestamp (newest first)
    logs.sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());

    // Apply limit
    if (limit != null && limit >= 0) {
      return logs.slice(0, limit);
    }

    return logs;
  }

  // Reporting

  /**
   * Builds a report for one deployment over `period` (bounds inclusive).
   * Deployment counts and timings in the summary are simplified/mock values.
   *
   * @throws Error when the deployment is not being monitored.
   */
  async generateReport(
    deploymentId: string,
    period: { start: Date; end: Date }
  ): Promise<MonitoringReport> {
    const monitoring = this.monitoring.get(deploymentId);
    if (!monitoring) {
      throw new Error(
        `No monitoring data found for deployment ${deploymentId}`
      );
    }

    const withinPeriod = (when: Date): boolean =>
      when >= period.start && when <= period.end;

    // Filter metrics by time period
    const filteredMetrics = (this.metrics.get(deploymentId) ?? []).map(
      (metric) => ({
        ...metric,
        dataPoints: metric.dataPoints.filter((dp) =>
          withinPeriod(dp.timestamp)
        ),
      })
    );

    // Calculate summary statistics
    const summary = {
      totalDeployments: 1, // Simplified for single deployment
      successfulDeployments: monitoring.healthStatus === "healthy" ? 1 : 0,
      failedDeployments: monitoring.healthStatus === "unhealthy" ? 1 : 0,
      averageDeploymentTime: 300, // Mock value in seconds
      averageRecoveryTime: 60, // Mock value in seconds
      uptimePercentage: monitoring.uptime,
    };

    const recommendations = this.generateRecommendations(
      monitoring,
      filteredMetrics
    );

    return {
      deploymentId,
      environment: monitoring.environment,
      period,
      summary,
      metrics: filteredMetrics,
      incidents: (this.incidents.get(deploymentId) ?? []).filter((incident) =>
        withinPeriod(incident.startTime)
      ),
      recommendations,
      generatedAt: new Date(),
    };
  }

  // Private helper methods

  /** Mirrors the deployment's alert list onto its monitoring snapshot, if any. */
  private syncActiveAlerts(
    deploymentId: string,
    deploymentAlerts: ActiveAlert[]
  ): void {
    const monitoring = this.monitoring.get(deploymentId);
    if (monitoring) {
      monitoring.activeAlerts = deploymentAlerts;
    }
  }

  /** Merges freshly collected (mock) metrics into the stored snapshot. */
  private async updateMonitoringData(deploymentId: string): Promise<void> {
    const monitoring = this.monitoring.get(deploymentId);
    if (!monitoring) {
      return;
    }

    // Simulate collecting real-time metrics
    const newMetrics = await this.collectRealTimeMetrics(deploymentId);

    // Update monitoring object
    Object.assign(monitoring, newMetrics);
    monitoring.lastUpdated = new Date();

    this.monitoring.set(deploymentId, monitoring);
  }

  private async collectRealTimeMetrics(
    deploymentId: string
  ): Promise<Partial<DeploymentMonitoring>> {
    // Mock implementation - in real scenario, this would collect from actual monitoring sources
    return {
      healthStatus: Math.random() > 0.1 ? "healthy" : "degraded",
      uptime: 99.5 + Math.random() * 0.5,
      responseTime: {
        current: 100 + Math.random() * 50,
        average: 120,
        min: 80,
        max: 200,
        trend: Math.random() > 0.5 ? "improving" : "stable",
      },
      throughput: {
        current: 1000 + Math.random() * 200,
        average: 1100,
        min: 800,
        max: 1500,
        trend: "stable",
      },
      errorRate: {
        current: Math.random() * 2,
        average: 1.2,
        min: 0,
        max: 5,
        trend: "improving",
      },
      cpuUsage: {
        current: 40 + Math.random() * 30,
        average: 55,
        peak: 85,
        limit: 100,
      },
      memoryUsage: {
        current: 60 + Math.random() * 20,
        average: 65,
        peak: 90,
        limit: 100,
      },
      diskUsage: {
        current: 30 + Math.random() * 10,
        average: 35,
        peak: 50,
        limit: 100,
      },
      networkUsage: {
        current: 20 + Math.random() * 15,
        average: 25,
        peak: 60,
        limit: 100,
      },
    };
  }

  private async generateOverview(
    deploymentId: string
  ): Promise<MonitoringOverview> {
    // Mock implementation
    return {
      uptime: 99.9,
      totalRequests: 1000000,
      successRate: 99.8,
      errorRate: 0.2,
      averageResponseTime: 120,
      peakResponseTime: 500,
      activeUsers: 250,
      dataTransfer: 1500,
    };
  }

  /**
   * Assembles real-time metrics from the stored snapshot; the database and
   * external-service sections are fixed mock values.
   *
   * @throws Error when the deployment is not being monitored.
   */
  private async getRealTimeMetrics(
    deploymentId: string
  ): Promise<RealTimeMetrics> {
    const monitoring = this.monitoring.get(deploymentId);
    if (!monitoring) {
      throw new Error(
        `No monitoring data found for deployment ${deploymentId}`
      );
    }

    return {
      performance: {
        responseTime: monitoring.responseTime,
        throughput: monitoring.throughput,
        errorRate: monitoring.errorRate,
        availability: {
          current: monitoring.uptime,
          average: monitoring.uptime,
          min: monitoring.uptime - 0.5,
          max: 100,
          trend: "stable",
        },
      },
      resources: {
        cpu: monitoring.cpuUsage,
        memory: monitoring.memoryUsage,
        disk: monitoring.diskUsage,
        network: monitoring.networkUsage,
      },
      database: {
        connections: 50,
        queryTime: 25,
        lockWaitTime: 2,
        cacheHitRate: 95,
      },
      external: {
        apiLatency: {
          "payment-api": 150,
          "auth-service": 80,
          "notification-service": 200,
        },
        serviceHealth: {
          "payment-api": "healthy",
          "auth-service": "healthy",
          "notification-service": "degraded",
        },
      },
    };
  }

  /** Counts the deployment's active alerts by severity and acknowledgement. */
  private getAlertSummary(deploymentId: string): AlertSummary {
    const alerts = this.alerts.get(deploymentId) ?? [];
    const countBySeverity = (severity: string): number =>
      alerts.filter((a) => a.severity === severity).length;

    return {
      active: alerts.length,
      critical: countBySeverity("critical"),
      high: countBySeverity("high"),
      medium: countBySeverity("medium"),
      low: countBySeverity("low"),
      resolved: 0, // Would track resolved alerts in a real implementation
      acknowledged: alerts.filter((a) => a.acknowledged).length,
      totalToday: alerts.length, // Simplified
    };
  }

  /**
   * Counts the deployment's log entries per level. `recentErrors` holds the
   * last 10 error entries in storage (oldest-first) order, i.e. the
   * most recently added ones.
   */
  private getLogSummary(deploymentId: string): LogSummary {
    const logs = this.logEntries.get(deploymentId) ?? [];
    const countByLevel = (level: string): number =>
      logs.filter((log) => log.level === level).length;

    const errorLogs = logs.filter((log) => log.level === "error");

    return {
      total: logs.length,
      errors: errorLogs.length,
      warnings: countByLevel("warn"),
      info: countByLevel("info"),
      debug: countByLevel("debug"),
      // Entries are appended chronologically, so the newest are at the end.
      recentErrors: errorLogs.slice(-DeploymentMonitor.RECENT_ERROR_COUNT),
      errorTrends: [], // Would be calculated from historical data
    };
  }

  /** Maps the snapshot's health status onto a deployment status. */
  private determineDeploymentStatus(
    monitoring: DeploymentMonitoring
  ): DeploymentStatus {
    switch (monitoring.healthStatus) {
      case "unhealthy":
        return "failed";
      case "degraded":
        return "testing";
      default:
        return "success";
    }
  }

  /**
   * Evaluates every enabled alert configuration of the deployment against a
   * new metric value and triggers those that match and are not in cooldown.
   */
  private async checkAlertConditions(
    deploymentId: string,
    metricName: string,
    value: number
  ): Promise<void> {
    const alertConfigs = this.alertConfigs.get(deploymentId) ?? [];

    for (const config of alertConfigs) {
      if (
        !config.enabled ||
        !this.shouldTriggerAlert(config, metricName, value)
      ) {
        continue;
      }

      if (this.isInCooldown(deploymentId, config)) {
        continue;
      }

      // Record the trigger time before awaiting so that overlapping
      // collections cannot both slip past the cooldown check.
      this.markAlertTriggered(deploymentId, config);
      await this.triggerAlert(deploymentId, config, value);
    }
  }

  /**
   * Tells whether `config` fired automatically for this deployment less than
   * `config.cooldown` minutes ago. A missing, zero, negative or non-numeric
   * cooldown disables suppression.
   */
  private isInCooldown(deploymentId: string, config: AlertConfig): boolean {
    const cooldownMs = config.cooldown * 60 * 1000;
    if (!(cooldownMs > 0)) {
      return false;
    }

    const lastTriggered = this.lastAlertTriggeredAt
      .get(deploymentId)
      ?.get(config.name);

    return (
      lastTriggered !== undefined && Date.now() - lastTriggered < cooldownMs
    );
  }

  /** Remembers that `config` has just fired for this deployment. */
  private markAlertTriggered(deploymentId: string, config: AlertConfig): void {
    let perDeployment = this.lastAlertTriggeredAt.get(deploymentId);
    if (!perDeployment) {
      perDeployment = new Map<string, number>();
      this.lastAlertTriggeredAt.set(deploymentId, perDeployment);
    }
    perDeployment.set(config.name, Date.now());
  }

  /**
   * Tells whether `config` applies to `metricName` (its condition mentions
   * the metric) and `value` is strictly above the configured threshold.
   * Cooldown handling lives in `checkAlertConditions`.
   */
  private shouldTriggerAlert(
    config: AlertConfig,
    metricName: string,
    value: number
  ): boolean {
    // Check if this alert config applies to the metric
    if (!config.condition.includes(metricName)) {
      return false;
    }

    // Evaluate threshold condition
    return value > config.threshold;
  }

  private async sendAlertNotifications(
    deploymentId: string,
    alert: ActiveAlert,
    config: AlertConfig
  ): Promise<void> {
    // Mock implementation - would send actual notifications
    console.log(`Sending alert notifications for ${deploymentId}:`, {
      alert: alert.name,
      severity: alert.severity,
      channels: config.channels,
    });
  }

  /** Derives textual recommendations from the snapshot's averages and uptime. */
  private generateRecommendations(
    monitoring: DeploymentMonitoring,
    metrics: MetricTimeSeries[]
  ): string[] {
    const recommendations: string[] = [];

    // Resource utilization recommendations
    if (monitoring.cpuUsage.average > 80) {
      recommendations.push(
        "Consider scaling up CPU resources - average utilization is above 80%"
      );
    }

    if (monitoring.memoryUsage.average > 85) {
      recommendations.push(
        "Memory usage is consistently high - consider increasing memory allocation"
      );
    }

    // Performance recommendations
    if (monitoring.responseTime.average > 500) {
      recommendations.push(
        "Response times are above recommended threshold - investigate performance bottlenecks"
      );
    }

    if (monitoring.errorRate.average > 1) {
      recommendations.push(
        "Error rate is elevated - review application logs for recurring issues"
      );
    }

    // Health recommendations
    if (monitoring.uptime < 99.5) {
      recommendations.push(
        "Uptime is below target SLA - review infrastructure reliability"
      );
    }

    return recommendations;
  }

  private createEmptyPerformanceMetric(): PerformanceMetric {
    return {
      current: 0,
      average: 0,
      min: 0,
      max: 0,
      trend: "stable",
    };
  }

  private createEmptyResourceMetric(): ResourceMetric {
    return {
      current: 0,
      average: 0,
      peak: 0,
      limit: 100,
    };
  }

  /** Unit for a known metric name; `"count"` for any other name. */
  private getMetricUnit(metricName: string): string {
    return DeploymentMonitor.METRIC_UNITS.get(metricName) ?? "count";
  }

  /** Aggregation for a known metric name; `"avg"` for any other name. */
  private getMetricAggregation(
    metricName: string
  ): "avg" | "sum" | "min" | "max" | "count" {
    return DeploymentMonitor.METRIC_AGGREGATIONS.get(metricName) ?? "avg";
  }

  private getThresholdComparison(value: number, threshold: number): string {
    return value > threshold ? "exceeds" : "is below";
  }

  /** Builds an alert id from the current time and a short random suffix. */
  private generateId(): string {
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    return `alert_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  private initializeDefaultAlertConfigs(): void {
    // Default alert configurations would be initialized here
    console.log("Initialized default alert configurations");
  }

  /**
   * Starts the once-a-minute background collection timer. The handle is
   * kept so `dispose` can clear it, and it is unref'ed where supported so
   * the timer alone does not keep a Node.js process running.
   */
  private startPeriodicCollection(): void {
    this.dispose();

    this.collectionTimer = setInterval(() => {
      void this.runCollectionCycle();
    }, DeploymentMonitor.COLLECTION_INTERVAL_MS);

    unrefTimer(this.collectionTimer);
  }

  /**
   * Refreshes every monitored deployment and records sample (random) metric
   * values for it. A failure for one deployment is logged and does not stop
   * the others.
   */
  private async runCollectionCycle(): Promise<void> {
    for (const [deploymentId] of this.monitoring) {
      try {
        await this.updateMonitoringData(deploymentId);

        // Collect some sample metrics
        await this.collectMetric(
          deploymentId,
          "response_time",
          100 + Math.random() * 50
        );
        await this.collectMetric(
          deploymentId,
          "throughput",
          1000 + Math.random() * 200
        );
        await this.collectMetric(
          deploymentId,
          "error_rate",
          Math.random() * 2
        );
        await this.collectMetric(
          deploymentId,
          "cpu_usage",
          50 + Math.random() * 30
        );
        await this.collectMetric(
          deploymentId,
          "memory_usage",
          60 + Math.random() * 20
        );
      } catch (error) {
        console.error(
          `Failed to update monitoring data for ${deploymentId}:`,
          error
        );
      }
    }
  }
}