@mrtkrcm/acp-claude-code

ACP (Agent Client Protocol) bridge for Claude Code

mcp-performance-monitor.js (319 lines, 13.4 kB)
import { createLogger } from './logger.js';

export class McpPerformanceMonitor {
  metrics = [];
  maxMetrics = 10000; // Keep last 10k metrics
  logger;
  alertRules = [];
  monitoringInterval;

  constructor() {
    this.logger = createLogger('MCP-Performance');
    this.initializeDefaultAlertRules();
    this.startMonitoring();
  }

  /**
   * Record a performance metric for MCP tool execution
   */
  recordMetric(metric) {
    const fullMetric = {
      ...metric,
      timestamp: Date.now(),
    };
    this.metrics.push(fullMetric);
    // Maintain sliding window
    if (this.metrics.length > this.maxMetrics) {
      this.metrics = this.metrics.slice(-this.maxMetrics);
    }
    this.logger.debug(`Recorded metric: ${metric.serverName}.${metric.toolName} - ${metric.duration}ms (${metric.success ? 'success' : 'failed'})`);
  }

  /**
   * Get performance statistics for a specific server
   */
  getServerStats(serverName, timeWindowMs = 3600000) {
    const cutoff = Date.now() - timeWindowMs;
    const relevantMetrics = this.metrics.filter(m => m.serverName === serverName && m.timestamp >= cutoff);
    if (relevantMetrics.length === 0) {
      return {
        serverName,
        totalCalls: 0,
        successfulCalls: 0,
        failedCalls: 0,
        avgResponseTime: 0,
        p95ResponseTime: 0,
        p99ResponseTime: 0,
        errorRate: 0,
        lastActivity: 0,
        uptime: 0,
      };
    }
    const successfulCalls = relevantMetrics.filter(m => m.success).length;
    const failedCalls = relevantMetrics.length - successfulCalls;
    const responseTimes = relevantMetrics.map(m => m.duration).sort((a, b) => a - b);
    const avgResponseTime = responseTimes.reduce((sum, time) => sum + time, 0) / responseTimes.length;
    const p95ResponseTime = responseTimes[Math.floor(responseTimes.length * 0.95)] || 0;
    const p99ResponseTime = responseTimes[Math.floor(responseTimes.length * 0.99)] || 0;
    const lastActivity = Math.max(...relevantMetrics.map(m => m.timestamp));
    const firstActivity = Math.min(...relevantMetrics.map(m => m.timestamp));
    const uptime = (lastActivity - firstActivity) / 1000; // Convert to seconds
    return {
      serverName,
      totalCalls: relevantMetrics.length,
      successfulCalls,
      failedCalls,
      avgResponseTime,
      p95ResponseTime,
      p99ResponseTime,
      errorRate: failedCalls / relevantMetrics.length,
      lastActivity,
      uptime,
    };
  }

  /**
   * Get performance statistics for all servers
   */
  getAllServerStats(timeWindowMs = 3600000) {
    const serverNames = new Set(this.metrics.map(m => m.serverName));
    return Array.from(serverNames).map(name => this.getServerStats(name, timeWindowMs));
  }

  /**
   * Get tool-specific performance statistics
   */
  getToolStats(serverName, toolName, timeWindowMs = 3600000) {
    const cutoff = Date.now() - timeWindowMs;
    const relevantMetrics = this.metrics.filter(m => m.serverName === serverName && m.toolName === toolName && m.timestamp >= cutoff);
    const errorTypes = {};
    let successCount = 0;
    for (const metric of relevantMetrics) {
      if (metric.success) {
        successCount++;
      }
      else if (metric.errorType) {
        errorTypes[metric.errorType] = (errorTypes[metric.errorType] || 0) + 1;
      }
    }
    const avgResponseTime = relevantMetrics.length > 0
      ? relevantMetrics.reduce((sum, m) => sum + m.duration, 0) / relevantMetrics.length
      : 0;
    return {
      toolName,
      serverName,
      totalCalls: relevantMetrics.length,
      successRate: relevantMetrics.length > 0 ? successCount / relevantMetrics.length : 0,
      avgResponseTime,
      errorTypes,
    };
  }

  /**
   * Detect performance anomalies
   */
  detectAnomalies(serverName) {
    const anomalies = [];
    const stats = this.getServerStats(serverName, 3600000); // Last hour
    const recentStats = this.getServerStats(serverName, 300000); // Last 5 minutes
    // High latency detection
    if (stats.p95ResponseTime > 10000) { // > 10 seconds
      anomalies.push({
        type: 'high_latency',
        severity: 'critical',
        message: `High latency detected for ${serverName}: P95 response time is ${stats.p95ResponseTime}ms`,
        details: { p95ResponseTime: stats.p95ResponseTime, avgResponseTime: stats.avgResponseTime },
      });
    }
    else if (stats.p95ResponseTime > 5000) { // > 5 seconds
      anomalies.push({
        type: 'high_latency',
        severity: 'warning',
        message: `Elevated latency detected for ${serverName}: P95 response time is ${stats.p95ResponseTime}ms`,
        details: { p95ResponseTime: stats.p95ResponseTime, avgResponseTime: stats.avgResponseTime },
      });
    }
    // High error rate detection
    if (stats.errorRate > 0.5) { // > 50% error rate
      anomalies.push({
        type: 'high_error_rate',
        severity: 'critical',
        message: `Critical error rate for ${serverName}: ${(stats.errorRate * 100).toFixed(1)}%`,
        details: { errorRate: stats.errorRate, failedCalls: stats.failedCalls, totalCalls: stats.totalCalls },
      });
    }
    else if (stats.errorRate > 0.1) { // > 10% error rate
      anomalies.push({
        type: 'high_error_rate',
        severity: 'warning',
        message: `Elevated error rate for ${serverName}: ${(stats.errorRate * 100).toFixed(1)}%`,
        details: { errorRate: stats.errorRate, failedCalls: stats.failedCalls, totalCalls: stats.totalCalls },
      });
    }
    // Unusual patterns (sudden changes)
    if (stats.totalCalls > 10 && recentStats.totalCalls > 5) {
      const recentErrorRate = recentStats.errorRate;
      const historicalErrorRate = stats.errorRate;
      if (recentErrorRate > historicalErrorRate * 3 && recentErrorRate > 0.05) {
        anomalies.push({
          type: 'unusual_pattern',
          severity: 'error',
          message: `Sudden spike in error rate for ${serverName}: ${(recentErrorRate * 100).toFixed(1)}% (recent) vs ${(historicalErrorRate * 100).toFixed(1)}% (historical)`,
          details: { recentErrorRate, historicalErrorRate, recentCalls: recentStats.totalCalls },
        });
      }
    }
    return anomalies;
  }

  /**
   * Generate performance recommendations
   */
  generateRecommendations(serverName) {
    const recommendations = [];
    const stats = this.getServerStats(serverName);
    const anomalies = this.detectAnomalies(serverName);
    // Performance recommendations
    if (stats.avgResponseTime > 5000) {
      recommendations.push({
        category: 'performance',
        priority: 'high',
        recommendation: 'Consider increasing connection pool size or implementing caching',
        reasoning: `Average response time (${stats.avgResponseTime}ms) is above optimal threshold`,
      });
    }
    if (stats.p99ResponseTime > stats.avgResponseTime * 10) {
      recommendations.push({
        category: 'performance',
        priority: 'medium',
        recommendation: 'Investigate outlier requests causing high P99 latency',
        reasoning: `P99 response time (${stats.p99ResponseTime}ms) is significantly higher than average (${stats.avgResponseTime}ms)`,
      });
    }
    // Reliability recommendations
    if (stats.errorRate > 0.1) {
      recommendations.push({
        category: 'reliability',
        priority: 'high',
        recommendation: 'Implement retry logic with exponential backoff',
        reasoning: `Error rate (${(stats.errorRate * 100).toFixed(1)}%) indicates reliability issues`,
      });
    }
    // Configuration recommendations based on anomalies
    for (const anomaly of anomalies) {
      if (anomaly.type === 'high_latency') {
        recommendations.push({
          category: 'configuration',
          priority: anomaly.severity === 'critical' ? 'critical' : 'high',
          recommendation: 'Review timeout settings and server configuration',
          reasoning: anomaly.message,
        });
      }
    }
    return recommendations;
  }

  initializeDefaultAlertRules() {
    this.alertRules = [
      {
        id: 'high_error_rate',
        condition: (stats) => stats.errorRate > 0.2 && stats.totalCalls > 10,
        message: 'High error rate detected',
        severity: 'error',
        cooldownMs: 300000, // 5 minutes
      },
      {
        id: 'critical_error_rate',
        condition: (stats) => stats.errorRate > 0.5 && stats.totalCalls > 5,
        message: 'Critical error rate detected',
        severity: 'critical',
        cooldownMs: 120000, // 2 minutes
      },
      {
        id: 'high_latency',
        condition: (stats) => stats.p95ResponseTime > 10000,
        message: 'High latency detected',
        severity: 'warning',
        cooldownMs: 600000, // 10 minutes
      },
      {
        id: 'server_unresponsive',
        condition: (stats) => Date.now() - stats.lastActivity > 600000 && stats.totalCalls > 0,
        message: 'Server appears unresponsive',
        severity: 'critical',
        cooldownMs: 300000, // 5 minutes
      },
    ];
  }

  startMonitoring() {
    this.monitoringInterval = setInterval(() => {
      this.checkAlerts();
      this.cleanupOldMetrics();
    }, 60000); // Check every minute
  }

  checkAlerts() {
    const serverNames = new Set(this.metrics.map(m => m.serverName));
    for (const serverName of serverNames) {
      const stats = this.getServerStats(serverName, 300000); // Last 5 minutes
      for (const rule of this.alertRules) {
        if (rule.lastTriggered && Date.now() - rule.lastTriggered < rule.cooldownMs) {
          continue; // Still in cooldown
        }
        if (rule.condition(stats)) {
          this.triggerAlert(rule, serverName, stats);
          rule.lastTriggered = Date.now();
        }
      }
    }
  }

  triggerAlert(rule, serverName, stats) {
    const alertMessage = `${rule.message} for ${serverName}`;
    switch (rule.severity) {
      case 'critical':
        this.logger.error(alertMessage, { rule: rule.id, stats });
        break;
      case 'error':
        this.logger.error(alertMessage, { rule: rule.id, stats });
        break;
      case 'warning':
        this.logger.warn(alertMessage, { rule: rule.id, stats });
        break;
      default:
        this.logger.info(alertMessage, { rule: rule.id, stats });
    }
    // In a production system, you might also:
    // - Send alerts to external monitoring systems
    // - Trigger automated remediation actions
    // - Update circuit breaker configurations
  }

  cleanupOldMetrics() {
    const cutoff = Date.now() - (24 * 60 * 60 * 1000); // Keep last 24 hours
    const originalCount = this.metrics.length;
    this.metrics = this.metrics.filter(m => m.timestamp >= cutoff);
    const cleaned = originalCount - this.metrics.length;
    if (cleaned > 0) {
      this.logger.debug(`Cleaned up ${cleaned} old metrics`);
    }
  }

  /**
   * Export performance data for external analysis
   */
  exportMetrics(format = 'json') {
    if (format === 'csv') {
      const headers = 'timestamp,serverName,toolName,duration,success,errorType,inputSize,outputSize,retryCount';
      const rows = this.metrics.map(m => [
        m.timestamp,
        m.serverName,
        m.toolName,
        m.duration,
        m.success,
        m.errorType || '',
        m.inputSize || '',
        m.outputSize || '',
        m.retryCount || ''
      ].join(','));
      return [headers, ...rows].join('\n');
    }
    return JSON.stringify(this.metrics, null, 2);
  }

  destroy() {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
    }
  }
}

export const globalPerformanceMonitor = new McpPerformanceMonitor();
//# sourceMappingURL=mcp-performance-monitor.js.map
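
Usage, for reference: a minimal sketch of how a caller might feed and query this monitor, not part of the file above. It assumes the metric object passed to recordMetric carries the fields the monitor actually reads (serverName, toolName, duration, success, plus optional errorType, inputSize, outputSize, retryCount); the callTool function and the 'filesystem' server name are hypothetical placeholders.

import { globalPerformanceMonitor } from './mcp-performance-monitor.js';

// Hypothetical wrapper that times one MCP tool invocation and records the result.
async function timedToolCall(serverName, toolName, callTool, input) {
  const start = Date.now();
  try {
    const output = await callTool(input);
    // Successful call: record duration plus rough payload sizes.
    globalPerformanceMonitor.recordMetric({
      serverName,
      toolName,
      duration: Date.now() - start,
      success: true,
      inputSize: JSON.stringify(input).length,
      outputSize: JSON.stringify(output).length,
    });
    return output;
  }
  catch (error) {
    // Failed call: record the error type so error-rate stats and alert rules see it.
    globalPerformanceMonitor.recordMetric({
      serverName,
      toolName,
      duration: Date.now() - start,
      success: false,
      errorType: error instanceof Error ? error.name : 'UnknownError',
    });
    throw error;
  }
}

// Query aggregated stats, anomalies, and recommendations for one server.
const stats = globalPerformanceMonitor.getServerStats('filesystem');
const anomalies = globalPerformanceMonitor.detectAnomalies('filesystem');
const recommendations = globalPerformanceMonitor.generateRecommendations('filesystem');
console.log(stats.errorRate, anomalies.length, recommendations.length);

// The module-level singleton starts a setInterval in its constructor, so call
// destroy() during shutdown to stop the background alert/cleanup loop.
globalPerformanceMonitor.destroy();

Note that importing this module has a side effect: the exported globalPerformanceMonitor starts a 60-second monitoring interval as soon as it is constructed, which gives long-lived processes alerting and metric cleanup for free, but means short-lived scripts should call destroy() so the interval does not keep the process alive.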