@mrtkrcm/acp-claude-code
ACP (Agent Client Protocol) bridge for Claude Code
JavaScript
import { createLogger } from './logger.js';
export class McpPerformanceMonitor {
metrics = [];
maxMetrics = 10000; // Keep last 10k metrics
logger;
alertRules = [];
monitoringInterval;
constructor() {
this.logger = createLogger('MCP-Performance');
this.initializeDefaultAlertRules();
this.startMonitoring();
}
/**
* Record a performance metric for MCP tool execution
*/
recordMetric(metric) {
const fullMetric = {
...metric,
timestamp: Date.now(),
};
this.metrics.push(fullMetric);
// Maintain sliding window
if (this.metrics.length > this.maxMetrics) {
this.metrics = this.metrics.slice(-this.maxMetrics);
}
this.logger.debug(`Recorded metric: ${metric.serverName}.${metric.toolName} - ${metric.duration}ms (${metric.success ? 'success' : 'failed'})`);
}
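// Usage sketch (illustrative names and values; callers measure `duration` themselves,
// this monitor only records what it is given):
//   monitor.recordMetric({
//     serverName: 'filesystem', // hypothetical MCP server name
//     toolName: 'read_file',    // hypothetical tool name
//     duration: 142,            // milliseconds, measured by the caller
//     success: true,
//   });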
/**
* Get performance statistics for a specific server
*/
getServerStats(serverName, timeWindowMs = 3600000) {
const cutoff = Date.now() - timeWindowMs;
const relevantMetrics = this.metrics.filter(m => m.serverName === serverName && m.timestamp >= cutoff);
if (relevantMetrics.length === 0) {
return {
serverName,
totalCalls: 0,
successfulCalls: 0,
failedCalls: 0,
avgResponseTime: 0,
p95ResponseTime: 0,
p99ResponseTime: 0,
errorRate: 0,
lastActivity: 0,
uptime: 0,
};
}
const successfulCalls = relevantMetrics.filter(m => m.success).length;
const failedCalls = relevantMetrics.length - successfulCalls;
const responseTimes = relevantMetrics.map(m => m.duration).sort((a, b) => a - b);
const avgResponseTime = responseTimes.reduce((sum, time) => sum + time, 0) / responseTimes.length;
// Approximate percentile lookups over the sorted response times
const p95ResponseTime = responseTimes[Math.floor(responseTimes.length * 0.95)] || 0;
const p99ResponseTime = responseTimes[Math.floor(responseTimes.length * 0.99)] || 0;
const lastActivity = Math.max(...relevantMetrics.map(m => m.timestamp));
const firstActivity = Math.min(...relevantMetrics.map(m => m.timestamp));
const uptime = (lastActivity - firstActivity) / 1000; // Span between first and last observed metric, in seconds
return {
serverName,
totalCalls: relevantMetrics.length,
successfulCalls,
failedCalls,
avgResponseTime,
p95ResponseTime,
p99ResponseTime,
errorRate: failedCalls / relevantMetrics.length,
lastActivity,
uptime,
};
}
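// Example (illustrative, assuming `monitor` is an McpPerformanceMonitor instance):
//   const stats = monitor.getServerStats('filesystem'); // default window: last hour
//   console.log(`${stats.totalCalls} calls, p95 ${stats.p95ResponseTime}ms, ` +
//     `error rate ${(stats.errorRate * 100).toFixed(1)}%`);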
/**
* Get performance statistics for all servers
*/
getAllServerStats(timeWindowMs = 3600000) {
const serverNames = new Set(this.metrics.map(m => m.serverName));
return Array.from(serverNames).map(name => this.getServerStats(name, timeWindowMs));
}
/**
* Get tool-specific performance statistics
*/
getToolStats(serverName, toolName, timeWindowMs = 3600000) {
const cutoff = Date.now() - timeWindowMs;
const relevantMetrics = this.metrics.filter(m => m.serverName === serverName &&
m.toolName === toolName &&
m.timestamp >= cutoff);
const errorTypes = {};
let successCount = 0;
for (const metric of relevantMetrics) {
if (metric.success) {
successCount++;
}
else if (metric.errorType) {
errorTypes[metric.errorType] = (errorTypes[metric.errorType] || 0) + 1;
}
}
const avgResponseTime = relevantMetrics.length > 0
? relevantMetrics.reduce((sum, m) => sum + m.duration, 0) / relevantMetrics.length
: 0;
return {
toolName,
serverName,
totalCalls: relevantMetrics.length,
successRate: relevantMetrics.length > 0 ? successCount / relevantMetrics.length : 0,
avgResponseTime,
errorTypes,
};
}
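// Example (illustrative): per-tool breakdown, including a tally of observed error types.
//   const toolStats = monitor.getToolStats('filesystem', 'read_file');
//   // => { toolName, serverName, totalCalls, successRate, avgResponseTime, errorTypes }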
/**
* Detect performance anomalies
*/
detectAnomalies(serverName) {
const anomalies = [];
const stats = this.getServerStats(serverName, 3600000); // Last hour
const recentStats = this.getServerStats(serverName, 300000); // Last 5 minutes
// High latency detection
if (stats.p95ResponseTime > 10000) { // > 10 seconds
anomalies.push({
type: 'high_latency',
severity: 'critical',
message: `High latency detected for ${serverName}: P95 response time is ${stats.p95ResponseTime}ms`,
details: { p95ResponseTime: stats.p95ResponseTime, avgResponseTime: stats.avgResponseTime },
});
}
else if (stats.p95ResponseTime > 5000) { // > 5 seconds
anomalies.push({
type: 'high_latency',
severity: 'warning',
message: `Elevated latency detected for ${serverName}: P95 response time is ${stats.p95ResponseTime}ms`,
details: { p95ResponseTime: stats.p95ResponseTime, avgResponseTime: stats.avgResponseTime },
});
}
// High error rate detection
if (stats.errorRate > 0.5) { // > 50% error rate
anomalies.push({
type: 'high_error_rate',
severity: 'critical',
message: `Critical error rate for ${serverName}: ${(stats.errorRate * 100).toFixed(1)}%`,
details: { errorRate: stats.errorRate, failedCalls: stats.failedCalls, totalCalls: stats.totalCalls },
});
}
else if (stats.errorRate > 0.1) { // > 10% error rate
anomalies.push({
type: 'high_error_rate',
severity: 'warning',
message: `Elevated error rate for ${serverName}: ${(stats.errorRate * 100).toFixed(1)}%`,
details: { errorRate: stats.errorRate, failedCalls: stats.failedCalls, totalCalls: stats.totalCalls },
});
}
// Unusual patterns (sudden changes)
if (stats.totalCalls > 10 && recentStats.totalCalls > 5) {
const recentErrorRate = recentStats.errorRate;
const historicalErrorRate = stats.errorRate;
if (recentErrorRate > historicalErrorRate * 3 && recentErrorRate > 0.05) {
anomalies.push({
type: 'unusual_pattern',
severity: 'error',
message: `Sudden spike in error rate for ${serverName}: ${(recentErrorRate * 100).toFixed(1)}% (recent) vs ${(historicalErrorRate * 100).toFixed(1)}% (historical)`,
details: { recentErrorRate, historicalErrorRate, recentCalls: recentStats.totalCalls },
});
}
}
return anomalies;
}
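// Example (illustrative): surface detected anomalies to a log sink or operator.
//   for (const anomaly of monitor.detectAnomalies('filesystem')) {
//     console.warn(`[${anomaly.severity}] ${anomaly.type}: ${anomaly.message}`);
//   }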
/**
* Generate performance recommendations
*/
generateRecommendations(serverName) {
const recommendations = [];
const stats = this.getServerStats(serverName);
const anomalies = this.detectAnomalies(serverName);
// Performance recommendations
if (stats.avgResponseTime > 5000) {
recommendations.push({
category: 'performance',
priority: 'high',
recommendation: 'Consider increasing connection pool size or implementing caching',
reasoning: `Average response time (${stats.avgResponseTime}ms) is above optimal threshold`,
});
}
if (stats.p99ResponseTime > stats.avgResponseTime * 10) {
recommendations.push({
category: 'performance',
priority: 'medium',
recommendation: 'Investigate outlier requests causing high P99 latency',
reasoning: `P99 response time (${stats.p99ResponseTime}ms) is significantly higher than average (${stats.avgResponseTime}ms)`,
});
}
// Reliability recommendations
if (stats.errorRate > 0.1) {
recommendations.push({
category: 'reliability',
priority: 'high',
recommendation: 'Implement retry logic with exponential backoff',
reasoning: `Error rate (${(stats.errorRate * 100).toFixed(1)}%) indicates reliability issues`,
});
}
// Configuration recommendations based on anomalies
for (const anomaly of anomalies) {
if (anomaly.type === 'high_latency') {
recommendations.push({
category: 'configuration',
priority: anomaly.severity === 'critical' ? 'critical' : 'high',
recommendation: 'Review timeout settings and server configuration',
reasoning: anomaly.message,
});
}
}
return recommendations;
}
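// Example (illustrative): print prioritized recommendations for a server.
//   for (const rec of monitor.generateRecommendations('filesystem')) {
//     console.log(`(${rec.priority}) [${rec.category}] ${rec.recommendation}: ${rec.reasoning}`);
//   }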
initializeDefaultAlertRules() {
this.alertRules = [
{
id: 'high_error_rate',
condition: (stats) => stats.errorRate > 0.2 && stats.totalCalls > 10,
message: 'High error rate detected',
severity: 'error',
cooldownMs: 300000, // 5 minutes
},
{
id: 'critical_error_rate',
condition: (stats) => stats.errorRate > 0.5 && stats.totalCalls > 5,
message: 'Critical error rate detected',
severity: 'critical',
cooldownMs: 120000, // 2 minutes
},
{
id: 'high_latency',
condition: (stats) => stats.p95ResponseTime > 10000,
message: 'High latency detected',
severity: 'warning',
cooldownMs: 600000, // 10 minutes
},
{
id: 'server_unresponsive',
condition: (stats) => Date.now() - stats.lastActivity > 600000 && stats.totalCalls > 0,
message: 'Server appears unresponsive',
severity: 'critical',
cooldownMs: 300000, // 5 minutes
},
];
}
startMonitoring() {
this.monitoringInterval = setInterval(() => {
this.checkAlerts();
this.cleanupOldMetrics();
}, 60000); // Check every minute
}
checkAlerts() {
const serverNames = new Set(this.metrics.map(m => m.serverName));
for (const serverName of serverNames) {
const stats = this.getServerStats(serverName, 300000); // Last 5 minutes
for (const rule of this.alertRules) {
if (rule.lastTriggered && Date.now() - rule.lastTriggered < rule.cooldownMs) {
continue; // Still in cooldown
}
if (rule.condition(stats)) {
this.triggerAlert(rule, serverName, stats);
rule.lastTriggered = Date.now();
}
}
}
}
triggerAlert(rule, serverName, stats) {
const alertMessage = `${rule.message} for ${serverName}`;
switch (rule.severity) {
case 'critical':
case 'error':
// Both severities are logged at error level
this.logger.error(alertMessage, { rule: rule.id, stats });
break;
case 'warning':
this.logger.warn(alertMessage, { rule: rule.id, stats });
break;
default:
this.logger.info(alertMessage, { rule: rule.id, stats });
}
// In a production system, you might also:
// - Send alerts to external monitoring systems
// - Trigger automated remediation actions
// - Update circuit breaker configurations
}
cleanupOldMetrics() {
const cutoff = Date.now() - (24 * 60 * 60 * 1000); // Keep last 24 hours
const originalCount = this.metrics.length;
this.metrics = this.metrics.filter(m => m.timestamp >= cutoff);
const cleaned = originalCount - this.metrics.length;
if (cleaned > 0) {
this.logger.debug(`Cleaned up ${cleaned} old metrics`);
}
}
/**
* Export performance data for external analysis
*/
exportMetrics(format = 'json') {
if (format === 'csv') {
// Values are joined without CSV quoting, so fields are assumed not to contain commas
const headers = 'timestamp,serverName,toolName,duration,success,errorType,inputSize,outputSize,retryCount';
const rows = this.metrics.map(m => [
m.timestamp,
m.serverName,
m.toolName,
m.duration,
m.success,
m.errorType || '',
m.inputSize || '',
m.outputSize || '',
m.retryCount || ''
].join(','));
return [headers, ...rows].join('\n');
}
return JSON.stringify(this.metrics, null, 2);
}
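// Example (illustrative): persist collected metrics for offline analysis.
//   import { writeFileSync } from 'node:fs';
//   writeFileSync('mcp-metrics.csv', monitor.exportMetrics('csv'));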
destroy() {
if (this.monitoringInterval) {
clearInterval(this.monitoringInterval);
}
}
}
export const globalPerformanceMonitor = new McpPerformanceMonitor();
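// Typical consumption (illustrative sketch; `callTool`, `serverName` and `toolName` are placeholders):
//   import { globalPerformanceMonitor } from './mcp-performance-monitor.js';
//   const start = Date.now();
//   try {
//     const result = await callTool();
//     globalPerformanceMonitor.recordMetric({ serverName, toolName, duration: Date.now() - start, success: true });
//   } catch (err) {
//     globalPerformanceMonitor.recordMetric({ serverName, toolName, duration: Date.now() - start, success: false, errorType: err?.name });
//     throw err;
//   }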
//# sourceMappingURL=mcp-performance-monitor.js.map