UNPKG

auto-publishing-mcp-server

Version:

Enterprise-grade MCP Server for Auto-Publishing with pre-publish validation, multi-cloud deployment, and monitoring

422 lines (367 loc) 12.8 kB
/**
 * Enhanced Prometheus-Integrated Monitoring Tool
 * Provides real-time metrics querying and monitoring capabilities
 */

import { BaseToolCall } from '../base/tool-call.js';

/** Seconds per unit for relative time expressions such as "15m" or "1h". */
const SECONDS_PER_UNIT = Object.freeze({
  ms: 0.001,
  s: 1,
  m: 60,
  h: 3600,
  d: 86400,
  w: 604800,
  y: 31536000,
});

/** Matches "1h", "30m", "now-15m", ... (an amount followed by a unit). */
const RELATIVE_TIME_PATTERN = /^(?:now\s*-\s*)?(\d+(?:\.\d+)?)(ms|s|m|h|d|w|y)$/;

/** Matches a plain Unix timestamp in seconds, optionally fractional. */
const UNIX_TIMESTAMP_PATTERN = /^\d+(?:\.\d+)?$/;

/**
 * Convert a user-supplied time expression into Unix seconds.
 *
 * The Prometheus HTTP API only accepts RFC3339 or Unix timestamps for
 * `start`/`end`, so relative expressions must be resolved client-side.
 *
 * @param {string|number|null|undefined} value - "now", "" (treated as now),
 *   a relative offset into the past ("1h", "now-30m"), a Unix timestamp, or
 *   any date string `Date.parse` understands (e.g. ISO 8601).
 * @param {number} nowSeconds - Reference "now" in Unix seconds.
 * @returns {number} Unix timestamp in seconds.
 * @throws {Error} If the value cannot be interpreted as a time.
 */
function resolveTimestamp(value, nowSeconds) {
  if (typeof value === 'number' && Number.isFinite(value)) {
    return value;
  }

  const text = value == null ? '' : String(value).trim();
  if (text === '' || text === 'now') {
    return nowSeconds;
  }
  if (UNIX_TIMESTAMP_PATTERN.test(text)) {
    return Number(text);
  }

  const relative = RELATIVE_TIME_PATTERN.exec(text);
  if (relative) {
    const [, amount, unit] = relative;
    return nowSeconds - Number(amount) * SECONDS_PER_UNIT[unit];
  }

  const parsedMs = Date.parse(text);
  if (Number.isNaN(parsedMs)) {
    throw new Error(`Invalid time value: ${text}`);
  }
  return parsedMs / 1000;
}

/**
 * Parse a Prometheus sample value string into a number.
 * Prometheus encodes infinities as "+Inf"/"-Inf", which `parseFloat`
 * would turn into NaN.
 *
 * @param {string|null} raw - Sample value as returned by the API.
 * @returns {number} Parsed value (NaN when not numeric).
 */
function parseSampleValue(raw) {
  if (raw === '+Inf') {
    return Infinity;
  }
  if (raw === '-Inf') {
    return -Infinity;
  }
  return parseFloat(raw);
}

/**
 * Escape arbitrary text so it matches literally inside a double-quoted
 * PromQL regex matcher (`label=~".*<text>.*"`).
 *
 * Two layers are needed: regex metacharacters are escaped first, then the
 * backslashes/quotes/newlines are escaped for the PromQL string literal.
 *
 * @param {string} text - Untrusted text (e.g. a service name from tool args).
 * @returns {string} Text safe to interpolate between the double quotes.
 */
function escapeForRegexMatcher(text) {
  return String(text)
    .replace(/[\\^$.*+?()[\]{}|]/g, '\\$&')
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n');
}

/**
 * Fetch a Prometheus API endpoint and unwrap its `data` payload.
 *
 * @param {string} url - Fully built request URL.
 * @param {string} failurePrefix - Prefix for error messages ("Query failed", ...).
 * @returns {Promise<object>} The `data` member of a successful API response.
 * @throws {Error} On non-JSON responses or when `status` is not "success".
 */
async function fetchPrometheusData(url, failurePrefix) {
  const response = await fetch(url);

  let payload;
  try {
    payload = await response.json();
  } catch (parseError) {
    // Proxies and gateways often answer with HTML/plain text; report the
    // HTTP status instead of an opaque JSON syntax error.
    throw new Error(
      `${failurePrefix}: HTTP ${response.status} ${response.statusText} (response was not valid JSON)`,
      { cause: parseError },
    );
  }

  if (payload?.status !== 'success') {
    throw new Error(`${failurePrefix}: ${payload?.error || 'Unknown error'}`);
  }
  return payload.data;
}

/**
 * Run a set of named instant queries concurrently and collect the first
 * sample of each. A failing query never rejects; it is recorded with an
 * `error` message and the fallback value.
 *
 * @param {PrometheusMonitorTool} tool - Tool instance used to run queries.
 * @param {Object<string, string>} queries - Metric name -> PromQL expression.
 * @param {object} options
 * @param {number|null} options.emptyValue - Value used when a query returns
 *   no series or fails.
 * @param {boolean} [options.includeUnit=false] - Attach `unit` (from
 *   `getMetricUnit`) to successful readings.
 * @returns {Promise<Object<string, object>>} Metric name -> reading, in the
 *   same key order as `queries`.
 */
async function collectMetrics(tool, queries, { emptyValue, includeUnit = false }) {
  const entries = await Promise.all(
    Object.entries(queries).map(async ([metric, query]) => {
      try {
        const { results } = await tool.executeQuery(query);
        const reading = {
          value: results.length > 0 ? parseSampleValue(results[0].value) : emptyValue,
        };
        if (includeUnit) {
          reading.unit = tool.getMetricUnit(metric);
        }
        reading.query = query;
        return [metric, reading];
      } catch (error) {
        return [metric, { value: emptyValue, error: error.message, query }];
      }
    }),
  );
  return Object.fromEntries(entries);
}

/**
 * Tool exposing Prometheus queries, target/alert listings and a few
 * pre-built dashboards through a single `action` switch.
 */
export class PrometheusMonitorTool extends BaseToolCall {
  constructor() {
    super('monitor/prometheus', 'Advanced Prometheus monitoring with real-time metrics');
    this.prometheusUrl = process.env.PROMETHEUS_URL || 'http://192.168.101.1:9090';
  }

  /**
   * JSON schema describing the accepted tool arguments.
   * @returns {object} JSON schema object.
   */
  getInputSchema() {
    return {
      type: 'object',
      properties: {
        action: {
          type: 'string',
          enum: [
            'query',
            'range_query',
            'targets',
            'alerts',
            'health',
            'system_overview',
            'mcp_dashboard',
            'deployment_metrics',
          ],
          description: 'Monitoring action to perform',
        },
        query: {
          type: 'string',
          description: 'PromQL query (required for query actions)',
        },
        start: {
          type: 'string',
          description: 'Start time for range queries (ISO format or relative like "1h")',
        },
        end: {
          type: 'string',
          description: 'End time for range queries (ISO format or relative like "now")',
        },
        step: {
          type: 'string',
          description: 'Query step interval (e.g., "30s", "1m", "5m")',
        },
        service: {
          type: 'string',
          description: 'Specific service to monitor (mcp-server, node-exporter, etc.)',
        },
      },
      required: ['action'],
    };
  }

  /**
   * Dispatch a monitoring action.
   *
   * @param {object} args - Arguments matching `getInputSchema()`.
   * @param {object} context - Call context (unused here).
   * @returns {Promise<object>} Action-specific result object.
   * @throws {Error} "Prometheus monitoring failed: ..." wrapping any failure;
   *   the original error is available as `cause`.
   */
  async execute(args, context) {
    try {
      const { action, query, start, end, step, service } = args;

      switch (action) {
        case 'query':
          return await this.executeQuery(query);
        case 'range_query':
          return await this.executeRangeQuery(query, start, end, step);
        case 'targets':
          return await this.getTargets();
        case 'alerts':
          return await this.getAlerts();
        case 'health':
          return await this.getHealth();
        case 'system_overview':
          return await this.getSystemOverview();
        case 'mcp_dashboard':
          return await this.getMcpDashboard();
        case 'deployment_metrics':
          return await this.getDeploymentMetrics(service);
        default:
          throw new Error(`Unknown monitoring action: ${action}`);
      }
    } catch (error) {
      throw new Error(`Prometheus monitoring failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Execute a single PromQL query
   *
   * @param {string} query - PromQL expression.
   * @returns {Promise<object>} `{ query, resultType, results, executionTime }`
   *   where each result is `{ metric, value, timestamp }`.
   * @throws {Error} If `query` is empty or the API reports a failure.
   */
  async executeQuery(query) {
    if (!query) {
      throw new Error('Query parameter is required');
    }

    const params = new URLSearchParams({ query });
    const data = await fetchPrometheusData(
      `${this.prometheusUrl}/api/v1/query?${params}`,
      'Query failed',
    );

    // Scalar and string results arrive as a bare [timestamp, value] pair
    // rather than a list of series; normalize them to a single sample.
    const isSingleSample = data.resultType === 'scalar' || data.resultType === 'string';
    const samples = isSingleSample ? [{ metric: {}, value: data.result }] : data.result;

    return {
      query,
      resultType: data.resultType,
      results: samples.map((sample) => ({
        metric: sample.metric,
        value: sample.value ? sample.value[1] : null,
        timestamp: sample.value ? sample.value[0] : null,
      })),
      executionTime: data.executionTime || 'N/A',
    };
  }

  /**
   * Execute a range query for time series data
   *
   * @param {string} query - PromQL expression.
   * @param {string|number} [start='1h'] - Range start: relative offset into
   *   the past ("1h"), ISO date, or Unix seconds.
   * @param {string|number} [end=''] - Range end; empty or "now" means now.
   * @param {string} [step='30s'] - Resolution step.
   * @returns {Promise<object>} `{ query, start, end, step, resultType, results }`
   *   with `start`/`end` echoed as requested and sample timestamps as ISO strings.
   * @throws {Error} If `query` is empty, a time is unparseable, or the API fails.
   */
  async executeRangeQuery(query, start = '1h', end = '', step = '30s') {
    if (!query) {
      throw new Error('Query parameter is required');
    }

    const endTime = end || 'now';
    const nowSeconds = Date.now() / 1000;

    // Resolve relative/ISO inputs to Unix seconds (the API rejects "1h" and
    // "now") and let URLSearchParams handle the encoding.
    const params = new URLSearchParams({
      query,
      start: String(resolveTimestamp(start, nowSeconds)),
      end: String(resolveTimestamp(endTime, nowSeconds)),
      step: String(step),
    });

    const data = await fetchPrometheusData(
      `${this.prometheusUrl}/api/v1/query_range?${params}`,
      'Range query failed',
    );

    return {
      query,
      start,
      end: endTime,
      step,
      resultType: data.resultType,
      results: data.result.map((series) => ({
        metric: series.metric,
        values: series.values.map(([timestamp, value]) => ({
          timestamp: new Date(timestamp * 1000).toISOString(),
          value: parseSampleValue(value),
        })),
      })),
    };
  }

  /**
   * Get all Prometheus targets and their status
   *
   * @returns {Promise<object>} Counts of total/healthy/unhealthy active
   *   targets plus the per-target details.
   * @throws {Error} If the API reports a failure.
   */
  async getTargets() {
    const data = await fetchPrometheusData(
      `${this.prometheusUrl}/api/v1/targets`,
      'Failed to get targets',
    );

    const activeTargets = data.activeTargets.map((target) => ({
      discoveredLabels: target.discoveredLabels,
      labels: target.labels,
      scrapeUrl: target.scrapeUrl,
      health: target.health,
      lastError: target.lastError || null,
      lastScrape: target.lastScrape,
      lastScrapeDuration: target.lastScrapeDuration,
    }));

    return {
      totalTargets: activeTargets.length,
      healthyTargets: activeTargets.filter((t) => t.health === 'up').length,
      unhealthyTargets: activeTargets.filter((t) => t.health === 'down').length,
      targets: activeTargets,
    };
  }

  /**
   * Get active alerts
   *
   * @returns {Promise<object>} Counts of total/firing/pending alerts plus a
   *   flattened view of each alert.
   * @throws {Error} If the API reports a failure.
   */
  async getAlerts() {
    const data = await fetchPrometheusData(
      `${this.prometheusUrl}/api/v1/alerts`,
      'Failed to get alerts',
    );

    const alerts = data.alerts || [];

    return {
      totalAlerts: alerts.length,
      firingAlerts: alerts.filter((a) => a.state === 'firing').length,
      pendingAlerts: alerts.filter((a) => a.state === 'pending').length,
      alerts: alerts.map((alert) => ({
        alertname: alert.labels.alertname,
        instance: alert.labels.instance,
        severity: alert.labels.severity,
        state: alert.state,
        activeAt: alert.activeAt,
        value: alert.value,
        annotations: alert.annotations,
      })),
    };
  }

  /**
   * Get Prometheus health status
   *
   * Never throws: any failure is reported in the returned object.
   *
   * @returns {Promise<object>} Health of Prometheus itself, the `mcp-server`
   *   job and the `node-exporter` job, plus a timestamp (and `error` on failure).
   */
  async getHealth() {
    // A job counts as healthy when its first `up` series reports "1".
    const isUp = ({ results }) => results.length > 0 && results[0].value === '1';

    try {
      // The three probes are independent, so run them concurrently.
      const [response, mcpQuery, nodeQuery] = await Promise.all([
        fetch(`${this.prometheusUrl}/-/healthy`),
        this.executeQuery('up{job="mcp-server"}'),
        this.executeQuery('up{job="node-exporter"}'),
      ]);

      return {
        prometheus: response.ok ? 'healthy' : 'unhealthy',
        mcpServer: isUp(mcpQuery) ? 'healthy' : 'unhealthy',
        nodeExporter: isUp(nodeQuery) ? 'healthy' : 'unhealthy',
        timestamp: new Date().toISOString(),
      };
    } catch (error) {
      return {
        prometheus: 'unhealthy',
        mcpServer: 'unknown',
        nodeExporter: 'unknown',
        error: error.message,
        timestamp: new Date().toISOString(),
      };
    }
  }

  /**
   * Get system overview with key metrics
   *
   * @returns {Promise<object>} `{ timestamp, system, summary }` where
   *   `system` maps metric names to `{ value, unit, query }` (or
   *   `{ value: null, error, query }` when a query failed).
   */
  async getSystemOverview() {
    const queries = {
      cpuUsage: '100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)',
      memoryUsage: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100',
      diskUsage:
        '100 - ((node_filesystem_avail_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"})',
      networkReceive: 'irate(node_network_receive_bytes_total{device!~"lo|docker.*|veth.*"}[5m])',
      networkTransmit: 'irate(node_network_transmit_bytes_total{device!~"lo|docker.*|veth.*"}[5m])',
      // node_boot_time_seconds is the boot *timestamp*; subtract it from the
      // evaluation time to get the uptime in seconds that the unit promises.
      uptime: 'time() - node_boot_time_seconds',
      containerCount: 'count(container_last_seen{name!=""})',
    };

    const results = await collectMetrics(this, queries, { emptyValue: null, includeUnit: true });

    return {
      timestamp: new Date().toISOString(),
      system: results,
      summary: this.generateSystemSummary(results),
    };
  }

  /**
   * Get MCP-specific dashboard metrics
   *
   * @returns {Promise<object>} `{ timestamp, metrics, health, recommendations }`;
   *   metrics that are missing or fail to load are reported as 0.
   */
  async getMcpDashboard() {
    const queries = {
      totalRequests: 'mcp_requests_total',
      toolCalls: 'mcp_tool_calls_total',
      toolErrors: 'mcp_tool_errors_total',
      deployments: 'mcp_deployment_total',
      deploymentFailures: 'mcp_deployment_failures_total',
      activeSessions: 'mcp_active_sessions',
      registeredTools: 'mcp_registered_tools',
      allocatedResources: 'mcp_allocated_resources',
      requestRate: 'rate(mcp_requests_total[5m])',
      errorRate: 'rate(mcp_tool_errors_total[5m])',
      deploymentSuccessRate:
        '(rate(mcp_deployment_total[5m]) - rate(mcp_deployment_failures_total[5m])) / rate(mcp_deployment_total[5m]) * 100',
    };

    const results = await collectMetrics(this, queries, { emptyValue: 0 });

    return {
      timestamp: new Date().toISOString(),
      metrics: results,
      health: results.totalRequests.value > 0 ? 'active' : 'idle',
      recommendations: this.generateMcpRecommendations(results),
    };
  }

  /**
   * Get deployment-specific metrics
   *
   * @param {string} [service] - Service name to match literally as a
   *   substring of the instance/container name; omit for fleet-wide totals.
   * @returns {Promise<object>} `{ timestamp, service, metrics }`.
   */
  async getDeploymentMetrics(service) {
    let queries;
    if (service) {
      // The name comes from tool arguments: escape it so quotes or regex
      // metacharacters cannot alter the query.
      const safeService = escapeForRegexMatcher(service);
      queries = {
        serviceUptime: `up{instance=~".*${safeService}.*"}`,
        serviceCpu: `rate(container_cpu_usage_seconds_total{name=~".*${safeService}.*"}[5m]) * 100`,
        serviceMemory: `container_memory_usage_bytes{name=~".*${safeService}.*"} / 1024 / 1024`,
      };
    } else {
      queries = {
        totalContainers: 'count(container_last_seen{name!=""})',
        runningContainers: 'count(container_last_seen{name!=""} > 0)',
        containerCpuUsage: 'sum(rate(container_cpu_usage_seconds_total{name!=""}[5m])) * 100',
        containerMemoryUsage: 'sum(container_memory_usage_bytes{name!=""}) / 1024 / 1024',
      };
    }

    const results = await collectMetrics(this, queries, { emptyValue: 0 });

    return {
      timestamp: new Date().toISOString(),
      service: service || 'all',
      metrics: results,
    };
  }

  /**
   * Get metric unit for display
   *
   * @param {string} metric - Metric key used by the system overview.
   * @returns {string} Unit label, or '' for unknown metrics.
   */
  getMetricUnit(metric) {
    const units = {
      cpuUsage: '%',
      memoryUsage: '%',
      diskUsage: '%',
      networkReceive: 'bytes/s',
      networkTransmit: 'bytes/s',
      uptime: 'seconds',
      containerCount: 'count',
    };
    return units[metric] || '';
  }

  /**
   * Generate system summary
   *
   * @param {object} results - Metric readings keyed by name; reads the
   *   `value` of `cpuUsage`, `memoryUsage` and `diskUsage`.
   * @returns {string[]} Human-readable findings; never empty.
   */
  generateSystemSummary(results) {
    const summary = [];

    if (results.cpuUsage?.value > 80) {
      summary.push('⚠️ High CPU usage detected');
    }
    if (results.memoryUsage?.value > 85) {
      summary.push('⚠️ High memory usage detected');
    }
    if (results.diskUsage?.value > 90) {
      summary.push('🚨 Disk usage critical');
    }

    // Do not claim "normal" for metrics that could not be read (null/NaN),
    // e.g. when Prometheus or node-exporter is unreachable.
    const unavailable = ['cpuUsage', 'memoryUsage', 'diskUsage'].filter((name) => {
      const value = results[name]?.value;
      return typeof value !== 'number' || Number.isNaN(value);
    });
    if (unavailable.length > 0) {
      summary.push(`⚠️ Metrics unavailable: ${unavailable.join(', ')}`);
    }

    if (summary.length === 0) {
      summary.push('✅ System metrics within normal ranges');
    }

    return summary;
  }

  /**
   * Generate MCP recommendations
   *
   * @param {object} results - MCP dashboard readings keyed by name; reads
   *   `errorRate`, `deploymentSuccessRate`, `deployments`, `activeSessions`
   *   and `totalRequests`.
   * @returns {string[]} Recommendations; never empty.
   */
  generateMcpRecommendations(results) {
    const recommendations = [];

    if (results.errorRate.value > 0.1) {
      recommendations.push('High error rate detected - investigate tool implementations');
    }
    if (results.deploymentSuccessRate.value < 95 && results.deployments.value > 0) {
      recommendations.push('Deployment success rate below 95% - check deployment validation');
    }
    if (results.activeSessions.value === 0 && results.totalRequests.value === 0) {
      recommendations.push('No active sessions - consider running health checks');
    }
    if (recommendations.length === 0) {
      recommendations.push('MCP server performing optimally');
    }

    return recommendations;
  }
}