UNPKG

observability-analyzer

Version:

Production-ready MCP Server for intelligent Loki/Tempo observability dashboard analysis and generation

github.com/unmesh/observability-analyzer

unmesh/observability-analyzer

632 lines (587 loc) • 28.5 kB

JavaScript

#!/usr/bin/env node
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers (logic unchanged, only
// re-formatted). They back the `__importStar(require("fs"))` call below.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
const index_js_1 = require("@modelcontextprotocol/sdk/server/index.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const types_js_1 = require("@modelcontextprotocol/sdk/types.js");
const fs = __importStar(require("fs"));
const ConfigManager_js_1 = require("./config/ConfigManager.js");
const LokiAnalyzer_js_1 = require("./analyzers/LokiAnalyzer.js");
const REDMethodGenerator_js_1 = require("./dashboards/REDMethodGenerator.js");
const GrafanaExporter_js_1 = require("./dashboards/GrafanaExporter.js");

// Limits for the human-readable preview of log-stream query results.
const MAX_STREAMS_SHOWN = 3;
const MAX_ENTRIES_PER_STREAM = 3;
const MAX_ENTRIES_SHOWN = 10;
// Limit for the preview of metric (matrix) query results.
const MAX_SERIES_SHOWN = 5;
const MAX_POINTS_PER_SERIES = 3;

/**
 * Tool catalogue returned for `tools/list` requests.
 * Each entry pairs a tool name with its description and JSON input schema.
 */
const TOOL_DEFINITIONS = [
    {
        name: 'analyze_loki_stack',
        description: 'Analyzes Loki logs to discover services and recommend dashboard types. Default: last 1 hour. Support time ranges like 1h, 24h, 7d.',
        inputSchema: {
            type: 'object',
            properties: {
                lokiUrl: {
                    type: 'string',
                    description: 'Loki instance URL (optional, uses config if not provided)'
                },
                timeRange: {
                    type: 'string',
                    description: 'Time range for analysis (default: 1h). Examples: 1h, 6h, 24h, 7d'
                }
            }
        }
    },
    {
        name: 'generate_loki_dashboard',
        description: 'Creates Loki-based monitoring dashboard JSON with log volume, error rates, and service health panels. Returns the dashboard JSON in the response for immediate use.',
        inputSchema: {
            type: 'object',
            properties: {
                services: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'List of service names to include in dashboard'
                },
                outputPath: {
                    type: 'string',
                    description: 'Optional: Path to save the dashboard JSON file'
                },
                datasourceUid: {
                    type: 'string',
                    description: 'Grafana datasource UID (required)'
                },
                datasourceName: {
                    type: 'string',
                    description: 'Optional: Human-readable datasource name (defaults to datasourceUid)'
                },
                datasourceType: {
                    type: 'string',
                    description: 'Optional: Datasource type (defaults to "loki")'
                }
            },
            required: ['services', 'datasourceUid']
        }
    },
    {
        name: 'validate_loki_queries',
        description: 'Tests LogQL queries against real Loki API to validate query performance and provide optimization suggestions. Supports dashboard JSON files and handles 200 responses with 0 results by trying increased time ranges.',
        inputSchema: {
            type: 'object',
            properties: {
                queries: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'List of LogQL queries to validate'
                },
                dashboardPath: {
                    type: 'string',
                    description: 'Path to dashboard JSON file to extract and validate queries from'
                },
                dashboardJson: {
                    type: 'string',
                    description: 'Dashboard JSON content as string to extract and validate queries from'
                }
            }
        }
    },
    {
        name: 'query_loki',
        description: 'Executes LogQL queries directly against Loki API with full parameter control. Supports both instant and range queries with flexible time parameters and result formatting.',
        inputSchema: {
            type: 'object',
            properties: {
                query: {
                    type: 'string',
                    description: 'LogQL query to execute'
                },
                queryType: {
                    type: 'string',
                    enum: ['instant', 'range'],
                    description: 'Query type: "instant" for current values, "range" for time series (default: range)'
                },
                timeRange: {
                    type: 'string',
                    description: 'Time range for range queries (default: 1h). Examples: 5m, 1h, 6h, 24h, 7d'
                },
                startTime: {
                    type: 'string',
                    description: 'Start time (ISO string or relative like "2h ago"). Overrides timeRange.'
                },
                endTime: {
                    type: 'string',
                    description: 'End time (ISO string or "now"). Used with startTime.'
                },
                limit: {
                    type: 'number',
                    description: 'Maximum number of entries to return (default: 100, max: 5000)'
                },
                direction: {
                    type: 'string',
                    enum: ['forward', 'backward'],
                    description: 'Result ordering (default: backward)'
                },
                step: {
                    type: 'string',
                    description: 'Query resolution step for range queries (default: auto)'
                }
            },
            required: ['query']
        }
    },
    {
        name: 'detect_service_labels',
        description: 'Analyzes Loki logs to detect which label keys are used for service identification (e.g., service_name, application, service, etc.). Helps identify the correct label patterns before generating dashboards.',
        inputSchema: {
            type: 'object',
            properties: {
                timeRange: {
                    type: 'string',
                    description: 'Time range for detection (default: 1h). Examples: 1h, 6h, 24h, 7d'
                }
            }
        }
    }
];

/** Returns a printable message for any thrown value. */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}

/** Wraps Markdown text in the single-text-item result shape every tool handler returns. */
function textResult(text) {
    return { content: [{ type: 'text', text }] };
}

/**
 * Formats an epoch-milliseconds value as an ISO-8601 string.
 * Falls back to the raw value instead of throwing when it is not a valid date,
 * so one odd timestamp cannot turn a successful query into an error report.
 */
function toIsoTime(epochMs, rawValue) {
    const date = new Date(epochMs);
    return Number.isNaN(date.getTime()) ? String(rawValue) : date.toISOString();
}

/** Renders the first three values of a label, followed by "..." when more exist. */
function previewLabelValues(values) {
    return `${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}`;
}

/**
 * Builds the preview for a `streams` result.
 * The timestamp is divided by 1e6, i.e. it is treated as epoch nanoseconds.
 */
function formatStreamResults(streams, resultCount) {
    let display = `**${resultCount} log entries** from ${streams.length} streams\n\n`;
    let shown = 0;
    for (const stream of streams.slice(0, MAX_STREAMS_SHOWN)) {
        if (shown >= MAX_ENTRIES_SHOWN) {
            break;
        }
        display += `### Stream: ${JSON.stringify(stream.stream)}\n`;
        for (const [timestamp, line] of (stream.values || []).slice(0, MAX_ENTRIES_PER_STREAM)) {
            if (shown >= MAX_ENTRIES_SHOWN) {
                break;
            }
            const time = toIsoTime(Number.parseInt(timestamp, 10) / 1000000, timestamp);
            display += `**${time}**: ${line.substring(0, 200)}${line.length > 200 ? '...' : ''}\n`;
            shown++;
        }
    }
    // Report what was really omitted: the preview shows at most
    // MAX_STREAMS_SHOWN * MAX_ENTRIES_PER_STREAM entries, not a fixed 10.
    if (resultCount > shown) {
        display += `\n*... and ${resultCount - shown} more entries*`;
    }
    return display;
}

/**
 * Builds the preview for a `matrix` result.
 * The timestamp is multiplied by 1000, i.e. it is treated as epoch seconds.
 */
function formatMatrixResults(matrix) {
    let display = `**${matrix.length} metric series**\n\n`;
    for (const series of matrix.slice(0, MAX_SERIES_SHOWN)) {
        display += `### Metric: ${JSON.stringify(series.metric)}\n`;
        for (const [timestamp, value] of (series.values || []).slice(-MAX_POINTS_PER_SERIES)) {
            display += `**${toIsoTime(timestamp * 1000, timestamp)}**: ${value}\n`;
        }
    }
    if (matrix.length > MAX_SERIES_SHOWN) {
        display += `\n*... and ${matrix.length - MAX_SERIES_SHOWN} more series*`;
    }
    return display;
}

/** Picks the one-line verdict shown in the validation report summary. */
function summarizeValidation(successRate, zeroResultQueries) {
    if (successRate >= 90 && zeroResultQueries === 0) {
        return '✅ **Excellent**: All queries are working well and returning data.';
    }
    if (successRate >= 90) {
        return '⚠️ **Good**: High success rate but some queries return no data. Check time ranges and label selectors.';
    }
    if (successRate >= 70) {
        return '⚠️ **Medium**: Some queries need optimization but are functional.';
    }
    return '❌ **Poor**: Significant query optimization needed for production use.';
}

/**
 * MCP server exposing Loki analysis, dashboard generation, query validation,
 * ad-hoc querying and service-label detection as tools over stdio.
 */
class ObservabilityAnalyzerServer {
    server;
    configManager;
    redMethodGenerator;
    grafanaExporter;

    constructor() {
        this.server = new index_js_1.Server({
            name: 'observability-dashboard-analyzer',
            version: '1.0.3',
        }, {
            capabilities: {
                tools: {},
            },
        });
        this.configManager = new ConfigManager_js_1.ConfigManager();
        this.redMethodGenerator = new REDMethodGenerator_js_1.REDMethodGenerator();
        this.grafanaExporter = new GrafanaExporter_js_1.GrafanaExporter();
        this.setupToolHandlers();
    }

    /** Registers the `tools/list` and `tools/call` request handlers on the server. */
    setupToolHandlers() {
        this.server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => ({
            tools: TOOL_DEFINITIONS
        }));
        this.server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
            const { name } = request.params;
            // Clients may omit `arguments` for tools without required parameters.
            const args = request.params.arguments ?? {};
            try {
                switch (name) {
                    case 'analyze_loki_stack':
                        return await this.handleAnalyzeLokiStack(args);
                    case 'generate_loki_dashboard':
                        return await this.handleGenerateLokiDashboard(args);
                    case 'validate_loki_queries':
                        return await this.handleValidateLokiQueries(args);
                    case 'query_loki':
                        return await this.handleQueryLoki(args);
                    case 'detect_service_labels':
                        return await this.handleDetectServiceLabels(args);
                    default:
                        throw new types_js_1.McpError(types_js_1.ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
                }
            }
            catch (error) {
                // Keep the original error code (e.g. MethodNotFound, InvalidParams)
                // instead of flattening every failure into InternalError.
                if (error instanceof types_js_1.McpError) {
                    throw error;
                }
                throw new types_js_1.McpError(types_js_1.ErrorCode.InternalError, `Error executing tool ${name}: ${describeError(error)}`);
            }
        });
    }

    /**
     * Handles `analyze_loki_stack`: runs service discovery and renders a Markdown report.
     * @param {{lokiUrl?: string, timeRange?: string}} [args] Tool arguments.
     * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result.
     */
    async handleAnalyzeLokiStack(args = {}) {
        const config = this.configManager.getConfig();
        // Apply the URL override to a copy so it cannot leak into later calls
        // through the object handed out by the config manager.
        // NOTE(review): assumes `config.loki` is a plain options object - confirm.
        const lokiConfig = args.lokiUrl ? { ...config.loki, url: args.lokiUrl } : config.loki;
        const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(lokiConfig);
        // Default to 1 hour
        const timeRange = args.timeRange || '1h';
        const lokiAnalysis = await lokiAnalyzer.analyzeServices(timeRange);
        const mark = (flag) => (flag ? '✅' : '❌');
        const labelLines = Object.keys(lokiAnalysis.labels)
            .map((label) => `- **${label}**: ${previewLabelValues(lokiAnalysis.labels[label])}`);
        const errorPatternLines = lokiAnalysis.errorPatterns
            .slice(0, 5)
            .map((pattern, index) => `${index + 1}. ${pattern}`);
        const recommendationLines = lokiAnalysis.services.length > 0
            ? ['✅ **Recommended**: Generate service monitoring dashboard with log volume, error rates, and service health panels.']
            : [
                `⚠️ **No services detected** in time range: ${timeRange}`,
                '',
                '**Possible causes:**',
                '1. No logging activity during this period',
                '2. Missing service labels in logs (service_name, application, service, app)',
                '3. Authentication or connectivity issues',
                '4. Time range too narrow - try 24h or 7d',
                '',
                '**Debug info:** Check Claude Desktop logs for [DEBUG] and [ERROR] messages',
            ];
        return textResult([
            '# Loki Stack Analysis Results',
            '',
            '## Service Discovery',
            `- **Services Found**: ${lokiAnalysis.services.length}`,
            `- **Services**: ${lokiAnalysis.services.join(', ') || 'None detected'}`,
            '',
            '## Log Structure Quality',
            `- **Structured Logs**: ${mark(lokiAnalysis.hasStructuredLogs)}`,
            `- **Service Labels**: ${mark(lokiAnalysis.hasServiceLabels)}`,
            `- **Error Levels**: ${mark(lokiAnalysis.hasErrorLevels)}`,
            `- **Duration Fields**: ${mark(lokiAnalysis.hasDurationFields)}`,
            '',
            '## Available Labels',
            ...labelLines,
            '',
            '## Log Volume Analysis',
            `- **Total Lines**: ${lokiAnalysis.logVolume.totalLines}`,
            `- **Lines/Second**: ${lokiAnalysis.logVolume.linesPerSecond}`,
            `- **Bytes/Second**: ${lokiAnalysis.logVolume.bytesPerSecond}`,
            '',
            '## Error Patterns',
            ...errorPatternLines,
            '',
            '## Dashboard Recommendations',
            ...recommendationLines,
            '',
        ].join('\n'));
    }

    /**
     * Handles `generate_loki_dashboard`: detects service labels, builds the
     * queries and dashboard, optionally exports it, and returns the JSON inline.
     * @param {{services: string[], outputPath?: string, datasourceUid: string,
     *          datasourceName?: string, datasourceType?: string}} [args] Tool arguments.
     * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result.
     * @throws {McpError} InvalidParams when `services` is not an array.
     */
    async handleGenerateLokiDashboard(args = {}) {
        const config = this.configManager.getConfig();
        const { services, outputPath, datasourceUid, datasourceName, datasourceType = 'loki' } = args;
        if (!Array.isArray(services)) {
            throw new types_js_1.McpError(types_js_1.ErrorCode.InvalidParams, '"services" must be an array of service names');
        }
        const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
        // First analyze services to detect their correct label keys
        const analysis = await lokiAnalyzer.analyzeServices('1h');
        // Generate RED queries using detected service labels
        const lokiQueries = await lokiAnalyzer.generateREDQueries(services, analysis.serviceLabels);
        // Generate dashboard with flexible datasource configuration
        const dashboardConfig = {
            datasourceUid,
            datasourceName: datasourceName || datasourceUid,
            datasourceType
        };
        const dashboard = this.redMethodGenerator.generateLokiDashboard(services, lokiQueries, dashboardConfig);
        // Export if output path provided
        if (outputPath) {
            this.grafanaExporter.exportDashboard(dashboard, outputPath);
        }
        // Include the dashboard JSON in the response for MCP clients
        const dashboardJson = JSON.stringify(dashboard, null, 2);
        const serviceLabelEntries = Object.entries(analysis.serviceLabels);
        const serviceLabelLines = serviceLabelEntries.length > 0
            ? serviceLabelEntries.map(([service, label]) => `- **${service}**: Uses label \`${label}\``)
            : ['- No specific service labels detected, using defaults'];
        return textResult([
            '# Loki Monitoring Dashboard Generated',
            '',
            `- **Services**: ${services.join(', ')}`,
            `- **Panels**: ${dashboard.panels?.length || 6}`,
            ...(outputPath ? [`- **Exported to**: ${outputPath}`] : []),
            '',
            '## Detected Service Labels',
            ...serviceLabelLines,
            '',
            '## Dashboard Features',
            '- ✅ Log volume monitoring by service',
            '- ✅ Error rate tracking with thresholds',
            '- ✅ Log level distribution',
            '- ✅ Service health overview',
            '- ✅ Error pattern detection',
            '- ✅ Performance query optimization',
            '',
            '## Generated LogQL Queries',
            `- **Request Rate**: ${lokiQueries.requestRate.length} queries`,
            `- **Error Rate**: ${lokiQueries.errorRate.length} queries`,
            `- **Duration**: ${lokiQueries.duration.length} queries`,
            '',
            'The dashboard uses automatically detected service label patterns for optimal query performance.',
            '',
            '## Dashboard JSON',
            '```json',
            dashboardJson,
            '```',
            '',
        ].join('\n'));
    }

    /**
     * Handles `validate_loki_queries`: validates explicit queries plus any
     * extracted from a dashboard (file path or inline JSON) and renders a report.
     * @param {{queries?: string[], dashboardPath?: string, dashboardJson?: string}} [args] Tool arguments.
     * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result;
     *          extraction failures are reported in the result text, not thrown.
     */
    async handleValidateLokiQueries(args = {}) {
        const config = this.configManager.getConfig();
        const { queries, dashboardPath, dashboardJson } = args;
        const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
        // Ignore a non-array `queries`; spreading a string would split it into characters.
        let queriesToValidate = Array.isArray(queries) ? queries : [];
        // Extract queries from dashboard if provided
        if (dashboardPath || dashboardJson) {
            try {
                // NOTE(review): dashboardPath comes straight from the tool caller and is
                // read without any path restriction - confirm this is acceptable.
                const dashboardContent = dashboardPath
                    ? await fs.promises.readFile(dashboardPath, 'utf8')
                    : dashboardJson;
                const extractedQueries = lokiAnalyzer.extractQueriesFromDashboard(dashboardContent);
                queriesToValidate = [...queriesToValidate, ...extractedQueries];
            }
            catch (error) {
                return textResult([
                    '# Dashboard Query Extraction Error',
                    '',
                    `❌ **Error**: ${describeError(error)}`,
                    '',
                    'Please ensure:',
                    '1. Dashboard JSON is valid',
                    '2. File path exists (if using dashboardPath)',
                    '3. Dashboard follows standard Grafana JSON structure',
                    '',
                ].join('\n'));
            }
        }
        if (queriesToValidate.length === 0) {
            return textResult([
                '# No Queries Found',
                '',
                '⚠️ **No queries to validate**',
                '',
                'Please provide either:',
                '- `queries`: Array of LogQL query strings',
                '- `dashboardPath`: Path to dashboard JSON file',
                '- `dashboardJson`: Dashboard JSON content as string',
                '',
            ].join('\n'));
        }
        // Validate the queries with enhanced time range handling
        const validation = await lokiAnalyzer.validateQueries(queriesToValidate);
        const totalQueries = validation.length;
        const successfulQueries = validation.filter((q) => q.valid).length;
        // Guard the division so an empty validation list reports 0% rather than NaN%.
        const successRate = totalQueries > 0 ? Math.round((successfulQueries / totalQueries) * 100) : 0;
        const zeroResultQueries = validation.filter((q) => q.valid && q.resultCount === 0).length;
        // Get optimization suggestions
        const suggestionsPerQuery = await Promise.all(queriesToValidate.map((query) => lokiAnalyzer.suggestQueryOptimizations(query)));
        const suggestions = suggestionsPerQuery.flat();

        const detailLines = validation.flatMap((result, index) => [
            '',
            `${index + 1}. ${result.valid ? '✅' : '❌'} \`${result.query.substring(0, 80)}${result.query.length > 80 ? '...' : ''}\``,
            `   ${result.error ? `❌ Error: ${result.error}` : `✅ Results: ${result.resultCount ?? 0} records (${result.timeRangeUsed || '1h'})`}`,
            ...(result.message ? [`   ℹ️ ${result.message}`] : []),
        ]);
        const suggestionLines = suggestions.flatMap((suggestion, index) => [
            '',
            `${index + 1}. **${suggestion.priority.toUpperCase()} Priority**: ${suggestion.description}`,
            `   - Current: \`${suggestion.currentQuery.substring(0, 60)}...\``,
            `   - Optimized: \`${suggestion.optimizedQuery.substring(0, 60)}...\``,
            `   - Expected Improvement: ${suggestion.expectedImprovement}`,
        ]);

        const report = [
            '# LogQL Query Validation Results',
            '',
            '## Overall Results',
            `- **Total Queries**: ${totalQueries}`,
            `- **Successful Queries**: ${successfulQueries}`,
            `- **Success Rate**: ${successRate}%`,
            `- **Queries with 0 Results**: ${zeroResultQueries}`,
            ...(dashboardPath ? [`- **Source**: Dashboard file \`${dashboardPath}\``] : []),
            ...(dashboardJson && !dashboardPath ? ['- **Source**: Dashboard JSON (inline)'] : []),
            '',
            '## Query Validation Details',
            ...detailLines,
            '',
        ];
        if (suggestions.length > 0) {
            report.push('## Performance Optimization Suggestions', ...suggestionLines, '');
        }
        report.push('## Summary & Recommendations', summarizeValidation(successRate, zeroResultQueries), '');
        if (zeroResultQueries > 0) {
            report.push(
                '### Zero Results Analysis',
                `${zeroResultQueries} queries returned 0 results. This tool automatically tested with extended time ranges (1h → 6h → 24h → 7d).`,
                '',
                '**Common causes:**',
                '- No logs matching the query criteria in the tested time ranges',
                '- Incorrect label selectors (check service names, label keys)',
                '- Data retention policies preventing access to older logs',
                '- Query syntax errors or typos in service/label names',
                ''
            );
        }
        if (successRate < 100) {
            report.push(
                '### General Optimization Tips',
                '- Use specific label selectors instead of wildcards',
                '- Add exact string matching before regex operations',
                '- Consider query caching for frequently used patterns',
                '- Validate service names exist in your log labels',
                ''
            );
        }
        return textResult(report.join('\n'));
    }

    /**
     * Handles `query_loki`: executes one LogQL query and renders a preview,
     * execution stats and the raw response.
     * @param {{query: string, queryType?: string, timeRange?: string, startTime?: string,
     *          endTime?: string, limit?: number, direction?: string, step?: string}} [args] Tool arguments.
     * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result;
     *          query failures are reported in the result text, not thrown.
     */
    async handleQueryLoki(args = {}) {
        const config = this.configManager.getConfig();
        const { query, queryType = 'range', timeRange, startTime, endTime, limit, direction, step } = args;
        const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
        try {
            const result = await lokiAnalyzer.executeQuery(query, queryType, {
                timeRange,
                startTime,
                endTime,
                limit,
                direction,
                step
            });
            // Format time range info
            let timeInfo;
            if (queryType !== 'range') {
                timeInfo = endTime || 'Now';
            }
            else if (startTime && endTime) {
                timeInfo = `${startTime} → ${endTime}`;
            }
            else {
                timeInfo = `Last ${timeRange || '1h'}`;
            }
            // Format results for display
            let resultsDisplay = '';
            if (result.resultCount === 0) {
                resultsDisplay = '**No results found**';
            }
            else if (result.data && typeof result.data === 'object' && 'result' in result.data) {
                const data = result.data;
                if (result.resultType === 'streams') {
                    resultsDisplay = formatStreamResults(data.result, result.resultCount);
                }
                else if (result.resultType === 'matrix') {
                    resultsDisplay = formatMatrixResults(data.result);
                }
                else {
                    // Other result types
                    resultsDisplay = `**${result.resultCount} results** of type \`${result.resultType}\`\n\n\`\`\`json\n${JSON.stringify(data.result, null, 2)}\n\`\`\``;
                }
            }
            return textResult([
                '# Loki Query Results',
                '',
                '## Query Details',
                `- **Query**: \`${query}\``,
                `- **Type**: ${queryType.charAt(0).toUpperCase() + queryType.slice(1)} query`,
                `- **Time Range**: ${timeInfo}`,
                // `??` so an explicit limit of 0 is reported as given.
                `- **Limit**: ${limit ?? 100}`,
                `- **Direction**: ${direction || 'backward'}`,
                ...(step ? [`- **Step**: ${step}`] : []),
                '',
                '## Results',
                resultsDisplay,
                '',
                '## Execution Stats',
                `- **Execution Time**: ${result.executionTime}ms`,
                `- **Result Type**: \`${result.resultType}\``,
                `- **Total Results**: ${result.resultCount}`,
                '',
                ...(result.stats
                    ? ['## Query Performance', '```json', JSON.stringify(result.stats, null, 2), '```', '']
                    : []),
                '## Raw Response Data',
                '<details>',
                '<summary>Click to expand full response</summary>',
                '',
                '```json',
                JSON.stringify(result.data, null, 2),
                '```',
                '',
                '</details>',
                '',
            ].join('\n'));
        }
        catch (error) {
            return textResult([
                '# Loki Query Error',
                '',
                '## Query Details',
                `- **Query**: \`${query}\``,
                `- **Type**: ${queryType}`,
                `- **Parameters**: ${JSON.stringify({ timeRange, startTime, endTime, limit, direction, step }, null, 2)}`,
                '',
                '## Error',
                `❌ **${describeError(error)}**`,
                '',
                '## Troubleshooting',
                '- **Check query syntax**: Ensure LogQL syntax is correct',
                '- **Verify time range**: Make sure the time range contains data',
                '- **Check service labels**: Use `detect_service_labels` to find correct label patterns',
                '- **Test smaller ranges**: Try a shorter time range first',
                '- **Validate connectivity**: Ensure Loki instance is accessible',
                '',
                '## Common Query Examples',
                '- **Service logs**: `{service="my-service"}`',
                '- **Error logs**: `{service="my-service"} |~ "(?i)error|exception"`',
                '- **Metrics**: `sum(rate({service="my-service"}[5m]))`',
                '- **JSON filtering**: `{service="my-service"} | json | level="error"`',
                '',
            ].join('\n'));
        }
    }

    /**
     * Handles `detect_service_labels`: reports discovered services, the label
     * each one is identified by, and sample selectors for manual queries.
     * @param {{timeRange?: string}} [args] Tool arguments.
     * @returns {Promise<{content: Array<{type: string, text: string}>}>} Tool result.
     */
    async handleDetectServiceLabels(args = {}) {
        const config = this.configManager.getConfig();
        const { timeRange = '1h' } = args;
        const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
        const analysis = await lokiAnalyzer.analyzeServices(timeRange);
        const hasServices = analysis.services.length > 0;
        const serviceLabelEntries = Object.entries(analysis.serviceLabels);
        const labelEntries = Object.entries(analysis.labels);

        const servicesSection = hasServices
            ? `Found **${analysis.services.length}** services in the last ${timeRange}:\n${analysis.services.map((s) => `- ${s}`).join('\n')}`
            : `No services detected in the last ${timeRange}`;
        const mappingLines = serviceLabelEntries.length > 0
            ? serviceLabelEntries.map(([service, label]) => (label === 'json_field'
                ? `- **${service}**: Found in JSON log content (no standard label)`
                : `- **${service}**: Uses label \`${label}\``))
            : ['No service label mappings detected'];
        const availableLabelLines = labelEntries.length > 0
            ? labelEntries.map(([label, values]) => `- **${label}**: ${previewLabelValues(values)}`)
            : ['No labels detected in logs'];
        const recommendationLines = hasServices
            ? ['✅ **Ready for dashboard generation**: Service labels detected successfully.']
            : [
                '⚠️ **No services found**: Try a longer time range (e.g., 24h or 7d) or check your label patterns.',
                '',
                '**Common service label patterns to check:**',
                '- `service_name` - Standard Kubernetes/microservices label',
                '- `application` - Application deployment label',
                '- `service` - Generic service identifier',
                '- `app` - Common application label',
                '- `container_name` - Container-based identification',
                '- `job` - Prometheus job label',
                '',
                '**Next steps:**',
                '1. Check your log aggregation configuration',
                '2. Verify service labels are being applied correctly',
                '3. Try querying with specific service names if known',
            ];
        const samplePatternLines = serviceLabelEntries.length > 0
            ? [
                '',
                '## Sample Query Patterns',
                'Based on detected labels, use these patterns for manual queries:',
                '',
                ...serviceLabelEntries.map(([service, label]) => (label === 'json_field'
                    ? `- **${service}**: \`{__name__=~".+"} |~ "\\"service\\":\\"${service}\\""\``
                    : `- **${service}**: \`{${label}="${service}"}\``)),
            ]
            : [];
        return textResult([
            '# Service Label Detection Results',
            '',
            '## Services Found',
            servicesSection,
            '',
            '## Service Label Mappings',
            ...mappingLines,
            '',
            '## Available Labels in Logs',
            ...availableLabelLines,
            '',
            '## Recommendations',
            ...recommendationLines,
            ...samplePatternLines,
            '',
        ].join('\n'));
    }

    /** Connects the server to a stdio transport. Logs go to stderr, not stdout. */
    async run() {
        const transport = new stdio_js_1.StdioServerTransport();
        await this.server.connect(transport);
        console.error('Observability Dashboard Analyzer MCP server running on stdio');
    }
}

const server = new ObservabilityAnalyzerServer();
server.run().catch((error) => {
    console.error(error);
    // Signal the startup failure to the parent process instead of exiting 0.
    process.exitCode = 1;
});
//# sourceMappingURL=index.js.map