observability-analyzer
Version: 1.0.3
Production-ready MCP Server for intelligent Loki/Tempo observability dashboard analysis and generation
632 lines (587 loc) • 28.5 kB
JavaScript
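// TypeScript-emitted CommonJS interop helpers (__createBinding, __setModuleDefault, __importStar)
// used below for the `import * as fs` style imports.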
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
const index_js_1 = require("@modelcontextprotocol/sdk/server/index.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const types_js_1 = require("@modelcontextprotocol/sdk/types.js");
const fs = __importStar(require("fs"));
const ConfigManager_js_1 = require("./config/ConfigManager.js");
const LokiAnalyzer_js_1 = require("./analyzers/LokiAnalyzer.js");
const REDMethodGenerator_js_1 = require("./dashboards/REDMethodGenerator.js");
const GrafanaExporter_js_1 = require("./dashboards/GrafanaExporter.js");
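// MCP server that wraps the Loki analysis, dashboard generation, and Grafana export
// components behind five tools exposed over a stdio transport.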
class ObservabilityAnalyzerServer {
server;
configManager;
redMethodGenerator;
grafanaExporter;
constructor() {
this.server = new index_js_1.Server({
name: 'observability-dashboard-analyzer',
version: '1.0.3',
}, {
capabilities: {
tools: {},
},
});
this.configManager = new ConfigManager_js_1.ConfigManager();
this.redMethodGenerator = new REDMethodGenerator_js_1.REDMethodGenerator();
this.grafanaExporter = new GrafanaExporter_js_1.GrafanaExporter();
this.setupToolHandlers();
}
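// Registers the ListTools handler (advertising the five tools and their JSON Schemas)
// and the CallTool handler that dispatches to the per-tool methods below.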
setupToolHandlers() {
this.server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => ({
tools: [
{
name: 'analyze_loki_stack',
description: 'Analyzes Loki logs to discover services and recommend dashboard types. Default: last 1 hour. Supports time ranges like 1h, 24h, 7d.',
inputSchema: {
type: 'object',
properties: {
lokiUrl: { type: 'string', description: 'Loki instance URL (optional, uses config if not provided)' },
timeRange: { type: 'string', description: 'Time range for analysis (default: 1h). Examples: 1h, 6h, 24h, 7d' }
}
}
},
{
name: 'generate_loki_dashboard',
description: 'Creates Loki-based monitoring dashboard JSON with log volume, error rates, and service health panels. Returns the dashboard JSON in the response for immediate use.',
inputSchema: {
type: 'object',
properties: {
services: {
type: 'array',
items: { type: 'string' },
description: 'List of service names to include in dashboard'
},
outputPath: { type: 'string', description: 'Optional: Path to save the dashboard JSON file' },
datasourceUid: { type: 'string', description: 'Grafana datasource UID (required)' },
datasourceName: { type: 'string', description: 'Optional: Human-readable datasource name (defaults to datasourceUid)' },
datasourceType: { type: 'string', description: 'Optional: Datasource type (defaults to "loki")' }
},
required: ['services', 'datasourceUid']
}
},
{
name: 'validate_loki_queries',
description: 'Tests LogQL queries against the live Loki API to validate query performance and provide optimization suggestions. Supports dashboard JSON files and handles 200 responses with 0 results by retrying with increased time ranges.',
inputSchema: {
type: 'object',
properties: {
queries: {
type: 'array',
items: { type: 'string' },
description: 'List of LogQL queries to validate'
},
dashboardPath: {
type: 'string',
description: 'Path to dashboard JSON file to extract and validate queries from'
},
dashboardJson: {
type: 'string',
description: 'Dashboard JSON content as string to extract and validate queries from'
}
}
}
},
{
name: 'query_loki',
description: 'Executes LogQL queries directly against the Loki API with full parameter control. Supports both instant and range queries with flexible time parameters and result formatting.',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'LogQL query to execute' },
queryType: {
type: 'string',
enum: ['instant', 'range'],
description: 'Query type: "instant" for current values, "range" for time series (default: range)'
},
timeRange: { type: 'string', description: 'Time range for range queries (default: 1h). Examples: 5m, 1h, 6h, 24h, 7d' },
startTime: { type: 'string', description: 'Start time (ISO string or relative like "2h ago"). Overrides timeRange.' },
endTime: { type: 'string', description: 'End time (ISO string or "now"). Used with startTime.' },
limit: { type: 'number', description: 'Maximum number of entries to return (default: 100, max: 5000)' },
direction: { type: 'string', enum: ['forward', 'backward'], description: 'Result ordering (default: backward)' },
step: { type: 'string', description: 'Query resolution step for range queries (default: auto)' }
},
required: ['query']
}
},
{
name: 'detect_service_labels',
description: 'Analyzes Loki logs to detect which label keys are used for service identification (e.g., service_name, application, service, app). Helps identify the correct label patterns before generating dashboards.',
inputSchema: {
type: 'object',
properties: {
timeRange: { type: 'string', description: 'Time range for detection (default: 1h). Examples: 1h, 6h, 24h, 7d' }
}
}
}
]
}));
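// Example tools/call request this handler dispatches on (shape per the MCP spec; the tool
// name and argument values here are illustrative, not taken from the source):
// { "method": "tools/call", "params": { "name": "query_loki",
//     "arguments": { "query": "{service=\"checkout\"}", "queryType": "range", "timeRange": "1h" } } }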
this.server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
const { name, arguments: args } = request.params;
try {
switch (name) {
case 'analyze_loki_stack':
return await this.handleAnalyzeLokiStack(args);
case 'generate_loki_dashboard':
return await this.handleGenerateLokiDashboard(args);
case 'validate_loki_queries':
return await this.handleValidateLokiQueries(args);
case 'query_loki':
return await this.handleQueryLoki(args);
case 'detect_service_labels':
return await this.handleDetectServiceLabels(args);
default:
throw new types_js_1.McpError(types_js_1.ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
}
}
catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
throw new types_js_1.McpError(types_js_1.ErrorCode.InternalError, `Error executing tool ${name}: ${errorMessage}`);
}
});
}
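// analyze_loki_stack: discovers services and assesses log-structure quality over the requested
// time range (default 1h), then returns a markdown report with dashboard recommendations.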
async handleAnalyzeLokiStack(args) {
const config = this.configManager.getConfig();
// Override URL if provided
if (args.lokiUrl) {
config.loki.url = args.lokiUrl;
}
const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
const timeRange = args.timeRange || '1h'; // Default to 1 hour
const lokiAnalysis = await lokiAnalyzer.analyzeServices(timeRange);
return {
content: [
{
type: 'text',
text: `# Loki Stack Analysis Results
## Service Discovery
- **Services Found**: ${lokiAnalysis.services.length}
- **Services**: ${lokiAnalysis.services.join(', ') || 'None detected'}
## Log Structure Quality
- **Structured Logs**: ${lokiAnalysis.hasStructuredLogs ? '✅' : '❌'}
- **Service Labels**: ${lokiAnalysis.hasServiceLabels ? '✅' : '❌'}
- **Error Levels**: ${lokiAnalysis.hasErrorLevels ? '✅' : '❌'}
- **Duration Fields**: ${lokiAnalysis.hasDurationFields ? '✅' : '❌'}
## Available Labels
${Object.keys(lokiAnalysis.labels).map(label => `- **${label}**: ${lokiAnalysis.labels[label].slice(0, 3).join(', ')}${lokiAnalysis.labels[label].length > 3 ? '...' : ''}`).join('\n')}
## Log Volume Analysis
- **Total Lines**: ${lokiAnalysis.logVolume.totalLines}
- **Lines/Second**: ${lokiAnalysis.logVolume.linesPerSecond}
- **Bytes/Second**: ${lokiAnalysis.logVolume.bytesPerSecond}
## Error Patterns
${lokiAnalysis.errorPatterns.slice(0, 5).map((pattern, index) => `${index + 1}. ${pattern}`).join('\n')}
## Dashboard Recommendations
${lokiAnalysis.services.length > 0 ?
'✅ **Recommended**: Generate service monitoring dashboard with log volume, error rates, and service health panels.' :
`⚠️ **No services detected** in time range: ${timeRange}
**Possible causes:**
1. No logging activity during this period
2. Missing service labels in logs (service_name, application, service, app)
3. Authentication or connectivity issues
4. Time range too narrow - try 24h or 7d
**Debug info:** Check Claude Desktop logs for [DEBUG] and [ERROR] messages`}
`
}
]
};
}
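// generate_loki_dashboard: detects per-service label keys, builds RED-method LogQL queries,
// renders the Grafana dashboard JSON, optionally exports it to outputPath, and embeds the
// full JSON in the response.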
async handleGenerateLokiDashboard(args) {
const config = this.configManager.getConfig();
const { services, outputPath, datasourceUid, datasourceName, datasourceType = 'loki' } = args;
const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
// First analyze services to detect their correct label keys
const analysis = await lokiAnalyzer.analyzeServices('1h');
// Generate RED queries using detected service labels
const lokiQueries = await lokiAnalyzer.generateREDQueries(services, analysis.serviceLabels);
// Generate dashboard with flexible datasource configuration
const dashboardConfig = {
datasourceUid,
datasourceName: datasourceName || datasourceUid,
datasourceType
};
const dashboard = this.redMethodGenerator.generateLokiDashboard(services, lokiQueries, dashboardConfig);
// Export if output path provided
if (outputPath) {
this.grafanaExporter.exportDashboard(dashboard, outputPath);
}
// Include the dashboard JSON in the response for MCP clients
const dashboardJson = JSON.stringify(dashboard, null, 2);
return {
content: [
{
type: 'text',
text: `# Loki Monitoring Dashboard Generated
- **Services**: ${services.join(', ')}
- **Panels**: ${dashboard.panels?.length || 6}
${outputPath ? `- **Exported to**: ${outputPath}` : ''}
## Detected Service Labels
${Object.entries(analysis.serviceLabels).length > 0 ?
Object.entries(analysis.serviceLabels).map(([service, label]) => `- **${service}**: Uses label \`${label}\``).join('\n') :
'- No specific service labels detected, using defaults'}
## Dashboard Features
- ✅ Log volume monitoring by service
- ✅ Error rate tracking with thresholds
- ✅ Log level distribution
- ✅ Service health overview
- ✅ Error pattern detection
- ✅ Performance query optimization
## Generated LogQL Queries
- **Request Rate**: ${lokiQueries.requestRate.length} queries
- **Error Rate**: ${lokiQueries.errorRate.length} queries
- **Duration**: ${lokiQueries.duration.length} queries
The dashboard uses automatically detected service label patterns for optimal query performance.
## Dashboard JSON
\`\`\`json
${dashboardJson}
\`\`\`
`
}
]
};
}
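// validate_loki_queries: collects queries from the arguments and/or a dashboard (file path or
// inline JSON), runs them against Loki, and reports success rate, zero-result queries, and
// optimization suggestions.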
async handleValidateLokiQueries(args) {
const config = this.configManager.getConfig();
const { queries, dashboardPath, dashboardJson } = args;
const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
let queriesToValidate = queries || [];
// Extract queries from dashboard if provided
if (dashboardPath || dashboardJson) {
try {
let dashboardContent;
if (dashboardPath) {
dashboardContent = fs.readFileSync(dashboardPath, 'utf8');
}
else {
dashboardContent = dashboardJson;
}
const extractedQueries = lokiAnalyzer.extractQueriesFromDashboard(dashboardContent);
queriesToValidate = [...queriesToValidate, ...extractedQueries];
}
catch (error) {
return {
content: [
{
type: 'text',
text: `# Dashboard Query Extraction Error
❌ **Error**: ${error instanceof Error ? error.message : String(error)}
Please ensure:
1. Dashboard JSON is valid
2. File path exists (if using dashboardPath)
3. Dashboard follows standard Grafana JSON structure
`
}
]
};
}
}
if (queriesToValidate.length === 0) {
return {
content: [
{
type: 'text',
text: `# No Queries Found
⚠️ **No queries to validate**
Please provide either:
- \`queries\`: Array of LogQL query strings
- \`dashboardPath\`: Path to dashboard JSON file
- \`dashboardJson\`: Dashboard JSON content as string
`
}
]
};
}
// Validate the queries with enhanced time range handling
const validation = await lokiAnalyzer.validateQueries(queriesToValidate);
const totalQueries = validation.length;
const successfulQueries = validation.filter(q => q.valid).length;
const successRate = Math.round((successfulQueries / totalQueries) * 100);
const zeroResultQueries = validation.filter(q => q.valid && q.resultCount === 0).length;
// Get optimization suggestions
const optimizationSuggestions = await Promise.all(queriesToValidate.map(async (query) => await lokiAnalyzer.suggestQueryOptimizations(query)));
return {
content: [
{
type: 'text',
text: `# LogQL Query Validation Results
## Overall Results
- **Total Queries**: ${totalQueries}
- **Successful Queries**: ${successfulQueries}
- **Success Rate**: ${successRate}%
- **Queries with 0 Results**: ${zeroResultQueries}
${dashboardPath ? `- **Source**: Dashboard file \`${dashboardPath}\`` : ''}
${dashboardJson && !dashboardPath ? `- **Source**: Dashboard JSON (inline)` : ''}
## Query Validation Details
${validation.map((result, index) => `
${index + 1}. ${result.valid ? '✅' : '❌'} \`${result.query.substring(0, 80)}${result.query.length > 80 ? '...' : ''}\`
${result.error ? `❌ Error: ${result.error}` :
`✅ Results: ${result.resultCount ?? 0} records (${result.timeRangeUsed || '1h'})`}
${result.message ? ` ℹ️ ${result.message}` : ''}
`).join('')}
${optimizationSuggestions.flat().length > 0 ? `## Performance Optimization Suggestions
${optimizationSuggestions.flat().map((suggestion, index) => `
${index + 1}. **${suggestion.priority.toUpperCase()} Priority**: ${suggestion.description}
- Current: \`${suggestion.currentQuery.substring(0, 60)}...\`
- Optimized: \`${suggestion.optimizedQuery.substring(0, 60)}...\`
- Expected Improvement: ${suggestion.expectedImprovement}
`).join('')}` : ''}
## Summary & Recommendations
${successRate >= 90 && zeroResultQueries === 0 ? '✅ **Excellent**: All queries are working well and returning data.' :
successRate >= 90 ? '⚠️ **Good**: High success rate but some queries return no data. Check time ranges and label selectors.' :
successRate >= 70 ? '⚠️ **Medium**: Some queries need optimization but are functional.' :
'❌ **Poor**: Significant query optimization needed for production use.'}
${zeroResultQueries > 0 ? `
### Zero Results Analysis
${zeroResultQueries} queries returned 0 results. This tool automatically tested with extended time ranges (1h → 6h → 24h → 7d).
**Common causes:**
- No logs matching the query criteria in the tested time ranges
- Incorrect label selectors (check service names, label keys)
- Data retention policies preventing access to older logs
- Query syntax errors or typos in service/label names` : ''}
${successRate < 100 ? `
### General Optimization Tips
- Use specific label selectors instead of wildcards
- Add exact string matching before regex operations
- Consider query caching for frequently used patterns
- Validate service names exist in your log labels` : ''}
`
}
]
};
}
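// query_loki: executes a single LogQL query (instant or range) with the supplied time, limit,
// direction, and step parameters, then formats streams, matrices, or other result types into
// a readable report.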
async handleQueryLoki(args) {
const config = this.configManager.getConfig();
const { query, queryType = 'range', timeRange, startTime, endTime, limit, direction, step } = args;
const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
try {
const result = await lokiAnalyzer.executeQuery(query, queryType, {
timeRange,
startTime,
endTime,
limit,
direction,
step
});
// Format time range info
let timeInfo = '';
if (queryType === 'range') {
if (startTime && endTime) {
timeInfo = `${startTime} → ${endTime}`;
}
else {
timeInfo = `Last ${timeRange || '1h'}`;
}
}
else {
timeInfo = endTime || 'Now';
}
// Format results for display
let resultsDisplay = '';
if (result.resultCount === 0) {
resultsDisplay = '**No results found**';
}
else if (result.data && typeof result.data === 'object' && 'result' in result.data) {
const data = result.data;
if (result.resultType === 'streams') {
// Format log streams
const streams = data.result;
resultsDisplay = `**${result.resultCount} log entries** from ${streams.length} streams\n\n`;
// Show first few entries
let entryCount = 0;
for (const stream of streams.slice(0, 3)) {
resultsDisplay += `### Stream: ${JSON.stringify(stream.stream)}\n`;
for (const [timestamp, line] of (stream.values || []).slice(0, 3)) {
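// Loki stream entries are [nanosecond-timestamp, line]; convert to milliseconds for Date.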
const time = new Date(parseInt(timestamp) / 1000000).toISOString();
resultsDisplay += `**${time}**: ${line.substring(0, 200)}${line.length > 200 ? '...' : ''}\n`;
entryCount++;
if (entryCount >= 10)
break;
}
if (entryCount >= 10)
break;
}
if (result.resultCount > 10) {
resultsDisplay += `\n*... and ${result.resultCount - 10} more entries*`;
}
}
else if (result.resultType === 'matrix') {
// Format metric data
const matrix = data.result;
resultsDisplay = `**${matrix.length} metric series**\n\n`;
for (const series of matrix.slice(0, 5)) {
resultsDisplay += `### Metric: ${JSON.stringify(series.metric)}\n`;
for (const [timestamp, value] of (series.values || []).slice(-3)) {
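// Matrix sample timestamps are Unix seconds; convert to milliseconds for Date.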
const time = new Date(timestamp * 1000).toISOString();
resultsDisplay += `**${time}**: ${value}\n`;
}
}
if (matrix.length > 5) {
resultsDisplay += `\n*... and ${matrix.length - 5} more series*`;
}
}
else {
// Other result types
resultsDisplay = `**${result.resultCount} results** of type \`${result.resultType}\`\n\n\`\`\`json\n${JSON.stringify(data.result, null, 2)}\n\`\`\``;
}
}
return {
content: [
{
type: 'text',
text: `# Loki Query Results
## Query Details
- **Query**: \`${query}\`
- **Type**: ${queryType.charAt(0).toUpperCase() + queryType.slice(1)} query
- **Time Range**: ${timeInfo}
- **Limit**: ${limit || 100}
- **Direction**: ${direction || 'backward'}
${step ? `- **Step**: ${step}` : ''}
## Results
${resultsDisplay}
## Execution Stats
- **Execution Time**: ${result.executionTime}ms
- **Result Type**: \`${result.resultType}\`
- **Total Results**: ${result.resultCount}
${result.stats ? `## Query Performance
\`\`\`json
${JSON.stringify(result.stats, null, 2)}
\`\`\`` : ''}
## Raw Response Data
<details>
<summary>Click to expand full response</summary>
\`\`\`json
${JSON.stringify(result.data, null, 2)}
\`\`\`
</details>
`
}
]
};
}
catch (error) {
return {
content: [
{
type: 'text',
text: `# Loki Query Error
## Query Details
- **Query**: \`${query}\`
- **Type**: ${queryType}
- **Parameters**: ${JSON.stringify({ timeRange, startTime, endTime, limit, direction, step }, null, 2)}
## Error
❌ **${error instanceof Error ? error.message : String(error)}**
## Troubleshooting
- **Check query syntax**: Ensure LogQL syntax is correct
- **Verify time range**: Make sure the time range contains data
- **Check service labels**: Use \`detect_service_labels\` to find correct label patterns
- **Test smaller ranges**: Try a shorter time range first
- **Validate connectivity**: Ensure Loki instance is accessible
## Common Query Examples
- **Service logs**: \`{service="my-service"}\`
- **Error logs**: \`{service="my-service"} |~ "(?i)error|exception"\`
- **Metrics**: \`sum(rate({service="my-service"}[5m]))\`
- **JSON filtering**: \`{service="my-service"} | json | level="error"\`
`
}
]
};
}
}
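// detect_service_labels: reuses the service analysis to report which label key identifies each
// service, the labels present in the logs, and sample query patterns for manual use.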
async handleDetectServiceLabels(args) {
const config = this.configManager.getConfig();
const { timeRange = '1h' } = args;
const lokiAnalyzer = new LokiAnalyzer_js_1.LokiAnalyzer(config.loki);
const analysis = await lokiAnalyzer.analyzeServices(timeRange);
return {
content: [
{
type: 'text',
text: `# Service Label Detection Results
## Services Found
${analysis.services.length > 0 ?
`Found **${analysis.services.length}** services in the last ${timeRange}:\n${analysis.services.map(s => `- ${s}`).join('\n')}` :
`No services detected in the last ${timeRange}`}
## Service Label Mappings
${Object.entries(analysis.serviceLabels).length > 0 ?
Object.entries(analysis.serviceLabels).map(([service, label]) => {
if (label === 'json_field') {
return `- **${service}**: Found in JSON log content (no standard label)`;
}
return `- **${service}**: Uses label \`${label}\``;
}).join('\n') :
'No service label mappings detected'}
## Available Labels in Logs
${Object.keys(analysis.labels).length > 0 ?
Object.entries(analysis.labels).map(([label, values]) => `- **${label}**: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}`).join('\n') :
'No labels detected in logs'}
## Recommendations
${analysis.services.length > 0 ?
`✅ **Ready for dashboard generation**: Service labels detected successfully.` :
`⚠️ **No services found**: Try a longer time range (e.g., 24h or 7d) or check your label patterns.
**Common service label patterns to check:**
- \`service_name\` - Standard Kubernetes/microservices label
- \`application\` - Application deployment label
- \`service\` - Generic service identifier
- \`app\` - Common application label
- \`container_name\` - Container-based identification
- \`job\` - Prometheus job label
**Next steps:**
1. Check your log aggregation configuration
2. Verify service labels are being applied correctly
3. Try querying with specific service names if known`}
${Object.keys(analysis.serviceLabels).length > 0 ? `
## Sample Query Patterns
Based on detected labels, use these patterns for manual queries:
${Object.entries(analysis.serviceLabels).map(([service, label]) => {
if (label === 'json_field') {
return `- **${service}**: \`{__name__=~".+"} |~ "\\"service\\":\\"${service}\\""\``;
}
return `- **${service}**: \`{${label}="${service}"}\``;
}).join('\n')}` : ''}
`
}
]
};
}
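// Starts the server on a stdio transport; status is logged to stderr so stdout stays reserved
// for the MCP protocol.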
async run() {
const transport = new stdio_js_1.StdioServerTransport();
await this.server.connect(transport);
console.error('Observability Dashboard Analyzer MCP server running on stdio');
}
}
const server = new ObservabilityAnalyzerServer();
server.run().catch(console.error);
//# sourceMappingURL=index.js.map