UNPKG

openclaw-grafana-lens

Version:

OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and

333 lines (332 loc) 16.7 kB
/** * Query error guidance — pattern-match PromQL/LogQL errors to structured hints. * * Addresses mcp-grafana #430: agents send bad PromQL blind and cannot recover * from cryptic errors like "parse error: unexpected end of input". * * Each guidance rule has a regex pattern and produces a structured hint with * cause, suggestion, and optional corrected example. */ const PROMQL_RULES = [ // ── Syntax errors ─────────────────────────────────────────────── { pattern: /unclosed left parenthesis/i, guidance: () => ({ cause: "Unclosed parenthesis in PromQL expression", suggestion: "Check for missing closing ')' — count opening and closing parentheses", example: "rate(http_requests_total[5m]) — ensure every '(' has a matching ')'", }), }, { pattern: /unexpected end of input/i, guidance: () => ({ cause: "Incomplete PromQL expression", suggestion: "The expression is truncated — ensure it's complete with all brackets and parentheses closed", }), }, { pattern: /unexpected (right parenthesis|"\)")/i, guidance: () => ({ cause: "Extra closing parenthesis in PromQL expression", suggestion: "Remove the extra ')' or add matching opening '('", }), }, { pattern: /could not parse remaining input/i, guidance: (_m, expr) => ({ cause: "PromQL syntax error — trailing invalid characters", suggestion: "Check for typos or extra characters at the end of the expression", example: expr ? `Verify: ${expr}` : undefined, }), }, // ── Range/aggregation mistakes ────────────────────────────────── { pattern: /(?:expected type range vector|ranges only allowed for vector selectors)/i, guidance: () => ({ cause: "Range vector expected — a function like rate() requires [duration] syntax", suggestion: "Add a range selector like [5m] to the metric inside rate(), increase(), or similar functions", example: "rate(http_requests_total[5m]) — not rate(http_requests_total)", }), }, { pattern: /(?:expected type instant vector|vector selector must be wrapped)/i, guidance: () => ({ cause: "Instant vector expected where a range vector was given", suggestion: "Remove the [duration] range selector — this context expects an instant vector", example: "sum(my_metric) — not sum(my_metric[5m])", }), }, // ── Metric not found / label issues ───────────────────────────── { pattern: /(?:no match for|metric not found)/i, guidance: (_m, expr) => ({ cause: "No data for this metric or label selector", suggestion: "Verify the metric name exists using grafana_list_metrics. Check label names/values with grafana_list_metrics in label mode. Common issue: metric is present but has different label values", example: expr.includes("{") ? "Try the metric name alone first without label filters to confirm it exists" : "Use grafana_list_metrics to search for similar metric names", }), }, // ── Auth / connection errors ──────────────────────────────────── { pattern: /authentication failed/i, guidance: () => ({ cause: "Grafana service account token is invalid or expired", suggestion: "Check GRAFANA_SERVICE_ACCOUNT_TOKEN — the token may need rotation. Ensure the service account has Editor role", }), }, { pattern: /rate limit/i, guidance: () => ({ cause: "Query rate limited by Prometheus or Grafana", suggestion: "Wait a few seconds and retry. If persistent, simplify the query to reduce evaluation cost", }), }, { pattern: /timeout|context deadline exceeded|context canceled/i, guidance: (_m, expr) => ({ cause: "Query timed out — the expression is too expensive to evaluate", suggestion: "Narrow the time range, add label filters, or simplify the expression. Avoid {__name__=~'.+'} which scans all metrics", example: expr.includes("=~") ? "Replace broad regex matchers with specific label values" : "Try a shorter time range (e.g., now-1h instead of now-7d)", }), }, // ── 4xx/5xx from Prometheus ───────────────────────────────────── { pattern: /bad_data/i, guidance: () => ({ cause: "Prometheus rejected the query as malformed", suggestion: "Check PromQL syntax — common issues: missing brackets, invalid function names, wrong aggregation labels. Use grafana_explore_datasources to verify this is a Prometheus datasource", }), }, { pattern: /execution/i, guidance: () => ({ cause: "Prometheus execution error — the query is valid syntax but failed to run", suggestion: "Check if the datasource is healthy. Try a simpler query like 'up' to verify connectivity", }), }, // ── Not found (wrong datasource) ─────────────────────────────── { pattern: /Not found/i, guidance: () => ({ cause: "Datasource not found or not a Prometheus datasource", suggestion: "Use grafana_explore_datasources to verify the datasource UID and type. If this is a Loki datasource, use grafana_query_logs instead", }), }, ]; const LOGQL_RULES = [ // ── Stream selector issues ────────────────────────────────────── { pattern: /unexpected IDENTIFIER/i, guidance: () => ({ cause: "LogQL requires a stream selector — bare text is not valid", suggestion: "Wrap your query in a stream selector: {job=\"your-service\"} |= \"your text\"", example: '{job="api"} |= "error" — not just "error"', }), }, { pattern: /queries require at least one regexp or equality/i, guidance: () => ({ cause: "Empty stream selector {} — Loki requires at least one label matcher", suggestion: "Add a label filter: {job=\"your-service\"} or {service_name=\"your-service\"}. Use grafana_explore_datasources to find available Loki labels", example: '{job="api"} |= "error" — not {} |= "error"', }), }, { pattern: /unexpected \$end/i, guidance: () => ({ cause: "Incomplete LogQL expression — unexpected end of input", suggestion: "Check for unclosed braces, missing quotes, or incomplete pipeline stages", example: '{job="api"} |= "error" — ensure all { } and \" \" are properly closed', }), }, // ── Pipeline/parser errors ────────────────────────────────────── { pattern: /unexpected.*expecting (STRING|CLOSE_BRACE|PIPE)/i, guidance: (_m) => ({ cause: "LogQL syntax error in stream selector or pipeline", suggestion: "Check your label matchers and pipeline stages. Labels use = for exact match, =~ for regex. Pipeline stages: |= (contains), != (not contains), | json, | logfmt", }), }, // ── Metric query on logs issues ───────────────────────────────── { pattern: /(?:expected type range vector|ranges only allowed)/i, guidance: () => ({ cause: "LogQL metric query requires a range — [duration] syntax", suggestion: "Add a range selector for log metric queries", example: 'rate({job="api"} |= "error" [5m]) — the [5m] must wrap the full log selector', }), }, // ── Auth/connection (shared patterns) ─────────────────────────── { pattern: /authentication failed/i, guidance: () => ({ cause: "Grafana service account token is invalid or expired", suggestion: "Check GRAFANA_SERVICE_ACCOUNT_TOKEN — ensure the service account has Editor role", }), }, { pattern: /timeout|context deadline exceeded/i, guidance: () => ({ cause: "Log query timed out — too much data scanned", suggestion: "Narrow the time range (e.g., now-15m instead of now-24h), add label filters, or use line filters (|= or !=) to reduce scan scope", }), }, { pattern: /Not found/i, guidance: () => ({ cause: "Datasource not found or not a Loki datasource", suggestion: "Use grafana_explore_datasources to verify the datasource UID and type. If this is a Prometheus datasource, use grafana_query instead", }), }, ]; const TRACEQL_RULES = [ // ── Syntax errors ─────────────────────────────────────────────── { pattern: /unexpected.*expecting/i, guidance: () => ({ cause: "TraceQL syntax error", suggestion: "Check braces and attribute syntax. TraceQL uses { } for span selectors and resource.attr / span.attr for attributes", example: '{ resource.service.name = "openclaw" } — note: string values require double quotes', }), }, { pattern: /invalid attribute/i, guidance: () => ({ cause: "Invalid or unknown attribute in TraceQL expression", suggestion: "Use 'resource.' prefix for resource attributes, 'span.' for span attributes. Common: resource.service.name, span.http.status_code, name, duration, status", }), }, { pattern: /empty query/i, guidance: () => ({ cause: "Empty TraceQL query", suggestion: "Provide a TraceQL expression. Simple: { } to match all traces. Filtered: { resource.service.name = \"openclaw\" }", example: '{ resource.service.name = "openclaw" && duration > 1s }', }), }, // ── Auth/connection ───────────────────────────────────────────── { pattern: /authentication failed/i, guidance: () => ({ cause: "Grafana service account token is invalid or expired", suggestion: "Check GRAFANA_SERVICE_ACCOUNT_TOKEN — ensure the service account has Editor role", }), }, { pattern: /timeout|context deadline exceeded/i, guidance: () => ({ cause: "Trace search timed out — too many traces matched", suggestion: "Narrow the time range, add attribute filters, or set minDuration/maxDuration to reduce scope", }), }, { pattern: /Not found/i, guidance: () => ({ cause: "Datasource not found or not a Tempo datasource", suggestion: "Use grafana_explore_datasources to verify the datasource UID and type. Use grafana_query for Prometheus, grafana_query_logs for Loki", }), }, // ── Trace ID issues ───────────────────────────────────────────── { pattern: /trace.*not found|failed to get trace/i, guidance: () => ({ cause: "Trace ID not found in Tempo", suggestion: "The trace may have expired (check Tempo retention) or the ID may be incorrect. Use queryType 'search' to find recent traces first", }), }, { pattern: /invalid trace id/i, guidance: () => ({ cause: "Invalid trace ID format — must be a 32-character hex string", suggestion: "Trace IDs are 32 hex chars (e.g., 'abc123def456...'). Get valid IDs from a search query or from trace_id fields in log entries", }), }, ]; // ── Public API ──────────────────────────────────────────────────────── /** Match an error against a rule list, returning the first matching guidance. */ function matchGuidanceRule(rules, error, expr) { for (const rule of rules) { const match = error.match(rule.pattern); if (match) { if (rule.exprPattern && !rule.exprPattern.test(expr)) continue; return rule.guidance(match, expr); } } return undefined; } /** * Match a PromQL error to structured guidance for agent recovery. * Returns undefined if no pattern matches. */ export function getPromQLGuidance(error, expr) { return matchGuidanceRule(PROMQL_RULES, error, expr); } /** * Match a LogQL error to structured guidance for agent recovery. * Returns undefined if no pattern matches. */ export function getLogQLGuidance(error, expr) { return matchGuidanceRule(LOGQL_RULES, error, expr); } /** * Match a TraceQL error to structured guidance for agent recovery. * Returns undefined if no pattern matches. */ export function getTraceQLGuidance(error, expr) { return matchGuidanceRule(TRACEQL_RULES, error, expr); } // ── Prometheus warnings (infos field) ───────────────────────────────── /** * Parse Prometheus "infos" field into structured warnings. * * Prometheus returns an `infos` array on successful queries when there are * non-fatal issues (e.g., applying rate() to a gauge metric). The tool should * surface these so the agent can self-correct. */ export function parsePrometheusWarnings(infos) { if (!infos?.length) return undefined; const warnings = []; for (const info of infos) { // "metric might not be a counter, name does not end in _total/_sum/_count/_bucket" if (info.includes("might not be a counter")) { const metricMatch = info.match(/"([^"]+)"/); const metricName = metricMatch?.[1] ?? "unknown"; warnings.push({ cause: `rate() applied to '${metricName}' which appears to be a gauge, not a counter`, suggestion: "Gauges measure current values (e.g., temperature, queue depth). Use rate() only on counters (names ending in _total, _sum, _count, _bucket). For gauge change over time, use delta() or deriv() instead", example: `delta(${metricName}[5m]) — measures change over 5 minutes for a gauge`, }); } else { // Generic warning passthrough warnings.push({ cause: info, suggestion: "Review the query — Prometheus flagged a potential issue", }); } } return warnings.length > 0 ? warnings : undefined; } // ── Empty result hint ───────────────────────────────────────────────── /** * Generate a hint when a query returns no data (empty result set). * This is not an error — Prometheus returns success with empty results — * but the agent may not know why there's no data. */ export function getEmptyResultHint(expr) { if (expr.includes("{") && expr.includes("=")) { return { cause: "Query returned no data — the metric exists but no series match the label filters", suggestion: "Verify label names and values. Use grafana_list_metrics with labels: true to see available label values. Try the metric name alone without label filters to confirm data exists", }; } return { cause: "Query returned no data — the metric may not exist or have no recent data points", suggestion: "Verify the metric name using grafana_list_metrics. Check that the datasource is receiving data (try 'up' to test connectivity). If querying custom metrics, ensure they've been pushed recently", }; }