UNPKG

openclaw-grafana-lens

Version:

OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and

348 lines (347 loc) 17.3 kB
/** * grafana_query_traces tool * * Run TraceQL queries against any Tempo datasource via Grafana's datasource proxy. * Supports two query types: * - search: TraceQL expression → flat trace summaries * - get: Trace ID → flattened span list with resolved attributes * Mirrors grafana_query_logs structure for consistency. */ import { jsonResult, readStringParam, readNumberParam } from "../sdk-compat.js"; import { instanceProperties } from "./instance-param.js"; import { resolvePanelQuery } from "./resolve-panel.js"; import { getTraceQLGuidance } from "./query-guidance.js"; export const MAX_SEARCH_TRACES = 50; export const MAX_TRACE_SPANS = 200; const MAX_ATTRIBUTES_PER_SPAN = 20; const STATUS_MAP = { 0: "unset", 1: "ok", 2: "error" }; /** Tempo protobuf-JSON returns string status codes like "STATUS_CODE_OK". */ const STATUS_STRING_MAP = { STATUS_CODE_UNSET: "unset", STATUS_CODE_OK: "ok", STATUS_CODE_ERROR: "error", }; const SPAN_KIND_MAP = { 0: "unspecified", 1: "internal", 2: "server", 3: "client", 4: "producer", 5: "consumer", }; /** Tempo protobuf-JSON returns string kind like "SPAN_KIND_INTERNAL". */ const SPAN_KIND_STRING_MAP = { SPAN_KIND_UNSPECIFIED: "unspecified", SPAN_KIND_INTERNAL: "internal", SPAN_KIND_SERVER: "server", SPAN_KIND_CLIENT: "client", SPAN_KIND_PRODUCER: "producer", SPAN_KIND_CONSUMER: "consumer", }; // ── Base64 ↔ hex helpers ───────────────────────────────────────────── /** Decode base64 to hex string (Tempo protobuf-JSON uses base64 for trace/span IDs). */ function base64ToHex(b64) { try { const buf = Buffer.from(b64, "base64"); return buf.toString("hex"); } catch { return b64; // Already hex or invalid — pass through } } /** Detect whether a string is base64-encoded (not hex). */ function isBase64(s) { // Hex trace IDs are 32 chars [0-9a-f]; base64 uses A-Z, +, /, = return /[A-Z+/=]/.test(s); } // ── OTLP attribute resolution ───────────────────────────────────────── /** Resolve a typed OTLP attribute value to a plain JS value. */ function resolveAttributeValue(attr) { const v = attr.value; if (v.stringValue !== undefined) return v.stringValue; if (v.intValue !== undefined) return Number(v.intValue); if (v.doubleValue !== undefined) return v.doubleValue; if (v.boolValue !== undefined) return v.boolValue; if (v.arrayValue?.values) return v.arrayValue.values.map((x) => x.stringValue ?? ""); return ""; } /** Convert a list of OTLP attributes to a plain object, capped at maxAttrs. */ function resolveAttributes(attrs, maxAttrs) { if (!attrs?.length) return {}; const result = {}; let count = 0; for (const attr of attrs) { if (count >= maxAttrs) break; result[attr.key] = resolveAttributeValue(attr); count++; } return result; } // ── Duration math ───────────────────────────────────────────────────── /** * Calculate duration in milliseconds from nanosecond timestamps. * Uses BigInt for precision — JS Number loses accuracy above 2^53. */ function durationMs(startNano, endNano) { try { return Number((BigInt(endNano) - BigInt(startNano)) / 1000000n); } catch { // Fallback for invalid input return 0; } } // ── OTLP flattening ────────────────────────────────────────────────── /** Extract service.name from resource attributes. */ function extractServiceName(resource) { if (!resource?.attributes) return "unknown"; for (const attr of resource.attributes) { if (attr.key === "service.name" && attr.value.stringValue) { return attr.value.stringValue; } } return "unknown"; } /** Resolve status code from numeric or string format. */ function resolveStatus(code) { if (code === undefined || code === null) return "unset"; if (typeof code === "string") return STATUS_STRING_MAP[code] ?? code; return STATUS_MAP[code] ?? "unset"; } /** Resolve span kind from numeric or string format. */ function resolveKind(kind) { if (kind === undefined || kind === null) return undefined; if (typeof kind === "string") return SPAN_KIND_STRING_MAP[kind] ?? kind; return SPAN_KIND_MAP[kind] ?? String(kind); } /** Normalize a trace/span ID — decode base64 to hex if needed. */ function normalizeId(id) { if (!id) return ""; return isBase64(id) ? base64ToHex(id) : id; } /** * Flatten deeply nested OTLP resourceSpans/batches into a sorted FlatSpan array. * Handles both formats: * - OTLP JSON: { resourceSpans: [...] } — hex IDs, numeric kind/status * - Protobuf-JSON (Tempo v2): { batches: [...] } — base64 IDs, string kind/status */ function flattenTrace(traceResult, limit) { const all = []; // Tempo v2 returns `batches`, OTLP format uses `resourceSpans` const resourceSpansList = traceResult.resourceSpans ?? traceResult.batches ?? []; for (const rs of resourceSpansList) { const serviceName = extractServiceName(rs.resource); for (const ss of rs.scopeSpans ?? []) { for (const span of ss.spans ?? []) { all.push({ traceId: normalizeId(span.traceId), spanId: normalizeId(span.spanId), parentSpanId: normalizeId(span.parentSpanId) || undefined, operationName: span.name ?? span.operationName ?? "unknown", serviceName, startTime: new Date(Number(BigInt(span.startTimeUnixNano) / 1000000n)).toISOString(), durationMs: durationMs(span.startTimeUnixNano, span.endTimeUnixNano), status: resolveStatus(span.status?.code), kind: resolveKind(span.kind), attributes: resolveAttributes(span.attributes, MAX_ATTRIBUTES_PER_SPAN), }); } } } const totalSpans = all.length; // Sort by start time (earliest first) — natural trace order all.sort((a, b) => a.startTime.localeCompare(b.startTime)); return { spans: all.slice(0, limit), totalSpans }; } // ── Search result formatting ────────────────────────────────────────── function formatSearchTraces(traces, limit) { const sliced = traces.slice(0, limit); return sliced.map((t) => ({ traceId: t.traceID, rootServiceName: t.rootServiceName, rootTraceName: t.rootTraceName, startTime: new Date(Number(BigInt(t.startTimeUnixNano) / 1000000n)).toISOString(), durationMs: t.durationMs, spanCount: t.spanSets?.[0]?.matched ?? undefined, })); } // ── Tool factory ────────────────────────────────────────────────────── export function createQueryTracesToolFactory(registry) { return (_ctx) => ({ name: "grafana_query_traces", label: "Grafana Query Traces", description: [ "Run a TraceQL query against a Tempo datasource in Grafana.", "WORKFLOW: Use for distributed tracing — find slow spans, debug request flows, investigate session traces.", "Two query types: 'search' (default) finds traces matching a TraceQL expression, 'get' retrieves a full trace by ID.", "Requires a datasourceUid — use grafana_explore_datasources to find Tempo datasources (type: 'tempo').", "PANEL RE-RUN: Set dashboardUid + panelId to re-run an existing panel's TraceQL query — no need to extract the query manually. Overrides query and datasourceUid.", "Search returns trace summaries (traceId, rootService, rootTrace, duration). Get returns flattened spans with resolved attributes.", "Use minDuration/maxDuration to find slow or fast spans. Use start/end to narrow the time window.", ].join(" "), parameters: { type: "object", properties: { ...instanceProperties(registry), datasourceUid: { type: "string", description: "UID of the Tempo datasource (use grafana_explore_datasources to find it). Optional when using dashboardUid + panelId.", }, query: { type: "string", description: "TraceQL expression for search (e.g., '{ resource.service.name = \"openclaw\" }'), or trace ID for get. Optional when using dashboardUid + panelId.", }, queryType: { type: "string", enum: ["search", "get"], description: "Query type: 'search' (default) for TraceQL trace search, 'get' for full trace by ID", }, start: { type: "string", description: "Start time for search (default: 'now-1h'). Accepts: 'now-1h', 'now-30m', Unix seconds, or RFC3339", }, end: { type: "string", description: "End time for search (default: 'now'). Accepts: 'now', Unix seconds, or RFC3339", }, limit: { type: "number", description: "Max traces to return for search (default 20, max 50)", }, minDuration: { type: "string", description: "Minimum trace duration filter (e.g., '100ms', '1s', '5s')", }, maxDuration: { type: "string", description: "Maximum trace duration filter (e.g., '10s', '30s')", }, dashboardUid: { type: "string", description: "Dashboard UID to resolve a panel's TraceQL query from (use with panelId).", }, panelId: { type: "number", description: "Panel ID within the dashboard to re-run (use with dashboardUid).", }, }, required: [], }, async execute(_toolCallId, params) { const client = registry.get(readStringParam(params, "instance")); const dashboardUid = readStringParam(params, "dashboardUid"); const panelId = readNumberParam(params, "panelId"); let datasourceUid = readStringParam(params, "datasourceUid"); let query = readStringParam(params, "query"); const queryType = readStringParam(params, "queryType") ?? "search"; const start = readStringParam(params, "start") ?? "now-1h"; const end = readStringParam(params, "end") ?? "now"; const rawLimit = readNumberParam(params, "limit") ?? 20; const limit = Math.min(Math.max(1, rawLimit), MAX_SEARCH_TRACES); const minDuration = readStringParam(params, "minDuration"); const maxDuration = readStringParam(params, "maxDuration"); let panelMeta; try { // ── Panel resolution ────────────────────────────────────────── if (dashboardUid && panelId != null) { const resolved = await resolvePanelQuery(client, dashboardUid, panelId); if ("error" in resolved) { return jsonResult({ error: resolved.error }); } if (resolved.queryTool !== "grafana_query_traces") { return jsonResult({ error: `Panel ${panelId} ('${resolved.panelTitle}') uses ${resolved.datasourceType} datasource. Use ${resolved.queryTool} with the same dashboardUid + panelId instead.`, }); } query = query ?? resolved.expr; datasourceUid = datasourceUid ?? resolved.datasourceUid; panelMeta = { resolvedFrom: "panel", panelTitle: resolved.panelTitle, panelType: resolved.panelType, templateVarsReplaced: resolved.templateVarsReplaced, }; } // Validate required params (after panel resolution) if (!datasourceUid) { return jsonResult({ error: "Missing 'datasourceUid'. Provide it directly or use dashboardUid + panelId to resolve from a panel." }); } if (!query) { return jsonResult({ error: "Missing 'query'. Provide a TraceQL expression (for search) or trace ID (for get), or use dashboardUid + panelId to resolve from a panel." }); } // ── Get trace by ID ───────────────────────────────────────── if (queryType === "get") { const traceResult = await client.getTrace(datasourceUid, query); const { spans, totalSpans } = flattenTrace(traceResult, MAX_TRACE_SPANS); return jsonResult({ status: "success", queryType: "get", traceId: query, datasourceUid, totalSpans, spans, ...(totalSpans > spans.length ? { truncated: true, truncationHint: `Showing ${MAX_TRACE_SPANS} of ${totalSpans} spans. Use TraceQL search with span filters to find specific spans.` } : {}), ...(query ? { correlationHint: { logQuery: `{service_name="openclaw"} | json | trace_id = "${query}"`, tool: "grafana_query_logs", tip: "Find logs correlated with this trace. Always use | json | trace_id = ... (not |= line filter).", }, } : {}), ...panelMeta, }); } // ── Search traces ─────────────────────────────────────────── const searchResult = await client.searchTraces(datasourceUid, query, { start, end, limit, minDuration: minDuration ?? undefined, maxDuration: maxDuration ?? undefined, }); const traces = searchResult.traces ?? []; const totalTraces = traces.length; const formattedTraces = formatSearchTraces(traces, MAX_SEARCH_TRACES); return jsonResult({ status: "success", queryType: "search", query, datasourceUid, totalTraces, traces: formattedTraces, ...(totalTraces > MAX_SEARCH_TRACES ? { truncated: true, truncationHint: `Showing ${MAX_SEARCH_TRACES} of ${totalTraces} traces. Narrow your TraceQL query or time range to see specific traces.` } : {}), ...(totalTraces > 0 ? { correlationHint: { logQuery: `{service_name="openclaw"} | json | trace_id = "${formattedTraces[0].traceId}"`, tool: "grafana_query_logs", tip: "Use this LogQL pattern with any traceId from the results to find correlated logs. Always use | json | trace_id = ... (not |= line filter).", }, } : {}), ...(totalTraces === 0 ? { hint: { cause: "No traces found matching the query", suggestion: "Verify the Tempo datasource has data. Try a broader query like '{ }' or widen the time range. Use grafana_explore_datasources to confirm the datasource type is 'tempo'." } } : {}), ...panelMeta, }); } catch (err) { const reason = err instanceof Error ? err.message : String(err); const guidance = getTraceQLGuidance(reason, query ?? ""); return jsonResult({ error: `Trace query failed: ${reason}`, ...(guidance ? { guidance } : {}), }); } }, }); }