openclaw-grafana-lens
Version:
OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and
342 lines (341 loc) • 17.7 kB
JavaScript
/**
* create-dashboard tool
*
* Registers an agent tool that creates Grafana dashboards from templates.
* The agent calls this tool when a user asks for a dashboard.
*
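* This tool wraps POST /api/dashboards/db with built-in templates
* (llm-command-center, session-explorer, cost-intelligence, tool-performance,
* sre-operations, genai-observability, security-overview, node-exporter,
* http-service, metric-explorer, multi-kpi, weekly-review)
* or custom dashboard JSON.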
*/
import { jsonResult, readStringParam } from "../sdk-compat.js";
import { instanceProperties } from "./instance-param.js";
import { validateTargetQueries } from "./update-dashboard.js";
// Built-in dashboard template JSON that ships with the extension
import llmCommandCenterTemplate from "../templates/llm-command-center.json" with { type: "json" };
import sessionExplorerTemplate from "../templates/session-explorer.json" with { type: "json" };
import costIntelligenceTemplate from "../templates/cost-intelligence.json" with { type: "json" };
import toolPerformanceTemplate from "../templates/tool-performance.json" with { type: "json" };
import nodeExporterTemplate from "../templates/node-exporter.json" with { type: "json" };
import httpServiceTemplate from "../templates/http-service.json" with { type: "json" };
import metricExplorerTemplate from "../templates/metric-explorer.json" with { type: "json" };
import multiKpiTemplate from "../templates/multi-kpi.json" with { type: "json" };
import sreOperationsTemplate from "../templates/sre-operations.json" with { type: "json" };
import genaiObservabilityTemplate from "../templates/genai-observability.json" with { type: "json" };
import weeklyReviewTemplate from "../templates/weekly-review.json" with { type: "json" };
import securityOverviewTemplate from "../templates/security-overview.json" with { type: "json" };
// Stable UIDs for AI templates — enables reliable cross-dashboard drill-down links.
// Keyed by template name. When the requested template has an entry here, the tool
// overwrites the dashboard's `uid` with this value before sending it to Grafana.
// Templates without an entry (node-exporter, http-service, metric-explorer, multi-kpi,
// weekly-review) keep whatever `uid` their JSON carries.
const AI_TEMPLATE_UIDS = {
"llm-command-center": "openclaw-command-center",
"session-explorer": "openclaw-session-explorer",
"cost-intelligence": "openclaw-cost-intelligence",
"tool-performance": "openclaw-tool-performance",
"sre-operations": "openclaw-sre-operations",
"genai-observability": "openclaw-genai-observability",
"security-overview": "openclaw-security-overview",
};
// Follow-up hints, keyed by the template that was just created. After a successful
// template-based creation the matching array is attached to the tool response as
// `suggestedNext`. Each entry names another built-in template (a key of TEMPLATES)
// and gives a short human-readable reason for creating it next.
const TEMPLATE_SUGGESTIONS = {
"llm-command-center": [
{ template: "session-explorer", reason: "Tier 2: drill into individual sessions with trace hierarchy" },
{ template: "cost-intelligence", reason: "Tier 3a: financial deep-dive with model attribution" },
{ template: "sre-operations", reason: "Tier 3c: queue health, webhooks, stuck sessions" },
],
"session-explorer": [
{ template: "cost-intelligence", reason: "Tier 3a: correlate session behavior with cost impact" },
{ template: "tool-performance", reason: "Tier 3b: tool reliability and latency analytics" },
],
"cost-intelligence": [
{ template: "tool-performance", reason: "Tier 3b: tool usage patterns driving costs" },
{ template: "sre-operations", reason: "Tier 3c: operational health and queue monitoring" },
],
"tool-performance": [
{ template: "sre-operations", reason: "Tier 3c: operational health context for tool issues" },
{ template: "genai-observability", reason: "Industry-standard gen_ai monitoring" },
],
"sre-operations": [
{ template: "genai-observability", reason: "Industry-standard gen_ai monitoring for broader AI ops" },
{ template: "security-overview", reason: "Security monitoring — injection signals, session anomalies, tool errors" },
],
"genai-observability": [
{ template: "llm-command-center", reason: "Tier 1: OpenClaw-specific command center with session drill-down" },
],
"node-exporter": [
{ template: "http-service", reason: "Complement system health with HTTP service monitoring" },
],
"http-service": [
{ template: "node-exporter", reason: "Complement HTTP monitoring with system-level health" },
],
"metric-explorer": [
{ template: "multi-kpi", reason: "Compare multiple metrics side-by-side" },
],
"multi-kpi": [
{ template: "metric-explorer", reason: "Deep-dive into any single metric" },
],
"weekly-review": [
{ template: "multi-kpi", reason: "Track specific KPIs alongside weekly trends" },
],
"security-overview": [
{ template: "sre-operations", reason: "Complement security monitoring with operational health context" },
{ template: "llm-command-center", reason: "Tier 1 overview for broader system health alongside security" },
],
};
// Built-in dashboard templates, keyed by the name the agent passes as `template`.
// The keys (in this order) are exposed as the `template` parameter's enum and listed
// in "unknown template" / "template required" error messages. The values are the
// imported template JSON; the tool deep-clones a template with structuredClone
// before applying per-request changes, so these objects are never modified.
const TEMPLATES = {
"llm-command-center": llmCommandCenterTemplate,
"session-explorer": sessionExplorerTemplate,
"cost-intelligence": costIntelligenceTemplate,
"tool-performance": toolPerformanceTemplate,
"sre-operations": sreOperationsTemplate,
"genai-observability": genaiObservabilityTemplate,
"node-exporter": nodeExporterTemplate,
"http-service": httpServiceTemplate,
"metric-explorer": metricExplorerTemplate,
"multi-kpi": multiKpiTemplate,
"weekly-review": weeklyReviewTemplate,
"security-overview": securityOverviewTemplate,
};
/**
 * Report whether a datasource UID is a Grafana template-variable reference rather
 * than a concrete UID. Variable references can't be used for API queries.
 *
 * All three Grafana variable syntaxes are recognized: `${DS_PROMETHEUS}` (which also
 * covers `${var:format}`), the bare `$datasource` form, and the deprecated
 * `[[datasource]]` form.
 *
 * @param {unknown} uid - Candidate datasource UID.
 * @returns {boolean} True for a variable reference; false for anything else,
 *   including non-string values (which previously raised a TypeError).
 */
function isTemplateVariable(uid) {
  if (typeof uid !== "string") {
    return false;
  }
  const isBraced = uid.startsWith("${") && uid.endsWith("}");
  const isLegacyBracketed = uid.startsWith("[[") && uid.endsWith("]]");
  // Bare form: a dollar sign followed only by word characters, e.g. `$datasource`.
  const isBare = /^\$\w+$/.test(uid);
  return isBraced || isLegacyBracketed || isBare;
}
/**
 * Pick the first datasource UID on a panel that is a real UID rather than a
 * template-variable reference.
 *
 * Target-level datasources are checked first, in order; the panel-level
 * datasource is only consulted when no target supplies a usable UID.
 *
 * @param {object} panel - Grafana panel object.
 * @returns {string|undefined} The UID, or undefined when none is usable.
 */
function firstUidFromPanel(panel) {
  // Yields the datasource's uid when it is present and concrete, otherwise undefined.
  const concreteUid = (datasource) => {
    const uid = datasource?.uid;
    return uid && !isTemplateVariable(uid) ? uid : undefined;
  };

  if (panel.targets) {
    for (const target of panel.targets) {
      const fromTarget = concreteUid(target.datasource);
      if (fromTarget) {
        return fromTarget;
      }
    }
  }

  return concreteUid(panel.datasource);
}
/**
 * Scan the panels in order and return the first concrete datasource UID any of
 * them provides. Intended to be computed once and reused as a dashboard-wide fallback.
 *
 * @param {Iterable<object>} panels - Dashboard panels.
 * @returns {string|undefined} The first UID found, or undefined when no panel has one.
 */
function findGlobalDatasourceUid(panels) {
  let fallbackUid;
  for (const candidate of panels) {
    fallbackUid = firstUidFromPanel(candidate);
    if (fallbackUid) {
      break;
    }
  }
  // A falsy leftover from the last panel inspected must still read as "not found".
  return fallbackUid || undefined;
}
/** True for any non-null value whose typeof is "object" (arrays included). */
function isObjectLike(value) {
  return value !== null && typeof value === "object";
}

/**
 * Return the targets of a panel that carry a query expression (`expr`).
 * Malformed input — a non-object panel, a non-array `targets`, or non-object
 * target entries — yields an empty list or is dropped instead of throwing.
 */
function exprTargetsOf(panel) {
  if (!isObjectLike(panel) || !Array.isArray(panel.targets)) {
    return [];
  }
  return panel.targets.filter((target) => isObjectLike(target) && target.expr);
}

/**
 * Dry-run one panel's expression targets and summarize the outcome.
 * Any exception raised while resolving the datasource or running the queries is
 * reported as an "error" entry for this panel rather than propagated.
 */
async function validateOnePanel(client, panel, exprTargets, fallbackDsUid) {
  const panelId = panel.id ?? 0;
  const title = panel.title ?? "(untitled)";
  try {
    const dsUid = firstUidFromPanel(panel) ?? fallbackDsUid;
    if (!dsUid) {
      return {
        panelId,
        title,
        status: "skipped",
        error: "No datasource UID found — set datasource.uid on targets",
      };
    }
    const validation = await validateTargetQueries(client, exprTargets, dsUid);
    if (!validation.validated) {
      return { panelId, title, status: "skipped", error: validation.skippedReason };
    }
    const firstFailure = validation.results.find((r) => !r.valid);
    const hasData = validation.results.some((r) => r.valid && r.sampleValue !== undefined);
    let status;
    if (firstFailure) {
      status = "error";
    } else if (hasData) {
      status = "ok";
    } else {
      status = "nodata";
    }
    return {
      panelId,
      title,
      status,
      ...(firstFailure ? { error: firstFailure.error } : {}),
      queries: validation.results,
    };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { panelId, title, status: "error", error: reason };
  }
}

/**
 * Validate all panel PromQL queries in a custom dashboard.
 * Dry-runs each panel's expressions and reports per-panel health.
 *
 * Never throws for malformed dashboard JSON: a non-array `panels`, null or
 * non-object panels, non-array `targets` and null target entries are counted as
 * skipped, and per-panel failures are captured as "error" status.
 *
 * @param {object} client - Grafana client, passed through to validateTargetQueries.
 * @param {Array<object>} panels - Top-level panels of the dashboard.
 * @returns {Promise<{panelsTotal: number, panelsValid: number, panelsNoData: number,
 *   panelsError: number, panelsSkipped: number, details: Array<object>}>}
 *   Aggregate counts plus one detail entry per panel that has expression targets.
 */
export async function validateDashboardPanels(client, panels) {
  const allPanels = Array.isArray(panels) ? panels : [];

  // Pair each validatable panel with its expression targets, preserving panel order.
  const candidates = [];
  for (const panel of allPanels) {
    const exprTargets = exprTargetsOf(panel);
    if (exprTargets.length > 0) {
      candidates.push({ panel, exprTargets });
    }
  }

  if (candidates.length === 0) {
    return {
      panelsTotal: allPanels.length,
      panelsValid: 0,
      panelsNoData: 0,
      panelsError: 0,
      panelsSkipped: allPanels.length,
      details: [],
    };
  }

  // Pre-compute the global fallback datasource UID once (avoids O(P²) re-scanning).
  // The fallback is best-effort: if scanning trips over a malformed panel, panels
  // simply have to provide their own UID.
  let globalFallbackDsUid;
  try {
    globalFallbackDsUid = findGlobalDatasourceUid(allPanels.filter(isObjectLike));
  } catch {
    globalFallbackDsUid = undefined;
  }

  const details = await Promise.all(
    candidates.map(({ panel, exprTargets }) =>
      validateOnePanel(client, panel, exprTargets, globalFallbackDsUid),
    ),
  );

  // Single-pass count instead of four .filter() calls
  const counts = { ok: 0, nodata: 0, error: 0, skipped: 0 };
  for (const detail of details) {
    counts[detail.status]++;
  }

  return {
    panelsTotal: allPanels.length,
    panelsValid: counts.ok,
    panelsNoData: counts.nodata,
    panelsError: counts.error,
    // Panels without expression targets (or malformed ones) count as skipped too.
    panelsSkipped: counts.skipped + (allPanels.length - candidates.length),
    details,
  };
}
/**
 * Give every top-level panel that lacks an `id` a numeric ID no other top-level
 * panel uses. Grafana does not auto-assign panel IDs, and downstream tools
 * (resolve-panel, update-dashboard) rely on panel.id for lookups.
 *
 * Explicit numeric IDs are collected first so a generated ID can never collide
 * with an explicit ID that appears later in the array. Entries that are not
 * objects are left untouched. Mutates the panels in place.
 *
 * @param {Array<object>} panels - Top-level dashboard panels.
 */
function assignMissingPanelIds(panels) {
  const usedIds = new Set();
  for (const panel of panels) {
    if (typeof panel?.id === "number") {
      usedIds.add(panel.id);
    }
  }
  let nextId = 1;
  for (const panel of panels) {
    if (panel === null || typeof panel !== "object") {
      continue;
    }
    if (panel.id == null) {
      // Skip over IDs that are explicitly claimed elsewhere in the dashboard.
      while (usedIds.has(nextId)) {
        nextId++;
      }
      panel.id = nextId;
    }
    if (typeof panel.id === "number" && panel.id >= nextId) {
      nextId = panel.id + 1;
    }
  }
}

/**
 * Build the factory for the `grafana_create_dashboard` agent tool.
 *
 * The tool creates a dashboard from a built-in template or from custom dashboard
 * JSON. Custom JSON takes precedence when both are supplied. The caller's
 * `params.dashboard` object is never modified — all changes (title override,
 * stable UID, id removal, panel ID assignment) are applied to a deep copy.
 *
 * @param {object} registry - Instance registry; `registry.get(name)` must return a
 *   client exposing `createDashboard(request)` and `dashboardUrl(uid)`.
 * @returns {Function} Factory that takes a (currently unused) context and returns
 *   the tool definition: name, label, description, parameters and `execute`.
 */
export function createDashboardToolFactory(registry) {
  return (_ctx) => ({
    name: "grafana_create_dashboard",
    label: "Grafana Dashboard",
    description: [
      "Create or update a Grafana dashboard.",
      "WORKFLOW: 3-tier SRE drill-down hierarchy — start with 'llm-command-center' (Tier 1: golden signals, session table with click-to-drill-down).",
      "Drill into 'session-explorer' (Tier 2: per-session trace hierarchy, LLM calls, tool calls, conversation flow — THE killer feature).",
      "Deep dive: 'cost-intelligence' (Tier 3a: spending trends, model attribution, cache savings), 'tool-performance' (Tier 3b: tool reliability ranking, latency, error rates), 'sre-operations' (Tier 3c: queue health, webhooks, stuck sessions, tool loops).",
      "'security-overview' for security monitoring (injection signals, session anomalies, tool errors, webhook errors, cost spikes).",
      "'genai-observability' for industry-standard AI monitoring using OTel gen_ai semantic conventions — works with any gen_ai data, not just OpenClaw.",
      "'node-exporter' for system health (CPU/memory/disk), 'http-service' for HTTP golden signals (rate/errors/latency).",
      "'metric-explorer' to deep-dive any single metric, 'multi-kpi' for a 4-metric overview dashboard.",
      "'weekly-review' for weekly external data trends (calendar, fitness, finance) with an all-custom-metrics table.",
      "All AI templates have Loki log-to-trace correlation via Tempo + stable UIDs for cross-dashboard navigation.",
      "Can also accept custom dashboard JSON for fully custom dashboards. Custom JSON dashboards include a validation field that dry-runs each panel's PromQL — check validation.panelsError for broken queries.",
      "Returns the dashboard URL for sharing with the user.",
      "To modify the dashboard after creation (add/remove panels, change settings), use grafana_update_dashboard with the returned UID.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        ...instanceProperties(registry),
        template: {
          type: "string",
          enum: Object.keys(TEMPLATES),
          description: "Name of a built-in template. AI observability (3-tier hierarchy): llm-command-center (Tier 1 system overview + session drill-down), session-explorer (Tier 2 per-session trace hierarchy), cost-intelligence (Tier 3a financial deep-dive), tool-performance (Tier 3b tool analytics), sre-operations (Tier 3c queue/webhook/session health). Security: security-overview (injection signals, session anomalies, tool errors, webhook errors, cost spikes). genai-observability (OTel gen_ai standard — industry-standard AI monitoring, works with any gen_ai data). System: node-exporter (CPU/memory/disk), http-service (RED signals). Generic: metric-explorer (any single metric), multi-kpi (any 4 metrics), weekly-review (weekly external data + all custom metrics table).",
        },
        title: {
          type: "string",
          description: "Custom dashboard title. Overrides the template default (e.g., 'My LLM Command Center').",
        },
        dashboard: {
          type: "object",
          description: "Full custom dashboard JSON (Grafana dashboard model). Use this instead of template for fully custom dashboards.",
        },
        folderUid: {
          type: "string",
          description: "UID of the Grafana folder to place the dashboard in (e.g., 'abc123'). Omit to use the default folder.",
        },
        overwrite: {
          type: "boolean",
          description: "Whether to overwrite if a dashboard with the same title exists.",
          default: true,
        },
      },
    },
    async execute(_toolCallId, params) {
      const client = registry.get(readStringParam(params, "instance"));
      const template = readStringParam(params, "template");
      const title = readStringParam(params, "title");
      const folderUid = readStringParam(params, "folderUid");
      const dashboard = params.dashboard;
      const overwrite = typeof params.overwrite === "boolean" ? params.overwrite : true;

      // Own-property checks only: names such as "constructor" or "__proto__" must not
      // resolve to Object.prototype members and be mistaken for templates.
      const isKnownTemplate = Boolean(template) && Object.hasOwn(TEMPLATES, template);

      let dashboardJson;
      if (dashboard) {
        if (typeof dashboard !== "object" || Array.isArray(dashboard)) {
          return jsonResult({
            error: "'dashboard' must be a JSON object (Grafana dashboard model).",
          });
        }
        // Custom dashboard provided directly — deep-copy so the caller's object is not
        // mutated by the title/uid/id adjustments below.
        dashboardJson = structuredClone(dashboard);
      } else if (isKnownTemplate) {
        // Use built-in template (cloned so the shared template JSON stays pristine)
        dashboardJson = structuredClone(TEMPLATES[template]);
      } else if (template) {
        return jsonResult({
          error: `Unknown template '${template}'. Available: ${Object.keys(TEMPLATES).join(", ")}`,
        });
      } else {
        return jsonResult({
          error: `Either 'template' or 'dashboard' is required. Available templates: ${Object.keys(TEMPLATES).join(", ")}`,
        });
      }

      // Apply title override
      if (title) {
        dashboardJson.title = title;
      }

      // Assign stable UID for AI templates (enables cross-dashboard drill-down links)
      if (template && Object.hasOwn(AI_TEMPLATE_UIDS, template)) {
        dashboardJson.uid = AI_TEMPLATE_UIDS[template];
      }

      // Ensure dashboard has no ID (so Grafana creates a new one or matches by title)
      delete dashboardJson.id;

      // Auto-assign panel IDs where missing (collision-free with explicit IDs).
      if (Array.isArray(dashboardJson.panels)) {
        assignMissingPanelIds(dashboardJson.panels);
      }

      try {
        const result = await client.createDashboard({
          dashboard: dashboardJson,
          folderUid,
          message: "Created by Grafana Lens agent",
          overwrite,
        });
        const response = {
          uid: result.uid,
          url: client.dashboardUrl(result.uid),
          status: result.status,
          message: `Dashboard "${dashboardJson.title}" created successfully.`,
        };

        // Add template chaining hints for template-based dashboards
        if (template && Object.hasOwn(TEMPLATE_SUGGESTIONS, template)) {
          response.suggestedNext = TEMPLATE_SUGGESTIONS[template];
        }

        // Custom dashboards: validate panel PromQL queries (informational — never blocks)
        if (dashboard) {
          try {
            const validation = await validateDashboardPanels(client, dashboardJson.panels ?? []);
            if (validation.details.length > 0) {
              response.validation = validation;
            }
          } catch (validationErr) {
            // Validation failure is non-fatal — dashboard was already created.
            // Surface that validation was attempted but failed (not silently omitted).
            const reason = validationErr instanceof Error ? validationErr.message : String(validationErr);
            response.validation = { error: `Validation failed: ${reason}` };
          }
        }
        return jsonResult(response);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        if (reason.includes("Not found") && folderUid) {
          return jsonResult({ error: `Failed to create dashboard: folder '${folderUid}' not found. Omit folderUid to use the default folder, or check the folder UID.` });
        }
        return jsonResult({ error: `Failed to create dashboard: ${reason}` });
      }
    },
  });
}