openclaw-grafana-lens
Version:
OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and
224 lines (223 loc) • 13.9 kB
JavaScript
/**
* Grafana Lens — OpenClaw Extension Entry Point
*
* This is the main entry point for the Grafana Lens extension.
* It follows the same pattern as diagnostics-otel:
*
* 1. Export a default OpenClawPluginDefinition
* 2. In register(), set up services and tools using the Plugin API
* 3. Services handle background work (metrics collection via OTLP push)
* 4. Tools handle agent-invoked actions (dashboard creation)
*
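* Architecture (self-contained — all Grafana interaction via bundled GrafanaClient):
* - GrafanaClientRegistry: one GrafanaClient per configured Grafana instance
* - MetricsCollector service: subscribes to diagnostic events → pushes via OTLP
* - AlertWebhook service: given the plugin's HTTP route registrar; its alert store is shared with the alert-aware tools
* - Agent tools (src/tools/*): one factory per tool, e.g. createDashboardTool wraps Grafana's dashboard API
* - Alloy service + pipeline tool: registered only when alloy.enabled and alloy.configDir are set
* - Lifecycle hooks: forwarded to the metrics collector's lifecycle telemetry unless metrics.enabled is false
* - Skill file (skills/SKILL.md): teaches the agent when to use these tools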
*/
import { parseConfig, validateConfig } from "./src/config.js";
import { getErrorMessage } from "./src/sdk-compat.js";
import { GrafanaClientRegistry } from "./src/grafana-client-registry.js";
import { createMetricsCollectorService } from "./src/services/metrics-collector.js";
import { PLUGIN_VERSION } from "./src/version.js";
import { createAlertWebhookService } from "./src/services/alert-webhook.js";
import { createDashboardToolFactory } from "./src/tools/create-dashboard.js";
import { createQueryToolFactory } from "./src/tools/query.js";
import { createAlertToolFactory } from "./src/tools/create-alert.js";
import { createShareDashboardToolFactory } from "./src/tools/share-dashboard.js";
import { createAnnotateToolFactory } from "./src/tools/annotate.js";
import { createExploreDatasourcesToolFactory } from "./src/tools/explore-datasources.js";
import { createListMetricsToolFactory } from "./src/tools/list-metrics.js";
import { createSearchToolFactory } from "./src/tools/search.js";
import { createGetDashboardToolFactory } from "./src/tools/get-dashboard.js";
import { createCheckAlertsToolFactory } from "./src/tools/check-alerts.js";
import { createPushMetricsToolFactory } from "./src/tools/push-metrics.js";
import { createQueryLogsToolFactory } from "./src/tools/query-logs.js";
import { createUpdateDashboardToolFactory } from "./src/tools/update-dashboard.js";
import { createExplainMetricToolFactory } from "./src/tools/explain-metric.js";
import { createSecurityCheckToolFactory } from "./src/tools/security-check.js";
import { createQueryTracesToolFactory } from "./src/tools/query-traces.js";
import { createInvestigateToolFactory } from "./src/tools/investigate.js";
import { createAlloyService } from "./src/services/alloy-service.js";
import { createAlloyPipelineToolFactory } from "./src/tools/alloy-pipeline.js";
/**
 * OpenClaw plugin definition for Grafana Lens.
 *
 * `register(api)` is the single entry point: it parses the plugin config,
 * builds the Grafana client registry, and wires services, agent tools and
 * hooks into the host through the plugin API.
 */
const plugin = {
  id: "openclaw-grafana-lens",
  name: "Grafana Lens",
  description: "Agent-driven Grafana — query, visualize, alert, and deliver to messaging channels",
  /**
   * Register services, tools and hooks with the host.
   *
   * @param api Plugin API object. This function reads `pluginConfig` and
   *   `logger`, and calls `registerHttpRoute`, `registerService`,
   *   `registerTool` and `on`.
   * @returns {void} Returns early, registering nothing, when config
   *   validation fails (a warning is logged instead of throwing).
   */
  register(api) {
    // ── Parse plugin config (never throws — credentials may be missing) ──
    const config = parseConfig(api.pluginConfig);
    if (config._warnings) {
      for (const w of config._warnings) {
        api.logger.warn(`grafana-lens: ${w}`);
      }
    }
    const validation = validateConfig(config);
    if (!validation.valid) {
      api.logger.warn(`grafana-lens: plugin loaded but Grafana tools are disabled — ${validation.errors.join("; ")}`);
      return;
    }
    const validConfig = validation.config;

    // ── Create client registry (one GrafanaClient per configured instance) ──
    const registry = new GrafanaClientRegistry(validConfig);
    const instances = registry.listInstances();
    if (instances.length === 1) {
      api.logger.info(`grafana-lens: connecting to ${instances[0].url}`);
    }
    else {
      api.logger.info(`grafana-lens: ${instances.length} instances configured — default: "${registry.getDefaultName()}"`);
    }

    // ── Verify Grafana connectivity on startup (all instances in parallel) ──
    // Deliberately not awaited: registration must not block on the network.
    // Each probe catches its own failure, so the combined promise never rejects.
    void Promise.allSettled(instances.map(async (inst) => {
      const tag = registry.isMultiInstance() ? `[${inst.name}] ` : "";
      try {
        const ok = await registry.get(inst.name).healthCheck();
        if (ok) {
          api.logger.info(`grafana-lens: ${tag}Grafana connection verified`);
        }
        else {
          api.logger.warn(`grafana-lens: ${tag}could not reach Grafana at ${inst.url} — tools will fail until connectivity is restored`);
        }
      }
      catch (err) {
        // Same error formatter as the hook-registration warnings below.
        api.logger.warn(`grafana-lens: ${tag}failed to verify connection: ${getErrorMessage(err)}`);
      }
    }));

    // ── Register alert webhook service ───────────────────────────────
    const { service: alertService, store: alertStore } = createAlertWebhookService(validConfig, api.registerHttpRoute.bind(api));
    api.registerService(alertService);

    // ── Register metrics collector service ──────────────────────────
    const { service: metricsService, getCustomMetricsStore, getLifecycleTelemetry } = createMetricsCollectorService(validConfig, alertStore, PLUGIN_VERSION);
    api.registerService(metricsService);

    // ── Register agent tools ────────────────────────────────────────
    // Every tool goes through this helper so the startup log reports the
    // number actually registered (the opt-in Alloy tool changes the total)
    // instead of a hardcoded figure.
    let toolCount = 0;
    const registerTool = (factory) => {
      api.registerTool(factory);
      toolCount += 1;
    };
    registerTool(createDashboardToolFactory(registry));
    registerTool(createQueryToolFactory(registry));
    registerTool(createAlertToolFactory(registry));
    registerTool(createShareDashboardToolFactory(registry));
    registerTool(createAnnotateToolFactory(registry));
    registerTool(createExploreDatasourcesToolFactory(registry));
    registerTool(createListMetricsToolFactory(registry, getCustomMetricsStore));
    registerTool(createSearchToolFactory(registry));
    registerTool(createGetDashboardToolFactory(registry));
    registerTool(createCheckAlertsToolFactory(registry, alertStore));
    registerTool(createPushMetricsToolFactory(registry, getCustomMetricsStore));
    registerTool(createQueryLogsToolFactory(registry));
    registerTool(createUpdateDashboardToolFactory(registry));
    registerTool(createExplainMetricToolFactory(registry));
    registerTool(createSecurityCheckToolFactory(registry));
    registerTool(createQueryTracesToolFactory(registry));
    registerTool(createInvestigateToolFactory(registry, alertStore));

    // ── Register Alloy pipeline service + tool (opt-in) ────────────
    const alloyConfig = validConfig.alloy;
    if (alloyConfig?.enabled && alloyConfig.configDir) {
      const { service: alloyService, getClient, getStore, getExportTargets } = createAlloyService(alloyConfig, validConfig.otlp);
      api.registerService(alloyService);
      registerTool(createAlloyPipelineToolFactory({ getClient, getStore, getExportTargets }));
      api.logger.info("grafana-lens: Alloy pipeline management enabled");
    }

    // ── Register before_agent_start hook for alert awareness ─────────
    // Returns nothing when no alerts are pending; otherwise returns a
    // `prependContext` string summarising them.
    api.on("before_agent_start", (_event, _ctx) => {
      const pending = alertStore.getPendingAlerts();
      if (pending.length === 0) {
        return;
      }
      const summary = pending
        .map((a) => `- [${a.status.toUpperCase()}] ${a.title} (${new Date(a.receivedAt).toISOString()})`)
        .join("\n");
      return {
        prependContext: `GRAFANA ALERTS (${pending.length} pending):\n${summary}\nUse grafana_check_alerts to see details, then investigate with grafana_query.\n`,
      };
    });

    // ══════════════════════════════════════════════════════════════════
    // LIFECYCLE HOOKS — gen_ai-compliant session-scoped traces
    // Only registered when metrics/telemetry is enabled (default: true).
    // Follows memory-lancedb pattern: conditional hook registration.
    // When metrics disabled, skip registration entirely — avoids OpenClaw
    // invoking handlers and passing event data (LLM inputs, session state)
    // for users who explicitly opted out of telemetry.
    // ══════════════════════════════════════════════════════════════════
    if (validConfig.metrics?.enabled !== false) {
      // Count successful hook registrations so the startup log reports the real
      // number (not a hardcoded count that drifts as hooks are added/removed).
      // Also surfaces per-hook errors if api.on throws or is unavailable.
      const registered = [];
      const registerHookSafe = (name, handler) => {
        try {
          api.on(name, handler);
          registered.push(name);
        }
        catch (err) {
          api.logger.warn(`grafana-lens: failed to register "${name}" hook: ${getErrorMessage(err)}`);
        }
      };
      // Helper for hooks not yet in the published PluginHookName type — accepts
      // async handlers too so Promise-returning callbacks aren't silently dropped.
      const apiOn = (name, handler) => registerHookSafe(name, handler);

      // Every handler looks the telemetry object up at call time through
      // getLifecycleTelemetry() and is a no-op (via `?.`) while that getter
      // returns nothing.

      // ── TIER 1: Critical (root-cause analysis + gen_ai compliance) ────
      registerHookSafe("session_start", (event, ctx) => {
        getLifecycleTelemetry()?.onSessionStart(event, ctx);
      });
      registerHookSafe("session_end", async (event, ctx) => {
        getLifecycleTelemetry()?.onSessionEnd(event, ctx);
        // Flush session summary log + trace spans immediately (avoid batch delay data loss)
        await getLifecycleTelemetry()?.flushAll();
      });
      registerHookSafe("llm_input", (event, ctx) => {
        getLifecycleTelemetry()?.onLlmInput(event, ctx);
      });
      registerHookSafe("llm_output", (event, ctx) => {
        getLifecycleTelemetry()?.onLlmOutput(event, ctx);
      });
      registerHookSafe("agent_end", async (event, ctx) => {
        getLifecycleTelemetry()?.onAgentEnd(event, ctx);
        // Flush FINAL summary log + closed root span immediately (same pattern as session_end)
        await getLifecycleTelemetry()?.flushAll();
      });

      // ── TIER 2: High value (operational awareness) ────────────────────
      registerHookSafe("message_received", (event, ctx) => {
        getLifecycleTelemetry()?.onMessageReceived(event, ctx);
      });
      registerHookSafe("message_sent", (event, ctx) => {
        getLifecycleTelemetry()?.onMessageSent(event, ctx);
      });
      registerHookSafe("before_compaction", (event, ctx) => {
        getLifecycleTelemetry()?.onBeforeCompaction(event, ctx);
      });
      registerHookSafe("after_compaction", (event, ctx) => {
        getLifecycleTelemetry()?.onAfterCompaction(event, ctx);
      });

      // ── TIER 3: Multi-agent visibility ────────────────────────────────
      // subagent_spawned/subagent_ended exist in openclaw source but are not yet
      // in the published npm PluginHookName type — cast needed (same as tool.loop pattern)
      apiOn("subagent_spawned", (event, ctx) => {
        getLifecycleTelemetry()?.onSubagentSpawned(event, ctx);
      });
      apiOn("subagent_ended", (event, ctx) => {
        getLifecycleTelemetry()?.onSubagentEnded(event, ctx);
      });

      // ── UPGRADED: after_tool_call → gen_ai execute_tool convention ────
      registerHookSafe("after_tool_call", (event, ctx) => {
        getLifecycleTelemetry()?.onAfterToolCall(event, ctx);
      });

      // ── TIER 4: New SRE hooks (Part 5) ─────────────────────────────────
      // before_reset — session context wipe detection
      apiOn("before_reset", (event, ctx) => {
        getLifecycleTelemetry()?.onBeforeReset(event, ctx);
      });
      // before_tool_call — tool invocation intent + span pairing
      apiOn("before_tool_call", (event, ctx) => {
        getLifecycleTelemetry()?.onBeforeToolCall(event, ctx);
      });
      // gateway_start — infrastructure availability
      apiOn("gateway_start", (event) => {
        getLifecycleTelemetry()?.onGatewayStart(event);
      });
      // gateway_stop — infrastructure shutdown
      apiOn("gateway_stop", (event) => {
        getLifecycleTelemetry()?.onGatewayStop(event);
      });

      api.logger.info(`grafana-lens: registered ${toolCount} tools, services, and ${registered.length} lifecycle hooks [${registered.join(", ")}]`);
    }
    else {
      api.logger.info(`grafana-lens: registered ${toolCount} tools and services (lifecycle hooks skipped — metrics disabled)`);
    }
  },
};
export default plugin;