UNPKG

openclaw-grafana-lens

Version:

OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and

224 lines (223 loc) 13.9 kB
/**
 * Grafana Lens — OpenClaw Extension Entry Point
 *
 * This is the main entry point for the Grafana Lens extension.
 * It follows the same pattern as diagnostics-otel:
 *
 * 1. Export a default OpenClawPluginDefinition
 * 2. In register(), set up services and tools using the Plugin API
 * 3. Services handle background work (metrics collection via OTLP push)
 * 4. Tools handle agent-invoked actions (dashboard creation)
 *
 * Architecture (self-contained — all Grafana interaction via bundled GrafanaClient):
 * - MetricsCollector service: subscribes to diagnostic events → pushes via OTLP
 * - createDashboardTool: agent tool wrapping Grafana's dashboard API
 * - Skill file (skills/SKILL.md): teaches the agent when to use these tools
 */
import { parseConfig, validateConfig } from "./src/config.js";
import { getErrorMessage } from "./src/sdk-compat.js";
import { GrafanaClientRegistry } from "./src/grafana-client-registry.js";
import { createMetricsCollectorService } from "./src/services/metrics-collector.js";
import { PLUGIN_VERSION } from "./src/version.js";
import { createAlertWebhookService } from "./src/services/alert-webhook.js";
import { createDashboardToolFactory } from "./src/tools/create-dashboard.js";
import { createQueryToolFactory } from "./src/tools/query.js";
import { createAlertToolFactory } from "./src/tools/create-alert.js";
import { createShareDashboardToolFactory } from "./src/tools/share-dashboard.js";
import { createAnnotateToolFactory } from "./src/tools/annotate.js";
import { createExploreDatasourcesToolFactory } from "./src/tools/explore-datasources.js";
import { createListMetricsToolFactory } from "./src/tools/list-metrics.js";
import { createSearchToolFactory } from "./src/tools/search.js";
import { createGetDashboardToolFactory } from "./src/tools/get-dashboard.js";
import { createCheckAlertsToolFactory } from "./src/tools/check-alerts.js";
import { createPushMetricsToolFactory } from "./src/tools/push-metrics.js";
import { createQueryLogsToolFactory } from "./src/tools/query-logs.js";
import { createUpdateDashboardToolFactory } from "./src/tools/update-dashboard.js";
import { createExplainMetricToolFactory } from "./src/tools/explain-metric.js";
import { createSecurityCheckToolFactory } from "./src/tools/security-check.js";
import { createQueryTracesToolFactory } from "./src/tools/query-traces.js";
import { createInvestigateToolFactory } from "./src/tools/investigate.js";
import { createAlloyService } from "./src/services/alloy-service.js";
import { createAlloyPipelineToolFactory } from "./src/tools/alloy-pipeline.js";

/**
 * Start a background health check against every configured Grafana instance.
 *
 * The checks run in parallel and are deliberately not awaited: register()
 * continues immediately and the outcome is only logged. Each check catches
 * its own error, so the detached promise never rejects.
 *
 * @param {object} api - Plugin API; only `api.logger` is used here.
 * @param {object} registry - GrafanaClientRegistry holding one client per instance.
 * @param {Array<{name: string, url: string}>} instances - Result of `registry.listInstances()`.
 * @returns {void}
 */
function verifyConnectivity(api, registry, instances) {
  void Promise.allSettled(
    instances.map(async (inst) => {
      // Only prefix log lines with the instance name when there is more than one.
      const tag = registry.isMultiInstance() ? `[${inst.name}] ` : "";
      try {
        const ok = await registry.get(inst.name).healthCheck();
        if (ok) {
          api.logger.info(`grafana-lens: ${tag}Grafana connection verified`);
        } else {
          api.logger.warn(`grafana-lens: ${tag}could not reach Grafana at ${inst.url} — tools will fail until connectivity is restored`);
        }
      } catch (err) {
        api.logger.warn(`grafana-lens: ${tag}failed to verify connection: ${err instanceof Error ? err.message : err}`);
      }
    }),
  );
}

/**
 * Register the `before_agent_start` hook that makes the agent aware of
 * pending Grafana alerts by prepending a short summary to its context.
 *
 * The handler returns `undefined` (no context change) when nothing is pending.
 *
 * @param {object} api - Plugin API; `api.on` is used to register the hook.
 * @param {object} alertStore - Store returned by the alert webhook service.
 * @returns {void}
 */
function registerAlertContextHook(api, alertStore) {
  api.on("before_agent_start", (_event, _ctx) => {
    const pending = alertStore.getPendingAlerts();
    if (pending.length === 0) {
      return undefined;
    }
    const summary = pending
      .map((a) => `- [${a.status.toUpperCase()}] ${a.title} (${new Date(a.receivedAt).toISOString()})`)
      .join("\n");
    return {
      prependContext: `GRAFANA ALERTS (${pending.length} pending):\n${summary}\nUse grafana_check_alerts to see details, then investigate with grafana_query.\n`,
    };
  });
}

/**
 * Register the gen_ai-compliant, session-scoped lifecycle telemetry hooks.
 *
 * Every handler forwards to the lifecycle telemetry object obtained lazily via
 * `getLifecycleTelemetry()`; the optional call makes each hook a no-op while
 * that getter returns nothing. Registration failures are logged per hook and
 * do not abort the remaining registrations.
 *
 * @param {object} api - Plugin API; `api.on` and `api.logger` are used.
 * @param {Function} getLifecycleTelemetry - Getter from the metrics collector service.
 * @returns {string[]} Names of the hooks that were registered successfully, in order.
 */
function registerLifecycleHooks(api, getLifecycleTelemetry) {
  // Collect successful registrations so the startup log reports the real
  // number (not a hardcoded count that drifts as hooks are added/removed).
  const registered = [];
  const registerHookSafe = (name, handler) => {
    try {
      api.on(name, handler);
      registered.push(name);
    } catch (err) {
      api.logger.warn(`grafana-lens: failed to register "${name}" hook: ${getErrorMessage(err)}`);
    }
  };

  // ── TIER 1: Critical (root-cause analysis + gen_ai compliance) ────
  registerHookSafe("session_start", (event, ctx) => {
    getLifecycleTelemetry()?.onSessionStart(event, ctx);
  });
  registerHookSafe("session_end", async (event, ctx) => {
    getLifecycleTelemetry()?.onSessionEnd(event, ctx);
    // Flush session summary log + trace spans immediately (avoid batch delay data loss)
    await getLifecycleTelemetry()?.flushAll();
  });
  registerHookSafe("llm_input", (event, ctx) => {
    getLifecycleTelemetry()?.onLlmInput(event, ctx);
  });
  registerHookSafe("llm_output", (event, ctx) => {
    getLifecycleTelemetry()?.onLlmOutput(event, ctx);
  });
  registerHookSafe("agent_end", async (event, ctx) => {
    getLifecycleTelemetry()?.onAgentEnd(event, ctx);
    // Flush FINAL summary log + closed root span immediately (same pattern as session_end)
    await getLifecycleTelemetry()?.flushAll();
  });

  // ── TIER 2: High value (operational awareness) ────────────────────
  registerHookSafe("message_received", (event, ctx) => {
    getLifecycleTelemetry()?.onMessageReceived(event, ctx);
  });
  registerHookSafe("message_sent", (event, ctx) => {
    getLifecycleTelemetry()?.onMessageSent(event, ctx);
  });
  registerHookSafe("before_compaction", (event, ctx) => {
    getLifecycleTelemetry()?.onBeforeCompaction(event, ctx);
  });
  registerHookSafe("after_compaction", (event, ctx) => {
    getLifecycleTelemetry()?.onAfterCompaction(event, ctx);
  });

  // ── TIER 3: Multi-agent visibility ────────────────────────────────
  // subagent_spawned/subagent_ended exist in openclaw source but are not yet
  // in the published npm PluginHookName type. No cast is needed in plain JS,
  // so they go through the same safe registration path as every other hook.
  registerHookSafe("subagent_spawned", (event, ctx) => {
    getLifecycleTelemetry()?.onSubagentSpawned(event, ctx);
  });
  registerHookSafe("subagent_ended", (event, ctx) => {
    getLifecycleTelemetry()?.onSubagentEnded(event, ctx);
  });

  // ── UPGRADED: after_tool_call → gen_ai execute_tool convention ────
  registerHookSafe("after_tool_call", (event, ctx) => {
    getLifecycleTelemetry()?.onAfterToolCall(event, ctx);
  });

  // ── TIER 4: New SRE hooks (Part 5) ────────────────────────────────
  // before_reset — session context wipe detection
  registerHookSafe("before_reset", (event, ctx) => {
    getLifecycleTelemetry()?.onBeforeReset(event, ctx);
  });
  // before_tool_call — tool invocation intent + span pairing
  registerHookSafe("before_tool_call", (event, ctx) => {
    getLifecycleTelemetry()?.onBeforeToolCall(event, ctx);
  });
  // gateway_start — infrastructure availability
  registerHookSafe("gateway_start", (event) => {
    getLifecycleTelemetry()?.onGatewayStart(event);
  });
  // gateway_stop — infrastructure shutdown
  registerHookSafe("gateway_stop", (event) => {
    getLifecycleTelemetry()?.onGatewayStop(event);
  });

  return registered;
}

const plugin = {
  id: "openclaw-grafana-lens",
  name: "Grafana Lens",
  description: "Agent-driven Grafana — query, visualize, alert, and deliver to messaging channels",

  /**
   * Wire the plugin into OpenClaw: validate config, create Grafana clients,
   * then register services, agent tools, and hooks.
   *
   * When the config is invalid the plugin still loads, but nothing is
   * registered and a warning explains why.
   *
   * @param {object} api - OpenClaw Plugin API (pluginConfig, logger, registerService,
   *   registerTool, registerHttpRoute, on).
   * @returns {void}
   */
  register(api) {
    // ── Parse plugin config (never throws — credentials may be missing) ──
    const config = parseConfig(api.pluginConfig);
    if (config._warnings) {
      for (const w of config._warnings) {
        api.logger.warn(`grafana-lens: ${w}`);
      }
    }
    const validation = validateConfig(config);
    if (!validation.valid) {
      api.logger.warn(`grafana-lens: plugin loaded but Grafana tools are disabled — ${validation.errors.join("; ")}`);
      return;
    }
    const validConfig = validation.config;

    // ── Create client registry (one GrafanaClient per configured instance) ──
    const registry = new GrafanaClientRegistry(validConfig);
    const instances = registry.listInstances();
    if (instances.length === 1) {
      api.logger.info(`grafana-lens: connecting to ${instances[0].url}`);
    } else {
      api.logger.info(`grafana-lens: ${instances.length} instances configured — default: "${registry.getDefaultName()}"`);
    }

    // ── Verify Grafana connectivity on startup (all instances in parallel) ──
    verifyConnectivity(api, registry, instances);

    // ── Register alert webhook service ───────────────────────────────
    const { service: alertService, store: alertStore } = createAlertWebhookService(
      validConfig,
      api.registerHttpRoute.bind(api),
    );
    api.registerService(alertService);

    // ── Register metrics collector service ──────────────────────────
    const { service: metricsService, getCustomMetricsStore, getLifecycleTelemetry } =
      createMetricsCollectorService(validConfig, alertStore, PLUGIN_VERSION);
    api.registerService(metricsService);

    // ── Register agent tools ────────────────────────────────────────
    // Count registrations instead of hard-coding the number in the startup
    // log: the Alloy pipeline tool below is opt-in, so the total varies.
    let toolCount = 0;
    const registerTool = (factory) => {
      api.registerTool(factory);
      toolCount += 1;
    };
    registerTool(createDashboardToolFactory(registry));
    registerTool(createQueryToolFactory(registry));
    registerTool(createAlertToolFactory(registry));
    registerTool(createShareDashboardToolFactory(registry));
    registerTool(createAnnotateToolFactory(registry));
    registerTool(createExploreDatasourcesToolFactory(registry));
    registerTool(createListMetricsToolFactory(registry, getCustomMetricsStore));
    registerTool(createSearchToolFactory(registry));
    registerTool(createGetDashboardToolFactory(registry));
    registerTool(createCheckAlertsToolFactory(registry, alertStore));
    registerTool(createPushMetricsToolFactory(registry, getCustomMetricsStore));
    registerTool(createQueryLogsToolFactory(registry));
    registerTool(createUpdateDashboardToolFactory(registry));
    registerTool(createExplainMetricToolFactory(registry));
    registerTool(createSecurityCheckToolFactory(registry));
    registerTool(createQueryTracesToolFactory(registry));
    registerTool(createInvestigateToolFactory(registry, alertStore));

    // ── Register Alloy pipeline service + tool (opt-in) ────────────
    const alloyConfig = validConfig.alloy;
    if (alloyConfig?.enabled && alloyConfig.configDir) {
      const { service: alloyService, getClient, getStore, getExportTargets } = createAlloyService(
        alloyConfig,
        validConfig.otlp,
      );
      api.registerService(alloyService);
      registerTool(createAlloyPipelineToolFactory({ getClient, getStore, getExportTargets }));
      api.logger.info("grafana-lens: Alloy pipeline management enabled");
    }

    // ── Register before_agent_start hook for alert awareness ─────────
    registerAlertContextHook(api, alertStore);

    // ══════════════════════════════════════════════════════════════════
    // LIFECYCLE HOOKS — gen_ai-compliant session-scoped traces
    // Only registered when metrics/telemetry is enabled (default: true).
    // Follows memory-lancedb pattern: conditional hook registration.
    // When metrics disabled, skip registration entirely — avoids OpenClaw
    // invoking handlers and passing event data (LLM inputs, session state)
    // for users who explicitly opted out of telemetry.
    // ══════════════════════════════════════════════════════════════════
    if (validConfig.metrics?.enabled !== false) {
      const registered = registerLifecycleHooks(api, getLifecycleTelemetry);
      api.logger.info(`grafana-lens: registered ${toolCount} tools, services, and ${registered.length} lifecycle hooks [${registered.join(", ")}]`);
    } else {
      api.logger.info(`grafana-lens: registered ${toolCount} tools and services (lifecycle hooks skipped — metrics disabled)`);
    }
  },
};

export default plugin;