openclaw-grafana-lens
Version:
OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and
247 lines (246 loc) • 12.8 kB
JavaScript
/**
* grafana_security_check tool
*
* Runs a comprehensive security health check against Grafana metrics and logs.
* Executes 6 PromQL + 1 LogQL queries in parallel using Promise.allSettled,
* then returns a unified threat-level report.
*
* Detection-only: never blocks, terminates, or modifies anything.
* Honest about limitations: auth failures are invisible to this tool.
*/
import { jsonResult, readStringParam } from "../sdk-compat.js";
import { instanceProperties } from "./instance-param.js";
const PROM_CHECKS = [
{
name: "webhook_error_ratio",
expr: () => "sum(rate(openclaw_lens_webhook_error_total[5m])) / (sum(rate(openclaw_lens_webhook_received_total[5m])) + 0.001)",
evaluate: (v) => {
const pct = Math.round(v * 100);
if (v >= 0.5)
return { status: "red", detail: `${pct}% of webhooks failing (critical at 50%)` };
if (v >= 0.2)
return { status: "yellow", detail: `${pct}% of webhooks failing (warning at 20%)` };
return { status: "green", detail: `${pct}% webhook error ratio (healthy)` };
},
},
{
name: "cost_anomaly",
expr: () => "openclaw_lens_daily_cost_usd",
evaluate: (v) => {
const cost = `$${v.toFixed(2)}`;
if (v >= 50)
return { status: "red", detail: `Daily cost ${cost} (critical at $50)` };
if (v >= 10)
return { status: "yellow", detail: `Daily cost ${cost} (warning at $10)` };
return { status: "green", detail: `Daily cost ${cost} (healthy)` };
},
},
{
name: "tool_loops",
expr: () => "sum(openclaw_lens_tool_loops_active) or vector(0)",
evaluate: (v) => {
if (v >= 3)
return { status: "red", detail: `${v} active tool loops (critical at 3)` };
if (v >= 1)
return { status: "yellow", detail: `${v} active tool loop(s) (warning at 1)` };
return { status: "green", detail: "No active tool loops" };
},
},
{
name: "injection_signals",
expr: (lb) => `sum(increase(openclaw_lens_prompt_injection_signals_total[${lb}])) or vector(0)`,
evaluate: (v) => {
const n = Math.round(v);
if (n >= 5)
return { status: "red", detail: `${n} prompt injection patterns detected (critical at 5)` };
if (n >= 1)
return { status: "yellow", detail: `${n} prompt injection pattern(s) detected (warning at 1)` };
return { status: "green", detail: "No prompt injection signals" };
},
},
{
name: "session_enumeration",
expr: () => "sum(openclaw_lens_unique_sessions_1h) or vector(0)",
evaluate: (v) => {
const n = Math.round(v);
if (n >= 200)
return { status: "red", detail: `${n} unique sessions in 1h (critical at 200)` };
if (n >= 50)
return { status: "yellow", detail: `${n} unique sessions in 1h (warning at 50)` };
return { status: "green", detail: `${n} unique session(s) in 1h (healthy)` };
},
},
{
name: "stuck_sessions",
expr: () => "sum(openclaw_lens_sessions_stuck) or vector(0)",
evaluate: (v) => {
const n = Math.round(v);
if (n >= 3)
return { status: "red", detail: `${n} stuck sessions (critical at 3)` };
if (n >= 1)
return { status: "yellow", detail: `${n} stuck session(s) (warning at 1)` };
return { status: "green", detail: "No stuck sessions" };
},
},
];
/** Determine overall threat level from individual check statuses. */
function overallLevel(checks) {
if (checks.some((c) => c.status === "red"))
return "red";
if (checks.some((c) => c.status === "yellow"))
return "yellow";
return "green";
}
export function createSecurityCheckToolFactory(registry) {
return (_ctx) => ({
name: "grafana_security_check",
label: "Security Check",
description: [
"WORKFLOW: Run a comprehensive security health check against Grafana metrics and logs.",
"Use when asked 'am I being attacked?', 'security status', 'security audit', or 'security check'.",
"Runs 6 parallel checks: webhook error patterns, cost anomalies, tool loops,",
"prompt injection signals, session enumeration, and stuck sessions.",
"Returns structured report with threat level (green/yellow/red) per check,",
"suggested actions, and honest limitations.",
].join(" "),
parameters: {
type: "object",
properties: {
...instanceProperties(registry),
lookback: {
type: "string",
description: "Time window to analyze. Default '1h'. Use '24h' for daily review, '7d' for weekly.",
},
},
},
async execute(_toolCallId, params) {
const client = registry.get(readStringParam(params, "instance"));
const lookback = readStringParam(params, "lookback") ?? "1h";
// Fix 3: Validate lookback to prevent PromQL injection
if (!/^\d+[smhdwy]$/.test(lookback)) {
return jsonResult({ error: `Invalid lookback "${lookback}" — use a Prometheus duration like "1h", "24h", "7d"` });
}
try {
// Auto-discover datasources
const datasources = await client.listDatasources();
const promDs = datasources.find((d) => d.type === "prometheus");
const lokiDs = datasources.find((d) => d.type === "loki");
if (!promDs) {
return jsonResult({
error: "No Prometheus datasource found — security check requires Prometheus for metric queries. Use grafana_explore_datasources to verify.",
});
}
// Fix 5: Run PromQL checks and Loki query in parallel
const [promResults, lokiResult] = await Promise.all([
Promise.allSettled(PROM_CHECKS.map(async (check) => {
// Fix 4: Use typed PrometheusInstantResult directly
const result = await client.queryPrometheus(promDs.uid, check.expr(lookback));
const first = result.data.result[0];
const parsed = first ? parseFloat(first.value[1]) : 0;
const value = isNaN(parsed) ? 0 : parsed;
const { status, detail } = check.evaluate(value);
return { name: check.name, status, value, detail };
})),
lokiDs
? client.queryLokiRange(lokiDs.uid, `{service_name="openclaw"} | json | event_name=~"prompt_injection.detected|gateway.start|tool.loop"`, `now-${lookback}`, "now", { limit: 100 }).catch(() => null)
: Promise.resolve(null),
]);
// Extract successful check results, mark failures
const checks = [];
const limitations = [
"Auth failures not observable — openclaw auth middleware emits no diagnostic events. Monitor gateway access logs manually.",
];
for (let i = 0; i < promResults.length; i++) {
const settled = promResults[i];
if (settled.status === "fulfilled") {
checks.push(settled.value);
}
else {
// Fix 2: Failed queries report "yellow" (unknown) not "green" (safe)
checks.push({
name: PROM_CHECKS[i].name,
status: "yellow",
value: -1,
detail: `Unable to assess — query failed: ${settled.reason instanceof Error ? settled.reason.message : String(settled.reason)}`,
});
limitations.push(`${PROM_CHECKS[i].name} check failed — query error`);
}
}
// Extract Loki security event count
let securityEventCount = 0;
if (lokiDs) {
if (lokiResult) {
const logData = lokiResult;
if (logData?.data?.result) {
for (const stream of logData.data.result) {
securityEventCount += stream.values?.length ?? 0;
}
}
}
else if (lokiResult === null) {
// Loki query was attempted but failed (caught in Promise.all)
limitations.push("Loki log check failed — security event logs unavailable");
}
}
else {
limitations.push("No Loki datasource — security event log correlation unavailable");
}
// Build summary
const warnings = checks.filter((c) => c.status === "yellow");
const criticals = checks.filter((c) => c.status === "red");
const parts = [];
if (criticals.length > 0)
parts.push(`${criticals.length} critical: ${criticals.map((c) => c.detail).join("; ")}`);
if (warnings.length > 0)
parts.push(`${warnings.length} warning(s): ${warnings.map((c) => c.detail).join("; ")}`);
if (parts.length === 0)
parts.push("All checks green — no threats detected");
const summary = parts.join(". ");
// Build suggested actions based on findings
const suggestedActions = [];
for (const check of checks) {
if (check.status === "green")
continue;
switch (check.name) {
case "webhook_error_ratio":
suggestedActions.push("Investigate webhook errors: use grafana_query_logs with '{service_name=\"openclaw\"} | json | component=\"diagnostic\" | event_name=\"webhook.error\"'");
break;
case "cost_anomaly":
suggestedActions.push("Investigate cost spike: use grafana_explain_metric on 'openclaw_lens_daily_cost_usd' and check 'openclaw_lens_cost_by_model_total' for model attribution");
break;
case "tool_loops":
suggestedActions.push("Investigate tool loops: use grafana_query_logs with '{service_name=\"openclaw\"} | json | event_name=\"tool.loop\"'");
break;
case "injection_signals":
suggestedActions.push("Review injection patterns: use grafana_query_logs with '{service_name=\"openclaw\"} | json | event_name=\"prompt_injection.detected\"'");
break;
case "session_enumeration":
suggestedActions.push("Review session volume: check recent sessions for unusual patterns, consider rotating API tokens");
break;
case "stuck_sessions":
suggestedActions.push("Investigate stuck sessions: use grafana_query on 'openclaw_lens_stuck_session_max_age_ms' and check recent session logs");
break;
}
}
if (suggestedActions.length === 0) {
suggestedActions.push("No immediate actions needed. Consider setting up security alerts with grafana_create_alert for proactive monitoring.");
}
return jsonResult({
overallThreatLevel: overallLevel(checks),
summary,
checks,
securityEventLogs: securityEventCount,
limitations,
suggestedActions,
dashboardTemplate: "security-overview",
});
}
catch (err) {
const reason = err instanceof Error ? err.message : String(err);
return jsonResult({
error: `Security check failed: ${reason}`,
});
}
},
});
}