UNPKG

openclaw-grafana-lens

Version:

OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and

294 lines (293 loc) 9.23 kB
/**
 * Lifecycle Telemetry: gen_ai-compliant, session-scoped traces.
 *
 * Converts OpenClaw plugin hook events into hierarchical OTel traces
 * following the gen_ai semantic conventions (v1.27.0+).
 *
 * Trace hierarchy (all spans share the same trace_id per session):
 *   invoke_agent openclaw                   (root, INTERNAL)
 *   +-- chat {model}                        (CLIENT)
 *   +-- execute_tool {toolName}             (INTERNAL, sibling of chat)
 *   +-- chat {model}                        (CLIENT, next turn)
 *   +-- openclaw.compaction                 (INTERNAL)
 *   +-- openclaw.subagent.spawn {agentId}   (INTERNAL)
 *   +-- openclaw.agent.end                  (INTERNAL)
 *
 * Context propagation: explicit; no global TracerProvider.
 * Uses trace.setSpan(ROOT_CONTEXT, parentSpan) + 3rd arg to startSpan().
 *
 * Also records gen_ai standard metrics:
 *   - gen_ai.client.token.usage (histogram)
 *   - gen_ai.client.operation.duration (histogram, seconds)
 */
import type { Span, Context, Counter, Histogram } from "@opentelemetry/api";
import type { OtelTraces } from "./otel-traces.js";
import type { OtelLogs } from "./otel-logs.js";

// ---------------------------------------------------------------------------
// Session hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onSessionStart}. */
export type SessionStartEvent = {
  sessionId: string;
  resumedFrom?: string;
};

/** Context argument of {@link LifecycleTelemetry.onSessionStart}. */
export type SessionStartCtx = {
  agentId?: string;
  sessionId: string;
};

/** Event argument of {@link LifecycleTelemetry.onSessionEnd}. */
export type SessionEndEvent = {
  sessionId: string;
  messageCount: number;
  durationMs?: number;
};

/** Context argument of {@link LifecycleTelemetry.onSessionEnd}. */
export type SessionEndCtx = {
  agentId?: string;
  sessionId: string;
};

// ---------------------------------------------------------------------------
// LLM hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onLlmInput}. */
export type LlmInputEvent = {
  runId: string;
  sessionId: string;
  provider: string;
  model: string;
  systemPrompt?: string;
  prompt: string;
  historyMessages: unknown[];
  imagesCount: number;
};

/** Context argument of {@link LifecycleTelemetry.onLlmInput}. */
export type LlmInputCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

/** Event argument of {@link LifecycleTelemetry.onLlmOutput}. */
export type LlmOutputEvent = {
  runId: string;
  sessionId: string;
  provider: string;
  model: string;
  assistantTexts: string[];
  /**
   * Opaque value; see {@link extractResponseModel} and
   * {@link extractFinishReason}, which both accept it.
   */
  lastAssistant?: unknown;
  /** Usage figures; every counter is optional. */
  usage?: {
    input?: number;
    output?: number;
    cacheRead?: number;
    cacheWrite?: number;
    total?: number;
  };
};

/** Context argument of {@link LifecycleTelemetry.onLlmOutput}. */
export type LlmOutputCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

// ---------------------------------------------------------------------------
// Agent hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onAgentEnd}. */
export type AgentEndEvent = {
  messages: unknown[];
  success: boolean;
  error?: string;
  durationMs?: number;
};

/** Context argument of {@link LifecycleTelemetry.onAgentEnd}. */
export type AgentEndCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

// ---------------------------------------------------------------------------
// Message hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onMessageReceived}. */
export type MessageReceivedEvent = {
  from: string;
  content: string;
  timestamp?: number;
  metadata?: Record<string, unknown>;
};

/** Context argument of {@link LifecycleTelemetry.onMessageReceived}. */
export type MessageReceivedCtx = {
  channelId: string;
  accountId?: string;
  conversationId?: string;
};

/** Event argument of {@link LifecycleTelemetry.onMessageSent}. */
export type MessageSentEvent = {
  to: string;
  content: string;
  success: boolean;
  error?: string;
};

/** Context argument of {@link LifecycleTelemetry.onMessageSent}. */
export type MessageSentCtx = {
  channelId: string;
  accountId?: string;
  conversationId?: string;
};

// ---------------------------------------------------------------------------
// Compaction hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onBeforeCompaction}. */
export type BeforeCompactionEvent = {
  messageCount: number;
  compactingCount?: number;
  tokenCount?: number;
  sessionFile?: string;
};

/** Context argument of {@link LifecycleTelemetry.onBeforeCompaction}. */
export type BeforeCompactionCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

/** Event argument of {@link LifecycleTelemetry.onAfterCompaction}. */
export type AfterCompactionEvent = {
  messageCount: number;
  compactedCount: number;
  tokenCount?: number;
  sessionFile?: string;
};

/** Context argument of {@link LifecycleTelemetry.onAfterCompaction}. */
export type AfterCompactionCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

// ---------------------------------------------------------------------------
// Subagent hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onSubagentSpawned}. */
export type SubagentSpawnedEvent = {
  runId: string;
  childSessionKey: string;
  agentId: string;
  label?: string;
  mode: string;
  requester?: string;
  threadRequested: boolean;
};

/** Context argument of {@link LifecycleTelemetry.onSubagentSpawned}. */
export type SubagentSpawnedCtx = {
  runId?: string;
  childSessionKey?: string;
  requesterSessionKey?: string;
};

/** Event argument of {@link LifecycleTelemetry.onSubagentEnded}. */
export type SubagentEndedEvent = {
  targetSessionKey: string;
  targetKind: string;
  reason: string;
  outcome?: string;
  error?: string;
  endedAt?: string;
};

/** Context argument of {@link LifecycleTelemetry.onSubagentEnded}. */
export type SubagentEndedCtx = {
  runId?: string;
  childSessionKey?: string;
  requesterSessionKey?: string;
};

// ---------------------------------------------------------------------------
// Tool-call hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onAfterToolCall}. */
export type AfterToolCallEvent = {
  toolName: string;
  params: Record<string, unknown>;
  result?: unknown;
  error?: string;
  durationMs?: number;
};

/** Context argument of {@link LifecycleTelemetry.onAfterToolCall}. */
export type AfterToolCallCtx = {
  agentId?: string;
  sessionKey?: string;
  toolName: string;
  toolCallId?: string;
};

/** Event argument of {@link LifecycleTelemetry.onBeforeToolCall}. */
export type BeforeToolCallEvent = {
  toolName: string;
  params: Record<string, unknown>;
};

/** Context argument of {@link LifecycleTelemetry.onBeforeToolCall}. */
export type BeforeToolCallCtx = {
  agentId?: string;
  sessionKey?: string;
  toolName: string;
};

// ---------------------------------------------------------------------------
// Reset and gateway hooks
// ---------------------------------------------------------------------------

/** Event argument of {@link LifecycleTelemetry.onBeforeReset}. */
export type BeforeResetEvent = {
  sessionFile?: string;
  messages?: unknown[];
  reason?: string;
};

/** Context argument of {@link LifecycleTelemetry.onBeforeReset}. */
export type BeforeResetCtx = {
  agentId?: string;
  sessionKey?: string;
  sessionId?: string;
};

/** Event argument of {@link LifecycleTelemetry.onGatewayStart}. */
export type GatewayStartEvent = {
  port?: number;
};

/** Event argument of {@link LifecycleTelemetry.onGatewayStop}. */
export type GatewayStopEvent = {
  reason?: string;
};

// ---------------------------------------------------------------------------
// Metric instruments
// ---------------------------------------------------------------------------

/**
 * OTel metric instruments handed to {@link createLifecycleTelemetry}.
 * Each member is either a `Counter` or a `Histogram` from
 * `@opentelemetry/api`.
 */
export type LifecycleInstruments = {
  tokenUsage: Histogram;
  operationDuration: Histogram;

  sessionsStartedTotal: Counter;
  sessionsCompleted: Counter;
  sessionDurationMs: Histogram;

  compactionsTotal: Counter;
  compactionMessagesRemoved: Histogram;

  subagentsSpawnedTotal: Counter;
  subagentOutcomesTotal: Counter;
  subagentDurationMs: Histogram;

  messageDeliveryTotal: Counter;

  toolCallsTotal: Counter;
  toolDurationMs: Histogram;

  costByModel: Counter;
  sessionMessageTypes: Counter;
  gatewayRestarts: Counter;
  sessionResets: Counter;
  toolErrorClasses: Counter;
  promptInjectionSignals: Counter;
  traceFallbackSpans: Counter;
};

// ---------------------------------------------------------------------------
// Telemetry facade
// ---------------------------------------------------------------------------

/**
 * Object returned by {@link createLifecycleTelemetry}: one `on*` handler per
 * plugin hook event, plus accessors for session trace context, aggregate
 * figures, flushing and teardown.
 */
export type LifecycleTelemetry = {
  onSessionStart(event: SessionStartEvent, ctx: SessionStartCtx): void;
  onSessionEnd(event: SessionEndEvent, ctx: SessionEndCtx): void;

  onLlmInput(event: LlmInputEvent, ctx: LlmInputCtx): void;
  onLlmOutput(event: LlmOutputEvent, ctx: LlmOutputCtx): void;

  onAgentEnd(event: AgentEndEvent, ctx: AgentEndCtx): void;

  onMessageReceived(event: MessageReceivedEvent, ctx: MessageReceivedCtx): void;
  onMessageSent(event: MessageSentEvent, ctx: MessageSentCtx): void;

  onBeforeCompaction(event: BeforeCompactionEvent, ctx: BeforeCompactionCtx): void;
  onAfterCompaction(event: AfterCompactionEvent, ctx: AfterCompactionCtx): void;

  onSubagentSpawned(event: SubagentSpawnedEvent, ctx: SubagentSpawnedCtx): void;
  onSubagentEnded(event: SubagentEndedEvent, ctx: SubagentEndedCtx): void;

  onAfterToolCall(event: AfterToolCallEvent, ctx: AfterToolCallCtx): void;
  onBeforeReset(event: BeforeResetEvent, ctx: BeforeResetCtx): void;
  onBeforeToolCall(event: BeforeToolCallEvent, ctx: BeforeToolCallCtx): void;

  onGatewayStart(event: GatewayStartEvent): void;
  onGatewayStop(event: GatewayStopEvent): void;

  /** Get session trace context for parenting external spans (e.g., tool calls). */
  getSessionContext(sessionKey: string): {
    span: Span;
    ctx: Context;
  } | undefined;

  /** Get session trace context by sessionId or sessionKey (for cross-pipeline correlation). */
  getSessionContextByAny(sessionId?: string, sessionKey?: string): {
    span: Span;
    ctx: Context;
  } | undefined;

  /** Get rolling average LLM latency in ms (for ObservableGauge in metrics-collector). */
  getAvgLatencyMs(): number;

  /** Get unique session count in the last 1h (for security ObservableGauge). */
  getUniqueSessionCount1h(): number;

  /** Flush all pending logs and traces to the backend (call after critical events). */
  flushAll(): Promise<void>;

  /** Clean up all state (call on service stop). */
  destroy(): void;
};

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Extract the actual response model from lastAssistant (may differ from request model). */
export declare function extractResponseModel(lastAssistant: unknown): string | undefined;

/**
 * Counterpart of {@link extractResponseModel} for the finish reason: takes the
 * same opaque `lastAssistant` value and yields a string or `undefined`.
 * NOTE(review): the exact lookup rules are in the implementation, not visible here.
 */
export declare function extractFinishReason(lastAssistant: unknown): string | undefined;

// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------

/** Optional settings accepted by {@link createLifecycleTelemetry}. */
export type LifecycleTelemetryOpts = {
  agentVersion?: string;
  captureContent?: boolean;
  contentMaxLength?: number;
  redactSecrets?: boolean;

  /** Estimate cost from provider/model/usage; replaces accumulateCost from diagnostic events. */
  costEstimator?: (
    provider?: string,
    model?: string,
    usage?: {
      input?: number;
      output?: number;
      cacheRead?: number;
      cacheWrite?: number;
    },
  ) => number | undefined;

  /**
   * Subscribe to model.usage diagnostic events for dual-path trace fallback.
   * NOTE(review): the returned `() => void` is presumably an unsubscribe
   * function; confirm against the implementation.
   */
  onDiagnosticEvent?: (
    listener: (evt: {
      type: string;
      sessionKey?: string;
      sessionId?: string;
      provider?: string;
      model?: string;
      usage: {
        input?: number;
        output?: number;
        cacheRead?: number;
        cacheWrite?: number;
      };
      durationMs?: number;
      costUsd?: number;
      context?: {
        limit?: number;
        used?: number;
      };
    }) => void,
  ) => () => void;
};

/**
 * Build a {@link LifecycleTelemetry} instance.
 *
 * @param traces - Trace pipeline (type declared in `./otel-traces.js`).
 * @param logs - Log pipeline (type declared in `./otel-logs.js`).
 * @param instruments - Metric instruments the telemetry records into.
 * @param opts - Optional behaviour switches; see {@link LifecycleTelemetryOpts}.
 * @returns The hook handlers and accessors described by {@link LifecycleTelemetry}.
 */
export declare function createLifecycleTelemetry(
  traces: OtelTraces,
  logs: OtelLogs,
  instruments: LifecycleInstruments,
  opts?: LifecycleTelemetryOpts,
): LifecycleTelemetry;