openclaw-grafana-lens
Version:
OpenClaw plugin that gives AI agents full Grafana access — 18 composable tools for PromQL/LogQL/TraceQL queries, dashboard creation, alerting, SRE investigation, security monitoring, data collection pipeline management via Grafana Alloy (29 recipes), and
294 lines (293 loc) • 9.23 kB
TypeScript
/**
* Lifecycle Telemetry — gen_ai-compliant session-scoped traces
*
* Converts OpenClaw plugin hook events into hierarchical OTel traces
* following the gen_ai semantic conventions (v1.27.0+).
*
* Trace hierarchy (all spans share the same trace_id per session):
* invoke_agent openclaw (root, INTERNAL)
* +-- chat {model} (CLIENT)
* +-- execute_tool {toolName} (INTERNAL, sibling of chat)
* +-- chat {model} (CLIENT, next turn)
* +-- openclaw.compaction (INTERNAL)
* +-- openclaw.subagent.spawn {agentId} (INTERNAL)
* +-- openclaw.agent.end (INTERNAL)
*
* Context propagation: Explicit — no global TracerProvider.
* Uses trace.setSpan(ROOT_CONTEXT, parentSpan) + 3rd arg to startSpan().
*
* Also records gen_ai standard metrics:
* - gen_ai.client.token.usage (histogram)
* - gen_ai.client.operation.duration (histogram, seconds)
*/
import type { Span, Context } from "@opentelemetry/api";
import type { Counter, Histogram } from "@opentelemetry/api";
import type { OtelTraces } from "./otel-traces.js";
import type { OtelLogs } from "./otel-logs.js";
/**
* Event payload accepted by `LifecycleTelemetry.onSessionStart`.
*/
export type SessionStartEvent = {
/** Identifier of the session this event refers to. */
sessionId: string;
/** Optional; presumably the id of an earlier session this one resumes — confirm with the hook producer. */
resumedFrom?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onSessionStart`
* alongside the `SessionStartEvent` payload.
*/
export type SessionStartCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Session identifier (required here, unlike the LLM/agent contexts where it is optional). */
sessionId: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onSessionEnd`.
*/
export type SessionEndEvent = {
/** Identifier of the session this event refers to. */
sessionId: string;
/** Message count reported for the session. */
messageCount: number;
/** Optional duration; the `Ms` suffix suggests milliseconds — TODO confirm. */
durationMs?: number;
};
/**
* Context argument accepted by `LifecycleTelemetry.onSessionEnd`.
* Structurally identical to `SessionStartCtx`.
*/
export type SessionEndCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Session identifier (required). */
sessionId: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onLlmInput`.
*
* Shares `runId`, `sessionId`, `provider` and `model` with `LlmOutputEvent`.
* NOTE(review): `runId` presumably pairs an input with its output — confirm
* against the implementation, which is not part of this declaration file.
*/
export type LlmInputEvent = {
/** Run identifier; also present on `LlmOutputEvent`. */
runId: string;
/** Identifier of the session the request belongs to. */
sessionId: string;
/** Provider name for the request. */
provider: string;
/** Model name for the request (the file's response-model helper notes the response model may differ). */
model: string;
/** Optional system prompt text. */
systemPrompt?: string;
/** Prompt text. */
prompt: string;
/** Prior messages; element shape is opaque (`unknown`) at this level. */
historyMessages: unknown[];
/** Number of images attached to the request. */
imagesCount: number;
};
/**
* Context argument accepted by `LifecycleTelemetry.onLlmInput`.
* All fields are optional, so consumers must handle any of them being absent.
*/
export type LlmInputCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key (a string distinct from `sessionId`; exact relationship not shown here). */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onLlmOutput`.
*
* `lastAssistant` has the same `unknown` type as the argument of
* `extractResponseModel` / `extractFinishReason` declared in this file.
*/
export type LlmOutputEvent = {
/** Run identifier; also present on `LlmInputEvent`. */
runId: string;
/** Identifier of the session the response belongs to. */
sessionId: string;
/** Provider name. */
provider: string;
/** Model name as carried on the event. */
model: string;
/** Assistant text outputs. */
assistantTexts: string[];
/** Opaque last-assistant value; must be narrowed before use. */
lastAssistant?: unknown;
/**
* Optional usage figures; every member is itself optional.
* NOTE(review): presumably token counts feeding the gen_ai.client.token.usage
* histogram named in the file header — units are not stated here, confirm.
*/
usage?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
total?: number;
};
};
/**
* Context argument accepted by `LifecycleTelemetry.onLlmOutput`.
* Structurally identical to `LlmInputCtx`; all fields are optional.
*/
export type LlmOutputCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onAgentEnd`.
*/
export type AgentEndEvent = {
/** Messages carried on the event; element shape is opaque (`unknown`). */
messages: unknown[];
/** Whether the agent run is reported as successful. */
success: boolean;
/** Optional error text. */
error?: string;
/** Optional duration; the `Ms` suffix suggests milliseconds — TODO confirm. */
durationMs?: number;
};
/**
* Context argument accepted by `LifecycleTelemetry.onAgentEnd`.
* All fields are optional.
*/
export type AgentEndCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onMessageReceived`.
*/
export type MessageReceivedEvent = {
/** Sender of the message, as a string. */
from: string;
/** Message content. */
content: string;
/** Optional numeric timestamp; epoch/unit is not stated here — TODO confirm. */
timestamp?: number;
/** Optional free-form metadata; values must be narrowed before use. */
metadata?: Record<string, unknown>;
};
/**
* Context argument accepted by `LifecycleTelemetry.onMessageReceived`.
* Unlike the session/agent contexts, this one carries no session identifier.
*/
export type MessageReceivedCtx = {
/** Channel identifier (required). */
channelId: string;
/** Optional account identifier. */
accountId?: string;
/** Optional conversation identifier. */
conversationId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onMessageSent`.
*/
export type MessageSentEvent = {
/** Recipient of the message, as a string. */
to: string;
/** Message content. */
content: string;
/** Whether the send is reported as successful. */
success: boolean;
/** Optional error text. */
error?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onMessageSent`.
* Structurally identical to `MessageReceivedCtx`.
*/
export type MessageSentCtx = {
/** Channel identifier (required). */
channelId: string;
/** Optional account identifier. */
accountId?: string;
/** Optional conversation identifier. */
conversationId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onBeforeCompaction`.
*/
export type BeforeCompactionEvent = {
/** Message count reported with the event. */
messageCount: number;
/** Optional; presumably how many messages are about to be compacted — TODO confirm. */
compactingCount?: number;
/** Optional token count. */
tokenCount?: number;
/** Optional session file, as a string (presumably a path — confirm). */
sessionFile?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onBeforeCompaction`.
* All fields are optional.
*/
export type BeforeCompactionCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onAfterCompaction`.
*
* Note the asymmetry with `BeforeCompactionEvent`: `compactedCount` is
* required here, whereas `compactingCount` is optional there.
*/
export type AfterCompactionEvent = {
/** Message count reported with the event. */
messageCount: number;
/** Count of compacted messages (required). */
compactedCount: number;
/** Optional token count. */
tokenCount?: number;
/** Optional session file, as a string (presumably a path — confirm). */
sessionFile?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onAfterCompaction`.
* All fields are optional.
*/
export type AfterCompactionCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onSubagentSpawned`.
*/
export type SubagentSpawnedEvent = {
/** Run identifier. */
runId: string;
/** Session key of the child (spawned) session. */
childSessionKey: string;
/** Agent identifier; the file header's span name `openclaw.subagent.spawn {agentId}` uses a value of this name. */
agentId: string;
/** Optional label. */
label?: string;
/** Spawn mode, as a free-form string (allowed values are not declared here). */
mode: string;
/** Optional requester, as a string. */
requester?: string;
/** Whether a thread was requested. */
threadRequested: boolean;
};
/**
* Context argument accepted by `LifecycleTelemetry.onSubagentSpawned`.
* All fields are optional.
*/
export type SubagentSpawnedCtx = {
/** Optional run identifier. */
runId?: string;
/** Optional session key of the child session. */
childSessionKey?: string;
/** Optional session key of the requesting session. */
requesterSessionKey?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onSubagentEnded`.
*
* `targetKind`, `reason` and `outcome` are plain strings; their allowed
* values are not declared in this file.
*/
export type SubagentEndedEvent = {
/** Session key of the target that ended. */
targetSessionKey: string;
/** Kind of the target. */
targetKind: string;
/** Reason the subagent ended. */
reason: string;
/** Optional outcome. */
outcome?: string;
/** Optional error text. */
error?: string;
/** Optional end time as a string; format is not stated here — TODO confirm. */
endedAt?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onSubagentEnded`.
* Structurally identical to `SubagentSpawnedCtx`; all fields are optional.
*/
export type SubagentEndedCtx = {
/** Optional run identifier. */
runId?: string;
/** Optional session key of the child session. */
childSessionKey?: string;
/** Optional session key of the requesting session. */
requesterSessionKey?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onAfterToolCall`.
*/
export type AfterToolCallEvent = {
/** Name of the tool that was called. */
toolName: string;
/** Parameters of the call; values must be narrowed before use. */
params: Record<string, unknown>;
/** Optional opaque result value. */
result?: unknown;
/** Optional error text. */
error?: string;
/** Optional duration; the `Ms` suffix suggests milliseconds — TODO confirm. */
durationMs?: number;
};
/**
* Context argument accepted by `LifecycleTelemetry.onAfterToolCall`.
* Carries `sessionKey` but no `sessionId`.
*/
export type AfterToolCallCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Name of the tool (required; also present on the event payload). */
toolName: string;
/** Optional tool-call identifier (not present on `BeforeToolCallCtx`). */
toolCallId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onBeforeReset`.
* Every field is optional, so an empty object is a valid payload.
*/
export type BeforeResetEvent = {
/** Optional session file, as a string (presumably a path — confirm). */
sessionFile?: string;
/** Optional messages; element shape is opaque (`unknown`). */
messages?: unknown[];
/** Optional reason for the reset. */
reason?: string;
};
/**
* Context argument accepted by `LifecycleTelemetry.onBeforeReset`.
* All fields are optional.
*/
export type BeforeResetCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Optional session identifier. */
sessionId?: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onBeforeToolCall`.
* A subset of `AfterToolCallEvent` (no result, error, or duration).
*/
export type BeforeToolCallEvent = {
/** Name of the tool about to be called. */
toolName: string;
/** Parameters of the call; values must be narrowed before use. */
params: Record<string, unknown>;
};
/**
* Context argument accepted by `LifecycleTelemetry.onBeforeToolCall`.
* Same fields as `AfterToolCallCtx` minus `toolCallId`.
*/
export type BeforeToolCallCtx = {
/** Optional agent identifier. */
agentId?: string;
/** Optional session key. */
sessionKey?: string;
/** Name of the tool (required). */
toolName: string;
};
/**
* Event payload accepted by `LifecycleTelemetry.onGatewayStart`.
* That handler takes no separate context argument.
*/
export type GatewayStartEvent = {
/** Optional port number. */
port?: number;
};
/**
* Event payload accepted by `LifecycleTelemetry.onGatewayStop`.
* That handler takes no separate context argument.
*/
export type GatewayStopEvent = {
/** Optional reason for the stop. */
reason?: string;
};
/**
* Bundle of OpenTelemetry metric instruments (`Counter` / `Histogram` from
* `@opentelemetry/api`) that the caller supplies to `createLifecycleTelemetry`.
*
* Every instrument is required. This declaration fixes only the instrument
* kind of each member; metric names, units and attribute sets are decided
* wherever the instruments are created and are not visible here.
*
* NOTE(review): `tokenUsage` and `operationDuration` presumably back the
* `gen_ai.client.token.usage` and `gen_ai.client.operation.duration`
* histograms listed in the file header — confirm at the creation site.
* Members suffixed `Ms` presumably record milliseconds, while the header
* states the operation-duration histogram is in seconds — verify units.
*/
export type LifecycleInstruments = {
tokenUsage: Histogram;
operationDuration: Histogram;
sessionsStartedTotal: Counter;
sessionsCompleted: Counter;
sessionDurationMs: Histogram;
compactionsTotal: Counter;
compactionMessagesRemoved: Histogram;
subagentsSpawnedTotal: Counter;
subagentOutcomesTotal: Counter;
subagentDurationMs: Histogram;
messageDeliveryTotal: Counter;
toolCallsTotal: Counter;
toolDurationMs: Histogram;
costByModel: Counter;
sessionMessageTypes: Counter;
gatewayRestarts: Counter;
sessionResets: Counter;
toolErrorClasses: Counter;
promptInjectionSignals: Counter;
traceFallbackSpans: Counter;
};
/**
* Public surface of the lifecycle telemetry object returned by
* `createLifecycleTelemetry`.
*
* It consists of one synchronous `on…` handler per plugin hook event, lookup
* helpers for the per-session trace context, two numeric getters, and
* `flushAll` / `destroy` for lifecycle management.
*
* All `on…` handlers return `void`. Only `flushAll` is asynchronous.
*/
export type LifecycleTelemetry = {
/* Hook handlers: each takes the event payload plus its matching context type. */
onSessionStart(event: SessionStartEvent, ctx: SessionStartCtx): void;
onSessionEnd(event: SessionEndEvent, ctx: SessionEndCtx): void;
onLlmInput(event: LlmInputEvent, ctx: LlmInputCtx): void;
onLlmOutput(event: LlmOutputEvent, ctx: LlmOutputCtx): void;
onAgentEnd(event: AgentEndEvent, ctx: AgentEndCtx): void;
onMessageReceived(event: MessageReceivedEvent, ctx: MessageReceivedCtx): void;
onMessageSent(event: MessageSentEvent, ctx: MessageSentCtx): void;
onBeforeCompaction(event: BeforeCompactionEvent, ctx: BeforeCompactionCtx): void;
onAfterCompaction(event: AfterCompactionEvent, ctx: AfterCompactionCtx): void;
onSubagentSpawned(event: SubagentSpawnedEvent, ctx: SubagentSpawnedCtx): void;
onSubagentEnded(event: SubagentEndedEvent, ctx: SubagentEndedCtx): void;
onAfterToolCall(event: AfterToolCallEvent, ctx: AfterToolCallCtx): void;
onBeforeReset(event: BeforeResetEvent, ctx: BeforeResetCtx): void;
onBeforeToolCall(event: BeforeToolCallEvent, ctx: BeforeToolCallCtx): void;
/* Gateway handlers take only the event; there is no context argument. */
onGatewayStart(event: GatewayStartEvent): void;
onGatewayStop(event: GatewayStopEvent): void;
/** Get session trace context for parenting external spans (e.g., tool calls). Returns undefined when no context is available for the key. */
getSessionContext(sessionKey: string): {
span: Span;
ctx: Context;
} | undefined;
/** Get session trace context by sessionId or sessionKey (for cross-pipeline correlation). Both arguments are optional; lookup precedence is not visible in this declaration. */
getSessionContextByAny(sessionId?: string, sessionKey?: string): {
span: Span;
ctx: Context;
} | undefined;
/** Get rolling average LLM latency in ms (for ObservableGauge in metrics-collector) */
getAvgLatencyMs(): number;
/** Get unique session count in the last 1h (for security ObservableGauge) */
getUniqueSessionCount1h(): number;
/** Flush all pending logs and traces to the backend (call after critical events) */
flushAll(): Promise<void>;
/** Clean up all state (call on service stop) */
destroy(): void;
};
/**
* Extract the actual response model from lastAssistant (may differ from request model).
*
* @param lastAssistant - Opaque value, typed `unknown` (same type as `LlmOutputEvent.lastAssistant`).
* @returns The model name as a string, or `undefined`; the conditions under
* which `undefined` is returned are defined by the implementation, which is
* not part of this declaration.
*/
export declare function extractResponseModel(lastAssistant: unknown): string | undefined;
/**
* Extract a finish reason from lastAssistant.
*
* NOTE(review): the description is inferred from the function name and its
* sibling `extractResponseModel`; the implementation is not part of this
* declaration — confirm which field(s) of the value are read.
*
* @param lastAssistant - Opaque value, typed `unknown` (same type as `LlmOutputEvent.lastAssistant`).
* @returns A string, or `undefined`.
*/
export declare function extractFinishReason(lastAssistant: unknown): string | undefined;
/**
* Optional configuration accepted as the fourth argument of
* `createLifecycleTelemetry`. Every member is optional; defaults are chosen
* by the implementation and are not visible in this declaration.
*/
export type LifecycleTelemetryOpts = {
/** Optional agent version string. */
agentVersion?: string;
/** NOTE(review): presumably toggles recording of prompt/response content — confirm. */
captureContent?: boolean;
/** NOTE(review): presumably a length cap applied to captured content; unit (chars vs bytes) not stated — confirm. */
contentMaxLength?: number;
/** NOTE(review): presumably toggles secret redaction in captured content — confirm. */
redactSecrets?: boolean;
/** Estimate cost from provider/model/usage — replaces accumulateCost from diagnostic events. All three arguments may be omitted, and the estimator may return undefined. */
costEstimator?: (provider?: string, model?: string, usage?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
}) => number | undefined;
/**
* Subscribe to model.usage diagnostic events for dual-path trace fallback.
*
* Takes a listener and returns a zero-argument function.
* NOTE(review): the returned function is presumably an unsubscribe
* callback — confirm with the provider of this hook.
* In the listener's event, `type` and `usage` are required; everything
* else is optional.
*/
onDiagnosticEvent?: (listener: (evt: {
type: string;
sessionKey?: string;
sessionId?: string;
provider?: string;
model?: string;
usage: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
};
durationMs?: number;
costUsd?: number;
context?: {
limit?: number;
used?: number;
};
}) => void) => () => void;
};
/**
* Create a `LifecycleTelemetry` instance.
*
* @param traces - Trace facility (`OtelTraces`, declared in `./otel-traces.js`).
* @param logs - Log facility (`OtelLogs`, declared in `./otel-logs.js`).
* @param instruments - The full set of metric instruments; every member of `LifecycleInstruments` is required.
* @param opts - Optional configuration; see `LifecycleTelemetryOpts`.
* @returns The telemetry object whose `on…` handlers are to be wired to the
* corresponding plugin hooks. Call `destroy()` on it on service stop, per
* the `LifecycleTelemetry.destroy` documentation.
*/
export declare function createLifecycleTelemetry(traces: OtelTraces, logs: OtelLogs, instruments: LifecycleInstruments, opts?: LifecycleTelemetryOpts): LifecycleTelemetry;