UNPKG

@ai2070/l0

Version:

L0: The Missing Reliability Substrate for AI

387 lines 14.2 kB
import { SpanStatusCode, SpanKind } from "@opentelemetry/api";
import { EventType } from "../types/observability";
export { SpanStatusCode, SpanKind };

/**
 * Attribute key strings used when annotating spans and metric data points.
 *
 * The `gen_ai.*` keys presumably follow the OpenTelemetry GenAI semantic
 * conventions (TODO confirm against the spec version in use); the `l0.*`
 * keys are specific to this library. Not every key is used in this module.
 */
export const SemanticAttributes = {
    LLM_SYSTEM: "gen_ai.system",
    LLM_REQUEST_MODEL: "gen_ai.request.model",
    LLM_RESPONSE_MODEL: "gen_ai.response.model",
    LLM_REQUEST_MAX_TOKENS: "gen_ai.request.max_tokens",
    LLM_REQUEST_TEMPERATURE: "gen_ai.request.temperature",
    LLM_REQUEST_TOP_P: "gen_ai.request.top_p",
    LLM_RESPONSE_FINISH_REASON: "gen_ai.response.finish_reasons",
    LLM_USAGE_INPUT_TOKENS: "gen_ai.usage.input_tokens",
    LLM_USAGE_OUTPUT_TOKENS: "gen_ai.usage.output_tokens",
    L0_SESSION_ID: "l0.session_id",
    L0_STREAM_COMPLETED: "l0.stream.completed",
    L0_FALLBACK_INDEX: "l0.fallback.index",
    L0_RETRY_COUNT: "l0.retry.count",
    L0_NETWORK_ERROR_COUNT: "l0.network.error_count",
    L0_GUARDRAIL_VIOLATION_COUNT: "l0.guardrail.violation_count",
    L0_DRIFT_DETECTED: "l0.drift.detected",
    L0_TIME_TO_FIRST_TOKEN: "l0.time_to_first_token_ms",
    L0_TOKENS_PER_SECOND: "l0.tokens_per_second",
};

/**
 * Bridges L0 stream telemetry to an OpenTelemetry tracer and/or meter.
 *
 * Both `tracer` and `meter` are optional: without a tracer, span-producing
 * methods hand out a no-op span; without a meter, no instruments are created
 * and every metric call is skipped via optional chaining.
 */
export class L0OpenTelemetry {
    tracer;
    meter;
    config;
    requestCounter;
    tokenCounter;
    retryCounter;
    errorCounter;
    durationHistogram;
    ttftHistogram;
    activeStreamsGauge;
    activeStreams = 0;

    /**
     * @param {object} [config] - Integration options.
     * @param {object} [config.tracer] - Tracer used to start spans.
     * @param {object} [config.meter] - Meter used to create instruments.
     * @param {string} [config.serviceName="l0"] - Prefix for span names.
     * @param {boolean} [config.traceTokens=false] - Emit a span event per token.
     * @param {boolean} [config.recordTokenContent=false] - Attach token text to token events.
     * @param {boolean} [config.recordGuardrailViolations=true] - Emit guardrail violation events.
     * @param {object} [config.defaultAttributes] - Attributes merged into every span this class starts.
     */
    constructor(config = {}) {
        // Defaulting `config` lets `createOpenTelemetry()` be called with no
        // argument instead of throwing on `config.tracer`.
        this.tracer = config.tracer;
        this.meter = config.meter;
        this.config = {
            serviceName: config.serviceName ?? "l0",
            traceTokens: config.traceTokens ?? false,
            recordTokenContent: config.recordTokenContent ?? false,
            recordGuardrailViolations: config.recordGuardrailViolations ?? true,
            defaultAttributes: config.defaultAttributes,
        };
        if (this.meter) {
            this.initializeMetrics();
        }
    }

    /**
     * Creates the counters, histograms and up/down counter on the configured
     * meter. Does nothing when no meter was supplied.
     */
    initializeMetrics() {
        if (!this.meter) return;
        this.requestCounter = this.meter.createCounter("l0.requests", {
            description: "Total number of L0 stream requests",
            unit: "1",
        });
        this.tokenCounter = this.meter.createCounter("l0.tokens", {
            description: "Total number of tokens processed",
            unit: "1",
        });
        this.retryCounter = this.meter.createCounter("l0.retries", {
            description: "Total number of retry attempts",
            unit: "1",
        });
        this.errorCounter = this.meter.createCounter("l0.errors", {
            description: "Total number of errors",
            unit: "1",
        });
        this.durationHistogram = this.meter.createHistogram("l0.duration", {
            description: "Stream duration in milliseconds",
            unit: "ms",
        });
        this.ttftHistogram = this.meter.createHistogram("l0.time_to_first_token", {
            description: "Time to first token in milliseconds",
            unit: "ms",
        });
        this.activeStreamsGauge = this.meter.createUpDownCounter("l0.active_streams", {
            description: "Number of currently active streams",
            unit: "1",
        });
    }

    /**
     * Runs `fn` inside a CLIENT span named `<serviceName>.<name>`.
     *
     * The span status is set to OK when `fn` resolves and to ERROR when it
     * throws; the error is rethrown after being counted. The span is always
     * ended and the active-stream count always restored.
     *
     * @param {string} name - Span name suffix.
     * @param {(span: object) => any} fn - Work to trace; receives the span.
     * @param {object} [attributes] - Extra span attributes (override defaults).
     * @returns {Promise<any>} Whatever `fn` resolves to.
     * @throws Rethrows any error raised by `fn`.
     */
    async traceStream(name, fn, attributes) {
        if (!this.tracer) {
            return fn(createNoOpSpan());
        }
        const spanAttributes = {
            ...this.config.defaultAttributes,
            ...attributes,
        };
        const span = this.tracer.startSpan(`${this.config.serviceName}.${name}`, {
            kind: SpanKind.CLIENT,
            attributes: spanAttributes,
        });
        this.activeStreams++;
        this.activeStreamsGauge?.add(1);
        try {
            const result = await fn(span);
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        } catch (error) {
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: error instanceof Error ? error.message : String(error),
            });
            if (error instanceof Error) {
                span.recordException(error);
            }
            this.errorCounter?.add(1, { type: "stream_error" });
            throw error;
        } finally {
            this.activeStreams--;
            this.activeStreamsGauge?.add(-1);
            span.end();
        }
    }

    /**
     * Publishes a finished stream's telemetry as metrics and, when a
     * recording span is supplied, as span attributes.
     *
     * Zero or missing values (token counts, retries, duration, time to first
     * token, ...) are skipped rather than recorded.
     *
     * @param {object} telemetry - Telemetry snapshot; reads `sessionId`,
     *   `duration`, `metrics`, `network`, and optional `guardrails` / `drift`.
     * @param {object} [span] - Span to annotate.
     */
    recordTelemetry(telemetry, span) {
        const attributes = {
            [SemanticAttributes.L0_SESSION_ID]: telemetry.sessionId,
        };
        this.requestCounter?.add(1, { status: "completed" });
        if (telemetry.metrics.totalTokens > 0) {
            this.tokenCounter?.add(telemetry.metrics.totalTokens, attributes);
        }

        // Retries are reported under three `type` labels on the same counter.
        if (telemetry.metrics.totalRetries > 0) {
            this.retryCounter?.add(telemetry.metrics.totalRetries, {
                ...attributes,
                type: "total",
            });
        }
        if (telemetry.metrics.networkRetryCount > 0) {
            this.retryCounter?.add(telemetry.metrics.networkRetryCount, {
                ...attributes,
                type: "network",
            });
        }
        if (telemetry.metrics.modelRetryCount > 0) {
            this.retryCounter?.add(telemetry.metrics.modelRetryCount, {
                ...attributes,
                type: "model",
            });
        }

        // Network errors: prefer the per-type breakdown, fall back to the total.
        if (telemetry.network.errorCount > 0) {
            const errorsByType = telemetry.network.errorsByType;
            if (errorsByType && Object.keys(errorsByType).length > 0) {
                for (const [errorType, count] of Object.entries(errorsByType)) {
                    if (count > 0) {
                        this.errorCounter?.add(count, {
                            ...attributes,
                            type: "network",
                            error_type: errorType,
                        });
                    }
                }
            } else {
                this.errorCounter?.add(telemetry.network.errorCount, {
                    ...attributes,
                    type: "network",
                });
            }
        }

        // Guardrail violations: prefer the rule/severity breakdown, fall back to the total.
        if (telemetry.guardrails?.violationCount && telemetry.guardrails.violationCount > 0) {
            const byRuleAndSeverity = telemetry.guardrails.violationsByRuleAndSeverity;
            if (byRuleAndSeverity && Object.keys(byRuleAndSeverity).length > 0) {
                for (const [rule, severities] of Object.entries(byRuleAndSeverity)) {
                    for (const [severity, count] of Object.entries(severities)) {
                        if (count > 0) {
                            this.errorCounter?.add(count, {
                                ...attributes,
                                type: "guardrail_violation",
                                rule,
                                severity,
                            });
                        }
                    }
                }
            } else {
                this.errorCounter?.add(telemetry.guardrails.violationCount, {
                    ...attributes,
                    type: "guardrail_violation",
                });
            }
        }

        if (telemetry.drift?.detected) {
            this.errorCounter?.add(1, {
                ...attributes,
                type: "drift",
            });
        }
        if (telemetry.duration) {
            this.durationHistogram?.record(telemetry.duration, attributes);
        }
        if (telemetry.metrics.timeToFirstToken) {
            this.ttftHistogram?.record(telemetry.metrics.timeToFirstToken, attributes);
        }

        if (span?.isRecording()) {
            span.setAttributes({
                [SemanticAttributes.L0_SESSION_ID]: telemetry.sessionId,
                [SemanticAttributes.LLM_USAGE_OUTPUT_TOKENS]: telemetry.metrics.totalTokens,
                [SemanticAttributes.L0_RETRY_COUNT]: telemetry.metrics.totalRetries,
                [SemanticAttributes.L0_NETWORK_ERROR_COUNT]: telemetry.network.errorCount,
            });
            if (telemetry.guardrails?.violationCount) {
                span.setAttribute(SemanticAttributes.L0_GUARDRAIL_VIOLATION_COUNT, telemetry.guardrails.violationCount);
            }
            if (telemetry.drift?.detected) {
                span.setAttribute(SemanticAttributes.L0_DRIFT_DETECTED, true);
            }
            if (telemetry.metrics.timeToFirstToken) {
                span.setAttribute(SemanticAttributes.L0_TIME_TO_FIRST_TOKEN, telemetry.metrics.timeToFirstToken);
            }
            if (telemetry.metrics.tokensPerSecond) {
                span.setAttribute(SemanticAttributes.L0_TOKENS_PER_SECOND, telemetry.metrics.tokensPerSecond);
            }
            if (telemetry.duration) {
                span.setAttribute("duration_ms", telemetry.duration);
            }
        }
    }

    /**
     * Adds a "token" event to the span when `traceTokens` is enabled. The
     * token text is attached only when `recordTokenContent` is also enabled
     * and `content` is non-empty.
     *
     * @param {object} [span] - Span to annotate.
     * @param {string} [content] - Token text.
     */
    recordToken(span, content) {
        if (this.config.traceTokens && span?.isRecording()) {
            const eventAttributes = {};
            if (this.config.recordTokenContent && content) {
                eventAttributes["token.content"] = content;
            }
            span.addEvent("token", eventAttributes);
        }
    }

    /**
     * Adds a "retry" event to a recording span.
     *
     * @param {string} reason - Why the retry happened.
     * @param {number} attempt - Attempt number.
     * @param {object} [span] - Span to annotate.
     */
    recordRetry(reason, attempt, span) {
        if (span?.isRecording()) {
            span.addEvent("retry", {
                "retry.reason": reason,
                "retry.attempt": attempt,
            });
        }
    }

    /**
     * Adds a "network_error" event to a recording span.
     *
     * @param {Error} error - Error whose `message` is recorded.
     * @param {string} errorType - Error classification label.
     * @param {object} [span] - Span to annotate.
     */
    recordNetworkError(error, errorType, span) {
        if (span?.isRecording()) {
            span.addEvent("network_error", {
                "error.type": errorType,
                "error.message": error.message,
            });
        }
    }

    /**
     * Adds a "guardrail_violation" event to a recording span, unless
     * `recordGuardrailViolations` is disabled.
     *
     * @param {object} violation - Reads `rule`, `severity` and `message`.
     * @param {object} [span] - Span to annotate.
     */
    recordGuardrailViolation(violation, span) {
        if (!this.config.recordGuardrailViolations) return;
        if (span?.isRecording()) {
            span.addEvent("guardrail_violation", {
                "guardrail.rule": violation.rule,
                "guardrail.severity": violation.severity,
                "guardrail.message": violation.message,
            });
        }
    }

    /**
     * Flags drift on a recording span and adds a "drift_detected" event.
     *
     * @param {string} driftType - Drift type label.
     * @param {number} confidence - Detection confidence.
     * @param {object} [span] - Span to annotate.
     */
    recordDrift(driftType, confidence, span) {
        if (span?.isRecording()) {
            span.setAttribute(SemanticAttributes.L0_DRIFT_DETECTED, true);
            span.addEvent("drift_detected", {
                "drift.type": driftType,
                "drift.confidence": confidence,
            });
        }
    }

    /**
     * Starts an INTERNAL span named `<serviceName>.<name>`, or returns a
     * no-op span when no tracer is configured. The caller must end the span.
     *
     * @param {string} name - Span name suffix.
     * @param {object} [attributes] - Extra span attributes (override defaults).
     * @returns {object} The started (or no-op) span.
     */
    createSpan(name, attributes) {
        if (!this.tracer) {
            return createNoOpSpan();
        }
        return this.tracer.startSpan(`${this.config.serviceName}.${name}`, {
            kind: SpanKind.INTERNAL,
            attributes: {
                ...this.config.defaultAttributes,
                ...attributes,
            },
        });
    }

    /**
     * Wraps `monitor.complete` so that, after the original runs, the
     * monitor's telemetry (if any) is passed to `recordTelemetry`.
     *
     * Arguments and the return value of the original `complete` are passed
     * through unchanged so the wrapper stays transparent to callers.
     *
     * @param {object} monitor - Object exposing `complete()` and `getTelemetry()`.
     */
    connectMonitor(monitor) {
        const originalComplete = monitor.complete.bind(monitor);
        monitor.complete = (...args) => {
            const result = originalComplete(...args);
            const telemetry = monitor.getTelemetry();
            if (telemetry) {
                this.recordTelemetry(telemetry);
            }
            return result;
        };
    }

    /**
     * @returns {number} Number of `traceStream` calls currently in flight.
     */
    getActiveStreams() {
        return this.activeStreams;
    }
}

/**
 * Convenience factory for {@link L0OpenTelemetry}.
 *
 * @param {object} [config] - Same options as the class constructor.
 * @returns {L0OpenTelemetry}
 */
export function createOpenTelemetry(config) {
    return new L0OpenTelemetry(config);
}

/**
 * Builds a stand-in span whose methods do nothing. Chainable methods return
 * the span itself and `isRecording()` is always false, so the recording
 * helpers above skip it.
 *
 * @returns {object} A no-op span.
 */
function createNoOpSpan() {
    return {
        spanContext: () => ({
            traceId: "",
            spanId: "",
            traceFlags: 0,
        }),
        setAttribute() {
            return this;
        },
        setAttributes() {
            return this;
        },
        addEvent() {
            return this;
        },
        addLink() {
            return this;
        },
        addLinks() {
            return this;
        },
        setStatus() {
            return this;
        },
        updateName() {
            return this;
        },
        recordException() { },
        end() { },
        isRecording() {
            return false;
        },
    };
}

/**
 * Creates an event handler that turns L0 lifecycle events into a span: one
 * span per SESSION_START, annotated by subsequent events and ended on
 * COMPLETE.
 *
 * @param {object} [config] - Same options as the {@link L0OpenTelemetry} constructor.
 * @returns {(event: object) => void} Handler to register for L0 events.
 */
export function createOpenTelemetryHandler(config) {
    const otel = new L0OpenTelemetry(config);
    let currentSpan;
    return (event) => {
        switch (event.type) {
            case EventType.SESSION_START: {
                // A span left over from an attempt that never reached COMPLETE
                // would otherwise be overwritten without being ended.
                currentSpan?.end();
                currentSpan = otel.createSpan("stream");
                currentSpan.setAttribute("l0.attempt", event.attempt);
                currentSpan.setAttribute("l0.is_retry", event.isRetry);
                currentSpan.setAttribute("l0.is_fallback", event.isFallback);
                break;
            }
            case EventType.RETRY_ATTEMPT: {
                otel.recordRetry(event.reason, event.attempt, currentSpan);
                break;
            }
            case EventType.ERROR: {
                otel.recordNetworkError(new Error(event.error), event.failureType || "unknown", currentSpan);
                break;
            }
            case EventType.GUARDRAIL_RULE_RESULT: {
                if (event.violation) {
                    otel.recordGuardrailViolation(event.violation, currentSpan);
                }
                break;
            }
            case EventType.DRIFT_CHECK_RESULT: {
                if (event.detected && event.types.length > 0) {
                    otel.recordDrift(event.types.join(","), event.confidence ?? 0, currentSpan);
                }
                break;
            }
            case EventType.COMPLETE: {
                if (currentSpan) {
                    currentSpan.setAttribute("l0.token_count", event.tokenCount);
                    currentSpan.setAttribute("l0.content_length", event.contentLength);
                    currentSpan.setAttribute("l0.duration_ms", event.durationMs);
                    currentSpan.setStatus({ code: SpanStatusCode.OK });
                    currentSpan.end();
                    currentSpan = undefined;
                }
                break;
            }
            default:
                break;
        }
    };
}
//# sourceMappingURL=opentelemetry.js.map