autotel
Version:
Write Once, Observe Anywhere
1 lines • 79.8 kB
Source Map (JSON)
{"version":3,"file":"messaging.cjs","names":["trace","SpanKind","context","propagation","extractLinksFromBatch","createLinkFromHeaders"],"sources":["../src/messaging.ts"],"sourcesContent":["/**\n * Messaging helpers for event-driven architectures\n *\n * Provides specialized tracing for message producers and consumers\n * with automatic context propagation, link extraction, and OTel\n * semantic convention compliance.\n *\n * @example Producer\n * ```typescript\n * import { traceProducer } from 'autotel/messaging';\n *\n * export const publishEvent = traceProducer({\n * system: 'kafka',\n * destination: 'user-events',\n * })(ctx => async (event: UserEvent) => {\n * const headers = ctx.getTraceHeaders();\n * await producer.send({\n * topic: 'user-events',\n * messages: [{ value: JSON.stringify(event), headers }]\n * });\n * });\n * ```\n *\n * @example Consumer\n * ```typescript\n * import { traceConsumer } from 'autotel/messaging';\n *\n * export const processEvents = traceConsumer({\n * system: 'kafka',\n * destination: 'user-events',\n * consumerGroup: 'event-processor',\n * batchMode: true,\n * })(ctx => async (messages: KafkaMessage[]) => {\n * // Links to producer spans are automatically extracted\n * for (const msg of messages) {\n * await processMessage(msg);\n * }\n * });\n * ```\n *\n * @module\n */\n\nimport { SpanKind, context, propagation } from '@opentelemetry/api';\nimport type {\n Attributes,\n AttributeValue,\n Link,\n SpanContext,\n} from '@opentelemetry/api';\nimport { trace } from './functional';\nimport type { TraceContext } from './trace-context';\nimport { emitCorrelatedEvent } from './correlated-events';\nimport { createLinkFromHeaders, extractLinksFromBatch } from './sampling';\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/**\n * Supported messaging systems\n */\nexport type MessagingSystem =\n | 'kafka'\n | 'rabbitmq'\n | 'sqs'\n | 'sns'\n | 'pubsub'\n | 'activemq'\n | 'azure_servicebus'\n | 'eventhubs'\n | (string & {});\n\n/**\n * Messaging operation types\n */\nexport type MessagingOperation = 'publish' | 'receive' | 'process' | 'settle';\n\n/**\n * Configuration for producer tracing\n */\nexport interface ProducerConfig {\n /** Messaging system (kafka, rabbitmq, sqs, etc.) */\n system: MessagingSystem;\n\n /** Destination name (topic/queue) */\n destination: string;\n\n /** Extract message ID from arguments */\n messageIdFrom?: string | ((args: unknown[]) => string | undefined);\n\n /** Extract partition from arguments (Kafka-specific) */\n partitionFrom?: string | ((args: unknown[]) => number | undefined);\n\n /** Extract message key from arguments (Kafka-specific) */\n keyFrom?: string | ((args: unknown[]) => string | undefined);\n\n /** Additional attributes to set on span */\n attributes?: Attributes;\n\n /** Propagate baggage in message headers */\n propagateBaggage?: boolean;\n\n /** Callback before sending (for custom attributes) */\n beforeSend?: (ctx: ProducerContext, args: unknown[]) => void;\n\n /** Callback on error */\n onError?: (error: Error, ctx: ProducerContext) => void;\n\n // ---- Extensible Hooks (\"Bring Your Own\" System Support) ----\n\n /**\n * Hook to add system-specific attributes\n *\n * Use this to add attributes for messaging systems not explicitly supported\n * (e.g., NATS, Temporal, Cloudflare Queues, Redis Streams).\n *\n * @example NATS attributes\n * ```typescript\n * customAttributes: (ctx, args) => ({\n * 'nats.subject': args[0].subject,\n * 'nats.reply_to': args[0].replyTo,\n * 'nats.stream': 'orders',\n * })\n * ```\n *\n * @example Temporal attributes\n * ```typescript\n * customAttributes: (ctx, args) => ({\n * 'temporal.workflow_id': args[0].workflowId,\n * 'temporal.run_id': args[0].runId,\n * 'temporal.task_queue': 'orders-queue',\n * })\n * ```\n */\n customAttributes?: (\n ctx: ProducerContext,\n args: unknown[],\n ) => Record<string, AttributeValue>;\n\n /**\n * Hook for custom header injection (beyond W3C traceparent)\n *\n * Use this to inject headers for systems that use non-standard\n * context propagation formats.\n *\n * @example Datadog headers\n * ```typescript\n * customHeaders: (ctx) => ({\n * 'x-datadog-trace-id': ctx.getTraceId(),\n * 'x-datadog-parent-id': ctx.getSpanId(),\n * })\n * ```\n *\n * @example Custom correlation headers\n * ```typescript\n * customHeaders: (ctx) => ({\n * 'x-correlation-id': correlationId,\n * 'x-request-id': requestId,\n * })\n * ```\n */\n customHeaders?: (ctx: ProducerContext) => Record<string, string>;\n}\n\n/**\n * Configuration for consumer tracing\n */\nexport interface ConsumerConfig {\n /** Messaging system (kafka, rabbitmq, sqs, etc.) */\n system: MessagingSystem;\n\n /** Destination name (topic/queue) */\n destination: string;\n\n /** Consumer group name */\n consumerGroup?: string;\n\n /** Extract headers from message for link creation */\n headersFrom?: string | ((msg: unknown) => Record<string, string> | undefined);\n\n /** Enable batch mode - extract links from all messages */\n batchMode?: boolean;\n\n /** Extract baggage from message headers */\n extractBaggage?: boolean;\n\n /** Additional attributes to set on span */\n attributes?: Attributes;\n\n /** Consumer lag metrics extraction */\n lagMetrics?: LagMetricsConfig;\n\n /** Callback when message goes to DLQ */\n onDLQ?: (ctx: ConsumerContext, reason: string) => void;\n\n /** Callback on error */\n onError?: (error: Error, ctx: ConsumerContext) => void;\n\n // ---- Message Ordering Support ----\n\n /**\n * Message ordering configuration\n *\n * Enable sequence tracking, out-of-order detection, and deduplication.\n *\n * @example Kafka ordering\n * ```typescript\n * ordering: {\n * sequenceFrom: (msg) => msg.offset,\n * partitionKeyFrom: (msg) => msg.key,\n * detectOutOfOrder: true,\n * onOutOfOrder: (ctx, info) => {\n * console.warn(`Out of order: expected ${info.expectedSequence}, got ${info.currentSequence}`);\n * },\n * }\n * ```\n */\n ordering?: OrderingConfig;\n\n // ---- Consumer Group Tracking ----\n\n /**\n * Consumer group tracking configuration\n *\n * Enables observability of consumer group state, including membership,\n * partition assignments, and rebalancing events.\n *\n * @example Kafka consumer group tracking\n * ```typescript\n * consumerGroupTracking: {\n * memberId: () => consumer.memberId,\n * groupInstanceId: process.env.KAFKA_GROUP_INSTANCE_ID,\n * onRebalance: (ctx, event) => {\n * if (event.type === 'revoked') {\n * logger.warn('Partitions revoked', event.partitions);\n * }\n * },\n * trackPartitionLag: true,\n * }\n * ```\n */\n consumerGroupTracking?: ConsumerGroupTrackingConfig;\n\n // ---- Extensible Hooks (\"Bring Your Own\" System Support) ----\n\n /**\n * Hook to add system-specific attributes\n *\n * Use this to add attributes for messaging systems not explicitly supported\n * (e.g., NATS, Temporal, Cloudflare Queues, Redis Streams).\n *\n * @example NATS consumer attributes\n * ```typescript\n * customAttributes: (ctx, msg) => ({\n * 'nats.subject': msg.subject,\n * 'nats.stream': msg.info?.stream,\n * 'nats.consumer': msg.info?.consumer,\n * 'nats.delivered_count': msg.info?.redeliveryCount,\n * })\n * ```\n *\n * @example Cloudflare Queue attributes\n * ```typescript\n * customAttributes: (ctx, msg) => ({\n * 'cloudflare.queue_id': msg.id,\n * 'cloudflare.timestamp_ms': msg.timestamp.getTime(),\n * 'cloudflare.attempts': msg.attempts,\n * })\n * ```\n */\n customAttributes?: (\n ctx: ConsumerContext,\n msg: unknown,\n ) => Record<string, AttributeValue>;\n\n /**\n * Hook for custom context extraction (beyond W3C traceparent)\n *\n * Use this to extract parent span context from systems that use\n * non-standard header formats.\n *\n * @example Datadog context extraction\n * ```typescript\n * customContextExtractor: (headers) => {\n * const traceId = headers['x-datadog-trace-id'];\n * const spanId = headers['x-datadog-parent-id'];\n * if (!traceId || !spanId) return null;\n * return {\n * traceId: traceIdToOtel(traceId),\n * spanId: spanIdToOtel(spanId),\n * traceFlags: TraceFlags.SAMPLED,\n * };\n * }\n * ```\n *\n * @example B3 format extraction\n * ```typescript\n * customContextExtractor: (headers) => {\n * const traceId = headers['x-b3-traceid'];\n * const spanId = headers['x-b3-spanid'];\n * const sampled = headers['x-b3-sampled'] === '1';\n * if (!traceId || !spanId) return null;\n * return {\n * traceId,\n * spanId,\n * traceFlags: sampled ? TraceFlags.SAMPLED : TraceFlags.NONE,\n * };\n * }\n * ```\n */\n customContextExtractor?: (\n headers: Record<string, string>,\n ) => SpanContext | null;\n}\n\n/**\n * Configuration for consumer lag metrics\n */\nexport interface LagMetricsConfig {\n /** Get current message offset */\n getCurrentOffset?: (msg: unknown) => number | undefined;\n\n /** Get end offset (high watermark) - can be async */\n getEndOffset?: () => number | Promise<number>;\n\n /** Get committed offset - can be async */\n getCommittedOffset?: () => number | Promise<number>;\n\n /** Get partition from message */\n getPartition?: (msg: unknown) => number | undefined;\n}\n\n/**\n * Configuration for message ordering tracking\n */\nexport interface OrderingConfig {\n /**\n * Extract sequence number from message\n *\n * Sequence numbers enable out-of-order detection and gap analysis.\n *\n * @example Kafka offset\n * ```typescript\n * sequenceFrom: (msg) => msg.offset\n * ```\n */\n sequenceFrom?: (msg: unknown) => number | undefined;\n\n /**\n * Extract partition key from message\n *\n * Partition keys determine message ordering in Kafka.\n *\n * @example Message key\n * ```typescript\n * partitionKeyFrom: (msg) => msg.key\n * ```\n */\n partitionKeyFrom?: (msg: unknown) => string | undefined;\n\n /**\n * Extract message ID for deduplication\n *\n * Used to detect duplicate messages.\n *\n * @example Idempotency key\n * ```typescript\n * messageIdFrom: (msg) => msg.headers['idempotency-key']\n * ```\n */\n messageIdFrom?: (msg: unknown) => string | undefined;\n\n /**\n * Enable out-of-order detection\n *\n * Tracks sequence numbers per partition and detects when messages\n * arrive out of order.\n *\n * @default false\n */\n detectOutOfOrder?: boolean;\n\n /**\n * Enable deduplication detection\n *\n * Tracks message IDs and detects duplicates within the window.\n *\n * @default false\n */\n detectDuplicates?: boolean;\n\n /**\n * Deduplication window size (number of message IDs to track)\n *\n * @default 1000\n */\n deduplicationWindowSize?: number;\n\n /**\n * Callback when out-of-order message detected\n */\n onOutOfOrder?: (ctx: ConsumerContext, info: OutOfOrderInfo) => void;\n\n /**\n * Callback when duplicate message detected\n */\n onDuplicate?: (ctx: ConsumerContext, messageId: string) => void;\n}\n\n/**\n * Information about out-of-order message\n */\nexport interface OutOfOrderInfo {\n /** Current sequence number */\n currentSequence: number;\n\n /** Expected sequence number */\n expectedSequence: number;\n\n /** Partition key (if available) */\n partitionKey?: string;\n\n /** Gap size (positive = gap, negative = out of order) */\n gap: number;\n}\n\n// ============================================================================\n// Consumer Group Tracking Types\n// ============================================================================\n\n/**\n * Configuration for consumer group tracking\n *\n * Enables observability of consumer group state, including membership,\n * partition assignments, and rebalancing events.\n *\n * @example Kafka consumer group tracking\n * ```typescript\n * consumerGroupTracking: {\n * memberId: consumer.memberId,\n * groupInstanceId: process.env.CONSUMER_ID,\n * onRebalance: (ctx, event) => {\n * if (event.type === 'assigned') {\n * console.log(`Assigned partitions: ${event.partitions}`);\n * }\n * },\n * }\n * ```\n */\nexport interface ConsumerGroupTrackingConfig {\n /**\n * Consumer member ID\n *\n * Unique identifier assigned by the broker to this consumer.\n */\n memberId?: string | (() => string | undefined);\n\n /**\n * Static group instance ID (for static membership)\n *\n * If set, enables static group membership which prevents\n * rebalances when consumers restart.\n */\n groupInstanceId?: string | (() => string | undefined);\n\n /**\n * Callback when rebalance occurs\n */\n onRebalance?: (ctx: ConsumerContext, event: RebalanceEvent) => void;\n\n /**\n * Callback when partitions are assigned\n */\n onPartitionsAssigned?: (\n ctx: ConsumerContext,\n partitions: PartitionAssignment[],\n ) => void;\n\n /**\n * Callback when partitions are revoked\n */\n onPartitionsRevoked?: (\n ctx: ConsumerContext,\n partitions: PartitionAssignment[],\n ) => void;\n\n /**\n * Track consumer lag per partition\n *\n * @default true\n */\n trackPartitionLag?: boolean;\n\n /**\n * Track consumer heartbeat health\n *\n * @default false\n */\n trackHeartbeat?: boolean;\n\n /**\n * Heartbeat interval in milliseconds (for health tracking)\n */\n heartbeatIntervalMs?: number;\n}\n\n/**\n * Rebalance event types\n */\nexport type RebalanceType = 'assigned' | 'revoked' | 'lost';\n\n/**\n * Rebalance event information\n */\nexport interface RebalanceEvent {\n /** Type of rebalance event */\n type: RebalanceType;\n\n /** Partitions affected by the rebalance */\n partitions: PartitionAssignment[];\n\n /** Timestamp of the rebalance event */\n timestamp: number;\n\n /** Generation ID (increments on each rebalance) */\n generation?: number;\n\n /** Consumer member ID */\n memberId?: string;\n\n /** Reason for the rebalance (if available) */\n reason?: string;\n}\n\n/**\n * Partition assignment information\n */\nexport interface PartitionAssignment {\n /** Topic name */\n topic: string;\n\n /** Partition number */\n partition: number;\n\n /** Initial offset (if available) */\n offset?: number;\n\n /** Metadata (if available) */\n metadata?: string;\n}\n\n/**\n * Consumer group state snapshot\n */\nexport interface ConsumerGroupState {\n /** Consumer group name */\n groupId: string;\n\n /** Consumer member ID */\n memberId?: string;\n\n /** Static instance ID (if using static membership) */\n groupInstanceId?: string;\n\n /** Currently assigned partitions */\n assignedPartitions: PartitionAssignment[];\n\n /** Group generation ID */\n generation?: number;\n\n /** Whether the consumer is currently active */\n isActive: boolean;\n\n /** Last heartbeat timestamp */\n lastHeartbeat?: number;\n\n /** Consumer state (stable, preparing_rebalance, completing_rebalance, dead) */\n state?:\n | 'stable'\n | 'preparing_rebalance'\n | 'completing_rebalance'\n | 'dead'\n | 'empty';\n}\n\n/**\n * Partition lag information\n */\nexport interface PartitionLag {\n /** Topic name */\n topic: string;\n\n /** Partition number */\n partition: number;\n\n /** Current consumer offset */\n currentOffset: number;\n\n /** End offset (high watermark) */\n endOffset: number;\n\n /** Calculated lag */\n lag: number;\n\n /** Timestamp of measurement */\n timestamp: number;\n}\n\n/**\n * DLQ failure category types\n */\nexport type DLQReasonCategory =\n | 'validation'\n | 'processing'\n | 'timeout'\n | 'poison'\n | 'unknown';\n\n/**\n * Options for enhanced DLQ recording\n */\nexport interface DLQOptions {\n /**\n * Automatically link to the producer span context\n *\n * When true, creates a span link from the DLQ event back to\n * the original producer span for correlation.\n *\n * @default true\n */\n linkToProducer?: boolean;\n\n /**\n * Category of the failure that caused DLQ routing\n *\n * - validation: Message failed schema/format validation\n * - processing: Business logic error during processing\n * - timeout: Processing exceeded allowed time\n * - poison: Message causes repeated failures (poison pill)\n * - unknown: Uncategorized failure\n */\n reasonCategory?: DLQReasonCategory;\n\n /**\n * Number of processing attempts before DLQ routing\n */\n attemptCount?: number;\n\n /**\n * The original error that caused DLQ routing\n *\n * Error details are recorded as span attributes for debugging.\n */\n originalError?: Error;\n\n /**\n * Additional metadata to record with the DLQ event\n */\n metadata?: Record<string, string | number | boolean>;\n}\n\n/**\n * Options for recording DLQ replay\n */\nexport interface DLQReplayOptions {\n /**\n * Original span context from DLQ message\n *\n * If provided, creates a span link to correlate with the original failure.\n */\n originalDLQSpanContext?: SpanContext;\n\n /**\n * Time spent in DLQ before replay (milliseconds)\n */\n dlqDwellTimeMs?: number;\n\n /**\n * Retry attempt number for this replay\n */\n replayAttempt?: number;\n}\n\n/**\n * Extended trace context for producers with header injection\n */\nexport interface ProducerContext extends TraceContext {\n /**\n * Get W3C trace context headers to inject into message\n *\n * @returns Headers object with traceparent and optionally tracestate\n *\n * @example\n * ```typescript\n * const headers = ctx.getTraceHeaders();\n * await producer.send({\n * topic: 'events',\n * messages: [{ value: data, headers }]\n * });\n * ```\n */\n getTraceHeaders(): { traceparent: string; tracestate?: string };\n\n /**\n * Get all propagation headers including baggage if enabled\n *\n * @returns Headers object with all W3C trace context headers\n */\n getAllPropagationHeaders(): Record<string, string>;\n\n /**\n * Get all headers including custom headers from customHeaders hook\n *\n * This combines W3C trace context headers, baggage (if enabled),\n * and any custom headers defined via the customHeaders hook.\n *\n * @returns Combined headers object\n *\n * @example\n * ```typescript\n * const headers = ctx.getFullHeaders();\n * // Contains: traceparent, tracestate, baggage (if enabled), and custom headers\n * await producer.send({ topic, messages: [{ value, headers }] });\n * ```\n */\n getFullHeaders(): Record<string, string>;\n}\n\n/**\n * Extended trace context for consumers\n */\nexport interface ConsumerContext extends TraceContext {\n /**\n * Record that a message is being sent to DLQ\n *\n * Enhanced with auto-linking to producer span, failure categorization,\n * and detailed error recording for comprehensive DLQ observability.\n *\n * @param reason - Human-readable reason for DLQ routing\n * @param dlqNameOrOptions - DLQ name (string) or enhanced options object\n * @param options - Enhanced DLQ options (when second param is dlqName)\n *\n * @example Basic usage (backwards compatible)\n * ```typescript\n * ctx.recordDLQ('Schema validation failed', 'orders-dlq');\n * ```\n *\n * @example Enhanced usage with options\n * ```typescript\n * ctx.recordDLQ('Invalid order total', 'orders-dlq', {\n * reasonCategory: 'validation',\n * attemptCount: 3,\n * originalError: error,\n * linkToProducer: true, // Auto-links to producer span\n * });\n * ```\n *\n * @example Using options object as second param\n * ```typescript\n * ctx.recordDLQ('Processing timeout', {\n * reasonCategory: 'timeout',\n * attemptCount: 5,\n * metadata: { processingTimeMs: 30000 },\n * });\n * ```\n */\n recordDLQ(reason: string, dlqName?: string, options?: DLQOptions): void;\n recordDLQ(reason: string, options?: DLQOptions): void;\n\n /**\n * Record replay of a message from DLQ\n *\n * Use this when processing a message that was replayed from the DLQ\n * to create links for correlation and track replay metrics.\n *\n * @param options - Replay tracking options\n *\n * @example\n * ```typescript\n * export const processReplay = traceConsumer({\n * system: 'kafka',\n * destination: 'orders-dlq-replay',\n * })(ctx => async (message) => {\n * ctx.recordReplay({\n * originalDLQSpanContext: extractSpanContext(message.headers),\n * dlqDwellTimeMs: Date.now() - message.timestamp,\n * replayAttempt: message.replayCount,\n * });\n * await processOrder(message);\n * });\n * ```\n */\n recordReplay(options?: DLQReplayOptions): void;\n\n /**\n * Record retry attempt\n *\n * @param attemptNumber - Current retry attempt (1-based)\n * @param maxAttempts - Maximum retry attempts\n */\n recordRetry(attemptNumber: number, maxAttempts?: number): void;\n\n /**\n * Get the producer span context links extracted from message headers\n *\n * Useful for accessing the producer span context when implementing\n * custom DLQ or retry logic.\n *\n * @returns Array of span links extracted from the message, or empty array\n */\n getProducerLinks(): Link[];\n\n // ---- Message Ordering Methods ----\n\n /**\n * Check if the current message is a duplicate\n *\n * @returns True if the message has been seen before\n */\n isDuplicate(): boolean;\n\n /**\n * Check if the current message arrived out of order\n *\n * @returns Out of order info, or null if in order\n */\n getOutOfOrderInfo(): OutOfOrderInfo | null;\n\n /**\n * Get current sequence number\n *\n * @returns The sequence number, or null if not configured\n */\n getSequenceNumber(): number | null;\n\n /**\n * Get partition key\n *\n * @returns The partition key, or null if not configured\n */\n getPartitionKey(): string | null;\n\n // ---- Consumer Group Methods ----\n\n /**\n * Record a rebalance event\n *\n * Call this when the consumer group undergoes a rebalance to capture\n * the event as a span event with partition assignment details.\n *\n * @param event - The rebalance event details\n *\n * @example\n * ```typescript\n * consumer.on('rebalance', (event) => {\n * ctx.recordRebalance({\n * type: event.type,\n * partitions: event.assignment,\n * generation: event.generationId,\n * timestamp: Date.now(),\n * });\n * });\n * ```\n */\n recordRebalance(event: RebalanceEvent): void;\n\n /**\n * Record a heartbeat event\n *\n * Call this on each heartbeat to track consumer health.\n *\n * @param healthy - Whether the heartbeat was successful\n * @param latencyMs - Optional latency of the heartbeat in milliseconds\n */\n recordHeartbeat(healthy: boolean, latencyMs?: number): void;\n\n /**\n * Record partition lag for a specific partition\n *\n * @param lag - The partition lag information\n */\n recordPartitionLag(lag: PartitionLag): void;\n\n /**\n * Get the current consumer group state\n *\n * @returns The current consumer group state, or null if not configured\n */\n getConsumerGroupState(): ConsumerGroupState | null;\n\n /**\n * Get the consumer member ID\n *\n * @returns The member ID, or null if not available\n */\n getMemberId(): string | null;\n\n /**\n * Get the current partition assignments\n *\n * @returns Array of assigned partitions, or empty array if none\n */\n getAssignedPartitions(): PartitionAssignment[];\n}\n\n// ============================================================================\n// Producer Helper\n// ============================================================================\n\n/**\n * Create a traced message producer function\n *\n * Sets SpanKind.PRODUCER, OTel messaging semantic attributes,\n * and provides context injection helpers.\n *\n * @param config - Producer configuration\n * @returns Factory function that wraps your producer logic\n *\n * @example Kafka producer\n * ```typescript\n * export const publishUserEvent = traceProducer({\n * system: 'kafka',\n * destination: 'user-events',\n * messageIdFrom: (args) => args[0]?.eventId,\n * })(ctx => async (event: UserEvent) => {\n * const headers = ctx.getTraceHeaders();\n * await producer.send({\n * topic: 'user-events',\n * messages: [{\n * key: event.userId,\n * value: JSON.stringify(event),\n * headers,\n * }]\n * });\n * });\n * ```\n *\n * @example SQS producer\n * ```typescript\n * export const sendToSQS = traceProducer({\n * system: 'sqs',\n * destination: 'orders-queue',\n * })(ctx => async (order: Order) => {\n * const headers = ctx.getAllPropagationHeaders();\n * await sqs.sendMessage({\n * QueueUrl: QUEUE_URL,\n * MessageBody: JSON.stringify(order),\n * MessageAttributes: headersToSQSAttributes(headers),\n * });\n * });\n * ```\n */\nexport function traceProducer<TArgs extends unknown[], TReturn>(\n config: ProducerConfig,\n) {\n const spanName = `${config.system}.publish ${config.destination}`;\n\n return (\n fnFactory: (ctx: ProducerContext) => (...args: TArgs) => Promise<TReturn>,\n ): ((...args: TArgs) => Promise<TReturn>) => {\n return trace<TArgs, TReturn>(\n { name: spanName, spanKind: SpanKind.PRODUCER },\n (baseCtx) => {\n // Extend context with producer-specific methods\n const ctx = extendContextForProducer(baseCtx, config);\n\n // Set semantic convention attributes\n setProducerAttributes(ctx, config);\n\n // Call beforeSend callback if provided\n return (...args: TArgs) => {\n // Extract dynamic attributes from args\n setDynamicProducerAttributes(ctx, config, args);\n\n // Apply custom attributes hook if provided\n if (config.customAttributes) {\n const customAttrs = config.customAttributes(ctx, args);\n for (const [key, value] of Object.entries(customAttrs)) {\n if (value !== undefined && value !== null) {\n ctx.setAttribute(key, value as string | number | boolean);\n }\n }\n }\n\n if (config.beforeSend) {\n config.beforeSend(ctx, args);\n }\n\n // Execute user's function\n const userFn = fnFactory(ctx);\n return userFn(...args).catch((error) => {\n if (config.onError) {\n config.onError(error as Error, ctx);\n }\n throw error;\n });\n };\n },\n );\n };\n}\n\n// ============================================================================\n// Consumer Helper\n// ============================================================================\n\n/**\n * Create a traced message consumer function\n *\n * Sets SpanKind.CONSUMER, OTel messaging semantic attributes,\n * automatically extracts links from producer trace headers,\n * and provides DLQ/retry recording helpers.\n *\n * @param config - Consumer configuration\n * @returns Factory function that wraps your consumer logic\n *\n * @example Kafka consumer (single message)\n * ```typescript\n * export const processUserEvent = traceConsumer({\n * system: 'kafka',\n * destination: 'user-events',\n * consumerGroup: 'event-processor',\n * headersFrom: (msg) => msg.headers,\n * })(ctx => async (message: KafkaMessage) => {\n * // Link to producer span is automatically created\n * const event = JSON.parse(message.value.toString());\n * await processEvent(event);\n * });\n * ```\n *\n * @example Kafka consumer (batch mode)\n * ```typescript\n * export const processUserEventBatch = traceConsumer({\n * system: 'kafka',\n * destination: 'user-events',\n * consumerGroup: 'event-processor',\n * batchMode: true,\n * headersFrom: (msg) => msg.headers,\n * lagMetrics: {\n * getCurrentOffset: (msg) => msg.offset,\n * getEndOffset: () => consumer.getHighWatermark(),\n * getPartition: (msg) => msg.partition,\n * },\n * })(ctx => async (messages: KafkaMessage[]) => {\n * // Links to all producer spans are automatically created\n * for (const msg of messages) {\n * await processEvent(JSON.parse(msg.value.toString()));\n * }\n * });\n * ```\n *\n * @example SQS consumer with DLQ handling\n * ```typescript\n * export const processSQSMessage = traceConsumer({\n * system: 'sqs',\n * destination: 'orders-queue',\n * headersFrom: (msg) => sqsAttributesToHeaders(msg.MessageAttributes),\n * onDLQ: (ctx, reason) => {\n * ctx.recordDLQ(reason, 'orders-dlq');\n * },\n * })(ctx => async (message: SQSMessage) => {\n * try {\n * await processOrder(JSON.parse(message.Body));\n * } catch (error) {\n * if (message.ApproximateReceiveCount > 3) {\n * ctx.recordDLQ(error.message);\n * throw error;\n * }\n * ctx.recordRetry(message.ApproximateReceiveCount, 3);\n * throw error;\n * }\n * });\n * ```\n */\nexport function traceConsumer<TArgs extends unknown[], TReturn>(\n config: ConsumerConfig,\n) {\n const operation = config.batchMode ? 'receive' : 'process';\n const spanName = `${config.system}.${operation} ${config.destination}`;\n\n return (\n fnFactory: (ctx: ConsumerContext) => (...args: TArgs) => Promise<TReturn>,\n ): ((...args: TArgs) => Promise<TReturn>) => {\n return trace<TArgs, TReturn>(\n { name: spanName, spanKind: SpanKind.CONSUMER },\n (baseCtx) => {\n // Create mutable storage for producer links (populated during extractAndAddLinks)\n const linkStorage: ProducerLinkStorage = { links: [] };\n\n // Create mutable ordering state (populated during extractOrdering)\n const orderingState: OrderingState = {\n sequenceNumber: null,\n partitionKey: null,\n messageId: null,\n isDuplicate: false,\n outOfOrderInfo: null,\n };\n\n // Create consumer group state\n const groupTracking = config.consumerGroupTracking;\n const groupState: ConsumerGroupStateInternal = {\n memberId:\n typeof groupTracking?.memberId === 'function'\n ? (groupTracking.memberId() ?? null)\n : (groupTracking?.memberId ?? null),\n groupInstanceId:\n typeof groupTracking?.groupInstanceId === 'function'\n ? (groupTracking.groupInstanceId() ?? null)\n : (groupTracking?.groupInstanceId ?? null),\n assignedPartitions: [],\n generation: null,\n isActive: true,\n lastHeartbeat: null,\n state: null,\n };\n\n // Extend context with consumer-specific methods\n const ctx = extendContextForConsumer(\n baseCtx,\n config,\n linkStorage,\n orderingState,\n groupState,\n );\n\n // Set semantic convention attributes\n setConsumerAttributes(ctx, config);\n\n return async (...args: TArgs) => {\n // Extract links from message headers (includes customContextExtractor if provided)\n // This also populates linkStorage.links for getProducerLinks() and DLQ auto-linking\n await extractAndAddLinks(ctx, config, args, linkStorage);\n\n // Extract and process ordering information\n if (config.ordering) {\n extractAndProcessOrdering(ctx, config, args, orderingState);\n }\n\n // Extract lag metrics if configured\n if (config.lagMetrics) {\n await extractLagMetrics(ctx, config.lagMetrics, args);\n }\n\n // Apply custom attributes hook if provided\n if (config.customAttributes) {\n // For batch mode, extract first message; for single mode, use args[0] directly\n const batch = args[0];\n const msg =\n config.batchMode && Array.isArray(batch) && batch.length > 0\n ? batch[0]\n : batch;\n // Only call hook if we have a message\n if (msg !== undefined) {\n const customAttrs = config.customAttributes(ctx, msg);\n for (const [key, value] of Object.entries(customAttrs)) {\n if (value !== undefined && value !== null) {\n ctx.setAttribute(key, value as string | number | boolean);\n }\n }\n }\n }\n\n // Execute user's function\n const userFn = fnFactory(ctx);\n return userFn(...args).catch((error) => {\n if (config.onError) {\n config.onError(error as Error, ctx);\n }\n throw error;\n });\n };\n },\n );\n };\n}\n\n// ============================================================================\n// Helper Functions\n// ============================================================================\n\n/**\n * Extend base context with producer-specific methods\n */\nfunction extendContextForProducer(\n baseCtx: TraceContext,\n config: ProducerConfig,\n): ProducerContext {\n // Create a reference for `this` binding in getFullHeaders\n const producerCtx: ProducerContext = {\n ...baseCtx,\n\n getTraceHeaders(): { traceparent: string; tracestate?: string } {\n const headers: Record<string, string> = {};\n propagation.inject(context.active(), headers);\n\n const result: { traceparent: string; tracestate?: string } = {\n traceparent: headers['traceparent'] || '',\n };\n\n if (headers['tracestate']) {\n result.tracestate = headers['tracestate'];\n }\n\n return result;\n },\n\n getAllPropagationHeaders(): Record<string, string> {\n const headers: Record<string, string> = {};\n propagation.inject(context.active(), headers);\n\n // Include baggage if configured\n if (config.propagateBaggage) {\n const baggage = propagation.getBaggage(context.active());\n if (baggage) {\n const entries: string[] = [];\n for (const [key, value] of baggage.getAllEntries()) {\n entries.push(\n `${encodeURIComponent(key)}=${encodeURIComponent(value.value)}`,\n );\n }\n if (entries.length > 0) {\n headers['baggage'] = entries.join(',');\n }\n }\n }\n\n return headers;\n },\n\n getFullHeaders(): Record<string, string> {\n // Start with all propagation headers (W3C + baggage)\n const headers = producerCtx.getAllPropagationHeaders();\n\n // Add custom headers from hook if configured\n if (config.customHeaders) {\n const customHeaders = config.customHeaders(producerCtx);\n Object.assign(headers, customHeaders);\n }\n\n return headers;\n },\n };\n\n return producerCtx;\n}\n\n/**\n * Mutable storage for producer links (populated during extractAndAddLinks)\n */\ninterface ProducerLinkStorage {\n links: Link[];\n}\n\n/**\n * Ordering state for a single message\n */\ninterface OrderingState {\n sequenceNumber: number | null;\n partitionKey: string | null;\n messageId: string | null;\n isDuplicate: boolean;\n outOfOrderInfo: OutOfOrderInfo | null;\n}\n\n/**\n * Global sequence tracker for out-of-order detection (per partition)\n */\nconst sequenceTrackers = new Map<string, number>();\n\n/**\n * Global deduplication window (LRU-style using Map insertion order)\n */\nconst deduplicationWindow = new Map<string, number>();\nconst DEFAULT_DEDUP_WINDOW_SIZE = 1000;\n\n/**\n * Clean up old entries from deduplication window\n */\nfunction trimDeduplicationWindow(maxSize: number): void {\n if (deduplicationWindow.size > maxSize) {\n const excess = deduplicationWindow.size - maxSize;\n const iterator = deduplicationWindow.keys();\n for (let i = 0; i < excess; i++) {\n const key = iterator.next().value;\n if (key) deduplicationWindow.delete(key);\n }\n }\n}\n\n/**\n * Consumer group state tracking for a single consumer\n */\ninterface ConsumerGroupStateInternal {\n memberId: string | null;\n groupInstanceId: string | null;\n assignedPartitions: PartitionAssignment[];\n generation: number | null;\n isActive: boolean;\n lastHeartbeat: number | null;\n state: ConsumerGroupState['state'] | null;\n}\n\n/**\n * Extend base context with consumer-specific methods\n */\nfunction extendContextForConsumer(\n baseCtx: TraceContext,\n config: ConsumerConfig,\n linkStorage: ProducerLinkStorage,\n orderingState: OrderingState,\n groupState: ConsumerGroupStateInternal,\n): ConsumerContext {\n const consumerCtx: ConsumerContext = {\n ...baseCtx,\n\n recordDLQ(\n reason: string,\n dlqNameOrOptions?: string | DLQOptions,\n optionsParam?: DLQOptions,\n ): void {\n // Parse overloaded arguments\n let dlqName: string | undefined;\n let options: DLQOptions | undefined;\n\n if (typeof dlqNameOrOptions === 'string') {\n dlqName = dlqNameOrOptions;\n options = optionsParam;\n } else if (typeof dlqNameOrOptions === 'object') {\n options = dlqNameOrOptions;\n }\n\n // Default linkToProducer to true\n const linkToProducer = options?.linkToProducer ?? true;\n\n // Set basic DLQ attributes\n baseCtx.setAttribute('messaging.dlq.reason', reason);\n if (dlqName) {\n baseCtx.setAttribute('messaging.dlq.name', dlqName);\n }\n\n // Set enhanced DLQ attributes\n if (options?.reasonCategory) {\n baseCtx.setAttribute(\n 'messaging.dlq.reason_category',\n options.reasonCategory,\n );\n }\n if (options?.attemptCount !== undefined) {\n baseCtx.setAttribute(\n 'messaging.dlq.attempt_count',\n options.attemptCount,\n );\n }\n if (options?.originalError) {\n baseCtx.setAttribute(\n 'messaging.dlq.error.type',\n options.originalError.name,\n );\n baseCtx.setAttribute(\n 'messaging.dlq.error.message',\n options.originalError.message,\n );\n }\n\n // Set custom metadata\n if (options?.metadata) {\n for (const [key, value] of Object.entries(options.metadata)) {\n baseCtx.setAttribute(`messaging.dlq.metadata.${key}`, value);\n }\n }\n\n // Auto-link to producer span if available and enabled\n const producerLink = linkStorage.links[0];\n if (linkToProducer && producerLink) {\n baseCtx.setAttribute(\n 'messaging.dlq.producer_trace_id',\n producerLink.context.traceId,\n );\n baseCtx.setAttribute(\n 'messaging.dlq.producer_span_id',\n producerLink.context.spanId,\n );\n }\n\n // Record event with all attributes\n const eventAttrs: Record<string, string | number | boolean> = {\n 'messaging.dlq.reason': reason,\n ...(dlqName && { 'messaging.dlq.name': dlqName }),\n ...(options?.reasonCategory && {\n 'messaging.dlq.reason_category': options.reasonCategory,\n }),\n ...(options?.attemptCount !== undefined && {\n 'messaging.dlq.attempt_count': options.attemptCount,\n }),\n ...(options?.originalError && {\n 'messaging.dlq.error.type': options.originalError.name,\n 'messaging.dlq.error.message': options.originalError.message,\n }),\n };\n\n // Add producer link info to event if available\n if (linkToProducer && producerLink) {\n eventAttrs['messaging.dlq.producer_trace_id'] =\n producerLink.context.traceId;\n eventAttrs['messaging.dlq.producer_span_id'] =\n producerLink.context.spanId;\n }\n\n emitCorrelatedEvent(baseCtx, 'dlq_routed', eventAttrs);\n\n // Call user's onDLQ callback if provided\n if (config.onDLQ) {\n config.onDLQ(consumerCtx, reason);\n }\n },\n\n recordReplay(options?: DLQReplayOptions): void {\n baseCtx.setAttribute('messaging.replay', true);\n\n if (options?.replayAttempt !== undefined) {\n baseCtx.setAttribute('messaging.replay.attempt', options.replayAttempt);\n }\n if (options?.dlqDwellTimeMs !== undefined) {\n baseCtx.setAttribute(\n 'messaging.replay.dwell_time_ms',\n options.dlqDwellTimeMs,\n );\n }\n\n // Create span link to original DLQ span if provided\n if (options?.originalDLQSpanContext) {\n baseCtx.addLinks([\n {\n context: options.originalDLQSpanContext,\n attributes: { 'messaging.link.source': 'dlq_replay' },\n },\n ]);\n }\n\n const eventAttrs: Record<string, string | number | boolean> = {\n 'messaging.replay': true,\n ...(options?.replayAttempt !== undefined && {\n 'messaging.replay.attempt': options.replayAttempt,\n }),\n ...(options?.dlqDwellTimeMs !== undefined && {\n 'messaging.replay.dwell_time_ms': options.dlqDwellTimeMs,\n }),\n };\n\n emitCorrelatedEvent(baseCtx, 'dlq_replay', eventAttrs);\n },\n\n recordRetry(attemptNumber: number, maxAttempts?: number): void {\n baseCtx.setAttribute('messaging.retry.count', attemptNumber);\n if (maxAttempts !== undefined) {\n baseCtx.setAttribute('messaging.retry.max_attempts', maxAttempts);\n }\n emitCorrelatedEvent(baseCtx, 'retry_attempt', {\n 'messaging.retry.count': attemptNumber,\n ...(maxAttempts !== undefined && {\n 'messaging.retry.max_attempts': maxAttempts,\n }),\n });\n },\n\n getProducerLinks(): Link[] {\n return [...linkStorage.links];\n },\n\n // ---- Ordering Methods ----\n\n isDuplicate(): boolean {\n return orderingState.isDuplicate;\n },\n\n getOutOfOrderInfo(): OutOfOrderInfo | null {\n return orderingState.outOfOrderInfo;\n },\n\n getSequenceNumber(): number | null {\n return orderingState.sequenceNumber;\n },\n\n getPartitionKey(): string | null {\n return orderingState.partitionKey;\n },\n\n // ---- Consumer Group Methods ----\n\n recordRebalance(event: RebalanceEvent): void {\n // Update internal state including consumer group state\n if (event.type === 'assigned') {\n groupState.assignedPartitions = event.partitions;\n groupState.isActive = true;\n // After assignment completes, group is stable\n groupState.state = 'stable';\n } else if (event.type === 'revoked' || event.type === 'lost') {\n // Remove revoked partitions from assignments\n const revokedSet = new Set(\n event.partitions.map((p) => `${p.topic}:${p.partition}`),\n );\n groupState.assignedPartitions = groupState.assignedPartitions.filter(\n (p) => !revokedSet.has(`${p.topic}:${p.partition}`),\n );\n if (event.type === 'lost') {\n groupState.isActive = false;\n // Consumer lost connection, mark as dead\n groupState.state = 'dead';\n } else {\n // Revoked means rebalance is starting\n // If no partitions remain, consumer is empty; otherwise preparing for rebalance\n groupState.state =\n groupState.assignedPartitions.length === 0\n ? 'empty'\n : 'preparing_rebalance';\n }\n }\n\n if (event.generation !== undefined) {\n groupState.generation = event.generation;\n }\n if (event.memberId) {\n groupState.memberId = event.memberId;\n }\n\n // Set span attributes\n baseCtx.setAttribute(\n 'messaging.consumer_group.rebalance.type',\n event.type,\n );\n baseCtx.setAttribute(\n 'messaging.consumer_group.rebalance.partition_count',\n event.partitions.length,\n );\n if (event.generation !== undefined) {\n baseCtx.setAttribute(\n 'messaging.consumer_group.generation',\n event.generation,\n );\n }\n if (event.memberId) {\n baseCtx.setAttribute(\n 'messaging.consumer_group.member_id',\n event.memberId,\n );\n }\n if (event.reason) {\n baseCtx.setAttribute(\n 'messaging.consumer_group.rebalance.reason',\n event.reason,\n );\n }\n\n // Set the new state on the span\n if (groupState.state) {\n baseCtx.setAttribute(\n 'messaging.consumer_group.state',\n groupState.state,\n );\n }\n\n // Record event\n const eventAttrs: Record<string, string | number | boolean> = {\n 'messaging.consumer_group.rebalance.type': event.type,\n 'messaging.consumer_group.rebalance.partition_count':\n event.partitions.length,\n 'messaging.consumer_group.rebalance.timestamp': event.timestamp,\n ...(event.generation !== undefined && {\n 'messaging.consumer_group.generation': event.generation,\n }),\n ...(event.memberId && {\n 'messaging.consumer_group.member_id': event.memberId,\n }),\n ...(event.reason && {\n 'messaging.consumer_group.rebalance.reason': event.reason,\n }),\n ...(groupState.state && {\n 'messaging.consumer_group.state': groupState.state,\n }),\n };\n\n // Add partition details if not too many\n if (event.partitions.length <= 10) {\n eventAttrs['messaging.consumer_group.rebalance.partitions'] =\n event.partitions.map((p) => `${p.topic}:${p.partition}`).join(',');\n }\n\n emitCorrelatedEvent(baseCtx, `consumer_group_${event.type}`, eventAttrs);\n\n // Call user's onRebalance callback if provided\n if (config.consumerGroupTracking?.onRebalance) {\n config.consumerGroupTracking.onRebalance(consumerCtx, event);\n }\n\n // Call specific callbacks\n if (\n event.type === 'assigned' &&\n config.consumerGroupTracking?.onPartitionsAssigned\n ) {\n config.consumerGroupTracking.onPartitionsAssigned(\n consumerCtx,\n event.partitions,\n );\n }\n if (\n event.type === 'revoked' &&\n config.consumerGroupTracking?.onPartitionsRevoked\n ) {\n config.consumerGroupTracking.onPartitionsRevoked(\n consumerCtx,\n event.partitions,\n );\n }\n },\n\n recordHeartbeat(healthy: boolean, latencyMs?: number): void {\n groupState.lastHeartbeat = Date.now();\n\n baseCtx.setAttribute(\n 'messaging.consumer_group.heartbeat.healthy',\n healthy,\n );\n if (latencyMs !== undefined) {\n baseCtx.setAttribute(\n 'messaging.consumer_group.heartbeat.latency_ms',\n latencyMs,\n );\n }\n\n emitCorrelatedEvent(baseCtx, 'consumer_group_heartbeat', {\n 'messaging.consumer_group.heartbeat.healthy': healthy,\n 'messaging.consumer_group.heartbeat.timestamp':\n groupState.lastHeartbeat,\n ...(latencyMs !== undefined && {\n 'messaging.consumer_group.heartbeat.latency_ms': latencyMs,\n }),\n });\n },\n\n recordPartitionLag(lag: PartitionLag): void {\n const prefix = `messaging.consumer_group.lag.${lag.topic}.${lag.partition}`;\n\n baseCtx.setAttribute(`${prefix}.current_offset`, lag.currentOffset);\n baseCtx.setAttribute(`${prefix}.end_offset`, lag.endOffset);\n baseCtx.setAttribute(`${prefix}.lag`, lag.lag);\n\n emitCorrelatedEvent(baseCtx, 'partition_lag_recorded', {\n 'messaging.consumer_group.lag.topic': lag.topic,\n 'messaging.consumer_group.lag.partition': lag.partition,\n 'messaging.consumer_group.lag.current_offset': lag.currentOffset,\n 'messaging.consumer_group.lag.end_offset': lag.endOffset,\n 'messaging.consumer_group.lag.lag': lag.lag,\n 'messaging.consumer_group.lag.timestamp': lag.timestamp,\n });\n },\n\n getConsumerGroupState(): ConsumerGroupState | null {\n if (!config.consumerGroup) {\n return null;\n }\n\n return {\n groupId: config.consumerGroup,\n memberId: groupState.memberId ?? undefined,\n groupInstanceId: groupState.groupInstanceId ?? undefined,\n assignedPartitions: [...groupState.assignedPartitions],\n generation: groupState.generation ?? undefined,\n isActive: groupState.isActive,\n lastHeartbeat: groupState.lastHeartbeat ?? undefined,\n state: groupState.state ?? undefined,\n };\n },\n\n getMemberId(): string | null {\n return groupState.memberId;\n },\n\n getAssignedPartitions(): PartitionAssignment[] {\n return [...groupState.assignedPartitions];\n },\n };\n\n return consumerCtx;\n}\n\n/**\n * Set OTel semantic convention attributes for producer\n */\nfunction setProducerAttributes(\n ctx: TraceContext,\n config: ProducerConfig,\n): void {\n ctx.setAttribute('messaging.system', config.system);\n ctx.setAttribute('messaging.operation', 'publish');\n ctx.setAttribute('messaging.destination.name', config.destination);\n\n // Set system-specific destination attribute\n if (config.system === 'kafka') {\n ctx.setAttribute('messaging.kafka.destination.topic', config.destination);\n }\n\n // Set custom attributes\n if (config.attributes) {\n setCustomAttributes(ctx,