// autotel
// Version:
// Write Once, Observe Anywhere
// 865 lines (861 loc) • 27.1 kB
// text/typescript
import { Attributes, SpanContext, Link, AttributeValue } from '@opentelemetry/api';
import { T as TraceContext } from './trace-context-t5X1AP-e.cjs';
/**
* Messaging helpers for event-driven architectures
*
* Provides specialized tracing for message producers and consumers
* with automatic context propagation, link extraction, and OTel
* semantic convention compliance.
*
* @example Producer
* ```typescript
* import { traceProducer } from 'autotel/messaging';
*
* export const publishEvent = traceProducer({
* system: 'kafka',
* destination: 'user-events',
* })(ctx => async (event: UserEvent) => {
* const headers = ctx.getTraceHeaders();
* await producer.send({
* topic: 'user-events',
* messages: [{ value: JSON.stringify(event), headers }]
* });
* });
* ```
*
* @example Consumer
* ```typescript
* import { traceConsumer } from 'autotel/messaging';
*
* export const processEvents = traceConsumer({
* system: 'kafka',
* destination: 'user-events',
* consumerGroup: 'event-processor',
* batchMode: true,
* })(ctx => async (messages: KafkaMessage[]) => {
* // Links to producer spans are automatically extracted
* for (const msg of messages) {
* await processMessage(msg);
* }
* });
* ```
*
* @module
*/
/**
 * Messaging systems that have a well-known name.
 *
 * The trailing `string & {}` member keeps the union open: any other system
 * name is still accepted, while the listed literals remain visible to
 * editor auto-completion.
 */
type MessagingSystem =
  | 'kafka'
  | 'rabbitmq'
  | 'sqs'
  | 'sns'
  | 'pubsub'
  | 'activemq'
  | 'azure_servicebus'
  | 'eventhubs'
  | (string & {});
/**
 * Kinds of messaging operation a span can describe.
 */
type MessagingOperation =
  | 'publish'
  | 'receive'
  | 'process'
  | 'settle';
/**
 * Options that control how a producer function is traced.
 */
interface ProducerConfig {
  /** Messaging system in use (kafka, rabbitmq, sqs, etc.). */
  system: MessagingSystem;

  /** Name of the destination, i.e. the topic or queue. */
  destination: string;

  /** Extracts the message ID from the call arguments. */
  messageIdFrom?: string | ((args: unknown[]) => string | undefined);

  /** Extracts the partition from the call arguments (Kafka-specific). */
  partitionFrom?: string | ((args: unknown[]) => number | undefined);

  /** Extracts the message key from the call arguments (Kafka-specific). */
  keyFrom?: string | ((args: unknown[]) => string | undefined);

  /** Extra attributes to set on the span. */
  attributes?: Attributes;

  /** Whether baggage is propagated in the message headers. */
  propagateBaggage?: boolean;

  /** Invoked before sending; intended for setting custom attributes. */
  beforeSend?: (ctx: ProducerContext, args: unknown[]) => void;

  /** Invoked when an error occurs. */
  onError?: (error: Error, ctx: ProducerContext) => void;

  /**
   * Hook that contributes system-specific span attributes.
   *
   * Intended for messaging systems without explicit support
   * (e.g., NATS, Temporal, Cloudflare Queues, Redis Streams).
   *
   * @example NATS attributes
   * ```typescript
   * customAttributes: (ctx, args) => ({
   *   'nats.subject': args[0].subject,
   *   'nats.reply_to': args[0].replyTo,
   *   'nats.stream': 'orders',
   * })
   * ```
   *
   * @example Temporal attributes
   * ```typescript
   * customAttributes: (ctx, args) => ({
   *   'temporal.workflow_id': args[0].workflowId,
   *   'temporal.run_id': args[0].runId,
   *   'temporal.task_queue': 'orders-queue',
   * })
   * ```
   */
  customAttributes?: (
    ctx: ProducerContext,
    args: unknown[],
  ) => Record<string, AttributeValue>;

  /**
   * Hook that injects additional headers beyond W3C `traceparent`.
   *
   * Intended for systems whose context propagation uses a non-standard
   * header format.
   *
   * @example Datadog headers
   * ```typescript
   * customHeaders: (ctx) => ({
   *   'x-datadog-trace-id': ctx.getTraceId(),
   *   'x-datadog-parent-id': ctx.getSpanId(),
   * })
   * ```
   *
   * @example Custom correlation headers
   * ```typescript
   * customHeaders: (ctx) => ({
   *   'x-correlation-id': correlationId,
   *   'x-request-id': requestId,
   * })
   * ```
   */
  customHeaders?: (ctx: ProducerContext) => Record<string, string>;
}
/**
 * Options that control how a consumer function is traced.
 */
interface ConsumerConfig {
  /** Messaging system in use (kafka, rabbitmq, sqs, etc.). */
  system: MessagingSystem;

  /** Name of the destination, i.e. the topic or queue. */
  destination: string;

  /** Name of the consumer group. */
  consumerGroup?: string;

  /** Extracts the headers of a message; they are used to create links. */
  headersFrom?:
    | string
    | ((msg: unknown) => Record<string, string> | undefined);

  /** Turns on batch mode, in which links are extracted from all messages. */
  batchMode?: boolean;

  /** Whether baggage is extracted from the message headers. */
  extractBaggage?: boolean;

  /** Extra attributes to set on the span. */
  attributes?: Attributes;

  /** Extraction of consumer lag metrics. */
  lagMetrics?: LagMetricsConfig;

  /** Invoked when a message goes to the DLQ. */
  onDLQ?: (ctx: ConsumerContext, reason: string) => void;

  /** Invoked when an error occurs. */
  onError?: (error: Error, ctx: ConsumerContext) => void;

  /**
   * Message ordering settings.
   *
   * Turns on sequence tracking, out-of-order detection, and deduplication.
   *
   * @example Kafka ordering
   * ```typescript
   * ordering: {
   *   sequenceFrom: (msg) => msg.offset,
   *   partitionKeyFrom: (msg) => msg.key,
   *   detectOutOfOrder: true,
   *   onOutOfOrder: (ctx, info) => {
   *     console.warn(`Out of order: expected ${info.expectedSequence}, got ${info.currentSequence}`);
   *   },
   * }
   * ```
   */
  ordering?: OrderingConfig;

  /**
   * Consumer group tracking settings.
   *
   * Gives visibility into consumer group state: membership, partition
   * assignments, and rebalancing events.
   *
   * @example Kafka consumer group tracking
   * ```typescript
   * consumerGroupTracking: {
   *   memberId: () => consumer.memberId,
   *   groupInstanceId: process.env.KAFKA_GROUP_INSTANCE_ID,
   *   onRebalance: (ctx, event) => {
   *     if (event.type === 'revoked') {
   *       logger.warn('Partitions revoked', event.partitions);
   *     }
   *   },
   *   trackPartitionLag: true,
   * }
   * ```
   */
  consumerGroupTracking?: ConsumerGroupTrackingConfig;

  /**
   * Hook that contributes system-specific span attributes.
   *
   * Intended for messaging systems without explicit support
   * (e.g., NATS, Temporal, Cloudflare Queues, Redis Streams).
   *
   * @example NATS consumer attributes
   * ```typescript
   * customAttributes: (ctx, msg) => ({
   *   'nats.subject': msg.subject,
   *   'nats.stream': msg.info?.stream,
   *   'nats.consumer': msg.info?.consumer,
   *   'nats.delivered_count': msg.info?.redeliveryCount,
   * })
   * ```
   *
   * @example Cloudflare Queue attributes
   * ```typescript
   * customAttributes: (ctx, msg) => ({
   *   'cloudflare.queue_id': msg.id,
   *   'cloudflare.timestamp_ms': msg.timestamp.getTime(),
   *   'cloudflare.attempts': msg.attempts,
   * })
   * ```
   */
  customAttributes?: (
    ctx: ConsumerContext,
    msg: unknown,
  ) => Record<string, AttributeValue>;

  /**
   * Hook that extracts the parent span context from headers that are not
   * W3C `traceparent`.
   *
   * Intended for systems that use non-standard header formats. Return
   * `null` when no context can be extracted.
   *
   * @example Datadog context extraction
   * ```typescript
   * customContextExtractor: (headers) => {
   *   const traceId = headers['x-datadog-trace-id'];
   *   const spanId = headers['x-datadog-parent-id'];
   *   if (!traceId || !spanId) return null;
   *   return {
   *     traceId: traceIdToOtel(traceId),
   *     spanId: spanIdToOtel(spanId),
   *     traceFlags: TraceFlags.SAMPLED,
   *   };
   * }
   * ```
   *
   * @example B3 format extraction
   * ```typescript
   * customContextExtractor: (headers) => {
   *   const traceId = headers['x-b3-traceid'];
   *   const spanId = headers['x-b3-spanid'];
   *   const sampled = headers['x-b3-sampled'] === '1';
   *   if (!traceId || !spanId) return null;
   *   return {
   *     traceId,
   *     spanId,
   *     traceFlags: sampled ? TraceFlags.SAMPLED : TraceFlags.NONE,
   *   };
   * }
   * ```
   */
  customContextExtractor?: (
    headers: Record<string, string>,
  ) => SpanContext | null;
}
/**
 * Accessors used to derive consumer lag metrics.
 */
interface LagMetricsConfig {
  /** Returns the offset of the current message. */
  getCurrentOffset?: (msg: unknown) => number | undefined;

  /** Returns the end offset (high watermark); may be asynchronous. */
  getEndOffset?: () => number | Promise<number>;

  /** Returns the committed offset; may be asynchronous. */
  getCommittedOffset?: () => number | Promise<number>;

  /** Returns the partition of a message. */
  getPartition?: (msg: unknown) => number | undefined;
}
/**
 * Settings for tracking message ordering.
 */
interface OrderingConfig {
  /**
   * Returns the sequence number of a message.
   *
   * Sequence numbers make out-of-order detection and gap analysis possible.
   *
   * @example Kafka offset
   * ```typescript
   * sequenceFrom: (msg) => msg.offset
   * ```
   */
  sequenceFrom?: (msg: unknown) => number | undefined;

  /**
   * Returns the partition key of a message.
   *
   * In Kafka, the partition key determines message ordering.
   *
   * @example Message key
   * ```typescript
   * partitionKeyFrom: (msg) => msg.key
   * ```
   */
  partitionKeyFrom?: (msg: unknown) => string | undefined;

  /**
   * Returns the message ID used for deduplication, i.e. for detecting
   * duplicate messages.
   *
   * @example Idempotency key
   * ```typescript
   * messageIdFrom: (msg) => msg.headers['idempotency-key']
   * ```
   */
  messageIdFrom?: (msg: unknown) => string | undefined;

  /**
   * Turns on out-of-order detection.
   *
   * Sequence numbers are tracked per partition, and messages that arrive
   * out of order are detected.
   *
   * @default false
   */
  detectOutOfOrder?: boolean;

  /**
   * Turns on duplicate detection.
   *
   * Message IDs are tracked, and duplicates within the window are detected.
   *
   * @default false
   */
  detectDuplicates?: boolean;

  /**
   * Size of the deduplication window, as a number of message IDs to track.
   *
   * @default 1000
   */
  deduplicationWindowSize?: number;

  /** Invoked when an out-of-order message is detected. */
  onOutOfOrder?: (ctx: ConsumerContext, info: OutOfOrderInfo) => void;

  /** Invoked when a duplicate message is detected. */
  onDuplicate?: (ctx: ConsumerContext, messageId: string) => void;
}
/**
 * Details describing a message that arrived out of order.
 */
interface OutOfOrderInfo {
  /** Sequence number of the current message. */
  currentSequence: number;

  /** Sequence number that was expected. */
  expectedSequence: number;

  /** Partition key, when one is available. */
  partitionKey?: string;

  /** Size of the gap (positive = gap, negative = out of order). */
  gap: number;
}
/**
 * Settings for consumer group tracking.
 *
 * Gives visibility into consumer group state: membership, partition
 * assignments, and rebalancing events.
 *
 * @example Kafka consumer group tracking
 * ```typescript
 * consumerGroupTracking: {
 *   memberId: consumer.memberId,
 *   groupInstanceId: process.env.CONSUMER_ID,
 *   onRebalance: (ctx, event) => {
 *     if (event.type === 'assigned') {
 *       console.log(`Assigned partitions: ${event.partitions}`);
 *     }
 *   },
 * }
 * ```
 */
interface ConsumerGroupTrackingConfig {
  /**
   * Member ID of this consumer: the unique identifier the broker assigned
   * to it. Either a fixed value or a function that returns it.
   */
  memberId?: string | (() => string | undefined);

  /**
   * Static group instance ID (for static membership).
   *
   * When set, static group membership is enabled, which prevents
   * rebalances when consumers restart.
   */
  groupInstanceId?: string | (() => string | undefined);

  /** Invoked when a rebalance occurs. */
  onRebalance?: (ctx: ConsumerContext, event: RebalanceEvent) => void;

  /** Invoked when partitions are assigned. */
  onPartitionsAssigned?: (
    ctx: ConsumerContext,
    partitions: PartitionAssignment[],
  ) => void;

  /** Invoked when partitions are revoked. */
  onPartitionsRevoked?: (
    ctx: ConsumerContext,
    partitions: PartitionAssignment[],
  ) => void;

  /**
   * Whether consumer lag is tracked per partition.
   *
   * @default true
   */
  trackPartitionLag?: boolean;

  /**
   * Whether consumer heartbeat health is tracked.
   *
   * @default false
   */
  trackHeartbeat?: boolean;

  /** Heartbeat interval in milliseconds (used for health tracking). */
  heartbeatIntervalMs?: number;
}
/**
 * Kinds of rebalance event.
 */
type RebalanceType =
  | 'assigned'
  | 'revoked'
  | 'lost';
/**
 * Describes a single rebalance event.
 */
interface RebalanceEvent {
  /** Kind of rebalance event. */
  type: RebalanceType;

  /** Partitions the rebalance affected. */
  partitions: PartitionAssignment[];

  /** Timestamp of the rebalance event. */
  timestamp: number;

  /** Generation ID (incremented on each rebalance). */
  generation?: number;

  /** Member ID of the consumer. */
  memberId?: string;

  /** Reason for the rebalance, when available. */
  reason?: string;
}
/**
 * Describes one assigned partition.
 */
interface PartitionAssignment {
  /** Name of the topic. */
  topic: string;

  /** Number of the partition. */
  partition: number;

  /** Initial offset, when available. */
  offset?: number;

  /** Metadata, when available. */
  metadata?: string;
}
/**
 * Snapshot of the state of a consumer group.
 */
interface ConsumerGroupState {
  /** Name of the consumer group. */
  groupId: string;

  /** Member ID of the consumer. */
  memberId?: string;

  /** Static instance ID (when static membership is used). */
  groupInstanceId?: string;

  /** Partitions that are currently assigned. */
  assignedPartitions: PartitionAssignment[];

  /** Generation ID of the group. */
  generation?: number;

  /** Whether the consumer is currently active. */
  isActive: boolean;

  /** Timestamp of the last heartbeat. */
  lastHeartbeat?: number;

  /**
   * Consumer state (stable, preparing_rebalance, completing_rebalance,
   * dead, empty).
   */
  state?:
    | 'stable'
    | 'preparing_rebalance'
    | 'completing_rebalance'
    | 'dead'
    | 'empty';
}
/**
 * Lag measurement for a single partition.
 */
interface PartitionLag {
  /** Name of the topic. */
  topic: string;

  /** Number of the partition. */
  partition: number;

  /** Current offset of the consumer. */
  currentOffset: number;

  /** End offset (high watermark). */
  endOffset: number;

  /** The calculated lag. */
  lag: number;

  /** Timestamp at which the measurement was taken. */
  timestamp: number;
}
/**
 * Categories of failure that lead to DLQ routing.
 */
type DLQReasonCategory =
  | 'validation'
  | 'processing'
  | 'timeout'
  | 'poison'
  | 'unknown';
/**
 * Options for enhanced DLQ recording.
 */
interface DLQOptions {
  /**
   * Link automatically to the producer span context.
   *
   * When true, a span link is created from the DLQ event back to the
   * original producer span so the two can be correlated.
   *
   * @default true
   */
  linkToProducer?: boolean;

  /**
   * Category of the failure that caused the DLQ routing.
   *
   * - validation: the message failed schema/format validation
   * - processing: a business logic error occurred during processing
   * - timeout: processing exceeded the allowed time
   * - poison: the message causes repeated failures (poison pill)
   * - unknown: the failure is uncategorized
   */
  reasonCategory?: DLQReasonCategory;

  /** Number of processing attempts made before DLQ routing. */
  attemptCount?: number;

  /**
   * The original error that caused the DLQ routing.
   *
   * Its details are recorded as span attributes for debugging.
   */
  originalError?: Error;

  /** Extra metadata to record together with the DLQ event. */
  metadata?: Record<string, string | number | boolean>;
}
/**
 * Options for recording the replay of a DLQ message.
 */
interface DLQReplayOptions {
  /**
   * Original span context taken from the DLQ message.
   *
   * When provided, a span link is created to correlate the replay with the
   * original failure.
   */
  originalDLQSpanContext?: SpanContext;

  /** Time the message spent in the DLQ before replay, in milliseconds. */
  dlqDwellTimeMs?: number;

  /** Retry attempt number of this replay. */
  replayAttempt?: number;
}
/**
 * Trace context handed to producers, extended with header injection helpers.
 */
interface ProducerContext extends TraceContext {
  /**
   * Returns the W3C trace context headers to inject into a message.
   *
   * @returns Headers object holding `traceparent` and, optionally,
   *          `tracestate`
   *
   * @example
   * ```typescript
   * const headers = ctx.getTraceHeaders();
   * await producer.send({
   *   topic: 'events',
   *   messages: [{ value: data, headers }]
   * });
   * ```
   */
  getTraceHeaders(): {
    traceparent: string;
    tracestate?: string;
  };

  /**
   * Returns all propagation headers, including baggage when it is enabled.
   *
   * @returns Headers object with all W3C trace context headers
   */
  getAllPropagationHeaders(): Record<string, string>;

  /**
   * Returns every header, including those produced by the `customHeaders`
   * hook.
   *
   * The result combines the W3C trace context headers, baggage (when
   * enabled), and any custom headers defined through the `customHeaders`
   * hook.
   *
   * @returns Combined headers object
   *
   * @example
   * ```typescript
   * const headers = ctx.getFullHeaders();
   * // Contains: traceparent, tracestate, baggage (if enabled), and custom headers
   * await producer.send({ topic, messages: [{ value, headers }] });
   * ```
   */
  getFullHeaders(): Record<string, string>;
}
/**
 * Trace context handed to consumers, extended with DLQ, retry, ordering and
 * consumer group helpers.
 */
interface ConsumerContext extends TraceContext {
  /**
   * Records that a message is being sent to the DLQ.
   *
   * Supports auto-linking to the producer span, failure categorization,
   * and detailed error recording for comprehensive DLQ observability.
   *
   * @param reason - Human-readable reason for the DLQ routing
   * @param dlqName - Name of the DLQ
   * @param options - Enhanced DLQ options
   *
   * @example Basic usage (backwards compatible)
   * ```typescript
   * ctx.recordDLQ('Schema validation failed', 'orders-dlq');
   * ```
   *
   * @example Enhanced usage with options
   * ```typescript
   * ctx.recordDLQ('Invalid order total', 'orders-dlq', {
   *   reasonCategory: 'validation',
   *   attemptCount: 3,
   *   originalError: error,
   *   linkToProducer: true, // Auto-links to producer span
   * });
   * ```
   */
  recordDLQ(reason: string, dlqName?: string, options?: DLQOptions): void;

  /**
   * Records that a message is being sent to the DLQ, passing the enhanced
   * options as the second argument instead of a DLQ name.
   *
   * @param reason - Human-readable reason for the DLQ routing
   * @param options - Enhanced DLQ options
   *
   * @example Using options object as second param
   * ```typescript
   * ctx.recordDLQ('Processing timeout', {
   *   reasonCategory: 'timeout',
   *   attemptCount: 5,
   *   metadata: { processingTimeMs: 30000 },
   * });
   * ```
   */
  recordDLQ(reason: string, options?: DLQOptions): void;

  /**
   * Records that a message was replayed from the DLQ.
   *
   * Call it while processing a message replayed from the DLQ so that links
   * are created for correlation and replay metrics are tracked.
   *
   * @param options - Replay tracking options
   *
   * @example
   * ```typescript
   * export const processReplay = traceConsumer({
   *   system: 'kafka',
   *   destination: 'orders-dlq-replay',
   * })(ctx => async (message) => {
   *   ctx.recordReplay({
   *     originalDLQSpanContext: extractSpanContext(message.headers),
   *     dlqDwellTimeMs: Date.now() - message.timestamp,
   *     replayAttempt: message.replayCount,
   *   });
   *   await processOrder(message);
   * });
   * ```
   */
  recordReplay(options?: DLQReplayOptions): void;

  /**
   * Records a retry attempt.
   *
   * @param attemptNumber - Current retry attempt (1-based)
   * @param maxAttempts - Maximum number of retry attempts
   */
  recordRetry(attemptNumber: number, maxAttempts?: number): void;

  /**
   * Returns the producer span context links extracted from the message
   * headers.
   *
   * Handy when custom DLQ or retry logic needs the producer span context.
   *
   * @returns Span links extracted from the message, or an empty array
   */
  getProducerLinks(): Link[];

  /**
   * Tells whether the current message is a duplicate.
   *
   * @returns True if the message has been seen before
   */
  isDuplicate(): boolean;

  /**
   * Tells whether the current message arrived out of order.
   *
   * @returns Out-of-order info, or null if the message is in order
   */
  getOutOfOrderInfo(): OutOfOrderInfo | null;

  /**
   * Returns the current sequence number.
   *
   * @returns The sequence number, or null if not configured
   */
  getSequenceNumber(): number | null;

  /**
   * Returns the partition key.
   *
   * @returns The partition key, or null if not configured
   */
  getPartitionKey(): string | null;

  /**
   * Records a rebalance event.
   *
   * Call it when the consumer group undergoes a rebalance; the event is
   * captured as a span event with partition assignment details.
   *
   * @param event - Details of the rebalance event
   *
   * @example
   * ```typescript
   * consumer.on('rebalance', (event) => {
   *   ctx.recordRebalance({
   *     type: event.type,
   *     partitions: event.assignment,
   *     generation: event.generationId,
   *     timestamp: Date.now(),
   *   });
   * });
   * ```
   */
  recordRebalance(event: RebalanceEvent): void;

  /**
   * Records a heartbeat event.
   *
   * Call it on every heartbeat to track consumer health.
   *
   * @param healthy - Whether the heartbeat succeeded
   * @param latencyMs - Optional heartbeat latency in milliseconds
   */
  recordHeartbeat(healthy: boolean, latencyMs?: number): void;

  /**
   * Records the lag of a specific partition.
   *
   * @param lag - The partition lag information
   */
  recordPartitionLag(lag: PartitionLag): void;

  /**
   * Returns the current consumer group state.
   *
   * @returns The current consumer group state, or null if not configured
   */
  getConsumerGroupState(): ConsumerGroupState | null;

  /**
   * Returns the member ID of the consumer.
   *
   * @returns The member ID, or null if not available
   */
  getMemberId(): string | null;

  /**
   * Returns the current partition assignments.
   *
   * @returns The assigned partitions, or an empty array if there are none
   */
  getAssignedPartitions(): PartitionAssignment[];
}
/**
 * Create a traced message producer function
 *
 * Sets SpanKind.PRODUCER, OTel messaging semantic attributes,
 * and provides context injection helpers.
 *
 * The returned wrapper is itself generic, so the argument and return types
 * of the wrapped function are inferred from the factory passed to it. With
 * the type parameters only on the outer call (which receives nothing but
 * `config`), they could not be inferred and fell back to
 * `unknown[]` / `unknown`.
 *
 * @param config - Producer configuration
 * @returns Factory function that wraps your producer logic
 *
 * @example Kafka producer
 * ```typescript
 * export const publishUserEvent = traceProducer({
 *   system: 'kafka',
 *   destination: 'user-events',
 *   messageIdFrom: (args) => args[0]?.eventId,
 * })(ctx => async (event: UserEvent) => {
 *   const headers = ctx.getTraceHeaders();
 *   await producer.send({
 *     topic: 'user-events',
 *     messages: [{
 *       key: event.userId,
 *       value: JSON.stringify(event),
 *       headers,
 *     }]
 *   });
 * });
 * ```
 *
 * @example SQS producer
 * ```typescript
 * export const sendToSQS = traceProducer({
 *   system: 'sqs',
 *   destination: 'orders-queue',
 * })(ctx => async (order: Order) => {
 *   const headers = ctx.getAllPropagationHeaders();
 *   await sqs.sendMessage({
 *     QueueUrl: QUEUE_URL,
 *     MessageBody: JSON.stringify(order),
 *     MessageAttributes: headersToSQSAttributes(headers),
 *   });
 * });
 * ```
 */
declare function traceProducer(
  config: ProducerConfig,
): <TArgs extends unknown[], TReturn>(
  fnFactory: (ctx: ProducerContext) => (...args: TArgs) => Promise<TReturn>,
) => (...args: TArgs) => Promise<TReturn>;
/**
 * Create a traced message producer function with explicitly supplied type
 * arguments.
 *
 * This is the original signature, kept as the last overload so that
 * existing calls such as `traceProducer<[UserEvent], void>(config)` and
 * type queries like `ReturnType<typeof traceProducer>` resolve as before.
 *
 * @param config - Producer configuration
 * @returns Factory function that wraps your producer logic
 */
declare function traceProducer<TArgs extends unknown[], TReturn>(
  config: ProducerConfig,
): (
  fnFactory: (ctx: ProducerContext) => (...args: TArgs) => Promise<TReturn>,
) => (...args: TArgs) => Promise<TReturn>;
/**
 * Create a traced message consumer function
 *
 * Sets SpanKind.CONSUMER, OTel messaging semantic attributes,
 * automatically extracts links from producer trace headers,
 * and provides DLQ/retry recording helpers.
 *
 * The returned wrapper is itself generic, so the argument and return types
 * of the wrapped function are inferred from the factory passed to it. With
 * the type parameters only on the outer call (which receives nothing but
 * `config`), they could not be inferred and fell back to
 * `unknown[]` / `unknown`.
 *
 * @param config - Consumer configuration
 * @returns Factory function that wraps your consumer logic
 *
 * @example Kafka consumer (single message)
 * ```typescript
 * export const processUserEvent = traceConsumer({
 *   system: 'kafka',
 *   destination: 'user-events',
 *   consumerGroup: 'event-processor',
 *   headersFrom: (msg) => msg.headers,
 * })(ctx => async (message: KafkaMessage) => {
 *   // Link to producer span is automatically created
 *   const event = JSON.parse(message.value.toString());
 *   await processEvent(event);
 * });
 * ```
 *
 * @example Kafka consumer (batch mode)
 * ```typescript
 * export const processUserEventBatch = traceConsumer({
 *   system: 'kafka',
 *   destination: 'user-events',
 *   consumerGroup: 'event-processor',
 *   batchMode: true,
 *   headersFrom: (msg) => msg.headers,
 *   lagMetrics: {
 *     getCurrentOffset: (msg) => msg.offset,
 *     getEndOffset: () => consumer.getHighWatermark(),
 *     getPartition: (msg) => msg.partition,
 *   },
 * })(ctx => async (messages: KafkaMessage[]) => {
 *   // Links to all producer spans are automatically created
 *   for (const msg of messages) {
 *     await processEvent(JSON.parse(msg.value.toString()));
 *   }
 * });
 * ```
 *
 * @example SQS consumer with DLQ handling
 * ```typescript
 * export const processSQSMessage = traceConsumer({
 *   system: 'sqs',
 *   destination: 'orders-queue',
 *   headersFrom: (msg) => sqsAttributesToHeaders(msg.MessageAttributes),
 *   onDLQ: (ctx, reason) => {
 *     // NOTE(review): confirm recordDLQ does not itself invoke onDLQ;
 *     // if it does, this callback would recurse.
 *     ctx.recordDLQ(reason, 'orders-dlq');
 *   },
 * })(ctx => async (message: SQSMessage) => {
 *   try {
 *     await processOrder(JSON.parse(message.Body));
 *   } catch (error) {
 *     if (message.ApproximateReceiveCount > 3) {
 *       // `error` is `unknown` in a catch clause; narrow before reading it
 *       ctx.recordDLQ(error instanceof Error ? error.message : String(error));
 *       throw error;
 *     }
 *     ctx.recordRetry(message.ApproximateReceiveCount, 3);
 *     throw error;
 *   }
 * });
 * ```
 */
declare function traceConsumer(
  config: ConsumerConfig,
): <TArgs extends unknown[], TReturn>(
  fnFactory: (ctx: ConsumerContext) => (...args: TArgs) => Promise<TReturn>,
) => (...args: TArgs) => Promise<TReturn>;
/**
 * Create a traced message consumer function with explicitly supplied type
 * arguments.
 *
 * This is the original signature, kept as the last overload so that
 * existing calls such as `traceConsumer<[KafkaMessage], void>(config)` and
 * type queries like `ReturnType<typeof traceConsumer>` resolve as before.
 *
 * @param config - Consumer configuration
 * @returns Factory function that wraps your consumer logic
 */
declare function traceConsumer<TArgs extends unknown[], TReturn>(
  config: ConsumerConfig,
): (
  fnFactory: (ctx: ConsumerContext) => (...args: TArgs) => Promise<TReturn>,
) => (...args: TArgs) => Promise<TReturn>;
/**
 * Clear sequence tracking state (useful for testing).
 *
 * Takes no arguments and returns nothing.
 *
 * NOTE(review): presumably this resets the state kept for the `ordering`
 * options (out-of-order and duplicate detection) so tests start clean.
 * Confirm against the implementation, which is not part of this
 * declaration file.
 */
declare function clearOrderingState(): void;
// Public surface of the messaging module: type-only exports first, then the
// runtime functions.
export type {
  ConsumerConfig,
  ConsumerContext,
  ConsumerGroupState,
  ConsumerGroupTrackingConfig,
  DLQOptions,
  DLQReasonCategory,
  DLQReplayOptions,
  LagMetricsConfig,
  MessagingOperation,
  MessagingSystem,
  OrderingConfig,
  OutOfOrderInfo,
  PartitionAssignment,
  PartitionLag,
  ProducerConfig,
  ProducerContext,
  RebalanceEvent,
  RebalanceType,
};
export { clearOrderingState, traceConsumer, traceProducer };