UNPKG

@azure/event-hubs

Version:
389 lines • 18.8 kB
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
import { AbortError } from "@azure/abort-controller";
import { PartitionProcessor } from "./partitionProcessor.js";
import { isEventPosition, latestEventPosition } from "./eventPosition.js";
import { PumpManagerImpl } from "./pumpManager.js";
import { logErrorStackTrace, logger } from "./logger.js";
import { CloseReason } from "./models/public.js";
import { delayWithoutThrow } from "./util/delayWithoutThrow.js";
import { getRandomName } from "./util/utils.js";
import { StandardAbortMessage } from "@azure/core-amqp";
/**
 * Event Processor based applications consist of one or more instances of EventProcessor which have been
 * configured to consume events from the same Event Hub and consumer group. They balance the
 * workload across different instances by distributing the partitions to be processed among themselves.
 * They also allow the user to track progress when events are processed using checkpoints.
 *
 * A checkpoint is meant to represent the last successfully processed event by the user from a particular
 * partition of a consumer group in an Event Hub instance.
 *
 * @internal
 */
export class EventProcessor {
  _consumerGroup;
  _context;
  _subscriptionEventHandlers;
  _checkpointStore;
  _processorOptions;
  _pumpManager;
  _id;
  _isRunning = false;
  _loopTask;
  _abortController;
  /**
   * A specific partition to target.
   */
  _processingTarget;
  /**
   * Determines which partitions to claim as part of load balancing.
   */
  _loadBalancingStrategy;
  /**
   * The amount of time between load balancing attempts.
   */
  _loopIntervalInMs;
  _eventHubName;
  _fullyQualifiedNamespace;
  /**
   * @param _consumerGroup - The name of the consumer group from which you want to process events.
   * @param _context - The connection context; `config.entityPath` and `config.host` supply the
   * Event Hub name and fully qualified namespace, and `managementSession` is used to list partitions.
   * @param _subscriptionEventHandlers - The user-provided handlers. They are handed to each
   * `PartitionProcessor`, and `processError` (when present) is invoked for non-partition errors.
   * @param _checkpointStore - An instance of `CheckpointStore`. See @azure/eventhubs-checkpointstore-blob for an implementation.
   * For production, choose an implementation that will store checkpoints and partition ownership details to a durable store.
   * @param options - A set of options to configure the Event Processor
   * - `maxBatchSize` : The max size of the batch of events passed each time to user code for processing.
   * - `maxWaitTimeInSeconds` : The maximum amount of time to wait to build up the requested message count before
   * passing the data to user code for processing. If not provided, it defaults to 60 seconds.
   * - `ownerId` : Identifier for this processor; a random name is generated when it is not set.
   * - `pumpManager` : Pump manager to use instead of a new `PumpManagerImpl`.
   * - `processingTarget` : When set, only this partition is processed and load balancing is skipped.
   * - `loopIntervalInMs` : Delay between iterations of the processing loop.
   * - `loadBalancingStrategy` : Strategy that picks which partitions to claim.
   * - `startPosition` : Fallback position(s) used when a partition has no checkpoint.
   */
  constructor(_consumerGroup, _context, _subscriptionEventHandlers, _checkpointStore, options) {
    this._consumerGroup = _consumerGroup;
    this._context = _context;
    this._subscriptionEventHandlers = _subscriptionEventHandlers;
    this._checkpointStore = _checkpointStore;
    if (options.ownerId) {
      this._id = options.ownerId;
      logger.verbose(`Starting event processor with ID ${this._id}`);
    } else {
      this._id = getRandomName();
      logger.verbose(`Starting event processor with autogenerated ID ${this._id}`);
    }
    this._eventHubName = this._context.config.entityPath;
    this._fullyQualifiedNamespace = this._context.config.host;
    this._processorOptions = options;
    this._pumpManager =
      options.pumpManager || new PumpManagerImpl(this._id, this._processorOptions);
    this._processingTarget = options.processingTarget;
    this._loopIntervalInMs = options.loopIntervalInMs;
    this._loadBalancingStrategy = options.loadBalancingStrategy;
  }
  /**
   * The unique identifier for the EventProcessor.
   */
  get id() {
    return this._id;
  }
  /**
   * Builds the ownership record used to claim `partitionIdToClaim` for this processor.
   * The etag of any existing ownership record is carried over so the store can tell
   * which version of the record is being replaced.
   *
   * @param partitionOwnershipMap - Map of partition id to its current ownership record.
   * @param partitionIdToClaim - The partition to build a claim request for.
   * @returns The ownership request object.
   */
  _createPartitionOwnershipRequest(partitionOwnershipMap, partitionIdToClaim) {
    const previousPartitionOwnership = partitionOwnershipMap.get(partitionIdToClaim);
    const partitionOwnership = {
      ownerId: this._id,
      partitionId: partitionIdToClaim,
      fullyQualifiedNamespace: this._fullyQualifiedNamespace,
      consumerGroup: this._consumerGroup,
      eventHubName: this._eventHubName,
      etag: previousPartitionOwnership ? previousPartitionOwnership.etag : undefined,
    };
    return partitionOwnership;
  }
  /*
   * Claim ownership of the given partition if it's available, and start a pump for it
   * when the claim succeeds. Failures are reported through `_handleSubscriptionError`
   * instead of being rethrown.
   */
  async _claimOwnership(ownershipRequest, abortSignal) {
    if (abortSignal.aborted) {
      logger.verbose(
        `[${this._id}] Subscription was closed before claiming ownership of ${ownershipRequest.partitionId}.`,
      );
      return;
    }
    logger.info(
      `[${this._id}] Attempting to claim ownership of partition ${ownershipRequest.partitionId}.`,
    );
    try {
      const claimedOwnerships = await this._checkpointStore.claimOwnership([ownershipRequest]);
      // can happen if the partition was claimed out from underneath us - we shouldn't
      // attempt to spin up a processor.
      if (!claimedOwnerships.length) {
        return;
      }
      logger.info(
        `[${this._id}] Successfully claimed ownership of partition ${ownershipRequest.partitionId}.`,
      );
      await this._startPump(ownershipRequest.partitionId, abortSignal);
    } catch (err) {
      logger.warning(
        `[${this._id}] Failed to claim ownership of partition ${ownershipRequest.partitionId}`,
      );
      logErrorStackTrace(err);
      await this._handleSubscriptionError(err);
    }
  }
  /**
   * Creates a pump for `partitionId` unless the signal is already aborted or the pump
   * manager reports that it is already receiving from that partition.
   *
   * @param partitionId - The partition to start reading from.
   * @param abortSignal - Signal that is aborted when the processor is stopped.
   */
  async _startPump(partitionId, abortSignal) {
    if (abortSignal.aborted) {
      logger.verbose(
        `[${this._id}] The subscription was closed before starting to read from ${partitionId}.`,
      );
      return;
    }
    if (this._pumpManager.isReceivingFromPartition(partitionId)) {
      logger.verbose(
        `[${this._id}] There is already an active partitionPump for partition "${partitionId}", skipping pump creation.`,
      );
      return;
    }
    logger.verbose(
      `[${this._id}] [${partitionId}] Calling user-provided PartitionProcessorFactory.`,
    );
    const partitionProcessor = new PartitionProcessor(
      this._subscriptionEventHandlers,
      this._checkpointStore,
      {
        fullyQualifiedNamespace: this._fullyQualifiedNamespace,
        eventHubName: this._eventHubName,
        consumerGroup: this._consumerGroup,
        partitionId: partitionId,
        eventProcessorId: this._id,
      },
    );
    const eventPosition = await this._getStartingPosition(partitionId);
    await this._pumpManager.createPump(
      eventPosition,
      this._context,
      partitionProcessor,
      abortSignal,
    );
    logger.verbose(`[${this._id}] PartitionPump created successfully.`);
  }
  /**
   * Resolves where reading should begin for a partition: the offset of the first stored
   * checkpoint for that partition if one exists, otherwise the configured `startPosition`
   * fallback (see `getStartPosition`).
   *
   * @param partitionIdToClaim - The partition whose starting position is needed.
   * @returns The event position to start from.
   */
  async _getStartingPosition(partitionIdToClaim) {
    const availableCheckpoints = await this._checkpointStore.listCheckpoints(
      this._fullyQualifiedNamespace,
      this._eventHubName,
      this._consumerGroup,
    );
    const validCheckpoints = availableCheckpoints.filter(
      (chk) => chk.partitionId === partitionIdToClaim,
    );
    if (validCheckpoints.length > 0) {
      return { offset: validCheckpoints[0].offset };
    }
    logger.verbose(
      `No checkpoint found for partition ${partitionIdToClaim}. Looking for fallback.`,
    );
    return getStartPosition(partitionIdToClaim, this._processorOptions.startPosition);
  }
  /**
   * Loop used when a single partition is targeted: repeatedly makes sure a pump exists
   * for `partitionId`, pausing `_loopIntervalInMs` between attempts, until the signal aborts.
   *
   * @param partitionId - The only partition this processor reads from.
   * @param abortSignal - Signal that is aborted when the processor is stopped.
   */
  async _runLoopForSinglePartition(partitionId, abortSignal) {
    while (!abortSignal.aborted) {
      try {
        await this._startPump(partitionId, abortSignal);
      } catch (err) {
        logger.warning(
          `[${this._id}] An error occurred within the EventProcessor loop: ${err?.name}: ${err?.message}`,
        );
        logErrorStackTrace(err);
        await this._handleSubscriptionError(err);
      } finally {
        // sleep for some time after which we can attempt to create a pump again.
        logger.verbose(
          `[${this._id}] Pausing the EventProcessor loop for ${this._loopIntervalInMs} ms.`,
        );
        // swallow errors from delay since it's fine for delay to exit early
        await delayWithoutThrow(this._loopIntervalInMs, abortSignal);
      }
    }
    this._isRunning = false;
  }
  /**
   * Every loop to this method will result in this EventProcessor owning at most one new partition.
   *
   * The load is considered balanced when no active EventProcessor owns 2 partitions more than any other active
   * EventProcessor. Given that each invocation to this method results in ownership claim of at most one partition,
   * this algorithm converges gradually towards a steady state.
   *
   * When a new partition is claimed, this method is also responsible for starting a partition pump that creates an
   * EventHubConsumer for processing events from that partition.
   */
  async _runLoopWithLoadBalancing(loadBalancingStrategy, abortSignal) {
    let cancelLoopResolver;
    // This provides a mechanism for exiting the loop early
    // if the subscription has had `close` called.
    const cancelLoopPromise = new Promise((resolve) => {
      cancelLoopResolver = resolve;
      if (abortSignal.aborted) {
        resolve();
        return;
      }
      abortSignal.addEventListener("abort", resolve);
    });
    // Periodically check if any partitions need to be claimed and claim them.
    while (!abortSignal.aborted) {
      const iterationStartTimeInMs = Date.now();
      try {
        const { partitionIds } = await this._context.managementSession.getEventHubProperties({
          abortSignal,
        });
        await this._performLoadBalancing(loadBalancingStrategy, partitionIds, abortSignal);
      } catch (err) {
        logger.warning(
          `[${this._id}] An error occurred within the EventProcessor loop: ${err?.name}: ${err?.message}`,
        );
        logErrorStackTrace(err);
        // Protect against the scenario where the user awaits on subscription.close() from inside processError.
        await Promise.race([this._handleSubscriptionError(err), cancelLoopPromise]);
      } finally {
        // Sleep for some time, then continue the loop.
        // The time already spent in this iteration is subtracted so iterations start
        // roughly `_loopIntervalInMs` apart.
        const iterationDeltaInMs = Date.now() - iterationStartTimeInMs;
        const delayDurationInMs = Math.max(this._loopIntervalInMs - iterationDeltaInMs, 0);
        logger.verbose(
          `[${this._id}] Pausing the EventProcessor loop for ${delayDurationInMs} ms.`,
        );
        // Swallow the error since it's fine to exit early from the delay.
        await delayWithoutThrow(delayDurationInMs, abortSignal);
      }
    }
    if (cancelLoopResolver) {
      abortSignal.removeEventListener("abort", cancelLoopResolver);
    }
    this._isRunning = false;
  }
  /**
   * Runs one load-balancing pass: lists current ownership, asks the strategy which
   * partitions to claim (plus the ones to renew), then claims them one after another.
   *
   * @param loadBalancingStrategy - Strategy that picks which partitions to claim.
   * @param partitionIds - Ids of all partitions in the Event Hub.
   * @param abortSignal - Signal that is aborted when the processor is stopped.
   * @throws AbortError if the signal is aborted before or right after listing ownership.
   */
  async _performLoadBalancing(loadBalancingStrategy, partitionIds, abortSignal) {
    if (abortSignal.aborted) throw new AbortError(StandardAbortMessage);
    // Retrieve current partition ownership details from the datastore.
    const partitionOwnership = await this._checkpointStore.listOwnership(
      this._fullyQualifiedNamespace,
      this._eventHubName,
      this._consumerGroup,
    );
    if (abortSignal.aborted) throw new AbortError(StandardAbortMessage);
    const { partitionOwnershipMap, partitionsToClaim } = computePartitionsToClaim({
      id: this._id,
      isReceivingFromPartition: (partitionId) =>
        this._pumpManager.isReceivingFromPartition(partitionId),
      loadBalancingStrategy,
      partitionIds,
      partitionOwnership,
    });
    for (const partitionToClaim of partitionsToClaim) {
      const partitionOwnershipRequest = this._createPartitionOwnershipRequest(
        partitionOwnershipMap,
        partitionToClaim,
      );
      await this._claimOwnership(partitionOwnershipRequest, abortSignal);
    }
  }
  /**
   * This is called when there are errors that are not specific to a partition (ex: load balancing)
   *
   * Errors named "AbortError" are dropped. Anything else is forwarded to the user's
   * `processError` handler, if one was provided, with an empty partition id. Errors
   * thrown by that handler are logged and not rethrown.
   */
  async _handleSubscriptionError(err) {
    // filter out any internal "expected" errors
    // `err` may be nullish (callers log it with `err?.name`), so guard the property access;
    // throwing from here would escape the loop's catch block and kill the loop task.
    if (err?.name === "AbortError") {
      return;
    }
    if (this._subscriptionEventHandlers.processError) {
      try {
        await this._subscriptionEventHandlers.processError(err, {
          fullyQualifiedNamespace: this._fullyQualifiedNamespace,
          eventHubName: this._eventHubName,
          consumerGroup: this._consumerGroup,
          partitionId: "",
          updateCheckpoint: async () => {
            /* no-op */
          },
        });
      } catch (errorFromUser) {
        logger.verbose(
          `[${this._id}] An error was thrown from the user's processError handler: ${errorFromUser}`,
        );
      }
    }
  }
  /**
   * Starts the `EventProcessor`. Based on the number of instances of `EventProcessor` that are running for the
   * same consumer group, the partitions are distributed among these instances to process events.
   *
   * For each partition, the user provided `PartitionProcessor` is instantiated.
   *
   * Subsequent calls to start will be ignored if this event processor is already running.
   * Calling `start()` after `stop()` is called will restart this event processor.
   *
   */
  start() {
    if (this._isRunning) {
      logger.verbose(`[${this._id}] Attempted to start an already running EventProcessor.`);
      return;
    }
    this._isRunning = true;
    this._abortController = new AbortController();
    logger.verbose(`[${this._id}] Starting an EventProcessor.`);
    if (this._processingTarget) {
      logger.verbose(`[${this._id}] Single partition target: ${this._processingTarget}`);
      this._loopTask = this._runLoopForSinglePartition(
        this._processingTarget,
        this._abortController.signal,
      );
    } else {
      logger.verbose(`[${this._id}] Multiple partitions, using load balancer`);
      this._loopTask = this._runLoopWithLoadBalancing(
        this._loadBalancingStrategy,
        this._abortController.signal,
      );
    }
  }
  /**
   * @returns `true` between a call to `start()` and the completion of the processing loop.
   */
  isRunning() {
    return this._isRunning;
  }
  /**
   * Stops processing events for all partitions owned by this event processor.
   * All `PartitionProcessor` will be shutdown and any open resources will be closed.
   *
   * The loop is aborted, all pumps are removed, and the loop task is awaited; errors from
   * those steps are logged and not rethrown. Unless a single partition is targeted, owned
   * partitions are then abandoned in the checkpoint store, and an error from that step
   * does propagate to the caller.
   *
   * There is no running-state guard here: calling `stop()` on a processor that is not
   * running still performs the pump removal and ownership abandonment steps.
   *
   */
  async stop() {
    logger.verbose(`[${this._id}] Stopping an EventProcessor.`);
    // cancel the event processor loop
    this._abortController?.abort();
    try {
      // remove all existing pumps
      await this._pumpManager.removeAllPumps(CloseReason.Shutdown);
      // waits for the event processor loop to complete
      // will complete immediately if _loopTask is undefined
      if (this._loopTask) {
        await this._loopTask;
      }
    } catch (err) {
      logger.verbose(`[${this._id}] An error occurred while stopping the EventProcessor: ${err}`);
    } finally {
      logger.verbose(`[${this._id}] EventProcessor stopped.`);
    }
    if (this._processingTarget) {
      logger.verbose(`[${this._id}] No partitions owned, skipping abandoning.`);
    } else {
      await this.abandonPartitionOwnerships();
    }
  }
  /**
   * Releases every partition currently owned by this processor by re-claiming it with an
   * empty owner id.
   *
   * @returns The result of the checkpoint store's `claimOwnership` call.
   */
  async abandonPartitionOwnerships() {
    logger.verbose(`[${this._id}] Abandoning owned partitions`);
    const allOwnerships = await this._checkpointStore.listOwnership(
      this._fullyQualifiedNamespace,
      this._eventHubName,
      this._consumerGroup,
    );
    // unclaim any partitions that we currently own
    // Work on shallow copies so the records handed back by the store are not modified
    // before the store has accepted the claim.
    const abandonedOwnerships = allOwnerships
      .filter((ownership) => ownership.ownerId === this._id)
      .map((ownership) => ({ ...ownership, ownerId: "" }));
    return this._checkpointStore.claimOwnership(abandonedOwnerships);
  }
}
/**
 * An ownership record with an empty owner id marks a partition that was released.
 */
function isAbandoned(ownership) {
  return ownership.ownerId === "";
}
/**
 * Picks the fallback start position for a partition that has no checkpoint.
 *
 * @param partitionIdToClaim - The partition being started.
 * @param startPositions - Either nothing, a single event position applied to every
 * partition, or an object keyed by partition id.
 * @returns The matching position, or `latestEventPosition` when none is configured.
 */
function getStartPosition(partitionIdToClaim, startPositions) {
  if (startPositions == null) {
    return latestEventPosition;
  }
  if (isEventPosition(startPositions)) {
    return startPositions;
  }
  const startPosition = startPositions[partitionIdToClaim];
  if (startPosition == null) {
    return latestEventPosition;
  }
  return startPosition;
}
/**
 * Works out which partitions this processor should (re)claim in the current pass.
 *
 * @param inputs - `partitionOwnership` (records from the store), `id` (this processor),
 * `isReceivingFromPartition` (predicate for active pumps), `loadBalancingStrategy`, and
 * `partitionIds` (all partitions in the Event Hub).
 * @returns `partitionsToClaim`, a Set holding the strategy's picks followed by the owned
 * partitions that are actively being read, and `partitionOwnershipMap`, every ownership
 * record keyed by partition id.
 */
function computePartitionsToClaim(inputs) {
  const { partitionOwnership, id, isReceivingFromPartition, loadBalancingStrategy, partitionIds } =
    inputs;
  const partitionOwnershipMap = new Map();
  const nonAbandonedPartitionOwnershipMap = new Map();
  const partitionsToRenew = [];
  // Separate abandoned ownerships from claimed ownerships.
  // We only want to pass active partition ownerships to the
  // load balancer, but we need to hold onto the abandoned
  // partition ownerships because we need the etag to claim them.
  for (const ownership of partitionOwnership) {
    partitionOwnershipMap.set(ownership.partitionId, ownership);
    if (!isAbandoned(ownership)) {
      nonAbandonedPartitionOwnershipMap.set(ownership.partitionId, ownership);
    }
    if (ownership.ownerId === id && isReceivingFromPartition(ownership.partitionId)) {
      partitionsToRenew.push(ownership.partitionId);
    }
  }
  // Pass the list of all the partition ids and the collection of claimed partition ownerships
  // to the load balance strategy.
  // The load balancing strategy only needs to know the full list of partitions,
  // and which of those are currently claimed.
  // Since abandoned partitions are no longer claimed, we exclude them.
  const partitionsToClaim = loadBalancingStrategy.getPartitionsToClaim(
    id,
    nonAbandonedPartitionOwnershipMap,
    partitionIds,
  );
  // Combine without mutating the array returned by the strategy; the Set removes
  // partitions that appear in both lists.
  return {
    partitionsToClaim: new Set([...partitionsToClaim, ...partitionsToRenew]),
    partitionOwnershipMap,
  };
}
//# sourceMappingURL=eventProcessor.js.map