UNPKG

kafkajs

Version:

A modern Apache Kafka client for Node.js

760 lines (663 loc) 24.5 kB
const sleep = require('../utils/sleep') const websiteUrl = require('../utils/websiteUrl') const arrayDiff = require('../utils/arrayDiff') const createRetry = require('../retry') const sharedPromiseTo = require('../utils/sharedPromiseTo') const OffsetManager = require('./offsetManager') const Batch = require('./batch') const SeekOffsets = require('./seekOffsets') const SubscriptionState = require('./subscriptionState') const { events: { GROUP_JOIN, HEARTBEAT, CONNECT, RECEIVED_UNSUBSCRIBED_TOPICS }, } = require('./instrumentationEvents') const { MemberAssignment } = require('./assignerProtocol') const { KafkaJSError, KafkaJSNonRetriableError, KafkaJSStaleTopicMetadataAssignment, isRebalancing, } = require('../errors') const { keys } = Object const STALE_METADATA_ERRORS = [ 'LEADER_NOT_AVAILABLE', // Fetch before v9 uses NOT_LEADER_FOR_PARTITION 'NOT_LEADER_FOR_PARTITION', // Fetch after v9 uses {FENCED,UNKNOWN}_LEADER_EPOCH 'FENCED_LEADER_EPOCH', 'UNKNOWN_LEADER_EPOCH', 'UNKNOWN_TOPIC_OR_PARTITION', ] const PRIVATE = { JOIN: Symbol('private:ConsumerGroup:join'), SYNC: Symbol('private:ConsumerGroup:sync'), SHARED_HEARTBEAT: Symbol('private:ConsumerGroup:sharedHeartbeat'), } module.exports = class ConsumerGroup { /** * @param {object} options * @param {import('../../types').RetryOptions} options.retry * @param {import('../../types').Cluster} options.cluster * @param {string} options.groupId * @param {string[]} options.topics * @param {Record<string, { fromBeginning?: boolean }>} options.topicConfigurations * @param {import('../../types').Logger} options.logger * @param {import('../instrumentation/emitter')} options.instrumentationEmitter * @param {import('../../types').Assigner[]} options.assigners * @param {number} options.sessionTimeout * @param {number} options.rebalanceTimeout * @param {number} options.maxBytesPerPartition * @param {number} options.minBytes * @param {number} options.maxBytes * @param {number} options.maxWaitTimeInMs * @param {boolean} 
options.autoCommit * @param {number} options.autoCommitInterval * @param {number} options.autoCommitThreshold * @param {number} options.isolationLevel * @param {string} options.rackId * @param {number} options.metadataMaxAge */ constructor({ retry, cluster, groupId, topics, topicConfigurations, logger, instrumentationEmitter, assigners, sessionTimeout, rebalanceTimeout, maxBytesPerPartition, minBytes, maxBytes, maxWaitTimeInMs, autoCommit, autoCommitInterval, autoCommitThreshold, isolationLevel, rackId, metadataMaxAge, }) { /** @type {import("../../types").Cluster} */ this.cluster = cluster this.groupId = groupId this.topics = topics this.topicsSubscribed = topics this.topicConfigurations = topicConfigurations this.logger = logger.namespace('ConsumerGroup') this.instrumentationEmitter = instrumentationEmitter this.retrier = createRetry(Object.assign({}, retry)) this.assigners = assigners this.sessionTimeout = sessionTimeout this.rebalanceTimeout = rebalanceTimeout this.maxBytesPerPartition = maxBytesPerPartition this.minBytes = minBytes this.maxBytes = maxBytes this.maxWaitTime = maxWaitTimeInMs this.autoCommit = autoCommit this.autoCommitInterval = autoCommitInterval this.autoCommitThreshold = autoCommitThreshold this.isolationLevel = isolationLevel this.rackId = rackId this.metadataMaxAge = metadataMaxAge this.seekOffset = new SeekOffsets() this.coordinator = null this.generationId = null this.leaderId = null this.memberId = null this.members = null this.groupProtocol = null this.partitionsPerSubscribedTopic = null /** * Preferred read replica per topic and partition * * Each of the partitions tracks the preferred read replica (`nodeId`) and a timestamp * until when that preference is valid. 
* * @type {{[topicName: string]: {[partition: number]: {nodeId: number, expireAt: number}}}} */ this.preferredReadReplicasPerTopicPartition = {} this.offsetManager = null this.subscriptionState = new SubscriptionState() this.lastRequest = Date.now() this[PRIVATE.SHARED_HEARTBEAT] = sharedPromiseTo(async ({ interval }) => { const { groupId, generationId, memberId } = this const now = Date.now() if (memberId && now >= this.lastRequest + interval) { const payload = { groupId, memberId, groupGenerationId: generationId, } await this.coordinator.heartbeat(payload) this.instrumentationEmitter.emit(HEARTBEAT, payload) this.lastRequest = Date.now() } }) } isLeader() { return this.leaderId && this.memberId === this.leaderId } getNodeIds() { return this.cluster.getNodeIds() } async connect() { await this.cluster.connect() this.instrumentationEmitter.emit(CONNECT) await this.cluster.refreshMetadataIfNecessary() } async [PRIVATE.JOIN]() { const { groupId, sessionTimeout, rebalanceTimeout } = this this.coordinator = await this.cluster.findGroupCoordinator({ groupId }) const groupData = await this.coordinator.joinGroup({ groupId, sessionTimeout, rebalanceTimeout, memberId: this.memberId || '', groupProtocols: this.assigners.map(assigner => assigner.protocol({ topics: this.topicsSubscribed, }) ), }) this.generationId = groupData.generationId this.leaderId = groupData.leaderId this.memberId = groupData.memberId this.members = groupData.members this.groupProtocol = groupData.groupProtocol } async leave() { const { groupId, memberId } = this if (memberId) { await this.coordinator.leaveGroup({ groupId, memberId }) this.memberId = null } } async [PRIVATE.SYNC]() { let assignment = [] const { groupId, generationId, memberId, members, groupProtocol, topics, topicsSubscribed, coordinator, } = this if (this.isLeader()) { this.logger.debug('Chosen as group leader', { groupId, generationId, memberId, topics }) const assigner = this.assigners.find(({ name }) => name === groupProtocol) if 
(!assigner) { throw new KafkaJSNonRetriableError( `Unsupported partition assigner "${groupProtocol}", the assigner wasn't found in the assigners list` ) } await this.cluster.refreshMetadata() assignment = await assigner.assign({ members, topics: topicsSubscribed }) this.logger.debug('Group assignment', { groupId, generationId, groupProtocol, assignment, topics: topicsSubscribed, }) } // Keep track of the partitions for the subscribed topics this.partitionsPerSubscribedTopic = this.generatePartitionsPerSubscribedTopic() const { memberAssignment } = await this.coordinator.syncGroup({ groupId, generationId, memberId, groupAssignment: assignment, }) const decodedMemberAssignment = MemberAssignment.decode(memberAssignment) const decodedAssignment = decodedMemberAssignment != null ? decodedMemberAssignment.assignment : {} this.logger.debug('Received assignment', { groupId, generationId, memberId, memberAssignment: decodedAssignment, }) const assignedTopics = keys(decodedAssignment) const topicsNotSubscribed = arrayDiff(assignedTopics, topicsSubscribed) if (topicsNotSubscribed.length > 0) { const payload = { groupId, generationId, memberId, assignedTopics, topicsSubscribed, topicsNotSubscribed, } this.instrumentationEmitter.emit(RECEIVED_UNSUBSCRIBED_TOPICS, payload) this.logger.warn('Consumer group received unsubscribed topics', { ...payload, helpUrl: websiteUrl( 'docs/faq', 'why-am-i-receiving-messages-for-topics-i-m-not-subscribed-to' ), }) } // Remove unsubscribed topics from the list const safeAssignment = arrayDiff(assignedTopics, topicsNotSubscribed) const currentMemberAssignment = safeAssignment.map(topic => ({ topic, partitions: decodedAssignment[topic], })) // Check if the consumer is aware of all assigned partitions for (const assignment of currentMemberAssignment) { const { topic, partitions: assignedPartitions } = assignment const knownPartitions = this.partitionsPerSubscribedTopic.get(topic) const isAwareOfAllAssignedPartitions = 
assignedPartitions.every(partition => knownPartitions.includes(partition) ) if (!isAwareOfAllAssignedPartitions) { this.logger.warn('Consumer is not aware of all assigned partitions, refreshing metadata', { groupId, generationId, memberId, topic, knownPartitions, assignedPartitions, }) // If the consumer is not aware of all assigned partitions, refresh metadata // and update the list of partitions per subscribed topic. It's enough to perform // this operation once since refresh metadata will update metadata for all topics await this.cluster.refreshMetadata() this.partitionsPerSubscribedTopic = this.generatePartitionsPerSubscribedTopic() break } } this.topics = currentMemberAssignment.map(({ topic }) => topic) this.subscriptionState.assign(currentMemberAssignment) this.offsetManager = new OffsetManager({ cluster: this.cluster, topicConfigurations: this.topicConfigurations, instrumentationEmitter: this.instrumentationEmitter, memberAssignment: currentMemberAssignment.reduce( (partitionsByTopic, { topic, partitions }) => ({ ...partitionsByTopic, [topic]: partitions, }), {} ), autoCommit: this.autoCommit, autoCommitInterval: this.autoCommitInterval, autoCommitThreshold: this.autoCommitThreshold, coordinator, groupId, generationId, memberId, }) } joinAndSync() { const startJoin = Date.now() return this.retrier(async bail => { try { await this[PRIVATE.JOIN]() await this[PRIVATE.SYNC]() const memberAssignment = this.assigned().reduce( (result, { topic, partitions }) => ({ ...result, [topic]: partitions }), {} ) const payload = { groupId: this.groupId, memberId: this.memberId, leaderId: this.leaderId, isLeader: this.isLeader(), memberAssignment, groupProtocol: this.groupProtocol, duration: Date.now() - startJoin, } this.instrumentationEmitter.emit(GROUP_JOIN, payload) this.logger.info('Consumer has joined the group', payload) } catch (e) { if (isRebalancing(e)) { // Rebalance in progress isn't a retriable protocol error since the consumer // has to go through find 
coordinator and join again before it can // actually retry the operation. We wrap the original error in a retriable error // here instead in order to restart the join + sync sequence using the retrier. throw new KafkaJSError(e) } if (e.type === 'UNKNOWN_MEMBER_ID') { this.memberId = null throw new KafkaJSError(e) } bail(e) } }) } /** * @param {import("../../types").TopicPartition} topicPartition */ resetOffset({ topic, partition }) { this.offsetManager.resetOffset({ topic, partition }) } /** * @param {import("../../types").TopicPartitionOffset} topicPartitionOffset */ resolveOffset({ topic, partition, offset }) { this.offsetManager.resolveOffset({ topic, partition, offset }) } /** * Update the consumer offset for the given topic/partition. This will be used * on the next fetch. If this API is invoked for the same topic/partition more * than once, the latest offset will be used on the next fetch. * * @param {import("../../types").TopicPartitionOffset} topicPartitionOffset */ seek({ topic, partition, offset }) { this.seekOffset.set(topic, partition, offset) } pause(topicPartitions) { this.logger.info(`Pausing fetching from ${topicPartitions.length} topics`, { topicPartitions, }) this.subscriptionState.pause(topicPartitions) } resume(topicPartitions) { this.logger.info(`Resuming fetching from ${topicPartitions.length} topics`, { topicPartitions, }) this.subscriptionState.resume(topicPartitions) } assigned() { return this.subscriptionState.assigned() } paused() { return this.subscriptionState.paused() } /** * @param {string} topic * @param {string} partition * @returns {boolean} whether the specified topic-partition are paused or not */ isPaused(topic, partition) { return this.subscriptionState.isPaused(topic, partition) } async commitOffsetsIfNecessary() { await this.offsetManager.commitOffsetsIfNecessary() } async commitOffsets(offsets) { await this.offsetManager.commitOffsets(offsets) } uncommittedOffsets() { return this.offsetManager.uncommittedOffsets() } async 
heartbeat({ interval }) { return this[PRIVATE.SHARED_HEARTBEAT]({ interval }) } async fetch(nodeId) { try { await this.cluster.refreshMetadataIfNecessary() this.checkForStaleAssignment() let topicPartitions = this.subscriptionState.assigned() topicPartitions = this.filterPartitionsByNode(nodeId, topicPartitions) await this.seekOffsets(topicPartitions) const committedOffsets = this.offsetManager.committedOffsets() const activeTopicPartitions = this.getActiveTopicPartitions() const requests = topicPartitions .map(({ topic, partitions }) => ({ topic, partitions: partitions .filter( partition => /** * When recovering from OffsetOutOfRange, each partition can recover * concurrently, which invalidates resolved and committed offsets as part * of the recovery mechanism (see OffsetManager.clearOffsets). In concurrent * scenarios this can initiate a new fetch with invalid offsets. * * This was further highlighted by https://github.com/tulios/kafkajs/pull/570, * which increased concurrency, making this more likely to happen. * * This is solved by only making requests for partitions with initialized offsets. 
* * See the following pull request which explains the context of the problem: * @issue https://github.com/tulios/kafkajs/pull/578 */ committedOffsets[topic][partition] != null && activeTopicPartitions[topic].has(partition) ) .map(partition => ({ partition, fetchOffset: this.offsetManager.nextOffset(topic, partition).toString(), maxBytes: this.maxBytesPerPartition, })), })) .filter(({ partitions }) => partitions.length) if (!requests.length) { await sleep(this.maxWaitTime) return [] } const broker = await this.cluster.findBroker({ nodeId }) const { responses } = await broker.fetch({ maxWaitTime: this.maxWaitTime, minBytes: this.minBytes, maxBytes: this.maxBytes, isolationLevel: this.isolationLevel, topics: requests, rackId: this.rackId, }) return responses.flatMap(({ topicName, partitions }) => { const topicRequestData = requests.find(({ topic }) => topic === topicName) let preferredReadReplicas = this.preferredReadReplicasPerTopicPartition[topicName] if (!preferredReadReplicas) { this.preferredReadReplicasPerTopicPartition[topicName] = preferredReadReplicas = {} } return partitions .filter( ({ partition }) => !this.seekOffset.has(topicName, partition) && !this.subscriptionState.isPaused(topicName, partition) ) .map(partitionData => { const { partition, preferredReadReplica } = partitionData if (preferredReadReplica != null && preferredReadReplica !== -1) { const { nodeId: currentPreferredReadReplica } = preferredReadReplicas[partition] || {} if (currentPreferredReadReplica !== preferredReadReplica) { this.logger.info(`Preferred read replica is now ${preferredReadReplica}`, { groupId: this.groupId, memberId: this.memberId, topic: topicName, partition, }) } preferredReadReplicas[partition] = { nodeId: preferredReadReplica, expireAt: Date.now() + this.metadataMaxAge, } } const partitionRequestData = topicRequestData.partitions.find( ({ partition }) => partition === partitionData.partition ) const fetchedOffset = partitionRequestData.fetchOffset return new 
Batch(topicName, fetchedOffset, partitionData) }) }) } catch (e) { await this.recoverFromFetch(e) return [] } } async recoverFromFetch(e) { if (STALE_METADATA_ERRORS.includes(e.type) || e.name === 'KafkaJSTopicMetadataNotLoaded') { this.logger.debug('Stale cluster metadata, refreshing...', { groupId: this.groupId, memberId: this.memberId, error: e.message, }) await this.cluster.refreshMetadata() await this.joinAndSync() return } if (e.name === 'KafkaJSStaleTopicMetadataAssignment') { this.logger.warn(`${e.message}, resync group`, { groupId: this.groupId, memberId: this.memberId, topic: e.topic, unknownPartitions: e.unknownPartitions, }) await this.joinAndSync() return } if (e.name === 'KafkaJSOffsetOutOfRange') { await this.recoverFromOffsetOutOfRange(e) return } if (e.name === 'KafkaJSConnectionClosedError') { this.cluster.removeBroker({ host: e.host, port: e.port }) return } if (e.name === 'KafkaJSBrokerNotFound' || e.name === 'KafkaJSConnectionClosedError') { this.logger.debug(`${e.message}, refreshing metadata and retrying...`) await this.cluster.refreshMetadata() return } throw e } async recoverFromOffsetOutOfRange(e) { // If we are fetching from a follower try with the leader before resetting offsets const preferredReadReplicas = this.preferredReadReplicasPerTopicPartition[e.topic] if (preferredReadReplicas && typeof preferredReadReplicas[e.partition] === 'number') { this.logger.info('Offset out of range while fetching from follower, retrying with leader', { topic: e.topic, partition: e.partition, groupId: this.groupId, memberId: this.memberId, }) delete preferredReadReplicas[e.partition] } else { this.logger.error('Offset out of range, resetting to default offset', { topic: e.topic, partition: e.partition, groupId: this.groupId, memberId: this.memberId, }) await this.offsetManager.setDefaultOffset({ topic: e.topic, partition: e.partition, }) } } generatePartitionsPerSubscribedTopic() { const map = new Map() for (const topic of this.topicsSubscribed) { const 
partitions = this.cluster .findTopicPartitionMetadata(topic) .map(m => m.partitionId) .sort() map.set(topic, partitions) } return map } checkForStaleAssignment() { if (!this.partitionsPerSubscribedTopic) { return } const newPartitionsPerSubscribedTopic = this.generatePartitionsPerSubscribedTopic() for (const [topic, partitions] of newPartitionsPerSubscribedTopic) { const diff = arrayDiff(partitions, this.partitionsPerSubscribedTopic.get(topic)) if (diff.length > 0) { throw new KafkaJSStaleTopicMetadataAssignment('Topic has been updated', { topic, unknownPartitions: diff, }) } } } async seekOffsets(topicPartitions) { for (const { topic, partitions } of topicPartitions) { for (const partition of partitions) { const seekEntry = this.seekOffset.pop(topic, partition) if (!seekEntry) { continue } this.logger.debug('Seek offset', { groupId: this.groupId, memberId: this.memberId, seek: seekEntry, }) await this.offsetManager.seek(seekEntry) } } await this.offsetManager.resolveOffsets() } hasSeekOffset({ topic, partition }) { return this.seekOffset.has(topic, partition) } /** * For each of the partitions find the best nodeId to read it from * * @param {string} topic * @param {number[]} partitions * @returns {{[nodeId: number]: number[]}} per-node assignment of partitions * @see Cluster~findLeaderForPartitions */ // Invariant: The resulting object has each partition referenced exactly once findReadReplicaForPartitions(topic, partitions) { const partitionMetadata = this.cluster.findTopicPartitionMetadata(topic) const preferredReadReplicas = this.preferredReadReplicasPerTopicPartition[topic] return partitions.reduce((result, id) => { const partitionId = parseInt(id, 10) const metadata = partitionMetadata.find(p => p.partitionId === partitionId) if (!metadata) { return result } if (metadata.leader == null) { throw new KafkaJSError('Invalid partition metadata', { topic, partitionId, metadata }) } // Pick the preferred replica if there is one, and it isn't known to be offline, 
otherwise the leader. let nodeId = metadata.leader if (preferredReadReplicas) { const { nodeId: preferredReadReplica, expireAt } = preferredReadReplicas[partitionId] || {} if (Date.now() >= expireAt) { this.logger.debug('Preferred read replica information has expired, using leader', { topic, partitionId, groupId: this.groupId, memberId: this.memberId, preferredReadReplica, leader: metadata.leader, }) // Drop the entry delete preferredReadReplicas[partitionId] } else if (preferredReadReplica != null) { // Valid entry, check whether it is not offline // Note that we don't delete the preference here, and rather hope that eventually that replica comes online again const offlineReplicas = metadata.offlineReplicas if (Array.isArray(offlineReplicas) && offlineReplicas.includes(nodeId)) { this.logger.debug('Preferred read replica is offline, using leader', { topic, partitionId, groupId: this.groupId, memberId: this.memberId, preferredReadReplica, leader: metadata.leader, }) } else { nodeId = preferredReadReplica } } } const current = result[nodeId] || [] return { ...result, [nodeId]: [...current, partitionId] } }, {}) } filterPartitionsByNode(nodeId, topicPartitions) { return topicPartitions.map(({ topic, partitions }) => ({ topic, partitions: this.findReadReplicaForPartitions(topic, partitions)[nodeId] || [], })) } getActiveTopicPartitions() { const activeSubscriptionState = this.subscriptionState.active() const activeTopicPartitions = {} activeSubscriptionState.forEach(({ topic, partitions }) => { activeTopicPartitions[topic] = new Set(partitions) }) return activeTopicPartitions } }