kafkajs

A modern Apache Kafka client for node.js

const Long = require('../utils/long')
const createRetry = require('../retry')
const { initialRetryTime } = require('../retry/defaults')
const ConsumerGroup = require('./consumerGroup')
const Runner = require('./runner')
const { events, wrap: wrapEvent, unwrap: unwrapEvent } = require('./instrumentationEvents')
const InstrumentationEventEmitter = require('../instrumentation/emitter')
const { KafkaJSNonRetriableError } = require('../errors')
const { roundRobin } = require('./assigners')
const { EARLIEST_OFFSET, LATEST_OFFSET } = require('../constants')
const ISOLATION_LEVEL = require('../protocol/isolationLevel')
const sharedPromiseTo = require('../utils/sharedPromiseTo')

const { keys, values } = Object
const { CONNECT, DISCONNECT, STOP, CRASH } = events

const eventNames = values(events)
const eventKeys = keys(events)
  .map(key => `consumer.events.${key}`)
  .join(', ')

const specialOffsets = [
  Long.fromValue(EARLIEST_OFFSET).toString(),
  Long.fromValue(LATEST_OFFSET).toString(),
]

/**
 * @param {Object} params
 * @param {import("../../types").Cluster} params.cluster
 * @param {String} params.groupId
 * @param {import('../../types').RetryOptions} [params.retry]
 * @param {import('../../types').Logger} params.logger
 * @param {import('../../types').PartitionAssigner[]} [params.partitionAssigners]
 * @param {number} [params.sessionTimeout]
 * @param {number} [params.rebalanceTimeout]
 * @param {number} [params.heartbeatInterval]
 * @param {number} [params.maxBytesPerPartition]
 * @param {number} [params.minBytes]
 * @param {number} [params.maxBytes]
 * @param {number} [params.maxWaitTimeInMs]
 * @param {number} [params.isolationLevel]
 * @param {string} [params.rackId]
 * @param {InstrumentationEventEmitter} [params.instrumentationEmitter]
 * @param {number} params.metadataMaxAge
 *
 * @returns {import("../../types").Consumer}
 */
module.exports = ({
  cluster,
  groupId,
  retry,
  logger: rootLogger,
  partitionAssigners = [roundRobin],
  sessionTimeout = 30000,
  rebalanceTimeout = 60000,
  heartbeatInterval = 3000,
  maxBytesPerPartition = 1048576, // 1MB
  minBytes = 1,
  maxBytes = 10485760, // 10MB
  maxWaitTimeInMs = 5000,
  isolationLevel = ISOLATION_LEVEL.READ_COMMITTED,
  rackId = '',
  instrumentationEmitter: rootInstrumentationEmitter,
  metadataMaxAge,
}) => {
  if (!groupId) {
    throw new KafkaJSNonRetriableError('Consumer groupId must be a non-empty string.')
  }

  const logger = rootLogger.namespace('Consumer')
  const instrumentationEmitter = rootInstrumentationEmitter || new InstrumentationEventEmitter()
  const assigners = partitionAssigners.map(createAssigner =>
    createAssigner({ groupId, logger, cluster })
  )

  /** @type {Record<string, { fromBeginning?: boolean }>} */
  const topics = {}
  let runner = null
  /** @type {ConsumerGroup} */
  let consumerGroup = null
  let restartTimeout = null

  if (heartbeatInterval >= sessionTimeout) {
    throw new KafkaJSNonRetriableError(
      `Consumer heartbeatInterval (${heartbeatInterval}) must be lower than sessionTimeout (${sessionTimeout}). It is recommended to set heartbeatInterval to approximately a third of the sessionTimeout.`
    )
  }

  /** @type {import("../../types").Consumer["connect"]} */
  const connect = async () => {
    await cluster.connect()
    instrumentationEmitter.emit(CONNECT)
  }

  /** @type {import("../../types").Consumer["disconnect"]} */
  const disconnect = async () => {
    try {
      await stop()
      logger.debug('consumer has stopped, disconnecting', { groupId })
      await cluster.disconnect()
      instrumentationEmitter.emit(DISCONNECT)
    } catch (e) {
      logger.error(`Caught error when disconnecting the consumer: ${e.message}`, {
        stack: e.stack,
        groupId,
      })
      throw e
    }
  }

  /** @type {import("../../types").Consumer["stop"]} */
  const stop = sharedPromiseTo(async () => {
    try {
      if (runner) {
        await runner.stop()
        runner = null
        consumerGroup = null
        instrumentationEmitter.emit(STOP)
      }

      clearTimeout(restartTimeout)
      logger.info('Stopped', { groupId })
    } catch (e) {
      logger.error(`Caught error when stopping the consumer: ${e.message}`, {
        stack: e.stack,
        groupId,
      })
      throw e
    }
  })

  /** @type {import("../../types").Consumer["subscribe"]} */
  const subscribe = async ({ topic, topics: subscriptionTopics, fromBeginning = false }) => {
    if (consumerGroup) {
      throw new KafkaJSNonRetriableError('Cannot subscribe to topic while consumer is running')
    }

    if (!topic && !subscriptionTopics) {
      throw new KafkaJSNonRetriableError('Missing required argument "topics"')
    }

    if (subscriptionTopics != null && !Array.isArray(subscriptionTopics)) {
      throw new KafkaJSNonRetriableError('Argument "topics" must be an array')
    }

    const subscriptions = subscriptionTopics || [topic]

    for (const subscription of subscriptions) {
      if (typeof subscription !== 'string' && !(subscription instanceof RegExp)) {
        throw new KafkaJSNonRetriableError(
          `Invalid topic ${subscription} (${typeof subscription}), the topic name has to be a String or a RegExp`
        )
      }
    }

    const hasRegexSubscriptions = subscriptions.some(subscription => subscription instanceof RegExp)
    const metadata = hasRegexSubscriptions ? await cluster.metadata() : undefined

    const topicsToSubscribe = []
    for (const subscription of subscriptions) {
      const isRegExp = subscription instanceof RegExp

      if (isRegExp) {
        const topicRegExp = subscription
        const matchedTopics = metadata.topicMetadata
          .map(({ topic: topicName }) => topicName)
          .filter(topicName => topicRegExp.test(topicName))

        logger.debug('Subscription based on RegExp', {
          groupId,
          topicRegExp: topicRegExp.toString(),
          matchedTopics,
        })

        topicsToSubscribe.push(...matchedTopics)
      } else {
        topicsToSubscribe.push(subscription)
      }
    }

    for (const t of topicsToSubscribe) {
      topics[t] = { fromBeginning }
    }

    await cluster.addMultipleTargetTopics(topicsToSubscribe)
  }

  /** @type {import("../../types").Consumer["run"]} */
  const run = async ({
    autoCommit = true,
    autoCommitInterval = null,
    autoCommitThreshold = null,
    eachBatchAutoResolve = true,
    partitionsConsumedConcurrently: concurrency = 1,
    eachBatch = null,
    eachMessage = null,
  } = {}) => {
    if (consumerGroup) {
      logger.warn('consumer#run was called, but the consumer is already running', { groupId })
      return
    }

    const start = async onCrash => {
      logger.info('Starting', { groupId })

      consumerGroup = new ConsumerGroup({
        logger: rootLogger,
        topics: keys(topics),
        topicConfigurations: topics,
        retry,
        cluster,
        groupId,
        assigners,
        sessionTimeout,
        rebalanceTimeout,
        maxBytesPerPartition,
        minBytes,
        maxBytes,
        maxWaitTimeInMs,
        instrumentationEmitter,
        isolationLevel,
        rackId,
        metadataMaxAge,
        autoCommit,
        autoCommitInterval,
        autoCommitThreshold,
      })

      runner = new Runner({
        logger: rootLogger,
        consumerGroup,
        instrumentationEmitter,
        heartbeatInterval,
        retry,
        autoCommit,
        eachBatchAutoResolve,
        eachBatch,
        eachMessage,
        onCrash,
        concurrency,
      })

      await runner.start()
    }

    const onCrash = async e => {
      logger.error(`Crash: ${e.name}: ${e.message}`, {
        groupId,
        retryCount: e.retryCount,
        stack: e.stack,
      })

      if (e.name === 'KafkaJSConnectionClosedError') {
        cluster.removeBroker({ host: e.host, port: e.port })
      }

      await disconnect()

      const getOriginalCause = error => {
        if (error.cause) {
          return getOriginalCause(error.cause)
        }

        return error
      }

      const isErrorRetriable =
        e.name === 'KafkaJSNumberOfRetriesExceeded' || getOriginalCause(e).retriable === true
      const shouldRestart =
        isErrorRetriable &&
        (!retry ||
          !retry.restartOnFailure ||
          (await retry.restartOnFailure(e).catch(error => {
            logger.error(
              'Caught error when invoking user-provided "restartOnFailure" callback. Defaulting to restarting.',
              {
                error: error.message || error,
                cause: e.message || e,
                groupId,
              }
            )

            return true
          })))

      instrumentationEmitter.emit(CRASH, {
        error: e,
        groupId,
        restart: shouldRestart,
      })

      if (shouldRestart) {
        const retryTime = e.retryTime || (retry && retry.initialRetryTime) || initialRetryTime

        logger.error(`Restarting the consumer in ${retryTime}ms`, {
          retryCount: e.retryCount,
          retryTime,
          groupId,
        })

        restartTimeout = setTimeout(() => start(onCrash), retryTime)
      }
    }

    await start(onCrash)
  }

  /** @type {import("../../types").Consumer["on"]} */
  const on = (eventName, listener) => {
    if (!eventNames.includes(eventName)) {
      throw new KafkaJSNonRetriableError(`Event name should be one of ${eventKeys}`)
    }

    return instrumentationEmitter.addListener(unwrapEvent(eventName), event => {
      event.type = wrapEvent(event.type)
      Promise.resolve(listener(event)).catch(e => {
        logger.error(`Failed to execute listener: ${e.message}`, {
          eventName,
          stack: e.stack,
        })
      })
    })
  }

  /**
   * @type {import("../../types").Consumer["commitOffsets"]}
   * @param topicPartitions
   *   Example: [{ topic: 'topic-name', partition: 0, offset: '1', metadata: 'event-id-3' }]
   */
  const commitOffsets = async (topicPartitions = []) => {
    const commitsByTopic = topicPartitions.reduce(
      (payload, { topic, partition, offset, metadata = null }) => {
        if (!topic) {
          throw new KafkaJSNonRetriableError(`Invalid topic ${topic}`)
        }

        if (isNaN(partition)) {
          throw new KafkaJSNonRetriableError(
            `Invalid partition, expected a number received ${partition}`
          )
        }

        let commitOffset
        try {
          commitOffset = Long.fromValue(offset)
        } catch (_) {
          throw new KafkaJSNonRetriableError(`Invalid offset, expected a long received ${offset}`)
        }

        if (commitOffset.lessThan(0)) {
          throw new KafkaJSNonRetriableError('Offset must not be a negative number')
        }

        if (metadata !== null && typeof metadata !== 'string') {
          throw new KafkaJSNonRetriableError(
            `Invalid offset metadata, expected string or null, received ${metadata}`
          )
        }

        const topicCommits = payload[topic] || []
        topicCommits.push({ partition, offset: commitOffset, metadata })

        return { ...payload, [topic]: topicCommits }
      },
      {}
    )

    if (!consumerGroup) {
      throw new KafkaJSNonRetriableError(
        'Consumer group was not initialized, consumer#run must be called first'
      )
    }

    const topics = Object.keys(commitsByTopic)
    return runner.commitOffsets({
      topics: topics.map(topic => {
        return {
          topic,
          partitions: commitsByTopic[topic],
        }
      }),
    })
  }

  /** @type {import("../../types").Consumer["seek"]} */
  const seek = ({ topic, partition, offset }) => {
    if (!topic) {
      throw new KafkaJSNonRetriableError(`Invalid topic ${topic}`)
    }

    if (isNaN(partition)) {
      throw new KafkaJSNonRetriableError(
        `Invalid partition, expected a number received ${partition}`
      )
    }

    let seekOffset
    try {
      seekOffset = Long.fromValue(offset)
    } catch (_) {
      throw new KafkaJSNonRetriableError(`Invalid offset, expected a long received ${offset}`)
    }

    if (seekOffset.lessThan(0) && !specialOffsets.includes(seekOffset.toString())) {
      throw new KafkaJSNonRetriableError('Offset must not be a negative number')
    }

    if (!consumerGroup) {
      throw new KafkaJSNonRetriableError(
        'Consumer group was not initialized, consumer#run must be called first'
      )
    }

    consumerGroup.seek({ topic, partition, offset: seekOffset.toString() })
  }

  /** @type {import("../../types").Consumer["describeGroup"]} */
  const describeGroup = async () => {
    const coordinator = await cluster.findGroupCoordinator({ groupId })
    const retrier = createRetry(retry)
    return retrier(async () => {
      const { groups } = await coordinator.describeGroups({ groupIds: [groupId] })
      return groups.find(group => group.groupId === groupId)
    })
  }

  /**
   * @type {import("../../types").Consumer["pause"]}
   * @param topicPartitions
   *   Example: [{ topic: 'topic-name', partitions: [1, 2] }]
   */
  const pause = (topicPartitions = []) => {
    for (const topicPartition of topicPartitions) {
      if (!topicPartition || !topicPartition.topic) {
        throw new KafkaJSNonRetriableError(
          `Invalid topic ${(topicPartition && topicPartition.topic) || topicPartition}`
        )
      } else if (
        typeof topicPartition.partitions !== 'undefined' &&
        (!Array.isArray(topicPartition.partitions) || topicPartition.partitions.some(isNaN))
      ) {
        throw new KafkaJSNonRetriableError(
          `Array of valid partitions required to pause specific partitions instead of ${topicPartition.partitions}`
        )
      }
    }

    if (!consumerGroup) {
      throw new KafkaJSNonRetriableError(
        'Consumer group was not initialized, consumer#run must be called first'
      )
    }

    consumerGroup.pause(topicPartitions)
  }

  /**
   * Returns the list of topic partitions paused on this consumer
   *
   * @type {import("../../types").Consumer["paused"]}
   */
  const paused = () => {
    if (!consumerGroup) {
      return []
    }

    return consumerGroup.paused()
  }

  /**
   * @type {import("../../types").Consumer["resume"]}
   * @param topicPartitions
   *   Example: [{ topic: 'topic-name', partitions: [1, 2] }]
   */
  const resume = (topicPartitions = []) => {
    for (const topicPartition of topicPartitions) {
      if (!topicPartition || !topicPartition.topic) {
        throw new KafkaJSNonRetriableError(
          `Invalid topic ${(topicPartition && topicPartition.topic) || topicPartition}`
        )
      } else if (
        typeof topicPartition.partitions !== 'undefined' &&
        (!Array.isArray(topicPartition.partitions) || topicPartition.partitions.some(isNaN))
      ) {
        throw new KafkaJSNonRetriableError(
          `Array of valid partitions required to resume specific partitions instead of ${topicPartition.partitions}`
        )
      }
    }

    if (!consumerGroup) {
      throw new KafkaJSNonRetriableError(
        'Consumer group was not initialized, consumer#run must be called first'
      )
    }

    consumerGroup.resume(topicPartitions)
  }

  /**
   * @return {Object} logger
   */
  const getLogger = () => logger

  return {
    connect,
    disconnect,
    subscribe,
    stop,
    run,
    commitOffsets,
    seek,
    describeGroup,
    pause,
    paused,
    resume,
    on,
    events,
    logger: getLogger,
  }
}
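
For orientation, this module is the factory behind `kafka.consumer(...)` in the public kafkajs API. A minimal usage sketch follows; the client id, broker address, group id, and topic name are placeholder assumptions for illustration, not values taken from this file:

const { Kafka } = require('kafkajs')

const kafka = new Kafka({
  clientId: 'my-app', // hypothetical client id
  brokers: ['localhost:9092'], // hypothetical broker address
})

// groupId is required: the factory above throws KafkaJSNonRetriableError without it
const consumer = kafka.consumer({ groupId: 'example-group' })

const main = async () => {
  await consumer.connect()

  // subscribe() must happen before run(); it throws once consumerGroup exists.
  // fromBeginning maps onto the per-topic { fromBeginning } configuration above.
  await consumer.subscribe({ topics: ['example-topic'], fromBeginning: true })

  await consumer.run({
    eachMessage: async ({ topic, partition, message }) => {
      console.log({ topic, partition, offset: message.offset, value: message.value.toString() })
    },
  })
}

main().catch(console.error)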
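
The crash path in run() emits a CRASH instrumentation event carrying the shouldRestart decision before any automatic restart. A sketch of observing it through the `on()` method exported here, continuing from the `consumer` in the previous example:

// consumer.events.CRASH is the wrapped name for the CRASH event emitted by onCrash(),
// whose payload is { error, groupId, restart } as constructed in this file
const removeListener = consumer.on(consumer.events.CRASH, event => {
  const { error, restart } = event.payload
  console.error(`Consumer crashed; automatic restart: ${restart}`, error)
})

// on() returns the emitter's removal function, so the listener can be detached later:
// removeListener()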