@platformatic/kafka

Modern and performant client for Apache Kafka
import { Readable } from 'node:stream';
import { createPromisifiedCallback, kCallbackPromise, noopCallback } from "../../apis/callbacks.js";
import { ListOffsetTimestamps } from "../../apis/enumerations.js";
import { consumerReceivesChannel, createDiagnosticContext, notifyCreation } from "../../diagnostic.js";
import { UserError } from "../../errors.js";
import { kAutocommit, kInstance, kRefreshOffsetsAndFetch } from "../../symbols.js";
import { kInspect, kPrometheus } from "../base/base.js";
import { ensureMetric } from "../metrics.js";
import { defaultConsumerOptions } from "./options.js";
import { MessagesStreamFallbackModes, MessagesStreamModes } from "./types.js";

// Don't move this function, as being in the same file will enable V8 to remove it.
// For further info, ask Matteo.
/* c8 ignore next 3 - Fallback deserializer, nothing to really test */
export function noopDeserializer(data) {
  return data;
}

export function defaultCorruptedMessageHandler() {
  return true;
}

let currentInstance = 0;

export class MessagesStream extends Readable {
  #consumer;
  #mode;
  #fallbackMode;
  #paused;
  #fetches;
  #maxFetches;
  #options;
  #topics;
  #offsetsToFetch;
  #offsetsToCommit;
  #offsetsCommitted;
  #inflightNodes;
  #keyDeserializer;
  #valueDeserializer;
  #headerKeyDeserializer;
  #headerValueDeserializer;
  #autocommitEnabled;
  #autocommitInterval;
  #autocommitInflight;
  #shouldClose;
  #closeCallbacks;
  #metricsConsumedMessages;
  #corruptedMessageHandler;
  [kInstance];

  constructor(consumer, options) {
    const {
      autocommit,
      mode,
      fallbackMode,
      maxFetches,
      offsets,
      deserializers,
      onCorruptedMessage,
      // The options below are only destructured to avoid being part of structuredClone below
      partitionAssigner: _partitionAssigner,
      ...otherOptions
    } = options;

    if (offsets && mode !== MessagesStreamModes.MANUAL) {
      throw new UserError('Cannot specify offsets when the stream mode is not MANUAL.');
    }

    if (!offsets && mode === MessagesStreamModes.MANUAL) {
      throw new UserError('Must specify offsets when the stream mode is MANUAL.');
    }

    /* c8 ignore next 4 - Unless it is initialized directly, highWaterMark is always defined */
    super({
      objectMode: true,
      highWaterMark: maxFetches ?? options.highWaterMark ?? defaultConsumerOptions.highWaterMark
    });

    this[kInstance] = currentInstance++;
    this.#consumer = consumer;
    this.#mode = mode ?? MessagesStreamModes.LATEST;
    this.#fallbackMode = fallbackMode ?? MessagesStreamFallbackModes.LATEST;
    this.#offsetsToCommit = new Map();
    this.#offsetsCommitted = new Map();
    this.#paused = false;
    this.#fetches = 0;
    this.#maxFetches = maxFetches ?? 0;
    this.#topics = structuredClone(options.topics);
    this.#inflightNodes = new Set();
    this.#keyDeserializer = deserializers?.key ?? noopDeserializer;
    this.#valueDeserializer = deserializers?.value ?? noopDeserializer;
    this.#headerKeyDeserializer = deserializers?.headerKey ?? noopDeserializer;
    this.#headerValueDeserializer = deserializers?.headerValue ?? noopDeserializer;
    this.#autocommitEnabled = !!options.autocommit;
    this.#autocommitInflight = false;
    this.#shouldClose = false;
    this.#closeCallbacks = [];
    this.#corruptedMessageHandler = onCorruptedMessage ?? defaultCorruptedMessageHandler;

    // Restore offsets
    this.#offsetsToFetch = new Map();
    if (offsets) {
      for (const { topic, partition, offset } of offsets) {
        this.#offsetsToFetch.set(`${topic}:${partition}`, offset);
      }
    }

    // Clone the rest of the options so the user can never mutate them
    this.#options = structuredClone(otherOptions);

    // Start the autocommit interval
    if (typeof autocommit === 'number' && autocommit > 0) {
      this.#autocommitInterval = setInterval(this[kAutocommit].bind(this), autocommit);
    } else {
      this.#autocommitInterval = null;
    }

    // When the consumer joins a group, we need to fetch again as the assignments
    // will have changed so we may have gone from last with no assignments to
    // having some.
    this.#consumer.on('consumer:group:join', () => {
      this.#offsetsCommitted.clear();

      this.#refreshOffsets((error) => {
        /* c8 ignore next 4 - Hard to test */
        if (error) {
          this.destroy(error);
          return;
        }

        this.#fetch();
      });
    });

    if (consumer[kPrometheus]) {
      this.#metricsConsumedMessages = ensureMetric(consumer[kPrometheus], 'Counter', 'kafka_consumed_messages', 'Number of consumed Kafka messages');
    }

    notifyCreation('messages-stream', this);
  }

  /* c8 ignore next 3 - Simple getter */
  get offsetsToFetch() {
    return this.#offsetsToFetch;
  }

  /* c8 ignore next 3 - Simple getter */
  get offsetsToCommit() {
    return this.#offsetsToCommit;
  }

  /* c8 ignore next 3 - Simple getter */
  get offsetsCommitted() {
    return this.#offsetsCommitted;
  }

  // TODO: This is a deprecated alias, remove in a future major version
  /* c8 ignore next 3 - Simple getter */
  get committedOffsets() {
    return this.#offsetsCommitted;
  }

  close(callback) {
    if (!callback) {
      callback = createPromisifiedCallback();
    }

    if (this.closed || this.destroyed) {
      callback(null);
      return callback[kCallbackPromise];
    }

    this.#closeCallbacks.push(callback);

    if (this.#shouldClose) {
      this.#invokeCloseCallbacks(null);
      return callback[kCallbackPromise];
    }

    this.#shouldClose = true;
    this.push(null);

    if (this.#autocommitInterval) {
      clearInterval(this.#autocommitInterval);
    }

    if (this.readableFlowing === null || this.isPaused()) {
      this.removeAllListeners('data');
      this.removeAllListeners('readable');
      this.resume();
    }

    /* c8 ignore next 3 - Hard to test */
    this.once('error', (error) => {
      callback(error);
    });

    this.once('close', () => {
      // We have offsets that were enqueued to be committed. Perform the operation
      if (this.#offsetsToCommit.size > 0) {
        this[kAutocommit]();
      }

      // We have offsets that are being committed. These are awaited despite the force parameter
      if (this.#autocommitInflight) {
        this.once('autocommit', error => {
          this.#invokeCloseCallbacks(error);
        });

        return;
      }

      this.#invokeCloseCallbacks(null);
    });

    return callback[kCallbackPromise];
  }

  isActive() {
    if (this.#shouldClose || this.closed || this.destroyed) {
      return false;
    }

    return this.#consumer.isActive();
  }

  isConnected() {
    if (this.#shouldClose || this.closed || this.destroyed) {
      return false;
    }

    return this.#consumer.isConnected();
  }

  resume() {
    this.#paused = false;
    return super.resume();
  }

  // We want to track if the stream is paused explicitly by the user, while isPaused from Node.js can also
  // be true if the stream is paused because there is no consumer.
  pause() {
    this.#paused = true;
    return super.pause();
  }

  /* c8 ignore next 3 - Only forwards to Node.js implementation - Inserted here to please Typescript */
  addListener(event, listener) {
    return super.addListener(event, listener);
  }

  /* c8 ignore next 3 - Only forwards to Node.js implementation - Inserted here to please Typescript */
  on(event, listener) {
    return super.on(event, listener);
  }

  /* c8 ignore next 3 - Only forwards to Node.js implementation - Inserted here to please Typescript */
  once(event, listener) {
    return super.once(event, listener);
  }

  /* c8 ignore next 3 - Only forwards to Node.js implementation - Inserted here to please Typescript */
  prependListener(event, listener) {
    return super.prependListener(event, listener);
  }

  /* c8 ignore next 3 - Only forwards to Node.js implementation - Inserted here to please Typescript */
  prependOnceListener(event, listener) {
    return super.prependOnceListener(event, listener);
  }

  [Symbol.asyncIterator]() {
    return super[Symbol.asyncIterator]();
  }

  _construct(callback) {
    this.#refreshOffsets(callback);
  }

  _destroy(error, callback) {
    if (this.#autocommitInterval) {
      clearInterval(this.#autocommitInterval);
    }

    callback(error);
  }

  _read() {
    this.#fetch();
  }

  #fetch() {
    /* c8 ignore next 4 - Hard to test */
    if (this.#shouldClose || this.closed || this.destroyed) {
      this.push(null);
      return;
    }

    // No need to fetch if nobody is consuming the data
    if (this.readableFlowing === null || this.#paused) {
      return;
    }

    this.#consumer.metadata({ topics: this.#consumer.topics.current }, (error, metadata) => {
      if (error) {
        this.emit('fetch');

        // The stream has been closed, ignore any error
        /* c8 ignore next 4 - Hard to test */
        if (this.#shouldClose || this.closed || this.destroyed) {
          this.push(null);
          return;
        }

        this.destroy(error);
        return;
      }

      /* c8 ignore next 5 - Hard to test */
      if (this.#shouldClose || this.closed || this.destroyed) {
        this.emit('fetch');
        this.push(null);
        return;
      }

      const requests = new Map();
      const topicIds = new Map();

      // Group topic-partitions by the destination broker
      const requestedOffsets = new Map();
      for (const topic of this.#topics) {
        const assignment = this.#assignmentsForTopic(topic);

        // This consumer has no assignment for the topic, continue
        if (!assignment) {
          continue;
        }

        const partitions = assignment.partitions;
        for (const partition of partitions) {
          const leader = metadata.topics.get(topic).partitions[partition].leader;

          if (this.#inflightNodes.has(leader)) {
            continue;
          }

          let leaderRequests = requests.get(leader);
          if (!leaderRequests) {
            leaderRequests = [];
            requests.set(leader, leaderRequests);
          }

          const topicId = metadata.topics.get(topic).id;
          topicIds.set(topicId, topic);

          const fetchOffset = this.#offsetsToFetch.get(`${topic}:${partition}`);
          requestedOffsets.set(`${topic}:${partition}`, fetchOffset);

          leaderRequests.push({
            topicId,
            partitions: [
              {
                partition,
                fetchOffset,
                partitionMaxBytes: this.#options.maxBytes,
                currentLeaderEpoch: -1,
                lastFetchedEpoch: -1
              }
            ]
          });
        }
      }

      for (const [leader, leaderRequests] of requests) {
        this.#inflightNodes.add(leader);

        this.#consumer.fetch({ ...this.#options, node: leader, topics: leaderRequests }, (error, response) => {
          this.#inflightNodes.delete(leader);
          this.emit('fetch');

          if (error) {
            // The stream has been closed, ignore the error
            /* c8 ignore next 4 - Hard to test */
            if (this.#shouldClose || this.closed || this.destroyed) {
              this.push(null);
              return;
            }

            this.destroy(error);
            return;
          }

          if (this.#shouldClose || this.closed || this.destroyed) {
            // When it's the last inflight request, we finally close the stream.
            // This is done to avoid the user exiting from consuming methods like for-await and still seeing the process up.
            if (this.#inflightNodes.size === 0) {
              this.push(null);
            }

            return;
          }

          this.#pushRecords(metadata, topicIds, response, requestedOffsets);

          if (this.#maxFetches > 0 && ++this.#fetches >= this.#maxFetches) {
            this.push(null);
          }
        });
      }
    });
  }

  #pushRecords(metadata, topicIds, response, requestedOffsets) {
    const autocommit = this.#autocommitEnabled;
    let canPush = true;
    const keyDeserializer = this.#keyDeserializer;
    const valueDeserializer = this.#valueDeserializer;
    const headerKeyDeserializer = this.#headerKeyDeserializer;
    const headerValueDeserializer = this.#headerValueDeserializer;
    let diagnosticContext;

    // Parse results
    for (const topicResponse of response.responses) {
      const topic = topicIds.get(topicResponse.topicId);

      for (const { records: recordsBatches, partitionIndex: partition } of topicResponse.partitions) {
        if (!recordsBatches) {
          continue;
        }

        for (const batch of recordsBatches) {
          const firstTimestamp = batch.firstTimestamp;
          const firstOffset = batch.firstOffset;
          const leaderEpoch = metadata.topics.get(topic).partitions[partition].leaderEpoch;

          for (const record of batch.records) {
            const offset = batch.firstOffset + BigInt(record.offsetDelta);

            if (offset < requestedOffsets.get(`${topic}:${partition}`)) {
              // This is a duplicate message, ignore it
              continue;
            }

            diagnosticContext = createDiagnosticContext({ client: this.#consumer, stream: this, operation: 'receive', raw: record });
            consumerReceivesChannel.start.publish(diagnosticContext);

            const commit = autocommit ? noopCallback : this.#commit.bind(this, topic, partition, offset, leaderEpoch);

            try {
              const headers = new Map();
              for (const [headerKey, headerValue] of record.headers) {
                headers.set(headerKeyDeserializer(headerKey), headerValueDeserializer(headerValue));
              }

              const key = keyDeserializer(record.key, headers);
              const value = valueDeserializer(record.value, headers);

              this.#metricsConsumedMessages?.inc();

              const message = {
                key,
                value,
                headers,
                topic,
                partition,
                timestamp: firstTimestamp + record.timestampDelta,
                offset,
                commit
              };

              diagnosticContext.result = message;
              consumerReceivesChannel.asyncStart.publish(diagnosticContext);

              canPush = this.push(message);

              consumerReceivesChannel.asyncEnd.publish(diagnosticContext);
            } catch (error) {
              const shouldDestroy = this.#corruptedMessageHandler(record, topic, partition, firstTimestamp, firstOffset, commit);

              if (shouldDestroy) {
                diagnosticContext.error = error;
                consumerReceivesChannel.error.publish(diagnosticContext);

                this.destroy(new UserError('Failed to deserialize a message.', { cause: error }));
                return;
              }
            } finally {
              consumerReceivesChannel.end.publish(diagnosticContext);
            }
          }

          if (batch === recordsBatches[recordsBatches.length - 1]) {
            // Track the last read offset
            const lastOffset = batch.firstOffset + BigInt(batch.lastOffsetDelta);
            this.#offsetsToFetch.set(`${topic}:${partition}`, lastOffset + 1n);

            // Autocommit if needed
            if (autocommit) {
              this.#offsetsToCommit.set(`${topic}:${partition}`, { topic, partition, offset: lastOffset, leaderEpoch });
            }
          }
        }
      }
    }

    if (this.#autocommitEnabled && !this.#autocommitInterval) {
      this[kAutocommit]();
    }

    if (canPush) {
      process.nextTick(() => {
        this.#fetch();
      });
    }
  }

  #updateCommittedOffset(topic, partition, offset) {
    const key = `${topic}:${partition}`;
    const previous = this.#offsetsCommitted.get(key);

    if (typeof previous === 'undefined' || previous < offset) {
      this.#offsetsCommitted.set(key, offset);
    }
  }

  // This could be optimized to only schedule once per tick per topic-partition and only commit the latest offset
  #commit(topic, partition, offset, leaderEpoch, callback) {
    if (!callback) {
      callback = createPromisifiedCallback();
    }

    this.#consumer.commit({ offsets: [{ topic, partition, offset, leaderEpoch }] }, error => {
      /* c8 ignore next 4 - Hard to test */
      if (error) {
        callback(error);
        return;
      }

      this.#updateCommittedOffset(topic, partition, offset);
      callback(null);
    });

    return callback[kCallbackPromise];
  }

  [kAutocommit]() {
    if (this.#offsetsToCommit.size === 0) {
      return;
    }

    this.#autocommitInflight = true;
    const offsets = Array.from(this.#offsetsToCommit.values());
    this.#offsetsToCommit.clear();

    this.#consumer.commit({ offsets }, error => {
      this.#autocommitInflight = false;

      if (error) {
        this.emit('autocommit', error);
        this.destroy(error);
        return;
      }

      for (const { topic, partition, offset } of offsets) {
        this.#updateCommittedOffset(topic, partition, offset);
      }

      this.emit('autocommit', null, offsets);
    });
  }

  #refreshOffsets(callback) {
    /* c8 ignore next 4 - Hard to test */
    if (this.#topics.length === 0) {
      callback(null);
      return;
    }

    // List topic offsets
    this.#consumer.listOffsets({
      topics: this.#topics,
      timestamp: this.#mode === MessagesStreamModes.EARLIEST ||
        (this.#mode !== MessagesStreamModes.LATEST && this.#fallbackMode === MessagesStreamFallbackModes.EARLIEST)
        ? ListOffsetTimestamps.EARLIEST
        : ListOffsetTimestamps.LATEST
    }, (error, offsets) => {
      if (error) {
        /* c8 ignore next 4 - Hard to test */
        if (this.#shouldClose || this.closed || this.destroyed) {
          callback(null);
          return;
        }

        callback(error);
        return;
      }

      if (this.#mode !== MessagesStreamModes.COMMITTED) {
        this.#assignOffsets(offsets, new Map(), callback);
        return;
      }

      // Now restore group offsets
      const topics = [];
      for (const topic of this.#topics) {
        const assignment = this.#assignmentsForTopic(topic);

        if (!assignment) {
          continue;
        }

        topics.push(assignment);
      }

      if (!topics.length) {
        this.#assignOffsets(offsets, new Map(), callback);
        return;
      }

      this.#consumer.listCommittedOffsets({ topics }, (error, commits) => {
        if (error) {
          /* c8 ignore next 4 - Hard to test */
          if (this.#shouldClose || this.closed || this.destroyed) {
            callback(null);
            return;
          }

          callback(error);
          return;
        }

        this.#assignOffsets(offsets, commits, callback);
      });
    });
  }

  [kRefreshOffsetsAndFetch]() {
    this.#refreshOffsets(() => {
      this.#fetch();
    });
  }

  #assignOffsets(offsets, commits, callback) {
    for (const [topic, partitions] of offsets) {
      for (let i = 0; i < partitions.length; i++) {
        if (!this.#offsetsToFetch.has(`${topic}:${i}`)) {
          this.#offsetsToFetch.set(`${topic}:${i}`, partitions[i]);
        }
      }
    }

    for (const [topic, partitions] of commits) {
      for (let i = 0; i < partitions.length; i++) {
        const offset = partitions[i];

        if (offset >= 0n) {
          this.#offsetsToFetch.set(`${topic}:${i}`, offset + 1n);
        } else if (this.#fallbackMode === MessagesStreamFallbackModes.FAIL) {
          callback(new UserError(`Topic ${topic} has no committed offset on partition ${i} for group ${this.#consumer.groupId}.`, {
            topic,
            partition: i,
            groupId: this.#consumer.groupId
          }));
          return;
        }
      }
    }

    // Rebuild the list of offsetsCommitted (which is used for consumer lag) out of the offsets to fetch
    for (const topic of this.#topics) {
      const assignment = this.#assignmentsForTopic(topic);

      // This consumer has no assignment for the topic, continue
      if (!assignment) {
        continue;
      }

      const partitions = assignment.partitions;
      for (const partition of partitions) {
        const committed = this.#offsetsToFetch.get(`${topic}:${partition}`);
        this.#offsetsCommitted.set(`${topic}:${partition}`, committed - 1n);
      }
    }

    this.emit('offsets');
    callback(null);
  }

  #assignmentsForTopic(topic) {
    return this.#consumer.assignments?.find(assignment => assignment.topic === topic);
  }

  #invokeCloseCallbacks(error) {
    for (const callback of this.#closeCallbacks) {
      callback(error);
    }

    this.#closeCallbacks = [];
  }

  /* c8 ignore next 3 - This is a private API used to debug during development */
  [kInspect](...args) {
    this.#consumer[kInspect](...args);
  }
}
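
For orientation, MessagesStream is an object-mode Readable, so it can be consumed with for-await; every message pushed by #pushRecords carries key, value, headers, topic, partition, timestamp, offset and a commit callback (a no-op when autocommit is enabled). The sketch below is a hypothetical usage example under stated assumptions: the Consumer construction and its consume() method are not part of this file, so those option names and the method itself are assumptions, not this module's API.

// Hypothetical usage sketch. The Consumer setup and consume() call are assumed
// to exist elsewhere in @platformatic/kafka; only the stream iteration below
// relies on what this file defines.
import { Consumer } from '@platformatic/kafka'

const consumer = new Consumer({
  groupId: 'example-group',            // assumed option names
  clientId: 'example-client',
  bootstrapBrokers: ['localhost:9092']
})

// Assumed to return a MessagesStream like the class implemented above,
// forwarding options such as topics and autocommit to its constructor.
const stream = await consumer.consume({ topics: ['events'], autocommit: false })

for await (const message of stream) {
  // Fields set by #pushRecords above.
  console.log(message.topic, message.partition, message.offset, message.key, message.value)

  // With autocommit disabled, commit is the bound #commit helper and,
  // when called without a callback, returns a promise.
  await message.commit()
}

// close() can also be called from elsewhere to stop consumption; it flushes
// pending offsets and waits for any in-flight autocommit before resolving.
await stream.close()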