UNPKG

@ydbjs/topic

Version: (not captured in this extract)

YDB Topics client for publish-subscribe messaging. Provides at-least-once delivery, exactly-once publishing, FIFO guarantees, and scalable message processing for unstructured data.

286 lines 15.7 kB
import { abortable } from '@ydbjs/abortable';
import { Codec } from '@ydbjs/api/topic';
import { timestampMs } from '@bufbuild/protobuf/wkt';
import { loggers } from '@ydbjs/debug';
import { TopicMessage } from '../message.js';
import { _send_read_request } from './_read_request.js';

// Namespaced debug logger for the reader ("topic:reader").
let dbg = loggers.topic.extend('reader');

/**
 * Start a read operation against the reader's internal buffer and return an
 * async generator that yields batches of decoded `TopicMessage` objects.
 *
 * The generator:
 *  - yields `[]` when `waitMs` elapses with no data (callers can use this as a
 *    liveness tick), then keeps iterating;
 *  - finishes when `limit` messages have been yielded, when the reader is
 *    disposed, or when either abort signal fires;
 *  - releases buffer bytes back to the server (flow control) only for
 *    responses that were fully drained.
 *
 * @param {object} ctx - Reader internals. From the code below it must provide:
 *   `disposed`, `controller` (AbortController), `buffer` (array of read
 *   responses), `freeBufferSize`, `maxBufferSize`, `partitionSessions` (Map),
 *   `codecs` (Map of codec -> { decompress }), `outgoingQueue`,
 *   `updateFreeBufferSize(bytes)`, and optionally `readOffsets` (Map) for
 *   transaction offset tracking. NOTE(review): exact shapes defined elsewhere
 *   in the package — confirm against the Reader class.
 * @param {object} [options]
 * @param {number} [options.limit] - Max messages to yield; falsy means unlimited.
 *   NOTE(review): `|| Infinity` means an explicit `limit: 0` also reads as
 *   unlimited — presumably intentional, verify with callers.
 * @param {AbortSignal} [options.signal] - External cancellation signal.
 * @param {number} [options.waitMs] - Empty-buffer wait before yielding `[]`;
 *   defaults to 60s (falsy values fall back to the default).
 * @returns {AsyncGenerator<TopicMessage[]>}
 * @throws {Error} If the reader is already disposed, the merged signal is
 *   already aborted, an unsupported codec is encountered, or decompression fails.
 */
export let _read = function read(ctx, options = {}) {
    let limit = options.limit || Infinity;
    let signal = options.signal;
    let waitMs = options.waitMs || 60_000;
    dbg.log('starting read operation with limit=%s, waitMs=%d, hasSignal=%s', limit === Infinity ? 'unlimited' : limit, waitMs, !!signal);
    dbg.log('reader state: disposed=%s, bufferSize=%d, freeBufferSize=%d, partitionSessions=%d', ctx.disposed, ctx.buffer.length, ctx.freeBufferSize, ctx.partitionSessions.size);
    // Check if the reader has been disposed, cannot read with disposed reader.
    if (ctx.disposed) {
        throw new Error('Reader is disposed');
    }
    // Create a merged abort controller to handle both the external signal and
    // reader disposal. AbortSignal.any() is deliberately avoided: composite
    // signals accumulate internal references per call, and this function may
    // be called many times over a reader's lifetime.
    let mergedController = new AbortController();
    let mergedSignal = mergedController.signal;
    // Link the reader's own controller signal. If it is already aborted we
    // propagate immediately instead of registering a listener.
    let controllerAbortHandler = () => mergedController.abort(ctx.controller.signal.reason);
    if (ctx.controller.signal.aborted) {
        mergedController.abort(ctx.controller.signal.reason);
    }
    else {
        ctx.controller.signal.addEventListener('abort', controllerAbortHandler, { once: true });
    }
    // Link the external signal, if provided, the same way.
    let externalAbortHandler;
    if (signal) {
        if (signal.aborted) {
            mergedController.abort(signal.reason);
        }
        else {
            externalAbortHandler = () => mergedController.abort(signal.reason);
            signal.addEventListener('abort', externalAbortHandler, {
                once: true,
            });
        }
    }
    // Cleanup function to remove both abort listeners. Invoked from the
    // generator's `finally`, so it runs on normal completion, `return()`,
    // `throw()`, and garbage-collected early exits alike.
    let cleanupListeners = () => {
        ctx.controller.signal.removeEventListener('abort', controllerAbortHandler);
        if (externalAbortHandler && signal) {
            signal.removeEventListener('abort', externalAbortHandler);
        }
    };
    // If the merged signal is already aborted, clean up and throw immediately
    // rather than returning a generator that can never produce anything.
    if (mergedSignal.aborted) {
        cleanupListeners();
        throw new Error('Read aborted', { cause: mergedSignal.reason });
    }
    // The actual read loop lives in an immediately-invoked async generator so
    // the synchronous validation above runs eagerly at call time.
    return (async function* () {
        try {
            let messageCount = 0;
            while (true) {
                dbg.log('generator iteration called, messageCount=%d, limit=%s', messageCount, limit === Infinity ? 'unlimited' : limit);
                // If the reader was disposed between iterations, finish quietly.
                if (ctx.disposed) {
                    dbg.log('reader disposed during iteration, returning');
                    return;
                }
                // If either underlying signal aborted, finish quietly (the
                // eager-throw above only covers abort-before-first-iteration).
                if (mergedSignal.aborted) {
                    dbg.log('signal aborted during iteration, returning');
                    return;
                }
                // Stop once the requested number of messages has been yielded.
                if (messageCount >= limit) {
                    dbg.log('limit reached, returning');
                    return;
                }
                let messages = [];
                // Wait for the next readResponse or until the timeout expires.
                if (!ctx.buffer.length) {
                    dbg.log('buffer empty, waiting for data (waitMs=%d)', waitMs);
                    let waiter = Promise.withResolvers(); // resolves true = data arrived, false = timeout
                    // Poll the buffer for new data. NOTE(review): 10ms polling
                    // rather than an event/callback — presumably to keep the
                    // reader decoupled from the stream's receive path.
                    let bufferCheckInterval = setInterval(() => {
                        if (ctx.buffer.length > 0) {
                            dbg.log('data arrived in buffer, resolving waiter (bufferSize=%d)', ctx.buffer.length);
                            waiter.resolve(true);
                        }
                    }, 10); // Check every 10ms
                    // Set up the timeout manually to avoid creating an
                    // AbortSignal.any()/AbortSignal.timeout() pair each iteration.
                    let timeoutId = setTimeout(() => {
                        waiter.resolve(false); // timeout
                    }, waitMs);
                    try {
                        // oxlint-disable-next-line no-await-in-loop
                        let dataArrived = await abortable(mergedSignal, waiter.promise);
                        if (!dataArrived) {
                            // Timeout expired: yield an empty batch so callers
                            // see progress, then wait again.
                            dbg.log('wait timeout expired, yielding empty result');
                            yield [];
                            continue;
                        }
                    }
                    catch (error) {
                        // `abortable` rejects on abort; treat that as a clean
                        // end-of-read, rethrow anything else.
                        if (mergedSignal.aborted) {
                            dbg.log('read aborted during wait, finishing');
                            return;
                        }
                        throw error;
                    }
                    finally {
                        // Always stop the poller and the timer, whichever side won.
                        clearInterval(bufferCheckInterval);
                        clearTimeout(timeoutId);
                    }
                    // Re-check abort/disposal after the await, since either may
                    // have happened while suspended.
                    if (mergedSignal.aborted) {
                        dbg.log('read aborted during wait, finishing');
                        return;
                    }
                    if (ctx.disposed) {
                        dbg.log('reader disposed during wait, finishing');
                        return;
                    }
                }
                // Bytes of fully-consumed responses to hand back to flow control.
                let releasableBufferBytes = 0n;
                // Drain buffered responses. Each level shifts an item off the
                // front and, if the limit interrupts mid-item, unshifts the
                // partially-consumed remainder back so the next call resumes
                // exactly where this one stopped.
                while (ctx.buffer.length && messageCount < limit) {
                    // True only if every message in this response gets consumed;
                    // partially-consumed responses must keep their buffer bytes.
                    let fullRead = true;
                    let response = ctx.buffer.shift(); // Get the first response from the buffer
                    if (response.partitionData.length === 0) {
                        dbg.log('skipping empty response');
                        continue; // Skip empty responses
                    }
                    // If we have a limit and reached it, put the response back and stop.
                    if (messageCount >= limit) {
                        ctx.buffer.unshift(response); // Put the response back to the front of the buffer
                        break;
                    }
                    while (response.partitionData.length && messageCount < limit) {
                        let pd = response.partitionData.shift(); // Get the first partition data
                        if (pd.batches.length === 0) {
                            dbg.log('skipping empty partition data for sessionId=%s', pd.partitionSessionId);
                            continue; // Skip empty partition data
                        }
                        // If we have a limit and reached it, put the partition data back and stop.
                        if (messageCount >= limit) {
                            response.partitionData.unshift(pd); // Put the partition data back to the front of the response
                            break;
                        }
                        let partitionSession = ctx.partitionSessions.get(pd.partitionSessionId);
                        // Data for an unknown or stopped session is dropped, not
                        // re-queued — there is no session to attribute it to.
                        if (!partitionSession) {
                            dbg.log('error: readResponse for unknown partitionSessionId=%s', pd.partitionSessionId);
                            continue;
                        }
                        if (partitionSession.isStopped) {
                            dbg.log('error: readResponse for stopped partitionSessionId=%s', pd.partitionSessionId);
                            continue;
                        }
                        while (pd.batches.length && messageCount < limit) {
                            let batch = pd.batches.shift(); // Get the first batch
                            if (batch.messageData.length === 0) {
                                dbg.log('skipping empty batch from producer=%s', batch.producerId);
                                continue; // Skip empty batches
                            }
                            // If we have a limit and reached it, put the batch back and stop.
                            if (messageCount >= limit) {
                                pd.batches.unshift(batch); // Put the batch back to the front of the partition data
                                break;
                            }
                            while (batch.messageData.length && messageCount < limit) {
                                // Process each message in the batch.
                                let msg = batch.messageData.shift(); // Get the first message from the batch
                                // If we have a limit and reached it, put the message back and stop.
                                if (messageCount >= limit) {
                                    batch.messageData.unshift(msg); // Put the message back to the front of the batch
                                    break;
                                }
                                let payload = msg.data;
                                // Codec applies per batch; UNSPECIFIED means raw bytes.
                                if (batch.codec !== Codec.UNSPECIFIED) {
                                    if (!ctx.codecs.has(batch.codec)) {
                                        dbg.log('error: codec %s is not supported', batch.codec);
                                        throw new Error(`Codec ${batch.codec} is not supported`);
                                    }
                                    // Decompress the message data using the registered codec.
                                    try {
                                        payload = ctx.codecs
                                            .get(batch.codec)
                                            .decompress(msg.data);
                                    }
                                    catch (error) {
                                        dbg.log('error: failed to decompress message data: %O', error);
                                        throw error;
                                    }
                                }
                                // Materialize the message; optional timestamp/metadata
                                // fields are only set when present on the wire.
                                let message = new TopicMessage({
                                    partitionSession: partitionSession,
                                    producer: batch.producerId,
                                    payload: payload,
                                    codec: batch.codec,
                                    seqNo: msg.seqNo,
                                    offset: msg.offset,
                                    uncompressedSize: msg.uncompressedSize,
                                    ...(msg.createdAt && {
                                        createdAt: timestampMs(msg.createdAt),
                                    }),
                                    ...(batch.writtenAt && {
                                        writtenAt: timestampMs(batch.writtenAt),
                                    }),
                                    ...(msg.metadataItems && {
                                        metadataItems: Object.fromEntries(msg.metadataItems.map((item) => [
                                            item.key,
                                            item.value,
                                        ])),
                                    }),
                                });
                                // Track the [first, last] read offset per partition
                                // session for transaction support.
                                if (ctx.readOffsets) {
                                    let existing = ctx.readOffsets.get(pd.partitionSessionId);
                                    if (existing) {
                                        // Update last offset, keep first offset.
                                        existing.lastOffset = msg.offset;
                                    }
                                    else {
                                        // First message for this partition session.
                                        ctx.readOffsets.set(pd.partitionSessionId, {
                                            firstOffset: msg.offset,
                                            lastOffset: msg.offset,
                                        });
                                    }
                                }
                                messages.push(message);
                                messageCount++;
                            }
                            // Any leftovers mean the response was not fully read;
                            // re-queue them at the front to preserve order.
                            if (batch.messageData.length != 0) {
                                fullRead = false;
                                pd.batches.unshift(batch); // Put the batch back to the front of the partition data
                            }
                        }
                        if (pd.batches.length != 0) {
                            fullRead = false;
                            response.partitionData.unshift(pd); // Put the partition data back to the front of the response
                        }
                    }
                    if (response.partitionData.length != 0) {
                        fullRead = false;
                        ctx.buffer.unshift(response); // Put the response back to the front of the buffer
                    }
                    // If we have read all messages from the response, we can
                    // release its buffer allocation back to the server.
                    if (response.partitionData.length === 0 && fullRead) {
                        releasableBufferBytes += response.bytesSize;
                        dbg.log('response fully processed, releasing %s bytes from buffer', response.bytesSize);
                    }
                }
                dbg.log('message processing complete: yielding %d messages, total messageCount=%d', messages.length, messageCount);
                dbg.log('buffer state: bufferSize=%d, maxBufferSize=%d, freeBufferSize=%d, releasableBytes=%s', ctx.buffer.length, ctx.maxBufferSize, ctx.freeBufferSize, releasableBufferBytes);
                dbg.log('yield %d messages, buffer size is %d bytes, free buffer size is %d bytes', messages.length, ctx.maxBufferSize - ctx.freeBufferSize, ctx.freeBufferSize);
                if (releasableBufferBytes > 0n) {
                    // Update free buffer size using the reader's helper.
                    ctx.updateFreeBufferSize(releasableBufferBytes);
                    // If we have free buffer space, request more data.
                    // Use getter to get current queue (may have been recreated on retry).
                    try {
                        _send_read_request({
                            queue: ctx.outgoingQueue,
                            bytesSize: releasableBufferBytes,
                        });
                    }
                    catch {
                        // Queue may be closed during retry; best-effort, ignore.
                        dbg.log('failed to send read request, queue may be closed');
                    }
                }
                dbg.log('generator yielding: messagesCount=%d', messages.length);
                yield messages;
                // Finish if the limit is reached, or if nothing was yielded and
                // the buffer is empty (nothing left to drain this round).
                if (messageCount >= limit || (messages.length === 0 && !ctx.buffer.length)) {
                    return;
                }
            }
        }
        finally {
            // Clean up abort listeners whenever the generator finishes,
            // however it finishes.
            cleanupListeners();
        }
    })();
};
//# sourceMappingURL=_read.js.map