/*
 * lifion-kinesis
 * Version:
 * Lifion client for Amazon Kinesis Data streams
 * 378 lines (346 loc) • 12.1 kB
 * JavaScript
 */
/**
* Module that implements a shard polling consumer.
*
* @module polling-consumer
* @private
*/
import deaggregate from './deaggregate.js';
import { getRecordsDecoder } from './records.js';
import { getStreamShards } from './stream.js';
/**
 * Requests a new shard iterator from the given stream and shard. When a sequence number is
 * provided, the iterator points to the record right after it (`AFTER_SEQUENCE_NUMBER`). When it
 * is not provided, `initialPositionInStream` is used as the iterator type.
 *
 * If the request made with a sequence number is rejected with an `InvalidArgumentException`, the
 * sequence number is treated as an invalid checkpoint: a warning is logged and the request is
 * repeated without it. Any other failure is re-thrown untouched.
 *
 * @param {Object} client - The Kinesis client used to issue the `getShardIterator` request.
 * @param {Object} logger - An instance of a logger (only `warn` is used here).
 * @param {string} streamName - The name of the stream where the shard belongs.
 * @param {string} shardId - The ID of the shard to get an iterator for.
 * @param {string} initialPositionInStream - The iterator type to request when there is no
 *        sequence number to resume from.
 * @param {string} [sequenceNumber] - The sequence number after which reading should resume.
 * @fulfil {string} The new shard iterator.
 * @returns {Promise} A promise for a new shard iterator.
 * @private
 */
async function getShardIterator(
  client,
  logger,
  streamName,
  shardId,
  initialPositionInStream,
  sequenceNumber
) {
  // Start from the "no checkpoint" request and upgrade it when a sequence number is known.
  const request = {
    ShardId: shardId,
    ShardIteratorType: initialPositionInStream,
    StreamName: streamName
  };
  if (sequenceNumber) {
    request.ShardIteratorType = 'AFTER_SEQUENCE_NUMBER';
    request.StartingSequenceNumber = sequenceNumber;
  }

  try {
    const response = await client.getShardIterator(request);
    return response.ShardIterator;
  } catch (err) {
    const hasInvalidCheckpoint = err.code === 'InvalidArgumentException' && sequenceNumber;
    if (!hasInvalidCheckpoint) throw err;

    logger.warn(`The stored checkpoint for "${streamName}/${shardId}" is invalid. Ignoring it.`);
    // Retry once without the sequence number; that call can't take this branch again.
    return getShardIterator(client, logger, streamName, shardId, initialPositionInStream);
  }
}
/**
 * Gives up the lease held on the consumer's shard so that it can be acquired again, by this or
 * by another consumer, on a later lease acquisition attempt. This is meant to avoid a shard that
 * stays owned while nobody is polling it.
 *
 * The current shard and stream state are read from the state store and the lease is released
 * against the shard state version that was just read. This is a best-effort operation: any
 * failure is logged as a warning and never thrown.
 *
 * @param {Object} props - The private data of the consumer instance.
 * @returns {Promise} Always fulfils with `undefined`.
 * @private
 */
async function releaseLease(props) {
  const { logger, shardId, stateStore } = props;

  const release = async () => {
    const state = await stateStore.getShardAndStreamState(shardId, {});
    const { version } = state.shardState;
    await stateStore.releaseShardLease(shardId, version, state.streamState);
  };

  try {
    await release();
  } catch (err) {
    logger.warn(`Couldn't release the lease for shard "${shardId}":`, err);
  }
}
/**
 * Polls the shard once for records and pushes them to the parent stream, then (unless paused
 * polling is in use) schedules the next poll.
 *
 * The sequence of a single poll is:
 *
 * 1. Stop the consumer if the shard lease has expired.
 * 2. Store the checkpoint deferred by the previous poll, if any (paused polling with
 *    auto-checkpoints defers storing the checkpoint until more records are requested).
 * 3. Reuse the cached shard iterator, or request a new one from the last known checkpoint, or
 *    from the initial position in the stream when there is no checkpoint.
 * 4. Get records. When there are none and there is no next iterator, the shard is marked as
 *    depleted and the consumer is stopped. When there are none but the shard is still open,
 *    another poll is scheduled (immediately if the consumer is behind the tip of the shard).
 * 5. Optionally de-aggregate, then decode the records and push them to the stream.
 *
 * Errors never escape: an expired iterator is recreated and the poll retried; any other error
 * releases the shard lease and stops the consumer, unless the consumer was already stopped, in
 * which case the error is ignored.
 *
 * @param {Object} props - The private data of the consumer instance. It is mutated in place
 *        (`iterator`, `timeoutId`, `seqNumToCheckpoint`, `approxArrival`).
 * @returns {Promise} Fulfils with `undefined` once this poll has completed.
 * @private
 */
async function pollForRecords(props) {
  const privateProps = props;
  const {
    approxArrival,
    client,
    continuePolling,
    initialPositionInStream,
    leaseExpiration,
    limit,
    logger,
    noRecordsPollDelay,
    pollDelay,
    pushToStream,
    recordsDecoder,
    seqNumToCheckpoint,
    setCheckpoint,
    shardId,
    shouldDeaggregate,
    stateStore,
    stopConsumer,
    streamName,
    useAutoCheckpoints,
    usePausedPolling
  } = privateProps;

  try {
    if (Date.now() > leaseExpiration) {
      logger.debug(`Unable to read from shard "${shardId}" anymore, the lease expired.`);
      stopConsumer(shardId);
      return;
    }

    // Store the checkpoint that the previous poll deferred (paused polling).
    if (seqNumToCheckpoint) {
      await setCheckpoint(seqNumToCheckpoint, approxArrival);
      privateProps.seqNumToCheckpoint = null;
      privateProps.approxArrival = null;
    }

    // Read the checkpoint and the iterator only now: storing the deferred checkpoint above may
    // have advanced `privateProps.checkpoint`, and a snapshot taken before that would make a
    // freshly requested iterator re-read the batch that was just checkpointed.
    const { checkpoint } = privateProps;
    let { iterator } = privateProps;

    if (!iterator && checkpoint) {
      logger.debug(`Starting to read shard "${shardId}" from a known checkpoint.`);
      iterator = await getShardIterator(
        client,
        logger,
        streamName,
        shardId,
        initialPositionInStream,
        checkpoint
      );
    }

    if (!iterator) {
      logger.debug(
        `Starting to read shard "${shardId}" from the ${
          initialPositionInStream === 'LATEST' ? 'latest record' : 'trim horizon'
        }.`
      );
      iterator = await getShardIterator(
        client,
        logger,
        streamName,
        shardId,
        initialPositionInStream
      );
    }

    const data = await client.getRecords({ Limit: limit, ShardIterator: iterator });
    const { MillisBehindLatest, NextShardIterator, Records } = data;
    const msBehind = MillisBehindLatest;
    privateProps.iterator = NextShardIterator;
    const recordsCount = Records.length;

    if (recordsCount === 0) {
      // No records and no next iterator: there is nothing left to read from this shard.
      if (NextShardIterator === undefined) {
        const shards = await getStreamShards(privateProps);
        logger.debug(`The parent shard "${shardId}" has been depleted.`);
        await stateStore.markShardAsDepleted(shards, shardId);
        stopConsumer(shardId);
        return;
      }

      // Wait before polling again only when caught up; otherwise poll again right away.
      const delay = msBehind <= 0 ? noRecordsPollDelay : 0;
      if (delay === 0) logger.debug(`Fast-forwarding "${shardId}"… (${msBehind}ms behind)`);
      privateProps.timeoutId = setTimeout(pollForRecords, delay, privateProps);
      return;
    }

    const deaggCollection = shouldDeaggregate ? await deaggregate(Records) : Records;
    const records = await Promise.all(deaggCollection.map(recordsDecoder));
    logger.debug(`Got ${recordsCount} record(s) from "${shardId}" (${msBehind}ms behind)`);

    // De-aggregation can change the number of records, so the record to checkpoint is the last
    // one of the decoded list, not the one at the index of the last raw Kinesis record.
    if (useAutoCheckpoints && records.length > 0) {
      const { approximateArrivalTimestamp, sequenceNumber } = records[records.length - 1];
      if (!usePausedPolling) {
        await setCheckpoint(sequenceNumber, approximateArrivalTimestamp);
      } else {
        // Deferred: stored at the start of the next poll, once more records are requested.
        privateProps.seqNumToCheckpoint = sequenceNumber;
        privateProps.approxArrival = approximateArrivalTimestamp;
      }
    }

    const propsToPush = {
      millisBehindLatest: msBehind,
      records,
      shardId,
      streamName,
      // Manual checkpointing and manual continuation are only exposed when they apply.
      ...(!useAutoCheckpoints && { setCheckpoint }),
      ...(usePausedPolling && { continuePolling })
    };
    pushToStream(null, propsToPush);

    if (!usePausedPolling) {
      privateProps.timeoutId = setTimeout(pollForRecords, pollDelay, privateProps);
    }
  } catch (err) {
    // Failures of requests that were in flight when the consumer got stopped are irrelevant.
    if (privateProps.stopped) return;

    if (err.code === 'ExpiredIteratorException') {
      logger.warn('Previous shard iterator expired, recreating…');
      privateProps.iterator = null;
      await pollForRecords(privateProps);
      return;
    }

    logger.warn(
      `Releasing the lease for shard "${shardId}" to recover from an unexpected error:`,
      err
    );
    await releaseLease(privateProps);
    stopConsumer(shardId);
  }
}
/**
 * Class that implements a polling consumer for a single shard of a stream.
 *
 * All the state of the consumer lives in a private object that is handed over to
 * `pollForRecords`, which reads from it and mutates it between polls.
 *
 * @alias module:polling-consumer
 */
class PollingConsumer {
  #data = {};

  /**
   * Initializes an instance of the polling consumer.
   *
   * @param {Object} options - The initialization options.
   * @param {string} options.checkpoint - The last-known checkpoint for the stream shard.
   * @param {Object} options.client - An instance of the Kinesis client.
   * @param {string} options.compression - The kind of data compression to use with records.
   * @param {string} options.initialPositionInStream - The location in the shard from which the
   *        Consumer will start fetching records from when the application starts for the first
   *        time and there is no checkpoint for the shard.
   * @param {string} options.leaseExpiration - The timestamp of the shard lease expiration, in
   *        any form accepted by the `Date` constructor.
   * @param {number} options.limit - The limit of records per get records call.
   * @param {Object} options.logger - An instance of a logger.
   * @param {number} options.noRecordsPollDelay - The delay in milliseconds before attempting to
   *        get more records when there were none in the previous attempt.
   * @param {number} options.pollDelay - When the `usePausedPolling` option is `false`, this
   *        option defines the delay in milliseconds in between poll requests for more records.
   * @param {function(Error, Object): void} options.pushToStream - A function to push incoming
   *        records to the consumer.
   * @param {Object} [options.s3] - Kept with the consumer state; not read anywhere in this
   *        module (presumably the S3 settings for large items, to be confirmed).
   * @param {Object} [options.s3Client] - Handed over to the records decoder.
   * @param {string} options.shardId - The ID of the stream shard to retrieve records for.
   * @param {boolean} [options.shouldDeaggregate] - Whether the records returned by Kinesis are
   *        passed through the de-aggregation step before being decoded.
   * @param {boolean} [options.shouldParseJson] - Handed over to the records decoder.
   * @param {Object} options.stateStore - An instance of the state store.
   * @param {function(string): void} options.stopConsumer - A function that stops this consumer
   *        from the manager; it is called with the ID of the shard.
   * @param {string} options.streamName - The name of the Kinesis stream.
   * @param {boolean} options.useAutoCheckpoints - Whether to automatically store shard
   *        checkpoints using the sequence number of the most-recently received record or not.
   * @param {boolean} options.usePausedPolling - Whether if the client is waiting for
   *        user-intervention before polling for more records, or not.
   * @param {boolean} options.useS3ForLargeItems - Whether to automatically use an S3
   *        bucket to store large items or not.
   */
  constructor(options) {
    const {
      checkpoint,
      client,
      compression,
      initialPositionInStream,
      leaseExpiration,
      limit,
      logger,
      noRecordsPollDelay,
      pollDelay,
      pushToStream,
      s3,
      s3Client,
      shardId,
      shouldDeaggregate,
      shouldParseJson,
      stateStore,
      stopConsumer,
      streamName,
      useAutoCheckpoints,
      usePausedPolling,
      useS3ForLargeItems
    } = options;

    Object.assign(this.#data, {
      approxArrival: null,
      checkpoint,
      client,
      compression,
      // Set by `start()`, once the state needed to store checkpoints has been resolved.
      continuePolling: null,
      initialPositionInStream,
      iterator: null,
      // Kept as epoch milliseconds so that it can be compared against `Date.now()`.
      leaseExpiration: new Date(leaseExpiration).getTime(),
      limit,
      logger,
      noRecordsPollDelay,
      pollDelay,
      pushToStream,
      recordsDecoder: getRecordsDecoder({
        compression,
        inputEncoding: 'Buffer',
        logger,
        s3Client,
        shouldParseJson,
        useS3ForLargeItems
      }),
      s3,
      seqNumToCheckpoint: null,
      // Set by `start()`, once the state needed to store checkpoints has been resolved.
      setCheckpoint: null,
      shardId,
      shouldDeaggregate,
      stateStore,
      stopConsumer,
      stopped: false,
      streamName,
      timeoutId: null,
      useAutoCheckpoints,
      usePausedPolling,
      useS3ForLargeItems
    });
  }

  /**
   * Starts polling for records. The shards data is resolved from the state store first, as it
   * is needed to store checkpoints; if that fails, the consumer is stopped through
   * `stopConsumer` and no polling takes place.
   *
   * The returned promise settles once the first poll has been kicked off, not when it ends.
   *
   * @fulfil {undefined}
   * @returns {Promise}
   */
  async start() {
    const privateProps = this.#data;
    const { logger, shardId, stateStore, stopConsumer } = privateProps;

    // A consumer started again after `stop()` must handle poll errors again; while the flag is
    // set, the polling routine ignores them and would leave the shard stalled.
    privateProps.stopped = false;

    let shardsPath;
    let shardsPathNames;
    try {
      ({ shardsPath, shardsPathNames } = await stateStore.getShardsData());
    } catch (err) {
      logger.warn("Can't start the consumer as the state can't be resolved:", err);
      stopConsumer(shardId);
      return;
    }

    privateProps.setCheckpoint = async (sequenceNumber, approxArrival) => {
      await stateStore.storeShardCheckpoint(
        shardId,
        sequenceNumber,
        shardsPath,
        shardsPathNames,
        approxArrival
      );
      // Remember the stored position so that a new iterator resumes right after it.
      privateProps.checkpoint = sequenceNumber;
      privateProps.approxArrival = approxArrival;
    };

    privateProps.continuePolling = () => pollForRecords(privateProps);

    // Intentionally not awaited: polling goes on in the background.
    pollForRecords(privateProps);
  }

  /**
   * Stops the consumer from polling for more records: the scheduled poll (if any) is cancelled
   * and the consumer is flagged as stopped.
   */
  stop() {
    const privateProps = this.#data;
    clearTimeout(privateProps.timeoutId);
    privateProps.timeoutId = null;
    privateProps.stopped = true;
  }

  /**
   * Updates the shard lease expiration timestamp.
   *
   * @param {string} leaseExpiration - The updated timestamp when the shard lease expires.
   */
  updateLeaseExpiration(leaseExpiration) {
    this.#data.leaseExpiration = new Date(leaseExpiration).getTime();
  }
}
export default PollingConsumer;