/*
 * lifion-kinesis
 * Version:
 * Lifion client for Amazon Kinesis Data streams
 * 365 lines (332 loc) • 11.4 kB
 * JavaScript
 */
/**
* Module that implements a shard polling consumer.
*
* @module polling-consumer
* @private
*/
;
const deaggregate = require('./deaggregate');
const { getRecordsDecoder } = require('./records');
const { getStreamShards } = require('./stream');
const privateData = new WeakMap();

/**
 * Looks up the private state bag that belongs to an instance, creating an empty one the
 * first time the instance is seen.
 *
 * @param {Object} instance - The owner of the private data.
 * @returns {Object} The mutable object holding the owner's private data.
 * @private
 */
function internal(instance) {
  if (privateData.has(instance)) {
    return privateData.get(instance);
  }
  const freshProps = {};
  privateData.set(instance, freshProps);
  return freshProps;
}
/**
 * Requests a new shard iterator from the given stream and shard. When a sequence number is
 * provided, the iterator points to the record right after it (`AFTER_SEQUENCE_NUMBER`);
 * otherwise `initialPositionInStream` is used as the iterator type.
 *
 * If the request made with a sequence number is rejected with an `InvalidArgumentException`,
 * a warning is logged and the request is retried once without the sequence number.
 *
 * @param {Object} client - The AWS.Kinesis instance to use for the request.
 * @param {Object} logger - An instance of a logger.
 * @param {string} streamName - The name of the stream where the shard belongs.
 * @param {string} shardId - The ID of the shard to get an iterator for.
 * @param {string} initialPositionInStream - The iterator type to request when there is no
 *        sequence number to resume from.
 * @param {string} [sequenceNumber] - The sequence number to resume after, if any.
 * @fulfil {string} The new shard iterator.
 * @returns {Promise} A promise for a new shard iterator.
 * @private
 */
async function getShardIterator(
  client,
  logger,
  streamName,
  shardId,
  initialPositionInStream,
  sequenceNumber
) {
  const resumeFromCheckpoint = Boolean(sequenceNumber);

  const request = { ShardId: shardId, StreamName: streamName };
  if (resumeFromCheckpoint) {
    request.ShardIteratorType = 'AFTER_SEQUENCE_NUMBER';
    request.StartingSequenceNumber = sequenceNumber;
  } else {
    request.ShardIteratorType = initialPositionInStream;
  }

  try {
    const response = await client.getShardIterator(request);
    return response.ShardIterator;
  } catch (err) {
    const isRejectedCheckpoint = err.code === 'InvalidArgumentException' && resumeFromCheckpoint;
    if (!isRejectedCheckpoint) {
      throw err;
    }
    logger.warn(`The stored checkpoint for "${streamName}/${shardId}" is invalid. Ignoring it.`);
    // Retry without the sequence number so the initial position is used instead.
    return getShardIterator(client, logger, streamName, shardId, initialPositionInStream);
  }
}
/**
 * Polls the shard once for records and pushes them to the parent stream.
 *
 * The sequence of a single poll is:
 * 1. If the shard lease has expired, the consumer is stopped and nothing else happens.
 * 2. If a checkpoint was deferred by the previous poll (auto-checkpoints combined with paused
 *    polling), it is stored before requesting more records.
 * 3. A shard iterator is obtained if there isn't one, from the checkpoint when available or
 *    from the initial position in the stream otherwise.
 * 4. Records are requested. With no records and no next iterator the shard is marked as
 *    depleted and the consumer is stopped; with no records but a next iterator another poll
 *    is scheduled (immediately when the consumer is behind the tip of the shard).
 * 5. Records are optionally deaggregated, decoded, check-pointed (when auto-checkpoints are
 *    enabled), and pushed to the stream. Unless paused polling is in use, the next poll is
 *    scheduled after `pollDelay` milliseconds.
 *
 * An `ExpiredIteratorException` discards the current iterator and polls again right away. Any
 * other error is logged and pushed to the stream, and no further poll is scheduled.
 *
 * @param {Object} instance - The instance for which the private data will be retrieved for.
 * @returns {Promise} A promise that settles once this poll attempt has completed.
 * @private
 */
async function pollForRecords(instance) {
  const privateProps = internal(instance);

  const {
    approxArrival,
    client,
    continuePolling,
    initialPositionInStream,
    leaseExpiration,
    limit,
    logger,
    noRecordsPollDelay,
    pollDelay,
    pushToStream,
    recordsDecoder,
    seqNumToCheckpoint,
    setCheckpoint,
    shardId,
    shouldDeaggregate,
    stateStore,
    stopConsumer,
    streamName,
    useAutoCheckpoints,
    usePausedPolling
  } = privateProps;

  try {
    if (Date.now() > leaseExpiration) {
      logger.debug(`Unable to read from shard "${shardId}" anymore, the lease expired.`);
      stopConsumer(shardId);
      return;
    }

    // Store the checkpoint that the previous poll deferred (paused polling + auto-checkpoints).
    if (seqNumToCheckpoint) {
      await setCheckpoint(seqNumToCheckpoint, approxArrival);
      privateProps.seqNumToCheckpoint = null;
      privateProps.approxArrival = null;
    }

    // Read the checkpoint only now: storing the deferred checkpoint above may have moved it
    // forward, and a copy taken earlier would make a new iterator re-read delivered records.
    const { checkpoint } = privateProps;
    let { iterator } = privateProps;

    if (!iterator && checkpoint) {
      logger.debug(`Starting to read shard "${shardId}" from a known checkpoint.`);
      iterator = await getShardIterator(
        client,
        logger,
        streamName,
        shardId,
        initialPositionInStream,
        checkpoint
      );
    }

    if (!iterator) {
      logger.debug(
        `Starting to read shard "${shardId}" from the ${
          initialPositionInStream === 'LATEST' ? 'latest record' : 'trim horizon'
        }.`
      );
      iterator = await getShardIterator(
        client,
        logger,
        streamName,
        shardId,
        initialPositionInStream
      );
    }

    const data = await client.getRecords({ Limit: limit, ShardIterator: iterator });
    const { MillisBehindLatest, NextShardIterator, Records } = data;
    const msBehind = MillisBehindLatest;
    privateProps.iterator = NextShardIterator;
    const recordsCount = Records.length;

    if (recordsCount === 0) {
      // No records and no iterator to continue with: nothing else can be read from the shard.
      if (NextShardIterator === undefined) {
        const shards = await getStreamShards(privateProps);
        logger.debug(`The parent shard "${shardId}" has been depleted.`);
        await stateStore.markShardAsDepleted(shards, shardId);
        stopConsumer(shardId);
        return;
      }

      // When behind the tip of the shard, poll again right away instead of waiting.
      const delay = msBehind <= 0 ? noRecordsPollDelay : 0;
      if (delay === 0) logger.debug(`Fast-forwarding "${shardId}"… (${msBehind}ms behind)`);
      privateProps.timeoutId = setTimeout(pollForRecords, delay, instance);
      return;
    }

    const deaggCollection = shouldDeaggregate ? await deaggregate(Records) : Records;
    const records = await Promise.all(deaggCollection.map(recordsDecoder));
    logger.debug(`Got ${recordsCount} record(s) from "${shardId}" (${msBehind}ms behind)`);

    if (useAutoCheckpoints) {
      // Index with the decoded collection's own length: after deaggregation it may hold a
      // different number of entries than the raw `Records` array.
      const { approximateArrivalTimestamp, sequenceNumber } = records[records.length - 1];
      if (!usePausedPolling) {
        await setCheckpoint(sequenceNumber, approximateArrivalTimestamp);
      } else {
        // With paused polling the checkpoint is deferred until the next poll is requested.
        privateProps.seqNumToCheckpoint = sequenceNumber;
        privateProps.approxArrival = approximateArrivalTimestamp;
      }
    }

    const propsToPush = {
      millisBehindLatest: msBehind,
      records,
      shardId,
      streamName,
      // Manual check-pointing and manual continuation are only exposed when applicable.
      ...(!useAutoCheckpoints && { setCheckpoint }),
      ...(usePausedPolling && { continuePolling })
    };
    pushToStream(null, propsToPush);

    if (!usePausedPolling) {
      privateProps.timeoutId = setTimeout(pollForRecords, pollDelay, instance);
    }
  } catch (err) {
    if (err.code === 'ExpiredIteratorException') {
      logger.warn('Previous shard iterator expired, recreating…');
      privateProps.iterator = null;
      await pollForRecords(instance);
      return;
    }
    logger.error(err);
    pushToStream(err);
  }
}
/**
 * Consumer that reads a single stream shard by polling it for records.
 *
 * @alias module:polling-consumer
 */
class PollingConsumer {
  /**
   * Creates the consumer and records its configuration as private data. Nothing is requested
   * from the stream until `start` is called.
   *
   * @param {Object} options - The initialization options.
   * @param {string} options.checkpoint - The last-known checkpoint for the stream shard.
   * @param {Object} options.client - An instance of the Kinesis client.
   * @param {string} options.compression - The kind of data compression to use with records.
   * @param {string} options.initialPositionInStream - The location in the shard from which the
   *        Consumer will start fetching records from when the application starts for the first
   *        time and there is no checkpoint for the shard.
   * @param {string} options.leaseExpiration - The timestamp of the shard lease expiration.
   * @param {number} options.limit - The limit of records per get records call.
   * @param {Object} options.logger - An instance of a logger.
   * @param {number} options.noRecordsPollDelay - The delay in milliseconds before attempting to
   *        get more records when there were none in the previous attempt.
   * @param {number} options.pollDelay - When the `usePausedPolling` option is `false`, this
   *        option defines the delay in milliseconds in between poll requests for more records.
   * @param {Function} options.pushToStream - A function to push incoming records to the consumer.
   * @param {Object} [options.s3] - Kept as-is in the private data; not read by this class.
   * @param {Object} [options.s3Client] - Handed over to the records decoder.
   * @param {string} options.shardId - The ID of the stream shard to retrieve records for.
   * @param {boolean} [options.shouldDeaggregate] - Whether the retrieved records should be
   *        passed through the deaggregation step before decoding.
   * @param {boolean} [options.shouldParseJson] - Handed over to the records decoder.
   * @param {Object} options.stateStore - An instance of the state store.
   * @param {Function} options.stopConsumer - A function that stops this consumer from the manager.
   * @param {string} options.streamName - The name of the Kinesis stream.
   * @param {boolean} options.useAutoCheckpoints - Whether to automatically store shard checkpoints
   *        using the sequence number of the most-recently received record or not.
   * @param {boolean} options.usePausedPolling - Whether if the client is waiting for
   *        user-intervention before polling for more records, or not.
   * @param {boolean} options.useS3ForLargeItems - Whether to automatically use an S3
   *        bucket to store large items or not.
   */
  constructor(options) {
    const state = internal(this);

    // Options that are kept in the private data exactly as they were provided.
    const verbatimOptions = [
      'checkpoint',
      'client',
      'compression',
      'initialPositionInStream',
      'limit',
      'logger',
      'noRecordsPollDelay',
      'pollDelay',
      'pushToStream',
      's3',
      'shardId',
      'shouldDeaggregate',
      'stateStore',
      'stopConsumer',
      'streamName',
      'useAutoCheckpoints',
      'usePausedPolling',
      'useS3ForLargeItems'
    ];
    for (const optionName of verbatimOptions) {
      state[optionName] = options[optionName];
    }

    // The lease expiration is normalized to a number of milliseconds since the epoch.
    state.leaseExpiration = new Date(options.leaseExpiration).getTime();

    state.recordsDecoder = getRecordsDecoder({
      compression: options.compression,
      inputEncoding: 'Buffer',
      logger: options.logger,
      s3Client: options.s3Client,
      shouldParseJson: options.shouldParseJson,
      useS3ForLargeItems: options.useS3ForLargeItems
    });

    // Runtime state, populated once the consumer is started and while it polls.
    state.approxArrival = null;
    state.continuePolling = null;
    state.iterator = null;
    state.seqNumToCheckpoint = null;
    state.setCheckpoint = null;
    state.timeoutId = null;
  }

  /**
   * Resolves the shards data from the state store, prepares the functions used to store
   * checkpoints and to continue polling, and triggers the first poll for records. If the
   * shards data can't be retrieved, a warning is logged and the consumer is stopped instead.
   *
   * @fulfil {undefined}
   * @returns {Promise}
   */
  async start() {
    const state = internal(this);
    const { logger, shardId, stateStore, stopConsumer } = state;

    const paths = {};
    try {
      const shardsData = await stateStore.getShardsData();
      paths.shardsPath = shardsData.shardsPath;
      paths.shardsPathNames = shardsData.shardsPathNames;
    } catch (err) {
      logger.warn("Can't start the consumer as the state can't be resolved:", err);
      stopConsumer(shardId);
      return;
    }

    // Stores the checkpoint in the state store first, then mirrors it in the private data.
    state.setCheckpoint = async (sequenceNumber, approxArrival) => {
      await stateStore.storeShardCheckpoint(
        shardId,
        sequenceNumber,
        paths.shardsPath,
        paths.shardsPathNames,
        approxArrival
      );
      state.checkpoint = sequenceNumber;
      state.approxArrival = approxArrival;
    };

    state.continuePolling = () => pollForRecords(this);

    // The first poll is triggered without waiting for it to complete.
    pollForRecords(this);
  }

  /**
   * Cancels the timer scheduled for the next poll, if any.
   */
  stop() {
    const state = internal(this);
    const { timeoutId } = state;
    clearTimeout(timeoutId);
    state.timeoutId = null;
  }

  /**
   * Replaces the shard lease expiration with a new timestamp.
   *
   * @param {string} leaseExpiration - The updated timestamp when the shard lease expires.
   */
  updateLeaseExpiration(leaseExpiration) {
    const expirationTime = new Date(leaseExpiration).getTime();
    internal(this).leaseExpiration = expirationTime;
  }
}
module.exports = PollingConsumer;