UNPKG

kinesis-client-library

Version:

Process Kinesis streams and automatically scale up or down as shards split or merge.

441 lines (440 loc) 18.8 kB
"use strict"; var __extends = (this && this.__extends) || function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; function __() { this.constructor = d; } d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; var events_1 = require('events'); var cluster_1 = require('cluster'); var path_1 = require('path'); var url_1 = require('url'); var underscore_1 = require('underscore'); var async_1 = require('async'); var bunyan_1 = require('bunyan'); var factory_1 = require('./lib/aws/factory'); var config_1 = require('./lib/config'); var kinesis_1 = require('./lib/aws/kinesis'); var Lease_1 = require('./lib/models/Lease'); var Cluster_1 = require('./lib/models/Cluster'); var Stream_1 = require('./lib/models/Stream'); var server_1 = require('./lib/server'); // Cluster of consumers. var ConsumerCluster = (function (_super) { __extends(ConsumerCluster, _super); function ConsumerCluster(pathToConsumer, opts) { _super.call(this); this.isShuttingDownFromError = false; this.externalNetwork = {}; this.consumers = {}; this.consumerIds = []; this.lastGarbageCollectedAt = Date.now(); this.opts = opts; this.logger = bunyan_1.createLogger({ name: 'KinesisCluster', level: opts.logLevel, }); cluster_1.setupMaster({ exec: pathToConsumer, silent: true, }); this.endpoints = { kinesis: this.getKinesisEndpoint(), dynamo: this.getDynamoEndpoint(), }; this.kinesis = factory_1.createKinesisClient(this.opts.awsConfig, this.endpoints.kinesis); this.cluster = new Cluster_1.Cluster(this.opts.tableName, this.opts.awsConfig, this.endpoints.dynamo); this.init(); } ConsumerCluster.prototype.init = function () { var _this = this; async_1.auto({ tableExists: function (done) { var tableName = _this.opts.tableName; var awsConfig = _this.opts.awsConfig; Cluster_1.Cluster.tableExists(tableName, awsConfig, _this.getDynamoEndpoint(), done); }, createTable: ['tableExists', function (done, data) { if (data.tableExists) { return done(); } var tableName = _this.opts.tableName; var awsConfig = _this.opts.awsConfig; var capacity = _this.opts.capacity || {}; _this.logger.info({ table: tableName }, 'Creating DynamoDB table'); Cluster_1.Cluster.createTable(tableName, awsConfig, capacity, _this.getDynamoEndpoint(), done); }], createStream: function (done) { var streamName = _this.opts.streamName; var streamModel = new Stream_1.Stream(streamName, _this.kinesis); streamModel.exists(function (err, exists) { if (err) { return done(err); } if (exists) { return done(); } _this.kinesis.createStream({ StreamName: streamName, ShardCount: 1 }, function (err) { if (err) { return done(err); } streamModel.onActive(done); }); }); }, }, function (err) { if (err) { return _this.logAndEmitError(err, 'Error ensuring Dynamo table exists'); } _this.loopReportClusterToNetwork(); _this.loopFetchExternalNetwork(); }); }; ConsumerCluster.prototype.getKinesisEndpoint = function () { var isLocal = this.opts.localKinesis; var port = this.opts.localKinesisPort; var customEndpoint = this.opts.kinesisEndpoint; var endpoint = null; if (isLocal) { var endpointConfig = config_1.default.localKinesisEndpoint; if (port) { endpointConfig.port = port; } endpoint = url_1.format(endpointConfig); } else if (customEndpoint) { endpoint = customEndpoint; } return endpoint; }; ConsumerCluster.prototype.getDynamoEndpoint = function () { var isLocal = this.opts.localDynamo; var customEndpoint = this.opts.dynamoEndpoint; var endpoint = null; if (isLocal) { var endpointConfig = config_1.default.localDynamoDBEndpoint; endpoint = url_1.format(endpointConfig); } else if (customEndpoint) { endpoint = customEndpoint; } return endpoint; }; // Run an HTTP server. Useful as a health check. ConsumerCluster.prototype.serveHttp = function (port) { var _this = this; this.logger.debug('Starting HTTP server on port %s', port); server_1.create(port, function () { return _this.consumerIds.length; }); }; ConsumerCluster.prototype.consumeAvailableShard = function (shardId, leaseCounter) { // Stops accepting consumers, since the cluster will be reset based one an error if (this.isShuttingDownFromError) { return; } this.spawn(shardId, leaseCounter); }; ConsumerCluster.prototype.updateNetwork = function () { var _this = this; this.garbageCollectClusters(); if (this.shouldTryToAcquireMoreShards()) { this.logger.debug('Should try to acquire more shards'); this.fetchAvailableShard(); } else if (this.hasTooManyShards()) { this.logger.debug({ consumerIds: this.consumerIds }, 'Have too many shards'); this.killConsumer(function (err) { if (err) { _this.logAndEmitError(err); } }); } }; // Compare cluster state to external network to figure out if we should try to change our shard allocation. ConsumerCluster.prototype.shouldTryToAcquireMoreShards = function () { if (this.consumerIds.length === 0) { return true; } var externalNetwork = this.externalNetwork; var networkKeys = Object.keys(externalNetwork); if (networkKeys.length === 0) { return true; } var lowestInOutterNetwork = networkKeys.reduce(function (memo, key) { var count = externalNetwork[key]; if (count < memo) { memo = count; } return memo; }, Infinity); return this.consumerIds.length <= lowestInOutterNetwork; }; // Determine if we have too many shards compared to the rest of the network. ConsumerCluster.prototype.hasTooManyShards = function () { var externalNetwork = this.externalNetwork; var networkKeys = Object.keys(externalNetwork); if (networkKeys.length === 0) { return false; } var lowestInOutterNetwork = networkKeys.reduce(function (memo, key) { var count = externalNetwork[key]; if (count < memo) { memo = count; } return memo; }, Infinity); return this.consumerIds.length > (lowestInOutterNetwork + 1); }; // Fetch data about unleased shards. ConsumerCluster.prototype.fetchAvailableShard = function () { var _this = this; // Hack around typescript var _asyncResults = {}; async_1.parallel({ allShardIds: function (done) { kinesis_1.listShards(_this.kinesis, _this.opts.streamName, function (err, shards) { if (err) { return done(err); } _asyncResults.shards = shards; done(); }); }, leases: function (done) { var tableName = _this.opts.tableName; var awsConfig = _this.opts.awsConfig; Lease_1.Lease.fetchAll(tableName, awsConfig, _this.getDynamoEndpoint(), function (err, leases) { if (err) { return done(err); } _asyncResults.leases = leases; done(); }); }, }, function (err) { if (err) { return _this.logAndEmitError(err, 'Error fetching available shards'); } var shards = _asyncResults.shards, leases = _asyncResults.leases; var leaseItems = leases.Items; var finishedShardIds = leaseItems.filter(function (lease) { return lease.get('isFinished'); }).map(function (lease) { return lease.get('id'); }); var allUnfinishedShards = shards.filter(function (shard) { return finishedShardIds.indexOf(shard.ShardId) === -1; }); var leasedShardIds = leaseItems.map(function (item) { return item.get('id'); }); var newShards = allUnfinishedShards.filter(function (shard) { // skip already leased shards if (leasedShardIds.indexOf(shard.ShardId) >= 0) { return false; } // skip if parent shard is not finished (split case) if (shard.ParentShardId && !(finishedShardIds.indexOf(shard.ParentShardId) >= 0)) { _this.logger.info({ ParentShardId: shard.ParentShardId, ShardId: shard.ShardId }, 'Ignoring shard because ParentShardId is not finished'); return false; } // skip if adjacent parent shard is not finished (merge case) if (shard.AdjacentParentShardId && !(finishedShardIds.indexOf(shard.AdjacentParentShardId) >= 0)) { _this.logger.info({ AdjacentParentShardId: shard.AdjacentParentShardId, ShardId: shard.ShardId }, 'Ignoring shard because AdjacentParentShardId is not finished'); return false; } return true; }); // If there are shards theat have not been leased, pick one if (newShards.length > 0) { _this.logger.info({ newShards: newShards }, 'Unleased shards available'); return _this.consumeAvailableShard(newShards[0].ShardId, null); } // Try to find the first expired lease var firstExpiredLease = underscore_1.find(leaseItems, function (leaseItem) { if (leaseItem.get('expiresAt') > Date.now()) { return false; } if (leaseItem.get('isFinished')) { return false; } return true; }); if (firstExpiredLease) { var shardId = firstExpiredLease.get('id'); var leaseCounter = firstExpiredLease.get('leaseCounter'); _this.logger.info({ shardId: shardId, leaseCounter: leaseCounter }, 'Found available shard'); _this.consumeAvailableShard(shardId, leaseCounter); } }); }; // Create a new consumer processes. ConsumerCluster.prototype.spawn = function (shardId, leaseCounter) { this.logger.info({ shardId: shardId, leaseCounter: leaseCounter }, 'Spawning consumer'); var consumerOpts = { tableName: this.opts.tableName, awsConfig: this.opts.awsConfig, streamName: this.opts.streamName, startingIteratorType: (this.opts.startingIteratorType || '').toUpperCase(), shardId: shardId, leaseCounter: leaseCounter, dynamoEndpoint: this.endpoints.dynamo, kinesisEndpoint: this.endpoints.kinesis, numRecords: this.opts.numRecords, timeBetweenReads: this.opts.timeBetweenReads, logLevel: this.opts.logLevel, }; var env = { CONSUMER_INSTANCE_OPTS: JSON.stringify(consumerOpts), CONSUMER_SUPER_CLASS_PATH: path_1.join(__dirname, 'AbstractConsumer.js'), }; var consumer = cluster_1.fork(env); consumer.opts = consumerOpts; consumer.process.stdout.pipe(process.stdout); consumer.process.stderr.pipe(process.stderr); this.addConsumer(consumer); }; // Add a consumer to the cluster. ConsumerCluster.prototype.addConsumer = function (consumer) { var _this = this; this.consumerIds.push(consumer.id); this.consumers[consumer.id] = consumer; consumer.once('exit', function (code) { var logMethod = code === 0 ? 'info' : 'error'; _this.logger[logMethod]({ shardId: consumer.opts.shardId, exitCode: code }, 'Consumer exited'); _this.consumerIds = underscore_1.without(_this.consumerIds, consumer.id); delete _this.consumers[consumer.id]; }); }; // Kill any consumer in the cluster. ConsumerCluster.prototype.killConsumer = function (callback) { var id = this.consumerIds[0]; this.killConsumerById(id, callback); }; // Kill a specific consumer in the cluster. ConsumerCluster.prototype.killConsumerById = function (id, callback) { var _this = this; this.logger.info({ id: id }, 'Killing consumer'); var callbackWasCalled = false; var wrappedCallback = function (err) { if (callbackWasCalled) { return; } callbackWasCalled = true; callback(err); }; // Force kill the consumer in 40 seconds, giving enough time for the consumer's shutdown // process to finish var timer = setTimeout(function () { if (_this.consumers[id]) { _this.consumers[id].kill(); } wrappedCallback(new Error('Consumer did not exit in time')); }, 40000); this.consumers[id].once('exit', function (code) { clearTimeout(timer); var err = null; if (code > 0) { err = new Error('Consumer process exited with code: ' + code); } wrappedCallback(err); }); this.consumers[id].send(config_1.default.shutdownMessage); }; ConsumerCluster.prototype.killAllConsumers = function (callback) { this.logger.info('Killing all consumers'); async_1.each(this.consumerIds, this.killConsumerById.bind(this), callback); }; // Continuously fetch data about the rest of the network. ConsumerCluster.prototype.loopFetchExternalNetwork = function () { var _this = this; this.logger.info('Starting external network fetch loop'); var fetchThenWait = function (done) { _this.fetchExternalNetwork(function (err) { if (err) { return done(err); } setTimeout(done, 5000); }); }; var handleError = function (err) { _this.logAndEmitError(err, 'Error fetching external network data'); }; async_1.forever(fetchThenWait, handleError); }; // Fetch data about the rest of the network. ConsumerCluster.prototype.fetchExternalNetwork = function (callback) { var _this = this; this.cluster.fetchAll(function (err, clusters) { if (err) { return callback(err); } _this.externalNetwork = clusters.Items.filter(function (cluster) { return cluster.get('id') !== _this.cluster.id; }).reduce(function (memo, cluster) { memo[cluster.get('id')] = cluster.get('activeConsumers'); return memo; }, {}); _this.logger.debug({ externalNetwork: _this.externalNetwork }, 'Updated external network'); _this.updateNetwork(); callback(); }); }; // Continuously publish data about this cluster to the network. ConsumerCluster.prototype.loopReportClusterToNetwork = function () { var _this = this; this.logger.info('Starting report cluster loop'); var reportThenWait = function (done) { _this.reportClusterToNetwork(function (err) { if (err) { return done(err); } setTimeout(done, 1000); }); }; var handleError = function (err) { _this.logAndEmitError(err, 'Error reporting cluster to network'); }; async_1.forever(reportThenWait, handleError); }; // Publish data about this cluster to the nework. ConsumerCluster.prototype.reportClusterToNetwork = function (callback) { this.logger.debug({ consumers: this.consumerIds.length }, 'Rerpoting cluster to network'); this.cluster.reportActiveConsumers(this.consumerIds.length, callback); }; // Garbage collect expired clusters from the network. ConsumerCluster.prototype.garbageCollectClusters = function () { var _this = this; if (Date.now() < (this.lastGarbageCollectedAt + (1000 * 60))) { return; } this.lastGarbageCollectedAt = Date.now(); this.cluster.garbageCollect(function (err, garbageCollectedClusters) { if (err) { _this.logger.error(err, 'Error garbage collecting clusters, continuing cluster execution anyway'); return; } if (garbageCollectedClusters.length) { _this.logger.info('Garbage collected %d clusters', garbageCollectedClusters.length); } }); }; ConsumerCluster.prototype.logAndEmitError = function (err, desc) { var _this = this; this.logger.error(desc); this.logger.error(err); // Only start the shutdown process once if (this.isShuttingDownFromError) { return; } this.isShuttingDownFromError = true; // Kill all consumers and then emit an error so that the cluster can be re-spawned this.killAllConsumers(function (killErr) { if (killErr) { _this.logger.error(killErr); } // Emit the original error that started the shutdown process _this.emit('error', err); }); }; return ConsumerCluster; }(events_1.EventEmitter)); exports.ConsumerCluster = ConsumerCluster;