UNPKG

firehoser_es5

Version:

es5 version of firehoser. A wrapper around AWS Kinesis Firehose with retry logic and custom queuing behavior.

214 lines (189 loc) 7.45 kB
'use strict'; require("babel-polyfill"); var _ = require('lodash'); var AWS = require('aws-sdk'); var async = require('async'); var JaySchema = require('jayschema'); var moment = require('moment'); var schemaValidator = new JaySchema(); class DeliveryStream{ constructor(name, awsConfig=null, schema=null, retryInterval=1500, firehose=null, logger=null){ this.maxIngestion = 400; this.maxDrains = 3; this.maxRetries = 40; this.name = name; if (awsConfig !== null){ AWS.config.update(awsConfig); } this.schema = schema; this.retryInterval = retryInterval; this.firehose = firehose ? firehose : new AWS.Firehose({params: {DeliveryStreamName: name}}); this.log = logger ? logger : () => {}; } validateRecord(record){ return schemaValidator.validate(record, this.schema); } validateRecords(records){ if (!this.schema){ return [records, []]; } let validRecords = []; let invalidRecords = []; _.forEach(records, (record) => { let validationErrors = this.validateRecord(record); if (_.isEmpty(validationErrors)){ validRecords.push(record); } else { let ve = validationErrors[0]; invalidRecords.push({ type: "schema", originalRecord: record, description: buildSchemaErrorDescription(ve), details: ve, }); } }); return [validRecords, invalidRecords]; } formatRecord(record){ return {Data: record + '\n'}; } putRecord(record){ return this.putRecords([record]); } putRecords(records){ this.log(`DeliveryStream.putRecords() called with ${records.length} records.`); return new Promise((resolve, reject) => { // Validate records against a schema, if necessary. let [validRecords, invalidRecords] = this.validateRecords(records); // Split the records into reasonably-sized chunks. records = _.map(validRecords, this.formatRecord); let chunks = _.chunk(records, this.maxIngestion); let tasks = []; for (let i=0; i < chunks.length; i++){ tasks.push(this.drain.bind(this, chunks[i])); } // Schedule the chunks all at the same time. this.log(`Kicking off ${tasks.length} calls to drain() for ${records.length} records.`); async.parallelLimit(tasks, this.maxDrains, function(err, results){ let allErrors = invalidRecords.concat(_.flatten(results)); if (err || !_.isEmpty(allErrors)){ return reject(allErrors); } return resolve(); }); }); } drain(records, cb, numRetries=0){ var leftovers = []; this.log(`Draining ${records.length} records. Pass #${numRetries + 1}`); this.firehose.putRecordBatch({Records: records}, function(firehoseErr, resp){ // Stuff broke! if (firehoseErr){ return cb(null, { type: "firehose", description: "Internal aws-sdk error.", details: firehoseErr, originalRecord: null }); } // Not all records make it in, but firehose keeps on chugging! if (resp.FailedPutCount > 0){ } // Push errored records back into the next list. for (let [orig, result] of _.zip(records, resp.RequestResponses)){ if (!_.isUndefined(result.ErrorCode)){ this.log(`Got ErrorCode ${result.ErrorCode} for record ${orig}`); leftovers.push({ type: "firehose", description: result.ErrorMessage, details: { ErrorCode: result.ErrorCode, ErrorMessage: result.ErrorMessage, }, originalRecord: orig, }); } } // Recurse! if (leftovers.length && numRetries < this.maxRetries){ // We're about to recurse, let the child handle storing error details. leftovers = _.map(leftovers, (leftover) => { return _.pick(leftover, ['originalRecord'])}) return setTimeout(function(){ this.drain.bind(this, leftovers, cb, numRetries + 1); }, this.retryInterval); } else { return cb(null, leftovers); } }); } } class JSONDeliveryStream extends DeliveryStream { formatRecord(record){ return super.formatRecord(JSON.stringify(record)); } } class QueuableDeliveryStream extends DeliveryStream { constructor(name, maxTime=30000, maxSize=500, ...args){ super(name, ...args); this.queue = []; this.timeout = null; this.maxTime = maxTime; this.maxSize = maxSize; this.promise = null; setInterval(this.drainQueue.bind(this), this.maxTime); } putRecords(records){ this.log(`QueuableDeliveryStream.putRecords() called with ${records.length} records.`); this.queue.push(...records); if (this.promise === null){ this.promise = new Promise((resolve, reject) => { this.resolver = resolve; this.rejecter = reject; }); } this.log(`queue size is: ${this.queue.length}, maxSize is: ${this.maxSize}.`); if (this.queue.length >= this.maxSize){ // Queue's full! this.log(`queue is full, draining immediately.`); setImmediate(this.drainQueue.bind(this)); } return this.promise; } drainQueue(){ this.log(`Countdown timer expired or queue limit reached.`); this.log(`Time to drain the queue of ${this.queue.length} records.`); let toQueue = this.queue.splice(0, this.queue.length); if (!toQueue.length){ this.log(`No records in queue, not draining anything.`); return; } super.putRecords(toQueue).then(this.resolver, this.rejecter).then(() => { this.promise = null; this.rejecter = null; this.resolver = null; }); } } class QueuableJSONDeliveryStream extends QueuableDeliveryStream { formatRecord(record){ return super.formatRecord(JSON.stringify(record)); } } function buildSchemaErrorDescription(ve){ if (ve.desc){ return ve.desc; } let field = ve.instanceContext.replace(/(#\/)|(#)/ig, "").replace(/\//g, ".") return `${ve.kind || 'Error'} on '${field}'. Expected ${ve.constraintName} to be ${ve.constraintValue}, actual value was ${ve.testedValue}.` } function makeRedshiftTimestamp(input){ return moment(input).utc().format('YYYY-MM-DD HH:mm:ss') } module.exports = { DeliveryStream: DeliveryStream, JSONDeliveryStream: JSONDeliveryStream, QueuableDeliveryStream: QueuableDeliveryStream, QueuableJSONDeliveryStream: QueuableJSONDeliveryStream, makeRedshiftTimestamp: makeRedshiftTimestamp };