s3-stream-commander
Version:
Provides streaming multipart downloads for S3
180 lines (164 loc) • 7.8 kB
JavaScript
;
var util = require('util');
var EventEmitter = require('events').EventEmitter;
var async = require('async');
var debug = require('debug')('s3-stream-download:downloader');
/**
* Size of chunk to download. Need to tradeoff chattiness vs memory
* usage of buffering the chunk
*/
const DOWNLOAD_CHUNK_SIZE = 5 * 1024 * 1024; // 5MB
// Default for `options.concurrentChunks`; passed to `async.timesLimit` as the
// maximum number of part downloads in flight at once.
const CONCURRENT_CHUNKS = 5;
// Default for `options.retries`; passed to `async.retry` as `times` for each
// part's `getObject` call.
const RETRIES = 5;
/**
* Constructor for a `Downloader`. Takes a `AWS.S3` client
* instance, and additional object configuration to be
* passed into the client's `createMultipartUpload` method.
*
* @params {AWS.S3} client
* @params {Object} config
*/
module.exports = class Downloader extends EventEmitter {
constructor(s3Client, s3Params, options) {
super();
this._s3Client = s3Client;
this._s3Params = s3Params;
this._nextPartNumber = 0;
this._started = false;
// Will download concurrentChunks and then pause until first read sets to true
this._paused = true;
this._emitting = false;
this._download_chunk_size = options.downloadChunkSize || DOWNLOAD_CHUNK_SIZE;
this._concurrent_chunks = options.concurrentChunks || CONCURRENT_CHUNKS;
this._retries = options.retries || RETRIES;
this._parts = {};
this._metadata = {};
if (!s3Params.Bucket) {
throw new Error('Downloader requires options with `Bucket` specified.');
}
if (!s3Params.Key) {
throw new Error('Downloader requires options with `Key` specified.');
}
this._startDownload();
}
get paused() {
return this._paused;
}
set paused(paused) {
if (this._paused !== paused) {
debug('paused: ' + paused);
}
this._paused = paused;
this._emitParts();
}
_startDownload() {
if (!this._started) {
var self = this;
self._started = true;
debug('starting download');
self._s3Client.headObject({Bucket: self._s3Params.Bucket, Key: self._s3Params.Key}, function (err, data) {
if (err) {
debug('headObject err: ' + err);
return self.emit('error', err);
}
else {
if (!data || !data.ContentLength || data.ContentLength === 0) {
debug('key not found bucket: ' + self._s3Params.Bucket + ', Key: ' + self._s3Params.Key);
return self.emit('error', 'Key not found');
}
else {
self.totalObjectSize = data.ContentLength;
debug('totalObjectSize: ' + self.totalObjectSize);
self.totalParts = Math.ceil(self.totalObjectSize / self._download_chunk_size);
debug('totalParts: ' + self.totalParts);
self._metadata['ContentLength'] = data.ContentLength;
self._metadata['ETag'] = data.ETag;
self._metadata['LastModified'] = data.LastModified;
async.timesLimit(self.totalParts, self._concurrent_chunks,
function (partNumber, done) {
var startByte = partNumber * self._download_chunk_size;
var endByte = startByte + self._download_chunk_size - 1;
if (endByte > self.totalObjectSize) {
endByte = self.totalObjectSize;
}
debug(`partNumber: ${partNumber}, startByte: ${startByte}, endByte: ${endByte}`);
async.retry({
times: self._retries,
interval: function (retryCount) {
debug(`Retrying partNumber:${partNumber}, retryCount:${retryCount}`);
return 1000 * Math.pow(2, retryCount);
}
},
function (done) {
var params = util._extend({Range: `bytes=${startByte}-${endByte}`}, self._s3Params);
self._s3Client.getObject(params, function (err, data) {
if (err) {
debug('getObject error: ' + err);
self.emit('error', err);
}
else {
self._parts[partNumber] = data.Body;
debug('received partNumber: ' + partNumber);
self._emitParts();
}
setImmediate(() => done(err));
});
},
function (err) {
if (self.paused) {
debug('pausing after download partNumber: ' + partNumber);
var _checkIntervalId = setInterval(function () {
if (!self.paused && _checkIntervalId) {
clearInterval(_checkIntervalId);
_checkIntervalId = undefined;
debug('resuming after download partNumber: ' + partNumber);
return done(err);
}
}, 100);
}
else {
setImmediate(() => done(err));
}
}
);
},
function (err, results) {
if (err) {
debug('download err: ' + err);
self.emit('error', err);
}
}
);
}
}
});
}
}
_emitParts() {
if (!this._paused && !this._emitting) {
var self = this;
self._emitting = true;
debug('emitting parts');
async.whilst(
function () {
return self._parts[self._nextPartNumber] && !self._paused;
},
function (done) {
var part = self._parts[self._nextPartNumber];
delete self._parts[self._nextPartNumber];
self._nextPartNumber++;
self.emit('part', part);
debug('emitting partNumber: ' + (self._nextPartNumber - 1));
if (self._nextPartNumber >= self.totalParts) {
debug('emitting finish');
self.emit('finish');
}
setImmediate(done);
},
function (err) {
self._emitting = false;
}
);
}
}
};