/*
 * knox-mpu
 *
 * Provides multipart upload functionality for Amazon S3 using the knox library.
 *
 * (Listing metadata: 410 lines, 340 loc, 12.6 kB, JavaScript; version not specified.)
 */
var _ = require('lodash'),
EventEmitter = require('events').EventEmitter,
Batch = require('batch'),
fs = require('fs'),
path = require('path'),
os = require('os'),
util = require('util'),
parse = require('./parse');
/**
 * Initializes an Amazon S3 multipart upload with the given options and starts it
 * straight away (from a stream or from a file on disk).
 *
 * Emits 'progress' ({written, total, percent}) whenever a 'partProgress' event is
 * seen on this instance.
 *
 * @param {Object}   opts
 * @param {Object}   opts.client          client used to issue the S3 requests (required)
 * @param {String}   opts.objectName      name of the destination object (required)
 * @param {Object}   [opts.stream]        readable stream to upload (exclusive with opts.file)
 * @param {String}   [opts.file]          path of the file to upload (exclusive with opts.stream)
 * @param {Object}   [opts.headers]       headers sent with the initiate request (default {})
 * @param {Number}   [opts.partSize]      size of each part in bytes (default 5242880)
 * @param {Number}   [opts.maxRetries]    retries allowed per part (default 0)
 * @param {Number}   [opts.batchSize]     number of simultaneous part uploads (default 4)
 * @param {Number}   [opts.maxUploadSize] upper bound on streamed bytes (default Infinity)
 * @param {Boolean}  [opts.noDisk]        keep stream parts in memory instead of temp files
 * @param {String}   [opts.tmpDir]        directory for temp part files (default: OS temp dir)
 * @param {Function} [callback]           called as callback(err, results) when the upload ends
 * @throws {Error} when the options are missing or contradictory
 */
function MultiPartUpload(opts, callback) {
  if (!opts.client || !opts.objectName) {
    throw new Error('MultiPart upload must be created from a client and provide an object name');
  }

  if (!opts.stream && !opts.file) {
    throw new Error('MultiPart upload must be passed either a stream or file parameter');
  }

  if (opts.stream && opts.file) {
    throw new Error('You cannot provide both a stream and a file to upload');
  }

  if (opts.noDisk && opts.partSize && opts.partSize > 10485760) {
    throw new Error('Keep in-memory part sizes 10MB or less');
  }

  // Run the parent constructor so the emitter state is initialised explicitly
  EventEmitter.call(this);

  callback = callback || function(err, results) {};

  this.objectName = opts.objectName;
  this.fileName = opts.file;
  this.headers = opts.headers || {};
  this.client = opts.client;
  this.partSize = opts.partSize || 5242880; // 5MB default
  this.maxRetries = opts.maxRetries || 0; // default to no retry
  this.uploadId = null;
  this.uploads = new Batch();
  this.noDisk = opts.noDisk;
  this.maxUploadSize = opts.maxUploadSize || Infinity;
  this.currentUploadSize = 0;
  this.aborted = false;
  this.totalUploadSize = 0;

  this.uploads.concurrency(opts.batchSize || 4); // 4 simultaneous uploads by default

  // Initialise the tmp directory based on opts. os.tmpDir() is the deprecated
  // spelling and is missing from current Node releases, so prefer os.tmpdir()
  // and only fall back to the old name where the new one does not exist.
  this.tmpDir = !this.noDisk &&
    (opts.tmpDir || (typeof os.tmpdir === 'function' ? os.tmpdir() : os.tmpDir()));

  var mpu = this,
      written = 0;

  mpu.on('partProgress', function(data) {
    written += data.written;
    mpu.emit('progress', {
      written: written,
      total: mpu.totalUploadSize,
      percent: written / mpu.totalUploadSize * 100 | 0
    });
  });

  // A failed part will be sent again, so take its bytes back out of the total.
  // The byte count is optional: without the fallback a missing value would
  // turn the running total into NaN for the rest of the upload.
  mpu.on('failed', function(part, partWritten) {
    written -= (partWritten || 0);
  });

  if (opts.stream) {
    this._putStream(opts.stream, callback);
  } else {
    this._putFile(opts.file, callback);
  }
}

util.inherits(MultiPartUpload, EventEmitter);
/**
 * Sends the "initiate multipart upload" request and records the upload ID
 * found in the parsed response.
 *
 * Emits 'initiated' with the upload ID once it is known.
 *
 * @param {Function} callback  called with (err) on failure or (null, uploadId) on success
 */
MultiPartUpload.prototype._initiate = function(callback) {
  var self = this;
  var request = self.client.request('POST', self.objectName + '?uploads', self.headers);

  // Invoked with the parsed XML body of the initiate response
  function onParsed(err, body) {
    if (err) {
      return callback(err);
    }

    var uploadId = body.UploadId;
    if (!uploadId) {
      return callback('Invalid upload ID');
    }

    self.uploadId = uploadId;
    self.emit('initiated', uploadId);
    return callback(null, uploadId);
  }

  // The response handler is registered before the request is sent
  parse.xmlResponse(request, onParsed);
  request.end();
};
/**
 * Uploads a file to S3 as a multipart upload.
 *
 * The file is not copied: it is described as a list of byte ranges
 * ({offset, length}) of at most partSize bytes, and each range is queued as
 * one part upload. A zero-byte file therefore produces no parts at all.
 *
 * @param {String}   file      path of the file to upload
 * @param {Function} callback  called with an error string on failure, otherwise
 *                             handed on to _completeUploads
 */
MultiPartUpload.prototype._putFile = function(file, callback) {
  if (!file) return callback('Invalid file');

  var mpu = this;

  // A single stat call both checks for existence and provides the size, which
  // avoids the deprecated fs.exists and the window between the two calls.
  // fs.stat (not lstat) is used so that a symlink is measured by its target,
  // which is what the part read streams will actually read.
  fs.stat(file, function(err, stats) {
    if (err || !stats) {
      return callback('File does not exist');
    }

    var remainingBytes = stats.size,
        offset = 0,
        partId = 0;

    mpu.totalUploadSize = stats.size;

    // Full-size parts first, then whatever is left over as the final part
    while (remainingBytes > 0) {
      var part = {
        id: ++partId,
        fileName: mpu.fileName,
        offset: offset,
        length: Math.min(mpu.partSize, remainingBytes),
        triesLeft: mpu.maxRetries + 1
      };

      offset += part.length;
      remainingBytes -= part.length;
      mpu.uploads.push(mpu._uploadPart.bind(mpu, part));
    }

    mpu._initiate(function(err, uploadId) {
      if (err || !uploadId) {
        return callback('Unable to initiate file upload');
      }
      return mpu._completeUploads(callback);
    });
  });
};
/**
 * Uploads a stream to S3 as a multipart upload.
 *
 * Requests an upload ID (if there is none yet) and, without waiting for it,
 * starts consuming the stream via _handleStream.
 *
 * @param {Object}   stream    readable stream to upload
 * @param {Function} callback  called at most once with the outcome of the upload
 */
MultiPartUpload.prototype._putStream = function(stream, callback) {
  if (!stream) return callback('Invalid stream');

  var mpu = this,
      finished = false;

  // Initiation and stream handling run side by side and can both fail, so make
  // sure the caller only ever hears about the first outcome.
  function done(err, result) {
    if (finished) return;
    finished = true;
    return callback(err, result);
  }

  if (!mpu.uploadId) {
    mpu._initiate(function(err, uploadId) {
      if (err || !uploadId) {
        // Parenthesised on purpose: '+' binds tighter than '||', so without the
        // parentheses the fallback text and closing bracket are never used.
        return done('Unable to initiate stream upload [' + (err || 'No upload ID') + ']');
      }
    });
  }

  // Start handling the stream straight away
  mpu._handleStream(stream, done);
};
/**
 * Consumes an incoming stream, cuts it into parts of at least partSize bytes
 * and queues one upload per part.
 *
 * Parts are buffered in a temp file under tmpDir, or in memory when noDisk is
 * set. If more than maxUploadSize bytes arrive, the upload is flagged as
 * aborted and finished through _abortUploads instead of _completeUploads.
 *
 * @param {Object}   stream    readable stream to consume
 * @param {Function} callback  called at most once with the outcome of the upload
 */
MultiPartUpload.prototype._handleStream = function(stream, callback) {
  var mpu = this,
      parts = [],
      current = null,
      finished = false;

  // Several paths below can report an outcome; only the first one is delivered
  function finish(err, result) {
    if (finished) return;
    finished = true;
    return callback(err, result);
  }

  // Records that a part's temp file is flushed (or failed) and wakes any waiter
  function settle(part, err) {
    if (part.flushed) return;
    part.flushed = true;
    part.writeError = err || null;
    if (part.onFlushed) part.onFlushed();
  }

  // Create a new part
  function newPart() {
    var partId = parts.length + 1,
        // The object name may contain path separators and may be long, so it is
        // reduced to a short, file-name-safe label. (mpu, not `this`: inside
        // this plain function `this` is not the uploader.)
        safeName = String(mpu.objectName).replace(/[^A-Za-z0-9._-]/g, '_').slice(0, 64),
        partFileName = path.resolve(path.join(mpu.tmpDir || '', 'mpu-' + safeName + '-' + random_seed() + '-' + (mpu.uploadId || Date.now()) + '-' + partId)),
        partFile = !mpu.noDisk && fs.createWriteStream(partFileName),
        part = {
          id: partId,
          stream: partFile,
          fileName: partFileName,
          offset: 0,
          length: 0,
          triesLeft: mpu.maxRetries + 1,
          // Buffer('') is the deprecated constructor form
          data: typeof Buffer.alloc === 'function' ? Buffer.alloc(0) : new Buffer(0),
          flushed: !partFile, // in-memory parts have nothing to flush
          writeError: null,
          onFlushed: null
        };

    if (partFile) {
      // Without an 'error' listener a failed write would surface as an
      // uncaught exception; 'finish'/'close' tell us the file is complete.
      partFile.on('error', function(err) { settle(part, err); });
      partFile.on('finish', function() { settle(part); });
      partFile.on('close', function() { settle(part); });
    }

    parts.push(part);
    return part;
  }

  function partReady(part) {
    if (!part) return;

    // Ensure the stream is closed
    if (part.stream && part.stream.writable) {
      part.stream.end();
    }

    // The upload reads the temp file back, so it must not start before the
    // file has been flushed; a failed write fails the part instead.
    mpu.uploads.push(function(done) {
      function start() {
        if (part.writeError) return done(part.writeError);
        return mpu._uploadPart(part, done);
      }

      if (part.flushed) return start();
      part.onFlushed = start;
    });
  }

  function abortUpload(part) {
    // Ensure the stream is closed and the temporary file removed
    if (part && part.stream) {
      part.onFlushed = function() {
        fs.unlink(part.fileName, function(err) {
          // A file that was never created needs no cleanup
          if (err && err.code !== 'ENOENT') return finish(err);
        });
      };

      if (part.flushed) {
        part.onFlushed();
      } else if (part.stream.writable) {
        part.stream.end();
      }
    }

    current = null;
    mpu.aborted = true;
  }

  // Handle the data coming in
  stream.on('data', function(buffer) {
    // Check if we are over the max total limit
    if ((mpu.currentUploadSize += buffer.length) > mpu.maxUploadSize) {
      return abortUpload(current);
    }

    if (!current) {
      current = newPart();
    }

    if (current.stream) {
      current.stream.write(buffer);
    } else {
      current.data = Buffer.concat([current.data, buffer]);
    }

    current.length += buffer.length;
    mpu.totalUploadSize += buffer.length;

    // Check if we have a part
    if (current.length >= mpu.partSize) {
      partReady(current);
      current = null;
    }
  });

  // Handle the end of the stream
  stream.on('end', function() {
    if (current) {
      partReady(current);
    }

    // Wait for the completion of the uploads
    if (mpu.aborted) {
      return mpu._abortUploads(finish);
    }
    return mpu._completeUploads(finish);
  });

  // Handle errors
  stream.on('error', function(err) {
    return finish(err);
  });
};
/**
 * Uploads a single part, or, if the upload has not been initiated yet, waits
 * for the 'initiated' event and uploads then.
 *
 * Emits 'uploading' (part id), 'partProgress', 'uploaded' ({part, etag, size})
 * and 'failed' ({part, message}, bytesWritten). A failed attempt is repeated
 * while the part has tries left.
 *
 * @param {Object}   part      part descriptor ({id, fileName, offset, length, triesLeft, data})
 * @param {Function} callback  called with ({part, message}) on final failure or
 *                             (null, {part, etag, size}) on success
 */
MultiPartUpload.prototype._uploadPart = function(part, callback) {
  var mpu = this;

  // If we haven't started the upload yet, wait for the initialization.
  // `once`, so the listener is dropped after it has fired.
  if (!mpu.uploadId) {
    return mpu.once('initiated', function() {
      mpu._uploadPart(part, callback);
    });
  }

  var url = mpu.objectName + '?partNumber=' + part.id + '&uploadId=' + mpu.uploadId,
      headers = { 'Content-Length': part.length },
      req = mpu.client.request('PUT', url, headers),
      partStream = !mpu.noDisk && fs.createReadStream(part.fileName, {start: part.offset, end: part.offset + part.length - 1}),
      written = 0,
      settled = false; // an attempt ends exactly once, whichever event comes first

  // Best-effort removal of the temp file behind a part that was cut from a
  // stream. Such parts carry the write stream that created the file; parts of
  // a user-supplied file do not, and that file must never be deleted.
  function removeTempFile(done) {
    if (mpu.noDisk || mpu.fileName || !part.stream) return done();
    fs.unlink(part.fileName, function() { done(); });
  }

  // Ends this attempt as failed and retries while the part has tries left
  function fail(message, abortRequest) {
    if (settled) return;
    settled = true;

    if (partStream && typeof partStream.destroy === 'function') partStream.destroy();
    if (abortRequest && typeof req.abort === 'function') req.abort();

    var result = {part: part.id, message: message};
    // Always pass the byte count so listeners can roll their totals back
    mpu.emit('failed', result, written);

    if (--part.triesLeft > 0) {
      return mpu._uploadPart(part, callback);
    }
    return removeTempFile(function() { callback(result); });
  }

  // Wait for the upload to complete
  req.on('response', function(res) {
    // The body is not needed; drain it so the connection is released
    if (typeof res.resume === 'function') res.resume();

    if (res.statusCode !== 200) {
      return fail('Upload failed with status code ' + res.statusCode);
    }

    if (settled) return;
    settled = true;

    // Grab the etag and return it
    var result = {part: part.id, etag: res.headers.etag, size: part.length};
    mpu.emit('uploaded', result);
    return removeTempFile(function() { callback(null, result); });
  });

  // Handle errors
  req.on('error', function(err) {
    fail(err);
  });

  if (partStream) {
    // A read failure must fail the part rather than crash the process
    partStream.on('error', function(err) {
      fail(err, true);
    });

    partStream.on('data', function(data) {
      written += data.length;
      mpu.emit('partProgress', {
        part: part.id,
        written: data.length,
        overall: written,
        total: part.length,
        percent: written / part.length * 100 | 0
      });
    });

    partStream.pipe(req);
  } else {
    req.write(part.data);
    req.end();
  }

  mpu.emit('uploading', part.id);

  if (!partStream && !settled) {
    // In-memory parts are handed over in one write; report them as one step
    // so that progress is also available without temp files.
    written = part.length;
    mpu.emit('partProgress', {
      part: part.id,
      written: part.length,
      overall: written,
      total: part.length,
      percent: 100
    });
  }
};
/**
 * Signals that every part has been queued: waits for the queued uploads to
 * finish and then sends the "complete multipart upload" request listing them.
 *
 * Emits 'completed' with the parsed response body (plus the summed part size).
 *
 * @param {Function} callback  called with (err) on failure or (null, body) on success
 */
MultiPartUpload.prototype._completeUploads = function(callback) {
  var self = this;

  function onUploadsFinished(err, results) {
    if (err) {
      return callback(err);
    }

    // Build the part list and add up the uploaded bytes in one pass
    var totalSize = 0;
    var partXml = [];
    _.forEach(results, function(entry) {
      totalSize += entry.size;
      partXml.push(util.format('<Part><PartNumber>%d</PartNumber><ETag>%s</ETag></Part>', entry.part, entry.etag));
    });

    var request = self.client.request('POST', self.objectName + '?uploadId=' + self.uploadId);

    // Register the response handler
    parse.xmlResponse(request, function(err, body) {
      if (err) {
        return callback(err);
      }

      delete body.$;
      body.size = totalSize;
      self.emit('completed', body);
      return callback(null, body);
    });

    // Write the request
    request.write('<CompleteMultipartUpload>' + partXml.join('') + '</CompleteMultipartUpload>');
    request.end();
  }

  this.uploads.end(onUploadsFinished);
};
/**
 * Finishes an aborted upload: waits for the queued part uploads to settle and
 * then asks S3 to discard the multipart upload.
 *
 * The DELETE is sent even when a part upload failed, so that no half-finished
 * upload is left behind on the S3 side.
 *
 * @param {Function} callback  always called with an error: the part upload
 *                             error, the abort request error, or
 *                             'reached maxUploadSize'
 */
MultiPartUpload.prototype._abortUploads = function(callback) {
  var mpu = this;

  this.uploads.end(function(uploadErr) {
    // Without an upload ID there is nothing on the S3 side to discard
    if (!mpu.uploadId) {
      return callback(uploadErr || 'reached maxUploadSize');
    }

    var req = mpu.client.request('DELETE', mpu.objectName + '?uploadId=' + mpu.uploadId);

    // Register the response handler
    parse.xmlResponse(req, function(err) {
      if (uploadErr) return callback(uploadErr);
      if (err) return callback(err);
      return callback('reached maxUploadSize');
    });

    req.end();
  });
};
// The MultiPartUpload constructor is the value exported by this module
module.exports = MultiPartUpload;
/**
 * Returns four random lowercase hexadecimal digits.
 *
 * Based on Math.random(), so the value is not suitable where unpredictability
 * matters.
 *
 * @returns {String} a string of exactly four characters from 0-9a-f
 */
function random_seed() {
  var digits = '';
  for (var i = 0; i < 4; i++) {
    // One random nibble (0..15) per digit
    digits += (Math.random() * 16 | 0).toString(16);
  }
  return digits;
}