s3-upload-stream
Writeable stream for uploading content of unknown size to S3 via the multipart API.
var Writable = require('stream').Writable;
var events = require('events');
// Set the S3 client to be used for this upload.
function Client(client) {
if (this instanceof Client === false) {
return new Client(client);
}
if (!client) {
throw new Error('Must configure an S3 client before attempting to create an S3 upload stream.');
}
this.cachedClient = client;
}
// Generate a writeable stream which uploads to a file on S3.
Client.prototype.upload = function (destinationDetails, sessionDetails) {
var cachedClient = this.cachedClient;
var e = new events.EventEmitter();
if (!sessionDetails) sessionDetails = {};
// Create the writable stream interface.
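// The 4 MB highWaterMark is the amount of incoming data Node will buffer
// before write() starts returning false, which applies backpressure to a
// piped source.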
var ws = new Writable({
highWaterMark: 4194304 // 4 MB
});
// Data pertaining to the overall upload.
// If resumable parts are passed in, they must be free of gaps.
var multipartUploadID = sessionDetails.UploadId ? sessionDetails.UploadId : null;
var partNumber = sessionDetails.Parts ? (sessionDetails.Parts.length + 1) : 1;
var partIds = sessionDetails.Parts || [];
var receivedSize = 0;
var uploadedSize = 0;
// Light state management -
// started: ensures the 'ready' event still fires when resuming an existing upload
// paused: governs manual pause/resume
var started = false;
var paused = false;
// Parts which need to be uploaded to S3.
var pendingParts = 0;
var concurrentPartThreshold = 1;
// Data pertaining to buffers we have received
var receivedBuffers = [];
var receivedBuffersLength = 0;
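// 5 MiB is the smallest part size the S3 multipart API accepts for any part
// other than the last one; S3 also caps an upload at 10,000 parts, so larger
// part sizes are needed for very large files.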
var partSizeThreshold = 5242880;
// Set the maximum amount of data that we will keep in memory before flushing it to S3 as a part
// of the multipart upload
ws.maxPartSize = function (partSize) {
if (partSize < 5242880)
partSize = 5242880;
partSizeThreshold = partSize;
return ws;
};
ws.getMaxPartSize = function () {
return partSizeThreshold;
};
// Set the maximum number of parts that may be uploaded to S3 concurrently
ws.concurrentParts = function (parts) {
if (parts < 1)
parts = 1;
concurrentPartThreshold = parts;
return ws;
};
ws.getConcurrentParts = function () {
return concurrentPartThreshold;
};
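// Example (hypothetical consumer code): both setters return the stream, so
// they can be chained at creation time:
//
//   var upload = client.upload({ Bucket: 'example-bucket', Key: 'example-key' })
//     .maxPartSize(20971520) // 20 MiB parts
//     .concurrentParts(5);   // up to 5 parts in flight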
// Handler to receive data and upload it to S3.
ws._write = function (incomingBuffer, enc, next) {
// Pause/resume check #1 out of 2:
// Block incoming writes immediately on pause.
if (paused)
e.once('resume', write);
else
write();
function write() {
absorbBuffer(incomingBuffer);
if (receivedBuffersLength < partSizeThreshold)
return next(); // Ready to receive more data in _write.
// We need to upload some data
uploadHandler(next);
}
};
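// Backpressure note: when all concurrent-part slots are busy, uploadHandler
// defers calling next() until a 'part' event frees a slot, which stalls
// _write and propagates backpressure to the piped source.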
// Ask the stream to pause - will allow existing
// part uploads to complete first.
ws.pause = function () {
// if already mid-pause, this does nothing
if (paused) return false;
// if there's no active upload, this does nothing
if (!started) return false;
paused = true;
// tell the caller how many parts are still mid-upload
ws.emit('pausing', pendingParts);
// if there are no parts outstanding, declare the stream
// paused and return currently sent part details.
if (pendingParts === 0)
notifyPaused();
// otherwise, the 'paused' event will get sent once the
// last part finishes uploading.
return true;
};
// Lift the pause, and re-kick off the uploading.
ws.resume = function () {
// if we're not paused, this does nothing
if (!paused) return false;
paused = false;
e.emit('resume'); // internal event
ws.emit('resume'); // external event
return true;
};
// when pausing, return relevant pause state to client
var notifyPaused = function () {
ws.emit('paused', {
UploadId: multipartUploadID,
Parts: partIds,
uploadedSize: uploadedSize
});
};
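// Example (hypothetical consumer code): the 'paused' payload has the same
// shape as sessionDetails, so it can be persisted and handed back to
// upload() later to resume the same multipart upload:
//
//   upload.on('paused', function (session) {
//     savedSession = session; // persist however you like
//   });
//   // ...later, possibly in another process...
//   var resumed = client.upload(destinationDetails, savedSession);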
// Concurrently upload parts to S3.
var uploadHandler = function (next) {
// If this is the first part, and we're just starting,
// but we have a multipartUploadID, then we're beginning
// a resume and can fire the 'ready' event externally.
if (multipartUploadID && !started)
ws.emit('ready', multipartUploadID);
started = true;
if (pendingParts < concurrentPartThreshold) {
// Has the MPU been created yet?
if (multipartUploadID)
upload(); // Upload the part immediately.
else {
e.once('ready', upload); // Wait until multipart upload is initialized.
createMultipartUpload();
}
}
else {
// Block uploading (and receiving of more data) until we upload
// some of the pending parts
e.once('part', upload);
}
function upload() {
// Pause/resume check #2 out of 2:
// Block queued up parts until resumption.
if (paused)
e.once('resume', uploadNow);
else
uploadNow();
function uploadNow() {
pendingParts++;
flushPart(function (partDetails) {
--pendingParts;
e.emit('part'); // Internal event
ws.emit('part', partDetails); // External event
// if we're paused and this was the last outstanding part,
// we can notify the caller that we're really paused now.
if (paused && pendingParts === 0)
notifyPaused();
});
next();
}
}
};
// Absorb an incoming buffer from _write into a buffer queue
var absorbBuffer = function (incomingBuffer) {
receivedBuffers.push(incomingBuffer);
receivedBuffersLength += incomingBuffer.length;
};
// Take the list of received buffers and return a combined buffer that is at
// most partSizeThreshold in size; the final part of an upload may be smaller.
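// Example: with the default 5 MiB threshold, 6 MiB of buffered data yields a
// 5 MiB part for upload, and the trailing 1 MiB is re-queued as the start of
// the next part.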
var preparePartBuffer = function () {
// Combine the buffers we've received and reset the list of buffers.
var combinedBuffer = Buffer.concat(receivedBuffers, receivedBuffersLength);
receivedBuffers.length = 0; // Trick to reset the array while keeping the original reference
receivedBuffersLength = 0;
if (combinedBuffer.length > partSizeThreshold) {
// The combined buffer is too big, so slice off the end and put it back in the array.
var remainder = Buffer.alloc(combinedBuffer.length - partSizeThreshold);
combinedBuffer.copy(remainder, 0, partSizeThreshold);
receivedBuffers.push(remainder);
receivedBuffersLength = remainder.length;
// Return the perfectly sized part.
var uploadBuffer = Buffer.alloc(partSizeThreshold);
combinedBuffer.copy(uploadBuffer, 0, 0, partSizeThreshold);
return uploadBuffer;
}
else {
// It just happened to be perfectly sized, so return it.
return combinedBuffer;
}
};
// Flush a part out to S3.
var flushPart = function (callback) {
var partBuffer = preparePartBuffer();
var localPartNumber = partNumber;
partNumber++;
receivedSize += partBuffer.length;
cachedClient.uploadPart(
{
Body: partBuffer,
Bucket: destinationDetails.Bucket,
Key: destinationDetails.Key,
UploadId: multipartUploadID,
PartNumber: localPartNumber
},
function (err, result) {
if (err)
abortUpload('Failed to upload a part to S3: ' + JSON.stringify(err));
else {
uploadedSize += partBuffer.length;
partIds[localPartNumber - 1] = {
ETag: result.ETag,
PartNumber: localPartNumber
};
callback({
ETag: result.ETag,
PartNumber: localPartNumber,
receivedSize: receivedSize,
uploadedSize: uploadedSize
});
}
}
);
};
// Overwrite the end method so that we can hijack it to flush the last part and then complete
// the multipart upload
ws.originalEnd = ws.end;
ws.end = function (Part, encoding, callback) {
ws.originalEnd(Part, encoding, function afterDoneWithOriginalEnd() {
// Note: a final Part passed to end() has already flowed through _write (and
// been absorbed into the buffer queue) by the time this finish callback
// fires, so it must not be absorbed a second time here.
// Upload any remaining data
var uploadRemainingData = function () {
if (receivedBuffersLength > 0) {
uploadHandler(uploadRemainingData);
return;
}
if (pendingParts > 0) {
setTimeout(uploadRemainingData, 50); // Wait 50 ms for the pending uploads to finish before trying again.
return;
}
completeUpload();
};
uploadRemainingData();
if (typeof callback === 'function')
callback();
});
};
// Turn all the individual parts we uploaded to S3 into a finalized upload.
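// Note: completeMultipartUpload requires the parts list in ascending
// PartNumber order. Because flushPart stores each entry at index
// (PartNumber - 1), partIds stays correctly ordered even when concurrent
// parts finish out of sequence.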
var completeUpload = function () {
// The incoming stream may have been empty, in which case the multipart upload
// was never created and there is nothing to finalize.
if (multipartUploadID) {
cachedClient.completeMultipartUpload(
{
Bucket: destinationDetails.Bucket,
Key: destinationDetails.Key,
UploadId: multipartUploadID,
MultipartUpload: {
Parts: partIds
}
},
function (err, result) {
if (err)
abortUpload('Failed to complete the multipart upload on S3: ' + JSON.stringify(err));
else {
// Emit both events for backwards compatibility, and to follow the spec.
ws.emit('uploaded', result);
ws.emit('finish', result);
started = false;
}
}
);
}
};
// When a fatal error occurs, abort the multipart upload
var abortUpload = function (rootError) {
cachedClient.abortMultipartUpload(
{
Bucket: destinationDetails.Bucket,
Key: destinationDetails.Key,
UploadId: multipartUploadID
},
function (abortError) {
if (abortError)
ws.emit('error', rootError + '\n Additionally failed to abort the multipart upload on S3: ' + abortError);
else
ws.emit('error', rootError);
}
);
};
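// Create the multipart upload on S3. destinationDetails is passed through
// verbatim, so callers may include any other CreateMultipartUpload parameters
// (for example ContentType or ACL) alongside Bucket and Key.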
var createMultipartUpload = function () {
cachedClient.createMultipartUpload(
destinationDetails,
function (err, data) {
if (err)
ws.emit('error', 'Failed to create a multipart upload on S3: ' + JSON.stringify(err));
else {
multipartUploadID = data.UploadId;
ws.emit('ready', multipartUploadID);
e.emit('ready'); // Internal event
}
}
);
};
return ws;
};
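// Module-level convenience API: configure a single shared client once via
// Client.client(), then create upload streams anywhere via Client.upload().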
Client.globalClient = null;
Client.client = function (options) {
Client.globalClient = new Client(options);
return Client.globalClient;
};
Client.upload = function (destinationDetails, sessionDetails) {
if (!Client.globalClient) {
throw new Error('Must configure an S3 client before attempting to create an S3 upload stream.');
}
return Client.globalClient.upload(destinationDetails, sessionDetails);
};
module.exports = Client;
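A minimal usage sketch (assuming an aws-sdk v2 S3 client and that this file is required as a module; the bucket name, key, and file path below are placeholders):

var AWS = require('aws-sdk');
var fs = require('fs');

// The module exports Client, which may be called without `new` to wrap an S3 client.
var client = require('./s3-upload-stream')(new AWS.S3());

// Create the writable upload stream and tune it before piping data in.
var upload = client.upload({ Bucket: 'example-bucket', Key: 'example-key' })
  .maxPartSize(20971520) // Buffer up to 20 MiB per part.
  .concurrentParts(5);   // Upload up to 5 parts in parallel.

upload.on('error', function (err) {
  console.error('Upload failed:', err);
});
upload.on('part', function (details) {
  console.log('Uploaded part', details.PartNumber);
});
upload.on('uploaded', function (details) {
  console.log('Upload complete:', details);
});

fs.createReadStream('/path/to/source/file').pipe(upload);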