@uppy/companion
Version:
OAuth helper and remote fetcher for Uppy's (https://uppy.io) extensible file upload widget with support for drag&drop, resumable uploads, previews, restrictions, file processing/encoding, remote providers like Dropbox and Google Drive, S3 and more :dog:
630 lines (629 loc) • 25.8 kB
JavaScript
"use strict";
const tus = require('tus-js-client');
const { randomUUID } = require('node:crypto');
const validator = require('validator');
const { pipeline } = require('node:stream/promises');
const { join } = require('node:path');
const fs = require('node:fs');
const throttle = require('lodash/throttle');
const { once } = require('node:events');
const { FormData } = require('formdata-node');
const { Upload } = require('@aws-sdk/lib-storage');
const { rfc2047EncodeMetadata, getBucket, truncateFilename, } = require('./helpers/utils');
const got = require('./got');
const { createReadStream, createWriteStream, ReadStream } = fs;
const { stat, unlink } = fs.promises;
const emitter = require('./emitter');
const { jsonStringify, hasMatch } = require('./helpers/utils');
const logger = require('./logger');
const headerSanitize = require('./header-blacklist');
const redis = require('./redis');
// Need to limit length or we can get
// "MetadataTooLarge: Your metadata headers exceed the maximum allowed metadata size" in tus / S3
const DEFAULT_FIELD_NAME = 'files[]';
/** Upload protocols supported by companion, frozen so they cannot be mutated at runtime. */
const PROTOCOLS = Object.freeze({ multipart: 'multipart', s3Multipart: 's3-multipart', tus: 'tus' });
/**
 * Whether a known file size exceeds the configured maximum.
 *
 * A falsy `maxFileSize` means "no limit configured" and a falsy `size`
 * means "size unknown" — neither counts as exceeding the limit.
 *
 * @param {number} [maxFileSize] maximum allowed size in bytes (falsy = unlimited)
 * @param {number} [size] size in bytes, if known (falsy = unknown)
 * @returns {boolean}
 */
function exceedsMaxFileSize(maxFileSize, size) {
    // Coerce to a real boolean: the bare short-circuit chain would otherwise
    // leak `undefined`/`null`/`0` to callers expecting a predicate result.
    return Boolean(maxFileSize && size && size > maxFileSize);
}
/**
 * Error thrown when uploader options fail validation.
 * Exposes `name === 'ValidationError'` so callers can distinguish
 * bad client input from unexpected server errors.
 */
class ValidationError extends Error {
    constructor(...args) {
        super(...args);
        this.name = 'ValidationError';
    }
}
/**
 * Validate the options passed down to the uploader.
 *
 * @param {UploaderOptions} options
 * @throws {ValidationError} if any option is malformed or not allowed
 */
function validateOptions(options) {
    // validate HTTP Method (only PUT and POST are supported)
    if (options.httpMethod) {
        if (typeof options.httpMethod !== 'string') {
            throw new ValidationError('unsupported HTTP METHOD specified');
        }
        const method = options.httpMethod.toUpperCase();
        if (method !== 'PUT' && method !== 'POST') {
            throw new ValidationError('unsupported HTTP METHOD specified');
        }
    }
    if (exceedsMaxFileSize(options.companionOptions.maxFileSize, options.size)) {
        throw new ValidationError('maxFileSize exceeded');
    }
    // validate fieldname
    if (options.fieldname != null && typeof options.fieldname !== 'string') {
        throw new ValidationError('fieldname must be a string');
    }
    // validate metadata
    if (options.metadata != null && typeof options.metadata !== 'object') {
        throw new ValidationError('metadata must be an object');
    }
    // validate headers
    if (options.headers != null && typeof options.headers !== 'object') {
        throw new ValidationError('headers must be an object');
    }
    // validate protocol
    // @todo this validation should not be conditional once the protocol field is mandatory
    if (options.protocol &&
        !Object.values(PROTOCOLS).includes(options.protocol)) {
        throw new ValidationError('unsupported protocol specified');
    }
    // s3 uploads don't require upload destination
    // validation, because the destination is determined
    // by the server's s3 config
    if (options.protocol !== PROTOCOLS.s3Multipart) {
        if (!options.endpoint && !options.uploadUrl) {
            throw new ValidationError('no destination specified');
        }
        const validateUrl = (url) => {
            // require_protocol rejects protocol-relative/bare-host URLs;
            // require_tld:false still allows e.g. local tusd instances.
            const validatorOpts = { require_protocol: true, require_tld: false };
            if (url && !validator.isURL(url, validatorOpts)) {
                throw new ValidationError('invalid destination url');
            }
            // When the deployment restricts destinations, enforce the allow-list.
            const allowedUrls = options.companionOptions.uploadUrls;
            if (allowedUrls && url && !hasMatch(url, allowedUrls)) {
                throw new ValidationError('upload destination does not match any allowed destinations');
            }
        };
        [options.endpoint, options.uploadUrl].forEach(validateUrl);
    }
    if (options.chunkSize != null && typeof options.chunkSize !== 'number') {
        throw new ValidationError('incorrect chunkSize');
    }
}
/** Upload lifecycle states; every key maps to its own name. */
const states = Object.fromEntries(['idle', 'uploading', 'paused', 'done'].map((state) => [state, state]));
/**
 * Drives one remote-file upload session: takes the provider's download stream,
 * optionally buffers it to disk first (when streaming uploads are disabled or
 * the size is unknown), then forwards it to the destination via tus,
 * s3-multipart, or plain multipart. Progress/success/error updates are emitted
 * on the shared emitter keyed by `this.token` and persisted via `saveState`
 * (Redis, when configured) so the client websocket can pick them up.
 */
class Uploader {
    /** @type {import('ioredis').Redis} */
    storage;
    /**
     * Uploads file to destination based on the supplied protocol (tus, s3-multipart, multipart)
     * For tus uploads, the deferredLength option is enabled, because file size value can be unreliable
     * for some providers (Instagram particularly)
     *
     * @typedef {object} UploaderOptions
     * @property {string} endpoint
     * @property {string} [uploadUrl]
     * @property {string} protocol
     * @property {number} [size]
     * @property {string} [fieldname]
     * @property {string} pathPrefix
     * @property {any} [s3]
     * @property {any} metadata
     * @property {any} companionOptions
     * @property {any} [storage]
     * @property {any} [headers]
     * @property {string} [httpMethod]
     * @property {boolean} [useFormData]
     * @property {number} [chunkSize]
     * @property {string} [providerName]
     *
     * @param {UploaderOptions} options
     */
    constructor(options) {
        validateOptions(options);
        this.providerName = options.providerName;
        this.options = options;
        // The token doubles as the event channel id between client and companion.
        this.token = randomUUID();
        this.fileName = `${Uploader.FILE_NAME_PREFIX}-${this.token}`;
        this.options.metadata = {
            ...(this.providerName != null && { provider: this.providerName }),
            ...(this.options.metadata || {}), // allow user to override provider
        };
        this.options.fieldname = this.options.fieldname || DEFAULT_FIELD_NAME;
        this.size = options.size;
        const { maxFilenameLength } = this.options.companionOptions;
        // Define upload file name
        this.uploadFileName = truncateFilename(this.options.metadata.name || this.fileName, maxFilenameLength);
        this.storage = options.storage;
        this.downloadedBytes = 0;
        this.readStream = null;
        // Pause/resume only make sense for tus, the one resumable protocol here.
        if (this.options.protocol === PROTOCOLS.tus) {
            emitter().on(`pause:${this.token}`, () => {
                logger.debug('Received from client: pause', 'uploader', this.shortToken);
                if (this.#uploadState !== states.uploading)
                    return;
                this.#uploadState = states.paused;
                if (this.tus) {
                    this.tus.abort();
                }
            });
            emitter().on(`resume:${this.token}`, () => {
                logger.debug('Received from client: resume', 'uploader', this.shortToken);
                if (this.#uploadState !== states.paused)
                    return;
                this.#uploadState = states.uploading;
                if (this.tus) {
                    this.tus.start();
                }
            });
        }
        emitter().on(`cancel:${this.token}`, () => {
            logger.debug('Received from client: cancel', 'uploader', this.shortToken);
            if (this.tus) {
                // Only terminate (delete) the remote upload if it was actually created.
                const shouldTerminate = !!this.tus.url;
                this.tus.abort(shouldTerminate).catch(() => { });
            }
            this.#canceled = true;
            this.abortReadStream(new Error('Canceled'));
        });
    }
    // Current position in the idle -> uploading -> (paused) -> done lifecycle.
    #uploadState = states.idle;
    // Set when the client cancels, so tryUploadStream can suppress the error event.
    #canceled = false;
    /**
     * Terminate the session: mark it done and destroy the source stream with
     * `err` (used for cancel and maxFileSize enforcement).
     *
     * @param {Error} err
     */
    abortReadStream(err) {
        this.#uploadState = states.done;
        if (this.readStream)
            this.readStream.destroy(err);
    }
    _getUploadProtocol() {
        // todo a default protocol should not be set. We should ensure that the user specifies their protocol.
        // after we drop old versions of uppy client we can remove this
        return this.options.protocol || PROTOCOLS.multipart;
    }
    /**
     * Dispatch to the protocol-specific upload implementation.
     *
     * @param {import('express').Request} req only needed by s3-multipart (bucket/key resolution)
     * @returns {Promise<{url: string | null, extraData: object}>}
     */
    async _uploadByProtocol(req) {
        const protocol = this._getUploadProtocol();
        switch (protocol) {
            case PROTOCOLS.multipart:
                return this.#uploadMultipart(this.readStream);
            case PROTOCOLS.s3Multipart:
                return this.#uploadS3Multipart(this.readStream, req);
            case PROTOCOLS.tus:
                return this.#uploadTus(this.readStream);
            default:
                throw new Error('Invalid protocol');
        }
    }
    /**
     * Fully buffer `stream` to a temp file under `pathPrefix`, record its real
     * size, and replace `this.readStream` with a file stream of the result.
     * Enforces maxFileSize while downloading (size is unknown up front).
     *
     * @param {import('stream').Readable} stream
     */
    async _downloadStreamAsFile(stream) {
        this.tmpPath = join(this.options.pathPrefix, this.fileName);
        logger.debug('fully downloading file', 'uploader.download', this.shortToken);
        const writeStream = createWriteStream(this.tmpPath);
        const onData = (chunk) => {
            this.downloadedBytes += chunk.length;
            if (exceedsMaxFileSize(this.options.companionOptions.maxFileSize, this.downloadedBytes)) {
                this.abortReadStream(new Error('maxFileSize exceeded'));
            }
            // bytesUploaded is 0 while downloading; onProgress folds downloadedBytes in.
            this.onProgress(0, undefined);
        };
        stream.on('data', onData);
        await pipeline(stream, writeStream);
        logger.debug('finished fully downloading file', 'uploader.download', this.shortToken);
        const { size } = await stat(this.tmpPath);
        this.size = size;
        const fileStream = createReadStream(this.tmpPath);
        this.readStream = fileStream;
    }
    // Whether the file can be piped straight to the destination without buffering to disk.
    _canStream() {
        return this.options.companionOptions.streamingUpload;
    }
    /**
     *
     * @param {import('stream').Readable} stream
     * @param {import('express').Request} req
     */
    async uploadStream(stream, req) {
        try {
            if (this.#uploadState !== states.idle)
                throw new Error('Can only start an upload in the idle state');
            if (this.readStream)
                throw new Error('Already uploading');
            this.#uploadState = states.uploading;
            this.readStream = stream;
            if (!this._canStream()) {
                logger.debug('need to download the whole file first', 'controller.get.provider.size', this.shortToken);
                // Some streams need to be downloaded entirely first, because we don't know their size from the provider
                // This is true for zoom and drive (exported files) or some URL downloads.
                // The stream will then typically come from a "Transfer-Encoding: chunked" response
                await this._downloadStreamAsFile(this.readStream);
            }
            // A cancel (or maxFileSize abort) may have flipped the state while downloading.
            if (this.#uploadState !== states.uploading)
                return undefined;
            const { url, extraData } = await Promise.race([
                this._uploadByProtocol(req),
                // If we don't handle stream errors, we get unhandled error in node.
                new Promise((resolve, reject) => this.readStream.on('error', reject)),
            ]);
            return { url, extraData };
        }
        finally {
            this.#uploadState = states.done;
            // NOTE(review): other debug calls pass a tag before the traceId; this
            // one passes only two args — confirm whether a tag was intended.
            logger.debug('cleanup', this.shortToken);
            if (this.readStream && !this.readStream.destroyed)
                this.readStream.destroy();
            await this.tryDeleteTmpPath();
        }
    }
    // Best-effort removal of the temp file created by _downloadStreamAsFile.
    tryDeleteTmpPath() {
        if (this.tmpPath)
            unlink(this.tmpPath).catch(() => { });
    }
    /**
     *
     * @param {import('stream').Readable} stream
     * @param {import('express').Request} req
     */
    async tryUploadStream(stream, req) {
        try {
            emitter().emit('upload-start', { token: this.token });
            const ret = await this.uploadStream(stream, req);
            if (!ret)
                return;
            const { url, extraData } = ret;
            this.#emitSuccess(url, extraData);
        }
        catch (err) {
            if (this.#canceled) {
                // A client-initiated cancel is expected; don't report it as an error event.
                logger.error('Aborted upload', 'uploader.aborted', this.shortToken);
                return;
            }
            logger.error(err, 'uploader.error', this.shortToken);
            await this.#emitError(err);
        }
        finally {
            // Drop the per-token listeners registered in the constructor.
            emitter().removeAllListeners(`pause:${this.token}`);
            emitter().removeAllListeners(`resume:${this.token}`);
            emitter().removeAllListeners(`cancel:${this.token}`);
        }
    }
    /**
     * returns a substring of the token. Used as traceId for logging
     * we avoid using the entire token because this is meant to be a short term
     * access token between uppy client and companion websocket
     *
     * @param {string} token the token to Shorten
     * @returns {string}
     */
    static shortenToken(token) {
        return token.substring(0, 8);
    }
    /**
     * Build UploaderOptions from an incoming request plus server config.
     * Body fields are client-controlled and validated later by validateOptions.
     *
     * @param {import('express').Request} req
     * @param {number} [size]
     */
    static reqToOptions(req, size) {
        const useFormDataIsSet = Object.hasOwn(req.body, 'useFormData');
        // Default to form-data uploads unless the client explicitly opted out.
        const useFormData = useFormDataIsSet ? req.body.useFormData : true;
        return {
            // Client provided info (must be validated and not blindly trusted):
            headers: req.body.headers,
            httpMethod: req.body.httpMethod,
            protocol: req.body.protocol,
            endpoint: req.body.endpoint,
            uploadUrl: req.body.uploadUrl,
            metadata: req.body.metadata,
            fieldname: req.body.fieldname,
            useFormData,
            providerName: req.companion.providerName,
            // Info coming from companion server configuration:
            size,
            companionOptions: req.companion.options,
            pathPrefix: `${req.companion.options.filePath}`,
            storage: redis.client(),
            s3: req.companion.s3Client
                ? {
                    client: req.companion.s3Client,
                    options: req.companion.options.s3,
                }
                : null,
            chunkSize: req.companion.options.chunkSize,
        };
    }
    /**
     * returns a substring of the token. Used as traceId for logging
     * we avoid using the entire token because this is meant to be a short term
     * access token between uppy client and companion websocket
     */
    get shortToken() {
        return Uploader.shortenToken(this.token);
    }
    /**
     * Wait for the client websocket to connect (a `connection:<token>` event).
     *
     * @param {number} [timeout] ms after which to abort the wait; falsy = wait forever
     */
    async awaitReady(timeout) {
        logger.debug('waiting for socket connection', 'uploader.socket.wait', this.shortToken);
        const eventName = `connection:${this.token}`;
        await once(emitter(), eventName, timeout && { signal: AbortSignal.timeout(timeout) });
        logger.debug('socket connection received', 'uploader.socket.wait', this.shortToken);
    }
    /**
     * @typedef {{action: string, payload: object}} State
     * @param {State} state
     */
    saveState(state) {
        if (!this.storage)
            return;
        // make sure the keys get cleaned up.
        // https://github.com/transloadit/uppy/issues/3748
        const keyExpirySec = 60 * 60 * 24;
        const redisKey = `${Uploader.STORAGE_PREFIX}:${this.token}`;
        this.storage.set(redisKey, jsonStringify(state), 'EX', keyExpirySec);
    }
    // Rate-limited emitter (at most once per second) for progress events.
    throttledEmitProgress = throttle((dataToEmit) => {
        const { bytesUploaded, bytesTotal, progress } = dataToEmit.payload;
        logger.debug(`${bytesUploaded} ${bytesTotal} ${progress}%`, 'uploader.total.progress', this.shortToken);
        this.saveState(dataToEmit);
        emitter().emit(this.token, dataToEmit);
    }, 1000, { trailing: false });
    /**
     *
     * @param {number} [bytesUploaded]
     * @param {number | null} [bytesTotalIn]
     */
    onProgress(bytesUploaded = 0, bytesTotalIn = 0) {
        const bytesTotal = bytesTotalIn || this.size || 0;
        // If fully downloading before uploading, combine downloaded and uploaded bytes
        // This will make sure that the user sees half of the progress before upload starts (while downloading)
        let combinedBytes = bytesUploaded;
        if (!this._canStream()) {
            combinedBytes = Math.floor((combinedBytes + (this.downloadedBytes || 0)) / 2);
        }
        // Prevent divide by zero
        let percentage = 0;
        if (bytesTotal > 0)
            percentage = Math.min(Math.max(0, (combinedBytes / bytesTotal) * 100), 100);
        const formattedPercentage = percentage.toFixed(2);
        // No progress once paused/canceled/done.
        if (this.#uploadState !== states.uploading) {
            return;
        }
        const payload = {
            progress: formattedPercentage,
            bytesUploaded: combinedBytes,
            bytesTotal,
        };
        const dataToEmit = {
            action: 'progress',
            payload,
        };
        // avoid flooding the client (and log) with progress events.
        // flooding will cause reduced performance and possibly network issues
        this.throttledEmitProgress(dataToEmit);
    }
    /**
     *
     * @param {string} url
     * @param {object} extraData
     */
    #emitSuccess(url, extraData) {
        const emitData = {
            action: 'success',
            payload: { ...extraData, complete: true, url },
        };
        this.saveState(emitData);
        emitter().emit(this.token, emitData);
    }
    /**
     *
     * @param {Error} err
     */
    async #emitError(err) {
        // delete stack to avoid sending server info to client
        // see PR discussion https://github.com/transloadit/uppy/pull/3832
        // @ts-ignore
        const { serializeError } = await import('serialize-error');
        const { stack, ...serializedErr } = serializeError(err);
        const dataToEmit = {
            action: 'error',
            payload: { error: serializedErr },
        };
        this.saveState(dataToEmit);
        emitter().emit(this.token, dataToEmit);
    }
    /**
     * start the tus upload
     *
     * @param {any} stream
     */
    async #uploadTus(stream) {
        // Alias `this` because the tus option callbacks below are called with their own `this`.
        const uploader = this;
        const isFileStream = stream instanceof ReadStream;
        // chunkSize needs to be a finite value if the stream is not a file stream (fs.createReadStream)
        // https://github.com/tus/tus-js-client/blob/4479b78032937ac14da9b0542e489ac6fe7e0bc7/lib/node/fileReader.js#L50
        const chunkSize = this.options.chunkSize || (isFileStream ? Infinity : 50e6);
        const tusRet = await new Promise((resolve, reject) => {
            const tusOptions = {
                endpoint: this.options.endpoint,
                uploadUrl: this.options.uploadUrl,
                retryDelays: [0, 1000, 3000, 5000],
                chunkSize,
                headers: headerSanitize(this.options.headers),
                addRequestId: true,
                metadata: {
                    // file name and type as required by the tusd tus server
                    // https://github.com/tus/tusd/blob/5b376141903c1fd64480c06dde3dfe61d191e53d/unrouted_handler.go#L614-L646
                    filename: this.uploadFileName,
                    filetype: this.options.metadata.type,
                    ...this.options.metadata,
                },
                /**
                 *
                 * @param {Error} error
                 */
                onError(error) {
                    logger.error(error, 'uploader.tus.error');
                    // deleting tus originalRequest field because it uses the same http-agent
                    // as companion, and this agent may contain sensitive request details (e.g headers)
                    // previously made to providers. Deleting the field would prevent it from getting leaked
                    // to the frontend etc.
                    // @ts-ignore
                    delete error.originalRequest;
                    // @ts-ignore
                    delete error.originalResponse;
                    reject(error);
                },
                /**
                 *
                 * @param {number} [bytesUploaded]
                 * @param {number} [bytesTotal]
                 */
                onProgress(bytesUploaded, bytesTotal) {
                    uploader.onProgress(bytesUploaded, bytesTotal);
                },
                onSuccess() {
                    resolve({ url: uploader.tus.url });
                },
            };
            if (this.options.companionOptions.tusDeferredUploadLength &&
                !isFileStream) {
                // Size unknown/unreliable: let tus defer the upload length.
                tusOptions.uploadLengthDeferred = true;
            }
            else {
                if (!this.size) {
                    reject(new Error('tusDeferredUploadLength needs to be enabled if no file size is provided by the provider'));
                }
                tusOptions.uploadLengthDeferred = false;
                tusOptions.uploadSize = this.size;
            }
            this.tus = new tus.Upload(stream, tusOptions);
            this.tus.start();
        });
        // @ts-ignore
        if (this.size != null && this.tus._size !== this.size) {
            // @ts-ignore
            logger.warn(
            // @ts-expect-error _size is not typed
            `Tus uploaded size ${this.tus._size} different from reported URL size ${this.size}`, 'upload.tus.mismatch.error');
        }
        return tusRet;
    }
    /**
     * Upload via a plain HTTP multipart/form-data (or raw body) request to the endpoint.
     *
     * @param {import('stream').Readable} stream
     */
    async #uploadMultipart(stream) {
        if (!this.options.endpoint) {
            throw new Error('No multipart endpoint set');
        }
        function getRespObj(response) {
            // remove browser forbidden headers
            const { 'set-cookie': deleted, 'set-cookie2': deleted2, ...responseHeaders } = response.headers;
            return {
                responseText: response.body,
                status: response.statusCode,
                statusText: response.statusMessage,
                headers: responseHeaders,
            };
        }
        // upload progress
        let bytesUploaded = 0;
        stream.on('data', (data) => {
            bytesUploaded += data.length;
            this.onProgress(bytesUploaded, undefined);
        });
        const url = this.options.endpoint;
        const reqOptions = {
            headers: headerSanitize(this.options.headers),
        };
        if (this.options.useFormData) {
            const formData = new FormData();
            Object.entries(this.options.metadata).forEach(([key, value]) => formData.append(key, value));
            // see https://github.com/octet-stream/form-data/blob/73a5a24e635938026538673f94cbae1249a3f5cc/readme.md?plain=1#L232
            formData.set(this.options.fieldname, {
                name: this.uploadFileName,
                [Symbol.toStringTag]: 'File',
                stream() {
                    return stream;
                },
            });
            reqOptions.body = formData;
        }
        else {
            // Raw body upload: the destination needs an explicit content-length.
            reqOptions.headers['content-length'] = this.size;
            reqOptions.body = stream;
        }
        try {
            const httpMethod = (this.options.httpMethod || '').toUpperCase() === 'PUT' ? 'put' : 'post';
            const runRequest = (await got)[httpMethod];
            const response = await runRequest(url, reqOptions);
            if (this.size != null && bytesUploaded !== this.size) {
                const errMsg = `uploaded only ${bytesUploaded} of ${this.size} with status: ${response.statusCode}`;
                logger.error(errMsg, 'upload.multipart.mismatch.error');
                throw new Error(errMsg);
            }
            let bodyURL = null;
            try {
                bodyURL = JSON.parse(response.body)?.url;
            }
            catch {
                // response.body can be undefined or an empty string
                // in that case we ignore and continue.
            }
            return {
                url: bodyURL,
                extraData: { response: getRespObj(response), bytesUploaded },
            };
        }
        catch (err) {
            logger.error(err, 'upload.multipart.error');
            const statusCode = err.response?.statusCode;
            if (statusCode != null) {
                throw Object.assign(new Error(err.statusMessage), {
                    extraData: getRespObj(err.response),
                });
            }
            throw new Error('Unknown multipart upload error', { cause: err });
        }
    }
    /**
     * Upload the file to S3 using a Multipart upload.
     */
    async #uploadS3Multipart(stream, req) {
        if (!this.options.s3) {
            throw new Error('The S3 client is not configured on this companion instance.');
        }
        const filename = this.uploadFileName;
        /**
         * @type {{client: import('@aws-sdk/client-s3').S3Client, options: Record<string, any>}}
         */
        const s3Options = this.options.s3;
        const { metadata } = this.options;
        const { client, options } = s3Options;
        const params = {
            Bucket: getBucket({ bucketOrFn: options.bucket, req, metadata }),
            Key: options.getKey({ req, filename, metadata }),
            ContentType: metadata.type,
            Metadata: rfc2047EncodeMetadata(metadata),
            Body: stream,
        };
        if (options.acl != null)
            params.ACL = options.acl;
        const upload = new Upload({
            client,
            params,
            // using chunkSize as partSize too, see https://github.com/transloadit/uppy/pull/3511
            partSize: this.options.chunkSize,
            leavePartsOnError: true, // https://github.com/aws/aws-sdk-js-v3/issues/2311
        });
        upload.on('httpUploadProgress', ({ loaded, total }) => {
            this.onProgress(loaded, total);
        });
        const data = await upload.done();
        return {
            url: data?.Location || null,
            extraData: {
                response: {
                    responseText: JSON.stringify(data),
                    headers: {
                        'content-type': 'application/json',
                    },
                },
            },
        };
    }
}
// Static configuration shared by all Uploader instances.
Uploader.FILE_NAME_PREFIX = 'uppy-file';
Uploader.STORAGE_PREFIX = 'companion';
// Since the class itself is the export, attaching ValidationError here is
// equivalent to assigning it on module.exports after the fact.
Uploader.ValidationError = ValidationError;
module.exports = Uploader;