/*
 * cache-manager-s3
 * Version:
 * Cache manager module for S3
 * 700 lines (588 loc) • 23.5 kB
 * JavaScript
 */
/** @module S3Cache */
const async = require('async')
const path = require('path')
const Url = require('url-parse')
const querystring = require('querystring')
const checksum = require('checksum')
const moment = require('moment')
const log = require('loglevel')
const prefix = require('loglevel-plugin-prefix')
const chalk = require('chalk')
const S3 = require('aws-sdk').S3
/**
 * Baseline option values. The S3Cache constructor copies these first and then
 * overlays whatever the caller supplied.
 */
const defaultOptions = {
  logLevel: 'warn',
  ttl: 0,
  ttlUnits: 'seconds',
  pathPrefix: '',
  stringifyResponses: true,

  // Folder chunking: how many leading key slices become folders, and how wide each slice is.
  folderPathDepth: 2,
  folderPathChunkSize: 2,

  // Checksumming: any OpenSSL hash specifier is accepted for the algorithm.
  checksumAlgorithm: 'md5',
  checksumEncoding: 'hex',

  // Key normalization applied to every key.
  normalizeLowercase: false,

  // Key normalization used when the key is treated as a filesystem-style path.
  parseKeyAsPath: false,
  normalizePath: true,

  // Key normalization used when the key is treated as a URL.
  parseKeyAsUrl: false,
  normalizeUrl: true,

  // Caching behaviour.
  proactiveExpiry: false,
}
/**
 * Colorizer for each log level name (upper-cased); used by the log prefix
 * formatter to paint the level label.
 */
const logColors = {
  TRACE: chalk.magenta,
  DEBUG: chalk.cyan,
  INFO: chalk.blue,
  WARN: chalk.yellow,
  ERROR: chalk.red,
}
/**
* @see https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Response.html
* @typedef {Error} S3Error
* @property {string} code - a unique short code representing the error that was emitted.
* @property {string} message - a longer human readable error message
* @property {Boolean} retryable - whether the error message is retryable.
* @property {number} statusCode - in the case of a request that reached the service, this value contains the response status code.
* @property {Date} time - the date time object when the error occurred.
* @property {string} hostname - set when a networking error occurs to easily identify the endpoint of the request.
* @property {string} region - set when a networking error occurs to easily identify the region of the request.
*/
/**
* @see https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Response.html
* @typedef {Object} S3Response
* @property {Buffer|array|string|ReadableStream} Body - Object data
* @property {number} Expires Unix epoch timestamp for ttl time
*/
/**
* A [cache-manager](https://github.com/BryanDonovan/node-cache-manager) module for storing results in S3.
* @see https://www.npmjs.com/package/cache-manager
* @example
* const s3 = new S3Cache({
* accessKey: 'AAAAAAAA',
* secretKey: 'asdnbklajsndkj',
* bucket: 'my-cache-bucket',
* ttl: 1, ttlUnits: 'hours',
* s3Options: {
* region: 'us-west-2',
* httpOptions: {
* proxy: 'http://my.proxy:3128'
* },
* params: {
* ACL: 'authenticated-read',
* },
* },
* })
*/
class S3Cache {
  /**
   * @constructs S3Cache
   * @param {Object} options - An object of options
   * @param {string} options.accessKey - An AWS access key
   * @param {string} options.secretKey - An AWS secret key
   * @param {string} options.bucket - The name of an AWS S3 bucket.
   *
   * @param {number|Object} [options.ttl] - Paired with {@link options.ttlUnits}, amount in the future to set an object to expire. Can also be an Object, as supported by Moment.
   * @param {string} [options.ttlUnits=seconds] - Paired with {@link options.ttl}, this is the unit of time to set an object to expire.
   * @see http://momentjs.com/docs/#/manipulating/add/
   *
   * @param {string} [options.logLevel] - If specified, the default log level.
   * @param {string} [options.pathPrefix] - If specified, all cache objects will be placed under this folder. Slashes are not necessary (unless for a nested folder)
   * @param {number} [options.folderPathDepth=2] - The number of folders to chunk checksummed names into. Increases performance by nesting objects into folders. Set to 0 to disable.
   * @param {number} [options.folderPathChunkSize=2] - The number of characters to use in each folder path chunk.
   *
   * @param {string} [options.checksumAlgorithm=md5] - The digest algorithm to use when checksumming. Supports any OpenSSL digest (use `openssl list -digest-algorithms`) and 'none'.
   * @param {string} [options.checksumEncoding=hex] - The encoding to use for the digest. Valid values (as of this writing) are 'hex', 'latin1', and 'base64'.
   * @see https://nodejs.org/api/crypto.html#crypto_hash_digest_encoding
   *
   * @param {Boolean} [options.normalizeLowercase=false] - When normalizing, should the key be lowercased first? If using URLs, probably true. If using paths, probably false.
   * @param {Boolean} [options.parseKeyAsPath=false] - Should the key be parsed as a path for normalization?
   * @param {Boolean} [options.normalizePath=true] - If the key is parsed as a path, should we normalize it? (uses path.normalize)
   * @param {Boolean} [options.parseKeyAsUrl=false] - Should the key be parsed as a URL for normalization?
   * @param {Boolean} [options.normalizeUrl=true] - If the key is parsed as a URL, should we normalize it? (sorts query parameters)
   * @param {Boolean} [options.proactiveExpiry=false] - If a key is marked as expired when we encounter it, should we delete it? Causes an additional request, but keeps the cache cleaner in case of keys()
   *
   * @param {Object} [options.s3Options] - An object passed into the S3 constructor.
   * @param {Object} [options.s3Options.params] - An object passed into the S3 constructor. Parameters in here are included with every request to S3. Good for options like 'region'.
   * @see https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#constructor-property
   * @throws {Error} If `bucket` is missing, if only one of `accessKey`/`secretKey` is usable, or if `s3Options` is not an object.
   */
  constructor(options) {
    this.options = Object.assign({}, defaultOptions, options)

    // A required option must be present and truthy.
    const validateOption = param => {
      if( !(param in this.options) || !this.options[param] ) {
        throw new Error(`Did not get required parameter: ${param} in constructor`)
      }
    }
    validateOption('bucket')

    // Translate passed in params to S3 constructor params.
    const constructorOptions = {
      params: {
        Bucket: this.options.bucket,
      },
    }

    // Only set identity values if passed. Let AWS-sdk handle picking them up from the environment.
    if( 'accessKey' in this.options || 'secretKey' in this.options ) {
      validateOption('accessKey')
      validateOption('secretKey')
      constructorOptions.accessKeyId = this.options.accessKey
      constructorOptions.secretAccessKey = this.options.secretKey
    }

    // If s3Options is provided, merge it with our constructorOptions object.
    if( 's3Options' in this.options ) {
      const { s3Options } = this.options
      if( typeof s3Options !== 'object' || s3Options === null ) {
        throw new Error('Expected an object for s3Options!')
      }
      // Object.assign is shallow, so `params` is merged by hand. The merge goes
      // into a fresh object so the caller's s3Options is never modified.
      const mergedParams = Object.assign({}, constructorOptions.params, s3Options.params)
      Object.assign(constructorOptions, s3Options, { params: mergedParams })
    }
    this.s3 = new S3(constructorOptions)

    // Setup logging. The environment variable wins over the constructor option.
    if( process.env.S3CACHE_LOGLEVEL ) {
      this.options.logLevel = process.env.S3CACHE_LOGLEVEL
    }
    log.setLevel(this.options.logLevel)
    prefix.reg(log)
    prefix.apply(log, {
      format(level, name, timestamp) {
        return `${chalk.gray(`[${timestamp}]`)} ${logColors[level.toUpperCase()](level)} ${chalk.green(`${name}:`)}`
      },
    })

    // One named logger per operation; each can be tuned individually through
    // an S3CACHE_<NAME>_LOGLEVEL environment variable.
    const loggerNames = ['get', 'set', 'keys', 'head', 'ttl', 'del', 'reset', 'normalizePath', 'timestampToMoment', 'stringifyResponse']
    this._log = loggerNames.reduce((memo, type) => {
      const logger = log.getLogger(type)
      const envLevel = process.env[`S3CACHE_${type.toUpperCase()}_LOGLEVEL`]
      logger.setLevel(envLevel || this.options.logLevel)
      memo[type] = logger
      return memo
    }, {})
  }

  /**
   * Splits the trailing `[options], [cb]` arguments shared by get/set/del.
   * Two arguments are taken as (options, cb); a single argument is the
   * callback when it is a function and the options object otherwise.
   * @private
   * @param {Array} args - The arguments remaining after the positional ones were removed
   * @return {{options: (Object|undefined), cb: (function|undefined)}}
   */
  _parseOptionalArgs(args) {
    let options, cb
    if( args.length === 2 ) {
      [options, cb] = args
    } else if( args.length === 1 ) {
      if( typeof args[0] === 'function' ) {
        cb = args[0]
      } else {
        options = args[0]
      }
    }
    return { options, cb }
  }

  /**
   * Parses str as a URL, then sorts query parameters if {@link constructor.options.normalizeUrl} is true
   * @private
   * @param {string} str - The input string
   * @param {Object} [options=this.options] - Override options for the class
   * @return {string} - The input string, normalized.
   */
  _normalizeUrl(str, options = this.options) {
    // Without a parser argument url-parse leaves `query` as the raw string
    // (including the leading '?'), so it is parsed here with Node's querystring,
    // which keeps repeated parameters as arrays.
    const request = new Url(str)
    if( options.normalizeUrl && typeof request.query === 'string' && request.query !== '' ) {
      const rawQuery = request.query.replace(/^\?/, '')
      // maxKeys: 0 lifts the default 1000-key cap so no parameter is dropped.
      const params = querystring.parse(rawQuery, undefined, undefined, { maxKeys: 0 })
      const sortedQuery = Object.keys(params).sort().map(key =>
        querystring.stringify({ [key]: params[key] })
      ).join('&')
      // 'query' is the part url-parse serializes; the identity parser keeps the
      // already-sorted string from being re-parsed into an object.
      request.set('query', sortedQuery, query => query)
    }
    return request.toString()
  }

  /**
   * Parses str as a path, then normalizes it if {@link constructor.options.normalizePath} is true
   * @private
   * @param {string} str - The input string
   * @param {Object} [options=this.options] - Override options for the class
   * @return {string} - The input string, normalized.
   */
  _normalizePath(str, options = this.options) {
    const reassembled = path.format(path.parse(str))
    return options.normalizePath ? path.normalize(reassembled) : reassembled
  }

  /**
   * Takes an intended path and prepares it to be used in a cache.
   * Depending on the options selected, may perform normalization on the key.
   * Will checksum and folder-chunk the path to make caching more efficient.
   * May prefix the key if {@link constructor.options.pathPrefix} is set.
   * Uses {@link _normalizeUrl} and {@link _normalizePath} if their respective options are set.
   * @param {string} pathName - The input key
   * @param {Object} [options=this.options] - Override options for the class
   * @return {string} - The input key, as an S3-ready path.
   */
  _getPath(pathName, options = this.options) {
    let key = pathName

    // Perform any normalization needed before we checksum
    if( options.normalizeLowercase ) {
      key = key.toLowerCase()
    }
    if( options.parseKeyAsUrl ) {
      key = this._normalizeUrl(key, options)
    } else if( options.parseKeyAsPath ) {
      key = this._normalizePath(key, options)
    }
    if( options.normalizeLowercase || options.parseKeyAsUrl || options.parseKeyAsPath ) {
      this._log.normalizePath.debug('Path normalized:', key)
    }

    // Checksum the path to remove all potentially bad characters
    if( options.checksumAlgorithm !== 'none' ) {
      key = checksum(key, {
        algorithm: options.checksumAlgorithm,
        encoding: options.checksumEncoding,
      })
    } else if( options.checksumEncoding === 'base64' ) {
      key = Buffer.from(key).toString('base64')
    }

    // Add a folder structure based on the leading characters of the key.
    if( options.folderPathDepth !== 0 ) {
      const urlChunks = []
      for( let depth = 0; depth < options.folderPathDepth; depth++ ) {
        const begin = depth * options.folderPathChunkSize
        urlChunks.push(key.slice(begin, begin + options.folderPathChunkSize))
      }
      key = `${urlChunks.join('/')}/${key}`
    }

    // Prefix it if desired. The prefix comes from the effective options so a
    // per-request override is honored, and the join is POSIX-style because S3
    // keys are '/'-delimited on every platform.
    if( options.pathPrefix !== '' ) {
      key = path.posix.join(options.pathPrefix, key)
    }
    this._log.normalizePath.debug('Final path: ', key)
    return key
  }

  /**
   * Converts S3 responses back into a data format we want.
   * @param {S3Response} response Incoming response object
   * @param {Object} [options=this.options] Incoming options
   * @return {string|*} The body as a string when `stringifyResponses` is set, the raw
   * body otherwise; a response without a `Body` is returned unchanged.
   */
  _stringifyResponse(response, options = this.options) {
    if( !response || !('Body' in response) ) {
      this._log.stringifyResponse.warn('Unknown response', response)
      return response
    }
    return options.stringifyResponses ? response.Body.toString() : response.Body
  }

  /**
   * Ensures that incoming timestamps are a Moment object.
   * @param {string|number} timestamp - Numbers are treated as Unix seconds; anything else is handed to moment() as-is.
   * @return {Moment}
   */
  _timestampToMoment(timestamp) {
    this._log.timestampToMoment.trace('Timestamp being converted to moment:', timestamp)
    // Convert a Unix timestamp to milliseconds, because javascript
    if( typeof timestamp === 'number' ) {
      return moment(timestamp * 1000)
    }
    return moment(timestamp)
  }

  /**
   * get a key from the cache.
   * Accepts (key), (key, cb), (key, options) or (key, options, cb). A missing
   * object (HTTP 404) and an expired object both yield `undefined` without an error.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  get(...args) {
    const key = args.shift()
    const { options, cb: callerCb } = this._parseOptionalArgs(args)
    const cb = callerCb || (() => {})

    if( log.getLevel() === log.levels.TRACE ) {
      this._log.get.trace('called at:', moment().valueOf())
    }
    this._log.get.debug('called with:', key)

    // Allow per-request options to override constructor options.
    const currentOptions = Object.assign({}, this.options, options)
    const requestOptions = {
      Key: this._getPath(key, currentOptions),
    }
    this._log.get.trace(
      'options components:',
      'this.options:', this.options,
      'options:', options,
      'requestOptions:', requestOptions,
    )

    // Allow 's3Options' to override request options.
    if( options && 's3Options' in options ) {
      Object.assign(requestOptions, options.s3Options)
    }
    this._log.get.debug('final options: ', requestOptions)

    async.waterfall([
      waterCb => this.s3.getObject(requestOptions, waterCb),
      (result, waterCb) => {
        const isExpired = 'Expires' in result && this._timestampToMoment(result.Expires).isBefore()
        if( !isExpired ) {
          this._log.get.trace('get returning result:', result)
          waterCb(null, this._stringifyResponse(result, currentOptions))
          return
        }

        // It's dead, pretend there's nothing.
        if( !currentOptions.proactiveExpiry ) {
          this._log.get.info(key, ' is expired, ignoring result')
          waterCb(null, undefined)
          return
        }

        // Being proactive: delete exactly the object that was just fetched
        // (same Key, and same Bucket when it was overridden) rather than
        // re-deriving the key, which could differ under per-request options.
        this._log.get.info(key, ' is expired, deleting it')
        const deleteOptions = { Key: requestOptions.Key }
        if( 'Bucket' in requestOptions ) {
          deleteOptions.Bucket = requestOptions.Bucket
        }
        this.s3.deleteObject(deleteOptions, delErr => waterCb(delErr || null, undefined))
      },
    ], (err, result) => {
      if( err instanceof Error && err.statusCode === 404 ) {
        this._log.get.trace(key, ' not found according to s3' )
        cb(null, undefined)
        return
      }
      cb(err, result)
    })
  }

  /**
   * set a key in the cache. Assumes the bucket already exists.
   * Accepts (key, value), (key, value, cb), (key, value, options) or
   * (key, value, options, cb). Without a callback the request is sent fire-and-forget.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  set(...args) {
    const key = args.shift()
    const value = args.shift()
    const { options, cb } = this._parseOptionalArgs(args)

    if( log.getLevel() === log.levels.TRACE ) {
      this._log.set.trace('called at:', moment().valueOf(), 'with value: ', value)
    }
    this._log.set.debug('called with:', key)

    // Allow per-request options to override constructor options.
    const currentOptions = Object.assign({}, this.options, options)
    const requestOptions = {
      Key: this._getPath(key, currentOptions),
      ACL: currentOptions.acl,
      ContentType: currentOptions.contentType,
      // Coerce the value into a buffer. This ensures binary or unicode data is safe
      Body: value instanceof Buffer ? value : Buffer.from(value),
    }

    if( currentOptions.ttl ) {
      requestOptions.Expires = moment().add(currentOptions.ttl, currentOptions.ttlUnits).unix()
      this._log.set.debug(key, ' expires at ' + requestOptions.Expires)
    }
    this._log.set.trace(
      'options components:',
      'this.options:', this.options,
      'options:', options,
      'requestOptions:', requestOptions,
    )

    // Allow 's3Options' to override request options.
    if( options && 's3Options' in options ) {
      Object.assign(requestOptions, options.s3Options)
    }
    this._log.set.debug('final options: ', requestOptions)

    // Without a callback the SDK only builds the request, so send it explicitly.
    const request = this.s3.putObject(requestOptions, cb)
    if( !cb ) {
      request.send()
    }
  }

  /**
   * delete a key in the cache.
   * Accepts (key), (key, cb), (key, options) or (key, options, cb). Per-request
   * options take part in deriving the object key, the same way get and set do,
   * so an entry written with overrides can be deleted with the same overrides.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  del(...args) {
    const key = args.shift()
    const { options, cb } = this._parseOptionalArgs(args)

    if( log.getLevel() === log.levels.TRACE ) {
      this._log.del.trace('called at:', moment().valueOf())
    }
    this._log.del.debug('called with:', key)

    // Allow per-request options to override constructor options.
    const currentOptions = Object.assign({}, this.options, options)
    const requestOptions = {
      Key: this._getPath(key, currentOptions),
    }
    this._log.del.trace(
      'options components:',
      'this.options:', this.options,
      'options:', options,
      'requestOptions:', requestOptions,
    )

    // Allow 's3Options' to override request options.
    if( options && 's3Options' in options ) {
      Object.assign(requestOptions, options.s3Options)
    }
    this._log.del.debug('final options: ', requestOptions)

    // Without a callback the SDK only builds the request, so send it explicitly.
    const request = this.s3.deleteObject(requestOptions, cb)
    if( !cb ) {
      request.send()
    }
  }

  /**
   * Get a list of objects from the cache. This function is sort of pointless because of hashing.
   * Accepts (cb), (key, cb), (options, cb) or (key, options, cb); the callback is
   * always the last argument. `key` is accepted but not used for filtering.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  keys(...args) {
    const cb = args.pop()
    let key, options
    if( args.length === 2 ) {
      [key, options] = args
    } else if( args.length === 1 ) {
      if( typeof args[0] === 'object' ) {
        options = args[0]
      } else {
        key = args[0]
      }
    }

    if( log.getLevel() === log.levels.TRACE ) {
      this._log.keys.trace('called at:', moment().valueOf())
    }
    this._log.keys.debug('called with:', key)

    // Allow per-request options to override constructor options.
    const currentOptions = Object.assign({}, this.options, options)
    const requestOptions = {}
    // A Prefix filter is pointless since the hashing will never allow this to work.

    // Allow 's3Options' to override request options.
    if( options && 's3Options' in options ) {
      Object.assign(requestOptions, options.s3Options)
    }
    this._log.keys.debug('final options: ', requestOptions)

    // Grab all keys via pagination
    let continuationToken = false
    // Doing this with an external value because doWhilst seems to treat arrays wrong
    const pages = []
    async.doWhilst(whilstCb => {
      const pageOptions = Object.assign({}, requestOptions)
      if( continuationToken ) {
        this._log.keys.trace('got continuation token: ', continuationToken)
        pageOptions.ContinuationToken = continuationToken
      }
      this.s3.listObjectsV2(pageOptions, (err, results) => {
        if( err ) {
          whilstCb(err)
          return
        }
        continuationToken = results.IsTruncated ? results.NextContinuationToken : false
        // Treat a page without Contents as empty so consumers can rely on arrays.
        pages.push(results.Contents || [])
        whilstCb()
      })
    }, () => continuationToken !== false, err => {
      if( err ) {
        cb(err)
        return
      }
      // Secret option for optimizing reset() a little
      if( currentOptions.dontConcatPages ) {
        this._log.keys.debug('not concatenating pages')
        cb(null, pages)
        return
      }
      if( pages.length === 1 ) {
        this._log.keys.debug('single page result')
        cb(null, pages[0])
        return
      }
      // Concatenate all results
      this._log.keys.debug('concatenating page results')
      cb(null, pages.reduce((memo, page) => memo.concat(page), []))
    })
  }

  /**
   * Get the metadata of a particular object
   * Accepts (key, cb) or (key, options, cb); the callback is always the last
   * argument. Per-request options take part in deriving the object key.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  head(...args) {
    let options
    const key = args.shift()
    const cb = args.pop()
    if( log.getLevel() === log.levels.TRACE ) {
      this._log.head.trace('called at:', moment().valueOf())
    }
    this._log.head.debug('called with:', key)
    if( args.length === 1 ) {
      options = args[0]
    }

    // Allow per-request options to override constructor options.
    const currentOptions = Object.assign({}, this.options, options)
    const requestOptions = {
      Key: this._getPath(key, currentOptions),
    }

    // Allow 's3Options' to override request options.
    if( options && 's3Options' in options ) {
      Object.assign(requestOptions, options.s3Options)
    }
    this._log.head.debug('final options: ', requestOptions)
    this.s3.headObject(requestOptions, cb)
  }

  /**
   * Get the ttl time of a particular object
   * Takes the same arguments as {@link head}. The callback receives the expiry
   * as a Unix timestamp in seconds, or -1 (along with any error) when the
   * lookup failed or the object carries no expiry.
   * @param {...string|Object|function} args - Polymorphic argument to support optional parameters
   */
  ttl(...args) {
    const cb = args.pop()
    this.head(...args, (err, result) => {
      if( !err && result && 'Expires' in result ) {
        cb(null, this._timestampToMoment(result.Expires).unix())
        return
      }
      cb(err, -1)
    })
  }

  /**
   * Empties the entire bucket. Definitely a function to use with caution.
   * Uses a secret option on {@link keys}, called dontConcatPages, to make the
   * API a little happier.
   * @param {Function} cb The callback.
   */
  reset(cb) {
    this._log.reset.warn('Resetting bucket!')
    async.waterfall([
      waterCb => this.keys({ dontConcatPages: true }, waterCb),
      // One deleteObjects call per listing page, at most two in flight.
      (results, waterCb) => async.mapLimit(results, 2, (dataset, mapCb) => {
        if( dataset.length === 0 ) {
          mapCb()
          return
        }
        this.s3.deleteObjects({
          Delete: {
            // deleteObjects does not accept any parameters except key and version
            Objects: dataset.map(({ Key, VersionId }) => ({ Key, VersionId })),
          },
        }, mapCb)
      }, waterCb),
    ], cb)
  }
  // TODO: implement setex function? make a copy of the object since S3 is weird
}
module.exports = S3Cache