UNPKG

disk-memoizer

Version:

Simple disk memoization and in memory LRU cache for high latency IO responses

443 lines (376 loc) 10.2 kB
// Simple disk memoization and in memory LRU cache for high
// latency IO responses.
//
// https://github.com/bermi/disk-memoizer
//
// Check the README.md file for instructions and examples

module.exports = diskMemoizer;

const fs = require("graceful-fs");
const config = require("./config");
const gcTmpFiles = require("./gc");
const debug = require("debug")("disk-memoizer");
const path = require("path");
const mkdirp = require("mkdirp");
const createHash = require("crypto").createHash;
const LruCache = require("lru-cache");
const lockFile = require("lockfile");
const os = require("os");

// Lock files live in the OS temporary directory, not next to the cache files.
const LOCK_TMP_DIR = os.tmpdir();

// Wraps `unmemoizedFn` (a node-style function whose last argument is a
// `callback(err, data)`) so that its results are cached on disk and,
// optionally, in an in-memory LRU cache.
//
// Returns the memoized function. The returned function also exposes a
// `gc(options)` method that delegates to the garbage collector in ./gc.
function diskMemoizer(unmemoizedFn, {

  // Number of milliseconds before considering the cache stale
  // By default the cache won't expire
  maxAge,

  // Content type, right now only "json" is supported, for other types
  // use a custom marshaller (see below)
  type,

  // Optional marshaller object with a marshall and an unmarshall
  // asynchronous function that can prepare data before saving it on the
  // disk and after reading it back.
  //
  // Unmarshalled response references will be kept in memory when
  // memoryCacheItems > 0.
  //
  // Example JSON marshaller (simplified without error handling):
  // {
  //   marshall: (data, callback) => callback(null, JSON.stringify(data)),
  //   unmarshall: (data, callback) => callback(null, JSON.parse(data))
  // }
  //
  marshaller = marshallers.none,

  // By default the first argument of the method to be memoized will be used
  // as the cache key, you can provide a custom synchronous function that
  // will receive the arguments of the original function and can return
  // a unique string as the identifier for the cache key.
  identity = firstArg,

  // Where to store the cache? Defaults to the value set via the
  // environment variable DISK_MEMOIZER_CACHE_DIR
  cacheDir = config.CACHE_DIR,

  // Number of elements to keep on the lru in memory cache. Keep in mind
  // that each worker on a cluster will keep its own copy.
  // Defaults to 0 or the environment variable
  // DISK_MEMOIZER_MEMORY_CACHE_ITEMS
  memoryCacheItems = config.MEMORY_CACHE_ITEMS,

  // How long before considering the lock stale?
  lockStale = config.LOCK_STALE_MS,

  lruCacheOptions = {
    max: memoryCacheItems,
    maxAge
  }
} = {}) {

  // Only pay for a real LRU cache when in-memory caching was requested.
  const memoryCache = memoryCacheItems > 0 || lruCacheOptions.max > 0 ?
    new LruCache(lruCacheOptions) :
    fakeLruCache();

  // A known `type` takes precedence over the `marshaller` option.
  const activeMarshaller = getMarshaller({
    type,
    marshaller
  });

  function diskMemoized(...args) {
    // The last argument is always the node-style callback.
    const callback = args.pop();
    const key = identity(args);

    if (!config.FLUSH_CACHE && memoryCache.has(key)) {
      debug("[info] Using in memory cache (%d items) for %s",
        memoryCache.length, key
      );
      return callback(null, memoryCache.get(key));
    }

    const cachePath = getCachePath(key, cacheDir);

    fs.stat(cachePath, (err, stats) => {

      const expired = maxAge > 0 && hasExpired(maxAge, stats && stats.ctime);

      // A stat error is treated as "no usable cache file".
      if (config.FLUSH_CACHE || err || expired) {
        grabAndCache({
          key,
          cachePath,
          unmemoizedFn,
          args,
          marshaller: activeMarshaller,
          memoryCache,
          lockStale,
          type
        }, callback);
      } else {
        useCachedFile({
          key,
          cachePath,
          unmemoizedFn,
          // Forward the original arguments so that fallbacks to the
          // unmemoized function receive them instead of just the key.
          args,
          marshaller: activeMarshaller,
          lockStale,
          memoryCache,
          type
        }, callback);
      }
    });
  }

  const currentCacheDir = cacheDir;
  const currentMaxAge = maxAge;

  // Runs the garbage collector. `cacheDir` and `maxAge` default to the
  // values this memoizer was created with.
  diskMemoized.gc = function gc({
    interval = config.GC_INTERVAL,
    cacheDir,
    maxAge
  } = {}) {
    cacheDir = cacheDir || currentCacheDir;
    maxAge = maxAge || currentMaxAge;
    debug("[info] GC Running with options %j", {
      maxAge,
      interval,
      cacheDir
    });
    return gcTmpFiles({
      maxAge,
      interval,
      cacheDir
    });
  };

  return diskMemoized;
}

// Used to convert md5 hashes into subfolder chunks
const RE_PATHIFY = /^([a-z0-9]{2})([a-z0-9]{2})([a-z0-9]{2})(.+)/;

// Returns the path of the cache file for `key`: the md5 hex digest of the
// key, split into three 2-character subfolders, with a ".cache" extension.
function getCachePath(key, cacheDir = config.CACHE_DIR) {
  return path.normalize(
    `${cacheDir}/${createHash("md5").
      update(key).
      digest("hex").
      replace(RE_PATHIFY, "$1/$2/$3/$4")}.cache`
  );
}

// Returns the path of the lock file for `key` inside LOCK_TMP_DIR.
// Callers in this file pass the cache path as the key.
function getLockPath(key) {
  return path.normalize(
    `${LOCK_TMP_DIR}/${createHash("md5").
      update(key).
      digest("hex")}.lock`
  );
}

// Calls the unmemoized function under a lock, marshalls its result, writes
// it to `cachePath`, stores the unmarshalled value in the memory cache and
// reports it via `callback(err, data)`.
//
// When the lock is already held (or can't be acquired), the read is retried
// shortly afterwards via delayedRead instead of calling the function again.
function grabAndCache({
  key,
  cachePath,
  unmemoizedFn,
  args = [key],
  marshaller,
  memoryCache = fakeLruCache(),
  lockStale,
  type
}, callback) {

  const lockPath = getLockPath(cachePath);

  lockFile.check(lockPath, {stale: lockStale}, (err, isLocked) => {
    if (err) {
      return callback(err);
    }

    if (isLocked) {
      return delayedRead({
        key,
        cachePath,
        unmemoizedFn,
        args,
        marshaller,
        memoryCache,
        lockStale,
        type
      }, callback);
    }

    lockFile.lock(lockPath, {stale: lockStale}, (err) => {
      if (err) {
        // A concurrent lock? We'll try to read again in a bit
        return delayedRead({
          key,
          cachePath,
          unmemoizedFn,
          args,
          marshaller,
          memoryCache,
          lockStale,
          type
        }, callback);
      }
      unmemoizedFn(...args.concat(grabAndCacheCallback));
    });
  });

  // Always release the lock before reporting a failure.
  function unlockAndReportError(err) {
    lockFile.unlock(lockPath, () => {
      callback(err);
    });
  }

  function grabAndCacheCallback(err, unmarshalledData) {
    if (err) {
      return unlockAndReportError(err);
    }

    marshaller = getMarshaller({
      type,
      marshaller
    });

    const cacheDir = path.dirname(cachePath);
    mkdirp(cacheDir, (err) => {
      if (err) {
        return unlockAndReportError(err);
      }

      marshaller.marshall(unmarshalledData, (err, data) => {
        if (err) {
          return unlockAndReportError(err);
        }

        fs.writeFile(cachePath, data, (err) => {
          if (err) {
            debug("[error] Failed saving %s. Got error: %s",
              cachePath, err.message
            );
            return unlockAndReportError(err);
          }

          lockFile.unlock(lockPath, () => {
            debug("[info] Saved cache for %s on %s", key, cachePath);
            memoryCache.set(key, unmarshalledData);
            callback(null, unmarshalledData);
          });
        });
      });
    });
  }
}

// Retries useCachedFile after a short delay, giving the current lock holder
// time to finish writing the cache file.
function delayedRead({
  key,
  cachePath,
  unmemoizedFn,
  args,
  marshaller,
  memoryCache,
  lockStale,
  type
}, callback) {
  setTimeout(() => {
    useCachedFile({
      key,
      cachePath,
      unmemoizedFn,
      args,
      marshaller,
      memoryCache,
      lockStale,
      type
    }, callback);
  }, 10);
}

// Reads `cachePath`, unmarshalls its content, stores the value in the memory
// cache and reports it via `callback(err, data)`.
//
// `args` are the original arguments of the memoized call; they default to
// `[key]` (like grabAndCache) and are used whenever the unmemoized function
// has to be called as a fallback:
//  - the cache file can't be read: the value is regenerated via grabAndCache
//  - the cached content can't be unmarshalled: the unmemoized function is
//    called directly and nothing is cached
function useCachedFile({
  key,
  cachePath,
  unmemoizedFn,
  args = [key],
  marshaller = marshallers.none,
  memoryCache = fakeLruCache(),
  lockStale,
  type
}, callback) {

  marshaller = getMarshaller({
    type,
    marshaller
  });

  const lockPath = getLockPath(cachePath);

  lockFile.check(lockPath, {stale: lockStale}, (err, isLocked) => {
    if (err) {
      return callback(err);
    }

    if (isLocked) {
      // If we've got this far and there's still a lock file, we've
      // probably hit a race condition with another concurrent process.
      // We'll retry when the lock is released.
      return delayedRead({
        key,
        cachePath,
        unmemoizedFn,
        args,
        marshaller,
        memoryCache,
        lockStale,
        type
      }, callback);
    }

    fs.readFile(cachePath, (err, dataFromCache) => {
      if (err) {
        debug("[warning] Failed reading file %s from cache %s",
          key, cachePath);
        // Regenerate with the original arguments and keep feeding the
        // same memory cache.
        return grabAndCache({
          key,
          cachePath,
          unmemoizedFn,
          args,
          marshaller,
          memoryCache,
          lockStale,
          type
        }, callback);
      }

      debug("[info] Using disk cache for %s from %s", key, cachePath);

      marshaller.unmarshall(dataFromCache, (err, data) => {
        if (err) {
          debug(
            "[warning] Not caching %s. Failed marshalling data. Got error %s",
            key, err.message);
          unmemoizedFn(...args.concat(callback));
          return;
        }

        memoryCache.set(key, data);
        callback(null, data);
      });
    });
  });
}

// Tells whether `creationTime` (a Date) is more than `maxAge` milliseconds
// in the past. Returns false when either value is missing.
function hasExpired(maxAge, creationTime) {
  return maxAge && creationTime ?
    ((new Date().getTime()) - maxAge) > creationTime.getTime() :
    false;
}

// Sentinel returned by tryCatch when `fn` throws; the thrown error is
// available on `errorObject.value`.
const errorObject = {value: null};

// Calls `fn` with `ctx` and `args`, returning its result, or `errorObject`
// when it throws.
function tryCatch(fn, ctx, args) {
  try {
    return fn.apply(ctx, args);
  } catch (error) {
    errorObject.value = error;
    return errorObject;
  }
}

// Built-in marshallers, selectable through the `type` option.
const marshallers = {

  // Stores and returns the data untouched.
  none: {
    marshall: (data, callback) => callback(null, data),
    unmarshall: (data, callback) => callback(null, data)
  },

  json: {
    marshall(data, callback) {
      const result = tryCatch(JSON.stringify, null, [data]);
      if (result === errorObject) {
        callback(new Error(`Can't stringify data. Got error ${result.value.message}`));
      } else {
        callback(null, result);
      }
    },
    unmarshall(data, callback) {
      // Only strings (primitive or boxed) and Buffers hold serialized JSON;
      // anything else is returned as is.
      const isSerialized = typeof data === "string" ||
        data instanceof String ||
        data instanceof Buffer;
      if (!isSerialized) {
        return callback(null, data);
      }
      const result = tryCatch(JSON.parse, null, [data]);
      if (result === errorObject) {
        callback(new Error(`Invalid JSON. Got error ${result.value.message}`));
      } else {
        callback(null, result);
      }
    }
  }
};

// Returns the built-in marshaller registered for `type`, falling back to
// the given `marshaller` when the type is unknown.
function getMarshaller({
  type,
  marshaller
}) {
  // Own-property check so that names inherited from Object.prototype
  // (e.g. "constructor") are never mistaken for a marshaller.
  if (Object.prototype.hasOwnProperty.call(marshallers, type) &&
    marshallers[type]) {
    marshaller = marshallers[type];
  }
  return marshaller;
}

// Stand-in used when the in-memory cache is disabled: never has any key
// and ignores writes.
function fakeLruCache() {
  return {
    has: () => false,
    set: () => {
      // ignored
    }
  };
}

// Default identity: the first argument of the memoized call is the key.
function firstArg(args) {
  return args[0];
}

// Expose for unit testing
if (process.env.NODE_ENV === "test") {
  module.exports.marshallers = marshallers;
  module.exports.grabAndCache = grabAndCache;
  module.exports.getCachePath = getCachePath;
  module.exports.hasExpired = hasExpired;
  module.exports.useCachedFile = useCachedFile;
}