/**
 * raptor-cache
 * Efficient JavaScript cache implementation that is safe for concurrent
 * reads and writes.
 */
'use strict';
var nodePath = require('path');
var logger = require('raptor-logging').logger(module);
var { mkdirp, mkdirpSync } = require('mkdirp');
var DataHolder = require('raptor-async/DataHolder');
var DEFAULT_FLUSH_DELAY = 1000;
var fs = require('fs');
var CacheEntry = require('./CacheEntry');
var ok = require('assert').ok;
var uuid = require('uuid');
var through = require('through');
var util = require('./util');
var CACHE_VERSION = 1;
var MODE_SINGLE_FILE = 1;
var MODE_MULTI_FILE = 2;
// for reading binary data
var dissolve = require('raptor-dissolve');
/**
 * Default deserializer for string-valued caches: drains the stream
 * produced by `reader` and resolves with the accumulated text.
 *
 * @param {Function} reader factory that returns a readable stream
 * @returns {Promise<String>} resolves with the full streamed contents,
 *          rejects if the stream emits an error
 */
function STRING_DESERIALIZE (reader) {
    return new Promise(function (resolve, reject) {
        var chunks = [];
        var stream = reader();
        stream.on('data', function (chunk) {
            chunks.push(chunk);
        });
        stream.on('end', function () {
            resolve(chunks.join(''));
        });
        stream.on('error', reject);
    });
}
/**
 * Default serializer for JSON-valued caches: converts the given value to
 * its JSON string representation.
 */
function JSON_SERIALIZE (obj) {
    var serialized = JSON.stringify(obj);
    return serialized;
}
/**
 * Default deserializer for JSON-valued caches: drains the stream produced
 * by `reader` and resolves with the parsed JSON value.
 *
 * @param {Function} reader factory that returns a readable stream
 * @returns {Promise<*>} resolves with the parsed value, rejects if the
 *          stream emits an error
 */
function JSON_DESERIALIZE (reader) {
    return new Promise(function (resolve, reject) {
        var chunks = [];
        var stream = reader();
        stream.on('data', function (chunk) {
            chunks.push(chunk);
        });
        stream.on('end', function () {
            resolve(JSON.parse(chunks.join('')));
        });
        stream.on('error', reject);
    });
}
/**
 * Builds the prefix used for log messages emitted on behalf of a store,
 * falling back to "(unnamed)" when the store has no name.
 *
 * FIX: previously a named store's prefix had no trailing colon while the
 * unnamed fallback was '(unnamed):'; the colon is now applied uniformly.
 */
function logPrefix (store) {
    return (store.name || '(unnamed)') + ':';
}
/**
 * Returns true if the given value is falsy or is an object with no own
 * enumerable properties.
 */
function isObjectEmpty (o) {
    if (!o) {
        return true;
    }
    return Object.keys(o).length === 0;
}
/**
 * Returns a function that produces a readable stream for the cache
 * entry's externally stored value (multi-file mode).
 *
 * If the entry's value is still in the process of being written to its
 * separate file (tracked by `cacheEntry.data.writeFileDataHolder`), the
 * returned reader produces a delayed stream that only starts flowing once
 * that write completes.
 */
function getReaderFunc (store, cacheEntry) {
    var fullPath = nodePath.join(store.dir, cacheEntry.meta.file);
    var pendingWrite = cacheEntry.data.writeFileDataHolder;
    if (!pendingWrite) {
        // Value is fully on disk: stream straight from the file
        return function reader () {
            return fs.createReadStream(fullPath, {encoding: store.encoding});
        };
    }
    return function reader () {
        var streamDataHolder = new DataHolder();
        pendingWrite.done(function (err) {
            if (err) {
                streamDataHolder.reject(err);
            } else {
                streamDataHolder.resolve(fs.createReadStream(fullPath, {encoding: store.encoding}));
            }
        });
        return util.createDelayedReadStream(streamDataHolder);
    };
}
/**
 * Loads the cache file from disk into an in-memory cache object on the
 * store, coalescing concurrent callers onto a single read.
 *
 * Behavior:
 *  - If `store.cache` is already populated, the callback fires immediately.
 *  - If a read is already in flight (`store.readDataHolder`), the callback
 *    piggybacks on it.
 *  - Otherwise a new read is started; puts/removes made while the read is
 *    in flight are recorded in `store.pendingCache` and replayed when the
 *    read finishes.
 *
 * NOTE: the read always resolves — a missing or unreadable cache file
 * degrades to an empty cache rather than failing callers (done() ignores
 * its error argument except for logging).
 *
 * @param {DiskStore} store the store whose cache file should be read
 * @param {Function} [callback] invoked as callback(err, cache)
 */
function readFromDisk (store, callback) {
    if (store.shouldReadFromDisk === false) {
        // Reading is disabled for this store: behave as an empty cache
        store.cache = {};
    }
    var debugEnabled = logger.isDebugEnabled();
    if (store.cache) {
        // If the cache has already been read from disk then just invoke
        // the callback immediately
        if (callback) {
            callback(null, store.cache);
        }
        return;
    }
    if (store.readDataHolder) {
        // If we have already started reading the initial cache from disk then
        // just piggy back off the existing read by attaching a listener to the
        // async data holder
        if (callback) {
            store.readDataHolder.done(callback);
        }
        return;
    }
    // Create a new async data holder to keep track of the fact that we have
    // started to read the cache file from disk
    var readDataHolder = store.readDataHolder = new DataHolder();
    if (callback) {
        // If a callback was provided then we need to attach the listener
        // to the async data holder for this read operation
        readDataHolder.done(callback);
    }
    // Create an empty cache object that we will populate with the cache entries
    var cache = {};
    // Keep a flag to avoid invoking reject or resolve multiple times
    // (done can fire from stream 'end', stream 'error', or a version mismatch)
    var finished = false;
    function done (err) {
        if (finished) {
            return;
        }
        if (debugEnabled) {
            logger.debug(logPrefix(store), 'readFromDisk() - done.', err ? 'With error: ' + err : '(no errors)');
        }
        finished = true;
        store.readDataHolder = null;
        store.cache = cache;
        // While reading from disk the cache may have been modified
        // using either "put" or "remove". These pending updates were
        // applied to a temporary cache that we need to now apply to the
        // cache loaded from disk
        var pendingCache = store.pendingCache;
        if (pendingCache) {
            for (var k in pendingCache) {
                if (pendingCache.hasOwnProperty(k)) {
                    var v = pendingCache[k];
                    if (v === undefined) { // A remove is handled by setting the value to undefined
                        // Use "remove" so that the flush will happen correctly
                        store.remove(k);
                    } else {
                        // Use "put" so that the flush will happen correctly
                        store.put(k, v);
                    }
                }
            }
            store.pendingCache = null;
        }
        // Make sure to resolve only after applying any writes that occurred
        // before the read finished
        readDataHolder.resolve(cache);
    }
    if (debugEnabled) {
        logger.debug(logPrefix(store), 'readFromDisk() - reading: ', store.file);
    }
    var inStream = fs.createReadStream(store.file);
    var versionIncompatible = false;
    // Binary layout (see _doFlush for the writer side):
    //   uint8 version, uint8 mode, then repeated records of
    //   [uint16le keyLen][key][uint16le metaLen][meta JSON]
    //   and, in single-file mode, [uint32le valueLen][value]
    var parser = dissolve()
        // read the version
        .uint8('version')
        .tap(function () {
            var version = this.vars.version;
            if (version !== store.version) {
                // Incompatible on-disk format: stop parsing and fall back
                // to an empty cache
                logger.warn('Incompatible cache version. Skipping reading cache from disk.');
                versionIncompatible = true;
                inStream.unpipe(parser);
                done();
            }
        })
        // read the mode
        .uint8('mode')
        .tap(function () {
            store.mode = this.vars.mode;
        })
        .loop(function (end) {
            if (versionIncompatible) {
                return end();
            }
            var cacheEntry = null;
            // read the key length
            this.uint16le('keyLen')
                // handle the key length
                .tap(function () {
                    var keyLen = this.vars.keyLen;
                    if (debugEnabled) {
                        logger.debug(logPrefix(store), 'readFromDisk: keyLen: ', keyLen);
                    }
                    this.buffer('key', keyLen);
                })
                // handle the key
                .tap(function () {
                    var key = this.vars.key.toString('utf8');
                    cacheEntry = new CacheEntry({
                        key: key,
                        deserialize: store.deserialize,
                        encoding: store.encoding
                    });
                })
                // read the meta length
                .uint16le('metaLen')
                // read the meta
                .tap(function () {
                    var metaLen = this.vars.metaLen;
                    if (debugEnabled) {
                        logger.debug(logPrefix(store), 'readFromDisk: metaLen: ', metaLen);
                    }
                    if (metaLen > 0) {
                        this.buffer('meta', metaLen);
                    }
                })
                // handle the meta
                .tap(function () {
                    var metaBuffer = this.vars.meta;
                    if (metaBuffer) {
                        var metaJSON = metaBuffer.toString('utf8');
                        if (debugEnabled) {
                            logger.debug(logPrefix(store), 'meta for ', cacheEntry.key, ':', metaJSON);
                        }
                        cacheEntry.meta = JSON.parse(metaJSON);
                    }
                    // Optional user-supplied filter decides whether the
                    // entry is kept
                    if (!store.isCacheEntryValid || store.isCacheEntryValid(cacheEntry)) {
                        cache[cacheEntry.key] = cacheEntry;
                    }
                    // Even if we are skipping this entry we still need to read through the
                    // remaining bytes...
                    if (store.mode === MODE_SINGLE_FILE) {
                        // The value is stored in the same file...
                        // read the value length
                        this.uint32le('valueLen')
                            // read the value
                            .tap(function () {
                                var valueLen = this.vars.valueLen;
                                if (debugEnabled) {
                                    logger.debug(logPrefix(store), 'readFromDisk: valueLen: ', valueLen);
                                }
                                this.buffer('value', valueLen);
                            })
                            // handle the value
                            .tap(function () {
                                var value = this.vars.value;
                                if (store.encoding) {
                                    value = value.toString(store.encoding);
                                }
                                cacheEntry.value = value;
                            });
                    } else {
                        // Multi-file mode: value lives in a separate file
                        // referenced by cacheEntry.meta.file
                        this.tap(function () {
                            cacheEntry.reader = getReaderFunc(store, cacheEntry);
                        });
                    }
                });
        });
    inStream.on('error', done);
    inStream.on('end', done); // <-- This is the one that will trigger done() if everything goes through successfully
    parser.on('error', done);
    inStream.pipe(parser);
}
/**
 * Schedules a flush of the store to disk according to its configured
 * flushDelay:
 *   - negative: automatic flushing is disabled
 *   - zero: flush immediately
 *   - positive: (re)start a timer so rapid modifications are batched into
 *     a single flush after the delay elapses with no further activity
 */
function scheduleFlush (store) {
    var delay = store.flushDelay;
    if (delay < 0) {
        // no automatic flushing
        return;
    }
    if (delay === 0) {
        // don't wait to flush
        store.flush();
        return;
    }
    // Since flushing was deferred due to a reschedule, don't allow a flush
    // to occur immediately after any pending flush (if one exists)
    store.flushAfterComplete = false;
    if (store.flushTimeoutID) {
        clearTimeout(store.flushTimeoutID);
    }
    store.flushTimeoutID = setTimeout(function () {
        store.flushTimeoutID = null;
        store.flush();
    }, delay);
}
/**
 * Generates a unique relative file path for an externally stored cache
 * value. The first two characters of a fresh UUID become a subdirectory
 * name (spreading files across directories) and the remainder, minus
 * dashes, becomes the file name.
 */
function getUniqueFile () {
    var id = uuid.v4();
    var dirPart = id.slice(0, 2);
    var filePart = id.slice(2).replace(/-/g, '');
    return dirPart + '/' + filePart;
}
/**
 * Writes the given cache entry's value to its own file on disk
 * (multi-file mode). The write is tracked by a DataHolder stored at
 * `cacheEntry.data.writeFileDataHolder` so concurrent readers/flushers
 * can wait for it; on success the entry's in-memory value is dropped and
 * replaced by a reader over the new file.
 *
 * FIX: errors thrown inside the mkdirp `.then()` handler (missing
 * serializer, bad reader, illegal state) previously became unhandled
 * rejections because they were passed as `.then(onOk, done)` — the data
 * holder never settled. The chain now uses `.catch(done)`. Stream
 * 'error' events are also propagated, and done() is guarded against
 * double invocation (a stream can emit both 'error' and 'close').
 *
 * @param {DiskStore} store the owning store
 * @param {CacheEntry} cacheEntry entry whose value should be externalized
 */
function writeCacheValueToSeparateFile (store, cacheEntry) {
    if (cacheEntry.meta.file || cacheEntry.data.writeFileDataHolder) {
        // The cache entry has already been written to disk or it is in the
        // process of being written to disk... nothing to do
        return;
    }
    logger.debug(logPrefix(store), 'writeCacheValueToSeparateFile() - key: ', cacheEntry.key);
    var key = cacheEntry.key;
    var encoding = store.encoding;
    var writeFileDataHolder = cacheEntry.data.writeFileDataHolder = new DataHolder();
    var relPath = getUniqueFile();
    cacheEntry.meta.file = relPath;
    var originalReader = cacheEntry.reader;
    var value = cacheEntry.value;
    var fullPath = nodePath.join(store.dir, relPath);
    var parentDir = nodePath.dirname(fullPath);
    var finished = false;
    function done (err) {
        if (finished) {
            // Guard against double invocation (e.g. 'error' followed by 'close')
            return;
        }
        finished = true;
        if (err) {
            writeFileDataHolder.reject(err);
        } else {
            // Remove the value from the cache entry since we are flushing it to disk
            // and do not want to keep it in memory
            delete cacheEntry.value;
            cacheEntry.deserialized = false;
            // Replace the original reader with a new reader... one that will
            // read from the separate file that we just wrote
            cacheEntry.reader = getReaderFunc(store, cacheEntry);
            writeFileDataHolder.resolve(relPath);
        }
        delete cacheEntry.data.writeFileDataHolder;
    }
    mkdirp(parentDir).then(function () {
        if (value !== undefined) {
            if (typeof value !== 'string' && !(value instanceof Buffer)) {
                var serialize = store.serialize;
                if (!serialize) {
                    throw new Error('Serializer is required for non-String/Buffer values');
                }
                value = serialize(value);
            }
            if (typeof value === 'string') {
                value = Buffer.from(value, encoding);
            }
            fs.writeFile(fullPath, value, done);
        } else if (originalReader) {
            var inStream = originalReader();
            if (!inStream || typeof inStream.pipe !== 'function') {
                throw new Error('Cache reader for key "' + key + '" did not return a stream');
            }
            var outStream = fs.createWriteStream(fullPath, {encoding: encoding});
            // Propagate stream failures so the data holder always settles
            inStream.on('error', done);
            outStream.on('error', done);
            outStream.on('close', done);
            inStream.pipe(outStream);
        } else {
            // Neither a value nor a reader to produce one
            throw new Error('Illegal state');
        }
    }).catch(done);
}
/**
 * Removes the external value file associated with a cache entry
 * (multi-file mode). If a write of that file is still in flight, the
 * unlink is deferred until the write settles.
 *
 * FIX: the pending-write holder lives at `cacheEntry.data.writeFileDataHolder`
 * (see writeCacheValueToSeparateFile) — the old check of
 * `cacheEntry.writeFileDataHolder` could never be truthy, so the
 * wait-then-unlink branch was dead code.
 *
 * @param {CacheEntry} cacheEntry entry whose backing file should be removed
 * @param {DiskStore} [store] owning store; when provided, the entry's
 *        relative meta.file path is resolved against store.dir. Without it
 *        the path is unlinked relative to the process cwd (the previous
 *        behavior, kept only for backward compatibility).
 */
function removeExternalCacheFile (cacheEntry, store) {
    function resolvePath (file) {
        return store ? nodePath.join(store.dir, file) : file;
    }
    var writeFileDataHolder = cacheEntry.data && cacheEntry.data.writeFileDataHolder;
    if (writeFileDataHolder) {
        // A write is in flight; wait for it to finish before unlinking
        writeFileDataHolder.done(function (err, file) {
            if (err) {
                return;
            }
            delete cacheEntry.meta.file;
            fs.unlink(resolvePath(file), function () {});
        });
    } else if (cacheEntry.meta.file) {
        fs.unlink(resolvePath(cacheEntry.meta.file), function () {});
        delete cacheEntry.meta.file;
    } else {
        throw new Error('Illegal state');
    }
}
/**
* This cache store has the following characteristics:
* - An in-memory representation is maintained at all times
* - The in-memory cache is backed by a disk cache that is stored in a single file
* - The cache file is read in its entirety the first time the cache is read or written to
* - Whenever the in-memory cache is modified, a flush is scheduled. If a flush had already been scheduled then it is cancelled so that
* flushes can be batched up. Essentially, after a period of no activity equal to the configured flush delay, the in-memory cache is flushed to disk
* - Values put into the cache must be an instance of Buffer
* - Values cannot be null or undefined
*
* NOTES:
* - This cache store is not suitable for storing very large amounts of data since it is all kept in memory
*
* Configuration options:
* - flushDelay (int) - The amount of delay in ms after a modification to flush the updated cache to disk. -1 will disable automatic flushing. 0 will result in an immediate flush
*
* @param {Object} config Configuration options for this cache (see above)
*/
/**
 * Creates a new DiskStore backed by a cache file under `config.dir`
 * (default: <cwd>/.cache).
 *
 * FIX: `config.flushDelay || DEFAULT_FLUSH_DELAY` silently replaced a
 * configured flushDelay of 0 (documented above as "immediate flush") with
 * the default delay; an explicit null/undefined check is used instead.
 *
 * @param {Object} [config] configuration options:
 *        - dir {String} cache directory (resolved against cwd)
 *        - name {String} label used in log messages
 *        - flushDelay {int} ms of inactivity before flushing (-1 disables
 *          automatic flushing, 0 flushes immediately)
 *        - singleFile {Boolean} false selects multi-file mode
 *        - encoding/serialize/deserialize — value handling
 *        - valueType {String} 'string' or 'json' presets for the above
 *        - readFromDisk {Boolean} false skips loading the existing cache file
 */
function DiskStore (config) {
    if (!config) {
        config = {};
    }
    var dir = config.dir;
    if (dir) {
        dir = nodePath.resolve(process.cwd(), dir);
    } else {
        dir = nodePath.join(process.cwd(), '.cache');
    }
    this.name = config.name;
    // Honor an explicit flushDelay of 0 (immediate flush); only fall back
    // to the default when flushDelay is null/undefined
    this.flushDelay = config.flushDelay == null ? DEFAULT_FLUSH_DELAY : config.flushDelay;
    this.dir = dir;
    this.mode = config.singleFile === false ? MODE_MULTI_FILE : MODE_SINGLE_FILE;
    this.encoding = config.encoding;
    this.serialize = config.serialize;
    this.deserialize = config.deserialize;
    if (config.valueType === 'string') {
        if (!this.encoding) {
            this.encoding = 'utf8';
        }
        if (!this.deserialize) {
            this.deserialize = STRING_DESERIALIZE;
        }
    } else if (config.valueType === 'json') {
        if (!this.encoding) {
            this.encoding = 'utf8';
        }
        if (!this.deserialize) {
            this.deserialize = JSON_DESERIALIZE;
        }
        if (!this.serialize) {
            this.serialize = JSON_SERIALIZE;
        }
    }
    this.version = CACHE_VERSION;
    this.file = nodePath.join(dir, 'cache');
    this._reset();
    this.isCacheEntryValid = null;
    mkdirpSync(nodePath.dirname(this.file));
    this.shouldReadFromDisk = config.readFromDisk !== false;
    if (this.shouldReadFromDisk === false) {
        // If the cache is configured to not be read from disk then populate
        // the cache with an empty object to prevent it from being read from disk
        this.cache = {};
    }
    logger.info(logPrefix(this), 'Created DiskStore cache at ' + dir);
}
DiskStore.prototype = {
    /**
     * Restores all transient state to its initial (unloaded) values.
     * Called from the constructor and from free().
     */
    _reset: function () {
        this.readDataHolder = null;      // in-flight read-from-disk operation (DataHolder)
        this.cache = null;               // in-memory cache object (null until loaded)
        this.flushTimeoutID = null;      // timer ID for a scheduled (delayed) flush
        this.pendingCache = null;        // put/remove ops recorded while the initial read is in flight
        this.flushingDataHolder = null;  // DataHolder for a flush currently in progress
        this.flushAfterComplete = false; // whether another flush was requested mid-flush
        this.modified = false;           // whether there are unflushed changes
    },
free: function () {
var _this = this;
// Don't reset things in the middle of a pending read or flush...
if (this.readDataHolder) {
this.readDataHolder.done(function () {
_this._reset();
});
} else if (this.flushingDataHolder) {
this.flushingDataHolder.done(function () {
_this._reset();
});
} else {
this._reset();
}
},
get (key) {
if (this.pendingCache && this.pendingCache.hasOwnProperty(key)) {
if (logger.isDebugEnabled()) {
logger.debug(logPrefix(this), 'Found cache entry for key "' + key + '" in pendingCache');
}
return Promise.resolve(this.pendingCache[key]);
}
if (this.cache) {
// cache has been read from disk
return Promise.resolve(this.cache[key]);
} else {
// wait for read from disk to finish
return new Promise((resolve, reject) => {
function callback (err, cache) {
return err ? reject(err) : resolve(cache[key]);
}
readFromDisk(this, callback);
});
}
},
put: function (key, cacheEntry) {
ok(typeof key === 'string', 'key should be a string');
ok(cacheEntry, 'cacheEntry is required');
if (cacheEntry.constructor !== CacheEntry) {
var value = cacheEntry;
cacheEntry = new CacheEntry({
key: key,
value: value
});
} else {
cacheEntry.key = key;
}
if (this.deserialize) {
cacheEntry.deserialize = this.deserialize;
}
if (this.mode === MODE_MULTI_FILE) {
writeCacheValueToSeparateFile(this, cacheEntry);
}
if (this.cache) {
if (logger.isDebugEnabled()) {
logger.debug(logPrefix(this), 'Value put into cache with key: ' + key);
}
this.cache[key] = cacheEntry;
this.modified = true;
scheduleFlush(this);
} else {
if (!this.pendingCache) {
this.pendingCache = {};
}
if (logger.isDebugEnabled()) {
logger.debug(logPrefix(this), 'Value put into pendingCache with key: ' + key);
}
this.pendingCache[key] = cacheEntry;
// Start reading from disk (it not started already) so that
// we can update the cache and apply the "puts" and then flush
// the cache back to disk
readFromDisk(this);
}
},
remove: function (key) {
if (this.cache) {
if (this.mode === MODE_MULTI_FILE) {
var cacheEntry = this.cache[key];
if (cacheEntry) {
removeExternalCacheFile(cacheEntry);
}
}
delete this.cache[key];
this.modified = true;
scheduleFlush(this);
} else {
if (!this.pendingCache) {
this.pendingCache = {};
}
this.pendingCache[key] = undefined;
// Start reading from disk (it not started already) so that
// we can update the cache and apply the updates and then flush
// the cache back to disk
readFromDisk(this);
}
},
    /**
     * Flushes the in-memory cache to disk.
     *
     * Coordinates with any in-flight read (flush retries after the read
     * completes so pending writes are applied first) and any in-flight
     * flush (this call's promise settles with the current flush; a
     * follow-up flush is requested via flushAfterComplete when there are
     * new modifications).
     *
     * @returns {Promise} resolves when the data has been flushed, rejects
     *          if the flush fails
     */
    flush () {
        return new Promise((resolve, reject) => {
            // Adapter so DataHolder-style (err, data) callbacks settle this promise
            function callback (err, data) {
                return err ? reject(err) : resolve(data);
            }
            var debugEnabled = logger.isDebugEnabled();
            if (!this.cache) {
                if (!this.readDataHolder) {
                    // If there is no read in progress then there is nothing to do
                    return resolve();
                }
                // A read is in progress; retry the flush once it completes so
                // that any pending writes get applied first
                readFromDisk(this, (err) => {
                    if (err) return callback(err);
                    this.flush().then(resolve).catch(reject);
                });
                return;
            }
            // Cancel any scheduled (delayed) flush since we're flushing now
            if (this.flushTimeoutID) {
                clearTimeout(this.flushTimeoutID);
                this.flushTimeoutID = null;
            }
            if (this.modified === false) {
                // No changes to flush...
                if (this.flushingDataHolder) {
                    // If there is a flush in progress then attach a
                    // listener to the current async data holder
                    this.flushingDataHolder.done(callback);
                } else {
                    // Otherwise, no flush is happening and nothing to
                    // do so invoke callback immediately
                    resolve();
                }
                return;
            }
            if (debugEnabled) {
                logger.debug(logPrefix(this), 'flush() cache keys: ', Object.keys(this.cache));
            }
            if (this.flushingDataHolder) {
                // a flush is already in progress but set flag that we have been asked to
                // flush so that flushing can begin again immediately after the current flush completes
                this.flushAfterComplete = true;
                // Attach a listener so this call settles with the in-progress flush
                this.flushingDataHolder.done(callback);
            } else {
                this.flushingDataHolder = new DataHolder();
                // Attach a listener to the async flushing data holder
                this.flushingDataHolder.done(callback);
                this._doFlush();
            }
        });
    },
_doFlush: function () {
var _this = this;
var debugEnabled = logger.isDebugEnabled();
this.flushAfterComplete = false;
this.modified = false;
var encoding = this.encoding;
var finished = false;
var cache = this.cache;
// Now let's start actually writing the cache to disk...
var tempFile = nodePath.join(this.dir, 'tmp' + uuid.v1());
var file = this.file;
var ended = false;
var out = fs.createWriteStream(tempFile);
function end () {
if (ended) {
return;
}
ended = true;
out.end();
}
function done (err) {
if (debugEnabled) {
logger.debug(logPrefix(_this), 'Cache flush() - done.', err ? 'Error: ' + err : '');
}
if (finished) {
return;
}
finished = true;
end();
if (err) {
try {
// flush didn't work so remove the temp file
fs.unlinkSync(tempFile);
} catch (e) {
// ignore
}
}
if (_this.modified) {
// modification occurred while flushing was happening
if (_this.flushAfterComplete) {
// while flushing was happening we were asked for an immediate flush
// but we deferred it due to the flush that was already in progress
// so let's go ahead and immediately do another flush
_this._doFlush();
return;
}
}
// no modifications happened during flush
// reset the flag to flush after complete
_this.flushAfterComplete = false;
if (err) {
// if err occurred then let any callbacks know (not much we can do)
_this.flushingDataHolder.reject(err);
} else {
// let callbacks know that flush finished successfully
_this.flushingDataHolder.resolve();
}
// since flush completed and we notified all of the listeners, clear the data holder
_this.flushingDataHolder = null;
}
out.on('close', function () { // The flush is completed when the file is closed
if (debugEnabled) {
logger.debug(logPrefix(_this), 'Cache flush completed to file ' + tempFile);
}
// Delete the existing file if it exists
fs.unlink(file, function () {
fs.rename(tempFile, file, function (err) {
if (err) {
return done(err);
}
logger.debug(logPrefix(_this), 'Cache renamed temp file to ' + file);
// Keep track that there is no longer a flush in progress
done();
});
});
});
out.on('error', done);
// Save copy of the keys that we are going to write
var keys = Object.keys(cache);
// Number of keys that we are going to write
var numKeys = keys.length;
var i = 0;
var readyForNext = true;
var bufferAvailable = true;
var serialize = this.serialize;
function writeUInt8 (value) {
var buffer = Buffer.alloc(1);
buffer.writeUInt8(value, 0);
bufferAvailable = out.write(buffer);
}
function writeBufferShort (buffer) {
var len = buffer ? buffer.length : 0;
var lenBuffer = Buffer.alloc(2);
lenBuffer.writeUInt16LE(len, 0);
bufferAvailable = out.write(lenBuffer); // Write the length of the key as a 32bit unsigned integer (little endian)
if (buffer) {
bufferAvailable = out.write(buffer);
}
}
function writeBufferLong (buffer) {
var lenBuffer = Buffer.alloc(4);
lenBuffer.writeUInt32LE(buffer ? buffer.length : 0, 0);
out.write(lenBuffer); // Write the length of the key as a 32bit unsigned integer (little endian)
if (buffer != null) {
bufferAvailable = out.write(buffer);
}
}
function writeInlineValue (key, cacheEntry) {
var value = cacheEntry.value;
if (value !== undefined) {
if (typeof value !== 'string' && !(value instanceof Buffer)) {
// convert non-String/non-Buffer to something that is a String or Buffer
if (!serialize) {
throw new Error('Error writing value for cache. Serializer is required for non-String/Buffer values. Cannot write ' + key + ' with value of type ' + (typeof value));
}
value = serialize(value);
}
// it's possible that serialize function (if called) returned a String
if (typeof value === 'string') {
// convert String to Buffer
value = Buffer.from(value, encoding || 'utf8');
}
if (debugEnabled) {
logger.debug(logPrefix(_this), 'writeInlineValue: valueLen: ', value ? value.length : undefined);
}
writeBufferLong(value);
} else if (cacheEntry.reader) {
readyForNext = false;
var inStream = cacheEntry.reader();
if (!inStream || typeof inStream.pipe !== 'function') {
throw new Error('Cache reader for key "' + key + '" did not return a stream');
}
var buffers = [];
var totalLength = 0;
inStream.on('error', done);
inStream.pipe(through(function write (data) {
if (typeof data === 'string') {
data = Buffer.from(data, encoding || 'utf8');
}
buffers.push(data); // data *must* not be null
totalLength += data.length;
},
function end () { // optional
var valueBuffer = Buffer.concat(buffers, totalLength);
if (debugEnabled) {
logger.debug(logPrefix(_this), 'writeInlineValue: valueLen: ', valueBuffer.length);
}
writeBufferLong(valueBuffer);
readyForNext = true;
continueWriting();
}));
} else {
writeBufferLong(0);
}
}
function writeExternalValue (key, cacheEntry) {
if (cacheEntry.data.writeFileDataHolder) {
if (debugEnabled) {
logger.debug(logPrefix(_this), 'writeExternalValue() - waiting for: ', key);
}
readyForNext = false;
// We are waiting for this entries value to be flushed to a separate file...
cacheEntry.data.writeFileDataHolder.done(function (err, file) {
if (debugEnabled) {
logger.debug(logPrefix(_this), 'writeExternalValue() - done waiting for: ', key);
}
if (err) {
return done(err);
}
readyForNext = true;
continueWriting();
});
}
}
// WRITE VERSION (unsigned 8-bit integer)
writeUInt8(this.version);
// WRITE MODE (MODE_SINGLE_FILE or MODE_MULTI_FILE, unsigned 8-bit integer)
writeUInt8(this.mode);
// variable to keep track of actual number written (since we might skip records that are removed while writing)
var numWritten = 0;
// This method is used to asynchronously write out cache entries to disk
// NOTE: We did not make a copy of the cache so it is possible that some of the keys
// may no longer exist as we are flushing to disk, but that is okay since
// there is code to check if the key still exists in the cache
function continueWriting () {
if (debugEnabled) {
logger.debug(logPrefix(_this), 'continueWriting(), i:', i, numKeys, 'bufferAvailable:', bufferAvailable, 'readyForNext:', readyForNext);
}
if (i === numKeys && readyForNext) {
end();
return;
}
// We'll be nice and keep writing to disk until the output file stream tells us that
// it has no more buffer available. When that happens we wait for the drain event
// to be fired before continuing writing where we left off.
// NOTE: It is not mandatory to stop writing to the output stream when its buffer fills up (the bytes will be buffered by Node.js)
while (i < numKeys && bufferAvailable && readyForNext) {
var key = keys[i];
if (debugEnabled) {
logger.debug(logPrefix(_this), 'Writing #' + (numWritten + 1) + ', ' + (i + 1) + ' of ' + numKeys, ', key: ', key);
}
// go ahead and increment index so that we don't try to write this key again
i++;
if (!cache.hasOwnProperty(key)) {
// A cache entry may have been removed while flushing
continue;
}
var cacheEntry = cache[key];
writeBufferShort(Buffer.from(key, 'utf8'));
var meta = cacheEntry.meta;
if (!isObjectEmpty(meta)) {
var metaJson = JSON.stringify(meta);
var metaBuffer = Buffer.from(JSON.stringify(meta, 'utf8'));
if (debugEnabled) {
logger.debug(logPrefix(_this), 'Meta (length: ' + metaBuffer.length + '): ' + metaJson);
}
writeBufferShort(metaBuffer);
} else {
writeBufferShort(0);
}
if (_this.mode === MODE_SINGLE_FILE) {
writeInlineValue(key, cacheEntry);
} else {
writeExternalValue(key, cacheEntry);
}
numWritten++;
}
if (i === numKeys && readyForNext) {
end();
}
}
out.on('drain', function () {
bufferAvailable = true;
if (i < numKeys && readyForNext) {
continueWriting();
}
});
// kick off writing entries (stop writing when we run out of buffers and resume when drained)
continueWriting();
}
};
module.exports = DiskStore;