simple-on-disk-cache
Version:
A simple on-disk cache, supporting local and remote filesystem targets, with time-based expiration policies.
288 lines • 13.8 kB
JavaScript
;
/**
 * interop helper for default imports
 *
 * .what = returns the module as-is when it is flagged `__esModule`, otherwise wraps it as `{ default: mod }`
 * .why = lets `required.default` be read uniformly, whichever shape the required module has
 * .note = reuses `this.__importDefault` when one is already defined on `this`
 */
const __importDefault = this?.__importDefault || ((mod) => {
  const isEsModule = mod?.__esModule;
  if (isEsModule) return mod;
  return { default: mod };
});
Object.defineProperty(exports, "__esModule", { value: true });
exports.createCache = exports.isRecordExpired = exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = void 0;
const uni_time_1 = require("@ehmpathy/uni-time");
const bottleneck_1 = __importDefault(require("bottleneck"));
const fs_1 = require("fs");
const helpful_errors_1 = require("helpful-errors");
const simple_in_memory_cache_1 = require("simple-in-memory-cache");
const type_fns_1 = require("type-fns");
const assertIsValidOnDiskCacheKey_1 = require("./key/assertIsValidOnDiskCacheKey");
// limiter configured with maxConcurrent: 1; the valid-keys read-modify-write is scheduled through it so those updates do not overlap within this process
const updateKeyFileBottleneck = new bottleneck_1.default({ maxConcurrent: 1 });
// reserved cache key under which the cache persists its own list of currently valid keys (with their expiration metadata)
exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = '_.simple_on_disk_cache.valid_keys';
/**
 * check whether a directory declaration targets the local filesystem
 *
 * .what = true only when `directory.local.path` is present and truthy
 */
const isLocalDirectory = (directory) => {
  const localPath = directory?.local?.path;
  return Boolean(localPath);
};
/**
 * check whether a directory declaration targets a cloud store
 *
 * .what = true only when both `directory.cloud.path` and `directory.cloud.via` are present and truthy
 */
const isCloudDirectory = (directory) => {
  const cloud = directory?.cloud;
  const hasPath = Boolean(cloud?.path);
  const hasAdapter = Boolean(cloud?.via);
  return hasPath && hasAdapter;
};
/**
 * get the current time in milliseconds since epoch
 */
const getMseNow = () => Date.now();
/**
 * cast a base path and key to a cache URI
 *
 * .what = joins the base path and the key with exactly one `/` between them
 * .why = callers may declare the path with or without a terminal slash
 * .note = only a single terminal slash is stripped from the path
 */
const asCacheUri = ({ path, key }) => {
  const hasTerminalSlash = path.endsWith('/');
  const basePath = hasTerminalSlash ? path.slice(0, -1) : path;
  const segments = [basePath, key];
  return segments.join('/');
};
/**
 * invoke the adapter's get method
 *
 * .what = supports both adapter styles
 *   - direct function style: `adapter.get({ uri })`
 *   - namespace style: `adapter.get.one({ uri })`
 */
const invokeAdapterGet = async ({ adapter, uri }) => {
  const request = { uri };
  const isDirectFunctionStyle = typeof adapter.get === 'function';
  // invoke as a method in both styles, so the receiver the adapter sees stays the same
  return isDirectFunctionStyle ? adapter.get(request) : adapter.get.one(request);
};
/**
 * persist a serialized value under a key in the target directory
 *
 * .what
 *   - local directory => overwrites the file at `<path>/<key>` as utf-8
 *   - cloud directory => delegates to the adapter's `set({ uri, body })`
 * .throws UnexpectedCodePathError when the directory is neither local nor cloud
 */
const saveToDisk = async ({ directory, key, value }) => {
  // local takes precedence when a directory declares both targets
  if (isLocalDirectory(directory)) {
    const filePath = asCacheUri({ path: directory.local.path, key });
    const writeOptions = { flag: 'w', encoding: 'utf-8' };
    return await fs_1.promises.writeFile(filePath, value, writeOptions);
  }

  // anything that is not a cloud directory either is unsupported
  if (!isCloudDirectory(directory))
    throw new helpful_errors_1.UnexpectedCodePathError('directory was neither local or cloud. unsupported');

  const uri = asCacheUri({ path: directory.cloud.path, key });
  return await directory.cloud.via.set({ uri, body: value });
};
/**
 * read the serialized value stored under a key in the target directory
 *
 * .what
 *   - local directory => reads the file at `<path>/<key>` as utf-8
 *   - cloud directory => delegates to the adapter's get
 * .returns the stored string, or undefined when nothing was ever stored under the key
 * .throws UnexpectedCodePathError when the directory is neither local nor cloud
 */
const readFromDisk = async ({ directory, key }) => {
  if (isLocalDirectory(directory)) {
    const filePath = asCacheUri({ path: directory.local.path, key });
    try {
      return await fs_1.promises.readFile(filePath, { encoding: 'utf-8' });
    }
    catch (error) {
      // file not found => never cached
      if (error.code === 'ENOENT') return undefined;
      // any other failure is unexpected, so let it surface
      throw error;
    }
  }

  if (isCloudDirectory(directory)) {
    const uri = asCacheUri({ path: directory.cloud.path, key });
    const found = await invokeAdapterGet({ adapter: directory.cloud.via, uri });
    // the adapter reports not-found as null; normalize that to undefined
    return found === null || found === undefined ? undefined : found;
  }

  throw new helpful_errors_1.UnexpectedCodePathError('directory was neither local or cloud. unsupported');
};
/**
 * decide whether a record has expired
 *
 * .what
 *   - `expiresAtMse === null` => the record never expires
 *   - otherwise => expired once the current time has passed `expiresAtMse`
 */
const isRecordExpired = ({ expiresAtMse }) => {
  const neverExpires = expiresAtMse === null;
  if (neverExpires) return false;
  const nowMse = getMseNow();
  return nowMse > expiresAtMse;
};
exports.isRecordExpired = isRecordExpired;
/**
 * resolve the directory to persist to
 *
 * .what = accepts either form of declaration
 *   - a getter function (sync or async), which is invoked and awaited
 *   - the directory itself, which is returned as-is
 */
const resolveDirectoryToPersistTo = async (input) => {
  const { isAFunction } = type_fns_1;
  if (isAFunction(input)) return await input();
  return input;
};
/**
 * create a simple on-disk cache
 *
 * .what = a string cache persisted to a local or cloud directory, fronted by an in-memory cache
 * .input
 *   - directory = the directory to persist to, declared directly or via a (possibly async) getter
 *   - expiration = the default time until a record expires (default: 5 minutes); null => never expires
 * .output
 *   - set(key, value, { expiration }?) = persist a value; setting `undefined` invalidates the key
 *   - get(key) = the cached string, or undefined when absent, expired, or invalidated
 *   - keys() = the keys that are currently tracked as valid
 */
const createCache = ({ directory: directoryToPersistToInput, expiration: defaultExpiration = { minutes: 5 }, }) => {
  // resolve the directory and, for local targets, create it; reads and writes await this promise, so none of them can run before the directory exists
  const promiseDirectoryToPersistTo = resolveDirectoryToPersistTo(directoryToPersistToInput).then(async (directoryToPersistTo) => {
    if (isLocalDirectory(directoryToPersistTo))
      await fs_1.promises.mkdir(directoryToPersistTo.local.path, { recursive: true });
    return directoryToPersistTo;
  });
  // setup failures are rethrown to every get/set that awaits the promise above; this handler only prevents an unhandled rejection when the cache is created but never used
  promiseDirectoryToPersistTo.catch(() => undefined);

  /**
   * define how to set an item into the cache
   */
  const set = async (key, value, { expiration = defaultExpiration } = {}) => {
    (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
    const expiresAtMse = value === undefined
      ? 0 // if value was "undefined", then this key was just invalidated; mark it as invalid with the expiresAt timestamp as well
      : getMseNow() + (expiration ? (0, uni_time_1.toMilliseconds)(expiration) : Infinity); // infinity if null; JSON.stringify persists Infinity as null, which is read back as "never expires"

    // define the most observable format of the value: if it is json.parseable, store the parsed form, since that is easier to look at in the cache file
    const awaitedValue = await value;
    const { observable: mostObservableValue, wasParsed } = (() => {
      // if its undefined, its as observable as it gets
      if (awaitedValue === undefined)
        return { observable: undefined, wasParsed: false };
      try {
        return { observable: JSON.parse(awaitedValue), wasParsed: true };
      }
      catch {
        // not json, so keep the raw value, nothing more we can do
        return { observable: awaitedValue, wasParsed: false };
      }
    })();

    // track whether the stored form must be reserialized on read; a value that parsed into a string (e.g. '"hello"') counts too, otherwise its quotes would be lost on the way back out
    const deserializedForObservability = wasParsed || typeof mostObservableValue !== 'string';

    // save to disk
    const directoryToPersistTo = await promiseDirectoryToPersistTo;
    await saveToDisk({
      directory: directoryToPersistTo,
      key,
      value: JSON.stringify({
        expiresAtMse,
        deserializedForObservability,
        value: mostObservableValue,
      }, null, 2),
    });

    // return the key with metadata
    return { key, expiresAtMse };
  };

  /**
   * define how to get an item from the cache
   */
  const get = async (key) => {
    (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
    const directoryToPersistTo = await promiseDirectoryToPersistTo;
    const cacheContentSerialized = await readFromDisk({
      directory: directoryToPersistTo,
      key,
    });
    if (!(0, type_fns_1.isPresent)(cacheContentSerialized))
      return undefined; // if not in cache, then undefined

    // parse the cache file; JSON.parse reports every malformed input as a SyntaxError, while the message text varies by runtime version, so detect by type rather than by message
    let cacheContent;
    try {
      cacheContent = JSON.parse(cacheContentSerialized);
    }
    catch (error) {
      if (error instanceof SyntaxError) {
        // eslint-disable-next-line no-console
        console.warn('simple-on-disk-cache: detected unparseable cache file. treating the result as invalid. this should not have occured', { key });
        return undefined;
      }
      // otherwise, propagate the error, we dont know how to handle it
      throw error;
    }

    if ((0, exports.isRecordExpired)(cacheContent))
      return undefined; // if already expired, then undefined
    if (cacheContent.deserializedForObservability)
      return JSON.stringify(cacheContent.value); // if it had been deserialized for observability, reserialize it
    return cacheContent.value; // otherwise, its in the cache and not expired, so return the value
  };

  /**
   * define how to lookup valid keys for the cache
   */
  const getValidKeysWithMetadata = async () => {
    // lookup the last saved valid keys
    const cachedValidKeysSerialized = await get(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS);
    const cachedValidKeys = cachedValidKeysSerialized
      ? JSON.parse(cachedValidKeysSerialized)
      : [];
    // drop the keys that have expired since they were recorded
    return cachedValidKeys.filter((0, type_fns_1.withNot)(exports.isRecordExpired));
  };

  /**
   * define how to save valid keys for the cache
   *
   * note
   * - record a key w/ expiresAtMse = 0 to invalidate it
   *
   * TODO: eventually, support lossless high-concurrency writing (potentially optionally, as a cache option, since it's not important for most applications)
   * - we need some way of ensuring that parallel processes wont conflict + overwrite each other
   *   - for example, imagine you have two keys that were set to cache in parallel
   *     - requestA = [...savedKeys, newKeyA]
   *     - requestB = [...savedKeys, newKeyB]
   *   - read-before-write would make it so that either newKeyA or newKeyB is dropped and doesn't make it to the final destination (a "lost update")
   * - in other words,
   *   - there is a risk a query _will_ have been cached but not saved to the valid keys -> immediately invalidated
   *   - this is a safe failure mode, as it's the same as the query never having been cached in the first place (i.e., just requires extra requests)
   * - if we find a usecase where it _is_ critical to solve, we can do so
   *   - probably with
   *     - per-thread "append" file (which all read from, but only one thread writes to)
   *   - plus
   *     - globally locked global file update
   *   - inspiration: https://stackoverflow.com/a/53193851/3068233
   */
  const updateKeyWithMetadataState = async ({ for: forKeyWithMetadata }) => {
    // read-modify-write inside of the bottleneck, so that updates scheduled from this process do not interleave; prevents corrupted key files + same-process race conditions
    return updateKeyFileBottleneck.schedule(async () => {
      // lookup current valid keys
      const currentKeysWithMetadata = await getValidKeysWithMetadata();
      // save the keys w/ an extra key
      await set(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS, JSON.stringify([
        // save the current keys, excluding the previous state of this key if it was there
        ...currentKeysWithMetadata.filter(({ key }) => key !== forKeyWithMetadata.key),
        // save this key, if it isn't expired
        ...((0, exports.isRecordExpired)(forKeyWithMetadata) ? [] : [forKeyWithMetadata]),
      ]), { expiration: null });
    });
  };

  /**
   * define how to set an item to the cache, with valid key tracking
   */
  const setWithValidKeyTracking = async (...args) => {
    // write to the cache
    const newKeyWithMetadata = await set(...args);
    // add the key as valid
    await updateKeyWithMetadataState({ for: newKeyWithMetadata });
  };

  /**
   * define how to get valid keys
   */
  const getValidKeys = async () => {
    const keysWithMetadata = await getValidKeysWithMetadata();
    return keysWithMetadata.map(({ key }) => key);
  };

  /**
   * define how to get an item from the cache, synced with valid key tracking
   */
  const getWithValidKeyTracking = async (...args) => {
    // if its not a valid key, then dont try to get (this is critical, as it ensures that the validKeys array is a source of truth)
    const validKeys = await getValidKeys();
    if (!validKeys.includes(args[0]))
      return undefined; // if the key is not valid, then no value
    // otherwise, lookup the value
    return get(...args);
  };

  /**
   * wrap the get and set around an in memory cache, to prevent redundant disk.reads
   *
   * why?
   * - disk reads are ~15ms each
   * - memory reads are nanoseconds (1000x faster)
   * - with memory.hit before disk.hit, performance improves massively for cache.hits
   */
  const cacheInMemory = (0, simple_in_memory_cache_1.createCache)({
    expiration: defaultExpiration,
  });
  const getWithMemory = async (...args) => {
    // check in memory, to prevent disk hits; compare against null/undefined rather than truthiness, so that a cached empty string still counts as a hit
    const valueFoundInMemoryBefore = await cacheInMemory.get(...args);
    if ((0, type_fns_1.isPresent)(valueFoundInMemoryBefore))
      return valueFoundInMemoryBefore;
    // if not in memory, then .get from disk
    const valueFoundOnDisk = await getWithValidKeyTracking(...args);
    if (!(0, type_fns_1.isPresent)(valueFoundOnDisk))
      return undefined; // if not found on disk either, then defo undefined
    // since found on disk, set to in memory cache, for successful subsequent lookups
    await cacheInMemory.set(args[0], valueFoundOnDisk);
    // and get it from memory now, to ensure consistent output
    const valueFoundInMemoryAfter = await cacheInMemory.get(...args);
    if (!(0, type_fns_1.isPresent)(valueFoundInMemoryAfter))
      throw new helpful_errors_1.UnexpectedCodePathError('could not find value in memory after having been set');
    return valueFoundInMemoryAfter;
  };
  const setWithMemory = async (...args) => {
    // set to disk
    await setWithValidKeyTracking(...args);
    // set to memory
    await cacheInMemory.set(...args);
  };

  /**
   * return the api
   */
  return {
    set: setWithMemory,
    get: getWithMemory,
    keys: getValidKeys,
  };
};
exports.createCache = createCache;
//# sourceMappingURL=cache.js.map