simple-on-disk-cache
Version:
A simple on-disk cache, supporting local and remote filesystem targets, with time based expiration policies.
288 lines • 14.9 kB
JavaScript
;
/**
 * compiler-emitted helper that drives a generator function as if it were an async function
 *
 * - reuses an existing `this.__awaiter` when one is already defined, otherwise defines it here
 * - `thisArg` and `_arguments` are applied to `generator` to start it
 * - `P` is the promise constructor to use; falls back to the global `Promise` when falsy
 * - returns a promise that resolves with the generator's final value, or rejects with whatever it throws
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// wrap any yielded value that is not already an instance of P into a P, so `.then` can always be called on it
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// resume the generator with the resolved value; an exception thrown while resuming rejects the outer promise
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// throw the rejection into the generator, so that a try/catch inside of it can handle it
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// when the generator is done, resolve with its value; otherwise wait on the yielded value and continue
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// start the generator and process its first yielded result
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * compiler-emitted interop helper for default imports
 *
 * - a module flagged with `__esModule` is returned untouched
 * - anything else is wrapped, so that it can be reached via `.default`
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    const isEsModule = mod && mod.__esModule;
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createCache = exports.isRecordExpired = exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = void 0;
const error_fns_1 = require("@ehmpathy/error-fns");
const uni_time_1 = require("@ehmpathy/uni-time");
const bottleneck_1 = __importDefault(require("bottleneck"));
const fs_1 = require("fs");
const simple_in_memory_cache_1 = require("simple-in-memory-cache");
const type_fns_1 = require("type-fns");
const assertIsValidOnDiskCacheKey_1 = require("./key/assertIsValidOnDiskCacheKey");
const s3_1 = require("./utils/s3");
// limiter configured with maxConcurrent: 1; updates of the valid-keys file are scheduled through it so that they do not run concurrently within this process
const updateKeyFileBottleneck = new bottleneck_1.default({ maxConcurrent: 1 });
// the cache key under which the list of currently valid keys is itself persisted
exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = '_.simple_on_disk_cache.valid_keys';
/**
 * checks whether a directory declaration targets a mounted filesystem path
 *
 * true only when `directory.mounted.path` exists and is truthy
 */
const isMountedDirectory = (directory) => {
    if (directory === null || directory === undefined)
        return false;
    const mounted = directory.mounted;
    if (mounted === null || mounted === undefined)
        return false;
    return Boolean(mounted.path);
};
/**
 * checks whether a directory declaration targets an s3 bucket
 *
 * true only when `directory.s3.bucket` exists and is truthy
 */
const isS3Directory = (directory) => {
    if (directory === null || directory === undefined)
        return false;
    const s3Target = directory.s3;
    if (s3Target === null || s3Target === undefined)
        return false;
    return Boolean(s3Target.bucket);
};
/**
 * returns the current time, in milliseconds since epoch
 */
const getMseNow = () => {
    const now = new Date();
    return now.getTime();
};
/**
 * persists a serialized value under a key, in whichever target the directory declares
 *
 * - mounted directory => writes (or overwrites) the file `<mounted.path>/<key>` as utf-8
 * - s3 directory => puts the object `<s3.prefix>/<key>` into `s3.bucket`
 * - anything else => throws an UnexpectedCodePathError
 */
const saveToDisk = (input) => __awaiter(void 0, [input], void 0, function* ({ directory, key, value, }) {
    // local or mounted filesystem target
    if (isMountedDirectory(directory)) {
        const filePath = [directory.mounted.path, key].join('/');
        const writeOptions = { flag: 'w', encoding: 'utf-8' };
        const written = yield fs_1.promises.writeFile(filePath, value, writeOptions);
        return written;
    }
    // remote s3 target
    if (isS3Directory(directory)) {
        const bucket = directory.s3.bucket;
        const objectKey = [directory.s3.prefix, key].join('/');
        const stored = yield s3_1.s3.putObject({ bucket, key: objectKey, data: value });
        return stored;
    }
    // neither target was declared, so there is nowhere to write to
    throw new error_fns_1.UnexpectedCodePathError('directory was neither mounted or s3. unsupported');
});
/**
 * reads the serialized value stored under a key, from whichever target the directory declares
 *
 * - resolves to the stored string when found
 * - resolves to undefined when nothing was stored under that key
 * - rejects with any other read error
 * - throws an UnexpectedCodePathError when the directory is neither mounted nor s3
 */
const readFromDisk = (input) => __awaiter(void 0, [input], void 0, function* ({ directory, key, }) {
    // file not found error => never cached; otherwise, something else is messed up
    const treatMissingFileAsUncached = (error) => {
        if (error.code === 'ENOENT')
            return undefined;
        throw error;
    };
    // s3 object not found => never cached; otherwise, propagate
    const treatMissingObjectAsUncached = (error) => {
        if (error.message.includes('Could not find object in s3 in bucket'))
            return undefined;
        throw error;
    };
    // local or mounted filesystem target
    if (isMountedDirectory(directory)) {
        const filePath = [directory.mounted.path, key].join('/');
        const contents = yield fs_1.promises
            .readFile(filePath, { encoding: 'utf-8' })
            .catch(treatMissingFileAsUncached);
        return contents;
    }
    // remote s3 target
    if (isS3Directory(directory)) {
        const bucket = directory.s3.bucket;
        const objectKey = [directory.s3.prefix, key].join('/');
        const contents = yield s3_1.s3
            .getObjectAsString({ bucket, key: objectKey })
            .catch(treatMissingObjectAsUncached);
        return contents;
    }
    // neither target was declared, so there is nowhere to read from
    throw new error_fns_1.UnexpectedCodePathError('directory was neither mounted or s3. unsupported');
});
/**
 * a utility function for deciding whether a record has expired
 *
 * - `expiresAtMse === null` means the record never expires
 * - otherwise, the record is expired once `expiresAtMse` is earlier than the current time
 */
const isRecordExpired = ({ expiresAtMse, }) => {
    const neverExpires = expiresAtMse === null;
    return !neverExpires && expiresAtMse < getMseNow();
};
exports.isRecordExpired = isRecordExpired;
/**
 * resolves the directory to persist to, from either form it may be declared in
 *
 * supports
 * - getters, sync or async: the getter is invoked and its result awaited
 * - direct declaration: the input is returned as is
 */
const resolveDirectoryToPersistTo = (input) => __awaiter(void 0, void 0, void 0, function* () {
    if ((0, type_fns_1.isAFunction)(input)) {
        const resolved = yield input();
        return resolved;
    }
    return input;
});
/**
 * create a simple on-disk cache
 *
 * @param input.directory - where to persist to; either a directory declaration (`{ mounted: { path } }` or `{ s3: { bucket, prefix } }`) or a getter, sync or async, that returns one
 * @param input.expiration - the default duration a record stays valid for, used whenever `set` is not given its own; defaults to `{ minutes: 5 }`; a falsy value (e.g. `null`) means records never expire
 * @returns the cache api
 * - `set(key, value, { expiration }?)`: persists the value; a value of `undefined` invalidates the key
 * - `get(key)`: resolves to the cached value, or `undefined` when it was never cached, was invalidated, or has expired
 * - `keys()`: resolves to the keys that are currently valid
 */
const createCache = ({ directory: directoryToPersistToInput, expiration: defaultExpiration = { minutes: 5 }, }) => {
    // kick off a promise to get the directory to persist to
    const promiseDirectoryToPersistTo = resolveDirectoryToPersistTo(directoryToPersistToInput);
    // kick off creating the directory if it doesn't already exist, to prevent usage errors
    // note: the promise is retained, so that writes can wait until the directory really exists; otherwise the first write could start before mkdir finished and fail with ENOENT
    const promiseDirectoryPrepared = promiseDirectoryToPersistTo.then((directoryToPersistTo) => __awaiter(void 0, void 0, void 0, function* () {
        if (isMountedDirectory(directoryToPersistTo))
            yield fs_1.promises.mkdir(directoryToPersistTo.mounted.path, { recursive: true });
    }));
    /**
     * define how to set an item into the cache
     *
     * resolves to the key along with the timestamp it expires at
     */
    const set = (key_1, value_1, ...args_1) => __awaiter(void 0, [key_1, value_1, ...args_1], void 0, function* (key, value, { expiration = defaultExpiration, } = {}) {
        (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
        const expiresAtMse = value === undefined
            ? 0 // if value was "undefined", then this key was just invalidated; mark it as invalid with the expiresAt timestamp as well
            : getMseNow() + (expiration ? (0, uni_time_1.toMilliseconds)(expiration) : Infinity); // infinity if null
        // define the most observable format of the value; specifically, see if it is json.parseable; if so, parse it and use that, since its easier to look at in the cache file
        // NOTE(review): values that are parsed here get re-serialized by `get`, so their formatting (e.g. whitespace, number notation) is normalized rather than preserved byte-for-byte
        const awaitedValue = yield value;
        const mostObservableValue = (() => {
            // if its undefined, its as observable as it gets
            if (awaitedValue === undefined)
                return undefined;
            // see if can json.parse
            try {
                // if we can, then return the parsed value, so when we save it it is easy to read manually
                return JSON.parse(awaitedValue);
            }
            catch (_a) {
                // otherwise, return the raw value, nothing more we can do
                return awaitedValue;
            }
        })();
        // wait for the directory to be resolved and, for mounted directories, created, before writing into it
        const directoryToPersistTo = yield promiseDirectoryToPersistTo;
        yield promiseDirectoryPrepared;
        // save to disk
        yield saveToDisk({
            directory: directoryToPersistTo,
            key,
            value: JSON.stringify({
                expiresAtMse, // note: Infinity is serialized as null, which isRecordExpired treats as "never expires"
                deserializedForObservability: typeof mostObservableValue !== 'string', // if its not a string, then it was deserialized by this method for observability
                value: mostObservableValue,
            }, null, 2),
        });
        // return the key with metadata
        return {
            key,
            expiresAtMse,
        };
    });
    /**
     * define how to get an item from the cache
     *
     * resolves to undefined when the key was never cached, has expired, or its cache file can not be parsed
     */
    const get = (key) => __awaiter(void 0, void 0, void 0, function* () {
        (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
        const directoryToPersistTo = yield promiseDirectoryToPersistTo;
        const cacheContentSerialized = yield readFromDisk({
            directory: directoryToPersistTo,
            key,
        });
        if (cacheContentSerialized === undefined)
            return undefined; // if not in cache, then undefined
        try {
            const cacheContent = JSON.parse(cacheContentSerialized);
            if ((0, exports.isRecordExpired)(cacheContent))
                return undefined; // if already expired, then undefined
            if (cacheContent.deserializedForObservability)
                return JSON.stringify(cacheContent.value); // if it had been deserialized for observability, reserialize it
            return cacheContent.value; // otherwise, its in the cache and not expired, so return the value
        }
        catch (error) {
            // if it was a json parsing error, warn about it and treat the key as invalid
            // note: JSON.parse reports every kind of malformed input as a SyntaxError, while the message text differs per failure and per engine version; so check the type rather than the message
            if (error instanceof SyntaxError) {
                // eslint-disable-next-line no-console
                console.warn('simple-on-disk-cache: detected unparseable cache file. treating the result as invalid. this should not have occured', { key });
                return undefined;
            }
            // otherwise, propagate the error, we dont know how to handle it
            throw error;
        }
    });
    /**
     * define how to lookup valid keys for the cache
     *
     * resolves to the `{ key, expiresAtMse }` records that were saved as valid and have not expired since
     */
    const getValidKeysWithMetadata = () => __awaiter(void 0, void 0, void 0, function* () {
        // lookup the last saved valid keys
        const cachedValidKeysSerialized = yield get(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS);
        const cachedValidKeys = cachedValidKeysSerialized
            ? JSON.parse(cachedValidKeysSerialized)
            : [];
        // drop the ones that have expired since they were saved
        const validKeys = cachedValidKeys.filter((0, type_fns_1.withNot)(exports.isRecordExpired));
        return validKeys;
    });
    /**
     * define how to save valid keys for the cache
     *
     * note
     * - record a key w/ expiresAtMse = 0 to invalidate it
     *
     * TODO: eventually, support lossless high-concurrency writing (potentially optionally, as a cache option, since it's not important for most applications)
     * - we need some way of ensuring that parallel processes wont conflict + overwrite eachother
     *   - for example, imagine you have two keys that were set to cache in parallel
     *     - requestA = [...savedKeys, newKeyA]
     *     - requestB = [...savedKeys, newKeyB]
     *   - read-before-write would make it so that either newKeyA or newKeyB is dropped and doesn't make it to the final destination // TODO: lookup the formal word for this race condition, its common in dbs
     * - in other words,
     *   - there is a risk a query _will_ have been cached but not saved to the valid keys -> immediately invalidated
     *   - this is a safe failure mode, as it's the same as the query never having been cached in the first place (i.e., just requires extra requests)
     * - if we find a usecase where it _is_ critical to solve, we can do so
     *   - probably with
     *     - per-thread "append" file (which all read from, but only one thread writes to) (similar in spi)
     *     - plus
     *     - globally locked global file update, similar to
     *       - inspiration: https://stackoverflow.com/a/53193851/3068233
     */
    const updateKeyWithMetadataState = (_a) => __awaiter(void 0, [_a], void 0, function* ({ for: forKeyWithMetadata, }) {
        // write inside of a bottleneck, to ensure that within one machine no more than one process no more than one thread is writing to the same file; prevents corrupted key files when writing to mounted directories + prevents same-machine race conditions
        return updateKeyFileBottleneck.schedule(() => __awaiter(void 0, void 0, void 0, function* () {
            // lookup current valid keys
            const currentKeysWithMetadata = yield getValidKeysWithMetadata();
            // save the keys w/ an extra key
            yield set(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS, JSON.stringify([
                // save the current keys, excluding the previous state of this key if it was there
                ...currentKeysWithMetadata.filter(({ key }) => key !== forKeyWithMetadata.key),
                // save this key, if it isn't expired
                ...((0, exports.isRecordExpired)(forKeyWithMetadata) ? [] : [forKeyWithMetadata]),
            ]), { expiration: null });
        }));
    });
    /**
     * define how to set an item to the cache, with valid key tracking
     */
    const setWithValidKeyTracking = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // write to the cache
        const newKeyWithMetadata = yield set(...args);
        // add the key as valid
        yield updateKeyWithMetadataState({ for: newKeyWithMetadata });
    });
    /**
     * define how to get valid keys
     */
    const getValidKeys = () => __awaiter(void 0, void 0, void 0, function* () {
        return getValidKeysWithMetadata().then((keysWithMetadata) => keysWithMetadata.map(({ key }) => key));
    });
    /**
     * define how to get an item from the cache, synced with valid key tracking
     */
    const getWithValidKeyTracking = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // if its not a valid key, then dont try to get (this is critical, as it ensures that the validKeys array is a source of truth)
        const validKeys = yield getValidKeys();
        if (!validKeys.includes(args[0]))
            return undefined; // if the key is not valid, then no value
        // otherwise, lookup the value
        return get(...args);
    });
    /**
     * wrap the get and set around an in memory cache, to prevent redundant disk.reads
     *
     * why?
     * - disk reads are ~15ms each
     * - memory reads are nanoseconds (1000x faster)
     * - with memory.hit before disk.hit, performance improves massively for cache.hits
     */
    const cacheInMemory = (0, simple_in_memory_cache_1.createCache)({
        expiration: defaultExpiration,
    });
    const getWithMemory = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // check in memory, to prevent disk hits
        // note: compare against undefined explicitly, since a cached empty string is a legitimate hit and must not be mistaken for a miss
        const valueFoundInMemoryBefore = yield cacheInMemory.get(...args);
        if (valueFoundInMemoryBefore !== undefined)
            return valueFoundInMemoryBefore;
        // if not in memory, then .get from disk
        const valueFoundOnDisk = yield getWithValidKeyTracking(...args);
        if (valueFoundOnDisk === undefined)
            return undefined; // if not found on disk either, then defo undefined
        // since found on disk, set to in memory cache, for successful subsequent lookups
        yield cacheInMemory.set(args[0], valueFoundOnDisk);
        // and get it from memory now, to ensure consistent output
        const valueFoundInMemoryAfter = yield cacheInMemory.get(...args);
        if (valueFoundInMemoryAfter === undefined)
            throw new error_fns_1.UnexpectedCodePathError('could not find value in memory after having been set');
        return valueFoundInMemoryAfter;
    });
    const setWithMemory = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // set to disk
        yield setWithValidKeyTracking(...args);
        // set to memory
        yield cacheInMemory.set(...args);
    });
    /**
     * return the api
     */
    return {
        set: setWithMemory,
        get: getWithMemory,
        keys: getValidKeys,
    };
};
exports.createCache = createCache;
//# sourceMappingURL=cache.js.map