UNPKG

simple-on-disk-cache

Version:

A simple on-disk cache, supporting local and remote filesystem targets, with time based expiration policies.

288 lines 14.9 kB
"use strict";
// TypeScript-emitted helper: runs a generator function as a promise-returning async function
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// TypeScript-emitted helper: wraps a non-ES module so that it can be read through `.default`
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createCache = exports.isRecordExpired = exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = void 0;
const error_fns_1 = require("@ehmpathy/error-fns");
const uni_time_1 = require("@ehmpathy/uni-time");
const bottleneck_1 = __importDefault(require("bottleneck"));
const fs_1 = require("fs");
const simple_in_memory_cache_1 = require("simple-in-memory-cache");
const type_fns_1 = require("type-fns");
const assertIsValidOnDiskCacheKey_1 = require("./key/assertIsValidOnDiskCacheKey");
const s3_1 = require("./utils/s3");
/**
 * a module-level limiter that lets at most one scheduled job run at a time
 *
 * note
 * - it is shared by every cache created from this module, since it is declared at module scope
 */
const updateKeyFileBottleneck = new bottleneck_1.default({ maxConcurrent: 1 });
/**
 * the cache key under which the list of currently valid keys is persisted
 */
exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS = '_.simple_on_disk_cache.valid_keys';
/**
 * checks whether the directory declares a mounted (local filesystem) target, i.e., has a truthy `mounted.path`
 */
const isMountedDirectory = (directory) => !!(directory && directory.mounted && directory.mounted.path);
/**
 * checks whether the directory declares an s3 target, i.e., has a truthy `s3.bucket`
 */
const isS3Directory = (directory) => !!(directory && directory.s3 && directory.s3.bucket);
/**
 * returns the current time, in milliseconds since epoch
 */
const getMseNow = () => new Date().getTime();
/**
 * writes the serialized value to the key's location within the directory
 *
 * @param directory - the mounted or s3 directory to write to
 * @param key - the name of the file (mounted) or the object key suffix (s3)
 * @param value - the string to persist
 * @throws UnexpectedCodePathError if the directory is neither mounted nor s3
 */
const saveToDisk = (_a) => __awaiter(void 0, [_a], void 0, function* ({ directory, key, value, }) {
    if (isMountedDirectory(directory))
        return yield fs_1.promises.writeFile([directory.mounted.path, key].join('/'), value, {
            flag: 'w', // open for writing; replaces the previous contents of the file
            encoding: 'utf-8',
        });
    if (isS3Directory(directory))
        return yield s3_1.s3.putObject({
            bucket: directory.s3.bucket,
            key: [directory.s3.prefix, key].join('/'),
            data: value,
        });
    throw new error_fns_1.UnexpectedCodePathError('directory was neither mounted or s3. unsupported');
});
/**
 * reads the serialized value from the key's location within the directory
 *
 * @param directory - the mounted or s3 directory to read from
 * @param key - the name of the file (mounted) or the object key suffix (s3)
 * @returns the stored string, or undefined if nothing was found at that location
 * @throws UnexpectedCodePathError if the directory is neither mounted nor s3
 */
const readFromDisk = (_b) => __awaiter(void 0, [_b], void 0, function* ({ directory, key, }) {
    if (isMountedDirectory(directory))
        return yield fs_1.promises
            .readFile([directory.mounted.path, key].join('/'), {
            encoding: 'utf-8',
        })
            .catch((error) => {
            if (error.code === 'ENOENT')
                return undefined; // file not found error => never cached
            throw error; // otherwise, something else is messed up
        });
    if (isS3Directory(directory))
        return yield s3_1.s3
            .getObjectAsString({
            bucket: directory.s3.bucket,
            key: [directory.s3.prefix, key].join('/'),
        })
            .catch((error) => {
            if (error.message.includes('Could not find object in s3 in bucket'))
                return undefined; // object not found => never cached
            throw error; // otherwise, something else is messed up
        });
    throw new error_fns_1.UnexpectedCodePathError('directory was neither mounted or s3. unsupported');
});
/**
 * a utility function for deciding whether a record is expired
 *
 * @param expiresAtMse - the expiration timestamp, in milliseconds since epoch; null means it never expires
 * @returns true if the expiration timestamp is earlier than now
 */
const isRecordExpired = ({ expiresAtMse, }) => {
    // if expiresAtMse = null, then it never expires
    if (expiresAtMse === null)
        return false;
    // otherwise, check whether its expired
    return expiresAtMse < getMseNow();
};
exports.isRecordExpired = isRecordExpired;
/**
 * declares a method that's able to resolve the directory to persist to
 *
 * supports
 * - async getters
 * - direct declaration
 */
const resolveDirectoryToPersistTo = (input) => __awaiter(void 0, void 0, void 0, function* () {
    return (0, type_fns_1.isAFunction)(input) ? yield input() : input;
});
/**
 * create a simple on-disk cache
 *
 * @param directory - the mounted or s3 directory to persist to, or a (possibly async) getter of it
 * @param expiration - the default expiration applied to each `set`; defaults to 5 minutes
 * @returns the cache api: `{ set, get, keys }`
 */
const createCache = ({ directory: directoryToPersistToInput, expiration: defaultExpiration = { minutes: 5 }, }) => {
    // kick off a promise to get the directory to persist to, creating the mounted directory if it doesn't already exist
    // note: the mkdir is part of the same promise that set and get await, so a read or write can never start before the directory exists
    const promiseDirectoryToPersistTo = resolveDirectoryToPersistTo(directoryToPersistToInput).then((directoryToPersistTo) => __awaiter(void 0, void 0, void 0, function* () {
        if (isMountedDirectory(directoryToPersistTo))
            yield fs_1.promises.mkdir(directoryToPersistTo.mounted.path, { recursive: true });
        return directoryToPersistTo;
    }));
    /**
     * define how to set an item into the cache
     *
     * @returns the key along with its expiration timestamp
     */
    const set = (key_1, value_1, ...args_1) => __awaiter(void 0, [key_1, value_1, ...args_1], void 0, function* (key, value, { expiration = defaultExpiration, } = {}) {
        (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
        // NOTE(review): this compares the value before it is awaited, so a promise that resolves to undefined is not marked as invalidated here - confirm whether that is intended
        const expiresAtMse = value === undefined
            ? 0 // if value was "undefined", then this key was just invalidated; mark it as invalid with the expiresAt timestamp as well
            : getMseNow() + (expiration ? (0, uni_time_1.toMilliseconds)(expiration) : Infinity); // infinity if null
        // define the most observable format of the value; specifically, see if it is json.parseable; if so, parse it and use that, since its easier to look at in the cache file
        const awaitedValue = yield value;
        const mostObservableValue = (() => {
            // if its undefined, its as observable as it gets
            if (awaitedValue === undefined)
                return undefined;
            // see if can json.parse
            try {
                const parsedValue = JSON.parse(awaitedValue);
                // if it parsed into a string, keep the raw value; otherwise `get` could not tell it apart from a plain string and would return it without its quotes
                if (typeof parsedValue === 'string')
                    return awaitedValue;
                // otherwise, return the parsed value, so when we save it it is easy to read manually
                return parsedValue;
            }
            catch (_a) {
                // if it could not be parsed, return the raw value, nothing more we can do
                return awaitedValue;
            }
        })();
        // save to disk
        const directoryToPersistTo = yield promiseDirectoryToPersistTo;
        yield saveToDisk({
            directory: directoryToPersistTo,
            key,
            value: JSON.stringify({
                expiresAtMse, // note: JSON.stringify writes Infinity as null, which isRecordExpired treats as "never expires"
                deserializedForObservability: typeof mostObservableValue !== 'string', // if its not a string, then it was deserialized by this method for observability
                value: mostObservableValue,
            }, null, 2),
        });
        // return the key with metadata
        return {
            key,
            expiresAtMse,
        };
    });
    /**
     * define how to get an item from the cache
     *
     * @returns the cached value, or undefined if it was never cached, is expired, or its cache file is unparseable
     */
    const get = (key) => __awaiter(void 0, void 0, void 0, function* () {
        (0, assertIsValidOnDiskCacheKey_1.assertIsValidOnDiskCacheKey)({ key });
        const directoryToPersistTo = yield promiseDirectoryToPersistTo;
        const cacheContentSerialized = yield readFromDisk({
            directory: directoryToPersistTo,
            key,
        });
        if (cacheContentSerialized === undefined)
            return undefined; // if not in cache, then undefined
        try {
            const cacheContent = JSON.parse(cacheContentSerialized);
            if ((0, exports.isRecordExpired)(cacheContent))
                return undefined; // if already expired, then undefined
            if (cacheContent.deserializedForObservability)
                return JSON.stringify(cacheContent.value); // if it had been deserialized for observability, reserialize it
            return cacheContent.value; // otherwise, its in the cache and not expired, so return the value
        }
        catch (error) {
            // if it was a json parsing error, warn about it and treat the key as invalid
            // note: JSON.parse throws a SyntaxError; we check the error type rather than the message, since the message text differs between kinds of malformed input and between engine versions
            if (error instanceof SyntaxError) {
                // eslint-disable-next-line no-console
                console.warn('simple-on-disk-cache: detected unparseable cache file. treating the result as invalid. this should not have occured', { key });
                return undefined;
            }
            // otherwise, propagate the error, we dont know how to handle it
            throw error;
        }
    });
    /**
     * define how to lookup valid keys for the cache
     *
     * @returns the saved `{ key, expiresAtMse }` records which have not expired
     */
    const getValidKeysWithMetadata = () => __awaiter(void 0, void 0, void 0, function* () {
        // lookup the last saved valid keys
        const cachedValidKeysSerialized = yield get(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS);
        const cachedValidKeys = cachedValidKeysSerialized
            ? JSON.parse(cachedValidKeysSerialized)
            : [];
        // drop the ones that have expired since they were saved
        const validKeys = cachedValidKeys.filter((0, type_fns_1.withNot)(exports.isRecordExpired));
        return validKeys;
    });
    /**
     * define how to save valid keys for the cache
     *
     * note
     * - record a key w/ expiresAtMse = 0 to invalidate it
     *
     * TODO: eventually, support lossless high-concurrency writing (potentially optionally, as a cache option, since it's not important for most applications)
     * - we need some way of ensuring that parallel processes wont conflict + overwrite eachother
     *   - for example, imagine you have two keys that were set to cache in parallel
     *     - requestA = [...savedKeys, newKeyA]
     *     - requestB = [...savedKeys, newKeyB]
     *   - read-before-write would make it so that either newKeyA or newKeyB is dropped and doesn't make it to the final destination // TODO: lookup the formal word for this race condition, its common in dbs
     * - in other words,
     *   - there is a risk a query _will_ have been cached but not saved to the valid keys -> immediately invalidated
     *   - this is a safe failure mode, as it's the same as the query never having been cached in the first place (i.e., just requires extra requests)
     * - if we find a usecase where it _is_ critical to solve, we can do so
     *   - probably with
     *     - per-thread "append" file (which all read from, but only one thread writes to) (similar in spi)
     *   - plus
     *     - globally locked global file update, similar to
     *       - inspiration: https://stackoverflow.com/a/53193851/3068233
     */
    const updateKeyWithMetadataState = (_a) => __awaiter(void 0, [_a], void 0, function* ({ for: forKeyWithMetadata, }) {
        // write inside of a bottleneck, so that the read-then-write of the key file runs one update at a time; prevents corrupted key files when writing to mounted directories + prevents same-machine race conditions
        return updateKeyFileBottleneck.schedule(() => __awaiter(void 0, void 0, void 0, function* () {
            // lookup current valid keys
            const currentKeysWithMetadata = yield getValidKeysWithMetadata();
            // save the keys w/ an extra key
            yield set(exports.RESERVED_CACHE_KEY_FOR_VALID_KEYS, JSON.stringify([
                // save the current keys, excluding the previous state of this key if it was there
                ...currentKeysWithMetadata.filter(({ key }) => key !== forKeyWithMetadata.key),
                // save this key, if it isn't expired
                ...((0, exports.isRecordExpired)(forKeyWithMetadata) ? [] : [forKeyWithMetadata]),
            ]), { expiration: null }); // the key file itself never expires
        }));
    });
    /**
     * define how to set an item to the cache, with valid key tracking
     */
    const setWithValidKeyTracking = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // write to the cache
        const newKeyWithMetadata = yield set(...args);
        // add the key as valid
        yield updateKeyWithMetadataState({ for: newKeyWithMetadata });
    });
    /**
     * define how to get valid keys
     */
    const getValidKeys = () => __awaiter(void 0, void 0, void 0, function* () {
        return getValidKeysWithMetadata().then((keysWithMetadata) => keysWithMetadata.map(({ key }) => key));
    });
    /**
     * define how to get an item from the cache, synced with valid key tracking
     */
    const getWithValidKeyTracking = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // if its not a valid key, then dont try to get (this is critical, as it ensures that the validKeys array is a source of truth)
        const validKeys = yield getValidKeys();
        if (!validKeys.includes(args[0]))
            return undefined; // if the key is not valid, then no value
        // otherwise, lookup the value
        return get(...args);
    });
    /**
     * wrap the get and set around an in memory cache, to prevent redundant disk.reads
     *
     * why?
     * - disk reads are ~15ms each
     * - memory reads are nanoseconds (1000x faster)
     * - with memory.hit before disk.hit, performance improves massively for cache.hits
     */
    const cacheInMemory = (0, simple_in_memory_cache_1.createCache)({
        expiration: defaultExpiration,
    });
    const getWithMemory = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // check in memory, to prevent disk hits
        // NOTE(review): the truthiness checks in this function treat a cached empty string the same as a miss - confirm against how simple-in-memory-cache reports a miss before tightening them
        const valueFoundInMemoryBefore = yield cacheInMemory.get(...args);
        if (valueFoundInMemoryBefore)
            return valueFoundInMemoryBefore;
        // if not in memory, then .get from disk
        const valueFoundOnDisk = yield getWithValidKeyTracking(...args);
        if (!valueFoundOnDisk)
            return undefined; // if not found on disk either, then defo undefined
        // since found on disk, set to in memory cache, for successful subsequent lookups
        yield cacheInMemory.set(args[0], valueFoundOnDisk);
        // and get it from memory now, to ensure consistent output
        const valueFoundInMemoryAfter = yield cacheInMemory.get(...args);
        if (!valueFoundInMemoryAfter)
            throw new error_fns_1.UnexpectedCodePathError('could not find value in memory after having been set');
        return valueFoundInMemoryAfter;
    });
    const setWithMemory = (...args) => __awaiter(void 0, void 0, void 0, function* () {
        // set to disk
        yield setWithValidKeyTracking(...args);
        // set to memory
        yield cacheInMemory.set(...args);
    });
    /**
     * return the api
     */
    return {
        set: setWithMemory,
        get: getWithMemory,
        keys: getValidKeys,
    };
};
exports.createCache = createCache;
//# sourceMappingURL=cache.js.map