transitory
Version:
In-memory cache with high hit rates via LFU eviction. Supports time-based expiration, automatic loading and metrics.
949 lines (809 loc) • 25 kB
text/typescript
import { AbstractCache } from '../AbstractCache';
import { Cache } from '../Cache';
import { CacheNode } from '../CacheNode';
import { CacheSPI } from '../CacheSPI';
import { KeyType } from '../KeyType';
import { Metrics } from '../metrics/Metrics';
import { RemovalListener } from '../RemovalListener';
import { RemovalReason } from '../RemovalReason';
import { ON_REMOVE, ON_MAINTENANCE, TRIGGER_REMOVE, MAINTENANCE } from '../symbols';
import { Weigher } from '../Weigher';
import { CountMinSketch } from './CountMinSketch';
const percentInMain = 0.99;
const percentProtected = 0.8;
const percentOverflow = 0.01;
const adaptiveRestartThreshold = 0.05;
const adaptiveStepPercent = 0.0625;
const adaptiveStepDecayRate = 0.98;
const DATA = Symbol('boundedData');
/**
* Options usable with a BoundedCache.
*/
export interface BoundedCacheOptions<K extends KeyType, V> {
/**
* The maximum size of the cache. For unweighed caches this is the maximum
* number of entries in the cache, for weighed caches this is the maximum
* weight of the cache.
*/
maxSize: number;
/**
* Weigher function to use. If this is specified the cache turns into
* a weighted cache and the function is called when cached data is stored
* to determine its weight.
*/
weigher?: Weigher<K, V> | null;
/**
* Listener to call whenever something is removed from the cache.
*/
removalListener?: RemovalListener<K, V> | null;
}
/**
* Data as used by the bounded cache.
*/
interface BoundedCacheData<K extends KeyType, V> {
/**
* Values within the cache.
*/
values: Map<K, BoundedNode<K, V>>;
/**
* The maximum size of the cache or -1 if the cache uses weighing.
*/
maxSize: number;
/**
* Weigher being used for this cache. Invoked to determine the weight of
* an item being cached.
*/
weigher: Weigher<K, V> | null;
/**
* Maximum size of the cache as a weight.
*/
weightedMaxSize: number;
/**
* The current weight of all items in the cache.
*/
weightedSize: number;
/**
* Listener to invoke when removals occur.
*/
removalListener: RemovalListener<K, V> | null;
/**
* Sketch used to keep track of the frequency of which items are used.
*/
sketch: CountMinSketch;
/**
* The limit at which to grow the sketch.
*/
sketchGrowLimit: number;
/**
* Timeout holder for performing maintenance. When this is set it means
* that a maintenance is queued for later.
*/
maintenanceTimeout: any;
/**
* The maximum size the cache can grow without an eviction being applied
* directly.
*/
forceEvictionLimit: number;
/**
* The time in milliseconds to delay maintenance.
*/
maintenanceInterval: number;
/**
* Adaptive data used to adjust the size of the window.
*/
adaptiveData: AdaptiveData;
/**
* Tracking of the window cache, starts at around 1% of the total cache.
*/
window: CacheSection<K, V>;
/**
* SLRU protected segment, 80% * (100% - windowSize) of the total cache
*/
protected: CacheSection<K, V>;
/**
* SLRU probation segment, 20% * (100% - windowSize) of the total cache
*/
probation: ProbationSection<K, V>;
}
/**
* Node in a double-linked list used for the segments within the cache.
*/
class BoundedNode<K extends KeyType, V> extends CacheNode<K, V> {
public readonly hashCode: number;
public weight: number;
public location: Location;
public constructor(key: K | null, value: V | null) {
super(key, value);
this.hashCode = key === null ? 0 : CountMinSketch.hash(key);
this.weight = 1;
this.location = Location.WINDOW;
}
}
/**
* Location of a node within the caches segments.
*/
const enum Location {
WINDOW = 0,
PROTECTED = 1,
PROBATION = 2
}
/**
* Segment within the cache including a tracker for the current size and
* the maximum size it can be.
*/
interface CacheSection<K extends KeyType, V> {
/**
* Head of the linked list containing nodes for this segment.
*/
head: BoundedNode<K, V>;
/**
* Current size of the segment. Updated whenever something is added or
* removed from the segment.
*/
size: number;
/**
* The maximum size of the segment. Set on creation and can then be moved
* around using the adaptive adjustment.
*/
maxSize: number;
}
/**
* Special type for the probation segment that doesn't track its size.
*/
interface ProbationSection<K extends KeyType, V> {
/**
* Head of the linked list containing nodes for this segment.
*/
head: BoundedNode<K, V>;
}
/**
* Data used for adaptive adjustment of the window segment.
*/
interface AdaptiveData {
/**
* The adjustment left to perform, a positive number indicates that the
* window size should be increased.
*/
adjustment: any;
/**
* The current step size for the hill climbing.
*/
stepSize: any;
/**
* The hit rate of the previous sample.
*/
previousHitRate: number;
/**
* The number of this in the current sample.
*/
misses: number;
/**
* The number of misses in the current sample.
*/
hits: number;
}
/**
* Bounded cache implementation using W-TinyLFU to keep track of data.
*
* See https://arxiv.org/pdf/1512.00727.pdf for details about TinyLFU and
* the W-TinyLFU optimization.
*/
export class BoundedCache<K extends KeyType, V> extends AbstractCache<K, V> implements Cache<K, V>, CacheSPI<K, V> {
private [DATA]: BoundedCacheData<K, V>;
public [ON_REMOVE]?: RemovalListener<K, V>;
public [ON_MAINTENANCE]?: () => void;
public constructor(options: BoundedCacheOptions<K, V>) {
super();
const maxMain = Math.floor(percentInMain * options.maxSize);
/*
* For weighted caches use an initial sketch size of 256. It will
* grow when the size of the cache approaches that size.
*
* Otherwise set it to a minimum of 128 or the maximum requested size
* of the graph.
*/
const sketchWidth = options.weigher ? 256 : Math.max(options.maxSize, 128);
this[MAINTENANCE] = this[MAINTENANCE].bind(this);
this[DATA] = {
maxSize: options.weigher ? -1 : options.maxSize,
removalListener: options.removalListener || null,
weigher: options.weigher || null,
weightedMaxSize: options.maxSize,
weightedSize: 0,
sketch: CountMinSketch.uint8(sketchWidth, 4),
sketchGrowLimit: sketchWidth,
values: new Map(),
adaptiveData: {
hits: 0,
misses: 0,
adjustment: 0,
previousHitRate: 0,
stepSize: -adaptiveStepPercent * options.maxSize
},
window: {
head: new BoundedNode<K, V>(null, null),
size: 0,
maxSize: options.maxSize - maxMain
},
protected: {
head: new BoundedNode<K, V>(null, null),
size: 0,
maxSize: Math.floor(maxMain * percentProtected)
},
probation: {
head: new BoundedNode<K, V>(null, null),
},
maintenanceTimeout: null,
forceEvictionLimit: options.maxSize + Math.max(Math.floor(options.maxSize * percentOverflow), 5),
maintenanceInterval: 5000
};
}
/**
* Get the maximum size this cache can be.
*
* @returns
* maximum size of the cache
*/
public get maxSize() {
return this[DATA].maxSize;
}
/**
* Get the current size of the cache.
*
* @returns
* items currently in the cache
*/
public get size() {
return this[DATA].values.size;
}
/**
* Get the weighted size of all items in the cache.
*
* @returns
* the weighted size of all items in the cache
*/
public get weightedSize() {
return this[DATA].weightedSize;
}
/**
* Store a value tied to the specified key. Returns the previous value or
* `null` if no value currently exists for the given key.
*
* @param key -
* key to store value under
* @param value -
* value to store
* @returns
* current value or `null`
*/
public set(key: K, value: V): V | null {
const data = this[DATA];
const old = data.values.get(key);
// Create a node and add it to the backing map
const node = new BoundedNode(key, value);
data.values.set(key, node);
if(data.weigher) {
node.weight = data.weigher(key, value);
}
// Update our weight
data.weightedSize += node.weight;
if(old) {
// Remove the old node
old.remove();
// Adjust weight
data.weightedSize -= old.weight;
// Update weights of where the node belonged
switch(old.location) {
case Location.PROTECTED:
// Node was protected, reduce the size
data.protected.size -= old.weight;
break;
case Location.WINDOW:
// Node was in window, reduce window size
data.window.size -= old.weight;
break;
}
}
// Check if we reached the grow limit of the sketch
if(data.weigher && data.values.size >= data.sketchGrowLimit) {
const sketchWidth = data.values.size * 2;
data.sketch = CountMinSketch.uint8(sketchWidth, 4);
data.sketchGrowLimit = sketchWidth;
}
// Append the new node to the window space
node.appendToTail(data.window.head);
data.window.size += node.weight;
// Register access to the key
data.sketch.update(node.hashCode);
// Schedule eviction
if(data.weightedSize >= data.forceEvictionLimit) {
this[MAINTENANCE]();
} else if(! data.maintenanceTimeout) {
data.maintenanceTimeout = setTimeout(this[MAINTENANCE], data.maintenanceInterval);
}
// Return the value we replaced
if(old) {
this[TRIGGER_REMOVE](key, old.value, RemovalReason.REPLACED);
return old.value;
} else {
return null;
}
}
/**
* Get the cached value for the specified key if it exists. Will return
* the value or `null` if no cached value exist. Updates the usage of the
* key.
*
* @param key -
* key to get
* @returns
* current value or `null`
*/
public getIfPresent(key: K) {
const data = this[DATA];
const node = data.values.get(key);
if(! node) {
// This value does not exist in the cache
data.adaptiveData.misses++;
return null;
}
// Keep track of the hit
data.adaptiveData.hits++;
// Register access to the key
data.sketch.update(node.hashCode);
switch(node.location) {
case Location.WINDOW:
// In window cache, mark as most recently used
node.moveToTail(data.window.head);
break;
case Location.PROBATION:
// In SLRU probation segment, move to protected
node.location = Location.PROTECTED;
node.moveToTail(data.protected.head);
// Plenty of room, keep track of the size
data.protected.size += node.weight;
while(data.protected.size > data.protected.maxSize) {
/*
* There is now too many nodes in the protected segment
* so demote the least recently used.
*/
const lru = data.protected.head.next;
lru.location = Location.PROBATION;
lru.moveToTail(data.probation.head);
data.protected.size -= lru.weight;
}
break;
case Location.PROTECTED:
// SLRU protected segment, mark as most recently used
node.moveToTail(data.protected.head);
break;
}
return node.value;
}
/**
* Peek to see if a key is present without updating the usage of the
* key. Returns the value associated with the key or `null` if the key
* is not present.
*
* In many cases `has(key)` is a better option to see if a key is present.
*
* @param key -
* the key to check
* @returns
* value associated with key or `null`
*/
public peek(key: K) {
const data = this[DATA];
const node = data.values.get(key);
return node ? node.value : null;
}
/**
* Delete a value in the cache. Returns the deleted value or `null` if
* there was no value associated with the key in the cache.
*
* @param key -
* the key to delete
* @returns
* deleted value or `null`
*/
public delete(key: K) {
const data = this[DATA];
const node = data.values.get(key);
if(node) {
// Remove the node from its current list
node.remove();
switch(node.location) {
case Location.PROTECTED:
// Node was protected, reduce the size
data.protected.size -= node.weight;
break;
case Location.WINDOW:
// Node was in window, reduce window size
data.window.size -= node.weight;
break;
}
// Reduce overall weight
data.weightedSize -= node.weight;
// Remove from main value storage
data.values.delete(key);
this[TRIGGER_REMOVE](key, node.value, RemovalReason.EXPLICIT);
if(! data.maintenanceTimeout) {
data.maintenanceTimeout = setTimeout(this[MAINTENANCE], data.maintenanceInterval);
}
return node.value;
}
return null;
}
/**
* Check if the given key exists in the cache.
*
* @param key -
* key to check
* @returns
* `true` if value currently exists, `false` otherwise
*/
public has(key: K) {
const data = this[DATA];
return data.values.has(key);
}
/**
* Clear the cache removing all of the entries cached.
*/
public clear() {
const data = this[DATA];
const oldValues = data.values;
data.values = new Map();
for(const [ key, node ] of oldValues) {
this[TRIGGER_REMOVE](key, node.value, RemovalReason.EXPLICIT);
}
data.weightedSize = 0;
data.window.head.remove();
data.window.size = 0;
data.probation.head.remove();
data.protected.head.remove();
data.protected.size = 0;
if(data.maintenanceTimeout) {
clearTimeout(data.maintenanceTimeout);
data.maintenanceTimeout = null;
}
}
/**
* Get all of the keys in the cache as an array. Can be used to iterate
* over all of the values in the cache, but be sure to protect against
* values being removed during iteration due to time-based expiration if
* used.
*
* @returns
* snapshot of keys
*/
public keys(): K[] {
this[MAINTENANCE]();
return Array.from(this[DATA].values.keys());
}
/**
* Request clean up of the cache by removing expired entries and
* old data. Clean up is done automatically a short time after sets and
* deletes, but if your cache uses time-based expiration and has very
* sporadic updates it might be a good idea to call `cleanUp()` at times.
*
* A good starting point would be to call `cleanUp()` in a `setInterval`
* with a delay of at least a few minutes.
*/
public cleanUp() {
this[MAINTENANCE]();
}
/**
* Get metrics for this cache. Returns an object with the keys `hits`,
* `misses` and `hitRate`. For caches that do not have metrics enabled
* trying to access metrics will throw an error.
*/
public get metrics(): Metrics {
throw new Error('Metrics are not supported by this cache');
}
private [TRIGGER_REMOVE](key: K, value: any, cause: RemovalReason) {
const data = this[DATA];
// Trigger any extended remove listeners
const onRemove = this[ON_REMOVE];
if(onRemove) {
onRemove(key, value, cause);
}
// Trigger the removal listener
if(data.removalListener) {
data.removalListener(key, value, cause);
}
}
private [MAINTENANCE]() {
/*
* Trigger the onMaintenance listener if one exists. This is done
* before eviction occurs so that extra layers have a chance to
* apply their own eviction rules.
*
* This can be things such as things being removed because they have
* been expired which in turn might cause eviction to be unnecessary.
*/
const onMaintenance = this[ON_MAINTENANCE];
if(onMaintenance) {
onMaintenance();
}
const data = this[DATA];
/*
* Evict the least recently used node in the window space to the
* probation segment until we are below the maximum size.
*/
let evictedToProbation = 0;
while(data.window.size > data.window.maxSize) {
const first = data.window.head.next;
first.moveToTail(data.probation.head);
first.location = Location.PROBATION;
data.window.size -= first.weight;
evictedToProbation++;
}
/*
* Evict nodes for real until we are below our maximum size.
*/
while(data.weightedSize > data.weightedMaxSize) {
const probation = data.probation.head.next;
const evictedCandidate = evictedToProbation === 0 ? data.probation.head : data.probation.head.previous;
const hasProbation = probation !== data.probation.head;
const hasEvicted = evictedCandidate !== data.probation.head;
let toRemove: BoundedNode<K, V>;
if(! hasProbation && ! hasEvicted) {
// TODO: Probation queue is empty, how is this handled?
break;
} else if(! hasEvicted) {
toRemove = probation;
} else if(! hasProbation) {
toRemove = evictedCandidate;
evictedToProbation--;
} else {
/*
* Estimate how often the two nodes have been accessed to
* determine which of the keys should actually be evicted.
*
* Also protect against hash collision attacks where the
* frequency of an node in the cache is raised causing the
* candidate to never be admitted into the cache.
*/
let removeCandidate;
const freqEvictedCandidate = data.sketch.estimate(evictedCandidate.hashCode);
const freqProbation = data.sketch.estimate(probation.hashCode);
if(freqEvictedCandidate > freqProbation) {
removeCandidate = false;
} else if(freqEvictedCandidate < data.sketch.slightlyLessThanHalfMaxSize) {
/*
* If the frequency of the candidate is slightly less than
* half it can be admitted without going through randomness
* checks.
*
* The idea here is that will reduce the number of random
* admittances.
*/
removeCandidate = true;
} else {
/*
* Make it a 1 in 1000 chance that the candidate is not
* removed.
*
* TODO: Should this be lower or higher? Please open an issue if you have thoughts on this
*/
removeCandidate = Math.floor(Math.random() * 1000) >= 1;
}
toRemove = removeCandidate ? evictedCandidate : probation;
evictedToProbation--;
}
if(toRemove.key === null) {
throw new Error('Cache issue, problem with removal');
}
data.values.delete(toRemove.key);
toRemove.remove();
data.weightedSize -= toRemove.weight;
this[TRIGGER_REMOVE](toRemove.key, toRemove.value, RemovalReason.SIZE);
}
// Perform adaptive adjustment of size of window cache
adaptiveAdjustment(data);
if(data.maintenanceTimeout) {
clearTimeout(data.maintenanceTimeout);
data.maintenanceTimeout = null;
}
}
}
/**
* Perform adaptive adjustment. This will do a simple hill climb and attempt
* to find the best balance between the recency and frequency parts of the
* cache.
*
* This is based on the work done in Caffeine and the paper Adaptive Software
* Cache Management by Gil Einziger, Ohad Eytan, Roy Friedman and Ben Manes.
*
* This implementation does work in chunks so that not too many nodes are
* moved around at once. At every maintenance interval it:
*
* 1) Checks if there are enough samples to calculate a new adjustment.
* 2)
* Takes the current adjustment and increases or decreases the window in
* chunks. At every invocation it currently moves a maximum of 1000 nodes
* around.
*
* @param data -
*/
function adaptiveAdjustment<K extends KeyType, V>(data: BoundedCacheData<K, V>) {
/*
* Calculate the new adaptive adjustment. This might result in a
* recalculation or it may skip touching the adjustment.
*/
calculateAdaptiveAdjustment(data);
const a = data.adaptiveData.adjustment;
if(a > 0) {
// Increase the window size if the adjustment is positive
increaseWindowSegmentSize(data);
} else if(a < 0) {
// Decrease the window size if the adjustment is negative
decreaseWindowSegmentSize(data);
}
}
/**
* Evict nodes from the protected segment to the probation segment if there
* are too many nodes in the protected segment.
*
* @param data -
*/
function evictProtectedToProbation<K extends KeyType, V>(data: BoundedCacheData<K, V>) {
/*
* Move up to 1000 nodes from the protected segment to the probation one
* if the segment is over max size.
*/
let i = 0;
while(i++ < 1000 && data.protected.size > data.protected.maxSize) {
const lru = data.protected.head.next;
if(lru === data.protected.head) break;
lru.location = Location.PROBATION;
lru.moveToTail(data.probation.head);
data.protected.size -= lru.weight;
}
}
/**
* Calculate the adjustment to the window size. This will check if there is
* enough samples to do a step and if so perform a simple hill climbing to
* find the new adjustment.
*
* @param data -
* @returns
* `true` if an adjustment occurred, `false` otherwise
*/
function calculateAdaptiveAdjustment<K extends KeyType, V>(data: BoundedCacheData<K, V>): boolean {
const adaptiveData = data.adaptiveData;
const requestCount = adaptiveData.hits + adaptiveData.misses;
if(requestCount < data.sketch.resetAfter) {
/*
* Skip adjustment if the number of gets in the cache has not reached
* the same size as the sketch reset.
*/
return false;
}
const hitRate = adaptiveData.hits / requestCount;
const hitRateDiff = hitRate - adaptiveData.previousHitRate;
const amount = hitRateDiff >= 0 ? adaptiveData.stepSize : -adaptiveData.stepSize;
let nextStep;
if(Math.abs(hitRateDiff) >= adaptiveRestartThreshold) {
nextStep = adaptiveStepPercent * data.weightedMaxSize * (amount >= 0 ? 1 : -1);
} else {
nextStep = adaptiveStepDecayRate * amount;
}
// Store the adjustment, step size and previous hit rate for the next step
adaptiveData.adjustment = Math.floor(amount);
adaptiveData.stepSize = nextStep;
adaptiveData.previousHitRate = hitRate;
// Reset the sample data
adaptiveData.misses = 0;
adaptiveData.hits = 0;
return true;
}
/**
* Increase the size of the window segment. This will change increase the max
* size of the window segment and decrease the max size of the protected
* segment. The method will then move nodes from the probation and protected
* segment the window segment.
*
* @param data -
*/
function increaseWindowSegmentSize<K extends KeyType, V>(data: BoundedCacheData<K, V>) {
if(data.protected.maxSize === 0) {
// Can't increase the window size anymore
return;
}
let amountLeftToAdjust = Math.min(data.adaptiveData.adjustment, data.protected.maxSize);
data.protected.maxSize -= amountLeftToAdjust;
data.window.maxSize += amountLeftToAdjust;
/*
* Evict nodes from the protected are to the probation area now that it
* is smaller.
*/
evictProtectedToProbation(data);
/*
* Transfer up to 1000 node into the window segment.
*/
for(let i = 0; i < 1000; i++) {
let lru = data.probation.head.next;
if(lru === data.probation.head || lru.weight > amountLeftToAdjust) {
/*
* Either got the probation head or the node was to big to fit.
* Move on and check in the protected area.
*/
lru = data.protected.head.next;
if(lru === data.protected.head) {
// No more values to remove
break;
}
}
if(lru.weight > amountLeftToAdjust) {
/*
* The node weight exceeds what is left of the adjustment.
*/
break;
}
amountLeftToAdjust -= lru.weight;
// Remove node from its current segment
if(lru.location === Location.PROTECTED) {
// If its protected reduce the size
data.protected.size -= lru.weight;
}
// Move to the window segment
lru.moveToTail(data.window.head);
data.window.size += lru.weight;
lru.location = Location.WINDOW;
}
/*
* Keep track of the adjustment amount that is left. The next maintenance
* invocation will look at this and attempt to adjust for it.
*/
data.protected.maxSize += amountLeftToAdjust;
data.window.maxSize -= amountLeftToAdjust;
data.adaptiveData.adjustment = amountLeftToAdjust;
}
/**
* Decrease the size of the window. This will increase the size of the
* protected segment while decreasing the size of the window segment. Nodes
* will be moved from the window segment into the probation segment, where
* they are later moved to the protected segment when they are accessed.
*
* @param data -
*/
function decreaseWindowSegmentSize<K extends KeyType, V>(data: BoundedCacheData<K, V>) {
if(data.window.maxSize <= 1) {
// Can't decrease the size of the window anymore
return;
}
let amountLeftToAdjust = Math.min(-data.adaptiveData.adjustment, Math.max(data.window.maxSize - 1, 0));
data.window.maxSize -= amountLeftToAdjust;
data.protected.maxSize += amountLeftToAdjust;
/*
* Transfer upp to 1000 nodes from the window segment into the probation
* segment.
*/
for(let i = 0; i < 1000; i++) {
const lru = data.window.head.next;
if(lru === data.window.head) {
// No more nodes in the window segment, can't adjust anymore
break;
}
if(lru.weight > amountLeftToAdjust) {
/*
* The node weight exceeds what is left of the change. Can't move
* it around.
*/
break;
}
amountLeftToAdjust -= lru.weight;
// Remove node from the window
lru.moveToTail(data.probation.head);
lru.location = Location.PROBATION;
data.window.size -= lru.weight;
}
/*
* Keep track of the adjustment amount that is left. The next maintenance
* invocation will look at this and attempt to adjust for it.
*/
data.window.maxSize += amountLeftToAdjust;
data.protected.maxSize -= amountLeftToAdjust;
data.adaptiveData.adjustment = -amountLeftToAdjust;
}