@skybolt/vite-plugin
Version:
Vite plugin for Skybolt - High-performance asset caching for multi-page applications
394 lines (346 loc) • 12 kB
JavaScript
/**
* Skybolt Cache Digest - Cuckoo Filter Implementation
*
* A space-efficient probabilistic data structure for tracking cached assets.
* Instead of storing "asset:hash" pairs (~40+ bytes each), we store fingerprints
* in a Cuckoo filter (~2 bytes per asset) with configurable false positive rate.
*
* Key properties:
* - No false negatives: if an asset is cached, the filter will always report it
* - Small false positive rate: ~1-3% chance of reporting uncached assets as cached
* - Compact: ~2 bytes per asset vs ~40+ bytes for full serialization
* - Supports deletion: unlike Bloom filters, items can be removed
*
* For Skybolt, false positives mean occasionally serving external links for
* uncached assets (browser fetches from network) - a minor performance hit.
* False negatives would be worse (inlining already-cached assets).
*/
// Constants for the Cuckoo filter
// Number of low hash bits kept per fingerprint. The `stats` getter estimates the
// false positive rate as 2 * BUCKET_SIZE / 2^FINGERPRINT_BITS, i.e. ~0.2% here.
const FINGERPRINT_BITS = 12
const BUCKET_SIZE = 4 // 4 entries per bucket (good fill rate ~95%)
const MAX_KICKS = 500 // Max relocations before insert() gives up and reports the filter as full
const EMPTY_FINGERPRINT = 0 // 0 marks an empty slot, so real fingerprints are never 0
/**
 * 32-bit FNV-1a hash (non-cryptographic) over the UTF-16 code units of a string.
 * Math.imul performs the multiply with exact 32-bit wrap-around, and `>>> 0`
 * reinterprets the result as unsigned, so the value is always in [0, 2^32 - 1].
 * @param {string} str - Text to hash
 * @returns {number} - Unsigned 32-bit hash
 */
function fnv1a(str) {
  const FNV_PRIME = 16777619
  let acc = 2166136261 // FNV offset basis
  for (let pos = 0; pos < str.length; pos++) {
    // XOR in the next code unit, then multiply modulo 2^32
    acc = Math.imul(acc ^ str.charCodeAt(pos), FNV_PRIME) >>> 0
  }
  return acc
}
/**
 * Derive the fingerprint stored in the filter for an asset key
 * (e.g. "src/css/main.css:Pw3rT8vL").
 * The fingerprint is the low FINGERPRINT_BITS bits of the key's FNV-1a hash;
 * a result of 0 is remapped to 1 because 0 is reserved for empty slots.
 * @param {string} str - Asset key
 * @returns {number} - Value in range [1, 2^FINGERPRINT_BITS - 1]
 */
function fingerprint(str) {
  const lowBitsMask = (1 << FINGERPRINT_BITS) - 1
  const candidate = fnv1a(str) & lowBitsMask
  return candidate === 0 ? 1 : candidate
}
/**
 * Compute the primary bucket index for an asset key: its FNV-1a hash modulo
 * the number of buckets.
 *
 * NOTE(review): fingerprint() is taken from the low bits of the same hash, so
 * for power-of-2 bucket counts the bucket index and the fingerprint share bits.
 * This may reduce the fingerprint's effective entropy - confirm before changing,
 * since other implementations of the digest must hash identically.
 * @param {string} str - Asset key
 * @param {number} numBuckets - Number of buckets in the filter
 * @returns {number} - Bucket index in range [0, numBuckets - 1]
 */
function primaryBucket(str, numBuckets) {
  return fnv1a(str) % numBuckets
}
/**
 * Compute the other candidate bucket for a fingerprint (partial-key cuckoo hashing):
 *   alternate = bucket XOR hash(fingerprint)
 * Only the fingerprint is needed, so a stored entry can be relocated without
 * knowing the original item.
 *
 * The mapping is its own inverse - alternateBucket(alternateBucket(b, fp), fp) == b -
 * provided numBuckets is a power of 2, because the XOR is confined to the
 * low bits selected by (numBuckets - 1).
 * @param {number} bucket - Current bucket index
 * @param {number} fp - Fingerprint stored (or to be stored) in that bucket
 * @param {number} numBuckets - Number of buckets (must be a power of 2)
 * @returns {number} - Index of the alternate bucket
 */
function alternateBucket(bucket, fp, numBuckets) {
  const mask = numBuckets - 1
  // Hash the fingerprint for spread; forcing the lowest bit on keeps the XOR
  // step non-zero so the alternate differs from the current bucket
  const step = (fnv1a(String(fp)) | 1) & mask
  return (bucket ^ step) & mask
}
/**
 * CuckooFilter - a space-efficient probabilistic set of strings.
 *
 * Every item is reduced to a small fingerprint that is stored in one of two
 * candidate buckets. lookup() may report false positives, but an item whose
 * insert() returned true is reported as present until it is deleted.
 */
export class CuckooFilter {
  /**
   * Create a new, empty Cuckoo filter
   * @param {number} capacity - Expected number of items (will be rounded up)
   */
  constructor(capacity = 64) {
    // Assume a ~95% fill rate: capacity / (BUCKET_SIZE * 0.95) buckets are needed.
    // The count is rounded up to a power of 2 (minimum 4) because alternateBucket()
    // relies on masking with (numBuckets - 1).
    const minBuckets = Math.ceil(capacity / (BUCKET_SIZE * 0.95))
    this.numBuckets = nextPowerOf2(Math.max(minBuckets, 4))
    this.bucketMask = this.numBuckets - 1
    // Flat storage: bucket b occupies slots [b * BUCKET_SIZE, (b + 1) * BUCKET_SIZE)
    this.buckets = new Uint16Array(this.numBuckets * BUCKET_SIZE)
    this.count = 0
  }

  /**
   * Insert an item into the filter
   * @param {string} item - The item to insert (e.g., "src/css/main.css:Pw3rT8vL")
   * @returns {boolean} - True if inserted, false if filter is full
   *   (on failure the filter is left exactly as it was before the call)
   */
  insert(item) {
    const fp = fingerprint(item)
    const i1 = primaryBucket(item, this.numBuckets)
    const i2 = alternateBucket(i1, fp, this.numBuckets)

    // Fast path: a free slot in either candidate bucket
    if (this._insertIntoBucket(i1, fp) || this._insertIntoBucket(i2, fp)) {
      this.count++
      return true
    }

    // Both buckets are full: evict entries along a random chain until one
    // lands in a free slot. Snapshot the table so a failed chain can be undone.
    const savedBuckets = this.buckets.slice()
    let bucket = Math.random() < 0.5 ? i1 : i2
    let currentFp = fp

    for (let kick = 0; kick < MAX_KICKS; kick++) {
      // Swap the fingerprint we are holding with a random resident of the bucket
      const slot = bucket * BUCKET_SIZE + Math.floor(Math.random() * BUCKET_SIZE)
      const evictedFp = this.buckets[slot]
      this.buckets[slot] = currentFp
      currentFp = evictedFp

      // The evicted fingerprint may only move to its own alternate bucket
      bucket = alternateBucket(bucket, currentFp, this.numBuckets)
      if (this._insertIntoBucket(bucket, currentFp)) {
        this.count++
        return true
      }
    }

    // Filter is too full - roll back the partial eviction chain and report failure
    this.buckets.set(savedBuckets)
    return false
  }

  /**
   * Check if an item might be in the filter
   * @param {string} item - The item to look up
   * @returns {boolean} - True if item might be present (possible false positive),
   *   false if item is definitely not present
   */
  lookup(item) {
    const fp = fingerprint(item)
    const i1 = primaryBucket(item, this.numBuckets)
    const i2 = alternateBucket(i1, fp, this.numBuckets)
    return this._bucketContains(i1, fp) || this._bucketContains(i2, fp)
  }

  /**
   * Remove one occurrence of an item from the filter
   * @param {string} item - The item to remove
   * @returns {boolean} - True if removed, false if not found
   */
  delete(item) {
    const fp = fingerprint(item)
    const i1 = primaryBucket(item, this.numBuckets)
    const i2 = alternateBucket(i1, fp, this.numBuckets)
    if (this._removeFromBucket(i1, fp) || this._removeFromBucket(i2, fp)) {
      this.count--
      return true
    }
    return false
  }

  /**
   * Serialize the filter to a compact binary format
   * Format: [version:1][numBuckets:2][count:2][fingerprints as 16-bit values...]
   * All multi-byte values are big-endian. Fingerprints use 16-bit storage for
   * simplicity rather than being bit-packed.
   * @returns {Uint8Array} - Serialized filter
   * @throws {RangeError} - If numBuckets does not fit the 16-bit header field
   */
  serialize() {
    // The header stores numBuckets in two bytes; a larger value would be silently
    // truncated (e.g. 65536 -> 0) and produce a digest that cannot be read back.
    if (this.numBuckets > 0xffff) {
      throw new RangeError(`Cache digest too large to serialize: ${this.numBuckets} buckets (max 65535)`)
    }

    const numFingerprints = this.numBuckets * BUCKET_SIZE
    const buffer = new Uint8Array(5 + numFingerprints * 2)

    // Header: version (1 byte) + numBuckets (2 bytes) + count (2 bytes) = 5 bytes
    buffer[0] = 1 // Version
    buffer[1] = (this.numBuckets >> 8) & 0xff
    buffer[2] = this.numBuckets & 0xff
    buffer[3] = (this.count >> 8) & 0xff
    buffer[4] = this.count & 0xff

    // Data: every slot (including empty ones) as a 16-bit big-endian value
    for (let i = 0; i < numFingerprints; i++) {
      const fp = this.buckets[i]
      const offset = 5 + i * 2
      buffer[offset] = (fp >> 8) & 0xff
      buffer[offset + 1] = fp & 0xff
    }
    return buffer
  }

  /**
   * Serialize to base64 string (for cookie storage)
   * @returns {string} - Base64-encoded filter (URL-safe alphabet, no padding)
   */
  toBase64() {
    const bytes = this.serialize()
    if (typeof Buffer !== 'undefined' && typeof btoa === 'undefined') {
      // Node.js environment without btoa
      return Buffer.from(bytes).toString('base64url')
    }

    // Browser or Node.js 16+ with btoa.
    // Build the binary string in chunks: passing every byte as a separate
    // argument in one call throws a RangeError once the filter is large enough
    // to exceed the engine's argument/stack limit.
    const CHUNK_SIZE = 0x8000
    let binary = ''
    for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
      binary += String.fromCharCode.apply(null, bytes.subarray(start, start + CHUNK_SIZE))
    }
    // Convert to URL-safe base64 (replace + with -, / with _, drop padding)
    return btoa(binary).replace(/\+/g, '-').replace(/\//g, '_').replace(/=/g, '')
  }

  /**
   * Deserialize from binary format
   * A buffer that ends before all slots have been read is tolerated: the missing
   * slots are left empty.
   * @param {Uint8Array} buffer - Serialized filter
   * @returns {CuckooFilter} - Restored filter
   * @throws {Error} - If the header is too short, has an unknown version, or
   *   declares a bucket count that is zero or not a power of 2
   */
  static deserialize(buffer) {
    if (buffer.length < 5) {
      throw new Error('Invalid cache digest: too short')
    }
    const version = buffer[0]
    if (version !== 1) {
      throw new Error(`Invalid cache digest version: ${version}`)
    }
    const numBuckets = (buffer[1] << 8) | buffer[2]
    const count = (buffer[3] << 8) | buffer[4]

    // The bucket arithmetic (XOR + mask) is only valid for power-of-2 sizes;
    // anything else would make lookups on the restored filter meaningless.
    if (numBuckets === 0 || (numBuckets & (numBuckets - 1)) !== 0) {
      throw new Error(`Invalid cache digest: bucket count ${numBuckets} is not a power of 2`)
    }

    // Start from a minimal filter and overwrite its geometry, rather than
    // allocating a full-size table that is thrown away immediately
    const filter = new CuckooFilter(0)
    const numFingerprints = numBuckets * BUCKET_SIZE
    filter.numBuckets = numBuckets
    filter.bucketMask = numBuckets - 1
    filter.buckets = new Uint16Array(numFingerprints)
    filter.count = count

    // Read fingerprints as 16-bit big-endian values, stopping at the last
    // complete pair of bytes that is actually present
    const available = Math.min(numFingerprints, Math.floor((buffer.length - 5) / 2))
    for (let i = 0; i < available; i++) {
      const offset = 5 + i * 2
      filter.buckets[i] = (buffer[offset] << 8) | buffer[offset + 1]
    }
    return filter
  }

  /**
   * Deserialize from base64 string
   * @param {string} base64 - Base64-encoded filter (URL-safe or standard alphabet)
   * @returns {CuckooFilter} - Restored filter
   */
  static fromBase64(base64) {
    let bytes
    if (typeof atob !== 'undefined') {
      // atob needs the standard alphabet and padding restored
      let normalized = base64.replace(/-/g, '+').replace(/_/g, '/')
      while (normalized.length % 4) {
        normalized += '='
      }
      const binary = atob(normalized)
      bytes = new Uint8Array(binary.length)
      for (let i = 0; i < binary.length; i++) {
        bytes[i] = binary.charCodeAt(i)
      }
    } else {
      // Node.js environment
      bytes = new Uint8Array(Buffer.from(base64, 'base64url'))
    }
    return CuckooFilter.deserialize(bytes)
  }

  // Private helper methods

  /** Store fp in the first empty slot of the bucket; returns false if the bucket is full. */
  _insertIntoBucket(bucketIndex, fp) {
    const offset = bucketIndex * BUCKET_SIZE
    for (let i = 0; i < BUCKET_SIZE; i++) {
      if (this.buckets[offset + i] === EMPTY_FINGERPRINT) {
        this.buckets[offset + i] = fp
        return true
      }
    }
    return false
  }

  /** Report whether any slot of the bucket holds fp. */
  _bucketContains(bucketIndex, fp) {
    const offset = bucketIndex * BUCKET_SIZE
    for (let i = 0; i < BUCKET_SIZE; i++) {
      if (this.buckets[offset + i] === fp) {
        return true
      }
    }
    return false
  }

  /** Clear the first slot of the bucket that holds fp; returns false if none does. */
  _removeFromBucket(bucketIndex, fp) {
    const offset = bucketIndex * BUCKET_SIZE
    for (let i = 0; i < BUCKET_SIZE; i++) {
      if (this.buckets[offset + i] === fp) {
        this.buckets[offset + i] = EMPTY_FINGERPRINT
        return true
      }
    }
    return false
  }

  /**
   * Get filter statistics
   * @returns {object} - Geometry, load factor, estimated false positive rate
   *   (2 * BUCKET_SIZE / 2^FINGERPRINT_BITS) and serialized size in bytes
   */
  get stats() {
    const totalSlots = this.numBuckets * BUCKET_SIZE
    const usedSlots = this.count
    return {
      numBuckets: this.numBuckets,
      bucketSize: BUCKET_SIZE,
      totalSlots,
      usedSlots,
      loadFactor: usedSlots / totalSlots,
      fingerprintBits: FINGERPRINT_BITS,
      estimatedFalsePositiveRate: Math.pow(2, -FINGERPRINT_BITS) * BUCKET_SIZE * 2,
      serializedSizeBytes: 5 + totalSlots * 2, // 5 byte header + 2 bytes per slot
    }
  }
}
/**
 * Round up to the next power of 2.
 * Returns the smallest power of 2 that is >= n. Values of n at or below 1
 * (including 0 and negatives) yield 1, and fractional values are rounded up
 * correctly. Doubling is used instead of 32-bit shifts so inputs above 2^31
 * do not wrap around.
 * @param {number} n - Lower bound for the result
 * @returns {number} - A power of 2 (>= 1) that is >= n
 */
function nextPowerOf2(n) {
  let power = 1
  while (power < n) {
    power *= 2
  }
  return power
}
/**
 * Create a cache digest from a list of asset keys
 *
 * Duplicate keys are inserted only once: repeats add no information to a
 * membership filter, and many copies of one key can never fit because they all
 * map to the same two buckets. If a key cannot be placed, the filter is rebuilt
 * with double the capacity; after a bounded number of rebuilds the last filter
 * is returned as-is (best effort), so this function never throws for being full.
 * @param {string[]} assetKeys - Array of "name:hash" strings
 * @returns {CuckooFilter} - Populated filter
 */
export function createCacheDigest(assetKeys) {
  const MAX_GROWTH_STEPS = 4 // capacity grows at most 2^4 = 16x
  const uniqueKeys = [...new Set(assetKeys)]
  let capacity = uniqueKeys.length

  for (let step = 0; ; step++) {
    const filter = new CuckooFilter(capacity)
    const isLastStep = step === MAX_GROWTH_STEPS
    let allInserted = true

    for (const key of uniqueKeys) {
      if (!filter.insert(key)) {
        allInserted = false
        // Abandon this size early, except on the final attempt where we keep
        // going so the returned filter holds as many keys as possible
        if (!isLastStep) break
      }
    }

    if (allInserted || isLastStep) {
      return filter
    }
    capacity *= 2
  }
}
// Export the internal hash helpers and tuning constants for testing
export { fnv1a, fingerprint, primaryBucket, alternateBucket, FINGERPRINT_BITS, BUCKET_SIZE }