@socketsupply/socket
A Cross-Platform, Native Runtime for Desktop and Mobile Apps — Create apps using HTML, CSS, and JavaScript. Written from the ground up to be small and maintainable.
import { isBufferLike, toBuffer } from '../util.js'
import { Buffer } from '../buffer.js'
import { createDigest } from '../crypto.js'
import { Packet, PacketPublish, PACKET_BYTES, sha256 } from './packets.js'
/**
* Tries to convert input to a `Buffer` if possible, otherwise returns `null`.
* Plain objects are serialized to JSON before conversion.
* @ignore
* @param {(string|Buffer|Uint8Array|object)?} m
* @return {Buffer?}
*/
function toBufferMaybe (m) {
return isBufferLike(m) || typeof m === 'string'
? toBuffer(m)
: m && typeof m === 'object'
? toBuffer(JSON.stringify(m))
: null
}
/**
* Default maximum number of entries in a `Cache` instance, bounding the
* cache to roughly 16 MB of packet data.
*/
export const DEFAULT_MAX_SIZE = Math.ceil(16_000_000 / PACKET_BYTES)
/**
* @typedef {Packet} CacheEntry
* @typedef {function(CacheEntry, CacheEntry): number} CacheEntrySiblingResolver
*/
/**
* Default cache sibling resolver that computes the delta between
* two entries' clocks.
* @param {CacheEntry} a
* @param {CacheEntry} b
* @return {number}
*/
export function defaultSiblingResolver (a, b) {
return a.clock - b.clock
}
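// A minimal usage sketch (not part of the module): the resolver is a
// standard comparator, so hypothetical sibling entries with numeric `clock`
// fields can be ordered with `Array.prototype.sort`:
//
//   const siblings = [{ clock: 3 }, { clock: 1 }, { clock: 2 }]
//   siblings.sort(defaultSiblingResolver)
//   // => entries ordered by ascending clock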
/**
* Internal mapping of packet IDs to packet data used by `Cache`.
*/
export class CacheData extends Map {}
/**
* A class for storing a cache of packets by ID. This class includes a scheme
* for reconciling disjointed packet caches in a large distributed system. The
* following are key design characteristics.
*
* Space Efficiency: This scheme can be space-efficient because it summarizes
* the cache's contents in a compact binary format. By sharing these summaries,
* two computers can quickly determine whether their caches have common data or
* differences.
*
* Bandwidth Efficiency: Sharing summaries instead of the full data can save
* bandwidth. If the differences between the caches are small, sharing summaries
* allows for more efficient data synchronization.
*
* Time Efficiency: The time efficiency of this scheme depends on the size of
* the cache and the differences between the two caches. Generating summaries
* and comparing them can be faster than transferring and comparing the entire
* dataset, especially for large caches.
*
* Complexity: The scheme introduces some complexity due to the need to encode
* and decode summaries. In some cases the overhead introduced by this
* complexity might outweigh the benefits, especially if the caches are
* relatively small; in that case a direct query is the better choice.
*
* Data Synchronization Needs: The efficiency also depends on the data
* synchronization needs. If the data needs to be synchronized in real-time,
* this scheme might not be suitable. It's more appropriate for cases where
* periodic or batch synchronization is acceptable.
*
* Scalability: The scheme's efficiency can vary depending on the scale of
* the system. As the number of cache entries or computers involved
* increases, the cost of generating and comparing summaries stays bounded,
* because the cache itself is capped at 16 MB. A usage sketch of this
* reconciliation flow follows the class definition below.
*
*/
export class Cache {
data = new CacheData()
maxSize = DEFAULT_MAX_SIZE
static HASH_SIZE_BYTES = 20
static HASH_EMPTY = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
/**
* `Cache` class constructor.
* @param {CacheData?} [data]
* @param {CacheEntrySiblingResolver} [siblingResolver]
*/
constructor (data = new CacheData(), siblingResolver = defaultSiblingResolver) {
if (data instanceof Map) {
this.data = data
} else if (Array.isArray(data)) {
this.data = new CacheData(data)
}
if (typeof siblingResolver === 'function') {
this.siblingResolver = siblingResolver
} else {
this.siblingResolver = defaultSiblingResolver
}
}
/**
* Readonly count of the number of cache entries.
* @type {number}
*/
get size () {
return this.data.size
}
/**
* Readonly size of the cache in bytes.
* @type {number}
*/
get bytes () {
return this.data.size * PACKET_BYTES
}
/**
* Inserts a `CacheEntry` value `v` into the cache at key `k`. Returns
* `false` if the value is not a publish packet or the key already exists.
* When the cache is full, the entry with the oldest timestamp is ejected
* to make room.
* @param {string} k
* @param {CacheEntry} v
* @return {boolean}
*/
insert (k, v) {
if (v.type !== PacketPublish.type) return false
if (this.has(k)) return false
if (this.data.size === this.maxSize) {
// eject the entry with the oldest timestamp to make room
const oldest = [...this.data.values()]
.sort((a, b) => a.timestamp - b.timestamp)
.shift()
this.data.delete(Buffer.from(oldest.packetId).toString('hex'))
if (this.onEjected) this.onEjected(oldest)
}
v.timestamp = Date.now()
if (!v.ttl) v.ttl = Packet.ttl // use default TTL if none provided
this.data.set(k, v)
if (this.onInsert) this.onInsert(k, v)
return true
}
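// A minimal usage sketch (not part of the module): entries are keyed by the
// hex encoding of their packet ID. `packet` here is a hypothetical
// `PacketPublish` instance:
//
//   const cache = new Cache()
//   const key = Buffer.from(packet.packetId).toString('hex')
//   cache.insert(key, packet) // => true on first insert
//   cache.insert(key, packet) // => false, key already present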
/**
* Gets a `CacheEntry` value at key `k`.
* @param {string} k
* @return {CacheEntry?}
*/
get (k) {
return this.data.get(k)
}
/**
* Deletes the `CacheEntry` at key `k`, if it exists.
* @param {string} k
* @return {boolean}
*/
delete (k) {
if (!this.has(k)) return false
this.data.delete(k)
return true
}
/**
* Predicate to determine if cache contains an entry at key `k`.
* @param {string} k
* @return {boolean}
*/
has (k) {
return this.data.has(k)
}
/**
* Composes a chain of fragmented packets into a single new `Packet`.
* @param {Packet} packet
* @param {Map} [source]
* @return {Promise<Packet?>}
*/
async compose (packet, source = this.data) {
let previous = packet
if (packet?.index > 0) previous = source.get(packet.previousId?.toString('hex'))
if (!previous) return null
const { meta, size, indexes, ts } = previous.message
// follow the chain to get the buffers in order
let bufs = [...source.values()].filter(p => {
if (!p.previousId) return false
return Buffer.from(p.previousId).compare(Buffer.from(previous.packetId)) === 0
})
if (!indexes || bufs.length < indexes) return null
bufs = bufs.sort((a, b) => a.index - b.index) // sort after confirming they are all there
// concat and then hash; the result should match the original
const messages = bufs.map(p => p.message)
const buffers = messages.map(toBufferMaybe).filter(Boolean)
const message = Buffer.concat(buffers, size)
if (!meta.ts) meta.ts = ts
// generate a new packet ID
const packetId = await sha256(Buffer.concat([Buffer.from(packet.previousId || ''), message]), { bytes: true })
return Packet.from({
...packet,
packetId,
message,
isComposed: true,
index: -1,
meta
})
}
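// A minimal usage sketch (not part of the module), assuming a hypothetical
// head packet whose `message` carries `{ meta, size, indexes, ts }` and
// fragment packets whose `previousId` points at the head's `packetId`:
//
//   const composed = await cache.compose(fragment)
//   if (composed) {
//     // composed.message is the fragments' messages concatenated in index
//     // order; composed.isComposed === true and composed.index === -1
//   }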
/**
* Computes the SHA-1 digest of `value`.
* @param {Buffer|Uint8Array|string} value
* @param {boolean} [toHex] - when `true`, return a hex string instead of a `Buffer`
* @return {Promise<Buffer|string>}
*/
async sha1 (value, toHex) {
const buf = await createDigest('SHA-1', isBufferLike(value) ? value : Buffer.from(value))
if (!toHex) return buf
return buf.toString('hex')
}
/**
*
* The summarize method returns a terse yet comparable summary of the cache
* contents.
*
* Think of the cache as a trie of hex characters: the summary returns a
* checksum for the current level of the trie and for each of its 16 children.
*
* This is similar to a Merkle tree, as equal subtrees can easily be detected
* without the need for further recursion. When the subtree checksums are
* inequivalent, further negotiation at lower levels may be required; this
* process continues until the two trees become synchronized.
*
* When the prefix is empty, the summary will return an array of 16 checksums;
* these checksums provide a way of comparing that subtree with other peers.
*
* When a variable-length hexadecimal prefix is provided, only cache
* member hashes sharing this prefix will be considered.
*
* For each hex character provided in the prefix, the trie descends by one
* level; each level divides the address space by 16. For example...
*
* ```
* Level 0 1 2
* ----------------
* 2b00
* aa0e ━┓ ━┓
* aa1b ┃ ┃
* aae3 ┃ ┃ ━┓
* aaea ┃ ┃ ┃
* aaeb ┃ ━┛ ━┛
* ab00 ┃ ━┓
* ab1e ┃ ┃
* ab2a ┃ ┃
* abef ┃ ┃
* abf0 ━┛ ━┛
* bff9
* ```
*
* @param {string} prefix - a string of lowercased hexadecimal characters
* @param {function(CacheEntry): boolean} [predicate] - optional filter over entries
* @return {Object}
*
*/
async summarize (prefix = '', predicate = o => true) {
// each level has 16 children (0x0-0xf)
const children = new Array(16).fill(null).map(_ => [])
// partition the cache into children
for (const [key, packet] of this.data.entries()) {
if (!key || !key.slice) continue
if (prefix.length && !key.startsWith(prefix)) continue
if (!predicate(packet)) continue
const hex = key.slice(prefix.length, prefix.length + 1)
if (children[parseInt(hex, 16)]) children[parseInt(hex, 16)].push(key)
}
// compute a checksum for all child members (deterministically)
// if the bucket is empty, return null
const buckets = await Promise.all(children.map(child => {
return child.length ? this.sha1(child.sort().join(''), true) : Promise.resolve(null)
}))
let hash
// compute a summary hash (checksum of all other hashes)
if (!buckets.every(b => b === null)) {
hash = await this.sha1(buckets.join(''), true)
} else {
hash = Cache.HASH_EMPTY
}
return { prefix, hash, buckets }
}
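// A minimal usage sketch (not part of the module): a summary is a plain
// object of the form { prefix, hash, buckets }, where `buckets` holds 16
// child checksums (or null for empty children):
//
//   const summary = await cache.summarize('aa')
//   summary.prefix     // => 'aa'
//   summary.hash       // => checksum over all non-empty child checksums
//   summary.buckets[0] // => checksum of keys prefixed 'aa0', or null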
/**
* The encodeSummary method provides a compact binary encoding of the output
* of summarize().
*
* @param {Object} summary - the output of calling summarize()
* @return {Buffer}
**/
static encodeSummary (summary) {
// prefix is variable-length hex string encoded with the first byte indicating the length
const prefixBin = Buffer.alloc(1 + Math.ceil(summary.prefix.length / 2))
prefixBin.writeUInt8(summary.prefix.length, 0)
const prefixHex = summary.prefix.length % 2 ? '0' + summary.prefix : summary.prefix
Buffer.from(prefixHex, 'hex').copy(prefixBin, 1)
// hash is the summary hash (checksum of all other hashes)
const hashBin = Buffer.from(summary.hash, 'hex')
// buckets are rows of { offset, sum } where the sum is not null
const bucketBin = Buffer.concat(summary.buckets.map((sum, offset) => {
// empty buckets are omitted from the encoding
if (!sum) return Buffer.alloc(0)
// write the child offset as a uint8
const offsetBin = Buffer.alloc(1)
offsetBin.writeUInt8(offset, 0)
return Buffer.concat([offsetBin, Buffer.from(sum, 'hex')])
}))
return Buffer.concat([prefixBin, hashBin, bucketBin])
}
/**
* The decodeSummary method decodes the output of encodeSummary()
*
* @param {Buffer} bin - the output of calling encodeSummary()
* @return {Object} summary
**/
static decodeSummary (bin) {
let o = 0 // byte offset
// prefix is variable-length hex string encoded with the first byte indicating the length
const plen = bin.readUint8(o++)
const prefix = bin.slice(o, o += Math.ceil(plen / 2)).toString('hex').slice(-plen)
// hash is the summary hash (checksum of all other hashes)
const hash = bin.slice(o, o += Cache.HASH_SIZE_BYTES).toString('hex')
// buckets are rows of { offset, sum } where the sum is not null
const buckets = new Array(16).fill(null)
while (o < bin.length) {
buckets[bin.readUint8(o++)] = bin.slice(o, o += Cache.HASH_SIZE_BYTES).toString('hex')
}
return { prefix, hash, buckets }
}
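// A minimal usage sketch (not part of the module): a summary survives an
// encode/decode round trip, so it can be sent over the wire as one small
// buffer:
//
//   const summary = await cache.summarize('aa')
//   const bin = Cache.encodeSummary(summary)
//   const decoded = Cache.decodeSummary(bin)
//   // decoded.prefix === summary.prefix and decoded.hash === summary.hash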
/**
* Tests that a summary hash is a valid 40-character hexadecimal (SHA-1)
* string.
*
* @param {string} hash
* @return {boolean}
*/
static isValidSummaryHashFormat (hash) {
return typeof hash === 'string' && /^[A-Fa-f0-9]{40}$/.test(hash)
}
}
export default Cache
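// A minimal end-to-end sketch of the reconciliation flow described above
// (not part of the module). Two peers exchange encoded summaries over some
// transport; `send` and `onSummary` are hypothetical placeholders for it:
//
//   const cache = new Cache()
//   // ... insert publish packets keyed by their hex packet IDs ...
//
//   // advertise the root summary
//   send(Cache.encodeSummary(await cache.summarize()))
//
//   // on receiving a peer summary, descend into differing buckets only
//   async function onSummary (bin) {
//     const remote = Cache.decodeSummary(bin)
//     const local = await cache.summarize(remote.prefix)
//     if (local.hash === remote.hash) return // subtrees are already equal
//     for (let i = 0; i < 16; i++) {
//       if (local.buckets[i] !== remote.buckets[i]) {
//         const prefix = remote.prefix + i.toString(16)
//         send(Cache.encodeSummary(await cache.summarize(prefix)))
//       }
//     }
//   }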