UNPKG

@tanstack/db-ivm

Version:

Incremental View Maintenance for TanStack DB based on Differential Dataflow

452 lines (411 loc) 15 kB
/** * # Optimized Index Data Structure * * Multi-level index that adapts storage strategy based on data patterns to minimize memory * usage, eliminate wasteful lookups, and avoid hashing whenever possible. * * ## Storage Strategy * * **Single value**: `IndexMap['key'] → [value, multiplicity]` (no hashing needed) * * **Multiple unprefixed values**: Direct ValueMap (avoids NO_PREFIX lookup) * ``` * IndexMap['key'] → ValueMap { hash(value1) → [value1, mult1], ... } * ``` * * **Values with prefixes**: PrefixMap uses prefix keys directly (no hashing) * ``` * IndexMap['key'] → PrefixMap { 'prefix1' → [value1, mult1], NO_PREFIX → ValueMap{...} } * ``` * * **Multiple values per prefix**: ValueMap within PrefixMap (hash only suffixes) * ``` * PrefixMap['prefix'] → ValueMap { hash(suffix1) → [full_value1, mult1], ... } * ``` * * ## Dynamic Evolution * * Structure automatically evolves as data is added: * - Single → ValueMap (when both values unprefixed) * - Single → PrefixMap (when at least one prefixed) * - ValueMap → PrefixMap (adding prefixed value to unprefixed) * * Prefixes extracted from array values: `['prefix', 'suffix']` → prefix='prefix' */ import { MultiSet } from "./multiset.js" import { hash } from "./hashing/index.js" import type { Hash } from "./hashing/index.js" // We use a symbol to represent the absence of a prefix, unprefixed values a stored // against this key. const NO_PREFIX = Symbol(`NO_PREFIX`) type NO_PREFIX = typeof NO_PREFIX // A single value is a tuple of the value and the multiplicity. type SingleValue<TValue> = [TValue, number] // Base map type for the index. Stores single values, prefix maps, or value maps against a key. type IndexMap<TKey, TValue, TPrefix> = Map< TKey, SingleValue<TValue> | PrefixMap<TValue, TPrefix> | ValueMap<TValue> > // Second level map type for the index, stores single values or value maps against a prefix. class PrefixMap<TValue, TPrefix> extends Map< TPrefix | NO_PREFIX, SingleValue<TValue> | ValueMap<TValue> > { /** * Add a value to the PrefixMap. Returns true if the map becomes empty after the operation. */ addValue(value: TValue, multiplicity: number): boolean { if (multiplicity === 0) return this.size === 0 const prefix = getPrefix<TValue, TPrefix>(value) const valueMapOrSingleValue = this.get(prefix) if (isSingleValue(valueMapOrSingleValue)) { const [currentValue, currentMultiplicity] = valueMapOrSingleValue const currentPrefix = getPrefix<TValue, TPrefix>(currentValue) if (currentPrefix !== prefix) { throw new Error(`Mismatching prefixes, this should never happen`) } if (currentValue === value || hash(currentValue) === hash(value)) { // Same value, update multiplicity const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { this.delete(prefix) } else { this.set(prefix, [value, newMultiplicity]) } } else { // Different suffixes, need to create ValueMap const valueMap = new ValueMap<TValue>() valueMap.set(hash(currentValue), valueMapOrSingleValue) valueMap.set(hash(value), [value, multiplicity]) this.set(prefix, valueMap) } } else if (valueMapOrSingleValue === undefined) { // No existing value for this prefix this.set(prefix, [value, multiplicity]) } else { // Existing ValueMap const isEmpty = valueMapOrSingleValue.addValue(value, multiplicity) if (isEmpty) { this.delete(prefix) } } return this.size === 0 } } // Third level map type for the index, stores single values or value maps against a hash. class ValueMap<TValue> extends Map<Hash, [TValue, number]> { /** * Add a value to the ValueMap. Returns true if the map becomes empty after the operation. * @param value - The full value to store * @param multiplicity - The multiplicity to add * @param hashKey - Optional hash key to use instead of hashing the full value (used when in PrefixMap context) */ addValue(value: TValue, multiplicity: number): boolean { if (multiplicity === 0) return this.size === 0 const key = hash(value) const currentValue = this.get(key) if (currentValue) { const [, currentMultiplicity] = currentValue const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { this.delete(key) } else { this.set(key, [value, newMultiplicity]) } } else { this.set(key, [value, multiplicity]) } return this.size === 0 } } /** * A map from a difference collection trace's keys -> (value, multiplicities) that changed. * Used in operations like join and reduce where the operation needs to * exploit the key-value structure of the data to run efficiently. */ export class Index<TKey, TValue, TPrefix = any> { /* * This index maintains a nested map of keys -> (value, multiplicities), where: * - initially the values are stored against the key as a single value tuple * - when a key gets additional values, the values are stored against the key in a * prefix map * - the prefix is extract where possible from values that are structured as * [rowPrimaryKey, rowValue], as they are in the Tanstack DB query pipeline. * - only when there are multiple values for a given prefix do we fall back to a * hash to identify identical values, storing them in a third level value map. */ #inner: IndexMap<TKey, TValue, TPrefix> constructor() { this.#inner = new Map() } /** * This method returns a string representation of the index. * @param indent - Whether to indent the string representation. * @returns A string representation of the index. */ toString(indent = false): string { return `Index(${JSON.stringify( [...this.entries()], undefined, indent ? 2 : undefined )})` } /** * The size of the index. */ get size(): number { return this.#inner.size } /** * This method checks if the index has a given key. * @param key - The key to check. * @returns True if the index has the key, false otherwise. */ has(key: TKey): boolean { return this.#inner.has(key) } /** * This method returns all values for a given key. * @param key - The key to get the values for. * @returns An array of value tuples [value, multiplicity]. */ get(key: TKey): Array<[TValue, number]> { return [...this.getIterator(key)] } /** * This method returns an iterator over all values for a given key. * @param key - The key to get the values for. * @returns An iterator of value tuples [value, multiplicity]. */ *getIterator(key: TKey): Iterable<[TValue, number]> { const mapOrSingleValue = this.#inner.get(key) if (isSingleValue(mapOrSingleValue)) { yield mapOrSingleValue } else if (mapOrSingleValue === undefined) { return } else if (mapOrSingleValue instanceof ValueMap) { // Direct ValueMap - all values have NO_PREFIX for (const valueTuple of mapOrSingleValue.values()) { yield valueTuple } } else { // PrefixMap - iterate through all prefixes for (const singleValueOrValueMap of mapOrSingleValue.values()) { if (isSingleValue(singleValueOrValueMap)) { yield singleValueOrValueMap } else { for (const valueTuple of singleValueOrValueMap.values()) { yield valueTuple } } } } } /** * This returns an iterator that iterates over all key-value pairs. * @returns An iterable of all key-value pairs (and their multiplicities) in the index. */ *entries(): Iterable<[TKey, [TValue, number]]> { for (const key of this.#inner.keys()) { for (const valueTuple of this.getIterator(key)) { yield [key, valueTuple] } } } /** * This method only iterates over the keys and not over the values. * Hence, it is more efficient than the `#entries` method. * It returns an iterator that you can use if you need to iterate over the values for a given key. * @returns An iterator of all *keys* in the index and their corresponding value iterator. */ *entriesIterators(): Iterable<[TKey, Iterable<[TValue, number]>]> { for (const key of this.#inner.keys()) { yield [key, this.getIterator(key)] } } /** * This method adds a value to the index. * @param key - The key to add the value to. * @param valueTuple - The value tuple [value, multiplicity] to add to the index. */ addValue(key: TKey, valueTuple: SingleValue<TValue>) { const [value, multiplicity] = valueTuple // If the multiplicity is 0, do nothing if (multiplicity === 0) return const mapOrSingleValue = this.#inner.get(key) if (mapOrSingleValue === undefined) { // First value for this key this.#inner.set(key, valueTuple) return } if (isSingleValue(mapOrSingleValue)) { // Handle transition from single value to map this.#handleSingleValueTransition( key, mapOrSingleValue, value, multiplicity ) return } if (mapOrSingleValue instanceof ValueMap) { // Handle existing ValueMap const prefix = getPrefix<TValue, TPrefix>(value) if (prefix !== NO_PREFIX) { // Convert ValueMap to PrefixMap since we have a prefixed value const prefixMap = new PrefixMap<TValue, TPrefix>() prefixMap.set(NO_PREFIX, mapOrSingleValue) prefixMap.set(prefix, valueTuple) this.#inner.set(key, prefixMap) } else { // Add to existing ValueMap const isEmpty = mapOrSingleValue.addValue(value, multiplicity) if (isEmpty) { this.#inner.delete(key) } } } else { // Handle existing PrefixMap const isEmpty = mapOrSingleValue.addValue(value, multiplicity) if (isEmpty) { this.#inner.delete(key) } } } /** * Handle the transition from a single value to either a ValueMap or PrefixMap */ #handleSingleValueTransition( key: TKey, currentSingleValue: SingleValue<TValue>, newValue: TValue, multiplicity: number ) { const [currentValue, currentMultiplicity] = currentSingleValue // Check for exact same value (reference equality) if (currentValue === newValue) { const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { this.#inner.delete(key) } else { this.#inner.set(key, [newValue, newMultiplicity]) } return } // Get prefixes for both values const newPrefix = getPrefix<TValue, TPrefix>(newValue) const currentPrefix = getPrefix<TValue, TPrefix>(currentValue) // Check if they're the same value by prefix/suffix comparison if ( currentPrefix === newPrefix && (currentValue === newValue || hash(currentValue) === hash(newValue)) ) { const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { this.#inner.delete(key) } else { this.#inner.set(key, [newValue, newMultiplicity]) } return } // Different values - choose appropriate map type if (currentPrefix === NO_PREFIX && newPrefix === NO_PREFIX) { // Both have NO_PREFIX, use ValueMap directly const valueMap = new ValueMap<TValue>() valueMap.set(hash(currentValue), currentSingleValue) valueMap.set(hash(newValue), [newValue, multiplicity]) this.#inner.set(key, valueMap) } else { // At least one has a prefix, use PrefixMap const prefixMap = new PrefixMap<TValue, TPrefix>() if (currentPrefix === newPrefix) { // Same prefix, different suffixes - need ValueMap within PrefixMap const valueMap = new ValueMap<TValue>() valueMap.set(hash(currentValue), currentSingleValue) valueMap.set(hash(newValue), [newValue, multiplicity]) prefixMap.set(currentPrefix, valueMap) } else { // Different prefixes - store as separate single values prefixMap.set(currentPrefix, currentSingleValue) prefixMap.set(newPrefix, [newValue, multiplicity]) } this.#inner.set(key, prefixMap) } } /** * This method appends another index to the current index. * @param other - The index to append to the current index. */ append(other: Index<TKey, TValue>): void { for (const [key, value] of other.entries()) { this.addValue(key, value) } } /** * This method joins two indexes. * @param other - The index to join with the current index. * @returns A multiset of the joined values. */ join<TValue2>( other: Index<TKey, TValue2> ): MultiSet<[TKey, [TValue, TValue2]]> { const result: Array<[[TKey, [TValue, TValue2]], number]> = [] // We want to iterate over the smaller of the two indexes to reduce the // number of operations we need to do. if (this.size <= other.size) { for (const [key, valueIt] of this.entriesIterators()) { if (!other.has(key)) continue const otherValues = other.get(key) for (const [val1, mul1] of valueIt) { for (const [val2, mul2] of otherValues) { if (mul1 !== 0 && mul2 !== 0) { result.push([[key, [val1, val2]], mul1 * mul2]) } } } } } else { for (const [key, otherValueIt] of other.entriesIterators()) { if (!this.has(key)) continue const values = this.get(key) for (const [val2, mul2] of otherValueIt) { for (const [val1, mul1] of values) { if (mul1 !== 0 && mul2 !== 0) { result.push([[key, [val1, val2]], mul1 * mul2]) } } } } } return new MultiSet(result) } } /** * This function extracts the prefix from a value. * @param value - The value to extract the prefix from. * @returns The prefix and the suffix. */ function getPrefix<TValue, TPrefix>(value: TValue): TPrefix | NO_PREFIX { // If the value is an array and the first element is a string or number, then the // first element is the prefix. This is used to distinguish between values without // the need for hashing unless there are multiple values for the same prefix. if ( Array.isArray(value) && (typeof value[0] === `string` || typeof value[0] === `number` || typeof value[0] === `bigint`) ) { return value[0] as TPrefix } return NO_PREFIX } /** * This function checks if a value is a single value. * @param value - The value to check. * @returns True if the value is a single value, false otherwise. */ function isSingleValue<TValue>( value: SingleValue<TValue> | unknown ): value is SingleValue<TValue> { return Array.isArray(value) }