UNPKG

@tanstack/db-ivm

Version:

Incremental View Maintenance for TanStack DB based on Differential Dataflow

177 lines (158 loc) 6.27 kB
import { MultiSet } from "./multiset.js"
import { HashIndex } from "./hashIndex.js"
import { ValueIndex } from "./valueIndex.js"
import { concatIterable, mapIterable } from "./utils.js"

/**
 * Keyed lookup structure mapping each key of a difference collection trace
 * to the `(value, multiplicity)` pairs that changed for it.
 * Operators such as join and reduce use it to work per key instead of
 * scanning the whole collection.
 */
export class Index<K, V> {
  /*
   * Storage is split across two inner indexes whose key sets never overlap:
   *
   *  - `#valueIndex` holds the keys that currently have exactly one value.
   *  - `#hashIndex` holds the keys that currently have several values and
   *    uses a hash to tell those values apart.
   *
   * Keeping single-valued keys out of the hash index avoids hashing for keys
   * that never receive a second value. `addValue` migrates a key between the
   * two indexes whenever its number of distinct values crosses that boundary.
   */
  #valueIndex: ValueIndex<K, V>
  #hashIndex: HashIndex<K, V>

  constructor() {
    this.#valueIndex = new ValueIndex<K, V>()
    this.#hashIndex = new HashIndex<K, V>()
  }

  /**
   * Renders both inner indexes inside an `Index(...)` wrapper.
   * @param indent - Forwarded unchanged to the `toString` of each inner index.
   */
  toString(indent = false): string {
    const singleValued = this.#valueIndex.toString(indent)
    const multiValued = this.#hashIndex.toString(indent)
    return `Index(\n ${singleValued},\n ${multiValued}\n)`
  }

  /**
   * Looks up every `(value, multiplicity)` pair stored for `key`.
   * A key held by the value index yields a one-element array; any other key
   * is delegated to the hash index.
   */
  get(key: K): Array<[V, number]> {
    return this.#valueIndex.has(key)
      ? [this.#valueIndex.get(key)!]
      : this.#hashIndex.get(key)
  }

  /**
   * Looks up the multiplicity recorded for `key`.
   *
   * NOTE(review): when `key` lives in the value index the `value` argument is
   * not consulted; the multiplicity of the single stored value is returned.
   * `value` is only used when the lookup falls through to the hash index.
   */
  getMultiplicity(key: K, value: V): number {
    return this.#valueIndex.has(key)
      ? this.#valueIndex.getMultiplicity(key)
      : this.#hashIndex.getMultiplicity(key, value)
  }

  /**
   * Flat view over the whole index: the entries of the value index followed
   * by the entries produced by the hash index's `entriesIterator()`.
   * @returns An iterable of `[key, [value, multiplicity]]` pairs.
   */
  #entries(): Iterable<[K, [V, number]]> {
    const fromValueIndex = this.#valueIndex.entries()
    const fromHashIndex = this.#hashIndex.entriesIterator()
    return concatIterable(fromValueIndex, fromHashIndex)
  }

  /**
   * Grouped view over the index: yields each key once, paired with an
   * iterable over that key's `(value, multiplicity)` pairs. Callers that
   * skip a key never touch its values, which makes this cheaper than
   * `#entries` when only some keys are of interest.
   * @returns An iterable of `[key, valuesForKey]` pairs.
   */
  *#entriesIterators(): Iterable<[K, Iterable<[V, number]>]> {
    // Single-valued keys: wrap the lone pair in a one-entry Map so it can be iterated.
    for (const [key, [onlyValue, onlyMultiplicity]] of this.#valueIndex.entries()) {
      yield [key, new Map<V, number>([[onlyValue, onlyMultiplicity]])]
    }

    // Multi-valued keys: strip the hash and expose only (value, multiplicity).
    for (const [key, hashedValues] of this.#hashIndex.entries()) {
      yield [
        key,
        mapIterable(hashedValues, ([_hash, [val, mult]]) => [val, mult]),
      ]
    }
  }

  /** Reports whether either inner index holds `key`. */
  has(key: K): boolean {
    if (this.#valueIndex.has(key)) {
      return true
    }
    return this.#hashIndex.has(key)
  }

  /** Sum of the sizes reported by the two inner indexes. */
  get size(): number {
    const singleValued = this.#valueIndex.size
    const multiValued = this.#hashIndex.size
    return singleValued + multiValued
  }

  /**
   * Records a `(value, multiplicity)` pair for `key`, moving the key between
   * the value index and the hash index when needed.
   * @throws Error if `key` is found in both inner indexes at once.
   */
  addValue(key: K, value: [V, number]): void {
    const inValueIndex = this.#valueIndex.has(key)
    const inHashIndex = this.#hashIndex.has(key)

    if (inValueIndex && inHashIndex) {
      throw new Error(
        `Key ${key} is contained in both the value index and the hash index. This should never happen because they should have disjoint keysets.`
      )
    }

    if (inValueIndex) {
      // The key currently has a single value. Let the value index try to
      // absorb the addition; if it throws, the addition is treated as a
      // second, distinct value for this key.
      try {
        this.#valueIndex.addValue(key, value)
      } catch {
        // Promote the key: move its existing pair to the hash index first,
        // then add the new pair next to it.
        const previous = this.#valueIndex.get(key)!
        this.#valueIndex.delete(key)
        this.#hashIndex.addValue(key, previous)
        this.#hashIndex.addValue(key, value)
      }
      return
    }

    if (inHashIndex) {
      // The key currently has several values. The hash index returns a pair
      // when this addition leaves the key with one remaining value; in that
      // case the key is demoted back to the value index.
      const lastRemaining = this.#hashIndex.addValue(key, value)
      if (lastRemaining) {
        this.#hashIndex.delete(key)
        this.#valueIndex.addValue(key, lastRemaining)
      }
      return
    }

    // Key not seen before: it starts out in the value index.
    this.#valueIndex.addValue(key, value)
  }

  /** Adds every entry of `other` to this index through `addValue`. */
  append(other: Index<K, V>): void {
    for (const [key, pair] of other.#entries()) {
      this.addValue(key, pair)
    }
  }

  /**
   * Joins this index with `other` on key.
   * For every key present in both, each value on this side is paired with
   * each value on the other side; the pair's multiplicity is the product of
   * the two multiplicities. Pairs where either multiplicity is zero are
   * skipped.
   * @returns A `MultiSet` of `[key, [thisValue, otherValue]]` entries.
   */
  join<V2>(other: Index<K, V2>): MultiSet<[K, [V, V2]]> {
    const joined: Array<[[K, [V, V2]], number]> = []

    // Shared by both loop orders below.
    const emit = (
      key: K,
      left: V,
      leftMul: number,
      right: V2,
      rightMul: number
    ): void => {
      if (leftMul === 0 || rightMul === 0) {
        return
      }
      joined.push([[key, [left, right]], leftMul * rightMul])
    }

    // Walk the keys of whichever index is smaller and probe the other one.
    if (this.size <= other.size) {
      for (const [key, leftValues] of this.#entriesIterators()) {
        if (other.has(key)) {
          const rightValues = other.get(key)
          for (const [left, leftMul] of leftValues) {
            for (const [right, rightMul] of rightValues) {
              emit(key, left, leftMul, right, rightMul)
            }
          }
        }
      }
    } else {
      for (const [key, rightValues] of other.#entriesIterators()) {
        if (this.has(key)) {
          const leftValues = this.get(key)
          for (const [right, rightMul] of rightValues) {
            for (const [left, leftMul] of leftValues) {
              emit(key, left, leftMul, right, rightMul)
            }
          }
        }
      }
    }

    return new MultiSet(joined)
  }
}