UNPKG

@tanstack/db-ivm

Version:

Incremental View Maintenance for TanStack DB based on Differential Dataflow

375 lines (343 loc) 12.4 kB
/**
 * # Direct Join Algorithms for Incremental View Maintenance
 *
 * High-performance join operations implementing all join types (inner, left, right, full, anti)
 * with minimal state and optimized performance.
 *
 * ## Algorithm
 *
 * For each tick, the algorithm processes incoming changes (deltas) and emits join results:
 *
 * 1. **Build deltas**: Create delta indexes from input messages using `Index.fromMultiSets()`
 * 2. **Inner results**: Emit `ΔA⋈B_old + A_old⋈ΔB + ΔA⋈ΔB` (matched pairs)
 * 3. **Outer results**: For unmatched rows, emit null-extended tuples:
 *    - New unmatched rows from deltas (when opposite side empty)
 *    - Presence transitions: when key goes `0→>0` (retract nulls) or `>0→0` (emit nulls)
 * 4. **Update state**: Append deltas to indexes (consolidated multiplicity tracking automatic)
 *
 * **Consolidated multiplicity tracking** enables O(1) presence checks instead of scanning index buckets.
 *
 * ## State
 *
 * **Indexes** store the actual data:
 * - `indexA: Index<K, V1>` - all left-side rows accumulated over time
 * - `indexB: Index<K, V2>` - all right-side rows accumulated over time
 *
 * **Consolidated multiplicity tracking** (built into Index):
 * - Each Index maintains sum of multiplicities per key internally
 * - Provides O(1) presence checks: `index.hasPresence(key)` and `index.getConsolidatedMultiplicity(key)`
 * - Avoids scanning entire index buckets just to check if key has any rows
 *
 * ## Join Types
 *
 * - **Inner**: Standard delta terms only
 * - **Outer**: Inner results + null-extended unmatched rows with transition handling
 * - **Anti**: Unmatched rows only (no inner results)
 *
 * ## Key Optimizations
 *
 * - **No temp copying**: Uses `(A⊎ΔA)⋈ΔB = A⋈ΔB ⊎ ΔA⋈ΔB` distributive property
 * - **Early-out checks**: Skip phases when no deltas present
 * - **Zero-entry pruning**: Keep maps compact, O(distinct keys) memory
 * - **Final presence logic**: Avoid emit→retract churn within same tick
 *
 * ## Correctness
 *
 * - **Ordering**: Pre-append snapshots for emissions, post-emit state updates
 * - **Presence**: Key matched iff mass ≠ 0, transitions trigger null handling
 * - **Bag semantics**: Proper multiplicity handling including negatives
 */

import { BinaryOperator, DifferenceStreamWriter } from '../graph.js'
import { StreamBuilder } from '../d2.js'
import { MultiSet } from '../multiset.js'
import { Index } from '../indexes.js'
import type { DifferenceStreamReader } from '../graph.js'
import type { IStreamBuilder, KeyValue, PipedOperator } from '../types.js'

/**
 * Type of join to perform
 */
export type JoinType = `inner` | `left` | `right` | `full` | `anti`

/**
 * Binary operator that joins two keyed input streams.
 *
 * It keeps one `Index` per input (`#indexA` for the left side, `#indexB` for
 * the right side) plus the join mode chosen at construction time. Each call
 * to `run()` turns the pending input messages into delta indexes, emits the
 * results required by the mode, and only afterwards folds the deltas into the
 * stored indexes.
 */
export class JoinOperator<K, V1, V2> extends BinaryOperator<
  [K, V1] | [K, V2] | [K, [V1, V2]] | [K, [V1 | null, V2 | null]]
> {
  #indexA = new Index<K, V1>()
  #indexB = new Index<K, V2>()
  #mode: JoinType

  /**
   * @param id - Operator id, forwarded to the base operator
   * @param inputA - Reader for the left input stream
   * @param inputB - Reader for the right input stream
   * @param output - Writer the join results are sent to
   * @param mode - Join type to perform; defaults to `inner`
   */
  constructor(
    id: number,
    inputA: DifferenceStreamReader<[K, V1]>,
    inputB: DifferenceStreamReader<[K, V2]>,
    output: DifferenceStreamWriter<any>,
    mode: JoinType = `inner`,
  ) {
    super(id, inputA, inputB, output)
    this.#mode = mode
  }

  /**
   * Processes the pending messages of both inputs for one tick.
   *
   * Returns without emitting or touching state when both deltas are empty.
   * Otherwise the mode decides which result groups are produced (inner,
   * left-outer/anti, right-outer), the deltas are appended to the stored
   * indexes, and the collected results are sent if there are any.
   */
  run(): void {
    // Left input is read before the right input, as in the original order.
    const deltaA = Index.fromMultiSets<K, V1>(
      this.inputAMessages() as Array<MultiSet<[K, V1]>>,
    )
    const deltaB = Index.fromMultiSets<K, V2>(
      this.inputBMessages() as Array<MultiSet<[K, V2]>>,
    )

    // Nothing arrived on either side: no output and no state change.
    const nothingChanged = deltaA.size === 0 && deltaB.size === 0
    if (nothingChanged) return

    // Translate the mode into the result groups it requires.
    const mode = this.#mode
    const wantsInner = mode !== `anti`
    const wantsLeftOuter = mode === `left` || mode === `full` || mode === `anti`
    const wantsRightOuter = mode === `right` || mode === `full`

    const results = new MultiSet<any>()

    if (wantsInner) {
      this.emitInnerResults(deltaA, deltaB, results)
    }
    if (wantsLeftOuter) {
      this.emitLeftOuterResults(deltaA, deltaB, results)
    }
    if (wantsRightOuter) {
      this.emitRightOuterResults(deltaA, deltaB, results)
    }

    // IMPORTANT: every emission above reads the pre-append snapshots of
    // indexA/indexB. Only now are ALL deltas appended, unconditionally for
    // every key and regardless of whether presence flipped.
    this.#indexA.append(deltaA)
    this.#indexB.append(deltaB)

    const hasOutput = results.getInner().length > 0
    if (hasOutput) {
      this.output.sendData(results)
    }
  }

  /**
   * Adds the three delta terms of the inner join to `results`:
   * ΔA⋈B_old, A_old⋈ΔB and ΔA⋈ΔB. A term is skipped when a delta it needs
   * is empty.
   */
  private emitInnerResults(
    deltaA: Index<K, V1>,
    deltaB: Index<K, V2>,
    results: MultiSet<any>,
  ): void {
    const leftChanged = deltaA.size > 0
    const rightChanged = deltaB.size > 0

    // ΔA ⋈ B_old
    if (leftChanged) {
      results.extend(deltaA.join(this.#indexB))
    }
    // A_old ⋈ ΔB
    if (rightChanged) {
      results.extend(this.#indexA.join(deltaB))
    }
    // ΔA ⋈ ΔB
    if (leftChanged && rightChanged) {
      results.extend(deltaA.join(deltaB))
    }
  }

  /**
   * Adds null-extended left rows (`[key, [value, null]]`) to `results`.
   *
   * Two sources contribute:
   * 1. Rows in `deltaA` whose key has a zero consolidated multiplicity on the
   *    right side once `deltaB` is taken into account.
   * 2. Rows already stored in `#indexA` for keys whose right-side presence
   *    flips this tick: retracted when the right side becomes present,
   *    emitted when it becomes absent.
   */
  private emitLeftOuterResults(
    deltaA: Index<K, V1>,
    deltaB: Index<K, V2>,
    results: MultiSet<any>,
  ): void {
    // New left rows that end the tick without a right-side match.
    if (deltaA.size > 0) {
      for (const [key, leftRows] of deltaA.entriesIterators()) {
        const rightMassAfterTick =
          this.#indexB.getConsolidatedMultiplicity(key) +
          deltaB.getConsolidatedMultiplicity(key)
        if (rightMassAfterTick !== 0) continue

        for (const [leftValue, count] of leftRows) {
          if (count === 0) continue
          results.add([key, [leftValue, null]], count)
        }
      }
    }

    // Stored left rows affected by a right-side presence flip.
    if (deltaB.size > 0) {
      for (const key of deltaB.getPresenceKeys()) {
        const massBefore = this.#indexB.getConsolidatedMultiplicity(key)
        const massChange = deltaB.getConsolidatedMultiplicity(key)
        if (massChange === 0) continue

        const massAfter = massBefore + massChange
        const wasAbsent = massBefore === 0
        const isAbsent = massAfter === 0
        // Presence did not flip (absent→absent or present→present): nothing
        // to emit here. The index update itself still happens later in run().
        if (wasAbsent === isAbsent) continue

        // absent → present: left rows become matched, so retract their
        //   null-extended form (negative multiplicity).
        // present → absent: left rows become unmatched, so emit their
        //   null-extended form (positive multiplicity).
        for (const [leftValue, count] of this.#indexA.getIterator(key)) {
          if (count === 0) continue
          results.add([key, [leftValue, null]], wasAbsent ? -count : count)
        }
      }
    }
  }

  /**
   * Adds null-extended right rows (`[key, [null, value]]`) to `results`.
   *
   * Mirror image of `emitLeftOuterResults`:
   * 1. Rows in `deltaB` whose key has a zero consolidated multiplicity on the
   *    left side once `deltaA` is taken into account.
   * 2. Rows already stored in `#indexB` for keys whose left-side presence
   *    flips this tick: retracted when the left side becomes present,
   *    emitted when it becomes absent.
   */
  private emitRightOuterResults(
    deltaA: Index<K, V1>,
    deltaB: Index<K, V2>,
    results: MultiSet<any>,
  ): void {
    // New right rows that end the tick without a left-side match.
    if (deltaB.size > 0) {
      for (const [key, rightRows] of deltaB.entriesIterators()) {
        const leftMassAfterTick =
          this.#indexA.getConsolidatedMultiplicity(key) +
          deltaA.getConsolidatedMultiplicity(key)
        if (leftMassAfterTick !== 0) continue

        for (const [rightValue, count] of rightRows) {
          if (count === 0) continue
          results.add([key, [null, rightValue]], count)
        }
      }
    }

    // Stored right rows affected by a left-side presence flip.
    if (deltaA.size > 0) {
      for (const key of deltaA.getPresenceKeys()) {
        const massBefore = this.#indexA.getConsolidatedMultiplicity(key)
        const massChange = deltaA.getConsolidatedMultiplicity(key)
        if (massChange === 0) continue

        const massAfter = massBefore + massChange
        const wasAbsent = massBefore === 0
        const isAbsent = massAfter === 0
        // Presence did not flip (absent→absent or present→present): nothing
        // to emit here. The index update itself still happens later in run().
        if (wasAbsent === isAbsent) continue

        // absent → present: right rows become matched, so retract their
        //   null-extended form (negative multiplicity).
        // present → absent: right rows become unmatched, so emit their
        //   null-extended form (positive multiplicity).
        for (const [rightValue, count] of this.#indexB.getIterator(key)) {
          if (count === 0) continue
          results.add([key, [null, rightValue]], wasAbsent ? -count : count)
        }
      }
    }
  }
}

/**
 * Joins two input streams
 *
 * The returned piped operator wires a `JoinOperator` between the piped stream
 * (left side) and `other` (right side) and registers it on the shared graph.
 *
 * @param other - The other stream to join with
 * @param type - The type of join to perform
 * @throws Error when the two streams do not belong to the same graph
 */
export function join<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
  type: JoinType = `inner`,
): PipedOperator<T, KeyValue<K, [V1 | null, V2 | null]>> {
  return (
    stream: IStreamBuilder<T>,
  ): IStreamBuilder<KeyValue<K, [V1 | null, V2 | null]>> => {
    const graph = stream.graph
    if (graph !== other.graph) {
      throw new Error(`Cannot join streams from different graphs`)
    }

    // Output stream that will carry the join results.
    const writer = new DifferenceStreamWriter<
      KeyValue<K, [V1 | null, V2 | null]>
    >()
    const output = new StreamBuilder<KeyValue<K, [V1 | null, V2 | null]>>(
      graph,
      writer,
    )

    // Same acquisition order as before: operator id, left reader, right reader.
    const operatorId = graph.getNextOperatorId()
    const leftReader = stream.connectReader() as DifferenceStreamReader<
      KeyValue<K, V1>
    >
    const rightReader = other.connectReader()

    const operator = new JoinOperator<K, V1, V2>(
      operatorId,
      leftReader,
      rightReader,
      output.writer,
      type,
    )
    graph.addOperator(operator)

    return output
  }
}

/**
 * Joins two input streams (inner join)
 *
 * Delegates to `join` with mode `inner`; only the static result type is
 * narrowed so that neither side is nullable.
 *
 * @param other - The other stream to join with
 */
export function innerJoin<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
): PipedOperator<T, KeyValue<K, [V1, V2]>> {
  const piped = join(other, `inner`)
  return piped as unknown as PipedOperator<T, KeyValue<K, [V1, V2]>>
}

/**
 * Joins two input streams (anti join)
 *
 * Delegates to `join` with mode `anti`; only the static result type is
 * narrowed so that the right side is always `null`.
 *
 * @param other - The other stream to join with
 */
export function antiJoin<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
): PipedOperator<T, KeyValue<K, [V1, null]>> {
  const piped = join(other, `anti`)
  return piped as unknown as PipedOperator<T, KeyValue<K, [V1, null]>>
}

/**
 * Joins two input streams (left join)
 *
 * Delegates to `join` with mode `left`; only the static result type is
 * narrowed so that just the right side is nullable.
 *
 * @param other - The other stream to join with
 */
export function leftJoin<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
): PipedOperator<T, KeyValue<K, [V1, V2 | null]>> {
  const piped = join(other, `left`)
  return piped as unknown as PipedOperator<T, KeyValue<K, [V1, V2 | null]>>
}

/**
 * Joins two input streams (right join)
 *
 * Delegates to `join` with mode `right`; only the static result type is
 * narrowed so that just the left side is nullable.
 *
 * @param other - The other stream to join with
 */
export function rightJoin<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
): PipedOperator<T, KeyValue<K, [V1 | null, V2]>> {
  const piped = join(other, `right`)
  return piped as unknown as PipedOperator<T, KeyValue<K, [V1 | null, V2]>>
}

/**
 * Joins two input streams (full join)
 *
 * Delegates to `join` with mode `full`; both sides of the result may be
 * `null`.
 *
 * @param other - The other stream to join with
 */
export function fullJoin<
  K,
  V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never,
  V2,
  T,
>(
  other: IStreamBuilder<KeyValue<K, V2>>,
): PipedOperator<T, KeyValue<K, [V1 | null, V2 | null]>> {
  const piped = join(other, `full`)
  return piped as unknown as PipedOperator<
    T,
    KeyValue<K, [V1 | null, V2 | null]>
  >
}