UNPKG

@electric-sql/d2ts

Version:

D2TS is a TypeScript implementation of Differential Dataflow.

297 lines (277 loc) 8.59 kB
import { IStreamBuilder, PipedOperator, DataMessage, MessageType, KeyValue, } from '../types.js' import { DifferenceStreamReader, DifferenceStreamWriter, BinaryOperator, } from '../graph.js' import { StreamBuilder } from '../d2.js' import { MultiSet } from '../multiset.js' import { Antichain, Version } from '../order.js' import { Index } from '../version-index.js' import { negate } from './negate.js' import { map } from './map.js' import { concat } from './concat.js' /** * Type of join to perform */ export type JoinType = 'inner' | 'left' | 'right' | 'full' | 'anti' /** * Operator that joins two input streams */ export class JoinOperator<K, V1, V2> extends BinaryOperator< [K, V1] | [K, V2] | [K, [V1, V2]] > { #indexA = new Index<K, V1>() #indexB = new Index<K, V2>() constructor( id: number, inputA: DifferenceStreamReader<[K, V1]>, inputB: DifferenceStreamReader<[K, V2]>, output: DifferenceStreamWriter<[K, [V1, V2]]>, initialFrontier: Antichain, ) { super(id, inputA, inputB, output, initialFrontier) } run(): void { const deltaA = new Index<K, V1>() const deltaB = new Index<K, V2>() // Process input A for (const message of this.inputAMessages()) { if (message.type === MessageType.DATA) { const { version, collection } = message.data as DataMessage<[K, V1]> for (const [item, multiplicity] of collection.getInner()) { const [key, value] = item deltaA.addValue(key, version, [value, multiplicity]) } } else if (message.type === MessageType.FRONTIER) { const frontier = message.data as Antichain if (!this.inputAFrontier().lessEqual(frontier)) { throw new Error('Invalid frontier update') } this.setInputAFrontier(frontier) } } // Process input B for (const message of this.inputBMessages()) { if (message.type === MessageType.DATA) { const { version, collection } = message.data as DataMessage<[K, V2]> for (const [item, multiplicity] of collection.getInner()) { const [key, value] = item deltaB.addValue(key, version, [value, multiplicity]) } } else if (message.type === MessageType.FRONTIER) { const frontier = message.data as Antichain if (!this.inputBFrontier().lessEqual(frontier)) { throw new Error('Invalid frontier update') } this.setInputBFrontier(frontier) } } // Process results const results = new Map<Version, MultiSet<[K, [V1, V2]]>>() // Join deltaA with existing indexB for (const [version, collection] of deltaA.join(this.#indexB)) { const existing = results.get(version) || new MultiSet<[K, [V1, V2]]>() existing.extend(collection) results.set(version, existing) } // Append deltaA to indexA this.#indexA.append(deltaA) // Join existing indexA with deltaB for (const [version, collection] of this.#indexA.join(deltaB)) { const existing = results.get(version) || new MultiSet<[K, [V1, V2]]>() existing.extend(collection) results.set(version, existing) } // Send results for (const [version, collection] of results) { this.output.sendData(version, collection) } // Append deltaB to indexB this.#indexB.append(deltaB) // Update frontiers const inputFrontier = this.inputAFrontier().meet(this.inputBFrontier()) if (!this.outputFrontier.lessEqual(inputFrontier)) { throw new Error('Invalid frontier state') } if (this.outputFrontier.lessThan(inputFrontier)) { this.outputFrontier = inputFrontier this.output.sendFrontier(this.outputFrontier) this.#indexA.compact(this.outputFrontier) this.#indexB.compact(this.outputFrontier) } } } /** * Joins two input streams * @param other - The other stream to join with * @param type - The type of join to perform */ export function join< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, type: JoinType = 'inner', ): PipedOperator<T, KeyValue<K, [V1 | null, V2 | null]>> { switch (type) { case 'inner': return innerJoin(other) as PipedOperator<T, KeyValue<K, [V1, V2]>> case 'anti': return antiJoin(other) as PipedOperator<T, KeyValue<K, [V1, null]>> case 'left': return leftJoin(other) as PipedOperator<T, KeyValue<K, [V1, V2 | null]>> case 'right': return rightJoin(other) case 'full': return fullJoin(other) default: throw new Error(`Join type ${type} is invalid`) } } /** * Joins two input streams * @param other - The other stream to join with */ export function innerJoin< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, ): PipedOperator<T, KeyValue<K, [V1, V2]>> { return (stream: IStreamBuilder<T>): IStreamBuilder<KeyValue<K, [V1, V2]>> => { if (stream.graph !== other.graph) { throw new Error('Cannot join streams from different graphs') } const output = new StreamBuilder<KeyValue<K, [V1, V2]>>( stream.graph, new DifferenceStreamWriter<KeyValue<K, [V1, V2]>>(), ) const operator = new JoinOperator<K, V1, V2>( stream.graph.getNextOperatorId(), stream.connectReader() as DifferenceStreamReader<KeyValue<K, V1>>, other.connectReader() as DifferenceStreamReader<KeyValue<K, V2>>, output.writer, stream.graph.frontier(), ) stream.graph.addOperator(operator) stream.graph.addStream(output.connectReader()) return output } } /** * Joins two input streams * @param other - The other stream to join with */ export function antiJoin< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, ): PipedOperator<T, KeyValue<K, [V1, null]>> { return ( stream: IStreamBuilder<T>, ): IStreamBuilder<KeyValue<K, [V1, null]>> => { const matchedLeft = stream.pipe( innerJoin(other), map(([key, [valueLeft, _valueRight]]) => [key, valueLeft]), ) const anti = stream.pipe( concat(matchedLeft.pipe(negate())), // @ts-ignore TODO: fix this map(([key, value]) => [key, [value, null]]), ) return anti as IStreamBuilder<KeyValue<K, [V1, null]>> } } /** * Joins two input streams * @param other - The other stream to join with */ export function leftJoin< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, ): PipedOperator<T, KeyValue<K, [V1, V2 | null]>> { return ( stream: IStreamBuilder<T>, ): IStreamBuilder<KeyValue<K, [V1, V2 | null]>> => { const left = stream const right = other const inner = left.pipe(innerJoin(right)) const anti = left.pipe(antiJoin(right)) return inner.pipe(concat(anti)) as IStreamBuilder< KeyValue<K, [V1, V2 | null]> > } } /** * Joins two input streams * @param other - The other stream to join with */ export function rightJoin< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, ): PipedOperator<T, KeyValue<K, [V1 | null, V2]>> { return ( stream: IStreamBuilder<T>, ): IStreamBuilder<KeyValue<K, [V1 | null, V2]>> => { const left = stream as IStreamBuilder<KeyValue<K, V1>> const right = other const inner = left.pipe(innerJoin(right)) const anti = right.pipe( antiJoin(left), map(([key, [a, b]]) => [key, [b, a]]), ) return inner.pipe(concat(anti)) as IStreamBuilder< KeyValue<K, [V1 | null, V2]> > } } /** * Joins two input streams * @param other - The other stream to join with */ export function fullJoin< K, V1 extends T extends KeyValue<infer _KT, infer VT> ? VT : never, V2, T, >( other: IStreamBuilder<KeyValue<K, V2>>, ): PipedOperator<T, KeyValue<K, [V1 | null, V2 | null]>> { return ( stream: IStreamBuilder<T>, ): IStreamBuilder<KeyValue<K, [V1 | null, V2 | null]>> => { const left = stream as IStreamBuilder<KeyValue<K, V1>> const right = other const inner = left.pipe(innerJoin(right)) const antiLeft = left.pipe(antiJoin(right)) const antiRight = right.pipe( antiJoin(left), map(([key, [a, b]]) => [key, [b, a]]), ) return inner.pipe(concat(antiLeft), concat(antiRight)) as IStreamBuilder< KeyValue<K, [V1 | null, V2 | null]> > } }