UNPKG

@electric-sql/d2mini

Version:

D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.

307 lines (276 loc) 10.2 kB
import { IStreamBuilder, KeyValue, PipedOperator } from '../types.js' import { DifferenceStreamReader, DifferenceStreamWriter } from '../graph.js' import { StreamBuilder } from '../d2.js' import { generateKeyBetween } from 'fractional-indexing' import { getIndex, getValue, TaggedValue, indexedValue, IndexedValue, TopK, TopKChanges, TopKWithFractionalIndexOperator, TopKWithFractionalIndexOptions, } from './topKWithFractionalIndex.js' interface BTree<Key, Value> { nextLowerPair(key: Key): [Key, Value] | undefined nextHigherPair(key: Key): [Key, Value] | undefined set(key: Key, value: Value, overwrite?: boolean): boolean maxKey(): Key | undefined get(key: Key, defaultValue?: Value): Value | undefined delete(key: Key): boolean size: number } interface BTreeClass { new <Key, Value>( entries?: [Key, Value][], compare?: (a: Key, b: Key) => number, maxNodeSize?: number, ): BTree<Key, Value> } let BTree: BTreeClass export async function loadBTree() { if (!BTree) { const { default: BTreeClass } = await import('sorted-btree') BTree = BTreeClass } } /** * Implementation of a topK data structure that uses a B+ tree. * The tree allows for logarithmic time insertions and deletions. */ class TopKTree<V> implements TopK<V> { #comparator: (a: V, b: V) => number // topK is a window at position [topKStart, topKEnd[ // i.e. `topKStart` is inclusive and `topKEnd` is exclusive #topKStart: number #topKEnd: number #tree: BTree<V, IndexedValue<V>> #topKFirstElem: IndexedValue<V> | null = null // inclusive #topKLastElem: IndexedValue<V> | null = null // inclusive constructor( offset: number, limit: number, comparator: (a: V, b: V) => number, ) { if (!BTree) { throw new Error( 'B+ tree not loaded. You need to call loadBTree() before using TopKTree.', ) } this.#topKStart = offset this.#topKEnd = offset + limit this.#comparator = comparator this.#tree = new BTree(undefined, comparator) } /** * Insert a *new* value. * Ignores the value if it is already present. */ insert(value: V): TopKChanges<V> { let result: TopKChanges<V> = { moveIn: null, moveOut: null } // Get the elements before and after the value const [, indexedValueBefore] = this.#tree.nextLowerPair(value) ?? [ null, null, ] const [, indexedValueAfter] = this.#tree.nextHigherPair(value) ?? [ null, null, ] const indexBefore = indexedValueBefore ? getIndex(indexedValueBefore) : null const indexAfter = indexedValueAfter ? getIndex(indexedValueAfter) : null // Generate a fractional index for the value // based on the fractional indices of the elements before and after it const fractionalIndex = generateKeyBetween(indexBefore, indexAfter) const insertedElem = indexedValue(value, fractionalIndex) // Insert the value into the tree const inserted = this.#tree.set(value, insertedElem, false) if (!inserted) { // The value was already present in the tree // ignore this insertions since we don't support overwrites! return result } if (this.#tree.size - 1 < this.#topKStart) { // We don't have a topK yet // so we don't need to do anything return result } if (this.#topKFirstElem) { // We have a topK containing at least 1 element if (this.#comparator(value, getValue(this.#topKFirstElem)) < 0) { // The element was inserted before the topK // so it moves the element that is right before the topK into the topK const firstElem = getValue(this.#topKFirstElem) const [, newFirstElem] = this.#tree.nextLowerPair(firstElem)! this.#topKFirstElem = newFirstElem result.moveIn = this.#topKFirstElem } else if ( !this.#topKLastElem || this.#comparator(value, getValue(this.#topKLastElem)) < 0 ) { // The element was inserted within the topK result.moveIn = insertedElem } if ( this.#topKLastElem && this.#comparator(value, getValue(this.#topKLastElem)) < 0 ) { // The element was inserted before or within the topK // the newly inserted element pushes the last element of the topK out of the topK // so the one before that becomes the new last element of the topK const lastElem = this.#topKLastElem const lastValue = getValue(lastElem) const [, newLastElem] = this.#tree.nextLowerPair(lastValue)! this.#topKLastElem = newLastElem result.moveOut = lastElem } } // If the tree has as many elements as the offset (i.e. #topKStart) // then the insertion shifted the elements 1 position to the right // and the last element in the tree is now the first element of the topK if (this.#tree.size - 1 === this.#topKStart) { const topKFirstKey = this.#tree.maxKey()! this.#topKFirstElem = this.#tree.get(topKFirstKey)! result.moveIn = this.#topKFirstElem } // By inserting this new element we now have a complete topK // store the last element of the topK if (this.#tree.size === this.#topKEnd) { const topKLastKey = this.#tree.maxKey()! this.#topKLastElem = this.#tree.get(topKLastKey)! } return result } delete(value: V): TopKChanges<V> { let result: TopKChanges<V> = { moveIn: null, moveOut: null } const deletedElem = this.#tree.get(value) const deleted = this.#tree.delete(value) if (!deleted) { return result } if (!this.#topKFirstElem) { // We didn't have a topK before the delete // so we still can't have a topK after the delete return result } if (this.#comparator(value, getValue(this.#topKFirstElem)) < 0) { // We deleted an element that was before the topK // so the topK has shifted one position to the left // the old first element moves out of the topK result.moveOut = this.#topKFirstElem // the element that was right after the first element of the topK // is now the new first element of the topK const firstElem = getValue(this.#topKFirstElem) const [, newFirstElem] = this.#tree.nextHigherPair(firstElem) ?? [ null, null, ] this.#topKFirstElem = newFirstElem } else if ( !this.#topKLastElem || // TODO: if on equal order the element is inserted *after* the already existing one // then this check should become < 0 this.#comparator(value, getValue(this.#topKLastElem)) <= 0 ) { // The element we deleted was within the topK // so we need to signal that that element is no longer in the topK result.moveOut = deletedElem! } if ( this.#topKLastElem && // TODO: if on equal order the element is inserted *after* the already existing one // then this check should become < 0 this.#comparator(value, getValue(this.#topKLastElem)) <= 0 ) { // The element we deleted was before or within the topK // So the first element after the topK moved one position to the left // and thus falls into the topK now const lastElem = this.#topKLastElem const lastValue = getValue(lastElem) const [, newLastElem] = this.#tree.nextHigherPair(lastValue) ?? [ null, null, ] this.#topKLastElem = newLastElem if (newLastElem) { result.moveIn = newLastElem } } return result } } /** * Operator for fractional indexed topK operations * This operator maintains fractional indices for sorted elements * and only updates indices when elements move position */ export class TopKWithFractionalIndexBTreeOperator< K, V1, > extends TopKWithFractionalIndexOperator<K, V1> { protected override createTopK( offset: number, limit: number, comparator: ( a: TaggedValue<V1>, b: TaggedValue<V1>, ) => number, ): TopK<TaggedValue<V1>> { if (!BTree) { throw new Error( 'B+ tree not loaded. You need to call loadBTree() before using TopKWithFractionalIndexBTreeOperator.', ) } return new TopKTree(offset, limit, comparator) } } /** * Limits the number of results based on a comparator, with optional offset. * This works on a keyed stream, where the key is the first element of the tuple. * The ordering is within a key group, i.e. elements are sorted within a key group * and the limit + offset is applied to that sorted group. * To order the entire stream, key by the same value for all elements such as null. * * Uses fractional indexing to minimize the number of changes when elements move positions. * Each element is assigned a fractional index that is lexicographically sortable. * When elements move, only the indices of the moved elements are updated, not all elements. * * @param comparator - A function that compares two elements * @param options - An optional object containing limit and offset properties * @returns A piped operator that orders the elements and limits the number of results */ export function topKWithFractionalIndexBTree< K extends T extends KeyValue<infer K, infer _V> ? K : never, V1 extends T extends KeyValue<K, infer V> ? V : never, T, >( comparator: (a: V1, b: V1) => number, options?: TopKWithFractionalIndexOptions, ): PipedOperator<T, KeyValue<K, [V1, string]>> { const opts = options || {} if (!BTree) { throw new Error( 'B+ tree not loaded. You need to call loadBTree() before using topKWithFractionalIndexBTree.', ) } return ( stream: IStreamBuilder<T>, ): IStreamBuilder<KeyValue<K, [V1, string]>> => { const output = new StreamBuilder<KeyValue<K, [V1, string]>>( stream.graph, new DifferenceStreamWriter<KeyValue<K, [V1, string]>>(), ) const operator = new TopKWithFractionalIndexOperator<K, V1>( stream.graph.getNextOperatorId(), stream.connectReader() as DifferenceStreamReader<KeyValue<K, V1>>, output.writer, comparator, opts, ) stream.graph.addOperator(operator) stream.graph.addStream(output.connectReader()) return output } }