@tanstack/db-ivm
Version:
Incremental View Maintenance for TanStack DB based on Differential Dataflow
308 lines (276 loc) • 10.3 kB
text/typescript
import { generateKeyBetween } from "fractional-indexing"
import { DifferenceStreamWriter } from "../graph.js"
import { StreamBuilder } from "../d2.js"
import {
TopKWithFractionalIndexOperator,
getIndex,
getValue,
indexedValue,
} from "./topKWithFractionalIndex.js"
import type { IStreamBuilder, PipedOperator } from "../types.js"
import type {
IndexedValue,
TaggedValue,
TopK,
TopKChanges,
TopKWithFractionalIndexOptions,
} from "./topKWithFractionalIndex.js"
/**
 * Structural description of the sorted tree instance used by `TopKTree`.
 * Only the members that this file actually calls are declared here; the
 * concrete implementation is the default export of `sorted-btree`, which is
 * loaded lazily by `loadBTree()`.
 */
interface BTree<Key, Value> {
// Entry right before `key` in sort order, or `undefined` when there is none.
// `key` itself does not have to be present in the tree
// (presumably "strictly lower" — see the sorted-btree documentation).
nextLowerPair: (key: Key) => [Key, Value] | undefined
// Entry right after `key` in sort order, or `undefined` when there is none.
nextHigherPair: (key: Key) => [Key, Value] | undefined
// Stores `value` under `key`. `TopKTree` passes `overwrite = false` and treats
// a `false` return value as "the key was already present, nothing was stored".
set: (key: Key, value: Value, overwrite?: boolean) => boolean
// Key of the last entry in sort order, `undefined` for an empty tree.
maxKey: () => Key | undefined
// Value stored under `key`, if any.
get: (key: Key, defaultValue?: Value) => Value | undefined
// Removes `key`; the boolean tells whether an entry was actually removed.
delete: (key: Key) => boolean
// Number of entries currently stored in the tree.
size: number
}
/**
 * Constructor signature of the lazily loaded B+ tree class.
 * `TopKTree` calls it as `new BTree(undefined, comparator)`, i.e. without
 * initial entries and with a custom key ordering.
 */
interface BTreeClass {
new <Key, Value>(
// Optional initial content of the tree.
entries?: Array<[Key, Value]>,
// Optional ordering of the keys (negative / zero / positive like Array.prototype.sort).
compare?: (a: Key, b: Key) => number,
// NOTE(review): tuning parameter of sorted-btree; never passed in this file.
maxNodeSize?: number
): BTree<Key, Value>
}
// Holds the B+ tree constructor once `loadBTree()` has resolved; `undefined` until then.
let BTree: BTreeClass | undefined

/**
 * Lazily loads the `sorted-btree` package and remembers its default export.
 * Has to be awaited before any of the B+ tree based topK APIs of this module are used.
 * Calling it again after the class has been stored is a no-op.
 */
export async function loadBTree() {
  if (BTree !== undefined) {
    // Already loaded, nothing to do
    return
  }
  const btreeModule = await import(`sorted-btree`)
  BTree = btreeModule.default
}
/**
 * Implementation of a topK data structure that uses a B+ tree.
 * The tree allows for logarithmic time insertions and deletions.
 *
 * Every inserted value is stored in the tree (ordered by the comparator)
 * together with a fractional index. The topK is the window
 * `[offset, offset + limit[` over that order. Instead of materializing the
 * window, only its first and last element are tracked and shifted by one
 * position whenever an insert or delete affects the window.
 *
 * `insert` and `delete` report the element that entered the window (`moveIn`)
 * and the element that left it (`moveOut`), each `null` when not applicable.
 */
class TopKTree<V> implements TopK<V> {
  #comparator: (a: V, b: V) => number

  // topK is a window at position [topKStart, topKEnd[
  // i.e. `topKStart` is inclusive and `topKEnd` is exclusive
  #topKStart: number
  #topKEnd: number

  #tree: BTree<V, IndexedValue<V>>

  // First element of the window (inclusive).
  // null as long as the tree does not hold more than `topKStart` elements.
  #topKFirstElem: IndexedValue<V> | null = null
  // Last element of the window (inclusive).
  // null as long as the tree holds fewer than `topKEnd` elements.
  #topKLastElem: IndexedValue<V> | null = null

  /**
   * @param offset - Number of leading elements (in comparator order) to skip
   * @param limit - Maximum number of elements in the window
   * @param comparator - Ordering of the values; also used as key ordering of the tree
   * @throws Error if the B+ tree class has not been loaded with `loadBTree()` yet
   */
  constructor(
    offset: number,
    limit: number,
    comparator: (a: V, b: V) => number
  ) {
    if (BTree === undefined) {
      throw new Error(
        `B+ tree not loaded. You need to call loadBTree() before using TopKTree.`
      )
    }

    this.#topKStart = offset
    this.#topKEnd = offset + limit
    this.#comparator = comparator
    this.#tree = new BTree(undefined, comparator)
  }

  /**
   * Number of elements that are currently inside the topK window.
   */
  get size(): number {
    const offset = this.#topKStart
    const limit = this.#topKEnd - this.#topKStart
    const available = this.#tree.size - offset
    return Math.max(0, Math.min(limit, available))
  }

  /**
   * Insert a *new* value.
   * Ignores the value if it is already present.
   *
   * @returns The element that moved into the topK and the one that moved out of it, if any
   */
  insert(value: V): TopKChanges<V> {
    const result: TopKChanges<V> = { moveIn: null, moveOut: null }

    // Get the elements before and after the value
    const indexedValueBefore = this.#tree.nextLowerPair(value)?.[1] ?? null
    const indexedValueAfter = this.#tree.nextHigherPair(value)?.[1] ?? null
    const indexBefore = indexedValueBefore ? getIndex(indexedValueBefore) : null
    const indexAfter = indexedValueAfter ? getIndex(indexedValueAfter) : null

    // Generate a fractional index for the value
    // based on the fractional indices of the elements before and after it
    const fractionalIndex = generateKeyBetween(indexBefore, indexAfter)
    const insertedElem = indexedValue(value, fractionalIndex)

    // Insert the value into the tree
    const inserted = this.#tree.set(value, insertedElem, false)
    if (!inserted) {
      // The value was already present in the tree
      // ignore this insertion since we don't support overwrites!
      return result
    }

    if (this.#topKEnd <= this.#topKStart) {
      // The window is empty (limit <= 0), so nothing can ever move into it.
      // This keeps the reported changes consistent with `size`, which is 0 in that case.
      return result
    }

    if (this.#tree.size - 1 < this.#topKStart) {
      // We don't have a topK yet
      // so we don't need to do anything
      return result
    }

    if (this.#topKFirstElem) {
      // We have a topK containing at least 1 element
      if (this.#comparator(value, getValue(this.#topKFirstElem)) < 0) {
        // The element was inserted before the topK
        // so it moves the element that is right before the topK into the topK
        // (that element always exists: at the very least it is the inserted one)
        const firstElem = getValue(this.#topKFirstElem)
        const [, newFirstElem] = this.#tree.nextLowerPair(firstElem)!
        this.#topKFirstElem = newFirstElem
        result.moveIn = this.#topKFirstElem
      } else if (
        !this.#topKLastElem ||
        this.#comparator(value, getValue(this.#topKLastElem)) < 0
      ) {
        // The element was inserted within the topK
        result.moveIn = insertedElem
      }

      if (
        this.#topKLastElem &&
        this.#comparator(value, getValue(this.#topKLastElem)) < 0
      ) {
        // The element was inserted before or within the topK
        // the newly inserted element pushes the last element of the topK out of the topK
        // so the one before that becomes the new last element of the topK
        const lastElem = this.#topKLastElem
        const lastValue = getValue(lastElem)
        const [, newLastElem] = this.#tree.nextLowerPair(lastValue)!
        this.#topKLastElem = newLastElem
        result.moveOut = lastElem
      }
    }

    // If the tree had as many elements as the offset (i.e. #topKStart) before this insertion
    // then the insertion shifted the elements 1 position to the right
    // and the last element in the tree is now the first element of the topK
    if (this.#tree.size - 1 === this.#topKStart) {
      const topKFirstKey = this.#tree.maxKey()!
      this.#topKFirstElem = this.#tree.get(topKFirstKey)!
      result.moveIn = this.#topKFirstElem
    }

    // By inserting this new element we now have a complete topK
    // store the last element of the topK
    if (this.#tree.size === this.#topKEnd) {
      const topKLastKey = this.#tree.maxKey()!
      this.#topKLastElem = this.#tree.get(topKLastKey)!
    }

    return result
  }

  /**
   * Delete a value.
   * Does nothing if the value is not present.
   *
   * @returns The element that moved into the topK and the one that moved out of it, if any
   */
  delete(value: V): TopKChanges<V> {
    const result: TopKChanges<V> = { moveIn: null, moveOut: null }

    // Look the element up before removing it, we may have to report it as moved out
    const deletedElem = this.#tree.get(value)
    const deleted = this.#tree.delete(value)
    if (!deleted) {
      return result
    }

    if (!this.#topKFirstElem) {
      // We didn't have a topK before the delete
      // so we still can't have a topK after the delete
      return result
    }

    const comparedToFirst = this.#comparator(
      value,
      getValue(this.#topKFirstElem)
    )

    if (comparedToFirst < 0) {
      // We deleted an element that was before the topK
      // so the topK has shifted one position to the left
      // the old first element moves out of the topK
      result.moveOut = this.#topKFirstElem

      // the element that was right after the first element of the topK
      // is now the new first element of the topK
      const firstElem = getValue(this.#topKFirstElem)
      this.#topKFirstElem = this.#tree.nextHigherPair(firstElem)?.[1] ?? null
    } else if (
      !this.#topKLastElem ||
      // TODO: if on equal order the element is inserted *after* the already existing one
      //       then this check should become < 0
      this.#comparator(value, getValue(this.#topKLastElem)) <= 0
    ) {
      // The element we deleted was within the topK
      // so we need to signal that that element is no longer in the topK
      result.moveOut = deletedElem!

      if (comparedToFirst === 0) {
        // We deleted the first element of the topK itself.
        // Its successor (if any) becomes the new first element, otherwise
        // `#topKFirstElem` would keep pointing at an element that no longer
        // exists and later inserts/deletes would report wrong changes.
        this.#topKFirstElem = this.#tree.nextHigherPair(value)?.[1] ?? null
      }
    }

    if (
      this.#topKLastElem &&
      // TODO: if on equal order the element is inserted *after* the already existing one
      //       then this check should become < 0
      this.#comparator(value, getValue(this.#topKLastElem)) <= 0
    ) {
      // The element we deleted was before or within the topK
      // So the first element after the topK moved one position to the left
      // and thus falls into the topK now
      const lastElem = this.#topKLastElem
      const lastValue = getValue(lastElem)
      const newLastElem = this.#tree.nextHigherPair(lastValue)?.[1] ?? null
      // null means there is no element to fill the gap: the topK is incomplete again
      this.#topKLastElem = newLastElem
      if (newLastElem) {
        result.moveIn = newLastElem
      }
    }

    return result
  }
}
/**
 * Fractional-index topK operator that keeps its topK in a B+ tree.
 * It differs from `TopKWithFractionalIndexOperator` only in the `createTopK`
 * factory, which hands out a `TopKTree`.
 */
export class TopKWithFractionalIndexBTreeOperator<
  K,
  T,
> extends TopKWithFractionalIndexOperator<K, T> {
  /**
   * Creates the B+ tree backed topK.
   *
   * @throws Error if `loadBTree()` has not been awaited before
   */
  protected override createTopK(
    offset: number,
    limit: number,
    comparator: (a: TaggedValue<K, T>, b: TaggedValue<K, T>) => number
  ): TopK<TaggedValue<K, T>> {
    if (BTree !== undefined) {
      return new TopKTree(offset, limit, comparator)
    }

    // The tree implementation is loaded asynchronously, so it may be missing
    throw new Error(
      `B+ tree not loaded. You need to call loadBTree() before using TopKWithFractionalIndexBTreeOperator.`
    )
  }
}
/**
 * Orders the elements of a keyed stream with `comparator` and keeps only the
 * window described by `options` (limit and optional offset).
 * The key is the first element of each tuple; ordering, limit and offset are
 * applied per key group. To order the stream as a whole, give every element
 * the same key (e.g. null).
 *
 * Each emitted value carries a lexicographically sortable fractional index, so
 * that when elements change position only the moved elements receive a new
 * index instead of all of them.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing limit and offset properties
 * @returns A piped operator that orders the elements and limits the number of results
 * @throws Error if `loadBTree()` has not been awaited before calling this function
 */
export function topKWithFractionalIndexBTree<KType, T>(
  comparator: (a: T, b: T) => number,
  options?: TopKWithFractionalIndexOptions
): PipedOperator<[KType, T], [KType, IndexedValue<T>]> {
  const resolvedOptions = options ? options : {}

  // Fail while the pipeline is being built rather than when data flows through it
  if (BTree === undefined) {
    throw new Error(
      `B+ tree not loaded. You need to call loadBTree() before using topKWithFractionalIndexBTree.`
    )
  }

  function connect(
    stream: IStreamBuilder<[KType, T]>
  ): IStreamBuilder<[KType, IndexedValue<T>]> {
    const { graph } = stream

    // Downstream side of the operator
    const writer = new DifferenceStreamWriter<[KType, IndexedValue<T>]>()
    const output = new StreamBuilder<[KType, IndexedValue<T>]>(graph, writer)

    // Wire the operator between the incoming stream and the new output
    const operatorId = graph.getNextOperatorId()
    const reader = stream.connectReader()
    const operator = new TopKWithFractionalIndexBTreeOperator<KType, T>(
      operatorId,
      reader,
      output.writer,
      comparator,
      resolvedOptions
    )
    graph.addOperator(operator)

    return output
  }

  return connect
}