UNPKG

@electric-sql/d2mini

Version:

D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.

237 lines 10.9 kB
import { DifferenceStreamWriter, UnaryOperator } from '../graph.js';
import { StreamBuilder } from '../d2.js';
import { MultiSet } from '../multiset.js';
import { Index } from '../indexes.js';
import { generateKeyBetween } from 'fractional-indexing';
import { binarySearch, globalObjectIdGenerator } from '../utils.js';

/**
 * Implementation of a topK data structure.
 * Uses a sorted array internally to store the values and keeps a topK window
 * `[offset, offset + limit)` over that array.
 * Inserts and deletes are O(n) operations because worst case an element is
 * inserted/deleted at the start of the array, which causes all the elements to
 * shift to the right/left.
 *
 * Every stored entry is an `[value, fractionalIndex]` pair (see `indexedValue`).
 * `insert` and `delete` return `{ moveIn, moveOut }`, where each field is either
 * `null` or the stored entry that entered / left the window.
 */
class TopKArray {
  #sortedValues = [];
  #comparator;
  #topKStart;
  #topKEnd;

  /**
   * @param offset - Position in the sorted array at which the window starts
   * @param limit - Size of the window (may be `Infinity`)
   * @param comparator - Ordering function applied to the raw (un-indexed) values
   */
  constructor(offset, limit, comparator) {
    this.#topKStart = offset;
    this.#topKEnd = offset + limit;
    this.#comparator = comparator;
  }

  /**
   * Inserts a value at its sorted position and assigns it a fractional index
   * that sorts between the indices of its two neighbours.
   *
   * @param value - The value to insert
   * @returns The entry that moved into the window and the entry that moved out of it
   */
  insert(value) {
    const result = { moveIn: null, moveOut: null };

    // Lookup insert position
    const index = this.#findIndex(value);

    // Generate fractional index based on the fractional indices of the elements before and after it
    const indexBefore =
      index === 0 ? null : getIndex(this.#sortedValues[index - 1]);
    const indexAfter =
      index === this.#sortedValues.length
        ? null
        : getIndex(this.#sortedValues[index]);
    const fractionalIndex = generateKeyBetween(indexBefore, indexAfter);

    // Insert the value at the correct position
    const val = indexedValue(value, fractionalIndex);
    // Splice is O(n) where n = all elements in the collection (i.e. n >= k) !
    this.#sortedValues.splice(index, 0, val);

    // Check if the topK changed
    if (index < this.#topKEnd) {
      // The inserted element is either before the top K or within the top K
      // If it is before the top K then it moves the element that was right before the topK into the topK
      // If it is within the top K then the inserted element moves into the top K
      // In both cases the last element of the old top K now moves out of the top K
      const moveInIndex = Math.max(index, this.#topKStart);
      if (moveInIndex < this.#sortedValues.length) {
        // We actually have a topK
        // because in some cases there may not be enough elements in the array to reach the start of the topK
        // e.g. [1, 2, 3] with K = 2 and offset = 3 does not have a topK
        result.moveIn = this.#sortedValues[moveInIndex];

        // We need to remove the element that falls out of the top K
        // The element that falls out of the top K has shifted one to the right
        // because of the element we inserted, so we find it at index topKEnd
        if (this.#topKEnd < this.#sortedValues.length) {
          result.moveOut = this.#sortedValues[this.#topKEnd];
        }
      }
    }

    return result;
  }

  /**
   * Deletes a value that may or may not be in the topK.
   * IMPORTANT: this assumes that the value is present in the collection.
   * If that is not the case it will remove the element
   * that is at the position where the provided `value` would be.
   *
   * @param value - The value to delete
   * @returns The entry that moved into the window and the entry that moved out of it
   */
  delete(value) {
    const result = { moveIn: null, moveOut: null };

    // Lookup delete position
    const index = this.#findIndex(value);

    // Remove the value at that position
    const [removedElem] = this.#sortedValues.splice(index, 1);

    // Check if the topK changed
    if (index < this.#topKEnd) {
      // The removed element is either before the top K or within the top K
      // If it is before the top K then the first element of the topK moves out of the topK
      // If it is within the top K then the removed element moves out of the topK
      result.moveOut = removedElem;
      if (index < this.#topKStart) {
        // The removed element is before the topK
        // so actually, the first element of the topK moves out of the topK
        // and not the element that we removed
        // The first element of the topK is now at index topKStart - 1
        // since we removed an element before the topK
        const moveOutIndex = this.#topKStart - 1;
        if (moveOutIndex < this.#sortedValues.length) {
          result.moveOut = this.#sortedValues[moveOutIndex];
        } else {
          // No value is moving out of the topK
          // because there are no elements in the topK
          result.moveOut = null;
        }
      }

      // Since we removed an element that was before or in the topK
      // the first element after the topK moved one position to the left
      // and thus falls into the topK now
      const moveInIndex = this.#topKEnd - 1;
      if (moveInIndex < this.#sortedValues.length) {
        result.moveIn = this.#sortedValues[moveInIndex];
      }
    }

    return result;
  }

  // TODO: see if there is a way to refactor the code for insert and delete in the topK above
  //       because they are very similar, one is shifting the topK window to the left and the other is shifting it to the right
  //       so i have the feeling there is a common pattern here and we can implement both cases using that pattern

  /**
   * Looks up the position of `value` in the sorted array via `binarySearch`.
   * The probe is wrapped with an empty fractional index because the comparison
   * only looks at the value part of each entry.
   */
  #findIndex(value) {
    return binarySearch(this.#sortedValues, indexedValue(value, ''), (a, b) =>
      this.#comparator(getValue(a), getValue(b)),
    );
  }
}

/**
 * Operator for fractional indexed topK operations.
 * This operator maintains fractional indices for sorted elements
 * and only updates indices when elements move position.
 *
 * A single topK structure is shared by all keys of the input stream, so the
 * offset/limit window is applied across the whole stream. Each stored element
 * remembers the key it arrived with, and every emitted change carries the key
 * of the element that actually moved.
 */
export class TopKWithFractionalIndexOperator extends UnaryOperator {
  #index = new Index();

  /**
   * topK data structure that supports insertions and deletions
   * and returns changes to the topK.
   */
  #topK;

  /**
   * @param id - Operator id, forwarded to `UnaryOperator`
   * @param inputA - Input reader, forwarded to `UnaryOperator`
   * @param output - Output writer, forwarded to `UnaryOperator`
   * @param comparator - A function that compares two (untagged) values
   * @param options - Object with optional `limit` (default `Infinity`) and `offset` (default `0`)
   */
  constructor(id, inputA, output, comparator, options) {
    super(id, inputA, output);
    const limit = options.limit ?? Infinity;
    const offset = options.offset ?? 0;

    const compareTaggedValues = (a, b) => {
      // First compare on the value
      const valueComparison = comparator(untagValue(a), untagValue(b));
      if (valueComparison !== 0) {
        return valueComparison;
      }
      // If the values are equal, compare on the tag (the id handed out by globalObjectIdGenerator)
      return getTag(a) - getTag(b);
    };

    this.#topK = this.createTopK(offset, limit, compareTaggedValues);
  }

  /**
   * Factory for the underlying topK structure; kept as a separate method so it
   * can be overridden.
   */
  createTopK(offset, limit, comparator) {
    return new TopKArray(offset, limit, comparator);
  }

  /**
   * Drains all pending input messages, feeds every `[[key, value], multiplicity]`
   * entry through `processElement`, and sends the collected changes downstream
   * as one MultiSet (nothing is sent when there are no changes).
   */
  run() {
    const result = [];
    for (const message of this.inputMessages()) {
      for (const [item, multiplicity] of message.getInner()) {
        const [key, value] = item;
        this.processElement(key, value, multiplicity, result);
      }
    }
    if (result.length > 0) {
      this.output.sendData(new MultiSet(result));
    }
  }

  /**
   * Applies one multiplicity change and appends the resulting topK changes to `result`.
   *
   * @param key - Key of the incoming element
   * @param value - Value of the incoming element
   * @param multiplicity - Change in multiplicity for `(key, value)`
   * @param result - Output array; receives `[[key, [value, fractionalIndex]], +1 | -1]` entries
   */
  processElement(key, value, multiplicity, result) {
    const oldMultiplicity = this.#index.getMultiplicity(key, value);
    this.#index.addValue(key, [value, multiplicity]);
    const newMultiplicity = this.#index.getMultiplicity(key, value);

    let res = { moveIn: null, moveOut: null };
    if (oldMultiplicity <= 0 && newMultiplicity > 0) {
      // The value was invisible but should now be visible
      // Need to insert it into the array of sorted values
      res = this.#topK.insert(tagValue(value, key));
    } else if (oldMultiplicity > 0 && newMultiplicity <= 0) {
      // The value was visible but should now be invisible
      // Need to remove it from the array of sorted values
      res = this.#topK.delete(tagValue(value, key));
    }
    // Otherwise the value was invisible and remains invisible,
    // or it was visible and remains visible, so it doesn't affect the topK.

    // The element that crosses the window boundary is not necessarily the one
    // being processed, so emit it under the key it was stored with rather than
    // under `key`; otherwise a later retraction would not cancel the insertion.
    if (res.moveIn) {
      const valueWithoutTieBreaker = mapValue(res.moveIn, untagValue);
      result.push([[getOwnerKey(getValue(res.moveIn)), valueWithoutTieBreaker], 1]);
    }
    if (res.moveOut) {
      const valueWithoutTieBreaker = mapValue(res.moveOut, untagValue);
      result.push([[getOwnerKey(getValue(res.moveOut)), valueWithoutTieBreaker], -1]);
    }
  }
}

/**
 * Limits the number of results based on a comparator, with optional offset.
 * This works on a keyed stream, where the key is the first element of the tuple.
 * The ordering and the limit + offset window are applied across all elements of
 * the stream: one sorted collection is maintained for every key. The key is
 * carried along and re-attached to each emitted element.
 *
 * Uses fractional indexing to minimize the number of changes when elements move positions.
 * Each element is assigned a fractional index that is lexicographically sortable.
 * When elements move, only the indices of the moved elements are updated, not all elements.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing limit and offset properties
 * @returns A piped operator that orders the elements and limits the number of results
 */
export function topKWithFractionalIndex(comparator, options) {
  const opts = options ?? {};
  return (stream) => {
    const output = new StreamBuilder(stream.graph, new DifferenceStreamWriter());
    const operator = new TopKWithFractionalIndexOperator(
      stream.graph.getNextOperatorId(),
      stream.connectReader(),
      output.writer,
      comparator,
      opts,
    );
    stream.graph.addOperator(operator);
    stream.graph.addStream(output.connectReader());
    return output;
  };
}

/** Pairs a value with its fractional index as `[value, index]`. */
export function indexedValue(value, index) {
  return [value, index];
}

/** Returns the value part of an `[value, index]` pair. */
export function getValue(indexedValue) {
  return indexedValue[0];
}

/** Returns the fractional-index part of an `[value, index]` pair. */
export function getIndex(indexedValue) {
  return indexedValue[1];
}

/** Applies `f` to the value part of an indexed value, keeping its index. */
function mapValue(value, f) {
  return [f(getValue(value)), getIndex(value)];
}

/**
 * Wraps a value as `[value, tag, key]`: `tag` is the id returned by
 * `globalObjectIdGenerator.getId(value)` and is used as tie-breaker, `key` is
 * the stream key the value arrived with.
 */
function tagValue(value, key) {
  return [value, globalObjectIdGenerator.getId(value), key];
}

/** Returns the raw value of a tagged value. */
function untagValue(tieBreakerTaggedValue) {
  return tieBreakerTaggedValue[0];
}

/** Returns the tie-breaker tag of a tagged value. */
function getTag(tieBreakerTaggedValue) {
  return tieBreakerTaggedValue[1];
}

/** Returns the stream key a tagged value was stored with. */
function getOwnerKey(tieBreakerTaggedValue) {
  return tieBreakerTaggedValue[2];
}
//# sourceMappingURL=topKWithFractionalIndex.js.map