@electric-sql/d2mini
Version:
D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.
237 lines • 10.9 kB
JavaScript
import { DifferenceStreamWriter, UnaryOperator, } from '../graph.js';
import { StreamBuilder } from '../d2.js';
import { MultiSet } from '../multiset.js';
import { Index } from '../indexes.js';
import { generateKeyBetween } from 'fractional-indexing';
import { binarySearch } from '../utils.js';
import { globalObjectIdGenerator } from '../utils.js';
/**
 * Array-backed topK structure.
 * All values live in one sorted array; the topK is the window
 * [offset, offset + limit) over that array.
 * Inserting or deleting is O(n): in the worst case the element sits at the start
 * of the array and every other element has to be shifted by one position.
 */
class TopKArray {
    #sortedValues = [];
    #comparator;
    #topKStart;
    #topKEnd;
    /**
     * @param offset - Position in the sorted array at which the topK window starts
     * @param limit - Number of positions covered by the window
     * @param comparator - Orders two (un-indexed) values
     */
    constructor(offset, limit, comparator) {
        this.#comparator = comparator;
        this.#topKStart = offset;
        this.#topKEnd = offset + limit;
    }
    /**
     * Adds a value to the sorted array and assigns it a fractional index
     * generated between the indices of its two neighbours.
     *
     * @returns `{ moveIn, moveOut }`: the indexed values that entered / left
     * the topK window because of this insert (`null` when there is none).
     */
    insert(value) {
        const entries = this.#sortedValues;
        const position = this.#findIndex(value);
        // Fractional indices of the neighbours; null stands for "no neighbour on that side"
        const lowerKey = position === 0 ? null : getIndex(entries[position - 1]);
        const upperKey = position === entries.length ? null : getIndex(entries[position]);
        const entry = indexedValue(value, generateKeyBetween(lowerKey, upperKey));
        // Splice is O(n) where n = all elements in the collection (i.e. n >= k) !
        entries.splice(position, 0, entry);
        const change = { moveIn: null, moveOut: null };
        const touchesWindow = position < this.#topKEnd;
        if (!touchesWindow) {
            // Inserted after the window: the topK is unaffected
            return change;
        }
        // Inserted inside the window -> the new element itself enters.
        // Inserted before the window -> everything shifted right by one, so the
        // element that now sits on the first window position enters.
        const enteringAt = Math.max(position, this.#topKStart);
        const windowIsReached = enteringAt < entries.length;
        if (!windowIsReached) {
            // Not enough elements to reach the start of the window,
            // e.g. [1, 2, 3] with K = 2 and offset = 3 does not have a topK
            return change;
        }
        change.moveIn = entries[enteringAt];
        // The former last element of the window was pushed one position to the
        // right and therefore now sits exactly at index topKEnd
        if (this.#topKEnd < entries.length) {
            change.moveOut = entries[this.#topKEnd];
        }
        return change;
    }
    /**
     * Removes a value that may or may not be inside the topK window.
     * IMPORTANT: the value is assumed to be present in the collection.
     * If it is not, whatever element sits at the position where `value`
     * would be located is removed instead.
     *
     * @returns `{ moveIn, moveOut }`: the indexed values that entered / left
     * the topK window because of this delete (`null` when there is none).
     */
    delete(value) {
        const entries = this.#sortedValues;
        const position = this.#findIndex(value);
        const [removed] = entries.splice(position, 1);
        const change = { moveIn: null, moveOut: null };
        const touchesWindow = position < this.#topKEnd;
        if (!touchesWindow) {
            // Removed after the window: the topK is unaffected
            return change;
        }
        if (position < this.#topKStart) {
            // Removed before the window: everything shifted left by one, so the
            // former first element of the window now sits at topKStart - 1 and
            // has left the window. If that position does not exist the window
            // was empty and nothing leaves.
            const leavingAt = this.#topKStart - 1;
            change.moveOut = leavingAt < entries.length ? entries[leavingAt] : null;
        }
        else {
            // Removed inside the window: the removed element itself leaves
            change.moveOut = removed;
        }
        // The first element after the window shifted left onto the last window position
        const enteringAt = this.#topKEnd - 1;
        if (enteringAt < entries.length) {
            change.moveIn = entries[enteringAt];
        }
        return change;
    }
    // TODO: see if there is a way to refactor the code for insert and delete in the topK above
    // because they are very similar, one is shifting the topK window to the left and the other is shifting it to the right
    // so i have the feeling there is a common pattern here and we can implement both cases using that pattern
    /**
     * Binary-searches the sorted array for `value`, comparing on the stored
     * values only (the fractional index of the probe is a placeholder).
     */
    #findIndex(value) {
        const probe = indexedValue(value, '');
        const compareEntries = (a, b) => this.#comparator(getValue(a), getValue(b));
        return binarySearch(this.#sortedValues, probe, compareEntries);
    }
}
/**
 * Operator for fractional indexed topK operations.
 * Visible values are kept in a sorted topK structure in which every element
 * carries a fractional index; for each input change the operator emits `+1`
 * for the element that entered the topK window and `-1` for the element that
 * left it (when there is one).
 *
 * NOTE(review): a single topK structure is shared by all keys, and an element
 * that moves in or out is emitted under the key of the input element that
 * triggered the move. That is only accurate when both elements share that key
 * (e.g. every element is keyed by the same value) — confirm callers respect this.
 */
export class TopKWithFractionalIndexOperator extends UnaryOperator {
    // Used to read the multiplicity of a (key, value) pair before and after each change
    #index = new Index();
    /**
     * topK data structure that supports insertions and deletions
     * and returns changes to the topK.
     */
    #topK;
    /**
     * @param id - Operator id, forwarded to `UnaryOperator`
     * @param inputA - Input reader, forwarded to `UnaryOperator`
     * @param output - Output writer, forwarded to `UnaryOperator`
     * @param comparator - Orders two (untagged) values
     * @param options - Optional `{ limit, offset }`; may be omitted or null.
     * `limit` defaults to `Infinity` and `offset` to `0`.
     */
    constructor(id, inputA, output, comparator, options) {
        super(id, inputA, output);
        // `options` is optional: tolerate undefined/null instead of throwing a TypeError
        const limit = options?.limit ?? Infinity;
        const offset = options?.offset ?? 0;
        const compareTaggedValues = (a, b) => {
            // First compare on the value
            const valueComparison = comparator(untagValue(a), untagValue(b));
            if (valueComparison !== 0) {
                return valueComparison;
            }
            // If the values are equal, fall back to the numeric tag attached by
            // `tagValue` so that equally-ranked values still have a stable order
            return getTag(a) - getTag(b);
        };
        this.#topK = this.createTopK(offset, limit, compareTaggedValues);
    }
    /**
     * Factory for the underlying topK structure; called once from the constructor.
     * Kept as a separate method so the structure can be swapped by overriding it.
     */
    createTopK(offset, limit, comparator) {
        return new TopKArray(offset, limit, comparator);
    }
    /**
     * Drains all pending input messages, feeds every `[key, value]` change into
     * the topK and sends the accumulated topK changes as one `MultiSet`.
     * Nothing is sent when the topK did not change.
     */
    run() {
        const result = [];
        for (const message of this.inputMessages()) {
            for (const [item, multiplicity] of message.getInner()) {
                const [key, value] = item;
                this.processElement(key, value, multiplicity, result);
            }
        }
        if (result.length > 0) {
            this.output.sendData(new MultiSet(result));
        }
    }
    /**
     * Applies one input change and appends the resulting topK changes to `result`.
     *
     * @param key - Key of the changed element
     * @param value - Value of the changed element
     * @param multiplicity - Multiplicity delta of this change
     * @param result - Output accumulator; receives `[[key, [value, index]], +1 | -1]` entries
     */
    processElement(key, value, multiplicity, result) {
        const oldMultiplicity = this.#index.getMultiplicity(key, value);
        this.#index.addValue(key, [value, multiplicity]);
        const newMultiplicity = this.#index.getMultiplicity(key, value);
        let res = {
            moveIn: null,
            moveOut: null,
        };
        if (oldMultiplicity <= 0 && newMultiplicity > 0) {
            // The value was invisible but should now be visible
            // Need to insert it into the array of sorted values
            res = this.#topK.insert(tagValue(value));
        }
        else if (oldMultiplicity > 0 && newMultiplicity <= 0) {
            // The value was visible but should now be invisible
            // Need to remove it from the array of sorted values
            res = this.#topK.delete(tagValue(value));
        }
        // Otherwise the value stays invisible or stays visible,
        // so the topK is not affected
        if (res.moveIn) {
            // Strip the tie-breaker tag before emitting, keep the fractional index
            const valueWithoutTieBreaker = mapValue(res.moveIn, untagValue);
            result.push([[key, valueWithoutTieBreaker], 1]);
        }
        if (res.moveOut) {
            const valueWithoutTieBreaker = mapValue(res.moveOut, untagValue);
            result.push([[key, valueWithoutTieBreaker], -1]);
        }
    }
}
/**
 * Limits the number of results based on a comparator, with optional offset,
 * and attaches a fractional index to every emitted element.
 *
 * The returned function creates a new output stream on the input stream's
 * graph, places a `TopKWithFractionalIndexOperator` between the input stream
 * and that output, registers both with the graph and returns the output stream.
 *
 * Fractional indexing is used to minimize the number of changes when elements
 * move positions: an element keeps the index it was given on insertion, so
 * only elements entering or leaving the result are emitted.
 *
 * NOTE(review): the operator defined in this file keeps one sorted collection
 * shared by all keys rather than one per key group — confirm the intended
 * grouping before relying on per-key limits. Keying every element by the same
 * value (such as null) orders the entire stream.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing limit and offset properties
 * @returns A piped operator that orders the elements and limits the number of results
 */
export function topKWithFractionalIndex(comparator, options) {
    // Any falsy `options` is replaced by an empty object
    const opts = options ? options : {};
    return function pipeTopK(stream) {
        const { graph } = stream;
        const writer = new DifferenceStreamWriter();
        const output = new StreamBuilder(graph, writer);
        const operatorId = graph.getNextOperatorId();
        const inputReader = stream.connectReader();
        const operator = new TopKWithFractionalIndexOperator(operatorId, inputReader, output.writer, comparator, opts);
        graph.addOperator(operator);
        graph.addStream(output.connectReader());
        return output;
    };
}
/**
 * Pairs a value with its (fractional) index.
 *
 * @param value - The value to wrap
 * @param index - The index to attach to the value
 * @returns The two-element array `[value, index]`
 */
export function indexedValue(value, index) {
    const pair = [value, index];
    return pair;
}
/**
 * Reads the value part (position 0) of a `[value, index]` pair.
 *
 * @param indexedValue - A pair as produced by `indexedValue`
 * @returns The element stored at position 0
 */
export function getValue(indexedValue) {
    const { 0: value } = indexedValue;
    return value;
}
/**
 * Reads the index part (position 1) of a `[value, index]` pair.
 *
 * @param indexedValue - A pair as produced by `indexedValue`
 * @returns The element stored at position 1
 */
export function getIndex(indexedValue) {
    const { 1: index } = indexedValue;
    return index;
}
/**
 * Applies `f` to the value part of a `[value, index]` pair and keeps the index.
 *
 * @param value - A `[value, index]` pair
 * @param f - Transformation applied to the value part
 * @returns A new pair `[f(value), index]`
 */
function mapValue(value, f) {
    const mapped = f(getValue(value));
    const index = getIndex(value);
    return [mapped, index];
}
/**
 * Pairs a value with the id that `globalObjectIdGenerator` reports for it.
 * The id serves as a tie-breaker between values that compare as equal.
 *
 * @param value - The value to tag
 * @returns The two-element array `[value, id]`
 */
function tagValue(value) {
    const tag = globalObjectIdGenerator.getId(value);
    return [value, tag];
}
/**
 * Reads the value part (position 0) of a tie-breaker tagged value.
 *
 * @param tieBreakerTaggedValue - A `[value, tag]` pair
 * @returns The element stored at position 0
 */
function untagValue(tieBreakerTaggedValue) {
    const { 0: value } = tieBreakerTaggedValue;
    return value;
}
/**
 * Reads the tag part (position 1) of a tie-breaker tagged value.
 *
 * @param tieBreakerTaggedValue - A `[value, tag]` pair
 * @returns The element stored at position 1
 */
function getTag(tieBreakerTaggedValue) {
    const { 1: tag } = tieBreakerTaggedValue;
    return tag;
}
//# sourceMappingURL=topKWithFractionalIndex.js.map