UNPKG

@electric-sql/d2mini

Version:

D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.

268 lines 13.2 kB
import { DifferenceStreamWriter, UnaryOperator, } from '../graph.js';
import { StreamBuilder } from '../d2.js';
import { MultiSet } from '../multiset.js';
import { Index } from '../indexes.js';
import { generateKeyBetween } from 'fractional-indexing';
import { hash } from '../utils.js';

/**
 * Computes one fractional index per window position so that the resulting
 * sequence is strictly increasing, reusing existing indices where possible.
 *
 * An existing index is kept only if it is strictly greater than the last kept
 * index before it; every other position receives a fresh key generated
 * between the index assigned to the preceding position and the next kept
 * index after it.
 *
 * @param {Array<string|null>} existingIndices - For each position in sort
 *   order, the index the value had in the previous output, or `null` if the
 *   value was not in the previous output.
 * @returns {string[]} The index to use for each position.
 */
function assignFractionalIndices(existingIndices) {
    const count = existingIndices.length;

    // Forward scan: keep an existing index only while the kept indices stay
    // strictly increasing. This guarantees every later generateKeyBetween call
    // receives a lower bound that is smaller than its upper bound.
    const kept = new Array(count).fill(null);
    let lastKept = null;
    for (let i = 0; i < count; i++) {
        const existing = existingIndices[i];
        if (existing !== null && (lastKept === null || existing > lastKept)) {
            kept[i] = existing;
            lastKept = existing;
        }
    }

    // Backward scan: for each position, the nearest kept index after it.
    const nextKept = new Array(count).fill(null);
    let upcoming = null;
    for (let i = count - 1; i >= 0; i--) {
        nextKept[i] = upcoming;
        if (kept[i] !== null) {
            upcoming = kept[i];
        }
    }

    // Forward scan: fill the gaps. The lower bound is always the index that
    // was just assigned to the previous position, so several consecutive new
    // values receive distinct, increasing keys instead of the same key.
    const assigned = new Array(count);
    let previous = null;
    for (let i = 0; i < count; i++) {
        assigned[i] = kept[i] ?? generateKeyBetween(previous, nextKept[i]);
        previous = assigned[i];
    }
    return assigned;
}

/**
 * Operator for fractional indexed topK operations.
 *
 * For every key it keeps the sorted window `[offset, offset + limit)` of the
 * values seen so far and emits `[key, [value, index]]` changes, where `index`
 * is a fractional index string. Values that stay in the window keep their
 * index; only values that enter, leave, or can no longer keep their index
 * produce output changes.
 */
export class TopKWithFractionalIndexOperator extends UnaryOperator {
    // All input values received so far, per key.
    #index = new Index();
    // The `[value, index]` pairs currently emitted downstream, per key.
    #indexOut = new Index();
    #comparator;
    #limit;
    #offset;

    /**
     * @param id - Operator id, forwarded to UnaryOperator.
     * @param inputA - Input stream reader, forwarded to UnaryOperator.
     * @param output - Output stream writer, forwarded to UnaryOperator.
     * @param comparator - `(a, b) => number` ordering function for values.
     * @param options - Optional `{ limit, offset }`; `limit` defaults to
     *   `Infinity` and `offset` to `0`.
     */
    constructor(id, inputA, output, comparator, options) {
        super(id, inputA, output);
        this.#comparator = comparator;
        this.#limit = options?.limit ?? Infinity;
        this.#offset = options?.offset ?? 0;
    }

    /**
     * Drains the pending input messages, recomputes the window of every key
     * that was touched, and sends the resulting changes as a single MultiSet.
     */
    run() {
        const keysTodo = new Set();
        for (const message of this.inputMessages()) {
            for (const [item, multiplicity] of message.getInner()) {
                const [key, value] = item;
                this.#index.addValue(key, [value, multiplicity]);
                keysTodo.add(key);
            }
        }

        const result = [];
        for (const key of keysTodo) {
            this.#refreshKey(key, result);
        }

        if (result.length > 0) {
            this.output.sendData(new MultiSet(result));
        }

        // Compact both indexes after the run.
        // NOTE(review): compact() is called without arguments; whether that
        // restricts work to changed keys depends on Index.compact - confirm.
        this.#index.compact();
        this.#indexOut.compact();
    }

    /**
     * Recomputes the window for one key, appends the output changes to
     * `result` and mirrors them into the output index.
     *
     * @param key - The key whose window is recomputed.
     * @param {Array} result - Accumulator of `[[key, [value, index]], diff]`.
     */
    #refreshKey(key, result) {
        const curr = this.#index.get(key);
        const currOut = this.#indexOut.get(key);

        // Only values that are actually present (positive multiplicity) may
        // occupy a slot of the window, so filter before sorting and slicing.
        const windowEntries = new MultiSet(curr)
            .consolidate()
            .getInner()
            .filter(([, multiplicity]) => multiplicity > 0)
            .sort((a, b) => this.#comparator(a[0], b[0]))
            .slice(this.#offset, this.#offset + this.#limit);

        // Hash each distinct value reference at most once.
        const keyCache = new Map();
        const keyOf = (value) => {
            let valueKey = keyCache.get(value);
            if (valueKey === undefined) {
                valueKey = hash(value);
                keyCache.set(value, valueKey);
            }
            return valueKey;
        };

        // Distinct values are assumed to hash to distinct keys; the previous
        // output is matched to the new window purely through these keys.
        const windowKeys = windowEntries.map(([value]) => keyOf(value));
        const inWindow = new Set(windowKeys);

        // Previous output for this key: valueKey -> [value, index].
        const prevOutputMap = new Map();
        for (const [[value, index], multiplicity] of currOut) {
            if (multiplicity > 0) {
                prevOutputMap.set(keyOf(value), [value, index]);
            }
        }

        // Retract everything that dropped out of the window.
        for (const [valueKey, [value, index]] of prevOutputMap) {
            if (!inWindow.has(valueKey)) {
                result.push([[key, [value, index]], -1]);
                this.#indexOut.addValue(key, [[value, index], -1]);
            }
        }

        const indices = assignFractionalIndices(
            windowKeys.map((valueKey) => prevOutputMap.get(valueKey)?.[1] ?? null),
        );

        // Emit additions for new values and a retract/insert pair for values
        // whose index changed; unchanged values produce no output.
        windowEntries.forEach(([value], position) => {
            const index = indices[position];
            const existingEntry = prevOutputMap.get(windowKeys[position]);
            if (!existingEntry) {
                result.push([[key, [value, index]], 1]);
                this.#indexOut.addValue(key, [[value, index], 1]);
            } else if (existingEntry[1] !== index) {
                result.push([[key, existingEntry], -1]);
                result.push([[key, [value, index]], 1]);
                this.#indexOut.addValue(key, [existingEntry, -1]);
                this.#indexOut.addValue(key, [[value, index], 1]);
            }
        });
    }
}

/**
 * Limits the number of results based on a comparator, with optional offset.
 * This works on a keyed stream, where the key is the first element of the tuple.
 * The ordering is within a key group, i.e. elements are sorted within a key group
 * and the limit + offset is applied to that sorted group.
 * To order the entire stream, key by the same value for all elements such as null.
 *
 * Uses fractional indexing to minimize the number of changes when elements move positions.
 * Each element is assigned a fractional index that is lexicographically sortable.
 * When elements move, only the indices of the moved elements are updated, not all elements.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing limit and offset properties
 * @returns A piped operator that orders the elements and limits the number of results
 */
export function topKWithFractionalIndex(comparator, options) {
    const opts = options ?? {};
    return (stream) => {
        const output = new StreamBuilder(stream.graph, new DifferenceStreamWriter());
        const operator = new TopKWithFractionalIndexOperator(
            stream.graph.getNextOperatorId(),
            stream.connectReader(),
            output.writer,
            comparator,
            opts,
        );
        stream.graph.addOperator(operator);
        stream.graph.addStream(output.connectReader());
        return output;
    };
}
//# sourceMappingURL=topKWithFractionalIndex.js.map