UNPKG

@electric-sql/d2ts

Version:

D2TS is a TypeScript implementation of Differential Dataflow.

389 lines 20.1 kB
import { MessageType, } from '../../types.js';
import { DifferenceStreamWriter, UnaryOperator, } from '../../graph.js';
import { StreamBuilder } from '../../d2.js';
import { MultiSet } from '../../multiset.js';
import { Version } from '../../order.js';
import { SQLIndex } from '../version-index.js';
import { generateKeyBetween } from 'fractional-indexing';
import { SQLiteContext } from '../context.js';

/**
 * Assigns a fractional index to every entry of an ordered list of value keys.
 *
 * Entries that already had an index in the previous output keep it as long as
 * it still sorts strictly between their neighbours; every other entry receives
 * a freshly generated index.
 *
 * @param orderedKeys - Value keys in their final sorted order
 * @param prevOutputMap - Map of value key -> [value, index] from the previous output
 * @returns Map of value key -> fractional index for every entry of `orderedKeys`
 */
function assignFractionalIndices(orderedKeys, prevOutputMap) {
    const newIndices = new Map();
    const count = orderedKeys.length;

    // Pass 1: walk the previously emitted values and reuse their index when it
    // is still consistent with the surrounding indices.
    for (let i = 0; i < count; i++) {
        const valueKey = orderedKeys[i];
        const existingEntry = prevOutputMap.get(valueKey);
        if (!existingEntry) {
            continue;
        }
        const existingIndex = existingEntry[1];
        // Lower bound: index already settled for the direct predecessor, if any.
        const prevIndex = i > 0 ? (newIndices.get(orderedKeys[i - 1]) ?? null) : null;
        // Upper bound: index of the closest following element that has one.
        let nextIndex = null;
        for (let j = i + 1; j < count; j++) {
            const nextKey = orderedKeys[j];
            if (newIndices.has(nextKey)) {
                nextIndex = newIndices.get(nextKey) ?? null;
                break;
            }
            const nextEntry = prevOutputMap.get(nextKey);
            if (nextEntry) {
                nextIndex = nextEntry[1];
                break;
            }
        }
        const outOfOrder = (prevIndex !== null && existingIndex <= prevIndex) ||
            (nextIndex !== null && existingIndex >= nextIndex);
        newIndices.set(valueKey, outOfOrder ? generateKeyBetween(prevIndex, nextIndex) : existingIndex);
    }

    // For each position, the closest index assigned in pass 1 to a LATER
    // position (scanned from the end towards the start).
    const nextAssigned = new Array(count).fill(null);
    let upcoming = null;
    for (let i = count - 1; i >= 0; i--) {
        nextAssigned[i] = upcoming;
        const valueKey = orderedKeys[i];
        if (newIndices.has(valueKey)) {
            upcoming = newIndices.get(valueKey) ?? null;
        }
    }

    // Pass 2: generate indices for the remaining entries. The lower bound is
    // always the final index of the immediate predecessor; reusing the last
    // pass-1 index instead would hand the same (prev, next) pair - and thus the
    // same generated key - to consecutive new entries.
    let prevIndex = null;
    for (let i = 0; i < count; i++) {
        const valueKey = orderedKeys[i];
        if (!newIndices.has(valueKey)) {
            newIndices.set(valueKey, generateKeyBetween(prevIndex, nextAssigned[i]));
        }
        prevIndex = newIndices.get(valueKey) ?? null;
    }
    return newIndices;
}

/**
 * Operator for fractional indexed topK operations
 * This operator maintains fractional indices for sorted elements
 * and only updates indices when elements move position
 */
export class TopKWithFractionalIndexOperator extends UnaryOperator {
    #index;
    #indexOut;
    #preparedStatements;
    #comparator;
    #limit;
    #offset;

    /**
     * @param id - Operator id; also used to name the SQLite tables of this operator
     * @param inputA - Input reader, forwarded to UnaryOperator
     * @param output - Output writer, forwarded to UnaryOperator
     * @param comparator - Function comparing two values, used to sort within a key
     * @param options - Object with optional `limit` (default Infinity) and `offset` (default 0)
     * @param initialFrontier - Initial frontier, forwarded to UnaryOperator
     * @param db - SQLite database handle exposing `exec` and `prepare`
     */
    constructor(id, inputA, output, comparator, options, initialFrontier, db) {
        super(id, inputA, output, initialFrontier);
        this.#comparator = comparator;
        this.#limit = options.limit ?? Infinity;
        this.#offset = options.offset ?? 0;
        // Initialize indexes
        this.#index = new SQLIndex(db, `topKFI_index_${id}`);
        this.#indexOut = new SQLIndex(db, `topKFI_index_out_${id}`);
        // Create tables
        db.exec(`
      CREATE TABLE IF NOT EXISTS topKFI_keys_todo_${id} (
        version TEXT NOT NULL,
        key TEXT NOT NULL,
        PRIMARY KEY (version, key)
      )
    `);
        // Create indexes for better performance
        db.exec(`
      CREATE INDEX IF NOT EXISTS topKFI_keys_todo_${id}_version_idx
      ON topKFI_keys_todo_${id}(version)
    `);
        // Prepare statements
        this.#preparedStatements = {
            createKeysTodoTable: db.prepare(`
        CREATE TABLE IF NOT EXISTS topKFI_keys_todo_${id} (
          version TEXT NOT NULL,
          key TEXT NOT NULL,
          PRIMARY KEY (version, key)
        )
      `),
            dropKeysTodoTable: db.prepare(`
        DROP TABLE IF EXISTS topKFI_keys_todo_${id}
      `),
            insertKeyTodo: db.prepare(`
        INSERT OR IGNORE INTO topKFI_keys_todo_${id} (version, key)
        VALUES (?, ?)
      `),
            getKeysTodo: db.prepare(`
        SELECT version, key FROM topKFI_keys_todo_${id}
      `),
            deleteKeysTodo: db.prepare(`
        DELETE FROM topKFI_keys_todo_${id}
        WHERE version = ?
      `),
        };
    }

    /**
     * Drains pending input messages, emits top-K changes for every version
     * the input frontier has moved past, then advances the output frontier.
     *
     * @throws Error 'Invalid frontier update' when a received frontier is not
     *   ahead of (or equal to) the current input frontier
     * @throws Error 'Invalid frontier state' when the output frontier is not
     *   behind (or equal to) the input frontier
     */
    run() {
        for (const message of this.inputMessages()) {
            if (message.type === MessageType.DATA) {
                const { version, collection } = message.data;
                for (const [item, multiplicity] of collection.getInner()) {
                    const [key, value] = item;
                    const serializedKey = JSON.stringify(key);
                    this.#index.addValue(key, version, [value, multiplicity]);
                    // Add key to todo list for this version
                    this.#preparedStatements.insertKeyTodo.run(version.toJSON(), serializedKey);
                    // Add key to all join versions
                    for (const v2 of this.#index.versions(key)) {
                        const joinVersion = version.join(v2);
                        this.#preparedStatements.insertKeyTodo.run(joinVersion.toJSON(), serializedKey);
                    }
                }
            }
            else if (message.type === MessageType.FRONTIER) {
                const frontier = message.data;
                if (!this.inputFrontier().lessEqual(frontier)) {
                    throw new Error('Invalid frontier update');
                }
                this.setInputFrontier(frontier);
            }
        }

        // Group the pending keys by version. The serialized version string is
        // the grouping key: Map compares object keys by identity, so keying on
        // Version objects deserialized per row would not merge rows that
        // belong to the same version.
        const pendingByVersion = new Map();
        for (const row of this.#preparedStatements.getKeysTodo.all()) {
            let group = pendingByVersion.get(row.version);
            if (group === undefined) {
                group = { version: Version.fromJSON(row.version), keys: [] };
                pendingByVersion.set(row.version, group);
            }
            group.keys.push(JSON.parse(row.key));
        }

        // Keep only versions that are complete, i.e. no longer covered by the input frontier
        const finishedVersions = Array.from(pendingByVersion.values())
            .filter(({ version }) => !this.inputFrontier().lessEqualVersion(version))
            .sort((a, b) => (a.version.lessEqual(b.version) ? -1 : 1));

        for (const { version, keys } of finishedVersions) {
            const result = [];
            for (const key of keys) {
                this.#processKey(key, version, result);
            }
            if (result.length > 0) {
                this.output.sendData(version, new MultiSet(result));
            }
            this.#preparedStatements.deleteKeysTodo.run(version.toJSON());
        }

        if (!this.outputFrontier.lessEqual(this.inputFrontier())) {
            throw new Error('Invalid frontier state');
        }
        if (this.outputFrontier.lessThan(this.inputFrontier())) {
            this.outputFrontier = this.inputFrontier();
            this.output.sendFrontier(this.outputFrontier);
            this.#index.compact(this.outputFrontier);
            this.#indexOut.compact(this.outputFrontier);
        }
    }

    /**
     * Recomputes the top-K window of one key at one version and records the
     * difference against the previously emitted output.
     *
     * @param key - Key of the group being recomputed
     * @param version - Version at which the group is reconstructed
     * @param result - Array receiving the output changes as
     *   [[key, [value, index]], multiplicity]; mutated in place
     */
    #processKey(key, version, result) {
        const curr = this.#index.reconstructAt(key, version);
        const currOut = this.#indexOut.reconstructAt(key, version);

        // Sort the current values and cut out the requested window
        const sortedValues = new MultiSet(curr)
            .consolidate()
            .getInner()
            .sort((a, b) => this.#comparator(a[0], b[0]))
            .slice(this.#offset, this.#offset + this.#limit);

        // Values are matched between current and previous output through their
        // JSON representation; valueToKey caches it per value.
        const currValueMap = new Map();
        const prevOutputMap = new Map();
        const valueToKey = new Map();

        // Process current values
        for (const [value, multiplicity] of sortedValues) {
            if (multiplicity > 0) {
                let valueKey = valueToKey.get(value);
                if (!valueKey) {
                    valueKey = JSON.stringify(value);
                    valueToKey.set(value, valueKey);
                }
                currValueMap.set(valueKey, value);
            }
        }

        // Process previous output values
        for (const [[value, index], multiplicity] of currOut) {
            if (multiplicity > 0) {
                let valueKey = valueToKey.get(value);
                if (!valueKey) {
                    valueKey = JSON.stringify(value);
                    valueToKey.set(value, valueKey);
                }
                prevOutputMap.set(valueKey, [value, index]);
            }
        }

        // Retract values that are no longer in the result
        for (const [valueKey, [value, index]] of prevOutputMap.entries()) {
            if (!currValueMap.has(valueKey)) {
                result.push([[key, [value, index]], -1]);
                this.#indexOut.addValue(key, version, [[value, index], -1]);
            }
        }

        // Assign a fractional index to every value of the window
        const orderedKeys = sortedValues.map(([value]) => valueToKey.get(value));
        const newIndices = assignFractionalIndices(orderedKeys, prevOutputMap);

        // Emit additions and index changes
        for (let i = 0; i < sortedValues.length; i++) {
            const [value] = sortedValues[i];
            const valueKey = orderedKeys[i];
            const index = newIndices.get(valueKey);
            const existingEntry = prevOutputMap.get(valueKey);
            if (!existingEntry) {
                // New value
                result.push([[key, [value, index]], 1]);
                this.#indexOut.addValue(key, version, [[value, index], 1]);
            }
            else if (existingEntry[1] !== index) {
                // Index has changed, remove old entry and add new one
                result.push([[key, existingEntry], -1]);
                result.push([[key, [value, index]], 1]);
                this.#indexOut.addValue(key, version, [existingEntry, -1]);
                this.#indexOut.addValue(key, version, [[value, index], 1]);
            }
            // If the value exists and the index hasn't changed, do nothing
        }
    }

    /**
     * Destroys both indexes and drops the todo table of this operator.
     */
    destroy() {
        this.#index.destroy();
        this.#indexOut.destroy();
        this.#preparedStatements.dropKeysTodoTable.run();
    }
}

/**
 * Limits the number of results based on a comparator, with optional offset.
 * This works on a keyed stream, where the key is the first element of the tuple
 * The ordering is within a key group, i.e. elements are sorted within a key group
 * and the limit + offset is applied to that sorted group.
 * To order the entire stream, key by the same value for all elements such as null.
 * Adds a fractional index of the element to the result as [key, [value, index]]
 * This is useful for stable ordering in UIs.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing limit and offset properties,
 *   and optionally `db`, the SQLite database (otherwise taken from SQLiteContext)
 * @returns A piped operator that orders the elements and limits the number of results
 * @throws Error when no database is given in `options.db` and none is available from SQLiteContext
 */
export function topKWithFractionalIndex(comparator, options) {
    return (stream) => {
        // Get database from context if not provided explicitly
        const database = options?.db || SQLiteContext.getDb();
        if (!database) {
            throw new Error('SQLite database is required for topKWithFractionalIndex operator. ' +
                'Provide it as a parameter or use withSQLite() to inject it.');
        }
        const output = new StreamBuilder(stream.graph, new DifferenceStreamWriter());
        const operator = new TopKWithFractionalIndexOperator(stream.graph.getNextOperatorId(), stream.connectReader(), output.writer, comparator, options || {}, stream.graph.frontier(), database);
        stream.graph.addOperator(operator);
        stream.graph.addStream(output.connectReader());
        return output;
    };
}
//# sourceMappingURL=topKWithFractionalIndex.js.map