UNPKG

@electric-sql/d2ts

Version:

D2TS is a TypeScript implementation of Differential Dataflow.

514 lines (459 loc) 18.7 kB
import {
  IStreamBuilder,
  DataMessage,
  MessageType,
  KeyValue,
  PipedOperator,
} from '../../types.js'
import {
  DifferenceStreamReader,
  DifferenceStreamWriter,
  UnaryOperator,
} from '../../graph.js'
import { StreamBuilder } from '../../d2.js'
import { MultiSet } from '../../multiset.js'
import { Antichain, Version } from '../../order.js'
import { SQLiteDb, SQLiteStatement } from '../database.js'
import { SQLIndex } from '../version-index.js'
import { generateKeyBetween } from 'fractional-indexing'
import { SQLiteContext } from '../context.js'

/** Options accepted by {@link topKWithFractionalIndex}. */
interface TopKWithFractionalIndexOptions {
  /** Maximum number of elements kept per key (defaults to `Infinity`). */
  limit?: number
  /** Number of leading sorted elements skipped per key (defaults to `0`). */
  offset?: number
  /** SQLite database to use; when omitted it is taken from `SQLiteContext`. */
  db?: SQLiteDb
}

/** Row shape of the per-operator `topKFI_keys_todo_<id>` table. */
interface KeysTodoRow {
  /** Version serialized with `Version.toJSON()`. */
  version: string
  /** Key serialized with `JSON.stringify()`. */
  key: string
}

/**
 * Operator for fractional indexed topK operations.
 *
 * This operator maintains fractional indices for sorted elements
 * and only updates indices when elements move position.
 *
 * Input values and previously emitted `[value, index]` pairs are kept in two
 * `SQLIndex` instances. Keys touched at a version are recorded in a SQLite
 * "todo" table and are processed once the input frontier is no longer
 * less-than-or-equal to that version.
 */
export class TopKWithFractionalIndexOperator<K, V1> extends UnaryOperator<
  [K, V1 | [V1, string]]
> {
  #index: SQLIndex<K, V1>
  #indexOut: SQLIndex<K, [V1, string]>
  #preparedStatements: {
    insertKeyTodo: SQLiteStatement<[string, string]>
    getKeysTodo: SQLiteStatement<[], KeysTodoRow>
    deleteKeysTodo: SQLiteStatement<[string]>
    createKeysTodoTable: SQLiteStatement
    dropKeysTodoTable: SQLiteStatement
  }
  #comparator: (a: V1, b: V1) => number
  #limit: number
  #offset: number

  /**
   * @param id - Operator id; also used to derive the SQLite table names
   * @param inputA - Reader for the keyed input stream
   * @param output - Writer receiving `[key, [value, fractionalIndex]]` diffs
   * @param comparator - Ordering applied to the values of each key
   * @param options - Optional `limit` and `offset` applied to each sorted group
   * @param initialFrontier - Frontier passed to the base operator
   * @param db - SQLite database holding the indexes and the todo table
   */
  constructor(
    id: number,
    inputA: DifferenceStreamReader<[K, V1]>,
    output: DifferenceStreamWriter<[K, [V1, string]]>,
    comparator: (a: V1, b: V1) => number,
    options: TopKWithFractionalIndexOptions,
    initialFrontier: Antichain,
    db: SQLiteDb,
  ) {
    super(id, inputA, output, initialFrontier)
    this.#comparator = comparator
    this.#limit = options.limit ?? Infinity
    this.#offset = options.offset ?? 0

    // Initialize indexes
    this.#index = new SQLIndex<K, V1>(db, `topKFI_index_${id}`)
    this.#indexOut = new SQLIndex<K, [V1, string]>(db, `topKFI_index_out_${id}`)

    // Create tables
    db.exec(`
      CREATE TABLE IF NOT EXISTS topKFI_keys_todo_${id} (
        version TEXT NOT NULL,
        key TEXT NOT NULL,
        PRIMARY KEY (version, key)
      )
    `)

    // Create indexes for better performance
    db.exec(`
      CREATE INDEX IF NOT EXISTS topKFI_keys_todo_${id}_version_idx
      ON topKFI_keys_todo_${id}(version)
    `)

    // Prepare statements
    this.#preparedStatements = {
      createKeysTodoTable: db.prepare(`
        CREATE TABLE IF NOT EXISTS topKFI_keys_todo_${id} (
          version TEXT NOT NULL,
          key TEXT NOT NULL,
          PRIMARY KEY (version, key)
        )
      `),
      dropKeysTodoTable: db.prepare(`
        DROP TABLE IF EXISTS topKFI_keys_todo_${id}
      `),
      insertKeyTodo: db.prepare(`
        INSERT OR IGNORE INTO topKFI_keys_todo_${id} (version, key)
        VALUES (?, ?)
      `),
      getKeysTodo: db.prepare(`
        SELECT version, key FROM topKFI_keys_todo_${id}
      `),
      deleteKeysTodo: db.prepare(`
        DELETE FROM topKFI_keys_todo_${id}
        WHERE version = ?
      `),
    }
  }

  /**
   * Drains pending input messages, then emits output diffs for every todo
   * version that the input frontier has moved past, and finally advances the
   * output frontier and compacts both indexes.
   *
   * @throws Error if a received frontier is not ahead of the current input
   *   frontier, or if the output frontier ends up ahead of the input frontier
   */
  run(): void {
    for (const message of this.inputMessages()) {
      if (message.type === MessageType.DATA) {
        const { version, collection } = message.data as DataMessage<[K, V1]>
        for (const [item, multiplicity] of collection.getInner()) {
          const [key, value] = item
          this.#index.addValue(key, version, [value, multiplicity])

          // Add key to todo list for this version
          this.#preparedStatements.insertKeyTodo.run(
            version.toJSON(),
            JSON.stringify(key),
          )

          // Add key to all join versions
          for (const v2 of this.#index.versions(key)) {
            const joinVersion = version.join(v2)
            this.#preparedStatements.insertKeyTodo.run(
              joinVersion.toJSON(),
              JSON.stringify(key),
            )
          }
        }
      } else if (message.type === MessageType.FRONTIER) {
        const frontier = message.data as Antichain
        if (!this.inputFrontier().lessEqual(frontier)) {
          throw new Error('Invalid frontier update')
        }
        this.setInputFrontier(frontier)
      }
    }

    // Load every (version, key) pair that still needs processing
    const finishedVersionsRows = this.#preparedStatements.getKeysTodo
      .all()
      .map((row) => ({
        version: Version.fromJSON(row.version),
        key: JSON.parse(row.key) as K,
      }))

    // Group by version
    const finishedVersionsMap = new Map<Version, K[]>()
    for (const { version, key } of finishedVersionsRows) {
      const keys = finishedVersionsMap.get(version) || []
      keys.push(key)
      finishedVersionsMap.set(version, keys)
    }

    // Keep only versions that are complete, ordered so earlier versions run first
    const finishedVersions = Array.from(finishedVersionsMap.entries())
      .filter(([version]) => !this.inputFrontier().lessEqualVersion(version))
      .sort((a, b) => (a[0].lessEqual(b[0]) ? -1 : 1))

    for (const [version, keys] of finishedVersions) {
      const result: [[K, [V1, string]], number][] = []

      for (const key of keys) {
        const curr = this.#index.reconstructAt(key, version)
        const currOut = this.#indexOut.reconstructAt(key, version)

        // Sort the current values and apply offset/limit
        const consolidated = new MultiSet(curr).consolidate()
        const sortedValues = consolidated
          .getInner()
          .sort((a, b) => this.#comparator(a[0] as V1, b[0] as V1))
          .slice(this.#offset, this.#offset + this.#limit)

        // Lookup maps keyed by the JSON representation of a value
        const currValueMap = new Map<string, V1>()
        const prevOutputMap = new Map<string, [V1, string]>()

        // Cache so that each value object is stringified only once
        const valueToKey = new Map<V1, string>()

        // Process current values
        for (const [value, multiplicity] of sortedValues) {
          if (multiplicity > 0) {
            let valueKey = valueToKey.get(value)
            if (!valueKey) {
              valueKey = JSON.stringify(value)
              valueToKey.set(value, valueKey)
            }
            currValueMap.set(valueKey, value)
          }
        }

        // Process previous output values
        for (const [[value, index], multiplicity] of currOut) {
          if (multiplicity > 0) {
            let valueKey = valueToKey.get(value)
            if (!valueKey) {
              valueKey = JSON.stringify(value)
              valueToKey.set(value, valueKey)
            }
            prevOutputMap.set(valueKey, [value, index])
          }
        }

        // Find values that are no longer in the result
        for (const [valueKey, [value, index]] of prevOutputMap.entries()) {
          if (!currValueMap.has(valueKey)) {
            // Value is no longer in the result, remove it
            result.push([[key, [value, index]], -1])
            this.#indexOut.addValue(key, version, [[value, index], -1])
          }
        }

        // Fractional index chosen for each value of the current result
        const newIndices = new Map<string, string>()

        // Returns the index of the first element after `position` that has
        // either a freshly decided index or a previously emitted one.
        const findNextIndex = (position: number): string | null => {
          for (let j = position + 1; j < sortedValues.length; j++) {
            const nextValueKey = valueToKey.get(sortedValues[j][0]) as string

            // First check if this element already has a new index
            if (newIndices.has(nextValueKey)) {
              return newIndices.get(nextValueKey) || null
            }

            // Then check if it has an existing index
            const existingNextEntry = prevOutputMap.get(nextValueKey)
            if (existingNextEntry) {
              return existingNextEntry[1]
            }
          }
          return null
        }

        // First pass: reuse existing indices for values that haven't moved.
        // The first element has no lower bound and the last element has no
        // upper bound, so one code path covers first/middle/last positions.
        for (let i = 0; i < sortedValues.length; i++) {
          const valueKey = valueToKey.get(sortedValues[i][0]) as string

          // Only values that were emitted before are handled in this pass
          const existingEntry = prevOutputMap.get(valueKey)
          if (!existingEntry) {
            continue
          }
          const existingIndex = existingEntry[1]

          const lowerBound: string | null =
            i === 0
              ? null
              : newIndices.get(
                  valueToKey.get(sortedValues[i - 1][0]) as string,
                ) || null
          const upperBound = findNextIndex(i)

          const outOfOrder =
            (lowerBound !== null && existingIndex <= lowerBound) ||
            (upperBound !== null && existingIndex >= upperBound)

          newIndices.set(
            valueKey,
            outOfOrder
              ? generateKeyBetween(lowerBound, upperBound)
              : existingIndex,
          )
        }

        // Pre-compute, for each position, the nearest already-known index on
        // either side. This avoids repeated lookups during index generation.
        const validPrevIndices: (string | null)[] = new Array<string | null>(
          sortedValues.length,
        ).fill(null)
        const validNextIndices: (string | null)[] = new Array<string | null>(
          sortedValues.length,
        ).fill(null)

        // Compute next valid indices (scanning from the end towards the start)
        let lastValidNextIndex: string | null = null
        for (let i = sortedValues.length - 1; i >= 0; i--) {
          const valueKey = valueToKey.get(sortedValues[i][0]) as string

          // Set the next index for the current position
          validNextIndices[i] = lastValidNextIndex

          // Update lastValidNextIndex if this element has an index
          if (newIndices.has(valueKey)) {
            lastValidNextIndex = newIndices.get(valueKey) || null
          } else {
            const existingEntry = prevOutputMap.get(valueKey)
            if (existingEntry) {
              lastValidNextIndex = existingEntry[1]
            }
          }
        }

        // Compute previous valid indices (scanning from the start towards the end)
        let lastValidPrevIndex: string | null = null
        for (let i = 0; i < sortedValues.length; i++) {
          const valueKey = valueToKey.get(sortedValues[i][0]) as string

          // Set the previous index for the current position
          validPrevIndices[i] = lastValidPrevIndex

          // Update lastValidPrevIndex if this element has an index
          if (newIndices.has(valueKey)) {
            lastValidPrevIndex = newIndices.get(valueKey) || null
          } else {
            const existingEntry = prevOutputMap.get(valueKey)
            if (existingEntry) {
              lastValidPrevIndex = existingEntry[1]
            }
          }
        }

        // Second pass: assign new indices for values that don't have one yet
        for (let i = 0; i < sortedValues.length; i++) {
          const valueKey = valueToKey.get(sortedValues[i][0]) as string

          if (!newIndices.has(valueKey)) {
            const newIndex = generateKeyBetween(
              validPrevIndices[i],
              validNextIndices[i],
            )
            newIndices.set(valueKey, newIndex)

            // The element just indexed is the immediate left neighbour of
            // position i + 1, so it must become that position's lower bound.
            // Updating it only when the bound was null made consecutive new
            // values that follow an already-indexed value share one index.
            if (i < sortedValues.length - 1) {
              validPrevIndices[i + 1] = newIndex
            }
          }
        }

        // Now create the output with the new indices
        for (let i = 0; i < sortedValues.length; i++) {
          const value = sortedValues[i][0]
          const valueKey = valueToKey.get(value) as string
          const index = newIndices.get(valueKey)!

          // Check if this is a new value or if the index has changed
          const existingEntry = prevOutputMap.get(valueKey)

          if (!existingEntry) {
            // New value
            result.push([[key, [value, index]], 1])
            this.#indexOut.addValue(key, version, [[value, index], 1])
          } else if (existingEntry[1] !== index) {
            // Index has changed, remove old entry and add new one
            result.push([[key, existingEntry], -1])
            result.push([[key, [value, index]], 1])
            this.#indexOut.addValue(key, version, [existingEntry, -1])
            this.#indexOut.addValue(key, version, [[value, index], 1])
          }
          // If the value exists and the index hasn't changed, do nothing
        }
      }

      if (result.length > 0) {
        this.output.sendData(version, new MultiSet(result))
      }
      this.#preparedStatements.deleteKeysTodo.run(version.toJSON())
    }

    if (!this.outputFrontier.lessEqual(this.inputFrontier())) {
      throw new Error('Invalid frontier state')
    }
    if (this.outputFrontier.lessThan(this.inputFrontier())) {
      this.outputFrontier = this.inputFrontier()
      this.output.sendFrontier(this.outputFrontier)
      this.#index.compact(this.outputFrontier)
      this.#indexOut.compact(this.outputFrontier)
    }
  }

  /** Destroys both indexes and drops this operator's todo table. */
  destroy(): void {
    this.#index.destroy()
    this.#indexOut.destroy()
    this.#preparedStatements.dropKeysTodoTable.run()
  }
}

/**
 * Limits the number of results based on a comparator, with optional offset.
 * This works on a keyed stream, where the key is the first element of the tuple.
 * The ordering is within a key group, i.e. elements are sorted within a key group
 * and the limit + offset is applied to that sorted group.
 * To order the entire stream, key by the same value for all elements such as null.
 * Adds a fractional index of the element to the result as [key, [value, index]].
 * This is useful for stable ordering in UIs.
 *
 * @param comparator - A function that compares two elements
 * @param options - An optional object containing `limit`, `offset` and `db`;
 *   when `db` is omitted the database is taken from `SQLiteContext`
 * @returns A piped operator that orders the elements and limits the number of results
 * @throws Error when the operator is applied to a stream and no SQLite database
 *   is available from `options.db` or `SQLiteContext`
 */
export function topKWithFractionalIndex<
  K extends T extends KeyValue<infer K, infer _V> ? K : never,
  V1 extends T extends KeyValue<K, infer V> ? V : never,
  T,
>(
  comparator: (a: V1, b: V1) => number,
  options?: TopKWithFractionalIndexOptions,
): PipedOperator<T, KeyValue<K, [V1, string]>> {
  return (
    stream: IStreamBuilder<T>,
  ): IStreamBuilder<KeyValue<K, [V1, string]>> => {
    // Get database from context if not provided explicitly
    const database = options?.db ?? SQLiteContext.getDb()

    if (!database) {
      throw new Error(
        'SQLite database is required for topKWithFractionalIndex operator. ' +
          'Provide it as a parameter or use withSQLite() to inject it.',
      )
    }

    const output = new StreamBuilder<KeyValue<K, [V1, string]>>(
      stream.graph,
      new DifferenceStreamWriter<KeyValue<K, [V1, string]>>(),
    )
    const operator = new TopKWithFractionalIndexOperator<K, V1>(
      stream.graph.getNextOperatorId(),
      stream.connectReader() as DifferenceStreamReader<KeyValue<K, V1>>,
      output.writer,
      comparator,
      options || {},
      stream.graph.frontier(),
      database,
    )
    stream.graph.addOperator(operator)
    stream.graph.addStream(output.connectReader())
    return output
  }
}