UNPKG

@tanstack/db-ivm

Version:

Incremental View Maintenance for TanStack DB based on Differential Dataflow

1 lines 16.9 kB
{"version":3,"file":"topKWithFractionalIndex.cjs","sources":["../../../src/operators/topKWithFractionalIndex.ts"],"sourcesContent":["import { generateKeyBetween } from \"fractional-indexing\"\nimport { DifferenceStreamWriter, UnaryOperator } from \"../graph.js\"\nimport { StreamBuilder } from \"../d2.js\"\nimport { MultiSet } from \"../multiset.js\"\nimport { Index } from \"../indexes.js\"\nimport { binarySearch, globalObjectIdGenerator } from \"../utils.js\"\nimport type { DifferenceStreamReader } from \"../graph.js\"\nimport type { IStreamBuilder, KeyValue, PipedOperator } from \"../types.js\"\n\nexport interface TopKWithFractionalIndexOptions {\n limit?: number\n offset?: number\n}\n\nexport type TopKChanges<V> = {\n /** Indicates which element moves into the topK (if any) */\n moveIn: IndexedValue<V> | null\n /** Indicates which element moves out of the topK (if any) */\n moveOut: IndexedValue<V> | null\n}\n\n/**\n * A topK data structure that supports insertions and deletions\n * and returns changes to the topK.\n */\nexport interface TopK<V> {\n insert: (value: V) => TopKChanges<V>\n delete: (value: V) => TopKChanges<V>\n}\n\n/**\n * Implementation of a topK data structure.\n * Uses a sorted array internally to store the values and keeps a topK window over that array.\n * Inserts and deletes are O(n) operations because worst case an element is inserted/deleted\n * at the start of the array which causes all the elements to shift to the right/left.\n */\nclass TopKArray<V> implements TopK<V> {\n #sortedValues: Array<IndexedValue<V>> = []\n #comparator: (a: V, b: V) => number\n #topKStart: number\n #topKEnd: number\n\n constructor(\n offset: number,\n limit: number,\n comparator: (a: V, b: V) => number\n ) {\n this.#topKStart = offset\n this.#topKEnd = offset + limit\n this.#comparator = comparator\n }\n\n insert(value: V): TopKChanges<V> {\n const result: TopKChanges<V> = { moveIn: null, moveOut: null }\n\n // Lookup insert position\n const index = this.#findIndex(value)\n // Generate fractional index based on the fractional indices of the elements before and after it\n const indexBefore =\n index === 0 ? null : getIndex(this.#sortedValues[index - 1]!)\n const indexAfter =\n index === this.#sortedValues.length\n ? null\n : getIndex(this.#sortedValues[index]!)\n const fractionalIndex = generateKeyBetween(indexBefore, indexAfter)\n\n // Insert the value at the correct position\n const val = indexedValue(value, fractionalIndex)\n // Splice is O(n) where n = all elements in the collection (i.e. n >= k) !\n this.#sortedValues.splice(index, 0, val)\n\n // Check if the topK changed\n if (index < this.#topKEnd) {\n // The inserted element is either before the top K or within the top K\n // If it is before the top K then it moves the element that was right before the topK into the topK\n // If it is within the top K then the inserted element moves into the top K\n // In both cases the last element of the old top K now moves out of the top K\n const moveInIndex = Math.max(index, this.#topKStart)\n if (moveInIndex < this.#sortedValues.length) {\n // We actually have a topK\n // because in some cases there may not be enough elements in the array to reach the start of the topK\n // e.g. [1, 2, 3] with K = 2 and offset = 3 does not have a topK\n result.moveIn = this.#sortedValues[moveInIndex]!\n\n // We need to remove the element that falls out of the top K\n // The element that falls out of the top K has shifted one to the right\n // because of the element we inserted, so we find it at index topKEnd\n if (this.#topKEnd < this.#sortedValues.length) {\n result.moveOut = this.#sortedValues[this.#topKEnd]!\n }\n }\n }\n\n return result\n }\n\n /**\n * Deletes a value that may or may not be in the topK.\n * IMPORTANT: this assumes that the value is present in the collection\n * if it's not the case it will remove the element\n * that is on the position where the provided `value` would be.\n */\n delete(value: V): TopKChanges<V> {\n const result: TopKChanges<V> = { moveIn: null, moveOut: null }\n\n // Lookup delete position\n const index = this.#findIndex(value)\n // Remove the value at that position\n const [removedElem] = this.#sortedValues.splice(index, 1)\n\n // Check if the topK changed\n if (index < this.#topKEnd) {\n // The removed element is either before the top K or within the top K\n // If it is before the top K then the first element of the topK moves out of the topK\n // If it is within the top K then the removed element moves out of the topK\n result.moveOut = removedElem!\n if (index < this.#topKStart) {\n // The removed element is before the topK\n // so actually, the first element of the topK moves out of the topK\n // and not the element that we removed\n // The first element of the topK is now at index topKStart - 1\n // since we removed an element before the topK\n const moveOutIndex = this.#topKStart - 1\n if (moveOutIndex < this.#sortedValues.length) {\n result.moveOut = this.#sortedValues[moveOutIndex]!\n } else {\n // No value is moving out of the topK\n // because there are no elements in the topK\n result.moveOut = null\n }\n }\n\n // Since we removed an element that was before or in the topK\n // the first element after the topK moved one position to the left\n // and thus falls into the topK now\n const moveInIndex = this.#topKEnd - 1\n if (moveInIndex < this.#sortedValues.length) {\n result.moveIn = this.#sortedValues[moveInIndex]!\n }\n }\n\n return result\n }\n\n // TODO: see if there is a way to refactor the code for insert and delete in the topK above\n // because they are very similar, one is shifting the topK window to the left and the other is shifting it to the right\n // so i have the feeling there is a common pattern here and we can implement both cases using that pattern\n\n #findIndex(value: V): number {\n return binarySearch(this.#sortedValues, indexedValue(value, ``), (a, b) =>\n this.#comparator(getValue(a), getValue(b))\n )\n }\n}\n\n/**\n * Operator for fractional indexed topK operations\n * This operator maintains fractional indices for sorted elements\n * and only updates indices when elements move position\n */\nexport class TopKWithFractionalIndexOperator<K, V1> extends UnaryOperator<\n [K, V1],\n [K, IndexedValue<V1>]\n> {\n #index = new Index<K, V1>()\n\n /**\n * topK data structure that supports insertions and deletions\n * and returns changes to the topK.\n */\n #topK: TopK<TaggedValue<V1>>\n\n constructor(\n id: number,\n inputA: DifferenceStreamReader<[K, V1]>,\n output: DifferenceStreamWriter<[K, [V1, string]]>,\n comparator: (a: V1, b: V1) => number,\n options: TopKWithFractionalIndexOptions\n ) {\n super(id, inputA, output)\n const limit = options.limit ?? Infinity\n const offset = options.offset ?? 0\n const compareTaggedValues = (a: TaggedValue<V1>, b: TaggedValue<V1>) => {\n // First compare on the value\n const valueComparison = comparator(untagValue(a), untagValue(b))\n if (valueComparison !== 0) {\n return valueComparison\n }\n // If the values are equal, compare on the tag (object identity)\n const tieBreakerA = getTag(a)\n const tieBreakerB = getTag(b)\n return tieBreakerA - tieBreakerB\n }\n this.#topK = this.createTopK(offset, limit, compareTaggedValues)\n }\n\n protected createTopK(\n offset: number,\n limit: number,\n comparator: (a: TaggedValue<V1>, b: TaggedValue<V1>) => number\n ): TopK<TaggedValue<V1>> {\n return new TopKArray(offset, limit, comparator)\n }\n\n run(): void {\n const result: Array<[[K, [V1, string]], number]> = []\n for (const message of this.inputMessages()) {\n for (const [item, multiplicity] of message.getInner()) {\n const [key, value] = item\n this.processElement(key, value, multiplicity, result)\n }\n }\n\n if (result.length > 0) {\n this.output.sendData(new MultiSet(result))\n }\n }\n\n processElement(\n key: K,\n value: V1,\n multiplicity: number,\n result: Array<[[K, [V1, string]], number]>\n ): void {\n const oldMultiplicity = this.#index.getMultiplicity(key, value)\n this.#index.addValue(key, [value, multiplicity])\n const newMultiplicity = this.#index.getMultiplicity(key, value)\n\n let res: TopKChanges<TaggedValue<V1>> = {\n moveIn: null,\n moveOut: null,\n }\n if (oldMultiplicity <= 0 && newMultiplicity > 0) {\n // The value was invisible but should now be visible\n // Need to insert it into the array of sorted values\n const taggedValue = tagValue(value)\n res = this.#topK.insert(taggedValue)\n } else if (oldMultiplicity > 0 && newMultiplicity <= 0) {\n // The value was visible but should now be invisible\n // Need to remove it from the array of sorted values\n const taggedValue = tagValue(value)\n res = this.#topK.delete(taggedValue)\n } else {\n // The value was invisible and it remains invisible\n // or it was visible and remains visible\n // so it doesn't affect the topK\n }\n\n if (res.moveIn) {\n const valueWithoutTieBreaker = mapValue(res.moveIn, untagValue)\n result.push([[key, valueWithoutTieBreaker], 1])\n }\n\n if (res.moveOut) {\n const valueWithoutTieBreaker = mapValue(res.moveOut, untagValue)\n result.push([[key, valueWithoutTieBreaker], -1])\n }\n\n return\n }\n}\n\n/**\n * Limits the number of results based on a comparator, with optional offset.\n * This works on a keyed stream, where the key is the first element of the tuple.\n * The ordering is within a key group, i.e. elements are sorted within a key group\n * and the limit + offset is applied to that sorted group.\n * To order the entire stream, key by the same value for all elements such as null.\n *\n * Uses fractional indexing to minimize the number of changes when elements move positions.\n * Each element is assigned a fractional index that is lexicographically sortable.\n * When elements move, only the indices of the moved elements are updated, not all elements.\n *\n * @param comparator - A function that compares two elements\n * @param options - An optional object containing limit and offset properties\n * @returns A piped operator that orders the elements and limits the number of results\n */\nexport function topKWithFractionalIndex<\n KType extends T extends KeyValue<infer K, infer _V> ? K : never,\n V1Type extends T extends KeyValue<KType, infer V> ? V : never,\n T,\n>(\n comparator: (a: V1Type, b: V1Type) => number,\n options?: TopKWithFractionalIndexOptions\n): PipedOperator<T, KeyValue<KType, [V1Type, string]>> {\n const opts = options || {}\n\n return (\n stream: IStreamBuilder<T>\n ): IStreamBuilder<KeyValue<KType, [V1Type, string]>> => {\n const output = new StreamBuilder<KeyValue<KType, [V1Type, string]>>(\n stream.graph,\n new DifferenceStreamWriter<KeyValue<KType, [V1Type, string]>>()\n )\n const operator = new TopKWithFractionalIndexOperator<KType, V1Type>(\n stream.graph.getNextOperatorId(),\n stream.connectReader() as DifferenceStreamReader<KeyValue<KType, V1Type>>,\n output.writer,\n comparator,\n opts\n )\n stream.graph.addOperator(operator)\n stream.graph.addStream(output.connectReader())\n return output\n }\n}\n\n// Abstraction for fractionally indexed values\nexport type FractionalIndex = string\nexport type IndexedValue<V> = [V, FractionalIndex]\n\nexport function indexedValue<V>(\n value: V,\n index: FractionalIndex\n): IndexedValue<V> {\n return [value, index]\n}\n\nexport function getValue<V>(indexedVal: IndexedValue<V>): V {\n return indexedVal[0]\n}\n\nexport function getIndex<V>(indexedVal: IndexedValue<V>): FractionalIndex {\n return indexedVal[1]\n}\n\nfunction mapValue<V, W>(\n indexedVal: IndexedValue<V>,\n f: (value: V) => W\n): IndexedValue<W> {\n return [f(getValue(indexedVal)), getIndex(indexedVal)]\n}\n\nexport type Tag = number\nexport type TaggedValue<V> = [V, Tag]\n\nfunction tagValue<V>(value: V): TaggedValue<V> {\n return [value, globalObjectIdGenerator.getId(value)]\n}\n\nfunction untagValue<V>(tieBreakerTaggedValue: TaggedValue<V>): V {\n return tieBreakerTaggedValue[0]\n}\n\nfunction getTag<V>(tieBreakerTaggedValue: TaggedValue<V>): Tag {\n return tieBreakerTaggedValue[1]\n}\n"],"names":["generateKeyBetween","binarySearch","UnaryOperator","Index","MultiSet","StreamBuilder","DifferenceStreamWriter","globalObjectIdGenerator"],"mappings":";;;;;;;;;;;;;;;;;AAoCA,MAAM,UAAgC;AAAA,EAMpC,YACE,QACA,OACA,YACA;AAVJ;AACE,sCAAwC,CAAA;AACxC;AACA;AACA;AAOE,uBAAK,YAAa;AAClB,uBAAK,UAAW,SAAS;AACzB,uBAAK,aAAc;AAAA,EACrB;AAAA,EAEA,OAAO,OAA0B;AAC/B,UAAM,SAAyB,EAAE,QAAQ,MAAM,SAAS,KAAA;AAGxD,UAAM,QAAQ,sBAAK,oCAAL,WAAgB;AAE9B,UAAM,cACJ,UAAU,IAAI,OAAO,SAAS,mBAAK,eAAc,QAAQ,CAAC,CAAE;AAC9D,UAAM,aACJ,UAAU,mBAAK,eAAc,SACzB,OACA,SAAS,mBAAK,eAAc,KAAK,CAAE;AACzC,UAAM,kBAAkBA,mBAAAA,mBAAmB,aAAa,UAAU;AAGlE,UAAM,MAAM,aAAa,OAAO,eAAe;AAE/C,uBAAK,eAAc,OAAO,OAAO,GAAG,GAAG;AAGvC,QAAI,QAAQ,mBAAK,WAAU;AAKzB,YAAM,cAAc,KAAK,IAAI,OAAO,mBAAK,WAAU;AACnD,UAAI,cAAc,mBAAK,eAAc,QAAQ;AAI3C,eAAO,SAAS,mBAAK,eAAc,WAAW;AAK9C,YAAI,mBAAK,YAAW,mBAAK,eAAc,QAAQ;AAC7C,iBAAO,UAAU,mBAAK,eAAc,mBAAK,SAAQ;AAAA,QACnD;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,OAAO,OAA0B;AAC/B,UAAM,SAAyB,EAAE,QAAQ,MAAM,SAAS,KAAA;AAGxD,UAAM,QAAQ,sBAAK,oCAAL,WAAgB;AAE9B,UAAM,CAAC,WAAW,IAAI,mBAAK,eAAc,OAAO,OAAO,CAAC;AAGxD,QAAI,QAAQ,mBAAK,WAAU;AAIzB,aAAO,UAAU;AACjB,UAAI,QAAQ,mBAAK,aAAY;AAM3B,cAAM,eAAe,mBAAK,cAAa;AACvC,YAAI,eAAe,mBAAK,eAAc,QAAQ;AAC5C,iBAAO,UAAU,mBAAK,eAAc,YAAY;AAAA,QAClD,OAAO;AAGL,iBAAO,UAAU;AAAA,QACnB;AAAA,MACF;AAKA,YAAM,cAAc,mBAAK,YAAW;AACpC,UAAI,cAAc,mBAAK,eAAc,QAAQ;AAC3C,eAAO,SAAS,mBAAK,eAAc,WAAW;AAAA,MAChD;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAWF;AApHE;AACA;AACA;AACA;AAJF;AAAA;AAAA;AAAA;AAgHE,wBAAW,OAAkB;AAC3B,SAAOC,MAAAA;AAAAA,IAAa,mBAAK;AAAA,IAAe,aAAa,OAAO,EAAE;AAAA,IAAG,CAAC,GAAG,MACnE,mBAAK,aAAL,WAAiB,SAAS,CAAC,GAAG,SAAS,CAAC;AAAA,EAAC;AAE7C;AAQK,MAAM,wCAA+CC,MAAAA,cAG1D;AAAA,EASA,YACE,IACA,QACA,QACA,YACA,SACA;AACA,UAAM,IAAI,QAAQ,MAAM;AAf1B,+BAAS,IAAIC,QAAAA,MAAA;AAMb;AAAA;AAAA;AAAA;AAAA;AAUE,UAAM,QAAQ,QAAQ,SAAS;AAC/B,UAAM,SAAS,QAAQ,UAAU;AACjC,UAAM,sBAAsB,CAAC,GAAoB,MAAuB;AAEtE,YAAM,kBAAkB,WAAW,WAAW,CAAC,GAAG,WAAW,CAAC,CAAC;AAC/D,UAAI,oBAAoB,GAAG;AACzB,eAAO;AAAA,MACT;AAEA,YAAM,cAAc,OAAO,CAAC;AAC5B,YAAM,cAAc,OAAO,CAAC;AAC5B,aAAO,cAAc;AAAA,IACvB;AACA,uBAAK,OAAQ,KAAK,WAAW,QAAQ,OAAO,mBAAmB;AAAA,EACjE;AAAA,EAEU,WACR,QACA,OACA,YACuB;AACvB,WAAO,IAAI,UAAU,QAAQ,OAAO,UAAU;AAAA,EAChD;AAAA,EAEA,MAAY;AACV,UAAM,SAA6C,CAAA;AACnD,eAAW,WAAW,KAAK,iBAAiB;AAC1C,iBAAW,CAAC,MAAM,YAAY,KAAK,QAAQ,YAAY;AACrD,cAAM,CAAC,KAAK,KAAK,IAAI;AACrB,aAAK,eAAe,KAAK,OAAO,cAAc,MAAM;AAAA,MACtD;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,GAAG;AACrB,WAAK,OAAO,SAAS,IAAIC,SAAAA,SAAS,MAAM,CAAC;AAAA,IAC3C;AAAA,EACF;AAAA,EAEA,eACE,KACA,OACA,cACA,QACM;AACN,UAAM,kBAAkB,mBAAK,QAAO,gBAAgB,KAAK,KAAK;AAC9D,uBAAK,QAAO,SAAS,KAAK,CAAC,OAAO,YAAY,CAAC;AAC/C,UAAM,kBAAkB,mBAAK,QAAO,gBAAgB,KAAK,KAAK;AAE9D,QAAI,MAAoC;AAAA,MACtC,QAAQ;AAAA,MACR,SAAS;AAAA,IAAA;AAEX,QAAI,mBAAmB,KAAK,kBAAkB,GAAG;AAG/C,YAAM,cAAc,SAAS,KAAK;AAClC,YAAM,mBAAK,OAAM,OAAO,WAAW;AAAA,IACrC,WAAW,kBAAkB,KAAK,mBAAmB,GAAG;AAGtD,YAAM,cAAc,SAAS,KAAK;AAClC,YAAM,mBAAK,OAAM,OAAO,WAAW;AAAA,IACrC,MAAO;AAMP,QAAI,IAAI,QAAQ;AACd,YAAM,yBAAyB,SAAS,IAAI,QAAQ,UAAU;AAC9D,aAAO,KAAK,CAAC,CAAC,KAAK,sBAAsB,GAAG,CAAC,CAAC;AAAA,IAChD;AAEA,QAAI,IAAI,SAAS;AACf,YAAM,yBAAyB,SAAS,IAAI,SAAS,UAAU;AAC/D,aAAO,KAAK,CAAC,CAAC,KAAK,sBAAsB,GAAG,EAAE,CAAC;AAAA,IACjD;AAEA;AAAA,EACF;AACF;AAhGE;AAMA;AA2GK,SAAS,wBAKd,YACA,SACqD;AACrD,QAAM,OAAO,WAAW,CAAA;AAExB,SAAO,CACL,WACsD;AACtD,UAAM,SAAS,IAAIC,GAAAA;AAAAA,MACjB,OAAO;AAAA,MACP,IAAIC,MAAAA,uBAAA;AAAA,IAA0D;AAEhE,UAAM,WAAW,IAAI;AAAA,MACnB,OAAO,MAAM,kBAAA;AAAA,MACb,OAAO,cAAA;AAAA,MACP,OAAO;AAAA,MACP;AAAA,MACA;AAAA,IAAA;AAEF,WAAO,MAAM,YAAY,QAAQ;AACjC,WAAO,MAAM,UAAU,OAAO,cAAA,CAAe;AAC7C,WAAO;AAAA,EACT;AACF;AAMO,SAAS,aACd,OACA,OACiB;AACjB,SAAO,CAAC,OAAO,KAAK;AACtB;AAEO,SAAS,SAAY,YAAgC;AAC1D,SAAO,WAAW,CAAC;AACrB;AAEO,SAAS,SAAY,YAA8C;AACxE,SAAO,WAAW,CAAC;AACrB;AAEA,SAAS,SACP,YACA,GACiB;AACjB,SAAO,CAAC,EAAE,SAAS,UAAU,CAAC,GAAG,SAAS,UAAU,CAAC;AACvD;AAKA,SAAS,SAAY,OAA0B;AAC7C,SAAO,CAAC,OAAOC,MAAAA,wBAAwB,MAAM,KAAK,CAAC;AACrD;AAEA,SAAS,WAAc,uBAA0C;AAC/D,SAAO,sBAAsB,CAAC;AAChC;AAEA,SAAS,OAAU,uBAA4C;AAC7D,SAAO,sBAAsB,CAAC;AAChC;;;;;;"}