UNPKG

@convex-dev/aggregate

Version:

Convex component to calculate counts and sums of values for efficient aggregation.

491 lines · 18.8 kB
import {
  positionToKey,
  boundToPosition,
  keyToPosition,
  boundsToPositions,
} from "./positions.js";

/**
 * Write data to be aggregated, and read aggregated data.
 *
 * The data structure is effectively a key-value store sorted by key, where the
 * value is an ID and an optional sumValue.
 * 1. The key can be any Convex value (number, string, array, etc.).
 * 2. The ID is a string which should be unique.
 * 3. The sumValue is a number which is aggregated by summing. If not provided,
 *    it's assumed to be zero.
 *
 * Once values have been added to the data structure, you can query for the
 * count and sum of items between a range of keys.
 */
export class Aggregate {
  component;

  constructor(component) {
    this.component = component;
  }

  /// Aggregate queries.

  /**
   * Counts items between the given bounds.
   */
  async count(ctx, ...opts) {
    const { count } = await ctx.runQuery(
      this.component.btree.aggregateBetween,
      {
        ...boundsToPositions(opts[0]?.bounds),
        namespace: namespaceFromOpts(opts),
      },
    );
    return count;
  }

  /**
   * Batch version of count() - counts items for multiple bounds in a single
   * call.
   */
  async countBatch(ctx, queries) {
    const queryArgs = queries.map((query) => {
      if (!query) {
        throw new Error("You must pass bounds and/or namespace");
      }
      const namespace = namespaceFromArg(query);
      const { k1, k2 } = boundsToPositions(query.bounds);
      return { k1, k2, namespace };
    });
    const results = await ctx.runQuery(
      this.component.btree.aggregateBetweenBatch,
      { queries: queryArgs },
    );
    return results.map((result) => result.count);
  }

  /**
   * Adds up the sumValue of items between the given bounds.
   */
  async sum(ctx, ...opts) {
    const { sum } = await ctx.runQuery(
      this.component.btree.aggregateBetween,
      {
        ...boundsToPositions(opts[0]?.bounds),
        namespace: namespaceFromOpts(opts),
      },
    );
    return sum;
  }

  /**
   * Batch version of sum() - sums items for multiple bounds in a single call.
   */
  async sumBatch(ctx, queries) {
    const queryArgs = queries.map((query) => {
      if (!query) {
        throw new Error("You must pass bounds and/or namespace");
      }
      const namespace = namespaceFromArg(query);
      const { k1, k2 } = boundsToPositions(query.bounds);
      return { k1, k2, namespace };
    });
    const results = await ctx.runQuery(
      this.component.btree.aggregateBetweenBatch,
      { queries: queryArgs },
    );
    return results.map((result) => result.sum);
  }

  /**
   * Returns the item at the given offset/index/rank in the order of key,
   * within the bounds. Zero-indexed, so at(0) is the smallest key within the
   * bounds.
   *
   * If offset is negative, it counts from the end of the list, so at(-1) is
   * the item with the largest key within the bounds.
   */
  async at(ctx, offset, ...opts) {
    if (offset < 0) {
      // Negative offsets are translated so that -1 becomes 0-from-the-end.
      const item = await ctx.runQuery(this.component.btree.atNegativeOffset, {
        offset: -offset - 1,
        namespace: namespaceFromOpts(opts),
        ...boundsToPositions(opts[0]?.bounds),
      });
      return btreeItemToAggregateItem(item);
    }
    const item = await ctx.runQuery(this.component.btree.atOffset, {
      offset,
      namespace: namespaceFromOpts(opts),
      ...boundsToPositions(opts[0]?.bounds),
    });
    return btreeItemToAggregateItem(item);
  }

  /**
   * Batch version of at() - returns items at multiple offsets in a single
   * call.
   */
  async atBatch(ctx, queries) {
    const queryArgs = queries.map((q) => ({
      offset: q.offset,
      ...boundsToPositions(q.bounds),
      namespace: namespaceFromArg(q),
    }));
    const results = await ctx.runQuery(this.component.btree.atOffsetBatch, {
      queries: queryArgs,
    });
    return results.map(btreeItemToAggregateItem);
  }

  /**
   * Returns the rank/offset/index of the given key, within the bounds.
   * Specifically, it returns the index of the first item with
   *
   * - key >= the given key if `order` is "asc" (default)
   * - key <= the given key if `order` is "desc"
   */
  async indexOf(ctx, key, ...opts) {
    const { k1, k2 } = boundsToPositions(opts[0]?.bounds);
    if (opts[0]?.order === "desc") {
      return await ctx.runQuery(this.component.btree.offsetUntil, {
        key: boundToPosition("upper", {
          key,
          id: opts[0]?.id,
          inclusive: true,
        }),
        k2,
        namespace: namespaceFromOpts(opts),
      });
    }
    return await ctx.runQuery(this.component.btree.offset, {
      key: boundToPosition("lower", { key, id: opts[0]?.id, inclusive: true }),
      k1,
      namespace: namespaceFromOpts(opts),
    });
  }

  /**
   * @deprecated Use `indexOf` instead.
   */
  async offsetOf(ctx, key, namespace, id, bounds) {
    return this.indexOf(ctx, key, { id, bounds, order: "asc", namespace });
  }

  /**
   * @deprecated Use `indexOf` instead.
   */
  async offsetUntil(ctx, key, namespace, id, bounds) {
    return this.indexOf(ctx, key, { id, bounds, order: "desc", namespace });
  }

  /**
   * Gets the minimum item within the given bounds.
   */
  async min(ctx, ...opts) {
    const { page } = await this.paginate(ctx, {
      namespace: namespaceFromOpts(opts),
      bounds: opts[0]?.bounds,
      order: "asc",
      pageSize: 1,
    });
    return page[0] ?? null;
  }

  /**
   * Gets the maximum item within the given bounds.
   */
  async max(ctx, ...opts) {
    const { page } = await this.paginate(ctx, {
      namespace: namespaceFromOpts(opts),
      bounds: opts[0]?.bounds,
      order: "desc",
      pageSize: 1,
    });
    return page[0] ?? null;
  }

  /**
   * Gets a uniformly random item within the given bounds.
   */
  async random(ctx, ...opts) {
    const count = await this.count(ctx, ...opts);
    if (count === 0) {
      return null;
    }
    const index = Math.floor(Math.random() * count);
    return await this.at(ctx, index, ...opts);
  }

  /**
   * Get a page of items between the given bounds, with a cursor to paginate.
   * Use `iter` to iterate over all items within the bounds.
   */
  async paginate(ctx, ...opts) {
    const order = opts[0]?.order ?? "asc";
    const pageSize = opts[0]?.pageSize ?? 100;
    const {
      page,
      cursor: newCursor,
      isDone,
    } = await ctx.runQuery(this.component.btree.paginate, {
      namespace: namespaceFromOpts(opts),
      ...boundsToPositions(opts[0]?.bounds),
      cursor: opts[0]?.cursor,
      order,
      limit: pageSize,
    });
    return {
      page: page.map(btreeItemToAggregateItem),
      cursor: newCursor,
      isDone,
    };
  }

  /**
   * Example usage:
   * ```ts
   * for await (const item of aggregate.iter(ctx, bounds)) {
   *   console.log(item);
   * }
   * ```
   */
  async *iter(ctx, ...opts) {
    const order = opts[0]?.order ?? "asc";
    const pageSize = opts[0]?.pageSize ?? 100;
    const bounds = opts[0]?.bounds;
    const namespace = namespaceFromOpts(opts);
    let isDone = false;
    let cursor = undefined;
    while (!isDone) {
      const {
        page,
        cursor: newCursor,
        isDone: newIsDone,
      } = await this.paginate(ctx, {
        namespace,
        bounds,
        cursor,
        order,
        pageSize,
      });
      for (const item of page) {
        yield item;
      }
      isDone = newIsDone;
      cursor = newCursor;
    }
  }

  /** Write operations. See {@link DirectAggregate} for docstrings. */

  async _insert(ctx, namespace, key, id, summand) {
    await ctx.runMutation(this.component.public.insert, {
      key: keyToPosition(key, id),
      summand,
      value: id,
      namespace,
    });
  }

  async _delete(ctx, namespace, key, id) {
    await ctx.runMutation(this.component.public.delete_, {
      key: keyToPosition(key, id),
      namespace,
    });
  }

  async _replace(
    ctx,
    currentNamespace,
    currentKey,
    newNamespace,
    newKey,
    id,
    summand,
  ) {
    await ctx.runMutation(this.component.public.replace, {
      currentKey: keyToPosition(currentKey, id),
      newKey: keyToPosition(newKey, id),
      summand,
      value: id,
      namespace: currentNamespace,
      newNamespace,
    });
  }

  async _insertIfDoesNotExist(ctx, namespace, key, id, summand) {
    // An upsert with identical current/new coordinates acts as
    // insert-if-absent.
    await this._replaceOrInsert(
      ctx,
      namespace,
      key,
      namespace,
      key,
      id,
      summand,
    );
  }

  async _deleteIfExists(ctx, namespace, key, id) {
    await ctx.runMutation(this.component.public.deleteIfExists, {
      key: keyToPosition(key, id),
      namespace,
    });
  }

  async _replaceOrInsert(
    ctx,
    currentNamespace,
    currentKey,
    newNamespace,
    newKey,
    id,
    summand,
  ) {
    await ctx.runMutation(this.component.public.replaceOrInsert, {
      currentKey: keyToPosition(currentKey, id),
      newKey: keyToPosition(newKey, id),
      summand,
      value: id,
      namespace: currentNamespace,
      newNamespace,
    });
  }

  /// Initialization and maintenance.

  /**
   * (re-)initialize the data structure, removing all items if it exists.
   *
   * Change the maxNodeSize if provided, otherwise keep it the same.
   * maxNodeSize is how you tune the data structure's width and depth.
   * Larger values can reduce write contention but increase read latency.
   * Default is 16.
   * Set rootLazy = false to eagerly compute aggregates on the root node, which
   * improves aggregation latency at the expense of making all writes contend
   * with each other, so it's only recommended for read-heavy workloads.
   * Default is true.
   */
  async clear(ctx, ...opts) {
    await ctx.runMutation(this.component.public.clear, {
      maxNodeSize: opts[0]?.maxNodeSize,
      rootLazy: opts[0]?.rootLazy,
      namespace: namespaceFromOpts(opts),
    });
  }

  /**
   * If rootLazy is false (the default is true but it can be set to false by
   * `clear`), the aggregates data structure writes to a single root node on
   * every insert/delete/replace, which can cause contention.
   *
   * If your data structure has frequent writes, you can reduce contention by
   * calling makeRootLazy, which removes the frequent writes to the root node.
   * With a lazy root node, updates will only contend with other updates to
   * the same shard of the tree. The number of shards is determined by
   * maxNodeSize, so larger maxNodeSize can also help.
   */
  async makeRootLazy(ctx, namespace) {
    await ctx.runMutation(this.component.public.makeRootLazy, { namespace });
  }

  async paginateNamespaces(ctx, cursor, pageSize = 100) {
    const {
      page,
      cursor: newCursor,
      isDone,
    } = await ctx.runQuery(this.component.btree.paginateNamespaces, {
      cursor,
      limit: pageSize,
    });
    return {
      page: page,
      cursor: newCursor,
      isDone,
    };
  }

  async *iterNamespaces(ctx, pageSize = 100) {
    let isDone = false;
    let cursor = undefined;
    while (!isDone) {
      const {
        page,
        cursor: newCursor,
        isDone: newIsDone,
      } = await this.paginateNamespaces(ctx, cursor, pageSize);
      for (const item of page) {
        yield item ?? undefined;
      }
      isDone = newIsDone;
      cursor = newCursor;
    }
  }

  async clearAll(ctx, opts) {
    for await (const namespace of this.iterNamespaces(ctx)) {
      await this.clear(ctx, { ...opts, namespace });
    }
    // In case there are no namespaces, make sure we create at least one tree,
    // at namespace=undefined. This is where the default settings are stored.
    await this.clear(ctx, { ...opts, namespace: undefined });
  }

  async makeAllRootsLazy(ctx) {
    for await (const namespace of this.iterNamespaces(ctx)) {
      await this.makeRootLazy(ctx, namespace);
    }
  }
}

/**
 * A DirectAggregate is an Aggregate where you can insert, delete, and replace
 * items directly, and keys and IDs can be customized.
 *
 * Contrast with TableAggregate, which follows a table with Triggers and
 * computes keys and sumValues from the table's documents.
 */
export class DirectAggregate extends Aggregate {
  /**
   * Insert a new key into the data structure.
   * The id should be unique.
   * If not provided, the sumValue is assumed to be zero.
   * If the tree does not exist yet, it will be initialized with the default
   * maxNodeSize and lazyRoot=true.
   * If the [key, id] pair already exists, this will throw.
   */
  async insert(ctx, args) {
    await this._insert(
      ctx,
      namespaceFromArg(args),
      args.key,
      args.id,
      args.sumValue,
    );
  }

  /**
   * Delete the key with the given ID from the data structure.
   * Throws if the given key and ID do not exist.
   */
  async delete(ctx, args) {
    await this._delete(ctx, namespaceFromArg(args), args.key, args.id);
  }

  /**
   * Update an existing item in the data structure.
   * This is effectively a delete followed by an insert, but it's performed
   * atomically so it's impossible to view the data structure with the key
   * missing.
   */
  async replace(ctx, currentItem, newItem) {
    await this._replace(
      ctx,
      namespaceFromArg(currentItem),
      currentItem.key,
      namespaceFromArg(newItem),
      newItem.key,
      currentItem.id,
      newItem.sumValue,
    );
  }

  /**
   * Equivalents to `insert`, `delete`, and `replace` where the item may or
   * may not exist.
   * This can be useful for live backfills:
   * 1. Update live writes to use these methods to write into the new
   *    Aggregate.
   * 2. Run a background backfill, paginating over existing data, calling
   *    `insertIfDoesNotExist` on each item.
   * 3. Once the backfill is complete, use `insert`, `delete`, and `replace`
   *    for live writes.
   * 4. Begin using the Aggregate read methods.
   */
  async insertIfDoesNotExist(ctx, args) {
    await this._insertIfDoesNotExist(
      ctx,
      namespaceFromArg(args),
      args.key,
      args.id,
      args.sumValue,
    );
  }

  async deleteIfExists(ctx, args) {
    await this._deleteIfExists(ctx, namespaceFromArg(args), args.key, args.id);
  }

  async replaceOrInsert(ctx, currentItem, newItem) {
    await this._replaceOrInsert(
      ctx,
      namespaceFromArg(currentItem),
      currentItem.key,
      namespaceFromArg(newItem),
      newItem.key,
      currentItem.id,
      newItem.sumValue,
    );
  }
}

/**
 * A TableAggregate follows a table's documents: the sort key, optional
 * sumValue, and optional namespace are all derived from each document via the
 * callbacks supplied in `options`.
 */
export class TableAggregate extends Aggregate {
  options;

  constructor(component, options) {
    super(component);
    this.options = options;
  }

  async insert(ctx, doc) {
    await this._insert(
      ctx,
      this.options.namespace?.(doc),
      this.options.sortKey(doc),
      doc._id,
      this.options.sumValue?.(doc),
    );
  }

  async delete(ctx, doc) {
    await this._delete(
      ctx,
      this.options.namespace?.(doc),
      this.options.sortKey(doc),
      doc._id,
    );
  }

  async replace(ctx, oldDoc, newDoc) {
    await this._replace(
      ctx,
      this.options.namespace?.(oldDoc),
      this.options.sortKey(oldDoc),
      this.options.namespace?.(newDoc),
      this.options.sortKey(newDoc),
      newDoc._id,
      this.options.sumValue?.(newDoc),
    );
  }

  async insertIfDoesNotExist(ctx, doc) {
    await this._insertIfDoesNotExist(
      ctx,
      this.options.namespace?.(doc),
      this.options.sortKey(doc),
      doc._id,
      this.options.sumValue?.(doc),
    );
  }

  async deleteIfExists(ctx, doc) {
    await this._deleteIfExists(
      ctx,
      this.options.namespace?.(doc),
      this.options.sortKey(doc),
      doc._id,
    );
  }

  async replaceOrInsert(ctx, oldDoc, newDoc) {
    await this._replaceOrInsert(
      ctx,
      this.options.namespace?.(oldDoc),
      this.options.sortKey(oldDoc),
      this.options.namespace?.(newDoc),
      this.options.sortKey(newDoc),
      newDoc._id,
      this.options.sumValue?.(newDoc),
    );
  }

  /**
   * Returns the rank/offset/index of the given document, within the bounds.
   * This differs from `indexOf` in that it take the document rather than key.
   * Specifically, it returns the index of the first item with
   *
   * - key >= the given doc's key if `order` is "asc" (default)
   * - key <= the given doc's key if `order` is "desc"
   */
  async indexOfDoc(ctx, doc, opts) {
    const key = this.options.sortKey(doc);
    // Caller-supplied opts intentionally override the derived namespace.
    return this.indexOf(ctx, key, {
      namespace: this.options.namespace?.(doc),
      ...opts,
    });
  }

  trigger() {
    return async (ctx, change) => {
      if (change.operation === "insert") {
        await this.insert(ctx, change.newDoc);
      } else if (change.operation === "update") {
        await this.replace(ctx, change.oldDoc, change.newDoc);
      } else if (change.operation === "delete") {
        await this.delete(ctx, change.oldDoc);
      }
    };
  }

  idempotentTrigger() {
    return async (ctx, change) => {
      if (change.operation === "insert") {
        await this.insertIfDoesNotExist(ctx, change.newDoc);
      } else if (change.operation === "update") {
        await this.replaceOrInsert(ctx, change.oldDoc, change.newDoc);
      } else if (change.operation === "delete") {
        await this.deleteIfExists(ctx, change.oldDoc);
      }
    };
  }
}

/**
 * Converts a raw btree item ({ k: position, s: sum }) into the public
 * { key, id, sumValue } shape.
 */
export function btreeItemToAggregateItem({ k, s }) {
  const { key, id } = positionToKey(k);
  return {
    key,
    id,
    sumValue: s,
  };
}

// Reads a namespace off a single args object; `in` distinguishes an explicit
// `namespace: undefined` from the property being absent (both yield
// undefined).
function namespaceFromArg(args) {
  if ("namespace" in args) {
    return args["namespace"];
  }
  return undefined;
}

// Reads a namespace from a rest-parameter opts array.
function namespaceFromOpts(opts) {
  if (opts.length === 0) {
    // Only possible if Namespace extends undefined, so undefined is the only
    // valid namespace.
    return undefined;
  }
  const [{ namespace }] = opts;
  return namespace;
}
//# sourceMappingURL=index.js.map