UNPKG

prom-utils

Version:

Promise utilities: rate limiting, queueing/batching, defer, etc.

847 lines (795 loc) 24.9 kB
import _debug from 'debug' import { clamp, shuffle, sumBy } from 'lodash' import makeError from 'make-error' import { size } from 'obj-walker' import { AddOptions, AsyncIter, Deferred, GetTimeframe, IteratorFailure, IteratorSuccess, QueueOptions, QueueOptionsParallel, QueueResult, QueueResultParallel, RateLimiter, SlidingWindow, ThroughputLimiterOptions, WaitOptions, } from './types' const debugRL = _debug('prom-utils:rateLimit') const debugTL = _debug('prom-utils:throughputLimiter') const debugBQ = _debug('prom-utils:batchQueue') const debugBQP = _debug('prom-utils:batchQueueParallel') const debugPM = _debug('prom-utils:pacemaker') const debugWU = _debug('prom-utils:waitUntil') const debugP = _debug('prom-utils:pausable') // Error classes export const OptionsError = makeError('OptionsError') export const TimeoutError = makeError('TimeoutError') /** * Limit the concurrency of promises. This can be used to control * how many requests are made to a server, for example. Note: * exceptions will be swallowed in order to prevent an UnhandledPromiseRejection * from being thrown in the case where the promise rejects before the limit is * reached. Therefore, you must handle exceptions on a per promise basis. * Wrapping `rateLimit` method calls in a try/catch will not work. You can * set `limit` to Infinity to disregard the limit. * * To limit the promises for a given period of time, pass one or more rate limiters. * Each rate limiter can specify `maxItemsPerPeriod` and other * throughput options. For example, the following limits the number of concurrent * requests to 5 and ensures that the rate never exceeds 75 requests per minute. 
* * @example * ```typescript * const limiter = rateLimit(5, { maxItemsPerPeriod: 75, period: ms('1m') }) * for (const url of urls) { * // Will wait for one promise to finish if limit is reached * await limiter.add(fetch(url)) * } * // Wait for unresolved promises to resolve * await limiter.finish() * ``` */ export const rateLimit = <T = unknown>( concurrency: number, ...limiters: RateLimiter[] ) => { debugRL('concurrency: %d', concurrency) // Set of promises const set = new Set<Promise<T>>() // Create throughput limiters for each rate limiter configuration const throughputLimiters = limiters.map((options) => { const { maxItemsPerPeriod, period = 1000 } = options debugRL('limiter: %o', { maxItemsPerPeriod, period }) return throughputLimiter(maxItemsPerPeriod, { // Allow for high throughput at the start of the period getTimeframe: getTimeframeUsingPeriod, // Expire items after the period expireAfter: period, period, // Ensure that the sliding window accurately captures all items for the period maxWindowLength: maxItemsPerPeriod, ...options, }) }) /** * Add a promise. Waits for one promise to resolve if limit is met or for * throughput to drop below threshold if any rate limiters are configured. * Optionally, set `bypass` to true to bypass async waiting. */ const add = async (prom: Promise<T>, options: AddOptions = {}) => { // Add to set set.add(prom) debugRL('add called. 
set size: %d', set.size) // Create a child promise // See: https://runkit.com/dubiousdavid/handling-promise-rejections prom.then( () => { debugRL('resolved') // Remove from the set set.delete(prom) }, // Handle the exception so we don't throw an UnhandledPromiseRejection exception () => { debugRL('rejected') // Remove from the set set.delete(prom) } ) // Bypass async waiting if (options.bypass) { return } // Apply all throughput limiters if (throughputLimiters.length > 0) { // Wait for all throughput limiters to drop below their thresholds await Promise.all( throughputLimiters.map((limiter) => limiter.appendAndThrottle(1)) ) } // Max concurrency was reached if (set.size === concurrency) { debugRL('limit reached: %d', concurrency) // Wait for one item to finish await Promise.race(set) } } /** * Get stats for all rate limiters */ const getStats = () => ({ itemsPerPeriod: throughputLimiters.map((limiter) => limiter.getCurrentRate() ), }) /** * Wait for all promises to resolve */ const finish = async () => { debugRL('finish') await Promise.allSettled(set) } return { add, finish, /** Number of pending promises. */ get length() { return set.size }, getStats, } } /** * Return the elapsed time since the first entry in the sliding window. * This evenly distributes the rate over the period. */ export const getTimeframeUsingElapsed: GetTimeframe = (slidingWindow) => { const { timestamp } = slidingWindow[0] return new Date().getTime() - timestamp } /** * Return the elapsed time since the first entry in the sliding window or the period, * whichever is greater. This allows for high throughput at the start of the period. 
*/ export const getTimeframeUsingPeriod: GetTimeframe = ( slidingWindow, { period } ) => { const { timestamp } = slidingWindow[0] const elapsedSinceStartOfWindow = new Date().getTime() - timestamp return Math.max(period, elapsedSinceStartOfWindow) } const getTLDefaults = ( maxUnitsPerPeriod: number, options: ThroughputLimiterOptions ) => { const _options = { period: 1000, minWindowLength: 1, expireAfter: Infinity, getTimeframe: getTimeframeUsingElapsed, ...options, } const minWindowLength = _options.minWindowLength const _maxWindowLength = options.maxWindowLength || 3 // Ensure that maxWindowLength is at least minWindowLength const maxWindowLength = _maxWindowLength < minWindowLength ? minWindowLength : _maxWindowLength return { ..._options, maxWindowLength, // Ensure the sleep time is granular enough but between 1 and 500 ms sleepTime: clamp(_options.period / maxUnitsPerPeriod, 1, 500), } } /** * * Limit throughput by sleeping until the rate (units/period) * is less than `maxUnitsPerPeriod`. Units and period are * intentionally abstract since it could represent requests/min or bytes/sec, * for example. 
 *
 * @example
 * ```typescript
 * // Limit to at most 1000 items/sec
 * const limiter = throughputLimiter(1000)
 *
 * for(const batch of batches) {
 *   // Will wait until the rate is < `maxUnitsPerPeriod`
 *   await limiter.throttleAndAppend(batch.length)
 *   console.log('Items/sec %d', limiter.getCurrentRate())
 * }
 * ```
 */
export const throughputLimiter = (
  maxUnitsPerPeriod: number,
  options: ThroughputLimiterOptions = {}
) => {
  // Sliding window of { timestamp, numUnits } entries, oldest first
  const slidingWindow: SlidingWindow = []
  const optionsWithDefaults = getTLDefaults(maxUnitsPerPeriod, options)
  const {
    period,
    minWindowLength,
    maxWindowLength,
    sleepTime,
    expireAfter,
    getTimeframe,
  } = optionsWithDefaults
  debugTL('init - maxUnitsPerPeriod %d', maxUnitsPerPeriod)
  debugTL('init - period %d ms', period)
  debugTL('init - minWindowLength %d', minWindowLength)
  debugTL('init - maxWindowLength %d', maxWindowLength)
  debugTL('init - sleepTime %d ms', sleepTime)
  debugTL('init - expireAfter %d ms', expireAfter)
  // With both set to Infinity the window would grow without bound
  if (maxWindowLength === Infinity && expireAfter === Infinity) {
    throw new OptionsError(
      'maxWindowLength and expireAfter cannot both be Infinity'
    )
  }
  /**
   * Remove expired invocations from the sliding window.
   */
  const cleanupExpired = () => {
    debugTL('cleanupExpired called')
    // Remove expired invocations
    if (expireAfter !== Infinity) {
      // Get the current time
      const now = new Date().getTime()
      // Remove invocations that are older than expireAfter. When the window
      // is empty, `now - undefined` is NaN and the comparison is false, so
      // the loop terminates safely.
      while (now - slidingWindow[0]?.timestamp > expireAfter) {
        const shifted = slidingWindow.shift()
        debugTL('removed expired: %o', shifted)
      }
    }
  }
  /**
   * Get the current rate (units/period). The rate is determined by averaging
   * the values in the sliding window where the elapsed time is determined by
   * the configured `getTimeframe` strategy.
   *
   * @returns The current rate (units/period). Rate will be zero if the window
   * length is less than `minWindowLength`.
   */
  const getCurrentRate = () => {
    debugTL('getCurrentRate called')
    // Remove expired invocations
    cleanupExpired()
    // Calculate the rate only once the window has enough samples
    if (slidingWindow.length >= minWindowLength) {
      const numUnits = sumBy(slidingWindow, 'numUnits')
      const timeframe = getTimeframe(slidingWindow, optionsWithDefaults)
      debugTL('total units %d', numUnits)
      debugTL('timeframe %d ms', timeframe)
      // Normalize units over the number of periods covered by the timeframe
      const rate = numUnits / (timeframe / period)
      debugTL('current rate %d units/period', rate)
      return rate
    }
    debugTL('current rate 0')
    return 0
  }
  /**
   * Call before processing a batch of units. After the first call, a
   * subsequent call assumes that the `numUnits` from the previous call were
   * processed. A call to `throttle` may sleep for a given period of time
   * depending on `maxUnitsPerPeriod` and the total number of units over the
   * current window.
   */
  const throttle = async () => {
    debugTL('throttle called')
    // Skip check if maxUnitsPerPeriod is Infinity
    if (maxUnitsPerPeriod === Infinity) {
      debugTL('exiting throttle - maxUnitsPerPeriod is Infinity')
      return
    }
    let throttleTime = 0
    // Check the rate, sleep, and repeat until the rate is less than
    // maxUnitsPerPeriod
    while (getCurrentRate() >= maxUnitsPerPeriod) {
      debugTL('sleeping for %d ms', sleepTime)
      await sleep(sleepTime)
      throttleTime += sleepTime
    }
    debugTL('throttled for %d ms', throttleTime)
  }
  /**
   * Append the number of units to the sliding window. Throttle
   * must be called separately to ensure that the rate stays below
   * `maxUnitsPerPeriod`.
   */
  const append = (numUnits: number) => {
    debugTL('append called with %d unit(s)', numUnits)
    // Get the current time
    const now = new Date().getTime()
    // Add the current invocation to the sliding window
    slidingWindow.push({ timestamp: now, numUnits })
    // Truncate the sliding window according to the window length
    if (slidingWindow.length > maxWindowLength) {
      const shifted = slidingWindow.shift()
      debugTL('removed due to length: %o', shifted)
    }
    debugTL('slidingWindow: %o', slidingWindow)
  }
  /**
   * This method is a combination of `throttle` and `append`. It will throttle
   * first and then append the number of units to the sliding window.
   */
  const throttleAndAppend = async (numUnits: number) => {
    await throttle()
    append(numUnits)
  }
  /**
   * This method is a combination of `append` and `throttle`. It will append
   * the number of units to the sliding window and then throttle.
   */
  const appendAndThrottle = async (numUnits: number) => {
    append(numUnits)
    await throttle()
  }
  return {
    getCurrentRate,
    throttle,
    append,
    throttleAndAppend,
    appendAndThrottle,
  }
}

/**
 * Batch calls via a local queue. This can be used to batch values before
 * writing to a database, for example.
 *
 * Calls `fn` when either `batchSize`, `batchBytes`, or `timeout` is reached.
 * `batchSize` defaults to 500 and therefore will always be in effect if
 * no options are provided. You can pass `Infinity` to disregard `batchSize`.
 * If `timeout` is passed, the timer will be started when the first item is
 * enqueued and reset when `flush` is called explicitly or implicitly.
 *
 * Use `maxItemsPerSec` and/or `maxBytesPerSec` to limit throughput.
 * Call `queue.getStats()` to get the items/sec and bytes/sec rates.
 *
 * Call `queue.flush()` to flush explicitly.
 *
 * The last result of calling `fn` can be obtained by referencing `lastResult`
 * on the returned object.
 *
 * The cause of the last automatic queue flush can be obtained by referencing
 * `lastFlush` on the returned object.
 *
 * ```typescript
 * const writeToDatabase = async (records) => {...}
 *
 * const queue = batchQueue(writeToDatabase)
 * for (const record of records) {
 *   // Will call `fn` when a threshold is met
 *   await queue.enqueue(record)
 * }
 * // Call `fn` with remaining queued items
 * await queue.flush()
 * ```
 */
export function batchQueue<A, B>(
  fn: (arr: A[]) => B,
  options: QueueOptions = {}
) {
  const {
    batchSize = 500,
    batchBytes,
    timeout,
    maxItemsPerSec = Infinity,
    maxBytesPerSec = Infinity,
  } = options
  debugBQ('options %o', options)
  let queue: A[] = []
  // Timer for the timeout-initiated flush
  let timeoutId: ReturnType<typeof setTimeout>
  // Holds an in-flight flush started by the timeout callback so that
  // `enqueue`/`flush` can wait for it before touching the queue
  let prom: Promise<unknown>
  // Running byte total of the current queue (only maintained when a
  // bytes-related option is set)
  let bytes = 0
  // Limiters
  const itemsLimiter = throughputLimiter(maxItemsPerSec)
  const bytesLimiter = throughputLimiter(maxBytesPerSec)
  /**
   * Call fn on queue and clear the queue. A delay may occur before fn is
   * called if `maxItemsPerSec` or `maxBytesPerSec` are set and one of the
   * rates is above the given threshold.
   */
  const flush = async () => {
    debugBQ('flush called - queue length %d', queue.length)
    // Clear the timeout
    clearTimeout(timeoutId)
    debugBQ('clearTimeout called')
    // Wait for a timeout initiated flush to complete
    await prom
    // Queue is not empty
    if (queue.length) {
      // Wait for the throughput to drop below thresholds for items/sec
      // and bytes/sec limiters.
      await Promise.all([itemsLimiter.throttle(), bytesLimiter.throttle()])
      // Call fn with queue
      const result = await fn(queue)
      debugBQ('fn called')
      // Append the number of items and bytes to the limiters
      itemsLimiter.append(queue.length)
      bytesLimiter.append(bytes)
      // Set the last result
      obj.lastResult = result
      // Reset the queue
      queue = []
      // Reset the size
      bytes = 0
      debugBQ('queue reset')
    }
  }
  /**
   * Enqueue an item. If the batch size is reached wait
   * for queue to be flushed.
   */
  const enqueue = async (item: A) => {
    debugBQ('enqueue called')
    // Wait for a timeout initiated flush to complete
    await prom
    // Start a timer if timeout is set and the queue is empty
    if (timeout && queue.length === 0) {
      timeoutId = setTimeout(() => {
        debugBQ('setTimeout cb')
        obj.lastFlush = { timeout }
        prom = flush()
      }, timeout)
      debugBQ('setTimeout called')
    }
    // Add item to queue
    queue.push(item)
    // Calculate total bytes if a bytes-related option is set
    if (batchBytes || maxBytesPerSec < Infinity) {
      bytes += size(item)
      debugBQ('bytes %d', bytes)
    }
    // Batch size reached
    if (queue.length === batchSize) {
      debugBQ('batchSize reached %d', queue.length)
      obj.lastFlush = { batchSize }
      // Wait for queue to be flushed
      await flush()
    }
    // Batch bytes reached
    else if (batchBytes && bytes >= batchBytes) {
      debugBQ('batchBytes reached %d', bytes)
      obj.lastFlush = { batchBytes }
      // Wait for queue to be flushed
      await flush()
    }
  }
  /**
   * Get stats for the two limiters. These will be zero if the
   * corresponding option is not enabled.
   * @returns The current items/sec and bytes/sec values.
   */
  const getStats = () => ({
    itemsPerSec: itemsLimiter.getCurrentRate(),
    bytesPerSec: bytesLimiter.getCurrentRate(),
  })
  const obj: QueueResult<A, B> = {
    flush,
    enqueue,
    getStats,
    get length() {
      return queue.length
    },
  }
  return obj
}

/**
 * Batch calls via a local queue. This can be used to batch values before
 * writing to a database, for example. Unlike `batchQueue`, this is safe to
 * be called concurrently. In particular, you can pair `rateLimit` with this.
 *
 * Calls `fn` when either `batchSize` or `batchBytes` is reached.
 * `batchSize` defaults to 500 and therefore will always be in effect if
 * no options are provided. You can pass `Infinity` to disregard `batchSize`.
 *
 * Call `queue.flush()` to flush explicitly.
*/ export function batchQueueParallel<A, B>( fn: (arr: A[]) => B, options: QueueOptionsParallel = {} ) { const { batchSize = 500, batchBytes } = options debugBQP('options %o', options) let queue: A[] = [] let bytes = 0 const results: B[] = [] /** * Call fn on queue and clear the queue */ const flush = () => { debugBQP('flush called - queue length %d', queue.length) // Queue is not empty if (queue.length) { // Call fn with queue results.push(fn(queue)) debugBQP('fn called') // Reset the queue queue = [] // Reset the size bytes = 0 debugBQP('queue reset') } } /** * Enqueue an item. If a threshold is reached flush queue immediately. */ const enqueue = (item: A) => { debugBQP('enqueue called') // Add item to queue queue.push(item) // Calculate total bytes if a bytes-related option is set if (batchBytes) { bytes += size(item) debugBQP('bytes %d', bytes) } // Batch size reached if (queue.length === batchSize) { debugBQP('batchSize reached %d', queue.length) obj.lastFlush = { batchSize } // Flush queue flush() } // Batch bytes reached else if (batchBytes && bytes >= batchBytes) { debugBQP('batchBytes reached %d', bytes) obj.lastFlush = { batchBytes } // Flush queue flush() } } const obj: QueueResultParallel<A, B> = { flush, enqueue, results, get length() { return queue.length }, } return obj } /** * Defer resolving a promise until `done` is called. */ export function defer(): Deferred { let done = () => {} const promise = new Promise<void>((resolve) => { // Swap original done fn with promise resolve fn done = () => resolve() }) return { done, promise, } } /** * Pause a loop by awaiting `maybeBlock`. When `pause` is called `maybeBlock` will * return a promise that is resolved when `resume` is called. Otherwise, * `maybeBlock` will return immediately. If `timeout` is passed, `resume` will * be called after `timeout` if it is not manually called first. 
* * ```typescript * const shouldProcess = pausable() * * onSomeCondition(shouldProcess.pause) * onSomeOtherCondition(shouldProcess.resume) * * for (const record of records) { * await shouldProcess.maybeBlock() * await processRecord(record) * } * ``` */ export const pausable = (timeout?: number) => { let deferred: Deferred | undefined let timeoutId: ReturnType<typeof setTimeout> let isPaused = false /** * Change the state to pause. If timeout is passed, that will change * the state to resume for each call to pause after the specified timeout. */ const pause = () => { debugP('pause called') deferred = defer() if (timeout) { timeoutId = setTimeout(() => { debugP('timeout') resume() }, timeout) debugP('setTimeout called') } isPaused = true } /** * Change the state to resume. */ const resume = () => { debugP('resume called') if (timeout) { clearTimeout(timeoutId) debugP('timeout cleared') } deferred?.done() isPaused = false } /** * Should be awaited in a loop. Will block when in a pause state. */ const maybeBlock = () => deferred?.promise return { pause, resume, maybeBlock, get isPaused() { return isPaused }, } } /** * Call heartbeatFn every interval until promise resolves or rejects. * `interval` defaults to 1000. * @returns The value of the resolved promise. */ export const pacemaker = async <T>( heartbeatFn: () => void, promise: Promise<T>, interval = 1000 ) => { const intervalId = setInterval(heartbeatFn, interval) try { return await promise } finally { clearInterval(intervalId) debugPM('interval cleared') } } /** * Wait until the predicate returns a truthy value or the timeout expires. * Will not hang like other implementations found on NPM. * Inspired by https://www.npmjs.com/package/async-wait-until * @returns A promise that resolves with the result of the predicate. * If the timeout expires, the promise will reject with a TimeoutError. 
* * @example * ```typescript * // Wait until a value is returned from Redis * const result = await waitUntil(() => redis.get('someKey'), { timeout: 5000 }) * ``` */ export const waitUntil = <T>( pred: () => Promise<T> | T, options: WaitOptions = {} ) => new Promise<NonNullable<T>>((resolve, reject) => { const checkFrequency = options.checkFrequency || 50 const timeout = options.timeout || 5000 let checkTimer: ReturnType<typeof setTimeout> let timeoutTimer: ReturnType<typeof setTimeout> // Start timeout timer if `timeout` is not set to Infinity if (timeout !== Infinity) { timeoutTimer = setTimeout(() => { debugWU('timeout') clearTimeout(checkTimer) reject(new TimeoutError(`Did not complete in ${timeout} ms`)) }, timeout) } /** * Check the predicate for truthiness. */ const check = async () => { debugWU('check called') try { const result = await pred() if (result) { debugWU('pred returned truthy') clearTimeout(checkTimer) clearTimeout(timeoutTimer) resolve(result) } else { checkLater() } } catch (e) { reject(e) } } /** * Check the predicate after `checkFrequency`. */ const checkLater = () => { debugWU('checkLater called') checkTimer = setTimeout(check, checkFrequency) } check() }) /** * Merges multiple async iterators into a single async iterator. The merged * iterator will yield values as they become available from the input iterators. * The order in which the iterators are checked is randomized to prevent * consistently favoring the first iterator when multiple values are available * simultaneously. If any of the input iterators throws an error, the merged * iterator will throw an error. The merged iterator will terminate when all of * the input iterators have terminated. * * @param iters - the async iterators to merge */ export const multiplex = async function* <T>( ...iters: Array<AsyncIter<T>> ): AsyncIterableIterator<T> { // Convert to Async Iterators const iterators = iters.map((iter) => Symbol.asyncIterator in iter ? 
(iter as AsyncIterable<T>)[Symbol.asyncIterator]() : iter ) // Call next on all iterators in random order to avoid favoring the first iterator const pending = new Map( iterators.map((iterator) => [ iterator, iterator.next().then( (res): IteratorSuccess<T> => ({ res, iterator }), (err): IteratorFailure<T> => ({ err, iterator }) ), ]) ) try { while (pending.size > 0) { // Randomize the order of the iterators const values = shuffle([...pending.values()]) // Wait for the first iterator to resolve const result = await Promise.race(values) const iterator = result.iterator // If it errored, throw the error if ('err' in result) { pending.delete(iterator) throw result.err } // If it's done, remove it from the pending list if (result.res.done) { pending.delete(iterator) } // Otherwise, yield the value and add it back to the pending list else { yield result.res.value pending.set( iterator, iterator.next().then( (res): IteratorSuccess<T> => ({ res, iterator }), (err): IteratorFailure<T> => ({ err, iterator }) ) ) } } } finally { // If we exit the loop, make sure to clean up any remaining iterators // by calling `return`. await Promise.all( [...pending.keys()].map((iterator) => iterator.return?.()) ) } } /** * Sleep for `time` ms before resolving the Promise. */ export const sleep = (time = 0) => new Promise((resolve) => setTimeout(resolve, time)) export const TIMEOUT = Symbol('TIMEOUT') /** * Returns the value of the promise if the promise resolves prior to timeout. * If the timeout happens first, the exported TIMEOUT symbol is returned. * * @example * ```ts * const winner = await raceTimeout(someProm, 5) * if (winner === TIMEOUT) { * // Do something * } * ``` */ export const raceTimeout = <A>(prom: Promise<A>, timeout: number) => Promise.race<A | typeof TIMEOUT>([ prom, new Promise((resolve) => setTimeout(() => resolve(TIMEOUT), timeout)), ])