UNPKG

streaming-iterables

Version:

A collection of utilities for async iterables. Designed to replace your streams.

1,099 lines (1,058 loc) 35.3 kB
(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.streamingIterables = {})); })(this, (function (exports) { 'use strict'; async function* _batch(size, iterable) { let dataBatch = []; for await (const data of iterable) { dataBatch.push(data); if (dataBatch.length === size) { yield dataBatch; dataBatch = []; } } if (dataBatch.length > 0) { yield dataBatch; } } function* _syncBatch(size, iterable) { let dataBatch = []; for (const data of iterable) { dataBatch.push(data); if (dataBatch.length === size) { yield dataBatch; dataBatch = []; } } if (dataBatch.length > 0) { yield dataBatch; } } function batch(size, iterable) { if (iterable === undefined) { return curriedIterable => batch(size, curriedIterable); } if (iterable[Symbol.asyncIterator]) { return _batch(size, iterable); } return _syncBatch(size, iterable); } const TIMEOUT = Symbol('TIMEOUT'); const createTimer = (duration) => { let timeoutId; return [ new Promise(resolve => { timeoutId = setTimeout(() => resolve(TIMEOUT), duration); }), () => { clearTimeout(timeoutId); }, ]; }; // Like `batch` but flushes early if the `timeout` is reached // NOTE: The strategy is to only hold onto a single item for a maximum of `timeout` ms. async function* _batchWithTimeout(size, timeout, iterable) { const iterator = iterable[Symbol.asyncIterator](); let pendingData; let batchData = []; let timer; let clearTimer; const startTimer = () => { deleteTimer(); [timer, clearTimer] = createTimer(timeout); }; const deleteTimer = () => { if (clearTimer) { clearTimer(); } timer = undefined; }; pendingData = iterator.next(); while (true) { const res = await (timer ? Promise.race([pendingData, timer]) : pendingData); if (res === TIMEOUT || res.done) { // Flush early (before we reach the batch size) if (batchData.length) { yield batchData; batchData = []; } deleteTimer(); // And exit appropriately if (res !== TIMEOUT) { // done break; } continue; } // Fetch next item early doors (before we potentially yield) pendingData = iterator.next(); // Then handle the value batchData.push(res.value); if (batchData.length === 1) { // Start timer once we have at least 1 item ready to go startTimer(); } if (batchData.length === size) { yield batchData; batchData = []; deleteTimer(); continue; } } } function batchWithTimeout(size, timeout, iterable) { if (iterable === undefined) { return curriedIterable => batchWithTimeout(size, timeout, curriedIterable); } if (iterable[Symbol.asyncIterator] && timeout !== Infinity) { return _batchWithTimeout(size, timeout, iterable); } // For sync iterables or an infinite timeout, the timeout is irrelevant so just fallback to regular `batch`. return batch(size, iterable); } function getIterator(iterable) { if (typeof iterable.next === 'function') { return iterable; } if (typeof iterable[Symbol.iterator] === 'function') { return iterable[Symbol.iterator](); } if (typeof iterable[Symbol.asyncIterator] === 'function') { return iterable[Symbol.asyncIterator](); } throw new TypeError('"values" does not to conform to any of the iterator or iterable protocols'); } function defer() { let reject; let resolve; const promise = new Promise((resolveFunc, rejectFunc) => { resolve = resolveFunc; reject = rejectFunc; }); return { promise, reject, resolve, }; } function _buffer(size, iterable) { const iterator = getIterator(iterable); const resultQueue = []; const readQueue = []; let reading = false; let ended = false; function fulfillReadQueue() { while (readQueue.length > 0 && resultQueue.length > 0) { const readDeferred = readQueue.shift(); const { error, value } = resultQueue.shift(); if (error) { readDeferred.reject(error); } else { readDeferred.resolve({ done: false, value }); } } while (readQueue.length > 0 && ended) { const { resolve } = readQueue.shift(); resolve({ done: true, value: undefined }); } } async function fillQueue() { if (ended) { return; } if (reading) { return; } if (resultQueue.length >= size) { return; } reading = true; try { const { done, value } = await iterator.next(); if (done) { ended = true; } else { resultQueue.push({ value }); } } catch (error) { ended = true; resultQueue.push({ error }); } fulfillReadQueue(); reading = false; fillQueue(); } async function next() { if (resultQueue.length > 0) { const { error, value } = resultQueue.shift(); if (error) { throw error; } fillQueue(); return { done: false, value }; } if (ended) { return { done: true, value: undefined }; // stupid ts } const deferred = defer(); readQueue.push(deferred); fillQueue(); return deferred.promise; } const asyncIterableIterator = { next, [Symbol.asyncIterator]: () => asyncIterableIterator, }; return asyncIterableIterator; } function* syncBuffer(size, iterable) { const valueQueue = []; let e; try { for (const value of iterable) { valueQueue.push(value); if (valueQueue.length <= size) { continue; } yield valueQueue.shift(); } } catch (error) { e = error; } for (const value of valueQueue) { yield value; } if (e) { throw e; } } function buffer(size, iterable) { if (iterable === undefined) { return curriedIterable => buffer(size, curriedIterable); } if (size === 0) { return iterable; } if (iterable[Symbol.asyncIterator]) { return _buffer(size, iterable); } return syncBuffer(size, iterable); } async function _collect(iterable) { const values = []; for await (const value of iterable) { values.push(value); } return values; } function collect(iterable) { if (iterable[Symbol.asyncIterator]) { return _collect(iterable); } return Array.from(iterable); } async function* _concat(iterables) { for await (const iterable of iterables) { yield* iterable; } } function* _syncConcat(iterables) { for (const iterable of iterables) { yield* iterable; } } function concat(...iterables) { const hasAnyAsync = iterables.find(itr => itr[Symbol.asyncIterator] !== undefined); if (hasAnyAsync) { return _concat(iterables); } else { return _syncConcat(iterables); } } async function _consume(iterable) { for await (const _val of iterable) { // do nothing } } function consume(iterable) { if (iterable[Symbol.asyncIterator]) { return _consume(iterable); } for (const _val of iterable) { // do nothing } } async function* _drop(count, iterable) { let skipped = 0; for await (const val of iterable) { if (skipped < count) { skipped++; continue; } yield await val; } } function* _syncDrop(count, iterable) { let skipped = 0; for (const val of iterable) { if (skipped < count) { skipped++; continue; } yield val; } } function drop(count, iterable) { if (iterable === undefined) { return curriedIterable => drop(count, curriedIterable); } if (iterable[Symbol.asyncIterator]) { return _drop(count, iterable); } return _syncDrop(count, iterable); } async function* _filter(filterFunc, iterable) { for await (const data of iterable) { if (await filterFunc(data)) { yield data; } } } function filter(filterFunc, iterable) { if (iterable === undefined) { return (curriedIterable) => _filter(filterFunc, curriedIterable); } return _filter(filterFunc, iterable); } /** * Returns a new iterator by pulling every item out of `iterable` (and all its sub iterables) and yielding them depth-first. Checks for the iterable interfaces and iterates it if it exists. If the value is a string it is not iterated as that ends up in an infinite loop. Errors from the source `iterable` are raised immediately. *note*: Typescript doesn't have recursive types but you can nest iterables as deep as you like. ```ts import { flatten } from 'streaming-iterables' for await (const item of flatten([1, 2, [3, [4, 5], 6])) { console.log(item) } // 1 // 2 // 3 // 4 // 5 // 6 ``` */ async function* flatten(iterable) { for await (const maybeItr of iterable) { if (maybeItr && typeof maybeItr !== 'string' && (maybeItr[Symbol.iterator] || maybeItr[Symbol.asyncIterator])) { yield* flatten(maybeItr); } else { yield maybeItr; } } } async function* _map(func, iterable) { for await (const val of iterable) { yield await func(val); } } function map(func, iterable) { if (iterable === undefined) { return curriedIterable => _map(func, curriedIterable); } return _map(func, iterable); } function flatMap(func, iterable) { if (iterable === undefined) { return curriedIterable => flatMap(func, curriedIterable); } return filter(i => i !== undefined && i !== null, flatten(map(func, iterable))); } function _flatTransform(concurrency, func, iterable) { const iterator = getIterator(iterable); const resultQueue = []; const readQueue = []; let ended = false; let reading = false; let inflightCount = 0; let lastError = null; function fulfillReadQueue() { while (readQueue.length > 0 && resultQueue.length > 0) { const { resolve } = readQueue.shift(); const value = resultQueue.shift(); resolve({ done: false, value }); } while (readQueue.length > 0 && inflightCount === 0 && ended) { const { resolve, reject } = readQueue.shift(); if (lastError) { reject(lastError); lastError = null; } else { resolve({ done: true, value: undefined }); } } } async function fillQueue() { if (ended) { fulfillReadQueue(); return; } if (reading) { return; } if (inflightCount + resultQueue.length >= concurrency) { return; } reading = true; inflightCount++; try { const { done, value } = await iterator.next(); if (done) { ended = true; inflightCount--; fulfillReadQueue(); } else { mapAndQueue(value); } } catch (error) { ended = true; inflightCount--; lastError = error; fulfillReadQueue(); } reading = false; fillQueue(); } async function mapAndQueue(itrValue) { try { const value = await func(itrValue); if (value && value[Symbol.asyncIterator]) { for await (const asyncVal of value) { resultQueue.push(asyncVal); } } else { resultQueue.push(value); } } catch (error) { ended = true; lastError = error; } inflightCount--; fulfillReadQueue(); fillQueue(); } async function next() { if (resultQueue.length === 0) { const deferred = defer(); readQueue.push(deferred); fillQueue(); return deferred.promise; } const value = resultQueue.shift(); fillQueue(); return { done: false, value }; } const asyncIterableIterator = { next, [Symbol.asyncIterator]: () => asyncIterableIterator, }; return asyncIterableIterator; } function flatTransform(concurrency, func, iterable) { if (func === undefined) { return (curriedFunc, curriedIterable) => curriedIterable ? flatTransform(concurrency, curriedFunc, curriedIterable) : flatTransform(concurrency, curriedFunc); } if (iterable === undefined) { return (curriedIterable) => flatTransform(concurrency, func, curriedIterable); } return filter(i => i !== undefined && i !== null, flatten(_flatTransform(concurrency, func, iterable))); } async function onceReadable(stream) { return new Promise(resolve => { stream.once('readable', () => { resolve(); }); }); } async function* _fromStream(stream) { while (true) { const data = stream.read(); if (data !== null) { yield data; continue; } if (stream._readableState.ended) { break; } await onceReadable(stream); } } /** * Wraps the stream in an async iterator or returns the stream if it already is an async iterator. *note*: Since Node 10, streams already async iterators. This function may be used to ensure compatibility with older versions of Node. ```ts import { fromStream } from 'streaming-iterables' import { createReadStream } from 'fs' const pokeLog = fromStream(createReadStream('./pokedex-operating-system.log')) for await (const pokeData of pokeLog) { console.log(pokeData) // Buffer(...) } ``` * @deprecated This method is deprecated since, node 10 is out of LTS. It may be removed in an upcoming major release. */ function fromStream(stream) { if (typeof stream[Symbol.asyncIterator] === 'function') { return stream; } return _fromStream(stream); } /** * Combine multiple iterators into a single iterable. Reads one item off each iterable in order repeatedly until they are all exhausted. If you care less about order and want them faster see `parallelMerge()`. */ async function* merge(...iterables) { const sources = new Set(iterables.map(getIterator)); while (sources.size > 0) { for (const iterator of sources) { const nextVal = await iterator.next(); if (nextVal.done) { sources.delete(iterator); } else { yield nextVal.value; } } } } function pipeline(firstFn, ...fns) { let previousFn = firstFn(); for (const func of fns) { previousFn = func(previousFn); } return previousFn; } async function* _parallelMap(concurrency, func, iterable) { let transformError = null; const wrapFunc = value => ({ value: func(value), }); const stopOnError = async function* (source) { for await (const value of source) { if (transformError) { return; } yield value; } }; const output = pipeline(() => iterable, buffer(1), stopOnError, map(wrapFunc), buffer(concurrency - 1)); const itr = getIterator(output); while (true) { const { value, done } = await itr.next(); if (done) { break; } try { const val = await value.value; if (!transformError) { yield val; } } catch (error) { transformError = error; } } if (transformError) { throw transformError; } } function parallelMap(concurrency, func, iterable) { if (func === undefined) { return (curriedFunc, curriedIterable) => parallelMap(concurrency, curriedFunc, curriedIterable); } if (iterable === undefined) { return curriedIterable => parallelMap(concurrency, func, curriedIterable); } if (concurrency === 1) { return map(func, iterable); } return _parallelMap(concurrency, func, iterable); } function parallelFlatMap(concurrency, func, iterable) { if (func === undefined) { return (curriedFunc, curriedIterable) => curriedIterable ? parallelFlatMap(concurrency, curriedFunc, curriedIterable) : parallelFlatMap(concurrency, curriedFunc); } if (iterable === undefined) { return (curriedIterable) => parallelFlatMap(concurrency, func, curriedIterable); } return filter(i => i !== undefined && i !== null, flatten(parallelMap(concurrency, func, iterable))); } /** *Combine multiple iterators into a single iterable. Reads one item off of every iterable and yields them as they resolve. This is useful for pulling items out of a collection of iterables as soon as they're available. Errors `iterables` are raised immediately. ```ts import { parallelMerge } from 'streaming-iterables' import { getPokemon, getTransformer } from 'iterable-pokedex' // pokemon are much faster to load btw const heros = parallelMerge(getPokemon(), getTransformer()) for await (const hero of heros) { console.log(hero) } // charmander // bulbasaur // megatron // pikachu // eevee // bumblebee // jazz ``` */ async function* parallelMerge(...iterables) { const inputs = iterables.map(getIterator); const concurrentWork = new Set(); const values = new Map(); let lastError = null; let errCb = null; let valueCb = null; const notifyError = err => { lastError = err; if (errCb) { errCb(err); } }; const notifyDone = value => { if (valueCb) { valueCb(value); } }; const waitForQueue = () => new Promise((resolve, reject) => { if (lastError) { reject(lastError); } if (values.size > 0) { return resolve(); } valueCb = resolve; errCb = reject; }); const queueNext = input => { const nextVal = Promise.resolve(input.next()).then(async ({ done, value }) => { if (!done) { values.set(input, value); } concurrentWork.delete(nextVal); }); concurrentWork.add(nextVal); nextVal.then(notifyDone, notifyError); }; for (const input of inputs) { queueNext(input); } while (true) { // We technically don't have to check `values.size` as the for loop should have emptied it // However I haven't yet found specs verifying that behavior, only tests // the guard in waitForQueue() checking for values is in place for the same reason if (concurrentWork.size === 0 && values.size === 0) { return; } await waitForQueue(); for (const [input, value] of values) { values.delete(input); yield value; queueNext(input); } } } async function _reduce(func, start, iterable) { let value = start; for await (const nextItem of iterable) { value = await func(value, nextItem); } return value; } function reduce(func, start, iterable) { if (start === undefined) { return (curriedStart, curriedIterable) => curriedIterable ? _reduce(func, curriedStart, curriedIterable) : reduce(func, curriedStart); } if (iterable === undefined) { return (curriedIterable) => reduce(func, start, curriedIterable); } return _reduce(func, start, iterable); } async function* _take(count, iterable) { let taken = 0; for await (const val of iterable) { yield await val; taken++; if (taken >= count) { break; } } } function* _syncTake(count, iterable) { let taken = 0; for (const val of iterable) { yield val; taken++; if (taken >= count) { break; } } } function take(count, iterable) { if (iterable === undefined) { return curriedIterable => take(count, curriedIterable); } if (iterable[Symbol.asyncIterator]) { return _take(count, iterable); } return _syncTake(count, iterable); } async function* _takeLast(count, iterable) { const buffer = []; for await (const res of iterable) { buffer.push(res); if (buffer.length > count) { buffer.shift(); } } while (buffer.length) { yield await buffer.shift(); } } function* _syncTakeLast(count, iterable) { const buffer = []; for (const res of iterable) { buffer.push(res); if (buffer.length > count) { buffer.shift(); } } while (buffer.length) { yield buffer.shift(); } } function takeLast(count, iterable) { if (iterable === undefined) { return curriedIterable => takeLast(count, curriedIterable); } if (iterable[Symbol.asyncIterator]) { return _takeLast(count, iterable); } return _syncTakeLast(count, iterable); } async function* _takeWhile(predicate, iterable) { for await (const data of iterable) { if (!await predicate(data)) { return; } yield data; } } function takeWhile(predicate, iterable) { if (iterable === undefined) { return (curriedIterable) => _takeWhile(predicate, curriedIterable); } return _takeWhile(predicate, iterable); } async function* _asyncTap(func, iterable) { for await (const val of iterable) { await func(val); yield val; } } function tap(func, iterable) { if (iterable === undefined) { return (curriedIterable) => _asyncTap(func, curriedIterable); } return _asyncTap(func, iterable); } const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms)); function _throttle(limit, interval, iterable) { if (!Number.isFinite(limit)) { throw new TypeError('Expected `limit` to be a finite number'); } if (limit <= 0) { throw new TypeError('Expected `limit` to be greater than 0'); } if (!Number.isFinite(interval)) { throw new TypeError('Expected `interval` to be a finite number'); } return (async function* __throttle() { let sent = 0; let time; for await (const val of iterable) { if (sent < limit) { if (typeof time === 'undefined') { time = Date.now(); } sent++; yield val; continue; } // Only wait if the interval hasn't already passed while we were // yielding the previous values. const elapsedMs = Date.now() - (time || 0); const waitFor = interval - elapsedMs; if (waitFor > 0) { await sleep(waitFor); } time = Date.now(); sent = 1; yield val; } })(); } function throttle(limit, interval, iterable) { if (iterable === undefined) { return (curriedIterable) => _throttle(limit, interval, curriedIterable); } return _throttle(limit, interval, iterable); } function addTime(a, b) { let seconds = a[0] + b[0]; let nanoseconds = a[1] + b[1]; if (nanoseconds >= 1000000000) { const remainder = nanoseconds % 1000000000; seconds += (nanoseconds - remainder) / 1000000000; nanoseconds = remainder; } return [seconds, nanoseconds]; } async function* _asyncTime(config, iterable) { const itr = iterable[Symbol.asyncIterator](); let total = [0, 0]; while (true) { const start = process.hrtime(); const { value, done } = await itr.next(); const delta = process.hrtime(start); total = addTime(total, delta); if (config.progress) { config.progress(delta, total); } if (done) { if (config.total) { config.total(total); } return value; } yield value; } } function* _syncTime(config, iterable) { const itr = iterable[Symbol.iterator](); let total = [0, 0]; while (true) { const start = process.hrtime(); const { value, done } = itr.next(); const delta = process.hrtime(start); total = addTime(total, delta); if (config.progress) { config.progress(delta, total); } if (done) { if (config.total) { config.total(total); } return value; } yield value; } } function time(config = {}, iterable) { if (iterable === undefined) { return curriedIterable => time(config, curriedIterable); } if (iterable[Symbol.asyncIterator] !== undefined) { return _asyncTime(config, iterable); } else { return _syncTime(config, iterable); } } function _transform(concurrency, func, iterable) { const iterator = getIterator(iterable); const resultQueue = []; const readQueue = []; let ended = false; let reading = false; let inflightCount = 0; let lastError = null; function fulfillReadQueue() { while (readQueue.length > 0 && resultQueue.length > 0) { const { resolve } = readQueue.shift(); const value = resultQueue.shift(); resolve({ done: false, value }); } while (readQueue.length > 0 && inflightCount === 0 && ended) { const { resolve, reject } = readQueue.shift(); if (lastError) { reject(lastError); lastError = null; } else { resolve({ done: true, value: undefined }); } } } async function fillQueue() { if (ended) { fulfillReadQueue(); return; } if (reading) { return; } if (inflightCount + resultQueue.length >= concurrency) { return; } reading = true; inflightCount++; try { const { done, value } = await iterator.next(); if (done) { ended = true; inflightCount--; fulfillReadQueue(); } else { mapAndQueue(value); } } catch (error) { ended = true; inflightCount--; lastError = error; fulfillReadQueue(); } reading = false; fillQueue(); } async function mapAndQueue(itrValue) { try { const value = await func(itrValue); resultQueue.push(value); } catch (error) { ended = true; lastError = error; } inflightCount--; fulfillReadQueue(); fillQueue(); } async function next() { if (resultQueue.length === 0) { const deferred = defer(); readQueue.push(deferred); fillQueue(); return deferred.promise; } const value = resultQueue.shift(); fillQueue(); return { done: false, value }; } const asyncIterableIterator = { next, [Symbol.asyncIterator]: () => asyncIterableIterator, }; return asyncIterableIterator; } function transform(concurrency, func, iterable) { if (func === undefined) { return (curriedFunc, curriedIterable) => curriedIterable ? transform(concurrency, curriedFunc, curriedIterable) : transform(concurrency, curriedFunc); } if (iterable === undefined) { return (curriedIterable) => transform(concurrency, func, curriedIterable); } return _transform(concurrency, func, iterable); } async function _writeToStream(stream, iterable) { let lastError = null; let errCb = null; let drainCb = null; const notifyError = err => { lastError = err; if (errCb) { errCb(err); } }; const notifyDrain = () => { if (drainCb) { drainCb(); } }; const cleanup = () => { stream.removeListener('error', notifyError); stream.removeListener('drain', notifyDrain); }; stream.once('error', notifyError); const waitForDrain = () => new Promise((resolve, reject) => { if (lastError) { return reject(lastError); } stream.once('drain', notifyDrain); drainCb = resolve; errCb = reject; }); for await (const value of iterable) { if (stream.write(value) === false) { await waitForDrain(); } if (lastError) { break; } } cleanup(); if (lastError) { throw lastError; } } function writeToStream(stream, iterable) { if (iterable === undefined) { return (curriedIterable) => _writeToStream(stream, curriedIterable); } return _writeToStream(stream, iterable); } exports.batch = batch; exports.batchWithTimeout = batchWithTimeout; exports.buffer = buffer; exports.collect = collect; exports.concat = concat; exports.consume = consume; exports.drop = drop; exports.filter = filter; exports.flatMap = flatMap; exports.flatTransform = flatTransform; exports.flatten = flatten; exports.fromStream = fromStream; exports.getIterator = getIterator; exports.map = map; exports.merge = merge; exports.parallelFlatMap = parallelFlatMap; exports.parallelMap = parallelMap; exports.parallelMerge = parallelMerge; exports.pipeline = pipeline; exports.reduce = reduce; exports.take = take; exports.takeLast = takeLast; exports.takeWhile = takeWhile; exports.tap = tap; exports.throttle = throttle; exports.time = time; exports.transform = transform; exports.writeToStream = writeToStream; }));