streaming-iterables
A collection of utilities for async iterables. Designed to replace your streams.
async function* _batch(size, iterable) {
let dataBatch = [];
for await (const data of iterable) {
dataBatch.push(data);
if (dataBatch.length === size) {
yield dataBatch;
dataBatch = [];
}
}
if (dataBatch.length > 0) {
yield dataBatch;
}
}
function* _syncBatch(size, iterable) {
let dataBatch = [];
for (const data of iterable) {
dataBatch.push(data);
if (dataBatch.length === size) {
yield dataBatch;
dataBatch = [];
}
}
if (dataBatch.length > 0) {
yield dataBatch;
}
}
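/**
 * Groups items from `iterable` into arrays of `size` items; the final batch may be smaller. Works with sync and async iterables, and can be curried by omitting `iterable`.
 * A minimal usage sketch:
```ts
import { batch } from 'streaming-iterables'
for await (const chunk of batch(2, [1, 2, 3, 4, 5])) {
  console.log(chunk) // [1, 2], then [3, 4], then [5]
}
```
 */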
function batch(size, iterable) {
if (iterable === undefined) {
return curriedIterable => batch(size, curriedIterable);
}
if (iterable[Symbol.asyncIterator]) {
return _batch(size, iterable);
}
return _syncBatch(size, iterable);
}
const TIMEOUT = Symbol('TIMEOUT');
const createTimer = (duration) => {
let timeoutId;
return [
new Promise(resolve => {
timeoutId = setTimeout(() => resolve(TIMEOUT), duration);
}),
() => {
clearTimeout(timeoutId);
},
];
};
// Like `batch` but flushes early if the `timeout` is reached
// NOTE: The strategy is to only hold onto a single item for a maximum of `timeout` ms.
async function* _batchWithTimeout(size, timeout, iterable) {
const iterator = iterable[Symbol.asyncIterator]();
let pendingData;
let batchData = [];
let timer;
let clearTimer;
const startTimer = () => {
deleteTimer();
[timer, clearTimer] = createTimer(timeout);
};
const deleteTimer = () => {
if (clearTimer) {
clearTimer();
}
timer = undefined;
};
pendingData = iterator.next();
while (true) {
const res = await (timer ? Promise.race([pendingData, timer]) : pendingData);
if (res === TIMEOUT || res.done) {
// Flush early (before we reach the batch size)
if (batchData.length) {
yield batchData;
batchData = [];
}
deleteTimer();
// And exit appropriately
if (res !== TIMEOUT) {
// done
break;
}
continue;
}
// Fetch the next item early (before we potentially yield)
pendingData = iterator.next();
// Then handle the value
batchData.push(res.value);
if (batchData.length === 1) {
// Start timer once we have at least 1 item ready to go
startTimer();
}
if (batchData.length === size) {
yield batchData;
batchData = [];
deleteTimer();
continue;
}
}
}
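/**
 * Like `batch`, but a partially filled batch is flushed once `timeout` milliseconds have passed since its first item arrived. Sync iterables and an `Infinity` timeout fall back to plain `batch`.
 * A minimal usage sketch:
```ts
import { batchWithTimeout } from 'streaming-iterables'
// a slow source that yields one number every 200ms
async function* slowNumbers() {
  for (let i = 1; i <= 10; i++) {
    await new Promise(resolve => setTimeout(resolve, 200))
    yield i
  }
}
for await (const chunk of batchWithTimeout(4, 500, slowNumbers())) {
  console.log(chunk) // batches of at most 4; a partial batch flushes ~500ms after its first item
}
```
 */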
function batchWithTimeout(size, timeout, iterable) {
if (iterable === undefined) {
return curriedIterable => batchWithTimeout(size, timeout, curriedIterable);
}
if (iterable[Symbol.asyncIterator] && timeout !== Infinity) {
return _batchWithTimeout(size, timeout, iterable);
}
// For sync iterables or an infinite timeout, the timeout is irrelevant, so just fall back to regular `batch`.
return batch(size, iterable);
}
function getIterator(iterable) {
if (typeof iterable.next === 'function') {
return iterable;
}
if (typeof iterable[Symbol.iterator] === 'function') {
return iterable[Symbol.iterator]();
}
if (typeof iterable[Symbol.asyncIterator] === 'function') {
return iterable[Symbol.asyncIterator]();
}
throw new TypeError('"values" does not conform to any of the iterator or iterable protocols');
}
function defer() {
let reject;
let resolve;
const promise = new Promise((resolveFunc, rejectFunc) => {
resolve = resolveFunc;
reject = rejectFunc;
});
return {
promise,
reject,
resolve,
};
}
function _buffer(size, iterable) {
const iterator = getIterator(iterable);
const resultQueue = [];
const readQueue = [];
let reading = false;
let ended = false;
function fulfillReadQueue() {
while (readQueue.length > 0 && resultQueue.length > 0) {
const readDeferred = readQueue.shift();
const { error, value } = resultQueue.shift();
if (error) {
readDeferred.reject(error);
}
else {
readDeferred.resolve({ done: false, value });
}
}
while (readQueue.length > 0 && ended) {
const { resolve } = readQueue.shift();
resolve({ done: true, value: undefined });
}
}
async function fillQueue() {
if (ended) {
return;
}
if (reading) {
return;
}
if (resultQueue.length >= size) {
return;
}
reading = true;
try {
const { done, value } = await iterator.next();
if (done) {
ended = true;
}
else {
resultQueue.push({ value });
}
}
catch (error) {
ended = true;
resultQueue.push({ error });
}
fulfillReadQueue();
reading = false;
fillQueue();
}
async function next() {
if (resultQueue.length > 0) {
const { error, value } = resultQueue.shift();
if (error) {
throw error;
}
fillQueue();
return { done: false, value };
}
if (ended) {
return { done: true, value: undefined }; // explicit value keeps TypeScript's IteratorResult type happy
}
const deferred = defer();
readQueue.push(deferred);
fillQueue();
return deferred.promise;
}
const asyncIterableIterator = {
next,
[Symbol.asyncIterator]: () => asyncIterableIterator,
};
return asyncIterableIterator;
}
function* syncBuffer(size, iterable) {
const valueQueue = [];
let e;
try {
for (const value of iterable) {
valueQueue.push(value);
if (valueQueue.length <= size) {
continue;
}
yield valueQueue.shift();
}
}
catch (error) {
e = error;
}
for (const value of valueQueue) {
yield value;
}
if (e) {
throw e;
}
}
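/**
 * Reads up to `size` items ahead of the consumer so a slow source can keep working while downstream code processes each value. `buffer(0, iterable)` returns the iterable unchanged.
 * A minimal usage sketch:
```ts
import { buffer } from 'streaming-iterables'
async function* source() {
  for (let i = 1; i <= 10; i++) yield i
}
for await (const value of buffer(3, source())) {
  console.log(value) // up to 3 values are read ahead while this loop body runs
}
```
 */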
function buffer(size, iterable) {
if (iterable === undefined) {
return curriedIterable => buffer(size, curriedIterable);
}
if (size === 0) {
return iterable;
}
if (iterable[Symbol.asyncIterator]) {
return _buffer(size, iterable);
}
return syncBuffer(size, iterable);
}
async function _collect(iterable) {
const values = [];
for await (const value of iterable) {
values.push(value);
}
return values;
}
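/**
 * Reads every item out of `iterable` and returns them in an array; a promise of an array for async iterables, a plain array for sync ones.
 * A minimal usage sketch:
```ts
import { collect, map } from 'streaming-iterables'
const doubled = await collect(map(async n => n * 2, [1, 2, 3])) // [2, 4, 6]
```
 */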
function collect(iterable) {
if (iterable[Symbol.asyncIterator]) {
return _collect(iterable);
}
return Array.from(iterable);
}
async function* _concat(iterables) {
for await (const iterable of iterables) {
yield* iterable;
}
}
function* _syncConcat(iterables) {
for (const iterable of iterables) {
yield* iterable;
}
}
function concat(...iterables) {
const hasAnyAsync = iterables.find(itr => itr[Symbol.asyncIterator] !== undefined);
if (hasAnyAsync) {
return _concat(iterables);
}
else {
return _syncConcat(iterables);
}
}
async function _consume(iterable) {
for await (const _val of iterable) {
// do nothing
}
}
function consume(iterable) {
if (iterable[Symbol.asyncIterator]) {
return _consume(iterable);
}
for (const _val of iterable) {
// do nothing
}
}
async function* _drop(count, iterable) {
let skipped = 0;
for await (const val of iterable) {
if (skipped < count) {
skipped++;
continue;
}
yield await val;
}
}
function* _syncDrop(count, iterable) {
let skipped = 0;
for (const val of iterable) {
if (skipped < count) {
skipped++;
continue;
}
yield val;
}
}
function drop(count, iterable) {
if (iterable === undefined) {
return curriedIterable => drop(count, curriedIterable);
}
if (iterable[Symbol.asyncIterator]) {
return _drop(count, iterable);
}
return _syncDrop(count, iterable);
}
async function* _filter(filterFunc, iterable) {
for await (const data of iterable) {
if (await filterFunc(data)) {
yield data;
}
}
}
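/**
 * Yields only the items for which the (possibly async) `filterFunc` returns a truthy value. Always returns an async iterable.
 * A minimal usage sketch:
```ts
import { filter, collect } from 'streaming-iterables'
const evens = await collect(filter(async n => n % 2 === 0, [1, 2, 3, 4])) // [2, 4]
```
 */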
function filter(filterFunc, iterable) {
if (iterable === undefined) {
return (curriedIterable) => _filter(filterFunc, curriedIterable);
}
return _filter(filterFunc, iterable);
}
/**
 * Returns a new iterator by pulling every item out of `iterable` (and all its sub iterables) and yielding them depth-first. Checks for the iterable interfaces and iterates anything that implements them. Strings are not iterated, as that would recurse infinitely. Errors from the source `iterable` are raised immediately.
*note*: TypeScript doesn't have recursive types, but you can nest iterables as deep as you like.
```ts
import { flatten } from 'streaming-iterables'
for await (const item of flatten([1, 2, [3, [4, 5], 6]])) {
console.log(item)
}
// 1
// 2
// 3
// 4
// 5
// 6
```
*/
async function* flatten(iterable) {
for await (const maybeItr of iterable) {
if (maybeItr && typeof maybeItr !== 'string' && (maybeItr[Symbol.iterator] || maybeItr[Symbol.asyncIterator])) {
yield* flatten(maybeItr);
}
else {
yield maybeItr;
}
}
}
async function* _map(func, iterable) {
for await (const val of iterable) {
yield await func(val);
}
}
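/**
 * Applies `func` (sync or async) to every item of `iterable` and yields the results in order. Always returns an async iterable.
 * A minimal usage sketch:
```ts
import { map } from 'streaming-iterables'
for await (const n of map(async n => n * 10, [1, 2, 3])) {
  console.log(n) // 10, 20, 30
}
```
 */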
function map(func, iterable) {
if (iterable === undefined) {
return curriedIterable => _map(func, curriedIterable);
}
return _map(func, iterable);
}
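/**
 * Maps each item with `func`, flattens any iterables it returns depth-first, and drops `null` and `undefined` results.
 * A minimal usage sketch:
```ts
import { flatMap, collect } from 'streaming-iterables'
const words = await collect(flatMap(line => line.split(' '), ['a b', 'c'])) // ['a', 'b', 'c']
```
 */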
function flatMap(func, iterable) {
if (iterable === undefined) {
return curriedIterable => flatMap(func, curriedIterable);
}
return filter(i => i !== undefined && i !== null, flatten(map(func, iterable)));
}
function _flatTransform(concurrency, func, iterable) {
const iterator = getIterator(iterable);
const resultQueue = [];
const readQueue = [];
let ended = false;
let reading = false;
let inflightCount = 0;
let lastError = null;
function fulfillReadQueue() {
while (readQueue.length > 0 && resultQueue.length > 0) {
const { resolve } = readQueue.shift();
const value = resultQueue.shift();
resolve({ done: false, value });
}
while (readQueue.length > 0 && inflightCount === 0 && ended) {
const { resolve, reject } = readQueue.shift();
if (lastError) {
reject(lastError);
lastError = null;
}
else {
resolve({ done: true, value: undefined });
}
}
}
async function fillQueue() {
if (ended) {
fulfillReadQueue();
return;
}
if (reading) {
return;
}
if (inflightCount + resultQueue.length >= concurrency) {
return;
}
reading = true;
inflightCount++;
try {
const { done, value } = await iterator.next();
if (done) {
ended = true;
inflightCount--;
fulfillReadQueue();
}
else {
mapAndQueue(value);
}
}
catch (error) {
ended = true;
inflightCount--;
lastError = error;
fulfillReadQueue();
}
reading = false;
fillQueue();
}
async function mapAndQueue(itrValue) {
try {
const value = await func(itrValue);
if (value && value[Symbol.asyncIterator]) {
for await (const asyncVal of value) {
resultQueue.push(asyncVal);
}
}
else {
resultQueue.push(value);
}
}
catch (error) {
ended = true;
lastError = error;
}
inflightCount--;
fulfillReadQueue();
fillQueue();
}
async function next() {
if (resultQueue.length === 0) {
const deferred = defer();
readQueue.push(deferred);
fillQueue();
return deferred.promise;
}
const value = resultQueue.shift();
fillQueue();
return { done: false, value };
}
const asyncIterableIterator = {
next,
[Symbol.asyncIterator]: () => asyncIterableIterator,
};
return asyncIterableIterator;
}
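/**
 * Like `flatMap`, but runs up to `concurrency` invocations of `func` at once and yields values as they become available, so output order follows completion order. `null` and `undefined` results are dropped.
 * A sketch, where `fetchLines` and `urls` are stand-ins for your own async function and input list:
```ts
import { flatTransform } from 'streaming-iterables'
// fetch up to 4 URLs at a time, yielding every line of every response as it arrives
for await (const line of flatTransform(4, fetchLines, urls)) {
  console.log(line)
}
```
 */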
function flatTransform(concurrency, func, iterable) {
if (func === undefined) {
return (curriedFunc, curriedIterable) => curriedIterable
? flatTransform(concurrency, curriedFunc, curriedIterable)
: flatTransform(concurrency, curriedFunc);
}
if (iterable === undefined) {
return (curriedIterable) => flatTransform(concurrency, func, curriedIterable);
}
return filter(i => i !== undefined && i !== null, flatten(_flatTransform(concurrency, func, iterable)));
}
async function onceReadable(stream) {
return new Promise(resolve => {
stream.once('readable', () => {
resolve();
});
});
}
async function* _fromStream(stream) {
while (true) {
const data = stream.read();
if (data !== null) {
yield data;
continue;
}
if (stream._readableState.ended) {
break;
}
await onceReadable(stream);
}
}
/**
* Wraps the stream in an async iterator or returns the stream if it already is an async iterator.
*note*: Since Node 10, streams are already async iterators. This function may be used to ensure compatibility with older versions of Node.
```ts
import { fromStream } from 'streaming-iterables'
import { createReadStream } from 'fs'
const pokeLog = fromStream(createReadStream('./pokedex-operating-system.log'))
for await (const pokeData of pokeLog) {
console.log(pokeData) // Buffer(...)
}
```
 * @deprecated This method is deprecated now that Node 10 is out of LTS. It may be removed in an upcoming major release.
*/
function fromStream(stream) {
if (typeof stream[Symbol.asyncIterator] === 'function') {
return stream;
}
return _fromStream(stream);
}
/**
 * Combine multiple iterators into a single iterable. Reads one item off each iterable in order repeatedly until they are all exhausted. If you care less about order and want items faster, see `parallelMerge()`.
*/
async function* merge(...iterables) {
const sources = new Set(iterables.map(getIterator));
while (sources.size > 0) {
for (const iterator of sources) {
const nextVal = await iterator.next();
if (nextVal.done) {
sources.delete(iterator);
}
else {
yield nextVal.value;
}
}
}
}
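/**
 * Calls `firstFn` and then passes its return value through each of the remaining functions in turn. Designed to pair with the curried single-argument forms of the other utilities.
 * A minimal usage sketch:
```ts
import { pipeline, filter, map, collect } from 'streaming-iterables'
const result = await pipeline(
  () => [1, 2, 3, 4],
  filter(n => n % 2 === 0),
  map(n => n * 10),
  collect
) // [20, 40]
```
 */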
function pipeline(firstFn, ...fns) {
let previousFn = firstFn();
for (const func of fns) {
previousFn = func(previousFn);
}
return previousFn;
}
async function* _parallelMap(concurrency, func, iterable) {
let transformError = null;
const wrapFunc = value => ({
value: func(value),
});
const stopOnError = async function* (source) {
for await (const value of source) {
if (transformError) {
return;
}
yield value;
}
};
const output = pipeline(() => iterable, buffer(1), stopOnError, map(wrapFunc), buffer(concurrency - 1));
const itr = getIterator(output);
while (true) {
const { value, done } = await itr.next();
if (done) {
break;
}
try {
const val = await value.value;
if (!transformError) {
yield val;
}
}
catch (error) {
transformError = error;
}
}
if (transformError) {
throw transformError;
}
}
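/**
 * Maps with up to `concurrency` invocations of `func` in flight at once while preserving the input order. With a `concurrency` of 1 it falls back to plain `map`.
 * A minimal usage sketch:
```ts
import { parallelMap, collect } from 'streaming-iterables'
// run at most 2 transforms at a time; results stay in input order
const doubled = await collect(parallelMap(2, async n => n * 2, [1, 2, 3, 4])) // [2, 4, 6, 8]
```
 */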
function parallelMap(concurrency, func, iterable) {
if (func === undefined) {
return (curriedFunc, curriedIterable) => parallelMap(concurrency, curriedFunc, curriedIterable);
}
if (iterable === undefined) {
return curriedIterable => parallelMap(concurrency, func, curriedIterable);
}
if (concurrency === 1) {
return map(func, iterable);
}
return _parallelMap(concurrency, func, iterable);
}
function parallelFlatMap(concurrency, func, iterable) {
if (func === undefined) {
return (curriedFunc, curriedIterable) => curriedIterable
? parallelFlatMap(concurrency, curriedFunc, curriedIterable)
: parallelFlatMap(concurrency, curriedFunc);
}
if (iterable === undefined) {
return (curriedIterable) => parallelFlatMap(concurrency, func, curriedIterable);
}
return filter(i => i !== undefined && i !== null, flatten(parallelMap(concurrency, func, iterable)));
}
/**
 * Combine multiple iterators into a single iterable. Reads one item off of every iterable and yields them as they resolve. This is useful for pulling items out of a collection of iterables as soon as they're available. Errors from the source `iterables` are raised immediately.
```ts
import { parallelMerge } from 'streaming-iterables'
import { getPokemon, getTransformer } from 'iterable-pokedex'
// pokemon are much faster to load btw
const heros = parallelMerge(getPokemon(), getTransformer())
for await (const hero of heros) {
console.log(hero)
}
// charmander
// bulbasaur
// megatron
// pikachu
// eevee
// bumblebee
// jazz
```
*/
async function* parallelMerge(...iterables) {
const inputs = iterables.map(getIterator);
const concurrentWork = new Set();
const values = new Map();
let lastError = null;
let errCb = null;
let valueCb = null;
const notifyError = err => {
lastError = err;
if (errCb) {
errCb(err);
}
};
const notifyDone = value => {
if (valueCb) {
valueCb(value);
}
};
const waitForQueue = () => new Promise((resolve, reject) => {
if (lastError) {
reject(lastError);
}
if (values.size > 0) {
return resolve();
}
valueCb = resolve;
errCb = reject;
});
const queueNext = input => {
const nextVal = Promise.resolve(input.next()).then(async ({ done, value }) => {
if (!done) {
values.set(input, value);
}
concurrentWork.delete(nextVal);
});
concurrentWork.add(nextVal);
nextVal.then(notifyDone, notifyError);
};
for (const input of inputs) {
queueNext(input);
}
while (true) {
// We technically don't have to check `values.size`, as the for loop should have emptied it.
// However, I haven't yet found specs verifying that behavior, only tests.
// The guard in waitForQueue() checking for values is in place for the same reason.
if (concurrentWork.size === 0 && values.size === 0) {
return;
}
await waitForQueue();
for (const [input, value] of values) {
values.delete(input);
yield value;
queueNext(input);
}
}
}
async function _reduce(func, start, iterable) {
let value = start;
for await (const nextItem of iterable) {
value = await func(value, nextItem);
}
return value;
}
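/**
 * Works like `Array.prototype.reduce` for sync or async iterables; `func` may return a promise. Can be curried on `start` and `iterable`.
 * A minimal usage sketch:
```ts
import { reduce } from 'streaming-iterables'
const sum = await reduce((total, n) => total + n, 0, [1, 2, 3]) // 6
```
 */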
function reduce(func, start, iterable) {
if (start === undefined) {
return (curriedStart, curriedIterable) => curriedIterable ? _reduce(func, curriedStart, curriedIterable) : reduce(func, curriedStart);
}
if (iterable === undefined) {
return (curriedIterable) => reduce(func, start, curriedIterable);
}
return _reduce(func, start, iterable);
}
async function* _take(count, iterable) {
let taken = 0;
for await (const val of iterable) {
yield await val;
taken++;
if (taken >= count) {
break;
}
}
}
function* _syncTake(count, iterable) {
let taken = 0;
for (const val of iterable) {
yield val;
taken++;
if (taken >= count) {
break;
}
}
}
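/**
 * Yields the first `count` items of `iterable` and then stops reading from it.
 * A minimal usage sketch:
```ts
import { take } from 'streaming-iterables'
for (const n of take(2, [1, 2, 3, 4])) {
  console.log(n) // 1, 2
}
```
 */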
function take(count, iterable) {
if (iterable === undefined) {
return curriedIterable => take(count, curriedIterable);
}
if (iterable[Symbol.asyncIterator]) {
return _take(count, iterable);
}
return _syncTake(count, iterable);
}
async function* _takeLast(count, iterable) {
const buffer = [];
for await (const res of iterable) {
buffer.push(res);
if (buffer.length > count) {
buffer.shift();
}
}
while (buffer.length) {
yield await buffer.shift();
}
}
function* _syncTakeLast(count, iterable) {
const buffer = [];
for (const res of iterable) {
buffer.push(res);
if (buffer.length > count) {
buffer.shift();
}
}
while (buffer.length) {
yield buffer.shift();
}
}
function takeLast(count, iterable) {
if (iterable === undefined) {
return curriedIterable => takeLast(count, curriedIterable);
}
if (iterable[Symbol.asyncIterator]) {
return _takeLast(count, iterable);
}
return _syncTakeLast(count, iterable);
}
async function* _takeWhile(predicate, iterable) {
for await (const data of iterable) {
if (!await predicate(data)) {
return;
}
yield data;
}
}
function takeWhile(predicate, iterable) {
if (iterable === undefined) {
return (curriedIterable) => _takeWhile(predicate, curriedIterable);
}
return _takeWhile(predicate, iterable);
}
async function* _asyncTap(func, iterable) {
for await (const val of iterable) {
await func(val);
yield val;
}
}
function tap(func, iterable) {
if (iterable === undefined) {
return (curriedIterable) => _asyncTap(func, curriedIterable);
}
return _asyncTap(func, iterable);
}
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
function _throttle(limit, interval, iterable) {
if (!Number.isFinite(limit)) {
throw new TypeError('Expected `limit` to be a finite number');
}
if (limit <= 0) {
throw new TypeError('Expected `limit` to be greater than 0');
}
if (!Number.isFinite(interval)) {
throw new TypeError('Expected `interval` to be a finite number');
}
return (async function* __throttle() {
let sent = 0;
let time;
for await (const val of iterable) {
if (sent < limit) {
if (typeof time === 'undefined') {
time = Date.now();
}
sent++;
yield val;
continue;
}
// Only wait if the interval hasn't already passed while we were
// yielding the previous values.
const elapsedMs = Date.now() - (time || 0);
const waitFor = interval - elapsedMs;
if (waitFor > 0) {
await sleep(waitFor);
}
time = Date.now();
sent = 1;
yield val;
}
})();
}
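/**
 * Yields at most `limit` items per `interval` milliseconds. Throws a `TypeError` if `limit` is not a positive finite number or `interval` is not finite.
 * A sketch, where `messageSource` is a stand-in for any async iterable of your own:
```ts
import { throttle } from 'streaming-iterables'
// let at most 3 messages through per second
for await (const msg of throttle(3, 1000, messageSource())) {
  console.log(msg)
}
```
 */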
function throttle(limit, interval, iterable) {
if (iterable === undefined) {
return (curriedIterable) => _throttle(limit, interval, curriedIterable);
}
return _throttle(limit, interval, iterable);
}
function addTime(a, b) {
let seconds = a[0] + b[0];
let nanoseconds = a[1] + b[1];
if (nanoseconds >= 1000000000) {
const remainder = nanoseconds % 1000000000;
seconds += (nanoseconds - remainder) / 1000000000;
nanoseconds = remainder;
}
return [seconds, nanoseconds];
}
async function* _asyncTime(config, iterable) {
const itr = iterable[Symbol.asyncIterator]();
let total = [0, 0];
while (true) {
const start = process.hrtime();
const { value, done } = await itr.next();
const delta = process.hrtime(start);
total = addTime(total, delta);
if (config.progress) {
config.progress(delta, total);
}
if (done) {
if (config.total) {
config.total(total);
}
return value;
}
yield value;
}
}
function* _syncTime(config, iterable) {
const itr = iterable[Symbol.iterator]();
let total = [0, 0];
while (true) {
const start = process.hrtime();
const { value, done } = itr.next();
const delta = process.hrtime(start);
total = addTime(total, delta);
if (config.progress) {
config.progress(delta, total);
}
if (done) {
if (config.total) {
config.total(total);
}
return value;
}
yield value;
}
}
function time(config = {}, iterable) {
if (iterable === undefined) {
return curriedIterable => time(config, curriedIterable);
}
if (iterable[Symbol.asyncIterator] !== undefined) {
return _asyncTime(config, iterable);
}
else {
return _syncTime(config, iterable);
}
}
function _transform(concurrency, func, iterable) {
const iterator = getIterator(iterable);
const resultQueue = [];
const readQueue = [];
let ended = false;
let reading = false;
let inflightCount = 0;
let lastError = null;
function fulfillReadQueue() {
while (readQueue.length > 0 && resultQueue.length > 0) {
const { resolve } = readQueue.shift();
const value = resultQueue.shift();
resolve({ done: false, value });
}
while (readQueue.length > 0 && inflightCount === 0 && ended) {
const { resolve, reject } = readQueue.shift();
if (lastError) {
reject(lastError);
lastError = null;
}
else {
resolve({ done: true, value: undefined });
}
}
}
async function fillQueue() {
if (ended) {
fulfillReadQueue();
return;
}
if (reading) {
return;
}
if (inflightCount + resultQueue.length >= concurrency) {
return;
}
reading = true;
inflightCount++;
try {
const { done, value } = await iterator.next();
if (done) {
ended = true;
inflightCount--;
fulfillReadQueue();
}
else {
mapAndQueue(value);
}
}
catch (error) {
ended = true;
inflightCount--;
lastError = error;
fulfillReadQueue();
}
reading = false;
fillQueue();
}
async function mapAndQueue(itrValue) {
try {
const value = await func(itrValue);
resultQueue.push(value);
}
catch (error) {
ended = true;
lastError = error;
}
inflightCount--;
fulfillReadQueue();
fillQueue();
}
async function next() {
if (resultQueue.length === 0) {
const deferred = defer();
readQueue.push(deferred);
fillQueue();
return deferred.promise;
}
const value = resultQueue.shift();
fillQueue();
return { done: false, value };
}
const asyncIterableIterator = {
next,
[Symbol.asyncIterator]: () => asyncIterableIterator,
};
return asyncIterableIterator;
}
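/**
 * Like `parallelMap`, but values are yielded as each invocation of `func` resolves, so output order follows completion order rather than input order. Up to `concurrency` invocations run at once.
 * A sketch, where `lookupUser` and `userIds` are stand-ins for your own async function and input list:
```ts
import { transform } from 'streaming-iterables'
// resolve up to 5 lookups at once, yielding whichever finishes first
for await (const user of transform(5, lookupUser, userIds)) {
  console.log(user)
}
```
 */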
function transform(concurrency, func, iterable) {
if (func === undefined) {
return (curriedFunc, curriedIterable) => curriedIterable
? transform(concurrency, curriedFunc, curriedIterable)
: transform(concurrency, curriedFunc);
}
if (iterable === undefined) {
return (curriedIterable) => transform(concurrency, func, curriedIterable);
}
return _transform(concurrency, func, iterable);
}
async function _writeToStream(stream, iterable) {
let lastError = null;
let errCb = null;
let drainCb = null;
const notifyError = err => {
lastError = err;
if (errCb) {
errCb(err);
}
};
const notifyDrain = () => {
if (drainCb) {
drainCb();
}
};
const cleanup = () => {
stream.removeListener('error', notifyError);
stream.removeListener('drain', notifyDrain);
};
stream.once('error', notifyError);
const waitForDrain = () => new Promise((resolve, reject) => {
if (lastError) {
return reject(lastError);
}
stream.once('drain', notifyDrain);
drainCb = resolve;
errCb = reject;
});
for await (const value of iterable) {
if (stream.write(value) === false) {
await waitForDrain();
}
if (lastError) {
break;
}
}
cleanup();
if (lastError) {
throw lastError;
}
}
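/**
 * Writes each value of `iterable` to `stream`, waiting for `'drain'` whenever `write()` returns `false`, and rejects if the stream emits `'error'`. It does not call `stream.end()`, so close the stream yourself once the returned promise resolves.
 * A minimal usage sketch:
```ts
import { writeToStream } from 'streaming-iterables'
import { createWriteStream } from 'fs'
const file = createWriteStream('./numbers.txt')
await writeToStream(file, ['1\n', '2\n', '3\n'])
file.end() // writeToStream does not end the stream for you
```
 */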
function writeToStream(stream, iterable) {
if (iterable === undefined) {
return (curriedIterable) => _writeToStream(stream, curriedIterable);
}
return _writeToStream(stream, iterable);
}
export { batch, batchWithTimeout, buffer, collect, concat, consume, drop, filter, flatMap, flatTransform, flatten, fromStream, getIterator, map, merge, parallelFlatMap, parallelMap, parallelMerge, pipeline, reduce, take, takeLast, takeWhile, tap, throttle, time, transform, writeToStream };