/*
 * iterparse — Delightful data parsing
 * Version: (not captured)
 * 200 lines (199 loc) • 7.55 kB, JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.onProgress = exports.onDone = exports.trailingMap = exports.trailingGroupBy = void 0;
const tslib_1 = require("tslib");
const ix_1 = require("ix");
const P = tslib_1.__importStar(require("ts-prime"));
const helpers_1 = require("./helpers");
/**
 * Groups a (possibly endless) stream by key while keeping a bounded number of
 * items buffered, emitting `{ key, items }` batches.
 *
 * A batch is emitted when:
 *  - the buffered item count has reached `args.totalItemsInMemory`
 *    (the currently largest group is flushed to make room),
 *  - a group has reached `args.maxGroupSize` items, or
 *  - the source is exhausted (every non-empty group is flushed).
 *
 * The same key can therefore be emitted more than once.
 *
 * @param {Iterable|AsyncIterable} data - Source items.
 * @param {object} args
 * @param {Function} args.groupBy - Returns the group key for an item (sync or
 *   async); the resolved key is converted with `.toString()`.
 * @param {number} [args.maxGroupSize] - Flush a group once it holds this many items.
 * @param {number} [args.totalItemsInMemory] - Upper bound on buffered items
 *   across all groups.
 * @returns {AsyncIterable<{key: string, items: Array}>} ix async iterable of batches.
 */
function _trailingGroupBy(data, args) {
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_1() {
            var e_1, _a;
            const stats = {
                totalItemsInMemory: 0
            };
            // Prototype-less dictionary so keys such as "constructor" or
            // "__proto__" behave like any other group key.
            const groups = Object.create(null);
            // Removes a group from the buffer, keeps the in-memory counter in
            // sync and returns the batch that should be emitted.
            const detach = (key) => {
                const items = groups[key];
                groups[key] = [];
                stats.totalItemsInMemory -= items.length;
                return { key, items };
            };
            try {
                for (var data_1 = tslib_1.__asyncValues(data), data_1_1; data_1_1 = yield tslib_1.__await(data_1.next()), !data_1_1.done;) {
                    const item = data_1_1.value;
                    // Await the key itself (groupBy may be async) before
                    // turning it into a string.
                    const rawId = yield tslib_1.__await(args.groupBy(item));
                    const id = rawId.toString();
                    if (groups[id] == null) {
                        groups[id] = [];
                    }
                    if (stats.totalItemsInMemory >= args.totalItemsInMemory) {
                        // Memory budget exhausted: evict the largest non-empty group.
                        let largestKey;
                        let largestSize = 0;
                        for (const key of Object.keys(groups)) {
                            if (groups[key].length > largestSize) {
                                largestKey = key;
                                largestSize = groups[key].length;
                            }
                        }
                        if (largestKey === undefined) {
                            // Nothing is buffered; make sure the counter agrees.
                            stats.totalItemsInMemory = 0;
                        }
                        else {
                            yield yield tslib_1.__await(detach(largestKey));
                        }
                    }
                    groups[id].push(item);
                    stats.totalItemsInMemory += 1;
                    if (groups[id].length >= args.maxGroupSize) {
                        yield yield tslib_1.__await(detach(id));
                    }
                }
            }
            catch (e_1_1) { e_1 = { error: e_1_1 }; }
            finally {
                try {
                    if (data_1_1 && !data_1_1.done && (_a = data_1.return)) yield tslib_1.__await(_a.call(data_1));
                }
                finally { if (e_1) throw e_1.error; }
            }
            // Source finished normally: emit whatever is still buffered so no
            // item is lost.
            for (const key of Object.keys(groups)) {
                if (groups[key].length > 0) {
                    yield yield tslib_1.__await(detach(key));
                }
            }
        });
    }
    return ix_1.AsyncIterable.from(iter());
}
/**
 * Public entry point for the bounded group-by stage.
 *
 * Hands `_trailingGroupBy` and the caller's raw `arguments` to `P.purry`
 * (ts-prime), which decides how the implementation is invoked.
 * NOTE(review): presumably this allows both `trailingGroupBy(data, args)` and
 * the curried `trailingGroupBy(args)(data)` form — confirm against ts-prime.
 */
function trailingGroupBy() {
return P.purry(_trailingGroupBy, arguments);
}
exports.trailingGroupBy = trailingGroupBy;
/**
 * Maps a stream through `args.mapFunc` with at most `args.maxConcurrency`
 * calls in flight, yielding each mapped value as soon as its call settles
 * (completion order, not source order).
 *
 * @param {Iterable|AsyncIterable} data - Source items.
 * @param {object} args
 * @param {Function} args.mapFunc - Sync or async mapper, called once per
 *   source item (never for the end-of-stream marker).
 * @param {number} [args.maxConcurrency] - Maximum number of pending calls.
 *   Values that are not a finite number >= 1 fall back to 1; fractional values
 *   are rounded down.
 * @returns {AsyncIterable} ix async iterable of mapped values.
 */
function _trailingMap(data, args) {
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_2() {
            let done = false;
            // Monotonic counter: timestamps collide when several requests are
            // created within the same millisecond, which removed the wrong
            // queue entry.
            let nextId = 0;
            const requested = Number(args.maxConcurrency);
            // The loop below is only able to observe `done` after awaiting, so
            // the limit must be a usable positive integer or it would spin
            // forever without ever yielding to the event loop.
            const maxConcurrency = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 1;
            const iter = ix_1.AsyncIterable.from(data)[Symbol.asyncIterator]();
            const requestQueue = [];
            // Waits for the first pending request to settle and removes it
            // from the queue.
            const takeSettled = () => Promise.race(requestQueue.map((q) => q.request)).then((settled) => {
                requestQueue.splice(requestQueue.findIndex((q) => q.id === settled.id), 1);
                return settled;
            });
            while (!done) {
                const id = nextId++;
                const request = iter.next().then(async (q) => {
                    if (q.done) {
                        // End of the source: there is no value to map.
                        done = true;
                        return { id, exhausted: true, result: undefined };
                    }
                    return { id, exhausted: false, result: await args.mapFunc(q.value) };
                });
                requestQueue.push({
                    id, request
                });
                if (requestQueue.length >= maxConcurrency) {
                    const settled = yield tslib_1.__await(takeSettled());
                    if (!settled.exhausted) {
                        yield yield tslib_1.__await(settled.result);
                    }
                }
            }
            // Drain the requests that are still in flight.
            while (requestQueue.length !== 0) {
                const settled = yield tslib_1.__await(takeSettled());
                if (!settled.exhausted) {
                    yield yield tslib_1.__await(settled.result);
                }
            }
        });
    }
    return ix_1.AsyncIterable.from(iter());
}
/**
 * Public entry point for the concurrency-limited map stage.
 *
 * Hands `_trailingMap` and the caller's raw `arguments` to `P.purry`
 * (ts-prime), which decides how the implementation is invoked.
 * NOTE(review): presumably this allows both `trailingMap(data, args)` and the
 * curried `trailingMap(args)(data)` form — confirm against ts-prime.
 */
function trailingMap() {
return P.purry(_trailingMap, arguments);
}
exports.trailingMap = trailingMap;
/**
 * Passes every item of `data` through unchanged and invokes `callback` once
 * the source has been consumed to the end without an error.
 *
 * The callback is not invoked when the source throws or when the consumer
 * stops iterating early; its return value is ignored.
 *
 * @param {Iterable|AsyncIterable} data - Source items.
 * @param {Function} callback - Called with no arguments after the last item.
 * @returns {AsyncIterable} ix async iterable re-emitting the source items.
 */
function _onDone(data, callback) {
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_3() {
            let source;
            let step;
            let failure;
            let close;
            try {
                source = tslib_1.__asyncValues(data);
                while (true) {
                    step = yield tslib_1.__await(source.next());
                    if (step.done) {
                        break;
                    }
                    yield yield tslib_1.__await(step.value);
                }
            }
            catch (err) {
                failure = { error: err };
            }
            finally {
                try {
                    // Close the source when iteration stopped before it was exhausted.
                    if (step && !step.done && (close = source.return)) {
                        yield tslib_1.__await(close.call(source));
                    }
                }
                finally {
                    // The original failure wins over anything thrown while closing.
                    if (failure) {
                        throw failure.error;
                    }
                }
            }
            callback();
        });
    }
    return ix_1.AsyncIterable.from(iter());
}
/**
 * Public entry point for the completion-callback stage.
 *
 * Hands `_onDone` and the caller's raw `arguments` to `P.purry` (ts-prime),
 * which decides how the implementation is invoked.
 * NOTE(review): presumably this allows both `onDone(data, callback)` and the
 * curried `onDone(callback)(data)` form — confirm against ts-prime.
 */
function onDone() {
return P.purry(_onDone, arguments);
}
exports.onDone = onDone;
/**
 * Mutable bookkeeping for a stream: how many items have been seen and how long
 * the most recent gaps between consecutive items were.
 */
class ProgressTrack {
    constructor() {
        Object.assign(this, {
            // Number of items recorded so far.
            items: 0,
            // Becomes true once the first item has been recorded.
            isRunning: false,
            // Timestamp (ms) of the latest item; 0 means "no item yet".
            startTime: 0,
            // Most recent gaps (ms) between consecutive items.
            rollingDurations: []
        });
    }
    /**
     * Records one item: stores the time elapsed since the previous item (if
     * any), trims the rolling window and stamps the current time.
     */
    addItem() {
        const hasPrevious = this.startTime !== 0;
        if (hasPrevious) {
            const gap = Date.now() - this.startTime;
            this.rollingDurations.push(gap);
            // Drop the oldest sample as soon as the window reaches 20 entries.
            const windowFull = this.rollingDurations.length >= 20;
            if (windowFull) {
                this.rollingDurations.shift();
            }
        }
        this.items++;
        this.isRunning = true;
        this.startTime = Date.now();
    }
    /**
     * Arithmetic mean of the recorded gaps, as reported by `P.stats`.
     */
    get average() {
        const summary = P.stats(this.rollingDurations, (duration) => duration);
        return summary.arithmetic_mean;
    }
}
/**
 * Read-only view over a progress tracker that can be rendered as a log line
 * or serialised to JSON.
 */
class Progress {
    /**
     * @param {object} progress - Tracker exposing `items` and `average`
     *   (average gap between items in milliseconds).
     */
    constructor(progress) {
        this.progress = progress;
    }
    /**
     * Human-readable summary: item count, throughput (only once more than one
     * item has been seen) and current heap usage.
     */
    toString() {
        let speed = "";
        if (this.progress.items > 1) {
            const itemsPerSecond = 1 / (this.progress.average / 1000);
            speed = ` Speed: ${itemsPerSecond.toFixed(2)} items/s,`;
        }
        const count = this.progress.items.toLocaleString();
        const memory = helpers_1.formatBytes(process.memoryUsage().heapUsed);
        return `Items: ${count},${speed} Memory: ${memory}`;
    }
    /**
     * Plain-object form: `speed` in items per second and the `items` count.
     */
    toJSON() {
        const { average, items } = this.progress;
        const speed = 1 / (average / 1000);
        return { speed, items };
    }
}
/**
 * Passes every item of `data` through unchanged while periodically reporting
 * progress.
 *
 * `args.progress` is called once immediately when the stage is created and
 * then every `args.progressFrequency` milliseconds (2000 when falsy) while the
 * stream is being consumed. The timer is started lazily on the first pull and
 * is always cleared when iteration ends (completion, error or early stop), so
 * a pipeline that is built but never consumed does not leave a live timer
 * behind.
 *
 * @param {Iterable|AsyncIterable} data - Source items.
 * @param {object} args
 * @param {Function} args.progress - Receives a `Progress` snapshot.
 * @param {number} [args.progressFrequency] - Reporting period in milliseconds.
 * @returns {AsyncIterable} ix async iterable re-emitting the source items.
 */
function _onProgress(data, args) {
    const progressInstance = new ProgressTrack();
    // Assigned when iteration actually starts; clearInterval(undefined) is a no-op.
    let interval;
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_4() {
            var e_3, _a;
            interval = setInterval(() => {
                args.progress(new Progress(progressInstance));
            }, args.progressFrequency || 2000);
            try {
                try {
                    for (var data_3 = tslib_1.__asyncValues(data), data_3_1; data_3_1 = yield tslib_1.__await(data_3.next()), !data_3_1.done;) {
                        const item = data_3_1.value;
                        progressInstance.addItem();
                        yield yield tslib_1.__await(item);
                    }
                }
                catch (e_3_1) { e_3 = { error: e_3_1 }; }
                finally {
                    try {
                        if (data_3_1 && !data_3_1.done && (_a = data_3.return)) yield tslib_1.__await(_a.call(data_3));
                    }
                    finally { if (e_3) throw e_3.error; }
                }
            }
            finally {
                // Stop reporting regardless of how iteration ended.
                clearInterval(interval);
            }
        });
    }
    args.progress(new Progress(progressInstance));
    return ix_1.AsyncIterable.from(iter()).finally(() => {
        // Kept as a second safety net on the consumer-facing iterable.
        clearInterval(interval);
    });
}
/**
 * Public entry point for the progress-reporting stage.
 *
 * Hands `_onProgress` and the caller's raw `arguments` to `P.purry`
 * (ts-prime), which decides how the implementation is invoked.
 * NOTE(review): presumably this allows both `onProgress(data, args)` and the
 * curried `onProgress(args)(data)` form — confirm against ts-prime.
 */
function onProgress() {
return P.purry(_onProgress, arguments);
}
exports.onProgress = onProgress;