UNPKG

iterparse

Version:
198 lines (197 loc) 8.02 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.fileGroupBy = void 0;
const tslib_1 = require("tslib");
const ix_1 = require("ix");
const tmp_1 = require("tmp");
const fs_extra_1 = require("fs-extra");
const P = tslib_1.__importStar(require("ts-prime"));
const helpers_1 = require("./helpers");

/**
 * Converts a byte count and an elapsed time into a whole bytes-per-second rate.
 *
 * The multiplication happens before the division so that rates below
 * 1000 B/s are not truncated to zero, and a non-positive elapsed time
 * yields 0 instead of Infinity/NaN.
 *
 * @param {number} bytes - Number of bytes processed so far.
 * @param {number} elapsedMs - Elapsed time in milliseconds.
 * @returns {number} Whole bytes per second, or 0 when no time has elapsed.
 */
function bytesPerSecond(bytes, elapsedMs) {
    if (!(elapsedMs > 0)) {
        return 0;
    }
    return Math.floor((bytes * 1000) / elapsedMs);
}

/**
 * Read-only view over a GroupingProgress instance that can be rendered
 * as a human-readable status line (toString) or a plain snapshot (toJSON).
 */
class GroupingProgressDisplay {
    /**
     * @param {GroupingProgress} progress - The live progress counters to display.
     */
    constructor(progress) {
        this.progress = progress;
    }

    /**
     * Renders a one-line status message for the current state
     * ('IDLE', 'GROUPING', or anything else, which is treated as reading).
     *
     * @returns {string} Status line.
     */
    toString() {
        if (this.progress.state === 'IDLE') {
            return `Grouping idle...`;
        }
        const json = this.toJSON();
        if (this.progress.state === 'GROUPING') {
            return `Grouping, Items: ${this.progress.groupedItems.toLocaleString()}, Total Groups: ${json.groupedGroups}, Grouped Size: ${helpers_1.formatBytes(json.groupedBytes)}, Memory: ${helpers_1.formatBytes(json.memory)}`;
        }
        // With nothing grouped there is nothing left to read; report 100%
        // rather than the NaN produced by dividing 0 by 0.
        const percent = json.groupedItems > 0
            ? (json.readedItems / json.groupedItems) * 100
            : 100;
        return `Reading, Progress: ${percent.toFixed(2)}%, Groups: ${json.readedGroups}/${json.groupedGroups}, Memory: ${helpers_1.formatBytes(json.memory)}`;
    }

    /**
     * Builds a plain-object snapshot of the counters, timestamps,
     * current heap usage and derived throughput figures.
     *
     * @returns {object} Snapshot of the progress state.
     */
    toJSON() {
        const now = Date.now();
        const groupingBytesPerSecond = bytesPerSecond(this.progress.groupedBytes, now - this.progress.parsingStartTime);
        // Reading throughput is derived from the reading counters, not the grouping ones.
        const readingBytesPerSecond = bytesPerSecond(this.progress.readedBytes, now - this.progress.readingStartTime);
        return {
            state: this.progress.state,
            groupingBytesPerSecond,
            readingBytesPerSecond,
            memory: process.memoryUsage().heapUsed,
            groupingStartTime: this.progress.parsingStartTime,
            groupingStopTime: this.progress.parsingStopTime,
            readingStartTime: this.progress.readingStartTime,
            readingStopTime: this.progress.readingStopTime,
            groupedItems: this.progress.groupedItems,
            groupedBytes: this.progress.groupedBytes,
            groupedGroups: this.progress.groupedGroups,
            readedItems: this.progress.readedItems,
            readedBytes: this.progress.readedBytes,
            readedGroups: this.progress.readedGroups
        };
    }
}

/**
 * Mutable counters and timestamps describing the two phases of
 * fileGroupBy: 'GROUPING' (writing items to the temp file) and
 * 'READING' (reading them back group by group).
 */
class GroupingProgress {
    constructor() {
        this.groupedBytes = 0;
        this.groupedItems = 0;
        this.groupedGroups = 0;
        this.readedBytes = 0;
        this.readedGroups = 0;
        this.readedItems = 0;
        this.state = 'IDLE';
        this.parsingStartTime = Date.now();
        this.parsingStopTime = Date.now();
        this.readingStartTime = Date.now();
        this.readingStopTime = Date.now();
    }

    /**
     * Switches to the given phase and records its start time.
     *
     * @param {'GROUPING'|'READING'} action - Phase being started.
     * @throws {Error} When the action is not a known phase.
     */
    start(action) {
        switch (action) {
            case 'GROUPING':
                this.state = 'GROUPING';
                this.parsingStartTime = Date.now();
                return;
            case 'READING':
                this.state = 'READING';
                this.readingStartTime = Date.now();
                return;
            default:
                throw new Error(`Action ${action} not allowed`);
        }
    }

    /**
     * Records the stop time of the given phase (the state is left unchanged).
     *
     * @param {'GROUPING'|'READING'} action - Phase being stopped.
     * @throws {Error} When the action is not a known phase.
     */
    stop(action) {
        switch (action) {
            case 'GROUPING':
                this.parsingStopTime = Date.now();
                return;
            case 'READING':
                this.readingStopTime = Date.now();
                return;
            default:
                throw new Error(`Action ${action} not allowed`);
        }
    }

    /** @param {number} chunk - Bytes written during grouping. */
    addChunk(chunk) {
        this.groupedBytes += chunk;
    }

    /** @param {number} group - Number of newly discovered groups. */
    addGroup(group) {
        this.groupedGroups += group;
    }

    /** @param {number} [count=1] - Number of items grouped. */
    addItem(count = 1) {
        this.groupedItems += count;
    }

    /** @param {number} chunk - Bytes read back during reading. */
    readChunk(chunk) {
        this.readedBytes += chunk;
    }

    /** @param {number} group - Number of groups read back. */
    readGroup(group) {
        this.readedGroups += group;
    }

    /** @param {number} [count=1] - Number of items read back. */
    readItem(count = 1) {
        this.readedItems += count;
    }

    /**
     * Overwrites the grouped byte/item counters.
     * Falsy values (including 0) are ignored, so counters cannot be reset to 0 here.
     *
     * @param {{currentSize?: number, items?: number}} data - New counter values.
     */
    set(data) {
        if (data.currentSize) {
            this.groupedBytes = data.currentSize;
        }
        if (data.items) {
            this.groupedItems = data.items;
        }
    }
}

/**
 * Groups the items of an async source by key using a temporary file as
 * backing storage, then yields one `{ key, items }` object per group.
 *
 * Every item is JSON-serialised and appended to a temp file while only its
 * (offset, byte length) pair is kept in memory per group. Once the source is
 * exhausted, each group's items are read back from the file and parsed.
 *
 * The temp file descriptor is closed and the temp file removed when the
 * generator finishes, fails, or is terminated early by the consumer.
 *
 * @param {object} args
 * @param {*} args.source - Async-iterable (or iterable) source of JSON-serialisable items.
 * @param {Function} args.groupingFn - Maps an item to its group id; the result's toString() is the key.
 * @param {Function} [args.progress] - Optional callback receiving a GroupingProgressDisplay.
 * @param {number} [args.progressFrequency] - Progress callback interval in ms (defaults to 1000).
 * @returns {*} An ix AsyncIterable of `{ key, items }` objects.
 */
function fileGroupBy(args) {
    const progress = new GroupingProgress();
    let interval;

    // Invokes the optional progress callback with `args` as receiver.
    const reportProgress = () => {
        if (args.progress != null) {
            args.progress(new GroupingProgressDisplay(progress));
        }
    };

    function groupProcess() {
        // tslib's __asyncGenerator emulates `async function*`:
        // `yield __await(x)` is `await x`, `yield yield __await(v)` is `yield v`.
        return tslib_1.__asyncGenerator(this, arguments, function* groupProcess_1() {
            const tmpFile = tmp_1.tmpNameSync();
            const encoding = 'utf8';
            const fd = yield tslib_1.__await(fs_extra_1.open(tmpFile, 'a+'));
            try {
                const groupFileMap = {
                    groups: new Map(),
                    lastPosition: 0,
                };
                interval = setInterval(() => {
                    if (progress.state === 'IDLE') {
                        return;
                    }
                    reportProgress();
                }, args.progressFrequency || 1000);

                // --- Phase 1: append every item to the temp file, remembering where it landed.
                const sourceIterator = tslib_1.__asyncValues(args.source);
                let step;
                let iterationError;
                let returnFn;
                try {
                    for (;;) {
                        step = yield tslib_1.__await(sourceIterator.next());
                        if (step.done) {
                            break;
                        }
                        const value = step.value;
                        if (progress.groupedGroups === 0) {
                            progress.start('GROUPING');
                        }
                        const parsedValue = JSON.stringify(value);
                        const size = Buffer.byteLength(parsedValue, encoding);
                        const groupId = args.groupingFn(value).toString();
                        yield tslib_1.__await(fs_extra_1.appendFile(fd, parsedValue, { encoding }));
                        progress.addItem(1);
                        progress.addChunk(size);
                        const group = groupFileMap.groups.get(groupId);
                        if (group == null) {
                            progress.addGroup(1);
                        }
                        const newGroup = group || [];
                        newGroup.push([groupFileMap.lastPosition, size]);
                        groupFileMap.groups.set(groupId, newGroup);
                        groupFileMap.lastPosition = groupFileMap.lastPosition + size;
                    }
                }
                catch (err) {
                    iterationError = { error: err };
                }
                finally {
                    try {
                        // Mirror for-await semantics: release the source iterator if we left the loop early.
                        if (step && !step.done && (returnFn = sourceIterator.return)) {
                            yield tslib_1.__await(returnFn.call(sourceIterator));
                        }
                    }
                    finally {
                        if (iterationError) {
                            throw iterationError.error;
                        }
                    }
                }
                progress.stop('GROUPING');
                reportProgress();

                // --- Phase 2: read each group's items back from the temp file.
                progress.start('READING');
                for (const [groupId, mapData] of groupFileMap.groups) {
                    progress.readGroup(1);
                    const items = yield tslib_1.__await(Promise.all(mapData.map(async ([location, size]) => {
                        progress.readItem(1);
                        progress.readChunk(size);
                        const buffer = Buffer.alloc(size);
                        await fs_extra_1.read(fd, buffer, 0, size, location);
                        const item = P.canFail(() => {
                            return JSON.parse(buffer.toString(encoding));
                        });
                        if (P.isError(item)) {
                            throw new Error(`Critical error: something went wrong in grouping process`);
                        }
                        return item;
                    })));
                    yield yield tslib_1.__await({ key: groupId, items });
                }
                progress.stop('READING');
                clearInterval(interval);
                reportProgress();
            }
            finally {
                // Always stop the timer and release the temp file, whether we
                // finished, failed, or the consumer stopped iterating early.
                if (interval != null) {
                    clearInterval(interval);
                }
                try {
                    yield tslib_1.__await(fs_extra_1.close(fd));
                }
                finally {
                    yield tslib_1.__await(fs_extra_1.remove(tmpFile));
                }
            }
        });
    }

    return ix_1.AsyncIterable.from(groupProcess()).finally(() => {
        if (interval == null) {
            return;
        }
        clearInterval(interval);
    });
}
exports.fileGroupBy = fileGroupBy;