// iterparse
// Version:
// Delightful data parsing
// 143 lines (142 loc) • 6.85 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.cacheIter = exports._cacheIter = exports.onLastItem = void 0;
const tslib_1 = require("tslib");
const fs_extra_1 = require("fs-extra");
const ix_1 = require("ix");
const path_1 = require("path");
const ts_prime_1 = require("ts-prime");
const json_1 = require("./json");
const types_1 = require("./types");
const P = tslib_1.__importStar(require("ts-prime"));
/**
 * Builds an operator that re-emits every item of an async-iterable source
 * and, once the source has been fully consumed, awaits `fn()`.
 *
 * `fn` is only reached when the loop over the source finishes normally: an
 * error raised while iterating is rethrown before `fn` would be called.
 * The value `fn` resolves to is discarded.
 *
 * @param {Function} fn - Callback invoked (and awaited) after the last item.
 * @returns {Function} A function taking the source iterable `q` and
 *   returning the wrapping async generator object.
 */
function onLastItem(fn) {
    return (q) => {
        function iter() {
            return tslib_1.__asyncGenerator(this, arguments, function* iter_1() {
                let failure;
                let source;
                let step;
                try {
                    source = tslib_1.__asyncValues(q);
                    while (true) {
                        step = yield tslib_1.__await(source.next());
                        if (step.done) {
                            break;
                        }
                        // Await the item, then hand it to the consumer.
                        yield yield tslib_1.__await(step.value);
                    }
                }
                catch (caught) {
                    failure = { error: caught };
                }
                finally {
                    try {
                        // Close the source only when iteration stopped before it was exhausted.
                        if (step && !step.done) {
                            const close = source.return;
                            if (close) {
                                yield tslib_1.__await(close.call(source));
                            }
                        }
                    }
                    finally {
                        if (failure) {
                            throw failure.error;
                        }
                    }
                }
                // Source exhausted without error: run the completion callback.
                yield tslib_1.__await(fn());
            });
        }
        return iter();
    };
}
exports.onLastItem = onLastItem;
/**
 * Wraps `data` in an iterable backed by an on-disk cache stored in
 * `options.cacheFolder`.
 *
 * On first consumption the cache folder is validated:
 *  - a leftover `.lock` file means a previous run did not finish, so the
 *    folder is discarded;
 *  - a missing `_meta.json`, or one whose `referenceId`, `format` or
 *    `iteratableId` no longer matches, also discards the folder.
 * If usable cache files remain they are replayed; otherwise the source is
 * consumed, written to the cache folder while being passed through, and
 * `_meta.json` is written once the source is exhausted.
 *
 * @param {*} data - Source iterable; passed to `IX.from`.
 * @param {Object} options
 * @param {string} options.cacheFolder - Directory holding the cache files.
 * @param {boolean} [options.enabled=true] - When false the source is
 *   returned uncached.
 * @param {Object} [options.nice] - When set, items are grouped with
 *   `buffer(nice.buffer)` and each group is written to `cache-<index>.json`;
 *   otherwise everything goes through `jsonWrite` into `cache.json`.
 * @param {*} [options.referenceId] - Cache key; defaults to a hash of
 *   today's date string, so a cache without an explicit id is only reused
 *   on the day it was created.
 * @param {Object} [options.logger] - Optional logger; only `info` is used.
 * @param {*} [options.progress] - Forwarded to `jsonRead`.
 * @param {*} [options.progressFrequency] - Forwarded to `jsonRead`.
 * @returns {*} The iterable produced by `IX.from` / `IX.defer`.
 */
function _cacheIter(data, options) {
    const { nice, enabled = true } = options;
    // The logger is optional: every message goes through this guard.
    const log = (message) => {
        const logger = options.logger;
        if (logger !== null && logger !== undefined) {
            logger.info(message);
        }
    };
    if (!enabled) {
        log('Cache is disabled. Returning original iterator');
        return types_1.IX.from(data);
    }
    return types_1.IX.defer(async () => {
        // Describes the operator chain as "<constructor name>:<description of _source>".
        const describeSource = (source) => {
            if (source == null)
                return '';
            const ctor = source.constructor;
            const ctorName = ctor === null || ctor === undefined ? undefined : ctor.name;
            return `${ctorName}:${describeSource(source._source)}`;
        };
        const computeIteratableId = () => {
            const chain = P.canFail(() => describeSource(data));
            if (P.isError(chain)) {
                return '';
            }
            return P.hash(`${chain}:${JSON.stringify(data)}`);
        };
        const iteratableId = computeIteratableId();
        const metaFile = path_1.resolve(options.cacheFolder, "_meta.json");
        const explicitReferenceId = options.referenceId;
        const referenceId = explicitReferenceId !== null && explicitReferenceId !== undefined
            ? explicitReferenceId
            : P.hash(new Date().toDateString());
        const lockFile = path_1.resolve(options.cacheFolder, ".lock");
        if (fs_extra_1.existsSync(lockFile)) {
            log('Lock file exist. This usually means that iterator was not cached fully. Cache folder will be deleted and recreated with new iterator');
            // The folder contains at least the lock file, so it must be removed
            // recursively; a plain rmdir would fail on the non-empty directory.
            removeIfExists(options);
        }
        if (!fs_extra_1.existsSync(metaFile)) {
            log('Meta file does not exists. Deleting cache folder...');
            removeIfExists(options);
        }
        if (fs_extra_1.existsSync(metaFile)) {
            const meta = await fs_extra_1.readJSON(metaFile);
            if (meta.referenceId !== referenceId) {
                log('Reference id changed. Deleting cache folder...');
                removeIfExists(options);
            }
            if (!P.equals(meta.format, options.nice)) {
                log('Cache format changed. Deleting cache folder...');
                removeIfExists(options);
            }
            if (!P.equals(meta.iteratableId, iteratableId)) {
                log('Source iterator structure changed. Deleting cache folder...');
                removeIfExists(options);
            }
        }
        // Numeric index of a "cache-<n>.json" chunk; files without an index sort first.
        const chunkIndex = (fileName) => {
            const match = /^cache-(\d+)\.json$/.exec(fileName);
            return match ? Number(match[1]) : -1;
        };
        const getCache = () => {
            if (!fs_extra_1.existsSync(options.cacheFolder)) {
                return;
            }
            const files = fs_extra_1.readdirSync(options.cacheFolder);
            if (files.length === 0)
                return;
            const cacheFiles = files.filter((path) => path_1.extname(path) === '.json' && path_1.basename(path).startsWith('cache'));
            // Directory listings order names as strings ("cache-10" before
            // "cache-2"); replay chunks in the order they were written.
            cacheFiles.sort((a, b) => chunkIndex(a) - chunkIndex(b));
            log(`Found cache data. Reading ${cacheFiles.length} files...`);
            return types_1.IX.from(cacheFiles)
                .flatMap((filePath) => json_1.jsonRead({ filePath: path_1.resolve(options.cacheFolder, filePath), pattern: '*', progress: options.progress, progressFrequency: options.progressFrequency }));
        };
        const cache = getCache();
        if (cache)
            return cache;
        // No usable cache: start from a clean folder guarded by a lock file.
        removeIfExists(options);
        await fs_extra_1.ensureFile(lockFile);
        const onCacheComplete = async () => {
            fs_extra_1.unlinkSync(lockFile);
            log(`Cache was created successfully...`);
            await fs_extra_1.writeFile(metaFile, JSON.stringify({ iteratableId, referenceId, createdAt: new Date().toISOString(), format: options.nice }, undefined, '\t'));
        };
        await fs_extra_1.writeFile(lockFile, JSON.stringify({ started: Date.now() }));
        if (nice) {
            return types_1.IX.from(data)
                .buffer(nice.buffer)
                // TODO use trailingMap helper
                .map(async (items, index) => {
                    // Wait for the chunk to reach disk so that write failures surface
                    // to the consumer and the cache is never marked complete early.
                    await fs_extra_1.writeFile(path_1.resolve(options.cacheFolder, `cache-${index}.json`), JSON.stringify(items, undefined, '\t'));
                    return items;
                })
                .flatMap((e) => ix_1.AsyncIterable.from(e))
                .pipe(onLastItem(onCacheComplete));
        }
        return types_1.IX.from(data)
            .pipe(json_1.jsonWrite({ filePath: path_1.resolve(options.cacheFolder, `cache.json`) }))
            .pipe(onLastItem(onCacheComplete));
    });
}
exports._cacheIter = _cacheIter;
/**
 * Deletes the cache folder named by `options.cacheFolder`, together with
 * everything inside it. Does nothing when the folder is absent.
 *
 * @param {Object} options
 * @param {string} options.cacheFolder - Directory to remove.
 */
function removeIfExists(options) {
    if (!fs_extra_1.existsSync(options.cacheFolder)) {
        return;
    }
    // fs-extra's removeSync deletes directories with their contents, avoiding
    // the deprecated `rmdirSync(path, { recursive: true })` form.
    fs_extra_1.removeSync(options.cacheFolder);
}
/**
 * Public entry point for the iterator cache. Hands the call's arguments,
 * together with `_cacheIter`, to ts-prime's `purry`.
 *
 * NOTE(review): `purry` presumably lets callers use either
 * `cacheIter(data, options)` or the curried `cacheIter(options)(data)`
 * form — confirm against the ts-prime documentation.
 *
 * @returns {*} Whatever `purry` returns for the supplied arguments.
 */
function cacheIter() {
    const callArguments = arguments;
    return ts_prime_1.purry(_cacheIter, callArguments);
}
exports.cacheIter = cacheIter;