UNPKG

iterparse

Version:
170 lines (157 loc) 6.5 kB
import { ensureFile, existsSync, readdirSync, readJSON, rmdirSync, unlinkSync, writeFile } from "fs-extra";
import { AsyncIterable } from "ix";
import { basename, extname, resolve } from "path";
import { purry } from "ts-prime";
import { jsonRead, jsonWrite } from "./json";
import { AnyIterable, IX } from "./types";
import * as P from 'ts-prime'
import { ProgressReportOptions } from "./helpers";

/**
 * Creates a pipeable operator that re-yields every item of the source and,
 * once the source has been fully consumed, awaits `fn` before completing.
 *
 * `fn` is only reached when the `for await` loop finishes normally; it is not
 * called when the source throws or the consumer stops iterating early.
 *
 * @param fn Callback awaited after the last item has been yielded.
 * @returns A function mapping a source iterable to an async generator.
 */
export function onLastItem<T>(fn: () => any) {
    return (q: AnyIterable<T>) => {
        async function* iter() {
            for await (const x of q) {
                yield x
            }
            await fn()
        }
        return iter()
    }
}

export interface CacheIterOptions extends ProgressReportOptions {
    /**
     * Where cache data will be stored?
     */
    cacheFolder: string,
    /**
     * Is cache enabled? Defaults to `true`.
     */
    enabled?: boolean,
    /**
     * When reference id changes cache folder will be regenerated.
     * When omitted, a hash of the current date string is used, so the cache
     * is regenerated once the date changes.
     */
    referenceId?: string,
    /**
     * Format cache in human readable `JSON` format.
     * `buffer` is passed to the iterator's `buffer` operator; each buffered
     * batch is written to its own `cache-<index>.json` file.
     */
    nice?: { buffer: number },
    /**
     * Optional logger that receives informational messages about cache state.
     */
    logger?: Pick<Console, 'info'>
}

/**
 * Data-first implementation of {@link cacheIter}.
 *
 * On first iteration the source is streamed to `options.cacheFolder` while
 * being passed through to the consumer. A `.lock` file marks a cache that is
 * still being written; `_meta.json` is written once the source is exhausted.
 * On later iterations, if the metadata still matches (reference id, format and
 * source structure), items are read back from the cache files and the source
 * is not iterated.
 *
 * @param data Source iterable to cache.
 * @param options Cache configuration.
 * @returns A deferred iterable that yields either cached or freshly produced items.
 */
export function _cacheIter<T>(data: AnyIterable<T>, options: CacheIterOptions): IX<T> {
    const { nice, enabled = true } = options
    if (!enabled) {
        options.logger?.info('Cache is disabled. Returning original iterator')
        return IX.from(data)
    }

    return IX.defer(async () => {
        // Fingerprint of the source pipeline: constructor names along the
        // `_source` chain plus the JSON form of the source itself.
        const constructorIteratableId = () => {
            const getIteratableID = (source?: AnyIterable<T>): string => {
                if (source == null) return ''
                const iteratable = source as any
                return `${iteratable?.constructor?.name}:${getIteratableID(iteratable._source)}`
            }
            const iteratableId = P.canFail(() => getIteratableID(data))
            if (P.isError(iteratableId)) {
                return ''
            }
            return P.hash(`${iteratableId}:${JSON.stringify(data)}`)
        }

        const iteratableId = constructorIteratableId()
        const metaFile = resolve(options.cacheFolder, "_meta.json")
        const referenceId = options.referenceId ?? P.hash(new Date().toDateString())
        const lockFile = resolve(options.cacheFolder, ".lock")

        if (existsSync(lockFile)) {
            options.logger?.info('Lock file exist. This usually means that iterator was not cached fully. Cache folder will be deleted and recreated with new iterator')
            // The folder contains at least the lock file, so a non-recursive
            // rmdir would fail with ENOTEMPTY; remove it recursively.
            removeIfExists(options)
        }

        if (!existsSync(metaFile)) {
            options.logger?.info('Meta file does not exists. Deleting cache folder...')
            removeIfExists(options);
        }

        if (existsSync(metaFile)) {
            const meta: {
                referenceId: string,
                createdAt: string,
                iteratableId: string,
                format: CacheIterOptions['nice']
            } = await readJSON(metaFile)

            if (meta.referenceId !== referenceId) {
                options.logger?.info('Reference id changed. Deleting cache folder...')
                removeIfExists(options);
            }
            if (!P.equals(meta.format, options.nice)) {
                options.logger?.info('Cache format changed. Deleting cache folder...')
                removeIfExists(options);
            }
            if (!P.equals(meta.iteratableId, iteratableId)) {
                options.logger?.info('Source iterator structure changed. Deleting cache folder...')
                removeIfExists(options);
            }
        }

        // Returns an iterable over cached items, or undefined when there is
        // nothing usable on disk.
        const getCache = () => {
            if (!existsSync(options.cacheFolder)) return
            const files = readdirSync(options.cacheFolder)
            if (files.length === 0) return

            const cacheFiles = files
                .filter((path) => extname(path) === '.json' && basename(path).startsWith('cache'))
                // Directory listing order is not guaranteed and a plain
                // lexicographic order would put `cache-10` before `cache-2`;
                // sort numerically so items are replayed in source order.
                .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))

            options.logger?.info(`Found cache data. Reading ${cacheFiles.length} files...`)
            return IX.from(cacheFiles)
                .flatMap((filePath) => jsonRead<T>({
                    filePath: resolve(options.cacheFolder, filePath),
                    pattern: '*',
                    progress: options.progress,
                    progressFrequency: options.progressFrequency
                }))
        }

        const cache = getCache()
        if (cache) return cache

        // No usable cache: start from a clean folder and mark it as in progress.
        if (existsSync(options.cacheFolder)) {
            removeIfExists(options)
        }
        await ensureFile(lockFile)
        await writeFile(lockFile, JSON.stringify({ started: Date.now() }))

        const onCacheComplete = async () => {
            unlinkSync(lockFile)
            options.logger?.info(`Cache was created successfully...`)
            await writeFile(metaFile, JSON.stringify({
                iteratableId,
                referenceId,
                createdAt: new Date().toISOString(),
                format: options.nice
            }, undefined, '\t'))
        }

        if (nice) {
            return IX.from(data)
                .buffer(nice.buffer)
                // TODO use trailingMap helper
                .map(async (items, index) => {
                    // Await the write so the chunk is on disk (and any error
                    // surfaces) before the cache is marked complete.
                    await writeFile(
                        resolve(options.cacheFolder, `cache-${index}.json`),
                        JSON.stringify(items, undefined, '\t')
                    )
                    return items
                })
                .flatMap((e) => AsyncIterable.from(e))
                .pipe(onLastItem(onCacheComplete))
        }

        return IX.from(data)
            .pipe(jsonWrite({ filePath: resolve(options.cacheFolder, `cache.json`) }))
            .pipe(onLastItem(onCacheComplete))
    })
}

/**
 * Recursively deletes the cache folder when it exists; does nothing otherwise.
 */
function removeIfExists(options: CacheIterOptions) {
    if (existsSync(options.cacheFolder)) {
        rmdirSync(options.cacheFolder, { recursive: true });
    }
}

/**
 * Cache iterator output to file.
 * Useful when we need to develop complex iterator pipelines.
 * @include ./CacheIterOptions.md
 * @example
 * import { cacheIter } from 'iterparse'
 *
 * getFeed() // If cache exists get feed function will not be called
 *     .pipe(cacheIter({ cacheFolder: "./_cache" }))
 *     .count()
 *
 * @example
 * import { cacheIter } from 'iterparse'
 *
 * const cachedIter = cacheIter(getFeed(), { cacheFolder: "./_cache" })
 *
 * for await (const item of cachedIter) {
 *     console.log(item)
 * }
 * @category Utility
 */
export function cacheIter<T>(options: CacheIterOptions): (data: AnyIterable<T>) => IX<T>
export function cacheIter<T>(data: AnyIterable<T>, options: CacheIterOptions): IX<T>
export function cacheIter() {
    return purry(_cacheIter, arguments)
}