UNPKG

website-validator

Version:
42 lines 2.31 kB
import Rx from "rxjs";
import RxJsOperators from "rxjs/operators";
import { toCanonical, isInternalLink } from "./index.js";
import { deepEqual } from "fast-equals";

/**
 * Builds a crawler that fetches the given start URLs and then keeps following
 * the links found in the fetched files until no unvisited internal URL is left.
 *
 * Each distinct `{ url, role }` pair is fetched once (roles are compared with
 * `deepEqual`). At most 10 `fetchFile` calls are in flight at the same time;
 * link extraction through `pool.getLinks` is not limited.
 *
 * @param {object} pool - Must provide `getLinks({ url, role, res })`, whose
 *   awaited result is an array of links exposing `url` and `role`.
 * @param {Function} fetchFile - Called as `fetchFile(url)`; the awaited result
 *   must expose `data`. A result with `data === null` is not scanned for links.
 * @param {*} baseUrl - Forwarded to `toCanonical` and `isInternalLink`
 *   (presumably the root URL of the site being crawled; confirm against ./index.js).
 * @param {*} indexName - Forwarded to `toCanonical`
 *   (presumably the directory index file name; confirm against ./index.js).
 * @returns {Function} Async function taking `startUrls`, an array of
 *   `{ url, role }`. It resolves to an array of `{ url, role, res, links }`,
 *   where `links` is `null` for files whose `res.data` is `null`. It resolves
 *   to an empty array when `startUrls` is empty, and rejects when fetching or
 *   link extraction fails.
 */
export const recursiveFetchFiles = (pool, fetchFile, baseUrl, indexName) => async (startUrls) => {
    if (startUrls.length === 0) {
        return [];
    }
    // Every URL to visit is pushed into this subject: the start URLs below and
    // the links discovered while processing fetched files.
    const urlSubject = new Rx.Subject();
    // Lets each distinct { url, role } pair through only the first time it is seen.
    const uniqueUrls = urlSubject.pipe(
        RxJsOperators.scan(({ cache }, { url, role }) => {
            const alreadySeen = cache.some((cacheElement) => url === cacheElement.url && deepEqual(role, cacheElement.role));
            if (alreadySeen) {
                return { cache, emit: [] };
            }
            return { cache: [...cache, { url, role }], emit: [{ url, role }] };
        }, { cache: [], emit: [] }),
        RxJsOperators.filter(({ emit }) => emit.length > 0),
        RxJsOperators.mergeMap(({ emit }) => emit),
        RxJsOperators.share()
    );
    const results = uniqueUrls.pipe(
        // Fetch stage, limited to 10 concurrent fetches.
        RxJsOperators.mergeMap(async ({ url, role }) => {
            const res = await fetchFile(url);
            return {
                url,
                role,
                res,
            };
        }, 10),
        // Link extraction stage: feeds discovered internal links back into urlSubject.
        RxJsOperators.mergeMap(async ({ url, role, res }) => {
            if (res.data === null) {
                return { url, role, res, links: null };
            }
            const links = await pool.getLinks({ url: toCanonical(baseUrl, indexName)(url), role, res });
            // Link URLs are canonicalized against the URL of the file they were found in.
            const discoveredUrls = links.map((link) => ({ url: toCanonical(url, indexName)(link.url), role: link.role }));
            // The new URLs are queued BEFORE this result is emitted, so the
            // "started" counter below never lags behind the "finished" counter.
            discoveredUrls
                .filter((discovered) => isInternalLink(baseUrl)(discovered.url))
                .forEach((discovered) => urlSubject.next({ url: discovered.url, role: discovered.role }));
            return { url, role, res, links };
        }),
        RxJsOperators.share()
    );
    // Completion detection: once as many results have been produced as unique
    // URLs have been started, nothing is pending and the subject can complete.
    uniqueUrls.pipe(
        RxJsOperators.scan((num) => num + 1, 0),
        RxJsOperators.combineLatestWith(results.pipe(RxJsOperators.scan((num) => num + 1, 0))),
        RxJsOperators.filter(([startedNum, finishedNum]) => startedNum === finishedNum)
    ).subscribe({
        next: () => urlSubject.complete(),
        // A failure in the pipeline also reaches this subscriber. It is already
        // delivered to the caller through the promise returned below; without
        // this handler RxJS would additionally rethrow it asynchronously as an
        // unhandled error.
        error: () => { },
    });
    startUrls.forEach(({ url, role }) => urlSubject.next({ url: toCanonical(baseUrl, indexName)(url), role }));
    return await Rx.lastValueFrom(results.pipe(RxJsOperators.toArray()));
};
//# sourceMappingURL=fetch-files.js.map