UNPKG

website-scrap-engine

Version:

Configurable website scraper written in TypeScript

github.com/website-local/website-scrap-engine

website-local/website-scrap-engine

59 lines • 2.11 kB

JavaScript

import { AbstractDownloader } from './main.js';
import { skip } from '../logger/logger.js';

/**
 * Downloader that drives each resource through the pipeline stages
 * (download -> processAfterDownload -> saveToDisk) by awaiting them directly
 * on `this.pipeline`.
 *
 * NOTE(review): the name suggests no worker threads are involved; nothing in
 * this class spawns any, but the base class is not visible here — confirm.
 */
export class SingleThreadDownloader extends AbstractDownloader {
    /**
     * @param pathToOptions forwarded to the base constructor and also kept
     *   on the instance as `this.pathToOptions`.
     * @param overrideOptions forwarded untouched to the base constructor.
     */
    constructor(pathToOptions, overrideOptions) {
        super(pathToOptions, overrideOptions);
        this.pathToOptions = pathToOptions;
        // `init` is a public alias of whatever the base class stored in
        // `_initOptions` (presumably a promise that settles once options are
        // loaded — verify against AbstractDownloader).
        this.init = this._initOptions;
    }

    /**
     * Initialization hook: seeds the downloader with `options.initialUrl`
     * when it is set, otherwise delegates to the pipeline's own `init`.
     *
     * @param options resolved options object; only `initialUrl` is read here.
     * @returns the result of `addInitialResource(...)` or `pipeline.init(...)`.
     */
    _internalInit(options) {
        if (!options.initialUrl) {
            return this.pipeline.init(this.pipeline, this);
        }
        return this.addInitialResource(options.initialUrl);
    }

    /**
     * Downloads one resource, post-processes it and asks the pipeline to
     * save it. Failures in either phase are reported through
     * `this.handleError` instead of being rethrown.
     *
     * @param res the resource to download; `url`, `rawUrl` and `refUrl` are
     *   read for bookkeeping and logging.
     * @returns a promise resolving to `undefined` on every path.
     */
    async downloadAndProcessResource(res) {
        let downloaded;
        try {
            downloaded = await this.pipeline.download(res);
            if (!downloaded) {
                // A falsy download result means the pipeline dropped the resource.
                skip.debug('discarded after download', res.url, res.rawUrl, res.refUrl);
                return;
            }
        } catch (err) {
            this.handleError(err, 'downloading resource', res);
            return;
        }

        this.downloadedUrl.add(res.url);

        // Callback handed to the post-processing stage so it can enqueue
        // further resources; accepts either a single resource or an array.
        const submit = (resources) => {
            if (!Array.isArray(resources)) {
                this._addProcessedResource(resources);
                return;
            }
            for (const item of resources) {
                this._addProcessedResource(item);
            }
        };

        try {
            const processed = await this.pipeline.processAfterDownload(downloaded, submit);
            if (!processed) {
                skip.warn('skipped downloaded resource', downloaded.url, downloaded.refUrl);
                return;
            }

            // A truthy result from saveToDisk is treated as "not saved"
            // (presumably the resource is handed back when nothing persisted
            // it — confirm against the pipeline contract).
            const notSaved = await this.pipeline.saveToDisk(processed);
            if (notSaved) {
                skip.warn('downloaded resource not saved', downloaded.url, downloaded.refUrl);
            }

            // Also record the redirect target in queuedUrl when it differs
            // from the resource's own url.
            const redirectTarget = processed.redirectedUrl;
            if (redirectTarget && redirectTarget !== processed.url) {
                this.queuedUrl.add(redirectTarget);
            }
        } catch (err) {
            this.handleError(err, 'post-process', res);
        }
    }
}
//# sourceMappingURL=single.js.map