UNPKG

website-scrap-engine

Version:
98 lines 3.82 kB
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { WorkerPool } from './worker-pool.js';
import { skip } from '../logger/logger.js';
import { AbstractDownloader } from './main.js';

/**
 * Resolve the absolute path of the `worker.js` file located next to this module.
 *
 * This file is an ES module, where the CommonJS `__dirname` binding does not
 * exist, so the containing directory is derived from `import.meta.url`.
 *
 * @returns {string} absolute file-system path of the sibling `worker.js`
 */
function defaultWorkerPath() {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  return path.resolve(moduleDir, 'worker.js');
}

/**
 * Downloader that fetches resources itself and hands every downloaded
 * resource to a {@link WorkerPool} for post-processing.
 */
export class MultiThreadDownloader extends AbstractDownloader {
  /**
   * @param pathToOptions forwarded to the base class and to the worker pool
   * @param overrideOptions forwarded to the base class
   * @param _workerFactory optional factory forwarded to the worker pool
   */
  constructor(pathToOptions, overrideOptions, _workerFactory) {
    super(pathToOptions, overrideOptions);
    this.pathToOptions = pathToOptions;
    this._workerFactory = _workerFactory;
    this.init = this._initOptions;
    // Promises returned by pipeline.dispose for workers that have exited;
    // they are awaited in dispose().
    this.workerDispose = [];
  }

  /**
   * Create the worker pool and kick off the initial work.
   *
   * The pool size is `options.concurrency`, capped by `options.workerCount`
   * when that is set, and never less than 1.
   *
   * @param options options object; `concurrency`, `workerCount`,
   *   `pathToWorker` and `maxLoad` are read from it, and the object itself is
   *   passed to the pool as `overrideOptions`
   * @returns the result of `addInitialResource` when `this.options.initialUrl`
   *   is set, otherwise a promise that runs `pipeline.init` once
   *   `_initOptions` has resolved
   */
  _internalInit(options) {
    let workerCount = options.concurrency;
    if (options.workerCount) {
      workerCount = Math.min(options.workerCount, workerCount);
    }
    if (workerCount < 1) {
      workerCount = 1;
    }
    const overrideOptions = options;
    this._pool = new WorkerPool(
      workerCount,
      // worker script should be compiled to .js
      overrideOptions.pathToWorker || defaultWorkerPath(),
      { pathToOptions: this.pathToOptions, overrideOptions },
      overrideOptions.maxLoad || -1,
      this._workerFactory
    );
    for (const info of this.pool.workers) {
      // Collect the dispose promise of every exited worker so that dispose()
      // can wait for all of them.
      info.worker.addListener('exit', (exitCode) => {
        this.workerDispose.push(
          this.pipeline.dispose(this.pipeline, this, info, exitCode)
        );
      });
    }
    if (this.options.initialUrl) {
      return this.addInitialResource(this.options.initialUrl);
    }
    return this._initOptions.then(() => this.pipeline.init(this.pipeline, this));
  }

  /**
   * The worker pool created by `_internalInit`.
   *
   * @throws {TypeError} when accessed before the pool has been created
   */
  get pool() {
    if (this._pool) {
      return this._pool;
    }
    throw new TypeError('MultiThreadDownloader: pool not initialized');
  }

  /**
   * Download a resource, submit it to the worker pool and enqueue whatever
   * the worker reports back.
   *
   * @param res the resource to download
   * @returns {Promise<boolean|undefined>} `false` when downloading or
   *   submitting failed (the error is reported through `handleError`),
   *   `undefined` in every other case
   */
  async downloadAndProcessResource(res) {
    let downloaded;
    try {
      downloaded = await this.pipeline.download(res);
      if (!downloaded) {
        skip.debug('discarded after download', res.url, res.rawUrl, res.refUrl);
        return;
      }
    } catch (e) {
      this.handleError(e, 'downloading resource', res);
      return false;
    }

    let msg;
    try {
      const body = downloaded.body;
      const ownsWholeBuffer =
        (ArrayBuffer.isView(body) || Buffer.isBuffer(body)) &&
        body.byteOffset === 0 &&
        body.byteLength === body.buffer.byteLength &&
        body.buffer instanceof ArrayBuffer;
      if (ownsWholeBuffer) {
        // the array buffer view fully owns the underlying ArrayBuffer,
        // so the raw buffer is handed over in the second argument
        // (presumably a transfer list - confirm against WorkerPool.submitTask)
        downloaded.body = body.buffer;
        msg = await this.pool.submitTask(downloaded, [downloaded.body]);
      } else {
        // lets clone and send it.
        msg = await this.pool.submitTask(downloaded);
      }
    } catch (e) {
      this.handleError(e, 'submitting resource to worker', res);
      return false;
    }

    this.downloadedUrl.add(res.url);
    if (!msg) {
      skip.info('discarded in post-processing', res.url, res.rawUrl, res.refUrl);
      return;
    }
    // A worker-side error is reported, but anything else the message carries
    // is still handled below.
    if (msg.error) {
      this.handleError(msg.error, 'post-process', res);
    }
    const processed = msg.body;
    if (processed != null && processed.length) {
      processed.forEach((rawRes) => this._addProcessedResource(rawRes));
    }
    if (msg.redirectedUrl) {
      this.queuedUrl.add(msg.redirectedUrl);
    }
  }

  /**
   * Dispose the base downloader, then the worker pool, then wait for every
   * per-worker dispose promise collected from worker `exit` events.
   */
  async dispose() {
    await super.dispose();
    await this.pool.dispose();
    // Swap the list out before awaiting it.
    const pending = this.workerDispose;
    this.workerDispose = [];
    await Promise.all(pending);
  }
}
//# sourceMappingURL=multi.js.map