UNPKG

website-scrap-engine

Version:
87 lines 3.42 kB
import { parentPort, workerData } from 'node:worker_threads';
import { mergeOverrideOptions } from '../options.js';
import { normalizeResource, prepareResourceForClone } from '../resource.js';
import { skip } from '../logger/logger.js';
import { importDefaultFromPath } from '../util.js';
import { WorkerMessageType } from './types.js';
import { PipelineExecutorImpl } from './pipeline-executor-impl.js';

/*
 * Worker-thread entry point.
 *
 * On start-up it loads the options module named by `workerData.pathToOptions`,
 * merges `workerData.overrideOptions` into it and builds a pipeline executor.
 * For every message received on `parentPort` it runs the after-download
 * pipeline on `msg.body`, tries to save the result to disk, and always posts
 * one `WorkerMessageType.Complete` message back carrying the submitted
 * resources, the error (if any) and the redirected url (if any).
 */

const { pathToOptions, overrideOptions } = workerData;

const asyncOptions = importDefaultFromPath(pathToOptions);

// Resolves to the initialized pipeline; awaited by every message handler.
const asyncPipeline = asyncOptions.then(loadedOptions => {
  const options = mergeOverrideOptions(loadedOptions, overrideOptions);
  const pipeline = new PipelineExecutorImpl(options, options.req, options);
  options.configureLogger(options.localRoot, options.logSubDir || '');
  const init = pipeline.init(pipeline);
  // init may be synchronous or return a thenable; wait for it in the latter case
  if (init && init.then) {
    return init.then(() => pipeline);
  }
  return pipeline;
});

// Own, non-enumerable fields of Error objects that for...in does not visit.
const ERROR_FIELDS = ['name', 'message', 'stack'];

/**
 * Converts a caught value into something that can be sent with postMessage.
 *
 * Handles the case where the error object could not be cloned, see
 * https://github.com/website-local/website-scrap-engine/issues/340
 *
 * @param {unknown} e the caught value
 * @returns {unknown} a structured clone of `e` when possible; otherwise a
 * plain object whose values are the stringified properties of `e`, or
 * `String(e)` when `e` is not an object
 */
function toPostableError(e) {
  try {
    // structuredClone should always be available
    if (typeof structuredClone === 'function') {
      return structuredClone(e);
    }
    // this is the old behavior, kept for runtimes without structuredClone
    return e;
  } catch (_cloneError) {
    // can not clone, so no need to get the full error here
    if (!e || typeof e !== 'object') {
      return String(e);
    }
    const clone = {};
    for (const key in e) {
      try {
        clone[key] = String(e[key]);
      } catch (_readError) {
        // best effort: a property that can not be read or stringified is
        // skipped so that the rest of the error still reaches the parent
      }
    }
    for (const key of ERROR_FIELDS) {
      if (key in clone) {
        continue;
      }
      try {
        const value = e[key];
        if (typeof value === 'string') {
          clone[key] = value;
        }
      } catch (_readError) {
        // best effort, same as above
      }
    }
    // the fallback clone must be returned, otherwise the failure would be
    // reported to the parent without any error at all
    return clone;
  }
}

if (parentPort) {
  parentPort.addListener('message', async (msg) => {
    const collectedResource = [];
    let error;
    let redirectedUrl;
    try {
      const pipeline = await asyncPipeline;
      const res = msg.body;
      const downloadResource = normalizeResource(res);
      // Collects resources submitted by the pipeline, prepared for cloning
      const submit = (resources) => {
        if (Array.isArray(resources)) {
          for (let i = 0; i < resources.length; i++) {
            collectedResource.push(prepareResourceForClone(resources[i]));
          }
        } else {
          collectedResource.push(prepareResourceForClone(resources));
        }
      };
      const processedResource =
        await pipeline.processAfterDownload(downloadResource, submit);
      if (!processedResource) {
        skip.warn('skipped downloaded resource',
          downloadResource.url, downloadResource.refUrl);
      } else if (await pipeline.saveToDisk(processedResource)) {
        // a truthy result from saveToDisk is reported as "not saved"
        skip.warn('downloaded resource not saved',
          downloadResource.url, downloadResource.refUrl);
      }
      if (processedResource &&
        processedResource.redirectedUrl &&
        processedResource.redirectedUrl !== processedResource.url) {
        redirectedUrl = processedResource.redirectedUrl;
      }
    } catch (e) {
      error = toPostableError(e);
    } finally {
      // always answer, so the parent is notified even when the task failed
      const message = {
        taskId: msg.taskId,
        type: WorkerMessageType.Complete,
        body: collectedResource,
        error,
        redirectedUrl
      };
      if (parentPort) {
        parentPort.postMessage(message);
      }
    }
  });
}
//# sourceMappingURL=worker.js.map