UNPKG

website-scrap-engine

Version:
70 lines 2.39 kB
import { load } from 'cheerio';
import { ResourceType } from '../resource.js';
import { toString } from '../util.js';

/**
 * Adapts a boolean predicate into a url-returning link step.
 *
 * @param {Function} fn - predicate called as `fn(url, element, parent)`.
 * @returns {Function} `(url, element, parent)` step that returns `undefined`
 *   when `fn` returns a truthy value, and the unchanged `url` otherwise.
 */
export const skipProcess = (fn) => (url, element, parent) =>
  fn(url, element, parent) ? undefined : url;

/**
 * Adapts a boolean predicate into a step that flags resources.
 *
 * @param {Function} fn - predicate called as `fn(res)`.
 * @returns {Function} `(res)` step that sets
 *   `res.shouldBeDiscardedFromDownload = true` when `fn(res)` is truthy and
 *   always returns the same `res` object.
 */
export const dropResource = (fn) => (res) => {
  if (fn(res)) {
    res.shouldBeDiscardedFromDownload = true;
  }
  return res;
};

/**
 * Adapts a side-effect callback into a step that passes the resource through.
 *
 * @param {Function} fn - called as `fn(res.url, element, res, parent)`;
 *   its return value is ignored.
 * @returns {Function} `(res, element, parent)` step that returns `res`.
 */
export const preProcess = (fn) => (res, element, parent) => {
  fn(res.url, element, res, parent);
  return res;
};

/**
 * Adapts a link-rewriting function into a step that updates `downloadLink`.
 *
 * @param {Function} fn - called as `fn(res.downloadLink, res)`; should return
 *   the replacement download link, or a falsy value.
 * @returns {Function} `(res)` step. When `res.downloadLink` is set and `fn`
 *   returns a falsy value, the step returns `undefined`; otherwise it stores
 *   the new link (if any) on `res.downloadLink` and returns `res`.
 */
export const requestRedirect = (fn) => (res) => {
  if (res.downloadLink) {
    const downloadLink = fn(res.downloadLink, res) || undefined;
    if (!downloadLink) {
      // The rewriter rejected the link: hand back nothing instead of res.
      return;
    }
    res.downloadLink = downloadLink;
  }
  return res;
};

/**
 * Adapts a url-rewriting function into a step that updates `redirectedUrl`.
 *
 * @param {Function} fn - called as `fn(res.redirectedUrl, res)`.
 * @returns {Function} `(res)` step that, when `res.redirectedUrl` is set,
 *   replaces it with the result of `fn` (falsy results become `undefined`)
 *   and always returns `res`.
 */
export const redirectFilter = (fn) => (res) => {
  if (res.redirectedUrl) {
    res.redirectedUrl = fn(res.redirectedUrl, res) || undefined;
  }
  return res;
};

/**
 * Runs the redirect target of a resource through the pipeline and copies the
 * processed url (and save path, when present) back onto the resource.
 *
 * Nothing happens unless `res.redirectedUrl` is set and differs from
 * `res.url`.
 *
 * @param {object} res - resource being processed; mutated in place.
 * @param {Function} submit - not used by this step.
 * @param {object} options - not used by this step.
 * @param {object} pipeline - must provide `createAndProcessResource`.
 * @returns {Promise<object>} resolves to the same `res` object.
 */
export async function processRedirectedUrl(res, submit, options, pipeline) {
  if (res.redirectedUrl && res.redirectedUrl !== res.url) {
    const redirectedRes = await pipeline.createAndProcessResource(
      res.redirectedUrl,
      res.type,
      res.depth,
      null,
      res
    );
    if (redirectedRes) {
      res.redirectedUrl = redirectedRes.url;
      // https://github.com/website-local/website-scrap-engine/issues/385
      // 2011/11/15
      if (redirectedRes.savePath) {
        res.redirectedSavePath = redirectedRes.savePath;
      }
    }
  }
  return res;
}

/**
 * Decodes the body of a resource and parses it with cheerio.
 *
 * The encoding is `res.encoding`, falling back to
 * `options.encoding[res.type]`, falling back to `'utf8'`.
 *
 * @param {object} res - resource providing `body`, `type` and optionally
 *   `encoding`.
 * @param {object} options - provides the `encoding` lookup and optionally
 *   `cheerioParse`, which is forwarded to cheerio's `load` when truthy.
 * @returns {object} the value returned by cheerio's `load`.
 */
export const parseHtml = (res, options) => {
  const encoding = res.encoding || options.encoding[res.type] || 'utf8';
  const html = toString(res.body, encoding);
  if (options.cheerioParse) {
    return load(html, options.cheerioParse);
  }
  return load(html);
};

/**
 * Returns `res.meta.doc`, parsing and caching it first when it is missing.
 */
const ensureParsedDoc = (res, options) => {
  if (!res.meta.doc) {
    res.meta.doc = parseHtml(res, options);
  }
  return res.meta.doc;
};

/**
 * Adapts a synchronous document transformer into a resource step.
 *
 * @param {Function} fn - called as `fn(doc, res)`; its return value becomes
 *   the new `res.meta.doc`.
 * @returns {Function} `(res, submit, options)` step that only touches
 *   resources whose `type` is `ResourceType.Html` and always returns `res`.
 */
export const processHtml = (fn) => (res, submit, options) => {
  if (res.type === ResourceType.Html) {
    res.meta.doc = fn(ensureParsedDoc(res, options), res);
  }
  return res;
};

/**
 * Adapts an asynchronous document transformer into a resource step.
 *
 * @param {Function} fn - called as `fn(doc, res)`; the awaited result becomes
 *   the new `res.meta.doc`.
 * @returns {Function} async `(res, submit, options)` step that only touches
 *   resources whose `type` is `ResourceType.Html` and resolves to `res`.
 */
export const processHtmlAsync = (fn) => async (res, submit, options) => {
  if (res.type === ResourceType.Html) {
    res.meta.doc = await fn(ensureParsedDoc(res, options), res);
  }
  return res;
};
//# sourceMappingURL=adapters.js.map