UNPKG

website-scrap-engine

Version:
77 lines 3.16 kB
import { ResourceType } from '../resource.js';
import { error, skip } from '../logger/logger.js';
import { parseHtml } from './adapters.js';
import { getResourceBodyFromHtml } from './save-html-to-disk.js';

/**
 * Link-bearing attributes scanned inside an SVG document.
 *
 * Each entry gives the selector used to find candidate elements, the
 * attribute that holds the link, and the resource type passed to
 * `pipeline.detectResourceType` as the starting type for that link.
 */
const svgSelectors = [
  { selector: '*[xlink\\:href]', attr: 'xlink:href', type: ResourceType.Binary },
  { selector: '*[href]', attr: 'href', type: ResourceType.Binary },
];

/**
 * Rewrites the links of an SVG resource and submits the linked resources.
 *
 * Resources whose `type` is not `ResourceType.Svg` are returned untouched.
 * For an SVG resource, every element matched by `svgSelectors` has its link
 * attribute run through the pipeline (`linkRedirect` -> `detectResourceType`
 * -> `createResource` -> `processBeforeDownload`). A link is left as-is when
 * any of those steps yields a falsy value. Otherwise the created resource is
 * passed to `submit` (unless it is flagged `shouldBeDiscardedFromDownload`)
 * and the attribute is overwritten with the resource's `replacePath`.
 * Finally `res.body` is regenerated via `getResourceBodyFromHtml`.
 *
 * @param res - Resource being processed. Read: `type`, `url`,
 *   `redirectedUrl`, `savePath`, `depth`, `localRoot`, `meta.doc`.
 *   Written: `meta.doc` (when absent) and `body`.
 * @param submit - Callback invoked with each linked resource to download.
 * @param options - Passed through to `parseHtml`, `processBeforeDownload`
 *   and `getResourceBodyFromHtml`; `options.encoding[linkType]` is forwarded
 *   to `createResource`.
 * @param pipeline - Provides `linkRedirect`, `detectResourceType`,
 *   `createResource` and `processBeforeDownload`.
 * @returns The same `res` object.
 */
export async function processSvg(res, submit, options, pipeline) {
  if (res.type !== ResourceType.Svg) {
    return res;
  }

  // No pipeline.linkRedirect call on refUrl itself: the original author noted
  // it is useless since processRedirectedUrl is enabled by default.
  const refUrl = res.redirectedUrl || res.url;
  // The save path is only forwarded when the resource was not redirected.
  const savePath = refUrl === res.url ? res.savePath : undefined;
  const depth = res.depth + 1;

  // Reuse a previously parsed document when one is cached on the resource.
  let doc = res.meta.doc;
  if (!doc) {
    doc = parseHtml(res, options);
    res.meta.doc = doc;
  }

  // Links are handled strictly one after another so that pipeline calls and
  // submissions happen in document order per selector.
  for (const { selector, attr, type } of svgSelectors) {
    if (!attr) {
      // Nothing to read or rewrite without an attribute name.
      continue;
    }

    // NOTE(review): `doc` looks like a cheerio-style query function
    // (callable, `.length`, `.eq()`, `.attr()`) - confirm against parseHtml.
    const elements = doc(selector);
    for (let index = 0; index < elements.length; index++) {
      const elem = elements.eq(index);
      const originalLink = elem.attr(attr);
      if (!originalLink) {
        // Missing or empty attribute value.
        continue;
      }

      const link = await pipeline.linkRedirect(originalLink, elem, res);
      if (!link) {
        if (skip.isTraceEnabled()) {
          skip.trace('skip linkRedirect', originalLink, refUrl);
        }
        continue;
      }

      const linkType = await pipeline.detectResourceType(link, type, elem, res);
      if (!linkType) {
        if (skip.isTraceEnabled()) {
          skip.trace('skip detectResourceType', originalLink, link, refUrl);
        }
        continue;
      }

      const created = await pipeline.createResource(
        linkType,
        depth,
        link,
        refUrl,
        res.localRoot,
        options.encoding[linkType],
        savePath,
        res.type,
      );
      const resource = await pipeline.processBeforeDownload(created, elem, res, options);
      if (!resource) {
        if (skip.isTraceEnabled()) {
          skip.trace('skip processBeforeDownload', originalLink, link, linkType, refUrl);
        }
        continue;
      }

      if (!resource.shouldBeDiscardedFromDownload) {
        submit(resource);
      }

      // Historical workaround kept from the original: a replace path that is
      // just '.html' or '/.html' is written out as an empty attribute value.
      const replacePath = resource.replacePath;
      const replaceValue =
        replacePath === '.html' || replacePath === '/.html' ? '' : replacePath;
      elem.attr(attr, replaceValue);
    }
  }

  res.body = getResourceBodyFromHtml(res, options);
  return res;
}
//# sourceMappingURL=process-svg.js.map