UNPKG

website-scrap-engine

Version:
64 lines 2.58 kB
import { ResourceType } from '../resource.js';
import { parseHtml } from './adapters.js';
import { skip } from '../logger/logger.js';

/**
 * Pattern for the `content` attribute of `<meta http-equiv="refresh">`.
 * Capture group 1 is the leading digits, group 2 the quoted URL and
 * group 3 the unquoted URL.
 *
 * Originally created by https://github.com/stevenvachon at
 * https://github.com/stevenvachon/http-equiv-refresh
 * MIT license
 */
const META_REFRESH_PATTERN = /^\s*(\d+)(?:\s*;(?:\s*url\s*=)?\s*(?:["']\s*(.*?)\s*['"]|(.*?)))?\s*$/i;

/**
 * Replace the refresh target inside a meta refresh `content` value.
 *
 * The pattern above anchors the target at the tail of the value (only
 * optional whitespace and a closing quote may follow it), so the LAST
 * occurrence of the link is replaced. Replacing the first occurrence could
 * hit the delay digits or the `url=` keyword instead
 * (e.g. `10; url=1` or `0; url=url`).
 *
 * The replacement is spliced in literally, so `$` sequences in it are not
 * interpreted the way `String.prototype.replace` would interpret them.
 *
 * @param {string} content - original `content` attribute value
 * @param {string} originalLink - link text captured from `content`
 * @param {string} replacement - text to put in place of the link
 * @returns {string} the rewritten attribute value
 */
function replaceRefreshTarget(content, originalLink, replacement) {
  const start = content.lastIndexOf(originalLink);
  if (start < 0) {
    return content;
  }
  const end = start + originalLink.length;
  return `${content.slice(0, start)}${replacement}${content.slice(end)}`;
}

/**
 * Process `<meta http-equiv="refresh" content="...">` elements of an html
 * resource: every refresh target is run through the pipeline, submitted for
 * download unless discarded, and the `content` attribute is rewritten to
 * point at the resulting resource's `replacePath`.
 *
 * Resources whose type is not `ResourceType.Html` are returned untouched.
 * The parsed document is created on demand and cached in `res.meta.doc`.
 *
 * @param res - the resource being processed
 * @param submit - callback receiving each resource that should be downloaded
 * @param options - options; `options.encoding[linkType]` is passed to
 *   `pipeline.createResource`
 * @param pipeline - pipeline providing `linkRedirect`, `detectResourceType`,
 *   `createResource` and `processBeforeDownload`
 * @returns the same `res` that was passed in
 */
export async function processHtmlMetaRefresh(res, submit, options, pipeline) {
  if (res.type !== ResourceType.Html) {
    return res;
  }
  if (!res.meta.doc) {
    res.meta.doc = parseHtml(res, options);
  }
  const $ = res.meta.doc;
  const metaLinks = $('meta[http-equiv="refresh"][content]');
  if (!metaLinks.length) {
    return res;
  }

  const refUrl = res.redirectedUrl || res.url;
  // The save path is only forwarded when the resource was not redirected.
  const savePath = refUrl === res.url ? res.savePath : undefined;
  const depth = res.depth + 1;

  // Elements are handled one after another; each pipeline step is awaited
  // before the next element is looked at.
  for (let index = 0; index < metaLinks.length; index++) {
    const elem = metaLinks.eq(index);
    const attrValue = elem.attr('content');
    if (!attrValue) {
      continue;
    }
    const match = META_REFRESH_PATTERN.exec(attrValue);
    if (!match) {
      continue;
    }
    // Group 2 is the quoted form, group 3 the unquoted form.
    const originalLink = match[2] || match[3];
    if (!originalLink) {
      continue;
    }
    const link = await pipeline.linkRedirect(originalLink, elem, res);
    if (!link) {
      continue;
    }
    const linkType = await pipeline.detectResourceType(
      link, ResourceType.Html, elem, res);
    if (!linkType) {
      if (skip.isTraceEnabled()) {
        skip.trace('skip detectResourceType', originalLink, link, refUrl);
      }
      continue;
    }
    let resource = await pipeline.createResource(
      linkType, depth, link, refUrl, res.localRoot,
      options.encoding[linkType], savePath, res.type);
    resource = await pipeline.processBeforeDownload(
      resource, elem, res, options);
    if (!resource) {
      if (skip.isTraceEnabled()) {
        skip.trace('skip processBeforeDownload',
          originalLink, link, linkType, refUrl);
      }
      continue;
    }
    if (!resource.shouldBeDiscardedFromDownload) {
      submit(resource);
    }
    // The attribute is rewritten even when the resource is discarded from
    // download.
    elem.attr('content',
      replaceRefreshTarget(attrValue, originalLink, resource.replacePath));
  }
  return res;
}
//# sourceMappingURL=process-html-meta.js.map