UNPKG

@skybloxsystems/ticket-bot

Version:
90 lines (73 loc) 2.71 kB
const cheerio = require('cheerio');
const Promise = require('bluebird');
const utils = require('../../utils');
const logger = require('../../logger');
const HtmlSourceElement = require('./html-source-element');

/**
 * Parses HTML text into a cheerio document.
 *
 * @param {string} text - HTML markup to parse.
 * @returns {Function} the cheerio root function (`$`) for the parsed document.
 */
function loadTextToCheerio (text) {
	// A fresh options object is built per call so nothing is shared between documents.
	const parserOptions = {
		decodeEntities: false,
		lowerCaseAttributeNames: false,
	};
	return cheerio.load(text, parserOptions);
}

/**
 * Applies every `<base href="...">` tag of the document to the resource:
 * the resource url is re-resolved through `utils.getUrl` against the href,
 * and the tag is removed from the document. `<base>` tags without an href
 * are left untouched.
 *
 * @param {Function} $ - cheerio root function of the parsed document.
 * @param {Object} resource - resource being handled; must expose `getUrl()` and `setUrl()`.
 */
function prepareToLoad ($, resource) {
	$('base').each((i, element) => {
		const baseTag = $(element);
		const href = baseTag.attr('href');
		if (!href) {
			return;
		}

		resource.setUrl(utils.getUrl(resource.getUrl(), href));
		baseTag.remove();
	});
}

/**
 * Handles an html resource: walks the configured source rules, delegates the
 * path found in each matching element to the download/update callbacks and
 * writes the resulting document text back to the resource.
 */
class HtmlResourceHandler {
	/**
	 * @param {Object} options - handler options. Reads `sources`, `recursiveSources`,
	 *   `updateMissingSources` (here) and `maxRecursiveDepth` (in `exceedMaxRecursiveDepth`).
	 * @param {Object} methods - callbacks supplied by the caller.
	 * @param {Function} methods.downloadChildrenPaths - called with (pathContainer, parentResource);
	 *   its result is consumed with `.then(...)`.
	 * @param {Function} methods.updateChildrenPaths - called with (pathContainer, parentResource);
	 *   its result is consumed with `.then(...)`.
	 */
	constructor (options, methods) {
		this.options = options;
		this.downloadChildrenPaths = methods.downloadChildrenPaths;
		this.updateChildrenPaths = methods.updateChildrenPaths;

		this.recursiveSources = this.options.recursiveSources || [];
		this.downloadSources = this.options.sources;

		// `updateMissingSources` may be `true` (reuse the download rules),
		// an array of rules, or anything else (no update-only rules).
		const updateMissing = this.options.updateMissingSources;
		let updateSources = [];
		if (updateMissing === true) {
			updateSources = this.downloadSources;
		} else if (Array.isArray(updateMissing)) {
			updateSources = updateMissing;
		}
		this.updateSources = updateSources;

		this.allSources = utils.union(this.downloadSources, this.updateSources);
	}

	/**
	 * Processes every rule of `allSources` against the resource text, one rule
	 * after another, then stores the serialized document back on the resource.
	 *
	 * @param {Object} resource - resource exposing `getText()`, `setText()`, `getUrl()`, `setUrl()`.
	 * @returns {Promise<Object>} bluebird promise resolved with the same resource.
	 */
	handle (resource) {
		const $ = loadTextToCheerio(resource.getText());
		prepareToLoad($, resource);

		const processRule = (rule) => this.loadResourcesForRule($, resource, rule);

		return Promise.mapSeries(this.allSources, processRule).then(() => {
			resource.setText($.html());
			return resource;
		});
	}

	/**
	 * Runs a single rule: for each element matching `rule.selector`, hands the
	 * element's path container to the download callback, or to the update
	 * callback when the recursive depth limit is exceeded.
	 *
	 * @param {Function} $ - cheerio root function of the parsed document.
	 * @param {Object} parentResource - resource that owns the document.
	 * @param {Object} rule - source rule; `rule.selector` picks the elements.
	 * @returns {*} whatever `utils.waitAllFulfilled` returns for the collected promises.
	 */
	loadResourcesForRule ($, parentResource, rule) {
		const handleElement = (i, element) => {
			const sourceElement = new HtmlSourceElement($(element), rule);
			const pathContainer = sourceElement.getPathContainer();

			if (!pathContainer) {
				// Nothing to load for this element.
				return Promise.resolve(null);
			}

			if (!this.exceedMaxRecursiveDepth(sourceElement, parentResource)) {
				return this.downloadChildrenPaths(pathContainer, parentResource)
					.then((updatedText) => {
						sourceElement.setData(updatedText);
						sourceElement.removeIntegrityCheck();
					});
			}

			// Depth limit reached: only update the paths instead of downloading.
			logger.debug(`filtering out ${sourceElement} by max recursive depth`);
			return this.updateChildrenPaths(pathContainer, parentResource)
				.then((updatedText) => sourceElement.setData(updatedText));
		};

		const pending = $(rule.selector).map(handleElement).get();
		return utils.waitAllFulfilled(pending);
	}

	/**
	 * Tells whether an element matches one of the recursive rules while the
	 * parent resource is already at (or beyond) `options.maxRecursiveDepth`.
	 *
	 * @param {HtmlSourceElement} el - element wrapper exposing `findMatchedRule()`.
	 * @param {Object} parentResource - resource exposing `getDepth()`.
	 * @returns {*} truthy when the limit is exceeded; the falsy value may be
	 *   `false` or the falsy `maxRecursiveDepth` itself (no limit configured).
	 */
	exceedMaxRecursiveDepth (el, parentResource) {
		const matchesRecursiveRule = Boolean(el.findMatchedRule(this.recursiveSources));
		const depthLimitReached = this.options.maxRecursiveDepth &&
			parentResource.getDepth() >= this.options.maxRecursiveDepth;

		return matchesRecursiveRule && depthLimitReached;
	}
}

module.exports = HtmlResourceHandler;