UNPKG

chen-crawler

Version:

Web Crawler Provider for Chen Framework

70 lines 2.94 kB
"use strict";

/**
 * Decorator-application helper (reuses `this.__decorate` when one is already
 * defined on the module's `this`).
 *
 * Delegates to `Reflect.decorate` when that function exists; otherwise walks
 * `decorators` from last to first, letting each one replace the running
 * result. When called with more than three arguments and the result is
 * truthy, the result is installed on `target[key]` as a property descriptor.
 *
 * NOTE(review): this has the shape of the helper the TypeScript compiler
 * emits for decorators — confirm before hand-editing.
 *
 * @param {Function[]} decorators
 * @param {*} target
 * @param {string|symbol} [key]
 * @param {PropertyDescriptor|null} [desc]
 * @return {*} the decorated target or descriptor
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    } else if (desc === null) {
        // An explicit null descriptor means "look it up on the target".
        desc = Object.getOwnPropertyDescriptor(target, key);
        result = desc;
    } else {
        result = desc;
    }

    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    } else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (decorator) {
                var produced;
                if (argCount < 3) {
                    produced = decorator(result);
                } else if (argCount > 3) {
                    produced = decorator(target, key, result);
                } else {
                    produced = decorator(target, key);
                }
                // A decorator that returns nothing leaves the result as is.
                result = produced || result;
            }
        }
    }

    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};

/**
 * Metadata helper (reuses `this.__metadata` when one is already defined).
 *
 * Returns `Reflect.metadata(k, v)` when `Reflect.metadata` is available and
 * `undefined` otherwise.
 *
 * @param {*} k metadata key
 * @param {*} v metadata value
 * @return {Function|undefined}
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    if (typeof Reflect === "object" && typeof Reflect.metadata === "function") {
        return Reflect.metadata(k, v);
    }
};

const core_1 = require('chen/core');
const crawler_1 = require('./crawler');
const storage_1 = require('./storage');

/**
 * WebCrawlerManager class
 *
 * Service that holds the crawler default configuration and a shared storage
 * instance, and builds page and sitemap crawlers from them.
 */
let WebCrawlerManager = class WebCrawlerManager extends core_1.Service {

    constructor() {
        super(...arguments);

        /**
         * Web crawler config: default HTTP client headers plus an empty
         * storage section. `init()` merges the application config into it.
         * @type {WebCrawlerConfig}
         */
        this.defaultConfig = {
            httpClient: {
                headers: {
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Language': 'en-US,en;q=0.8',
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
                }
            },
            storage: {}
        };
    }

    /**
     * Init hook
     *
     * Merges the application's `extensions.chen-crawler` config section into
     * the defaults, then creates the storage from the merged `storage`
     * section.
     */
    init() {
        const appOverrides = this.context.app.getConfig().get('extensions.chen-crawler');
        this.defaultConfig = core_1._.merge(this.defaultConfig, appOverrides);

        const storageConfig = this.defaultConfig.storage;
        this.storage = new storage_1.Storage(storageConfig);
    }

    /**
     * Create page crawler
     *
     * The crawler receives a fresh object holding the default `httpClient`
     * settings with `customConfig` merged on top; the defaults themselves
     * are not modified.
     *
     * NOTE(review): `customConfig` is merged over `defaultConfig.httpClient`,
     * exactly as in `createSitemapCrawler`, which documents the parameter as
     * `HttpClientOptions` — confirm which type is intended here.
     *
     * @param {string} name
     * @param {string} startingUrl
     * @param {WebCrawlerConfig} customConfig
     * @return {PageCrawler}
     */
    createPageCrawler(name, startingUrl, customConfig) {
        const httpDefaults = this.defaultConfig.httpClient;
        const options = core_1._.merge({}, httpDefaults, customConfig);
        return new crawler_1.PageCrawler(this.storage, name, startingUrl, options);
    }

    /**
     * Create sitemap crawler
     *
     * The crawler receives a fresh object holding the default `httpClient`
     * settings with `customConfig` merged on top; the defaults themselves
     * are not modified.
     *
     * @param {string} name
     * @param {string} startingUrl
     * @param {HttpClientOptions} customConfig
     * @return {SitemapCrawler}
     */
    createSitemapCrawler(name, startingUrl, customConfig) {
        const httpDefaults = this.defaultConfig.httpClient;
        const options = core_1._.merge({}, httpDefaults, customConfig);
        return new crawler_1.SitemapCrawler(this.storage, name, startingUrl, options);
    }
};

// Apply the class decorators: `injectable()` from chen/core plus the
// 'design:paramtypes' metadata entry.
const classDecorators = [
    core_1.injectable(),
    __metadata('design:paramtypes', function () { return []; })
];
WebCrawlerManager = __decorate(classDecorators, WebCrawlerManager);

exports.WebCrawlerManager = WebCrawlerManager;
//# sourceMappingURL=manager.js.map