chen-crawler
Version:
Web Crawler Provider for Chen Framework
70 lines • 2.94 kB
JavaScript
;
/**
 * Compiler-emitted decorator helper. Reuses `this.__decorate` when one is
 * already present on `this`; otherwise defines the fallback below.
 *
 * @param {Array<Function>} decorators Decorators to apply; falsy entries are skipped.
 * @param {*} target Class (when called with fewer than 3 arguments) or the object owning `key`.
 * @param {string|symbol} [key] Member name, for member decoration.
 * @param {?PropertyDescriptor} [desc] Descriptor; when exactly `null`, it is read from `target[key]`.
 * @return {*} The decorated value `r` (the class, or the resulting descriptor).
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
// c: argument count. r: value being decorated - `target` when fewer than 3
// arguments were passed, otherwise the descriptor (looked up via
// Object.getOwnPropertyDescriptor when `desc` is exactly null). d: current decorator.
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
// Delegate to Reflect.decorate when the runtime provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
// Otherwise apply the decorators from last to first. The call shape depends on
// the argument count: d(r) for c < 3, d(target, key, r) for c > 3, and
// d(target, key) for exactly 3. A falsy return value keeps the previous r.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
// With more than 3 arguments and a truthy r, install r as the descriptor of
// target[key]; r is the return value in every case.
return c > 3 && r && Object.defineProperty(target, key, r), r;
};
/**
 * Compiler-emitted metadata helper. Reuses `this.__metadata` when one is
 * already present on `this`; otherwise falls back to the function below.
 *
 * @param {*} metadataKey Key forwarded to `Reflect.metadata`.
 * @param {*} metadataValue Value forwarded to `Reflect.metadata`.
 * @return {*} Whatever `Reflect.metadata` returns, or `undefined` when
 *   `Reflect.metadata` is not available as a function.
 */
var __metadata = (this && this.__metadata) || function (metadataKey, metadataValue) {
    var reflectIsObject = typeof Reflect === "object";
    var canCreateMetadata = reflectIsObject && typeof Reflect.metadata === "function";
    if (!canCreateMetadata) {
        return undefined;
    }
    return Reflect.metadata(metadataKey, metadataValue);
};
const core_1 = require('chen/core');
const crawler_1 = require('./crawler');
const storage_1 = require('./storage');
/**
 * WebCrawlerManager service.
 *
 * Holds the default crawler configuration and a shared Storage instance, and
 * builds PageCrawler / SitemapCrawler objects wired to that storage.
 */
let WebCrawlerManager = class WebCrawlerManager extends core_1.Service {
    constructor(...args) {
        super(...args);
        // Request headers sent by default with every crawler HTTP request.
        const defaultHeaders = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
        };
        /**
         * Built-in crawler configuration; application overrides are merged
         * into it by init().
         * @type {WebCrawlerConfig}
         */
        this.defaultConfig = {
            httpClient: { headers: defaultHeaders },
            storage: {}
        };
    }
    /**
     * Init hook.
     *
     * Merges the application's 'extensions.chen-crawler' configuration into
     * the defaults, then creates the shared storage from the resulting
     * `storage` section.
     */
    init() {
        const appOverrides = this.context.app.getConfig().get('extensions.chen-crawler');
        this.defaultConfig = core_1._.merge(this.defaultConfig, appOverrides);
        const storageOptions = this.defaultConfig.storage;
        this.storage = new storage_1.Storage(storageOptions);
    }
    /**
     * Create a page crawler backed by the shared storage.
     *
     * NOTE(review): customConfig is documented as WebCrawlerConfig but is
     * merged over the httpClient defaults, exactly as in
     * createSitemapCrawler - confirm the intended type.
     *
     * @param {string} name
     * @param {string} startingUrl
     * @param {WebCrawlerConfig} customConfig Merged on top of the default httpClient options.
     * @return {PageCrawler}
     */
    createPageCrawler(name, startingUrl, customConfig) {
        const { httpClient } = this.defaultConfig;
        // Merge into a fresh object so the stored defaults are not modified.
        const crawlerOptions = core_1._.merge({}, httpClient, customConfig);
        return new crawler_1.PageCrawler(this.storage, name, startingUrl, crawlerOptions);
    }
    /**
     * Create a sitemap crawler backed by the shared storage.
     *
     * @param {string} name
     * @param {string} startingUrl
     * @param {HttpClientOptions} customConfig Merged on top of the default httpClient options.
     * @return {SitemapCrawler}
     */
    createSitemapCrawler(name, startingUrl, customConfig) {
        const { httpClient } = this.defaultConfig;
        // Merge into a fresh object so the stored defaults are not modified.
        const crawlerOptions = core_1._.merge({}, httpClient, customConfig);
        return new crawler_1.SitemapCrawler(this.storage, name, startingUrl, crawlerOptions);
    }
};
// Class-level decorators for WebCrawlerManager: core_1.injectable() and the
// 'design:paramtypes' metadata entry (a function returning an empty list).
const webCrawlerManagerDecorators = [
    core_1.injectable(),
    __metadata('design:paramtypes', function () { return []; })
];
// Rebind the class to whatever __decorate returns, then publish it.
WebCrawlerManager = __decorate(webCrawlerManagerDecorators, WebCrawlerManager);
exports.WebCrawlerManager = WebCrawlerManager;
//# sourceMappingURL=manager.js.map