UNPKG

chen-crawler

Version:

Web Crawler Provider for Chen Framework

53 lines 1.86 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator.throw(value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments)).next()); }); }; const base_1 = require('./base'); const cluster = require('cluster'); const numCPUs = require('os').cpus().length; /** * PageCrawler class */ class PageCrawler extends base_1.Crawler { /** * PageCrawler constructor * @param {Storage} storage * @param {string} name * @param {string} startingUrl * @param {HttpClientOptions} config */ constructor(storage, name, startingUrl, config) { super(storage, name, startingUrl, config); this.worker = (cluster.isMaster) ? `Master` : `Worker ${cluster.worker.process.pid}`; } /** * Crawl url data */ crawlUrlData() { return __awaiter(this, void 0, void 0, function* () { let url; while (url = yield this.queue.shift()) { yield this.crawlUrl(url, this.worker); } }); } /** * Start crawling */ crawl() { return __awaiter(this, void 0, void 0, function* () { yield this.crawlUrl(this.getStartingUrl(), this.worker); for (let i = 1; i < numCPUs; i++) { cluster.fork(); } yield this.crawlUrlData(); }); } } exports.PageCrawler = PageCrawler; //# sourceMappingURL=page.js.map