chen-crawler
Version:
Web Crawler Provider for Chen Framework
53 lines • 1.86 kB
JavaScript
;
/**
 * Async/await helper: drives a generator function to completion and returns
 * a promise for its final value. Every yielded value is wrapped in a promise;
 * its fulfilment is sent back through next() and its rejection through throw().
 * An already-defined this.__awaiter takes precedence over this implementation.
 * @param {*} thisArg - `this` binding for the generator function
 * @param {Array|undefined} _arguments - arguments applied to the generator function
 * @param {Function|undefined} P - promise constructor; falls back to Promise
 * @param {Function} generator - generator function to run
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        var iterator;
        // Build a handler that resumes the generator through the given method;
        // anything thrown while resuming or scheduling rejects the outer promise.
        function resumeWith(method) {
            return function (value) {
                try {
                    step(iterator[method](value));
                }
                catch (e) {
                    reject(e);
                }
            };
        }
        var fulfilled = resumeWith('next');
        var rejected = resumeWith('throw');
        function step(result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            new P(function (settle) { settle(result.value); }).then(fulfilled, rejected);
        }
        iterator = generator.apply(thisArg, _arguments);
        step(iterator.next());
    });
};
const base_1 = require('./base');
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;
/**
 * PageCrawler class
 *
 * Crawler built on base_1.Crawler that crawls a starting URL, forks cluster
 * workers from the primary process, and then works through the URL queue.
 */
class PageCrawler extends base_1.Crawler {
    /**
     * PageCrawler constructor
     * @param {Storage} storage
     * @param {string} name
     * @param {string} startingUrl
     * @param {HttpClientOptions} config
     */
    constructor(storage, name, startingUrl, config) {
        super(storage, name, startingUrl, config);
        // Label handed to crawlUrl(): "Master" in the primary process,
        // "Worker <pid>" in a forked cluster worker.
        this.worker = (cluster.isMaster) ? `Master` : `Worker ${cluster.worker.process.pid}`;
    }
    /**
     * Crawl url data
     *
     * Repeatedly takes the next entry from this.queue and crawls it, stopping
     * at the first falsy value the queue yields.
     * @returns {Promise<void>} settles once the loop has finished
     */
    crawlUrlData() {
        return __awaiter(this, void 0, void 0, function* () {
            let url;
            while (url = yield this.queue.shift()) {
                yield this.crawlUrl(url, this.worker);
            }
        });
    }
    /**
     * Start crawling
     *
     * Crawls the starting URL, forks numCPUs - 1 workers when running in the
     * primary process, then drains the queue in the current process.
     * @returns {Promise<void>} settles once this process has drained the queue
     */
    crawl() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.crawlUrl(this.getStartingUrl(), this.worker);
            // cluster.fork() exists only in the primary process; calling it in
            // a worker throws, so workers skip forking and go straight to the queue.
            if (cluster.isMaster) {
                for (let i = 1; i < numCPUs; i++) {
                    cluster.fork();
                }
            }
            yield this.crawlUrlData();
        });
    }
}
// Expose the PageCrawler class as a named export of this module.
exports.PageCrawler = PageCrawler;
//# sourceMappingURL=page.js.map