UNPKG

@a11ywatch/core

Version:
118 lines 5.38 kB
"use strict";
/*
 * Crawl tracking.
 *
 * Keeps one in-memory entry per active crawl in `crawlingSet` and updates it
 * in response to events from `crawlTrackingEmitter`:
 *
 *   crawl-start      -> register the crawl if it is not already tracked
 *   crawl-complete   -> mark the crawl as no longer discovering pages
 *   crawl-processing -> count a page as queued (total += 1)
 *   crawl-processed  -> count a page as finished (current += 1)
 *
 * A crawl is torn down (see `deInit`) once it is no longer crawling and
 * `current === total`, or when a page is queued after a shutdown was flagged.
 */
// TypeScript-emitted helper that drives a generator as an async function.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) {
        return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        function rejected(value) {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        function step(result) {
            result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected);
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.establishCrawlTracking = exports.getKey = exports.crawlingSet = void 0;
const website_source_builder_1 = require("@a11ywatch/website-source-builder");
const perf_hooks_1 = require("perf_hooks");
const handle_1 = require("../queues/crawl/handle");
const crawl_1 = require("../queues/crawl");
const crawl_2 = require("./emitters/crawl");
const utils_1 = require("../core/utils");
/**
 * Active crawls, keyed by the string produced by `getKey`.
 * Each value has the shape created in `crawlStart`:
 * `{ total, current, crawling, shutdown, duration, event }`, where `duration`
 * holds the `performance.now()` reading taken when the entry was created.
 */
exports.crawlingSet = new Map();
/**
 * Resolve the name used to identify a crawl.
 * When `pages` contains exactly one entry that entry is used as the source
 * URL, otherwise `domain` is used.
 *
 * @param domain - value passed to `getHostName` when `pages` is not a single-entry list
 * @param pages - optional list; only consulted when its length is exactly 1
 * @returns the result of `domainName(getHostName(source))`
 */
const extractHostname = (domain, pages) => {
    const source = pages && pages.length === 1 ? pages[0] : domain;
    return (0, utils_1.domainName)((0, website_source_builder_1.getHostName)(source));
};
/**
 * Build the `crawlingSet` key for a crawl: `<hostname>-<user_id>`.
 * A falsy `user_id` is written as `0`.
 *
 * @param domain - see `extractHostname`
 * @param pages - see `extractHostname`
 * @param user_id - owner of the crawl
 * @returns the tracking key
 */
const getKey = (domain, pages, user_id) => `${extractHostname(domain, pages)}-${user_id || 0}`;
exports.getKey = getKey;
/**
 * Re-apply the concurrency limit to every tracked crawl's task queue.
 * The limit comes from `getCWLimit`, called with the number of tracked crawls
 * (or 1 when none are tracked).
 *
 * Queues above the new limit are stopped first: the pending tasks are
 * captured with `getQueue()`, the queue is killed and drained, the limit is
 * lowered, and each captured task is handed back through `unshift` (awaited
 * one at a time). Queues that merely differ from the limit are updated in
 * place.
 */
const rebindConcurrency = () => __awaiter(void 0, void 0, void 0, function* () {
    const newLimit = (0, handle_1.getCWLimit)(exports.crawlingSet.size || 1);
    for (const item of exports.crawlingSet.values()) {
        const itemEvent = item && item.event;
        if (!itemEvent) {
            continue;
        }
        if (itemEvent.concurrency > newLimit) {
            // Snapshot pending work before the queue is killed.
            const q = itemEvent.getQueue();
            itemEvent.killAndDrain();
            yield itemEvent.drained();
            item.event.concurrency = newLimit;
            for (let j = 0; j < q.length; j++) {
                yield itemEvent.unshift(q[j]);
            }
        }
        else if (itemEvent.concurrency !== newLimit) {
            item.event.concurrency = newLimit;
        }
    }
});
/**
 * Tear down tracking for a finished (or shut down) crawl.
 * Removes the entry, emits `crawl-complete-<key>` with `target`, rebalances
 * the remaining queues, then reports completion via `asyncWorkerCrawlComplete`.
 *
 * @param key - tracking key from `getKey`
 * @param target - crawl descriptor; `user_id` and `domain` are read from it
 * @param item - tracked entry; `duration` (start timestamp) and `shutdown` are read from it
 */
const deInit = (key, target, { duration, shutdown }) => __awaiter(void 0, void 0, void 0, function* () {
    exports.crawlingSet.delete(key);
    crawl_2.crawlTrackingEmitter.emit(`crawl-complete-${key}`, target);
    // Elapsed time is measured here, before the awaits below.
    const params = {
        userId: target.user_id,
        meta: {
            extra: {
                domain: extractHostname(target.domain),
                duration: perf_hooks_1.performance.now() - duration,
                shutdown: shutdown,
            },
        },
    };
    yield rebindConcurrency();
    yield (0, crawl_1.asyncWorkerCrawlComplete)(params);
});
/**
 * `crawl-start` handler: on the next `setImmediate` tick, register the crawl
 * with zeroed counters and a fresh task queue unless it is already tracked.
 *
 * @param target - crawl descriptor providing `domain`, `pages` and `user_id`
 */
const crawlStart = (target) => {
    setImmediate(() => {
        const key = (0, exports.getKey)(target.domain, target.pages, target.user_id);
        if (exports.crawlingSet.has(key)) {
            return;
        }
        exports.crawlingSet.set(key, {
            total: 0,
            current: 0,
            crawling: true,
            shutdown: false,
            duration: perf_hooks_1.performance.now(),
            event: (0, handle_1.bindTaskQ)(exports.crawlingSet.size + 1),
        });
    });
};
/**
 * `crawl-complete` handler: flag the tracked crawl as no longer crawling.
 * Teardown itself happens in `crawlProcessed` once the counters match.
 *
 * @param target - crawl descriptor providing `domain`, `pages` and `user_id`
 */
const crawlComplete = (target) => {
    setImmediate(() => __awaiter(void 0, void 0, void 0, function* () {
        const key = (0, exports.getKey)(target.domain, target.pages, target.user_id);
        if (exports.crawlingSet.has(key)) {
            const item = exports.crawlingSet.get(key);
            item.crawling = false;
        }
    }));
};
/**
 * `crawl-processing` handler: count one more queued page for the crawl.
 * If the crawl was flagged for shutdown, `{ message: "shutdown" }` is written
 * to the call and the crawl is torn down.
 *
 * The call is always ended, including when the key lookup, the write or the
 * teardown throws; the error still propagates from the async callback.
 *
 * @param call - object exposing `request` (`domain`, `pages`, `user_id`),
 *   `write` and `end`; presumably a gRPC server stream - confirm against caller
 */
const crawlProcessing = (call) => {
    setImmediate(() => __awaiter(void 0, void 0, void 0, function* () {
        try {
            const key = (0, exports.getKey)(call.request.domain, call.request.pages, call.request.user_id);
            if (exports.crawlingSet.has(key)) {
                const item = exports.crawlingSet.get(key);
                item.total += 1;
                if (item.shutdown) {
                    call.write({ message: "shutdown" });
                    yield deInit(key, call.request, item);
                }
            }
        }
        finally {
            // Must run on every path so the call is never left open.
            call.end();
        }
    }));
};
/**
 * `crawl-processed` handler: count one more finished page for the crawl.
 * A `target.shutdown` flag marks the crawl as shut down and stops crawling.
 * Once the crawl is not crawling and every queued page has been processed
 * (`current === total`), the crawl is torn down.
 *
 * @param target - crawl descriptor providing `domain`, `pages`, `user_id`
 *   and an optional `shutdown` flag
 */
const crawlProcessed = (target) => {
    setImmediate(() => __awaiter(void 0, void 0, void 0, function* () {
        const key = (0, exports.getKey)(target.domain, target.pages, target.user_id);
        if (!exports.crawlingSet.has(key)) {
            return;
        }
        const item = exports.crawlingSet.get(key);
        item.current += 1;
        if (target.shutdown) {
            item.shutdown = true;
            item.crawling = false;
        }
        if (!item.crawling && item.current === item.total) {
            yield deInit(key, target, item);
        }
    }));
};
/**
 * Subscribe the four tracking handlers to `crawlTrackingEmitter`.
 * Each invocation adds another set of listeners.
 */
const establishCrawlTracking = () => {
    crawl_2.crawlTrackingEmitter.on("crawl-start", crawlStart);
    crawl_2.crawlTrackingEmitter.on("crawl-complete", crawlComplete);
    crawl_2.crawlTrackingEmitter.on("crawl-processing", crawlProcessing);
    crawl_2.crawlTrackingEmitter.on("crawl-processed", crawlProcessed);
};
exports.establishCrawlTracking = establishCrawlTracking;
//# sourceMappingURL=crawl-tracking.js.map