UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

238 lines (237 loc) 9.06 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Snapshotter = void 0;
const tslib_1 = require("tslib");
const ow_1 = tslib_1.__importDefault(require("ow"));
const configuration_1 = require("../configuration");
const log_1 = require("../log");
const client_load_signal_1 = require("./client_load_signal");
const cpu_load_signal_1 = require("./cpu_load_signal");
const event_loop_load_signal_1 = require("./event_loop_load_signal");
const memory_load_signal_1 = require("./memory_load_signal");
/**
 * Creates snapshots of system resources at given intervals and marks the resource
 * as either overloaded or not during the last interval. Keeps a history of the snapshots.
 * It tracks the following resources: Memory, EventLoop, API and CPU.
 * The class is used by the {@link AutoscaledPool} class.
 *
 * When running on the Apify platform, the CPU and memory statistics are provided by the platform,
 * as collected from the running Docker container. When running locally, `Snapshotter`
 * makes its own statistics by querying the OS.
 *
 * CPU becomes overloaded locally when its current use exceeds the `maxUsedCpuRatio` setting or
 * when Apify platform marks it as overloaded. Note that `maxUsedCpuRatio` is NOT accepted as
 * a `Snapshotter` constructor option (the options are validated against an exact shape that does
 * not list it); it is presumably read from the `Configuration` handed to the CPU signal.
 *
 * Memory becomes overloaded if its current use exceeds the `maxUsedMemoryRatio` option.
 * It's computed using the total memory available to the container when running on
 * the Apify platform and a quarter of total system memory when running locally.
 * Max total memory when running locally may be overridden by using the `CRAWLEE_MEMORY_MBYTES`
 * environment variable.
 *
 * Event loop becomes overloaded if it slows down by more than the `maxBlockedMillis` option.
 *
 * Client becomes overloaded when rate limit errors (429 - Too Many Requests),
 * typically received from the request queue, exceed the set limit within the set interval.
 *
 * @category Scaling
 */
class Snapshotter {
    /**
     * Returns the four built-in signals as an array, so `SystemStatus` can
     * iterate them alongside any custom `LoadSignal` instances.
     *
     * @returns The memory, event loop, CPU and client signals, in that order.
     */
    getLoadSignals() {
        return [this.memorySignal, this.eventLoopSignal, this.cpuSignal, this.clientSignal];
    }
    // Legacy public properties kept for backward compat (tests read these directly).
    /** All snapshots currently held by the CPU signal's store. */
    get cpuSnapshots() {
        return this.cpuSignal.store.getAll();
    }
    /** All snapshots currently held by the event loop signal's store. */
    get eventLoopSnapshots() {
        return this.eventLoopSignal.store.getAll();
    }
    /** Memory snapshots as reported by the memory signal's `getMemorySnapshots()`. */
    get memorySnapshots() {
        return this.memorySignal.getMemorySnapshots();
    }
    /** All snapshots currently held by the client signal's store. */
    get clientSnapshots() {
        return this.clientSignal.store.getAll();
    }
    /**
     * @param [options] All `Snapshotter` configuration options. Unknown keys are rejected.
     * @param [options.eventLoopSnapshotIntervalSecs=0.5] Forwarded to the event loop signal.
     * @param [options.clientSnapshotIntervalSecs=1] Forwarded to the client signal.
     * @param [options.snapshotHistorySecs=30] History length; converted to milliseconds
     *   and forwarded to every signal.
     * @param [options.maxBlockedMillis=50] Forwarded to the event loop signal.
     * @param [options.maxUsedMemoryRatio=0.9] Forwarded to the memory signal.
     * @param [options.maxClientErrors=3] Forwarded to the client signal.
     * @param [options.log] Logger; a child with the `Snapshotter` prefix is created from it.
     *   Defaults to the module-level logger.
     * @param [options.config] Defaults to `Configuration.getGlobalConfig()`.
     * @param [options.client] Defaults to `config.getStorageClient()`.
     */
    constructor(options = {}) {
        // Explicit instance-field definitions (kept in declaration order).
        Object.defineProperty(this, "log", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "client", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "config", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "events", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "memorySignal", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "eventLoopSignal", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "cpuSignal", { enumerable: true, configurable: true, writable: true, value: void 0 });
        Object.defineProperty(this, "clientSignal", { enumerable: true, configurable: true, writable: true, value: void 0 });
        (0, ow_1.default)(options, ow_1.default.object.exactShape({
            eventLoopSnapshotIntervalSecs: ow_1.default.optional.number,
            clientSnapshotIntervalSecs: ow_1.default.optional.number,
            snapshotHistorySecs: ow_1.default.optional.number,
            maxBlockedMillis: ow_1.default.optional.number,
            maxUsedMemoryRatio: ow_1.default.optional.number,
            maxClientErrors: ow_1.default.optional.number,
            log: ow_1.default.optional.object,
            client: ow_1.default.optional.object,
            config: ow_1.default.optional.object,
        }));
        // NOTE: `config` must be destructured before `client`, because the default
        // for `client` is derived from the (possibly defaulted) `config`.
        const {
            eventLoopSnapshotIntervalSecs = 0.5,
            clientSnapshotIntervalSecs = 1,
            snapshotHistorySecs = 30,
            maxBlockedMillis = 50,
            maxUsedMemoryRatio = 0.9,
            maxClientErrors = 3,
            log = log_1.log,
            config = configuration_1.Configuration.getGlobalConfig(),
            client = config.getStorageClient(),
        } = options;
        this.log = log.child({ prefix: 'Snapshotter' });
        this.client = client;
        this.config = config;
        this.events = this.config.getEventManager();
        const snapshotHistoryMillis = snapshotHistorySecs * 1000;
        this.memorySignal = new memory_load_signal_1.MemoryLoadSignal({
            maxUsedMemoryRatio,
            snapshotHistoryMillis,
            config: this.config,
            log: this.log,
        });
        this.eventLoopSignal = (0, event_loop_load_signal_1.createEventLoopLoadSignal)({
            eventLoopSnapshotIntervalSecs,
            maxBlockedMillis,
            snapshotHistoryMillis,
        });
        this.cpuSignal = (0, cpu_load_signal_1.createCpuLoadSignal)({
            snapshotHistoryMillis,
            config: this.config,
        });
        this.clientSignal = (0, client_load_signal_1.createClientLoadSignal)({
            client: this.client,
            clientSnapshotIntervalSecs,
            maxClientErrors,
            snapshotHistoryMillis,
        });
    }
    /**
     * Starts capturing snapshots at configured intervals.
     * Signals are started one after another: memory, event loop, CPU, client.
     */
    async start() {
        await this.memorySignal.start();
        await this.eventLoopSignal.start();
        await this.cpuSignal.start();
        await this.clientSignal.start();
    }
    /**
     * Stops all resource capturing.
     *
     * Every signal is asked to stop, in the same order they were started, even if
     * an earlier one fails to stop; otherwise a single failure would leave the
     * remaining signals capturing forever.
     *
     * @throws The first error raised by any signal's `stop()`, rethrown unchanged
     *   after all signals have been given the chance to stop.
     */
    async stop() {
        const signals = [this.memorySignal, this.eventLoopSignal, this.cpuSignal, this.clientSignal];
        const failures = [];
        for (const signal of signals) {
            try {
                await signal.stop();
            }
            catch (err) {
                // Keep going so the remaining signals are not leaked.
                failures.push(err);
            }
        }
        if (failures.length > 0) {
            // Surface the earliest failure — the same error the caller would have
            // received when the stops were awaited without a guard.
            throw failures[0];
        }
        // Allow microtask queue to unwind before stop returns.
        await new Promise((resolve) => {
            setImmediate(resolve);
        });
    }
    /**
     * Returns a sample of latest memory snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     */
    getMemorySample(sampleDurationMillis) {
        return this.memorySignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest event loop snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     */
    getEventLoopSample(sampleDurationMillis) {
        return this.eventLoopSignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest CPU snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     */
    getCpuSample(sampleDurationMillis) {
        return this.cpuSignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest Client snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     */
    getClientSample(sampleDurationMillis) {
        return this.clientSignal.getSample(sampleDurationMillis);
    }
    /**
     * Forwards `systemInfo` to the memory signal's `_onSystemInfo`.
     * @deprecated Kept for backward compatibility.
     */
    _snapshotMemory(systemInfo) {
        this.memorySignal._onSystemInfo(systemInfo);
    }
    /**
     * Forwards `systemInfo` to the memory signal's `_memoryOverloadWarning`.
     * @deprecated Kept for backward compatibility.
     */
    _memoryOverloadWarning(systemInfo) {
        this.memorySignal._memoryOverloadWarning(systemInfo);
    }
    /**
     * Forwards `intervalCallback` to the event loop signal's `handle`.
     * @deprecated Kept for backward compatibility.
     */
    _snapshotEventLoop(intervalCallback) {
        this.eventLoopSignal.handle(intervalCallback);
    }
    /**
     * Forwards `systemInfo` to the CPU signal's `handle`.
     * @deprecated Kept for backward compatibility.
     */
    _snapshotCpu(systemInfo) {
        this.cpuSignal.handle(systemInfo);
    }
    /**
     * Forwards `intervalCallback` to the client signal's `handle`.
     * @deprecated Kept for backward compatibility.
     */
    _snapshotClient(intervalCallback) {
        this.clientSignal.handle(intervalCallback);
    }
    /**
     * @deprecated Pruning is now handled by individual signals.
     */
    _pruneSnapshots(_snapshots, _now) {
        // no-op — signals prune themselves
    }
}
exports.Snapshotter = Snapshotter;