// @crawlee/core
// Version:
// The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
// 238 lines (237 loc) • 9.06 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Snapshotter = void 0;
const tslib_1 = require("tslib");
const ow_1 = tslib_1.__importDefault(require("ow"));
const configuration_1 = require("../configuration");
const log_1 = require("../log");
const client_load_signal_1 = require("./client_load_signal");
const cpu_load_signal_1 = require("./cpu_load_signal");
const event_loop_load_signal_1 = require("./event_loop_load_signal");
const memory_load_signal_1 = require("./memory_load_signal");
/**
* Creates snapshots of system resources at given intervals and marks the resource
* as either overloaded or not during the last interval. Keeps a history of the snapshots.
* It tracks the following resources: Memory, EventLoop, API and CPU.
* The class is used by the {@link AutoscaledPool} class.
*
* When running on the Apify platform, the CPU and memory statistics are provided by the platform,
* as collected from the running Docker container. When running locally, `Snapshotter`
* makes its own statistics by querying the OS.
*
* CPU becomes overloaded locally when its current use exceeds the `maxUsedCpuRatio` value of the
* `Configuration` (it is not a `Snapshotter` constructor option) or when the Apify platform marks
* it as overloaded.
*
* Memory becomes overloaded if its current use exceeds the `maxUsedMemoryRatio` option.
* It's computed using the total memory available to the container when running on
* the Apify platform and a quarter of total system memory when running locally.
* Max total memory when running locally may be overridden by using the `CRAWLEE_MEMORY_MBYTES`
* environment variable.
*
* Event loop becomes overloaded if it slows down by more than the `maxBlockedMillis` option.
*
* Client becomes overloaded when rate limit errors (429 - Too Many Requests),
* typically received from the request queue, exceed the set limit within the set interval.
*
* @category Scaling
*/
class Snapshotter {
    /**
     * Returns the four built-in signals as an array, so `SystemStatus` can
     * iterate them alongside any custom `LoadSignal` instances.
     *
     * @returns The memory, event loop, CPU and client signals, in that order.
     */
    getLoadSignals() {
        return [this.memorySignal, this.eventLoopSignal, this.cpuSignal, this.clientSignal];
    }
    // Legacy public properties kept for backward compat (tests read these directly).
    // Each getter reads the snapshot history straight from the owning signal.
    /** All CPU snapshots currently held by the CPU signal's store. */
    get cpuSnapshots() {
        return this.cpuSignal.store.getAll();
    }
    /** All event loop snapshots currently held by the event loop signal's store. */
    get eventLoopSnapshots() {
        return this.eventLoopSignal.store.getAll();
    }
    /** Memory snapshots as reported by the memory signal's `getMemorySnapshots()`. */
    get memorySnapshots() {
        return this.memorySignal.getMemorySnapshots();
    }
    /** All client snapshots currently held by the client signal's store. */
    get clientSnapshots() {
        return this.clientSignal.store.getAll();
    }
    /**
     * Validates the options, resolves defaults and builds the four load signals.
     * Nothing is started here; call {@link Snapshotter.start} to begin capturing.
     *
     * @param [options] All `Snapshotter` configuration options.
     * @param [options.eventLoopSnapshotIntervalSecs=0.5] Passed to the event loop signal.
     * @param [options.clientSnapshotIntervalSecs=1] Passed to the client signal.
     * @param [options.snapshotHistorySecs=30] Converted to milliseconds and passed to every signal.
     * @param [options.maxBlockedMillis=50] Passed to the event loop signal.
     * @param [options.maxUsedMemoryRatio=0.9] Passed to the memory signal.
     * @param [options.maxClientErrors=3] Passed to the client signal.
     * @param [options.log] Logger; a child logger prefixed with `Snapshotter` is derived from it.
     * @param [options.config] Configuration; defaults to the global configuration.
     * @param [options.client] Storage client; defaults to the one provided by `config`.
     * @throws When `options` contains unknown keys or values of the wrong type.
     */
    constructor(options = {}) {
        // Declare every instance field as an own, enumerable, writable data property
        // (initially undefined) before anything else runs, in a fixed order.
        const fieldNames = [
            'log',
            'client',
            'config',
            'events',
            'memorySignal',
            'eventLoopSignal',
            'cpuSignal',
            'clientSignal',
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: undefined,
            });
        }
        // `exactShape` rejects any option key that is not listed here.
        (0, ow_1.default)(options, ow_1.default.object.exactShape({
            eventLoopSnapshotIntervalSecs: ow_1.default.optional.number,
            clientSnapshotIntervalSecs: ow_1.default.optional.number,
            snapshotHistorySecs: ow_1.default.optional.number,
            maxBlockedMillis: ow_1.default.optional.number,
            maxUsedMemoryRatio: ow_1.default.optional.number,
            maxClientErrors: ow_1.default.optional.number,
            log: ow_1.default.optional.object,
            client: ow_1.default.optional.object,
            config: ow_1.default.optional.object,
        }));
        // `config` must be resolved before `client`, because the client default is derived from it.
        const {
            eventLoopSnapshotIntervalSecs = 0.5,
            clientSnapshotIntervalSecs = 1,
            snapshotHistorySecs = 30,
            maxBlockedMillis = 50,
            maxUsedMemoryRatio = 0.9,
            maxClientErrors = 3,
            log = log_1.log,
            config = configuration_1.Configuration.getGlobalConfig(),
            client = config.getStorageClient(),
        } = options;
        this.log = log.child({ prefix: 'Snapshotter' });
        this.client = client;
        this.config = config;
        this.events = this.config.getEventManager();
        const snapshotHistoryMillis = snapshotHistorySecs * 1000;
        this.memorySignal = new memory_load_signal_1.MemoryLoadSignal({
            maxUsedMemoryRatio,
            snapshotHistoryMillis,
            config: this.config,
            log: this.log,
        });
        this.eventLoopSignal = (0, event_loop_load_signal_1.createEventLoopLoadSignal)({
            eventLoopSnapshotIntervalSecs,
            maxBlockedMillis,
            snapshotHistoryMillis,
        });
        this.cpuSignal = (0, cpu_load_signal_1.createCpuLoadSignal)({
            snapshotHistoryMillis,
            config: this.config,
        });
        this.clientSignal = (0, client_load_signal_1.createClientLoadSignal)({
            client: this.client,
            clientSnapshotIntervalSecs,
            maxClientErrors,
            snapshotHistoryMillis,
        });
    }
    /**
     * Starts capturing snapshots at configured intervals.
     *
     * The signals are started one after another (memory, event loop, CPU, client);
     * the first failure aborts the sequence and is propagated to the caller.
     */
    async start() {
        const signals = [this.memorySignal, this.eventLoopSignal, this.cpuSignal, this.clientSignal];
        for (const signal of signals) {
            await signal.start();
        }
    }
    /**
     * Stops all resource capturing.
     *
     * Every signal is asked to stop, in the order memory, event loop, CPU, client,
     * even when an earlier one fails, so a single failing signal cannot leave the
     * remaining ones running.
     *
     * @throws The first error raised while stopping a signal. Any further errors
     *   are reported through the logger instead of being thrown.
     */
    async stop() {
        const signals = [this.memorySignal, this.eventLoopSignal, this.cpuSignal, this.clientSignal];
        const failures = [];
        for (const signal of signals) {
            try {
                await signal.stop();
            } catch (err) {
                // Remember the failure, but keep going so the other signals still get stopped.
                failures.push(err);
            }
        }
        // Yield one turn of the event loop so already-queued callbacks can run before stop returns.
        await new Promise((resolve) => {
            setImmediate(resolve);
        });
        if (failures.length === 0) {
            return;
        }
        const [firstFailure, ...otherFailures] = failures;
        for (const otherFailure of otherFailures) {
            this.log.warning('Another load signal also failed to stop.', { error: String(otherFailure) });
        }
        throw firstFailure;
    }
    /**
     * Returns a sample of latest memory snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     *
     * @param [sampleDurationMillis] Forwarded unchanged to the memory signal.
     */
    getMemorySample(sampleDurationMillis) {
        return this.memorySignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest event loop snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     *
     * @param [sampleDurationMillis] Forwarded unchanged to the event loop signal.
     */
    getEventLoopSample(sampleDurationMillis) {
        return this.eventLoopSignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest CPU snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     *
     * @param [sampleDurationMillis] Forwarded unchanged to the CPU signal.
     */
    getCpuSample(sampleDurationMillis) {
        return this.cpuSignal.getSample(sampleDurationMillis);
    }
    /**
     * Returns a sample of latest Client snapshots, with the size of the sample defined
     * by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
     *
     * @param [sampleDurationMillis] Forwarded unchanged to the client signal.
     */
    getClientSample(sampleDurationMillis) {
        return this.clientSignal.getSample(sampleDurationMillis);
    }
    /**
     * Forwards the system info to the memory signal's `_onSystemInfo` handler.
     *
     * @deprecated Kept for backward compatibility.
     */
    _snapshotMemory(systemInfo) {
        this.memorySignal._onSystemInfo(systemInfo);
    }
    /**
     * Forwards the system info to the memory signal's `_memoryOverloadWarning`.
     *
     * @deprecated Kept for backward compatibility.
     */
    _memoryOverloadWarning(systemInfo) {
        this.memorySignal._memoryOverloadWarning(systemInfo);
    }
    /**
     * Forwards the interval callback to the event loop signal's `handle`.
     *
     * @deprecated Kept for backward compatibility.
     */
    _snapshotEventLoop(intervalCallback) {
        this.eventLoopSignal.handle(intervalCallback);
    }
    /**
     * Forwards the system info to the CPU signal's `handle`.
     *
     * @deprecated Kept for backward compatibility.
     */
    _snapshotCpu(systemInfo) {
        this.cpuSignal.handle(systemInfo);
    }
    /**
     * Forwards the interval callback to the client signal's `handle`.
     *
     * @deprecated Kept for backward compatibility.
     */
    _snapshotClient(intervalCallback) {
        this.clientSignal.handle(intervalCallback);
    }
    /**
     * Intentionally does nothing; both arguments are ignored.
     *
     * @deprecated Pruning is now handled by individual signals.
     */
    _pruneSnapshots(_snapshots, _now) {
        // no-op — signals prune themselves
    }
}
exports.Snapshotter = Snapshotter;