UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

155 lines (154 loc) 6 kB
import type { StorageClient } from '@crawlee/types';
import type { Log } from '@apify/log';
import { Configuration } from '../configuration';
import type { EventManager } from '../events/event_manager';
import type { ClientSnapshot } from './client_load_signal';
import type { CpuSnapshot } from './cpu_load_signal';
import type { EventLoopSnapshot } from './event_loop_load_signal';
import type { LoadSignal } from './load_signal';
import type { MemorySnapshot } from './memory_load_signal';
import type { SystemInfo } from './system_status';

/**
 * Configuration accepted by the {@link Snapshotter} constructor.
 * Every field is optional; documented defaults apply when a field is omitted.
 */
export interface SnapshotterOptions {
    /**
     * How often the event loop response time is measured, in seconds.
     * @default 0.5
     */
    eventLoopSnapshotIntervalSecs?: number;

    /**
     * How often the current state of the remote API client is checked,
     * in seconds.
     * @default 1
     */
    clientSnapshotIntervalSecs?: number;

    /**
     * Longest tolerated event loop delay, in milliseconds.
     * A delay above this limit marks the event loop as overloaded.
     * @default 50
     */
    maxBlockedMillis?: number;

    /**
     * Highest tolerated ratio of used memory to total memory.
     * Usage above this limit marks the memory as overloaded.
     * @default 0.9
     */
    maxUsedMemoryRatio?: number;

    /**
     * Highest tolerated number of new rate limit errors
     * within the given interval.
     * @default 1
     */
    maxClientErrors?: number;

    /**
     * Length of the retained resource snapshot history, in seconds.
     * Very high values will affect performance.
     * @default 60
     */
    snapshotHistorySecs?: number;

    /** @internal */
    log?: Log;

    /** @internal */
    client?: StorageClient;

    /** @internal */
    config?: Configuration;
}

/**
 * Takes periodic snapshots of system resources and flags each resource as
 * overloaded or not for the last interval, keeping a history of the snapshots.
 * Four resources are tracked: Memory, EventLoop, API (client) and CPU.
 * The {@link AutoscaledPool} class is the consumer of this class.
 *
 * Source of the statistics:
 * - On the Apify platform, CPU and memory statistics come from the platform,
 *   as collected from the running Docker container.
 * - Locally, `Snapshotter` produces its own statistics by querying the OS.
 *
 * Overload rules per resource:
 * - **CPU**: overloaded locally when current use exceeds the `maxUsedCpuRatio`
 *   option, or when the Apify platform marks it as overloaded.
 *   NOTE(review): `maxUsedCpuRatio` is not a member of `SnapshotterOptions`
 *   as declared in this file — confirm where it is configured.
 * - **Memory**: overloaded when current use exceeds the `maxUsedMemoryRatio`
 *   option. The total is the memory available to the container on the Apify
 *   platform, and a quarter of total system memory locally. The local maximum
 *   may be overridden with the `CRAWLEE_MEMORY_MBYTES` environment variable.
 * - **Event loop**: overloaded when it slows down by more than the
 *   `maxBlockedMillis` option.
 * - **Client**: overloaded when rate limit errors (429 - Too Many Requests),
 *   typically received from the request queue, exceed the set limit within
 *   the set interval.
 *
 * @category Scaling
 */
export declare class Snapshotter {
    log: Log;
    client: StorageClient;
    config: Configuration;
    events: EventManager;

    // One private signal per tracked resource; their types are not exposed
    // by this declaration file.
    private readonly memorySignal;
    private readonly eventLoopSignal;
    private readonly cpuSignal;
    private readonly clientSignal;

    /**
     * Exposes the four built-in signals as an array, which lets `SystemStatus`
     * iterate over them together with any custom `LoadSignal` instances.
     */
    getLoadSignals(): LoadSignal[];

    // Read-only snapshot accessors, one per tracked resource.
    get cpuSnapshots(): CpuSnapshot[];
    get eventLoopSnapshots(): EventLoopSnapshot[];
    get memorySnapshots(): MemorySnapshot[];
    get clientSnapshots(): ClientSnapshot[];

    /**
     * @param [options] All `Snapshotter` configuration options.
     */
    constructor(options?: SnapshotterOptions);

    /**
     * Begins capturing snapshots at the configured intervals.
     */
    start(): Promise<void>;

    /**
     * Ends all resource capturing.
     */
    stop(): Promise<void>;

    /**
     * Returns the most recent memory snapshots. The size of the sample is
     * defined by `sampleDurationMillis`; when it is omitted, the full
     * snapshot history is returned.
     */
    getMemorySample(sampleDurationMillis?: number): MemorySnapshot[];

    /**
     * Returns the most recent event loop snapshots. The size of the sample is
     * defined by `sampleDurationMillis`; when it is omitted, the full
     * snapshot history is returned.
     */
    getEventLoopSample(sampleDurationMillis?: number): EventLoopSnapshot[];

    /**
     * Returns the most recent CPU snapshots. The size of the sample is
     * defined by `sampleDurationMillis`; when it is omitted, the full
     * snapshot history is returned.
     */
    getCpuSample(sampleDurationMillis?: number): CpuSnapshot[];

    /**
     * Returns the most recent Client snapshots. The size of the sample is
     * defined by `sampleDurationMillis`; when it is omitted, the full
     * snapshot history is returned.
     */
    getClientSample(sampleDurationMillis?: number): ClientSnapshot[];

    /**
     * @deprecated Kept for backward compatibility.
     */
    protected _snapshotMemory(systemInfo: SystemInfo): void;

    /**
     * @deprecated Kept for backward compatibility.
     */
    protected _memoryOverloadWarning(systemInfo: SystemInfo): void;

    /**
     * @deprecated Kept for backward compatibility.
     */
    protected _snapshotEventLoop(intervalCallback: () => unknown): void;

    /**
     * @deprecated Kept for backward compatibility.
     */
    protected _snapshotCpu(systemInfo: SystemInfo): void;

    /**
     * @deprecated Kept for backward compatibility.
     */
    protected _snapshotClient(intervalCallback: () => unknown): void;

    /**
     * @deprecated Pruning is now handled by individual signals.
     */
    protected _pruneSnapshots(_snapshots: any[], _now: Date): void;
}