@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
155 lines (154 loc) • 6 kB
TypeScript
import type { StorageClient } from '@crawlee/types';
import type { Log } from '@apify/log';
import { Configuration } from '../configuration';
import type { EventManager } from '../events/event_manager';
import type { ClientSnapshot } from './client_load_signal';
import type { CpuSnapshot } from './cpu_load_signal';
import type { EventLoopSnapshot } from './event_loop_load_signal';
import type { LoadSignal } from './load_signal';
import type { MemorySnapshot } from './memory_load_signal';
import type { SystemInfo } from './system_status';
/**
* Configuration options accepted by the `Snapshotter` constructor.
* Every field is optional; the `@default` tags below state the documented
* fallback for each public option.
*/
export interface SnapshotterOptions {
/**
* Defines the interval of measuring the event loop response time
* (presumably in seconds, per the `Secs` suffix).
* @default 0.5
*/
eventLoopSnapshotIntervalSecs?: number;
/**
* Defines the interval of checking the current state
* of the remote API client (presumably in seconds, per the `Secs` suffix).
* @default 1
*/
clientSnapshotIntervalSecs?: number;
/**
* Maximum allowed delay of the event loop in milliseconds.
* Exceeding this limit overloads the event loop.
* @default 50
*/
maxBlockedMillis?: number;
/**
* Defines the maximum ratio of total memory that can be used,
* expressed as a fraction (the default `0.9` means 90 %).
* Exceeding this limit overloads the memory.
* @default 0.9
*/
maxUsedMemoryRatio?: number;
/**
* Defines the maximum number of new rate limit errors within
* the given interval.
* NOTE(review): "the given interval" is presumably `clientSnapshotIntervalSecs` — confirm
* against the implementation.
* @default 1
*/
maxClientErrors?: number;
/**
* Sets the interval in seconds for which a history of resource snapshots
* will be kept. Increasing this to very high numbers will affect performance.
* @default 60
*/
snapshotHistorySecs?: number;
/** @internal */
log?: Log;
/** @internal */
client?: StorageClient;
/** @internal */
config?: Configuration;
}
/**
* Creates snapshots of system resources at given intervals and marks the resource
* as either overloaded or not during the last interval. Keeps a history of the snapshots.
* It tracks the following resources: Memory, EventLoop, API and CPU.
* The class is used by the {@link AutoscaledPool} class.
*
* When running on the Apify platform, the CPU and memory statistics are provided by the platform,
* as collected from the running Docker container. When running locally, `Snapshotter`
* makes its own statistics by querying the OS.
*
* CPU becomes overloaded locally when its current use exceeds the `maxUsedCpuRatio` option or
* when Apify platform marks it as overloaded.
* NOTE(review): `maxUsedCpuRatio` is not a member of `SnapshotterOptions` as declared in this
* file; it is presumably read from `Configuration` — confirm.
*
* Memory becomes overloaded if its current use exceeds the `maxUsedMemoryRatio` option.
* It's computed using the total memory available to the container when running on
* the Apify platform and a quarter of total system memory when running locally.
* Max total memory when running locally may be overridden by using the `CRAWLEE_MEMORY_MBYTES`
* environment variable.
*
* Event loop becomes overloaded if it slows down by more than the `maxBlockedMillis` option.
*
* Client becomes overloaded when rate limit errors (429 - Too Many Requests),
* typically received from the request queue, exceed the set limit within the set interval.
*
* @category Scaling
*/
export declare class Snapshotter {
/** Logger instance; presumably taken from `SnapshotterOptions.log` when provided — confirm. */
log: Log;
/** Storage client; presumably taken from `SnapshotterOptions.client` when provided — confirm. */
client: StorageClient;
/** Configuration instance; presumably taken from `SnapshotterOptions.config` when provided — confirm. */
config: Configuration;
/** Event manager associated with this instance; how it is obtained is not visible in this declaration. */
events: EventManager;
// The four private fields below are presumably the built-in signals
// returned by `getLoadSignals()` — confirm against the implementation.
private readonly memorySignal;
private readonly eventLoopSignal;
private readonly cpuSignal;
private readonly clientSignal;
/**
* Returns the four built-in signals as an array, so `SystemStatus` can
* iterate them alongside any custom `LoadSignal` instances.
*/
getLoadSignals(): LoadSignal[];
/** CPU snapshots currently held; presumably the same history exposed by `getCpuSample()` — confirm. */
get cpuSnapshots(): CpuSnapshot[];
/** Event loop snapshots currently held; presumably the same history exposed by `getEventLoopSample()` — confirm. */
get eventLoopSnapshots(): EventLoopSnapshot[];
/** Memory snapshots currently held; presumably the same history exposed by `getMemorySample()` — confirm. */
get memorySnapshots(): MemorySnapshot[];
/** Client snapshots currently held; presumably the same history exposed by `getClientSample()` — confirm. */
get clientSnapshots(): ClientSnapshot[];
/**
* @param [options] All `Snapshotter` configuration options.
*/
constructor(options?: SnapshotterOptions);
/**
* Starts capturing snapshots at configured intervals.
*/
start(): Promise<void>;
/**
* Stops all resource capturing.
*/
stop(): Promise<void>;
/**
* Returns a sample of latest memory snapshots, with the size of the sample defined
* by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
* @param [sampleDurationMillis] Length of the sample window in milliseconds.
* @returns The matching memory snapshots.
*/
getMemorySample(sampleDurationMillis?: number): MemorySnapshot[];
/**
* Returns a sample of latest event loop snapshots, with the size of the sample defined
* by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
* @param [sampleDurationMillis] Length of the sample window in milliseconds.
* @returns The matching event loop snapshots.
*/
getEventLoopSample(sampleDurationMillis?: number): EventLoopSnapshot[];
/**
* Returns a sample of latest CPU snapshots, with the size of the sample defined
* by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
* @param [sampleDurationMillis] Length of the sample window in milliseconds.
* @returns The matching CPU snapshots.
*/
getCpuSample(sampleDurationMillis?: number): CpuSnapshot[];
/**
* Returns a sample of latest Client snapshots, with the size of the sample defined
* by the sampleDurationMillis parameter. If omitted, it returns a full snapshot history.
* @param [sampleDurationMillis] Length of the sample window in milliseconds.
* @returns The matching client snapshots.
*/
getClientSample(sampleDurationMillis?: number): ClientSnapshot[];
/**
* @deprecated Kept for backward compatibility.
*/
protected _snapshotMemory(systemInfo: SystemInfo): void;
/**
* @deprecated Kept for backward compatibility.
*/
protected _memoryOverloadWarning(systemInfo: SystemInfo): void;
/**
* @deprecated Kept for backward compatibility.
*/
protected _snapshotEventLoop(intervalCallback: () => unknown): void;
/**
* @deprecated Kept for backward compatibility.
*/
protected _snapshotCpu(systemInfo: SystemInfo): void;
/**
* @deprecated Kept for backward compatibility.
*/
protected _snapshotClient(intervalCallback: () => unknown): void;
/**
* @deprecated Pruning is now handled by individual signals.
* NOTE(review): the underscore-prefixed parameters suggest they are unused — confirm.
*/
protected _pruneSnapshots(_snapshots: any[], _now: Date): void;
}