@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs with (but not limited to) headless Chrome and Puppeteer.
189 lines • 7.05 kB
TypeScript
import type { StorageClient } from '@crawlee/types';
import type { Log } from '@apify/log';
import type { BetterIntervalID } from '@apify/utilities';
import { Configuration } from '../configuration';
import type { EventManager } from '../events/event_manager';
import type { SystemInfo } from './system_status';
/**
 * Options accepted by the `Snapshotter` constructor.
 * Every field is optional; the `@default` tag on each field gives the value
 * documented for the case where the field is omitted.
 */
export interface SnapshotterOptions {
    /**
     * Interval, in seconds, between two measurements of the event loop response time.
     * @default 0.5
     */
    eventLoopSnapshotIntervalSecs?: number;

    /**
     * Interval, in seconds, between two checks of the current state
     * of the remote API client.
     * @default 1
     */
    clientSnapshotIntervalSecs?: number;

    /**
     * Largest tolerated event loop delay, in milliseconds.
     * A delay above this limit means the event loop is overloaded.
     * @default 50
     */
    maxBlockedMillis?: number;

    /**
     * Largest ratio of total memory that may be in use.
     * Usage above this limit means the memory is overloaded.
     * @default 0.9
     */
    maxUsedMemoryRatio?: number;

    /**
     * Largest number of new rate limit errors tolerated within
     * the given interval.
     * @default 1
     */
    maxClientErrors?: number;

    /**
     * Length, in seconds, of the resource snapshot history that is retained.
     * Very high values will affect performance.
     * @default 60
     */
    snapshotHistorySecs?: number;

    /** @internal */
    log?: Log;

    /** @internal */
    client?: StorageClient;

    /** @internal */
    config?: Configuration;
}
/** One entry of the memory snapshot history kept by `Snapshotter`. */
interface MemorySnapshot {
    /** Timestamp of the snapshot; samples and pruning are selected by this value. */
    createdAt: Date;
    /** Whether memory was marked as overloaded in this snapshot. */
    isOverloaded: boolean;
    /** Optional byte count; presumably the memory in use when the snapshot was taken (confirm against the implementation). */
    usedBytes?: number;
}
/** One entry of the CPU snapshot history kept by `Snapshotter`. */
interface CpuSnapshot {
    /** Timestamp of the snapshot; samples and pruning are selected by this value. */
    createdAt: Date;
    /** Whether the CPU was marked as overloaded in this snapshot. */
    isOverloaded: boolean;
    /** CPU usage expressed as a ratio; presumably in the 0..1 range (confirm against the implementation). */
    usedRatio: number;
    /** Optional tick counters; presumably raw idle/total CPU ticks the ratio is derived from (confirm). */
    ticks?: {
        idle: number;
        total: number;
    };
}
/** One entry of the event loop snapshot history kept by `Snapshotter`. */
interface EventLoopSnapshot {
    /** Timestamp of the snapshot; samples and pruning are selected by this value. */
    createdAt: Date;
    /** Whether the event loop was marked as overloaded in this snapshot. */
    isOverloaded: boolean;
    /** Milliseconds of excess delay; presumably measured beyond the expected snapshot interval (confirm against the implementation). */
    exceededMillis: number;
}
/** One entry of the API client snapshot history kept by `Snapshotter`. */
interface ClientSnapshot {
    /** Timestamp of the snapshot; samples and pruning are selected by this value. */
    createdAt: Date;
    /** Whether the client was marked as overloaded in this snapshot. */
    isOverloaded: boolean;
    /** Count of rate limit errors recorded with this snapshot. */
    rateLimitErrorCount: number;
}
/**
 * Periodically records snapshots of system resources and flags each resource as
 * overloaded or not for the most recent interval, retaining a history of those snapshots.
 * Four resources are tracked: Memory, EventLoop, API and CPU.
 * This class is consumed by the {@link AutoscaledPool} class.
 *
 * On the Apify platform, CPU and memory statistics are supplied by the platform,
 * which collects them from the running Docker container. When running locally,
 * `Snapshotter` produces its own statistics by querying the OS.
 *
 * CPU: overloaded locally once its current use exceeds the `maxUsedCpuRatio` option,
 * or whenever the Apify platform marks it as overloaded.
 * NOTE(review): `maxUsedCpuRatio` is not one of the `SnapshotterOptions` fields declared
 * in this file - confirm where it is configured.
 *
 * Memory: overloaded once its current use exceeds the `maxUsedMemoryRatio` option.
 * The ratio is computed against the total memory available to the container on
 * the Apify platform, and against a quarter of total system memory locally.
 * The local maximum may be overridden with the `CRAWLEE_MEMORY_MBYTES`
 * environment variable.
 *
 * Event loop: overloaded once it slows down by more than the `maxBlockedMillis` option.
 *
 * Client: overloaded once rate limit errors (429 - Too Many Requests),
 * typically received from the request queue, exceed the set limit within the set interval.
 * @category Scaling
 */
export declare class Snapshotter {
    /** Logger instance used by the snapshotter. */
    log: Log;
    /** Storage client whose state is inspected for client snapshots. */
    client: StorageClient;
    /** Configuration instance used by the snapshotter. */
    config: Configuration;
    /** Event manager; presumably the source of the `systemInfo` events (confirm). */
    events: EventManager;

    /** Event loop snapshot interval in milliseconds; presumably derived from `eventLoopSnapshotIntervalSecs` (confirm). */
    eventLoopSnapshotIntervalMillis: number;
    /** Client snapshot interval in milliseconds; presumably derived from `clientSnapshotIntervalSecs` (confirm). */
    clientSnapshotIntervalMillis: number;
    /** Retained snapshot history length in milliseconds; presumably derived from `snapshotHistorySecs` (confirm). */
    snapshotHistoryMillis: number;

    /** Largest tolerated event loop delay in milliseconds. */
    maxBlockedMillis: number;
    /** Largest ratio of total memory that may be in use. */
    maxUsedMemoryRatio: number;
    /** Largest tolerated number of new rate limit errors per interval. */
    maxClientErrors: number;
    /** Total memory, in bytes, that the memory ratio is presumably computed against (confirm). */
    maxMemoryBytes: number;

    /** History of CPU snapshots. */
    cpuSnapshots: CpuSnapshot[];
    /** History of event loop snapshots. */
    eventLoopSnapshots: EventLoopSnapshot[];
    /** History of memory snapshots. */
    memorySnapshots: MemorySnapshot[];
    /** History of client snapshots. */
    clientSnapshots: ClientSnapshot[];

    /** Handle of the interval that drives event loop snapshots. */
    eventLoopInterval: BetterIntervalID;
    /** Handle of the interval that drives client snapshots. */
    clientInterval: BetterIntervalID;

    /** Time of the last logged critical memory overload, or `null`; presumably `null` until one is logged (confirm). */
    lastLoggedCriticalMemoryOverloadAt: Date | null;

    /**
     * @param [options] All `Snapshotter` configuration options.
     */
    constructor(options?: SnapshotterOptions);

    /**
     * Begins capturing snapshots at the configured intervals.
     */
    start(): Promise<void>;

    /**
     * Ends all resource capturing.
     */
    stop(): Promise<void>;

    /**
     * Returns the most recent memory snapshots. The sample size is given by
     * `sampleDurationMillis`; when it is omitted, the full snapshot history is returned.
     */
    getMemorySample(sampleDurationMillis?: number): MemorySnapshot[];

    /**
     * Returns the most recent event loop snapshots. The sample size is given by
     * `sampleDurationMillis`; when it is omitted, the full snapshot history is returned.
     */
    getEventLoopSample(sampleDurationMillis?: number): EventLoopSnapshot[];

    /**
     * Returns the most recent CPU snapshots. The sample size is given by
     * `sampleDurationMillis`; when it is omitted, the full snapshot history is returned.
     */
    getCpuSample(sampleDurationMillis?: number): CpuSnapshot[];

    /**
     * Returns the most recent client snapshots. The sample size is given by
     * `sampleDurationMillis`; when it is omitted, the full snapshot history is returned.
     */
    getClientSample(sampleDurationMillis?: number): ClientSnapshot[];

    /**
     * Picks the latest snapshots from the given array according to `sampleDurationMillis`.
     */
    protected _getSample<T extends { createdAt: Date }>(snapshots: T[], sampleDurationMillis?: number): T[];

    /**
     * Records a snapshot of current memory usage
     * from the Apify platform `systemInfo` event.
     */
    protected _snapshotMemory(systemInfo: SystemInfo): void;

    /**
     * Detects a critical memory overload and logs it to the console.
     */
    protected _memoryOverloadWarning(systemInfo: SystemInfo): void;

    /**
     * Records a snapshot of the current event loop delay.
     * @param intervalCallback Callback supplied by the interval; presumably invoked when the snapshot is done (confirm).
     */
    protected _snapshotEventLoop(intervalCallback: () => unknown): void;

    /**
     * Records a snapshot of current CPU usage from the Apify platform `systemInfo` event.
     */
    protected _snapshotCpu(systemInfo: SystemInfo): void;

    /**
     * Records a snapshot of the current API state by looking for
     * rate limit errors. Only errors raised by a 2nd retry
     * of an API call count towards the snapshot, because
     * earlier errors may simply come from a random spike in
     * the number of requests and do not necessarily indicate
     * that the API is overloaded.
     * @param intervalCallback Callback supplied by the interval; presumably invoked when the snapshot is done (confirm).
     */
    protected _snapshotClient(intervalCallback: () => unknown): void;

    /**
     * Drops snapshots older than the `snapshotHistorySecs` option
     * from the array (destructively - in place).
     */
    protected _pruneSnapshots(snapshots: MemorySnapshot[] | CpuSnapshot[] | EventLoopSnapshot[] | ClientSnapshot[], now: Date): void;
}
export {};
//# sourceMappingURL=snapshotter.d.ts.map