UNPKG

@crawlee/core

Version: (version number not captured in this extract)

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

204 lines 6.24 kB
import type { Log } from '@apify/log';
import { Configuration } from '../configuration';
import { KeyValueStore } from '../storages/key_value_store';
import { ErrorTracker } from './error_tracker';

/**
 * Persistence-related options to control how and when crawler's data gets persisted.
 */
export interface PersistenceOptions {
    /**
     * Use this flag to disable or enable periodic persistence to key value store.
     * @default true
     */
    enable?: boolean;
}

/**
 * The statistics class provides an interface to collecting and logging run
 * statistics for requests.
 *
 * All statistic information is saved on key value store
 * under the key `SDK_CRAWLER_STATISTICS_*`, persists between
 * migrations and abort/resurrect
 *
 * @category Crawlers
 */
export declare class Statistics {
    private static id;
    /**
     * An error tracker for final retry errors.
     */
    errorTracker: ErrorTracker;
    /**
     * An error tracker for retry errors prior to the final retry.
     */
    errorTrackerRetry: ErrorTracker;
    /**
     * Statistic instance id.
     */
    readonly id: number;
    /**
     * Current statistic state used for doing calculations on {@link Statistics.calculate} calls
     */
    state: StatisticState;
    /**
     * Contains the current retries histogram. Index 0 means 0 retries, index 2, 2 retries, and so on
     */
    readonly requestRetryHistogram: number[];
    /**
     * Contains the associated Configuration instance
     */
    private readonly config;
    protected keyValueStore?: KeyValueStore;
    protected persistStateKey: string;
    private logIntervalMillis;
    private logMessage;
    private listener;
    private requestsInProgress;
    private readonly log;
    private instanceStart;
    private logInterval;
    private events;
    private persistenceOptions;
    /**
     * @internal
     */
    constructor(options?: StatisticsOptions);
    /**
     * Set the current statistic instance to pristine values
     */
    reset(): void;
    /**
     * @param options - Override the persistence options provided in the constructor
     */
    resetStore(options?: PersistenceOptions): Promise<void>;
    /**
     * Increments the status code counter.
     */
    registerStatusCode(code: number): void;
    /**
     * Starts a job
     * @ignore
     */
    startJob(id: number | string): void;
    /**
     * Mark job as finished and sets the state
     * @ignore
     */
    finishJob(id: number | string, retryCount: number): void;
    /**
     * Mark job as failed and sets the state
     * @ignore
     */
    failJob(id: number | string, retryCount: number): void;
    /**
     * Calculate the current statistics
     */
    calculate(): {
        requestAvgFailedDurationMillis: number;
        requestAvgFinishedDurationMillis: number;
        requestsFinishedPerMinute: number;
        requestsFailedPerMinute: number;
        requestTotalDurationMillis: number;
        requestsTotal: number;
        crawlerRuntimeMillis: number;
    };
    /**
     * Initializes the key value store for persisting the statistics,
     * displaying the current state in predefined intervals
     */
    startCapturing(): Promise<void>;
    /**
     * Stops logging and remove event listeners, then persist
     */
    stopCapturing(): Promise<void>;
    protected _saveRetryCountForJob(retryCount: number): void;
    /**
     * Persist internal state to the key value store
     * @param options - Override the persistence options provided in the constructor
     */
    persistState(options?: PersistenceOptions): Promise<void>;
    /**
     * Loads the current statistic from the key value store if any
     */
    protected _maybeLoadStatistics(): Promise<void>;
    protected _teardown(): void;
    /**
     * Make this class serializable when called with `JSON.stringify(statsInstance)` directly
     * or through `keyValueStore.setValue('KEY', statsInstance)`
     */
    toJSON(): StatisticPersistedState;
}

/**
 * Configuration for the {@link Statistics} instance used by the crawler
 */
export interface StatisticsOptions {
    /**
     * Interval in seconds to log the current statistics
     * @default 60
     */
    logIntervalSecs?: number;
    /**
     * Message to log with the current statistics
     * @default 'Statistics'
     */
    logMessage?: string;
    /**
     * Parent logger instance, the statistics will create a child logger from this.
     * @default crawler.log
     */
    log?: Log;
    /**
     * Key value store instance to persist the statistics.
     * If not provided, the default one will be used when capturing starts
     */
    keyValueStore?: KeyValueStore;
    /**
     * Configuration instance to use
     * @default Configuration.getGlobalConfig()
     */
    config?: Configuration;
    /**
     * Control how and when to persist the statistics.
     */
    persistenceOptions?: PersistenceOptions;
    /**
     * Save HTML snapshot (and a screenshot if possible) when an error occurs.
     * @default false
     */
    saveErrorSnapshots?: boolean;
}

/**
 * Format of the persisted stats
 */
export interface StatisticPersistedState extends Omit<StatisticState, 'statsPersistedAt'> {
    requestRetryHistogram: number[];
    statsId: number;
    requestAvgFailedDurationMillis: number;
    requestAvgFinishedDurationMillis: number;
    requestTotalDurationMillis: number;
    requestsTotal: number;
    crawlerLastStartTimestamp: number;
    statsPersistedAt: string;
}

/**
 * Contains the statistics state
 */
export interface StatisticState {
    requestsFinished: number;
    requestsFailed: number;
    requestsRetries: number;
    requestsFailedPerMinute: number;
    requestsFinishedPerMinute: number;
    requestMinDurationMillis: number;
    requestMaxDurationMillis: number;
    requestTotalFailedDurationMillis: number;
    requestTotalFinishedDurationMillis: number;
    crawlerStartedAt: Date | string | null;
    crawlerFinishedAt: Date | string | null;
    crawlerRuntimeMillis: number;
    statsPersistedAt: Date | string | null;
    errors: Record<string, unknown>;
    retryErrors: Record<string, unknown>;
    requestsWithStatusCode: Record<string, number>;
}
//# sourceMappingURL=statistics.d.ts.map