crawler
Version:
Crawler is a ready-to-use web spider with support for proxies, asynchronous requests, rate limiting, configurable request pools, jQuery, and HTTP/2.
142 lines • 4.59 kB
TypeScript
/**
* Options marked "Global Only": settings for the crawler as a whole.
* Every field except "userAgents" and "silence" is required in this type;
* CrawlerOptions exposes all of them as optional via Partial<GlobalOnlyOptions>.
*/
export type GlobalOnlyOptions = {
/**
* Global Only option.
* @default 10
* @description The maximum number of requests that can be sent simultaneously.
* @example If the value is 10, the crawler will send at most 10 requests at the same time.
* Note: A maxConnections value greater than 1 takes effect only when the global rateLimit is 0.
*/
maxConnections: number;
/**
* Global Only option.
* @default 10
* @description The number of priority levels. Can only be assigned at the beginning.
*/
priorityLevels: number;
/**
* Global Only option.
* @default 0
* @description The delay, in milliseconds, between requests (see the example below).
* NOTE(review): the previous description here ("The default priority of the tasks") appears to
* have been copied from a priority option; confirm the exact semantics against the implementation.
* @example 1000 means a delay of 1000 milliseconds between requests, starting after the first request.
*/
rateLimit: number;
/**
* Global Only option.
* @default false
* @description If true, the crawler will skip duplicate tasks.
* @example If the task is already in the queue, the crawler will not add it again.
*/
skipDuplicates: boolean;
/**
* Global Only option.
* @default false
* @description If true, the crawler will dynamically reallocate tasks that are stuck in a queue
* blocked by header blocking to other queues.
* NOTE(review): "header blocking" presumably means head-of-line blocking — confirm.
*/
homogeneous: boolean;
/**
* Global Only option.
* @default undefined
* @description If passed, the crawler will rotate the user agent for each request. The "userAgents" option must be an array if activated.
* NOTE(review): the declared type also accepts a single string; confirm whether a lone string
* is valid input or whether the type should be narrowed to string[].
*/
userAgents?: string | string[];
/**
* Global Only option.
* @default false
* @description If true, the crawler will mute all warning and error messages. Request errors will still be thrown.
*/
silence?: boolean;
};
/**
* Options for a request. Every field is optional.
* These fields are also part of CrawlerOptions, where they are combined with the
* (optional) global-only options.
* Several fields are deprecated aliases that are declared next to their replacements.
*/
export type RequestOptions = {
forceUTF8?: boolean;
/**
* crawlerOption
* @default true
* @description If true, the crawler will use the cheerio library to parse the HTML content.
* @see cheerio.load()
* @example If injection succeeds, the response object will have a "$" property, which is a function that can be used like jQuery.
*/
jQuery?: boolean;
/**
* @deprecated Please use "encoding" instead.
*/
incomingEncoding?: string | null;
/**
* @default "utf8"
* @description The encoding of the response body.
*/
encoding?: string | null;
/**
* @default 0
* @description rateLimiter ID
*/
rateLimiterId?: number;
retries?: number;
/**
* NOTE(review): units are not stated in this file — presumably milliseconds; confirm.
*/
retryInterval?: number;
/**
* NOTE(review): units are not stated in this file — presumably milliseconds; confirm.
*/
timeout?: number;
priority?: number;
seenreq?: any;
method?: string;
skipEventRequest?: boolean;
html?: boolean;
proxies?: string[];
proxy?: string;
http2?: boolean;
body?: string | Record<string, unknown>;
headers?: Record<string, unknown>;
agent?: any;
/**
* @deprecated Please use "url" instead.
*/
uri?: string | Function;
/**
* @description Replacement for the deprecated "uri". Accepts a string or a function.
*/
url?: string | Function;
/**
* @deprecated Please use "searchParams" instead.
*/
qs?: string | Record<string, unknown>;
/**
* @description Replacement for the deprecated "qs". Unlike "qs", the declared type does not accept a plain string.
*/
searchParams?: Record<string, unknown>;
/**
* @deprecated Please use "rejectUnauthorized" instead.
*/
strictSSL?: boolean;
/**
* @description If false, the crawler will ignore SSL certificate errors.
* @default true
*/
rejectUnauthorized?: boolean;
/**
* @deprecated Please use "decompress" instead.
*/
gzip?: boolean;
/**
* @description Replacement for the deprecated "gzip".
*/
decompress?: boolean;
/**
* @deprecated Please use "cookieJar" instead.
* @see tough-cookie https://github.com/sindresorhus/got/blob/main/documentation/migration-guides/request.md
*/
jar?: object;
/**
* @description Replacement for the deprecated "jar".
*/
cookieJar?: object;
/**
* @description If true, the crawler will parse the response body as JSON.
* @default false
*/
isJson?: boolean;
referer?: string;
userParams?: unknown;
/**
* @deprecated Please use "parseJson" instead.
*/
jsonReviver?: Function;
/**
* @description Replacement for the deprecated "jsonReviver".
*/
parseJson?: Function;
/**
* @deprecated Please use "stringifyJson" instead.
*/
jsonReplacer?: Function;
/**
* @description Replacement for the deprecated "jsonReplacer".
*/
stringifyJson?: Function;
/**
* @description Function that receives the request options and an optional "done" callback;
* "done" accepts an optional error (Error, null, or omitted).
* NOTE(review): presumably invoked before the request is sent — confirm against the implementation.
*/
preRequest?: (options: RequestOptions, done?: (error?: Error | null) => void) => void;
release?: () => void;
/**
* @description Function that receives an error value, the response, and an optional "done" argument.
* "error" and "done" are typed as unknown, so they must be narrowed before use.
* NOTE(review): presumably invoked when the request completes or fails — confirm against the implementation.
*/
callback?: (error: unknown, response: CrawlerResponse, done?: unknown) => void;
};
/**
* A request description in one of three shapes: a string, a single RequestOptions
* object, or an array of RequestOptions objects.
* NOTE(review): the string form is presumably the target URL — confirm against the code that consumes it.
*/
export type RequestConfig = string | RequestOptions | RequestOptions[];
/**
* The combined option set: every GlobalOnlyOptions field made optional,
* intersected with all RequestOptions fields.
*/
export type CrawlerOptions = Partial<GlobalOnlyOptions> & RequestOptions;
/**
* The response value passed to RequestOptions "callback".
* Currently untyped ("any"), so property access on it is not checked by the compiler.
* NOTE(review): the "jQuery" option's documentation mentions a "$" property on the response;
* consider replacing "any" with a proper interface.
*/
export type CrawlerResponse = any;
//# sourceMappingURL=crawler.d.ts.map