@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
134 lines • 5.53 kB
TypeScript
import type { Readable } from 'node:stream';
import { type SearchParams } from '@crawlee/utils';
import type { FormDataLike } from './form-data-like';
/**
 * Timeout limits for a request: either a single `request` limit, or six
 * separately named limits (`lookup`, `connect`, `secureConnect`, `socket`,
 * `send`, `response`).
 *
 * NOTE(review): units are not stated in this file (presumably milliseconds) — confirm.
 */
type Timeout =
    | {
          request: number;
      }
    | {
          lookup: number;
          connect: number;
          secureConnect: number;
          socket: number;
          send: number;
          response: number;
      };
/** HTTP method names in their all-upper-case spelling. */
type UpperCaseMethod = 'GET' | 'POST' | 'PUT' | 'PATCH' | 'HEAD' | 'DELETE' | 'OPTIONS' | 'TRACE';
/**
 * HTTP method accepted by the `method` option of {@link HttpRequest}.
 * Each name is permitted either fully upper-case or fully lower-case.
 */
type Method = UpperCaseMethod | Lowercase<UpperCaseMethod>;
/**
* Maps permitted values of the `responseType` option on {@link HttpRequest} to the types that they produce.
*
* The keys of this interface constrain the `TResponseType` type parameter used throughout this file,
* and {@link HttpResponse} looks up the type of its `body` property here.
*/
export interface ResponseTypes {
/** The body is typed `unknown`, so it must be narrowed before use. */
'json': unknown;
/** The body is a `string`. */
'text': string;
/** The body is a `Buffer`. */
'buffer': Buffer;
}
/**
* Transfer progress information; the type of `downloadProgress` and `uploadProgress` on {@link StreamingHttpResponse}.
*/
interface Progress {
/** NOTE(review): the scale is not stated in this file (0–1 fraction vs. 0–100) — confirm against the client implementation. */
percent: number;
/** Amount transferred so far (presumably in bytes — confirm). */
transferred: number;
/** Total amount; optional, so it may be absent (presumably when the size is not known — confirm). */
total?: number;
}
/**
* Cookie jar whose methods report their result through an `(error, value)` callback.
* Each method has two call shapes: one that takes an `options` object before the callback and one that does not.
*
* NOTE(review): presumably mirrors the callback API of the `tough-cookie` `CookieJar` — confirm.
*/
interface ToughCookieJar {
/** Obtains the cookie string for a URL; the callback receives an error (or `null`) and the string. */
getCookieString: ((currentUrl: string, options: Record<string, unknown>, callback: (error: Error | null, cookies: string) => void) => string) & ((url: string, callback: (error: Error | null, cookieHeader: string) => void) => string);
/** Stores a cookie for a URL; the callback receives an error (or `null`) and a result typed `unknown`. */
setCookie: ((cookieOrString: unknown, currentUrl: string, options: Record<string, unknown>, callback: (error: Error | null, cookie: unknown) => void) => void) & ((rawCookie: string, url: string, callback: (error: Error | null, result: unknown) => void) => void);
}
/**
* Cookie jar whose methods return promises instead of taking callbacks.
* Accepted alongside {@link ToughCookieJar} by the `cookieJar` option of {@link HttpRequest}.
*/
interface PromiseCookieJar {
/** Resolves to the cookie string for the given URL. */
getCookieString: (url: string) => Promise<string>;
/** Stores a raw cookie string for the given URL; the resolved value is typed `unknown`. */
setCookie: (rawCookie: string, url: string) => Promise<unknown>;
}
/**
 * Header collection indexed by string keys. A value may be a single string,
 * a list of strings, or `undefined`.
 */
type SimpleHeaders = {
    [name: string]: string | string[] | undefined;
};
/**
* HTTP Request as accepted by {@link BaseHttpClient} methods.
*
* @typeParam TResponseType - Key of {@link ResponseTypes} carried by the `responseType` option; defaults to `'text'`.
*/
export interface HttpRequest<TResponseType extends keyof ResponseTypes = 'text'> {
/** Any additional string-keyed property is allowed; its value is typed `unknown`. */
[k: string]: unknown;
/** URL of the request, given as a string or a `URL` object. The only required property. */
url: string | URL;
/** HTTP method; upper-case and lower-case spellings are both accepted. */
method?: Method;
/** Request headers. */
headers?: SimpleHeaders;
/** Request body: a string, a `Buffer`, a readable stream, a (possibly async) generator, or a form-data-like object. */
body?: string | Buffer | Readable | Generator | AsyncGenerator | FormDataLike;
/** `AbortSignal` associated with the request (presumably used to cancel it — confirm). */
signal?: AbortSignal;
/** Timeout limits; every individual limit is optional because of `Partial`. */
timeout?: Partial<Timeout>;
/** Cookie jar, in either the callback-based or the promise-based flavor. */
cookieJar?: ToughCookieJar | PromiseCookieJar;
/**
* Either a flag or a predicate that receives a response and returns a boolean.
* NOTE(review): the predicate's parameter is typed `any`, so its use is unchecked by the compiler.
*/
followRedirect?: boolean | ((response: any) => boolean);
/** NOTE(review): presumably an upper bound on followed redirects — confirm. */
maxRedirects?: number;
/** NOTE(review): presumably the encoding used when decoding the response body — confirm. */
encoding?: BufferEncoding;
/** Selects, via {@link ResponseTypes}, the type that the response body is produced as. */
responseType?: TResponseType;
/** NOTE(review): presumably whether error HTTP status codes cause a rejection — confirm. */
throwHttpErrors?: boolean;
/** Proxy URL as a string. */
proxyUrl?: string;
/** Options object for header generation (presumably forwarded to `headerGenerator.getHeaders` — confirm). */
headerGeneratorOptions?: Record<string, unknown>;
/** NOTE(review): presumably toggles use of the header generator — confirm. */
useHeaderGenerator?: boolean;
/** Object exposing `getHeaders(options)`, which returns a string-to-string header map. */
headerGenerator?: {
getHeaders: (options: Record<string, unknown>) => Record<string, string>;
};
/** NOTE(review): presumably corresponds to Node.js' `insecureHTTPParser` HTTP option — confirm. */
insecureHTTPParser?: boolean;
/** Opaque object; its meaning is not visible in this file. */
sessionToken?: object;
}
/**
* Additional options for HTTP requests that need to be handled separately before passing to {@link BaseHttpClient}.
*
* Extends {@link HttpRequest} with the five convenience options below; {@link processHttpRequestOptions}
* converts a value of this type to a plain {@link HttpRequest}.
*
* @typeParam TResponseType - Key of {@link ResponseTypes} carried by the `responseType` option; defaults to `'text'`.
*/
export interface HttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'> extends HttpRequest<TResponseType> {
/** Search (query string) parameters to be appended to the request URL */
searchParams?: SearchParams;
/** A form to be sent in the HTTP request body (URL encoding will be used) */
form?: Record<string, string>;
/** Arbitrary object to be JSON-serialized and sent as the HTTP request body */
json?: unknown;
/** Basic HTTP Auth username */
username?: string;
/** Basic HTTP Auth password */
password?: string;
}
/**
* HTTP response data, without a body, as returned by {@link BaseHttpClient} methods.
*
* Also the type of the `redirectResponse` argument passed to a {@link RedirectHandler}.
*/
export interface BaseHttpResponseData {
/** URLs of redirects (presumably those followed before this response, in order — confirm). */
redirectUrls: URL[];
/** URL of the response as a string (presumably the final URL after redirects — confirm). */
url: string;
/** IP address as a string; optional (presumably the remote server's address — confirm). */
ip?: string;
/** Numeric HTTP status code. */
statusCode: number;
/** HTTP status message; optional. */
statusMessage?: string;
/** Response headers. */
headers: SimpleHeaders;
/** Response trailers. */
trailers: SimpleHeaders;
/** NOTE(review): presumably whether the response was received in full — confirm. */
complete: boolean;
}
/**
* Response data plus a reference to a request; the shared base of {@link HttpResponse} and {@link StreamingHttpResponse}.
*
* @typeParam TResponseType - Key of {@link ResponseTypes}; defaults to the union of all keys.
*/
interface HttpResponseWithoutBody<TResponseType extends keyof ResponseTypes = keyof ResponseTypes> extends BaseHttpResponseData {
/** The associated request (presumably the one that produced this response — confirm). */
request: HttpRequest<TResponseType>;
}
/**
* HTTP response data as returned by the {@link BaseHttpClient.sendRequest} method.
*
* @typeParam TResponseType - Key of {@link ResponseTypes} that determines the type of `body`; defaults to the union of all keys.
*/
export interface HttpResponse<TResponseType extends keyof ResponseTypes = keyof ResponseTypes> extends HttpResponseWithoutBody<TResponseType> {
/** Any additional string-keyed property is allowed; note that its value is typed `any`, i.e. unchecked. */
[k: string]: any;
/** Response body, typed by looking `TResponseType` up in {@link ResponseTypes}. */
body: ResponseTypes[TResponseType];
}
/**
* HTTP response data as returned by the {@link BaseHttpClient.stream} method.
*
* Carries a readable stream instead of a `body` property.
*/
export interface StreamingHttpResponse extends HttpResponseWithoutBody {
/** Readable stream from which the response body may be read. */
stream: Readable;
/** Read-only download progress information. */
readonly downloadProgress: Progress;
/** Read-only upload progress information. */
readonly uploadProgress: Progress;
}
/**
* Type of a function called when an HTTP redirect takes place. It is allowed to mutate the `updatedRequest` argument.
*
* Accepted as the optional `onRedirect` argument of {@link BaseHttpClient.stream}.
*
* @param redirectResponse - Body-less response data (presumably of the response that triggered the redirect — confirm).
* @param updatedRequest - Object with an optional `url` and a required `headers` map; the handler may mutate it.
*/
export type RedirectHandler = (redirectResponse: BaseHttpResponseData, updatedRequest: {
url?: string | URL;
headers: SimpleHeaders;
}) => void;
/**
* Interface for user-defined HTTP clients to be used for plain HTTP crawling and for sending additional requests during a crawl.
*/
export interface BaseHttpClient {
/**
* Perform an HTTP Request and return the complete response.
*
* @typeParam TResponseType - Key of {@link ResponseTypes} that ties the request's `responseType` to the type of the response `body`; defaults to `'text'`.
* @param request - The request to perform.
* @returns A promise of the response, including its body.
*/
sendRequest<TResponseType extends keyof ResponseTypes = 'text'>(request: HttpRequest<TResponseType>): Promise<HttpResponse<TResponseType>>;
/**
* Perform an HTTP Request and return after the response headers are received. The body may be read from a stream contained in the response.
*
* @param request - The request to perform.
* @param onRedirect - Optional {@link RedirectHandler}, called when an HTTP redirect takes place.
* @returns A promise of the response, which exposes the body as a readable stream.
*/
stream(request: HttpRequest, onRedirect?: RedirectHandler): Promise<StreamingHttpResponse>;
}
/**
* Converts {@link HttpRequestOptions} to a {@link HttpRequest}.
*
* The parameter is destructured so that `searchParams`, `form`, `json`, `username` and `password`
* are separated from the remaining request properties.
* NOTE(review): how those five options are folded into the result (URL, body, headers) is not
* visible in this declaration — confirm against the implementation.
*
* @typeParam TResponseType - Key of {@link ResponseTypes}, preserved from the input to the returned request; defaults to `'text'`.
* @returns A request object suitable for {@link BaseHttpClient} methods.
*/
export declare function processHttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'>({ searchParams, form, json, username, password, ...request }: HttpRequestOptions<TResponseType>): HttpRequest<TResponseType>;
export {};
//# sourceMappingURL=base-http-client.d.ts.map