UNPKG

@tuplo/fletcher

Version:

Web scraping HTTP request library

156 lines (136 loc) 4.06 kB
import { type IncomingHttpHeaders } from "node:http";
import type * as VM from "node:vm";

import {
  type AxiosRequestConfig,
  type AxiosResponse,
  type AxiosResponseHeaders,
  type Method,
} from "axios";
import * as $ from "cheerio";
import { type Cheerio } from "cheerio";
import type { AnyNode, Element } from "domhandler";
import { type Page, type ScreenshotOptions } from "puppeteer-core";

import { type IOptions as IRetryOptions } from "./helpers/async-retry";
import { type CookieJar } from "./helpers/cookie-jar";

/** Query-string parameters keyed by name; values may be numbers, strings or `undefined`. */
export type UrlSearchParams = Record<string, number | string | undefined>;

/** Alias of the Axios request configuration type. */
export type FetchOptions = AxiosRequestConfig;

export { type CookieJar, type ICookie } from "./helpers/cookie-jar";

/** Proxy settings: `host` and `port` are required, credentials and protocol are optional. */
export type IProxyConfig = {
  host: string;
  password?: string;
  port: number;
  protocol?: string;
  username?: string;
};

// NOTE(review): not referenced anywhere in the visible part of this module;
// kept because other parts of the file may not be in view.
type RequestRedirect = "error" | "follow" | "manual";

/** Generic string-keyed payload used for `formData` and `jsonData`. */
type RequestData = Record<string, unknown>;

/** Argument object handed to an {@link IOnAfterRequestFn} hook. */
type IOnAfterRequestArgs = {
  response: AxiosResponse;
};

/** Hook that receives the Axios response; may be synchronous or return a promise. */
export type IOnAfterRequestFn = {
  (args: IOnAfterRequestArgs): Promise<void> | void;
};

/**
 * Browser-related user options (Puppeteer `Page` / `ScreenshotOptions` based).
 * Consumed as `Partial<...>` by {@link IFletcherUserOptions}.
 */
export type IFletcherBrowserUserOptions = {
  /** Either a boolean switch or an explicit list of resource type names. */
  blockedResourceTypes: boolean | string[];
  // presumably the address of a remote browser to connect to — TODO confirm
  endpoint: string;
  /** Callback that receives the Puppeteer page. */
  onPageReady: (page: Page) => Promise<unknown>;
  screenshot: ScreenshotOptions;
  waitForSelector: string;
};

/** Input passed to {@link IFletcherCacheMethods.key} to derive a cache key. */
export type ICacheParams = {
  format: string;
  options?: Partial<IFletcherUserOptions>;
  payload?: string;
  url: string;
};

/** Pluggable cache operations: look up by key, derive a key, and write a payload. */
export type IFletcherCacheMethods = {
  hit: (key: string) => null | unknown;
  key: (params: ICacheParams) => string;
  write: (key: string, payload?: string) => void;
};

/**
 * Options a caller may supply per request. Instance methods accept this type
 * wrapped in `Partial<...>`, so in practice every field is optional there.
 */
export type IFletcherUserOptions = {
  browser: Partial<IFletcherBrowserUserOptions>;
  cache: boolean;
  cacheMethods: Partial<IFletcherCacheMethods>;
  // units are not stated in this module — presumably milliseconds, TODO confirm
  delay: number;
  embeddedJsonSelector: string;
  encoding: BufferEncoding;
  formData: RequestData;
  formUrlEncoded: Record<string, boolean | number | string>;
  headers: Record<string, string>;
  jsonData: RequestData;
  log: boolean;
  maxRedirections: number;
  method: Method;
  onAfterRequest?: IOnAfterRequestFn;
  proxy: IProxyConfig;
  rejectUnauthorized?: boolean;
  /** A boolean, a retry count, or a full retry options object. */
  retry: boolean | IRetryOptions | number;
  /** Predicate applied to a `<script>` element. */
  scriptFindFn: (script: Element) => boolean;
  scriptPath: string;
  scriptSandbox: VM.Context;
  // units are not stated in this module — presumably milliseconds, TODO confirm
  timeout: number;
  urlSearchParams: UrlSearchParams;
  userAgent: string;
  /** Predicate over the HTTP status code. */
  validateStatus: (statusCode: number) => boolean;
};

/**
 * Request options in their concrete form: `url` is required, `retry` is always
 * an options object, and the request body is a plain string.
 */
export type IFletcherOptions = {
  body?: string;
  cache: boolean;
  delay: number;
  encoding: BufferEncoding;
  headers: Record<string, string>;
  /**
   * =20 maximum redirect count. 0 to not follow redirect
   * (the "=20" presumably denotes the default value — TODO confirm)
   */
  maxRedirections?: number | undefined;
  method: Method;
  onAfterRequest?: IOnAfterRequestFn;
  proxy?: IProxyConfig;
  rejectUnauthorized?: boolean;
  retry: IRetryOptions;
  timeout: number;
  url: string;
  validateStatus: (statusCode: number) => boolean;
};

/** Shared signature for instance methods: a URL plus optional user options, resolving to `T`. */
type IInstanceMethod<T> = {
  (url: string, options?: Partial<IFletcherUserOptions>): Promise<T>;
};

/**
 * Public surface of an instance: top-level request helpers plus a `browser`
 * group exposing browser-flavoured counterparts and a `close` method.
 */
export type IInstance = {
  browser: {
    close: () => Promise<void>;
    html: IInstanceMethod<Cheerio<AnyNode>>;
    /** Takes an additional `requestUrl` matcher (string or RegExp) besides the page URL. */
    json: <T = unknown>(
      url: string,
      requestUrl: RegExp | string,
      options?: Partial<IFletcherUserOptions>
    ) => Promise<T>;
    jsonld: IInstanceMethod<$.Cheerio<unknown>>;
    script: <T = unknown>(
      url: string,
      options?: Partial<IFletcherUserOptions>
    ) => Promise<T>;
  };
  cookies: IInstanceMethod<CookieJar>;
  embeddedJson: <T = unknown>(
    url: string,
    options?: Partial<IFletcherUserOptions>
  ) => Promise<T>;
  headers: IInstanceMethod<IncomingHttpHeaders>;
  html: IInstanceMethod<Cheerio<AnyNode>>;
  json: <T = unknown>(
    url: string,
    options?: Partial<IFletcherUserOptions>
  ) => Promise<T>;
  jsonld: IInstanceMethod<$.Cheerio<unknown>>;
  response: IInstanceMethod<IResponse>;
  script: <T = unknown>(
    url: string,
    options?: Partial<IFletcherUserOptions>
  ) => Promise<T>;
  text: IInstanceMethod<string>;
};

/** Response shape: Axios headers, status information, and an async text accessor. */
export type IResponse = {
  // body: Readable & Dispatcher.BodyMixin;
  headers: AxiosResponseHeaders;
  statusCode: number;
  statusMessage?: string;
  text: () => Promise<string>;
};