UNPKG

website-scrap-engine

Version:
151 lines 4.98 kB
import type { RequestError, RetryFunction, TimeoutError } from 'got';
import type { ResourceEncoding, ResourceType } from './resource.js';
import type { ProcessingLifeCycle, RequestOptions } from './life-cycle/types.js';
import { configureLogger } from './logger/config-logger.js';
import type { DownloaderWithMeta } from './downloader/types.js';
import type { SourceDefinition } from './sources.js';
import type { CheerioOptionsInterface } from './types.js';

/**
 * Additional options intended for custom life cycle implementations.
 *
 * Any string key is accepted; values are limited to
 * string, number, boolean or void.
 */
export interface StaticDownloadMeta extends Record<string, string | number | boolean | void> {
    /** Either one of the listed closing tags or any other string. */
    detectIncompleteHtml?: '</html>' | '</body>' | string;
    warnForNonHtml?: boolean;
}

/**
 * The part of the options that must stay fixed at runtime
 * and can be cloned safely.
 */
export interface StaticDownloadOptions {
    /**
     * @see Resource.localRoot
     */
    localRoot: string;

    /**
     * Local source path to download from.
     *
     * When empty or undefined, file:// urls are not accepted.
     * The value itself should not start with file://
     *
     * Note: on windows the path must use slash (/), see
     * https://github.com/website-local/website-scrap-engine/issues/126
     */
    localSrcRoot?: string;

    /**
     * Upper bound of the recursive depth
     * @see Resource.depth
     */
    maxDepth: number;

    /**
     * Concurrency of downloading
     */
    concurrency: number;

    /**
     * Default encoding of a resource, keyed by resource type.
     *
     * The encoding of an individual resource can still be changed at
     * {@link ProcessingLifeCycle.processBeforeDownload}
     */
    encoding: Record<ResourceType, ResourceEncoding>;

    /**
     * WorkerPool.coreSize = Math.min(
     *   {@link concurrency},
     *   {@link workerCount}
     * )
     */
    workerCount?: number;

    /**
     * Lower bound of concurrency,
     * for {@link DownloadOptions.adjustConcurrencyFunc}
     */
    minConcurrency?: number;

    /**
     * Whether url search params should be stripped.
     *
     * If false, the saved file name would contain the search,
     * or a hash of the search.
     */
    deduplicateStripSearch?: boolean;

    /**
     * Optional cheerio option used for parsing
     */
    cheerioParse?: CheerioOptionsInterface;

    /**
     * Optional cheerio option used for serializing
     */
    cheerioSerialize?: CheerioOptionsInterface;

    /**
     * Custom html sources
     */
    sources?: SourceDefinition[];

    /**
     * Got options
     *
     * Functions or class instances must never be included
     * with {@link StaticDownloadOptions}.
     * Configure functions or class instances
     * using {@link DownloadOptions} only.
     * @see RequestOptions
     * @see got.mergeOptions
     */
    req?: RequestOptions;

    /**
     * Urls pushed to the pipeline with depth 0,
     * each using the url itself as refUrl
     */
    initialUrl?: string[];

    /**
     * @see DownloadOptions.configureLogger
     */
    logSubDir?: string;

    /**
     * @see StaticDownloadMeta
     */
    meta: StaticDownloadMeta;

    /**
     * Set to true to skip replacePath processing when a parser error occurs
     * https://github.com/website-local/website-scrap-engine/issues/107
     * @see createResource
     */
    skipReplacePathError?: boolean;

    /**
     * Wait for this.init in method onIdle.
     * See https://github.com/website-local/website-scrap-engine/issues/152
     * @deprecated since 0.8.2
     */
    waitForInitBeforeIdle?: boolean;

    /**
     * Use the value of the Last-Modified http header, when the response
     * provides one, as the last modified time of the locally saved file.
     *
     * https://developer.mozilla.org/docs/Web/HTTP/Headers/Last-Modified
     * https://github.com/website-local/website-scrap-engine/issues/174
     */
    preferRemoteLastModifiedTime?: boolean;
}

/**
 * Complete options: the static, clone-safe part combined with
 * the processing life cycle and the members declared below.
 */
export interface DownloadOptions extends StaticDownloadOptions, ProcessingLifeCycle {
    /**
     * The only place where functions or class callbacks may be configured.
     *
     * @see StaticDownloadOptions.req
     */
    req: RequestOptions;

    /**
     * Adjust downloaded concurrency at runtime.
     *
     * Note: worker_threads would not be affected by this
     */
    adjustConcurrencyPeriod?: number;

    /** Callback that receives the downloader; its return value is ignored. */
    adjustConcurrencyFunc?: (downloader: DownloaderWithMeta) => void;

    /**
     * Custom function used to configure the logger.
     */
    configureLogger: typeof configureLogger;
}

/**
 * A got TimeoutError or RequestError that additionally carries
 * the retryLimitExceeded flag.
 */
export type ExtendedError = (TimeoutError | RequestError) & {
    retryLimitExceeded: boolean;
};

/**
 * When providing your own implementation instead,
 * set error.retryLimitExceeded to 1 or true
 * if attemptCount > retryOptions.limit
 * or whenever you think retrying should end
 */
export declare const calculateFastDelay: RetryFunction;

/**
 * Accepts a processing life cycle together with any subset of
 * {@link DownloadOptions} and returns a complete {@link DownloadOptions}.
 */
export declare function defaultDownloadOptions(
    options: ProcessingLifeCycle & Partial<DownloadOptions>
): DownloadOptions;

/**
 * Accepts {@link DownloadOptions} and returns {@link DownloadOptions}.
 * NOTE(review): the checks performed are not visible in this declaration file.
 */
export declare function checkDownloadOptions(options: DownloadOptions): DownloadOptions;

/**
 * Accepts {@link DownloadOptions}, or a function producing them,
 * plus an optional partial {@link StaticDownloadOptions} override,
 * and returns {@link DownloadOptions}.
 */
export declare function mergeOverrideOptions(
    options: DownloadOptions | (() => DownloadOptions),
    overrideOptions?: Partial<StaticDownloadOptions>
): DownloadOptions;
//# sourceMappingURL=options.d.ts.map