crawlee
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs, including (but not limited to) those using headless Chrome and Puppeteer.
172 lines (170 loc) • 9.36 kB
JavaScript
import mod from "./index.js";
// ESM facade over `mod`, the default import bound at the top of this file.
// The whole object is re-exported as the default export, and each property
// listed below is additionally exposed as a named export so consumers can
// write `import { Name } from ...` instead of going through the default.
export default mod;

// A single destructuring declaration reads the properties off `mod` in the
// order listed and binds each one to a same-named `const` export.
export const {
  API_PROCESSED_REQUESTS_DELAY_MILLIS,
  AdaptivePlaywrightCrawler,
  AutoscaledPool,
  BASIC_CRAWLER_TIMEOUT_BUFFER_SECS,
  BLOCKED_STATUS_CODES,
  BROWSER_CONTROLLER_EVENTS,
  BROWSER_POOL_EVENTS,
  BasicCrawler,
  BrowserController,
  BrowserCrawler,
  BrowserLaunchError,
  BrowserLauncher,
  BrowserName,
  BrowserPlugin,
  BrowserPool,
  CLOUDFLARE_RETRY_CSS_SELECTORS,
  CheerioCrawler,
  Configuration,
  CookieParseError,
  CrawlerExtension,
  CriticalError,
  DATASET_ITERATORS_DEFAULT_LIMIT,
  DEFAULT_USER_AGENT,
  Dataset,
  DeviceCategory,
  EVENT_SESSION_RETIRED,
  EnqueueStrategy,
  ErrorSnapshotter,
  ErrorTracker,
  EventManager,
  EventType,
  FileDownload,
  GotScrapingHttpClient,
  HttpCrawler,
  JSDOMCrawler,
  KeyValueStore,
  LaunchContext,
  LinkeDOMCrawler,
  LocalEventManager,
  Log,
  LogLevel,
  Logger,
  LoggerJson,
  LoggerText,
  MAX_POOL_SIZE,
  MAX_QUERIES_FOR_CONSISTENCY,
  MissingRouteError,
  NonRetryableError,
  OperatingSystemsName,
  PERSIST_STATE_KEY,
  PlaywrightBrowser,
  PlaywrightController,
  PlaywrightCrawler,
  PlaywrightLauncher,
  PlaywrightPlugin,
  ProxyConfiguration,
  PseudoUrl,
  PuppeteerController,
  PuppeteerCrawler,
  PuppeteerLauncher,
  PuppeteerPlugin,
  QUERY_HEAD_BUFFER,
  QUERY_HEAD_MIN_LENGTH,
  REQUESTS_PERSISTENCE_KEY,
  RETRY_CSS_SELECTORS,
  ROTATE_PROXY_ERRORS,
  Request,
  RequestHandlerResult,
  RequestList,
  RequestProvider,
  RequestQueue,
  RequestQueueV1,
  RequestQueueV2,
  RequestState,
  RetryRequestError,
  RobotsFile,
  Router,
  STATE_PERSISTENCE_KEY,
  STORAGE_CONSISTENCY_DELAY_MILLIS,
  Session,
  SessionError,
  SessionPool,
  Sitemap,
  SitemapRequestList,
  Snapshotter,
  Statistics,
  StorageManager,
  SystemStatus,
  URL_NO_COMMAS_REGEX,
  URL_WITH_COMMAS_REGEX,
  applySearchParams,
  browserCrawlerEnqueueLinks,
  browserPoolCookieToToughCookie,
  checkAndSerialize,
  checkStorageAccess,
  cheerioCrawlerEnqueueLinks,
  chunk,
  chunkBySize,
  constructGlobObjectsFromGlobs,
  constructRegExpObjectsFromPseudoUrls,
  constructRegExpObjectsFromRegExps,
  cookieStringToToughCookie,
  createAdaptivePlaywrightRouter,
  createBasicRouter,
  createCheerioRouter,
  createDeserialize,
  createFileRouter,
  createHttpRouter,
  createJSDOMRouter,
  createLinkeDOMRouter,
  createPlaywrightRouter,
  createPuppeteerRouter,
  createRequestDebugInfo,
  createRequestOptions,
  createRequests,
  deserializeArray,
  domCrawlerEnqueueLinks,
  downloadListOfUrls,
  enqueueLinks,
  entries,
  expandShadowRoots,
  extractUrls,
  extractUrlsFromCheerio,
  extractUrlsFromPage,
  filterRequestsByPatterns,
  getCgroupsVersion,
  getCookiesFromResponse,
  getCurrentCpuTicksV2,
  getDefaultCookieExpirationDate,
  getMemoryInfo,
  getMemoryInfoV2,
  getRequestId,
  gotScraping,
  handleRequestTimeout,
  htmlToText,
  isContainerized,
  isDocker,
  isLambda,
  keys,
  launchPlaywright,
  launchPuppeteer,
  linkedomCrawlerEnqueueLinks,
  log,
  maybeStringify,
  mergeCookies,
  parseOpenGraph,
  parseSitemap,
  playwrightClickElements,
  playwrightUtils,
  processHttpRequestOptions,
  puppeteerClickElements,
  puppeteerRequestInterception,
  puppeteerUtils,
  purgeDefaultStorages,
  resolveBaseUrlForEnqueueLinksFiltering,
  serializeArray,
  sleep,
  snakeCaseToCamelCase,
  social,
  toughCookieToBrowserPoolCookie,
  tryAbsoluteURL,
  updateEnqueueLinksPatternCache,
  useState,
  utils,
  validateGlobPattern,
  validators,
  weightedAvg,
  withCheckedStorageAccess,
} = mod;