UNPKG

crawl4ai

Version:

TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible

380 lines (379 loc) 9.96 kB
/**
 * Crawl4AI TypeScript SDK - Type Definitions
 * Based on actual API endpoints and Swagger documentation
 *
 * Every declaration in this module is type-only. Declarations are ordered so
 * that a type is introduced before the types that reference it.
 */

// ---------------------------------------------------------------------------
// String-literal unions
// ---------------------------------------------------------------------------

/** HTTP verbs as upper-case string literals. Not referenced by any other type in this module. */
export type HttpMethod =
  | 'GET'
  | 'POST'
  | 'PUT'
  | 'DELETE'
  | 'PATCH';

/** Values accepted by `CrawlerRunConfig.cache_mode`. */
export type CacheMode =
  | 'enabled'
  | 'disabled'
  | 'bypass'
  | 'read_only'
  | 'write_only';

/** Values accepted by `BrowserConfig.browser_type`. */
export type BrowserType =
  | 'chromium'
  | 'firefox'
  | 'webkit';

/** Values accepted by `MarkdownRequest.filter` and its short alias `f`. */
export type ContentFilter =
  | 'raw'
  | 'fit'
  | 'bm25'
  | 'llm';

/** Values used by `AskRequest.context_type` and `AskResponse.type`. */
export type ContextType =
  | 'code'
  | 'doc'
  | 'all';

// ---------------------------------------------------------------------------
// Small shared shapes
// ---------------------------------------------------------------------------

/** A width/height pair; used by `BrowserConfig.viewport`. */
export interface Viewport {
  width: number;
  height: number;
}

/** Proxy settings; `server` is the only required member. */
export interface ProxyConfig {
  server: string;
  username?: string;
  password?: string;
}

/** Geographic coordinates; used by `CrawlerRunConfig.geolocation`. */
export interface GeolocationConfig {
  latitude: number;
  longitude: number;
  accuracy?: number;
}

/**
 * A single cookie. Appears both in `BrowserConfig.cookies` and in
 * `CrawlResult.cookies`.
 */
export interface Cookie {
  name: string;
  value: string;
  domain?: string;
  path?: string;
  // NOTE(review): unit/epoch of `expires` is not visible in this file - confirm.
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Strict' | 'Lax' | 'None';
}

// ---------------------------------------------------------------------------
// Browser configuration
// ---------------------------------------------------------------------------

/**
 * Browser-level options; every member is optional.
 * Supplied through `CrawlRequest.browser_config`.
 */
export interface BrowserConfig {
  headless?: boolean;
  browser_type?: BrowserType;
  browser_mode?: 'dedicated' | 'builtin' | 'custom' | 'docker';
  use_managed_browser?: boolean;
  cdp_url?: string;
  debugging_port?: number;
  host?: string;
  // Both a structured `proxy_config` and a plain-string `proxy` are declared.
  proxy_config?: ProxyConfig;
  user_agent?: string;
  user_agent_mode?: string;
  proxy?: string;
  // NOTE(review): time unit of `page_timeout` is not stated here - confirm.
  page_timeout?: number;
  verbose?: boolean;
  simulate_user?: boolean;
  magic?: boolean;
  override_navigator?: boolean;
  user_data_dir?: string;
  use_persistent_context?: boolean;
  text_mode?: boolean;
  light_mode?: boolean;
  enable_stealth?: boolean;
  // Viewport can be given as an object or as two separate numbers.
  viewport?: Viewport;
  viewport_width?: number;
  viewport_height?: number;
  headers?: Record<string, string>;
  cookies?: Cookie[];
  extra_args?: string[];
  ignore_https_errors?: boolean;
  java_script_enabled?: boolean;
  accept_downloads?: boolean;
  downloads_path?: string;
  /** Open-ended bag for options not modelled above. */
  extra?: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Extraction and chunking strategies
// ---------------------------------------------------------------------------

/** One field of a `CssExtractionSchema`. */
export interface CssExtractionField {
  name: string;
  selector: string;
  type: 'text' | 'html' | 'attribute' | 'href' | 'src';
  // NOTE(review): presumably only meaningful when `type` is 'attribute' - confirm.
  attribute?: string;
  multiple?: boolean;
  transform?: string;
}

/** A base selector plus the list of fields to extract. */
export interface CssExtractionSchema {
  name?: string;
  baseSelector: string;
  fields: CssExtractionField[];
}

/** Parameters carried by the `'json_css'` variant of `ExtractionStrategy`. */
export interface JsonCssExtractionParams {
  schema: CssExtractionSchema;
}

/** Parameters carried by the `'llm'` variant of `ExtractionStrategy`. */
export interface LlmExtractionParams {
  provider: string;
  // Both `api_token` and `api_key` are declared; which one the server reads
  // is not visible from this file.
  api_token?: string;
  api_key?: string;
  schema?: Record<string, unknown>;
  extraction_type?: 'schema' | 'block' | 'markdown';
  instruction?: string;
  model?: string;
  base_url?: string;
  extra_headers?: Record<string, string>;
  extra_body?: Record<string, unknown>;
}

/** Parameters carried by the `'cosine'` variant of `ExtractionStrategy`. */
export interface CosineExtractionParams {
  semantic_filter?: string;
  word_count_threshold?: number;
  max_dist?: number;
  top_k?: number;
  model_name?: string;
}

/**
 * Discriminated union keyed on `type`; the shape of `params` depends on the
 * chosen variant.
 */
export type ExtractionStrategy =
  | {
      type: 'json_css';
      params: JsonCssExtractionParams;
    }
  | {
      type: 'llm';
      params: LlmExtractionParams;
    }
  | {
      type: 'cosine';
      params: CosineExtractionParams;
    };

/** Chunking strategy selector with an optional free-form parameter bag. */
export interface ChunkingStrategy {
  type: 'regex' | 'nltk' | 'recursive_url_based_chunking';
  params?: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Crawler run configuration
// ---------------------------------------------------------------------------

/**
 * Per-run crawler options; every member is optional.
 * Supplied through `CrawlRequest.crawler_config`.
 */
export interface CrawlerRunConfig {
  word_count_threshold?: number;
  extraction_strategy?: ExtractionStrategy;
  chunking_strategy?: ChunkingStrategy;
  markdown_generator?: Record<string, unknown>;
  css_selector?: string;
  screenshot?: boolean;
  pdf?: boolean;
  capture_mhtml?: boolean;
  // Cache control is declared both as an enum-like mode and as boolean flags.
  cache_mode?: CacheMode;
  bypass_cache?: boolean;
  disable_cache?: boolean;
  no_cache_read?: boolean;
  no_cache_write?: boolean;
  capture_network_requests?: boolean;
  capture_console_messages?: boolean;
  log_console?: boolean;
  stream?: boolean;
  warmup?: boolean;
  // Script inputs accept either a single string or a list of strings.
  js_code?: string | string[];
  c4a_script?: string | string[];
  js_only?: boolean;
  wait_for?: string;
  wait_until?: string;
  scan_full_page?: boolean;
  // NOTE(review): units of the numeric delay/timeout members below are not
  // stated in this file - confirm against the API documentation.
  scroll_delay?: number;
  page_timeout?: number;
  delay_before_return_html?: number;
  remove_overlay_elements?: boolean;
  mean_delay?: number;
  max_range?: number;
  semaphore_count?: number;
  base_delay?: number;
  random_delay?: number;
  ignore_robots_txt?: boolean;
  anti_bot?: boolean;
  light_mode?: boolean;
  locale?: string;
  timezone_id?: string;
  geolocation?: GeolocationConfig;
  proxy_config?: ProxyConfig;
  proxy_rotation_strategy?: string | Record<string, unknown>;
  url_matcher?: string | string[] | Record<string, unknown>;
  // The union ends in `string`, so any string is accepted; the four literals
  // only list the spellings named explicitly in the declaration.
  match_mode?: 'or' | 'and' | 'OR' | 'AND' | string;
  scraping_strategy?: Record<string, unknown>;
  verbose?: boolean;
  /** Open-ended bag for options not modelled above. */
  extra?: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Request payloads
// ---------------------------------------------------------------------------

/** Crawl request: one URL or a list of URLs plus optional configuration. */
export interface CrawlRequest {
  urls: string | string[];
  browser_config?: BrowserConfig;
  crawler_config?: CrawlerRunConfig;
  session_id?: string;
  priority?: number;
  ttl?: number;
  extra?: Record<string, unknown>;
}

/**
 * Markdown request. Declares long-form members (`filter`, `query`, `cache`)
 * alongside single-letter members (`f`, `q`, `c`); the short `q`/`c` members
 * additionally allow `null`.
 */
export interface MarkdownRequest {
  url: string;
  filter?: ContentFilter;
  query?: string;
  cache?: string;
  f?: ContentFilter;
  q?: string | null;
  c?: string | null;
}

/** HTML request: only a URL. */
export interface HtmlRequest {
  url: string;
}

/** Screenshot request. */
export interface ScreenshotRequest {
  url: string;
  // NOTE(review): unit of `screenshot_wait_for` is not stated here - confirm.
  screenshot_wait_for?: number;
  output_path?: string;
}

/** PDF request. */
export interface PdfRequest {
  url: string;
  output_path?: string;
}

/** JavaScript execution request: a URL and a list of scripts. */
export interface ExecuteJsRequest {
  url: string;
  scripts: string[];
}

/** Token request: an email address. */
export interface TokenRequest {
  email: string;
}

/** Config-dump request: a single `code` string. */
export interface ConfigDumpRequest {
  code: string;
}

/** Ask request; every member is optional. */
export interface AskRequest {
  context_type?: ContextType;
  query?: string;
  score_ratio?: number;
  max_results?: number;
}

// ---------------------------------------------------------------------------
// Crawl result building blocks
// ---------------------------------------------------------------------------

/** Structured markdown output; one possible form of `CrawlResult.markdown`. */
export interface MarkdownGenerationResult {
  raw_markdown: string;
  markdown_with_citations: string;
  references_markdown: string;
  fit_markdown?: string;
  fit_html?: string;
}

/** One media entry listed in `MediaInfo`. */
export interface MediaItem {
  src: string;
  alt?: string;
  // Both `desc` and `description` are declared.
  desc?: string;
  description?: string;
  score?: number;
  type?: string;
  mime_type?: string;
}

/** Media entries grouped into images, videos and audios. */
export interface MediaInfo {
  images: MediaItem[];
  videos: MediaItem[];
  audios: MediaItem[];
}

/** One link entry listed in `LinksInfo`. */
export interface LinkItem {
  href: string;
  text?: string;
  title?: string;
  base_domain?: string;
  relevance_score?: number;
  type?: string;
}

/** Link entries grouped into internal and external lists. */
export interface LinksInfo {
  internal: LinkItem[];
  external: LinkItem[];
}

/** Page metadata; every member is optional. */
export interface PageMetadata {
  title?: string;
  description?: string;
  keywords?: string;
  author?: string;
  language?: string;
  canonical_url?: string;
  open_graph?: Record<string, string>;
  twitter_card?: Record<string, string>;
}

/** SSL certificate details, all optional strings. */
export interface SSLCertificate {
  issuer?: string;
  subject?: string;
  valid_from?: string;
  valid_to?: string;
  fingerprint?: string;
}

/** Dispatch information attached to a result via `CrawlResult.dispatch_result`. */
export interface DispatchResult {
  status?: string;
  message?: string;
  data?: unknown;
}

/**
 * Result for a single crawled URL. Only `url`, `html` and `success` are
 * required; everything else is optional.
 */
export interface CrawlResult {
  url: string;
  html: string;
  success: boolean;
  cleaned_html?: string;
  media?: MediaInfo;
  links?: LinksInfo;
  downloaded_files?: string[];
  js_execution_result?: Record<string, unknown>;
  // NOTE(review): encoding of the `screenshot` / `pdf` / `mhtml` strings is
  // not visible in this file - confirm.
  screenshot?: string;
  pdf?: string;
  mhtml?: string;
  // `markdown` is either a plain string or the structured result object. The
  // members of that object are also declared flat on the result itself.
  markdown?: string | MarkdownGenerationResult;
  fit_markdown?: string;
  raw_markdown?: string;
  markdown_with_citations?: string;
  references_markdown?: string;
  fit_html?: string;
  extracted_content?: string;
  metadata?: PageMetadata;
  error_message?: string;
  session_id?: string;
  response_headers?: Record<string, string>;
  status_code?: number;
  ssl_certificate?: SSLCertificate;
  dispatch_result?: DispatchResult;
  redirected_url?: string;
  network_requests?: Record<string, unknown>[];
  console_messages?: Record<string, unknown>[];
  crawl_depth?: number;
  text?: string;
  cookies?: Cookie[];
  tables?: Record<string, unknown>[];
  server_memory_mb?: number;
}

// ---------------------------------------------------------------------------
// Response payloads
// ---------------------------------------------------------------------------

/** Crawl response: a list of results plus optional server statistics. */
export interface CrawlResponse {
  success?: boolean;
  results: CrawlResult[];
  server_processing_time_s?: number;
  server_memory_delta_mb?: number;
  server_peak_memory_mb?: number;
}

/** Status object with a required `status` string and arbitrary extra keys. */
export interface CrawlStreamStatus {
  status: string;
  [key: string]: unknown;
}

/** Either a crawl result or a status object. */
export type CrawlStreamChunk = CrawlResult | CrawlStreamStatus;

/** Markdown response; `url` and `markdown` are required. */
export interface MarkdownResponse {
  url: string;
  filter?: string | null;
  query?: string | null;
  cache?: string | null;
  markdown: string;
  success?: boolean;
}

/** HTML response; `html` is required. */
export interface HtmlResponse {
  html: string;
  url?: string;
  success?: boolean;
}

/** Screenshot response; `screenshot` is a string (encoding not stated here). */
export interface ScreenshotResponse {
  screenshot: string;
  success?: boolean;
}

/** PDF response; `pdf` is a string (encoding not stated here). */
export interface PdfResponse {
  pdf: string;
  success?: boolean;
}

/** Health response: status, numeric timestamp and version string. */
export interface HealthResponse {
  status: string;
  timestamp: number;
  version: string;
}

/** Config-dump response: either a key/value object or a string. */
export type ConfigDumpResponse = Record<string, unknown> | string;

/** MCP schema response: an untyped key/value object. */
export type McpSchemaResponse = Record<string, unknown>;

/** Token response. */
export interface TokenResponse {
  email: string;
  access_token: string;
  token_type: string;
}

/** Ask response. */
export interface AskResponse {
  context: string;
  type: ContextType;
  query?: string;
  results_count: number;
}

// ---------------------------------------------------------------------------
// Errors
// ---------------------------------------------------------------------------

/**
 * Validation error body: a `detail` list whose entries hold a location path
 * (`loc`, made of strings and/or numbers), a message and a type.
 */
export interface ValidationError {
  detail: {
    loc: (string | number)[];
    msg: string;
    type: string;
  }[];
}

/** `Error` extended with optional HTTP status information and a data payload. */
export interface ApiError extends Error {
  status?: number;
  statusText?: string;
  data?: ValidationError | Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// SDK client configuration
// ---------------------------------------------------------------------------

/** Client configuration; `baseUrl` is the only required member. */
export interface Crawl4AIConfig {
  baseUrl: string;
  apiToken?: string;
  // NOTE(review): units of `timeout` / `retryDelay` are not stated here - confirm.
  timeout?: number;
  retries?: number;
  retryDelay?: number;
  defaultHeaders?: Record<string, string>;
  throwOnError?: boolean;
  /** Predicate that receives a numeric status and returns a boolean. */
  validateStatus?: (status: number) => boolean;
  debug?: boolean;
}

/** Per-request options: timeout, abort signal and headers, all optional. */
export type RequestConfig = {
  timeout?: number;
  signal?: AbortSignal;
  headers?: Record<string, string>;
};