UNPKG

crawl4ai

Version:

TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible

240 lines (239 loc) 7.53 kB
/**
 * Crawl4AI TypeScript SDK
 *
 * Type declarations for the SDK client that talks to the Crawl4AI REST API.
 */
import type {
  AskRequest,
  AskResponse,
  Crawl4AIConfig,
  CrawlRequest,
  CrawlResult,
  ExecuteJsRequest,
  HealthResponse,
  HtmlRequest,
  MarkdownRequest,
  RequestConfig
} from './types';

/**
 * Client for the Crawl4AI REST API.
 *
 * Exposes web crawling, content extraction and related web-automation
 * endpoints of a Crawl4AI service as promise-returning methods.
 *
 * @example Basic usage
 * ```typescript
 * const client = new Crawl4AI({
 *   baseUrl: 'https://example.com',
 *   apiToken: 'your_token_here'
 * });
 *
 * const result = await client.crawl({
 *   urls: 'https://example.com',
 *   browser_config: { headless: true }
 * });
 * ```
 *
 * @example With custom configuration
 * ```typescript
 * const client = new Crawl4AI({
 *   baseUrl: 'http://localhost:11235',
 *   timeout: 60000,
 *   retries: 5,
 *   debug: true
 * });
 * ```
 */
export declare class Crawl4AI {
  /** Client configuration held by this instance (type not visible in this declaration). */
  private config;

  /**
   * Construct a client bound to one Crawl4AI server.
   *
   * @param config - Client configuration options
   * @param config.baseUrl - Base URL of the Crawl4AI server
   * @param config.apiToken - API token used for authentication (optional)
   * @param config.timeout - Request timeout in milliseconds (default: 300000)
   * @param config.retries - How many times a request is retried (default: 3)
   * @param config.retryDelay - Pause between retries in milliseconds (default: 1000)
   * @param config.debug - Turn debug logging on (default: false)
   * @param config.throwOnError - Throw on HTTP errors (default: true)
   */
  constructor(config: Crawl4AIConfig);

  /** Checks that a URL is well-formed. */
  private validateUrl;

  /** Emits debug information. */
  private log;

  /** Converts the differing API response shapes into one consistent array form. */
  private normalizeArrayResponse;

  /** Builds query parameters from an object, dropping entries whose value is undefined. */
  private buildQueryParams;

  /**
   * Internal request helper.
   * NOTE(review): signature and behavior are not visible in this declaration — confirm against the implementation.
   */
  private request;

  /**
   * Internal request helper.
   * NOTE(review): presumably layers the configured retry policy on top of `request` — confirm against the implementation.
   */
  private requestWithRetry;

  /**
   * Main crawl endpoint: extracts content from one or more URLs.
   *
   * @param request - Crawl configuration, including the URLs and options
   * @param config - Optional request configuration (timeout, headers, etc.)
   * @returns Promise resolving to an array of crawl results
   *
   * @example
   * ```typescript
   * const results = await client.crawl({
   *   urls: ['https://example.com'],
   *   browser_config: { headless: true },
   *   crawler_config: { cache_mode: 'bypass' }
   * });
   * ```
   *
   * @throws {RequestValidationError} If URLs are invalid
   * @throws {NetworkError} If network request fails
   * @throws {TimeoutError} If request times out
   */
  crawl(request: CrawlRequest, config?: RequestConfig): Promise<CrawlResult[]>;

  /**
   * Fetches a URL's content as markdown, with optional filtering.
   *
   * @param request - Markdown extraction configuration
   * @param request.url - URL to extract markdown from
   * @param request.filter - Content filter: 'raw' | 'fit' | 'bm25' | 'llm'
   * @param request.query - Query used for BM25/LLM filtering
   * @param request.cache - Cache mode (e.g., 'bypass')
   * @param config - Optional request configuration
   * @returns Promise resolving to the markdown string
   *
   * @example
   * ```typescript
   * const markdown = await client.markdown({
   *   url: 'https://example.com',
   *   filter: 'fit'
   * });
   * ```
   */
  markdown(request: MarkdownRequest, config?: RequestConfig): Promise<string>;

  /**
   * Fetches the HTML content of a URL.
   *
   * @param request - HTML extraction options
   * @param config - Optional request configuration
   * @returns Promise resolving to the HTML content as a string
   */
  html(request: HtmlRequest, config?: RequestConfig): Promise<string>;

  /**
   * Runs JavaScript on a webpage and returns the results.
   *
   * @param request - JavaScript execution configuration
   * @param request.url - URL to execute the scripts on
   * @param request.scripts - Array of JavaScript code to execute
   * @param config - Optional request configuration
   * @returns Promise resolving to a CrawlResult carrying `js_execution_result`
   *
   * @example
   * ```typescript
   * const result = await client.executeJs({
   *   url: 'https://example.com',
   *   scripts: [
   *     'return document.title;',
   *     'return document.querySelectorAll("a").length;'
   *   ]
   * });
   * console.log(result.js_execution_result);
   * ```
   */
  executeJs(request: ExecuteJsRequest, config?: RequestConfig): Promise<CrawlResult>;

  /**
   * Retrieves Crawl4AI library context intended for AI assistants.
   *
   * @param params - Query parameters
   * @param config - Optional request configuration
   * @returns Promise resolving to the ask response
   */
  ask(params?: AskRequest, config?: RequestConfig): Promise<AskResponse>;

  /**
   * LLM endpoint: processes a webpage with an LLM query.
   *
   * @param url - URL to process
   * @param query - Query string
   * @param config - Optional request configuration
   * @returns Promise resolving to the LLM's answer
   *
   * @example
   * ```typescript
   * const answer = await client.llm(
   *   'https://example.com',
   *   'What is the main heading on this page?'
   * );
   * console.log(answer); // "The main heading on this page is..."
   * ```
   */
  llm(url: string, query: string, config?: RequestConfig): Promise<string>;

  /**
   * Reports the API health status.
   *
   * @param config - Optional request configuration
   * @returns Promise resolving to the health response
   */
  health(config?: RequestConfig): Promise<HealthResponse>;

  /**
   * Retrieves Prometheus metrics.
   *
   * @param config - Optional request configuration
   * @returns Promise resolving to the metrics as a string
   */
  metrics(config?: RequestConfig): Promise<string>;

  /**
   * Retrieves the API schema.
   *
   * @param config - Optional request configuration
   * @returns Promise resolving to the schema; typed `unknown`, so narrow it before use
   */
  schema(config?: RequestConfig): Promise<unknown>;

  /**
   * Retrieves the root endpoint information.
   *
   * @param config - Optional request configuration
   * @returns Promise resolving to the root endpoint information as a string
   */
  getRoot(config?: RequestConfig): Promise<string>;

  /**
   * Checks whether the Crawl4AI API server can be reached.
   *
   * @param options - Optional configuration
   * @param options.throwOnError - Throw an error instead of returning false (default: false)
   * @returns Promise resolving to true when connected, false otherwise
   *
   * @example
   * ```typescript
   * if (await client.testConnection()) {
   *   console.log('Connected to Crawl4AI');
   * }
   * ```
   *
   * @example With error details
   * ```typescript
   * try {
   *   await client.testConnection({ throwOnError: true });
   * } catch (error) {
   *   console.error('Connection failed:', error);
   * }
   * ```
   */
  testConnection(options?: {
    throwOnError?: boolean;
  }): Promise<boolean>;

  /**
   * Retrieves the API version.
   *
   * @param options - Optional configuration
   * @param options.throwOnError - Throw an error instead of returning 'unknown' (default: false)
   * @returns Promise resolving to the version string, or 'unknown' if unavailable
   *
   * @example
   * ```typescript
   * const version = await client.version();
   * console.log('API version:', version);
   * ```
   */
  version(options?: {
    throwOnError?: boolean;
  }): Promise<string>;

  /**
   * Replaces the API token used for authentication.
   *
   * @param token - New API token (empty string to remove)
   *
   * @example
   * ```typescript
   * client.setApiToken('new-api-token');
   * ```
   */
  setApiToken(token: string): void;

  /**
   * Replaces the base URL.
   *
   * @param baseUrl - New base URL
   */
  setBaseUrl(baseUrl: string): void;

  /**
   * Switches debug mode on or off.
   *
   * @param debug - true to enable debug mode, false to disable it
   */
  setDebug(debug: boolean): void;
}

/**
 * Default export: the Crawl4AI client class.
 */
export default Crawl4AI;