/*
* crawl4ai
* Version:
* TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible
* 240 lines (239 loc) • 7.53 kB
* TypeScript
*/
/**
* Crawl4AI TypeScript SDK
* A comprehensive SDK for interacting with Crawl4AI REST API
*/
import type { AskRequest, AskResponse, Crawl4AIConfig, CrawlRequest, CrawlResult, ExecuteJsRequest, HealthResponse, HtmlRequest, MarkdownRequest, RequestConfig } from './types';
/**
* Crawl4AI SDK Client - Main class for interacting with Crawl4AI REST API
*
* Provides methods for web crawling, content extraction, and various
* web automation tasks through the Crawl4AI service.
*
* @example Basic usage
* ```typescript
* const client = new Crawl4AI({
* baseUrl: 'https://example.com',
* apiToken: 'your_token_here'
* });
*
* const result = await client.crawl({
* urls: 'https://example.com',
* browser_config: { headless: true }
* });
* ```
*
* @example With custom configuration
* ```typescript
* const client = new Crawl4AI({
* baseUrl: 'http://localhost:11235',
* timeout: 60000,
* retries: 5,
* debug: true
* });
* ```
*/
export declare class Crawl4AI {
/**
* Client configuration held by the instance.
*
* NOTE(review): the type is elided in this declaration output because the
* member is private; presumably derived from the `Crawl4AIConfig` passed to
* the constructor - confirm against the implementation.
*/
private config;
/**
* Create a new Crawl4AI client instance
*
* @param config - Client configuration options
* @param config.baseUrl - Base URL of Crawl4AI server
* @param config.apiToken - Optional API token for authentication
* @param config.timeout - Request timeout in milliseconds (default: 300000)
* @param config.retries - Number of retry attempts (default: 3)
* @param config.retryDelay - Delay between retries in milliseconds (default: 1000)
* @param config.debug - Enable debug logging (default: false)
* @param config.throwOnError - Throw on HTTP errors (default: true)
*/
constructor(config: Crawl4AIConfig);
/**
* Validate URL format
*
* Private helper; its parameters and return type are not visible in this
* declaration.
*/
private validateUrl;
/**
* Log debug information
*
* Private helper; presumably gated by the `debug` option - confirm against
* the implementation.
*/
private log;
/**
* Normalize different API response formats to a consistent array
*/
private normalizeArrayResponse;
/**
* Build query parameters from an object, filtering out undefined values
*/
private buildQueryParams;
/**
* Low-level request helper.
*
* NOTE(review): signature is not visible in this declaration; presumably
* performs a single HTTP call against the configured base URL - confirm
* against the implementation.
*/
private request;
/**
* Request helper with retry support.
*
* NOTE(review): signature is not visible in this declaration; presumably
* wraps `request` using the `retries` / `retryDelay` options - confirm
* against the implementation.
*/
private requestWithRetry;
/**
* Main crawl endpoint - Extract content from one or more URLs
*
* @param request - Crawl configuration including URLs and options
* @param config - Optional request configuration (timeout, headers, etc.)
* @returns Promise resolving to array of crawl results
*
* @example
* ```typescript
* const results = await client.crawl({
* urls: ['https://example.com'],
* browser_config: { headless: true },
* crawler_config: { cache_mode: 'bypass' }
* });
* ```
*
* @throws {RequestValidationError} If URLs are invalid
* @throws {NetworkError} If network request fails
* @throws {TimeoutError} If request times out
*/
crawl(request: CrawlRequest, config?: RequestConfig): Promise<CrawlResult[]>;
/**
* Get markdown content from URL with optional filtering
*
* @param request - Markdown extraction configuration
* @param request.url - URL to extract markdown from
* @param request.filter - Content filter: 'raw' | 'fit' | 'bm25' | 'llm'
* @param request.query - Query for BM25/LLM filtering
* @param request.cache - Cache mode (e.g., 'bypass')
* @param config - Optional request configuration
* @returns Promise resolving to markdown string
*
* @example
* ```typescript
* const markdown = await client.markdown({
* url: 'https://example.com',
* filter: 'fit'
* });
* ```
*/
markdown(request: MarkdownRequest, config?: RequestConfig): Promise<string>;
/**
* Get HTML content from URL
*
* @param request - HTML extraction options
* @param config - Optional request configuration
* @returns Promise resolving to the HTML content as a string
*/
html(request: HtmlRequest, config?: RequestConfig): Promise<string>;
/**
* Execute JavaScript on webpage and return results
*
* @param request - JavaScript execution configuration
* @param request.url - URL to execute scripts on
* @param request.scripts - Array of JavaScript code to execute
* @param config - Optional request configuration
* @returns Promise resolving to CrawlResult with js_execution_result
*
* @example
* ```typescript
* const result = await client.executeJs({
* url: 'https://example.com',
* scripts: [
* 'return document.title;',
* 'return document.querySelectorAll("a").length;'
* ]
* });
* console.log(result.js_execution_result);
* ```
*/
executeJs(request: ExecuteJsRequest, config?: RequestConfig): Promise<CrawlResult>;
/**
* Get Crawl4AI library context for AI assistants
*
* @param params - Optional query parameters
* @param config - Optional request configuration
* @returns Promise resolving to an `AskResponse`
*/
ask(params?: AskRequest, config?: RequestConfig): Promise<AskResponse>;
/**
* LLM endpoint - Process a webpage with an LLM query
*
* @param url - URL to process
* @param query - Query string
* @param config - Optional request configuration
* @returns Promise resolving to the LLM's answer
*
* @example
* ```typescript
* const answer = await client.llm(
* 'https://example.com',
* 'What is the main heading on this page?'
* );
* console.log(answer); // "The main heading on this page is..."
* ```
*/
llm(url: string, query: string, config?: RequestConfig): Promise<string>;
/**
* Get API health status
*
* @param config - Optional request configuration
* @returns Promise resolving to a `HealthResponse`
*/
health(config?: RequestConfig): Promise<HealthResponse>;
/**
* Get Prometheus metrics
*
* @param config - Optional request configuration
* @returns Promise resolving to the metrics as a string
*/
metrics(config?: RequestConfig): Promise<string>;
/**
* Get API schema
*
* The result is typed as `unknown`, so callers must narrow or validate it
* before use.
*
* @param config - Optional request configuration
* @returns Promise resolving to the schema payload
*/
schema(config?: RequestConfig): Promise<unknown>;
/**
* Get root endpoint information
*
* @param config - Optional request configuration
* @returns Promise resolving to the root endpoint response as a string
*/
getRoot(config?: RequestConfig): Promise<string>;
/**
* Test connection to the Crawl4AI API server
*
* @param options - Optional configuration
* @param options.throwOnError - Throw error instead of returning false (default: false)
* @returns Promise resolving to true if connected, false otherwise
*
* @example
* ```typescript
* if (await client.testConnection()) {
* console.log('Connected to Crawl4AI');
* }
* ```
*
* @example With error details
* ```typescript
* try {
* await client.testConnection({ throwOnError: true });
* } catch (error) {
* console.error('Connection failed:', error);
* }
* ```
*/
testConnection(options?: {
throwOnError?: boolean;
}): Promise<boolean>;
/**
* Get API version
*
* @param options - Optional configuration
* @param options.throwOnError - Throw error instead of returning 'unknown' (default: false)
* @returns Promise resolving to version string or 'unknown' if unavailable
*
* @example
* ```typescript
* const version = await client.version();
* console.log('API version:', version);
* ```
*/
version(options?: {
throwOnError?: boolean;
}): Promise<string>;
/**
* Update API token for authentication
*
* @param token - New API token (empty string to remove)
*
* @example
* ```typescript
* client.setApiToken('new-api-token');
* ```
*/
setApiToken(token: string): void;
/**
* Update base URL
*
* NOTE(review): whether the new value is validated (cf. `validateUrl`) is
* not visible in this declaration - confirm against the implementation.
*
* @param baseUrl - New base URL for the client
*/
setBaseUrl(baseUrl: string): void;
/**
* Enable/disable debug mode
*
* @param debug - `true` to enable debug mode, `false` to disable it
*/
setDebug(debug: boolean): void;
}
/**
* Default export - Crawl4AI client class
*
* The same class is also available as the named export `Crawl4AI`.
*/
export default Crawl4AI;