crawl4ai
Version:
TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible
380 lines (379 loc) • 9.96 kB
TypeScript
/**
 * Crawl4AI TypeScript SDK - Type Definitions
 * Based on actual API endpoints and Swagger documentation
 */
/** Union of the five HTTP method names recognised by these type definitions. */
export type HttpMethod =
  | 'GET'
  | 'POST'
  | 'PUT'
  | 'DELETE'
  | 'PATCH';
/** String literals accepted for a cache mode setting. */
export type CacheMode =
  | 'enabled'
  | 'disabled'
  | 'bypass'
  | 'read_only'
  | 'write_only';
/** Names of the browser engines that can be selected. */
export type BrowserType =
  | 'chromium'
  | 'firefox'
  | 'webkit';
/** String literals accepted for a content filter selection. */
export type ContentFilter =
  | 'raw'
  | 'fit'
  | 'bm25'
  | 'llm';
/** String literals accepted for a context type selection. */
export type ContextType =
  | 'code'
  | 'doc'
  | 'all';
/**
 * A width/height pair. Both dimensions are required numbers; the unit is not
 * stated in this file.
 */
export interface Viewport {
  width: number;
  height: number;
}
/**
 * Browser-level settings. Every field is optional.
 *
 * Some settings can be expressed in more than one way (`proxy` and
 * `proxy_config`; `viewport` and `viewport_width`/`viewport_height`). Which one
 * wins when both are supplied is not defined by this type.
 */
export interface BrowserConfig {
  headless?: boolean;
  browser_type?: BrowserType;
  browser_mode?:
    | 'dedicated'
    | 'builtin'
    | 'custom'
    | 'docker';
  use_managed_browser?: boolean;
  cdp_url?: string;
  debugging_port?: number;
  host?: string;

  proxy_config?: ProxyConfig;
  user_agent?: string;
  user_agent_mode?: string;
  proxy?: string;
  page_timeout?: number;
  verbose?: boolean;

  simulate_user?: boolean;
  magic?: boolean;
  override_navigator?: boolean;
  user_data_dir?: string;
  use_persistent_context?: boolean;
  text_mode?: boolean;
  light_mode?: boolean;
  enable_stealth?: boolean;

  viewport?: Viewport;
  viewport_width?: number;
  viewport_height?: number;

  headers?: Record<string, string>;
  cookies?: Cookie[];
  extra_args?: string[];
  ignore_https_errors?: boolean;
  java_script_enabled?: boolean;
  accept_downloads?: boolean;
  downloads_path?: string;

  /** Open-ended bag for keys that have no dedicated field above. */
  extra?: Record<string, unknown>;
}
/**
 * A single cookie. `name` and `value` are required; the remaining attributes
 * are optional.
 */
export interface Cookie {
  name: string;
  value: string;

  domain?: string;
  path?: string;
  /** Numeric expiry; the time unit is not stated in this file. */
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?:
    | 'Strict'
    | 'Lax'
    | 'None';
}
/** Parameters of the `json_css` extraction variant: a single required schema. */
export interface JsonCssExtractionParams {
  schema: CssExtractionSchema;
}
/**
 * Schema for CSS-based extraction: a required base selector plus the list of
 * fields to extract, with an optional name.
 */
export interface CssExtractionSchema {
  name?: string;
  baseSelector: string;
  fields: CssExtractionField[];
}
/**
 * One field in a CSS extraction schema. `name`, `selector` and `type` are
 * required.
 */
export interface CssExtractionField {
  name: string;
  selector: string;
  type:
    | 'text'
    | 'html'
    | 'attribute'
    | 'href'
    | 'src';
  // NOTE(review): presumably the attribute name used when `type` is
  // 'attribute' — confirm against the API.
  attribute?: string;
  multiple?: boolean;
  transform?: string;
}
/**
 * Parameters of the `llm` extraction variant. Only `provider` is required.
 *
 * NOTE(review): both `api_token` and `api_key` are declared; whether they are
 * interchangeable is not visible here — confirm against the API.
 */
export interface LlmExtractionParams {
  provider: string;

  api_token?: string;
  api_key?: string;

  schema?: Record<string, unknown>;
  extraction_type?:
    | 'schema'
    | 'block'
    | 'markdown';
  instruction?: string;

  model?: string;
  base_url?: string;
  extra_headers?: Record<string, string>;
  extra_body?: Record<string, unknown>;
}
/** Parameters of the `cosine` extraction variant. Every field is optional. */
export interface CosineExtractionParams {
  semantic_filter?: string;
  word_count_threshold?: number;
  max_dist?: number;
  top_k?: number;
  model_name?: string;
}
/** Proxy description: a required `server` string and optional credentials. */
export interface ProxyConfig {
  server: string;
  username?: string;
  password?: string;
}
/**
 * Geographic position: required `latitude` and `longitude`, with an optional
 * `accuracy`. Units are not stated in this file.
 */
export interface GeolocationConfig {
  latitude: number;
  longitude: number;
  accuracy?: number;
}
/**
 * Discriminated union of the extraction strategies. The literal `type` tag
 * selects which `params` shape applies, so narrowing on `type` narrows
 * `params` as well.
 */
export type ExtractionStrategy =
  | { type: 'json_css'; params: JsonCssExtractionParams }
  | { type: 'llm'; params: LlmExtractionParams }
  | { type: 'cosine'; params: CosineExtractionParams };
/**
 * Per-crawl settings. Every field is optional.
 *
 * Several cache-related switches are declared side by side (`cache_mode`,
 * `bypass_cache`, `disable_cache`, `no_cache_read`, `no_cache_write`); how they
 * interact is not defined by this type.
 */
export interface CrawlerRunConfig {
  word_count_threshold?: number;
  extraction_strategy?: ExtractionStrategy;
  chunking_strategy?: ChunkingStrategy;
  markdown_generator?: Record<string, unknown>;
  css_selector?: string;

  screenshot?: boolean;
  pdf?: boolean;
  capture_mhtml?: boolean;

  cache_mode?: CacheMode;
  bypass_cache?: boolean;
  disable_cache?: boolean;
  no_cache_read?: boolean;
  no_cache_write?: boolean;

  capture_network_requests?: boolean;
  capture_console_messages?: boolean;
  log_console?: boolean;
  stream?: boolean;
  warmup?: boolean;

  /** A single script or a list of scripts. */
  js_code?: string | string[];
  /** A single script or a list of scripts. */
  c4a_script?: string | string[];
  js_only?: boolean;
  wait_for?: string;
  wait_until?: string;
  scan_full_page?: boolean;
  scroll_delay?: number;
  page_timeout?: number;
  delay_before_return_html?: number;
  remove_overlay_elements?: boolean;

  mean_delay?: number;
  max_range?: number;
  semaphore_count?: number;
  base_delay?: number;
  random_delay?: number;

  ignore_robots_txt?: boolean;
  anti_bot?: boolean;
  light_mode?: boolean;

  locale?: string;
  timezone_id?: string;
  geolocation?: GeolocationConfig;

  proxy_config?: ProxyConfig;
  proxy_rotation_strategy?: string | Record<string, unknown>;

  url_matcher?: string | string[] | Record<string, unknown>;
  /**
   * Known values are 'or' / 'and' in either case; any other string is still
   * accepted.
   *
   * The open-ended member is written `(string & {})` rather than `string`:
   * a bare `string` absorbs the literal members and collapses the whole union
   * to `string`, which hides the known values from editor completion. The
   * intersection accepts exactly the same set of strings.
   */
  match_mode?: 'or' | 'and' | 'OR' | 'AND' | (string & {});

  scraping_strategy?: Record<string, unknown>;
  verbose?: boolean;

  /** Open-ended bag for keys that have no dedicated field above. */
  extra?: Record<string, unknown>;
}
/**
 * Chunking strategy selection: a required `type` tag and an optional free-form
 * `params` object.
 */
export interface ChunkingStrategy {
  type:
    | 'regex'
    | 'nltk'
    | 'recursive_url_based_chunking';
  params?: Record<string, unknown>;
}
/**
 * Crawl request shape. Only `urls` is required, and it may be one URL string
 * or a list of them.
 */
export interface CrawlRequest {
  urls: string | string[];

  browser_config?: BrowserConfig;
  crawler_config?: CrawlerRunConfig;

  session_id?: string;
  priority?: number;
  ttl?: number;

  /** Open-ended bag for keys that have no dedicated field above. */
  extra?: Record<string, unknown>;
}
/**
 * Markdown request shape. Only `url` is required.
 *
 * NOTE(review): `f` / `q` / `c` look like short spellings of `filter` /
 * `query` / `cache` — confirm which spelling the server reads. Only the short
 * `q` and `c` are typed to accept `null`.
 */
export interface MarkdownRequest {
  url: string;

  filter?: ContentFilter;
  query?: string;
  cache?: string;

  f?: ContentFilter;
  q?: string | null;
  c?: string | null;
}
/** HTML request shape: a single required `url`. */
export interface HtmlRequest {
  url: string;
}
/**
 * Screenshot request shape. `url` is required; the wait value and output path
 * are optional. The unit of `screenshot_wait_for` is not stated in this file.
 */
export interface ScreenshotRequest {
  url: string;
  screenshot_wait_for?: number;
  output_path?: string;
}
/** PDF request shape: a required `url` and an optional `output_path`. */
export interface PdfRequest {
  url: string;
  output_path?: string;
}
/** JavaScript execution request shape: a `url` plus a list of script strings. */
export interface ExecuteJsRequest {
  url: string;
  scripts: string[];
}
/** Token request shape: a single required `email`. */
export interface TokenRequest {
  email: string;
}
/** Config dump request shape: a single required `code` string. */
export interface ConfigDumpRequest {
  code: string;
}
/** Ask request shape. Every field is optional. */
export interface AskRequest {
  context_type?: ContextType;
  query?: string;
  score_ratio?: number;
  max_results?: number;
}
/**
 * Structured markdown output. The three markdown strings are always present;
 * the two `fit_*` strings are optional.
 */
export interface MarkdownGenerationResult {
  raw_markdown: string;
  markdown_with_citations: string;
  references_markdown: string;

  fit_markdown?: string;
  fit_html?: string;
}
/**
 * Crawl response shape. `results` is the only required field; the success flag
 * and the `server_*` numbers are optional.
 */
export interface CrawlResponse {
  success?: boolean;
  results: CrawlResult[];

  server_processing_time_s?: number;
  server_memory_delta_mb?: number;
  server_peak_memory_mb?: number;
}
/**
 * Status object in a crawl stream: a required `status` string plus any number
 * of additional keys of unknown type.
 */
export interface CrawlStreamStatus {
  status: string;
  [key: string]: unknown;
}
/** One streamed item: either a crawl result or a status object. */
export type CrawlStreamChunk =
  | CrawlResult
  | CrawlStreamStatus;
/**
 * Markdown response shape. `url` and `markdown` are required; `filter`,
 * `query` and `cache` may be absent or `null`.
 */
export interface MarkdownResponse {
  url: string;

  filter?: string | null;
  query?: string | null;
  cache?: string | null;

  markdown: string;
  success?: boolean;
}
/** HTML response shape: a required `html` string, optional `url` and `success`. */
export interface HtmlResponse {
  html: string;
  url?: string;
  success?: boolean;
}
/**
 * Screenshot response shape: a required `screenshot` string (its encoding is
 * not stated in this file) and an optional `success` flag.
 */
export interface ScreenshotResponse {
  screenshot: string;
  success?: boolean;
}
/**
 * PDF response shape: a required `pdf` string (its encoding is not stated in
 * this file) and an optional `success` flag.
 */
export interface PdfResponse {
  pdf: string;
  success?: boolean;
}
/**
 * Result for one crawled URL. `url`, `html` and `success` are always present;
 * everything else is optional.
 *
 * `markdown` may be a plain string or a structured
 * {@link MarkdownGenerationResult}. The same markdown field names are also
 * declared directly on this interface as optional strings.
 */
export interface CrawlResult {
  url: string;
  html: string;
  success: boolean;

  cleaned_html?: string;
  media?: MediaInfo;
  links?: LinksInfo;
  downloaded_files?: string[];
  js_execution_result?: Record<string, unknown>;

  screenshot?: string;
  pdf?: string;
  mhtml?: string;

  markdown?: string | MarkdownGenerationResult;
  fit_markdown?: string;
  raw_markdown?: string;
  markdown_with_citations?: string;
  references_markdown?: string;
  fit_html?: string;

  extracted_content?: string;
  metadata?: PageMetadata;
  error_message?: string;
  session_id?: string;

  response_headers?: Record<string, string>;
  status_code?: number;
  ssl_certificate?: SSLCertificate;
  dispatch_result?: DispatchResult;
  redirected_url?: string;

  network_requests?: Array<Record<string, unknown>>;
  console_messages?: Array<Record<string, unknown>>;

  crawl_depth?: number;
  text?: string;
  cookies?: Cookie[];
  tables?: Array<Record<string, unknown>>;
  server_memory_mb?: number;
}
/** Media items grouped into three required lists: images, videos and audios. */
export interface MediaInfo {
  images: MediaItem[];
  videos: MediaItem[];
  audios: MediaItem[];
}
/**
 * A single media entry. Only `src` is required.
 *
 * NOTE(review): both `desc` and `description` are declared; whether the server
 * fills one or both is not visible here.
 */
export interface MediaItem {
  src: string;

  alt?: string;
  desc?: string;
  description?: string;
  score?: number;
  type?: string;
  mime_type?: string;
}
/** Link items split into two required lists: `internal` and `external`. */
export interface LinksInfo {
  internal: LinkItem[];
  external: LinkItem[];
}
/** A single link entry. Only `href` is required. */
export interface LinkItem {
  href: string;

  text?: string;
  title?: string;
  base_domain?: string;
  relevance_score?: number;
  type?: string;
}
/**
 * Page-level metadata. Every field is optional. `open_graph` and
 * `twitter_card` are string-to-string maps.
 */
export interface PageMetadata {
  title?: string;
  description?: string;
  keywords?: string;
  author?: string;
  language?: string;
  canonical_url?: string;

  open_graph?: Record<string, string>;
  twitter_card?: Record<string, string>;
}
/**
 * Certificate details. Every field is an optional string; the date format of
 * `valid_from` / `valid_to` is not stated in this file.
 */
export interface SSLCertificate {
  issuer?: string;
  subject?: string;
  valid_from?: string;
  valid_to?: string;
  fingerprint?: string;
}
/**
 * Dispatch outcome. Every field is optional, and `data` is untyped, so it must
 * be narrowed before use.
 */
export interface DispatchResult {
  status?: string;
  message?: string;
  data?: unknown;
}
/**
 * Health response shape: required `status`, numeric `timestamp` (unit not
 * stated in this file) and `version`.
 */
export interface HealthResponse {
  status: string;
  timestamp: number;
  version: string;
}
/** Config dump response: either a free-form object or a plain string. */
export type ConfigDumpResponse =
  | Record<string, unknown>
  | string;
/** MCP schema response: a free-form object whose values must be narrowed before use. */
export type McpSchemaResponse = Record<
  string,
  unknown
>;
/** Token response shape: three required strings. */
export interface TokenResponse {
  email: string;
  access_token: string;
  token_type: string;
}
/**
 * Ask response shape. `context`, `type` and `results_count` are required;
 * `query` is optional.
 */
export interface AskResponse {
  context: string;
  type: ContextType;
  query?: string;
  results_count: number;
}
/**
 * Validation error payload: a `detail` list in which each entry carries a
 * location path (`loc`, made of strings and/or numbers), a message and a type
 * string.
 */
export interface ValidationError {
  detail: Array<{
    loc: Array<string | number>;
    msg: string;
    type: string;
  }>;
}
/**
 * An `Error` extended with optional HTTP details: a numeric `status`, its
 * `statusText`, and a `data` payload that is either a {@link ValidationError}
 * or a free-form object.
 */
export interface ApiError extends Error {
  status?: number;
  statusText?: string;
  data?:
    | ValidationError
    | Record<string, unknown>;
}
/**
 * SDK-level configuration. Only `baseUrl` is required.
 *
 * Units for `timeout` and `retryDelay` are not stated in this file.
 */
export interface Crawl4AIConfig {
  baseUrl: string;
  apiToken?: string;

  timeout?: number;
  retries?: number;
  retryDelay?: number;

  defaultHeaders?: Record<string, string>;
  throwOnError?: boolean;
  /** Predicate over a numeric status code that returns a boolean. */
  validateStatus?: (status: number) => boolean;
  debug?: boolean;
}
/**
 * Per-request options. Every field is optional: a numeric `timeout` (unit not
 * stated in this file), an `AbortSignal`, and string-to-string headers.
 */
export type RequestConfig = {
  timeout?: number;
  signal?: AbortSignal;
  headers?: Record<string, string>;
};