// UNPKG
//
// @mendable/firecrawl-js
//
// Version:
// 1,653 lines (1,645 loc) 74.1 kB
import * as zt from 'zod';
import { ZodTypeAny } from 'zod';
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
import { EventEmitter } from 'events';
import { TypedEventTarget } from 'typescript-event-target';

/** Format names that may be supplied as a bare string. */
type FormatString =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "images"
  | "screenshot"
  | "summary"
  | "changeTracking"
  | "json"
  | "attributes"
  | "branding"
  | "audio"
  | "video";

/** Width/height pair accepted by the screenshot `viewport` settings. */
interface Viewport {
  width: number;
  height: number;
}

/** Object form of a format: `{ type }`, where `type` is any `FormatString`. */
interface Format {
  type: FormatString;
}

/** `"json"` format carrying an optional prompt and a JSON Schema object or Zod schema. */
interface JsonFormat extends Format {
  type: "json";
  prompt?: string;
  schema?: Record<string, unknown> | ZodTypeAny;
}

/** `"screenshot"` format with optional capture settings. */
interface ScreenshotFormat {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?:
    | Viewport
    | {
        width: number;
        height: number;
      };
}

/** `"changeTracking"` format; `modes` is the only required setting. */
interface ChangeTrackingFormat extends Format {
  type: "changeTracking";
  modes: ("git-diff" | "json")[];
  /**
   * Either a JSON Schema object or a Zod schema. Zod schemas are
   * auto-converted to JSON Schema by the SDK before being sent — see
   * `utils/validation.ts`.
   */
  schema?: Record<string, unknown> | ZodTypeAny;
  prompt?: string;
  tag?: string;
}

/** `"attributes"` format: a list of selector/attribute pairs. */
interface AttributesFormat extends Format {
  type: "attributes";
  selectors: Array<{
    selector: string;
    attribute: string;
  }>;
}

/** `"question"` format carrying the question text. */
interface QuestionFormat {
  type: "question";
  question: string;
}

/** `"highlights"` format carrying the query text. */
interface HighlightsFormat {
  type: "highlights";
  query: string;
}

/** @deprecated Use QuestionFormat or HighlightsFormat instead.
*/
interface QueryFormat {
  type: "query";
  prompt: string;
  mode?: "freeform" | "directQuote";
}

/** Every accepted way of naming a format: bare string or one of the format objects. */
type FormatOption =
  | FormatString
  | Format
  | JsonFormat
  | ChangeTrackingFormat
  | ScreenshotFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;

/** `FormatString` minus the names excluded for parse requests. */
type ParseFormatString = Exclude<
  FormatString,
  "screenshot" | "changeTracking" | "branding" | "audio" | "video"
>;

/** Object form of a parse format. */
interface ParseFormat {
  type: ParseFormatString;
}

/** Format options accepted by `ParseOptions.formats`. */
type ParseFormatOption =
  | ParseFormatString
  | ParseFormat
  | JsonFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;

/** Optional country and language list. */
interface LocationConfig$1 {
  country?: string;
  languages?: string[];
}

// ---------------------------------------------------------------------------
// Actions — each interface is one member of the `ActionOption` union,
// discriminated by its literal `type`.
// ---------------------------------------------------------------------------

interface WaitAction {
  type: "wait";
  milliseconds?: number;
  selector?: string;
}

interface ScreenshotAction {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?:
    | Viewport
    | {
        width: number;
        height: number;
      };
}

interface ClickAction {
  type: "click";
  selector: string;
}

interface WriteAction {
  type: "write";
  text: string;
}

interface PressAction {
  type: "press";
  key: string;
}

interface ScrollAction {
  type: "scroll";
  direction: "up" | "down";
  selector?: string;
}

interface ScrapeAction {
  type: "scrape";
}

interface ExecuteJavascriptAction {
  type: "executeJavascript";
  script: string;
}

interface PDFAction {
  type: "pdf";
  format?:
    | "A0"
    | "A1"
    | "A2"
    | "A3"
    | "A4"
    | "A5"
    | "A6"
    | "Letter"
    | "Legal"
    | "Tabloid"
    | "Ledger";
  landscape?: boolean;
  scale?: number;
}

/** Union of every action accepted by `ScrapeOptions.actions`. */
type ActionOption =
  | WaitAction
  | ScreenshotAction
  | ClickAction
  | WriteAction
  | PressAction
  | ScrollAction
  | ScrapeAction
  | ExecuteJavascriptAction
  | PDFAction;

/** Options accepted by scrape requests; every member is optional. */
interface ScrapeOptions {
  formats?: FormatOption[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  timeout?: number;
  waitFor?: number;
  mobile?: boolean;
  parsers?: Array<
    | string
    | {
        type: "pdf";
        mode?: "fast" | "auto" | "ocr";
        maxPages?: number;
      }
  >;
  actions?: ActionOption[];
  location?: LocationConfig$1;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  fastMode?: boolean;
  useMock?: string;
  blockAds?: boolean;
  // NOTE(review): the trailing `| string` subsumes the literal members for
  // type-checking, so any string is accepted; the literals only document the
  // known values.
  proxy?: "basic" | "stealth" | "enhanced" | "auto" | string;
  maxAge?: number;
  minAge?: number;
  storeInCache?: boolean;
  lockdown?: boolean;
  profile?: {
    name: string;
    saveChanges?: boolean;
  };
  integration?: string;
  origin?: string;
}

/** Accepted in-memory representations of a file handed to parse. */
type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;

/** File payload for parse: data, filename and optional content type. */
interface ParseFile {
  data: ParseFileData;
  filename: string;
  contentType?: string;
}

/**
 * `ScrapeOptions` with the listed keys removed, then `formats` and `proxy`
 * re-declared with narrower types.
 */
type ParseOptions = Omit<
  ScrapeOptions,
  | "formats"
  | "waitFor"
  | "mobile"
  | "actions"
  | "location"
  | "maxAge"
  | "minAge"
  | "storeInCache"
  | "lockdown"
  | "proxy"
> & {
  formats?: ParseFormatOption[];
  proxy?: "basic" | "auto";
};

/** Webhook target with optional headers, metadata and event filter. */
interface WebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: Array<"completed" | "failed" | "page" | "started">;
}

/** Event names selectable in `AgentWebhookConfig.events`. */
type AgentWebhookEvent =
  | "started"
  | "action"
  | "completed"
  | "failed"
  | "cancelled";

/** Webhook target for agent jobs. */
interface AgentWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: AgentWebhookEvent[];
}

/**
 * Shape of `Document.branding`. Most nested objects carry an index signature,
 * so keys beyond the named ones are allowed.
 */
interface BrandingProfile {
  colorScheme?: "light" | "dark";
  logo?: string | null;
  fonts?: Array<{
    family: string;
    [key: string]: unknown;
  }>;
  colors?: {
    primary?: string;
    secondary?: string;
    accent?: string;
    background?: string;
    textPrimary?: string;
    textSecondary?: string;
    link?: string;
    success?: string;
    warning?: string;
    error?: string;
    [key: string]: string | undefined;
  };
  typography?: {
    fontFamilies?: {
      primary?: string;
      heading?: string;
      code?: string;
      [key: string]: string | undefined;
    };
    fontStacks?: {
      primary?: string[];
      heading?: string[];
      body?: string[];
      paragraph?: string[];
      [key: string]: string[] | undefined;
    };
    fontSizes?: {
      h1?: string;
      h2?: string;
      h3?: string;
      body?: string;
      small?: string;
      [key: string]: string | undefined;
    };
    lineHeights?: {
      heading?: number;
      body?: number;
      [key: string]: number | undefined;
    };
    fontWeights?: {
      light?: number;
      regular?: number;
      medium?: number;
      bold?: number;
      [key: string]: number | undefined;
    };
  };
  spacing?: {
    baseUnit?: number;
    padding?: Record<string, number>;
    margins?: Record<string, number>;
    gridGutter?: number;
    borderRadius?: string;
    [key: string]: number | string | Record<string, number> | undefined;
  };
  components?: {
    buttonPrimary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    buttonSecondary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    input?: {
      borderColor?: string;
      focusBorderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    [key: string]: unknown;
  };
  icons?: {
    style?: string;
    primaryColor?: string;
    [key: string]: string | undefined;
  };
  images?: {
    logo?: string | null;
    favicon?: string | null;
    ogImage?: string | null;
    [key: string]: string | null | undefined;
  };
  animations?: {
    transitionDuration?: string;
    easing?: string;
    [key: string]: string | undefined;
  };
  layout?: {
    grid?: {
      columns?: number;
      maxWidth?: string;
      [key: string]: number | string | undefined;
    };
    headerHeight?: string;
    footerHeight?: string;
    [key: string]:
      | number
      | string
      | Record<string, number | string | undefined>
      | undefined;
  };
  tone?: {
    voice?: string;
    emojiUsage?: string;
    [key: string]: string | undefined;
  };
  personality?: {
    tone:
      | "professional"
      | "playful"
      | "modern"
      | "traditional"
      | "minimalist"
      | "bold";
    energy: "low" | "medium" | "high";
    targetAudience: string;
  };
  [key: string]: unknown;
}

/** Shape of `Document.metadata`; unlisted keys are typed `unknown`. */
interface DocumentMetadata {
  title?: string;
  description?: string;
  url?: string;
  language?: string;
  keywords?: string | string[];
  robots?: string;
  ogTitle?: string;
  ogDescription?: string;
  ogUrl?: string;
  ogImage?: string;
  ogAudio?: string;
  ogDeterminer?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogSiteName?: string;
  ogVideo?: string;
  favicon?: string;
  dcTermsCreated?: string;
  dcDateCreated?: string;
  dcDate?: string;
  dcTermsType?: string;
  dcType?: string;
  dcTermsAudience?: string;
  dcTermsSubject?: string;
  dcSubject?: string;
  dcDescription?: string;
  dcTermsKeywords?: string;
  modifiedTime?: string;
  publishedTime?: string;
  articleTag?: string;
  articleSection?: string;
  sourceURL?: string;
  statusCode?: number;
  scrapeId?: string;
  numPages?: number;
  contentType?: string;
  timezone?: string;
  proxyUsed?: "basic" | "stealth";
  cacheState?: "hit" | "miss";
  cachedAt?: string;
  creditsUsed?: number;
  concurrencyLimited?: boolean;
  concurrencyQueueDurationMs?: number;
  error?: string;
  [key: string]: unknown;
}

/** Result document; every member is optional. */
interface Document {
  markdown?: string;
  html?: string;
  rawHtml?: string;
  json?: unknown;
  summary?: string;
  metadata?: DocumentMetadata;
  links?: string[];
  images?: string[];
  screenshot?: string;
  audio?: string;
  video?: string;
  attributes?: Array<{
    selector: string;
    attribute: string;
    values: string[];
  }>;
  actions?: Record<string, unknown>;
  answer?: string;
  highlights?: string;
  warning?: string;
  changeTracking?: Record<string, unknown>;
  branding?: BrandingProfile;
}

/** Controls for following `next` links when reading paginated job results. */
interface PaginationConfig {
  /** When true (default), automatically follow `next` links and aggregate all documents. */
  autoPaginate?: boolean;
  /** Maximum number of additional pages to fetch after the first response. */
  maxPages?: number;
  /** Maximum total number of documents to return across all pages. */
  maxResults?: number;
  /** Maximum time to spend fetching additional pages (in seconds). */
  maxWaitTime?: number;
}

/** Entry of `SearchData.web` (also the element type of `MapData.links`). */
interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
  category?: string;
}

/** Entry of `SearchData.news`. */
interface SearchResultNews {
  title?: string;
  url?: string;
  snippet?: string;
  date?: string;
  imageUrl?: string;
  position?: number;
  category?: string;
}

/** Entry of `SearchData.images`. */
interface SearchResultImages {
  title?: string;
  imageUrl?: string;
  imageWidth?: number;
  imageHeight?: number;
  url?: string;
  position?: number;
}

/** Search results by source; each entry is a plain result or a `Document`. */
interface SearchData {
  web?: Array<SearchResultWeb | Document>;
  news?: Array<SearchResultNews | Document>;
  images?: Array<SearchResultImages | Document>;
}

/** Object form of a search category. */
interface CategoryOption {
  type: "github" | "research" | "pdf";
}

/** Search request; only `query` is required. */
interface SearchRequest {
  query: string;
  sources?: Array<
    | "web"
    | "news"
    | "images"
    | {
        type: "web" | "news" | "images";
      }
  >;
  categories?: Array<"github" | "research" | "pdf" | CategoryOption>;
  includeDomains?: string[];
  excludeDomains?: string[];
  limit?: number;
  tbs?: string;
  location?: string;
  ignoreInvalidURLs?: boolean;
  timeout?: number;
  scrapeOptions?: ScrapeOptions;
  integration?: string;
  origin?: string;
}

/** Crawl configuration; several members also accept an explicit `null`. */
interface CrawlOptions {
  prompt?: string | null;
  excludePaths?: string[] | null;
  includePaths?: string[] | null;
  maxDiscoveryDepth?: number | null;
  sitemap?: "skip" | "include" | "only";
  ignoreQueryParameters?: boolean;
  deduplicateSimilarURLs?: boolean;
  limit?: number | null;
  crawlEntireDomain?: boolean;
  allowExternalLinks?: boolean;
  allowSubdomains?: boolean;
  ignoreRobotsTxt?: boolean;
  robotsUserAgent?: string | null;
  delay?: number | null;
  maxConcurrency?: number | null;
  webhook?: string | WebhookConfig | null;
  scrapeOptions?: ScrapeOptions | null;
  regexOnFullURL?: boolean;
  zeroDataRetention?: boolean;
  integration?: string;
  origin?: string;
}

/** Job id and url of a crawl. */
interface CrawlResponse$1 {
  id: string;
  url: string;
}

/** Crawl job snapshot: status, counters and the documents in `data`. */
interface CrawlJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  total: number;
  completed: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}

/** Options for a batch scrape; per-URL scrape options go in `options`. */
interface BatchScrapeOptions {
  options?: ScrapeOptions;
  webhook?: string | WebhookConfig;
  appendToId?: string;
  ignoreInvalidURLs?: boolean;
  maxConcurrency?: number;
  zeroDataRetention?: boolean;
  idempotencyKey?: string;
  integration?: string;
  origin?: string;
}

/** Job id and url of a batch scrape, plus an optional `invalidURLs` list. */
interface BatchScrapeResponse$1 {
  id: string;
  url: string;
  invalidURLs?: string[];
}

/** Batch scrape job snapshot; same members as `CrawlJob`. */
interface BatchScrapeJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}

/** Links returned by a map request. */
interface MapData {
  links: SearchResultWeb[];
}

/** Options for a map request; every member is optional. */
interface MapOptions {
  search?: string;
  sitemap?: "only" | "include" | "skip";
  includeSubdomains?: boolean;
  ignoreQueryParameters?: boolean;
  limit?: number;
  timeout?: number;
  integration?: string;
  origin?: string;
  location?: LocationConfig$1;
}

/**
 * Schedule for a monitor.
 *
 * On create/update, provide exactly one of `cron` or `text`:
 * - `cron`: a 5-field cron expression (e.g. `"*\u002F30 * * * *"`).
 * - `text`: a natural-language schedule (e.g. `"every 30 minutes"`,
 *   `"hourly"`, `"daily at 9:00"`). Firecrawl normalizes this to a cron
 *   expression server-side.
 *
 * On read, the API always returns the normalized `cron` value, so `cron`
 * is populated in responses even when the monitor was created with `text`.
*/
interface MonitorSchedule {
  cron?: string;
  text?: string;
  timezone?: string;
}

/** Email settings nested under `MonitorNotification.email`. */
interface MonitorEmailNotification {
  enabled?: boolean;
  recipients?: string[];
  includeDiffs?: boolean;
}

/** Notification settings of a monitor. */
interface MonitorNotification {
  email?: MonitorEmailNotification;
}

/** Webhook target for a monitor; `events` is an unconstrained string list. */
interface MonitorWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: string[];
}

/** Monitor target of type `"scrape"`: a list of URLs. */
interface MonitorScrapeTarget {
  id?: string;
  type: "scrape";
  urls: string[];
  scrapeOptions?: ScrapeOptions;
}

/** Monitor target of type `"crawl"`: a single URL plus crawl options. */
interface MonitorCrawlTarget {
  id?: string;
  type: "crawl";
  url: string;
  crawlOptions?: CrawlOptions;
  scrapeOptions?: ScrapeOptions;
}

/** Either kind of monitor target, discriminated by `type`. */
type MonitorTarget = MonitorScrapeTarget | MonitorCrawlTarget;

/** Body for creating a monitor; `name`, `schedule` and `targets` are required. */
interface CreateMonitorRequest {
  name: string;
  schedule: MonitorSchedule;
  webhook?: MonitorWebhookConfig;
  notification?: MonitorNotification;
  targets: MonitorTarget[];
  retentionDays?: number;
}

/**
 * Body for updating a monitor; every member is optional, and `webhook` /
 * `notification` additionally accept `null`.
 */
interface UpdateMonitorRequest {
  name?: string;
  status?: "active" | "paused";
  schedule?: MonitorSchedule;
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  targets?: MonitorTarget[];
  retentionDays?: number;
}

/** Page counters of a monitor check, one per page status plus the total. */
interface MonitorSummary {
  totalPages: number;
  same: number;
  changed: number;
  new: number;
  removed: number;
  error: number;
}

/** A monitor as returned by the monitor endpoints. */
interface Monitor {
  id: string;
  name: string;
  status: "active" | "paused" | "deleted";
  schedule: MonitorSchedule;
  nextRunAt?: string | null;
  lastRunAt?: string | null;
  currentCheckId?: string | null;
  targets: MonitorTarget[];
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  retentionDays: number;
  estimatedCreditsPerMonth?: number | null;
  lastCheckSummary?: MonitorSummary | null;
  createdAt: string;
  updatedAt: string;
}

/** One check (run) of a monitor. */
interface MonitorCheck {
  id: string;
  monitorId: string;
  status:
    | "queued"
    | "running"
    | "completed"
    | "failed"
    | "partial"
    | "skipped_overlap";
  trigger: "scheduled" | "manual";
  scheduledFor?: string | null;
  startedAt?: string | null;
  finishedAt?: string | null;
  estimatedCredits?: number | null;
  reservedCredits?: number | null;
  actualCredits?: number | null;
  billingStatus:
    | "not_applicable"
    | "reserved"
    | "confirmed"
    | "released"
    | "failed";
  summary: MonitorSummary;
  targetResults?: unknown;
  notificationStatus?: unknown;
  error?: string | null;
  createdAt: string;
  updatedAt: string;
}

/** Per-field diff for monitors that requested JSON extraction. */
interface MonitorJsonFieldDiff {
  [field: string]: {
    previous: unknown;
    current: unknown;
  };
}

/**
 * Diff payload returned alongside a monitor page when its scrape produced
 * a change. The shape depends on what the monitor's formats asked for:
 *
 * - markdown-only monitors → `{ text, json }` where `json` is the
 *   `parseDiff` AST (a `{ files: [...] }` object).
 * - JSON-extraction monitors → `{ json }` where `json` is the per-field
 *   `{ previous, current }` map.
 * - Mixed (JSON + git-diff) monitors → both `text` (markdown sidecar)
 *   and `json` (field-level diff) are present.
 */
interface MonitorPageDiff {
  text?: string;
  /** Markdown variants: parseDiff AST. JSON variants: per-field diff. */
  json?:
    | MonitorJsonFieldDiff
    | {
        files: unknown[];
      };
}

/**
 * Snapshot of the current JSON extraction at this run. Present on JSON
 * and mixed-mode monitors; absent for markdown-only.
*/
interface MonitorPageSnapshot {
  json?: Record<string, unknown>;
}

/** One page result inside a monitor check. */
interface MonitorCheckPage {
  id: string;
  targetId: string;
  url: string;
  status: "same" | "new" | "changed" | "removed" | "error";
  previousScrapeId?: string | null;
  currentScrapeId?: string | null;
  statusCode?: number | null;
  error?: string | null;
  metadata?: unknown;
  diff?: MonitorPageDiff | null;
  snapshot?: MonitorPageSnapshot | null;
  createdAt: string;
}

/** A `MonitorCheck` extended with its page results and an optional `next` link. */
interface MonitorCheckDetail extends MonitorCheck {
  pages: MonitorCheckPage[];
  next?: string | null;
}

/** Limit/offset paging for listing monitors. */
interface ListMonitorsOptions {
  limit?: number;
  offset?: number;
}

/** Listing checks takes the same options as listing monitors. */
type ListMonitorChecksOptions = ListMonitorsOptions;

/** `PaginationConfig` plus limit/skip paging and a page-status filter. */
type GetMonitorCheckOptions = PaginationConfig & {
  limit?: number;
  skip?: number;
  status?: MonitorCheckPage["status"];
};

/** Extract job response; every member is optional. */
interface ExtractResponse$1 {
  success?: boolean;
  id?: string;
  status?: "processing" | "completed" | "failed" | "cancelled";
  data?: unknown;
  error?: string;
  warning?: string;
  warnings?: string[];
  replacement?: string;
  sources?: Record<string, unknown>;
  expiresAt?: string;
  creditsUsed?: number;
}

/** Response to starting an agent job. */
interface AgentResponse {
  success: boolean;
  id: string;
  error?: string;
}

/** Status/data of an agent job. */
interface AgentStatusResponse {
  success: boolean;
  status: "processing" | "completed" | "failed";
  error?: string;
  data?: unknown;
  model?: "spark-1-pro" | "spark-1-mini";
  expiresAt: string;
  creditsUsed?: number;
}

/** Value of the `agent` member of the extract payload arguments. */
interface AgentOptions$1 {
  model: "FIRE-1" | "v3-beta";
}

/** Current and maximum concurrency. */
interface ConcurrencyCheck {
  concurrency: number;
  maxConcurrency: number;
}

/** Remaining credits with optional plan and billing-period details. */
interface CreditUsage {
  remainingCredits: number;
  planCredits?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}

/** Remaining tokens with optional plan and billing-period details. */
interface TokenUsage {
  remainingTokens: number;
  planTokens?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}

/** One period of historical credit usage. */
interface CreditUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  creditsUsed: number;
}

/** Historical credit usage, one entry per period. */
interface CreditUsageHistoricalResponse {
  success: boolean;
  periods: CreditUsageHistoricalPeriod[];
}

/** One period of historical token usage. */
interface TokenUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  tokensUsed: number;
}

/** Historical token usage, one entry per period. */
interface TokenUsageHistoricalResponse {
  success: boolean;
  periods: TokenUsageHistoricalPeriod[];
}

/** Error list and robots-blocked URL list for a crawl or batch job. */
interface CrawlErrorsResponse$1 {
  errors: {
    id: string;
    timestamp?: string;
    url: string;
    code?: string;
    error: string;
  }[];
  robotsBlocked: string[];
}

/** One entry of `ActiveCrawlsResponse.crawls`. */
interface ActiveCrawl {
  id: string;
  teamId: string;
  url: string;
  options?: Record<string, unknown> | null;
}

/** List of active crawls. */
interface ActiveCrawlsResponse {
  success: boolean;
  crawls: ActiveCrawl[];
}

/** Structured error information; only `message` is required. */
interface ErrorDetails {
  code?: string;
  message: string;
  details?: Record<string, unknown>;
  status?: number;
}

/** Error subclass carrying optional `status`, `code`, `details` and `jobId`. */
declare class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
  jobId?: string;
  constructor(
    message: string,
    status?: number,
    code?: string,
    details?: unknown,
    jobId?: string
  );
}

/** `SdkError` subclass that records a timeout in seconds for a batch or crawl job. */
declare class JobTimeoutError extends SdkError {
  timeoutSeconds: number;
  constructor(
    jobId: string,
    timeoutSeconds: number,
    jobType?: "batch" | "crawl"
  );
}

/** Queue metrics. */
interface QueueStatusResponse$1 {
  success: boolean;
  jobsInQueue: number;
  activeJobsInQueue: number;
  waitingJobsInQueue: number;
  maxConcurrency: number;
  mostRecentSuccess: string | null;
}

/** Response to creating a browser session. */
interface BrowserCreateResponse {
  success: boolean;
  id?: string;
  cdpUrl?: string;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  expiresAt?: string;
  error?: string;
}

/** Response to executing code in a browser session. */
interface BrowserExecuteResponse {
  success: boolean;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  output?: string;
  stdout?: string;
  result?: string;
  stderr?: string;
  exitCode?: number;
  killed?: boolean;
  error?: string;
}

/** Response to deleting a browser session. */
interface BrowserDeleteResponse {
  success: boolean;
  sessionDurationMs?: number;
  creditsBilled?: number;
  error?: string;
}

/** Code or prompt to run against a scrape job's browser session. */
interface ScrapeExecuteRequest {
  code?: string;
  prompt?: string;
  language?: "python" | "node" | "bash";
  timeout?: number;
  origin?: string;
}

/** Alias: scrape execution responses share the browser execute shape. */
type ScrapeExecuteResponse = BrowserExecuteResponse;

/** Alias: scrape browser deletion responses share the browser delete shape. */
type ScrapeBrowserDeleteResponse = BrowserDeleteResponse;

/** One entry of `BrowserListResponse.sessions`. */
interface BrowserSession {
  id: string;
  status: string;
  cdpUrl: string;
  liveViewUrl: string;
  interactiveLiveViewUrl?: string;
  streamWebView: boolean;
  createdAt: string;
  lastActivity: string;
}

/** List of browser sessions. */
interface BrowserListResponse {
  success: boolean;
  sessions?: BrowserSession[];
  error?: string;
}

/** Constructor options for `HttpClient`; `apiKey` and `apiUrl` are required. */
interface HttpClientOptions {
  apiKey: string;
  apiUrl: string;
  timeoutMs?: number;
  maxRetries?: number;
  backoffFactor?: number;
}

/** Per-request overrides accepted by `post`, `postMultipart` and `patch`. */
interface RequestOptions {
  headers?: Record<string, string>;
  timeoutMs?: number;
}

/**
 * HTTP transport. Every verb method resolves to an `AxiosResponse` whose
 * data type is the `T` type argument (default `any`).
 */
declare class HttpClient {
  private instance;
  private readonly apiKey;
  private readonly apiUrl;
  private readonly maxRetries;
  private readonly backoffFactor;
  constructor(options: HttpClientOptions);
  getApiUrl(): string;
  getApiKey(): string;
  private request;
  private sleep;
  post<T = any>(
    endpoint: string,
    body: Record<string, unknown>,
    options?: RequestOptions
  ): Promise<AxiosResponse<T, any, {}>>;
  postMultipart<T = any>(
    endpoint: string,
    formData: FormData,
    options?: RequestOptions
  ): Promise<AxiosResponse<T, any, {}>>;
  get<T = any>(
    endpoint: string,
    headers?: Record<string, string>
  ): Promise<AxiosResponse<T, any, {}>>;
  delete<T = any>(
    endpoint: string,
    headers?: Record<string, string>
  ): Promise<AxiosResponse<T, any, {}>>;
  patch<T = any>(
    endpoint: string,
    body: Record<string, unknown>,
    options?: RequestOptions
  ): Promise<AxiosResponse<T, any, {}>>;
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
}

/** Turns extract arguments into a plain request payload object. */
declare function prepareExtractPayload(args: {
  urls?: string[];
  prompt?: string;
  schema?: Record<string, unknown> | ZodTypeAny;
  systemPrompt?: string;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  showSources?: boolean;
  scrapeOptions?: ScrapeOptions;
  ignoreInvalidURLs?: boolean;
  integration?: string;
  origin?: string;
  agent?: AgentOptions$1;
}): Record<string, unknown>;

/**
 * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement. */ declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>; declare function prepareAgentPayload(args: { urls?: string[]; prompt: string; schema?: Record<string, unknown> | ZodTypeAny; integration?: string; origin?: string; maxCredits?: number; strictConstrainToURLs?: boolean; model?: "spark-1-pro" | "spark-1-mini"; webhook?: string | AgentWebhookConfig; }): Record<string, unknown>; declare function startAgent(http: HttpClient, args: Parameters<typeof prepareAgentPayload>[0]): Promise<AgentResponse>; declare function browser(http: HttpClient, args?: { ttl?: number; activityTtl?: number; streamWebView?: boolean; profile?: { name: string; saveChanges?: boolean; }; integration?: string; origin?: string; }): Promise<BrowserCreateResponse>; declare function browserExecute(http: HttpClient, sessionId: string, args: { code: string; language?: "python" | "node" | "bash"; timeout?: number; }): Promise<BrowserExecuteResponse>; declare function listBrowsers(http: HttpClient, args?: { status?: "active" | "destroyed"; }): Promise<BrowserListResponse>; type JobKind = "crawl" | "batch"; interface WatcherOptions { kind?: JobKind; pollInterval?: number; timeout?: number; } declare class Watcher extends EventEmitter { private readonly http; private readonly jobId; private readonly kind; private readonly pollInterval; private readonly timeout?; private ws?; private closed; private readonly emittedDocumentKeys; constructor(http: HttpClient, jobId: string, opts?: WatcherOptions); private buildWsUrl; start(): Promise<void>; private attachWsHandlers; private documentKey; private emitDocuments; private emitSnapshot; private pollLoop; close(): void; } type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? 
Extract<Formats[number], { type: "json"; schema?: unknown; }>["schema"] : never; type InferredJsonFromOptions<Opts> = Opts extends { formats?: infer Fmts; } ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown; /** * Configuration for the v2 client transport. */ interface FirecrawlClientOptions { /** API key (falls back to FIRECRAWL_API_KEY). */ apiKey?: string | null; /** API base URL (falls back to FIRECRAWL_API_URL or https://api.firecrawl.dev). */ apiUrl?: string | null; /** Per-request timeout in milliseconds (optional). */ timeoutMs?: number; /** Max automatic retries for transient failures (optional). */ maxRetries?: number; /** Exponential backoff factor for retries (optional). */ backoffFactor?: number; } /** * Firecrawl v2 client. Provides typed access to all v2 endpoints and utilities. */ declare class FirecrawlClient { private readonly http; private isCloudService; /** * Create a v2 client. * @param options Transport configuration (API key, base URL, timeouts, retries). */ constructor(options?: FirecrawlClientOptions); /** * Scrape a single URL. * @param url Target URL. * @param options Optional scrape options (formats, headers, etc.). * @returns Resolved document with requested formats. */ scrape<Opts extends ScrapeOptions>(url: string, options: Opts): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts>; }>; scrape(url: string, options?: ScrapeOptions): Promise<Document>; /** * Interact with the browser session associated with a scrape job. * @param jobId Scrape job id. * @param args Code or prompt to execute, with language/timeout options. * @returns Execution result including output, stdout, stderr, exitCode, and killed status. */ interact(jobId: string, args: ScrapeExecuteRequest): Promise<ScrapeExecuteResponse>; /** * Stop the interaction session associated with a scrape job. * @param jobId Scrape job id. 
*/ stopInteraction(jobId: string): Promise<ScrapeBrowserDeleteResponse>; /** * @deprecated Use interact(). */ scrapeExecute(jobId: string, args: ScrapeExecuteRequest): Promise<ScrapeExecuteResponse>; /** * @deprecated Use stopInteraction(). */ stopInteractiveBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>; /** * @deprecated Use stopInteraction(). */ deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>; /** * Parse an uploaded file via the v2 parse endpoint. * @param file File payload (data, filename, optional contentType). * @param options Optional parse options (formats, parsers, etc.). * Note: parse does not support changeTracking, screenshot, branding, * audio, video, * actions, waitFor, location, or mobile options. * @returns Parsed document with requested formats. */ parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & { json?: InferredJsonFromOptions<Opts>; }>; parse(file: ParseFile, options?: ParseOptions): Promise<Document>; /** * Search the web and optionally scrape each result. * @param query Search query string. * @param req Additional search options (sources, limit, scrapeOptions, etc.). * @returns Structured search results. */ search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>; /** * Map a site to discover URLs (sitemap-aware). * @param url Root URL to map. * @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout). * @returns Discovered links. */ map(url: string, options?: MapOptions): Promise<MapData>; /** * Start a crawl job (async). * @param url Root URL to crawl. * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.). * @returns Job id and url. */ startCrawl(url: string, req?: CrawlOptions): Promise<CrawlResponse$1>; /** * Get the status and partial data of a crawl job. * @param jobId Crawl job id. 
*/ getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob>; /** * Cancel a crawl job. * @param jobId Crawl job id. * @returns True if cancelled. */ cancelCrawl(jobId: string): Promise<boolean>; /** * Convenience waiter: start a crawl and poll until it finishes. * @param url Root URL to crawl. * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds). * @returns Final job snapshot. */ crawl(url: string, req?: CrawlOptions & { pollInterval?: number; timeout?: number; }): Promise<CrawlJob>; /** * Retrieve crawl errors and robots.txt blocks. * @param crawlId Crawl job id. */ getCrawlErrors(crawlId: string): Promise<CrawlErrorsResponse$1>; /** * List active crawls for the authenticated team. */ getActiveCrawls(): Promise<ActiveCrawlsResponse>; /** * Preview normalized crawl parameters produced by a natural-language prompt. * @param url Root URL. * @param prompt Natural-language instruction. */ crawlParamsPreview(url: string, prompt: string): Promise<Record<string, unknown>>; /** * Create a scheduled monitor. */ createMonitor(request: CreateMonitorRequest): Promise<Monitor>; /** * List monitors for the authenticated team. */ listMonitors(options?: ListMonitorsOptions): Promise<Monitor[]>; /** * Get a monitor by id. */ getMonitor(monitorId: string): Promise<Monitor>; /** * Update a monitor. */ updateMonitor(monitorId: string, request: UpdateMonitorRequest): Promise<Monitor>; /** * Delete a monitor. */ deleteMonitor(monitorId: string): Promise<boolean>; /** * Trigger a manual monitor check. */ runMonitor(monitorId: string): Promise<MonitorCheck>; /** * List checks for a monitor. */ listMonitorChecks(monitorId: string, options?: ListMonitorChecksOptions): Promise<MonitorCheck[]>; /** * Get a monitor check with paginated page results and inline diffs. 
*/ getMonitorCheck(monitorId: string, checkId: string, options?: GetMonitorCheckOptions): Promise<MonitorCheckDetail>; /** * Start a batch scrape job for multiple URLs (async). * @param urls URLs to scrape. * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.). * @returns Job id and url. */ startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse$1>; /** * Get the status and partial data of a batch scrape job. * @param jobId Batch job id. */ getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>; /** * Retrieve batch scrape errors and robots.txt blocks. * @param jobId Batch job id. */ getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse$1>; /** * Cancel a batch scrape job. * @param jobId Batch job id. * @returns True if cancelled. */ cancelBatchScrape(jobId: string): Promise<boolean>; /** * Convenience waiter: start a batch scrape and poll until it finishes. * @param urls URLs to scrape. * @param opts Batch options plus waiter controls (pollInterval, timeout seconds). * @returns Final job snapshot. */ batchScrape(urls: string[], opts?: BatchScrapeOptions & { pollInterval?: number; timeout?: number; }): Promise<BatchScrapeJob>; /** * Start an extract job (async). * @param args Extraction request (urls, schema or prompt, flags). * @returns Job id or processing state. * @deprecated The extract endpoint is in maintenance mode and its use is discouraged. * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement. */ startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>; /** * Get extract job status/data. * @param jobId Extract job id. * @deprecated The extract endpoint is in maintenance mode and its use is discouraged. * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement. 
*/ getExtractStatus(jobId: string): Promise<ExtractResponse$1>; /** * Convenience waiter: start an extract and poll until it finishes. * @param args Extraction request plus waiter controls (pollInterval, timeout seconds). * @returns Final extract response. * @deprecated The extract endpoint is in maintenance mode and its use is discouraged. * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement. */ extract(args: Parameters<typeof startExtract>[1] & { pollInterval?: number; timeout?: number; }): Promise<ExtractResponse$1>; /** * Start an agent job (async). * @param args Agent request (urls, prompt, schema). * @returns Job id or processing state. */ startAgent(args: Parameters<typeof startAgent>[1]): Promise<AgentResponse>; /** * Get agent job status/data. * @param jobId Agent job id. */ getAgentStatus(jobId: string): Promise<AgentStatusResponse>; /** * Convenience waiter: start an agent and poll until it finishes. * @param args Agent request plus waiter controls (pollInterval, timeout seconds). * @returns Final agent response. */ agent(args: Parameters<typeof startAgent>[1] & { pollInterval?: number; timeout?: number; }): Promise<AgentStatusResponse>; /** * Cancel an agent job. * @param jobId Agent job id. * @returns True if cancelled. */ cancelAgent(jobId: string): Promise<boolean>; /** * Create a new browser session. * @param args Session options (ttl, activityTtl, streamWebView, profile). * @returns Session id, CDP URL, live view URL, and expiration time. */ browser(args?: Parameters<typeof browser>[1]): Promise<BrowserCreateResponse>; /** * Execute code in a browser session. * @param sessionId Browser session id. * @param args Code, language ("python" | "node" | "bash"), and optional timeout. * @returns Execution result including stdout, stderr, exitCode, and killed status. 
*/
    browserExecute(
        sessionId: string,
        args: Parameters<typeof browserExecute>[2]
    ): Promise<BrowserExecuteResponse>;

    /**
     * Remove a browser session.
     * @param sessionId Browser session id.
     */
    deleteBrowser(sessionId: string): Promise<BrowserDeleteResponse>;

    /**
     * Enumerate browser sessions.
     * @param args Optional filter (status: "active" | "destroyed").
     * @returns List of browser sessions.
     */
    listBrowsers(
        args?: Parameters<typeof listBrowsers>[1]
    ): Promise<BrowserListResponse>;

    /** Current concurrency usage. */
    getConcurrency(): Promise<ConcurrencyCheck>;

    /** Current credit usage. */
    getCreditUsage(): Promise<CreditUsage>;

    /** Recent token usage. */
    getTokenUsage(): Promise<TokenUsage>;

    /**
     * Historical credit usage by month.
     * @param byApiKey Set to true to break the figures down by API key.
     */
    getCreditUsageHistorical(
        byApiKey?: boolean
    ): Promise<CreditUsageHistoricalResponse>;

    /**
     * Historical token usage by month.
     * @param byApiKey Set to true to break the figures down by API key.
     */
    getTokenUsageHistorical(
        byApiKey?: boolean
    ): Promise<TokenUsageHistoricalResponse>;

    /** Metrics about the team's scrape queue. */
    getQueueStatus(): Promise<QueueStatusResponse$1>;

    /**
     * Build a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
     * @param jobId Job id.
     * @param opts Watcher options (kind, pollInterval, timeout seconds).
     */
    watcher(jobId: string, opts?: WatcherOptions): Watcher;
}

/**
 * Configuration accepted by FirecrawlApp.
 * Both properties are optional and may also be `null`.
 */
interface FirecrawlAppConfig {
    /** Optional API key for authentication. */
    apiKey?: string | null;
    /** Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. */
    apiUrl?: string | null;
}

/**
 * Metadata for a Firecrawl document.
 * Includes various optional properties for document metadata.
*/
interface FirecrawlDocumentMetadata {
    title?: string;
    description?: string;
    language?: string;
    keywords?: string;
    robots?: string;

    ogTitle?: string;
    ogDescription?: string;
    ogUrl?: string;
    ogImage?: string;
    ogAudio?: string;
    ogDeterminer?: string;
    ogLocale?: string;
    ogLocaleAlternate?: string[];
    ogSiteName?: string;
    ogVideo?: string;

    dctermsCreated?: string;
    dcDateCreated?: string;
    dcDate?: string;
    dctermsType?: string;
    dcType?: string;
    dctermsAudience?: string;
    dctermsSubject?: string;
    dcSubject?: string;
    dcDescription?: string;
    dctermsKeywords?: string;

    modifiedTime?: string;
    publishedTime?: string;
    articleTag?: string;
    articleSection?: string;

    sourceURL?: string;
    statusCode?: number;
    timezone?: string;
    error?: string;
    proxyUsed?: "basic" | "stealth";
    cacheState?: "miss" | "hit";
    cachedAt?: string;
    creditsUsed?: number;
    concurrencyLimited?: boolean;
    concurrencyQueueDurationMs?: number;

    /** Index signature: any further key is accepted, with an untyped value. */
    [key: string]: any;
}

/**
 * A document retrieved or processed by Firecrawl.
 *
 * `T` types the `extract` and `json` payloads. `ActionsSchema` types the
 * required `actions` property and defaults to `never`.
 */
interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | never) = never> {
    url?: string;
    markdown?: string;
    html?: string;
    rawHtml?: string;
    links?: string[];
    extract?: T;
    json?: T;
    screenshot?: string;
    metadata?: FirecrawlDocumentMetadata;
    actions: ActionsSchema;
    changeTracking?: {
        previousScrapeAt: string | null;
        changeStatus: "new" | "same" | "changed" | "removed";
        visibility: "visible" | "hidden";
        /** Optional diff, carried both as text and as a structured per-file form. */
        diff?: {
            text: string;
            json: {
                files: {
                    from: string | null;
                    to: string | null;
                    chunks: {
                        content: string;
                        changes: {
                            type: string;
                            normal?: boolean;
                            ln?: number;
                            ln1?: number;
                            ln2?: number;
                            content: string;
                        }[];
                    }[];
                }[];
            };
        };
        json?: any;
    };
    title?: string;
    description?: string;
}

/**
 * Location configuration for proxy location.
 */
interface LocationConfig {
    country?: string;
    languages?: string[];
}

/**
 * Parameters for scraping operations.
 * Defines the options and configurations available for scraping web content.
*/
interface CrawlScrapeOptions {
    formats?: Array<
        | "markdown"
        | "html"
        | "rawHtml"
        | "content"
        | "links"
        | "screenshot"
        | "screenshot@fullPage"
        | "extract"
        | "json"
        | "changeTracking"
    >;
    headers?: Record<string, string>;
    includeTags?: string[];
    excludeTags?: string[];
    onlyMainContent?: boolean;
    waitFor?: number;
    timeout?: number;
    location?: LocationConfig;
    mobile?: boolean;
    skipTlsVerification?: boolean;
    removeBase64Images?: boolean;
    blockAds?: boolean;
    proxy?: "basic" | "stealth" | "enhanced" | "auto";
    storeInCache?: boolean;
    maxAge?: number;
    parsePDF?: boolean;
}

/**
 * A single page action. The `type` literal discriminates the variants.
 */
type Action =
    | {
        type: "wait";
        milliseconds?: number;
        selector?: string;
    }
    | {
        type: "click";
        selector: string;
        all?: boolean;
    }
    | {
        type: "screenshot";
        fullPage?: boolean;
        quality?: number;
    }
    | {
        type: "write";
        text: string;
    }
    | {
        type: "press";
        key: string;
    }
    | {
        type: "scroll";
        direction?: "up" | "down";
        selector?: string;
    }
    | {
        type: "scrape";
    }
    | {
        type: "executeJavascript";
        script: string;
    };

/**
 * Scrape request parameters: every `CrawlScrapeOptions` field plus
 * extraction, JSON, change-tracking, action and agent settings.
 *
 * `LLMSchema` types the `schema` of `extract` and `jsonOptions`.
 * `ActionsSchema` types `actions` and defaults to `undefined`.
 */
interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
    extract?: {
        prompt?: string;
        schema?: LLMSchema;
        systemPrompt?: string;
    };
    jsonOptions?: {
        prompt?: string;
        schema?: LLMSchema;
        systemPrompt?: string;
    };
    changeTrackingOptions?: {
        prompt?: string;
        schema?: any;
        modes?: Array<"json" | "git-diff">;
        tag?: string | null;
    };
    actions?: ActionsSchema;
    agent?: AgentOptions;
    zeroDataRetention?: boolean;
}

/**
 * Outputs collected from actions: screenshots, scrape entries (each with a
 * `url` and `html`) and JavaScript return values (each with a `type` and `value`).
 */
interface ActionsResult {
    screenshots: string[];
    scrapes: Array<{
        url: string;
        html: string;
    }>;
    javascriptReturns: Array<{
        type: string;
        value: unknown;
    }>;
}

/**
 * Response interface for scraping operations.
 * A `FirecrawlDocument` extended with a `success: true` marker and optional
 * `warning` / `error` messages.
 */
interface ScrapeResponse<LLMResult = any, ActionsSchema extends (ActionsResult | never) = never> extends FirecrawlDocument<LLMResult, ActionsSchema> {
    success: true;
    warning?: string;
    error?: string;
}

/**
 * Parameters for crawling operations.
* Includes options for both scraping and mapping during a crawl.
 */
interface CrawlParams {
    includePaths?: string[];
    excludePaths?: string[];
    maxDepth?: number;
    maxDiscoveryDepth?: number;
    limit?: number;
    allowBackwardLinks?: boolean;
    crawlEntireDomain?: boolean;
    allowExternalLinks?: boolean;
    ignoreSitemap?: boolean;
    scrapeOptions?: CrawlScrapeOptions;
    /** Either a plain URL string or an object carrying the URL plus optional headers, metadata and event filter. */
    webhook?:
        | string
        | {
            url: string;
            headers?: Record<string, string>;
            metadata?: Record<string, string>;
            events?: Array<"completed" | "failed" | "page" | "started">;
        };
    deduplicateSimilarURLs?: boolean;
    ignoreQueryParameters?: boolean;
    regexOnFullURL?: boolean;
    /**
     * Delay in seconds between scrapes. This helps respect website rate limits.
     * If not provided, the crawler may use the robots.txt crawl delay if available.
     */
    delay?: number;
    allowSubdomains?: boolean;
    maxConcurrency?: number;
    zeroDataRetention?: boolean;
}

/**
 * Response interface for crawling operations.
 * Describes the response received after initiating a crawl.
 */
interface CrawlResponse {
    id?: string;
    url?: string;
    success: true;
    error?: string;
}

/**
 * Response interface for batch scrape operations.
 * Describes the response received after initiating a batch scrape.
 */
interface BatchScrapeResponse {
    id?: string;
    url?: string;
    success: true;
    error?: string;
    invalidURLs?: string[];
}

/**
 * Response interface for job status checks.
 * Gives the detailed status of a crawl job, including progress and results.
 */
interface CrawlStatusResponse {
    success: true;
    status: "scraping" | "completed" | "failed" | "cancelled";
    completed: number;
    total: number;
    creditsUsed: number;
    expiresAt: Date;
    next?: string;
    data: Array<FirecrawlDocument<undefined>>;
}

/**
 * Response interface for batch scrape job status checks.
 * Provides detailed status of a batch scrape job including progress and results.
*/
interface BatchScrapeStatusResponse {
    success: true;
    status: "scraping" | "completed" | "failed" | "cancelled";
    completed: number;
    total: number;
    creditsUsed: number;
    expiresAt: Date;
    next?: string;
    data: Array<FirecrawlDocument<undefined>>;
}

/**
 * Parameters for mapping operations.
 * Defines options for mapping URLs during a crawl.
 */
interface MapParams {
    search?: string;
    ignoreSitemap?: boolean;
    includeSubdomains?: boolean;
    sitemapOnly?: boolean;
    limit?: number;
    timeout?: number;
    useIndex?: boolean;
    location?: LocationConfig;
}

/**
 * Response interface for mapping operations.
 * Describes the response received after a mapping operation.
 */
interface MapResponse {
    success: true;
    links?: string[];
    error?: string;
}

/**
 * Agent settings, as accepted by `ScrapeParams.agent`.
 */
interface AgentOptions {
    model?: string;
    prompt?: string;
    sessionId?: string;
}

/**
 * Agent settings, as accepted by `ExtractParams.agent`.
 * Same shape as `AgentOptions` minus the `prompt` field.
 */
interface AgentOptionsExtract {
    model?: string;
    sessionId?: string;
}

/**
 * Parameters for extracting information from URLs.
 * `schema` may be either the `LLMSchema` type argument or a plain object.
 */
interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
    prompt?: string;
    schema?: LLMSchema | object;
    systemPrompt?: string;
    allowExternalLinks?: boolean;
    enableWebSearch?: boolean;
    includeSubdomains?: boolean;
    origin?: string;
    showSources?: boolean;
    scrapeOptions?: CrawlScrapeOptions;
    agent?: AgentOptionsExtract;
}

/**
 * Response interface for extracting information from URLs.
 * Describes the response received after extracting information from URLs.
 */
interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
    success: boolean;
    data: LLMSchema;
    error?: string;
    warning?: string;
    warnings?: string[];
    replacement?: string;
    sources?: string[];
    creditsUsed?: number;
}

/**
 * Error response interface.
 * Defines the structure of the response received when an error occurs.
*/ interface ErrorResponse { success: false; error: string; } /** * Parameters for search operations. * Defines options for searching and scraping search results. */ interface SearchParams { limit?: number; tbs?: string; filter?: string; lang?: string; country?: string; location?: string; origin?: string; timeout?: number; scrapeOptions?: ScrapeParams; } /** * Response interface for search operations. * Defines the structure of the response received after a search operation. */ interface SearchResponse { success: boolean; data: FirecrawlDocument<undefined>[]; warning?: string; error?: string; } /** * Response interface for crawl/batch scrape error monitoring. */ interface CrawlErrorsResponse { /** * Scrapes that errored out + error details */ errors: { id: string; timestamp?: string; url: string; code?: string; error: string; }[]; /** * URLs blocked by robots.txt */ robotsBlocked: string[]; } /** * Parameters for deep research operations. * Defines options for conducting deep research on a query. */ interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> { /** * Maximum depth of research iterations (1-10) * @default 7 */ maxDepth?: number; /** * Time limit in seconds (30-300) * @default 270 */ timeLimit?: number; /** * Maximum number of URLs to analyze (1-1000) * @defa