UNPKG

@mendable/firecrawl-js

Version:
1,046 lines (945 loc) 23.1 kB
import type { ZodTypeAny } from "zod";

// Public types for Firecrawl JS/TS SDK v2 (camelCase only)

/** Format names that may be requested by string alone. */
export type FormatString =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "images"
  | "screenshot"
  | "summary"
  | "changeTracking"
  | "json"
  | "attributes"
  | "branding"
  | "audio"
  | "video";

/** Width/height pair used by screenshot formats and screenshot actions. */
export interface Viewport {
  width: number;
  height: number;
}

/** Object form of a format request; `type` is one of the {@link FormatString} names. */
export interface Format {
  type: FormatString;
}

/** `json` format with an optional prompt and an optional JSON Schema object or Zod schema. */
export interface JsonFormat extends Format {
  type: "json";
  prompt?: string;
  schema?: Record<string, unknown> | ZodTypeAny;
}

/** `screenshot` format with optional capture settings. */
export interface ScreenshotFormat {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?: Viewport | { width: number; height: number };
}

/** `changeTracking` format; `modes` is required, everything else is optional. */
export interface ChangeTrackingFormat extends Format {
  type: "changeTracking";
  modes: ("git-diff" | "json")[];
  /**
   * Either a JSON Schema object or a Zod schema. Zod schemas are
   * auto-converted to JSON Schema by the SDK before being sent — see
   * `utils/validation.ts`.
   */
  schema?: Record<string, unknown> | ZodTypeAny;
  prompt?: string;
  tag?: string;
}

/** `attributes` format: a list of selector/attribute pairs. */
export interface AttributesFormat extends Format {
  type: "attributes";
  selectors: Array<{
    selector: string;
    attribute: string;
  }>;
}

/** `question` format carrying the question text. */
export interface QuestionFormat {
  type: "question";
  question: string;
}

/** `highlights` format carrying the query text. */
export interface HighlightsFormat {
  type: "highlights";
  query: string;
}

/** @deprecated Use QuestionFormat or HighlightsFormat instead. */
export interface QueryFormat {
  type: "query";
  prompt: string;
  mode?: "freeform" | "directQuote";
}

/** Every accepted way of requesting a format: a bare name or one of the object forms. */
export type FormatOption =
  | FormatString
  | Format
  | JsonFormat
  | ChangeTrackingFormat
  | ScreenshotFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;

/** {@link FormatString} without the names excluded for parse requests. */
export type ParseFormatString = Exclude<
  FormatString,
  "screenshot" | "changeTracking" | "branding" | "audio" | "video"
>;

/** Object form of a parse format request. */
export interface ParseFormat {
  type: ParseFormatString;
}

/** Every accepted way of requesting a format in {@link ParseOptions}. */
export type ParseFormatOption =
  | ParseFormatString
  | ParseFormat
  | JsonFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;

/** Optional country and language list used as a `location` setting. */
export interface LocationConfig {
  country?: string;
  languages?: string[];
}

/** `wait` action; both `milliseconds` and `selector` are optional. */
export interface WaitAction {
  type: "wait";
  milliseconds?: number;
  selector?: string;
}

/** `screenshot` action with the same optional settings as {@link ScreenshotFormat}. */
export interface ScreenshotAction {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?: Viewport | { width: number; height: number };
}

/** `click` action targeting a selector. */
export interface ClickAction {
  type: "click";
  selector: string;
}

/** `write` action carrying the text to write. */
export interface WriteAction {
  type: "write";
  text: string;
}

/** `press` action carrying the key to press. */
export interface PressAction {
  type: "press";
  key: string;
}

/** `scroll` action with a direction and an optional selector. */
export interface ScrollAction {
  type: "scroll";
  direction: "up" | "down";
  selector?: string;
}

/** `scrape` action; it has no further fields. */
export interface ScrapeAction {
  type: "scrape";
}

/** `executeJavascript` action carrying the script source. */
export interface ExecuteJavascriptAction {
  type: "executeJavascript";
  script: string;
}

/** `pdf` action with optional page format, orientation and scale. */
export interface PDFAction {
  type: "pdf";
  format?:
    | "A0"
    | "A1"
    | "A2"
    | "A3"
    | "A4"
    | "A5"
    | "A6"
    | "Letter"
    | "Legal"
    | "Tabloid"
    | "Ledger";
  landscape?: boolean;
  scale?: number;
}

/** Union of all action shapes accepted in `ScrapeOptions.actions`. */
export type ActionOption =
  | WaitAction
  | ScreenshotAction
  | ClickAction
  | WriteAction
  | PressAction
  | ScrollAction
  | ScrapeAction
  | ExecuteJavascriptAction
  | PDFAction;

/** Options for a scrape; every field is optional. */
export interface ScrapeOptions {
  formats?: FormatOption[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  timeout?: number;
  waitFor?: number;
  mobile?: boolean;
  parsers?: Array<
    string | { type: "pdf"; mode?: "fast" | "auto" | "ocr"; maxPages?: number }
  >;
  actions?: ActionOption[];
  location?: LocationConfig;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  fastMode?: boolean;
  useMock?: string;
  blockAds?: boolean;
  proxy?: "basic" | "stealth" | "enhanced" | "auto" | string;
  maxAge?: number;
  minAge?: number;
  storeInCache?: boolean;
  lockdown?: boolean;
  profile?: {
    name: string;
    saveChanges?: boolean;
  };
  integration?: string;
  origin?: string;
}

/** Payload types accepted as the contents of a {@link ParseFile}. */
export type ParseFileData =
  | Blob
  | File
  | Buffer
  | Uint8Array
  | ArrayBuffer
  | string;

/** A file to parse: its data, a filename and an optional content type. */
export interface ParseFile {
  data: ParseFileData;
  filename: string;
  contentType?: string;
}

/**
 * {@link ScrapeOptions} with the omitted keys removed, and with `formats`
 * and `proxy` re-declared using the narrower parse-specific types.
 */
export type ParseOptions = Omit<
  ScrapeOptions,
  | "formats"
  | "waitFor"
  | "mobile"
  | "actions"
  | "location"
  | "maxAge"
  | "minAge"
  | "storeInCache"
  | "lockdown"
  | "proxy"
> & {
  formats?: ParseFormatOption[];
  proxy?: "basic" | "auto";
};

/** Webhook target with optional headers, metadata and an event filter. */
export interface WebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: Array<"completed" | "failed" | "page" | "started">;
}

// Agent webhook events differ from crawl: has 'action' and 'cancelled', no 'page'
export type AgentWebhookEvent =
  | "started"
  | "action"
  | "completed"
  | "failed"
  | "cancelled";

/** Same shape as {@link WebhookConfig}, but `events` uses {@link AgentWebhookEvent}. */
export interface AgentWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: AgentWebhookEvent[];
}

/**
 * Value type of `Document.branding`. All sections are optional and most
 * nested objects carry an index signature, so keys beyond the named ones
 * are allowed.
 */
export interface BrandingProfile {
  colorScheme?: "light" | "dark";
  logo?: string | null;
  fonts?: Array<{
    family: string;
    [key: string]: unknown;
  }>;
  colors?: {
    primary?: string;
    secondary?: string;
    accent?: string;
    background?: string;
    textPrimary?: string;
    textSecondary?: string;
    link?: string;
    success?: string;
    warning?: string;
    error?: string;
    [key: string]: string | undefined;
  };
  typography?: {
    fontFamilies?: {
      primary?: string;
      heading?: string;
      code?: string;
      [key: string]: string | undefined;
    };
    fontStacks?: {
      primary?: string[];
      heading?: string[];
      body?: string[];
      paragraph?: string[];
      [key: string]: string[] | undefined;
    };
    fontSizes?: {
      h1?: string;
      h2?: string;
      h3?: string;
      body?: string;
      small?: string;
      [key: string]: string | undefined;
    };
    lineHeights?: {
      heading?: number;
      body?: number;
      [key: string]: number | undefined;
    };
    fontWeights?: {
      light?: number;
      regular?: number;
      medium?: number;
      bold?: number;
      [key: string]: number | undefined;
    };
  };
  spacing?: {
    baseUnit?: number;
    padding?: Record<string, number>;
    margins?: Record<string, number>;
    gridGutter?: number;
    borderRadius?: string;
    [key: string]: number | string | Record<string, number> | undefined;
  };
  components?: {
    buttonPrimary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    buttonSecondary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    input?: {
      borderColor?: string;
      focusBorderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    [key: string]: unknown;
  };
  icons?: {
    style?: string;
    primaryColor?: string;
    [key: string]: string | undefined;
  };
  images?: {
    logo?: string | null;
    favicon?: string | null;
    ogImage?: string | null;
    [key: string]: string | null | undefined;
  };
  animations?: {
    transitionDuration?: string;
    easing?: string;
    [key: string]: string | undefined;
  };
  layout?: {
    grid?: {
      columns?: number;
      maxWidth?: string;
      [key: string]: number | string | undefined;
    };
    headerHeight?: string;
    footerHeight?: string;
    [key: string]:
      | number
      | string
      | Record<string, number | string | undefined>
      | undefined;
  };
  tone?: {
    voice?: string;
    emojiUsage?: string;
    [key: string]: string | undefined;
  };
  personality?: {
    tone:
      | "professional"
      | "playful"
      | "modern"
      | "traditional"
      | "minimalist"
      | "bold";
    energy: "low" | "medium" | "high";
    targetAudience: string;
  };
  [key: string]: unknown;
}

/** Metadata attached to a {@link Document}; unknown extra keys are permitted. */
export interface DocumentMetadata {
  // Common metadata fields
  title?: string;
  description?: string;
  url?: string;
  language?: string;
  keywords?: string | string[];
  robots?: string;

  // OpenGraph and social metadata
  ogTitle?: string;
  ogDescription?: string;
  ogUrl?: string;
  ogImage?: string;
  ogAudio?: string;
  ogDeterminer?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogSiteName?: string;
  ogVideo?: string;

  // Dublin Core and other site metadata
  favicon?: string;
  dcTermsCreated?: string;
  dcDateCreated?: string;
  dcDate?: string;
  dcTermsType?: string;
  dcType?: string;
  dcTermsAudience?: string;
  dcTermsSubject?: string;
  dcSubject?: string;
  dcDescription?: string;
  dcTermsKeywords?: string;
  modifiedTime?: string;
  publishedTime?: string;
  articleTag?: string;
  articleSection?: string;

  // Response-level metadata
  sourceURL?: string;
  statusCode?: number;
  scrapeId?: string;
  numPages?: number;
  contentType?: string;
  timezone?: string;
  proxyUsed?: "basic" | "stealth";
  cacheState?: "hit" | "miss";
  cachedAt?: string;
  creditsUsed?: number;
  concurrencyLimited?: boolean;
  concurrencyQueueDurationMs?: number;

  // Error information
  error?: string;

  [key: string]: unknown;
}

/** A result document; each output is optional. */
export interface Document {
  markdown?: string;
  html?: string;
  rawHtml?: string;
  json?: unknown;
  summary?: string;
  metadata?: DocumentMetadata;
  links?: string[];
  images?: string[];
  screenshot?: string;
  audio?: string;
  video?: string;
  attributes?: Array<{
    selector: string;
    attribute: string;
    values: string[];
  }>;
  actions?: Record<string, unknown>;
  answer?: string;
  highlights?: string;
  warning?: string;
  changeTracking?: Record<string, unknown>;
  branding?: BrandingProfile;
}

// Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL
export interface PaginationConfig {
  /** When true (default), automatically follow `next` links and aggregate all documents. */
  autoPaginate?: boolean;
  /** Maximum number of additional pages to fetch after the first response. */
  maxPages?: number;
  /** Maximum total number of documents to return across all pages. */
  maxResults?: number;
  /** Maximum time to spend fetching additional pages (in seconds). */
  maxWaitTime?: number;
}

/** A web search hit; only `url` is required. */
export interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
  category?: string;
}

/** A news search hit; every field is optional. */
export interface SearchResultNews {
  title?: string;
  url?: string;
  snippet?: string;
  date?: string;
  imageUrl?: string;
  position?: number;
  category?: string;
}

/** An image search hit; every field is optional. */
export interface SearchResultImages {
  title?: string;
  imageUrl?: string;
  imageWidth?: number;
  imageHeight?: number;
  url?: string;
  position?: number;
}

/** Search results grouped by source; entries are either plain hits or {@link Document}s. */
export interface SearchData {
  web?: Array<SearchResultWeb | Document>;
  news?: Array<SearchResultNews | Document>;
  images?: Array<SearchResultImages | Document>;
}

/** Object form of a search category. */
export interface CategoryOption {
  type: "github" | "research" | "pdf";
}

/** Parameters of a search request; only `query` is required. */
export interface SearchRequest {
  query: string;
  sources?: Array<
    "web" | "news" | "images" | { type: "web" | "news" | "images" }
  >;
  categories?: Array<"github" | "research" | "pdf" | CategoryOption>;
  includeDomains?: string[];
  excludeDomains?: string[];
  limit?: number;
  tbs?: string;
  location?: string;
  ignoreInvalidURLs?: boolean;
  timeout?: number; // ms
  scrapeOptions?: ScrapeOptions;
  integration?: string;
  origin?: string;
}

/** Options for a crawl; every field is optional and several also accept `null`. */
export interface CrawlOptions {
  prompt?: string | null;
  excludePaths?: string[] | null;
  includePaths?: string[] | null;
  maxDiscoveryDepth?: number | null;
  sitemap?: "skip" | "include" | "only";
  ignoreQueryParameters?: boolean;
  deduplicateSimilarURLs?: boolean;
  limit?: number | null;
  crawlEntireDomain?: boolean;
  allowExternalLinks?: boolean;
  allowSubdomains?: boolean;
  ignoreRobotsTxt?: boolean;
  robotsUserAgent?: string | null;
  delay?: number | null;
  maxConcurrency?: number | null;
  webhook?: string | WebhookConfig | null;
  scrapeOptions?: ScrapeOptions | null;
  regexOnFullURL?: boolean;
  zeroDataRetention?: boolean;
  integration?: string;
  origin?: string;
}

/** Identifier and URL pair describing a crawl job. */
export interface CrawlResponse {
  id: string;
  url: string;
}

/** Status, counters and documents of a crawl job; `next` may hold a URL for further results. */
export interface CrawlJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  total: number;
  completed: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}

/** Options for a batch scrape; every field is optional. */
export interface BatchScrapeOptions {
  options?: ScrapeOptions;
  webhook?: string | WebhookConfig;
  appendToId?: string;
  ignoreInvalidURLs?: boolean;
  maxConcurrency?: number;
  zeroDataRetention?: boolean;
  idempotencyKey?: string;
  integration?: string;
  origin?: string;
}

/** Identifier and URL pair describing a batch scrape, plus any invalid URLs. */
export interface BatchScrapeResponse {
  id: string;
  url: string;
  invalidURLs?: string[];
}

/** Status, counters and documents of a batch scrape job. */
export interface BatchScrapeJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}

/** Result of a map request: a list of links. */
export interface MapData {
  links: SearchResultWeb[];
}

/** Options for a map request; every field is optional. */
export interface MapOptions {
  search?: string;
  sitemap?: "only" | "include" | "skip";
  includeSubdomains?: boolean;
  ignoreQueryParameters?: boolean;
  limit?: number;
  timeout?: number;
  integration?: string;
  origin?: string;
  location?: LocationConfig;
}

/**
 * Schedule for a monitor.
 *
 * On create/update, provide exactly one of `cron` or `text`:
 * - `cron`: a 5-field cron expression (e.g. `"*\u002F30 * * * *"`).
 * - `text`: a natural-language schedule (e.g. `"every 30 minutes"`,
 *   `"hourly"`, `"daily at 9:00"`). Firecrawl normalizes this to a cron
 *   expression server-side.
 *
 * On read, the API always returns the normalized `cron` value, so `cron`
 * is populated in responses even when the monitor was created with `text`.
 */
export interface MonitorSchedule {
  cron?: string;
  text?: string;
  timezone?: string;
}

/** Email notification settings for a monitor; every field is optional. */
export interface MonitorEmailNotification {
  enabled?: boolean;
  recipients?: string[];
  includeDiffs?: boolean;
}

/** Notification settings for a monitor; currently only an `email` section is declared. */
export interface MonitorNotification {
  email?: MonitorEmailNotification;
}

/** Webhook target for a monitor; `events` is a free-form string list here. */
export interface MonitorWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: string[];
}

/** Monitor target of type `scrape`, covering a list of URLs. */
export interface MonitorScrapeTarget {
  id?: string;
  type: "scrape";
  urls: string[];
  scrapeOptions?: ScrapeOptions;
}

/** Monitor target of type `crawl`, covering a single start URL. */
export interface MonitorCrawlTarget {
  id?: string;
  type: "crawl";
  url: string;
  crawlOptions?: CrawlOptions;
  scrapeOptions?: ScrapeOptions;
}

/** Either kind of monitor target, discriminated by `type`. */
export type MonitorTarget = MonitorScrapeTarget | MonitorCrawlTarget;

/** Body for creating a monitor; `name`, `schedule` and `targets` are required. */
export interface CreateMonitorRequest {
  name: string;
  schedule: MonitorSchedule;
  webhook?: MonitorWebhookConfig;
  notification?: MonitorNotification;
  targets: MonitorTarget[];
  retentionDays?: number;
}

/** Body for updating a monitor; every field is optional, and `webhook`/`notification` accept `null`. */
export interface UpdateMonitorRequest {
  name?: string;
  status?: "active" | "paused";
  schedule?: MonitorSchedule;
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  targets?: MonitorTarget[];
  retentionDays?: number;
}

/** Per-status page counts for a monitor check. */
export interface MonitorSummary {
  totalPages: number;
  same: number;
  changed: number;
  new: number;
  removed: number;
  error: number;
}

/** A monitor as returned by the API. */
export interface Monitor {
  id: string;
  name: string;
  status: "active" | "paused" | "deleted";
  schedule: MonitorSchedule;
  nextRunAt?: string | null;
  lastRunAt?: string | null;
  currentCheckId?: string | null;
  targets: MonitorTarget[];
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  retentionDays: number;
  estimatedCreditsPerMonth?: number | null;
  lastCheckSummary?: MonitorSummary | null;
  createdAt: string;
  updatedAt: string;
}

/** A single run of a monitor, with its status, credit accounting and summary. */
export interface MonitorCheck {
  id: string;
  monitorId: string;
  status:
    | "queued"
    | "running"
    | "completed"
    | "failed"
    | "partial"
    | "skipped_overlap";
  trigger: "scheduled" | "manual";
  scheduledFor?: string | null;
  startedAt?: string | null;
  finishedAt?: string | null;
  estimatedCredits?: number | null;
  reservedCredits?: number | null;
  actualCredits?: number | null;
  billingStatus:
    | "not_applicable"
    | "reserved"
    | "confirmed"
    | "released"
    | "failed";
  summary: MonitorSummary;
  targetResults?: unknown;
  notificationStatus?: unknown;
  error?: string | null;
  createdAt: string;
  updatedAt: string;
}

/** Per-field diff for monitors that requested JSON extraction. */
export interface MonitorJsonFieldDiff {
  [field: string]: { previous: unknown; current: unknown };
}

/**
 * Diff payload returned alongside a monitor page when its scrape produced
 * a change. The shape depends on what the monitor's formats asked for:
 *
 * - markdown-only monitors → `{ text, json }` where `json` is the
 *   `parseDiff` AST (a `{ files: [...] }` object).
 * - JSON-extraction monitors → `{ json }` where `json` is the per-field
 *   `{ previous, current }` map.
 * - Mixed (JSON + git-diff) monitors → both `text` (markdown sidecar)
 *   and `json` (field-level diff) are present.
 */
export interface MonitorPageDiff {
  text?: string;
  /** Markdown variants: parseDiff AST. JSON variants: per-field diff. */
  json?: MonitorJsonFieldDiff | { files: unknown[] };
}

/**
 * Snapshot of the current JSON extraction at this run. Present on JSON
 * and mixed-mode monitors; absent for markdown-only.
 */
export interface MonitorPageSnapshot {
  json?: Record<string, unknown>;
}

/** One page examined during a monitor check. */
export interface MonitorCheckPage {
  id: string;
  targetId: string;
  url: string;
  status: "same" | "new" | "changed" | "removed" | "error";
  previousScrapeId?: string | null;
  currentScrapeId?: string | null;
  statusCode?: number | null;
  error?: string | null;
  metadata?: unknown;
  diff?: MonitorPageDiff | null;
  snapshot?: MonitorPageSnapshot | null;
  createdAt: string;
}

/** A {@link MonitorCheck} together with its pages and an optional `next` URL. */
export interface MonitorCheckDetail extends MonitorCheck {
  pages: MonitorCheckPage[];
  next?: string | null;
}

/** Limit/offset paging options for listing monitors. */
export interface ListMonitorsOptions {
  limit?: number;
  offset?: number;
}

/** Listing monitor checks takes the same options as listing monitors. */
export type ListMonitorChecksOptions = ListMonitorsOptions;

/** {@link PaginationConfig} plus limit/skip and an optional page-status filter. */
export type GetMonitorCheckOptions = PaginationConfig & {
  limit?: number;
  skip?: number;
  status?: MonitorCheckPage["status"];
};

/** Response shape for extract calls; every field is optional. */
export interface ExtractResponse {
  success?: boolean;
  id?: string;
  status?: "processing" | "completed" | "failed" | "cancelled";
  data?: unknown;
  error?: string;
  warning?: string;
  warnings?: string[];
  replacement?: string;
  sources?: Record<string, unknown>;
  expiresAt?: string;
  creditsUsed?: number;
}

/** Response carrying an agent job id and a success flag. */
export interface AgentResponse {
  success: boolean;
  id: string;
  error?: string;
}

/** Status of an agent job, with optional result data and model name. */
export interface AgentStatusResponse {
  success: boolean;
  status: "processing" | "completed" | "failed";
  error?: string;
  data?: unknown;
  model?: "spark-1-pro" | "spark-1-mini";
  expiresAt: string;
  creditsUsed?: number;
}

/** Agent selection by model name. */
export interface AgentOptions {
  model: "FIRE-1" | "v3-beta";
}

/** Current and maximum concurrency values. */
export interface ConcurrencyCheck {
  concurrency: number;
  maxConcurrency: number;
}

/** Remaining credits, with optional plan size and billing period bounds. */
export interface CreditUsage {
  remainingCredits: number;
  planCredits?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}

/** Remaining tokens, with optional plan size and billing period bounds. */
export interface TokenUsage {
  remainingTokens: number;
  planTokens?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}

/** Credits used in one historical period, optionally attributed to an API key. */
export interface CreditUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  creditsUsed: number;
}
/** Historical credit usage: a success flag and the list of periods. */
export interface CreditUsageHistoricalResponse {
  success: boolean;
  periods: CreditUsageHistoricalPeriod[];
}

/** Tokens used in one historical period, optionally attributed to an API key. */
export interface TokenUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  tokensUsed: number;
}

/** Historical token usage: a success flag and the list of periods. */
export interface TokenUsageHistoricalResponse {
  success: boolean;
  periods: TokenUsageHistoricalPeriod[];
}

/** Errors recorded for a crawl, plus the list of robots-blocked entries. */
export interface CrawlErrorsResponse {
  errors: {
    id: string;
    timestamp?: string;
    url: string;
    code?: string;
    error: string;
  }[];
  robotsBlocked: string[];
}

/** A crawl that is currently active. */
export interface ActiveCrawl {
  id: string;
  teamId: string;
  url: string;
  options?: Record<string, unknown> | null;
}

/** List of active crawls with a success flag. */
export interface ActiveCrawlsResponse {
  success: boolean;
  crawls: ActiveCrawl[];
}

/** Structured error description; only `message` is required. */
export interface ErrorDetails {
  code?: string;
  message: string;
  details?: Record<string, unknown>;
  status?: number;
}

/**
 * `Error` subclass that carries an optional `status`, `code`, `details`
 * payload and `jobId` next to the message. Its `name` is always
 * `"FirecrawlSdkError"` unless a subclass overrides it.
 */
export class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
  jobId?: string;

  /**
   * @param message Human-readable error message, forwarded to `Error`.
   * @param status Optional numeric status.
   * @param code Optional error code string.
   * @param details Optional payload of any shape.
   * @param jobId Optional id of the job the error relates to.
   */
  constructor(
    message: string,
    status?: number,
    code?: string,
    details?: unknown,
    jobId?: string,
  ) {
    super(message);
    this.name = "FirecrawlSdkError";
    // Every field is written even when undefined, in this fixed order.
    Object.assign(this, { status, code, details, jobId });
  }
}

/**
 * {@link SdkError} raised for a job that did not finish in time. The code is
 * fixed to `"JOB_TIMEOUT"`, `status` and `details` are left undefined, and
 * the timeout is exposed as `timeoutSeconds`.
 */
export class JobTimeoutError extends SdkError {
  timeoutSeconds: number;

  /**
   * @param jobId Id of the job; stored on the error and used in the message.
   * @param timeoutSeconds Timeout that elapsed, in seconds.
   * @param jobType `"batch"` (default) or `"crawl"`; selects the message prefix.
   */
  constructor(
    jobId: string,
    timeoutSeconds: number,
    jobType: "batch" | "crawl" = "batch",
  ) {
    const label = jobType === "batch" ? "batch scrape" : "crawl";
    // Upper-case only the first character of the label for the message.
    const heading = label.slice(0, 1).toUpperCase() + label.slice(1);
    super(
      `${heading} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
      undefined,
      "JOB_TIMEOUT",
      undefined,
      jobId,
    );
    this.name = "JobTimeoutError";
    this.timeoutSeconds = timeoutSeconds;
  }
}

/** Queue counters, concurrency limit and the time of the most recent success (or `null`). */
export interface QueueStatusResponse {
  success: boolean;
  jobsInQueue: number;
  activeJobsInQueue: number;
  waitingJobsInQueue: number;
  maxConcurrency: number;
  mostRecentSuccess: string | null;
}

// Browser types

/** Result of creating a browser session; everything but `success` is optional. */
export interface BrowserCreateResponse {
  success: boolean;
  id?: string;
  cdpUrl?: string;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  expiresAt?: string;
  error?: string;
}

/** Result of executing code in a browser session. */
export interface BrowserExecuteResponse {
  success: boolean;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  output?: string;
  stdout?: string;
  result?: string;
  stderr?: string;
  exitCode?: number;
  killed?: boolean;
  error?: string;
}

/** Result of deleting a browser session, with optional duration and billing figures. */
export interface BrowserDeleteResponse {
  success: boolean;
  sessionDurationMs?: number;
  creditsBilled?: number;
  error?: string;
}

/** Request to execute code or a prompt; every field is optional. */
export interface ScrapeExecuteRequest {
  code?: string;
  prompt?: string;
  language?: "python" | "node" | "bash";
  timeout?: number;
  origin?: string;
}

/** Alias of {@link BrowserExecuteResponse}. */
export type ScrapeExecuteResponse = BrowserExecuteResponse;

/** Alias of {@link BrowserDeleteResponse}. */
export type ScrapeBrowserDeleteResponse = BrowserDeleteResponse;

/** A browser session as listed by the API. */
export interface BrowserSession {
  id: string;
  status: string;
  cdpUrl: string;
  liveViewUrl: string;
  interactiveLiveViewUrl?: string;
  streamWebView: boolean;
  createdAt: string;
  lastActivity: string;
}

/** List of browser sessions with a success flag. */
export interface BrowserListResponse {
  success: boolean;
  sessions?: BrowserSession[];
  error?: string;
}