/*
 * @mendable/firecrawl-js
 * Version:
 * JavaScript SDK for Firecrawl API
 * 1,653 lines (1,645 loc) • 74.1 kB
 * text/typescript
 */
import * as zt from 'zod';
import { ZodTypeAny } from 'zod';
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
import { EventEmitter } from 'events';
import { TypedEventTarget } from 'typescript-event-target';
/** Format names that may be requested as a bare string. */
type FormatString =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "images"
  | "screenshot"
  | "summary"
  | "changeTracking"
  | "json"
  | "attributes"
  | "branding"
  | "audio"
  | "video";
/** A viewport described by a numeric width and height. */
interface Viewport {
  width: number;
  height: number;
}
/** Object form of a format request; `type` carries the format name. */
interface Format {
  type: FormatString;
}
/**
 * Request for the `"json"` format, optionally guided by a prompt and/or a
 * schema given either as a plain object or as a Zod type.
 */
interface JsonFormat extends Format {
  type: "json";
  prompt?: string;
  schema?: Record<string, unknown> | ZodTypeAny;
}
/**
 * Request for the `"screenshot"` format.
 *
 * `viewport` was previously declared as `Viewport | { width; height }`; the
 * inline object is structurally identical to `Viewport`, so the union has
 * been collapsed. The accepted values are unchanged.
 */
interface ScreenshotFormat {
  type: "screenshot";
  /** Optional flag; by its name, requests a full-page capture. */
  fullPage?: boolean;
  /** Optional numeric quality setting (range not specified here). */
  quality?: number;
  /** Optional viewport dimensions for the capture. */
  viewport?: Viewport;
}
/** Request for the `"changeTracking"` format. */
interface ChangeTrackingFormat extends Format {
  type: "changeTracking";
  /** Diff modes to produce; at the type level any mix of the two values. */
  modes: Array<"git-diff" | "json">;
  /**
   * A JSON Schema object or a Zod schema. The SDK converts Zod schemas to
   * JSON Schema before sending them — see `utils/validation.ts`.
   */
  schema?: Record<string, unknown> | ZodTypeAny;
  prompt?: string;
  tag?: string;
}
/** Request for the `"attributes"` format: a list of selector/attribute pairs. */
interface AttributesFormat extends Format {
  type: "attributes";
  selectors: {
    selector: string;
    attribute: string;
  }[];
}
/** Request for the `"question"` format, carrying the question text. */
interface QuestionFormat {
  type: "question";
  question: string;
}
/** Request for the `"highlights"` format, carrying the query text. */
interface HighlightsFormat {
  type: "highlights";
  query: string;
}
/**
 * Request for the `"query"` format.
 *
 * @deprecated Use QuestionFormat or HighlightsFormat instead.
 */
interface QueryFormat {
  type: "query";
  prompt: string;
  mode?: "freeform" | "directQuote";
}
/** Any value accepted as a format: a format name or one of the format objects. */
type FormatOption =
  | FormatString
  | Format
  | JsonFormat
  | ChangeTrackingFormat
  | ScreenshotFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;
/** `FormatString` minus the format names that are excluded for parsing. */
type ParseFormatString = Exclude<
  FormatString,
  "screenshot" | "changeTracking" | "branding" | "audio" | "video"
>;
/** Object form of a parse format request; `type` carries the format name. */
interface ParseFormat {
  type: ParseFormatString;
}
/** Any value accepted as a parse format: a name or one of the format objects. */
type ParseFormatOption =
  | ParseFormatString
  | ParseFormat
  | JsonFormat
  | AttributesFormat
  | QuestionFormat
  | HighlightsFormat
  | QueryFormat;
/** Optional country and list of languages used as a location setting. */
interface LocationConfig$1 {
  country?: string;
  languages?: string[];
}
/** `"wait"` action; both the duration and the selector are optional. */
interface WaitAction {
  type: "wait";
  milliseconds?: number;
  selector?: string;
}
/**
 * `"screenshot"` action.
 *
 * `viewport` was previously declared as `Viewport | { width; height }`; the
 * inline object is structurally identical to `Viewport`, so the union has
 * been collapsed. The accepted values are unchanged.
 */
interface ScreenshotAction {
  type: "screenshot";
  /** Optional flag; by its name, requests a full-page capture. */
  fullPage?: boolean;
  /** Optional numeric quality setting (range not specified here). */
  quality?: number;
  /** Optional viewport dimensions for the capture. */
  viewport?: Viewport;
}
/** `"click"` action targeting a required selector. */
interface ClickAction {
  type: "click";
  selector: string;
}
/** `"write"` action carrying the text to write. */
interface WriteAction {
  type: "write";
  text: string;
}
/** `"press"` action carrying the key to press. */
interface PressAction {
  type: "press";
  key: string;
}
/** `"scroll"` action with a required direction and an optional selector. */
interface ScrollAction {
  type: "scroll";
  direction: "up" | "down";
  selector?: string;
}
/** `"scrape"` action; it has no fields other than its tag. */
interface ScrapeAction {
  type: "scrape";
}
/** `"executeJavascript"` action carrying the script source as a string. */
interface ExecuteJavascriptAction {
  type: "executeJavascript";
  script: string;
}
/** `"pdf"` action with optional paper format, orientation flag and scale. */
interface PDFAction {
  type: "pdf";
  format?:
    | "A0"
    | "A1"
    | "A2"
    | "A3"
    | "A4"
    | "A5"
    | "A6"
    | "Letter"
    | "Legal"
    | "Tabloid"
    | "Ledger";
  landscape?: boolean;
  scale?: number;
}
/** Union of every action shape, discriminated by `type`. */
type ActionOption =
  | WaitAction
  | ScreenshotAction
  | ClickAction
  | WriteAction
  | PressAction
  | ScrollAction
  | ScrapeAction
  | ExecuteJavascriptAction
  | PDFAction;
/** Options accepted when scraping; every field is optional. */
interface ScrapeOptions {
  formats?: FormatOption[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  timeout?: number;
  waitFor?: number;
  mobile?: boolean;
  /** Parser names, or a `"pdf"` parser object with optional mode and page cap. */
  parsers?: (
    | string
    | {
        type: "pdf";
        mode?: "fast" | "auto" | "ocr";
        maxPages?: number;
      }
  )[];
  actions?: ActionOption[];
  location?: LocationConfig$1;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  fastMode?: boolean;
  useMock?: string;
  blockAds?: boolean;
  /** Listed names or any other string (the union widens to `string`). */
  proxy?: "basic" | "stealth" | "enhanced" | "auto" | string;
  maxAge?: number;
  minAge?: number;
  storeInCache?: boolean;
  lockdown?: boolean;
  profile?: {
    name: string;
    saveChanges?: boolean;
  };
  integration?: string;
  origin?: string;
}
/** Payload types accepted as file contents for parsing. */
type ParseFileData =
  | Blob
  | File
  | Buffer
  | Uint8Array
  | ArrayBuffer
  | string;
/** A file to parse: its data, a filename and an optional content type. */
interface ParseFile {
  data: ParseFileData;
  filename: string;
  contentType?: string;
}
/**
 * `ScrapeOptions` with the listed keys removed, then `formats` and `proxy`
 * re-declared with narrower types.
 */
type ParseOptions = Omit<
  ScrapeOptions,
  | "formats"
  | "waitFor"
  | "mobile"
  | "actions"
  | "location"
  | "maxAge"
  | "minAge"
  | "storeInCache"
  | "lockdown"
  | "proxy"
> & {
  formats?: ParseFormatOption[];
  proxy?: "basic" | "auto";
};
/** Webhook target with optional headers, metadata and event filter. */
interface WebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: ("completed" | "failed" | "page" | "started")[];
}
/** Event names that may be listed in an agent webhook configuration. */
type AgentWebhookEvent =
  | "started"
  | "action"
  | "completed"
  | "failed"
  | "cancelled";
/** Webhook target for agent jobs with optional headers, metadata and events. */
interface AgentWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: AgentWebhookEvent[];
}
/**
 * Branding information attached to a document. Every section is optional,
 * and the profile itself plus most nested sections carry an index signature
 * so additional keys are accepted.
 */
interface BrandingProfile {
  colorScheme?: "light" | "dark";
  logo?: string | null;
  fonts?: {
    family: string;
    [key: string]: unknown;
  }[];
  colors?: {
    primary?: string;
    secondary?: string;
    accent?: string;
    background?: string;
    textPrimary?: string;
    textSecondary?: string;
    link?: string;
    success?: string;
    warning?: string;
    error?: string;
    [key: string]: string | undefined;
  };
  typography?: {
    fontFamilies?: {
      primary?: string;
      heading?: string;
      code?: string;
      [key: string]: string | undefined;
    };
    fontStacks?: {
      primary?: string[];
      heading?: string[];
      body?: string[];
      paragraph?: string[];
      [key: string]: string[] | undefined;
    };
    fontSizes?: {
      h1?: string;
      h2?: string;
      h3?: string;
      body?: string;
      small?: string;
      [key: string]: string | undefined;
    };
    lineHeights?: {
      heading?: number;
      body?: number;
      [key: string]: number | undefined;
    };
    fontWeights?: {
      light?: number;
      regular?: number;
      medium?: number;
      bold?: number;
      [key: string]: number | undefined;
    };
  };
  spacing?: {
    baseUnit?: number;
    padding?: Record<string, number>;
    margins?: Record<string, number>;
    gridGutter?: number;
    borderRadius?: string;
    [key: string]: number | string | Record<string, number> | undefined;
  };
  components?: {
    buttonPrimary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    buttonSecondary?: {
      background?: string;
      textColor?: string;
      borderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    input?: {
      borderColor?: string;
      focusBorderColor?: string;
      borderRadius?: string;
      [key: string]: string | undefined;
    };
    [key: string]: unknown;
  };
  icons?: {
    style?: string;
    primaryColor?: string;
    [key: string]: string | undefined;
  };
  images?: {
    logo?: string | null;
    favicon?: string | null;
    ogImage?: string | null;
    [key: string]: string | null | undefined;
  };
  animations?: {
    transitionDuration?: string;
    easing?: string;
    [key: string]: string | undefined;
  };
  layout?: {
    grid?: {
      columns?: number;
      maxWidth?: string;
      [key: string]: number | string | undefined;
    };
    headerHeight?: string;
    footerHeight?: string;
    [key: string]: number | string | Record<string, number | string | undefined> | undefined;
  };
  tone?: {
    voice?: string;
    emojiUsage?: string;
    [key: string]: string | undefined;
  };
  /** Unlike the other sections, all three fields are required when present. */
  personality?: {
    tone: "professional" | "playful" | "modern" | "traditional" | "minimalist" | "bold";
    energy: "low" | "medium" | "high";
    targetAudience: string;
  };
  [key: string]: unknown;
}
/**
 * Metadata attached to a document. All named fields are optional and the
 * index signature admits further keys typed as `unknown`.
 */
interface DocumentMetadata {
  title?: string;
  description?: string;
  url?: string;
  language?: string;
  /** Either a single string or a list of strings. */
  keywords?: string | string[];
  robots?: string;

  ogTitle?: string;
  ogDescription?: string;
  ogUrl?: string;
  ogImage?: string;
  ogAudio?: string;
  ogDeterminer?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogSiteName?: string;
  ogVideo?: string;
  favicon?: string;

  dcTermsCreated?: string;
  dcDateCreated?: string;
  dcDate?: string;
  dcTermsType?: string;
  dcType?: string;
  dcTermsAudience?: string;
  dcTermsSubject?: string;
  dcSubject?: string;
  dcDescription?: string;
  dcTermsKeywords?: string;

  modifiedTime?: string;
  publishedTime?: string;
  articleTag?: string;
  articleSection?: string;

  sourceURL?: string;
  statusCode?: number;
  scrapeId?: string;
  numPages?: number;
  contentType?: string;
  timezone?: string;
  proxyUsed?: "basic" | "stealth";
  cacheState?: "hit" | "miss";
  cachedAt?: string;
  creditsUsed?: number;
  concurrencyLimited?: boolean;
  concurrencyQueueDurationMs?: number;
  error?: string;

  [key: string]: unknown;
}
/** A document result; each field is optional. */
interface Document {
  markdown?: string;
  html?: string;
  rawHtml?: string;
  json?: unknown;
  summary?: string;
  metadata?: DocumentMetadata;
  links?: string[];
  images?: string[];
  screenshot?: string;
  audio?: string;
  video?: string;
  /** One entry per selector/attribute pair with the values found for it. */
  attributes?: {
    selector: string;
    attribute: string;
    values: string[];
  }[];
  actions?: Record<string, unknown>;
  answer?: string;
  highlights?: string;
  warning?: string;
  changeTracking?: Record<string, unknown>;
  branding?: BrandingProfile;
}
/** Controls how paginated job results are followed and aggregated. */
interface PaginationConfig {
  /** Defaults to true: follow `next` links automatically and aggregate all documents. */
  autoPaginate?: boolean;
  /** Cap on the number of additional pages fetched after the first response. */
  maxPages?: number;
  /** Cap on the total number of documents returned across all pages. */
  maxResults?: number;
  /** Cap, in seconds, on the time spent fetching additional pages. */
  maxWaitTime?: number;
}
/** A web search hit; only `url` is required. */
interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
  category?: string;
}
/** A news search hit; every field is optional. */
interface SearchResultNews {
  title?: string;
  url?: string;
  snippet?: string;
  date?: string;
  imageUrl?: string;
  position?: number;
  category?: string;
}
/** An image search hit; every field is optional. */
interface SearchResultImages {
  title?: string;
  imageUrl?: string;
  imageWidth?: number;
  imageHeight?: number;
  url?: string;
  position?: number;
}
/**
 * Search results grouped by source. Each list may hold either the plain
 * result shape for that source or a `Document`.
 */
interface SearchData {
  web?: (SearchResultWeb | Document)[];
  news?: (SearchResultNews | Document)[];
  images?: (SearchResultImages | Document)[];
}
/** Object form of a search category. */
interface CategoryOption {
  type: "github" | "research" | "pdf";
}
/** Search request; only `query` is required. */
interface SearchRequest {
  query: string;
  /** Source names, or objects naming a source through `type`. */
  sources?: (
    | "web"
    | "news"
    | "images"
    | {
        type: "web" | "news" | "images";
      }
  )[];
  /** Category names, or `CategoryOption` objects. */
  categories?: ("github" | "research" | "pdf" | CategoryOption)[];
  includeDomains?: string[];
  excludeDomains?: string[];
  limit?: number;
  tbs?: string;
  location?: string;
  ignoreInvalidURLs?: boolean;
  timeout?: number;
  scrapeOptions?: ScrapeOptions;
  integration?: string;
  origin?: string;
}
/** Crawl configuration; every field is optional and several also accept `null`. */
interface CrawlOptions {
  prompt?: string | null;
  excludePaths?: string[] | null;
  includePaths?: string[] | null;
  maxDiscoveryDepth?: number | null;
  sitemap?: "skip" | "include" | "only";
  ignoreQueryParameters?: boolean;
  deduplicateSimilarURLs?: boolean;
  limit?: number | null;
  crawlEntireDomain?: boolean;
  allowExternalLinks?: boolean;
  allowSubdomains?: boolean;
  ignoreRobotsTxt?: boolean;
  robotsUserAgent?: string | null;
  delay?: number | null;
  maxConcurrency?: number | null;
  /** A URL string or a full webhook configuration. */
  webhook?: string | WebhookConfig | null;
  scrapeOptions?: ScrapeOptions | null;
  regexOnFullURL?: boolean;
  zeroDataRetention?: boolean;
  integration?: string;
  origin?: string;
}
/** Identifier and URL returned for a crawl job. */
interface CrawlResponse$1 {
  id: string;
  url: string;
}
/** Snapshot of a crawl job: status, counters and the documents held so far. */
interface CrawlJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  total: number;
  completed: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}
/** Options for a batch scrape job; every field is optional. */
interface BatchScrapeOptions {
  /** Scrape options for the batch. */
  options?: ScrapeOptions;
  /** A URL string or a full webhook configuration. */
  webhook?: string | WebhookConfig;
  appendToId?: string;
  ignoreInvalidURLs?: boolean;
  maxConcurrency?: number;
  zeroDataRetention?: boolean;
  idempotencyKey?: string;
  integration?: string;
  origin?: string;
}
/** Identifier and URL returned for a batch scrape job, plus any invalid URLs. */
interface BatchScrapeResponse$1 {
  id: string;
  url: string;
  invalidURLs?: string[];
}
/** Snapshot of a batch scrape job: status, counters and the documents held so far. */
interface BatchScrapeJob {
  id: string;
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}
/** Result of a map call: the list of links. */
interface MapData {
  links: SearchResultWeb[];
}
/** Options for a map call; every field is optional. */
interface MapOptions {
  search?: string;
  sitemap?: "only" | "include" | "skip";
  includeSubdomains?: boolean;
  ignoreQueryParameters?: boolean;
  limit?: number;
  timeout?: number;
  integration?: string;
  origin?: string;
  location?: LocationConfig$1;
}
/**
 * When a monitor runs.
 *
 * Creating or updating: supply exactly one of `cron` or `text`.
 * - `cron` is a 5-field cron expression, e.g. `"*\u002F30 * * * *"`
 *   (`\u002F` stands for `/`; it is escaped so this comment is not closed).
 * - `text` is a natural-language schedule such as `"every 30 minutes"`,
 *   `"hourly"` or `"daily at 9:00"`, which Firecrawl normalizes to a cron
 *   expression server-side.
 *
 * Reading: the API always returns the normalized `cron`, so it is populated
 * in responses even for monitors created with `text`.
 */
interface MonitorSchedule {
  cron?: string;
  text?: string;
  timezone?: string;
}
/** Email notification settings for a monitor; every field is optional. */
interface MonitorEmailNotification {
  enabled?: boolean;
  recipients?: string[];
  includeDiffs?: boolean;
}
/** Notification settings for a monitor; currently only an email section. */
interface MonitorNotification {
  email?: MonitorEmailNotification;
}
/** Webhook target for a monitor; `events` is an unconstrained string list. */
interface MonitorWebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: string[];
}
/** Monitor target of type `"scrape"`: a list of URLs with optional scrape options. */
interface MonitorScrapeTarget {
  id?: string;
  type: "scrape";
  urls: string[];
  scrapeOptions?: ScrapeOptions;
}
/** Monitor target of type `"crawl"`: one URL with optional crawl and scrape options. */
interface MonitorCrawlTarget {
  id?: string;
  type: "crawl";
  url: string;
  crawlOptions?: CrawlOptions;
  scrapeOptions?: ScrapeOptions;
}
/** Either kind of monitor target, discriminated by `type`. */
type MonitorTarget =
  | MonitorScrapeTarget
  | MonitorCrawlTarget;
/** Payload for creating a monitor; name, schedule and targets are required. */
interface CreateMonitorRequest {
  name: string;
  schedule: MonitorSchedule;
  webhook?: MonitorWebhookConfig;
  notification?: MonitorNotification;
  targets: MonitorTarget[];
  retentionDays?: number;
}
/**
 * Payload for updating a monitor; every field is optional, and `webhook`
 * and `notification` additionally accept `null`.
 */
interface UpdateMonitorRequest {
  name?: string;
  status?: "active" | "paused";
  schedule?: MonitorSchedule;
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  targets?: MonitorTarget[];
  retentionDays?: number;
}
/** Numeric counters summarising a monitor check, one per page outcome plus a total. */
interface MonitorSummary {
  totalPages: number;
  same: number;
  changed: number;
  new: number;
  removed: number;
  error: number;
}
/** A monitor as returned by the API. */
interface Monitor {
  id: string;
  name: string;
  status: "active" | "paused" | "deleted";
  schedule: MonitorSchedule;
  nextRunAt?: string | null;
  lastRunAt?: string | null;
  currentCheckId?: string | null;
  targets: MonitorTarget[];
  webhook?: MonitorWebhookConfig | null;
  notification?: MonitorNotification | null;
  retentionDays: number;
  estimatedCreditsPerMonth?: number | null;
  lastCheckSummary?: MonitorSummary | null;
  createdAt: string;
  updatedAt: string;
}
/** A single run ("check") of a monitor. */
interface MonitorCheck {
  id: string;
  monitorId: string;
  status:
    | "queued"
    | "running"
    | "completed"
    | "failed"
    | "partial"
    | "skipped_overlap";
  trigger: "scheduled" | "manual";
  scheduledFor?: string | null;
  startedAt?: string | null;
  finishedAt?: string | null;
  estimatedCredits?: number | null;
  reservedCredits?: number | null;
  actualCredits?: number | null;
  billingStatus:
    | "not_applicable"
    | "reserved"
    | "confirmed"
    | "released"
    | "failed";
  summary: MonitorSummary;
  /** Shape is not specified by this declaration. */
  targetResults?: unknown;
  /** Shape is not specified by this declaration. */
  notificationStatus?: unknown;
  error?: string | null;
  createdAt: string;
  updatedAt: string;
}
/**
 * Field-level diff for monitors that requested JSON extraction: maps each
 * field name to its previous and current value.
 */
interface MonitorJsonFieldDiff {
  [field: string]: {
    previous: unknown;
    current: unknown;
  };
}
/**
 * Diff returned with a monitor page whose scrape produced a change. Which
 * fields are present depends on the formats the monitor asked for:
 *
 * - markdown-only: `{ text, json }`, with `json` being the `parseDiff` AST
 *   (a `{ files: [...] }` object).
 * - JSON extraction: `{ json }`, with `json` being the per-field
 *   `{ previous, current }` map.
 * - mixed (JSON + git-diff): both `text` (markdown sidecar) and `json`
 *   (field-level diff).
 */
interface MonitorPageDiff {
  text?: string;
  /** parseDiff AST for markdown variants; per-field diff for JSON variants. */
  json?:
    | MonitorJsonFieldDiff
    | {
        files: unknown[];
      };
}
/**
 * The JSON extraction as it stood at this run. Provided for JSON and
 * mixed-mode monitors, and absent for markdown-only ones.
 */
interface MonitorPageSnapshot {
  json?: Record<string, unknown>;
}
/** Per-page result inside a monitor check. */
interface MonitorCheckPage {
  id: string;
  targetId: string;
  url: string;
  status:
    | "same"
    | "new"
    | "changed"
    | "removed"
    | "error";
  previousScrapeId?: string | null;
  currentScrapeId?: string | null;
  statusCode?: number | null;
  error?: string | null;
  /** Shape is not specified by this declaration. */
  metadata?: unknown;
  diff?: MonitorPageDiff | null;
  snapshot?: MonitorPageSnapshot | null;
  createdAt: string;
}
/** A monitor check together with its pages and an optional `next` cursor. */
interface MonitorCheckDetail extends MonitorCheck {
  pages: MonitorCheckPage[];
  next?: string | null;
}
/** Optional `limit` and `offset` for listing monitors. */
interface ListMonitorsOptions {
  limit?: number;
  offset?: number;
}
/** Listing checks takes the same options as listing monitors. */
type ListMonitorChecksOptions =
  ListMonitorsOptions;
/** Pagination controls plus optional `limit`, `skip` and page-status filter. */
type GetMonitorCheckOptions = PaginationConfig & {
  limit?: number;
  skip?: number;
  status?: MonitorCheckPage["status"];
};
/** Response of the extract endpoints; every field is optional. */
interface ExtractResponse$1 {
  success?: boolean;
  id?: string;
  status?:
    | "processing"
    | "completed"
    | "failed"
    | "cancelled";
  data?: unknown;
  error?: string;
  warning?: string;
  warnings?: string[];
  replacement?: string;
  sources?: Record<string, unknown>;
  expiresAt?: string;
  creditsUsed?: number;
}
/** Response returned when an agent job is started. */
interface AgentResponse {
  success: boolean;
  id: string;
  error?: string;
}
/** Status of an agent job, with its data once available. */
interface AgentStatusResponse {
  success: boolean;
  status:
    | "processing"
    | "completed"
    | "failed";
  error?: string;
  data?: unknown;
  model?: "spark-1-pro" | "spark-1-mini";
  expiresAt: string;
  creditsUsed?: number;
}
/** Agent selection passed with extract requests; `model` is required. */
interface AgentOptions$1 {
  model:
    | "FIRE-1"
    | "v3-beta";
}
/** Current and maximum concurrency values. */
interface ConcurrencyCheck {
  concurrency: number;
  maxConcurrency: number;
}
/** Remaining credits, with optional plan total and billing period bounds. */
interface CreditUsage {
  remainingCredits: number;
  planCredits?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}
/** Remaining tokens, with optional plan total and billing period bounds. */
interface TokenUsage {
  remainingTokens: number;
  planTokens?: number;
  billingPeriodStart?: string | null;
  billingPeriodEnd?: string | null;
}
/** Credits used within one historical period, optionally tied to an API key. */
interface CreditUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  creditsUsed: number;
}
/** Historical credit usage: a success flag and the list of periods. */
interface CreditUsageHistoricalResponse {
  success: boolean;
  periods: CreditUsageHistoricalPeriod[];
}
/** Tokens used within one historical period, optionally tied to an API key. */
interface TokenUsageHistoricalPeriod {
  startDate: string | null;
  endDate: string | null;
  apiKey?: string;
  tokensUsed: number;
}
/** Historical token usage: a success flag and the list of periods. */
interface TokenUsageHistoricalResponse {
  success: boolean;
  periods: TokenUsageHistoricalPeriod[];
}
/** Errors recorded for a job plus the list of robots-blocked entries. */
interface CrawlErrorsResponse$1 {
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    code?: string;
    error: string;
  }>;
  robotsBlocked: string[];
}
/** One active crawl: its id, team id, URL and optional options bag. */
interface ActiveCrawl {
  id: string;
  teamId: string;
  url: string;
  options?: Record<string, unknown> | null;
}
/** List of active crawls with a success flag. */
interface ActiveCrawlsResponse {
  success: boolean;
  crawls: ActiveCrawl[];
}
/** Structured error information; only `message` is required. */
interface ErrorDetails {
  code?: string;
  message: string;
  details?: Record<string, unknown>;
  status?: number;
}
/**
 * Error subclass declared by the SDK. Besides the message it can carry an
 * optional status, code, details payload and job id.
 */
declare class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
  jobId?: string;

  constructor(
    message: string,
    status?: number,
    code?: string,
    details?: unknown,
    jobId?: string,
  );
}
/**
 * `SdkError` subclass that records a timeout in seconds. Constructed from a
 * job id, the timeout, and an optional job type.
 */
declare class JobTimeoutError extends SdkError {
  timeoutSeconds: number;

  constructor(
    jobId: string,
    timeoutSeconds: number,
    jobType?: "batch" | "crawl",
  );
}
/** Queue metrics: job counters, the concurrency cap and the latest success. */
interface QueueStatusResponse$1 {
  success: boolean;
  jobsInQueue: number;
  activeJobsInQueue: number;
  waitingJobsInQueue: number;
  maxConcurrency: number;
  mostRecentSuccess: string | null;
}
/** Response to creating a browser session; only `success` is required. */
interface BrowserCreateResponse {
  success: boolean;
  id?: string;
  cdpUrl?: string;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  expiresAt?: string;
  error?: string;
}
/** Response to executing code in a browser session; only `success` is required. */
interface BrowserExecuteResponse {
  success: boolean;
  liveViewUrl?: string;
  interactiveLiveViewUrl?: string;
  output?: string;
  stdout?: string;
  result?: string;
  stderr?: string;
  exitCode?: number;
  killed?: boolean;
  error?: string;
}
/** Response to deleting a browser session; only `success` is required. */
interface BrowserDeleteResponse {
  success: boolean;
  sessionDurationMs?: number;
  creditsBilled?: number;
  error?: string;
}
/** Request to run code or a prompt against a scrape job's session; all fields optional. */
interface ScrapeExecuteRequest {
  code?: string;
  prompt?: string;
  language?: "python" | "node" | "bash";
  timeout?: number;
  origin?: string;
}
/** Alias: scrape execution returns the same shape as browser execution. */
type ScrapeExecuteResponse =
  BrowserExecuteResponse;
/** Alias: stopping a scrape session returns the same shape as deleting a browser. */
type ScrapeBrowserDeleteResponse =
  BrowserDeleteResponse;
/** One browser session as listed by the API. */
interface BrowserSession {
  id: string;
  status: string;
  cdpUrl: string;
  liveViewUrl: string;
  interactiveLiveViewUrl?: string;
  streamWebView: boolean;
  createdAt: string;
  lastActivity: string;
}
/** Response to listing browser sessions. */
interface BrowserListResponse {
  success: boolean;
  sessions?: BrowserSession[];
  error?: string;
}
/** Constructor options for `HttpClient`; key and URL are required. */
interface HttpClientOptions {
  apiKey: string;
  apiUrl: string;
  timeoutMs?: number;
  maxRetries?: number;
  backoffFactor?: number;
}
/** Per-request overrides: extra headers and a timeout in milliseconds. */
interface RequestOptions {
  headers?: Record<string, string>;
  timeoutMs?: number;
}
/**
 * Ambient declaration of the SDK's HTTP transport. The verb methods each
 * resolve to an `AxiosResponse`; private members are declared without types.
 */
declare class HttpClient {
  private instance;
  private readonly apiKey;
  private readonly apiUrl;
  private readonly maxRetries;
  private readonly backoffFactor;

  constructor(options: HttpClientOptions);

  getApiUrl(): string;
  getApiKey(): string;

  private request;
  private sleep;

  post<T = any>(
    endpoint: string,
    body: Record<string, unknown>,
    options?: RequestOptions,
  ): Promise<AxiosResponse<T, any, {}>>;

  postMultipart<T = any>(
    endpoint: string,
    formData: FormData,
    options?: RequestOptions,
  ): Promise<AxiosResponse<T, any, {}>>;

  get<T = any>(
    endpoint: string,
    headers?: Record<string, string>,
  ): Promise<AxiosResponse<T, any, {}>>;

  delete<T = any>(
    endpoint: string,
    headers?: Record<string, string>,
  ): Promise<AxiosResponse<T, any, {}>>;

  patch<T = any>(
    endpoint: string,
    body: Record<string, unknown>,
    options?: RequestOptions,
  ): Promise<AxiosResponse<T, any, {}>>;

  /** Builds a header map; accepts an optional idempotency key. */
  prepareHeaders(idempotencyKey?: string): Record<string, string>;
}
/**
 * Turns extract arguments into a plain record.
 * @param args Extract arguments; every field is optional. `schema` may be a
 * plain object or a Zod type.
 * @returns A `Record<string, unknown>`.
 */
declare function prepareExtractPayload(
  args: {
    urls?: string[];
    prompt?: string;
    schema?: Record<string, unknown> | ZodTypeAny;
    systemPrompt?: string;
    allowExternalLinks?: boolean;
    enableWebSearch?: boolean;
    showSources?: boolean;
    scrapeOptions?: ScrapeOptions;
    ignoreInvalidURLs?: boolean;
    integration?: string;
    origin?: string;
    agent?: AgentOptions$1;
  },
): Record<string, unknown>;
/**
 * Starts an extract job using the given transport.
 * @param http HTTP client to use.
 * @param args Same argument object that `prepareExtractPayload` accepts.
 * @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
 * Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
 */
declare function startExtract(
  http: HttpClient,
  args: Parameters<typeof prepareExtractPayload>[0],
): Promise<ExtractResponse$1>;
/**
 * Turns agent arguments into a plain record.
 * @param args Agent arguments; only `prompt` is required. `schema` may be a
 * plain object or a Zod type, and `webhook` a URL string or a config object.
 * @returns A `Record<string, unknown>`.
 */
declare function prepareAgentPayload(
  args: {
    urls?: string[];
    prompt: string;
    schema?: Record<string, unknown> | ZodTypeAny;
    integration?: string;
    origin?: string;
    maxCredits?: number;
    strictConstrainToURLs?: boolean;
    model?: "spark-1-pro" | "spark-1-mini";
    webhook?: string | AgentWebhookConfig;
  },
): Record<string, unknown>;
/**
 * Starts an agent job using the given transport.
 * @param http HTTP client to use.
 * @param args Same argument object that `prepareAgentPayload` accepts.
 */
declare function startAgent(
  http: HttpClient,
  args: Parameters<typeof prepareAgentPayload>[0],
): Promise<AgentResponse>;
/**
 * Creates a browser session using the given transport.
 * @param http HTTP client to use.
 * @param args Optional session settings; every field is optional.
 */
declare function browser(
  http: HttpClient,
  args?: {
    ttl?: number;
    activityTtl?: number;
    streamWebView?: boolean;
    profile?: {
      name: string;
      saveChanges?: boolean;
    };
    integration?: string;
    origin?: string;
  },
): Promise<BrowserCreateResponse>;
/**
 * Executes code in a browser session using the given transport.
 * @param http HTTP client to use.
 * @param sessionId Browser session id.
 * @param args Required `code`, with optional `language` and `timeout`.
 */
declare function browserExecute(
  http: HttpClient,
  sessionId: string,
  args: {
    code: string;
    language?: "python" | "node" | "bash";
    timeout?: number;
  },
): Promise<BrowserExecuteResponse>;
/**
 * Lists browser sessions using the given transport.
 * @param http HTTP client to use.
 * @param args Optional filter with a `status` of `"active"` or `"destroyed"`.
 */
declare function listBrowsers(
  http: HttpClient,
  args?: {
    status?: "active" | "destroyed";
  },
): Promise<BrowserListResponse>;
/** The two job kinds a watcher can follow. */
type JobKind =
  | "crawl"
  | "batch";
/** Optional settings for a `Watcher`: job kind, poll interval and timeout. */
interface WatcherOptions {
  kind?: JobKind;
  pollInterval?: number;
  timeout?: number;
}
/**
 * Ambient declaration of the job watcher, an `EventEmitter` subclass built
 * from an HTTP client, a job id and optional `WatcherOptions`. Its public
 * surface is `start()` and `close()`; the rest is private and untyped here.
 */
declare class Watcher extends EventEmitter {
  private readonly http;
  private readonly jobId;
  private readonly kind;
  private readonly pollInterval;
  private readonly timeout?;
  private ws?;
  private closed;
  private readonly emittedDocumentKeys;

  constructor(
    http: HttpClient,
    jobId: string,
    opts?: WatcherOptions,
  );

  private buildWsUrl;

  start(): Promise<void>;

  private attachWsHandlers;
  private documentKey;
  private emitDocuments;
  private emitSnapshot;
  private pollLoop;

  close(): void;
}
/**
 * Pulls the `schema` type out of a formats array type.
 *
 * When `Formats` is an array type (readonly or mutable), the union of its
 * element types is narrowed with `Extract` to the members assignable to
 * `{ type: "json"; schema?: unknown }`, and the type of their `schema`
 * property is returned. For any non-array `Formats` the result is `never`.
 */
type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? Extract<Formats[number], {
type: "json";
schema?: unknown;
}>["schema"] : never;
/**
 * Computes the type of the `json` result for a given options type.
 *
 * If `Opts` has a `formats` property, its type is captured as `Fmts` and
 * passed to `ExtractJsonSchemaFromFormats`. When the extracted schema type
 * extends `zt.ZodTypeAny`, the result is `zt.infer` of that schema; in every
 * other case (no `formats`, or a schema that is not a Zod type) the result
 * is `unknown`.
 */
type InferredJsonFromOptions<Opts> = Opts extends {
formats?: infer Fmts;
} ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
/**
 * Transport settings for the v2 client. Every field is optional.
 */
interface FirecrawlClientOptions {
  /** API key; when absent, FIRECRAWL_API_KEY is used instead. */
  apiKey?: string | null;
  /** API base URL; when absent, FIRECRAWL_API_URL or https://api.firecrawl.dev is used. */
  apiUrl?: string | null;
  /** Optional timeout applied to each request, in milliseconds. */
  timeoutMs?: number;
  /** Optional cap on automatic retries for transient failures. */
  maxRetries?: number;
  /** Optional exponential backoff factor applied between retries. */
  backoffFactor?: number;
}
/**
* Firecrawl v2 client. Provides typed access to all v2 endpoints and utilities.
*
* This is an ambient declaration: it describes the public surface only. The
* private members are listed without types.
*/
declare class FirecrawlClient {
/** HTTP transport used by the client (type not visible in this declaration). */
private readonly http;
/** NOTE(review): presumably tells whether the configured API URL is the hosted service — confirm in the implementation. */
private isCloudService;
/**
* Create a v2 client.
* @param options Transport configuration (API key, base URL, timeouts, retries).
*/
constructor(options?: FirecrawlClientOptions);
/**
* Scrape a single URL.
*
* Typed overload: when `options` is supplied, the type of the returned `json`
* field is computed from `options.formats` through `InferredJsonFromOptions`
* (the inferred Zod output type for a Zod `json` schema, `unknown` otherwise).
* @param url Target URL.
* @param options Optional scrape options (formats, headers, etc.).
* @returns Resolved document with requested formats.
*/
scrape<Opts extends ScrapeOptions>(url: string, options: Opts): Promise<Omit<Document, "json"> & {
json?: InferredJsonFromOptions<Opts>;
}>;
/**
* Scrape a single URL (untyped overload: `json` stays `unknown`).
* @param url Target URL.
* @param options Optional scrape options (formats, headers, etc.).
* @returns Resolved document with requested formats.
*/
scrape(url: string, options?: ScrapeOptions): Promise<Document>;
/**
* Interact with the browser session associated with a scrape job.
* @param jobId Scrape job id.
* @param args Code or prompt to execute, with language/timeout options.
* @returns Execution result including output, stdout, stderr, exitCode, and killed status.
*/
interact(jobId: string, args: ScrapeExecuteRequest): Promise<ScrapeExecuteResponse>;
/**
* Stop the interaction session associated with a scrape job.
* @param jobId Scrape job id.
* @returns Deletion response for the session.
*/
stopInteraction(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
/**
* @deprecated Use interact().
*/
scrapeExecute(jobId: string, args: ScrapeExecuteRequest): Promise<ScrapeExecuteResponse>;
/**
* @deprecated Use stopInteraction().
*/
stopInteractiveBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
/**
* @deprecated Use stopInteraction().
*/
deleteScrapeBrowser(jobId: string): Promise<ScrapeBrowserDeleteResponse>;
/**
* Parse an uploaded file via the v2 parse endpoint.
*
* Typed overload: when `options` is supplied, the type of the returned `json`
* field is computed from `options.formats` through `InferredJsonFromOptions`.
* @param file File payload (data, filename, optional contentType).
* @param options Optional parse options (formats, parsers, etc.).
* Note: parse does not support the changeTracking, screenshot, branding,
* audio, or video formats, nor the actions, waitFor, location, or mobile
* options.
* @returns Parsed document with requested formats.
*/
parse<Opts extends ParseOptions>(file: ParseFile, options: Opts): Promise<Omit<Document, "json"> & {
json?: InferredJsonFromOptions<Opts>;
}>;
/**
* Parse an uploaded file (untyped overload: `json` stays `unknown`).
* @param file File payload (data, filename, optional contentType).
* @param options Optional parse options (formats, parsers, etc.).
* @returns Parsed document with requested formats.
*/
parse(file: ParseFile, options?: ParseOptions): Promise<Document>;
/**
* Search the web and optionally scrape each result.
* @param query Search query string.
* @param req Additional search options (sources, limit, scrapeOptions, etc.).
* @returns Structured search results.
*/
search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;
/**
* Map a site to discover URLs (sitemap-aware).
* @param url Root URL to map.
* @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
* @returns Discovered links.
*/
map(url: string, options?: MapOptions): Promise<MapData>;
/**
* Start a crawl job (async).
* @param url Root URL to crawl.
* @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
* @returns Job id and url.
*/
startCrawl(url: string, req?: CrawlOptions): Promise<CrawlResponse$1>;
/**
* Get the status and partial data of a crawl job.
* @param jobId Crawl job id.
* @param pagination Optional pagination controls (autoPaginate, maxPages, maxResults, maxWaitTime).
* @returns Current job snapshot.
*/
getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob>;
/**
* Cancel a crawl job.
* @param jobId Crawl job id.
* @returns True if cancelled.
*/
cancelCrawl(jobId: string): Promise<boolean>;
/**
* Convenience waiter: start a crawl and poll until it finishes.
* @param url Root URL to crawl.
* @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
crawl(url: string, req?: CrawlOptions & {
pollInterval?: number;
timeout?: number;
}): Promise<CrawlJob>;
/**
* Retrieve crawl errors and robots.txt blocks.
* @param crawlId Crawl job id.
*/
getCrawlErrors(crawlId: string): Promise<CrawlErrorsResponse$1>;
/**
* List active crawls for the authenticated team.
*/
getActiveCrawls(): Promise<ActiveCrawlsResponse>;
/**
* Preview normalized crawl parameters produced by a natural-language prompt.
* @param url Root URL.
* @param prompt Natural-language instruction.
*/
crawlParamsPreview(url: string, prompt: string): Promise<Record<string, unknown>>;
/**
* Create a scheduled monitor.
* @param request Monitor definition (name, schedule, targets, optional webhook/notification/retention).
* @returns The created monitor.
*/
createMonitor(request: CreateMonitorRequest): Promise<Monitor>;
/**
* List monitors for the authenticated team.
* @param options Optional limit/offset.
*/
listMonitors(options?: ListMonitorsOptions): Promise<Monitor[]>;
/**
* Get a monitor by id.
* @param monitorId Monitor id.
*/
getMonitor(monitorId: string): Promise<Monitor>;
/**
* Update a monitor.
* @param monitorId Monitor id.
* @param request Fields to change; all are optional.
* @returns The monitor as returned by the API.
*/
updateMonitor(monitorId: string, request: UpdateMonitorRequest): Promise<Monitor>;
/**
* Delete a monitor.
* @param monitorId Monitor id.
*/
deleteMonitor(monitorId: string): Promise<boolean>;
/**
* Trigger a manual monitor check.
* @param monitorId Monitor id.
* @returns The check that was created.
*/
runMonitor(monitorId: string): Promise<MonitorCheck>;
/**
* List checks for a monitor.
* @param monitorId Monitor id.
* @param options Optional limit/offset.
*/
listMonitorChecks(monitorId: string, options?: ListMonitorChecksOptions): Promise<MonitorCheck[]>;
/**
* Get a monitor check with paginated page results and inline diffs.
* @param monitorId Monitor id.
* @param checkId Check id.
* @param options Optional pagination controls plus limit, skip, and a page-status filter.
*/
getMonitorCheck(monitorId: string, checkId: string, options?: GetMonitorCheckOptions): Promise<MonitorCheckDetail>;
/**
* Start a batch scrape job for multiple URLs (async).
* @param urls URLs to scrape.
* @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
* @returns Job id and url.
*/
startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse$1>;
/**
* Get the status and partial data of a batch scrape job.
* @param jobId Batch job id.
* @param pagination Optional pagination controls (autoPaginate, maxPages, maxResults, maxWaitTime).
* @returns Current job snapshot.
*/
getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;
/**
* Retrieve batch scrape errors and robots.txt blocks.
* @param jobId Batch job id.
*/
getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse$1>;
/**
* Cancel a batch scrape job.
* @param jobId Batch job id.
* @returns True if cancelled.
*/
cancelBatchScrape(jobId: string): Promise<boolean>;
/**
* Convenience waiter: start a batch scrape and poll until it finishes.
* @param urls URLs to scrape.
* @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
batchScrape(urls: string[], opts?: BatchScrapeOptions & {
pollInterval?: number;
timeout?: number;
}): Promise<BatchScrapeJob>;
/**
* Start an extract job (async).
* @param args Extraction request (urls, schema or prompt, flags).
* @returns Job id or processing state.
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
*/
startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
/**
* Get extract job status/data.
* @param jobId Extract job id.
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
*/
getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
/**
* Convenience waiter: start an extract and poll until it finishes.
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
* @returns Final extract response.
* @deprecated The extract endpoint is in maintenance mode and its use is discouraged.
* Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor to find a replacement.
*/
extract(args: Parameters<typeof startExtract>[1] & {
pollInterval?: number;
timeout?: number;
}): Promise<ExtractResponse$1>;
/**
* Start an agent job (async).
* @param args Agent request (urls, prompt, schema).
* @returns Job id or processing state.
*/
startAgent(args: Parameters<typeof startAgent>[1]): Promise<AgentResponse>;
/**
* Get agent job status/data.
* @param jobId Agent job id.
*/
getAgentStatus(jobId: string): Promise<AgentStatusResponse>;
/**
* Convenience waiter: start an agent and poll until it finishes.
* @param args Agent request plus waiter controls (pollInterval, timeout seconds).
* @returns Final agent response.
*/
agent(args: Parameters<typeof startAgent>[1] & {
pollInterval?: number;
timeout?: number;
}): Promise<AgentStatusResponse>;
/**
* Cancel an agent job.
* @param jobId Agent job id.
* @returns True if cancelled.
*/
cancelAgent(jobId: string): Promise<boolean>;
/**
* Create a new browser session.
* @param args Session options (ttl, activityTtl, streamWebView, profile).
* @returns Session id, CDP URL, live view URL, and expiration time.
*/
browser(args?: Parameters<typeof browser>[1]): Promise<BrowserCreateResponse>;
/**
* Execute code in a browser session.
* @param sessionId Browser session id.
* @param args Code, language ("python" | "node" | "bash"), and optional timeout.
* @returns Execution result including stdout, stderr, exitCode, and killed status.
*/
browserExecute(sessionId: string, args: Parameters<typeof browserExecute>[2]): Promise<BrowserExecuteResponse>;
/**
* Delete a browser session.
* @param sessionId Browser session id.
*/
deleteBrowser(sessionId: string): Promise<BrowserDeleteResponse>;
/**
* List browser sessions.
* @param args Optional filter (status: "active" | "destroyed").
* @returns List of browser sessions.
*/
listBrowsers(args?: Parameters<typeof listBrowsers>[1]): Promise<BrowserListResponse>;
/** Current concurrency usage. */
getConcurrency(): Promise<ConcurrencyCheck>;
/** Current credit usage. */
getCreditUsage(): Promise<CreditUsage>;
/** Recent token usage. */
getTokenUsage(): Promise<TokenUsage>;
/** Historical credit usage by month; set byApiKey to true to break down by API key. */
getCreditUsageHistorical(byApiKey?: boolean): Promise<CreditUsageHistoricalResponse>;
/** Historical token usage by month; set byApiKey to true to break down by API key. */
getTokenUsageHistorical(byApiKey?: boolean): Promise<TokenUsageHistoricalResponse>;
/** Metrics about the team's scrape queue. */
getQueueStatus(): Promise<QueueStatusResponse$1>;
/**
* Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
* @param jobId Job id.
* @param opts Watcher options (kind, pollInterval, timeout seconds).
* @returns The watcher instance.
*/
watcher(jobId: string, opts?: WatcherOptions): Watcher;
}
/**
 * Configuration accepted by FirecrawlApp.
 *
 * - `apiKey`: optional API key used for authentication.
 * - `apiUrl`: optional API base URL; defaults to 'https://api.firecrawl.dev'.
 */
interface FirecrawlAppConfig {
  apiKey?: string | null;
  apiUrl?: string | null;
}
/**
* Metadata for a Firecrawl document.
* Every declared property is optional, and the index signature at the end
* admits any additional string key.
*/
interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
// NOTE(review): the `og*` fields presumably mirror Open Graph meta tags — confirm.
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
// NOTE(review): the `dc*` / `dcterms*` fields presumably mirror Dublin Core meta tags — confirm.
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
statusCode?: number;
timezone?: string;
error?: string;
proxyUsed?: "basic" | "stealth";
cacheState?: "miss" | "hit";
cachedAt?: string;
creditsUsed?: number;
concurrencyLimited?: boolean;
concurrencyQueueDurationMs?: number;
// Open index signature: any other key type-checks with type `any`, so a
// misspelled known key is not reported by the compiler.
[key: string]: any;
}
/**
 * A document retrieved or processed by Firecrawl.
 *
 * `T` types both the `extract` and `json` payloads. `ActionsSchema` types the
 * `actions` field, which is the only required property and defaults to `never`.
 */
interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | never) = never> {
  url?: string;
  markdown?: string;
  html?: string;
  rawHtml?: string;
  links?: string[];
  extract?: T;
  json?: T;
  screenshot?: string;
  metadata?: FirecrawlDocumentMetadata;
  actions: ActionsSchema;
  /** Change-tracking result, with an optional diff in text and structured form. */
  changeTracking?: {
    previousScrapeAt: string | null;
    changeStatus: "new" | "same" | "changed" | "removed";
    visibility: "visible" | "hidden";
    diff?: {
      text: string;
      json: {
        files: {
          from: string | null;
          to: string | null;
          chunks: {
            content: string;
            changes: {
              type: string;
              normal?: boolean;
              ln?: number;
              ln1?: number;
              ln2?: number;
              content: string;
            }[];
          }[];
        }[];
      };
    };
    json?: any;
  };
  title?: string;
  description?: string;
}
/**
* Location settings (country and languages) used as the proxy location.
* Both fields are optional.
*/
interface LocationConfig {
country?: string;
languages?: string[];
}
/**
 * Options shared by scraping operations.
 * Every option is optional.
 */
interface CrawlScrapeOptions {
  /** Output formats to request for each scraped page. */
  formats?: Array<
    | "markdown"
    | "html"
    | "rawHtml"
    | "content"
    | "links"
    | "screenshot"
    | "screenshot@fullPage"
    | "extract"
    | "json"
    | "changeTracking"
  >;
  headers?: Record<string, string>;
  includeTags?: Array<string>;
  excludeTags?: Array<string>;
  onlyMainContent?: boolean;
  waitFor?: number;
  timeout?: number;
  location?: LocationConfig;
  mobile?: boolean;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  blockAds?: boolean;
  proxy?: "basic" | "stealth" | "enhanced" | "auto";
  storeInCache?: boolean;
  maxAge?: number;
  parsePDF?: boolean;
}
/**
 * A single page action, discriminated by its `type` field.
 */
type Action =
  /** Wait; both `milliseconds` and `selector` are optional. */
  | {
      type: "wait";
      milliseconds?: number;
      selector?: string;
    }
  /** Click the element matching `selector`; `all` is optional. */
  | {
      type: "click";
      selector: string;
      all?: boolean;
    }
  /** Take a screenshot. */
  | {
      type: "screenshot";
      fullPage?: boolean;
      quality?: number;
    }
  /** Write `text`. */
  | {
      type: "write";
      text: string;
    }
  /** Press `key`. */
  | {
      type: "press";
      key: string;
    }
  /** Scroll; `direction` and `selector` are optional. */
  | {
      type: "scroll";
      direction?: "up" | "down";
      selector?: string;
    }
  /** Scrape; takes no further options. */
  | {
      type: "scrape";
    }
  /** Execute the given `script`. */
  | {
      type: "executeJavascript";
      script: string;
    };
/**
* Parameters for a scrape request: every option of `CrawlScrapeOptions` plus
* the LLM extraction, change-tracking, actions and agent options below.
* `LLMSchema` types the `schema` of `extract` and `jsonOptions`;
* `ActionsSchema` types `actions` and defaults to `undefined`.
*/
interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
// `extract` and `jsonOptions` are declared with the same shape.
extract?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
jsonOptions?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
changeTrackingOptions?: {
prompt?: string;
// Typed as `any` here, unlike the `LLMSchema`-typed schemas above.
schema?: any;
modes?: ("json" | "git-diff")[];
tag?: string | null;
};
actions?: ActionsSchema;
agent?: AgentOptions;
zeroDataRetention?: boolean;
}
/**
 * Results collected from actions: screenshots, scrapes and JavaScript return values.
 * All three lists are required.
 */
interface ActionsResult {
  screenshots: Array<string>;
  scrapes: Array<{
    url: string;
    html: string;
  }>;
  javascriptReturns: Array<{
    type: string;
    value: unknown;
  }>;
}
/**
* Response interface for scraping operations.
* A `FirecrawlDocument` extended with a `success: true` marker and optional
* `warning` / `error` strings.
*/
interface ScrapeResponse<LLMResult = any, ActionsSchema extends (ActionsResult | never) = never> extends FirecrawlDocument<LLMResult, ActionsSchema> {
// Literal `true`. NOTE(review): the failure case is presumably modelled by `ErrorResponse` — confirm.
success: true;
warning?: string;
error?: string;
}
/**
 * Parameters for crawling operations.
 * Covers path filters, depth and size limits, per-page scrape options and
 * webhook configuration. Every field is optional.
 */
interface CrawlParams {
  includePaths?: Array<string>;
  excludePaths?: Array<string>;
  maxDepth?: number;
  maxDiscoveryDepth?: number;
  limit?: number;
  allowBackwardLinks?: boolean;
  crawlEntireDomain?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
  scrapeOptions?: CrawlScrapeOptions;
  /** Either a bare webhook URL or a configuration object. */
  webhook?:
    | string
    | {
        url: string;
        headers?: Record<string, string>;
        metadata?: Record<string, string>;
        events?: Array<"completed" | "failed" | "page" | "started">;
      };
  deduplicateSimilarURLs?: boolean;
  ignoreQueryParameters?: boolean;
  regexOnFullURL?: boolean;
  /**
   * Delay in seconds between scrapes. This helps respect website rate limits.
   * If not provided, the crawler may use the robots.txt crawl delay if available.
   */
  delay?: number;
  allowSubdomains?: boolean;
  maxConcurrency?: number;
  zeroDataRetention?: boolean;
}
/**
* Response interface for crawling operations.
* Defines the structure of the response received after initiating a crawl.
* `success` is the literal `true`; `id`, `url` and `error` are all optional.
*/
interface CrawlResponse {
id?: string;
url?: string;
success: true;
error?: string;
}
/**
* Response interface for batch scrape operations.
* Defines the structure of the response received after initiating a batch scrape.
* Has the same fields as `CrawlResponse` plus an optional `invalidURLs` list.
*/
interface BatchScrapeResponse {
id?: string;
url?: string;
success: true;
error?: string;
invalidURLs?: string[];
}
/**
* Response interface for job status checks.
* Provides detailed status of a crawl job including progress and results.
*/
interface CrawlStatusResponse {
success: true;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
// Typed as a `Date` object, not a string.
expiresAt: Date;
// NOTE(review): presumably a reference to the next page of results — confirm.
next?: string;
// `FirecrawlDocument<undefined>` types each document's `extract` and `json` as `undefined`.
data: FirecrawlDocument<undefined>[];
}
/**
* Response interface for batch scrape job status checks.
* Provides detailed status of a batch scrape job including progress and results.
* Declares the same fields as `CrawlStatusResponse`.
*/
interface BatchScrapeStatusResponse {
success: true;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
// Typed as a `Date` object, not a string.
expiresAt: Date;
// NOTE(review): presumably a reference to the next page of results — confirm.
next?: string;
// `FirecrawlDocument<undefined>` types each document's `extract` and `json` as `undefined`.
data: FirecrawlDocument<undefined>[];
}
/**
* Parameters for mapping operations.
* Defines options for mapping URLs during a crawl. Every field is optional.
*/
interface MapParams {
search?: string;
ignoreSitemap?: boolean;
includeSubdomains?: boolean;
sitemapOnly?: boolean;
limit?: number;
timeout?: number;
useIndex?: boolean;
// Same `LocationConfig` shape that `CrawlScrapeOptions.location` uses.
location?: LocationConfig;
}
/**
* Response interface for mapping operations.
* Defines the structure of the response received after a mapping operation.
* `links` is optional even though `success` is the literal `true`.
*/
interface MapResponse {
success: true;
links?: string[];
error?: string;
}
/**
* Agent options accepted by scrape operations (see `ScrapeParams.agent`).
* Every field is optional.
*/
interface AgentOptions {
model?: string;
prompt?: string;
sessionId?: string;
}
/**
* Agent options accepted by extract operations (see `ExtractParams.agent`).
* Same fields as `AgentOptions` except that there is no `prompt`.
*/
interface AgentOptionsExtract {
model?: string;
sessionId?: string;
}
/**
* Parameters for extracting information from URLs.
* Every field is optional.
*/
interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
prompt?: string;
// Accepts either the `LLMSchema` type or any plain `object`.
schema?: LLMSchema | object;
systemPrompt?: string;
allowExternalLinks?: boolean;
enableWebSearch?: boolean;
includeSubdomains?: boolean;
origin?: string;
showSources?: boolean;
scrapeOptions?: CrawlScrapeOptions;
agent?: AgentOptionsExtract;
}
/**
* Response interface for extracting information from URLs.
* Defines the structure of the response received after extracting information from URLs.
* Here `success` is a plain `boolean`, not the literal `true`.
*/
interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
success: boolean;
// NOTE(review): typed as the schema type parameter itself, not as the value
// type the schema describes — confirm this is intended.
data: LLMSchema;
error?: string;
warning?: string;
warnings?: string[];
replacement?: string;
sources?: string[];
creditsUsed?: number;
}
/**
* Error response interface.
* Defines the structure of the response received when an error occurs.
* `success` is the literal `false` and `error` is required.
*/
interface ErrorResponse {
success: false;
error: string;
}
/**
* Parameters for search operations.
* Defines options for searching and scraping search results. Every field is optional.
*/
interface SearchParams {
limit?: number;
tbs?: string;
filter?: string;
lang?: string;
country?: string;
// A plain string here, unlike the `LocationConfig` object used by `CrawlScrapeOptions.location`.
location?: string;
origin?: string;
timeout?: number;
// Uses `ScrapeParams` with its default type arguments.
scrapeOptions?: ScrapeParams;
}
/**
* Response interface for search operations.
* Defines the structure of the response received after a search operation.
* `success` is a plain `boolean`, and `data` is required.
*/
interface SearchResponse {
success: boolean;
// `FirecrawlDocument<undefined>` types each document's `extract` and `json` as `undefined`.
data: FirecrawlDocument<undefined>[];
warning?: string;
error?: string;
}
/**
 * Error report for a crawl or batch scrape job.
 */
interface CrawlErrorsResponse {
  /**
   * Scrapes that errored out, with the details of each error.
   */
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    code?: string;
    error: string;
  }>;
  /**
   * URLs blocked by robots.txt.
   */
  robotsBlocked: Array<string>;
}
/**
* Parameters for deep research operations.
* Defines options for conducting deep research on a query.
*/
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
/**
* Maximum depth of research iterations (1-10)
* @default 7
*/
maxDepth?: number;
/**
* Time limit in seconds (30-300)
* @default 270
*/
timeLimit?: number;
/**
* Maximum number of URLs to analyze (1-1000)
* @defa