@mendable/firecrawl-js
JavaScript SDK for Firecrawl API
import * as zt from 'zod';
import { ZodTypeAny } from 'zod';
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
import { EventEmitter } from 'events';
import { TypedEventTarget } from 'typescript-event-target';
type FormatString = 'markdown' | 'html' | 'rawHtml' | 'links' | 'images' | 'screenshot' | 'summary' | 'changeTracking' | 'json' | 'attributes' | 'branding';
interface Viewport {
width: number;
height: number;
}
interface Format {
type: FormatString;
}
interface JsonFormat extends Format {
type: 'json';
prompt?: string;
schema?: Record<string, unknown> | ZodTypeAny;
}
interface ScreenshotFormat {
type: 'screenshot';
fullPage?: boolean;
quality?: number;
viewport?: Viewport | {
width: number;
height: number;
};
}
interface ChangeTrackingFormat extends Format {
type: 'changeTracking';
modes: ('git-diff' | 'json')[];
schema?: Record<string, unknown>;
prompt?: string;
tag?: string;
}
interface AttributesFormat extends Format {
type: 'attributes';
selectors: Array<{
selector: string;
attribute: string;
}>;
}
type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat | AttributesFormat;
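/*
 * Illustrative sketch (not part of the shipped declarations): a `formats` array
 * may mix plain format strings with typed format objects. The selector and
 * quality values below are placeholders.
 *
 *   const formats: FormatOption[] = [
 *     'markdown',
 *     { type: 'screenshot', fullPage: true, quality: 80 },
 *     { type: 'attributes', selectors: [{ selector: 'a.product', attribute: 'href' }] },
 *   ];
 */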
interface LocationConfig$1 {
country?: string;
languages?: string[];
}
interface WaitAction {
type: 'wait';
milliseconds?: number;
selector?: string;
}
interface ScreenshotAction {
type: 'screenshot';
fullPage?: boolean;
quality?: number;
viewport?: Viewport | {
width: number;
height: number;
};
}
interface ClickAction {
type: 'click';
selector: string;
}
interface WriteAction {
type: 'write';
text: string;
}
interface PressAction {
type: 'press';
key: string;
}
interface ScrollAction {
type: 'scroll';
direction: 'up' | 'down';
selector?: string;
}
interface ScrapeAction {
type: 'scrape';
}
interface ExecuteJavascriptAction {
type: 'executeJavascript';
script: string;
}
interface PDFAction {
type: 'pdf';
format?: 'A0' | 'A1' | 'A2' | 'A3' | 'A4' | 'A5' | 'A6' | 'Letter' | 'Legal' | 'Tabloid' | 'Ledger';
landscape?: boolean;
scale?: number;
}
type ActionOption = WaitAction | ScreenshotAction | ClickAction | WriteAction | PressAction | ScrollAction | ScrapeAction | ExecuteJavascriptAction | PDFAction;
interface ScrapeOptions {
formats?: FormatOption[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
onlyMainContent?: boolean;
timeout?: number;
waitFor?: number;
mobile?: boolean;
parsers?: Array<string | {
type: 'pdf';
maxPages?: number;
}>;
actions?: ActionOption[];
location?: LocationConfig$1;
skipTlsVerification?: boolean;
removeBase64Images?: boolean;
fastMode?: boolean;
useMock?: string;
blockAds?: boolean;
proxy?: 'basic' | 'stealth' | 'auto' | string;
maxAge?: number;
minAge?: number;
storeInCache?: boolean;
integration?: string;
}
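/*
 * Illustrative sketch: a ScrapeOptions value combining formats, page actions,
 * and a proxy mode. All values are placeholders, not recommendations.
 *
 *   const options: ScrapeOptions = {
 *     formats: ['markdown', 'links'],
 *     onlyMainContent: true,
 *     actions: [
 *       { type: 'wait', milliseconds: 1000 },
 *       { type: 'click', selector: '#load-more' },
 *       { type: 'scrape' },
 *     ],
 *     proxy: 'auto',
 *   };
 */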
interface WebhookConfig {
url: string;
headers?: Record<string, string>;
metadata?: Record<string, string>;
events?: Array<'completed' | 'failed' | 'page' | 'started'>;
}
interface BrandingProfile {
colorScheme?: 'light' | 'dark';
logo?: string | null;
fonts?: Array<{
family: string;
[key: string]: unknown;
}>;
colors?: {
primary?: string;
secondary?: string;
accent?: string;
background?: string;
textPrimary?: string;
textSecondary?: string;
link?: string;
success?: string;
warning?: string;
error?: string;
[key: string]: string | undefined;
};
typography?: {
fontFamilies?: {
primary?: string;
heading?: string;
code?: string;
[key: string]: string | undefined;
};
fontStacks?: {
primary?: string[];
heading?: string[];
body?: string[];
paragraph?: string[];
[key: string]: string[] | undefined;
};
fontSizes?: {
h1?: string;
h2?: string;
h3?: string;
body?: string;
small?: string;
[key: string]: string | undefined;
};
lineHeights?: {
heading?: number;
body?: number;
[key: string]: number | undefined;
};
fontWeights?: {
light?: number;
regular?: number;
medium?: number;
bold?: number;
[key: string]: number | undefined;
};
};
spacing?: {
baseUnit?: number;
padding?: Record<string, number>;
margins?: Record<string, number>;
gridGutter?: number;
borderRadius?: string;
[key: string]: number | string | Record<string, number> | undefined;
};
components?: {
buttonPrimary?: {
background?: string;
textColor?: string;
borderColor?: string;
borderRadius?: string;
[key: string]: string | undefined;
};
buttonSecondary?: {
background?: string;
textColor?: string;
borderColor?: string;
borderRadius?: string;
[key: string]: string | undefined;
};
input?: {
borderColor?: string;
focusBorderColor?: string;
borderRadius?: string;
[key: string]: string | undefined;
};
[key: string]: unknown;
};
icons?: {
style?: string;
primaryColor?: string;
[key: string]: string | undefined;
};
images?: {
logo?: string | null;
favicon?: string | null;
ogImage?: string | null;
[key: string]: string | null | undefined;
};
animations?: {
transitionDuration?: string;
easing?: string;
[key: string]: string | undefined;
};
layout?: {
grid?: {
columns?: number;
maxWidth?: string;
[key: string]: number | string | undefined;
};
headerHeight?: string;
footerHeight?: string;
[key: string]: number | string | Record<string, number | string | undefined> | undefined;
};
tone?: {
voice?: string;
emojiUsage?: string;
[key: string]: string | undefined;
};
personality?: {
tone: 'professional' | 'playful' | 'modern' | 'traditional' | 'minimalist' | 'bold';
energy: 'low' | 'medium' | 'high';
targetAudience: string;
};
[key: string]: unknown;
}
interface DocumentMetadata {
title?: string;
description?: string;
url?: string;
language?: string;
keywords?: string | string[];
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
favicon?: string;
dcTermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dcTermsType?: string;
dcType?: string;
dcTermsAudience?: string;
dcTermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dcTermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
statusCode?: number;
scrapeId?: string;
numPages?: number;
contentType?: string;
timezone?: string;
proxyUsed?: 'basic' | 'stealth';
cacheState?: 'hit' | 'miss';
cachedAt?: string;
creditsUsed?: number;
concurrencyLimited?: boolean;
concurrencyQueueDurationMs?: number;
error?: string;
[key: string]: unknown;
}
interface Document {
markdown?: string;
html?: string;
rawHtml?: string;
json?: unknown;
summary?: string;
metadata?: DocumentMetadata;
links?: string[];
images?: string[];
screenshot?: string;
attributes?: Array<{
selector: string;
attribute: string;
values: string[];
}>;
actions?: Record<string, unknown>;
warning?: string;
changeTracking?: Record<string, unknown>;
branding?: BrandingProfile;
}
interface PaginationConfig {
/** When true (default), automatically follow `next` links and aggregate all documents. */
autoPaginate?: boolean;
/** Maximum number of additional pages to fetch after the first response. */
maxPages?: number;
/** Maximum total number of documents to return across all pages. */
maxResults?: number;
/** Maximum time to spend fetching additional pages (in seconds). */
maxWaitTime?: number;
}
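/*
 * Illustrative sketch: pagination controls as passed to getCrawlStatus or
 * getBatchScrapeStatus (declared further down). `firecrawl` is assumed to be a
 * FirecrawlClient instance and the job id is a placeholder.
 *
 *   const job = await firecrawl.getCrawlStatus('job-id', {
 *     autoPaginate: true,
 *     maxPages: 5,
 *     maxResults: 500,
 *     maxWaitTime: 30, // seconds
 *   });
 */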
interface SearchResultWeb {
url: string;
title?: string;
description?: string;
category?: string;
}
interface SearchResultNews {
title?: string;
url?: string;
snippet?: string;
date?: string;
imageUrl?: string;
position?: number;
category?: string;
}
interface SearchResultImages {
title?: string;
imageUrl?: string;
imageWidth?: number;
imageHeight?: number;
url?: string;
position?: number;
}
interface SearchData {
web?: Array<SearchResultWeb | Document>;
news?: Array<SearchResultNews | Document>;
images?: Array<SearchResultImages | Document>;
}
interface CategoryOption {
type: 'github' | 'research' | 'pdf';
}
interface SearchRequest {
query: string;
sources?: Array<'web' | 'news' | 'images' | {
type: 'web' | 'news' | 'images';
}>;
categories?: Array<'github' | 'research' | 'pdf' | CategoryOption>;
limit?: number;
tbs?: string;
location?: string;
ignoreInvalidURLs?: boolean;
timeout?: number;
scrapeOptions?: ScrapeOptions;
integration?: string;
}
interface CrawlOptions {
prompt?: string | null;
excludePaths?: string[] | null;
includePaths?: string[] | null;
maxDiscoveryDepth?: number | null;
sitemap?: 'skip' | 'include';
ignoreQueryParameters?: boolean;
limit?: number | null;
crawlEntireDomain?: boolean;
allowExternalLinks?: boolean;
allowSubdomains?: boolean;
delay?: number | null;
maxConcurrency?: number | null;
webhook?: string | WebhookConfig | null;
scrapeOptions?: ScrapeOptions | null;
zeroDataRetention?: boolean;
integration?: string;
}
interface CrawlResponse$1 {
id: string;
url: string;
}
interface CrawlJob {
id: string;
status: 'scraping' | 'completed' | 'failed' | 'cancelled';
total: number;
completed: number;
creditsUsed?: number;
expiresAt?: string;
next?: string | null;
data: Document[];
}
interface BatchScrapeOptions {
options?: ScrapeOptions;
webhook?: string | WebhookConfig;
appendToId?: string;
ignoreInvalidURLs?: boolean;
maxConcurrency?: number;
zeroDataRetention?: boolean;
idempotencyKey?: string;
integration?: string;
}
interface BatchScrapeResponse$1 {
id: string;
url: string;
invalidURLs?: string[];
}
interface BatchScrapeJob {
id: string;
status: 'scraping' | 'completed' | 'failed' | 'cancelled';
completed: number;
total: number;
creditsUsed?: number;
expiresAt?: string;
next?: string | null;
data: Document[];
}
interface MapData {
links: SearchResultWeb[];
}
interface MapOptions {
search?: string;
sitemap?: 'only' | 'include' | 'skip';
includeSubdomains?: boolean;
ignoreQueryParameters?: boolean;
limit?: number;
timeout?: number;
integration?: string;
location?: LocationConfig$1;
}
interface ExtractResponse$1 {
success?: boolean;
id?: string;
status?: 'processing' | 'completed' | 'failed' | 'cancelled';
data?: unknown;
error?: string;
warning?: string;
sources?: Record<string, unknown>;
expiresAt?: string;
creditsUsed?: number;
}
interface AgentResponse {
success: boolean;
id: string;
error?: string;
}
interface AgentStatusResponse {
success: boolean;
status: 'processing' | 'completed' | 'failed';
error?: string;
data?: unknown;
expiresAt: string;
creditsUsed?: number;
}
interface AgentOptions$1 {
model: 'FIRE-1' | 'v3-beta';
}
interface ConcurrencyCheck {
concurrency: number;
maxConcurrency: number;
}
interface CreditUsage {
remainingCredits: number;
planCredits?: number;
billingPeriodStart?: string | null;
billingPeriodEnd?: string | null;
}
interface TokenUsage {
remainingTokens: number;
planTokens?: number;
billingPeriodStart?: string | null;
billingPeriodEnd?: string | null;
}
interface CreditUsageHistoricalPeriod {
startDate: string | null;
endDate: string | null;
apiKey?: string;
creditsUsed: number;
}
interface CreditUsageHistoricalResponse {
success: boolean;
periods: CreditUsageHistoricalPeriod[];
}
interface TokenUsageHistoricalPeriod {
startDate: string | null;
endDate: string | null;
apiKey?: string;
tokensUsed: number;
}
interface TokenUsageHistoricalResponse {
success: boolean;
periods: TokenUsageHistoricalPeriod[];
}
interface CrawlErrorsResponse$1 {
errors: {
id: string;
timestamp?: string;
url: string;
code?: string;
error: string;
}[];
robotsBlocked: string[];
}
interface ActiveCrawl {
id: string;
teamId: string;
url: string;
options?: Record<string, unknown> | null;
}
interface ActiveCrawlsResponse {
success: boolean;
crawls: ActiveCrawl[];
}
interface ErrorDetails {
code?: string;
message: string;
details?: Record<string, unknown>;
status?: number;
}
declare class SdkError extends Error {
status?: number;
code?: string;
details?: unknown;
jobId?: string;
constructor(message: string, status?: number, code?: string, details?: unknown, jobId?: string);
}
declare class JobTimeoutError extends SdkError {
timeoutSeconds: number;
constructor(jobId: string, timeoutSeconds: number, jobType?: 'batch' | 'crawl');
}
interface QueueStatusResponse$1 {
success: boolean;
jobsInQueue: number;
activeJobsInQueue: number;
waitingJobsInQueue: number;
maxConcurrency: number;
mostRecentSuccess: string | null;
}
interface HttpClientOptions {
apiKey: string;
apiUrl: string;
timeoutMs?: number;
maxRetries?: number;
backoffFactor?: number;
}
declare class HttpClient {
private instance;
private readonly apiKey;
private readonly apiUrl;
private readonly maxRetries;
private readonly backoffFactor;
constructor(options: HttpClientOptions);
getApiUrl(): string;
getApiKey(): string;
private request;
private sleep;
post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any, {}>>;
prepareHeaders(idempotencyKey?: string): Record<string, string>;
}
declare function prepareExtractPayload(args: {
urls?: string[];
prompt?: string;
schema?: Record<string, unknown> | ZodTypeAny;
systemPrompt?: string;
allowExternalLinks?: boolean;
enableWebSearch?: boolean;
showSources?: boolean;
scrapeOptions?: ScrapeOptions;
ignoreInvalidURLs?: boolean;
integration?: string;
agent?: AgentOptions$1;
}): Record<string, unknown>;
declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
declare function prepareAgentPayload(args: {
urls?: string[];
prompt: string;
schema?: Record<string, unknown> | ZodTypeAny;
integration?: string;
maxCredits?: number;
strictConstrainToURLs?: boolean;
}): Record<string, unknown>;
declare function startAgent(http: HttpClient, args: Parameters<typeof prepareAgentPayload>[0]): Promise<AgentResponse>;
type JobKind = "crawl" | "batch";
interface WatcherOptions {
kind?: JobKind;
pollInterval?: number;
timeout?: number;
}
declare class Watcher extends EventEmitter {
private readonly http;
private readonly jobId;
private readonly kind;
private readonly pollInterval;
private readonly timeout?;
private ws?;
private closed;
private readonly emittedDocumentKeys;
constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
private buildWsUrl;
start(): Promise<void>;
private attachWsHandlers;
private documentKey;
private emitDocuments;
private emitSnapshot;
private pollLoop;
close(): void;
}
type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? Extract<Formats[number], {
type: "json";
schema?: unknown;
}>["schema"] : never;
type InferredJsonFromOptions<Opts> = Opts extends {
formats?: infer Fmts;
} ? ExtractJsonSchemaFromFormats<Fmts> extends zt.ZodTypeAny ? zt.infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
/**
* Configuration for the v2 client transport.
*/
interface FirecrawlClientOptions {
/** API key (falls back to FIRECRAWL_API_KEY). */
apiKey?: string | null;
/** API base URL (falls back to FIRECRAWL_API_URL or https://api.firecrawl.dev). */
apiUrl?: string | null;
/** Per-request timeout in milliseconds (optional). */
timeoutMs?: number;
/** Max automatic retries for transient failures (optional). */
maxRetries?: number;
/** Exponential backoff factor for retries (optional). */
backoffFactor?: number;
}
/**
* Firecrawl v2 client. Provides typed access to all v2 endpoints and utilities.
*/
declare class FirecrawlClient {
private readonly http;
private isCloudService;
/**
* Create a v2 client.
* @param options Transport configuration (API key, base URL, timeouts, retries).
*/
constructor(options?: FirecrawlClientOptions);
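/*
 * Illustrative sketch: constructing the v2 client. The named import is an
 * assumption; the package's export map is not visible in this listing.
 *
 *   import { FirecrawlClient } from '@mendable/firecrawl-js';
 *
 *   const firecrawl = new FirecrawlClient({
 *     apiKey: process.env.FIRECRAWL_API_KEY, // may be omitted; the client falls back to FIRECRAWL_API_KEY
 *     maxRetries: 3,
 *   });
 */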
/**
* Scrape a single URL.
* @param url Target URL.
* @param options Optional scrape options (formats, headers, etc.).
* @returns Resolved document with requested formats.
*/
scrape<Opts extends ScrapeOptions>(url: string, options: Opts): Promise<Omit<Document, "json"> & {
json?: InferredJsonFromOptions<Opts>;
}>;
scrape(url: string, options?: ScrapeOptions): Promise<Document>;
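/*
 * Illustrative sketch: supplying a zod schema in a `json` format is intended to
 * let the overload above narrow the returned `json` field via
 * InferredJsonFromOptions. `firecrawl` is the client constructed above.
 *
 *   import { z } from 'zod';
 *
 *   const doc = await firecrawl.scrape('https://example.com', {
 *     formats: ['markdown', { type: 'json', schema: z.object({ title: z.string() }) }],
 *   });
 *   console.log(doc.markdown);
 *   console.log(doc.json?.title); // narrowed when schema inference applies
 */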
/**
* Search the web and optionally scrape each result.
* @param query Search query string.
* @param req Additional search options (sources, limit, scrapeOptions, etc.).
* @returns Structured search results.
*/
search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;
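/*
 * Illustrative sketch: a search that also scrapes each hit. Result arrays may
 * hold plain SearchResult entries or full Documents when results are scraped.
 *
 *   const results = await firecrawl.search('firecrawl sdk', {
 *     sources: ['web', 'news'],
 *     limit: 5,
 *     scrapeOptions: { formats: ['markdown'] },
 *   });
 *   results.web?.forEach((hit) => console.log(hit));
 */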
/**
* Map a site to discover URLs (sitemap-aware).
* @param url Root URL to map.
* @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
* @returns Discovered links.
*/
map(url: string, options?: MapOptions): Promise<MapData>;
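/*
 * Illustrative sketch: discovering a site's URLs before crawling.
 *
 *   const { links } = await firecrawl.map('https://example.com', {
 *     sitemap: 'include',
 *     limit: 100,
 *   });
 *   for (const link of links) console.log(link.url, link.title);
 */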
/**
* Start a crawl job (async).
* @param url Root URL to crawl.
* @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
* @returns Job id and url.
*/
startCrawl(url: string, req?: CrawlOptions): Promise<CrawlResponse$1>;
/**
* Get the status and partial data of a crawl job.
* @param jobId Crawl job id.
*/
getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob>;
/**
* Cancel a crawl job.
* @param jobId Crawl job id.
* @returns True if cancelled.
*/
cancelCrawl(jobId: string): Promise<boolean>;
/**
* Convenience waiter: start a crawl and poll until it finishes.
* @param url Root URL to crawl.
* @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
crawl(url: string, req?: CrawlOptions & {
pollInterval?: number;
timeout?: number;
}): Promise<CrawlJob>;
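/*
 * Illustrative sketch: crawl() starts a job and polls it to completion, while
 * startCrawl()/getCrawlStatus() give manual control. Waiter values are
 * placeholders; timeout is in seconds per the doc comment above.
 *
 *   const job = await firecrawl.crawl('https://example.com', {
 *     limit: 50,
 *     scrapeOptions: { formats: ['markdown'] },
 *     pollInterval: 2,
 *     timeout: 300,
 *   });
 *   if (job.status === 'completed') console.log(job.data.length, 'pages crawled');
 */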
/**
* Retrieve crawl errors and robots.txt blocks.
* @param crawlId Crawl job id.
*/
getCrawlErrors(crawlId: string): Promise<CrawlErrorsResponse$1>;
/**
* List active crawls for the authenticated team.
*/
getActiveCrawls(): Promise<ActiveCrawlsResponse>;
/**
* Preview normalized crawl parameters produced by a natural-language prompt.
* @param url Root URL.
* @param prompt Natural-language instruction.
*/
crawlParamsPreview(url: string, prompt: string): Promise<Record<string, unknown>>;
/**
* Start a batch scrape job for multiple URLs (async).
* @param urls URLs to scrape.
* @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
* @returns Job id and url.
*/
startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse$1>;
/**
* Get the status and partial data of a batch scrape job.
* @param jobId Batch job id.
*/
getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;
/**
* Retrieve batch scrape errors and robots.txt blocks.
* @param jobId Batch job id.
*/
getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse$1>;
/**
* Cancel a batch scrape job.
* @param jobId Batch job id.
* @returns True if cancelled.
*/
cancelBatchScrape(jobId: string): Promise<boolean>;
/**
* Convenience waiter: start a batch scrape and poll until it finishes.
* @param urls URLs to scrape.
* @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
batchScrape(urls: string[], opts?: BatchScrapeOptions & {
pollInterval?: number;
timeout?: number;
}): Promise<BatchScrapeJob>;
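/*
 * Illustrative sketch: batch-scraping several URLs with the polling waiter.
 *
 *   const job = await firecrawl.batchScrape(
 *     ['https://example.com/a', 'https://example.com/b'],
 *     { options: { formats: ['markdown'] }, maxConcurrency: 2, timeout: 120 },
 *   );
 *   console.log(job.status, `${job.completed}/${job.total}`);
 */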
/**
* Start an extract job (async).
* @param args Extraction request (urls, schema or prompt, flags).
* @returns Job id or processing state.
*/
startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
/**
* Get extract job status/data.
* @param jobId Extract job id.
*/
getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
/**
* Convenience waiter: start an extract and poll until it finishes.
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
* @returns Final extract response.
*/
extract(args: Parameters<typeof startExtract>[1] & {
pollInterval?: number;
timeout?: number;
}): Promise<ExtractResponse$1>;
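/*
 * Illustrative sketch: structured extraction with a prompt and a zod schema.
 * `data` is typed as unknown in these declarations, so callers validate or cast
 * the result themselves.
 *
 *   import { z } from 'zod';
 *
 *   const result = await firecrawl.extract({
 *     urls: ['https://example.com/pricing'],
 *     prompt: 'Extract the plan names and monthly prices.',
 *     schema: z.object({ plans: z.array(z.object({ name: z.string(), price: z.string() })) }),
 *   });
 *   if (result.status === 'completed') console.log(result.data);
 */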
/**
* Start an agent job (async).
* @param args Agent request (urls, prompt, schema).
* @returns Job id or processing state.
*/
startAgent(args: Parameters<typeof startAgent>[1]): Promise<AgentResponse>;
/**
* Get agent job status/data.
* @param jobId Agent job id.
*/
getAgentStatus(jobId: string): Promise<AgentStatusResponse>;
/**
* Convenience waiter: start an agent and poll until it finishes.
* @param args Agent request plus waiter controls (pollInterval, timeout seconds).
* @returns Final agent response.
*/
agent(args: Parameters<typeof startAgent>[1] & {
pollInterval?: number;
timeout?: number;
}): Promise<AgentStatusResponse>;
/**
* Cancel an agent job.
* @param jobId Agent job id.
* @returns True if cancelled.
*/
cancelAgent(jobId: string): Promise<boolean>;
/** Current concurrency usage. */
getConcurrency(): Promise<ConcurrencyCheck>;
/** Current credit usage. */
getCreditUsage(): Promise<CreditUsage>;
/** Recent token usage. */
getTokenUsage(): Promise<TokenUsage>;
/** Historical credit usage by month; set byApiKey to true to break down by API key. */
getCreditUsageHistorical(byApiKey?: boolean): Promise<CreditUsageHistoricalResponse>;
/** Historical token usage by month; set byApiKey to true to break down by API key. */
getTokenUsageHistorical(byApiKey?: boolean): Promise<TokenUsageHistoricalResponse>;
/** Metrics about the team's scrape queue. */
getQueueStatus(): Promise<QueueStatusResponse$1>;
/**
* Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
* @param jobId Job id.
* @param opts Watcher options (kind, pollInterval, timeout seconds).
*/
watcher(jobId: string, opts?: WatcherOptions): Watcher;
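/*
 * Illustrative sketch: watching a previously started crawl. Event names follow
 * the doc comment above; payload shapes are not spelled out in these
 * declarations.
 *
 *   const { id } = await firecrawl.startCrawl('https://example.com', { limit: 25 });
 *   const watch = firecrawl.watcher(id, { kind: 'crawl', pollInterval: 2 });
 *   watch.on('document', (doc) => console.log('page received', doc));
 *   watch.on('done', () => console.log('crawl finished'));
 *   watch.on('error', (err) => console.error(err));
 *   await watch.start();
 */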
}
/**
* Configuration interface for FirecrawlApp.
* @param apiKey - Optional API key for authentication.
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
*/
interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
}
/**
* Metadata for a Firecrawl document.
* Includes various optional properties for document metadata.
*/
interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
statusCode?: number;
timezone?: string;
error?: string;
proxyUsed?: "basic" | "stealth";
cacheState?: "miss" | "hit";
cachedAt?: string;
creditsUsed?: number;
concurrencyLimited?: boolean;
concurrencyQueueDurationMs?: number;
[key: string]: any;
}
/**
* Document interface for Firecrawl.
* Represents a document retrieved or processed by Firecrawl.
*/
interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | never) = never> {
url?: string;
markdown?: string;
html?: string;
rawHtml?: string;
links?: string[];
extract?: T;
json?: T;
screenshot?: string;
metadata?: FirecrawlDocumentMetadata;
actions: ActionsSchema;
changeTracking?: {
previousScrapeAt: string | null;
changeStatus: "new" | "same" | "changed" | "removed";
visibility: "visible" | "hidden";
diff?: {
text: string;
json: {
files: Array<{
from: string | null;
to: string | null;
chunks: Array<{
content: string;
changes: Array<{
type: string;
normal?: boolean;
ln?: number;
ln1?: number;
ln2?: number;
content: string;
}>;
}>;
}>;
};
};
json?: any;
};
title?: string;
description?: string;
}
/**
* Location configuration for proxy location
*/
interface LocationConfig {
country?: string;
languages?: string[];
}
/**
* Parameters for scraping operations.
* Defines the options and configurations available for scraping web content.
*/
interface CrawlScrapeOptions {
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "changeTracking")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
onlyMainContent?: boolean;
waitFor?: number;
timeout?: number;
location?: LocationConfig;
mobile?: boolean;
skipTlsVerification?: boolean;
removeBase64Images?: boolean;
blockAds?: boolean;
proxy?: "basic" | "stealth" | "auto";
storeInCache?: boolean;
maxAge?: number;
parsePDF?: boolean;
}
type Action = {
type: "wait";
milliseconds?: number;
selector?: string;
} | {
type: "click";
selector: string;
all?: boolean;
} | {
type: "screenshot";
fullPage?: boolean;
quality?: number;
} | {
type: "write";
text: string;
} | {
type: "press";
key: string;
} | {
type: "scroll";
direction?: "up" | "down";
selector?: string;
} | {
type: "scrape";
} | {
type: "executeJavascript";
script: string;
};
interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
extract?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
jsonOptions?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
changeTrackingOptions?: {
prompt?: string;
schema?: any;
modes?: ("json" | "git-diff")[];
tag?: string | null;
};
actions?: ActionsSchema;
agent?: AgentOptions;
zeroDataRetention?: boolean;
}
interface ActionsResult {
screenshots: string[];
scrapes: ({
url: string;
html: string;
})[];
javascriptReturns: {
type: string;
value: unknown;
}[];
}
/**
* Response interface for scraping operations.
* Defines the structure of the response received after a scraping operation.
*/
interface ScrapeResponse<LLMResult = any, ActionsSchema extends (ActionsResult | never) = never> extends FirecrawlDocument<LLMResult, ActionsSchema> {
success: true;
warning?: string;
error?: string;
}
/**
* Parameters for crawling operations.
* Includes options for both scraping and mapping during a crawl.
*/
interface CrawlParams {
includePaths?: string[];
excludePaths?: string[];
maxDepth?: number;
maxDiscoveryDepth?: number;
limit?: number;
allowBackwardLinks?: boolean;
crawlEntireDomain?: boolean;
allowExternalLinks?: boolean;
ignoreSitemap?: boolean;
scrapeOptions?: CrawlScrapeOptions;
webhook?: string | {
url: string;
headers?: Record<string, string>;
metadata?: Record<string, string>;
events?: ["completed", "failed", "page", "started"][number][];
};
deduplicateSimilarURLs?: boolean;
ignoreQueryParameters?: boolean;
regexOnFullURL?: boolean;
/**
* Delay in seconds between scrapes. This helps respect website rate limits.
* If not provided, the crawler may use the robots.txt crawl delay if available.
*/
delay?: number;
allowSubdomains?: boolean;
maxConcurrency?: number;
zeroDataRetention?: boolean;
}
/**
* Response interface for crawling operations.
* Defines the structure of the response received after initiating a crawl.
*/
interface CrawlResponse {
id?: string;
url?: string;
success: true;
error?: string;
}
/**
* Response interface for batch scrape operations.
* Defines the structure of the response received after initiating a batch scrape.
*/
interface BatchScrapeResponse {
id?: string;
url?: string;
success: true;
error?: string;
invalidURLs?: string[];
}
/**
* Response interface for job status checks.
* Provides detailed status of a crawl job including progress and results.
*/
interface CrawlStatusResponse {
success: true;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
expiresAt: Date;
next?: string;
data: FirecrawlDocument<undefined>[];
}
/**
* Response interface for batch scrape job status checks.
* Provides detailed status of a batch scrape job including progress and results.
*/
interface BatchScrapeStatusResponse {
success: true;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
expiresAt: Date;
next?: string;
data: FirecrawlDocument<undefined>[];
}
/**
* Parameters for mapping operations.
* Defines options for discovering URLs on a site.
*/
interface MapParams {
search?: string;
ignoreSitemap?: boolean;
includeSubdomains?: boolean;
sitemapOnly?: boolean;
limit?: number;
timeout?: number;
useIndex?: boolean;
location?: LocationConfig;
}
/**
* Response interface for mapping operations.
* Defines the structure of the response received after a mapping operation.
*/
interface MapResponse {
success: true;
links?: string[];
error?: string;
}
/**
 * Agent configuration options for scraping operations.
 */
interface AgentOptions {
model?: string;
prompt?: string;
sessionId?: string;
}
/**
 * Agent configuration options for extract operations.
 */
interface AgentOptionsExtract {
model?: string;
sessionId?: string;
}
interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
prompt?: string;
schema?: LLMSchema | object;
systemPrompt?: string;
allowExternalLinks?: boolean;
enableWebSearch?: boolean;
includeSubdomains?: boolean;
origin?: string;
showSources?: boolean;
scrapeOptions?: CrawlScrapeOptions;
agent?: AgentOptionsExtract;
}
/**
* Response interface for extracting information from URLs.
* Defines the structure of the response received after extracting information from URLs.
*/
interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
success: boolean;
data: LLMSchema;
error?: string;
warning?: string;
sources?: string[];
creditsUsed?: number;
}
/**
* Error response interface.
* Defines the structure of the response received when an error occurs.
*/
interface ErrorResponse {
success: false;
error: string;
}
/**
* Parameters for search operations.
* Defines options for searching and scraping search results.
*/
interface SearchParams {
limit?: number;
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
origin?: string;
timeout?: number;
scrapeOptions?: ScrapeParams;
}
/**
* Response interface for search operations.
* Defines the structure of the response received after a search operation.
*/
interface SearchResponse {
success: boolean;
data: FirecrawlDocument<undefined>[];
warning?: string;
error?: string;
}
/**
* Response interface for crawl/batch scrape error monitoring.
*/
interface CrawlErrorsResponse {
/**
* Scrapes that errored out + error details
*/
errors: {
id: string;
timestamp?: string;
url: string;
code?: string;
error: string;
}[];
/**
* URLs blocked by robots.txt
*/
robotsBlocked: string[];
}
/**
* Parameters for deep research operations.
* Defines options for conducting deep research on a query.
*/
interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
/**
* Maximum depth of research iterations (1-10)
* @default 7
*/
maxDepth?: number;
/**
* Time limit in seconds (30-300)
* @default 270
*/
timeLimit?: number;
/**
* Maximum number of URLs to analyze (1-1000)
* @default 20
*/
maxUrls?: number;
/**
* The prompt to use for the final analysis
*/
analysisPrompt?: string;
/**
* The system prompt to use for the research agent
*/
systemPrompt?: string;
/**
* The formats to use for the final analysis
*/
formats?: ("markdown" | "json")[];
/**
* The JSON options to use for the final analysis
*/
jsonOptions?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
}
/**
* Response interface for deep research operations.
*/
interface DeepResearchResponse {
success: boolean;
id: string;
}
/**
* Status response interface for deep research operations.
*/
interface DeepResearchStatusResponse {
success: boolean;
data: {
finalAnalysis: string;
activities: Array<{
type: string;
status: string;
message: string;
timestamp: string;
depth: number;
}>;
sources: Array<{
url: string;
title: string;
description: string;
}>;
};
status: "processing" | "completed" | "failed";
error?: string;
expiresAt: string;
currentDepth: number;
maxDepth: number;
activities: Array<{
type: string;
status: string;
message: string;
timestamp: string;
depth: number;
}>;
sources: Array<{
url: string;
title: string;
description: string;
}>;
summaries: string[];
}
/**
* Parameters for LLMs.txt generation operations.
*/
interface GenerateLLMsTextParams {
/**
* Maximum number of URLs to process (1-100)
* @default 10
*/
maxUrls?: number;
/**
* Whether to show the full LLMs-full.txt in the response
* @default false
*/
showFullText?: boolean;
/**
* Whether to use cached content if available
* @default true
*/
cache?: boolean;
/**
* Experimental flag for streaming
*/
__experimental_stream?: boolean;
}
/**
* Response interface for LLMs.txt generation operations.
*/
interface GenerateLLMsTextResponse {
success: boolean;
id: string;
}
/**
* Status response interface for LLMs.txt generation operations.
*/
interface GenerateLLMsTextStatusResponse {
success: boolean;
data: {
llmstxt: string;
llmsfulltxt?: string;
};
status: "processing" | "completed" | "failed";
error?: string;
expiresAt: string;
}
/**
* Response interface for queue status operations.
*/
interface QueueStatusResponse {
success: boolean;
jobsInQueue: number;
activeJobsInQueue: number;
waitingJobsInQueue: number;
maxConcurrency: number;
/**
* ISO timestamp of the most recent successful scrape in the past 24 hours. Will be null if no successful scrape has occurred in the past 24 hours.
*/
mostRecentSuccess: string | null;
}
/** Credit usage for v1 API (snake_case fields as returned by API). */
interface CreditUsageResponseV1 {
success: boolean;
data: {
remaining_credits: number;
plan_credits: number;
billing_period_start: string | null;
billing_period_end: string | null;
};
}
/** Token usage for v1 API (snake_case fields as returned by API). */
interface TokenUsageResponseV1 {
success: boolean;
data: {
remaining_tokens: number;
plan_tokens: number;
billing_period_start: string | null;
billing_period_end: string | null;
};
}
interface CreditUsageHistoricalResponseV1 {
success: boolean;
periods: {
startDate: string | null;
endDate: string | null;
apiKey?: string;
creditsUsed: number;
}[];
}
interface TokenUsageHistoricalResponseV1 {
success: boolean;
periods: {
startDate: string | null;
endDate: string | null;
apiKey?: string;
tokensUsed: number;
}[];
}
/**
* Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content.
*/
declare class FirecrawlApp {
apiKey: string;
apiUrl: string;
version: string;
private isCloudService;
private getVersion;
private init;
/**
* Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
/**
* Scrapes a URL using the Firecrawl API.
* @param url - The URL to scrape.
* @param params - Additional parameters for the scrape request.
* @returns The response from the scrape operation.
*/
scrapeUrl<T extends zt.ZodSchema, ActionsSchema extends (Action[] | undefined) = undefined>(url: string, params?: ScrapeParams<T, ActionsSchema>): Promise<ScrapeResponse<zt.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;
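/*
 * Illustrative sketch (v1 API): scrapeUrl returns either a ScrapeResponse or an
 * ErrorResponse, so callers check `success` before reading document fields.
 * `app` is assumed to be a FirecrawlApp instance.
 *
 *   const res = await app.scrapeUrl('https://example.com', { formats: ['markdown'] });
 *   if (res.success) {
 *     console.log(res.markdown);
 *   } else {
 *     console.error(res.error);
 *   }
 */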
/**
* Searches using the Firecrawl API and optionally scrapes the results.
* @param query - The search query string.
* @param params - Optional parameters for the search request.
* @returns The response from the search operation.
*/
search(query: string, params?: SearchParams | Record<string, any>): Promise<SearchResponse>;
/**
* Initiates a crawl job for a URL using the Firecrawl API.
* @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request.
* @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the crawl operation.
*/
crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
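/*
 * Illustrative sketch (v1 API): crawlUrl polls until the job finishes, whereas
 * asyncCrawlUrl only starts it and returns the job id. `app` as above.
 *
 *   const crawl = await app.crawlUrl('https://example.com', { limit: 10 }, 2);
 *   if (crawl.success) console.log(crawl.status, crawl.data.length, 'documents');
 *
 *   const started = await app.asyncCrawlUrl('https://example.com', { limit: 10 });
 *   if (started.success) console.log('job id:', started.id);
 */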
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation.
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
* @param limit - How many entries to return. Only used when `getAllData = false`.
* @returns The response containing the job status.
*/
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
/**
* Returns information about crawl errors.
* @param id - The ID of the crawl operation.
* @returns Information about crawl errors.
*/
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
/**
* Cancels a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation.
* @returns The response from the cancel crawl operation.
*/
cancelCrawl(id: string): Promise<ErrorResponse>;
/**
* Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket.
* @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request.
* @param idempotencyKey - Optional idempotency key for the request.
* @returns A CrawlWatcher instance to monitor the crawl job.
*/
crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
/**
* Maps a URL using the Firecrawl API.
* @param url - The URL to map.
* @param params - Additional parameters for the map request.
* @returns The response from the map operation.
*/
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
/**
* Initiates a batch scrape job for multiple URLs using the Firecrawl API.
* @param urls - The URLs to scrape.
* @param params - Additional parameters for the scrape request.
* @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request.
* @param webhook - Optional webhook for the batch scrape.
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
* @returns The response from the batch scrape operation.
*/
batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean, maxConcurrency?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
/**
* Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
* @param urls - The URLs to scrape.
* @param params - Additional parameters for the scrape request.
* @param idempotencyKey - Optional idempotency key for the request.
* @returns A CrawlWatcher instance to monitor the batch scrape job.
*/
batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
/**
* Checks the status of a batch scrape job using the Firecrawl API.
* @param id - The ID of the batch scrape operation.
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
* @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
* @param limit - How many entries to return. Only used when `getAllData = false`.
* @returns The response containing the job status.
*/
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
/**
* Returns information about batch scrape errors.
* @param id - The ID of the batch scrape operation.
* @returns Information about batch scrape errors.
*/
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
/**
* Extracts information from URLs using the Firecrawl API.
* Currently in Beta. Expect breaking changes on future minor versions.
* @param urls - The URLs to extract information from. Optional if using other methods for data extraction.
* @param params - Additional parameters for the extract request.
* @returns The response from the extract operation.
*/
extract<T extends zt.ZodSchema = any>(urls?: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
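/*
 * Illustrative sketch (v1 API): extract narrows `data` to the zod schema's
 * inferred type when a schema is supplied. `app` as above.
 *
 *   import { z } from 'zod';
 *
 *   const schema = z.object({ title: z.string(), price: z.number() });
 *   const res = await app.extract(['https://example.com/item'], {
 *     prompt: 'Get the product title and price.',
 *     schema,
 *   });
 *   if (res.success) console.log(res.data.title, res.data.price);
 */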
/**
* Initiates an asynchronous extract job for one or more URLs using the Firecrawl API.
* @param urls - The URLs to extract data from.
* @param params - Additional parameters for the extract request.
* @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the extract operation.
*/
asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
/**
* Retrieves the status of an extract job.
* @param jobId - The ID of the extract job.
* @returns The status of the extract job.
*/
getExtractStatus(jobId: string): Promise<any>;
/**
* Prepares the headers for an API request.
* @param idempotencyKey - Optional key to ensure idempotency.
* @returns The prepared headers.
*/
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
/**
* Sends a POST request to the specified URL.
* @param url - The URL to send the request to.
* @param data - The data to send in the request.
* @param headers - The headers for the request.
* @returns The response from the POST request.
*/
postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
/**
* Sends a GET request to the specified URL.
* @param url - The URL to send the request to.
* @param headers - The headers for the request.
* @returns The response from the GET request.
*/
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
/**
* Sends a DELETE request to the specified URL.
* @param url - The URL to send the request to.
* @param headers - The headers for the request.
* @returns The response from the DELETE request.
*/
deleteRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
/**
* Monitors the status of a crawl job until completion or failure.
* @param id - The ID of the crawl operation.
* @param headers - The headers for the request.
* @param checkInterval - Interval in seconds for job status checks.
* @returns The final job status or data.
*/
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse | ErrorResponse>;
/**
* Determines if an error is retryable (transient network error)
* @param error - The error to check
* @returns True if the error should be retried
*/
private isRetryableError;
/**
* Handles errors from API responses.
* @param {AxiosResponse} response - The response from the API.
* @param {string} action - The action being performed when the error occurred.
*/
handleError(response: AxiosResponse, action: string): never;
/**
* Initiate