@mendable/firecrawl-js
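The bundled source for the package's v2 and v1 clients follows. For orientation, here is a minimal usage sketch of the v2 client defined in this bundle. It assumes the class is exported as FirecrawlClient from the package root (the published entry point may expose it under a different name), and the URLs, prompt, and option values are illustrative only; the option names (formats, limit, pollInterval, timeout) are taken from the code below.

// Minimal sketch, assuming FirecrawlClient is exported from the package root.
import { FirecrawlClient } from "@mendable/firecrawl-js";

async function main() {
  // The constructor also falls back to the FIRECRAWL_API_KEY environment variable.
  const client = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY });

  // Scrape a single page. Formats are strings or objects; "json" must be given
  // as an object with a prompt and/or schema (a bare "json" string is rejected).
  const doc = await client.scrape("https://example.com", {
    formats: ["markdown", { type: "json", prompt: "Extract the page title" }],
  });
  console.log(doc.markdown);

  // Start a crawl and poll until it finishes; pollInterval and timeout are in seconds.
  const job = await client.crawl("https://example.com", { limit: 10, pollInterval: 2, timeout: 120 });
  console.log(job.status, `${job.completed}/${job.total} pages`);

  // Alternatively, start the job asynchronously and watch it; the watcher uses a
  // WebSocket when available and falls back to polling otherwise.
  const started = await client.startCrawl("https://example.com", { limit: 10 });
  const watcher = client.watcher(started.id, { kind: "crawl" });
  watcher.on("document", () => console.log("document received"));
  watcher.on("done", (snap) => console.log("finished:", snap.status));
  await watcher.start();
}

main().catch(console.error);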
import { require_package } from "./chunk-NWEXGJHU.js";

// src/v2/utils/httpClient.ts
import axios from "axios";

// src/v2/utils/getVersion.ts
function getVersion() {
  try {
    if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
      return process.env.npm_package_version;
    }
    const pkg = require_package();
    return pkg?.version || "3.x.x";
  } catch {
    return "3.x.x";
  }
}

// src/v2/utils/httpClient.ts
var HttpClient = class {
  instance;
  apiKey;
  apiUrl;
  maxRetries;
  backoffFactor;
  constructor(options) {
    this.apiKey = options.apiKey;
    this.apiUrl = options.apiUrl.replace(/\/$/, "");
    this.maxRetries = options.maxRetries ?? 3;
    this.backoffFactor = options.backoffFactor ?? 0.5;
    this.instance = axios.create({
      baseURL: this.apiUrl,
      timeout: options.timeoutMs ?? 6e4,
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` },
      transitional: { clarifyTimeoutError: true }
    });
  }
  getApiUrl() {
    return this.apiUrl;
  }
  getApiKey() {
    return this.apiKey;
  }
  async request(config) {
    const version = getVersion();
    config.headers = { ...config.headers || {} };
    let lastError;
    for (let attempt = 0; attempt < this.maxRetries; attempt++) {
      try {
        const cfg = { ...config };
        if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
          const data = cfg.data ?? {};
          cfg.data = { ...data, origin: `js-sdk@${version}` };
        }
        const res = await this.instance.request(cfg);
        if (res.status === 502 && attempt < this.maxRetries - 1) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        return res;
      } catch (err) {
        lastError = err;
        const status = err?.response?.status;
        if (status === 502 && attempt < this.maxRetries - 1) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        throw err;
      }
    }
    throw lastError ??
new Error("Unexpected HTTP client error"); } sleep(seconds) { return new Promise((r) => setTimeout(r, seconds * 1e3)); } post(endpoint, body, headers) { return this.request({ method: "post", url: endpoint, data: body, headers }); } get(endpoint, headers) { return this.request({ method: "get", url: endpoint, headers }); } delete(endpoint, headers) { return this.request({ method: "delete", url: endpoint, headers }); } prepareHeaders(idempotencyKey) { const headers = {}; if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey; return headers; } }; // src/v2/types.ts var SdkError = class extends Error { status; code; details; constructor(message, status, code, details) { super(message); this.name = "FirecrawlSdkError"; this.status = status; this.code = code; this.details = details; } }; // src/v2/utils/validation.ts import zodToJsonSchema from "zod-to-json-schema"; function ensureValidFormats(formats) { if (!formats) return; for (const fmt of formats) { if (typeof fmt === "string") { if (fmt === "json") { throw new Error("json format must be an object with { type: 'json', prompt, schema }"); } continue; } if (fmt.type === "json") { const j = fmt; if (!j.prompt && !j.schema) { throw new Error("json format requires either 'prompt' or 'schema' (or both)"); } const maybeSchema = j.schema; const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def; if (isZod) { try { j.schema = zodToJsonSchema(maybeSchema); } catch { } } continue; } if (fmt.type === "screenshot") { const s = fmt; if (s.quality != null && (typeof s.quality !== "number" || s.quality < 0)) { throw new Error("screenshot.quality must be a non-negative number"); } } } } function ensureValidScrapeOptions(options) { if (!options) return; if (options.timeout != null && options.timeout <= 0) { throw new Error("timeout must be positive"); } if (options.waitFor != null && options.waitFor < 0) { throw new Error("waitFor must be non-negative"); } ensureValidFormats(options.formats); } // src/v2/utils/errorHandler.ts import "axios"; function throwForBadResponse(resp, action) { const status = resp.status; const body = resp.data || {}; const msg = body?.error || body?.message || `Request failed (${status}) while trying to ${action}`; throw new SdkError(msg, status, void 0, body?.details); } function normalizeAxiosError(err, action) { const status = err.response?.status; const body = err.response?.data; const message = body?.error || err.message || `Request failed${status ? ` (${status})` : ""} while trying to ${action}`; const code = body?.code || err.code; throw new SdkError(message, status, code, body?.details ?? 
body); } // src/v2/methods/scrape.ts async function scrape(http, url, options) { if (!url || !url.trim()) { throw new Error("URL cannot be empty"); } if (options) ensureValidScrapeOptions(options); const payload = { url: url.trim() }; if (options) Object.assign(payload, options); try { const res = await http.post("/v2/scrape", payload); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "scrape"); } return res.data.data || {}; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "scrape"); throw err; } } // src/v2/methods/search.ts function prepareSearchPayload(req) { if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty"); if (req.limit != null && req.limit <= 0) throw new Error("limit must be positive"); if (req.timeout != null && req.timeout <= 0) throw new Error("timeout must be positive"); const payload = { query: req.query }; if (req.sources) payload.sources = req.sources; if (req.categories) payload.categories = req.categories; if (req.limit != null) payload.limit = req.limit; if (req.tbs != null) payload.tbs = req.tbs; if (req.location != null) payload.location = req.location; if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs; if (req.timeout != null) payload.timeout = req.timeout; if (req.scrapeOptions) { ensureValidScrapeOptions(req.scrapeOptions); payload.scrapeOptions = req.scrapeOptions; } return payload; } function transformArray(arr) { const results = []; for (const item of arr) { if (item && typeof item === "object") { if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) { results.push(item); } else { results.push(item); } } else { results.push({ url: item }); } } return results; } async function search(http, request) { const payload = prepareSearchPayload(request); try { const res = await http.post("/v2/search", payload); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "search"); } const data = res.data.data || {}; const out = {}; if (data.web) out.web = transformArray(data.web); if (data.news) out.news = transformArray(data.news); if (data.images) out.images = transformArray(data.images); return out; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "search"); throw err; } } // src/v2/methods/map.ts function prepareMapPayload(url, options) { if (!url || !url.trim()) throw new Error("URL cannot be empty"); const payload = { url: url.trim() }; if (options) { if (options.sitemap != null) payload.sitemap = options.sitemap; if (options.search != null) payload.search = options.search; if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains; if (options.limit != null) payload.limit = options.limit; if (options.timeout != null) payload.timeout = options.timeout; } return payload; } async function map(http, url, options) { const payload = prepareMapPayload(url, options); try { const res = await http.post("/v2/map", payload); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "map"); } const linksIn = res.data.links || []; const links = []; for (const item of linksIn) { if (typeof item === "string") links.push({ url: item }); else if (item && typeof item === "object") links.push({ url: item.url, title: item.title, description: item.description }); } return { links }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "map"); throw err; } } // 
src/v2/methods/crawl.ts function prepareCrawlPayload(request) { if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty"); const data = { url: request.url.trim() }; if (request.prompt) data.prompt = request.prompt; if (request.excludePaths) data.excludePaths = request.excludePaths; if (request.includePaths) data.includePaths = request.includePaths; if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth; if (request.sitemap != null) data.sitemap = request.sitemap; if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters; if (request.limit != null) data.limit = request.limit; if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain; if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks; if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains; if (request.delay != null) data.delay = request.delay; if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency; if (request.webhook != null) data.webhook = request.webhook; if (request.scrapeOptions) { ensureValidScrapeOptions(request.scrapeOptions); data.scrapeOptions = request.scrapeOptions; } if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention; return data; } async function startCrawl(http, request) { const payload = prepareCrawlPayload(request); try { const res = await http.post("/v2/crawl", payload); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "start crawl"); } return { id: res.data.id, url: res.data.url }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "start crawl"); throw err; } } async function getCrawlStatus(http, jobId) { try { const res = await http.get(`/v2/crawl/${jobId}`); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "get crawl status"); } const body = res.data; return { status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: body.next ?? null, data: body.data || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status"); throw err; } } async function cancelCrawl(http, jobId) { try { const res = await http.delete(`/v2/crawl/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "cancel crawl"); return res.data?.status === "cancelled"; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "cancel crawl"); throw err; } } async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { const status = await getCrawlStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status)) return status; if (timeout != null && Date.now() - start > timeout * 1e3) { throw new Error(`Crawl job ${jobId} did not complete within ${timeout} seconds`); } await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function crawl(http, request, pollInterval = 2, timeout) { const started = await startCrawl(http, request); return waitForCrawlCompletion(http, started.id, pollInterval, timeout); } async function getCrawlErrors(http, crawlId) { try { const res = await http.get(`/v2/crawl/${crawlId}/errors`); if (res.status !== 200) throwForBadResponse(res, "get crawl errors"); const payload = res.data?.data ?? 
res.data; return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors"); throw err; } } async function getActiveCrawls(http) { try { const res = await http.get(`/v2/crawl/active`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get active crawls"); const crawlsIn = res.data?.crawls || []; const crawls = crawlsIn.map((c) => ({ id: c.id, teamId: c.teamId ?? c.team_id, url: c.url, options: c.options ?? null })); return { success: true, crawls }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get active crawls"); throw err; } } async function crawlParamsPreview(http, url, prompt) { if (!url || !url.trim()) throw new Error("URL cannot be empty"); if (!prompt || !prompt.trim()) throw new Error("Prompt cannot be empty"); try { const res = await http.post("/v2/crawl/params-preview", { url: url.trim(), prompt }); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "crawl params preview"); const data = res.data.data || {}; if (res.data.warning) data.warning = res.data.warning; return data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview"); throw err; } } // src/v2/methods/batch.ts async function startBatchScrape(http, urls, { options, webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, integration, idempotencyKey } = {}) { if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty"); const payload = { urls }; if (options) { ensureValidScrapeOptions(options); Object.assign(payload, options); } if (webhook != null) payload.webhook = webhook; if (appendToId != null) payload.appendToId = appendToId; if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs; if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency; if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention; if (integration != null) payload.integration = integration; try { const headers = http.prepareHeaders(idempotencyKey); const res = await http.post("/v2/batch/scrape", payload, headers); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "start batch scrape"); return { id: res.data.id, url: res.data.url, invalidURLs: res.data.invalidURLs || void 0 }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "start batch scrape"); throw err; } } async function getBatchScrapeStatus(http, jobId) { try { const res = await http.get(`/v2/batch/scrape/${jobId}`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status"); const body = res.data; return { status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: body.next ?? 
null, data: body.data || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status"); throw err; } } async function cancelBatchScrape(http, jobId) { try { const res = await http.delete(`/v2/batch/scrape/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "cancel batch scrape"); return res.data?.status === "cancelled"; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "cancel batch scrape"); throw err; } } async function getBatchScrapeErrors(http, jobId) { try { const res = await http.get(`/v2/batch/scrape/${jobId}/errors`); if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors"); const payload = res.data?.data ?? res.data; return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape errors"); throw err; } } async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { const status = await getBatchScrapeStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status)) return status; if (timeout != null && Date.now() - start > timeout * 1e3) { throw new Error(`Batch scrape job ${jobId} did not complete within ${timeout} seconds`); } await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function batchScrape(http, urls, opts = {}) { const start = await startBatchScrape(http, urls, opts); return waitForBatchCompletion(http, start.id, opts.pollInterval ?? 2, opts.timeout); } // src/v2/methods/extract.ts import { zodToJsonSchema as zodToJsonSchema2 } from "zod-to-json-schema"; function prepareExtractPayload(args) { const body = {}; if (args.urls) body.urls = args.urls; if (args.prompt != null) body.prompt = args.prompt; if (args.schema != null) { const s = args.schema; const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def; body.schema = isZod ? 
zodToJsonSchema2(s) : args.schema; } if (args.systemPrompt != null) body.systemPrompt = args.systemPrompt; if (args.allowExternalLinks != null) body.allowExternalLinks = args.allowExternalLinks; if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch; if (args.showSources != null) body.showSources = args.showSources; if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs; if (args.scrapeOptions) { ensureValidScrapeOptions(args.scrapeOptions); body.scrapeOptions = args.scrapeOptions; } return body; } async function startExtract(http, args) { const payload = prepareExtractPayload(args); try { const res = await http.post("/v2/extract", payload); if (res.status !== 200) throwForBadResponse(res, "extract"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "extract"); throw err; } } async function getExtractStatus(http, jobId) { try { const res = await http.get(`/v2/extract/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "extract status"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "extract status"); throw err; } } async function waitExtract(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { const status = await getExtractStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status || "")) return status; if (timeout != null && Date.now() - start > timeout * 1e3) return status; await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function extract(http, args) { const started = await startExtract(http, args); const jobId = started.id; if (!jobId) return started; return waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout); } // src/v2/methods/usage.ts async function getConcurrency(http) { try { const res = await http.get("/v2/concurrency-check"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get concurrency"); const d = res.data.data || res.data; return { concurrency: d.concurrency, maxConcurrency: d.maxConcurrency ?? d.max_concurrency }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get concurrency"); throw err; } } async function getCreditUsage(http) { try { const res = await http.get("/v2/team/credit-usage"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage"); const d = res.data.data || res.data; return { remainingCredits: d.remainingCredits ?? d.remaining_credits ?? 0 }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage"); throw err; } } async function getTokenUsage(http) { try { const res = await http.get("/v2/team/token-usage"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage"); return res.data.data || res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage"); throw err; } } // src/v2/watcher.ts import { EventEmitter } from "events"; var Watcher = class extends EventEmitter { http; jobId; kind; pollInterval; timeout; ws; closed = false; constructor(http, jobId, opts = {}) { super(); this.http = http; this.jobId = jobId; this.kind = opts.kind ?? "crawl"; this.pollInterval = opts.pollInterval ?? 2; this.timeout = opts.timeout; } buildWsUrl() { const apiUrl = this.http.getApiUrl(); const wsBase = apiUrl.replace(/^http/, "ws"); const path = this.kind === "crawl" ? 
`/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`; return `${wsBase}${path}`; } async start() { try { const url = this.buildWsUrl(); this.ws = new WebSocket(url, this.http.getApiKey()); this.attachWsHandlers(this.ws); } catch { this.pollLoop(); } } attachWsHandlers(ws) { let startTs = Date.now(); const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0; ws.onmessage = (ev) => { try { const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null; if (!body) return; const type = body.type; if (type === "error") { this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId }); return; } if (type === "catchup") { const payload2 = body.data || {}; this.emitDocuments(payload2.data || []); this.emitSnapshot(payload2); return; } if (type === "document") { const doc = body.data; if (doc) this.emit("document", doc); return; } if (type === "done") { this.emit("done", { status: "completed", data: [], id: this.jobId }); this.close(); return; } const payload = body.data || body; if (payload && payload.status) this.emitSnapshot(payload); } catch { } if (timeoutMs && Date.now() - startTs > timeoutMs) this.close(); }; ws.onerror = () => { this.emit("error", { status: "failed", data: [], error: "WebSocket error", id: this.jobId }); this.close(); }; ws.onclose = () => { if (!this.closed) this.pollLoop(); }; } emitDocuments(docs) { for (const doc of docs) this.emit("document", { ...doc, id: this.jobId }); } emitSnapshot(payload) { const status = payload.status; const data = payload.data || []; const snap = this.kind === "crawl" ? { status, completed: payload.completed ?? 0, total: payload.total ?? 0, creditsUsed: payload.creditsUsed, expiresAt: payload.expiresAt, next: payload.next ?? null, data } : { status, completed: payload.completed ?? 0, total: payload.total ?? 0, creditsUsed: payload.creditsUsed, expiresAt: payload.expiresAt, next: payload.next ?? null, data }; this.emit("snapshot", snap); if (["completed", "failed", "cancelled"].includes(status)) { this.emit("done", { status, data, id: this.jobId }); this.close(); } } async pollLoop() { const startTs = Date.now(); const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0; while (!this.closed) { try { const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId); this.emit("snapshot", snap); if (["completed", "failed", "cancelled"].includes(snap.status)) { this.emit("done", { status: snap.status, data: snap.data, id: this.jobId }); this.close(); break; } } catch { } if (timeoutMs && Date.now() - startTs > timeoutMs) break; await new Promise((r) => setTimeout(r, Math.max(1e3, this.pollInterval * 1e3))); } } close() { this.closed = true; if (this.ws && this.ws.close) this.ws.close(); } }; // src/v2/client.ts import "zod"; var FirecrawlClient = class { http; /** * Create a v2 client. * @param options Transport configuration (API key, base URL, timeouts, retries). */ constructor(options = {}) { const apiKey = options.apiKey ?? process.env.FIRECRAWL_API_KEY ?? ""; const apiUrl = (options.apiUrl ?? process.env.FIRECRAWL_API_URL ?? "https://api.firecrawl.dev").replace(/\/$/, ""); if (!apiKey) { throw new Error("API key is required. 
Set FIRECRAWL_API_KEY env or pass apiKey."); } this.http = new HttpClient({ apiKey, apiUrl, timeoutMs: options.timeoutMs, maxRetries: options.maxRetries, backoffFactor: options.backoffFactor }); } async scrape(url, options) { return scrape(this.http, url, options); } // Search /** * Search the web and optionally scrape each result. * @param query Search query string. * @param req Additional search options (sources, limit, scrapeOptions, etc.). * @returns Structured search results. */ async search(query, req = {}) { return search(this.http, { query, ...req }); } // Map /** * Map a site to discover URLs (sitemap-aware). * @param url Root URL to map. * @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout). * @returns Discovered links. */ async map(url, options) { return map(this.http, url, options); } // Crawl /** * Start a crawl job (async). * @param url Root URL to crawl. * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.). * @returns Job id and url. */ async startCrawl(url, req = {}) { return startCrawl(this.http, { url, ...req }); } /** * Get the status and partial data of a crawl job. * @param jobId Crawl job id. */ async getCrawlStatus(jobId) { return getCrawlStatus(this.http, jobId); } /** * Cancel a crawl job. * @param jobId Crawl job id. * @returns True if cancelled. */ async cancelCrawl(jobId) { return cancelCrawl(this.http, jobId); } /** * Convenience waiter: start a crawl and poll until it finishes. * @param url Root URL to crawl. * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds). * @returns Final job snapshot. */ async crawl(url, req = {}) { return crawl(this.http, { url, ...req }, req.pollInterval, req.timeout); } /** * Retrieve crawl errors and robots.txt blocks. * @param crawlId Crawl job id. */ async getCrawlErrors(crawlId) { return getCrawlErrors(this.http, crawlId); } /** * List active crawls for the authenticated team. */ async getActiveCrawls() { return getActiveCrawls(this.http); } /** * Preview normalized crawl parameters produced by a natural-language prompt. * @param url Root URL. * @param prompt Natural-language instruction. */ async crawlParamsPreview(url, prompt) { return crawlParamsPreview(this.http, url, prompt); } // Batch /** * Start a batch scrape job for multiple URLs (async). * @param urls URLs to scrape. * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.). * @returns Job id and url. */ async startBatchScrape(urls, opts) { return startBatchScrape(this.http, urls, opts); } /** * Get the status and partial data of a batch scrape job. * @param jobId Batch job id. */ async getBatchScrapeStatus(jobId) { return getBatchScrapeStatus(this.http, jobId); } /** * Retrieve batch scrape errors and robots.txt blocks. * @param jobId Batch job id. */ async getBatchScrapeErrors(jobId) { return getBatchScrapeErrors(this.http, jobId); } /** * Cancel a batch scrape job. * @param jobId Batch job id. * @returns True if cancelled. */ async cancelBatchScrape(jobId) { return cancelBatchScrape(this.http, jobId); } /** * Convenience waiter: start a batch scrape and poll until it finishes. * @param urls URLs to scrape. * @param opts Batch options plus waiter controls (pollInterval, timeout seconds). * @returns Final job snapshot. */ async batchScrape(urls, opts) { return batchScrape(this.http, urls, opts); } // Extract /** * Start an extract job (async). * @param args Extraction request (urls, schema or prompt, flags). * @returns Job id or processing state. 
*/ async startExtract(args) { return startExtract(this.http, args); } /** * Get extract job status/data. * @param jobId Extract job id. */ async getExtractStatus(jobId) { return getExtractStatus(this.http, jobId); } /** * Convenience waiter: start an extract and poll until it finishes. * @param args Extraction request plus waiter controls (pollInterval, timeout seconds). * @returns Final extract response. */ async extract(args) { return extract(this.http, args); } // Usage /** Current concurrency usage. */ async getConcurrency() { return getConcurrency(this.http); } /** Current credit usage. */ async getCreditUsage() { return getCreditUsage(this.http); } /** Recent token usage. */ async getTokenUsage() { return getTokenUsage(this.http); } // Watcher /** * Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`. * @param jobId Job id. * @param opts Watcher options (kind, pollInterval, timeout seconds). */ watcher(jobId, opts = {}) { return new Watcher(this.http, jobId, opts); } }; // src/v1/index.ts import axios2, { AxiosError } from "axios"; import "zod"; import { zodToJsonSchema as zodToJsonSchema3 } from "zod-to-json-schema"; // node_modules/typescript-event-target/dist/index.mjs var e = class extends EventTarget { dispatchTypedEvent(s, t) { return super.dispatchEvent(t); } }; // src/v1/index.ts var FirecrawlError = class extends Error { statusCode; details; constructor(message, statusCode, details) { super(message); this.statusCode = statusCode; this.details = details; } }; var FirecrawlApp = class { apiKey; apiUrl; version = "1.25.1"; isCloudService(url) { return url.includes("api.firecrawl.dev"); } async getVersion() { try { if (typeof process !== "undefined" && process.env && process.env.npm_package_version) { return process.env.npm_package_version; } const packageJson = await import("./package-2MDJVY6K.js"); return packageJson.default.version; } catch (error) { const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false); if (!isTest) { console.error("Error getting version:", error); } return "1.25.1"; } } async init() { this.version = await this.getVersion(); } /** * Initializes a new instance of the FirecrawlApp class. * @param config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null, apiUrl = null }) { const baseUrl = apiUrl || "https://api.firecrawl.dev"; if (this.isCloudService(baseUrl) && typeof apiKey !== "string") { throw new FirecrawlError("No API key provided", 401); } this.apiKey = apiKey || ""; this.apiUrl = baseUrl; this.init(); } /** * Scrapes a URL using the Firecrawl API. * @param url - The URL to scrape. * @param params - Additional parameters for the scrape request. * @returns The response from the scrape operation. */ async scrapeUrl(url, params) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }; let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; try { schema = zodToJsonSchema3(schema); } catch (error) { } jsonData = { ...jsonData, extract: { ...jsonData.extract, schema } }; } if (jsonData?.jsonOptions?.schema) { let schema = jsonData.jsonOptions.schema; try { schema = zodToJsonSchema3(schema); } catch (error) { } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, schema } }; } try { const response = await axios2.post( this.apiUrl + `/v1/scrape`, jsonData, { headers, timeout: params?.timeout !== void 0 ? 
params.timeout + 5e3 : void 0 } ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { return { success: true, warning: responseData.warning, error: responseData.error, ...responseData.data }; } else { throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status); } } else { this.handleError(response, "scrape URL"); } } catch (error) { this.handleError(error.response, "scrape URL"); } return { success: false, error: "Internal server error." }; } /** * Searches using the Firecrawl API and optionally scrapes the results. * @param query - The search query string. * @param params - Optional parameters for the search request. * @returns The response from the search operation. */ async search(query, params) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }; let jsonData = { query, limit: params?.limit ?? 5, tbs: params?.tbs, filter: params?.filter, lang: params?.lang ?? "en", country: params?.country ?? "us", location: params?.location, origin: `js-sdk@${this.version}`, timeout: params?.timeout ?? 6e4, scrapeOptions: params?.scrapeOptions ?? { formats: [] } }; if (jsonData?.scrapeOptions?.extract?.schema) { let schema = jsonData.scrapeOptions.extract.schema; try { schema = zodToJsonSchema3(schema); } catch (error) { } jsonData = { ...jsonData, scrapeOptions: { ...jsonData.scrapeOptions, extract: { ...jsonData.scrapeOptions.extract, schema } } }; } try { const response = await this.postRequest( this.apiUrl + `/v1/search`, jsonData, headers ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { return { success: true, data: responseData.data, warning: responseData.warning }; } else { throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status); } } else { this.handleError(response, "search"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error.", data: [] }; } /** * Initiates a crawl job for a URL using the Firecrawl API. * @param url - The URL to crawl. * @param params - Additional parameters for the crawl request. * @param pollInterval - Time in seconds for job status checks. * @param idempotencyKey - Optional idempotency key for the request. * @returns The response from the crawl operation. */ async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/crawl`, jsonData, headers ); if (response.status === 200) { const id = response.data.id; return this.monitorJobStatus(id, headers, pollInterval); } else { this.handleError(response, "start crawl job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." 
}; } async asyncCrawlUrl(url, params, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/crawl`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start crawl job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Checks the status of a crawl job using the Firecrawl API. * @param id - The ID of the crawl operation. * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`) * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`. * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`. * @param limit - How many entries to return. Only used when `getAllData = false`. * @returns The response containing the job status. */ async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) { if (!id) { throw new FirecrawlError("No crawl ID provided", 400); } const headers = this.prepareHeaders(); const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`); if (skip !== void 0) { targetURL.searchParams.set("skip", skip.toString()); } if (limit !== void 0) { targetURL.searchParams.set("limit", limit.toString()); } try { const response = await this.getRequest( targetURL.href, headers ); if (response.status === 200) { let allData = response.data.data; if (getAllData && response.data.status === "completed") { let statusData = response.data; if ("data" in statusData) { let data = statusData.data; while (typeof statusData === "object" && "next" in statusData) { if (data.length === 0) { break; } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } allData = data; } } let resp = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, next: getAllData ? void 0 : response.data.next, expiresAt: new Date(response.data.expiresAt), data: allData }; if (!response.data.success && response.data.error) { resp = { ...resp, success: false, error: response.data.error }; } if (response.data.next) { resp.next = response.data.next; } return resp; } else { this.handleError(response, "check crawl status"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Returns information about crawl errors. * @param id - The ID of the crawl operation. * @returns Information about crawl errors. 
*/ async checkCrawlErrors(id) { const headers = this.prepareHeaders(); try { const response = await this.deleteRequest( `${this.apiUrl}/v1/crawl/${id}/errors`, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "check crawl errors"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Cancels a crawl job using the Firecrawl API. * @param id - The ID of the crawl operation. * @returns The response from the cancel crawl operation. */ async cancelCrawl(id) { const headers = this.prepareHeaders(); try { const response = await this.deleteRequest( `${this.apiUrl}/v1/crawl/${id}`, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "cancel crawl job"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket. * @param url - The URL to crawl. * @param params - Additional parameters for the crawl request. * @param idempotencyKey - Optional idempotency key for the request. * @returns A CrawlWatcher instance to monitor the crawl job. */ async crawlUrlAndWatch(url, params, idempotencyKey) { const crawl2 = await this.asyncCrawlUrl(url, params, idempotencyKey); if (crawl2.success && crawl2.id) { const id = crawl2.id; return new CrawlWatcher(id, this); } throw new FirecrawlError("Crawl job failed to start", 400); } /** * Maps a URL using the Firecrawl API. * @param url - The URL to map. * @param params - Additional parameters for the map request. * @returns The response from the map operation. */ async mapUrl(url, params) { const headers = this.prepareHeaders(); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/map`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "map"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Initiates a batch scrape job for multiple URLs using the Firecrawl API. * @param url - The URLs to scrape. * @param params - Additional parameters for the scrape request. * @param pollInterval - Time in seconds for job status checks. * @param idempotencyKey - Optional idempotency key for the request. * @param webhook - Optional webhook for the batch scrape. * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs. * @returns The response from the crawl operation. 
*/ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs, maxConcurrency) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; try { schema = zodToJsonSchema3(schema); } catch (error) { } jsonData = { ...jsonData, extract: { ...jsonData.extract, schema } }; } if (jsonData?.jsonOptions?.schema) { let schema = jsonData.jsonOptions.schema; try { schema = zodToJsonSchema3(schema); } catch (error) { } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, schema } }; } try { const response = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, jsonData, headers ); if (response.status === 200) { const id = response.data.id; return this.monitorJobStatus(id, headers, pollInterval); } else { this.handleError(response, "start batch scrape job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start batch scrape job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket. * @param urls - The URL to scrape. * @param params - Additional parameters for the scrape request. * @param idempotencyKey - Optional idempotency key for the request. * @returns A CrawlWatcher instance to monitor the crawl job. */ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) { const crawl2 = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs); if (crawl2.success && crawl2.id) { const id = crawl2.id; return new CrawlWatcher(id, this); } throw new FirecrawlError("Batch scrape job failed to start", 400); } /** * Checks the status of a batch scrape job using the Firecrawl API. * @param id - The ID of the batch scrape operation. * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`) * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`. * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`. * @param limit - How many entries to return. Only used when `getAllData = false`. 
* @returns The response containing the job status. */ async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) { if (!id) { throw new FirecrawlError("No batch scrape ID provided", 400); } const headers = this.prepareHeaders(); const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`); if (skip !== void 0) { targetURL.searchParams.set("skip", skip.toString()); } if (limit !== void 0) { targetURL.searchParams.set("limit", limit.toString()); } try { const response = await this.getRequest( targetURL.href, headers ); if (response.status === 200) { let allData = response.data.data; if (getAllData && response.data.status === "completed") { let statusData = response.data; if ("data" in statusData) { let data = statusData.data; while (typeof statusData === "object" && "next" in statusData) { if (data