// firecrawl — JavaScript SDK for the Firecrawl API
import {
require_package
} from "./chunk-GY35KXDS.js";
// src/v2/utils/httpClient.ts
import axios from "axios";
// src/v2/utils/getVersion.ts
function getVersion() {
try {
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
return process.env.npm_package_version;
}
const pkg = require_package();
return pkg?.version || "3.x.x";
} catch {
return "3.x.x";
}
}
// src/v2/utils/httpClient.ts
var HttpClient = class {
instance;
apiKey;
apiUrl;
maxRetries;
backoffFactor;
constructor(options) {
this.apiKey = options.apiKey;
this.apiUrl = options.apiUrl.replace(/\/$/, "");
this.maxRetries = options.maxRetries ?? 3;
this.backoffFactor = options.backoffFactor ?? 0.5;
this.instance = axios.create({
baseURL: this.apiUrl,
timeout: options.timeoutMs ?? 3e5,
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`
},
transitional: { clarifyTimeoutError: true }
});
}
getApiUrl() {
return this.apiUrl;
}
getApiKey() {
return this.apiKey;
}
async request(config) {
const version = getVersion();
config.headers = {
...config.headers || {}
};
let lastError;
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
try {
const cfg = { ...config };
if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
const data = cfg.data ?? {};
cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
if (typeof data.timeout === "number") {
cfg.timeout = data.timeout + 5e3;
}
}
const res = await this.instance.request(cfg);
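// Note: axios rejects non-2xx responses by default (validateStatus is not
// overridden here), so this 502 branch only fires with a permissive
// validateStatus; thrown 502s are retried in the catch block below.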
if (res.status === 502 && attempt < this.maxRetries - 1) {
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
continue;
}
return res;
} catch (err) {
lastError = err;
const status = err?.response?.status;
if (status === 502 && attempt < this.maxRetries - 1) {
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
continue;
}
throw err;
}
}
throw lastError ?? new Error("Unexpected HTTP client error");
}
sleep(seconds) {
return new Promise((r) => setTimeout(r, seconds * 1e3));
}
post(endpoint, body, headers) {
return this.request({ method: "post", url: endpoint, data: body, headers });
}
get(endpoint, headers) {
return this.request({ method: "get", url: endpoint, headers });
}
delete(endpoint, headers) {
return this.request({ method: "delete", url: endpoint, headers });
}
prepareHeaders(idempotencyKey) {
const headers = {};
if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
return headers;
}
};
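// Usage sketch (illustrative; the API key is a placeholder):
//
//   const http = new HttpClient({
//     apiKey: "fc-YOUR-KEY",
//     apiUrl: "https://api.firecrawl.dev",
//     maxRetries: 3,      // 502s retried with exponential backoff
//     backoffFactor: 0.5, // seconds, doubled per attempt
//   });
//   const res = await http.get("/v2/team/credit-usage");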
// src/v2/types.ts
var SdkError = class extends Error {
status;
code;
details;
jobId;
constructor(message, status, code, details, jobId) {
super(message);
this.name = "FirecrawlSdkError";
this.status = status;
this.code = code;
this.details = details;
this.jobId = jobId;
}
};
var JobTimeoutError = class extends SdkError {
timeoutSeconds;
constructor(jobId, timeoutSeconds, jobType = "batch") {
const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
super(
`${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
void 0,
"JOB_TIMEOUT",
void 0,
jobId
);
this.name = "JobTimeoutError";
this.timeoutSeconds = timeoutSeconds;
}
};
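// Error-handling sketch (illustrative): JobTimeoutError extends SdkError, so a
// single catch can branch on the subtype, then on the HTTP status.
//
//   try {
//     await crawl(http, { url: "https://example.com" }, 2, 60); // 60 s budget
//   } catch (err) {
//     if (err instanceof JobTimeoutError) {
//       console.error(`gave up after ${err.timeoutSeconds}s; job ${err.jobId} may still be running`);
//     } else if (err instanceof SdkError) {
//       console.error(err.status, err.code, err.message);
//     } else {
//       throw err;
//     }
//   }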
// src/v2/utils/validation.ts
import { zodToJsonSchema } from "zod-to-json-schema";
function ensureValidFormats(formats) {
if (!formats) return;
for (const fmt of formats) {
if (typeof fmt === "string") {
if (fmt === "json") {
throw new Error("json format must be an object with { type: 'json', prompt, schema }");
}
continue;
}
if (fmt.type === "json") {
const j = fmt;
if (!j.prompt && !j.schema) {
throw new Error("json format requires either 'prompt' or 'schema' (or both)");
}
const maybeSchema = j.schema;
const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def;
if (isZod) {
try {
j.schema = zodToJsonSchema(maybeSchema);
} catch {
}
}
continue;
}
if (fmt.type === "changeTracking") {
const ct = fmt;
const maybeSchema = ct.schema;
const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def;
if (isZod) {
try {
ct.schema = zodToJsonSchema(maybeSchema);
} catch {
}
}
continue;
}
if (fmt.type === "screenshot") {
const s = fmt;
if (s.quality != null && (typeof s.quality !== "number" || s.quality < 0)) {
throw new Error("screenshot.quality must be a non-negative number");
}
}
}
}
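// Formats sketch (illustrative) of the shapes ensureValidFormats accepts:
// string formats plus object formats for json, changeTracking, and screenshot.
// A Zod schema is converted to JSON Schema in place.
//
//   import { z } from "zod";
//   ensureValidFormats([
//     "markdown",
//     { type: "json", prompt: "Extract the title", schema: z.object({ title: z.string() }) },
//     { type: "screenshot", quality: 80 },
//   ]); // throws only for a bare "json" string or a negative screenshot quality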
function ensureValidScrapeOptions(options) {
if (!options) return;
if (options.timeout != null && options.timeout <= 0) {
throw new Error("timeout must be positive");
}
if (options.waitFor != null && options.waitFor < 0) {
throw new Error("waitFor must be non-negative");
}
ensureValidFormats(options.formats);
}
// src/v2/utils/errorHandler.ts
import "axios";
function throwForBadResponse(resp, action) {
const status = resp.status;
const body = resp.data || {};
const msg = body?.error || body?.message || `Request failed (${status}) while trying to ${action}`;
throw new SdkError(msg, status, void 0, body?.details);
}
function normalizeAxiosError(err, action) {
const status = err.response?.status;
const body = err.response?.data;
const message = body?.error || err.message || `Request failed${status ? ` (${status})` : ""} while trying to ${action}`;
const code = body?.code || err.code;
throw new SdkError(message, status, code, body?.details ?? body);
}
function isRetryableError(err) {
if (err instanceof JobTimeoutError) {
return false;
}
if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
const status = err.status;
if (status && status >= 400 && status < 500) {
return false;
}
if (status && status >= 500) {
return true;
}
}
  // Connection-level failures (no response, aborted, timed out) and any other
  // unclassified error default to retryable.
  return true;
}
// src/v2/methods/scrape.ts
async function scrape(http, url, options) {
if (!url || !url.trim()) {
throw new Error("URL cannot be empty");
}
if (options) ensureValidScrapeOptions(options);
const payload = { url: url.trim() };
if (options) Object.assign(payload, options);
try {
const res = await http.post("/v2/scrape", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "scrape");
}
return res.data.data || {};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "scrape");
throw err;
}
}
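// Usage sketch (illustrative; `http` as constructed above):
//
//   const doc = await scrape(http, "https://example.com", { formats: ["markdown", "links"] });
//   console.log(doc.markdown);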
// src/v2/methods/search.ts
function prepareSearchPayload(req) {
if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
if (req.limit != null && req.limit <= 0) throw new Error("limit must be positive");
if (req.timeout != null && req.timeout <= 0) throw new Error("timeout must be positive");
const payload = {
query: req.query
};
if (req.sources) payload.sources = req.sources;
if (req.categories) payload.categories = req.categories;
if (req.limit != null) payload.limit = req.limit;
if (req.tbs != null) payload.tbs = req.tbs;
if (req.location != null) payload.location = req.location;
if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
if (req.timeout != null) payload.timeout = req.timeout;
if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
if (req.scrapeOptions) {
ensureValidScrapeOptions(req.scrapeOptions);
payload.scrapeOptions = req.scrapeOptions;
}
return payload;
}
function transformArray(arr) {
  const results = [];
  for (const item of arr) {
    // Objects (documents or plain search entries) pass through unchanged;
    // bare values are wrapped as { url }.
    if (item && typeof item === "object") {
      results.push(item);
    } else {
      results.push({ url: item });
    }
  }
  return results;
}
async function search(http, request) {
const payload = prepareSearchPayload(request);
try {
const res = await http.post("/v2/search", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "search");
}
const data = res.data.data || {};
const out = {};
if (data.web) out.web = transformArray(data.web);
if (data.news) out.news = transformArray(data.news);
if (data.images) out.images = transformArray(data.images);
return out;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "search");
throw err;
}
}
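// Usage sketch (illustrative): results come back grouped by source.
//
//   const { web, news, images } = await search(http, {
//     query: "firecrawl sdk",
//     limit: 5,
//     scrapeOptions: { formats: ["markdown"] },
//   });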
// src/v2/methods/map.ts
function prepareMapPayload(url, options) {
if (!url || !url.trim()) throw new Error("URL cannot be empty");
const payload = { url: url.trim() };
if (options) {
if (options.sitemap != null) payload.sitemap = options.sitemap;
if (options.search != null) payload.search = options.search;
if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
if (options.ignoreQueryParameters != null) payload.ignoreQueryParameters = options.ignoreQueryParameters;
if (options.limit != null) payload.limit = options.limit;
if (options.timeout != null) payload.timeout = options.timeout;
if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
if (options.location != null) payload.location = options.location;
}
return payload;
}
async function map(http, url, options) {
const payload = prepareMapPayload(url, options);
try {
const res = await http.post("/v2/map", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "map");
}
const linksIn = res.data.links || [];
const links = [];
for (const item of linksIn) {
if (typeof item === "string") links.push({ url: item });
else if (item && typeof item === "object") links.push({ url: item.url, title: item.title, description: item.description });
}
return { links };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "map");
throw err;
}
}
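// Usage sketch (illustrative): bare string links are normalized to { url } objects.
//
//   const { links } = await map(http, "https://example.com", { includeSubdomains: true, limit: 100 });
//   for (const link of links) console.log(link.url, link.title ?? "");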
// src/v2/utils/pagination.ts
async function fetchAllPages(http, nextUrl, initial, pagination) {
const docs = initial.slice();
let current = nextUrl;
let pageCount = 0;
const maxPages = pagination?.maxPages;
const maxResults = pagination?.maxResults;
const maxWaitTime = pagination?.maxWaitTime;
const started = Date.now();
while (current) {
if (maxPages != null && pageCount >= maxPages) break;
if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break;
let payload = null;
try {
const res = await http.get(current);
payload = res.data;
} catch {
break;
}
if (!payload?.success) break;
for (const d of payload.data || []) {
if (maxResults != null && docs.length >= maxResults) break;
docs.push(d);
}
if (maxResults != null && docs.length >= maxResults) break;
current = payload.next ?? null;
pageCount += 1;
}
return docs;
}
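// Pagination sketch (illustrative): these limits apply whenever auto-pagination
// follows `next` links, e.g. via getCrawlStatus below (jobId from startCrawl).
//
//   const status = await getCrawlStatus(http, jobId, {
//     autoPaginate: true,
//     maxPages: 5,     // fetch at most 5 extra pages
//     maxResults: 500, // cap aggregated documents
//     maxWaitTime: 30, // seconds spent following pages
//   });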
// src/v2/methods/crawl.ts
function prepareCrawlPayload(request) {
if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
const data = { url: request.url.trim() };
if (request.prompt) data.prompt = request.prompt;
if (request.excludePaths) data.excludePaths = request.excludePaths;
if (request.includePaths) data.includePaths = request.includePaths;
if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
if (request.sitemap != null) data.sitemap = request.sitemap;
if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
if (request.limit != null) data.limit = request.limit;
if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
if (request.delay != null) data.delay = request.delay;
if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
if (request.webhook != null) data.webhook = request.webhook;
if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
if (request.scrapeOptions) {
ensureValidScrapeOptions(request.scrapeOptions);
data.scrapeOptions = request.scrapeOptions;
}
if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention;
return data;
}
async function startCrawl(http, request) {
const payload = prepareCrawlPayload(request);
try {
const res = await http.post("/v2/crawl", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "start crawl");
}
return { id: res.data.id, url: res.data.url };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "start crawl");
throw err;
}
}
async function getCrawlStatus(http, jobId, pagination) {
try {
const res = await http.get(`/v2/crawl/${jobId}`);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "get crawl status");
}
const body = res.data;
const initialDocs = body.data || [];
const auto = pagination?.autoPaginate ?? true;
if (!auto || !body.next) {
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: body.next ?? null,
data: initialDocs
};
}
const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: null,
data: aggregated
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
throw err;
}
}
async function cancelCrawl(http, jobId) {
try {
const res = await http.delete(`/v2/crawl/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel crawl");
return res.data?.status === "cancelled";
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel crawl");
throw err;
}
}
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
try {
const status = await getCrawlStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status)) {
return status;
}
} catch (err) {
if (!isRetryableError(err)) {
if (err instanceof SdkError) {
const errorWithJobId = new SdkError(
err.message,
err.status,
err.code,
err.details,
jobId
);
throw errorWithJobId;
}
throw err;
}
}
if (timeout != null && Date.now() - start > timeout * 1e3) {
throw new JobTimeoutError(jobId, timeout, "crawl");
}
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function crawl(http, request, pollInterval = 2, timeout) {
const started = await startCrawl(http, request);
return waitForCrawlCompletion(http, started.id, pollInterval, timeout);
}
async function getCrawlErrors(http, crawlId) {
try {
const res = await http.get(`/v2/crawl/${crawlId}/errors`);
if (res.status !== 200) throwForBadResponse(res, "get crawl errors");
const payload = res.data?.data ?? res.data;
return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors");
throw err;
}
}
async function getActiveCrawls(http) {
try {
const res = await http.get(`/v2/crawl/active`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get active crawls");
const crawlsIn = res.data?.crawls || [];
const crawls = crawlsIn.map((c) => ({ id: c.id, teamId: c.teamId ?? c.team_id, url: c.url, options: c.options ?? null }));
return { success: true, crawls };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get active crawls");
throw err;
}
}
async function crawlParamsPreview(http, url, prompt) {
if (!url || !url.trim()) throw new Error("URL cannot be empty");
if (!prompt || !prompt.trim()) throw new Error("Prompt cannot be empty");
try {
const res = await http.post("/v2/crawl/params-preview", { url: url.trim(), prompt });
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "crawl params preview");
const data = res.data.data || {};
if (res.data.warning) data.warning = res.data.warning;
return data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
throw err;
}
}
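// Usage sketch (illustrative): either poll-to-completion in one call via
// crawl(), or manage the job yourself:
//
//   const job = await startCrawl(http, { url: "https://example.com", limit: 10 });
//   const done = await waitForCrawlCompletion(http, job.id, 2, 120); // poll every 2 s, 120 s budget
//   console.log(done.status, `${done.completed}/${done.total}`);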
// src/v2/methods/batch.ts
async function startBatchScrape(http, urls, {
options,
webhook,
appendToId,
ignoreInvalidURLs,
maxConcurrency,
zeroDataRetention,
idempotencyKey,
integration
} = {}) {
if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
const payload = { urls };
if (options) {
ensureValidScrapeOptions(options);
Object.assign(payload, options);
}
if (webhook != null) payload.webhook = webhook;
if (appendToId != null) payload.appendToId = appendToId;
if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
if (integration != null && integration.trim()) payload.integration = integration.trim();
try {
const headers = http.prepareHeaders(idempotencyKey);
const res = await http.post("/v2/batch/scrape", payload, headers);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "start batch scrape");
return { id: res.data.id, url: res.data.url, invalidURLs: res.data.invalidURLs || void 0 };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "start batch scrape");
throw err;
}
}
async function getBatchScrapeStatus(http, jobId, pagination) {
try {
const res = await http.get(`/v2/batch/scrape/${jobId}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status");
const body = res.data;
const initialDocs = body.data || [];
const auto = pagination?.autoPaginate ?? true;
if (!auto || !body.next) {
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: body.next ?? null,
data: initialDocs
};
}
const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: null,
data: aggregated
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status");
throw err;
}
}
async function cancelBatchScrape(http, jobId) {
try {
const res = await http.delete(`/v2/batch/scrape/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel batch scrape");
return res.data?.status === "cancelled";
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel batch scrape");
throw err;
}
}
async function getBatchScrapeErrors(http, jobId) {
try {
const res = await http.get(`/v2/batch/scrape/${jobId}/errors`);
if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
const payload = res.data?.data ?? res.data;
return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape errors");
throw err;
}
}
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
try {
const status = await getBatchScrapeStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status)) {
return status;
}
} catch (err) {
if (!isRetryableError(err)) {
if (err instanceof SdkError) {
const errorWithJobId = new SdkError(
err.message,
err.status,
err.code,
err.details,
jobId
);
throw errorWithJobId;
}
throw err;
}
}
if (timeout != null && Date.now() - start > timeout * 1e3) {
throw new JobTimeoutError(jobId, timeout, "batch");
}
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function batchScrape(http, urls, opts = {}) {
const start = await startBatchScrape(http, urls, opts);
return waitForBatchCompletion(http, start.id, opts.pollInterval ?? 2, opts.timeout);
}
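// Usage sketch (illustrative): same waiter pattern as crawl, keyed by a URL list.
//
//   const result = await batchScrape(http, ["https://example.com", "https://firecrawl.dev"], {
//     options: { formats: ["markdown"] },
//     pollInterval: 2, // seconds between status checks
//     timeout: 300,    // seconds before JobTimeoutError
//   });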
// src/v2/methods/extract.ts
import { zodToJsonSchema as zodToJsonSchema2 } from "zod-to-json-schema";
function prepareExtractPayload(args) {
const body = {};
if (args.urls) body.urls = args.urls;
if (args.prompt != null) body.prompt = args.prompt;
if (args.schema != null) {
const s = args.schema;
const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def;
body.schema = isZod ? zodToJsonSchema2(s) : args.schema;
}
if (args.systemPrompt != null) body.systemPrompt = args.systemPrompt;
if (args.allowExternalLinks != null) body.allowExternalLinks = args.allowExternalLinks;
if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
if (args.showSources != null) body.showSources = args.showSources;
if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
if (args.agent) body.agent = args.agent;
if (args.scrapeOptions) {
ensureValidScrapeOptions(args.scrapeOptions);
body.scrapeOptions = args.scrapeOptions;
}
return body;
}
async function startExtract(http, args) {
const payload = prepareExtractPayload(args);
try {
const res = await http.post("/v2/extract", payload);
if (res.status !== 200) throwForBadResponse(res, "extract");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "extract");
throw err;
}
}
async function getExtractStatus(http, jobId) {
try {
const res = await http.get(`/v2/extract/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "extract status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "extract status");
throw err;
}
}
async function waitExtract(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
const status = await getExtractStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status || "")) return status;
if (timeout != null && Date.now() - start > timeout * 1e3) return status;
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function extract(http, args) {
const started = await startExtract(http, args);
const jobId = started.id;
if (!jobId) return started;
return waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout);
}
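// Usage sketch (illustrative): a Zod schema is accepted directly and converted
// to JSON Schema before the request is sent. Note the extract waiter returns
// the last status on timeout rather than throwing.
//
//   import { z } from "zod";
//   const res = await extract(http, {
//     urls: ["https://example.com"],
//     schema: z.object({ title: z.string() }),
//     timeout: 120, // seconds
//   });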
// src/v2/methods/agent.ts
import { zodToJsonSchema as zodToJsonSchema3 } from "zod-to-json-schema";
function prepareAgentPayload(args) {
const body = {};
if (args.urls) body.urls = args.urls;
body.prompt = args.prompt;
if (args.schema != null) {
const s = args.schema;
const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def;
body.schema = isZod ? zodToJsonSchema3(s) : args.schema;
}
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
if (args.maxCredits != null) body.maxCredits = args.maxCredits;
if (args.strictConstrainToURLs != null) body.strictConstrainToURLs = args.strictConstrainToURLs;
return body;
}
async function startAgent(http, args) {
const payload = prepareAgentPayload(args);
try {
const res = await http.post("/v2/agent", payload);
if (res.status !== 200) throwForBadResponse(res, "agent");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "agent");
throw err;
}
}
async function getAgentStatus(http, jobId) {
try {
const res = await http.get(`/v2/agent/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "agent status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "agent status");
throw err;
}
}
async function waitAgent(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
const status = await getAgentStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status || "")) return status;
if (timeout != null && Date.now() - start > timeout * 1e3) return status;
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function agent(http, args) {
const started = await startAgent(http, args);
const jobId = started.id;
if (!jobId) return started;
return waitAgent(http, jobId, args.pollInterval ?? 2, args.timeout);
}
async function cancelAgent(http, jobId) {
try {
const res = await http.delete(`/v2/agent/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel agent");
return res.data?.success === true;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel agent");
throw err;
}
}
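// Usage sketch (illustrative): agent jobs follow the same start/wait/cancel
// lifecycle as extract.
//
//   const started = await startAgent(http, { prompt: "Find the pricing page", urls: ["https://example.com"] });
//   const res = started.id ? await waitAgent(http, started.id, 2, 300) : started;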
// src/v2/methods/usage.ts
async function getConcurrency(http) {
try {
const res = await http.get("/v2/concurrency-check");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get concurrency");
const d = res.data.data || res.data;
return { concurrency: d.concurrency, maxConcurrency: d.maxConcurrency ?? d.max_concurrency };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get concurrency");
throw err;
}
}
async function getCreditUsage(http) {
try {
const res = await http.get("/v2/team/credit-usage");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage");
const d = res.data.data || res.data;
return {
remainingCredits: d.remainingCredits ?? d.remaining_credits ?? 0,
planCredits: d.planCredits ?? d.plan_credits,
billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null,
billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage");
throw err;
}
}
async function getTokenUsage(http) {
try {
const res = await http.get("/v2/team/token-usage");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage");
const d = res.data.data || res.data;
return {
remainingTokens: d.remainingTokens ?? d.remaining_tokens ?? 0,
planTokens: d.planTokens ?? d.plan_tokens,
billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null,
billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage");
throw err;
}
}
async function getQueueStatus(http) {
try {
const res = await http.get("/v2/team/queue-status");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get queue status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get queue status");
throw err;
}
}
async function getCreditUsageHistorical(http, byApiKey) {
try {
const query = byApiKey ? "?byApiKey=true" : "";
const res = await http.get(`/v2/team/credit-usage/historical${query}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage historical");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage historical");
throw err;
}
}
async function getTokenUsageHistorical(http, byApiKey) {
try {
const query = byApiKey ? "?byApiKey=true" : "";
const res = await http.get(`/v2/team/token-usage/historical${query}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage historical");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage historical");
throw err;
}
}
// src/v2/watcher.ts
import { EventEmitter } from "events";
var hasGlobalWebSocket = () => {
if (typeof globalThis === "undefined") return void 0;
const candidate = globalThis.WebSocket;
return typeof candidate === "function" ? candidate : void 0;
};
var isNodeRuntime = () => typeof process !== "undefined" && !!process.versions?.node;
var cachedWebSocket;
var loadPromise;
var loadNodeWebSocket = async () => {
if (!isNodeRuntime()) return void 0;
try {
const undici = await import("undici");
const ctor = undici.WebSocket ?? undici.default?.WebSocket;
return typeof ctor === "function" ? ctor : void 0;
} catch {
return void 0;
}
};
var getWebSocketCtor = async () => {
if (cachedWebSocket) return cachedWebSocket;
const globalWs = hasGlobalWebSocket();
if (globalWs) {
cachedWebSocket = globalWs;
return cachedWebSocket;
}
if (!loadPromise) {
loadPromise = loadNodeWebSocket();
}
cachedWebSocket = await loadPromise;
return cachedWebSocket;
};
var decoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : void 0;
var ensureUtf8String = (data) => {
if (typeof data === "string") return data;
if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
return data.toString("utf8");
}
const convertView = (view) => {
if (typeof Buffer !== "undefined") {
return Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8");
}
return decoder?.decode(view);
};
if (ArrayBuffer.isView(data)) {
return convertView(data);
}
if (data instanceof ArrayBuffer) {
return convertView(new Uint8Array(data));
}
return void 0;
};
var Watcher = class extends EventEmitter {
http;
jobId;
kind;
pollInterval;
timeout;
ws;
closed = false;
emittedDocumentKeys = /* @__PURE__ */ new Set();
constructor(http, jobId, opts = {}) {
super();
this.http = http;
this.jobId = jobId;
this.kind = opts.kind ?? "crawl";
this.pollInterval = opts.pollInterval ?? 2;
this.timeout = opts.timeout;
}
buildWsUrl() {
const apiUrl = this.http.getApiUrl();
const wsBase = apiUrl.replace(/^http/, "ws");
const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
return `${wsBase}${path}`;
}
async start() {
try {
const url = this.buildWsUrl();
const wsCtor = await getWebSocketCtor();
if (!wsCtor) {
this.pollLoop();
return;
}
this.ws = new wsCtor(url, this.http.getApiKey());
if (this.ws && "binaryType" in this.ws) {
this.ws.binaryType = "arraybuffer";
}
if (this.ws) {
this.attachWsHandlers(this.ws);
}
} catch (err) {
this.pollLoop();
}
}
attachWsHandlers(ws) {
let startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
ws.onmessage = (ev) => {
try {
const raw = ensureUtf8String(ev.data);
if (!raw) return;
const body = JSON.parse(raw);
const type = body.type;
if (type === "error") {
this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
return;
}
if (type === "catchup") {
const payload2 = body.data || {};
this.emitDocuments(payload2.data || []);
this.emitSnapshot(payload2);
return;
}
if (type === "document") {
const doc = body.data;
if (doc) this.emit("document", doc);
return;
}
if (type === "done") {
const payload2 = body.data || body;
const data = payload2.data || [];
if (data.length) this.emitDocuments(data);
this.emit("done", { status: "completed", data, id: this.jobId });
this.close();
return;
}
const payload = body.data || body;
if (payload && payload.status) this.emitSnapshot(payload);
} catch {
}
if (timeoutMs && Date.now() - startTs > timeoutMs) this.close();
};
ws.onerror = () => {
this.emit("error", { status: "failed", data: [], error: "WebSocket error", id: this.jobId });
this.close();
};
ws.onclose = () => {
if (!this.closed) this.pollLoop();
};
}
documentKey(doc) {
if (doc && typeof doc === "object") {
const explicitId = doc.id ?? doc.docId ?? doc.url;
if (typeof explicitId === "string" && explicitId.length) {
return explicitId;
}
}
try {
return JSON.stringify(doc);
} catch {
return `${Date.now()}-${Math.random()}`;
}
}
emitDocuments(docs) {
for (const doc of docs) {
if (!doc) continue;
const key = this.documentKey(doc);
if (this.emittedDocumentKeys.has(key)) continue;
this.emittedDocumentKeys.add(key);
this.emit("document", { ...doc, id: this.jobId });
}
}
emitSnapshot(payload) {
const status = payload.status;
const data = payload.data || [];
const snap = this.kind === "crawl" ? {
id: this.jobId,
status,
completed: payload.completed ?? 0,
total: payload.total ?? 0,
creditsUsed: payload.creditsUsed,
expiresAt: payload.expiresAt,
next: payload.next ?? null,
data
} : {
id: this.jobId,
status,
completed: payload.completed ?? 0,
total: payload.total ?? 0,
creditsUsed: payload.creditsUsed,
expiresAt: payload.expiresAt,
next: payload.next ?? null,
data
};
this.emit("snapshot", snap);
if (["completed", "failed", "cancelled"].includes(status)) {
this.emit("done", { status, data, id: this.jobId });
this.close();
}
}
async pollLoop() {
const startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
while (!this.closed) {
try {
const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId);
this.emitDocuments(snap.data || []);
this.emit("snapshot", snap);
if (["completed", "failed", "cancelled"].includes(snap.status)) {
this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
this.close();
break;
}
} catch {
}
if (timeoutMs && Date.now() - startTs > timeoutMs) break;
await new Promise((r) => setTimeout(r, Math.max(1e3, this.pollInterval * 1e3)));
}
}
close() {
this.closed = true;
if (this.ws && this.ws.close) this.ws.close();
}
};
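// Usage sketch (illustrative): the watcher prefers a WebSocket feed and falls
// back to HTTP polling when no WebSocket constructor is available.
//
//   const watcher = new Watcher(http, job.id, { kind: "crawl", pollInterval: 2, timeout: 300 });
//   watcher.on("document", (doc) => console.log("document:", doc.url ?? doc.id));
//   watcher.on("snapshot", (snap) => console.log(`${snap.completed}/${snap.total}`));
//   watcher.on("done", ({ status }) => console.log("finished:", status));
//   watcher.on("error", (e) => console.error(e.error));
//   await watcher.start();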
// src/v2/client.ts
import "zod";
var FirecrawlClient = class {
http;
isCloudService(url) {
return url.includes("api.firecrawl.dev");
}
/**
* Create a v2 client.
* @param options Transport configuration (API key, base URL, timeouts, retries).
*/
constructor(options = {}) {
const apiKey = options.apiKey ?? process.env.FIRECRAWL_API_KEY ?? "";
const apiUrl = (options.apiUrl ?? process.env.FIRECRAWL_API_URL ?? "https://api.firecrawl.dev").replace(/\/$/, "");
if (this.isCloudService(apiUrl) && !apiKey) {
throw new Error("API key is required for the cloud API. Set FIRECRAWL_API_KEY env or pass apiKey.");
}
this.http = new HttpClient({
apiKey,
apiUrl,
timeoutMs: options.timeoutMs,
maxRetries: options.maxRetries,
backoffFactor: options.backoffFactor
});
}
async scrape(url, options) {
return scrape(this.http, url, options);
}
// Search
/**
* Search the web and optionally scrape each result.
* @param query Search query string.
* @param req Additional search options (sources, limit, scrapeOptions, etc.).
* @returns Structured search results.
*/
async search(query, req = {}) {
return search(this.http, { query, ...req });
}
// Map
/**
* Map a site to discover URLs (sitemap-aware).
* @param url Root URL to map.
* @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
* @returns Discovered links.
*/
async map(url, options) {
return map(this.http, url, options);
}
// Crawl
/**
* Start a crawl job (async).
* @param url Root URL to crawl.
* @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
* @returns Job id and url.
*/
async startCrawl(url, req = {}) {
return startCrawl(this.http, { url, ...req });
}
/**
* Get the status and partial data of a crawl job.
* @param jobId Crawl job id.
*/
async getCrawlStatus(jobId, pagination) {
return getCrawlStatus(this.http, jobId, pagination);
}
/**
* Cancel a crawl job.
* @param jobId Crawl job id.
* @returns True if cancelled.
*/
async cancelCrawl(jobId) {
return cancelCrawl(this.http, jobId);
}
/**
* Convenience waiter: start a crawl and poll until it finishes.
* @param url Root URL to crawl.
* @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
async crawl(url, req = {}) {
return crawl(this.http, { url, ...req }, req.pollInterval, req.timeout);
}
/**
* Retrieve crawl errors and robots.txt blocks.
* @param crawlId Crawl job id.
*/
async getCrawlErrors(crawlId) {
return getCrawlErrors(this.http, crawlId);
}
/**
* List active crawls for the authenticated team.
*/
async getActiveCrawls() {
return getActiveCrawls(this.http);
}
/**
* Preview normalized crawl parameters produced by a natural-language prompt.
* @param url Root URL.
* @param prompt Natural-language instruction.
*/
async crawlParamsPreview(url, prompt) {
return crawlParamsPreview(this.http, url, prompt);
}
// Batch
/**
* Start a batch scrape job for multiple URLs (async).
* @param urls URLs to scrape.
* @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
* @returns Job id and url.
*/
async startBatchScrape(urls, opts) {
return startBatchScrape(this.http, urls, opts);
}
/**
* Get the status and partial data of a batch scrape job.
* @param jobId Batch job id.
*/
async getBatchScrapeStatus(jobId, pagination) {
return getBatchScrapeStatus(this.http, jobId, pagination);
}
/**
* Retrieve batch scrape errors and robots.txt blocks.
* @param jobId Batch job id.
*/
async getBatchScrapeErrors(jobId) {
return getBatchScrapeErrors(this.http, jobId);
}
/**
* Cancel a batch scrape job.
* @param jobId Batch job id.
* @returns True if cancelled.
*/
async cancelBatchScrape(jobId) {
return cancelBatchScrape(this.http, jobId);
}
/**
* Convenience waiter: start a batch scrape and poll until it finishes.
* @param urls URLs to scrape.
* @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
async batchScrape(urls, opts) {
return batchScrape(this.http, urls, opts);
}
// Extract
/**
* Start an extract job (async).
* @param args Extraction request (urls, schema or prompt, flags).
* @returns Job id or processing state.
*/
async startExtract(args) {
return startExtract(this.http, args);
}
/**
* Get extract job status/data.
* @param jobId Extract job id.
*/
async getExtractStatus(jobId) {
return getExtractStatus(this.http, jobId);
}
/**
* Convenience waiter: start an extract and poll until it finishes.
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
* @returns Final extract response.
*/
async extract(args) {
return extract(this.http, args);
}
// Agent
/**
* Start an agent job (async).
* @param args Agent request (urls, prompt, schema).
* @returns Job id or processing state.
*/
async startAgent(args) {
return startAgent(this.http, args);
}
/**
* Get agent job status/data.
* @param jobId Agent job id.
*/
async getAgentStatus(jobId) {
return getAgentStatus(this.http, jobId);
}
/**
* Convenience waiter: start an agent and poll until it finishes.
* @param args Agent request plus waiter controls (pollInterval, timeout seconds).
* @returns Final agent response.
*/
async agent(args) {
return agent(this.http, args);
}
/**
* Cancel an agent job.
* @param jobId Agent job id.
* @returns True if cancelled.
*/
async cancelAgent(jobId) {
return cancelAgent(this.http, jobId);
}
// Usage
/** Current concurrency usage. */
async getConcurrency() {
return getConcurrency(this.http);
}
/** Current credit usage. */
async getCreditUsage() {
return getCreditUsage(this.http);
}
/** Current token usage. */
async getTokenUsage() {
return getTokenUsage(this.http);
}
/** Historical credit usage by month; set byApiKey to true to break down by API key. */
async getCreditUsageHistorical(byApiKey) {
return getCreditUsageHistorical(this.http, byApiKey);
}
/** Historical token usage by month; set byApiKey to true to break down by API key. */
async getTokenUsageHistorical(byApiKey) {
return getTokenUsageHistorical(this.http, byApiKey);
}
/** Metrics about the team's scrape queue. */
async getQueueStatus() {
return getQueueStatus(this.http);
}
// Watcher
/**
* Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
* @param jobId Job id.
* @param opts Watcher options (kind, pollInterval, timeout seconds).
*/
watcher(jobId, opts = {}) {
return new Watcher(this.http, jobId, opts);
}
};
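// Usage sketch (illustrative; the API key is a placeholder — FIRECRAWL_API_KEY
// is read from the environment when apiKey is omitted):
//
//   const client = new FirecrawlClient({ apiKey: "fc-YOUR-KEY" });
//   const doc = await client.scrape("https://example.com", { formats: ["markdown"] });
//   const finished = await client.crawl("https://example.com", { limit: 10, timeout: 120 });
//   const { remainingCredits } = await client.getCreditUsage();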
// src/v1/index.ts
import axios2, { AxiosError } from "axios";
import "zod";
import { zodToJsonSchema as zodToJsonSchema4 } from "zod-to-json-schema";
// node_modules/typescript-event-target/dist/index.mjs
var e = class extends EventTarget {
dispatchTypedEvent(s, t) {
return super.dispatchEvent(t);
}
};
// src/v1/index.ts
var FirecrawlError = class extends Error {
statusCode;
details;
constructor(message, statusCode, details) {
super(message);
this.statusCode = statusCode;
this.details = details;
}
};
var FirecrawlApp = class {
apiKey;
apiUrl;
version = "1.25.1";
isCloudService(url) {
return url.includes("api.firecrawl.dev");
}
async getVersion() {
try {
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
return process.env.npm_package_version;
}
const packageJson = await import("./package-THA2MQX4.js");
return packageJson.default.version;
} catch (error) {
const isTest = typeof process !== "undefined" && process.env.JEST_WORKER_ID != null;
if (!isTest) {
console.error("Error getting version:", error);
}
return "1.25.1";
}
}
async init() {
this.version = await this.getVersion();
}
/**
* Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null, apiUrl = null }) {
const baseUrl = apiUrl || "https://api.firecrawl.dev";
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
throw new FirecrawlError("No API key provided", 401);
}
this.apiKey = apiKey || "";
this.apiUrl = baseUrl;
this.init();
}
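// Usage sketch (illustrative): the legacy v1 client targets the /v1 endpoints.
//
//   const app = new FirecrawlApp({ apiKey: "fc-YOUR-KEY" });
//   const res = await app.scrapeUrl("https://example.com", { formats: ["markdown"] });
//   if (res.success) console.log(res.markdown);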
/**
* Scrapes a URL using the Firecrawl API.
* @param url - The URL to scrape.
* @param params - Additional parameters for the scrape request.
* @returns The response from the scrape operation.
*/
async scrapeUrl(url, params) {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`
};
let jsonData = { url, ...params, origin: typeof params?.origin === "string" && params.origin.includes("mcp") ? params.origin : `js-sdk@${this.version}` };
if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
try {
schema = zodToJsonSchema4(schema);
} catch (error) {
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema
}
};
}
if (jsonData?.jsonOptions?.schema) {
let schema = jsonData.jsonOptions.schema;
try {
schema = zodToJsonSchema4(schema);
} catch (error) {
}
jsonData = {
...jsonData,
jsonOptions: {
...jsonData.jsonOptions,
schema
}
};
}
try {
const response = await axios2.post(
this.apiUrl + `/v1/scrape`,
jsonData,
{ headers, timeout: params?.timeout !== void 0 ? params.timeout + 5e3 : void 0 }
);
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return {
success: true,
warning: responseData.warning,
error: responseData.error,
...responseData.data
};
} else {
throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
}
} else {
this.handleError(response, "scrape URL");
}
} catch (error) {
this.handleError(error.response, "scrape URL");
}
return { success: false, error: "Internal server error." };
}
/**
* Searches using the Firecrawl API and optionally scrapes the results.
* @param query - The search query string.
* @param params - Optional parameters for the search request.
* @returns The response from the search operation.
*/
async search(query, params) {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`
};
let jsonData = {
query,
limit: params?.limit ?? 5,
tbs: params?.tbs,
filter: params?.filter,
lang: params?.lang ?? "en",
country: params?.country ?? "us",
location: params?.location,
origin: typeof params?.origin === "string" && params.origin.includes("mcp") ? params.origin : `js-sdk@${this.version}`,
timeout: params?.timeout ?? 6e4,
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
};
if (jsonData?.scrapeOptions?.extract?.schema) {
let schema = jsonData.scrapeOptions.extract.schema;
try {
schema = zodToJsonSchema4(schema);
} catch (error) {
}
jsonData = {
...jsonData,
scrapeOptions: {
...jsonData.scrapeOptions,
extract: {
...jsonData.scrapeOptions.extract,
schema
}
}
};
}
try {
const response = await this.postRequest(
this.apiUrl + `/v1/search`,
jsonData,
headers
);
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return {
success: true,
data: responseData.data,
warning: responseData.warning
};
} else {
throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
}
} else {
this.handleError(response, "search");
}
} catch (error) {
if (error.response?.data?.error) {
throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
} else {
throw new FirecrawlError(error.message, 500);
}
}
return { success: false, error: "Internal server error." };