firecrawl
Version: 4.10.0
JavaScript SDK for Firecrawl API
1,475 lines (1,453 loc) • 103 kB
JavaScript
"use strict";
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __commonJS = (cb, mod) => function __require() {
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// package.json
var require_package = __commonJS({
"package.json"(exports2, module2) {
module2.exports = {
name: "@mendable/firecrawl-js",
version: "4.10.0",
description: "JavaScript SDK for Firecrawl API",
main: "dist/index.js",
types: "dist/index.d.ts",
exports: {
"./package.json": "./package.json",
".": {
import: "./dist/index.js",
default: "./dist/index.cjs"
}
},
type: "module",
scripts: {
build: "tsup",
"build-and-publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta",
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
},
repository: {
type: "git",
url: "git+https://github.com/firecrawl/firecrawl.git"
},
author: "Mendable.ai",
license: "MIT",
dependencies: {
axios: "^1.12.2",
"typescript-event-target": "^1.1.1",
zod: "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
bugs: {
url: "https://github.com/firecrawl/firecrawl/issues"
},
homepage: "https://github.com/firecrawl/firecrawl#readme",
devDependencies: {
"@jest/globals": "^30.2.0",
"@types/dotenv": "^8.2.0",
"@types/jest": "^30.0.0",
"@types/mocha": "^10.0.6",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
dotenv: "^16.4.5",
jest: "^30.2.0",
"ts-jest": "^29.4.5",
tsup: "^8.5.0",
typescript: "^5.4.5",
uuid: "^9.0.1"
},
keywords: [
"firecrawl",
"mendable",
"crawler",
"web",
"scraper",
"api",
"sdk"
],
engines: {
node: ">=22.0.0"
},
pnpm: {
overrides: {
"@babel/helpers@<7.26.10": ">=7.26.10",
"brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
"brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2",
"js-yaml@<3.14.2": ">=3.14.2",
"glob@>=10.2.0 <10.5.0": ">=10.5.0"
}
}
};
}
});
// src/index.ts
var index_exports = {};
__export(index_exports, {
Firecrawl: () => Firecrawl,
FirecrawlAppV1: () => FirecrawlApp,
FirecrawlClient: () => FirecrawlClient,
JobTimeoutError: () => JobTimeoutError,
SdkError: () => SdkError,
default: () => index_default
});
module.exports = __toCommonJS(index_exports);
// src/v2/utils/httpClient.ts
var import_axios = __toESM(require("axios"), 1);
// src/v2/utils/getVersion.ts
function getVersion() {
try {
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
return process.env.npm_package_version;
}
const pkg = require_package();
return pkg?.version || "3.x.x";
  } catch {
    // Fallback when package.json cannot be resolved (e.g. in bundled contexts).
    return "3.x.x";
  }
}
// src/v2/utils/httpClient.ts
var HttpClient = class {
instance;
apiKey;
apiUrl;
maxRetries;
backoffFactor;
constructor(options) {
this.apiKey = options.apiKey;
this.apiUrl = options.apiUrl.replace(/\/$/, "");
this.maxRetries = options.maxRetries ?? 3;
this.backoffFactor = options.backoffFactor ?? 0.5;
this.instance = import_axios.default.create({
baseURL: this.apiUrl,
timeout: options.timeoutMs ?? 3e5,
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`
},
transitional: { clarifyTimeoutError: true }
});
}
getApiUrl() {
return this.apiUrl;
}
getApiKey() {
return this.apiKey;
}
async request(config) {
const version = getVersion();
config.headers = {
...config.headers || {}
};
let lastError;
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
try {
const cfg = { ...config };
if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
const data = cfg.data ?? {};
cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` };
if (typeof data.timeout === "number") {
cfg.timeout = data.timeout + 5e3;
}
}
const res = await this.instance.request(cfg);
if (res.status === 502 && attempt < this.maxRetries - 1) {
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
continue;
}
return res;
} catch (err) {
lastError = err;
const status = err?.response?.status;
if (status === 502 && attempt < this.maxRetries - 1) {
await this.sleep(this.backoffFactor * Math.pow(2, attempt));
continue;
}
throw err;
}
}
throw lastError ?? new Error("Unexpected HTTP client error");
}
sleep(seconds) {
return new Promise((r) => setTimeout(r, seconds * 1e3));
}
post(endpoint, body, headers) {
return this.request({ method: "post", url: endpoint, data: body, headers });
}
get(endpoint, headers) {
return this.request({ method: "get", url: endpoint, headers });
}
delete(endpoint, headers) {
return this.request({ method: "delete", url: endpoint, headers });
}
prepareHeaders(idempotencyKey) {
const headers = {};
if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
return headers;
}
};
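// Construction sketch (illustrative, not part of the published bundle): an
// HttpClient pointed at the cloud API with a shorter request timeout. The
// FIRECRAWL_API_KEY environment variable is an assumption.
function exampleHttpClient() {
  return new HttpClient({
    apiKey: process.env.FIRECRAWL_API_KEY ?? "",
    apiUrl: "https://api.firecrawl.dev",
    timeoutMs: 6e4, // per-request axios timeout; the default above is 3e5 (5 minutes)
    maxRetries: 3 // 502 responses retry after backoffFactor * 2^attempt seconds
  });
}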
// src/v2/types.ts
var SdkError = class extends Error {
status;
code;
details;
jobId;
constructor(message, status, code, details, jobId) {
super(message);
this.name = "FirecrawlSdkError";
this.status = status;
this.code = code;
this.details = details;
this.jobId = jobId;
}
};
var JobTimeoutError = class extends SdkError {
timeoutSeconds;
constructor(jobId, timeoutSeconds, jobType = "batch") {
const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
super(
`${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
void 0,
"JOB_TIMEOUT",
void 0,
jobId
);
this.name = "JobTimeoutError";
this.timeoutSeconds = timeoutSeconds;
}
};
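// Error-handling sketch (illustrative): JobTimeoutError extends SdkError, so a
// caller can branch on the subclass or on the "JOB_TIMEOUT" code. `runJob` is
// a hypothetical promise-returning helper.
async function exampleHandleErrors(runJob) {
  try {
    return await runJob();
  } catch (err) {
    if (err instanceof JobTimeoutError) {
      console.error(`job ${err.jobId} timed out after ${err.timeoutSeconds}s`);
    } else if (err instanceof SdkError) {
      console.error(`API error${err.status ? ` (${err.status})` : ""}: ${err.message}`);
    }
    throw err;
  }
}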
// src/v2/utils/validation.ts
var import_zod_to_json_schema = require("zod-to-json-schema");
function ensureValidFormats(formats) {
if (!formats) return;
for (const fmt of formats) {
if (typeof fmt === "string") {
if (fmt === "json") {
throw new Error("json format must be an object with { type: 'json', prompt, schema }");
}
continue;
}
if (fmt.type === "json") {
const j = fmt;
if (!j.prompt && !j.schema) {
throw new Error("json format requires either 'prompt' or 'schema' (or both)");
}
const maybeSchema = j.schema;
const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def;
if (isZod) {
try {
j.schema = (0, import_zod_to_json_schema.zodToJsonSchema)(maybeSchema);
} catch {
}
}
continue;
}
if (fmt.type === "changeTracking") {
const ct = fmt;
const maybeSchema = ct.schema;
const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def;
if (isZod) {
try {
ct.schema = (0, import_zod_to_json_schema.zodToJsonSchema)(maybeSchema);
} catch {
}
}
continue;
}
if (fmt.type === "screenshot") {
const s = fmt;
if (s.quality != null && (typeof s.quality !== "number" || s.quality < 0)) {
throw new Error("screenshot.quality must be a non-negative number");
}
}
}
}
function ensureValidScrapeOptions(options) {
if (!options) return;
if (options.timeout != null && options.timeout <= 0) {
throw new Error("timeout must be positive");
}
if (options.waitFor != null && options.waitFor < 0) {
throw new Error("waitFor must be non-negative");
}
ensureValidFormats(options.formats);
}
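// Formats sketch (illustrative): "json" must use the object form (a bare
// "json" string throws above), and a zod schema passed as `schema` is
// converted to JSON Schema in place by ensureValidFormats.
function exampleFormats() {
  return [
    "markdown",
    { type: "json", prompt: "Extract the page title", schema: { type: "object", properties: { title: { type: "string" } } } },
    { type: "screenshot", quality: 80 }
  ];
}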
// src/v2/utils/errorHandler.ts
var import_axios2 = require("axios");
function throwForBadResponse(resp, action) {
const status = resp.status;
const body = resp.data || {};
const msg = body?.error || body?.message || `Request failed (${status}) while trying to ${action}`;
throw new SdkError(msg, status, void 0, body?.details);
}
function normalizeAxiosError(err, action) {
const status = err.response?.status;
const body = err.response?.data;
const message = body?.error || err.message || `Request failed${status ? ` (${status})` : ""} while trying to ${action}`;
const code = body?.code || err.code;
throw new SdkError(message, status, code, body?.details ?? body);
}
function isRetryableError(err) {
if (err instanceof JobTimeoutError) {
return false;
}
if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
const status = err.status;
if (status && status >= 400 && status < 500) {
return false;
}
if (status && status >= 500) {
return true;
}
}
if (err?.isAxiosError && !err.response) {
return true;
}
if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) {
return true;
}
  // Default: treat unclassified errors as retryable so transient failures do
  // not abort a long-running poll loop.
  return true;
}
// src/v2/methods/scrape.ts
async function scrape(http, url, options) {
if (!url || !url.trim()) {
throw new Error("URL cannot be empty");
}
if (options) ensureValidScrapeOptions(options);
const payload = { url: url.trim() };
if (options) Object.assign(payload, options);
try {
const res = await http.post("/v2/scrape", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "scrape");
}
return res.data.data || {};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "scrape");
throw err;
}
}
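// Usage sketch for the internal scrape() helper (illustrative; consumers
// normally call FirecrawlClient.scrape below). `http` is an HttpClient and
// the URL is a placeholder.
async function exampleScrape(http) {
  const doc = await scrape(http, "https://example.com", { formats: ["markdown", "links"] });
  return doc.markdown;
}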
// src/v2/methods/search.ts
function prepareSearchPayload(req) {
if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
if (req.limit != null && req.limit <= 0) throw new Error("limit must be positive");
if (req.timeout != null && req.timeout <= 0) throw new Error("timeout must be positive");
const payload = {
query: req.query
};
if (req.sources) payload.sources = req.sources;
if (req.categories) payload.categories = req.categories;
if (req.limit != null) payload.limit = req.limit;
if (req.tbs != null) payload.tbs = req.tbs;
if (req.location != null) payload.location = req.location;
if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
if (req.timeout != null) payload.timeout = req.timeout;
if (req.integration && req.integration.trim()) payload.integration = req.integration.trim();
if (req.scrapeOptions) {
ensureValidScrapeOptions(req.scrapeOptions);
payload.scrapeOptions = req.scrapeOptions;
}
return payload;
}
function transformArray(arr) {
  const results = [];
  for (const item of arr) {
    if (item && typeof item === "object") {
      // Objects pass through unchanged whether or not they carry scraped
      // content keys ("markdown", "html", etc.); that branch only mattered
      // for type narrowing in the TypeScript source.
      results.push(item);
    } else {
      results.push({ url: item });
    }
  }
  return results;
}
async function search(http, request) {
const payload = prepareSearchPayload(request);
try {
const res = await http.post("/v2/search", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "search");
}
const data = res.data.data || {};
const out = {};
if (data.web) out.web = transformArray(data.web);
if (data.news) out.news = transformArray(data.news);
if (data.images) out.images = transformArray(data.images);
return out;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "search");
throw err;
}
}
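// Search sketch (illustrative): limit results and scrape each hit to markdown.
// Result buckets (web, news, images) are included only when present in the
// response; query and sources here are placeholders.
async function exampleSearch(http) {
  return search(http, {
    query: "firecrawl sdk",
    sources: ["web", "news"],
    limit: 5,
    scrapeOptions: { formats: ["markdown"] }
  });
}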
// src/v2/methods/map.ts
function prepareMapPayload(url, options) {
if (!url || !url.trim()) throw new Error("URL cannot be empty");
const payload = { url: url.trim() };
if (options) {
if (options.sitemap != null) payload.sitemap = options.sitemap;
if (options.search != null) payload.search = options.search;
if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains;
if (options.ignoreQueryParameters != null) payload.ignoreQueryParameters = options.ignoreQueryParameters;
if (options.limit != null) payload.limit = options.limit;
if (options.timeout != null) payload.timeout = options.timeout;
if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim();
if (options.location != null) payload.location = options.location;
}
return payload;
}
async function map(http, url, options) {
const payload = prepareMapPayload(url, options);
try {
const res = await http.post("/v2/map", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "map");
}
const linksIn = res.data.links || [];
const links = [];
for (const item of linksIn) {
if (typeof item === "string") links.push({ url: item });
else if (item && typeof item === "object") links.push({ url: item.url, title: item.title, description: item.description });
}
return { links };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "map");
throw err;
}
}
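// Map sketch (illustrative): discover up to 100 URLs, including subdomains.
// Every returned link is normalized to { url, title?, description? }.
async function exampleMap(http) {
  const { links } = await map(http, "https://example.com", {
    includeSubdomains: true,
    limit: 100
  });
  return links.map((l) => l.url);
}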
// src/v2/utils/pagination.ts
async function fetchAllPages(http, nextUrl, initial, pagination) {
const docs = initial.slice();
let current = nextUrl;
let pageCount = 0;
const maxPages = pagination?.maxPages ?? void 0;
const maxResults = pagination?.maxResults ?? void 0;
const maxWaitTime = pagination?.maxWaitTime ?? void 0;
const started = Date.now();
while (current) {
if (maxPages != null && pageCount >= maxPages) break;
if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break;
let payload = null;
try {
const res = await http.get(current);
payload = res.data;
} catch {
break;
}
if (!payload?.success) break;
for (const d of payload.data || []) {
if (maxResults != null && docs.length >= maxResults) break;
docs.push(d);
}
if (maxResults != null && docs.length >= maxResults) break;
current = payload.next ?? null;
pageCount += 1;
}
return docs;
}
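// Pagination sketch (illustrative): stop aggregation after two extra pages,
// 100 documents total, or 30 seconds of fetching, whichever comes first.
// These options feed the crawl/batch status getters defined below.
function examplePaginationConfig() {
  return { autoPaginate: true, maxPages: 2, maxResults: 100, maxWaitTime: 30 };
}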
// src/v2/methods/crawl.ts
function prepareCrawlPayload(request) {
if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
const data = { url: request.url.trim() };
if (request.prompt) data.prompt = request.prompt;
if (request.excludePaths) data.excludePaths = request.excludePaths;
if (request.includePaths) data.includePaths = request.includePaths;
if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth;
if (request.sitemap != null) data.sitemap = request.sitemap;
if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters;
if (request.limit != null) data.limit = request.limit;
if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain;
if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks;
if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains;
if (request.delay != null) data.delay = request.delay;
if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency;
if (request.webhook != null) data.webhook = request.webhook;
if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim();
if (request.scrapeOptions) {
ensureValidScrapeOptions(request.scrapeOptions);
data.scrapeOptions = request.scrapeOptions;
}
if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention;
return data;
}
async function startCrawl(http, request) {
const payload = prepareCrawlPayload(request);
try {
const res = await http.post("/v2/crawl", payload);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "start crawl");
}
return { id: res.data.id, url: res.data.url };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "start crawl");
throw err;
}
}
async function getCrawlStatus(http, jobId, pagination) {
try {
const res = await http.get(`/v2/crawl/${jobId}`);
if (res.status !== 200 || !res.data?.success) {
throwForBadResponse(res, "get crawl status");
}
const body = res.data;
const initialDocs = body.data || [];
const auto = pagination?.autoPaginate ?? true;
if (!auto || !body.next) {
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: body.next ?? null,
data: initialDocs
};
}
const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: null,
data: aggregated
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
throw err;
}
}
async function cancelCrawl(http, jobId) {
try {
const res = await http.delete(`/v2/crawl/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel crawl");
return res.data?.status === "cancelled";
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel crawl");
throw err;
}
}
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
try {
const status = await getCrawlStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status)) {
return status;
}
} catch (err) {
if (!isRetryableError(err)) {
if (err instanceof SdkError) {
const errorWithJobId = new SdkError(
err.message,
err.status,
err.code,
err.details,
jobId
);
throw errorWithJobId;
}
throw err;
}
}
if (timeout != null && Date.now() - start > timeout * 1e3) {
throw new JobTimeoutError(jobId, timeout, "crawl");
}
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function crawl(http, request, pollInterval = 2, timeout) {
const started = await startCrawl(http, request);
return waitForCrawlCompletion(http, started.id, pollInterval, timeout);
}
async function getCrawlErrors(http, crawlId) {
try {
const res = await http.get(`/v2/crawl/${crawlId}/errors`);
if (res.status !== 200) throwForBadResponse(res, "get crawl errors");
const payload = res.data?.data ?? res.data;
return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors");
throw err;
}
}
async function getActiveCrawls(http) {
try {
const res = await http.get(`/v2/crawl/active`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get active crawls");
const crawlsIn = res.data?.crawls || [];
const crawls = crawlsIn.map((c) => ({ id: c.id, teamId: c.teamId ?? c.team_id, url: c.url, options: c.options ?? null }));
return { success: true, crawls };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get active crawls");
throw err;
}
}
async function crawlParamsPreview(http, url, prompt) {
if (!url || !url.trim()) throw new Error("URL cannot be empty");
if (!prompt || !prompt.trim()) throw new Error("Prompt cannot be empty");
try {
const res = await http.post("/v2/crawl/params-preview", { url: url.trim(), prompt });
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "crawl params preview");
const data = res.data.data || {};
if (res.data.warning) data.warning = res.data.warning;
return data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
throw err;
}
}
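// Crawl sketch (illustrative): start a crawl and poll every 5 seconds, giving
// up with a JobTimeoutError after 600 seconds. URL and limits are placeholders.
async function exampleCrawl(http) {
  return crawl(http, { url: "https://example.com", limit: 50, maxDiscoveryDepth: 2 }, 5, 600);
}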
// src/v2/methods/batch.ts
async function startBatchScrape(http, urls, {
options,
webhook,
appendToId,
ignoreInvalidURLs,
maxConcurrency,
zeroDataRetention,
idempotencyKey,
integration
} = {}) {
if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty");
const payload = { urls };
if (options) {
ensureValidScrapeOptions(options);
Object.assign(payload, options);
}
if (webhook != null) payload.webhook = webhook;
if (appendToId != null) payload.appendToId = appendToId;
if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
if (integration != null && integration.trim()) payload.integration = integration.trim();
try {
const headers = http.prepareHeaders(idempotencyKey);
const res = await http.post("/v2/batch/scrape", payload, headers);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "start batch scrape");
return { id: res.data.id, url: res.data.url, invalidURLs: res.data.invalidURLs || void 0 };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "start batch scrape");
throw err;
}
}
async function getBatchScrapeStatus(http, jobId, pagination) {
try {
const res = await http.get(`/v2/batch/scrape/${jobId}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status");
const body = res.data;
const initialDocs = body.data || [];
const auto = pagination?.autoPaginate ?? true;
if (!auto || !body.next) {
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: body.next ?? null,
data: initialDocs
};
}
const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
return {
id: jobId,
status: body.status,
completed: body.completed ?? 0,
total: body.total ?? 0,
creditsUsed: body.creditsUsed,
expiresAt: body.expiresAt,
next: null,
data: aggregated
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status");
throw err;
}
}
async function cancelBatchScrape(http, jobId) {
try {
const res = await http.delete(`/v2/batch/scrape/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel batch scrape");
return res.data?.status === "cancelled";
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel batch scrape");
throw err;
}
}
async function getBatchScrapeErrors(http, jobId) {
try {
const res = await http.get(`/v2/batch/scrape/${jobId}/errors`);
if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
const payload = res.data?.data ?? res.data;
return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape errors");
throw err;
}
}
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
try {
const status = await getBatchScrapeStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status)) {
return status;
}
} catch (err) {
if (!isRetryableError(err)) {
if (err instanceof SdkError) {
const errorWithJobId = new SdkError(
err.message,
err.status,
err.code,
err.details,
jobId
);
throw errorWithJobId;
}
throw err;
}
}
if (timeout != null && Date.now() - start > timeout * 1e3) {
throw new JobTimeoutError(jobId, timeout, "batch");
}
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function batchScrape(http, urls, opts = {}) {
const start = await startBatchScrape(http, urls, opts);
return waitForBatchCompletion(http, start.id, opts.pollInterval ?? 2, opts.timeout);
}
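// Batch sketch (illustrative): scrape two URLs and wait for completion.
// The idempotency key is a placeholder; `timeout` is in seconds and is
// consumed by the waiter, not the start call.
async function exampleBatchScrape(http) {
  return batchScrape(http, ["https://example.com", "https://example.org"], {
    options: { formats: ["markdown"] },
    idempotencyKey: "replace-with-a-uuid",
    maxConcurrency: 2,
    timeout: 300
  });
}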
// src/v2/methods/extract.ts
var import_zod_to_json_schema2 = require("zod-to-json-schema");
function prepareExtractPayload(args) {
const body = {};
if (args.urls) body.urls = args.urls;
if (args.prompt != null) body.prompt = args.prompt;
if (args.schema != null) {
const s = args.schema;
const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def;
body.schema = isZod ? (0, import_zod_to_json_schema2.zodToJsonSchema)(s) : args.schema;
}
if (args.systemPrompt != null) body.systemPrompt = args.systemPrompt;
if (args.allowExternalLinks != null) body.allowExternalLinks = args.allowExternalLinks;
if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch;
if (args.showSources != null) body.showSources = args.showSources;
if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs;
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
if (args.agent) body.agent = args.agent;
if (args.scrapeOptions) {
ensureValidScrapeOptions(args.scrapeOptions);
body.scrapeOptions = args.scrapeOptions;
}
return body;
}
async function startExtract(http, args) {
const payload = prepareExtractPayload(args);
try {
const res = await http.post("/v2/extract", payload);
if (res.status !== 200) throwForBadResponse(res, "extract");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "extract");
throw err;
}
}
async function getExtractStatus(http, jobId) {
try {
const res = await http.get(`/v2/extract/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "extract status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "extract status");
throw err;
}
}
async function waitExtract(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
const status = await getExtractStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status || "")) return status;
if (timeout != null && Date.now() - start > timeout * 1e3) return status;
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function extract(http, args) {
const started = await startExtract(http, args);
const jobId = started.id;
if (!jobId) return started;
return waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout);
}
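// Extract sketch (illustrative): a prompt plus a plain JSON Schema; a zod
// schema also works, since prepareExtractPayload converts it via
// zodToJsonSchema. Waits up to 120 seconds, then returns the latest status.
async function exampleExtract(http) {
  return extract(http, {
    urls: ["https://example.com"],
    prompt: "Extract the company name",
    schema: { type: "object", properties: { company: { type: "string" } } },
    timeout: 120
  });
}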
// src/v2/methods/agent.ts
var import_zod_to_json_schema3 = require("zod-to-json-schema");
function prepareAgentPayload(args) {
const body = {};
if (args.urls) body.urls = args.urls;
body.prompt = args.prompt;
if (args.schema != null) {
const s = args.schema;
const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def;
body.schema = isZod ? (0, import_zod_to_json_schema3.zodToJsonSchema)(s) : args.schema;
}
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
if (args.maxCredits !== null && args.maxCredits !== void 0) body.maxCredits = args.maxCredits;
if (args.strictConstrainToURLs !== null && args.strictConstrainToURLs !== void 0) body.strictConstrainToURLs = args.strictConstrainToURLs;
return body;
}
async function startAgent(http, args) {
const payload = prepareAgentPayload(args);
try {
const res = await http.post("/v2/agent", payload);
if (res.status !== 200) throwForBadResponse(res, "agent");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "agent");
throw err;
}
}
async function getAgentStatus(http, jobId) {
try {
const res = await http.get(`/v2/agent/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "agent status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "agent status");
throw err;
}
}
async function waitAgent(http, jobId, pollInterval = 2, timeout) {
const start = Date.now();
while (true) {
const status = await getAgentStatus(http, jobId);
if (["completed", "failed", "cancelled"].includes(status.status || "")) return status;
if (timeout != null && Date.now() - start > timeout * 1e3) return status;
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
}
}
async function agent(http, args) {
const started = await startAgent(http, args);
const jobId = started.id;
if (!jobId) return started;
return waitAgent(http, jobId, args.pollInterval ?? 2, args.timeout);
}
async function cancelAgent(http, jobId) {
try {
const res = await http.delete(`/v2/agent/${jobId}`);
if (res.status !== 200) throwForBadResponse(res, "cancel agent");
return res.data?.success === true;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "cancel agent");
throw err;
}
}
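// Agent sketch (illustrative): a prompt-driven job over the given URLs, with
// maxCredits capping spend. Polls like extract() above; values are placeholders.
async function exampleAgent(http) {
  return agent(http, {
    urls: ["https://example.com"],
    prompt: "Find the pricing tiers",
    maxCredits: 100,
    timeout: 300
  });
}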
// src/v2/methods/usage.ts
async function getConcurrency(http) {
try {
const res = await http.get("/v2/concurrency-check");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get concurrency");
const d = res.data.data || res.data;
return { concurrency: d.concurrency, maxConcurrency: d.maxConcurrency ?? d.max_concurrency };
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get concurrency");
throw err;
}
}
async function getCreditUsage(http) {
try {
const res = await http.get("/v2/team/credit-usage");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage");
const d = res.data.data || res.data;
return {
remainingCredits: d.remainingCredits ?? d.remaining_credits ?? 0,
planCredits: d.planCredits ?? d.plan_credits,
billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null,
billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage");
throw err;
}
}
async function getTokenUsage(http) {
try {
const res = await http.get("/v2/team/token-usage");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage");
const d = res.data.data || res.data;
return {
remainingTokens: d.remainingTokens ?? d.remaining_tokens ?? 0,
planTokens: d.planTokens ?? d.plan_tokens,
billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null,
billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null
};
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage");
throw err;
}
}
async function getQueueStatus(http) {
try {
const res = await http.get("/v2/team/queue-status");
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get queue status");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get queue status");
throw err;
}
}
async function getCreditUsageHistorical(http, byApiKey) {
try {
const query = byApiKey ? "?byApiKey=true" : "";
const res = await http.get(`/v2/team/credit-usage/historical${query}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage historical");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage historical");
throw err;
}
}
async function getTokenUsageHistorical(http, byApiKey) {
try {
const query = byApiKey ? "?byApiKey=true" : "";
const res = await http.get(`/v2/team/token-usage/historical${query}`);
if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage historical");
return res.data;
} catch (err) {
if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage historical");
throw err;
}
}
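// Usage-endpoints sketch (illustrative): fetch a combined account snapshot in
// parallel.
async function exampleAccountSnapshot(http) {
  const [credits, tokens, concurrency] = await Promise.all([
    getCreditUsage(http),
    getTokenUsage(http),
    getConcurrency(http)
  ]);
  return { credits, tokens, concurrency };
}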
// src/v2/watcher.ts
var import_events = require("events");
var getGlobalWebSocket = () => {
  if (typeof globalThis === "undefined") return void 0;
  const candidate = globalThis.WebSocket;
  return typeof candidate === "function" ? candidate : void 0;
};
var isNodeRuntime = () => typeof process !== "undefined" && !!process.versions?.node;
var cachedWebSocket;
var loadPromise;
var loadNodeWebSocket = async () => {
if (!isNodeRuntime()) return void 0;
try {
const undici = await import("undici");
const ctor = undici.WebSocket ?? undici.default?.WebSocket;
return typeof ctor === "function" ? ctor : void 0;
} catch {
return void 0;
}
};
var getWebSocketCtor = async () => {
if (cachedWebSocket) return cachedWebSocket;
  const globalWs = getGlobalWebSocket();
if (globalWs) {
cachedWebSocket = globalWs;
return cachedWebSocket;
}
if (!loadPromise) {
loadPromise = loadNodeWebSocket();
}
cachedWebSocket = await loadPromise;
return cachedWebSocket;
};
var decoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : void 0;
var ensureUtf8String = (data) => {
if (typeof data === "string") return data;
if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
return data.toString("utf8");
}
const convertView = (view) => {
if (typeof Buffer !== "undefined") {
return Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8");
}
return decoder?.decode(view);
};
if (ArrayBuffer.isView(data)) {
return convertView(data);
}
if (data instanceof ArrayBuffer) {
return convertView(new Uint8Array(data));
}
return void 0;
};
var Watcher = class extends import_events.EventEmitter {
http;
jobId;
kind;
pollInterval;
timeout;
ws;
closed = false;
emittedDocumentKeys = /* @__PURE__ */ new Set();
constructor(http, jobId, opts = {}) {
super();
this.http = http;
this.jobId = jobId;
this.kind = opts.kind ?? "crawl";
this.pollInterval = opts.pollInterval ?? 2;
this.timeout = opts.timeout;
}
buildWsUrl() {
const apiUrl = this.http.getApiUrl();
const wsBase = apiUrl.replace(/^http/, "ws");
const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
return `${wsBase}${path}`;
}
async start() {
try {
const url = this.buildWsUrl();
const wsCtor = await getWebSocketCtor();
if (!wsCtor) {
this.pollLoop();
return;
}
this.ws = new wsCtor(url, this.http.getApiKey());
if (this.ws && "binaryType" in this.ws) {
this.ws.binaryType = "arraybuffer";
}
if (this.ws) {
this.attachWsHandlers(this.ws);
}
} catch (err) {
this.pollLoop();
}
}
attachWsHandlers(ws) {
let startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
ws.onmessage = (ev) => {
try {
const raw = ensureUtf8String(ev.data);
if (!raw) return;
const body = JSON.parse(raw);
const type = body.type;
if (type === "error") {
this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
return;
}
if (type === "catchup") {
const payload2 = body.data || {};
this.emitDocuments(payload2.data || []);
this.emitSnapshot(payload2);
return;
}
if (type === "document") {
const doc = body.data;
if (doc) this.emit("document", doc);
return;
}
if (type === "done") {
const payload2 = body.data || body;
const data = payload2.data || [];
if (data.length) this.emitDocuments(data);
this.emit("done", { status: "completed", data, id: this.jobId });
this.close();
return;
}
const payload = body.data || body;
if (payload && payload.status) this.emitSnapshot(payload);
} catch {
}
if (timeoutMs && Date.now() - startTs > timeoutMs) this.close();
};
ws.onerror = () => {
this.emit("error", { status: "failed", data: [], error: "WebSocket error", id: this.jobId });
this.close();
};
ws.onclose = () => {
if (!this.closed) this.pollLoop();
};
}
documentKey(doc) {
if (doc && typeof doc === "object") {
const explicitId = doc.id ?? doc.docId ?? doc.url;
if (typeof explicitId === "string" && explicitId.length) {
return explicitId;
}
}
try {
return JSON.stringify(doc);
} catch {
return `${Date.now()}-${Math.random()}`;
}
}
emitDocuments(docs) {
for (const doc of docs) {
if (!doc) continue;
const key = this.documentKey(doc);
if (this.emittedDocumentKeys.has(key)) continue;
this.emittedDocumentKeys.add(key);
this.emit("document", { ...doc, id: this.jobId });
}
}
  emitSnapshot(payload) {
    const status = payload.status;
    const data = payload.data || [];
    // Crawl and batch snapshots share the same runtime shape; the kind
    // distinction only mattered for type narrowing in the TypeScript source.
    const snap = {
      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
      creditsUsed: payload.creditsUsed,
      expiresAt: payload.expiresAt,
      next: payload.next ?? null,
      data
    };
    this.emit("snapshot", snap);
    if (["completed", "failed", "cancelled"].includes(status)) {
      this.emit("done", { status, data, id: this.jobId });
      this.close();
    }
  }
async pollLoop() {
const startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
while (!this.closed) {
try {
const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId);
this.emitDocuments(snap.data || []);
this.emit("snapshot", snap);
if (["completed", "failed", "cancelled"].includes(snap.status)) {
this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
this.close();
break;
}
} catch {
}
if (timeoutMs && Date.now() - startTs > timeoutMs) break;
await new Promise((r) => setTimeout(r, Math.max(1e3, this.pollInterval * 1e3)));
}
}
close() {
this.closed = true;
if (this.ws && this.ws.close) this.ws.close();
}
};
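// Watcher sketch (illustrative): subscribe to live crawl events over
// WebSocket, with automatic fallback to HTTP polling when the runtime has no
// WebSocket constructor. The job id and timeout are placeholders.
function exampleWatch(http, jobId) {
  const w = new Watcher(http, jobId, { kind: "crawl", pollInterval: 2, timeout: 300 });
  w.on("document", (doc) => console.log("document:", doc.url ?? doc.id));
  w.on("snapshot", (snap) => console.log(`progress: ${snap.completed}/${snap.total}`));
  w.on("done", ({ status }) => console.log("finished:", status));
  w.on("error", (e) => console.error("watch error:", e.error));
  w.start();
  return w;
}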
// src/v2/client.ts
var zt = require("zod");
var FirecrawlClient = class {
http;
isCloudService(url) {
return url.includes("api.firecrawl.dev");
}
/**
* Create a v2 client.
* @param options Transport configuration (API key, base URL, timeouts, retries).
*/
constructor(options = {}) {
const apiKey = options.apiKey ?? process.env.FIRECRAWL_API_KEY ?? "";
const apiUrl = (options.apiUrl ?? process.env.FIRECRAWL_API_URL ?? "https://api.firecrawl.dev").replace(/\/$/, "");
if (this.isCloudService(apiUrl) && !apiKey) {
throw new Error("API key is required for the cloud API. Set FIRECRAWL_API_KEY env or pass apiKey.");
}
this.http = new HttpClient({
apiKey,
apiUrl,
timeoutMs: options.timeoutMs,
maxRetries: options.maxRetries,
backoffFactor: options.backoffFactor
});
}
async scrape(url, options) {
return scrape(this.http, url, options);
}
// Search
/**
* Search the web and optionally scrape each result.
* @param query Search query string.
* @param req Additional search options (sources, limit, scrapeOptions, etc.).
* @returns Structured search results.
*/
async search(query, req = {}) {
return search(this.http, { query, ...req });
}
// Map
/**
* Map a site to discover URLs (sitemap-aware).
* @param url Root URL to map.
* @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
* @returns Discovered links.
*/
async map(url, options) {
return map(this.http, url, options);
}
// Crawl
/**
* Start a crawl job (async).
* @param url Root URL to crawl.
* @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
* @returns Job id and url.
*/
async startCrawl(url, req = {}) {
return startCrawl(this.http, { url, ...req });
}
/**
* Get the status and partial data of a crawl job.
* @param jobId Crawl job id.
*/
async getCrawlStatus(jobId, pagination) {
return getCrawlStatus(this.http, jobId, pagination);
}
/**
* Cancel a crawl job.
* @param jobId Crawl job id.
* @returns True if cancelled.
*/
async cancelCrawl(jobId) {
return cancelCrawl(this.http, jobId);
}
/**
* Convenience waiter: start a crawl and poll until it finishes.
* @param url Root URL to crawl.
* @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
async crawl(url, req = {}) {
return crawl(this.http, { url, ...req }, req.pollInterval, req.timeout);
}
/**
* Retrieve crawl errors and robots.txt blocks.
* @param crawlId Crawl job id.
*/
async getCrawlErrors(crawlId) {
return getCrawlErrors(this.http, crawlId);
}
/**
* List active crawls for the authenticated team.
*/
async getActiveCrawls() {
return getActiveCrawls(this.http);
}
/**
* Preview normalized crawl parameters produced by a natural-language prompt.
* @param url Root URL.
* @param prompt Natural-language instruction.
*/
async crawlParamsPreview(url, prompt) {
return crawlParamsPreview(this.http, url, prompt);
}
// Batch
/**
* Start a batch scrape job for multiple URLs (async).
* @param urls URLs to scrape.
* @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
* @returns Job id and url.
*/
async startBatchScrape(urls, opts) {
return startBatchScrape(this.http, urls, opts);
}
/**
* Get the status and partial data of a batch scrape job.
* @param jobId Batch job id.
*/
async getBatchScrapeStatus(jobId, pagination) {
return getBatchScrapeStatus(this.http, jobId, pagination);
}
/**
* Retrieve batch scrape errors and robots.txt blocks.
* @param jobId Batch job id.
*/
async getBatchScrapeErrors(jobId) {
return getBatchScrapeErrors(this.http, jobId);
}
/**
* Cancel a batch scrape job.
* @param jobId Batch job id.
* @returns True if cancelled.
*/
async cancelBatchScrape(jobId) {
return cancelBatchScrape(this.http, jobId);
}
/**
* Convenience waiter: start a batch scrape and poll until it finishes.
* @param urls URLs to scrape.
* @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
* @returns Final job snapshot.
*/
async batchScrape(urls, opts) {
return batchScrape(this.http, urls, opts);
}
// Extract
/**
* Start an extract job (async).
* @param args Extraction request (urls, schema or prompt, flags).
* @returns Job id or processing state.
*/
async startExtract(args) {
return startExtract(this.http, args);
}
/**
* Get extract job status/data.
* @param jobId Extract job id.
*/
async getExtractStatus(jobId) {
return getExtractStatus(this.http, jobId);
}
/**
* Convenience waiter: start an extract and poll until it finishes.
* @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
* @returns Final extract response.
*/
async extract(args) {
return extract(this.http, args);
}
// Agent
/**
* Start an agent job (async).
* @param args Agent request (urls, prompt, schema).
* @returns Job id or processing state.
*/
async startAgent(args) {
return startAgent(this.http, args);
}
/**
* Get agent job status/data.
* @param jobId Agent job id.
*/
async getAgentStatus(jobId) {
return getAgentStatus(this.http, jobId);
}
/**
* Convenience waiter: start an agent and poll until it finishes.
* @param args Agent request plus waiter controls (pollInterval, timeout seconds).
* @returns Final agent response.
*/
async agent(args) {
return agent(this.http, args);
}
/**
* Cancel an agent job.
* @param jobId Agent job id.
* @returns True if cancelled.
*/
async cancelAgent(jobId) {
return cancelAgent(this.http, jobId);
}
// Usage
/** Current concurrency usage. */
async getConcurrency() {
return getConcurrency(this.http);
}
/** Current credit usage. */
async getCreditUsage() {
return getCreditUsage(this.http);
}
/** Recent token usage. */
async getTokenUsage() {
return getTokenUsage(this.http);
}
/** Historical credit usage by month; set byApiKey to true to break down by API key. */
async getCreditUsageHistorical(byApiKey) {
return getCreditUsageHistorical(this.http, byApiKey);
}
/** Historical token usage by month; set byApiKey to true to break down by API key. */
async getTokenUsageHistorical(byApiKey) {
return getTokenUsageHistorical(this.http, byApiKey);
}
/** Metrics about the team's scrape queue. */
async getQueueStatus() {
return getQueueStatus(this.http);
}
// Watcher
/**
* Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
* @param jobId Job id.
* @param opts Watcher options (kind, pollInterval, timeout seconds).
*/
watcher(jobId, opts = {}) {
return new Watcher(this.http, jobId, opts);
}
};
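// Client sketch (illustrative): the typical v2 entry point. Assumes
// FIRECRAWL_API_KEY is set in the environment; URLs are placeholders.
async function exampleClientUsage() {
  const firecrawl = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY });
  const doc = await firecrawl.scrape("https://example.com", { formats: ["markdown"] });
  const { links } = await firecrawl.map("https://example.com", { limit: 10 });
  return { markdown: doc.markdown, links };
}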
// src/v1/index.ts
var import_axios3 = __toESM(require("axios"), 1);
var zt2 = require("zod");
var import_zod_to_json_schema4 = require("zod-to-json-schema");
// node_modules/typescript-event-target/dist/index.mjs
var e = class extends EventTarget {
dispatchTypedEvent(s, t) {
return super.dispatchEvent(t);
}
};
// src/v1/index.ts
var FirecrawlError = class extends Error {
statusCode;
details;
constructor(message, statusCode, details) {
super(message);
this.statusCode = statusCode;
this.details = details;
}
};
var FirecrawlApp = class {
apiKey;
apiUrl;
version = "1.25.1";
isCloudService(url) {
return url.includes("api.firecrawl.dev");
}
async getVersion() {
try {
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
return process.env.npm_package_version;
}
const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
return packageJson.default.version;
} catch (error) {
      const isTest = typeof process !== "undefined" && process.env.JEST_WORKER_ID != null;
if (!isTest) {
console.error("Error getting version:", error);
}
return "1.25.1";
}
}
async init() {
this.version = await this.getVersion();
}
/**
* Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null, apiUrl = null }) {
const baseUrl = apiUrl || "https://api.firecrawl.dev";
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
throw new FirecrawlError("No API key provided", 401);