@mendable/firecrawl-js
Version: 4.24.2
JavaScript SDK for Firecrawl API
1,450 lines (1,429 loc) • 125 kB
JavaScript
"use strict";
// Bundler-generated CommonJS/ESM interop helpers.
// Short aliases for the Object built-ins the helpers below rely on.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Wraps a single-entry module table in a lazy initializer: the first call runs
// the factory stored under the table's first own key with (exports, module);
// later calls skip the factory and return the cached `mod.exports`.
var __commonJS = (cb, mod) => function __require() {
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};
// Defines one enumerable getter on `target` per key of `all`, so each exported
// binding is resolved when it is read rather than when it is registered.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Mirrors the own properties of `from` onto `to` as getters. Keys already
// present on `to`, and the single key named by `except`, are left untouched.
// Enumerability follows the source descriptor (enumerable when none is found).
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds an ESM-shaped namespace for a required module: the result inherits
// from the module's prototype and re-exposes its own properties via getters.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
// Produces the CommonJS export object: a fresh object flagged with
// `__esModule: true` that forwards every own property of `mod` through getters.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// package.json
// Copy of the package manifest inlined into the bundle. It is wrapped in
// __commonJS, so the object is only built on the first require_package() call.
// getVersion() reads the `version` field from it.
var require_package = __commonJS({
"package.json"(exports2, module2) {
module2.exports = {
name: "@mendable/firecrawl-js",
version: "4.24.2",
description: "JavaScript SDK for Firecrawl API",
main: "dist/index.js",
types: "dist/index.d.ts",
exports: {
"./package.json": "./package.json",
".": {
import: "./dist/index.js",
default: "./dist/index.cjs"
}
},
type: "module",
scripts: {
build: "tsup",
"build-and-publish": "pnpm run build && pnpm publish --access public",
"publish-beta": "pnpm run build && pnpm publish --access public --tag beta",
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
},
repository: {
type: "git",
url: "git+https://github.com/firecrawl/firecrawl.git"
},
author: "Mendable.ai",
license: "MIT",
dependencies: {
axios: "1.15.2",
firecrawl: "4.16.0",
"typescript-event-target": "^1.1.1",
zod: "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
bugs: {
url: "https://github.com/firecrawl/firecrawl/issues"
},
homepage: "https://github.com/firecrawl/firecrawl#readme",
devDependencies: {
"@jest/globals": "^30.2.0",
"@types/dotenv": "^8.2.0",
"@types/jest": "^30.0.0",
"@types/mocha": "^10.0.6",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
dotenv: "^16.4.5",
jest: "^30.2.0",
"ts-jest": "^29.4.5",
tsup: "^8.5.0",
typescript: "^5.4.5",
uuid: "^9.0.1"
},
keywords: [
"firecrawl",
"mendable",
"crawler",
"web",
"scraper",
"api",
"sdk"
],
engines: {
node: ">=22.0.0"
},
// Version overrides applied by pnpm to the listed dependency ranges.
pnpm: {
overrides: {
"@isaacs/brace-expansion@<=5.0.0": ">=5.0.1",
"minimatch@<10.2.3": ">=10.2.3",
"rollup@<4.59.0": ">=4.59.0",
"picomatch@<4.0.4": ">=4.0.4",
handlebars: ">=4.7.9",
"brace-expansion": ">=5.0.6",
"axios@<1.15.2": "1.15.2",
"follow-redirects@<1.16.0": ">=1.16.0 <2.0.0"
}
}
};
}
});
// src/index.ts
// Export table for the package entry point. Each entry is registered as a
// getter, so the bindings it names are only looked up when the export is read.
// NOTE(review): Firecrawl, FirecrawlApp, FirecrawlClient, Watcher and
// index_default are not defined in this part of the file — presumably further
// down in the bundle; confirm.
var index_exports = {};
__export(index_exports, {
Firecrawl: () => Firecrawl,
// Exposed under a different public name than the binding it reads.
FirecrawlAppV1: () => FirecrawlApp,
FirecrawlClient: () => FirecrawlClient,
JobTimeoutError: () => JobTimeoutError,
SdkError: () => SdkError,
Watcher: () => Watcher,
default: () => index_default
});
// Publish the table as the CommonJS exports, flagged with __esModule.
module.exports = __toCommonJS(index_exports);
// src/v2/utils/httpClient.ts
var import_axios = __toESM(require("axios"), 1);
// src/v2/utils/getVersion.ts
/**
 * Resolves the version string reported by the SDK.
 *
 * Lookup order: the `npm_package_version` environment variable (when it is
 * set and non-empty), then the `version` field of the inlined package
 * manifest, then the placeholder "3.x.x". Any error raised along the way also
 * yields the placeholder.
 *
 * @returns {string} The resolved version string.
 */
function getVersion() {
  const placeholder = "3.x.x";
  try {
    const hasProcess = typeof process !== "undefined";
    const fromEnv = hasProcess && process.env ? process.env.npm_package_version : void 0;
    if (fromEnv) {
      return fromEnv;
    }
    const manifest = require_package();
    return manifest?.version || placeholder;
  } catch {
    return placeholder;
  }
}
// src/v2/utils/httpClient.ts
/**
 * Small wrapper around an axios instance used by every v2 method.
 *
 * - Sends `Authorization: Bearer <apiKey>` on every request.
 * - Adds an `origin` field to plain-object POST/PUT/PATCH bodies.
 * - Retries responses/errors with HTTP status 502, sleeping
 *   `backoffFactor * 2^attempt` seconds between attempts.
 */
var HttpClient = class {
  instance;
  apiKey;
  apiUrl;
  maxRetries;
  backoffFactor;
  /**
   * @param {object} options
   * @param {string} options.apiKey - Key sent as the bearer token.
   * @param {string} options.apiUrl - Base URL; one trailing slash is removed.
   * @param {number} [options.maxRetries=3] - Total number of attempts per request.
   * @param {number} [options.backoffFactor=0.5] - Base delay in seconds for retries.
   * @param {number} [options.timeoutMs=300000] - Default per-request timeout.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.apiUrl = options.apiUrl.replace(/\/$/, "");
    this.maxRetries = options.maxRetries ?? 3;
    this.backoffFactor = options.backoffFactor ?? 0.5;
    this.instance = import_axios.default.create({
      baseURL: this.apiUrl,
      timeout: options.timeoutMs ?? 3e5,
      headers: {
        Authorization: `Bearer ${this.apiKey}`
      },
      transitional: { clarifyTimeoutError: true }
    });
  }
  /** @returns {string} The base URL without its trailing slash. */
  getApiUrl() {
    return this.apiUrl;
  }
  /** @returns {string} The API key this client was created with. */
  getApiKey() {
    return this.apiKey;
  }
  /**
   * Performs a request through the axios instance, retrying on HTTP 502.
   *
   * The caller's `config` object is never modified. At least one attempt is
   * always made, even when `maxRetries` is 0, negative or not a number.
   *
   * @param {object} config - axios request config.
   * @returns {Promise<object>} The axios response.
   * @throws Whatever the underlying request throws once retries are exhausted
   *   or the failure is not a 502.
   */
  async request(config) {
    const version = getVersion();
    // Work on a copy so the caller's config (and its headers) stay untouched.
    const baseConfig = { ...config, headers: { ...config.headers || {} } };
    // `maxRetries` counts total attempts; anything below 1 would otherwise skip
    // the loop entirely and fail without ever sending the request.
    const maxAttempts = this.maxRetries >= 1 ? this.maxRetries : 1;
    let lastError;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const canRetry = attempt < maxAttempts - 1;
      try {
        const cfg = { ...baseConfig };
        const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
        const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
        if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
          const data = cfg.data ?? {};
          cfg.data = {
            ...data,
            // Keep a caller-supplied origin only when it mentions "mcp".
            origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}`
          };
        }
        if (isFormDataBody) {
          // Drop any explicit content type so it is not forced onto a multipart body.
          cfg.headers = { ...cfg.headers || {} };
          delete cfg.headers["Content-Type"];
          delete cfg.headers["content-type"];
        }
        const res = await this.instance.request(cfg);
        if (res.status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        return res;
      } catch (err) {
        lastError = err;
        const status = err?.response?.status;
        if (status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        throw err;
      }
    }
    throw lastError ?? new Error("Unexpected HTTP client error");
  }
  /**
   * @param {number} seconds - Delay in seconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  sleep(seconds) {
    return new Promise((r) => setTimeout(r, seconds * 1e3));
  }
  /**
   * POSTs a body to `endpoint`.
   * @param {string} endpoint
   * @param {*} body
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  post(endpoint, body, options) {
    return this.request({
      method: "post",
      url: endpoint,
      data: body,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /**
   * POSTs a FormData body to `endpoint`.
   * @param {string} endpoint
   * @param {FormData} formData
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  postMultipart(endpoint, formData, options) {
    return this.request({
      method: "post",
      url: endpoint,
      data: formData,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /** GETs `endpoint` with optional extra headers. */
  get(endpoint, headers) {
    return this.request({ method: "get", url: endpoint, headers });
  }
  /** DELETEs `endpoint` with optional extra headers. */
  delete(endpoint, headers) {
    return this.request({ method: "delete", url: endpoint, headers });
  }
  /**
   * PATCHes a body to `endpoint`.
   * @param {string} endpoint
   * @param {*} body
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  patch(endpoint, body, options) {
    return this.request({
      method: "patch",
      url: endpoint,
      data: body,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /**
   * Builds the extra headers for a request.
   * @param {string} [idempotencyKey] - Sent as `x-idempotency-key` when truthy.
   * @returns {object} Header map (empty when no key is given).
   */
  prepareHeaders(idempotencyKey) {
    const headers = {};
    if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
    return headers;
  }
};
// src/v2/types.ts
/**
 * Error type raised by the SDK for failed API interactions.
 *
 * Carries the HTTP status, an error code, extra details and the id of the
 * job the error relates to; each is `undefined` when not supplied. The
 * `name` property is always "FirecrawlSdkError".
 */
var SdkError = class extends Error {
  status;
  code;
  details;
  jobId;
  /**
   * @param {string} message
   * @param {number} [status]
   * @param {string} [code]
   * @param {*} [details]
   * @param {string} [jobId]
   */
  constructor(message, status, code, details, jobId) {
    super(message);
    Object.assign(this, { name: "FirecrawlSdkError", status, code, details, jobId });
  }
};
/**
 * Raised when a polled job does not finish within the allowed time.
 *
 * The message reads "<Label> job <jobId> did not complete within <n> seconds",
 * where the label is "Batch scrape" for `jobType === "batch"` and "Crawl" for
 * any other value. The error code is "JOB_TIMEOUT"; status and details are
 * left undefined.
 */
var JobTimeoutError = class extends SdkError {
  timeoutSeconds;
  /**
   * @param {string} jobId - Id of the job that timed out.
   * @param {number} timeoutSeconds - The limit that was exceeded, in seconds.
   * @param {string} [jobType="batch"] - "batch" selects the batch-scrape label.
   */
  constructor(jobId, timeoutSeconds, jobType = "batch") {
    const label = jobType === "batch" ? "batch scrape" : "crawl";
    const capitalized = label.charAt(0).toUpperCase() + label.slice(1);
    const message = `${capitalized} job ${jobId} did not complete within ${timeoutSeconds} seconds`;
    super(message, void 0, "JOB_TIMEOUT", void 0, jobId);
    this.name = "JobTimeoutError";
    this.timeoutSeconds = timeoutSeconds;
  }
};
// src/utils/zodSchemaToJson.ts
var import_zod_to_json_schema = require("zod-to-json-schema");
/**
 * Duck-type check for a Zod schema object.
 *
 * A value qualifies when it is a non-null object that either exposes a `_zod`
 * property whose `typeof` is "object" (v4 marker), or exposes `_def` together
 * with a `safeParse` or `parse` function (v3 marker).
 *
 * @param {*} value
 * @returns {boolean}
 */
function isZodSchema(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  if ("_zod" in value && typeof value._zod === "object") {
    return true;
  }
  if (!("_def" in value)) {
    return false;
  }
  return typeof value.safeParse === "function" || typeof value.parse === "function";
}
/**
 * Checks for the v4 marker only: a non-null object with a `_zod` property
 * whose `typeof` is "object".
 *
 * @param {*} schema
 * @returns {boolean}
 */
function isZodV4Schema(schema) {
  const isObject = Boolean(schema) && typeof schema === "object";
  return isObject && "_zod" in schema && typeof schema._zod === "object";
}
/**
 * Attempts to convert a v4-style schema by calling a static `toJSONSchema`
 * function found on the schema's constructor.
 *
 * @param {*} schema
 * @returns {*} The conversion result, or `null` when the value is not a v4
 *   schema, no such function exists, or the call throws.
 */
function tryZodV4Conversion(schema) {
  if (isZodV4Schema(schema)) {
    try {
      const owner = schema.constructor?.prototype?.constructor;
      const canConvert = Boolean(owner) && typeof owner.toJSONSchema === "function";
      if (canConvert) {
        return owner.toJSONSchema(schema);
      }
    } catch {
      // Conversion is best-effort; fall through to the null result.
    }
  }
  return null;
}
/**
 * Converts a Zod schema to JSON Schema.
 *
 * Values that are not recognised as Zod schemas are returned unchanged. For
 * Zod schemas the v4 conversion is tried first; when it yields nothing the
 * `zod-to-json-schema` converter is used, and if that throws the original
 * schema is returned as-is.
 *
 * @param {*} schema
 * @returns {*} The converted schema, or the input when no conversion applied.
 */
function zodSchemaToJsonSchema(schema) {
  if (isZodSchema(schema)) {
    const fromV4 = tryZodV4Conversion(schema);
    if (fromV4) {
      return fromV4;
    }
    try {
      const { zodToJsonSchema } = import_zod_to_json_schema;
      return zodToJsonSchema(schema);
    } catch {
      // Fall back to handing the original value through.
    }
  }
  return schema;
}
/**
 * Heuristic for "this is probably a Zod schema's `.shape`": a non-array object
 * with at least one value that is an object carrying a truthy `_def` and a
 * `safeParse` function.
 *
 * @param {*} obj
 * @returns {boolean}
 */
function looksLikeZodShape(obj) {
  if (!obj || typeof obj !== "object" || Array.isArray(obj)) {
    return false;
  }
  for (const member of Object.values(obj)) {
    const isSchemaLike = member && typeof member === "object" && member._def && typeof member.safeParse === "function";
    if (isSchemaLike) {
      return true;
    }
  }
  return false;
}
// src/v2/utils/validation.ts
/**
 * Validates the `formats` list of a scrape request and converts any Zod
 * schemas it carries to JSON Schema in place.
 *
 * String entries are accepted except "json", which must be given as an
 * object. Object entries are checked according to their `type`:
 * json, changeTracking, question, highlights, query and screenshot.
 * Other types pass through unchecked.
 *
 * @param {Array<string|object>} [formats]
 * @throws {Error} On the first entry that fails validation.
 */
function ensureValidFormats(formats) {
  if (!formats) return;
  const isBlank = (text) => typeof text !== "string" || text.trim().length === 0;
  // Replaces a Zod schema on `target` with its JSON Schema form; rejects
  // values that look like a schema's `.shape`.
  const normalizeSchema = (target, shapeMessage) => {
    const candidate = target.schema;
    if (isZodSchema(candidate)) {
      target.schema = zodSchemaToJsonSchema(candidate);
    } else if (looksLikeZodShape(candidate)) {
      throw new Error(shapeMessage);
    }
  };
  for (const format of formats) {
    if (typeof format === "string") {
      if (format === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      continue;
    }
    switch (format.type) {
      case "json":
        if (!format.prompt && !format.schema) {
          throw new Error("json format requires either 'prompt' or 'schema' (or both)");
        }
        normalizeSchema(
          format,
          "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
        );
        break;
      case "changeTracking":
        normalizeSchema(
          format,
          "changeTracking format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
        );
        break;
      case "question":
        if (isBlank(format.question)) {
          throw new Error("question format requires a non-empty 'question' string");
        }
        break;
      case "highlights":
        if (isBlank(format.query)) {
          throw new Error("highlights format requires a non-empty 'query' string");
        }
        break;
      case "query":
        if (isBlank(format.prompt)) {
          throw new Error("query format requires a non-empty 'prompt' string");
        }
        if (format.mode != null && format.mode !== "freeform" && format.mode !== "directQuote") {
          throw new Error("query format mode must be 'freeform' or 'directQuote'");
        }
        break;
      case "screenshot": {
        const { quality } = format;
        if (quality != null && (typeof quality !== "number" || quality < 0)) {
          throw new Error("screenshot.quality must be a non-negative number");
        }
        break;
      }
      default:
        break;
    }
  }
}
/**
 * Validates scrape options: `timeout` must be greater than zero, `waitFor`
 * must not be negative, and `formats` must pass ensureValidFormats.
 * Missing options and missing fields are accepted.
 *
 * @param {object} [options]
 * @throws {Error} When a field is out of range or a format is invalid.
 */
function ensureValidScrapeOptions(options) {
  if (!options) return;
  const timeoutGiven = options.timeout != null;
  if (timeoutGiven && options.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  const waitForGiven = options.waitFor != null;
  if (waitForGiven && options.waitFor < 0) {
    throw new Error("waitFor must be non-negative");
  }
  ensureValidFormats(options.formats);
}
/**
 * Validates the `formats` list of a parse request and converts any Zod schema
 * on a json format to JSON Schema in place.
 *
 * screenshot, changeTracking, branding, audio and video are rejected in both
 * string and object form. The string "json" is rejected (it must be an
 * object). json, question, highlights and query objects get the same field
 * checks as in scrape requests.
 *
 * @param {Array<string|object>} [formats]
 * @throws {Error} On the first entry that fails validation.
 */
function ensureValidParseFormats(formats) {
  if (!formats) return;
  const rejectionFor = (kind) => {
    switch (kind) {
      case "screenshot":
        return "parse does not support screenshot format";
      case "changeTracking":
        return "parse does not support changeTracking format";
      case "branding":
        return "parse does not support branding format";
      case "audio":
      case "video":
        return `parse does not support ${kind} format`;
      default:
        return null;
    }
  };
  const isBlank = (text) => typeof text !== "string" || text.trim().length === 0;
  for (const format of formats) {
    const isName = typeof format === "string";
    if (isName && format === "json") {
      throw new Error("json format must be an object with { type: 'json', prompt, schema }");
    }
    const kind = isName ? format : format.type;
    const rejection = rejectionFor(kind);
    if (rejection !== null) {
      throw new Error(rejection);
    }
    if (isName) continue;
    if (kind === "json") {
      if (!format.prompt && !format.schema) {
        throw new Error("json format requires either 'prompt' or 'schema' (or both)");
      }
      const candidate = format.schema;
      if (isZodSchema(candidate)) {
        format.schema = zodSchemaToJsonSchema(candidate);
      } else if (looksLikeZodShape(candidate)) {
        throw new Error(
          "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
        );
      }
    } else if (kind === "question") {
      if (isBlank(format.question)) {
        throw new Error("question format requires a non-empty 'question' string");
      }
    } else if (kind === "highlights") {
      if (isBlank(format.query)) {
        throw new Error("highlights format requires a non-empty 'query' string");
      }
    } else if (kind === "query") {
      if (isBlank(format.prompt)) {
        throw new Error("query format requires a non-empty 'prompt' string");
      }
      if (format.mode != null && format.mode !== "freeform" && format.mode !== "directQuote") {
        throw new Error("query format mode must be 'freeform' or 'directQuote'");
      }
    }
  }
}
/**
 * Validates options for a parse request.
 *
 * `timeout` must be greater than zero. waitFor, actions, location, mobile,
 * maxAge, minAge, storeInCache and lockdown are rejected whenever they are
 * present (anything other than `undefined`). `proxy`, when given, must be
 * "basic" or "auto". Formats are checked with ensureValidParseFormats.
 *
 * @param {object} [options]
 * @throws {Error} On the first violation found, in the order listed above.
 */
function ensureValidParseOptions(options) {
  if (!options) return;
  if (options.timeout != null && options.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  const rejectedKeys = [
    ["waitFor", "parse does not support waitFor"],
    ["actions", "parse does not support actions"],
    ["location", "parse does not support location overrides"],
    ["mobile", "parse does not support mobile rendering"],
    ["maxAge", "parse does not support cache/index options"],
    ["minAge", "parse does not support cache/index options"],
    ["storeInCache", "parse does not support cache/index options"],
    ["lockdown", "parse does not support cache/index options"]
  ];
  for (const [key, message] of rejectedKeys) {
    if (options[key] !== void 0) {
      throw new Error(message);
    }
  }
  const { proxy } = options;
  const proxyAllowed = proxy === void 0 || proxy === "basic" || proxy === "auto";
  if (!proxyAllowed) {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }
  ensureValidParseFormats(options.formats);
}
// src/v2/utils/errorHandler.ts
var import_axios2 = require("axios");
/**
 * Turns an unsuccessful HTTP response into a thrown SdkError.
 *
 * The message is the body's `error`, else its `message`, else a generic
 * "Request failed (<status>) while trying to <action>". The error carries the
 * response status and the body's `details`; no error code is set.
 *
 * @param {{status: number, data: *}} resp
 * @param {string} action - Human-readable description used in the fallback message.
 * @throws {SdkError} Always.
 */
function throwForBadResponse(resp, action) {
  const { status } = resp;
  const payload = resp.data || {};
  const fallback = `Request failed (${status}) while trying to ${action}`;
  const message = payload.error || payload.message || fallback;
  throw new SdkError(message, status, void 0, payload.details);
}
/**
 * Re-throws an axios error as an SdkError.
 *
 * Message: the response body's `error`, else the axios error message, else a
 * generic "Request failed ... while trying to <action>". Code: the body's
 * `code`, else the axios error code. Details: the body's `details`, or the
 * whole body when `details` is null/undefined.
 *
 * @param {object} err - Error produced by axios.
 * @param {string} action - Description used in the fallback message.
 * @throws {SdkError} Always.
 */
function normalizeAxiosError(err, action) {
  const { response } = err;
  const status = response?.status;
  const body = response?.data;
  const statusSuffix = status ? ` (${status})` : "";
  const fallback = `Request failed${statusSuffix} while trying to ${action}`;
  throw new SdkError(
    body?.error || err.message || fallback,
    status,
    body?.code || err.code,
    body?.details ?? body
  );
}
/**
 * Decides whether a polling failure is worth retrying.
 *
 * Not retryable: a JobTimeoutError, or any error exposing a 4xx `status`.
 * Everything else — 5xx statuses, axios errors without a response,
 * timeouts, and unrecognised errors — is treated as retryable.
 *
 * @param {*} err
 * @returns {boolean}
 */
function isRetryableError(err) {
  if (err instanceof JobTimeoutError) {
    return false;
  }
  const exposesStatus = err instanceof SdkError || err && typeof err === "object" && "status" in err;
  if (exposesStatus) {
    const { status } = err;
    if (status) {
      if (status >= 400 && status < 500) return false;
      if (status >= 500) return true;
    }
  }
  // No response at all means the request never completed.
  if (err?.isAxiosError && !err.response) return true;
  if (err?.code === "ECONNABORTED") return true;
  if (err?.message?.includes("timeout")) return true;
  // Unknown failures default to retryable.
  return true;
}
// src/v2/methods/scrape.ts
/**
 * Scrapes a single URL through POST /v2/scrape.
 *
 * The URL is trimmed and merged with `options` into the request body. When
 * `options.timeout` is a number, the HTTP timeout is that value plus 5000 ms.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} url
 * @param {object} [options] - Scrape options; validated before sending.
 * @returns {Promise<object>} `data` from the response, or `{}` when absent.
 * @throws {Error} When the URL is empty or the options are invalid; axios
 *   errors are passed to normalizeAxiosError and bad responses to
 *   throwForBadResponse.
 */
async function scrape(http, url, options) {
  const target = url ? url.trim() : "";
  if (!target) {
    throw new Error("URL cannot be empty");
  }
  if (options) ensureValidScrapeOptions(options);
  const payload = { url: target, ...options || {} };
  const hasTimeout = typeof options?.timeout === "number";
  const requestOptions = hasTimeout ? { timeoutMs: options.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/scrape", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "scrape");
    }
    return res.data.data || {};
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "scrape");
    throw err;
  }
}
/**
 * Sends code or a prompt to the browser session of a scrape job through
 * POST /v2/scrape/<jobId>/interact.
 *
 * `code` and `prompt` are each included only when non-blank (they are sent
 * untrimmed). `language` defaults to "node". When `args.timeout` is given it
 * is sent in the body and the HTTP timeout becomes `timeout * 1000 + 5000` ms.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} jobId
 * @param {{code?: string, prompt?: string, language?: string, timeout?: number, origin?: string}} args
 * @returns {Promise<*>} The response body.
 * @throws {Error} When the job id is empty or neither code nor prompt is given.
 */
async function interact(http, jobId, args) {
  if (!jobId || !jobId.trim()) {
    throw new Error("Job ID cannot be empty");
  }
  const codeGiven = Boolean(args?.code && args.code.trim());
  const promptGiven = Boolean(args?.prompt && args.prompt.trim());
  if (!(codeGiven || promptGiven)) {
    throw new Error("Either 'code' or 'prompt' must be provided");
  }
  const timeoutGiven = args.timeout != null;
  const body = {
    ...codeGiven ? { code: args.code } : {},
    ...promptGiven ? { prompt: args.prompt } : {},
    language: args.language ?? "node",
    ...timeoutGiven ? { timeout: args.timeout } : {},
    ...args.origin ? { origin: args.origin } : {}
  };
  const requestOptions = timeoutGiven ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {};
  try {
    const res = await http.post(`/v2/scrape/${jobId}/interact`, body, requestOptions);
    if (res.status !== 200) {
      throwForBadResponse(res, "interact with scrape browser");
    }
    return res.data;
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, "interact with scrape browser");
    }
    throw err;
  }
}
/**
 * Ends the interaction session of a scrape job through
 * DELETE /v2/scrape/<jobId>/interact.
 *
 * @param {object} http - Client exposing `delete(endpoint)`.
 * @param {string} jobId
 * @returns {Promise<*>} The response body.
 * @throws {Error} When the job id is empty or the request fails.
 */
async function stopInteraction(http, jobId) {
  const idMissing = !jobId || !jobId.trim();
  if (idMissing) {
    throw new Error("Job ID cannot be empty");
  }
  const action = "stop interaction";
  try {
    const res = await http.delete(`/v2/scrape/${jobId}/interact`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    return res.data;
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
// src/v2/methods/parse.ts
/**
 * Wraps file data in a Blob suitable for a multipart upload.
 *
 * Accepts a Blob (returned as-is unless `contentType` is given and differs
 * from its type, in which case it is re-wrapped), a Buffer, an ArrayBuffer,
 * any ArrayBuffer view, or a string (defaulting to
 * "text/plain; charset=utf-8" when no content type is given).
 *
 * @param {Blob|Buffer|ArrayBuffer|ArrayBufferView|string} input
 * @param {string} [contentType]
 * @returns {Blob}
 * @throws {Error} For any other input type.
 */
function toUploadBlob(input, contentType) {
  const wrap = (type) => new Blob([input], { type });
  if (typeof Blob !== "undefined" && input instanceof Blob) {
    const needsRetyping = Boolean(contentType) && input.type !== contentType;
    return needsRetyping ? wrap(contentType) : input;
  }
  const isNodeBuffer = typeof Buffer !== "undefined" && Buffer.isBuffer(input);
  if (isNodeBuffer || input instanceof ArrayBuffer || ArrayBuffer.isView(input)) {
    return wrap(contentType);
  }
  if (typeof input === "string") {
    return wrap(contentType ?? "text/plain; charset=utf-8");
  }
  throw new Error("Unsupported parse file data type");
}
/**
 * Uploads a file to POST /v2/parse as multipart form data.
 *
 * The form carries two parts: `options` (the options serialised as JSON, with
 * `origin` defaulting to `js-sdk@<version>` when the caller gave none) and
 * `file` (the data wrapped in a Blob, named with the trimmed filename). When
 * `options.timeout` is a number, the HTTP timeout is that value plus 5000 ms.
 *
 * @param {object} http - Client exposing `postMultipart(endpoint, formData, options)`.
 * @param {{filename: string, data: *, contentType?: string}} file
 * @param {object} [options] - Parse options; validated before sending.
 * @returns {Promise<object>} `data` from the response, or `{}` when absent.
 * @throws {Error} When the filename or data is empty, the data type is not
 *   supported, or the options are invalid.
 */
async function parse(http, file, options) {
  if (!file || !file.filename || !file.filename.trim()) {
    throw new Error("filename cannot be empty");
  }
  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }
  // Built once and reused for both the emptiness check and the upload, so the
  // payload is not copied a second time.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }
  if (options) ensureValidParseOptions(options);
  const version = getVersion();
  const normalizedOptions = {
    ...options ?? {},
    // Any caller-supplied origin wins; the SDK tag is only a default.
    origin: options?.origin ?? `js-sdk@${version}`
  };
  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, file.filename.trim());
  try {
    const res = await http.postMultipart(
      "/v2/parse",
      formData,
      typeof normalizedOptions.timeout === "number" ? { timeoutMs: normalizedOptions.timeout + 5e3 } : {}
    );
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return res.data.data || {};
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
// src/v2/methods/search.ts
/**
 * Validates a search request and builds the body for POST /v2/search.
 *
 * Only fields that were actually supplied are copied. `integration` is
 * trimmed and dropped when blank; `scrapeOptions` are validated before being
 * attached.
 *
 * @param {object} req - Search request; `query` is required.
 * @returns {object} The request body.
 * @throws {Error} When the query is empty, `limit` or `timeout` is not
 *   positive, or both includeDomains and excludeDomains are non-empty.
 */
function prepareSearchPayload(req) {
  if (!req.query || !req.query.trim()) {
    throw new Error("Query cannot be empty");
  }
  if (req.limit != null && req.limit <= 0) {
    throw new Error("limit must be positive");
  }
  if (req.timeout != null && req.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  if (req.includeDomains?.length && req.excludeDomains?.length) {
    throw new Error(
      "includeDomains and excludeDomains cannot both be specified"
    );
  }
  const payload = { query: req.query };
  // Copied only when truthy.
  for (const key of ["sources", "categories", "includeDomains", "excludeDomains"]) {
    if (req[key]) payload[key] = req[key];
  }
  // Copied whenever not null/undefined, so 0 and false survive.
  for (const key of ["limit", "tbs", "location", "ignoreInvalidURLs", "timeout"]) {
    if (req[key] != null) payload[key] = req[key];
  }
  const integration = req.integration && req.integration.trim();
  if (integration) payload.integration = integration;
  if (req.origin) payload.origin = req.origin;
  if (req.scrapeOptions) {
    ensureValidScrapeOptions(req.scrapeOptions);
    payload.scrapeOptions = req.scrapeOptions;
  }
  return payload;
}
/**
 * Normalises a list of search results.
 *
 * Object entries are kept as they are (same reference); every other entry —
 * including `null` — is wrapped as `{ url: entry }`.
 *
 * @param {Iterable<*>} arr
 * @returns {Array<object>} A new array of the same length.
 */
function transformArray(arr) {
  // The previous implementation tested each object for document-like keys but
  // pushed the item unchanged in both branches, so only this check matters.
  return Array.from(arr, (item) => item && typeof item === "object" ? item : { url: item });
}
/**
 * Runs a search through POST /v2/search.
 *
 * When `request.timeout` is a number, the HTTP timeout is that value plus
 * 5000 ms. Only the `web`, `news` and `images` groups present in the response
 * are included in the result, each passed through transformArray.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {object} request - Search request.
 * @returns {Promise<{web?: Array, news?: Array, images?: Array}>}
 */
async function search(http, request) {
  const payload = prepareSearchPayload(request);
  const hasTimeout = typeof request.timeout === "number";
  const requestOptions = hasTimeout ? { timeoutMs: request.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/search", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "search");
    }
    const groups = res.data.data || {};
    const results = {};
    for (const source of ["web", "news", "images"]) {
      if (groups[source]) {
        results[source] = transformArray(groups[source]);
      }
    }
    return results;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "search");
    throw err;
  }
}
// src/v2/methods/map.ts
/**
 * Builds the body for POST /v2/map.
 *
 * The URL is trimmed. Option fields are copied only when supplied;
 * `integration` is trimmed and dropped when blank, `origin` is dropped when
 * falsy.
 *
 * @param {string} url
 * @param {object} [options]
 * @returns {object} The request body.
 * @throws {Error} When the URL is empty.
 */
function prepareMapPayload(url, options) {
  if (!url || !url.trim()) {
    throw new Error("URL cannot be empty");
  }
  const payload = { url: url.trim() };
  if (!options) {
    return payload;
  }
  const passthrough = ["sitemap", "search", "includeSubdomains", "ignoreQueryParameters", "limit", "timeout"];
  for (const key of passthrough) {
    if (options[key] != null) payload[key] = options[key];
  }
  if (options.integration != null && options.integration.trim()) {
    payload.integration = options.integration.trim();
  }
  if (options.origin) payload.origin = options.origin;
  if (options.location != null) payload.location = options.location;
  return payload;
}
/**
 * Maps a site through POST /v2/map and normalises the returned links.
 *
 * String links become `{ url }`; object links are reduced to
 * `{ url, title, description }`; anything else is dropped. When
 * `options.timeout` is a number, the HTTP timeout is that value plus 5000 ms.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} url
 * @param {object} [options]
 * @returns {Promise<{links: Array<object>}>}
 */
async function map(http, url, options) {
  const payload = prepareMapPayload(url, options);
  const hasTimeout = typeof options?.timeout === "number";
  const requestOptions = hasTimeout ? { timeoutMs: options.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/map", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "map");
    }
    const links = [];
    for (const entry of res.data.links || []) {
      if (typeof entry === "string") {
        links.push({ url: entry });
        continue;
      }
      if (entry && typeof entry === "object") {
        const { url: href, title, description } = entry;
        links.push({ url: href, title, description });
      }
    }
    return { links };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "map");
    throw err;
  }
}
// src/v2/utils/pagination.ts
/**
 * Follows `next` links and accumulates documents from every page.
 *
 * Starts from a copy of `initial` and keeps fetching until there is no next
 * URL, a page request fails or reports no success, or one of the optional
 * limits is reached. Failures are deliberately not raised: whatever was
 * collected so far is returned.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string|null} nextUrl - URL of the first page to fetch.
 * @param {Array} initial - Documents already in hand (not modified).
 * @param {{maxPages?: number, maxResults?: number, maxWaitTime?: number}} [pagination]
 *   `maxWaitTime` is in seconds. `maxResults` stops further appending but
 *   never truncates `initial`.
 * @returns {Promise<Array>} The accumulated documents.
 */
async function fetchAllPages(http, nextUrl, initial, pagination) {
  const docs = initial.slice();
  const { maxPages, maxResults, maxWaitTime } = pagination ?? {};
  const atResultCap = () => maxResults != null && docs.length >= maxResults;
  // Nothing from a further page could be kept, so skip the request entirely.
  if (atResultCap()) return docs;
  const started = Date.now();
  let current = nextUrl;
  let pageCount = 0;
  while (current) {
    if (maxPages != null && pageCount >= maxPages) break;
    if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break;
    let payload = null;
    try {
      const res = await http.get(current);
      payload = res.data;
    } catch {
      // Best-effort pagination: keep what has been collected so far.
      break;
    }
    if (!payload?.success) break;
    // Pages arrive either as a bare array or wrapped as { pages, next }.
    const wrapped = !Array.isArray(payload.data);
    const pageData = wrapped ? payload.data?.pages || [] : payload.data;
    for (const doc of pageData) {
      if (atResultCap()) break;
      docs.push(doc);
    }
    if (atResultCap()) break;
    current = payload.next ?? (wrapped ? payload.data?.next : null) ?? null;
    pageCount += 1;
  }
  return docs;
}
// src/v2/methods/crawl.ts
/**
 * Validates a crawl request and builds the body for POST /v2/crawl.
 *
 * The URL is trimmed. `prompt`, `excludePaths` and `includePaths` are copied
 * when truthy; the remaining crawl settings are copied whenever they are not
 * null/undefined. `integration` is trimmed and dropped when blank;
 * `scrapeOptions` are validated before being attached.
 *
 * @param {object} request - Crawl request; `url` is required.
 * @returns {object} The request body.
 * @throws {Error} When the URL is empty or the scrape options are invalid.
 */
function prepareCrawlPayload(request) {
  if (!request.url || !request.url.trim()) {
    throw new Error("URL cannot be empty");
  }
  const data = { url: request.url.trim() };
  for (const key of ["prompt", "excludePaths", "includePaths"]) {
    if (request[key]) data[key] = request[key];
  }
  const settings = [
    "maxDiscoveryDepth",
    "sitemap",
    "robotsUserAgent",
    "ignoreQueryParameters",
    "deduplicateSimilarURLs",
    "limit",
    "crawlEntireDomain",
    "allowExternalLinks",
    "allowSubdomains",
    "delay",
    "maxConcurrency",
    "regexOnFullURL",
    "webhook"
  ];
  for (const key of settings) {
    if (request[key] != null) data[key] = request[key];
  }
  if (request.integration != null && request.integration.trim()) {
    data.integration = request.integration.trim();
  }
  if (request.origin) data.origin = request.origin;
  if (request.scrapeOptions) {
    ensureValidScrapeOptions(request.scrapeOptions);
    data.scrapeOptions = request.scrapeOptions;
  }
  if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention;
  return data;
}
/**
 * Starts a crawl job through POST /v2/crawl.
 *
 * @param {object} http - Client exposing `post(endpoint, body)`.
 * @param {object} request - Crawl request.
 * @returns {Promise<{id: string, url: string}>} The job id and URL from the response.
 */
async function startCrawl(http, request) {
  const payload = prepareCrawlPayload(request);
  try {
    const res = await http.post("/v2/crawl", payload);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "start crawl");
    }
    const { id, url } = res.data;
    return { id, url };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "start crawl");
    throw err;
  }
}
/**
 * Fetches the status of a crawl job through GET /v2/crawl/<jobId>.
 *
 * When the response has a `next` link and auto-pagination is enabled (the
 * default), the remaining pages are collected via fetchAllPages and `next`
 * is reported as `null`. Otherwise only the first page is returned together
 * with the `next` link (or `null`).
 *
 * @param {object} http - Client exposing `get(endpoint)`.
 * @param {string} jobId
 * @param {{autoPaginate?: boolean}} [pagination] - Also forwarded to fetchAllPages.
 * @returns {Promise<object>} `{ id, status, completed, total, creditsUsed, expiresAt, next, data }`.
 */
async function getCrawlStatus(http, jobId, pagination) {
  try {
    const res = await http.get(`/v2/crawl/${jobId}`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "get crawl status");
    }
    const body = res.data;
    const firstPage = body.data || [];
    const followLinks = (pagination?.autoPaginate ?? true) && Boolean(body.next);
    const documents = followLinks ? await fetchAllPages(http, body.next, firstPage, pagination) : firstPage;
    return {
      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
      creditsUsed: body.creditsUsed,
      expiresAt: body.expiresAt,
      next: followLinks ? null : body.next ?? null,
      data: documents
    };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
    throw err;
  }
}
/**
 * Cancels a crawl job through DELETE /v2/crawl/<jobId>.
 *
 * @param {object} http - Client exposing `delete(endpoint)`.
 * @param {string} jobId
 * @returns {Promise<boolean>} `true` only when the response reports status "cancelled".
 */
async function cancelCrawl(http, jobId) {
  const action = "cancel crawl";
  try {
    const res = await http.delete(`/v2/crawl/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    const reported = res.data?.status;
    return reported === "cancelled";
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Polls a crawl job until it reaches "completed", "failed" or "cancelled".
 *
 * Retryable status-check failures are ignored and polling continues.
 * Non-retryable SdkErrors are re-created with the job id attached; other
 * non-retryable errors are re-thrown unchanged. The pause between polls is
 * `pollInterval` seconds, but never less than one second.
 *
 * @param {object} http - Client passed through to getCrawlStatus.
 * @param {string} jobId
 * @param {number} [pollInterval=2] - Seconds between polls.
 * @param {number} [timeout] - Overall limit in seconds; unlimited when omitted.
 * @returns {Promise<object>} The final job status.
 * @throws {JobTimeoutError} When the limit is exceeded before a terminal status.
 */
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
  const startedAt = Date.now();
  const terminalStates = ["completed", "failed", "cancelled"];
  const pause = () => new Promise((resolve) => setTimeout(resolve, Math.max(1e3, pollInterval * 1e3)));
  for (;;) {
    try {
      const job = await getCrawlStatus(http, jobId);
      if (terminalStates.includes(job.status)) {
        return job;
      }
    } catch (err) {
      if (!isRetryableError(err)) {
        // Attach the job id so callers can tell which job failed.
        throw err instanceof SdkError ? new SdkError(err.message, err.status, err.code, err.details, jobId) : err;
      }
    }
    const timedOut = timeout != null && Date.now() - startedAt > timeout * 1e3;
    if (timedOut) {
      throw new JobTimeoutError(jobId, timeout, "crawl");
    }
    await pause();
  }
}
/**
 * Starts a crawl and waits for it to finish.
 *
 * @param {object} http - Client passed through to the underlying calls.
 * @param {object} request - Crawl request.
 * @param {number} [pollInterval=2] - Seconds between status polls.
 * @param {number} [timeout] - Overall wait limit in seconds.
 * @returns {Promise<object>} The final job status.
 */
async function crawl(http, request, pollInterval = 2, timeout) {
  const { id } = await startCrawl(http, request);
  return waitForCrawlCompletion(http, id, pollInterval, timeout);
}
/**
 * Fetches the error report of a crawl through GET /v2/crawl/<crawlId>/errors.
 *
 * The report is read from the response's `data` field when present, otherwise
 * from the response body itself.
 *
 * @param {object} http - Client exposing `get(endpoint)`.
 * @param {string} crawlId
 * @returns {Promise<{errors: Array, robotsBlocked: Array}>} Missing lists default to `[]`.
 */
async function getCrawlErrors(http, crawlId) {
  const action = "get crawl errors";
  try {
    const res = await http.get(`/v2/crawl/${crawlId}/errors`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    const report = res.data?.data ?? res.data;
    const errors = report.errors || [];
    const robotsBlocked = report.robotsBlocked || [];
    return { errors, robotsBlocked };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Lists active crawls through GET /v2/crawl/active.
 *
 * Each entry is reduced to `{ id, teamId, url, options }`; `teamId` falls
 * back to the entry's `team_id`, and `options` to `null`.
 *
 * @param {object} http - Client exposing `get(endpoint)`.
 * @returns {Promise<{success: true, crawls: Array<object>}>}
 */
async function getActiveCrawls(http) {
  const action = "get active crawls";
  try {
    const res = await http.get(`/v2/crawl/active`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const listed = res.data?.crawls || [];
    const crawls = listed.map((entry) => {
      const { id, url } = entry;
      const teamId = entry.teamId ?? entry.team_id;
      const options = entry.options ?? null;
      return { id, teamId, url, options };
    });
    return { success: true, crawls };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Asks the API to preview crawl parameters for a URL and prompt through
 * POST /v2/crawl/params-preview.
 *
 * The URL is trimmed; the prompt is sent as given. A top-level `warning` in
 * the response is copied onto the returned data.
 *
 * @param {object} http - Client exposing `post(endpoint, body)`.
 * @param {string} url
 * @param {string} prompt
 * @returns {Promise<object>} `data` from the response (or `{}`), plus `warning` when present.
 * @throws {Error} When the URL or prompt is empty.
 */
async function crawlParamsPreview(http, url, prompt) {
  if (!url || !url.trim()) {
    throw new Error("URL cannot be empty");
  }
  if (!prompt || !prompt.trim()) {
    throw new Error("Prompt cannot be empty");
  }
  const action = "crawl params preview";
  try {
    const body = { url: url.trim(), prompt };
    const res = await http.post("/v2/crawl/params-preview", body);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const preview = res.data.data || {};
    const { warning } = res.data;
    if (warning) preview.warning = warning;
    return preview;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
// src/v2/methods/batch.ts
/**
 * Starts a batch scrape job through POST /v2/batch/scrape.
 *
 * Scrape `options` are validated and merged into the top level of the body.
 * The remaining settings are added only when supplied; `integration` is
 * trimmed and dropped when blank. `idempotencyKey` is sent as a header via
 * `http.prepareHeaders`.
 *
 * @param {object} http - Client exposing `post` and `prepareHeaders`.
 * @param {string[]} urls - Must be a non-empty array.
 * @param {object} [settings] - options, webhook, appendToId, ignoreInvalidURLs,
 *   maxConcurrency, zeroDataRetention, idempotencyKey, integration, origin.
 * @returns {Promise<{id: string, url: string, invalidURLs: (Array|undefined)}>}
 * @throws {Error} When `urls` is not a non-empty array.
 */
async function startBatchScrape(http, urls, {
  options,
  webhook,
  appendToId,
  ignoreInvalidURLs,
  maxConcurrency,
  zeroDataRetention,
  idempotencyKey,
  integration,
  origin
} = {}) {
  const hasUrls = Array.isArray(urls) && urls.length > 0;
  if (!hasUrls) {
    throw new Error("URLs list cannot be empty");
  }
  const payload = { urls };
  if (options) {
    ensureValidScrapeOptions(options);
    Object.assign(payload, options);
  }
  const optionalFields = { webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention };
  for (const [key, value] of Object.entries(optionalFields)) {
    if (value != null) payload[key] = value;
  }
  if (integration != null && integration.trim()) {
    payload.integration = integration.trim();
  }
  if (origin) payload.origin = origin;
  const action = "start batch scrape";
  try {
    const headers = http.prepareHeaders(idempotencyKey);
    const res = await http.post("/v2/batch/scrape", payload, { headers });
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const { id, url, invalidURLs } = res.data;
    return { id, url, invalidURLs: invalidURLs || void 0 };
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
/**
 * Fetches the status of a batch scrape job through
 * GET /v2/batch/scrape/<jobId>.
 *
 * When the response has a `next` link and auto-pagination is enabled (the
 * default), the remaining pages are collected via fetchAllPages and `next`
 * is reported as `null`. Otherwise only the first page is returned together
 * with the `next` link (or `null`).
 *
 * @param {object} http - Client exposing `get(endpoint)`.
 * @param {string} jobId
 * @param {{autoPaginate?: boolean}} [pagination] - Also forwarded to fetchAllPages.
 * @returns {Promise<object>} `{ id, status, completed, total, creditsUsed, expiresAt, next, data }`.
 */
async function getBatchScrapeStatus(http, jobId, pagination) {
  const action = "get batch scrape status";
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const body = res.data;
    const firstPage = body.data || [];
    const followLinks = (pagination?.autoPaginate ?? true) && Boolean(body.next);
    const documents = followLinks ? await fetchAllPages(http, body.next, firstPage, pagination) : firstPage;
    return {
      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
      creditsUsed: body.creditsUsed,
      expiresAt: body.expiresAt,
      next: followLinks ? null : body.next ?? null,
      data: documents
    };
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
/**
 * Cancel a batch scrape job.
 *
 * @param http  Client exposing `delete(path)`.
 * @param jobId Batch job identifier, interpolated into the path.
 * @returns `true` only when the response body's `status` is exactly
 *          `"cancelled"`; `false` otherwise.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function cancelBatchScrape(http, jobId) {
  const action = "cancel batch scrape";
  try {
    const response = await http.delete(`/v2/batch/scrape/${jobId}`);
    if (response.status !== 200) {
      throwForBadResponse(response, action);
    }
    const reportedStatus = response.data?.status;
    return reportedStatus === "cancelled";
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the errors recorded for a batch scrape job.
 *
 * The payload is read from the response's nested `data` field when present,
 * otherwise from the response body itself. A 200 response with no body at
 * all is treated as "no errors" instead of failing with a TypeError on the
 * property access.
 *
 * @param http  Client exposing `get(path)`.
 * @param jobId Batch job identifier, interpolated into the path.
 * @returns `{ errors, robotsBlocked }`, each defaulting to `[]` when missing.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getBatchScrapeErrors(http, jobId) {
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}/errors`);
    if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
    // Fall back to an empty object so an empty body yields empty lists.
    const payload = res.data?.data ?? res.data ?? {};
    return {
      errors: payload.errors || [],
      robotsBlocked: payload.robotsBlocked || []
    };
  } catch (err) {
    if (err?.isAxiosError)
      return normalizeAxiosError(err, "get batch scrape errors");
    throw err;
  }
}
/**
 * Poll a batch scrape job until it reaches a terminal state.
 *
 * Each round asks `getBatchScrapeStatus` for the job. A status of
 * `"completed"`, `"failed"` or `"cancelled"` ends the wait. Errors for which
 * `isRetryableError` returns a truthy value are ignored and polling continues.
 * Other errors are rethrown; an `SdkError` is re-created first with `jobId`
 * as its fifth constructor argument.
 *
 * @param http         Client forwarded to `getBatchScrapeStatus`.
 * @param jobId        Batch job identifier.
 * @param pollInterval Seconds between polls (default 2); the actual delay is
 *                     never shorter than 1000 ms.
 * @param timeout      Optional limit in seconds; checked after each poll.
 * @returns The terminal status object.
 * @throws JobTimeoutError once more than `timeout` seconds have elapsed.
 */
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
  const startedAt = Date.now();
  const isTerminal = (state) => state === "completed" || state === "failed" || state === "cancelled";
  const pause = () => new Promise((resolve) => setTimeout(resolve, Math.max(1e3, pollInterval * 1e3)));
  for (;;) {
    try {
      const job = await getBatchScrapeStatus(http, jobId);
      if (isTerminal(job.status)) return job;
    } catch (err) {
      const retryable = isRetryableError(err);
      if (!retryable && err instanceof SdkError) {
        // Rebuild the SDK error so it carries the job id.
        throw new SdkError(err.message, err.status, err.code, err.details, jobId);
      }
      if (!retryable) throw err;
    }
    const timedOut = timeout != null && Date.now() - startedAt > timeout * 1e3;
    if (timedOut) throw new JobTimeoutError(jobId, timeout, "batch");
    await pause();
  }
}
/**
 * Start a batch scrape and wait for it to finish.
 *
 * @param http Client forwarded to `startBatchScrape` and
 *             `waitForBatchCompletion`.
 * @param urls URLs to scrape.
 * @param opts Options passed whole to `startBatchScrape`; `pollInterval`
 *             (default 2 when nullish) and `timeout` control the wait.
 * @returns Whatever `waitForBatchCompletion` resolves to.
 */
async function batchScrape(http, urls, opts = {}) {
  const job = await startBatchScrape(http, urls, opts);
  const jobId = job.id;
  const interval = opts.pollInterval ?? 2;
  return waitForBatchCompletion(http, jobId, interval, opts.timeout);
}
// src/v2/methods/extract.ts
/**
 * Build the request body for an extract job from caller arguments.
 *
 * Only supplied fields are copied. `schema` is converted with
 * `zodSchemaToJsonSchema` when `isZodSchema` recognises it and passed through
 * otherwise. `integration` is trimmed and skipped when blank. `scrapeOptions`
 * is checked with `ensureValidScrapeOptions` before being attached.
 *
 * @param args Extract arguments.
 * @returns A new plain object to send as the request body.
 */
function prepareExtractPayload(args) {
  const body = {};
  if (args.urls) body.urls = args.urls;
  if (args.prompt != null) body.prompt = args.prompt;
  if (args.schema != null) {
    body.schema = isZodSchema(args.schema) ? zodSchemaToJsonSchema(args.schema) : args.schema;
  }
  // Fields copied verbatim whenever they are neither null nor undefined.
  const passthrough = ["systemPrompt", "allowExternalLinks", "enableWebSearch", "showSources", "ignoreInvalidURLs"];
  for (const field of passthrough) {
    if (args[field] != null) body[field] = args[field];
  }
  const integrationTag = args.integration ? args.integration.trim() : "";
  if (integrationTag) body.integration = integrationTag;
  for (const field of ["origin", "agent"]) {
    if (args[field]) body[field] = args[field];
  }
  if (args.scrapeOptions) {
    ensureValidScrapeOptions(args.scrapeOptions);
    body.scrapeOptions = args.scrapeOptions;
  }
  return body;
}
/**
 * Submit an extract job.
 *
 * @param http Client exposing `post(path, body)`.
 * @param args Extract arguments, converted by `prepareExtractPayload`.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function startExtract(http, args) {
  const requestBody = prepareExtractPayload(args);
  const action = "extract";
  try {
    const response = await http.post("/v2/extract", requestBody);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the current state of an extract job.
 *
 * @param http  Client exposing `get(path)`.
 * @param jobId Extract job identifier, interpolated into the path.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getExtractStatus(http, jobId) {
  const action = "extract status";
  try {
    const response = await http.get(`/v2/extract/${jobId}`);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Poll an extract job until it finishes or the timeout elapses.
 *
 * Unlike a throwing timeout, this returns the most recent status object when
 * the deadline passes.
 *
 * @param http         Client forwarded to `getExtractStatus`.
 * @param jobId        Extract job identifier.
 * @param pollInterval Seconds between polls (default 2); the actual delay is
 *                     never shorter than 1000 ms.
 * @param timeout      Optional limit in seconds, checked after each poll.
 * @returns The last status fetched: terminal (`"completed"`, `"failed"`,
 *          `"cancelled"`) or whatever it was when the timeout passed.
 */
async function waitExtract(http, jobId, pollInterval = 2, timeout) {
  const startedAt = Date.now();
  const terminalStates = new Set(["completed", "failed", "cancelled"]);
  const deadlinePassed = () => timeout != null && Date.now() - startedAt > timeout * 1e3;
  for (;;) {
    const job = await getExtractStatus(http, jobId);
    if (terminalStates.has(job.status || "") || deadlinePassed()) return job;
    await new Promise((resolve) => setTimeout(resolve, Math.max(1e3, pollInterval * 1e3)));
  }
}
/**
 * Start an extract job and wait for its result.
 *
 * @param http Client forwarded to `startExtract` and `waitExtract`.
 * @param args Extract arguments; `pollInterval` (default 2 when nullish) and
 *             `timeout` control the wait.
 * @returns The start response itself when it has no `id`, otherwise whatever
 *          `waitExtract` resolves to.
 */
async function extract(http, args) {
  const started = await startExtract(http, args);
  return started.id
    ? waitExtract(http, started.id, args.pollInterval ?? 2, args.timeout)
    : started;
}
// src/v2/methods/agent.ts
/**
 * Build the request body for an agent job from caller arguments.
 *
 * `prompt` is always assigned, even when undefined. `schema` is converted
 * with `zodSchemaToJsonSchema` when `isZodSchema` recognises it and passed
 * through otherwise. `integration` is trimmed and skipped when blank. The
 * remaining optional fields are copied when neither `null` nor `undefined`.
 *
 * @param args Agent arguments.
 * @returns A new plain object to send as the request body.
 */
function prepareAgentPayload(args) {
  const body = {};
  if (args.urls) body.urls = args.urls;
  body.prompt = args.prompt;
  if (args.schema != null) {
    const { schema } = args;
    body.schema = isZodSchema(schema) ? zodSchemaToJsonSchema(schema) : schema;
  }
  const integrationTag = args.integration ? args.integration.trim() : "";
  if (integrationTag) body.integration = integrationTag;
  if (args.origin) body.origin = args.origin;
  for (const field of ["maxCredits", "strictConstrainToURLs", "model", "webhook"]) {
    const value = args[field];
    if (value != null) body[field] = value;
  }
  return body;
}
/**
 * Submit an agent job.
 *
 * @param http Client exposing `post(path, body)`.
 * @param args Agent arguments, converted by `prepareAgentPayload`.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function startAgent(http, args) {
  const requestBody = prepareAgentPayload(args);
  const action = "agent";
  try {
    const response = await http.post("/v2/agent", requestBody);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the current state of an agent job.
 *
 * @param http  Client exposing `get(path)`.
 * @param jobId Agent job identifier, interpolated into the path.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getAgentStatus(http, jobId) {
  const action = "agent status";
  try {
    const response = await http.get(`/v2/agent/${jobId}`);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Poll an agent job until it finishes or the timeout elapses.
 *
 * When the deadline passes, the most recent status object is returned rather
 * than an error being thrown.
 *
 * @param http         Client forwarded to `getAgentStatus`.
 * @param jobId        Agent job identifier.
 * @param pollInterval Seconds between polls (default 2); the actual delay is
 *                     never shorter than 1000 ms.
 * @param timeout      Optional limit in seconds, checked after each poll.
 * @returns The last status fetched: terminal (`"completed"`, `"failed"`,
 *          `"cancelled"`) or whatever it was when the timeout passed.
 */
async function waitAgent(http, jobId, pollInterval = 2, timeout) {
  const startedAt = Date.now();
  const terminalStates = new Set(["completed", "failed", "cancelled"]);
  const deadlinePassed = () => timeout != null && Date.now() - startedAt > timeout * 1e3;
  for (;;) {
    const job = await getAgentStatus(http, jobId);
    if (terminalStates.has(job.status || "") || deadlinePassed()) return job;
    await new Promise((resolve) => setTimeout(resolve, Math.max(1e3, pollInterval * 1e3)));
  }
}
/**
 * Start an agent job and wait for its result.
 *
 * @param http Client forwarded to `startAgent` and `waitAgent`.
 * @param args Agent arguments; `pollInterval` (default 2 when nullish) and
 *             `timeout` control the wait.
 * @returns The start response itself when it has no `id`, otherwise whatever
 *          `waitAgent` resolves to.
 */
async function agent(http, args) {
  const started = await startAgent(http, args);
  return started.id
    ? waitAgent(http, started.id, args.pollInterval ?? 2, args.timeout)
    : started;
}
/**
 * Cancel an agent job.
 *
 * @param http  Client exposing `delete(path)`.
 * @param jobId Agent job identifier, interpolated into the path.
 * @returns `true` only when the response body's `success` is exactly `true`.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function cancelAgent(http, jobId) {
  const action = "cancel agent";
  try {
    const response = await http.delete(`/v2/agent/${jobId}`);
    if (response.status !== 200) {
      throwForBadResponse(response, action);
    }
    const acknowledged = response.data?.success;
    return acknowledged === true;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
// src/v2/methods/browser.ts
/**
 * Create a browser session.
 *
 * Only supplied fields are sent. A string `integration` is trimmed and
 * omitted when blank, matching how the batch, extract and agent request
 * builders treat the same field; non-string values are forwarded unchanged.
 *
 * @param http Client exposing `post(path, body)`.
 * @param args Optional session settings: `ttl`, `activityTtl`,
 *             `streamWebView`, `profile`, `integration`, `origin`.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function browser(http, args = {}) {
  const body = {};
  if (args.ttl != null) body.ttl = args.ttl;
  if (args.activityTtl != null) body.activityTtl = args.activityTtl;
  if (args.streamWebView != null) body.streamWebView = args.streamWebView;
  if (args.profile != null) body.profile = args.profile;
  // Normalise the tag so a padded or whitespace-only value is never sent.
  const integration = typeof args.integration === "string" ? args.integration.trim() : args.integration;
  if (integration != null && integration !== "") body.integration = integration;
  if (args.origin) body.origin = args.origin;
  try {
    const res = await http.post("/v2/browser", body);
    if (res.status !== 200) throwForBadResponse(res, "create browser session");
    return res.data;
  } catch (err) {
    if (err?.isAxiosError)
      return normalizeAxiosError(err, "create browser session");
    throw err;
  }
}
/**
 * Run code inside an existing browser session.
 *
 * @param http      Client exposing `post(path, body, config)`.
 * @param sessionId Browser session identifier, interpolated into the path.
 * @param args      `code` to run, optional `language` (defaults to `"bash"`)
 *                  and optional `timeout`.
 * @returns The raw response body.
 *
 * When `timeout` is supplied it is sent in the body, and the request config
 * carries `timeoutMs` of `timeout * 1000 + 5000`; otherwise an empty config
 * object is passed.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function browserExecute(http, sessionId, args) {
  const body = {
    code: args.code,
    language: args.language ?? "bash"
  };
  const hasTimeout = args.timeout != null;
  if (hasTimeout) body.timeout = args.timeout;
  const action = "execute browser code";
  try {
    const requestConfig = hasTimeout ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {};
    const response = await http.post(`/v2/browser/${sessionId}/execute`, body, requestConfig);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Delete a browser session.
 *
 * @param http      Client exposing `delete(path)`.
 * @param sessionId Browser session identifier, interpolated into the path.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function deleteBrowser(http, sessionId) {
  const action = "delete browser session";
  try {
    const response = await http.delete(`/v2/browser/${sessionId}`);
    if (response.status === 200) return response.data;
    throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * List browser sessions, optionally filtered by status.
 *
 * The `status` value is percent-encoded before being placed in the query
 * string, so characters such as `&`, `=`, `#` or spaces cannot alter the
 * request's query parameters.
 *
 * @param http Client exposing `get(path)`.
 * @param args Optional filter; a truthy `status` adds `?status=<value>`.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function listBrowsers(http, args = {}) {
  const query = args.status ? `?status=${encodeURIComponent(args.status)}` : "";
  const endpoint = `/v2/browser${query}`;
  try {
    const res = await http.get(endpoint);
    if (res.status !== 200) throwForBadResponse(res, "list browser sessions");
    return res.data;
  } catch (err) {
    if (err?.isAxiosError)
      return normalizeAxiosError(err, "list browser sessions");
    throw err;
  }
}
// src/v2/methods/usage.ts
/**
 * Fetch the team's current and maximum concurrency.
 *
 * Values are read from the response's nested `data` object when it is truthy
 * and from the response body otherwise. `maxConcurrency` falls back to the
 * snake_case `max_concurrency` key.
 *
 * @param http Client exposing `get(path)`.
 * @returns `{ concurrency, maxConcurrency }`.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getConcurrency(http) {
  const action = "get concurrency";
  try {
    const response = await http.get("/v2/concurrency-check");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    const usage = response.data.data || response.data;
    const concurrency = usage.concurrency;
    const maxConcurrency = usage.maxConcurrency ?? usage.max_concurrency;
    return { concurrency, maxConcurrency };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the team's credit usage for the current billing period.
 *
 * Values are read from the response's nested `data` object when it is truthy
 * and from the response body otherwise. Each field accepts a camelCase key
 * with a snake_case fallback.
 *
 * @param http Client exposing `get(path)`.
 * @returns `{ remainingCredits, planCredits, billingPeriodStart, billingPeriodEnd }`;
 *          `remainingCredits` defaults to 0 and the billing dates to `null`.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getCreditUsage(http) {
  const action = "get credit usage";
  try {
    const response = await http.get("/v2/team/credit-usage");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    const usage = response.data.data || response.data;
    // Prefer the camelCase key, fall back to its snake_case twin.
    const read = (camel, snake) => usage[camel] ?? usage[snake];
    return {
      remainingCredits: read("remainingCredits", "remaining_credits") ?? 0,
      planCredits: read("planCredits", "plan_credits"),
      billingPeriodStart: read("billingPeriodStart", "billing_period_start") ?? null,
      billingPeriodEnd: read("billingPeriodEnd", "billing_period_end") ?? null
    };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the team's token usage for the current billing period.
 *
 * Values are read from the response's nested `data` object when it is truthy
 * and from the response body otherwise. Each field accepts a camelCase key
 * with a snake_case fallback.
 *
 * @param http Client exposing `get(path)`.
 * @returns `{ remainingTokens, planTokens, billingPeriodStart, billingPeriodEnd }`;
 *          `remainingTokens` defaults to 0 and the billing dates to `null`.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getTokenUsage(http) {
  const action = "get token usage";
  try {
    const response = await http.get("/v2/team/token-usage");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    const usage = response.data.data || response.data;
    // Prefer the camelCase key, fall back to its snake_case twin.
    const read = (camel, snake) => usage[camel] ?? usage[snake];
    return {
      remainingTokens: read("remainingTokens", "remaining_tokens") ?? 0,
      planTokens: read("planTokens", "plan_tokens"),
      billingPeriodStart: read("billingPeriodStart", "billing_period_start") ?? null,
      billingPeriodEnd: read("billingPeriodEnd", "billing_period_end") ?? null
    };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the team's queue status.
 *
 * @param http Client exposing `get(path)`.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getQueueStatus(http) {
  const action = "get queue status";
  try {
    const response = await http.get("/v2/team/queue-status");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch historical credit usage for the team.
 *
 * @param http     Client exposing `get(path)`.
 * @param byApiKey When truthy, `?byApiKey=true` is appended to the request.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getCreditUsageHistorical(http, byApiKey) {
  const action = "get credit usage historical";
  try {
    let endpoint = "/v2/team/credit-usage/historical";
    if (byApiKey) endpoint = `${endpoint}?byApiKey=true`;
    const response = await http.get(endpoint);
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch historical token usage for the team.
 *
 * @param http     Client exposing `get(path)`.
 * @param byApiKey When truthy, `?byApiKey=true` is appended to the request.
 * @returns The raw response body.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function getTokenUsageHistorical(http, byApiKey) {
  const action = "get token usage historical";
  try {
    let endpoint = "/v2/team/token-usage/historical";
    if (byApiKey) endpoint = `${endpoint}?byApiKey=true`;
    const response = await http.get(endpoint);
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, action);
    return response.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
// src/v2/methods/monitor.ts
/**
 * Serialise an options object into a URL query string.
 *
 * Entries whose value is `null` or `undefined` are left out; every other
 * value is converted with `String()`.
 *
 * @param params Key/value pairs, or a falsy value for "no query".
 * @returns `"?key=value&..."`, or `""` when nothing is left to encode.
 */
function queryString(params) {
  if (!params) return "";
  const pairs = Object.entries(params)
    .filter(([, value]) => value !== void 0 && value !== null)
    .map(([key, value]) => [key, String(value)]);
  const encoded = new URLSearchParams(pairs).toString();
  return encoded === "" ? "" : `?${encoded}`;
}
/**
 * Unwrap the `data` field of an API response.
 *
 * A response is accepted when its status is 200, its body has a truthy
 * `success` and its `data` is neither `null` nor `undefined`. Anything else
 * is reported through `throwForBadResponse` before `data` is read.
 *
 * @param res    Response with `status` and `data`.
 * @param action Label passed to `throwForBadResponse`.
 * @returns `res.data.data`.
 */
function dataOrThrow(res, action) {
  const accepted = res.status === 200 && res.data?.success && res.data.data != null;
  if (!accepted) {
    throwForBadResponse(res, action);
  }
  return res.data.data;
}
/**
 * Create a monitor.
 *
 * @param http    Client exposing `post(path, body)`.
 * @param request Monitor definition, sent as the request body unchanged.
 * @returns The value `dataOrThrow` extracts from the response.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function createMonitor(http, request) {
  const action = "create monitor";
  try {
    const response = await http.post("/v2/monitor", request);
    const created = dataOrThrow(response, action);
    return created;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * List monitors.
 *
 * @param http    Client exposing `get(path)`.
 * @param options Optional filters, serialised onto the path by `queryString`.
 * @returns The value `dataOrThrow` extracts from the response.
 *
 * Errors flagged `isAxiosError` are handed to `normalizeAxiosError` and its
 * result is returned; every other error is rethrown untouched.
 */
async function listMonitors(http, options) {
  const action = "list monitors";
  try {
    const endpoint = `/v2/monitor${queryString(options)}`;
    const response = await http.get(endpoint);
    const monitors = dataOrThrow(response, action);
    return monitors;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
asyn