@mendable/firecrawl-js
Version:
JavaScript SDK for Firecrawl API
1,476 lines (1,456 loc) • 121 kB
JavaScript
import {
require_package
} from "./chunk-BI4QF6CM.js";
// src/v2/utils/httpClient.ts
import axios from "axios";
// src/v2/utils/getVersion.ts
/**
 * Resolves the SDK version string.
 *
 * Prefers `process.env.npm_package_version` when it is set, otherwise the
 * `version` field of the object returned by `require_package()`. A missing
 * version, or any exception along the way, yields the placeholder "3.x.x".
 *
 * @returns {*} The resolved version value, or "3.x.x" as a fallback.
 */
function getVersion() {
  const FALLBACK_VERSION = "3.x.x";
  try {
    const hasEnv = typeof process !== "undefined" && process.env;
    const fromEnv = hasEnv ? process.env.npm_package_version : void 0;
    if (fromEnv) {
      return fromEnv;
    }
    const manifest = require_package();
    return manifest?.version || FALLBACK_VERSION;
  } catch {
    return FALLBACK_VERSION;
  }
}
// src/v2/utils/httpClient.ts
/**
 * Small axios-based HTTP client used by the v2 SDK methods.
 *
 * Holds the API key and base URL, sends the key as a Bearer token, stamps
 * JSON object bodies of POST/PUT/PATCH requests with an `origin` field, and
 * retries requests that come back with HTTP 502.
 */
var HttpClient = class {
  instance;
  apiKey;
  apiUrl;
  maxRetries;
  backoffFactor;
  /**
   * @param {object} options
   * @param {string} options.apiKey - Sent as `Authorization: Bearer <apiKey>`.
   * @param {string} options.apiUrl - Base URL; a single trailing slash is removed.
   * @param {number} [options.maxRetries=3] - Total number of attempts per request.
   * @param {number} [options.backoffFactor=0.5] - Base backoff in seconds (doubled per attempt).
   * @param {number} [options.timeoutMs=300000] - Default axios timeout in milliseconds.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.apiUrl = options.apiUrl.replace(/\/$/, "");
    this.maxRetries = options.maxRetries ?? 3;
    this.backoffFactor = options.backoffFactor ?? 0.5;
    this.instance = axios.create({
      baseURL: this.apiUrl,
      timeout: options.timeoutMs ?? 3e5,
      headers: {
        Authorization: `Bearer ${this.apiKey}`
      },
      transitional: { clarifyTimeoutError: true }
    });
  }
  /** @returns {string} The base URL with its trailing slash removed. */
  getApiUrl() {
    return this.apiUrl;
  }
  /** @returns {string} The API key given to the constructor. */
  getApiKey() {
    return this.apiKey;
  }
  /**
   * Sends a request through the axios instance, retrying on HTTP 502.
   *
   * At least one attempt is always made, even when `maxRetries` is zero,
   * negative or NaN. The caller's `config` object is not modified.
   *
   * @param {object} config - axios request config.
   * @returns {Promise<object>} The axios response.
   * @throws Whatever the axios instance throws on the final attempt, or on
   *   any failure that is not a 502.
   */
  async request(config) {
    const version = getVersion();
    // Work on a copy so the caller's config (and its headers) stay untouched.
    const baseConfig = { ...config, headers: { ...config.headers || {} } };
    // Guard against maxRetries <= 0 / NaN, which would otherwise skip the loop
    // and throw a generic error without ever sending the request.
    const totalAttempts = this.maxRetries >= 1 ? this.maxRetries : 1;
    let lastError;
    for (let attempt = 0; attempt < totalAttempts; attempt++) {
      const canRetry = attempt < totalAttempts - 1;
      try {
        const cfg = { ...baseConfig };
        const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData;
        const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data);
        if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
          const data = cfg.data;
          // Keep a caller-supplied origin only when it mentions "mcp";
          // everything else is labelled as coming from this SDK version.
          const keepOrigin = typeof data.origin === "string" && data.origin.includes("mcp");
          cfg.data = {
            ...data,
            origin: keepOrigin ? data.origin : `js-sdk@${version}`
          };
        }
        if (isFormDataBody) {
          // Drop any explicit content type so it is not forced onto a multipart body.
          cfg.headers = { ...cfg.headers || {} };
          delete cfg.headers["Content-Type"];
          delete cfg.headers["content-type"];
        }
        const res = await this.instance.request(cfg);
        if (res.status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        return res;
      } catch (err) {
        lastError = err;
        if (err?.response?.status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        throw err;
      }
    }
    throw lastError ?? new Error("Unexpected HTTP client error");
  }
  /**
   * @param {number} seconds - Delay in seconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  sleep(seconds) {
    return new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
  }
  /**
   * POSTs `body` to `endpoint`.
   * @param {string} endpoint
   * @param {*} body
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  post(endpoint, body, options) {
    return this.request({
      method: "post",
      url: endpoint,
      data: body,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /**
   * POSTs a FormData body to `endpoint`.
   * @param {string} endpoint
   * @param {FormData} formData
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  postMultipart(endpoint, formData, options) {
    return this.request({
      method: "post",
      url: endpoint,
      data: formData,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /** Sends a GET request to `endpoint` with optional headers. */
  get(endpoint, headers) {
    return this.request({ method: "get", url: endpoint, headers });
  }
  /** Sends a DELETE request to `endpoint` with optional headers. */
  delete(endpoint, headers) {
    return this.request({ method: "delete", url: endpoint, headers });
  }
  /**
   * PATCHes `body` to `endpoint`.
   * @param {string} endpoint
   * @param {*} body
   * @param {{headers?: object, timeoutMs?: number}} [options]
   */
  patch(endpoint, body, options) {
    return this.request({
      method: "patch",
      url: endpoint,
      data: body,
      headers: options?.headers,
      timeout: options?.timeoutMs
    });
  }
  /**
   * Builds a headers object, adding `x-idempotency-key` when a key is given.
   * @param {string} [idempotencyKey]
   * @returns {object}
   */
  prepareHeaders(idempotencyKey) {
    const headers = {};
    if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
    return headers;
  }
};
// src/v2/types.ts
/**
 * Error type raised by the SDK. Carries the HTTP status, an error code,
 * extra details and (optionally) the id of the job the error relates to.
 */
var SdkError = class extends Error {
  status;
  code;
  details;
  jobId;
  /**
   * @param {string} message
   * @param {number} [status]
   * @param {string} [code]
   * @param {*} [details]
   * @param {string} [jobId]
   */
  constructor(message, status, code, details, jobId) {
    super(message);
    Object.assign(this, { name: "FirecrawlSdkError", status, code, details, jobId });
  }
};
/**
 * SdkError raised when a polled job does not finish within the allotted time.
 * The error code is always "JOB_TIMEOUT".
 */
var JobTimeoutError = class extends SdkError {
  timeoutSeconds;
  /**
   * @param {string} jobId - Id of the job that timed out.
   * @param {number} timeoutSeconds - The timeout that was exceeded, in seconds.
   * @param {string} [jobType="batch"] - "batch" is described as a batch scrape; any other value as a crawl.
   */
  constructor(jobId, timeoutSeconds, jobType = "batch") {
    const label = jobType === "batch" ? "Batch scrape" : "Crawl";
    const message = `${label} job ${jobId} did not complete within ${timeoutSeconds} seconds`;
    super(message, void 0, "JOB_TIMEOUT", void 0, jobId);
    this.name = "JobTimeoutError";
    this.timeoutSeconds = timeoutSeconds;
  }
};
// src/utils/zodSchemaToJson.ts
import { zodToJsonSchema as zodToJsonSchemaLib } from "zod-to-json-schema";
/**
 * Duck-type check for a Zod schema.
 *
 * A value qualifies when it is an object that either has a `_zod` property
 * of type "object" (v4 marker) or has `_def` plus a `safeParse` or `parse`
 * function (v3 marker).
 *
 * @param {*} value
 * @returns {boolean}
 */
function isZodSchema(value) {
  if (!value || typeof value !== "object") return false;
  if ("_zod" in value && typeof value._zod === "object") return true;
  if (!("_def" in value)) return false;
  return typeof value.safeParse === "function" || typeof value.parse === "function";
}
/**
 * Checks whether a value carries the Zod v4 marker: an object with a `_zod`
 * property whose `typeof` is "object".
 *
 * @param {*} schema
 * @returns {boolean}
 */
function isZodV4Schema(schema) {
  const isObject = Boolean(schema) && typeof schema === "object";
  return isObject && "_zod" in schema && typeof schema._zod === "object";
}
/**
 * Attempts to convert a Zod v4 schema to JSON Schema via a static
 * `toJSONSchema` function found on the schema's constructor.
 *
 * @param {*} schema
 * @returns {*} The conversion result, or null when the value is not a v4
 *   schema, no `toJSONSchema` function is found, or the conversion throws.
 */
function tryZodV4Conversion(schema) {
  if (!isZodV4Schema(schema)) return null;
  let converted = null;
  try {
    const owner = schema.constructor?.prototype?.constructor;
    const canConvert = owner && typeof owner.toJSONSchema === "function";
    if (canConvert) {
      converted = owner.toJSONSchema(schema);
    }
  } catch {
    // Conversion is best-effort; fall back to null.
    converted = null;
  }
  return converted;
}
/**
 * Converts a Zod schema to JSON Schema.
 *
 * Values that are not Zod schemas are returned unchanged. For Zod schemas the
 * v4 conversion is tried first, then `zod-to-json-schema`; if that library
 * throws, the original schema is returned.
 *
 * @param {*} schema
 * @returns {*} The JSON Schema, or the input when it could not be converted.
 */
function zodSchemaToJsonSchema(schema) {
  if (isZodSchema(schema)) {
    const fromV4 = tryZodV4Conversion(schema);
    if (fromV4) {
      return fromV4;
    }
    try {
      return zodToJsonSchemaLib(schema);
    } catch {
      return schema;
    }
  }
  return schema;
}
/**
 * Heuristic for an object that looks like a Zod schema's `.shape`: a
 * non-array object with at least one value that is itself an object with a
 * truthy `_def` and a `safeParse` function.
 *
 * @param {*} obj
 * @returns {boolean}
 */
function looksLikeZodShape(obj) {
  if (!obj || typeof obj !== "object" || Array.isArray(obj)) return false;
  for (const member of Object.values(obj)) {
    const schemaLike = member && typeof member === "object" && member._def && typeof member.safeParse === "function";
    if (schemaLike) return true;
  }
  return false;
}
// src/v2/utils/validation.ts
/**
 * Validates the `formats` list of a scrape request.
 *
 * String entries are accepted, except the bare string "json". Object entries
 * are checked by `type`; Zod schemas on "json" and "changeTracking" entries
 * are replaced in place with their JSON Schema conversion.
 *
 * @param {Array<string|object>} [formats]
 * @throws {Error} When an entry is malformed.
 */
function ensureValidFormats(formats) {
  if (!formats) return;
  for (const entry of formats) {
    if (typeof entry === "string") {
      if (entry === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      continue;
    }
    switch (entry.type) {
      case "json": {
        if (!entry.prompt && !entry.schema) {
          throw new Error("json format requires either 'prompt' or 'schema' (or both)");
        }
        const candidate = entry.schema;
        if (isZodSchema(candidate)) {
          entry.schema = zodSchemaToJsonSchema(candidate);
        } else if (looksLikeZodShape(candidate)) {
          throw new Error(
            "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
          );
        }
        break;
      }
      case "changeTracking": {
        const candidate = entry.schema;
        if (isZodSchema(candidate)) {
          entry.schema = zodSchemaToJsonSchema(candidate);
        } else if (looksLikeZodShape(candidate)) {
          throw new Error(
            "changeTracking format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
          );
        }
        break;
      }
      case "question": {
        const valid = typeof entry.question === "string" && entry.question.trim().length !== 0;
        if (!valid) {
          throw new Error("question format requires a non-empty 'question' string");
        }
        break;
      }
      case "highlights": {
        const valid = typeof entry.query === "string" && entry.query.trim().length !== 0;
        if (!valid) {
          throw new Error("highlights format requires a non-empty 'query' string");
        }
        break;
      }
      case "query": {
        const valid = typeof entry.prompt === "string" && entry.prompt.trim().length !== 0;
        if (!valid) {
          throw new Error("query format requires a non-empty 'prompt' string");
        }
        const { mode } = entry;
        if (mode != null && mode !== "freeform" && mode !== "directQuote") {
          throw new Error("query format mode must be 'freeform' or 'directQuote'");
        }
        break;
      }
      case "screenshot": {
        const { quality } = entry;
        if (quality != null && (typeof quality !== "number" || quality < 0)) {
          throw new Error("screenshot.quality must be a non-negative number");
        }
        break;
      }
      default:
        break;
    }
  }
}
/**
 * Validates scrape options: `timeout` must be positive, `waitFor` must not
 * be negative, and `formats` must pass `ensureValidFormats`.
 *
 * @param {object} [options]
 * @throws {Error} On the first invalid option.
 */
function ensureValidScrapeOptions(options) {
  if (!options) return;
  const { timeout, waitFor } = options;
  if (timeout != null && timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  if (waitFor != null && waitFor < 0) {
    throw new Error("waitFor must be non-negative");
  }
  ensureValidFormats(options.formats);
}
/**
 * Validates the `formats` list of a parse request.
 *
 * Rejects the formats parse does not support (screenshot, changeTracking,
 * branding, audio, video) in both string and object form, rejects the bare
 * string "json", and validates json / question / highlights / query objects.
 * Zod schemas on "json" entries are replaced in place with JSON Schema.
 *
 * @param {Array<string|object>} [formats]
 * @throws {Error} When an entry is unsupported or malformed.
 */
function ensureValidParseFormats(formats) {
  if (!formats) return;
  // Returns the rejection message for a format parse cannot handle, else null.
  const unsupportedMessage = (kind) => {
    switch (kind) {
      case "screenshot":
        return "parse does not support screenshot format";
      case "changeTracking":
        return "parse does not support changeTracking format";
      case "branding":
        return "parse does not support branding format";
      case "audio":
      case "video":
        return `parse does not support ${kind} format`;
      default:
        return null;
    }
  };
  for (const entry of formats) {
    const isName = typeof entry === "string";
    if (isName && entry === "json") {
      throw new Error("json format must be an object with { type: 'json', prompt, schema }");
    }
    const kind = isName ? entry : entry.type;
    const rejection = unsupportedMessage(kind);
    if (rejection !== null) {
      throw new Error(rejection);
    }
    if (isName) continue;
    switch (kind) {
      case "json": {
        if (!entry.prompt && !entry.schema) {
          throw new Error("json format requires either 'prompt' or 'schema' (or both)");
        }
        const candidate = entry.schema;
        if (isZodSchema(candidate)) {
          entry.schema = zodSchemaToJsonSchema(candidate);
        } else if (looksLikeZodShape(candidate)) {
          throw new Error(
            "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format."
          );
        }
        break;
      }
      case "question": {
        const valid = typeof entry.question === "string" && entry.question.trim().length !== 0;
        if (!valid) {
          throw new Error("question format requires a non-empty 'question' string");
        }
        break;
      }
      case "highlights": {
        const valid = typeof entry.query === "string" && entry.query.trim().length !== 0;
        if (!valid) {
          throw new Error("highlights format requires a non-empty 'query' string");
        }
        break;
      }
      case "query": {
        const valid = typeof entry.prompt === "string" && entry.prompt.trim().length !== 0;
        if (!valid) {
          throw new Error("query format requires a non-empty 'prompt' string");
        }
        const { mode } = entry;
        if (mode != null && mode !== "freeform" && mode !== "directQuote") {
          throw new Error("query format mode must be 'freeform' or 'directQuote'");
        }
        break;
      }
      default:
        break;
    }
  }
}
/**
 * Validates options for a parse request.
 *
 * `timeout` must be positive; browser-oriented and cache-related options are
 * rejected when present (even with a null value, since the check is against
 * `undefined`); `proxy` may only be "basic" or "auto"; and `formats` must
 * pass `ensureValidParseFormats`.
 *
 * @param {object} [options]
 * @throws {Error} On the first invalid or unsupported option.
 */
function ensureValidParseOptions(options) {
  if (!options) return;
  if (options.timeout != null && options.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  const rejected = [
    ["waitFor", "parse does not support waitFor"],
    ["actions", "parse does not support actions"],
    ["location", "parse does not support location overrides"],
    ["mobile", "parse does not support mobile rendering"]
  ];
  for (const [key, message] of rejected) {
    if (options[key] !== void 0) {
      throw new Error(message);
    }
  }
  const cacheKeys = ["maxAge", "minAge", "storeInCache", "lockdown"];
  if (cacheKeys.some((key) => options[key] !== void 0)) {
    throw new Error("parse does not support cache/index options");
  }
  const { proxy } = options;
  const proxyAllowed = proxy === void 0 || proxy === "basic" || proxy === "auto";
  if (!proxyAllowed) {
    throw new Error("parse only supports proxy values of 'basic' or 'auto'");
  }
  ensureValidParseFormats(options.formats);
}
// src/v2/utils/errorHandler.ts
import "axios";
/**
 * Throws an SdkError describing an unsuccessful response.
 *
 * The message is the body's `error`, else its `message`, else a generic
 * "Request failed" text naming the status and the attempted action.
 *
 * @param {{status: number, data?: object}} resp
 * @param {string} action - Human-readable description of the operation.
 * @throws {SdkError} Always.
 */
function throwForBadResponse(resp, action) {
  const { status } = resp;
  const payload = resp.data || {};
  const fallback = `Request failed (${status}) while trying to ${action}`;
  const message = payload?.error || payload?.message || fallback;
  throw new SdkError(message, status, void 0, payload?.details);
}
/**
 * Re-throws an axios error as an SdkError.
 *
 * Message: response body `error`, else the axios message, else a generic
 * text. Code: response body `code`, else the axios code. Details: response
 * body `details`, falling back to the whole body.
 *
 * @param {object} err - The axios error.
 * @param {string} action - Human-readable description of the operation.
 * @throws {SdkError} Always.
 */
function normalizeAxiosError(err, action) {
  const response = err.response;
  const status = response?.status;
  const payload = response?.data;
  const statusSuffix = status ? ` (${status})` : "";
  const fallback = `Request failed${statusSuffix} while trying to ${action}`;
  throw new SdkError(
    payload?.error || err.message || fallback,
    status,
    payload?.code || err.code,
    payload?.details ?? payload
  );
}
/**
 * Decides whether a polling error should be retried.
 *
 * JobTimeoutError and errors whose `status` is in the 4xx range are not
 * retryable. Everything else (5xx, network failures, timeouts, and any
 * unrecognised error) is treated as retryable.
 *
 * @param {*} err
 * @returns {boolean}
 */
function isRetryableError(err) {
  if (err instanceof JobTimeoutError) {
    return false;
  }
  const carriesStatus = err instanceof SdkError || err && typeof err === "object" && "status" in err;
  if (carriesStatus) {
    const { status } = err;
    if (status) {
      if (status >= 400 && status < 500) return false;
      if (status >= 500) return true;
    }
  }
  // Network-level axios failure: the request never got a response.
  const noResponse = err?.isAxiosError && !err.response;
  if (noResponse) {
    return true;
  }
  const timedOut = err?.code === "ECONNABORTED" || err?.message?.includes("timeout");
  if (timedOut) {
    return true;
  }
  // Unknown errors default to retryable.
  return true;
}
// src/v2/methods/scrape.ts
/**
 * Scrapes a single URL via POST /v2/scrape.
 *
 * The URL is trimmed and all `options` are copied onto the request payload.
 * When `options.timeout` is a number, the HTTP timeout is that value plus
 * 5000.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} url
 * @param {object} [options] - Scrape options; validated before sending.
 * @returns {Promise<object>} The `data` field of the response, or `{}`.
 * @throws {Error} When the URL is empty or the options are invalid.
 */
async function scrape(http, url, options) {
  const target = url ? url.trim() : "";
  if (!target) {
    throw new Error("URL cannot be empty");
  }
  if (options) ensureValidScrapeOptions(options);
  const payload = { url: target };
  if (options) Object.assign(payload, options);
  const requestOptions = typeof options?.timeout === "number" ? { timeoutMs: options.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/scrape", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "scrape");
    }
    return res.data.data || {};
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "scrape");
    throw err;
  }
}
/**
 * Runs code or a prompt against a scrape job's browser via
 * POST /v2/scrape/{jobId}/interact.
 *
 * At least one of `args.code` / `args.prompt` must be a non-blank string.
 * `language` defaults to "node". When `args.timeout` is set, the HTTP timeout
 * is `timeout * 1000 + 5000`.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} jobId
 * @param {object} args
 * @returns {Promise<*>} The response body.
 * @throws {Error} When the job id is empty or neither code nor prompt is given.
 */
async function interact(http, jobId, args) {
  if (!jobId || !jobId.trim()) {
    throw new Error("Job ID cannot be empty");
  }
  const codeGiven = args?.code && args.code.trim();
  const promptGiven = args?.prompt && args.prompt.trim();
  if (!codeGiven && !promptGiven) {
    throw new Error("Either 'code' or 'prompt' must be provided");
  }
  const body = {
    ...(codeGiven ? { code: args.code } : {}),
    ...(promptGiven ? { prompt: args.prompt } : {}),
    language: args.language ?? "node",
    ...(args.timeout != null ? { timeout: args.timeout } : {}),
    ...(args.origin ? { origin: args.origin } : {})
  };
  const requestOptions = args.timeout != null ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {};
  try {
    const res = await http.post(`/v2/scrape/${jobId}/interact`, body, requestOptions);
    if (res.status !== 200) {
      throwForBadResponse(res, "interact with scrape browser");
    }
    return res.data;
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, "interact with scrape browser");
    }
    throw err;
  }
}
/**
 * Stops an interaction session via DELETE /v2/scrape/{jobId}/interact.
 *
 * @param {object} http - Client exposing `delete(endpoint)`.
 * @param {string} jobId
 * @returns {Promise<*>} The response body.
 * @throws {Error} When the job id is empty.
 */
async function stopInteraction(http, jobId) {
  const hasId = Boolean(jobId) && Boolean(jobId.trim());
  if (!hasId) {
    throw new Error("Job ID cannot be empty");
  }
  const action = "stop interaction";
  try {
    const res = await http.delete(`/v2/scrape/${jobId}/interact`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    return res.data;
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
// src/v2/methods/parse.ts
/**
 * Normalises file data into a Blob for upload.
 *
 * An existing Blob is returned as-is unless a different `contentType` is
 * requested, in which case it is re-wrapped. Buffers, ArrayBuffers and typed
 * array views are wrapped with the given content type. Strings are wrapped
 * with the given type, defaulting to "text/plain; charset=utf-8".
 *
 * @param {Blob|Buffer|ArrayBuffer|ArrayBufferView|string} input
 * @param {string} [contentType]
 * @returns {Blob}
 * @throws {Error} For any other input type.
 */
function toUploadBlob(input, contentType) {
  const blobAvailable = typeof Blob !== "undefined";
  if (blobAvailable && input instanceof Blob) {
    const needsRetype = contentType && input.type !== contentType;
    return needsRetype ? new Blob([input], { type: contentType }) : input;
  }
  const isBinary = typeof Buffer !== "undefined" && Buffer.isBuffer(input) || input instanceof ArrayBuffer || ArrayBuffer.isView(input);
  if (isBinary) {
    return new Blob([input], { type: contentType });
  }
  if (typeof input === "string") {
    const type = contentType ?? "text/plain; charset=utf-8";
    return new Blob([input], { type });
  }
  throw new Error("Unsupported parse file data type");
}
/**
 * Uploads a file to POST /v2/parse as multipart form data.
 *
 * The form carries an `options` field (JSON) and a `file` field. `origin`
 * is kept when the caller supplies one and otherwise defaults to
 * `js-sdk@<version>`. When `options.timeout` is a number, the HTTP timeout
 * is that value plus 5000.
 *
 * @param {object} http - Client exposing `postMultipart(endpoint, formData, options)`.
 * @param {{filename: string, data: *, contentType?: string}} file
 * @param {object} [options] - Parse options; validated before sending.
 * @returns {Promise<object>} The `data` field of the response, or `{}`.
 * @throws {Error} When the filename or file data is empty, or options are invalid.
 */
async function parse(http, file, options) {
  if (!file || !file.filename || !file.filename.trim()) {
    throw new Error("filename cannot be empty");
  }
  if (file.data == null) {
    throw new Error("file data cannot be empty");
  }
  // Build the Blob once and reuse it for both the size check and the upload,
  // instead of copying the file data a second time.
  const blob = toUploadBlob(file.data, file.contentType);
  if (blob.size === 0) {
    throw new Error("file data cannot be empty");
  }
  if (options) ensureValidParseOptions(options);
  const version = getVersion();
  const normalizedOptions = {
    ...options ?? {},
    origin: options?.origin ?? `js-sdk@${version}`
  };
  const formData = new FormData();
  formData.append("options", JSON.stringify(normalizedOptions));
  formData.append("file", blob, file.filename.trim());
  const requestOptions = typeof normalizedOptions.timeout === "number" ? { timeoutMs: normalizedOptions.timeout + 5e3 } : {};
  try {
    const res = await http.postMultipart("/v2/parse", formData, requestOptions);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "parse");
    }
    return res.data.data || {};
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "parse");
    throw err;
  }
}
// src/v2/methods/search.ts
/**
 * Validates a search request and builds the /v2/search payload.
 *
 * Only fields that are present on the request are copied. `integration` is
 * trimmed, and `scrapeOptions` are validated before being attached.
 *
 * @param {object} req
 * @returns {object} The request payload.
 * @throws {Error} When the query is empty, `limit`/`timeout` is not
 *   positive, or both include and exclude domain lists are given.
 */
function prepareSearchPayload(req) {
  if (!req.query || !req.query.trim()) {
    throw new Error("Query cannot be empty");
  }
  if (req.limit != null && req.limit <= 0) {
    throw new Error("limit must be positive");
  }
  if (req.timeout != null && req.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  if (req.includeDomains?.length && req.excludeDomains?.length) {
    throw new Error(
      "includeDomains and excludeDomains cannot both be specified"
    );
  }
  const payload = { query: req.query };
  // Copied when truthy.
  for (const key of ["sources", "categories", "includeDomains", "excludeDomains"]) {
    if (req[key]) payload[key] = req[key];
  }
  // Copied when not null/undefined (so 0, false and "" pass through).
  for (const key of ["limit", "tbs", "location", "ignoreInvalidURLs", "timeout"]) {
    if (req[key] != null) payload[key] = req[key];
  }
  const integration = req.integration && req.integration.trim();
  if (integration) payload.integration = integration;
  if (req.origin) payload.origin = req.origin;
  if (req.scrapeOptions) {
    ensureValidScrapeOptions(req.scrapeOptions);
    payload.scrapeOptions = req.scrapeOptions;
  }
  return payload;
}
/**
 * Normalises a list of search results.
 *
 * Object entries are passed through unchanged; anything else (for example a
 * bare URL string) is wrapped as `{ url: item }`.
 *
 * The previous implementation inspected object entries for document fields
 * (markdown, html, ...) but pushed the item unchanged in both branches, so
 * that check has been removed.
 *
 * @param {Iterable<*>} arr
 * @returns {Array<object>}
 */
function transformArray(arr) {
  return Array.from(arr, (item) => item && typeof item === "object" ? item : { url: item });
}
/**
 * Runs a search via POST /v2/search.
 *
 * When `request.timeout` is a number, the HTTP timeout is that value plus
 * 5000. Only the `web`, `news` and `images` groups present in the response
 * are returned, each normalised with `transformArray`.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {object} request
 * @returns {Promise<{web?: Array, news?: Array, images?: Array}>}
 */
async function search(http, request) {
  const payload = prepareSearchPayload(request);
  const requestOptions = typeof request.timeout === "number" ? { timeoutMs: request.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/search", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "search");
    }
    const data = res.data.data || {};
    const grouped = {};
    for (const source of ["web", "news", "images"]) {
      if (data[source]) grouped[source] = transformArray(data[source]);
    }
    return grouped;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "search");
    throw err;
  }
}
// src/v2/methods/map.ts
/**
 * Builds the /v2/map payload from a URL and optional settings.
 *
 * The URL is trimmed; option fields are copied only when set, and
 * `integration` is trimmed and skipped when blank.
 *
 * @param {string} url
 * @param {object} [options]
 * @returns {object} The request payload.
 * @throws {Error} When the URL is empty.
 */
function prepareMapPayload(url, options) {
  const target = url ? url.trim() : "";
  if (!target) throw new Error("URL cannot be empty");
  const payload = { url: target };
  if (!options) return payload;
  const passthrough = ["sitemap", "search", "includeSubdomains", "ignoreQueryParameters", "limit", "timeout"];
  for (const key of passthrough) {
    if (options[key] != null) payload[key] = options[key];
  }
  if (options.integration != null) {
    const integration = options.integration.trim();
    if (integration) payload.integration = integration;
  }
  if (options.origin) payload.origin = options.origin;
  if (options.location != null) payload.location = options.location;
  return payload;
}
/**
 * Maps a site via POST /v2/map and returns its links.
 *
 * String links become `{ url }`; object links are reduced to
 * `{ url, title, description }`; other entries are dropped. When
 * `options.timeout` is a number, the HTTP timeout is that value plus 5000.
 *
 * @param {object} http - Client exposing `post(endpoint, body, options)`.
 * @param {string} url
 * @param {object} [options]
 * @returns {Promise<{links: Array<object>}>}
 */
async function map(http, url, options) {
  const payload = prepareMapPayload(url, options);
  const requestOptions = typeof options?.timeout === "number" ? { timeoutMs: options.timeout + 5e3 } : {};
  try {
    const res = await http.post("/v2/map", payload, requestOptions);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, "map");
    }
    const links = [];
    for (const entry of res.data.links || []) {
      if (typeof entry === "string") {
        links.push({ url: entry });
        continue;
      }
      if (entry && typeof entry === "object") {
        const { url: link, title, description } = entry;
        links.push({ url: link, title, description });
      }
    }
    return { links };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "map");
    throw err;
  }
}
// src/v2/utils/pagination.ts
/**
 * Follows `next` links and accumulates documents from each page.
 *
 * Stops when there is no next URL, a page request throws, a page is not
 * marked successful, or one of the optional limits is reached.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string|null} nextUrl - URL of the first page to fetch.
 * @param {Array} initial - Documents already collected; copied, not mutated.
 * @param {{maxPages?: number, maxResults?: number, maxWaitTime?: number}} [pagination]
 *   `maxWaitTime` is compared against elapsed seconds.
 * @returns {Promise<Array>} The initial documents followed by the fetched ones.
 */
async function fetchAllPages(http, nextUrl, initial, pagination) {
  const collected = initial.slice();
  const { maxPages, maxResults, maxWaitTime } = pagination ?? {};
  const startedAt = Date.now();
  const resultCapReached = () => maxResults != null && collected.length >= maxResults;
  for (let url = nextUrl, pagesFetched = 0; url; pagesFetched += 1) {
    if (maxPages != null && pagesFetched >= maxPages) break;
    const elapsedSeconds = (Date.now() - startedAt) / 1e3;
    if (maxWaitTime != null && elapsedSeconds > maxWaitTime) break;
    let payload;
    try {
      payload = (await http.get(url)).data;
    } catch {
      // Best-effort pagination: return what has been gathered so far.
      break;
    }
    if (!payload?.success) break;
    const dataIsList = Array.isArray(payload.data);
    const items = dataIsList ? payload.data : payload.data?.pages || [];
    for (const item of items) {
      if (resultCapReached()) break;
      collected.push(item);
    }
    if (resultCapReached()) break;
    url = payload.next ?? (dataIsList ? null : payload.data?.next) ?? null;
  }
  return collected;
}
// src/v2/methods/crawl.ts
/**
 * Builds the /v2/crawl payload from a crawl request.
 *
 * The URL is trimmed; other fields are copied only when set, `integration`
 * is trimmed and skipped when blank, and `scrapeOptions` are validated
 * before being attached.
 *
 * @param {object} request
 * @returns {object} The request payload.
 * @throws {Error} When the URL is empty or the scrape options are invalid.
 */
function prepareCrawlPayload(request) {
  const target = request.url ? request.url.trim() : "";
  if (!target) throw new Error("URL cannot be empty");
  const data = { url: target };
  // Copied when truthy.
  for (const key of ["prompt", "excludePaths", "includePaths"]) {
    if (request[key]) data[key] = request[key];
  }
  // Copied when not null/undefined (so 0 and false pass through).
  const passthrough = [
    "maxDiscoveryDepth",
    "sitemap",
    "robotsUserAgent",
    "ignoreQueryParameters",
    "deduplicateSimilarURLs",
    "limit",
    "crawlEntireDomain",
    "allowExternalLinks",
    "allowSubdomains",
    "delay",
    "maxConcurrency",
    "regexOnFullURL",
    "webhook"
  ];
  for (const key of passthrough) {
    if (request[key] != null) data[key] = request[key];
  }
  if (request.integration != null) {
    const integration = request.integration.trim();
    if (integration) data.integration = integration;
  }
  if (request.origin) data.origin = request.origin;
  if (request.scrapeOptions) {
    ensureValidScrapeOptions(request.scrapeOptions);
    data.scrapeOptions = request.scrapeOptions;
  }
  if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention;
  return data;
}
/**
 * Starts a crawl job via POST /v2/crawl.
 *
 * @param {object} http - Client exposing `post(endpoint, body)`.
 * @param {object} request - Crawl request; see `prepareCrawlPayload`.
 * @returns {Promise<{id: *, url: *}>} The `id` and `url` from the response.
 */
async function startCrawl(http, request) {
  const payload = prepareCrawlPayload(request);
  const action = "start crawl";
  try {
    const res = await http.post("/v2/crawl", payload);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const { id, url } = res.data;
    return { id, url };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetches the status of a crawl job via GET /v2/crawl/{jobId}.
 *
 * With auto-pagination on (the default) and a `next` link in the response,
 * the remaining pages are fetched with `fetchAllPages` and the result's
 * `next` is null. Otherwise only the first page is returned, with `next`
 * passed through.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string} jobId
 * @param {object} [pagination] - `autoPaginate` plus the limits accepted by `fetchAllPages`.
 * @returns {Promise<object>} Job snapshot: id, status, completed, total,
 *   creditsUsed, expiresAt, next, data.
 */
async function getCrawlStatus(http, jobId, pagination) {
  const action = "get crawl status";
  try {
    const res = await http.get(`/v2/crawl/${jobId}`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const body = res.data;
    const firstPage = body.data || [];
    const snapshot = (next, data) => ({
      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
      creditsUsed: body.creditsUsed,
      expiresAt: body.expiresAt,
      next,
      data
    });
    const autoPaginate = pagination?.autoPaginate ?? true;
    if (autoPaginate && body.next) {
      return snapshot(null, await fetchAllPages(http, body.next, firstPage, pagination));
    }
    return snapshot(body.next ?? null, firstPage);
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Cancels a crawl job via DELETE /v2/crawl/{jobId}.
 *
 * @param {object} http - Client exposing `delete(url)`.
 * @param {string} jobId
 * @returns {Promise<boolean>} True when the response body's status is "cancelled".
 */
async function cancelCrawl(http, jobId) {
  const action = "cancel crawl";
  try {
    const res = await http.delete(`/v2/crawl/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    const reported = res.data?.status;
    return reported === "cancelled";
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Polls a crawl job until it reaches "completed", "failed" or "cancelled".
 *
 * Retryable polling errors are ignored and the loop continues; a
 * non-retryable SdkError is re-thrown as a new SdkError carrying the job id.
 * The delay between polls is at least one second.
 *
 * @param {object} http
 * @param {string} jobId
 * @param {number} [pollInterval=2] - Seconds between polls.
 * @param {number} [timeout] - Overall limit in seconds; unlimited when omitted.
 * @returns {Promise<object>} The final status snapshot.
 * @throws {JobTimeoutError} When the timeout elapses first.
 */
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
  const terminalStates = ["completed", "failed", "cancelled"];
  const startedAt = Date.now();
  for (;;) {
    try {
      const snapshot = await getCrawlStatus(http, jobId);
      if (terminalStates.includes(snapshot.status)) {
        return snapshot;
      }
    } catch (err) {
      if (!isRetryableError(err)) {
        // Attach the job id so callers can tell which job failed.
        throw err instanceof SdkError ? new SdkError(err.message, err.status, err.code, err.details, jobId) : err;
      }
    }
    const elapsedMs = Date.now() - startedAt;
    if (timeout != null && elapsedMs > timeout * 1e3) {
      throw new JobTimeoutError(jobId, timeout, "crawl");
    }
    const delayMs = Math.max(1e3, pollInterval * 1e3);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
/**
 * Starts a crawl and waits for it to finish.
 *
 * @param {object} http
 * @param {object} request - Crawl request; see `startCrawl`.
 * @param {number} [pollInterval=2] - Seconds between polls.
 * @param {number} [timeout] - Overall limit in seconds.
 * @returns {Promise<object>} The final status from `waitForCrawlCompletion`.
 */
async function crawl(http, request, pollInterval = 2, timeout) {
  const { id } = await startCrawl(http, request);
  return waitForCrawlCompletion(http, id, pollInterval, timeout);
}
/**
 * Fetches the error report for a crawl via GET /v2/crawl/{crawlId}/errors.
 *
 * The report is read from `res.data.data` when present, otherwise from
 * `res.data`. A 200 response with no body yields empty lists rather than
 * throwing a TypeError.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string} crawlId
 * @returns {Promise<{errors: Array, robotsBlocked: Array}>}
 */
async function getCrawlErrors(http, crawlId) {
  try {
    const res = await http.get(`/v2/crawl/${crawlId}/errors`);
    if (res.status !== 200) throwForBadResponse(res, "get crawl errors");
    const payload = res.data?.data ?? res.data;
    // payload is undefined/null when the response has no body.
    return { errors: payload?.errors || [], robotsBlocked: payload?.robotsBlocked || [] };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors");
    throw err;
  }
}
/**
 * Lists active crawls via GET /v2/crawl/active.
 *
 * Each crawl is reduced to `{ id, teamId, url, options }`; `teamId` falls
 * back to `team_id`, and `options` defaults to null.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @returns {Promise<{success: true, crawls: Array<object>}>}
 */
async function getActiveCrawls(http) {
  const action = "get active crawls";
  try {
    const res = await http.get(`/v2/crawl/active`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const crawls = [];
    for (const entry of res.data?.crawls || []) {
      crawls.push({
        id: entry.id,
        teamId: entry.teamId ?? entry.team_id,
        url: entry.url,
        options: entry.options ?? null
      });
    }
    return { success: true, crawls };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Previews crawl parameters derived from a prompt via
 * POST /v2/crawl/params-preview.
 *
 * The URL is trimmed; the prompt is sent as given. A top-level `warning` in
 * the response is copied onto the returned data object.
 *
 * @param {object} http - Client exposing `post(endpoint, body)`.
 * @param {string} url
 * @param {string} prompt
 * @returns {Promise<object>} The response `data`, or `{}`.
 * @throws {Error} When the URL or the prompt is empty.
 */
async function crawlParamsPreview(http, url, prompt) {
  const isBlank = (text) => !text || !text.trim();
  if (isBlank(url)) throw new Error("URL cannot be empty");
  if (isBlank(prompt)) throw new Error("Prompt cannot be empty");
  const action = "crawl params preview";
  try {
    const res = await http.post("/v2/crawl/params-preview", { url: url.trim(), prompt });
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const preview = res.data.data || {};
    const { warning } = res.data;
    if (warning) preview.warning = warning;
    return preview;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
// src/v2/methods/batch.ts
/**
 * Starts a batch scrape job via POST /v2/batch/scrape.
 *
 * Scrape `options` are validated and spread onto the payload; the remaining
 * settings are added only when set. An `idempotencyKey` is sent as a header
 * built by `http.prepareHeaders`.
 *
 * @param {object} http - Client exposing `post` and `prepareHeaders`.
 * @param {string[]} urls - Non-empty list of URLs.
 * @param {object} [settings] - options, webhook, appendToId, ignoreInvalidURLs,
 *   maxConcurrency, zeroDataRetention, idempotencyKey, integration, origin.
 * @returns {Promise<{id: *, url: *, invalidURLs: *}>}
 * @throws {Error} When `urls` is not a non-empty array.
 */
async function startBatchScrape(http, urls, {
  options,
  webhook,
  appendToId,
  ignoreInvalidURLs,
  maxConcurrency,
  zeroDataRetention,
  idempotencyKey,
  integration,
  origin
} = {}) {
  const hasUrls = Array.isArray(urls) && urls.length !== 0;
  if (!hasUrls) {
    throw new Error("URLs list cannot be empty");
  }
  const payload = { urls };
  if (options) {
    ensureValidScrapeOptions(options);
    Object.assign(payload, options);
  }
  // Copied when not null/undefined, in this order.
  const optionalFields = { webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention };
  for (const [key, value] of Object.entries(optionalFields)) {
    if (value != null) payload[key] = value;
  }
  if (integration != null) {
    const trimmed = integration.trim();
    if (trimmed) payload.integration = trimmed;
  }
  if (origin) payload.origin = origin;
  const action = "start batch scrape";
  try {
    const headers = http.prepareHeaders(idempotencyKey);
    const res = await http.post("/v2/batch/scrape", payload, { headers });
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const { id, url, invalidURLs } = res.data;
    return { id, url, invalidURLs: invalidURLs || void 0 };
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
/**
 * Fetches the status of a batch scrape job via GET /v2/batch/scrape/{jobId}.
 *
 * With auto-pagination on (the default) and a `next` link in the response,
 * the remaining pages are fetched with `fetchAllPages` and the result's
 * `next` is null. Otherwise only the first page is returned, with `next`
 * passed through.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string} jobId
 * @param {object} [pagination] - `autoPaginate` plus the limits accepted by `fetchAllPages`.
 * @returns {Promise<object>} Job snapshot: id, status, completed, total,
 *   creditsUsed, expiresAt, next, data.
 */
async function getBatchScrapeStatus(http, jobId, pagination) {
  const action = "get batch scrape status";
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}`);
    const succeeded = res.status === 200 && res.data?.success;
    if (!succeeded) {
      throwForBadResponse(res, action);
    }
    const body = res.data;
    const firstPage = body.data || [];
    const snapshot = (next, data) => ({
      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
      creditsUsed: body.creditsUsed,
      expiresAt: body.expiresAt,
      next,
      data
    });
    const autoPaginate = pagination?.autoPaginate ?? true;
    if (autoPaginate && body.next) {
      return snapshot(null, await fetchAllPages(http, body.next, firstPage, pagination));
    }
    return snapshot(body.next ?? null, firstPage);
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
/**
 * Cancels a batch scrape job via DELETE /v2/batch/scrape/{jobId}.
 *
 * @param {object} http - Client exposing `delete(url)`.
 * @param {string} jobId
 * @returns {Promise<boolean>} True when the response body's status is "cancelled".
 */
async function cancelBatchScrape(http, jobId) {
  const action = "cancel batch scrape";
  try {
    const res = await http.delete(`/v2/batch/scrape/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    const reported = res.data?.status;
    return reported === "cancelled";
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, action);
    }
    throw err;
  }
}
/**
 * Fetches the error report for a batch scrape via
 * GET /v2/batch/scrape/{jobId}/errors.
 *
 * The report is read from `res.data.data` when present, otherwise from
 * `res.data`. A 200 response with no body yields empty lists rather than
 * throwing a TypeError.
 *
 * @param {object} http - Client exposing `get(url)`.
 * @param {string} jobId
 * @returns {Promise<{errors: Array, robotsBlocked: Array}>}
 */
async function getBatchScrapeErrors(http, jobId) {
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}/errors`);
    if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
    const payload = res.data?.data ?? res.data;
    // payload is undefined/null when the response has no body.
    return {
      errors: payload?.errors || [],
      robotsBlocked: payload?.robotsBlocked || []
    };
  } catch (err) {
    if (err?.isAxiosError) {
      return normalizeAxiosError(err, "get batch scrape errors");
    }
    throw err;
  }
}
/**
 * Polls a batch scrape job until it reaches "completed", "failed" or
 * "cancelled".
 *
 * Retryable polling errors are ignored and the loop continues; a
 * non-retryable SdkError is re-thrown as a new SdkError carrying the job id.
 * The delay between polls is at least one second.
 *
 * @param {object} http
 * @param {string} jobId
 * @param {number} [pollInterval=2] - Seconds between polls.
 * @param {number} [timeout] - Overall limit in seconds; unlimited when omitted.
 * @returns {Promise<object>} The final status snapshot.
 * @throws {JobTimeoutError} When the timeout elapses first.
 */
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
  const terminalStates = ["completed", "failed", "cancelled"];
  const startedAt = Date.now();
  for (;;) {
    try {
      const snapshot = await getBatchScrapeStatus(http, jobId);
      if (terminalStates.includes(snapshot.status)) {
        return snapshot;
      }
    } catch (err) {
      if (!isRetryableError(err)) {
        // Attach the job id so callers can tell which job failed.
        throw err instanceof SdkError ? new SdkError(err.message, err.status, err.code, err.details, jobId) : err;
      }
    }
    const elapsedMs = Date.now() - startedAt;
    if (timeout != null && elapsedMs > timeout * 1e3) {
      throw new JobTimeoutError(jobId, timeout, "batch");
    }
    const delayMs = Math.max(1e3, pollInterval * 1e3);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
/**
 * Starts a batch scrape and waits for it to finish.
 *
 * @param {object} http
 * @param {string[]} urls
 * @param {object} [opts] - Passed to `startBatchScrape`; `pollInterval`
 *   (default 2) and `timeout` control the wait.
 * @returns {Promise<object>} The final status from `waitForBatchCompletion`.
 */
async function batchScrape(http, urls, opts = {}) {
  const { id } = await startBatchScrape(http, urls, opts);
  const interval = opts.pollInterval ?? 2;
  return waitForBatchCompletion(http, id, interval, opts.timeout);
}
// src/v2/methods/extract.ts
/**
 * Builds the /v2/extract payload.
 *
 * Fields are copied only when set. A Zod `schema` is converted to JSON
 * Schema, `integration` is trimmed, and `scrapeOptions` are validated before
 * being attached.
 *
 * @param {object} args
 * @returns {object} The request payload.
 */
function prepareExtractPayload(args) {
  const body = {};
  if (args.urls) body.urls = args.urls;
  if (args.prompt != null) body.prompt = args.prompt;
  const { schema } = args;
  if (schema != null) {
    body.schema = isZodSchema(schema) ? zodSchemaToJsonSchema(schema) : schema;
  }
  // Copied when not null/undefined (so false and "" pass through).
  const passthrough = ["systemPrompt", "allowExternalLinks", "enableWebSearch", "showSources", "ignoreInvalidURLs"];
  for (const key of passthrough) {
    if (args[key] != null) body[key] = args[key];
  }
  const integration = args.integration && args.integration.trim();
  if (integration) body.integration = integration;
  if (args.origin) body.origin = args.origin;
  if (args.agent) body.agent = args.agent;
  if (args.scrapeOptions) {
    ensureValidScrapeOptions(args.scrapeOptions);
    body.scrapeOptions = args.scrapeOptions;
  }
  return body;
}
/**
 * Starts an extract job via POST /v2/extract.
 *
 * @param {object} http - Client exposing `post(endpoint, body)`.
 * @param {object} args - Extract arguments; see `prepareExtractPayload`.
 * @returns {Promise<*>} The response body.
 */
async function startExtract(http, args) {
  const payload = prepareExtractPayload(args);
  const action = "extract";
  try {
    const res = await http.post("/v2/extract", payload);
    if (res.status !== 200) {
      throwForBadResponse(res, action);
    }
    return res.data;
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, action);
    throw err;
  }
}
/**
 * Fetch the current state of an extract job.
 *
 * @param http - Client exposing `get(url)`.
 * @param jobId - Identifier interpolated into `/v2/extract/{jobId}`.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function getExtractStatus(http, jobId) {
  try {
    const response = await http.get(`/v2/extract/${jobId}`);
    if (response.status !== 200) throwForBadResponse(response, "extract status");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "extract status");
  }
}
/**
 * Poll an extract job until it finishes or the optional timeout elapses.
 *
 * @param http - HTTP client forwarded to `getExtractStatus`.
 * @param jobId - Identifier of the extract job.
 * @param pollInterval - Seconds between polls; the effective delay is never below one second.
 * @param timeout - Optional limit in seconds. When exceeded, the most recent
 *                  status is returned rather than an error being raised.
 * @returns The latest status object.
 */
async function waitExtract(http, jobId, pollInterval = 2, timeout) {
  const terminalStates = ["completed", "failed", "cancelled"];
  const startedAt = Date.now();
  const delayMs = Math.max(1e3, pollInterval * 1e3);
  for (;;) {
    const job = await getExtractStatus(http, jobId);
    if (terminalStates.includes(job.status || "")) return job;
    const expired = timeout != null && Date.now() - startedAt > timeout * 1e3;
    if (expired) return job;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
/**
 * Start an extract job and, when the start response carries an id, wait for it.
 *
 * @param http - HTTP client forwarded to the start and wait helpers.
 * @param args - Extract arguments; `pollInterval` (default 2) and `timeout` drive the wait.
 * @returns The start response when it has no `id`, otherwise the result of `waitExtract`.
 */
async function extract(http, args) {
  const started = await startExtract(http, args);
  const jobId = started.id;
  return jobId
    ? waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout)
    : started;
}
// src/v2/methods/agent.ts
/**
 * Build the request body for an agent call.
 *
 * `prompt` is always copied (even when undefined). A schema recognised by
 * `isZodSchema` is converted with `zodSchemaToJsonSchema`; `integration` is
 * trimmed and dropped when blank; the remaining optional fields are copied
 * only when provided.
 *
 * @param args - Agent arguments.
 * @returns A plain object suitable for use as the request body.
 */
function prepareAgentPayload(args) {
  const payload = {};
  if (args.urls) payload.urls = args.urls;
  payload.prompt = args.prompt;
  if (args.schema != null) {
    payload.schema = isZodSchema(args.schema) ? zodSchemaToJsonSchema(args.schema) : args.schema;
  }
  const integration = args.integration && args.integration.trim();
  if (integration) payload.integration = integration;
  if (args.origin) payload.origin = args.origin;
  // Fields that are forwarded whenever they are neither null nor undefined.
  for (const field of ["maxCredits", "strictConstrainToURLs", "model", "webhook"]) {
    if (args[field] != null) payload[field] = args[field];
  }
  return payload;
}
/**
 * Submit an agent job.
 *
 * @param http - Client exposing `post(url, body)`.
 * @param args - Agent arguments, converted with `prepareAgentPayload`.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function startAgent(http, args) {
  const body = prepareAgentPayload(args);
  try {
    const response = await http.post("/v2/agent", body);
    if (response.status !== 200) throwForBadResponse(response, "agent");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "agent");
  }
}
/**
 * Fetch the current state of an agent job.
 *
 * @param http - Client exposing `get(url)`.
 * @param jobId - Identifier interpolated into `/v2/agent/{jobId}`.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function getAgentStatus(http, jobId) {
  try {
    const response = await http.get(`/v2/agent/${jobId}`);
    if (response.status !== 200) throwForBadResponse(response, "agent status");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "agent status");
  }
}
/**
 * Poll an agent job until it finishes or the optional timeout elapses.
 *
 * @param http - HTTP client forwarded to `getAgentStatus`.
 * @param jobId - Identifier of the agent job.
 * @param pollInterval - Seconds between polls; the effective delay is never below one second.
 * @param timeout - Optional limit in seconds. When exceeded, the most recent
 *                  status is returned rather than an error being raised.
 * @returns The latest status object.
 */
async function waitAgent(http, jobId, pollInterval = 2, timeout) {
  const terminalStates = ["completed", "failed", "cancelled"];
  const startedAt = Date.now();
  const delayMs = Math.max(1e3, pollInterval * 1e3);
  for (;;) {
    const job = await getAgentStatus(http, jobId);
    if (terminalStates.includes(job.status || "")) return job;
    const expired = timeout != null && Date.now() - startedAt > timeout * 1e3;
    if (expired) return job;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
/**
 * Start an agent job and, when the start response carries an id, wait for it.
 *
 * @param http - HTTP client forwarded to the start and wait helpers.
 * @param args - Agent arguments; `pollInterval` (default 2) and `timeout` drive the wait.
 * @returns The start response when it has no `id`, otherwise the result of `waitAgent`.
 */
async function agent(http, args) {
  const started = await startAgent(http, args);
  const jobId = started.id;
  return jobId
    ? waitAgent(http, jobId, args.pollInterval ?? 2, args.timeout)
    : started;
}
/**
 * Request cancellation of an agent job.
 *
 * @param http - Client exposing `delete(url)`.
 * @param jobId - Identifier interpolated into `/v2/agent/{jobId}`.
 * @returns `true` only when the response body's `success` field is exactly `true`.
 */
async function cancelAgent(http, jobId) {
  try {
    const response = await http.delete(`/v2/agent/${jobId}`);
    if (response.status !== 200) throwForBadResponse(response, "cancel agent");
    const succeeded = response.data?.success;
    return succeeded === true;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "cancel agent");
  }
}
// src/v2/methods/browser.ts
/**
 * Create a browser session.
 *
 * @param http - Client exposing `post(url, body)`.
 * @param args - Optional settings. `ttl`, `activityTtl`, `streamWebView`,
 *               `profile` and `integration` are sent when not null/undefined;
 *               `origin` is sent when truthy.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function browser(http, args = {}) {
  const payload = {};
  for (const field of ["ttl", "activityTtl", "streamWebView", "profile", "integration"]) {
    if (args[field] != null) payload[field] = args[field];
  }
  if (args.origin) payload.origin = args.origin;
  try {
    const response = await http.post("/v2/browser", payload);
    if (response.status !== 200) throwForBadResponse(response, "create browser session");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "create browser session");
  }
}
/**
 * Run code inside an existing browser session.
 *
 * @param http - Client exposing `post(url, body, options)`.
 * @param sessionId - Identifier interpolated into `/v2/browser/{sessionId}/execute`.
 * @param args - `code` to run, optional `language` (defaults to "bash") and
 *               optional `timeout` in seconds.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function browserExecute(http, sessionId, args) {
  const hasTimeout = args.timeout != null;
  const payload = {
    code: args.code,
    language: args.language ?? "bash"
  };
  if (hasTimeout) payload.timeout = args.timeout;
  // When a timeout is given, the third argument carries it in milliseconds plus a 5000 ms margin.
  const requestOptions = hasTimeout ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {};
  try {
    const response = await http.post(`/v2/browser/${sessionId}/execute`, payload, requestOptions);
    if (response.status !== 200) throwForBadResponse(response, "execute browser code");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "execute browser code");
  }
}
/**
 * Delete a browser session.
 *
 * @param http - Client exposing `delete(url)`.
 * @param sessionId - Identifier interpolated into `/v2/browser/{sessionId}`.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function deleteBrowser(http, sessionId) {
  try {
    const response = await http.delete(`/v2/browser/${sessionId}`);
    if (response.status !== 200) throwForBadResponse(response, "delete browser session");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "delete browser session");
  }
}
/**
 * List browser sessions, optionally filtered by status.
 *
 * @param http - Client exposing `get(url)`.
 * @param args - Optional filter; a truthy `status` is sent as the `status` query parameter.
 * @returns The response body when the status code is 200; other status codes
 *          are handed to `throwForBadResponse`.
 */
async function listBrowsers(http, args = {}) {
  // Percent-encode the filter so characters such as "&", "#" or spaces cannot
  // break the URL or introduce additional query parameters.
  const query = args.status ? `?status=${encodeURIComponent(args.status)}` : "";
  try {
    const response = await http.get(`/v2/browser${query}`);
    if (response.status !== 200) throwForBadResponse(response, "list browser sessions");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "list browser sessions");
  }
}
// src/v2/methods/usage.ts
/**
 * Read the team's concurrency figures.
 *
 * The values are taken from the body's nested `data` object when present,
 * otherwise from the body itself; `max_concurrency` is accepted as a fallback
 * spelling of `maxConcurrency`.
 *
 * @param http - Client exposing `get(url)`.
 * @returns An object with `concurrency` and `maxConcurrency`.
 */
async function getConcurrency(http) {
  try {
    const response = await http.get("/v2/concurrency-check");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get concurrency");
    const info = response.data.data || response.data;
    const maxConcurrency = info.maxConcurrency ?? info.max_concurrency;
    return { concurrency: info.concurrency, maxConcurrency };
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get concurrency");
  }
}
/**
 * Read the team's credit usage.
 *
 * Values come from the body's nested `data` object when present, otherwise
 * from the body itself. Each field accepts a camelCase or snake_case key;
 * `remainingCredits` falls back to 0 and the billing period bounds to null.
 *
 * @param http - Client exposing `get(url)`.
 * @returns `{ remainingCredits, planCredits, billingPeriodStart, billingPeriodEnd }`.
 */
async function getCreditUsage(http) {
  try {
    const response = await http.get("/v2/team/credit-usage");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get credit usage");
    const usage = response.data.data || response.data;
    const remainingCredits = usage.remainingCredits ?? usage.remaining_credits ?? 0;
    const planCredits = usage.planCredits ?? usage.plan_credits;
    const billingPeriodStart = usage.billingPeriodStart ?? usage.billing_period_start ?? null;
    const billingPeriodEnd = usage.billingPeriodEnd ?? usage.billing_period_end ?? null;
    return { remainingCredits, planCredits, billingPeriodStart, billingPeriodEnd };
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get credit usage");
  }
}
/**
 * Read the team's token usage.
 *
 * Values come from the body's nested `data` object when present, otherwise
 * from the body itself. Each field accepts a camelCase or snake_case key;
 * `remainingTokens` falls back to 0 and the billing period bounds to null.
 *
 * @param http - Client exposing `get(url)`.
 * @returns `{ remainingTokens, planTokens, billingPeriodStart, billingPeriodEnd }`.
 */
async function getTokenUsage(http) {
  try {
    const response = await http.get("/v2/team/token-usage");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get token usage");
    const usage = response.data.data || response.data;
    const remainingTokens = usage.remainingTokens ?? usage.remaining_tokens ?? 0;
    const planTokens = usage.planTokens ?? usage.plan_tokens;
    const billingPeriodStart = usage.billingPeriodStart ?? usage.billing_period_start ?? null;
    const billingPeriodEnd = usage.billingPeriodEnd ?? usage.billing_period_end ?? null;
    return { remainingTokens, planTokens, billingPeriodStart, billingPeriodEnd };
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get token usage");
  }
}
/**
 * Read the team's queue status.
 *
 * @param http - Client exposing `get(url)`.
 * @returns The response body when the status code is 200 and `success` is truthy;
 *          any other response is handed to `throwForBadResponse`.
 */
async function getQueueStatus(http) {
  try {
    const response = await http.get("/v2/team/queue-status");
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get queue status");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get queue status");
  }
}
/**
 * Read historical credit usage.
 *
 * @param http - Client exposing `get(url)`.
 * @param byApiKey - When truthy, `?byApiKey=true` is appended to the request URL.
 * @returns The response body when the status code is 200 and `success` is truthy;
 *          any other response is handed to `throwForBadResponse`.
 */
async function getCreditUsageHistorical(http, byApiKey) {
  const suffix = byApiKey ? "?byApiKey=true" : "";
  try {
    const response = await http.get(`/v2/team/credit-usage/historical${suffix}`);
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get credit usage historical");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get credit usage historical");
  }
}
/**
 * Read historical token usage.
 *
 * @param http - Client exposing `get(url)`.
 * @param byApiKey - When truthy, `?byApiKey=true` is appended to the request URL.
 * @returns The response body when the status code is 200 and `success` is truthy;
 *          any other response is handed to `throwForBadResponse`.
 */
async function getTokenUsageHistorical(http, byApiKey) {
  const suffix = byApiKey ? "?byApiKey=true" : "";
  try {
    const response = await http.get(`/v2/team/token-usage/historical${suffix}`);
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) throwForBadResponse(response, "get token usage historical");
    return response.data;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get token usage historical");
  }
}
// src/v2/methods/monitor.ts
/**
 * Serialize an object into a URL query string.
 *
 * Entries whose value is null or undefined are skipped; every other value is
 * stringified with `String()` and encoded by `URLSearchParams`.
 *
 * @param params - Key/value pairs, or a falsy value for "no parameters".
 * @returns A string starting with "?" when at least one entry is kept, otherwise "".
 */
function queryString(params) {
  if (!params) return "";
  const search = new URLSearchParams();
  Object.entries(params).forEach(([name, raw]) => {
    if (raw === void 0 || raw === null) return;
    search.set(name, String(raw));
  });
  const encoded = search.toString();
  return encoded === "" ? "" : `?${encoded}`;
}
/**
 * Unwrap the `data` field of a response envelope.
 *
 * The response is considered usable when its status is 200, its body has a
 * truthy `success` flag and `data` is neither null nor undefined; anything
 * else is handed to `throwForBadResponse`.
 *
 * @param res - Response with `status` and `data` (the envelope).
 * @param action - Label passed to `throwForBadResponse`.
 * @returns The envelope's inner `data` value.
 */
function dataOrThrow(res, action) {
  const envelope = res.data;
  const usable = res.status === 200 && envelope?.success && envelope.data != null;
  if (!usable) throwForBadResponse(res, action);
  return res.data.data;
}
/**
 * Create a monitor.
 *
 * @param http - Client exposing `post(url, body)`.
 * @param request - Body sent to `/v2/monitor`.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function createMonitor(http, request) {
  try {
    return dataOrThrow(await http.post("/v2/monitor", request), "create monitor");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "create monitor");
  }
}
/**
 * List monitors.
 *
 * @param http - Client exposing `get(url)`.
 * @param options - Optional filters, serialized with `queryString` and appended to `/v2/monitor`.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function listMonitors(http, options) {
  try {
    const url = `/v2/monitor${queryString(options)}`;
    return dataOrThrow(await http.get(url), "list monitors");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "list monitors");
  }
}
/**
 * Fetch a single monitor.
 *
 * @param http - Client exposing `get(url)`.
 * @param monitorId - Identifier interpolated into `/v2/monitor/{monitorId}`.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function getMonitor(http, monitorId) {
  try {
    return dataOrThrow(await http.get(`/v2/monitor/${monitorId}`), "get monitor");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get monitor");
  }
}
/**
 * Update a monitor.
 *
 * @param http - Client exposing `patch(url, body)`.
 * @param monitorId - Identifier interpolated into `/v2/monitor/{monitorId}`.
 * @param request - Body sent with the PATCH request.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function updateMonitor(http, monitorId, request) {
  try {
    const response = await http.patch(`/v2/monitor/${monitorId}`, request);
    return dataOrThrow(response, "update monitor");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "update monitor");
  }
}
/**
 * Delete a monitor.
 *
 * @param http - Client exposing `delete(url)`.
 * @param monitorId - Identifier interpolated into `/v2/monitor/{monitorId}`.
 * @returns `true` when the status code is 200 and `success` is truthy;
 *          any other response is handed to `throwForBadResponse`.
 */
async function deleteMonitor(http, monitorId) {
  try {
    const response = await http.delete(`/v2/monitor/${monitorId}`);
    const succeeded = response.status === 200 && response.data?.success;
    if (!succeeded) {
      throwForBadResponse(response, "delete monitor");
    }
    return true;
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "delete monitor");
  }
}
/**
 * Trigger a run of a monitor.
 *
 * @param http - Client exposing `post(url, body)`.
 * @param monitorId - Identifier interpolated into `/v2/monitor/{monitorId}/run`.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function runMonitor(http, monitorId) {
  try {
    // The endpoint is called with an empty object as its body.
    const response = await http.post(`/v2/monitor/${monitorId}/run`, {});
    return dataOrThrow(response, "run monitor");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "run monitor");
  }
}
/**
 * List the checks recorded for a monitor.
 *
 * @param http - Client exposing `get(url)`.
 * @param monitorId - Identifier interpolated into `/v2/monitor/{monitorId}/checks`.
 * @param options - Optional filters, serialized with `queryString`.
 * @returns The value produced by `dataOrThrow` for the response.
 */
async function listMonitorChecks(http, monitorId, options) {
  try {
    const url = `/v2/monitor/${monitorId}/checks${queryString(options)}`;
    return dataOrThrow(await http.get(url), "list monitor checks");
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "list monitor checks");
  }
}
/**
 * Fetch one monitor check, optionally following pagination.
 *
 * The pagination controls (`autoPaginate`, `maxPages`, `maxResults`,
 * `maxWaitTime`) are stripped from `options` before the remainder is
 * serialized into the query string. When auto-pagination is on (the default)
 * and the response points at a next page, `fetchAllPages` is called with the
 * pages already received and the result is returned with `next` set to null.
 *
 * @param http - Client exposing `get(url)`.
 * @param monitorId - Monitor identifier used in the URL.
 * @param checkId - Check identifier used in the URL.
 * @param options - Optional query filters plus pagination controls.
 * @returns The check detail with a `next` field and, when paginated, a `pages` field.
 */
async function getMonitorCheck(http, monitorId, checkId, options) {
  try {
    const {
      autoPaginate: _ignoredAuto,
      maxPages: _ignoredPages,
      maxResults: _ignoredResults,
      maxWaitTime: _ignoredWait,
      ...filters
    } = options ?? {};
    const url = `/v2/monitor/${monitorId}/checks/${checkId}${queryString(filters)}`;
    const response = await http.get(url);
    const detail = dataOrThrow(response, "get monitor check");
    const nextUrl = response.data?.next ?? detail.next ?? null;
    const shouldPaginate = options?.autoPaginate ?? true;
    if (shouldPaginate && nextUrl) {
      const pages = await fetchAllPages(http, nextUrl, detail.pages || [], options);
      return { ...detail, pages, next: null };
    }
    return { ...detail, next: nextUrl };
  } catch (error) {
    // Only axios failures are normalized; anything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get monitor check");
  }
}
// src/v2/watcher.ts
import { EventEmitter } from "events";
/**
 * Look up a WebSocket constructor on the global object.
 *
 * @returns `globalThis.WebSocket` when it is a function, otherwise undefined.
 */
var hasGlobalWebSocket = () => {
  const ctor = typeof globalThis === "undefined" ? void 0 : globalThis.WebSocket;
  return typeof ctor === "function" ? ctor : void 0;
};
/**
 * Report whether the code appears to be running under Node.js.
 *
 * @returns true when a global `process` exists and `process.versions.node` is truthy.
 */
var isNodeRuntime = () => {
  if (typeof process === "undefined") return false;
  return Boolean(process.versions?.node);
};
// WebSocket constructor remembered by getWebSocketCtor so later calls can return it directly.
var cachedWebSocket;
// Promise returned by loadNodeWebSocket(); kept so that the loader is invoked at most once.
var loadPromise;
/**
 * Try to obtain a WebSocket constructor from the `undici` package.
 *
 * Nothing is attempted outside a Node runtime. Any failure while importing
 * or inspecting the module results in undefined rather than an error.
 *
 * @returns The constructor found on the module (or on its default export), or undefined.
 */
var loadNodeWebSocket = async () => {
  const inNode = isNodeRuntime();
  if (!inNode) return void 0;
  try {
    const mod = await import("undici");
    const candidate = mod.WebSocket ?? mod.default?.WebSocket;
    if (typeof candidate === "function") return candidate;
    return void 0;
  } catch {
    return void 0;
  }
};
/**
 * Resolve the WebSocket constructor to use, caching the answer.
 *
 * Lookup order: the cached value, then the global constructor reported by
 * `hasGlobalWebSocket`, then the result of `loadNodeWebSocket` (started once
 * and shared through `loadPromise`).
 *
 * @returns The constructor, or undefined when none could be found.
 */
var getWebSocketCtor = async () => {
  if (!cachedWebSocket) {
    const nativeCtor = hasGlobalWebSocket();
    if (nativeCtor) {
      cachedWebSocket = nativeCtor;
    } else {
      if (!loadPromise) {
        loadPromise = loadNodeWebSocket();
      }
      cachedWebSocket = await loadPromise;
    }
  }
  return cachedWebSocket;
};
// Shared decoder, created only when the runtime provides TextDecoder.
var decoder = typeof TextDecoder === "undefined" ? void 0 : new TextDecoder();
/**
 * Convert an incoming message payload to a UTF-8 string.
 *
 * Strings are returned as-is. Buffers, typed-array/DataView views and
 * ArrayBuffers are decoded as UTF-8, using `Buffer` when it exists and the
 * shared `decoder` otherwise.
 *
 * @param data - Payload of unknown type.
 * @returns The decoded text, or undefined for unsupported input types.
 */
var ensureUtf8String = (data) => {
  if (typeof data === "string") return data;
  const hasBuffer = typeof Buffer !== "undefined";
  if (hasBuffer && Buffer.isBuffer(data)) {
    return data.toString("utf8");
  }
  let view;
  if (ArrayBuffer.isView(data)) {
    view = data;
  } else if (data instanceof ArrayBuffer) {
    view = new Uint8Array(data);
  } else {
    return void 0;
  }
  // Honour the view's offset and length so only its own bytes are decoded.
  return hasBuffer
    ? Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8")
    : decoder?.decode(view);
};
var Watcher = class extends EventEmitter {
http;
jobId;
kind;
pollInterval;
timeout;
ws;
closed = false;
emittedDocumentKeys = /* @__PURE__ */ new Set();
constructor(http, jobId, opts = {}) {
super();
this.http = http;
this.jobId = jobId;
this.kind = opts