UNPKG

@mendable/firecrawl-js

Version:
1,450 lines (1,429 loc) 125 kB
"use strict"; var __create = Object.create; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getProtoOf = Object.getPrototypeOf; var __hasOwnProp = Object.prototype.hasOwnProperty; var __commonJS = (cb, mod) => function __require() { return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // If the importer is in node compatibility mode or this is not an ESM // file that has been converted to a CommonJS file using a Babel- // compatible transform (i.e. "__esModule" has not been set), then set // "default" to the CommonJS "module.exports" for node compatibility. isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target, mod )); var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // package.json var require_package = __commonJS({ "package.json"(exports2, module2) { module2.exports = { name: "@mendable/firecrawl-js", version: "4.24.2", description: "JavaScript SDK for Firecrawl API", main: "dist/index.js", types: "dist/index.d.ts", exports: { "./package.json": "./package.json", ".": { import: "./dist/index.js", default: "./dist/index.cjs" } }, type: "module", scripts: { build: "tsup", "build-and-publish": "pnpm run build && pnpm publish --access public", "publish-beta": "pnpm run build && pnpm publish --access public --tag beta", test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles", "test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts" }, repository: { type: "git", url: "git+https://github.com/firecrawl/firecrawl.git" }, author: "Mendable.ai", license: "MIT", dependencies: { axios: "1.15.2", firecrawl: "4.16.0", "typescript-event-target": "^1.1.1", zod: "^3.23.8", "zod-to-json-schema": "^3.23.0" }, bugs: { url: "https://github.com/firecrawl/firecrawl/issues" }, homepage: "https://github.com/firecrawl/firecrawl#readme", devDependencies: { "@jest/globals": "^30.2.0", "@types/dotenv": "^8.2.0", "@types/jest": "^30.0.0", "@types/mocha": "^10.0.6", "@types/node": "^20.12.12", "@types/uuid": "^9.0.8", dotenv: "^16.4.5", jest: "^30.2.0", "ts-jest": "^29.4.5", tsup: "^8.5.0", typescript: "^5.4.5", uuid: "^9.0.1" }, keywords: [ "firecrawl", "mendable", "crawler", "web", "scraper", "api", "sdk" ], engines: { node: ">=22.0.0" }, pnpm: { overrides: { "@isaacs/brace-expansion@<=5.0.0": ">=5.0.1", "minimatch@<10.2.3": ">=10.2.3", "rollup@<4.59.0": ">=4.59.0", "picomatch@<4.0.4": ">=4.0.4", handlebars: ">=4.7.9", "brace-expansion": ">=5.0.6", "axios@<1.15.2": "1.15.2", "follow-redirects@<1.16.0": ">=1.16.0 <2.0.0" } } }; } }); // src/index.ts var index_exports = {}; __export(index_exports, { Firecrawl: () => Firecrawl, FirecrawlAppV1: () => FirecrawlApp, FirecrawlClient: () => FirecrawlClient, JobTimeoutError: () => JobTimeoutError, SdkError: () => SdkError, Watcher: () => Watcher, default: () => index_default }); module.exports = __toCommonJS(index_exports); // src/v2/utils/httpClient.ts var import_axios = __toESM(require("axios"), 1); // src/v2/utils/getVersion.ts function getVersion() { try { if (typeof process !== "undefined" && process.env && process.env.npm_package_version) { return process.env.npm_package_version; } const pkg = require_package(); return pkg?.version || "3.x.x"; } catch { return "3.x.x"; } } // src/v2/utils/httpClient.ts var HttpClient = class { instance; apiKey; apiUrl; maxRetries; backoffFactor; constructor(options) { this.apiKey = options.apiKey; this.apiUrl = options.apiUrl.replace(/\/$/, ""); this.maxRetries = options.maxRetries ?? 3; this.backoffFactor = options.backoffFactor ?? 0.5; this.instance = import_axios.default.create({ baseURL: this.apiUrl, timeout: options.timeoutMs ?? 3e5, headers: { Authorization: `Bearer ${this.apiKey}` }, transitional: { clarifyTimeoutError: true } }); } getApiUrl() { return this.apiUrl; } getApiKey() { return this.apiKey; } async request(config) { const version = getVersion(); config.headers = { ...config.headers || {} }; let lastError; for (let attempt = 0; attempt < this.maxRetries; attempt++) { try { const cfg = { ...config }; const isFormDataBody = typeof FormData !== "undefined" && cfg.data instanceof FormData; const isPlainObjectBody = !isFormDataBody && cfg.data != null && typeof cfg.data === "object" && !Array.isArray(cfg.data); if (isPlainObjectBody && cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) { const data = cfg.data ?? {}; cfg.data = { ...data, origin: typeof data.origin === "string" && data.origin.includes("mcp") ? data.origin : `js-sdk@${version}` }; } if (isFormDataBody) { cfg.headers = { ...cfg.headers || {} }; delete cfg.headers["Content-Type"]; delete cfg.headers["content-type"]; } const res = await this.instance.request(cfg); if (res.status === 502 && attempt < this.maxRetries - 1) { await this.sleep(this.backoffFactor * Math.pow(2, attempt)); continue; } return res; } catch (err) { lastError = err; const status = err?.response?.status; if (status === 502 && attempt < this.maxRetries - 1) { await this.sleep(this.backoffFactor * Math.pow(2, attempt)); continue; } throw err; } } throw lastError ?? new Error("Unexpected HTTP client error"); } sleep(seconds) { return new Promise((r) => setTimeout(r, seconds * 1e3)); } post(endpoint, body, options) { return this.request({ method: "post", url: endpoint, data: body, headers: options?.headers, timeout: options?.timeoutMs }); } postMultipart(endpoint, formData, options) { return this.request({ method: "post", url: endpoint, data: formData, headers: options?.headers, timeout: options?.timeoutMs }); } get(endpoint, headers) { return this.request({ method: "get", url: endpoint, headers }); } delete(endpoint, headers) { return this.request({ method: "delete", url: endpoint, headers }); } patch(endpoint, body, options) { return this.request({ method: "patch", url: endpoint, data: body, headers: options?.headers, timeout: options?.timeoutMs }); } prepareHeaders(idempotencyKey) { const headers = {}; if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey; return headers; } }; // src/v2/types.ts var SdkError = class extends Error { status; code; details; jobId; constructor(message, status, code, details, jobId) { super(message); this.name = "FirecrawlSdkError"; this.status = status; this.code = code; this.details = details; this.jobId = jobId; } }; var JobTimeoutError = class extends SdkError { timeoutSeconds; constructor(jobId, timeoutSeconds, jobType = "batch") { const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl"; super( `${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`, void 0, "JOB_TIMEOUT", void 0, jobId ); this.name = "JobTimeoutError"; this.timeoutSeconds = timeoutSeconds; } }; // src/utils/zodSchemaToJson.ts var import_zod_to_json_schema = require("zod-to-json-schema"); function isZodSchema(value) { if (!value || typeof value !== "object") return false; const schema = value; const hasV3Markers = "_def" in schema && (typeof schema.safeParse === "function" || typeof schema.parse === "function"); const hasV4Markers = "_zod" in schema && typeof schema._zod === "object"; return hasV3Markers || hasV4Markers; } function isZodV4Schema(schema) { if (!schema || typeof schema !== "object") return false; return "_zod" in schema && typeof schema._zod === "object"; } function tryZodV4Conversion(schema) { if (!isZodV4Schema(schema)) return null; try { const zodModule = schema.constructor?.prototype?.constructor; if (zodModule && typeof zodModule.toJSONSchema === "function") { return zodModule.toJSONSchema(schema); } } catch { } return null; } function zodSchemaToJsonSchema(schema) { if (!isZodSchema(schema)) { return schema; } const v4Result = tryZodV4Conversion(schema); if (v4Result) { return v4Result; } try { return (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch { return schema; } } function looksLikeZodShape(obj) { if (!obj || typeof obj !== "object" || Array.isArray(obj)) return false; const values = Object.values(obj); if (values.length === 0) return false; return values.some( (v) => v && typeof v === "object" && v._def && typeof v.safeParse === "function" ); } // src/v2/utils/validation.ts function ensureValidFormats(formats) { if (!formats) return; for (const fmt of formats) { if (typeof fmt === "string") { if (fmt === "json") { throw new Error("json format must be an object with { type: 'json', prompt, schema }"); } continue; } if (fmt.type === "json") { const j = fmt; if (!j.prompt && !j.schema) { throw new Error("json format requires either 'prompt' or 'schema' (or both)"); } const maybeSchema = j.schema; if (isZodSchema(maybeSchema)) { j.schema = zodSchemaToJsonSchema(maybeSchema); } else if (looksLikeZodShape(maybeSchema)) { throw new Error( "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format." ); } continue; } if (fmt.type === "changeTracking") { const ct = fmt; const maybeSchema = ct.schema; if (isZodSchema(maybeSchema)) { ct.schema = zodSchemaToJsonSchema(maybeSchema); } else if (looksLikeZodShape(maybeSchema)) { throw new Error( "changeTracking format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format." ); } continue; } if (fmt.type === "question") { const q = fmt; if (typeof q.question !== "string" || q.question.trim().length === 0) { throw new Error("question format requires a non-empty 'question' string"); } continue; } if (fmt.type === "highlights") { const h = fmt; if (typeof h.query !== "string" || h.query.trim().length === 0) { throw new Error("highlights format requires a non-empty 'query' string"); } continue; } if (fmt.type === "query") { const q = fmt; if (typeof q.prompt !== "string" || q.prompt.trim().length === 0) { throw new Error("query format requires a non-empty 'prompt' string"); } if (q.mode != null && q.mode !== "freeform" && q.mode !== "directQuote") { throw new Error("query format mode must be 'freeform' or 'directQuote'"); } continue; } if (fmt.type === "screenshot") { const s = fmt; if (s.quality != null && (typeof s.quality !== "number" || s.quality < 0)) { throw new Error("screenshot.quality must be a non-negative number"); } } } } function ensureValidScrapeOptions(options) { if (!options) return; if (options.timeout != null && options.timeout <= 0) { throw new Error("timeout must be positive"); } if (options.waitFor != null && options.waitFor < 0) { throw new Error("waitFor must be non-negative"); } ensureValidFormats(options.formats); } function ensureValidParseFormats(formats) { if (!formats) return; for (const fmt of formats) { if (typeof fmt === "string") { if (fmt === "json") { throw new Error("json format must be an object with { type: 'json', prompt, schema }"); } if (fmt === "screenshot") { throw new Error("parse does not support screenshot format"); } if (fmt === "changeTracking") { throw new Error("parse does not support changeTracking format"); } if (fmt === "branding") { throw new Error("parse does not support branding format"); } if (fmt === "audio" || fmt === "video") { throw new Error(`parse does not support ${fmt} format`); } continue; } const type = fmt.type; if (type === "changeTracking") { throw new Error("parse does not support changeTracking format"); } if (type === "screenshot") { throw new Error("parse does not support screenshot format"); } if (type === "branding") { throw new Error("parse does not support branding format"); } if (type === "audio" || type === "video") { throw new Error(`parse does not support ${type} format`); } if (fmt.type === "json") { const j = fmt; if (!j.prompt && !j.schema) { throw new Error("json format requires either 'prompt' or 'schema' (or both)"); } const maybeSchema = j.schema; if (isZodSchema(maybeSchema)) { j.schema = zodSchemaToJsonSchema(maybeSchema); } else if (looksLikeZodShape(maybeSchema)) { throw new Error( "json format schema appears to be a Zod schema's .shape property. Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. The SDK will automatically convert Zod schemas to JSON Schema format." ); } continue; } if (fmt.type === "question") { const q = fmt; if (typeof q.question !== "string" || q.question.trim().length === 0) { throw new Error("question format requires a non-empty 'question' string"); } continue; } if (fmt.type === "highlights") { const h = fmt; if (typeof h.query !== "string" || h.query.trim().length === 0) { throw new Error("highlights format requires a non-empty 'query' string"); } continue; } if (fmt.type === "query") { const q = fmt; if (typeof q.prompt !== "string" || q.prompt.trim().length === 0) { throw new Error("query format requires a non-empty 'prompt' string"); } if (q.mode != null && q.mode !== "freeform" && q.mode !== "directQuote") { throw new Error("query format mode must be 'freeform' or 'directQuote'"); } } } } function ensureValidParseOptions(options) { if (!options) return; if (options.timeout != null && options.timeout <= 0) { throw new Error("timeout must be positive"); } const raw = options; if (raw.waitFor !== void 0) { throw new Error("parse does not support waitFor"); } if (raw.actions !== void 0) { throw new Error("parse does not support actions"); } if (raw.location !== void 0) { throw new Error("parse does not support location overrides"); } if (raw.mobile !== void 0) { throw new Error("parse does not support mobile rendering"); } if (raw.maxAge !== void 0 || raw.minAge !== void 0 || raw.storeInCache !== void 0 || raw.lockdown !== void 0) { throw new Error("parse does not support cache/index options"); } if (raw.proxy !== void 0 && raw.proxy !== "basic" && raw.proxy !== "auto") { throw new Error("parse only supports proxy values of 'basic' or 'auto'"); } ensureValidParseFormats(options.formats); } // src/v2/utils/errorHandler.ts var import_axios2 = require("axios"); function throwForBadResponse(resp, action) { const status = resp.status; const body = resp.data || {}; const msg = body?.error || body?.message || `Request failed (${status}) while trying to ${action}`; throw new SdkError(msg, status, void 0, body?.details); } function normalizeAxiosError(err, action) { const status = err.response?.status; const body = err.response?.data; const message = body?.error || err.message || `Request failed${status ? ` (${status})` : ""} while trying to ${action}`; const code = body?.code || err.code; throw new SdkError(message, status, code, body?.details ?? body); } function isRetryableError(err) { if (err instanceof JobTimeoutError) { return false; } if (err instanceof SdkError || err && typeof err === "object" && "status" in err) { const status = err.status; if (status && status >= 400 && status < 500) { return false; } if (status && status >= 500) { return true; } } if (err?.isAxiosError && !err.response) { return true; } if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) { return true; } return true; } // src/v2/methods/scrape.ts async function scrape(http, url, options) { if (!url || !url.trim()) { throw new Error("URL cannot be empty"); } if (options) ensureValidScrapeOptions(options); const payload = { url: url.trim() }; if (options) Object.assign(payload, options); try { const res = await http.post( "/v2/scrape", payload, typeof options?.timeout === "number" ? { timeoutMs: options.timeout + 5e3 } : {} ); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "scrape"); } return res.data.data || {}; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "scrape"); throw err; } } async function interact(http, jobId, args) { if (!jobId || !jobId.trim()) { throw new Error("Job ID cannot be empty"); } const hasCode = args?.code && args.code.trim(); const hasPrompt = args?.prompt && args.prompt.trim(); if (!hasCode && !hasPrompt) { throw new Error("Either 'code' or 'prompt' must be provided"); } const body = {}; if (hasCode) body.code = args.code; if (hasPrompt) body.prompt = args.prompt; body.language = args.language ?? "node"; if (args.timeout != null) body.timeout = args.timeout; if (args.origin) body.origin = args.origin; try { const res = await http.post( `/v2/scrape/${jobId}/interact`, body, args.timeout != null ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {} ); if (res.status !== 200) throwForBadResponse(res, "interact with scrape browser"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "interact with scrape browser"); throw err; } } async function stopInteraction(http, jobId) { if (!jobId || !jobId.trim()) { throw new Error("Job ID cannot be empty"); } try { const res = await http.delete( `/v2/scrape/${jobId}/interact` ); if (res.status !== 200) throwForBadResponse(res, "stop interaction"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "stop interaction"); throw err; } } // src/v2/methods/parse.ts function toUploadBlob(input, contentType) { if (typeof Blob !== "undefined" && input instanceof Blob) { if (contentType && input.type !== contentType) { return new Blob([input], { type: contentType }); } return input; } if (typeof Buffer !== "undefined" && Buffer.isBuffer(input)) { return new Blob([input], { type: contentType }); } if (input instanceof ArrayBuffer) { return new Blob([input], { type: contentType }); } if (ArrayBuffer.isView(input)) { return new Blob([input], { type: contentType }); } if (typeof input === "string") { return new Blob([input], { type: contentType ?? "text/plain; charset=utf-8" }); } throw new Error("Unsupported parse file data type"); } async function parse(http, file, options) { if (!file || !file.filename || !file.filename.trim()) { throw new Error("filename cannot be empty"); } if (file.data == null) { throw new Error("file data cannot be empty"); } const blob = toUploadBlob(file.data, file.contentType); if (blob.size === 0) { throw new Error("file data cannot be empty"); } if (options) ensureValidParseOptions(options); const version = getVersion(); const normalizedOptions = { ...options ?? {}, origin: typeof options?.origin === "string" && options.origin.includes("mcp") ? options.origin : options?.origin ?? `js-sdk@${version}` }; const formData = new FormData(); formData.append("options", JSON.stringify(normalizedOptions)); formData.append( "file", toUploadBlob(file.data, file.contentType), file.filename.trim() ); try { const res = await http.postMultipart( "/v2/parse", formData, typeof normalizedOptions.timeout === "number" ? { timeoutMs: normalizedOptions.timeout + 5e3 } : {} ); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "parse"); } return res.data.data || {}; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "parse"); throw err; } } // src/v2/methods/search.ts function prepareSearchPayload(req) { if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty"); if (req.limit != null && req.limit <= 0) throw new Error("limit must be positive"); if (req.timeout != null && req.timeout <= 0) throw new Error("timeout must be positive"); if (req.includeDomains?.length && req.excludeDomains?.length) throw new Error( "includeDomains and excludeDomains cannot both be specified" ); const payload = { query: req.query }; if (req.sources) payload.sources = req.sources; if (req.categories) payload.categories = req.categories; if (req.includeDomains) payload.includeDomains = req.includeDomains; if (req.excludeDomains) payload.excludeDomains = req.excludeDomains; if (req.limit != null) payload.limit = req.limit; if (req.tbs != null) payload.tbs = req.tbs; if (req.location != null) payload.location = req.location; if (req.ignoreInvalidURLs != null) payload.ignoreInvalidURLs = req.ignoreInvalidURLs; if (req.timeout != null) payload.timeout = req.timeout; if (req.integration && req.integration.trim()) payload.integration = req.integration.trim(); if (req.origin) payload.origin = req.origin; if (req.scrapeOptions) { ensureValidScrapeOptions(req.scrapeOptions); payload.scrapeOptions = req.scrapeOptions; } return payload; } function transformArray(arr) { const results = []; for (const item of arr) { if (item && typeof item === "object") { if ("markdown" in item || "html" in item || "rawHtml" in item || "links" in item || "screenshot" in item || "changeTracking" in item || "summary" in item || "json" in item) { results.push(item); } else { results.push(item); } } else { results.push({ url: item }); } } return results; } async function search(http, request) { const payload = prepareSearchPayload(request); try { const res = await http.post( "/v2/search", payload, typeof request.timeout === "number" ? { timeoutMs: request.timeout + 5e3 } : {} ); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "search"); } const data = res.data.data || {}; const out = {}; if (data.web) out.web = transformArray(data.web); if (data.news) out.news = transformArray(data.news); if (data.images) out.images = transformArray(data.images); return out; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "search"); throw err; } } // src/v2/methods/map.ts function prepareMapPayload(url, options) { if (!url || !url.trim()) throw new Error("URL cannot be empty"); const payload = { url: url.trim() }; if (options) { if (options.sitemap != null) payload.sitemap = options.sitemap; if (options.search != null) payload.search = options.search; if (options.includeSubdomains != null) payload.includeSubdomains = options.includeSubdomains; if (options.ignoreQueryParameters != null) payload.ignoreQueryParameters = options.ignoreQueryParameters; if (options.limit != null) payload.limit = options.limit; if (options.timeout != null) payload.timeout = options.timeout; if (options.integration != null && options.integration.trim()) payload.integration = options.integration.trim(); if (options.origin) payload.origin = options.origin; if (options.location != null) payload.location = options.location; } return payload; } async function map(http, url, options) { const payload = prepareMapPayload(url, options); try { const res = await http.post( "/v2/map", payload, typeof options?.timeout === "number" ? { timeoutMs: options.timeout + 5e3 } : {} ); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "map"); } const linksIn = res.data.links || []; const links = []; for (const item of linksIn) { if (typeof item === "string") links.push({ url: item }); else if (item && typeof item === "object") links.push({ url: item.url, title: item.title, description: item.description }); } return { links }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "map"); throw err; } } // src/v2/utils/pagination.ts async function fetchAllPages(http, nextUrl, initial, pagination) { const docs = initial.slice(); let current = nextUrl; let pageCount = 0; const maxPages = pagination?.maxPages ?? void 0; const maxResults = pagination?.maxResults ?? void 0; const maxWaitTime = pagination?.maxWaitTime ?? void 0; const started = Date.now(); while (current) { if (maxPages != null && pageCount >= maxPages) break; if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break; let payload = null; try { const res = await http.get(current); payload = res.data; } catch { break; } if (!payload?.success) break; const pageData = Array.isArray(payload.data) ? payload.data : payload.data?.pages || []; for (const d of pageData) { if (maxResults != null && docs.length >= maxResults) break; docs.push(d); } if (maxResults != null && docs.length >= maxResults) break; current = payload.next ?? (Array.isArray(payload.data) ? null : payload.data?.next) ?? null; pageCount += 1; } return docs; } // src/v2/methods/crawl.ts function prepareCrawlPayload(request) { if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty"); const data = { url: request.url.trim() }; if (request.prompt) data.prompt = request.prompt; if (request.excludePaths) data.excludePaths = request.excludePaths; if (request.includePaths) data.includePaths = request.includePaths; if (request.maxDiscoveryDepth != null) data.maxDiscoveryDepth = request.maxDiscoveryDepth; if (request.sitemap != null) data.sitemap = request.sitemap; if (request.robotsUserAgent != null) data.robotsUserAgent = request.robotsUserAgent; if (request.ignoreQueryParameters != null) data.ignoreQueryParameters = request.ignoreQueryParameters; if (request.deduplicateSimilarURLs != null) data.deduplicateSimilarURLs = request.deduplicateSimilarURLs; if (request.limit != null) data.limit = request.limit; if (request.crawlEntireDomain != null) data.crawlEntireDomain = request.crawlEntireDomain; if (request.allowExternalLinks != null) data.allowExternalLinks = request.allowExternalLinks; if (request.allowSubdomains != null) data.allowSubdomains = request.allowSubdomains; if (request.delay != null) data.delay = request.delay; if (request.maxConcurrency != null) data.maxConcurrency = request.maxConcurrency; if (request.regexOnFullURL != null) data.regexOnFullURL = request.regexOnFullURL; if (request.webhook != null) data.webhook = request.webhook; if (request.integration != null && request.integration.trim()) data.integration = request.integration.trim(); if (request.origin) data.origin = request.origin; if (request.scrapeOptions) { ensureValidScrapeOptions(request.scrapeOptions); data.scrapeOptions = request.scrapeOptions; } if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention; return data; } async function startCrawl(http, request) { const payload = prepareCrawlPayload(request); try { const res = await http.post("/v2/crawl", payload); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "start crawl"); } return { id: res.data.id, url: res.data.url }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "start crawl"); throw err; } } async function getCrawlStatus(http, jobId, pagination) { try { const res = await http.get(`/v2/crawl/${jobId}`); if (res.status !== 200 || !res.data?.success) { throwForBadResponse(res, "get crawl status"); } const body = res.data; const initialDocs = body.data || []; const auto = pagination?.autoPaginate ?? true; if (!auto || !body.next) { return { id: jobId, status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: body.next ?? null, data: initialDocs }; } const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination); return { id: jobId, status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: null, data: aggregated }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status"); throw err; } } async function cancelCrawl(http, jobId) { try { const res = await http.delete(`/v2/crawl/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "cancel crawl"); return res.data?.status === "cancelled"; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "cancel crawl"); throw err; } } async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { try { const status = await getCrawlStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status)) { return status; } } catch (err) { if (!isRetryableError(err)) { if (err instanceof SdkError) { const errorWithJobId = new SdkError( err.message, err.status, err.code, err.details, jobId ); throw errorWithJobId; } throw err; } } if (timeout != null && Date.now() - start > timeout * 1e3) { throw new JobTimeoutError(jobId, timeout, "crawl"); } await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function crawl(http, request, pollInterval = 2, timeout) { const started = await startCrawl(http, request); return waitForCrawlCompletion(http, started.id, pollInterval, timeout); } async function getCrawlErrors(http, crawlId) { try { const res = await http.get(`/v2/crawl/${crawlId}/errors`); if (res.status !== 200) throwForBadResponse(res, "get crawl errors"); const payload = res.data?.data ?? res.data; return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors"); throw err; } } async function getActiveCrawls(http) { try { const res = await http.get(`/v2/crawl/active`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get active crawls"); const crawlsIn = res.data?.crawls || []; const crawls = crawlsIn.map((c) => ({ id: c.id, teamId: c.teamId ?? c.team_id, url: c.url, options: c.options ?? null })); return { success: true, crawls }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get active crawls"); throw err; } } async function crawlParamsPreview(http, url, prompt) { if (!url || !url.trim()) throw new Error("URL cannot be empty"); if (!prompt || !prompt.trim()) throw new Error("Prompt cannot be empty"); try { const res = await http.post("/v2/crawl/params-preview", { url: url.trim(), prompt }); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "crawl params preview"); const data = res.data.data || {}; if (res.data.warning) data.warning = res.data.warning; return data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview"); throw err; } } // src/v2/methods/batch.ts async function startBatchScrape(http, urls, { options, webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, idempotencyKey, integration, origin } = {}) { if (!Array.isArray(urls) || urls.length === 0) throw new Error("URLs list cannot be empty"); const payload = { urls }; if (options) { ensureValidScrapeOptions(options); Object.assign(payload, options); } if (webhook != null) payload.webhook = webhook; if (appendToId != null) payload.appendToId = appendToId; if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs; if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency; if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention; if (integration != null && integration.trim()) payload.integration = integration.trim(); if (origin) payload.origin = origin; try { const headers = http.prepareHeaders(idempotencyKey); const res = await http.post("/v2/batch/scrape", payload, { headers }); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "start batch scrape"); return { id: res.data.id, url: res.data.url, invalidURLs: res.data.invalidURLs || void 0 }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "start batch scrape"); throw err; } } async function getBatchScrapeStatus(http, jobId, pagination) { try { const res = await http.get(`/v2/batch/scrape/${jobId}`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status"); const body = res.data; const initialDocs = body.data || []; const auto = pagination?.autoPaginate ?? true; if (!auto || !body.next) { return { id: jobId, status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: body.next ?? null, data: initialDocs }; } const aggregated = await fetchAllPages( http, body.next, initialDocs, pagination ); return { id: jobId, status: body.status, completed: body.completed ?? 0, total: body.total ?? 0, creditsUsed: body.creditsUsed, expiresAt: body.expiresAt, next: null, data: aggregated }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status"); throw err; } } async function cancelBatchScrape(http, jobId) { try { const res = await http.delete( `/v2/batch/scrape/${jobId}` ); if (res.status !== 200) throwForBadResponse(res, "cancel batch scrape"); return res.data?.status === "cancelled"; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "cancel batch scrape"); throw err; } } async function getBatchScrapeErrors(http, jobId) { try { const res = await http.get(`/v2/batch/scrape/${jobId}/errors`); if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors"); const payload = res.data?.data ?? res.data; return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape errors"); throw err; } } async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { try { const status = await getBatchScrapeStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status)) { return status; } } catch (err) { if (!isRetryableError(err)) { if (err instanceof SdkError) { const errorWithJobId = new SdkError( err.message, err.status, err.code, err.details, jobId ); throw errorWithJobId; } throw err; } } if (timeout != null && Date.now() - start > timeout * 1e3) { throw new JobTimeoutError(jobId, timeout, "batch"); } await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function batchScrape(http, urls, opts = {}) { const start = await startBatchScrape(http, urls, opts); return waitForBatchCompletion( http, start.id, opts.pollInterval ?? 2, opts.timeout ); } // src/v2/methods/extract.ts function prepareExtractPayload(args) { const body = {}; if (args.urls) body.urls = args.urls; if (args.prompt != null) body.prompt = args.prompt; if (args.schema != null) { body.schema = isZodSchema(args.schema) ? zodSchemaToJsonSchema(args.schema) : args.schema; } if (args.systemPrompt != null) body.systemPrompt = args.systemPrompt; if (args.allowExternalLinks != null) body.allowExternalLinks = args.allowExternalLinks; if (args.enableWebSearch != null) body.enableWebSearch = args.enableWebSearch; if (args.showSources != null) body.showSources = args.showSources; if (args.ignoreInvalidURLs != null) body.ignoreInvalidURLs = args.ignoreInvalidURLs; if (args.integration && args.integration.trim()) body.integration = args.integration.trim(); if (args.origin) body.origin = args.origin; if (args.agent) body.agent = args.agent; if (args.scrapeOptions) { ensureValidScrapeOptions(args.scrapeOptions); body.scrapeOptions = args.scrapeOptions; } return body; } async function startExtract(http, args) { const payload = prepareExtractPayload(args); try { const res = await http.post("/v2/extract", payload); if (res.status !== 200) throwForBadResponse(res, "extract"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "extract"); throw err; } } async function getExtractStatus(http, jobId) { try { const res = await http.get(`/v2/extract/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "extract status"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "extract status"); throw err; } } async function waitExtract(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { const status = await getExtractStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status || "")) return status; if (timeout != null && Date.now() - start > timeout * 1e3) return status; await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function extract(http, args) { const started = await startExtract(http, args); const jobId = started.id; if (!jobId) return started; return waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout); } // src/v2/methods/agent.ts function prepareAgentPayload(args) { const body = {}; if (args.urls) body.urls = args.urls; body.prompt = args.prompt; if (args.schema != null) { body.schema = isZodSchema(args.schema) ? zodSchemaToJsonSchema(args.schema) : args.schema; } if (args.integration && args.integration.trim()) body.integration = args.integration.trim(); if (args.origin) body.origin = args.origin; if (args.maxCredits !== null && args.maxCredits !== void 0) body.maxCredits = args.maxCredits; if (args.strictConstrainToURLs !== null && args.strictConstrainToURLs !== void 0) body.strictConstrainToURLs = args.strictConstrainToURLs; if (args.model !== null && args.model !== void 0) body.model = args.model; if (args.webhook != null) body.webhook = args.webhook; return body; } async function startAgent(http, args) { const payload = prepareAgentPayload(args); try { const res = await http.post("/v2/agent", payload); if (res.status !== 200) throwForBadResponse(res, "agent"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "agent"); throw err; } } async function getAgentStatus(http, jobId) { try { const res = await http.get(`/v2/agent/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "agent status"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "agent status"); throw err; } } async function waitAgent(http, jobId, pollInterval = 2, timeout) { const start = Date.now(); while (true) { const status = await getAgentStatus(http, jobId); if (["completed", "failed", "cancelled"].includes(status.status || "")) return status; if (timeout != null && Date.now() - start > timeout * 1e3) return status; await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3))); } } async function agent(http, args) { const started = await startAgent(http, args); const jobId = started.id; if (!jobId) return started; return waitAgent(http, jobId, args.pollInterval ?? 2, args.timeout); } async function cancelAgent(http, jobId) { try { const res = await http.delete(`/v2/agent/${jobId}`); if (res.status !== 200) throwForBadResponse(res, "cancel agent"); return res.data?.success === true; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "cancel agent"); throw err; } } // src/v2/methods/browser.ts async function browser(http, args = {}) { const body = {}; if (args.ttl != null) body.ttl = args.ttl; if (args.activityTtl != null) body.activityTtl = args.activityTtl; if (args.streamWebView != null) body.streamWebView = args.streamWebView; if (args.profile != null) body.profile = args.profile; if (args.integration != null) body.integration = args.integration; if (args.origin) body.origin = args.origin; try { const res = await http.post("/v2/browser", body); if (res.status !== 200) throwForBadResponse(res, "create browser session"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "create browser session"); throw err; } } async function browserExecute(http, sessionId, args) { const body = { code: args.code, language: args.language ?? "bash" }; if (args.timeout != null) body.timeout = args.timeout; try { const res = await http.post( `/v2/browser/${sessionId}/execute`, body, args.timeout != null ? { timeoutMs: args.timeout * 1e3 + 5e3 } : {} ); if (res.status !== 200) throwForBadResponse(res, "execute browser code"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "execute browser code"); throw err; } } async function deleteBrowser(http, sessionId) { try { const res = await http.delete( `/v2/browser/${sessionId}` ); if (res.status !== 200) throwForBadResponse(res, "delete browser session"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "delete browser session"); throw err; } } async function listBrowsers(http, args = {}) { let endpoint = "/v2/browser"; if (args.status) endpoint += `?status=${args.status}`; try { const res = await http.get(endpoint); if (res.status !== 200) throwForBadResponse(res, "list browser sessions"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "list browser sessions"); throw err; } } // src/v2/methods/usage.ts async function getConcurrency(http) { try { const res = await http.get("/v2/concurrency-check"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get concurrency"); const d = res.data.data || res.data; return { concurrency: d.concurrency, maxConcurrency: d.maxConcurrency ?? d.max_concurrency }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get concurrency"); throw err; } } async function getCreditUsage(http) { try { const res = await http.get("/v2/team/credit-usage"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage"); const d = res.data.data || res.data; return { remainingCredits: d.remainingCredits ?? d.remaining_credits ?? 0, planCredits: d.planCredits ?? d.plan_credits, billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null, billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage"); throw err; } } async function getTokenUsage(http) { try { const res = await http.get("/v2/team/token-usage"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage"); const d = res.data.data || res.data; return { remainingTokens: d.remainingTokens ?? d.remaining_tokens ?? 0, planTokens: d.planTokens ?? d.plan_tokens, billingPeriodStart: d.billingPeriodStart ?? d.billing_period_start ?? null, billingPeriodEnd: d.billingPeriodEnd ?? d.billing_period_end ?? null }; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage"); throw err; } } async function getQueueStatus(http) { try { const res = await http.get("/v2/team/queue-status"); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get queue status"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get queue status"); throw err; } } async function getCreditUsageHistorical(http, byApiKey) { try { const query = byApiKey ? "?byApiKey=true" : ""; const res = await http.get(`/v2/team/credit-usage/historical${query}`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get credit usage historical"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get credit usage historical"); throw err; } } async function getTokenUsageHistorical(http, byApiKey) { try { const query = byApiKey ? "?byApiKey=true" : ""; const res = await http.get(`/v2/team/token-usage/historical${query}`); if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get token usage historical"); return res.data; } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "get token usage historical"); throw err; } } // src/v2/methods/monitor.ts function queryString(params) { if (!params) return ""; const query = new URLSearchParams(); for (const [key, value] of Object.entries(params)) { if (value !== void 0 && value !== null) query.set(key, String(value)); } const str = query.toString(); return str ? `?${str}` : ""; } function dataOrThrow(res, action) { if (res.status !== 200 || !res.data?.success || res.data.data == null) { throwForBadResponse(res, action); } return res.data.data; } async function createMonitor(http, request) { try { const res = await http.post("/v2/monitor", request); return dataOrThrow(res, "create monitor"); } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "create monitor"); throw err; } } async function listMonitors(http, options) { try { const res = await http.get( `/v2/monitor${queryString(options)}` ); return dataOrThrow(res, "list monitors"); } catch (err) { if (err?.isAxiosError) return normalizeAxiosError(err, "list monitors"); throw err; } } asyn