// firecrawl - @mendable/firecrawl-js v1.29.3, CommonJS bundle (dist/index.cjs) as served by UNPKG; 1,362 lines (1,357 loc), 52.1 kB
"use strict"; var __create = Object.create; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getProtoOf = Object.getPrototypeOf; var __hasOwnProp = Object.prototype.hasOwnProperty; var __commonJS = (cb, mod) => function __require() { return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // If the importer is in node compatibility mode or this is not an ESM // file that has been converted to a CommonJS file using a Babel- // compatible transform (i.e. "__esModule" has not been set), then set // "default" to the CommonJS "module.exports" for node compatibility. isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target, mod )); var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // package.json var require_package = __commonJS({ "package.json"(exports2, module2) { module2.exports = { name: "@mendable/firecrawl-js", version: "1.29.3", description: "JavaScript SDK for Firecrawl API", main: "dist/index.js", types: "dist/index.d.ts", exports: { "./package.json": "./package.json", ".": { import: "./dist/index.js", default: "./dist/index.cjs" } }, type: "module", scripts: { build: "tsup", "build-and-publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts", "test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/unit/*.test.ts" }, repository: { type: "git", url: "git+https://github.com/mendableai/firecrawl.git" }, author: "Mendable.ai", license: "MIT", dependencies: { axios: "^1.11.0", "typescript-event-target": "^1.1.1", zod: "^3.23.8", "zod-to-json-schema": "^3.23.0" }, bugs: { url: "https://github.com/mendableai/firecrawl/issues" }, homepage: "https://github.com/mendableai/firecrawl#readme", devDependencies: { "@jest/globals": "^30.0.5", "@types/dotenv": "^8.2.0", "@types/jest": "^30.0.0", "@types/mocha": "^10.0.6", "@types/node": "^20.12.12", "@types/uuid": "^9.0.8", dotenv: "^16.4.5", jest: "^30.0.5", "ts-jest": "^29.4.0", tsup: "^8.5.0", typescript: "^5.4.5", uuid: "^9.0.1" }, keywords: [ "firecrawl", "mendable", "crawler", "web", "scraper", "api", "sdk" ], engines: { node: ">=22.0.0" }, pnpm: { overrides: { "@babel/helpers@<7.26.10": ">=7.26.10", "brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12", "brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2" } } }; } }); // src/index.ts var index_exports = {}; __export(index_exports, { CrawlWatcher: () => CrawlWatcher, FirecrawlError: () => FirecrawlError, default: () => FirecrawlApp }); module.exports = __toCommonJS(index_exports); var import_axios = __toESM(require("axios"), 1); var zt = require("zod"); var import_zod_to_json_schema = 
require("zod-to-json-schema"); // node_modules/typescript-event-target/dist/index.mjs var e = class extends EventTarget { dispatchTypedEvent(s, t) { return super.dispatchEvent(t); } }; // src/index.ts var FirecrawlError = class extends Error { statusCode; details; constructor(message, statusCode, details) { super(message); this.statusCode = statusCode; this.details = details; } }; var FirecrawlApp = class { apiKey; apiUrl; version = "1.25.1"; isCloudService(url) { return url.includes("api.firecrawl.dev"); } async getVersion() { try { const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1)); return packageJson.default.version; } catch (error) { console.error("Error getting version:", error); return "1.25.1"; } } async init() { this.version = await this.getVersion(); } /** * Initializes a new instance of the FirecrawlApp class. * @param config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null, apiUrl = null }) { const baseUrl = apiUrl || "https://api.firecrawl.dev"; if (this.isCloudService(baseUrl) && typeof apiKey !== "string") { throw new FirecrawlError("No API key provided", 401); } this.apiKey = apiKey || ""; this.apiUrl = baseUrl; this.init(); } /** * Scrapes a URL using the Firecrawl API. * @param url - The URL to scrape. * @param params - Additional parameters for the scrape request. * @returns The response from the scrape operation. */ async scrapeUrl(url, params) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }; let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, extract: { ...jsonData.extract, schema } }; } if (jsonData?.jsonOptions?.schema) { let schema = jsonData.jsonOptions.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, schema } }; } try { const response = await import_axios.default.post( this.apiUrl + `/v1/scrape`, jsonData, { headers, timeout: params?.timeout !== void 0 ? params.timeout + 5e3 : void 0 } ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { return { success: true, warning: responseData.warning, error: responseData.error, ...responseData.data }; } else { throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status); } } else { this.handleError(response, "scrape URL"); } } catch (error) { this.handleError(error.response, "scrape URL"); } return { success: false, error: "Internal server error." }; } /** * Searches using the Firecrawl API and optionally scrapes the results. * @param query - The search query string. * @param params - Optional parameters for the search request. * @returns The response from the search operation. */ async search(query, params) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }; let jsonData = { query, limit: params?.limit ?? 5, tbs: params?.tbs, filter: params?.filter, lang: params?.lang ?? "en", country: params?.country ?? "us", location: params?.location, origin: `js-sdk@${this.version}`, timeout: params?.timeout ?? 6e4, scrapeOptions: params?.scrapeOptions ?? 
{ formats: [] } }; if (jsonData?.scrapeOptions?.extract?.schema) { let schema = jsonData.scrapeOptions.extract.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, scrapeOptions: { ...jsonData.scrapeOptions, extract: { ...jsonData.scrapeOptions.extract, schema } } }; } try { const response = await this.postRequest( this.apiUrl + `/v1/search`, jsonData, headers ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { return { success: true, data: responseData.data, warning: responseData.warning }; } else { throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status); } } else { this.handleError(response, "search"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error.", data: [] }; } /** * Initiates a crawl job for a URL using the Firecrawl API. * @param url - The URL to crawl. * @param params - Additional parameters for the crawl request. * @param pollInterval - Time in seconds for job status checks. * @param idempotencyKey - Optional idempotency key for the request. * @returns The response from the crawl operation. */ async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/crawl`, jsonData, headers ); if (response.status === 200) { const id = response.data.id; return this.monitorJobStatus(id, headers, pollInterval); } else { this.handleError(response, "start crawl job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } async asyncCrawlUrl(url, params, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/crawl`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start crawl job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Checks the status of a crawl job using the Firecrawl API. * @param id - The ID of the crawl operation. * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`) * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. 
Only used when `getAllData = false`. * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`. * @param limit - How many entries to return. Only used when `getAllData = false`. * @returns The response containing the job status. */ async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) { if (!id) { throw new FirecrawlError("No crawl ID provided", 400); } const headers = this.prepareHeaders(); const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`); if (skip !== void 0) { targetURL.searchParams.set("skip", skip.toString()); } if (limit !== void 0) { targetURL.searchParams.set("limit", limit.toString()); } try { const response = await this.getRequest( targetURL.href, headers ); if (response.status === 200) { let allData = response.data.data; if (getAllData && response.data.status === "completed") { let statusData = response.data; if ("data" in statusData) { let data = statusData.data; while (typeof statusData === "object" && "next" in statusData) { if (data.length === 0) { break; } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } allData = data; } } let resp = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, next: getAllData ? void 0 : response.data.next, expiresAt: new Date(response.data.expiresAt), data: allData }; if (!response.data.success && response.data.error) { resp = { ...resp, success: false, error: response.data.error }; } if (response.data.next) { resp.next = response.data.next; } return resp; } else { this.handleError(response, "check crawl status"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Returns information about crawl errors. * @param id - The ID of the crawl operation. * @returns Information about crawl errors. */ async checkCrawlErrors(id) { const headers = this.prepareHeaders(); try { const response = await this.deleteRequest( `${this.apiUrl}/v1/crawl/${id}/errors`, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "check crawl errors"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Cancels a crawl job using the Firecrawl API. * @param id - The ID of the crawl operation. * @returns The response from the cancel crawl operation. */ async cancelCrawl(id) { const headers = this.prepareHeaders(); try { const response = await this.deleteRequest( `${this.apiUrl}/v1/crawl/${id}`, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "cancel crawl job"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket. * @param url - The URL to crawl. * @param params - Additional parameters for the crawl request. * @param idempotencyKey - Optional idempotency key for the request. * @returns A CrawlWatcher instance to monitor the crawl job. 
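* @example
* // Illustrative sketch, not from the original source. "done" matches the message type
* // handled by CrawlWatcher's messageHandler below; the event payload shape and the
* // `limit` crawl option are assumptions for the example.
* // const watcher = await app.crawlUrlAndWatch("https://firecrawl.dev", { limit: 10 });
* // watcher.addEventListener("done", (ev) => console.log("crawl finished", ev.detail));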
*/ async crawlUrlAndWatch(url, params, idempotencyKey) { const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey); if (crawl.success && crawl.id) { const id = crawl.id; return new CrawlWatcher(id, this); } throw new FirecrawlError("Crawl job failed to start", 400); } /** * Maps a URL using the Firecrawl API. * @param url - The URL to map. * @param params - Additional parameters for the map request. * @returns The response from the map operation. */ async mapUrl(url, params) { const headers = this.prepareHeaders(); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/map`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "map"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Initiates a batch scrape job for multiple URLs using the Firecrawl API. * @param url - The URLs to scrape. * @param params - Additional parameters for the scrape request. * @param pollInterval - Time in seconds for job status checks. * @param idempotencyKey - Optional idempotency key for the request. * @param webhook - Optional webhook for the batch scrape. * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs. * @returns The response from the crawl operation. */ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs, maxConcurrency) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, extract: { ...jsonData.extract, schema } }; } if (jsonData?.jsonOptions?.schema) { let schema = jsonData.jsonOptions.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, schema } }; } try { const response = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, jsonData, headers ); if (response.status === 200) { const id = response.data.id; return this.monitorJobStatus(id, headers, pollInterval); } else { this.handleError(response, "start batch scrape job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start batch scrape job"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? 
` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket. * @param urls - The URL to scrape. * @param params - Additional parameters for the scrape request. * @param idempotencyKey - Optional idempotency key for the request. * @returns A CrawlWatcher instance to monitor the crawl job. */ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) { const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs); if (crawl.success && crawl.id) { const id = crawl.id; return new CrawlWatcher(id, this); } throw new FirecrawlError("Batch scrape job failed to start", 400); } /** * Checks the status of a batch scrape job using the Firecrawl API. * @param id - The ID of the batch scrape operation. * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`) * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`. * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`. * @param limit - How many entries to return. Only used when `getAllData = false`. * @returns The response containing the job status. */ async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) { if (!id) { throw new FirecrawlError("No batch scrape ID provided", 400); } const headers = this.prepareHeaders(); const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`); if (skip !== void 0) { targetURL.searchParams.set("skip", skip.toString()); } if (limit !== void 0) { targetURL.searchParams.set("limit", limit.toString()); } try { const response = await this.getRequest( targetURL.href, headers ); if (response.status === 200) { let allData = response.data.data; if (getAllData && response.data.status === "completed") { let statusData = response.data; if ("data" in statusData) { let data = statusData.data; while (typeof statusData === "object" && "next" in statusData) { if (data.length === 0) { break; } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } allData = data; } } let resp = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, next: getAllData ? void 0 : response.data.next, expiresAt: new Date(response.data.expiresAt), data: allData }; if (!response.data.success && response.data.error) { resp = { ...resp, success: false, error: response.data.error }; } if (response.data.next) { resp.next = response.data.next; } return resp; } else { this.handleError(response, "check batch scrape status"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Returns information about batch scrape errors. * @param id - The ID of the batch scrape operation. * @returns Information about batch scrape errors. 
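* @example
* // Illustrative sketch, not from the original source; the `id` field on the job-start
* // response follows how batchScrapeUrlsAndWatch() reads it above.
* // const job = await app.asyncBatchScrapeUrls(["https://firecrawl.dev", "https://docs.firecrawl.dev"]);
* // if (job.success) { const errs = await app.checkBatchScrapeErrors(job.id); }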
*/ async checkBatchScrapeErrors(id) { const headers = this.prepareHeaders(); try { const response = await this.deleteRequest( `${this.apiUrl}/v1/batch/scrape/${id}/errors`, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "check batch scrape errors"); } } catch (error) { throw new FirecrawlError(error.message, 500); } return { success: false, error: "Internal server error." }; } /** * Extracts information from URLs using the Firecrawl API. * Currently in Beta. Expect breaking changes on future minor versions. * @param urls - The URLs to extract information from. Optional if using other methods for data extraction. * @param params - Additional parameters for the extract request. * @returns The response from the extract operation. */ async extract(urls, params) { const headers = this.prepareHeaders(); let jsonData = { urls, ...params }; let jsonSchema; try { if (!params?.schema) { jsonSchema = void 0; } else { try { jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema); } catch (_) { jsonSchema = params.schema; } } } catch (error) { throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400); } try { const response = await this.postRequest( this.apiUrl + `/v1/extract`, { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); if (response.status === 200) { const jobId = response.data.id; let extractStatus; do { const statusResponse = await this.getRequest( `${this.apiUrl}/v1/extract/${jobId}`, headers ); extractStatus = statusResponse.data; if (extractStatus.status === "completed") { if (extractStatus.success) { return { success: true, data: extractStatus.data, warning: extractStatus.warning, error: extractStatus.error, sources: extractStatus?.sources || void 0 }; } else { throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status); } } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") { throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status); } await new Promise((resolve) => setTimeout(resolve, 1e3)); } while (extractStatus.status !== "completed"); } else { this.handleError(response, "extract"); } } catch (error) { throw new FirecrawlError(error.message, 500, error.response?.data?.details); } return { success: false, error: "Internal server error." }; } /** * Initiates an asynchronous extract job for a URL using the Firecrawl API. * @param url - The URL to extract data from. * @param params - Additional parameters for the extract request. * @param idempotencyKey - Optional idempotency key for the request. * @returns The response from the extract operation. */ async asyncExtract(urls, params, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { urls, ...params }; let jsonSchema; try { if (!params?.schema) { jsonSchema = void 0; } else { try { jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema); } catch (_) { jsonSchema = params.schema; } } } catch (error) { throw new FirecrawlError("Invalid schema. 
Schema must be either a valid Zod schema or JSON schema object.", 400); } try { const response = await this.postRequest( this.apiUrl + `/v1/extract`, { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start extract job"); } } catch (error) { throw new FirecrawlError(error.message, 500, error.response?.data?.details); } return { success: false, error: "Internal server error." }; } /** * Retrieves the status of an extract job. * @param jobId - The ID of the extract job. * @returns The status of the extract job. */ async getExtractStatus(jobId) { try { const response = await this.getRequest( `${this.apiUrl}/v1/extract/${jobId}`, this.prepareHeaders() ); if (response.status === 200) { return response.data; } else { this.handleError(response, "get extract status"); } } catch (error) { throw new FirecrawlError(error.message, 500); } } /** * Prepares the headers for an API request. * @param idempotencyKey - Optional key to ensure idempotency. * @returns The prepared headers. */ prepareHeaders(idempotencyKey) { return { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, ...idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {} }; } /** * Sends a POST request to the specified URL. * @param url - The URL to send the request to. * @param data - The data to send in the request. * @param headers - The headers for the request. * @returns The response from the POST request. */ postRequest(url, data, headers) { return import_axios.default.post(url, data, { headers, timeout: data?.timeout ? data.timeout + 5e3 : void 0 }); } /** * Sends a GET request to the specified URL. * @param url - The URL to send the request to. * @param headers - The headers for the request. * @returns The response from the GET request. */ async getRequest(url, headers) { try { return await import_axios.default.get(url, { headers }); } catch (error) { if (error instanceof import_axios.AxiosError && error.response) { return error.response; } else { throw error; } } } /** * Sends a DELETE request to the specified URL. * @param url - The URL to send the request to. * @param headers - The headers for the request. * @returns The response from the DELETE request. */ async deleteRequest(url, headers) { try { return await import_axios.default.delete(url, { headers }); } catch (error) { if (error instanceof import_axios.AxiosError && error.response) { return error.response; } else { throw error; } } } /** * Monitors the status of a crawl job until completion or failure. * @param id - The ID of the crawl operation. * @param headers - The headers for the request. * @param checkInterval - Interval in seconds for job status checks. * @param checkUrl - Optional URL to check the status (used for v1 API) * @returns The final job status or data. 
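* @example
* // Internal helper: crawlUrl() and batchScrapeUrls() await it after POSTing the job.
* // Transient network errors are retried up to 3 times with capped exponential backoff
* // (values mirror the implementation below), and checkInterval is clamped to >= 2 s:
* //   backoffDelay = Math.min(1000 * 2 ** (networkRetries - 1), 10000); // ms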
*/ async monitorJobStatus(id, headers, checkInterval) { let failedTries = 0; let networkRetries = 0; const maxNetworkRetries = 3; while (true) { try { let statusResponse = await this.getRequest( `${this.apiUrl}/v1/crawl/${id}`, headers ); if (statusResponse.status === 200) { failedTries = 0; networkRetries = 0; let statusData = statusResponse.data; if (statusData.status === "completed") { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === "object" && "next" in statusData) { if (data.length === 0) { break; } statusResponse = await this.getRequest(statusData.next, headers); statusData = statusResponse.data; data = data.concat(statusData.data); } statusData.data = data; return statusData; } else { throw new FirecrawlError("Crawl job completed but no data was returned", 500); } } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) { checkInterval = Math.max(checkInterval, 2); await new Promise( (resolve) => setTimeout(resolve, checkInterval * 1e3) ); } else { throw new FirecrawlError( `Crawl job failed or was stopped. Status: ${statusData.status}`, 500 ); } } else { failedTries++; if (failedTries >= 3) { this.handleError(statusResponse, "check crawl status"); } } } catch (error) { if (this.isRetryableError(error) && networkRetries < maxNetworkRetries) { networkRetries++; const backoffDelay = Math.min(1e3 * Math.pow(2, networkRetries - 1), 1e4); await new Promise((resolve) => setTimeout(resolve, backoffDelay)); continue; } throw new FirecrawlError(error, 500); } } } /** * Determines if an error is retryable (transient network error) * @param error - The error to check * @returns True if the error should be retried */ isRetryableError(error) { if (error instanceof import_axios.AxiosError) { if (!error.response) { const code = error.code; const message = error.message?.toLowerCase() || ""; return code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout"); } if (error.response?.status === 408 || error.response?.status === 504) { return true; } } if (error && typeof error === "object") { const code = error.code; const message = error.message?.toLowerCase() || ""; if (code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout")) { return true; } if (error.response?.status === 408 || error.response?.status === 504) { return true; } } return false; } /** * Handles errors from API responses. * @param {AxiosResponse} response - The response from the API. * @param {string} action - The action being performed when the error occurred. */ handleError(response, action) { if (!response) { throw new FirecrawlError( `No response received while trying to ${action}. This may be a network error or the server is unreachable.`, 0 ); } if ([400, 402, 403, 408, 409, 500].includes(response.status)) { const errorMessage = response.data.error || "Unknown error occurred"; const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : ""; throw new FirecrawlError( `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`, response.status, response?.data?.details ); } else { throw new FirecrawlError( `Unexpected error occurred while trying to ${action}. 
Status code: ${response.status}`, response.status ); } } /** * Initiates a deep research operation on a given query and polls until completion. * @param query - The query to research. * @param params - Parameters for the deep research operation. * @param onActivity - Optional callback to receive activity updates in real-time. * @param onSource - Optional callback to receive source updates in real-time. * @returns The final research results. */ async deepResearch(query, params, onActivity, onSource) { try { const response = await this.asyncDeepResearch(query, params); if (!response.success || "error" in response) { return { success: false, error: "error" in response ? response.error : "Unknown error" }; } if (!response.id) { throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500); } const jobId = response.id; let researchStatus; let lastActivityCount = 0; let lastSourceCount = 0; while (true) { researchStatus = await this.checkDeepResearchStatus(jobId); if ("error" in researchStatus && !researchStatus.success) { return researchStatus; } if (onActivity && researchStatus.activities) { const newActivities = researchStatus.activities.slice(lastActivityCount); for (const activity of newActivities) { onActivity(activity); } lastActivityCount = researchStatus.activities.length; } if (onSource && researchStatus.sources) { const newSources = researchStatus.sources.slice(lastSourceCount); for (const source of newSources) { onSource(source); } lastSourceCount = researchStatus.sources.length; } if (researchStatus.status === "completed") { return researchStatus; } if (researchStatus.status === "failed") { throw new FirecrawlError( `Research job ${researchStatus.status}. Error: ${researchStatus.error}`, 500 ); } if (researchStatus.status !== "processing") { break; } await new Promise((resolve) => setTimeout(resolve, 2e3)); } return { success: false, error: "Research job terminated unexpectedly" }; } catch (error) { throw new FirecrawlError(error.message, 500, error.response?.data?.details); } } /** * Initiates a deep research operation on a given query without polling. * @param params - Parameters for the deep research operation. * @returns The response containing the research job ID. */ async asyncDeepResearch(query, params) { const headers = this.prepareHeaders(); let jsonData = { query, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.jsonOptions?.schema) { let schema = jsonData.jsonOptions.schema; try { schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema); } catch (error) { } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, schema } }; } try { const response = await this.postRequest( `${this.apiUrl}/v1/deep-research`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start deep research"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Checks the status of a deep research operation. * @param id - The ID of the deep research operation. * @returns The current status and results of the research operation. 
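* @example
* // Illustrative sketch, not from the original source: manual polling as an alternative
* // to the blocking deepResearch() helper above. The empty params object is an assumption;
* // the `id` field follows how deepResearch() reads the job-start response.
* // const started = await app.asyncDeepResearch("history of the robots.txt standard", {});
* // if (started.success) { const status = await app.checkDeepResearchStatus(started.id); }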
*/ async checkDeepResearchStatus(id) { const headers = this.prepareHeaders(); try { const response = await this.getRequest( `${this.apiUrl}/v1/deep-research/${id}`, headers ); if (response.status === 200) { return response.data; } else if (response.status === 404) { throw new FirecrawlError("Deep research job not found", 404); } else { this.handleError(response, "check deep research status"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * @deprecated Use deepResearch() instead * Initiates a deep research operation on a given topic and polls until completion. * @param topic - The topic to research. * @param params - Parameters for the deep research operation. * @param onActivity - Optional callback to receive activity updates in real-time. * @returns The final research results. */ async __deepResearch(topic, params, onActivity) { try { const response = await this.__asyncDeepResearch(topic, params); if (!response.success || "error" in response) { return { success: false, error: "error" in response ? response.error : "Unknown error" }; } if (!response.id) { throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500); } const jobId = response.id; let researchStatus; let lastActivityCount = 0; while (true) { researchStatus = await this.__checkDeepResearchStatus(jobId); if ("error" in researchStatus && !researchStatus.success) { return researchStatus; } if (onActivity && researchStatus.activities) { const newActivities = researchStatus.activities.slice(lastActivityCount); for (const activity of newActivities) { onActivity(activity); } lastActivityCount = researchStatus.activities.length; } if (researchStatus.status === "completed") { return researchStatus; } if (researchStatus.status === "failed") { throw new FirecrawlError( `Research job ${researchStatus.status}. Error: ${researchStatus.error}`, 500 ); } if (researchStatus.status !== "processing") { break; } await new Promise((resolve) => setTimeout(resolve, 2e3)); } return { success: false, error: "Research job terminated unexpectedly" }; } catch (error) { throw new FirecrawlError(error.message, 500, error.response?.data?.details); } } /** * @deprecated Use asyncDeepResearch() instead * Initiates a deep research operation on a given topic without polling. * @param params - Parameters for the deep research operation. * @returns The response containing the research job ID. */ async __asyncDeepResearch(topic, params) { const headers = this.prepareHeaders(); try { let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` }; const response = await this.postRequest( `${this.apiUrl}/v1/deep-research`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start deep research"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." 
}; } /** * @deprecated Use checkDeepResearchStatus() instead * Checks the status of a deep research operation. * @param id - The ID of the deep research operation. * @returns The current status and results of the research operation. */ async __checkDeepResearchStatus(id) { const headers = this.prepareHeaders(); try { const response = await this.getRequest( `${this.apiUrl}/v1/deep-research/${id}`, headers ); if (response.status === 200) { return response.data; } else if (response.status === 404) { throw new FirecrawlError("Deep research job not found", 404); } else { this.handleError(response, "check deep research status"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Generates LLMs.txt for a given URL and polls until completion. * @param url - The URL to generate LLMs.txt from. * @param params - Parameters for the LLMs.txt generation operation. * @returns The final generation results. */ async generateLLMsText(url, params) { try { const response = await this.asyncGenerateLLMsText(url, params); if (!response.success || "error" in response) { return { success: false, error: "error" in response ? response.error : "Unknown error" }; } if (!response.id) { throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500); } const jobId = response.id; let generationStatus; while (true) { generationStatus = await this.checkGenerateLLMsTextStatus(jobId); if ("error" in generationStatus && !generationStatus.success) { return generationStatus; } if (generationStatus.status === "completed") { return generationStatus; } if (generationStatus.status === "failed") { throw new FirecrawlError( `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`, 500 ); } if (generationStatus.status !== "processing") { break; } await new Promise((resolve) => setTimeout(resolve, 2e3)); } return { success: false, error: "LLMs.txt generation job terminated unexpectedly" }; } catch (error) { throw new FirecrawlError(error.message, 500, error.response?.data?.details); } } /** * Initiates a LLMs.txt generation operation without polling. * @param url - The URL to generate LLMs.txt from. * @param params - Parameters for the LLMs.txt generation operation. * @returns The response containing the generation job ID. */ async asyncGenerateLLMsText(url, params) { const headers = this.prepareHeaders(); let jsonData = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response = await this.postRequest( `${this.apiUrl}/v1/llmstxt`, jsonData, headers ); if (response.status === 200) { return response.data; } else { this.handleError(response, "start LLMs.txt generation"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } /** * Checks the status of a LLMs.txt generation operation. * @param id - The ID of the LLMs.txt generation operation. 
* @returns The current status and results of the generation operation. */ async checkGenerateLLMsTextStatus(id) { const headers = this.prepareHeaders(); try { const response = await this.getRequest( `${this.apiUrl}/v1/llmstxt/${id}`, headers ); if (response.status === 200) { return response.data; } else if (response.status === 404) { throw new FirecrawlError("LLMs.txt generation job not found", 404); } else { this.handleError(response, "check LLMs.txt generation status"); } } catch (error) { if (error.response?.data?.error) { throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status); } else { throw new FirecrawlError(error.message, 500); } } return { success: false, error: "Internal server error." }; } }; var CrawlWatcher = class extends e { ws; data; status; id; constructor(id, app) { super(); this.id = id; const wsUrl = app.apiUrl.replace(/^http/, "ws"); this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey); this.status = "scraping"; this.data = []; const messageHandler = (msg) => { if (msg.type === "done") { this.status = "completed";
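// Usage sketch for generateLLMsText() defined above (illustrative, not part of the original
// bundle). Options in `params` are forwarded verbatim to /v1/llmstxt, and the shape of the
// completed result beyond `success` and `status` is not visible in this truncated file, so
// both are left as assumptions here.
//   const gen = await app.generateLLMsText("https://firecrawl.dev", {});
//   if (gen.success && gen.status === "completed") console.log(gen);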