/**
 * E2E tests for v2 batch scrape (translated from Python tests)
 */
import Firecrawl from "../../../index";
import { config } from "dotenv";
import { getIdentity, getApiUrl } from "./utils/idmux";
import { describe, test, expect, beforeAll } from "@jest/globals";

config();

const API_URL = getApiUrl();

let client: Firecrawl;
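
// Provision a test identity (API key) once and share a single client across all tests.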
beforeAll(async () => {
  const { apiKey } = await getIdentity({ name: "js-e2e-batch" });
  client = new Firecrawl({ apiKey, apiUrl: API_URL });
});
describe("v2.batch e2e", () => {
test("batch scrape minimal (wait)", async () => {
const urls = [
"https://docs.firecrawl.dev",
"https://firecrawl.dev",
];
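    // Waiter variant: batchScrape starts the job and polls until it reaches a
    // terminal state or the configured timeout elapses.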
    const job = await client.batchScrape(urls, { options: { formats: ["markdown"] }, pollInterval: 1, timeout: 180 });
    expect(["completed", "failed"]).toContain(job.status);
    expect(job.completed).toBeGreaterThanOrEqual(0);
    expect(job.total).toBeGreaterThanOrEqual(0);
    expect(Array.isArray(job.data)).toBe(true);
  }, 240_000);
test("batch scrape with wait returns job id for error retrieval", async () => {
const urls = [
"https://docs.firecrawl.dev",
"https://firecrawl.dev",
];
const job = await client.batchScrape(urls, { options: { formats: ["markdown"] }, pollInterval: 1, timeout: 180 });
// Verify job has id field
expect(job.id).toBeDefined();
expect(typeof job.id).toBe("string");
// Verify we can use the id to retrieve errors
const errors = await client.getBatchScrapeErrors(job.id!);
expect(errors).toHaveProperty("errors");
expect(errors).toHaveProperty("robotsBlocked");
expect(Array.isArray(errors.errors)).toBe(true);
expect(Array.isArray(errors.robotsBlocked)).toBe(true);
}, 240_000);
test("start batch minimal and status", async () => {
const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
    const start = await client.startBatchScrape(urls, { options: { formats: ["markdown"] }, ignoreInvalidURLs: true });
    expect(typeof start.id).toBe("string");
    expect(typeof start.url).toBe("string");

    const status = await client.getBatchScrapeStatus(start.id);
    expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
    expect(status.total).toBeGreaterThanOrEqual(0);

    // Verify status includes id field
    expect(status.id).toBeDefined();
    expect(status.id).toBe(start.id);
  }, 120_000);
test("wait batch with all params", async () => {
const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
    const job = await client.batchScrape(urls, {
      options: {
        formats: [
          "markdown",
          { type: "json", prompt: "Extract page title", schema: { type: "object", properties: { title: { type: "string" } }, required: ["title"] } },
          { type: "changeTracking", prompt: "Track changes", modes: ["json"] },
        ],
        onlyMainContent: true,
        mobile: false,
      },
      ignoreInvalidURLs: true,
      maxConcurrency: 2,
      zeroDataRetention: false,
      pollInterval: 1,
      timeout: 180,
    });

    expect(["completed", "failed", "cancelled"]).toContain(job.status);
    expect(job.completed).toBeGreaterThanOrEqual(0);
    expect(job.total).toBeGreaterThanOrEqual(0);
    expect(Array.isArray(job.data)).toBe(true);
  }, 300_000);
test("cancel batch", async () => {
const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
    const start = await client.startBatchScrape(urls, { options: { formats: ["markdown"] }, maxConcurrency: 1 });
    expect(typeof start.id).toBe("string");

    const cancelled = await client.cancelBatchScrape(start.id);
    expect(cancelled).toBe(true);
  }, 120_000);
});