firecrawl — JavaScript SDK for Firecrawl API
import { describe, test, expect, jest } from "@jest/globals";
import { getCrawlStatus } from "../../../v2/methods/crawl";
import { getBatchScrapeStatus } from "../../../v2/methods/batch";

describe("JS SDK v2 pagination", () => {
  // Minimal HTTP client stub: get() resolves with whatever the supplied
  // implementation returns, so each test can script its paginated responses.
  function makeHttp(getImpl: (url: string) => any) {
    return { get: jest.fn(async (u: string) => getImpl(u)) } as any;
  }
test("crawl: autoPaginate=false returns next", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 2, next: "https://api/next", data: [{ markdown: "a" }] } };
const http = makeHttp(() => first);
const res = await getCrawlStatus(http, "job1", { autoPaginate: false });
expect(res.data.length).toBe(1);
expect(res.next).toBe("https://api/next");
});
test("crawl: default autoPaginate aggregates and nulls next", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 3, next: "https://api/n1", data: [{ markdown: "a" }] } };
const second = { status: 200, data: { success: true, next: "https://api/n2", data: [{ markdown: "b" }] } };
const third = { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
const http = makeHttp((url) => {
if (url.includes("/v2/crawl/")) return first;
if (url.endsWith("n1")) return second;
return third;
});
const res = await getCrawlStatus(http, "job1");
expect(res.data.length).toBe(3);
expect(res.next).toBeNull();
});
test("crawl: respects maxPages and maxResults", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 10, next: "https://api/n1", data: [{ markdown: "a" }] } };
const page = (n: number) => ({ status: 200, data: { success: true, next: n < 3 ? `https://api/n${n + 1}` : null, data: [{ markdown: `p${n}` }] } });
const http = makeHttp((url) => {
if (url.includes("/v2/crawl/")) return first;
if (url.endsWith("n1")) return page(1);
if (url.endsWith("n2")) return page(2);
return page(3);
});
const res = await getCrawlStatus(http, "job1", { autoPaginate: true, maxPages: 2, maxResults: 2 });
expect(res.data.length).toBe(2);
});
test("batch: default autoPaginate aggregates and nulls next", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 3, next: "https://api/b1", data: [{ markdown: "a" }] } };
const second = { status: 200, data: { success: true, next: "https://api/b2", data: [{ markdown: "b" }] } };
const third = { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
const http = makeHttp((url) => {
if (url.includes("/v2/batch/scrape/")) return first;
if (url.endsWith("b1")) return second;
return third;
});
const res = await getBatchScrapeStatus(http, "jobB");
expect(res.data.length).toBe(3);
expect(res.next).toBeNull();
});
test("batch: autoPaginate=false returns next", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 2, next: "https://api/nextBatch", data: [{ markdown: "a" }] } };
const http = makeHttp(() => first);
const res = await getBatchScrapeStatus(http, "jobB", { autoPaginate: false });
expect(res.data.length).toBe(1);
expect(res.next).toBe("https://api/nextBatch");
});
test("crawl: maxWaitTime stops pagination after first page", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 5, next: "https://api/n1", data: [{ markdown: "a" }] } };
const p1 = { status: 200, data: { success: true, next: "https://api/n2", data: [{ markdown: "b" }] } };
const http: any = makeHttp((url: string) => {
if (url.includes("/v2/crawl/")) return first;
if (url.endsWith("n1")) return p1;
return { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
});
const nowSpy = jest.spyOn(Date, "now");
try {
nowSpy
.mockImplementationOnce(() => 0) // started
.mockImplementationOnce(() => 0) // first loop check
.mockImplementationOnce(() => 3000); // second loop check > maxWaitTime
const res = await getCrawlStatus(http, "jobC", { autoPaginate: true, maxWaitTime: 1 });
expect(res.data.length).toBe(2); // initial + first page
expect((http.get as jest.Mock).mock.calls.length).toBe(2); // initial + n1 only
} finally {
nowSpy.mockRestore();
}
});
test("batch: maxWaitTime stops pagination after first page", async () => {
const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 5, next: "https://api/b1", data: [{ markdown: "a" }] } };
const p1 = { status: 200, data: { success: true, next: "https://api/b2", data: [{ markdown: "b" }] } };
const http: any = makeHttp((url: string) => {
if (url.includes("/v2/batch/scrape/")) return first;
if (url.endsWith("b1")) return p1;
return { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
});
const nowSpy = jest.spyOn(Date, "now");
try {
nowSpy
.mockImplementationOnce(() => 0) // started
.mockImplementationOnce(() => 0) // first loop check
.mockImplementationOnce(() => 3000); // second loop check > maxWaitTime
const res = await getBatchScrapeStatus(http, "jobB", { autoPaginate: true, maxWaitTime: 1 });
expect(res.data.length).toBe(2);
expect((http.get as jest.Mock).mock.calls.length).toBe(2);
} finally {
nowSpy.mockRestore();
}
});
});
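
// Sketch (not part of the test file above): the pagination options these tests
// exercise, inferred only from the calls to getCrawlStatus/getBatchScrapeStatus.
// The SDK's real exported type may use a different name and stricter semantics;
// treat the interface name and per-field notes below as assumptions.
interface PaginationOptionsSketch {
  autoPaginate?: boolean; // when not false, `next` links are followed and each page's `data` is merged
  maxPages?: number;      // caps how many paginated pages are fetched
  maxResults?: number;    // caps the total number of aggregated documents
  maxWaitTime?: number;   // time budget for pagination; the units are not pinned down by these mocks
}

// Usage mirroring the tests, e.g.:
//   await getCrawlStatus(http, "job1", { autoPaginate: true, maxPages: 2, maxResults: 2 });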