@mendable/firecrawl-js
Version:
JavaScript SDK for Firecrawl API
160 lines (146 loc) • 5.11 kB
text/typescript
import { EventEmitter } from "events";
import type { BatchScrapeJob, CrawlJob, Document } from "./types";
import type { HttpClient } from "./utils/httpClient";
import { getBatchScrapeStatus } from "./methods/batch";
import { getCrawlStatus } from "./methods/crawl";
type JobKind = "crawl" | "batch";
export interface WatcherOptions {
kind?: JobKind;
pollInterval?: number; // seconds
timeout?: number; // seconds
}
type Snapshot = CrawlJob | BatchScrapeJob;
export class Watcher extends EventEmitter {
private readonly http: HttpClient;
private readonly jobId: string;
private readonly kind: JobKind;
private readonly pollInterval: number;
private readonly timeout?: number;
private ws?: WebSocket;
private closed = false;
constructor(http: HttpClient, jobId: string, opts: WatcherOptions = {}) {
super();
this.http = http;
this.jobId = jobId;
this.kind = opts.kind ?? "crawl";
this.pollInterval = opts.pollInterval ?? 2;
this.timeout = opts.timeout;
}
private buildWsUrl(): string {
// replace http/https with ws/wss
const apiUrl = this.http.getApiUrl();
const wsBase = apiUrl.replace(/^http/, "ws");
const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
return `${wsBase}${path}`;
}
async start(): Promise<void> {
try {
const url = this.buildWsUrl();
// Pass API key as subprotocol for browser compatibility
this.ws = new WebSocket(url, this.http.getApiKey());
this.attachWsHandlers(this.ws);
} catch {
// Fallback to polling immediately
this.pollLoop();
}
}
private attachWsHandlers(ws: WebSocket) {
let startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1000 : undefined;
ws.onmessage = (ev: MessageEvent) => {
try {
const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
if (!body) return;
const type = body.type as string | undefined;
if (type === "error") {
this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
return;
}
if (type === "catchup") {
const payload = body.data || {};
this.emitDocuments(payload.data || []);
this.emitSnapshot(payload);
return;
}
if (type === "document") {
const doc = body.data;
if (doc) this.emit("document", doc as Document & { id: string });
return;
}
if (type === "done") {
this.emit("done", { status: "completed", data: [], id: this.jobId });
this.close();
return;
}
const payload = body.data || body;
if (payload && payload.status) this.emitSnapshot(payload);
} catch {
// ignore
}
if (timeoutMs && Date.now() - startTs > timeoutMs) this.close();
};
ws.onerror = () => {
this.emit("error", { status: "failed", data: [], error: "WebSocket error", id: this.jobId });
this.close();
};
ws.onclose = () => {
if (!this.closed) this.pollLoop();
};
}
private emitDocuments(docs: Document[]) {
for (const doc of docs) this.emit("document", { ...(doc as any), id: this.jobId });
}
private emitSnapshot(payload: any) {
const status = payload.status as Snapshot["status"];
const data = (payload.data || []) as Document[];
const snap: Snapshot = this.kind === "crawl"
? {
status,
completed: payload.completed ?? 0,
total: payload.total ?? 0,
creditsUsed: payload.creditsUsed,
expiresAt: payload.expiresAt,
next: payload.next ?? null,
data,
}
: {
status,
completed: payload.completed ?? 0,
total: payload.total ?? 0,
creditsUsed: payload.creditsUsed,
expiresAt: payload.expiresAt,
next: payload.next ?? null,
data,
};
this.emit("snapshot", snap);
if (["completed", "failed", "cancelled"].includes(status)) {
this.emit("done", { status, data, id: this.jobId });
this.close();
}
}
private async pollLoop() {
const startTs = Date.now();
const timeoutMs = this.timeout ? this.timeout * 1000 : undefined;
while (!this.closed) {
try {
const snap = this.kind === "crawl"
? await getCrawlStatus(this.http as any, this.jobId)
: await getBatchScrapeStatus(this.http as any, this.jobId);
this.emit("snapshot", snap);
if (["completed", "failed", "cancelled"].includes(snap.status)) {
this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
this.close();
break;
}
} catch {
// ignore polling errors
}
if (timeoutMs && Date.now() - startTs > timeoutMs) break;
await new Promise((r) => setTimeout(r, Math.max(1000, this.pollInterval * 1000)));
}
}
close() {
this.closed = true;
if (this.ws && (this.ws as any).close) (this.ws as any).close();
}
}