crawl4ai
Version:
TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible
490 lines (479 loc) • 14.6 kB
JavaScript
// src/errors.ts
/**
 * Base class for every error raised by this SDK.
 *
 * Besides the usual `Error` fields it can carry HTTP context: `status`,
 * `statusText` and `data` are taken from the constructor arguments, while
 * `request` is only declared here and is never assigned by this constructor.
 */
class Crawl4AIError extends Error {
  status;
  statusText;
  data;
  request;
  /**
   * @param message - Description forwarded to the built-in `Error`.
   * @param status - Optional status value; stored only when not `undefined`.
   * @param statusText - Optional status text; stored only when not `undefined`.
   * @param data - Optional payload; stored only when not `undefined`.
   */
  constructor(message, status, statusText, data) {
    super(message);
    this.name = "Crawl4AIError";
    // Assign, in declaration order, only the arguments that were supplied.
    // Falsy-but-defined values (0, "", null) are kept on purpose.
    const supplied = Object.entries({ status, statusText, data }).filter(
      ([, value]) => value !== undefined
    );
    Object.assign(this, Object.fromEntries(supplied));
  }
}
/**
 * Error for failures that happen at the transport level, i.e. without any
 * HTTP status attached (the base constructor receives only the message).
 */
class NetworkError extends Crawl4AIError {
  /**
   * @param message - Description of the failure.
   * @param cause - Optional underlying error; stored on `cause` when truthy.
   */
  constructor(message, cause) {
    super(message);
    this.name = "NetworkError";
    if (!cause) {
      return;
    }
    this.cause = cause;
  }
}
/**
 * Network error signalling that a request did not finish in time.
 * The configured limit is kept on `timeout`.
 */
class TimeoutError extends NetworkError {
  timeout;
  /**
   * @param timeout - The limit that was exceeded; the message reports it in ms.
   * @param url - Optional request URL; included in the message when truthy.
   */
  constructor(timeout, url) {
    let description = `Request timed out after ${timeout}ms`;
    if (url) {
      description = `Request to ${url} timed out after ${timeout}ms`;
    }
    super(description);
    this.name = "TimeoutError";
    this.timeout = timeout;
  }
}
/**
 * Error for invalid input. Always carries status 400 / "Bad Request" and can
 * name the offending `field` together with the rejected `value`.
 */
class RequestValidationError extends Crawl4AIError {
  field;
  value;
  /**
   * @param message - Description of what is wrong.
   * @param field - Optional name of the invalid field; stored when not `undefined`.
   * @param value - Optional rejected value; stored when not `undefined`.
   */
  constructor(message, field, value) {
    super(message, 400, "Bad Request");
    this.name = "RequestValidationError";
    // Only copy what the caller actually passed, keeping field-before-value order.
    const supplied = Object.entries({ field, value }).filter(
      ([, candidate]) => candidate !== undefined
    );
    Object.assign(this, Object.fromEntries(supplied));
  }
}
/**
 * Error for HTTP 429 responses. Always carries status 429 /
 * "Too Many Requests" and exposes rate-limit details when they are available.
 *
 * - `retryAfter`: value passed by the caller (the retry loop in this file
 *   multiplies it by 1000 before sleeping, i.e. treats it as seconds).
 * - `limit`, `remaining`: integers parsed from `x-ratelimit-limit` and
 *   `x-ratelimit-remaining`.
 * - `reset`: `Date` built from `x-ratelimit-reset` interpreted as a Unix
 *   timestamp in seconds.
 *
 * Header values that are missing or do not parse as an integer leave the
 * corresponding property unset instead of storing `NaN` / `Invalid Date`.
 */
class RateLimitError extends Crawl4AIError {
  retryAfter;
  limit;
  remaining;
  reset;
  /**
   * @param message - Description of the failure.
   * @param retryAfter - Optional delay before retrying; ignored when
   *   `undefined` or `NaN`.
   * @param headers - Optional plain object of response headers keyed by
   *   lower-case header name.
   */
  constructor(message, retryAfter, headers) {
    super(message, 429, "Too Many Requests");
    this.name = "RateLimitError";
    if (retryAfter !== undefined && !Number.isNaN(retryAfter)) {
      this.retryAfter = retryAfter;
    }
    if (!headers) {
      return;
    }
    // Returns the header as an integer, or undefined when absent/non-numeric.
    const readInteger = (headerName) => {
      const raw = headers[headerName];
      if (!raw) {
        return undefined;
      }
      const parsed = parseInt(raw, 10);
      return Number.isNaN(parsed) ? undefined : parsed;
    };
    const limit = readInteger("x-ratelimit-limit");
    if (limit !== undefined) {
      this.limit = limit;
    }
    const remaining = readInteger("x-ratelimit-remaining");
    if (remaining !== undefined) {
      this.remaining = remaining;
    }
    const resetSeconds = readInteger("x-ratelimit-reset");
    if (resetSeconds !== undefined) {
      this.reset = new Date(resetSeconds * 1000);
    }
  }
}
/**
 * Error for authentication/authorization failures.
 * The status text is "Unauthorized" for 401 and "Forbidden" for anything else.
 */
class AuthError extends Crawl4AIError {
  /**
   * @param message - Description; defaults to "Authentication failed".
   * @param status - Status code; defaults to 401.
   */
  constructor(message = "Authentication failed", status = 401) {
    const reason = status === 401 ? "Unauthorized" : "Forbidden";
    super(message, status, reason);
    this.name = "AuthError";
  }
}
/**
 * Error for server-side failures.
 */
class ServerError extends Crawl4AIError {
  /**
   * @param message - Description; defaults to "Internal server error".
   * @param status - Status code; defaults to 500.
   * @param statusText - Optional status text; a falsy value is replaced by
   *   "Internal Server Error".
   */
  constructor(message = "Internal server error", status = 500, statusText) {
    const reason = statusText ? statusText : "Internal Server Error";
    super(message, status, reason);
    this.name = "ServerError";
  }
}
/**
 * Error for missing resources. Always carries status 404 / "Not Found".
 */
class NotFoundError extends Crawl4AIError {
  resource;
  /**
   * @param resource - Optional identifier of what was not found. When truthy
   *   it is appended to the message and stored on `resource`.
   */
  constructor(resource) {
    const hasResource = Boolean(resource);
    super(
      hasResource ? `Resource not found: ${resource}` : "Resource not found",
      404,
      "Not Found"
    );
    this.name = "NotFoundError";
    if (hasResource) {
      this.resource = resource;
    }
  }
}
/**
 * Error for a response that could not be parsed. No HTTP status is attached.
 */
class ParseError extends Crawl4AIError {
  responseText;
  /**
   * @param message - Description of the parse failure.
   * @param responseText - Optional raw text that failed to parse; stored on
   *   `responseText` when truthy (an empty string is not stored).
   */
  constructor(message, responseText) {
    super(message);
    this.name = "ParseError";
    if (!responseText) {
      return;
    }
    this.responseText = responseText;
  }
}
/**
 * Tells whether a caught value is an SDK error.
 *
 * @param error - Any value, typically from a `catch` clause.
 * @returns `true` when `error` is an instance of `Crawl4AIError` (including
 *   subclasses), otherwise `false`.
 */
function isCrawl4AIError(error) {
  const matches = error instanceof Crawl4AIError;
  return matches;
}
/**
 * Tells whether a caught value is a rate-limit error.
 *
 * @param error - Any value, typically from a `catch` clause.
 * @returns `true` when `error` is an instance of `RateLimitError`.
 */
function isRateLimitError(error) {
  const matches = error instanceof RateLimitError;
  return matches;
}
/**
 * Tells whether a caught value is an authentication/authorization error.
 *
 * @param error - Any value, typically from a `catch` clause.
 * @returns `true` when `error` is an instance of `AuthError`.
 */
function isAuthError(error) {
  const matches = error instanceof AuthError;
  return matches;
}
/**
 * Tells whether a caught value is a network-level error.
 *
 * @param error - Any value, typically from a `catch` clause.
 * @returns `true` when `error` is an instance of `NetworkError`; this also
 *   covers `TimeoutError`, which extends it.
 */
function isNetworkError(error) {
  const matches = error instanceof NetworkError;
  return matches;
}
/**
 * Builds the SDK error that corresponds to an HTTP status code.
 *
 * Mapping: 400 -> RequestValidationError, 401/403 -> AuthError,
 * 404 -> NotFoundError, 429 -> RateLimitError, 500/502/503/504 -> ServerError,
 * anything else -> Crawl4AIError.
 *
 * The response body passed as `data` is attached to the returned error for
 * every status, so callers can always inspect what the server sent.
 *
 * @param status - HTTP status code of the response.
 * @param statusText - HTTP status text of the response.
 * @param message - Optional explicit message; when falsy the message defaults
 *   to `HTTP <status>: <statusText>` (404 keeps NotFoundError's own default).
 * @param data - Optional response body to expose on `error.data`.
 * @param headers - Optional plain object of response headers keyed by
 *   lower-case name; used for `retry-after` and the rate-limit headers.
 * @returns The constructed error instance (it is returned, not thrown).
 */
function createHttpError(status, statusText, message, data, headers) {
  const errorMessage = message || `HTTP ${status}: ${statusText}`;
  let error;
  switch (status) {
    case 400:
      error = new RequestValidationError(errorMessage);
      break;
    case 401:
    case 403:
      error = new AuthError(errorMessage, status);
      break;
    case 404:
      error = new NotFoundError();
      // Keep NotFoundError's default wording unless the caller supplied one.
      if (message) {
        error.message = message;
      }
      break;
    case 429: {
      // Retry-After is either a number of seconds or an HTTP-date.
      const rawRetryAfter = headers?.["retry-after"];
      let retryAfter;
      if (rawRetryAfter) {
        const seconds = parseInt(rawRetryAfter, 10);
        if (!Number.isNaN(seconds)) {
          retryAfter = seconds;
        } else {
          const retryAt = Date.parse(rawRetryAfter);
          if (!Number.isNaN(retryAt)) {
            // Convert the absolute date into a non-negative delay in seconds.
            retryAfter = Math.max(0, Math.ceil((retryAt - Date.now()) / 1000));
          }
        }
      }
      error = new RateLimitError(errorMessage, retryAfter, headers);
      break;
    }
    case 500:
    case 502:
    case 503:
    case 504:
      error = new ServerError(errorMessage, status, statusText);
      break;
    default:
      return new Crawl4AIError(errorMessage, status, statusText, data);
  }
  // The specialised constructors take no body argument, so attach it here.
  if (data !== undefined) {
    error.data = data;
  }
  return error;
}
// src/sdk.ts
// Default per-request timeout in milliseconds (5 minutes).
const DEFAULT_TIMEOUT = 5 * 60 * 1000;
// Default number of retries performed after the first failed attempt.
const DEFAULT_RETRIES = 3;
// Base delay before the first retry, in milliseconds.
const DEFAULT_RETRY_DELAY = 1 * 1000;
// Factor by which the retry delay grows with each attempt.
const RETRY_BACKOFF_MULTIPLIER = 2;
// Timeout in milliseconds used for the connection test's health request.
const HEALTH_CHECK_TIMEOUT = 5 * 1000;
// Status range [CLIENT_ERROR_MIN, CLIENT_ERROR_MAX) marks client errors.
const CLIENT_ERROR_MIN = 400;
const CLIENT_ERROR_MAX = 500;
// Status code that signals rate limiting.
const RATE_LIMIT_STATUS = 429;
/**
 * Client for the Crawl4AI REST API.
 *
 * Wraps `fetch` with a per-request timeout, retry with exponential backoff,
 * and translation of failing HTTP statuses into the SDK's error classes.
 */
class Crawl4AI {
  config;

  /**
   * Validates the configuration and merges it with the defaults.
   *
   * @param config - Client options. `baseUrl` is required and must be a valid
   *   absolute URL. Optional: `apiToken`, `timeout` (ms, > 0), `retries`
   *   (integer >= 0), `retryDelay` (ms, >= 0), `defaultHeaders`,
   *   `throwOnError`, `validateStatus`, `debug`.
   * @throws {RequestValidationError} When `baseUrl` is missing/invalid or a
   *   numeric option is out of range.
   */
  constructor(config) {
    if (!config.baseUrl) {
      throw new RequestValidationError("baseUrl is required in configuration", "baseUrl");
    }
    try {
      new URL(config.baseUrl);
    } catch {
      throw new RequestValidationError(`Invalid baseUrl: ${config.baseUrl}`, "baseUrl", config.baseUrl);
    }
    const defaults = {
      apiToken: "",
      timeout: DEFAULT_TIMEOUT,
      retries: DEFAULT_RETRIES,
      retryDelay: DEFAULT_RETRY_DELAY,
      defaultHeaders: { "Content-Type": "application/json" },
      throwOnError: true,
      validateStatus: (status) => status < CLIENT_ERROR_MIN,
      debug: false
    };
    if (config.timeout !== undefined && (config.timeout <= 0 || !Number.isFinite(config.timeout))) {
      throw new RequestValidationError("timeout must be a positive number", "timeout", config.timeout);
    }
    if (config.retries !== undefined && (config.retries < 0 || !Number.isInteger(config.retries))) {
      throw new RequestValidationError("retries must be a non-negative integer", "retries", config.retries);
    }
    if (config.retryDelay !== undefined && (config.retryDelay < 0 || !Number.isFinite(config.retryDelay))) {
      throw new RequestValidationError("retryDelay must be a non-negative number", "retryDelay", config.retryDelay);
    }
    // Drop keys explicitly set to `undefined`: spreading them would overwrite
    // the defaults (e.g. `timeout: undefined` makes every request time out
    // immediately, `retries: undefined` makes the retry loop run zero times).
    const provided = Object.fromEntries(
      Object.entries(config).filter(([, value]) => value !== undefined)
    );
    this.config = {
      ...defaults,
      ...provided,
      // Stored without a trailing slash because endpoints start with "/".
      baseUrl: config.baseUrl.replace(/\/$/, ""),
      defaultHeaders: {
        ...defaults.defaultHeaders,
        ...config.defaultHeaders
      },
      throwOnError: config.throwOnError ?? defaults.throwOnError,
      validateStatus: config.validateStatus || defaults.validateStatus
    };
    if (this.config.apiToken) {
      this.config.defaultHeaders.Authorization = `Bearer ${this.config.apiToken}`;
    }
  }

  /**
   * Ensures `url` can be parsed by the `URL` constructor.
   *
   * @throws {RequestValidationError} When parsing fails.
   */
  validateUrl(url) {
    try {
      new URL(url);
    } catch {
      throw new RequestValidationError(`Invalid URL: ${url}`, "url", url);
    }
  }

  /**
   * Writes a prefixed message to the console when `config.debug` is enabled.
   *
   * @param message - Text to log.
   * @param data - Optional extra value; a falsy value is logged as "".
   */
  log(message, data) {
    if (this.config.debug) {
      console.log(`[Crawl4AI] ${message}`, data || "");
    }
  }

  /**
   * Coerces a response into an array.
   *
   * @returns The response itself when it is an array; otherwise its `results`
   *   or `result` property when that is an array; otherwise the response
   *   wrapped in a one-element array.
   */
  normalizeArrayResponse(response) {
    if (Array.isArray(response)) {
      return response;
    }
    if (typeof response === "object" && response !== null) {
      const apiResponse = response;
      if (apiResponse.results && Array.isArray(apiResponse.results)) {
        return apiResponse.results;
      }
      if (apiResponse.result && Array.isArray(apiResponse.result)) {
        return apiResponse.result;
      }
    }
    return [response];
  }

  /**
   * Serialises an object into a URL query string, skipping `undefined` values.
   *
   * @returns The encoded query string without a leading "?".
   */
  buildQueryParams(params) {
    const searchParams = new URLSearchParams();
    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined) {
        searchParams.append(key, String(value));
      }
    }
    return searchParams.toString();
  }

  /**
   * Performs a single HTTP request against `baseUrl + endpoint`.
   *
   * The timeout applies until the response headers arrive. A caller-supplied
   * `signal` is linked to the internal controller, so both the timeout and
   * the caller's cancellation can abort the request.
   *
   * @param endpoint - Path (starting with "/") appended to the base URL.
   * @param options - `fetch` options plus `timeout` (ms), `signal` and
   *   `headers` (merged over the default headers).
   * @returns Parsed JSON for `application/json`, the raw `Response` for
   *   `text/event-stream`, and the body text otherwise. When the status is
   *   rejected by `validateStatus` but `throwOnError` is false, the body is
   *   returned as well.
   * @throws {TimeoutError} When the timeout elapsed.
   * @throws {NetworkError} When the caller's signal aborted the request or
   *   `fetch` failed with a `TypeError` mentioning "fetch".
   * @throws {ParseError} When a successful JSON response cannot be parsed.
   * @throws {Crawl4AIError} For statuses rejected by `validateStatus` when
   *   `throwOnError` is true.
   */
  async request(endpoint, options = {}) {
    const url = `${this.config.baseUrl}${endpoint}`;
    const { timeout = this.config.timeout, signal, headers, ...fetchOptions } = options;
    this.log(`Request: ${fetchOptions.method || "GET"} ${url}`, fetchOptions.body);
    const requestHeaders = {
      ...this.config.defaultHeaders,
      ...headers
    };
    const controller = new AbortController();
    // Remembers whether the abort came from the timer, so a caller
    // cancellation is not misreported as a timeout.
    let timedOut = false;
    const timeoutId = setTimeout(() => {
      if (!controller.signal.aborted) {
        timedOut = true;
        controller.abort();
      }
    }, timeout);
    // Forward the caller's cancellation to the internal controller instead of
    // replacing the internal signal, which would disable the timeout.
    const forwardAbort = () => controller.abort(signal.reason);
    // A returned event stream must stay cancellable after this method exits.
    let keepAbortLink = false;
    if (signal) {
      if (signal.aborted) {
        forwardAbort();
      } else {
        signal.addEventListener("abort", forwardAbort, { once: true });
      }
    }
    try {
      const response = await fetch(url, {
        ...fetchOptions,
        headers: requestHeaders,
        signal: controller.signal
      });
      clearTimeout(timeoutId);
      const contentType = response.headers.get("content-type") || "";
      let responseData;
      if (contentType.includes("application/json")) {
        const rawBody = await response.text();
        try {
          responseData = JSON.parse(rawBody);
        } catch (parseFailure) {
          // For failing statuses keep the raw text so the HTTP error below is
          // raised; for accepted statuses the body itself is the problem.
          if (this.config.validateStatus(response.status)) {
            const reason = parseFailure instanceof Error ? parseFailure.message : String(parseFailure);
            throw new ParseError(`Failed to parse JSON response from ${url}: ${reason}`, rawBody);
          }
          responseData = rawBody;
        }
      } else if (contentType.includes("text/event-stream")) {
        // Streams are handed back unread for the caller to consume.
        keepAbortLink = true;
        return response;
      } else {
        responseData = await response.text();
      }
      this.log(`Response: ${response.status}`, responseData);
      if (!this.config.validateStatus(response.status)) {
        const responseHeaders = {};
        response.headers.forEach((value, key) => {
          responseHeaders[key] = value;
        });
        const error = createHttpError(response.status, response.statusText, undefined, responseData, responseHeaders);
        error.request = {
          url,
          method: fetchOptions.method || "GET",
          headers: requestHeaders,
          body: fetchOptions.body
        };
        if (this.config.throwOnError) {
          throw error;
        }
      }
      return responseData;
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        if (timedOut) {
          throw new TimeoutError(timeout, url);
        }
        throw new NetworkError(`Request to ${url} was aborted`, error);
      }
      if (error instanceof TypeError && error.message.includes("fetch")) {
        throw new NetworkError(`Network request failed: ${error.message}`, error);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
      if (signal && !keepAbortLink) {
        signal.removeEventListener("abort", forwardAbort);
      }
    }
  }

  /**
   * Calls `request` and retries on failure, up to `config.retries` extra
   * attempts.
   *
   * Not retried: client errors (400-499) other than 429, and any failure
   * after the caller's `signal` has been aborted. The delay doubles per
   * attempt starting at `config.retryDelay`; for a rate-limit error with a
   * `retryAfter` value that many seconds are waited instead.
   *
   * @throws The last error once all attempts are exhausted.
   */
  async requestWithRetry(endpoint, options = {}) {
    let lastError = new Error("No attempts made");
    for (let attempt = 0; attempt <= this.config.retries; attempt++) {
      try {
        return await this.request(endpoint, options);
      } catch (error) {
        lastError = error;
        // The caller cancelled: every further attempt would abort at once.
        if (options.signal?.aborted) {
          throw error;
        }
        if (error instanceof Crawl4AIError && error.status && error.status >= CLIENT_ERROR_MIN && error.status < CLIENT_ERROR_MAX && error.status !== RATE_LIMIT_STATUS) {
          throw error;
        }
        if (attempt < this.config.retries) {
          let delay = this.config.retryDelay * RETRY_BACKOFF_MULTIPLIER ** attempt;
          if (error instanceof RateLimitError && error.retryAfter) {
            delay = error.retryAfter * 1000;
            this.log(`Rate limited. Waiting ${error.retryAfter}s before retry (attempt ${attempt + 1}/${this.config.retries})`);
          } else {
            this.log(`Retry attempt ${attempt + 1}/${this.config.retries} after ${delay}ms`);
          }
          await new Promise((resolve) => setTimeout(resolve, delay));
        }
      }
    }
    throw lastError;
  }

  /**
   * POSTs a crawl request to `/crawl`.
   *
   * @param request - Crawl payload; `urls` may be a single URL or an array
   *   and is always sent as an array.
   * @param config - Optional per-request options spread into the request.
   * @returns The response normalised to an array.
   * @throws {RequestValidationError} When any URL is invalid.
   */
  async crawl(request, config) {
    const urls = Array.isArray(request.urls) ? request.urls : [request.urls];
    for (const url of urls) {
      this.validateUrl(url);
    }
    const normalizedRequest = {
      ...request,
      urls
    };
    const response = await this.requestWithRetry("/crawl", {
      method: "POST",
      body: JSON.stringify(normalizedRequest),
      ...config
    });
    return this.normalizeArrayResponse(response);
  }

  /**
   * POSTs to `/md`, sending `filter`, `query` and `cache` under the short
   * keys `f`, `q` and `c` when they are defined.
   *
   * @returns The response when it is a string, otherwise its `markdown`
   *   property.
   */
  async markdown(request, config) {
    this.validateUrl(request.url);
    const apiRequest = {
      url: request.url,
      ...(request.filter !== undefined && { f: request.filter }),
      ...(request.query !== undefined && { q: request.query }),
      ...(request.cache !== undefined && { c: request.cache })
    };
    const response = await this.requestWithRetry("/md", {
      method: "POST",
      body: JSON.stringify(apiRequest),
      ...config
    });
    return typeof response === "string" ? response : response.markdown;
  }

  /**
   * POSTs the request to `/html`.
   *
   * @returns The response when it is a string, otherwise its `html` property.
   */
  async html(request, config) {
    this.validateUrl(request.url);
    const response = await this.requestWithRetry("/html", {
      method: "POST",
      body: JSON.stringify(request),
      ...config
    });
    return typeof response === "string" ? response : response.html;
  }

  /**
   * POSTs the request to `/execute_js` and returns the response unchanged.
   */
  async executeJs(request, config) {
    this.validateUrl(request.url);
    return this.requestWithRetry("/execute_js", {
      method: "POST",
      body: JSON.stringify(request),
      ...config
    });
  }

  /**
   * GETs `/ask` with the optional `context_type`, `query`, `score_ratio` and
   * `max_results` parameters.
   *
   * @returns An object with `context` (the `text` of every result joined by
   *   newlines), `type` (the requested `context_type`, "doc" by default),
   *   `results_count`, and `query` when one was supplied. Results are taken
   *   from the first truthy field among `doc_results`, `code_results` and
   *   `all_results`.
   */
  async ask(params, config) {
    const queryString = this.buildQueryParams({
      context_type: params?.context_type,
      query: params?.query,
      score_ratio: params?.score_ratio,
      max_results: params?.max_results
    });
    const endpoint = `/ask${queryString ? `?${queryString}` : ""}`;
    const response = await this.requestWithRetry(endpoint, {
      method: "GET",
      ...config
    });
    const results = response.doc_results || response.code_results || response.all_results || [];
    const result = {
      context: results.map((r) => r.text).join("\n"),
      type: params?.context_type || "doc",
      results_count: results.length
    };
    if (params?.query !== undefined) {
      result.query = params.query;
    }
    return result;
  }

  /**
   * GETs `/llm/<encoded url>?q=<query>`.
   *
   * @returns The response when it is a string, otherwise its `answer`
   *   property, or "" when that is falsy.
   */
  async llm(url, query, config) {
    this.validateUrl(url);
    const encodedUrl = encodeURIComponent(url);
    const queryParams = new URLSearchParams({ q: query });
    const response = await this.requestWithRetry(`/llm/${encodedUrl}?${queryParams.toString()}`, {
      method: "GET",
      ...config
    });
    return typeof response === "string" ? response : response.answer || "";
  }

  /** GETs `/health` once (no retry) and returns the response. */
  async health(config) {
    return this.request("/health", {
      method: "GET",
      ...config
    });
  }

  /** GETs `/metrics` once (no retry) and returns the response. */
  async metrics(config) {
    return this.request("/metrics", {
      method: "GET",
      ...config
    });
  }

  /** GETs `/schema` once (no retry) and returns the response. */
  async schema(config) {
    return this.request("/schema", {
      method: "GET",
      ...config
    });
  }

  /** GETs `/` once (no retry) and returns the response. */
  async getRoot(config) {
    return this.request("/", {
      method: "GET",
      ...config
    });
  }

  /**
   * Checks reachability by calling `health` with a short timeout.
   *
   * @param options - Pass `{ throwOnError: true }` to rethrow the failure.
   * @returns `true` on success, `false` on failure when not rethrowing.
   */
  async testConnection(options) {
    try {
      await this.health({ timeout: HEALTH_CHECK_TIMEOUT });
      return true;
    } catch (error) {
      if (options?.throwOnError) {
        throw error;
      }
      return false;
    }
  }

  /**
   * Reads the `version` field of the health response.
   *
   * @param options - Pass `{ throwOnError: true }` to rethrow the failure.
   * @returns The version, or "unknown" when it is missing or the call failed
   *   without rethrowing.
   */
  async version(options) {
    try {
      const health = await this.health();
      return health.version || "unknown";
    } catch (error) {
      if (options?.throwOnError) {
        throw error;
      }
      return "unknown";
    }
  }

  /**
   * Replaces the API token. A truthy token sets the `Authorization: Bearer`
   * default header; a falsy one removes that header.
   */
  setApiToken(token) {
    this.config.apiToken = token;
    if (token) {
      this.config.defaultHeaders.Authorization = `Bearer ${token}`;
    } else {
      delete this.config.defaultHeaders.Authorization;
    }
  }

  /**
   * Replaces the base URL, applying the same validation and trailing-slash
   * removal as the constructor.
   *
   * @throws {RequestValidationError} When `baseUrl` is missing or not a valid
   *   URL; the previous base URL is kept in that case.
   */
  setBaseUrl(baseUrl) {
    if (!baseUrl) {
      throw new RequestValidationError("baseUrl is required in configuration", "baseUrl");
    }
    try {
      new URL(baseUrl);
    } catch {
      throw new RequestValidationError(`Invalid baseUrl: ${baseUrl}`, "baseUrl", baseUrl);
    }
    this.config.baseUrl = baseUrl.replace(/\/$/, "");
  }

  /** Turns debug logging on or off. */
  setDebug(debug) {
    this.config.debug = debug;
  }
}
var sdk_default = Crawl4AI;
export {
isRateLimitError,
isNetworkError,
isCrawl4AIError,
isAuthError,
sdk_default as default,
createHttpError,
TimeoutError,
ServerError,
RequestValidationError,
RateLimitError,
ParseError,
NotFoundError,
NetworkError,
Crawl4AIError,
Crawl4AI,
AuthError
};