rag-crawler
Crawl a website to generate a knowledge file for RAG
TypeScript
import { RequestInit } from "node-fetch";
/**
 * Options for the web crawler.
 */
export interface CrawlOptions {
    /**
     * Extract specific content using a CSS selector.
     */
    extract?: string;
    /**
     * Maximum number of concurrent connections allowed.
     */
    maxConnections: number;
    /**
     * Path names to exclude from the crawl.
     */
    exclude: string[];
    /**
     * Whether to stop the crawling process on the first encountered error.
     */
    breakOnError: boolean;
    /**
     * Whether to enable logging during the crawling process.
     */
    logEnabled: boolean;
    /**
     * Fetch options passed to each request.
     */
    fetchOptions: RequestInit;
}
export declare function crawlWebsite(startUrl: string, options_?: Partial<CrawlOptions>): AsyncGenerator<Page, any, Page>;
export interface Page {
    /**
     * Path of the crawled page.
     */
    path: string;
    /**
     * Text content extracted from the page.
     */
    text: string;
}
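
A minimal usage sketch based on the declarations above: it crawls a start URL, concatenates the yielded pages into a single Markdown-style string, and writes it out as a knowledge file. The start URL, CSS selector, exclude list, and header value are illustrative assumptions, not part of the package.

import { crawlWebsite } from "rag-crawler";
import { writeFile } from "node:fs/promises";

async function buildKnowledgeFile(): Promise<void> {
  // All option values below are illustrative placeholders.
  const pages = crawlWebsite("https://example.com/docs/", {
    extract: "main",        // keep only content matched by this CSS selector
    maxConnections: 4,      // limit concurrent requests
    exclude: ["changelog"], // skip paths containing these names
    logEnabled: true,
    fetchOptions: { headers: { "User-Agent": "rag-crawler-example" } },
  });

  let knowledge = "";
  for await (const page of pages) {
    // Each yielded Page carries the path it was crawled from and the extracted text.
    knowledge += `# ${page.path}\n\n${page.text}\n\n`;
  }

  await writeFile("knowledge.md", knowledge, "utf8");
}

buildKnowledgeFile().catch(console.error);

Because crawlWebsite returns an AsyncGenerator, pages can also be processed and appended to the output incrementally instead of being buffered in memory first.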