rag-crawler
Crawl a website to generate a knowledge file for RAG
TypeScript
import { RequestInit } from "node-fetch";
/**
 * Options for the web crawler.
 */
export interface CrawlOptions {
    /**
     * Extract specific content using a CSS selector.
     */
    extract?: string;
    /**
     * Maximum number of concurrent connections allowed.
     */
    maxConnections: number;
    /**
     * Path names to exclude from the crawl.
     */
    exclude: string[];
    /**
     * Whether to stop the crawling process on the first encountered error.
     */
    breakOnError: boolean;
    /**
     * Whether to enable logging during the crawling process.
     */
    logEnabled: boolean;
    /**
     * Fetch options passed to each request.
     */
    fetchOptions: RequestInit;
}
export declare function crawlWebsite(startUrl: string, options_?: Partial<CrawlOptions>): AsyncGenerator<Page, any, Page>;
export interface Page {
    /**
     * Path of the crawled page.
     */
    path: string;
    /**
     * Text content extracted from the page.
     */
    text: string;
}
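
A minimal usage sketch based on the declarations above: it crawls a start URL, concatenates the yielded pages into a single Markdown-style string, and writes it out as a knowledge file. The start URL, CSS selector, exclude list, and header value are illustrative assumptions, not part of the package.

import { crawlWebsite } from "rag-crawler";
import { writeFile } from "node:fs/promises";

async function buildKnowledgeFile(): Promise<void> {
  // All option values below are illustrative placeholders.
  const pages = crawlWebsite("https://example.com/docs/", {
    extract: "main",        // keep only content matched by this CSS selector
    maxConnections: 4,      // limit concurrent requests
    exclude: ["changelog"], // skip paths containing these names
    logEnabled: true,
    fetchOptions: { headers: { "User-Agent": "rag-crawler-example" } },
  });

  let knowledge = "";
  for await (const page of pages) {
    // Each yielded Page carries the path it was crawled from and the extracted text.
    knowledge += `# ${page.path}\n\n${page.text}\n\n`;
  }

  await writeFile("knowledge.md", knowledge, "utf8");
}

buildKnowledgeFile().catch(console.error);

Because crawlWebsite returns an AsyncGenerator, pages can also be processed and appended to the output incrementally instead of being buffered in memory first.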