ts-web-scraper
A powerful web scraper for both static and client-side rendered sites using only Bun native APIs
/**
 * Returns the shared RobotsParser instance, creating it on first use
 */
export declare function getGlobalRobotsParser(options?: RobotsOptions): RobotsParser;
/**
 * Discards the shared RobotsParser instance so the next call creates a fresh one
 */
export declare function resetGlobalRobotsParser(): void;
/**
 * Quick helper to check whether a URL may be fetched under the site's
 * robots.txt (see the usage sketch after the declarations)
 */
export declare function canFetch(url: string, options?: RobotsOptions): Promise<boolean>;
/**
 * Quick helper to get the crawl delay declared for a URL's host
 */
export declare function getCrawlDelay(url: string, options?: RobotsOptions): Promise<number>;
export declare interface RobotsOptions {
  respectRobotsTxt?: boolean
  userAgent?: string
  cacheTime?: number
  timeout?: number
}
export declare interface RobotRule {
  userAgent: string
  allow: string[]
  disallow: string[]
  crawlDelay?: number
}
export declare interface ParsedRobots {
  rules: RobotRule[]
  sitemaps: string[]
}
/**
 * Robots.txt Parser and Manager: fetches, caches, and evaluates rules
 * (see the usage sketch after the declarations)
 */
export declare class RobotsParser {
  private cache: any;
  private options: Required<RobotsOptions>;
  constructor(options: Required<RobotsOptions>);
  /** Whether the configured user agent may fetch the given URL */
  canFetch(url: string): Promise<boolean>;
  /** Crawl delay declared for the configured user agent, if any */
  getCrawlDelay(url: string): Promise<number>;
  /** Sitemap URLs listed in the site's robots.txt */
  getSitemaps(url: string): Promise<string[]>;
  /** Download and parse a robots.txt file */
  fetch(robotsUrl: string): Promise<ParsedRobots>;
  /** Parse raw robots.txt content into structured rules */
  parse(content: string): ParsedRobots;
  private isAllowedByRules(path: string, robots: ParsedRobots): boolean;
  private findMatchingRule(userAgent: string, robots: ParsedRobots): RobotRule | null;
  private matchesPattern(path: string, pattern: string): boolean;
}
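
A minimal sketch of the quick helpers. The import path and the my-scraper user agent are assumptions, and the sketch assumes getCrawlDelay returns the robots.txt Crawl-delay value in seconds, as is conventional for that directive; verify both against the implementation.

import { canFetch, getCrawlDelay } from 'ts-web-scraper' // assumed import path

const url = 'https://example.com/some/page'

// Ask the site's robots.txt whether this user agent may fetch the URL
if (await canFetch(url, { userAgent: 'my-scraper' })) {
  // Honor any Crawl-delay directive before issuing the request
  const delaySeconds = await getCrawlDelay(url) // assumed to be seconds
  if (delaySeconds > 0) await Bun.sleep(delaySeconds * 1000)

  const res = await fetch(url)
  console.log(res.status)
}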
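
RobotsParser can also be used directly, for example to parse a robots.txt body you already have on hand. A sketch under two assumptions: the class is exported from the package root, and cacheTime and timeout are in milliseconds (the declarations do not state a unit).

import { RobotsParser } from 'ts-web-scraper' // assumed import path

// The constructor takes Required<RobotsOptions>, so every option must be supplied
const parser = new RobotsParser({
  respectRobotsTxt: true,
  userAgent: 'my-scraper',
  cacheTime: 60_000, // assumed milliseconds; verify against the implementation
  timeout: 10_000,   // assumed milliseconds; verify against the implementation
})

// Parse a robots.txt body directly (no network involved)
const robots = parser.parse(`
User-agent: *
Disallow: /private/
Crawl-delay: 2
Sitemap: https://example.com/sitemap.xml
`)

console.log(robots.sitemaps)           // ['https://example.com/sitemap.xml']
console.log(robots.rules[0]?.disallow) // ['/private/']

// Or query a live site; this fetches and evaluates its robots.txt
console.log(await parser.canFetch('https://example.com/private/page'))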