ts-web-scraper
A powerful web scraper for both static and client-side rendered sites using only Bun native APIs
/**
 * Returns the shared RobotsParser instance, creating it on first use
 */
export declare function getGlobalRobotsParser(options?: RobotsOptions): RobotsParser;
/**
 * Discards the shared RobotsParser instance so the next call creates a fresh one
 */
export declare function resetGlobalRobotsParser(): void;
/**
 * Quick helper to check whether a URL may be fetched under the site's
 * robots.txt (see the usage sketch after the declarations)
 */
export declare function canFetch(url: string, options?: RobotsOptions): Promise<boolean>;
/**
 * Quick helper to get the crawl delay declared for a URL's host
 */
export declare function getCrawlDelay(url: string, options?: RobotsOptions): Promise<number>;
export declare interface RobotsOptions {
  respectRobotsTxt?: boolean
  userAgent?: string
  cacheTime?: number
  timeout?: number
}
export declare interface RobotRule {
  userAgent: string
  allow: string[]
  disallow: string[]
  crawlDelay?: number
}
export declare interface ParsedRobots {
  rules: RobotRule[]
  sitemaps: string[]
}
/**
 * Robots.txt Parser and Manager: fetches, caches, and evaluates rules
 * (see the usage sketch after the declarations)
 */
export declare class RobotsParser {
  private cache: any;
  private options: Required<RobotsOptions>;
  constructor(options: Required<RobotsOptions>);
  /** Whether the configured user agent may fetch the given URL */
  canFetch(url: string): Promise<boolean>;
  /** Crawl delay declared for the configured user agent, if any */
  getCrawlDelay(url: string): Promise<number>;
  /** Sitemap URLs listed in the site's robots.txt */
  getSitemaps(url: string): Promise<string[]>;
  /** Download and parse a robots.txt file */
  fetch(robotsUrl: string): Promise<ParsedRobots>;
  /** Parse raw robots.txt content into structured rules */
  parse(content: string): ParsedRobots;
  private isAllowedByRules(path: string, robots: ParsedRobots): boolean;
  private findMatchingRule(userAgent: string, robots: ParsedRobots): RobotRule | null;
  private matchesPattern(path: string, pattern: string): boolean;
}
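
A minimal sketch of the quick helpers. The import path and the my-scraper user agent are assumptions, and the sketch assumes getCrawlDelay returns the robots.txt Crawl-delay value in seconds, as is conventional for that directive; verify both against the implementation.

import { canFetch, getCrawlDelay } from 'ts-web-scraper' // assumed import path

const url = 'https://example.com/some/page'

// Ask the site's robots.txt whether this user agent may fetch the URL
if (await canFetch(url, { userAgent: 'my-scraper' })) {
  // Honor any Crawl-delay directive before issuing the request
  const delaySeconds = await getCrawlDelay(url) // assumed to be seconds
  if (delaySeconds > 0) await Bun.sleep(delaySeconds * 1000)

  const res = await fetch(url)
  console.log(res.status)
}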
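
RobotsParser can also be used directly, for example to parse a robots.txt body you already have on hand. A sketch under two assumptions: the class is exported from the package root, and cacheTime and timeout are in milliseconds (the declarations do not state a unit).

import { RobotsParser } from 'ts-web-scraper' // assumed import path

// The constructor takes Required<RobotsOptions>, so every option must be supplied
const parser = new RobotsParser({
  respectRobotsTxt: true,
  userAgent: 'my-scraper',
  cacheTime: 60_000, // assumed milliseconds; verify against the implementation
  timeout: 10_000,   // assumed milliseconds; verify against the implementation
})

// Parse a robots.txt body directly (no network involved)
const robots = parser.parse(`
User-agent: *
Disallow: /private/
Crawl-delay: 2
Sitemap: https://example.com/sitemap.xml
`)

console.log(robots.sitemaps)           // ['https://example.com/sitemap.xml']
console.log(robots.rules[0]?.disallow) // ['/private/']

// Or query a live site; this fetches and evaluates its robots.txt
console.log(await parser.canFetch('https://example.com/private/page'))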