UNPKG

ts-web-scraper

Version:

A powerful web scraper for both static and client-side rendered sites using only Bun native APIs

46 lines 1.46 kB
/**
 * Sitemap Parser
 *
 * Parse XML sitemaps for URL discovery.
 * Uses ONLY Bun native APIs - no external dependencies!
 */

/**
 * Quick helper to parse a sitemap.
 *
 * @param url - URL of the sitemap to parse.
 * @param options - Optional parsing options; see {@link SitemapOptions}.
 * @returns A promise resolving to the entries found in the sitemap.
 */
export declare function parseSitemap(url: string, options?: SitemapOptions): Promise<SitemapEntry[]>;

/**
 * Quick helper to discover sitemaps.
 *
 * @param baseUrl - Base URL of the site to inspect.
 * @returns A promise resolving to a list of strings (presumably sitemap URLs —
 *   confirm against the implementation).
 */
export declare function discoverSitemaps(baseUrl: string): Promise<string[]>;

/**
 * A single entry produced by parsing a sitemap.
 *
 * Field names mirror the child elements of a `<url>` element in the
 * sitemaps.org protocol (`loc`, `lastmod`, `changefreq`, `priority`).
 */
export declare interface SitemapEntry {
  /** The entry's location; the only required field. */
  loc: string;
  /** Last-modification date, when available. */
  lastmod?: Date;
  /** Change-frequency hint, restricted to the listed literal values. */
  changefreq?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never';
  /** Priority value, when available. */
  priority?: number;
}

/**
 * Options accepted by {@link parseSitemap} and {@link SitemapParser}.
 *
 * Every field is optional at this level. Defaults are not visible in these
 * declarations.
 */
export declare interface SitemapOptions {
  /** NOTE(review): presumably the maximum nesting depth of sitemap indexes to follow — confirm. */
  maxDepth?: number;
  /** NOTE(review): presumably toggles following of sitemap index files — confirm. */
  followIndexes?: boolean;
  /** Optional criteria used to filter the resulting entries. */
  filters?: {
    /** Patterns for inclusion (presumably matched against `loc` — confirm). */
    includePatterns?: RegExp[];
    /** Patterns for exclusion (presumably matched against `loc` — confirm). */
    excludePatterns?: RegExp[];
    /** Lower bound on `lastmod`. */
    minLastMod?: Date;
    /** Upper bound on `lastmod`. */
    maxLastMod?: Date;
    /** Lower bound on `priority`. */
    minPriority?: number;
  };
  /** Timeout value; NOTE(review): units are not stated in the declarations — confirm. */
  timeout?: number;
}

/**
 * Sitemap Parser
 */
export declare class SitemapParser {
  /** Options with every top-level field of {@link SitemapOptions} present. */
  private options: Required<SitemapOptions>;

  /**
   * NOTE(review): the constructor is declared to require a fully populated
   * options object, while {@link parseSitemap} accepts optional, partial
   * options. This may be an artifact of declaration generation — confirm
   * against the implementation before relaxing it.
   *
   * @param options - Options with every top-level field supplied.
   */
  constructor(options: Required<SitemapOptions>);

  /**
   * Parse the sitemap at the given URL.
   *
   * @param sitemapUrl - URL of the sitemap to parse.
   * @param depth - Optional depth value. Typed as `number` to match the
   *   `depth: number` parameter of the private `parseIndex` method
   *   (previously declared as `any`).
   * @returns A promise resolving to the parsed entries.
   */
  parse(sitemapUrl: string, depth?: number): Promise<SitemapEntry[]>;

  /** Handles sitemap index content; receives the current depth. */
  private parseIndex(content: string, baseUrl: string, depth: number): Promise<SitemapEntry[]>;

  /** Converts URL-set content into entries. */
  private parseUrlSet(content: string): SitemapEntry[];

  /** Converts a single block into an entry, or `null` when none can be produced. */
  private parseUrlBlock(block: string): SitemapEntry | null;

  /** Extracts sitemap URLs from the given content. */
  private extractSitemapUrls(content: string): string[];

  /** Returns the entries remaining after filtering. */
  private applyFilters(entries: SitemapEntry[]): SitemapEntry[];

  /**
   * Discover sitemaps for a site.
   *
   * @param baseUrl - Base URL of the site to inspect.
   * @returns A promise resolving to a list of strings (presumably sitemap
   *   URLs — confirm against the implementation).
   */
  static discoverSitemaps(baseUrl: string): Promise<string[]>;
}