// ts-web-scraper
// Version:
// A powerful web scraper for both static and client-side rendered sites, using only Bun native APIs
// 46 lines • 1.46 kB
// TypeScript
/**
 * Quick helper to parse a sitemap.
 *
 * NOTE(review): only the declaration is visible here; presumably this is a
 * thin convenience wrapper around `SitemapParser` — confirm against the
 * implementation.
 *
 * @param url - URL of the sitemap to parse.
 * @param options - Optional parsing options; see `SitemapOptions`.
 * @returns A promise that resolves to the list of sitemap entries.
 */
export declare function parseSitemap(url: string, options?: SitemapOptions): Promise<SitemapEntry[]>;
/**
 * Quick helper to discover sitemaps for a site.
 *
 * NOTE(review): the discovery strategy (e.g. which well-known locations are
 * probed) is not visible in this declaration — confirm against the
 * implementation before relying on specific behavior.
 *
 * @param baseUrl - Base URL of the site to inspect.
 * @returns A promise that resolves to an array of strings, presumably the
 *   URLs of the sitemaps that were found.
 */
export declare function discoverSitemaps(baseUrl: string): Promise<string[]>;
/**
 * Sitemap Parser
 *
 * Parse XML sitemaps for URL discovery
 * Uses ONLY Bun native APIs - no external dependencies!
 */
/**
 * A single entry produced by parsing a sitemap.
 *
 * Field names mirror the child elements of a `<url>` entry in the sitemap
 * XML protocol; only `loc` is required.
 */
export declare interface SitemapEntry {
/** Location of the page (the entry's URL). */
loc: string
/** Last-modification date of the page, when available. */
lastmod?: Date
/** How frequently the page is expected to change, when available. */
changefreq?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never'
/**
 * Priority of the page, when available.
 * NOTE(review): the sitemap protocol defines this as 0.0-1.0; whether the
 * parser validates or clamps the value is not visible here.
 */
priority?: number
}
/**
 * Options accepted by `parseSitemap` and `SitemapParser`.
 *
 * Every field is optional. NOTE(review): default values are applied by the
 * implementation and are not visible in this declaration.
 */
export declare interface SitemapOptions {
/**
 * Maximum nesting depth.
 * Presumably limits how many levels of sitemap indexes are followed — confirm.
 */
maxDepth?: number
/** Presumably controls whether sitemap index files are followed — confirm. */
followIndexes?: boolean
/**
 * Optional filters for sitemap entries.
 * NOTE(review): exact matching semantics (inclusive vs. exclusive bounds,
 * how entries lacking the filtered field are treated) are not visible here.
 */
filters?: {
/** Patterns for entries to include; presumably tested against `loc`. */
includePatterns?: RegExp[]
/** Patterns for entries to exclude; presumably tested against `loc`. */
excludePatterns?: RegExp[]
/** Lower bound on an entry's `lastmod`. */
minLastMod?: Date
/** Upper bound on an entry's `lastmod`. */
maxLastMod?: Date
/** Lower bound on an entry's `priority`. */
minPriority?: number
}
/** Timeout value. NOTE(review): presumably milliseconds for the fetch — confirm. */
timeout?: number
}
/**
 * Sitemap Parser
 *
 * Parses a sitemap URL into `SitemapEntry` objects. The private members
 * suggest support for both sitemap indexes and URL sets, plus entry
 * filtering; their behavior is not visible in this declaration.
 */
export declare class SitemapParser {
  /** Fully-populated options used by this parser instance. */
  private options: Required<SitemapOptions>;

  /**
   * NOTE(review): the declared parameter type requires every option to be
   * supplied, while `parseSitemap` accepts a partial `SitemapOptions`. This
   * may be an artifact of declaration generation — confirm against the
   * implementation before loosening it.
   *
   * @param options - Parser options.
   */
  constructor(options: Required<SitemapOptions>);

  /**
   * Parse the sitemap at `sitemapUrl`.
   *
   * @param sitemapUrl - URL of the sitemap to parse.
   * @param depth - Current nesting depth. Typed as `number` (previously
   *   `any`) to match `parseIndex` and `SitemapOptions.maxDepth`; callers
   *   normally omit it.
   * @returns A promise that resolves to the parsed entries.
   */
  parse(sitemapUrl: string, depth?: number): Promise<SitemapEntry[]>;

  /** Handles sitemap index content at the given depth. */
  private parseIndex(content: string, baseUrl: string, depth: number): Promise<SitemapEntry[]>;

  /** Converts URL-set content into entries. */
  private parseUrlSet(content: string): SitemapEntry[];

  /** Converts a single block into an entry, or `null` when none is produced. */
  private parseUrlBlock(block: string): SitemapEntry | null;

  /** Extracts sitemap URLs from the given content. */
  private extractSitemapUrls(content: string): string[];

  /** Applies filtering to a list of entries; presumably driven by `options.filters`. */
  private applyFilters(entries: SitemapEntry[]): SitemapEntry[];

  /**
   * Discover sitemaps for a site.
   *
   * @param baseUrl - Base URL of the site to inspect.
   * @returns A promise that resolves to an array of strings, presumably
   *   the discovered sitemap URLs.
   */
  static discoverSitemaps(baseUrl: string): Promise<string[]>;
}