UNPKG

ts-web-scraper

Version:

A powerful web scraper for both static and client-side rendered sites using only Bun native APIs

61 lines 1.8 kB
/**
 * Pagination Auto-Detection
 *
 * Detect and handle various pagination patterns.
 * Uses ONLY Bun native APIs - no external dependencies!
 */

/**
 * Detect pagination type and extract info from HTML.
 *
 * @param html - HTML markup of the page to inspect.
 * @param currentUrl - URL associated with `html` (presumably used to resolve
 *   relative links; confirm against the implementation).
 * @returns A description of the pagination found on the page.
 */
export declare function detectPagination(html: string, currentUrl: string): PaginationInfo;

/**
 * Extract page number from URL.
 *
 * @param url - Parsed URL to inspect.
 * @returns The page number, or `null` (presumably when the URL carries no
 *   recognizable page number; confirm against the implementation).
 */
export declare function extractPageFromUrl(url: URL): number | null;

/**
 * Generate URL for a specific page.
 *
 * @param baseUrl - URL to derive the page URL from.
 * @param page - Page number to target.
 * @param pattern - Optional hints describing how the site encodes pages.
 * @returns The URL for the requested page.
 */
export declare function generatePageUrl(baseUrl: string, page: number, pattern?: PaginationPattern): string;

/**
 * Extract all page URLs from pagination.
 *
 * @param paginationInfo - Pagination description to read URLs from.
 * @returns The page URLs as strings.
 */
export declare function extractAllPageUrls(paginationInfo: PaginationInfo): string[];

/**
 * Check if pagination has more pages.
 *
 * @param paginationInfo - Pagination description to check.
 * @returns `true` when more pages are available.
 */
export declare function hasMorePages(paginationInfo: PaginationInfo): boolean;

/**
 * Get next page URL.
 *
 * @param paginationInfo - Pagination description for the current page.
 * @param baseUrl - Optional base URL (presumably used to build or resolve the
 *   next URL; confirm against the implementation).
 * @returns The next page URL, or `null` when none is returned.
 */
export declare function getNextPageUrl(paginationInfo: PaginationInfo, baseUrl?: string): string | null;

/**
 * Auto-paginate and collect all pages.
 *
 * @param startUrl - URL of the first page.
 * @param fetcher - Callback that loads a URL and resolves with the page HTML
 *   and the URL it reports for that page.
 * @param options - Optional limits; see the member comments below.
 * @returns An async generator yielding each page's HTML, URL and page number.
 */
export declare function autoPaginate(
  startUrl: string,
  fetcher: (url: string) => Promise<{ html: string; url: string }>,
  options?: {
    /** Upper bound on pages to visit (default not visible here). */
    maxPages?: number;
    /** Delay setting (presumably a pause between requests; unit not visible here, confirm). */
    delay?: number;
  },
): AsyncGenerator<{ html: string; url: string; pageNumber: number }>;

/**
 * Description of the pagination detected on a page.
 *
 * Only `type` is required; every other member is optional. Members are
 * terminated with `;` so the declaration stays valid even if line breaks are
 * stripped.
 */
export declare interface PaginationInfo {
  /** Detected pagination style. */
  type: PaginationType;
  /** Page number of the current page, when known. */
  currentPage?: number;
  /** Total number of pages, when known. */
  totalPages?: number;
  /** URL of the next page, when known. */
  nextUrl?: string;
  /** URL of the previous page, when known. */
  prevUrl?: string;
  /** Page URLs associated with the pagination, when known. */
  pageUrls?: string[];
  /** Whether more pages are available. */
  hasMore?: boolean;
  /** Cursor value for the next page (presumably for cursor-style pagination; confirm). */
  cursorNext?: string;
  /** Cursor value for the previous page (presumably for cursor-style pagination; confirm). */
  cursorPrev?: string;
}

/**
 * Optional hints describing how a site encodes page numbers.
 *
 * How each hint is applied is defined by the implementation, which is not
 * visible from these declarations.
 */
export declare interface PaginationPattern {
  /** Selector hint (presumably a CSS selector for pagination elements; confirm). */
  selector?: string;
  /** Regular expression hint for page URLs. */
  urlPattern?: RegExp;
  /** Page parameter name hint (presumably a query-string key; confirm). */
  pageParam?: string;
}

/**
 * Pagination styles that detection can report, as string-valued members.
 */
export declare enum PaginationType {
  NONE = 'none',
  PAGE_NUMBERS = 'page_numbers',
  NEXT_PREV = 'next_prev',
  LOAD_MORE = 'load_more',
  INFINITE_SCROLL = 'infinite_scroll',
  CURSOR = 'cursor',
}