/*
 * ts-web-scraper
 * Version:
 * A powerful web scraper for both static and client-side rendered sites using only Bun native APIs
 * 61 lines • 1.8 kB
 * TypeScript
 */
/**
 * Detect pagination type and extract info from HTML.
 *
 * Ambient declaration only; the implementation is not part of this file.
 *
 * @param html - HTML markup of the page to inspect.
 * @param currentUrl - URL of the page, as a string. Presumably the URL that
 *   `html` was fetched from, used to resolve relative links — confirm against
 *   the implementation.
 * @returns A `PaginationInfo` whose `type` field names the pagination kind.
 */
export declare function detectPagination(html: string, currentUrl: string): PaginationInfo;
/**
 * Extract page number from URL.
 *
 * @param url - A parsed `URL` object (not a URL string).
 * @returns The page number, or `null` — presumably when no page number can be
 *   determined from `url`; confirm against the implementation.
 */
export declare function extractPageFromUrl(url: URL): number | null;
/**
 * Generate URL for a specific page.
 *
 * @param baseUrl - URL string the page URL is derived from.
 * @param page - Page number to generate a URL for.
 *   NOTE(review): whether numbering is 0- or 1-based is not visible here — confirm.
 * @param pattern - Optional `PaginationPattern`. Presumably customises how the
 *   page number is placed in the URL (e.g. via `pageParam` or `urlPattern`) —
 *   confirm; the default behaviour when omitted is not visible here.
 * @returns The generated URL as a string.
 */
export declare function generatePageUrl(baseUrl: string, page: number, pattern?: PaginationPattern): string;
/**
 * Extract all page URLs from pagination.
 *
 * @param paginationInfo - Pagination details, e.g. a value returned by
 *   `detectPagination`.
 * @returns An array of URL strings.
 *   NOTE(review): likely derived from `paginationInfo.pageUrls`; ordering,
 *   de-duplication and the result for missing data are not visible here — confirm.
 */
export declare function extractAllPageUrls(paginationInfo: PaginationInfo): string[];
/**
 * Check if pagination has more pages.
 *
 * @param paginationInfo - Pagination details to inspect.
 * @returns A boolean indicating whether more pages remain.
 *   NOTE(review): which fields (`hasMore`, `nextUrl`, `currentPage`/`totalPages`,
 *   cursors) drive the answer is not visible in this declaration — confirm.
 */
export declare function hasMorePages(paginationInfo: PaginationInfo): boolean;
/**
 * Get next page URL.
 *
 * @param paginationInfo - Pagination details to read the next page from.
 * @param baseUrl - Optional URL string. Presumably used to build or resolve the
 *   next URL when `paginationInfo` does not carry an absolute one — confirm.
 * @returns The next page URL as a string, or `null` — presumably when there is
 *   no next page; confirm against the implementation.
 */
export declare function getNextPageUrl(paginationInfo: PaginationInfo, baseUrl?: string): string | null;
/**
 * Auto-paginate and collect all pages.
 *
 * Declared as returning an `AsyncGenerator`, so results are consumed one page
 * at a time (for example with `for await ... of`).
 *
 * @param startUrl - URL string that pagination starts from.
 * @param fetcher - Caller-supplied async function that receives a URL string
 *   and resolves to an object with that page's `html` and `url`.
 * @param options - Optional settings:
 *   - `maxPages`: presumably an upper bound on the number of pages yielded —
 *     confirm; the default is not visible here.
 *   - `delay`: presumably a pause between fetches; the unit is not visible here
 *     (likely milliseconds) — confirm.
 * @returns An async generator yielding `{ html, url, pageNumber }` objects.
 *   NOTE(review): whether `pageNumber` starts at 0 or 1 is not visible here.
 */
export declare function autoPaginate(startUrl: string, fetcher: (url: string) => Promise<{ html: string, url: string }>, options?: {
maxPages?: number
delay?: number
}): AsyncGenerator<{ html: string, url: string, pageNumber: number }>;
/**
 * Pagination details for a page. This is the return type of `detectPagination`
 * and the input of `extractAllPageUrls`, `hasMorePages` and `getNextPageUrl`.
 *
 * Only `type` is required; every other field is optional.
 * NOTE(review): which optional fields are populated for which `PaginationType`
 * is not visible in this declaration — confirm against the implementation.
 */
export declare interface PaginationInfo {
/** Pagination kind (required). */
type: PaginationType
/** Presumably the number of the page being described — confirm numbering base. */
currentPage?: number
/** Presumably the total number of pages, when known — confirm. */
totalPages?: number
/** Presumably the URL of the next page — confirm whether absolute or relative. */
nextUrl?: string
/** Presumably the URL of the previous page — confirm whether absolute or relative. */
prevUrl?: string
/** List of page URL strings; presumably every page link found — confirm. */
pageUrls?: string[]
/** Presumably a flag indicating further pages exist — confirm. */
hasMore?: boolean
/** Presumably the cursor token for the next page (see `PaginationType.CURSOR`) — confirm. */
cursorNext?: string
/** Presumably the cursor token for the previous page — confirm. */
cursorPrev?: string
}
/**
 * Optional pattern accepted by `generatePageUrl`. All fields are optional.
 * NOTE(review): how the fields combine, and which takes precedence when several
 * are set, is not visible in this declaration — confirm.
 */
export declare interface PaginationPattern {
/** Selector string; presumably a CSS selector for pagination elements — confirm. */
selector?: string
/** Regular expression; presumably matched against URLs to locate the page number — confirm. */
urlPattern?: RegExp
/** Presumably the name of the query parameter carrying the page number — confirm. */
pageParam?: string
}
/**
 * Pagination Auto-Detection
 *
 * Detect and handle various pagination patterns
 * Uses ONLY Bun native APIs - no external dependencies!
 *
 * (Module-level overview; it describes the whole pagination module rather than
 * the enum that follows.)
 */
/**
 * Pagination kinds, used as the `type` field of `PaginationInfo`.
 *
 * This is a string enum: each member's runtime value is the lowercase
 * snake_case string shown next to it.
 * NOTE(review): the member names suggest the page mechanism each one stands for
 * (numbered links, next/previous links, a "load more" control, infinite scroll,
 * cursor tokens), but the detection rules are not visible here — confirm.
 */
export declare enum PaginationType {
NONE = 'none',
PAGE_NUMBERS = 'page_numbers',
NEXT_PREV = 'next_prev',
LOAD_MORE = 'load_more',
INFINITE_SCROLL = 'infinite_scroll',
CURSOR = 'cursor',
}