UNPKG

@phunky/scrape-channel-listings

Version:

A TypeScript library for scraping TV channel listings from various providers

50 lines (49 loc) 1.94 kB
/**
 * Core scraping utility module for channel listing extraction.
 *
 * Provides a framework for scraping channel information from various providers.
 * Features advertised by the package (implemented in the accompanying
 * JavaScript, not visible in this declaration file):
 * - Automated browser setup and cleanup
 * - Request interception and resource blocking
 * - Retry mechanism with exponential backoff
 * - Channel name normalization
 * - Configurable error handling
 * - Structured output generation
 */
import playwright from 'playwright';

/**
 * Represents a TV channel with its number and standardized name.
 */
export interface Channel {
    /** The channel number in the provider's lineup (kept as a string). */
    number: string;
    /** The standardized channel name across providers. */
    name: string;
}

/**
 * Configuration for a provider-specific scraper.
 *
 * Only `url` and `scrapeFunction` are required; every other member is optional.
 */
export interface ScraperConfig {
    /** The URL to scrape channel information from. */
    url: string;
    /**
     * Provider-specific function that extracts channel data from a Playwright
     * page. Entries are `Partial<Channel>`, so `number` and/or `name` may be
     * missing from any returned item.
     * NOTE(review): how incomplete entries are handled downstream is not
     * visible here - confirm against the implementation.
     */
    scrapeFunction: (page: playwright.Page) => Promise<Partial<Channel>[]>;
    /**
     * Channel name standardization mappings.
     * NOTE(review): presumably keyed by the scraped name with the standardized
     * name as the value - confirm against the implementation.
     */
    overrides?: Record<string, string>;
    /**
     * Name of the JSON file to store results. Optional in the type.
     * NOTE(review): the behavior when omitted is not visible here.
     */
    outputFile?: string;
    /**
     * Optional custom run function for special cases. Receives an options
     * object with a `writeFiles` flag and resolves to the complete channel list.
     * NOTE(review): `writeFiles` presumably controls whether output files are
     * written - confirm against the implementation.
     */
    runCustom?: (options: {
        writeFiles: boolean;
    }) => Promise<Channel[]>;
}

/**
 * Executes a scraper with the given configuration.
 *
 * @param config - Scraper configuration.
 * @returns A promise resolving to the array of scraped channels.
 */
export declare function runScraper(config: ScraperConfig): Promise<Channel[]>;

/**
 * Executes a scraper configuration from the command line.
 * Handles argument parsing and output formatting.
 *
 * @param config - Scraper configuration.
 * @returns A promise that resolves with no value once the run has finished.
 */
export declare function runScraperCLI(config: ScraperConfig): Promise<void>;