@phunky/scrape-channel-listings
Version:
A TypeScript library for scraping TV channel listings from various providers
50 lines (49 loc) • 1.94 kB
TypeScript
/**
* Core scraping utility module for channel listing extraction
* Provides a robust framework for scraping channel information from various providers
* Features include:
* - Automated browser setup and cleanup
* - Request interception and resource blocking
* - Retry mechanism with exponential backoff
* - Channel name normalization
* - Configurable error handling
* - Structured output generation
*/
import playwright from 'playwright';
/**
* Represents a TV channel with its number and standardized name
* Both fields are required and typed as strings, including the channel number
* @property {string} number - The channel number in the provider's lineup
* @property {string} name - The standardized channel name across providers
*/
export interface Channel {
number: string;
name: string;
}
/**
* Configuration for a provider-specific scraper
* Only `url` and `scrapeFunction` are required; all other fields are optional
* @property {string} url - The URL to scrape channel information from
* @property {Function} scrapeFunction - Provider-specific function to extract channel data.
*   Receives a Playwright page and resolves to an array of partial channels,
*   so either `number` or `name` may be absent on any returned entry
* @property {Record<string, string>} [overrides] - Channel name standardization mappings
* @property {string} [outputFile] - Name of the JSON file to store results
* @property {Function} [runCustom] - Optional custom run function for special cases.
*   Receives an options object with a boolean `writeFiles` flag and resolves to
*   an array of complete channels.
*   NOTE(review): `writeFiles` presumably toggles writing results to disk - confirm
*   against the implementation, which is not visible from this declaration
*/
export interface ScraperConfig {
url: string;
scrapeFunction: (page: playwright.Page) => Promise<Partial<Channel>[]>;
overrides?: Record<string, string>;
outputFile?: string;
runCustom?: (options: {
writeFiles: boolean;
}) => Promise<Channel[]>;
}
/**
* Executes a scraper with the given configuration
* NOTE(review): this is a declaration only; how failures are reported (rejection
* vs. empty result) and how `runCustom` interacts with the default flow cannot be
* determined from here - confirm against the implementation
* @param {ScraperConfig} config - Scraper configuration
* @returns {Promise<Channel[]>} Promise resolving to the array of scraped channels
*/
export declare function runScraper(config: ScraperConfig): Promise<Channel[]>;
/**
* Execute a scraper configuration from the command line
* Handles argument parsing and output formatting
* NOTE(review): the accepted command-line arguments are not visible from this
* declaration - see the implementation for the supported flags
* @param {ScraperConfig} config - Scraper configuration to execute
* @returns {Promise<void>} Promise that resolves with no value
*/
export declare function runScraperCLI(config: ScraperConfig): Promise<void>;