UNPKG

@phunky/scrape-channel-listings

Version:

A TypeScript library for scraping TV channel listings from various providers

57 lines (56 loc) 1.97 kB
"use strict"; /** * DIRECTV Channel Listing Scraper * Extracts channel numbers and names from DIRECTV's channel lineup */ Object.defineProperty(exports, "__esModule", { value: true }); const scraper_1 = require("../utils/scraper"); /** * Extracts channel information from the DIRECTV channel listing page * @param page - Playwright page instance * @returns Array of partial channel objects containing number and name */ const scrapeFunction = async (page) => { return await page.$$eval('table tr', (rows) => { // Skip header row and process each channel row return rows.slice(1).map((row) => { let number = row.querySelector('td:nth-child(2)')?.textContent?.trim() || ''; const name = row.querySelector('td:nth-child(1)')?.textContent?.trim() || ''; // Handle timeshift channels, they use the same number but the website displays the timeshift with them. if (number.includes('-')) { number = number.split('-')[0]; } // Handle channels with commas, these have multiple numbers but we only want the first one. if (number.includes(',')) { number = number.split(',')[0]; } if (!name || !number) { return {}; } return { number, name }; }); }); }; /** * Channel name overrides to standardize naming across providers */ const overrides = { 'CHEDDAR NEWS8': 'CHEDDAR NEWS', 'DIRECTV 4K 1': 'DIRECTV 4K', 'DIRECTV 4K LIVE 1': 'DIRECTV 4K LIVE', 'DIRECTV 4K LIVE 2 1': 'DIRECTV 4K LIVE 2', }; /** * DIRECTV scraper configuration */ const config = { url: 'https://www.usdirect.com/channels', scrapeFunction, overrides, outputFile: 'directv.json' }; // Run scraper if this file is executed directly if (require.main === module) { (0, scraper_1.runScraperCLI)(config).catch(() => process.exit(1)); } exports.default = config;