@phunky/scrape-channel-listings
Version:
A TypeScript library for scraping TV channel listings from various providers
53 lines (52 loc) • 1.89 kB
JavaScript
;
/**
* Virgin Media UK Channel Listing Scraper
* Extracts channel numbers and names from Virgin Media's channel lineup
*/
Object.defineProperty(exports, "__esModule", { value: true });
const scraper_1 = require("../utils/scraper");
/**
 * Extracts channel information from the Virgin Media channel listing page.
 *
 * Every `table tbody tr` row is inspected; the channel number is read from
 * `.column-1`, the name from `.column-2` and the region note from `.column-3`.
 * Rows that are not usable channel entries are omitted from the result
 * (previously they were returned as empty `{}` placeholders).
 *
 * @param page - Playwright page instance
 * @returns Array of partial channel objects, each containing `number` and `name`
 */
const scrapeFunction = async (page) => {
    return page.$$eval('table tbody tr', (rows) => {
        // Helper is declared inside the callback on purpose: Playwright
        // evaluates this function in the page, so it cannot rely on
        // anything from the enclosing module scope.
        const cellText = (row, selector) => row.querySelector(selector)?.textContent?.trim() || '';
        const channels = [];
        for (const row of rows) {
            const number = cellText(row, '.column-1');
            const name = cellText(row, '.column-2');
            const region = cellText(row, '.column-3');
            // A row needs both a number and a name to be a channel entry.
            if (!name || !number) {
                continue;
            }
            // Numbers containing a dash are category definitions, not channels.
            if (number.includes('-')) {
                continue;
            }
            // Regional variants (e.g. "In Wales") are skipped. This is a plain
            // prefix match, so any region text starting with "In" is excluded.
            if (region.startsWith('In')) {
                continue;
            }
            channels.push({ number, name });
        }
        return channels;
    });
};
/**
 * Channel name overrides used to standardize naming across providers.
 * Each pair is [name as listed, replacement name]; the table is handed to
 * the scraper through the configuration object.
 */
const overrides = Object.fromEntries([
    ['TNT SPORTS ULTIMATE', 'TNT Ultimate'],
    ['ITV1/STV/UTV', 'ITV1'],
    ['SKY CINEMA SCI-FI & HORROR HD', 'SKY CINEMA SCFI/HORROR'],
    ['5 HD', 'Channel5 HD'],
    ['5+1', 'Channel5+1'],
]);
/**
 * Scraper configuration for the Virgin Media UK listing.
 * Bundles the source page URL, the row-extraction function, the channel
 * name override table and the name of the output file.
 */
const config = {
    url: 'https://rxtvinfo.com/virgin-media-channel-list-uk/',
    scrapeFunction: scrapeFunction,
    overrides: overrides,
    outputFile: 'virgin.json',
};
// When this file is the entry script (not required by another module),
// run the scraper CLI with this provider's configuration.
const isEntryPoint = require.main === module;
if (isEntryPoint) {
    const { runScraperCLI } = scraper_1;
    // A rejected run terminates the process with exit code 1.
    // NOTE(review): the rejection reason is discarded here; presumably
    // runScraperCLI reports it before rejecting — confirm.
    runScraperCLI(config).catch(() => {
        process.exit(1);
    });
}
// The configuration is the module's default export.
exports.default = config;