UNPKG

@phunky/scrape-channel-listings

Version:

A TypeScript library for scraping TV channel listings from various providers

168 lines (167 loc) 5.95 kB
#!/usr/bin/env node
"use strict";
/**
 * Main entry point for the channel listing scraper.
 * This file exposes both the library API (scrapeAllProviders, scrapeProvider)
 * and, when executed directly, the CLI functionality.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.scrapeAllProviders = scrapeAllProviders;
exports.scrapeProvider = scrapeProvider;
const scraper_1 = require("./utils/scraper");
const virgin_1 = __importDefault(require("./scrapers/virgin"));
const sky_1 = __importDefault(require("./scrapers/sky"));
const sky_ireland_1 = __importDefault(require("./scrapers/sky-ireland"));
const sky_satellite_1 = __importDefault(require("./scrapers/sky-satellite"));
const freesat_1 = __importDefault(require("./scrapers/freesat"));
const directv_1 = __importDefault(require("./scrapers/directv"));
const dish_1 = __importDefault(require("./scrapers/dish"));
const fileUtils_1 = require("./utils/fileUtils");
const args_1 = require("./utils/args");

/** Provider name -> scraper configuration, in the order they are scraped. */
const providers = {
    directv: directv_1.default,
    dish: dish_1.default,
    virgin: virgin_1.default,
    sky: sky_1.default,
    skyireland: sky_ireland_1.default,
    skysatellite: sky_satellite_1.default,
    freesat: freesat_1.default
};

/** Batch size used when neither the options nor the environment supply a usable one. */
const DEFAULT_MAX_CONCURRENT = 10;

/**
 * Picks the batch size for scrapeAllProviders.
 * Precedence: options.maxConcurrent, then the CONCURRENT_SCRAPERS environment
 * variable, then DEFAULT_MAX_CONCURRENT. A candidate is only accepted when it
 * is a whole number >= 1 after flooring: a zero, negative, or NaN step would
 * stall or reverse the batching loop, so such values fall through to the next
 * source instead.
 * @param options Optional configuration object (may be undefined)
 * @returns A batch size that is always >= 1
 */
function resolveMaxConcurrent(options) {
    const candidates = [
        Number(options?.maxConcurrent),
        Number.parseInt(process.env.CONCURRENT_SCRAPERS ?? "", 10)
    ];
    for (const candidate of candidates) {
        const size = Math.floor(candidate);
        // NaN fails this comparison, so unparsable input is skipped as well.
        if (size >= 1) {
            return size;
        }
    }
    return DEFAULT_MAX_CONCURRENT;
}

/**
 * Runs a single scraper and converts the outcome into a result object.
 * Never rejects: a failure inside runScraper is captured in the result.
 * @param name Provider name to report in the result
 * @param config Scraper configuration handed to runScraper
 * @returns Promise resolving to { name, success, duration, channelCount, channels }
 *          on success or { name, success, duration, error } on failure;
 *          duration is measured in milliseconds
 */
async function runProviderScraper(name, config) {
    const start = Date.now();
    try {
        const channels = await (0, scraper_1.runScraper)(config);
        return {
            name,
            success: true,
            duration: Date.now() - start,
            channelCount: channels.length,
            channels
        };
    }
    catch (error) {
        return {
            name,
            success: false,
            duration: Date.now() - start,
            error
        };
    }
}

/**
 * Scrapes channel listings from all configured providers.
 * Providers are processed in consecutive batches of at most `maxConcurrent`
 * scrapers; the scrapers inside one batch run in parallel.
 * @param options Optional configuration for the scraping process
 *        (`maxConcurrent`: batch size, `writeFiles`: return the summary object)
 * @returns Promise resolving to a summary object
 *          ({ results, totalDuration, successRate, totalChannels, failedScrapers })
 *          when `options.writeFiles` is truthy, otherwise to an array of
 *          { provider, channels } entries for the providers that succeeded
 */
async function scrapeAllProviders(options) {
    const startTime = Date.now();
    const results = [];
    const maxConcurrent = resolveMaxConcurrent(options);
    // Process providers in batches to control concurrency
    const providerEntries = Object.entries(providers);
    for (let i = 0; i < providerEntries.length; i += maxConcurrent) {
        const batch = providerEntries.slice(i, i + maxConcurrent);
        const batchResults = await Promise.all(batch.map(([name, config]) => runProviderScraper(name, config)));
        results.push(...batchResults);
    }
    const totalDuration = Date.now() - startTime;
    const failedScrapers = results.filter(r => !r.success);
    const successRate = `${((results.length - failedScrapers.length) / results.length * 100).toFixed(1)}%`;
    const totalChannels = results.reduce((sum, r) => sum + (r.channelCount || 0), 0);
    if (options?.writeFiles) {
        return {
            results,
            totalDuration,
            successRate,
            totalChannels,
            failedScrapers
        };
    }
    // Transform results into ProviderChannels array
    return results
        .filter(result => result.success && result.channels)
        .map(result => ({
        provider: result.name,
        channels: result.channels
    }));
}

/**
 * Scrapes channel listings from a specific provider.
 * @param providerName Name of the provider to scrape (a key of the provider table)
 * @param options Optional configuration for the scraping process
 *        (accepted for API compatibility; not read by this function)
 * @returns Promise resolving to a ScraperResult object; scraper failures are
 *          reported through `success: false` rather than a rejection
 * @throws Error if provider is not found
 */
async function scrapeProvider(providerName, options) {
    // Own-property check so inherited keys such as "constructor" or "toString"
    // are rejected instead of being passed to the scraper as a configuration.
    const isKnown = Object.prototype.hasOwnProperty.call(providers, providerName);
    const config = isKnown ? providers[providerName] : undefined;
    if (!config) {
        throw new Error(`Provider "${providerName}" not found`);
    }
    return runProviderScraper(providerName, config);
}

/**
 * Executes the CLI workflow for already-parsed arguments.
 * With `args.provider` set, a single provider is scraped; otherwise all of
 * them are. Output goes to files when `args.writeFiles` is set and to stdout
 * as JSON otherwise. A failed single-provider scrape exits with status 1.
 * @param args Parsed command-line arguments
 * @param options Options forwarded to the scraping functions
 * @returns Promise that settles once output has been produced
 */
async function runCli(args, options) {
    if (args.provider) {
        // Scrape specific provider
        const result = await scrapeProvider(args.provider, options);
        if (result.success && result.channels) {
            if (args.writeFiles) {
                // Awaited so that, should the writer be asynchronous, its
                // failure reaches the caller's catch handler.
                await (0, fileUtils_1.writeResultsToFiles)([result]);
            }
            else {
                console.log(JSON.stringify(result.channels, null, 2));
            }
        }
        else {
            console.error(`Failed to scrape ${args.provider}:`, result.error);
            process.exit(1);
        }
    }
    else {
        // Scrape all providers
        const results = await scrapeAllProviders(options);
        if (args.writeFiles) {
            await (0, fileUtils_1.writeResultsToFiles)(results.results);
        }
        else {
            console.log(JSON.stringify(results, null, 2));
        }
    }
}

// CLI functionality
if (require.main === module) {
    const args = (0, args_1.parseArgs)();
    const options = {
        writeFiles: args.writeFiles,
        maxConcurrent: args.maxConcurrent
    };
    const failureLabel = args.provider
        ? `Error scraping ${args.provider}:`
        : 'Error scraping providers:';
    runCli(args, options).catch(error => {
        console.error(failureLabel, error);
        process.exit(1);
    });
}