/*
 * Package: @phunky/scrape-channel-listings
 * Version: (not specified)
 * Description: A TypeScript library for scraping TV channel listings from various providers.
 * File: 168 lines (167 loc), 5.95 kB, JavaScript
 */
;
/**
* Main entry point for the channel listing scraper.
* This file exposes both the library API and CLI functionality.
*/
// Default-import interop helper. An `__importDefault` already present on `this`
// is reused; otherwise the fallback below is installed. The fallback hands back
// modules flagged with `__esModule` untouched and wraps anything else so the
// value is reachable through a `default` property.
var __importDefault = (this && this.__importDefault) || function (imported) {
    if (imported && imported.__esModule) {
        return imported;
    }
    return { "default": imported };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.scrapeAllProviders = scrapeAllProviders;
exports.scrapeProvider = scrapeProvider;
const scraper_1 = require("./utils/scraper");
const virgin_1 = __importDefault(require("./scrapers/virgin"));
const sky_1 = __importDefault(require("./scrapers/sky"));
const sky_ireland_1 = __importDefault(require("./scrapers/sky-ireland"));
const sky_satellite_1 = __importDefault(require("./scrapers/sky-satellite"));
const freesat_1 = __importDefault(require("./scrapers/freesat"));
const directv_1 = __importDefault(require("./scrapers/directv"));
const dish_1 = __importDefault(require("./scrapers/dish"));
const fileUtils_1 = require("./utils/fileUtils");
const args_1 = require("./utils/args");
const providers = {
directv: directv_1.default,
dish: dish_1.default,
virgin: virgin_1.default,
sky: sky_1.default,
skyireland: sky_ireland_1.default,
skysatellite: sky_satellite_1.default,
freesat: freesat_1.default
};
/**
 * Converts a candidate concurrency limit into a usable batch size.
 * @param value Candidate limit (number, numeric string, or anything else)
 * @returns The value rounded down when it is a number >= 1, otherwise undefined
 */
function normalizeBatchSize(value) {
    const size = Math.floor(Number(value));
    // NaN fails the comparison, so non-numeric input is rejected here as well.
    return size >= 1 ? size : undefined;
}
/**
 * Scrapes channel listings from all configured providers.
 *
 * Providers are processed in consecutive batches of at most `maxConcurrent`
 * entries; the scrapers of one batch run in parallel and the next batch starts
 * only after every scraper of the current batch has finished. A failing scraper
 * does not reject the returned promise: its error is stored in that provider's
 * result entry (`success: false`).
 *
 * The batch size is the first usable value among `options.maxConcurrent`, the
 * `CONCURRENT_SCRAPERS` environment variable (parsed as a base-10 integer) and
 * the default of 10. Values that are not numbers >= 1 are skipped, because a
 * zero, negative or string step would break the batching loop.
 *
 * @param options Optional configuration for the scraping process
 * @param options.maxConcurrent Maximum number of scrapers to run at once
 * @param options.writeFiles When truthy, resolve to the summary object instead of the channel array
 * @returns Promise resolving to either an array of `{ provider, channels }` entries
 * (successful providers only) or, when `options.writeFiles` is truthy, a summary object
 * `{ results, totalDuration, successRate, totalChannels, failedScrapers }`
 */
async function scrapeAllProviders(options) {
    const startTime = Date.now();
    const results = [];
    const envConcurrent = process.env.CONCURRENT_SCRAPERS
        ? parseInt(process.env.CONCURRENT_SCRAPERS, 10)
        : undefined;
    // Only a numeric step >= 1 guarantees the loop below advances and terminates.
    const maxConcurrent = normalizeBatchSize(options?.maxConcurrent)
        ?? normalizeBatchSize(envConcurrent)
        ?? 10;
    // Process providers in batches to control concurrency
    const providerEntries = Object.entries(providers);
    for (let i = 0; i < providerEntries.length; i += maxConcurrent) {
        const batch = providerEntries.slice(i, i + maxConcurrent);
        const batchResults = await Promise.all(batch.map(async ([name, config]) => {
            const start = Date.now();
            try {
                const channels = await (0, scraper_1.runScraper)(config);
                return {
                    name,
                    success: true,
                    duration: Date.now() - start,
                    channelCount: channels.length,
                    channels
                };
            }
            catch (error) {
                // Capture the failure so one broken provider cannot abort the whole run.
                return {
                    name,
                    success: false,
                    duration: Date.now() - start,
                    error: error
                };
            }
        }));
        results.push(...batchResults);
    }
    const totalDuration = Date.now() - startTime;
    const failedScrapers = results.filter(r => !r.success);
    const successRate = `${((results.length - failedScrapers.length) / results.length * 100).toFixed(1)}%`;
    // Failed entries carry no channelCount and therefore contribute 0.
    const totalChannels = results.reduce((sum, r) => sum + (r.channelCount || 0), 0);
    if (options?.writeFiles) {
        return {
            results,
            totalDuration,
            successRate,
            totalChannels,
            failedScrapers
        };
    }
    // Transform results into ProviderChannels array
    return results
        .filter(result => result.success && result.channels)
        .map(result => ({
        provider: result.name,
        channels: result.channels
    }));
}
/**
 * Scrapes channel listings from a specific provider.
 *
 * A scraper failure does not reject the returned promise; it is reported as a
 * result with `success: false` and the caught error in `error`.
 *
 * @param providerName Name of the provider to scrape (a key of the `providers` registry)
 * @param options Optional configuration for the scraping process (currently not read by this function)
 * @returns Promise resolving to a ScraperResult object: `{ name, success, duration, channelCount, channels }`
 * on success or `{ name, success, duration, error }` on failure
 * @throws Error if provider is not found
 */
async function scrapeProvider(providerName, options) {
    // Own-property check: a plain `providers[providerName]` lookup would also
    // resolve inherited members such as "constructor" or "toString" and pass
    // them to the scraper as if they were provider configurations.
    const config = Object.prototype.hasOwnProperty.call(providers, providerName)
        ? providers[providerName]
        : undefined;
    if (!config) {
        throw new Error(`Provider "${providerName}" not found`);
    }
    const start = Date.now();
    try {
        const channels = await (0, scraper_1.runScraper)(config);
        return {
            name: providerName,
            success: true,
            duration: Date.now() - start,
            channelCount: channels.length,
            channels
        };
    }
    catch (error) {
        return {
            name: providerName,
            success: false,
            duration: Date.now() - start,
            error: error
        };
    }
}
// CLI functionality: executed only when this file is the entry script.
if (require.main === module) {
    const cliArgs = (0, args_1.parseArgs)();
    const scrapeOptions = {
        writeFiles: cliArgs.writeFiles,
        maxConcurrent: cliArgs.maxConcurrent
    };
    // Reports a failure on stderr and terminates the process with status 1.
    const abort = (message, detail) => {
        console.error(message, detail);
        process.exit(1);
    };
    // Scrapes the provider named on the command line, then writes or prints its channels.
    const runSingleProvider = async () => {
        const outcome = await scrapeProvider(cliArgs.provider, scrapeOptions);
        if (!outcome.success || !outcome.channels) {
            abort(`Failed to scrape ${cliArgs.provider}:`, outcome.error);
            return;
        }
        if (cliArgs.writeFiles) {
            (0, fileUtils_1.writeResultsToFiles)([outcome]);
            return;
        }
        console.log(JSON.stringify(outcome.channels, null, 2));
    };
    // Scrapes every provider, then writes the per-provider results or prints the returned value.
    const runAllProviders = async () => {
        const summary = await scrapeAllProviders(scrapeOptions);
        if (cliArgs.writeFiles) {
            (0, fileUtils_1.writeResultsToFiles)(summary.results);
            return;
        }
        console.log(JSON.stringify(summary, null, 2));
    };
    if (cliArgs.provider) {
        runSingleProvider().catch(error => abort(`Error scraping ${cliArgs.provider}:`, error));
    }
    else {
        runAllProviders().catch(error => abort('Error scraping providers:', error));
    }
}