epg-grabber
Version:
Node.js CLI tool for grabbing EPG from different sites
240 lines (237 loc) • 9.88 kB
JavaScript
import { n as name, v as version, d as description, p as parseNumber, l as loadJs, a as parseProxy, E as EPGGrabberMock, b as EPGGrabber, c as defaultConfig, i as isObject, g as getAbsPath, e as getUTCDate } from './index-Dm69fQTU.js';
import { Collection, Template } from '@freearhey/core';
import { Command, Option } from 'commander';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { CurlGenerator } from 'curl-generator';
import winston from 'winston';
import path from 'path';
import { TaskQueue } from 'cwait';
import merge from 'lodash.merge';
import Promise$1 from 'bluebird';
import path$1 from 'node:path';
import { glob } from 'glob';
import fs from 'fs-extra';
import pako from 'pako';
import 'dayjs/plugin/utc.js';
import 'dayjs';
import 'node:url';
import 'axios-mock-adapter';
import 'lodash.padstart';
import 'axios';
import 'axios-cache-interceptor';
import 'xml-js';
// winston format helpers used by Logger below to build its console and file formats.
const { combine, timestamp, printf } = winston.format;
/**
 * Small facade over a winston logger.
 *
 * Messages always go to the console; when `options.log` is set they are also
 * written to that file (opened with the "w" flag) with a timestamp and an
 * upper-cased level prefix.
 */
class Logger {
  /** Underlying winston logger created in the constructor. */
  #logger;

  /**
   * @param {{ log?: string, logLevel?: string }} [options]
   *   `log` is the path of the log file, `logLevel` is passed to winston as `level`.
   */
  constructor(options) {
    const settings = options || {};

    // File lines look like "[<timestamp>] LEVEL: message".
    const fileLineFormat = printf(
      (entry) => `[${entry.timestamp}] ${entry.level.toUpperCase()}: ${entry.message}`
    );

    // Console lines: errors get an " Error: " prefix, other string messages are
    // printed verbatim, and non-string messages produce an empty line.
    const consoleLineFormat = printf((entry) => {
      if (entry.level === "error") {
        return ` Error: ${entry.message}`;
      }
      return typeof entry.message === "string" ? entry.message : "";
    });

    const sinks = [new winston.transports.Console({ format: consoleLineFormat })];

    if (settings.log) {
      const fileSink = new winston.transports.File({
        filename: path.resolve(settings.log),
        format: combine(timestamp(), fileLineFormat),
        options: { flags: "w" }
      });
      sinks.push(fileSink);
    }

    this.#logger = winston.createLogger({
      level: settings.logLevel,
      transports: sinks
    });
  }

  /** Logs `message` at the "info" level. */
  info(message) {
    this.#logger.info(message);
  }

  /** Logs `message` at the "debug" level. */
  debug(message) {
    this.#logger.debug(message);
  }

  /** Logs `message` at the "error" level. */
  error(message) {
    this.#logger.error(message);
  }
}
// Command-line interface definition.
const program = new Command();
program.name(name).version(version, "-v, --version").description(description);

// Options are registered in exactly this order.
const optionDefinitions = [
  new Option("-c, --config <config>", "Path to [site].config.js file").makeOptionMandatory(),
  new Option("-o, --output <output>", "Path to output file"),
  new Option("-x, --proxy <url>", "Use the specified proxy"),
  new Option("--channels <channels>", "Path to list of channels"),
  new Option("--days <days>", "Number of days for which to grab the program").argParser(
    parseNumber
  ),
  new Option("--delay <delay>", "Delay between requests (in milliseconds)").argParser(
    parseNumber
  ),
  new Option(
    "--timeout <timeout>",
    "Set a timeout for each request (in milliseconds)"
  ).argParser(parseNumber),
  new Option(
    "--max-connections <maxConnections>",
    "Set a limit on the number of concurrent requests per site"
  ).argParser(parseNumber),
  new Option(
    "--cache-ttl <cacheTtl>",
    "Maximum time for storing each request (in milliseconds)"
  ).argParser(parseNumber),
  new Option("--gzip", "Compress the output"),
  new Option("--debug", "Enable debug mode"),
  new Option("--curl", "Display request as CURL"),
  new Option("--log <log>", "Path to log file"),
  new Option("--log-level <level>", "Set log level")
];
for (const definition of optionDefinitions) {
  program.addOption(definition);
}
program.parse(process.argv);

// Parsed option values, shared by the rest of the module.
const options = program.opts();

// --debug takes precedence over --log-level and forces the "debug" level.
const logger = new Logger({
  log: options.log,
  logLevel: options.debug === true ? "debug" : options.logLevel
});
/**
 * Overlays the command-line options on top of the config loaded from the
 * site config file.
 *
 * The config object is modified in place (lodash `merge` mutates its first
 * argument) and returned for convenience.
 *
 * @param {object} config - Value returned by `loadJs` for the --config file.
 * @param {object} cliOptions - Option values parsed by commander.
 * @returns {Promise<object>} The same `config` object.
 */
async function applyCliOverrides(config, cliOptions) {
  // Normalise `channels`: keep an array, wrap a single string, otherwise start empty.
  config.channels = Array.isArray(config.channels)
    ? config.channels
    : typeof config.channels === "string"
      ? [config.channels]
      : [];

  if (typeof cliOptions.cacheTtl === "number") {
    merge(config, { request: { cache: { ttl: cliOptions.cacheTtl } } });
  }
  if (typeof cliOptions.timeout === "number") {
    merge(config, { request: { timeout: cliOptions.timeout } });
  }

  if (cliOptions.proxy !== void 0) {
    const proxy = parseProxy(cliOptions.proxy);
    const socksProtocols = ["socks", "socks5", "socks5h", "socks4", "socks4a"];
    if (proxy.protocol && socksProtocols.includes(String(proxy.protocol))) {
      // SOCKS proxies are handled through an agent instead of the `proxy` request setting.
      const socksProxyAgent = new SocksProxyAgent(cliOptions.proxy);
      merge(config, {
        request: { httpAgent: socksProxyAgent, httpsAgent: socksProxyAgent }
      });
    } else {
      merge(config, { request: { proxy } });
    }
  }

  // --channels is treated as a glob pattern and replaces the list from the config file.
  if (typeof cliOptions.channels === "string") config.channels = await glob(cliOptions.channels);
  if (typeof cliOptions.output === "string") config.output = cliOptions.output;
  for (const key of ["days", "delay", "maxConnections"]) {
    if (typeof cliOptions[key] === "number") config[key] = cliOptions[key];
  }
  for (const key of ["debug", "curl", "gzip"]) {
    if (typeof cliOptions[key] === "boolean") config[key] = cliOptions[key];
  }
  return config;
}

/**
 * Registers request/response interceptors on the grabber's HTTP client that
 * log every request and response at debug level and, when curl output is
 * enabled, log the equivalent curl command at info level.
 *
 * @param {object} grabber - EPGGrabber (or mock) instance.
 * @param {object} config - Effective config; only `config.curl` is read here.
 */
function attachLoggingInterceptors(grabber, config) {
  grabber.client.instance.interceptors.request.use(
    (request) => {
      logger.debug(`Request: ${JSON.stringify(request, null, 2)}`);
      const curl = config.curl || defaultConfig.curl;
      if (curl) {
        const url = request.url || "";
        const method = request.method ? request.method : "GET";
        const headers = request.headers ? request.headers.toJSON() : void 0;
        const body = request.data ? request.data : void 0;
        logger.info(CurlGenerator({ url, method, headers, body }));
      }
      return request;
    },
    (error) => Promise$1.reject(error)
  );

  grabber.client.instance.interceptors.response.use(
    (response) => {
      // Objects and arrays are serialised as JSON, anything else via toString().
      let data = void 0;
      if (response.data) {
        data = isObject(response.data) || Array.isArray(response.data)
          ? JSON.stringify(response.data)
          : response.data.toString();
      }
      logger.debug(
        `Response: ${JSON.stringify(
          { headers: response.headers, data, cached: response.cached },
          null,
          2
        )}`
      );
      return response;
    },
    (error) => Promise$1.reject(error)
  );
}

/**
 * Reads and parses every channels file listed in `config.channels`.
 *
 * Relative paths are resolved against the current working directory when
 * --channels was given, otherwise against the directory of the --config file.
 *
 * @param {object} config - Effective config.
 * @returns {Collection} All channels from all files.
 * @throws {Error} When no channels file is configured or no channel was found.
 */
function loadChannels(config) {
  if (!Array.isArray(config.channels) || !config.channels.length)
    throw new Error('Path to "*.channels.xml" is missing');

  const channels = new Collection();
  const rootDir = options.channels ? process.cwd() : path$1.dirname(options.config);
  config.channels.forEach((filepath) => {
    const absFilepath = getAbsPath(filepath, rootDir);
    logger.debug(`Loading "${absFilepath}"...`);
    const channelsXML = fs.readFileSync(absFilepath, "utf8");
    channels.concat(new Collection(EPGGrabber.parseChannelsXML(channelsXML)));
  });

  if (channels.isEmpty()) throw new Error("No channels found");
  return channels;
}

/**
 * Grabs the programs for one group of channels and writes the resulting
 * XMLTV document to the path produced by the output template.
 *
 * @param {object} grabber - EPGGrabber (or mock) instance.
 * @param {Collection} groupChannels - Channels that share one output file.
 * @param {{ days: number, maxConnections: number, gzip: boolean, template: Template }} settings
 * @returns {Promise<void>}
 */
async function grabGroup(grabber, groupChannels, { days, maxConnections, gzip, template }) {
  const programs = new Collection();
  let index = 1;
  const total = groupChannels.count() * days;
  const utcDate = getUTCDate(process.env.CURR_DATE);
  const dates = Array.from({ length: days }, (_, i) => utcDate.add(i, "d"));

  // One queue item per (channel, date) pair.
  const queue = new Collection();
  groupChannels.forEach((channel) => {
    for (const date of dates) {
      queue.add({ channel, date });
    }
  });

  // The task queue caps the number of grabs running at the same time.
  const taskQueue = new TaskQueue(Promise$1, maxConnections);
  const requests = queue.map(
    taskQueue.wrap(async (queueItem) => {
      const { channel, date } = queueItem;
      if (!channel.logo) {
        channel.logo = await grabber.loadLogo(channel, date);
      }
      const grabbed = await grabber.grab(channel, date, (context, error) => {
        const { channel: channel2, date: date2, programs: programs2 } = context;
        logger.info(
          `[${index}/${total}] ${channel2.site} - ${channel2.xmltv_id || channel2.site_id} - ${date2.format("MMM D, YYYY")} (${programs2.length} programs)`
        );
        if (error) logger.error(error.message);
        // The progress counter never runs past the expected total.
        if (index < total) index++;
      });
      programs.concat(new Collection(grabbed));
    })
  );
  await Promise$1.all(requests.all());

  const headers = { date: utcDate.format("YYYYMMDD") };
  const xml = EPGGrabber.generateXMLTV(groupChannels.all(), programs.all(), headers);

  // The output path is formatted from one sampled channel of the group.
  const channelSample = groupChannels.sample();
  const outputPath = template.format(channelSample.toObject());
  fs.mkdirSync(path$1.dirname(outputPath), { recursive: true });
  if (gzip) {
    fs.writeFileSync(outputPath, pako.gzip(xml));
  } else {
    fs.writeFileSync(outputPath, xml);
  }
  logger.info(`File '${outputPath}' successfully saved`);
}

/**
 * CLI entry point: loads the site config, applies command-line overrides,
 * loads the channel lists, grabs the guide for every channel group and writes
 * one output file per group.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no channels file is configured or no channel was found.
 */
async function main() {
  logger.info("Starting...");
  logger.info(`Loading '${options.config}'...`);
  const config = await applyCliOverrides(await loadJs(options.config), options);
  logger.debug(`Config: ${JSON.stringify(config, null, 2)}`);

  // Under NODE_ENV=test the mock grabber is used instead of the real one.
  const grabber = process.env.NODE_ENV === "test" ? new EPGGrabberMock(config) : new EPGGrabber(config);
  attachLoggingInterceptors(grabber, config);

  const channels = loadChannels(config);

  // `||` is kept deliberately: falsy config values fall back to the defaults.
  const days = config.days || defaultConfig.days;
  const maxConnections = config.maxConnections || defaultConfig.maxConnections;
  const gzip = config.gzip || defaultConfig.gzip;
  const defaultOutput = gzip ? defaultConfig.output + ".gz" : defaultConfig.output;
  const template = new Template(config.output || defaultOutput);
  const variables = template.variables();

  // Channels are grouped by the concatenated values of the channel
  // properties that appear as variables in the output template.
  const groups = channels.groupBy((channel) => {
    let fields; // plain-object view of the channel, built at most once
    let groupId = "";
    for (const key in channel) {
      if (!variables.includes(key)) continue;
      fields ??= channel.toObject();
      groupId += fields[key];
    }
    return groupId;
  });

  logger.info("Processing...");
  for (const groupId of groups.keys()) {
    const groupChannels = new Collection(groups.get(groupId));
    await grabGroup(grabber, groupChannels, { days, maxConnections, gzip, template });
  }
  logger.info("Finished");
}
// Run the CLI. A failure is reported through the logger (so it also reaches
// the --log file) and sets a non-zero exit code, instead of surfacing as an
// unhandled promise rejection. The stack trace is logged at debug level.
main().catch((error) => {
  logger.error(error instanceof Error ? error.message : String(error));
  if (error instanceof Error && error.stack) logger.debug(error.stack);
  process.exitCode = 1;
});