guardian-epub
Version:
Creates an ePub of the Guardian newspaper from the Guardian API
329 lines (280 loc) • 9.33 kB
JavaScript
import axios from "axios";
import ora from "ora";
import { JSDOM } from "jsdom";
import Epub from "epub-gen";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";
import { formatISO, parseISO } from "date-fns";
import os from "os";
import { format, utcToZonedTime } from "date-fns-tz";
import { mkdirSync, existsSync } from "fs";
import { fileURLToPath } from "url";
import path, { dirname, join } from "path";
import Enquirer from "enquirer";
// import { createRequire } from "module";
import { getApiKey, loadSections, saveSettings } from "./utils/files.js";
import { sortArrayByDefaultArray } from "./utils/sort.js";
import { createTextImage } from "./utils/images.js";
const { MultiSelect, Sort } = Enquirer; // Can't import these directly in one step for some reason
// const dynamicRequire = createRequire(import.meta.url);
// const { MultiSelect, Sort } = dynamicRequire("enquirer");
// Get the current date and time
const now = new Date();
const timeZone = Intl.DateTimeFormat().resolvedOptions().timeZone;
const zonedTime = utcToZonedTime(now, timeZone);
const dateString = format(zonedTime, "yyyy-MM-dd");
const timeString = format(zonedTime, "HHmm");
const dayOfWeek = format(zonedTime, "EEEE");
const timeStringDisplay = format(zonedTime, "HH:mm");
const userHomeDir = os.homedir();
const configDir = path.join(userHomeDir, ".guardianEpub");
if (!existsSync(configDir)) {
mkdirSync(configDir);
}
const argv = yargs(hideBin(process.argv)).argv;
const API_KEY = await getApiKey();
if (!API_KEY) {
console.log(
"API key file not found. Please run command 'guardianEpubKey' to initialise. You will need your Guardian API key ready to paste in.",
);
process.exit(1);
}
async function reorderSections(sections) {
const prompt = new Sort({
name: "order",
message: "Reorder the sections (use arrow keys and space to select):",
choices: sections,
});
const result = await prompt.run();
return result;
}
function createUrlToFileMap(articlesBySection) {
const urlToFileMap = {};
let fileIndex = 1;
articlesBySection.forEach(sectionGroup => {
sectionGroup.articles.forEach(article => {
const titlePart = article.webTitle
.toLowerCase()
.replace(/\s+/g, "-")
.replace(/[^a-z0-9-]+/g, "")
.replace(/-+/g, "-")
.replace(/^-|-$/g, "");
const filename = `${fileIndex}_${titlePart}.xhtml`;
urlToFileMap[article.webUrl] = filename;
fileIndex++;
});
});
return urlToFileMap;
}
function updateArticleLinks(articleContent, urlToFileMap) {
// If we've pulled a page down locally, then change URLs to point to there and
// not to the online version
const dom = new JSDOM(articleContent);
const document = dom.window.document;
const links = document.querySelectorAll("a");
links.forEach(link => {
const href = link.href;
if (href && href.includes("theguardian.com") && urlToFileMap[href]) {
link.href = urlToFileMap[href];
}
});
return dom.serialize();
}
async function fetchSections() {
try {
const response = await axios.get(
"https://content.guardianapis.com/sections",
{
params: {
"api-key": API_KEY,
},
},
);
return response.data.response.results.map(section => section.id);
} catch (error) {
console.error("Error fetching sections:", error);
return [];
}
}
async function selectSections(sections, defaultSections = []) {
const choices = sections.map(section => ({
name: section,
value: section,
// Mark as selected if it's a default section
enabled: defaultSections.includes(section),
}));
const prompt = new MultiSelect({
name: "selectedSections",
limit: 15,
message: "Select sections to fetch articles from:",
choices,
result(names) {
return this.map(names);
},
});
try {
const answer = await prompt.run();
return answer;
} catch (error) {
console.error("Error: ", error);
return []; // Return an empty array in case of error or if the prompt was cancelled
}
}
async function fetchArticles(sections) {
let allArticlesBySection = [];
// const lastRunDate = parseISO(loadLastRunDate());
// const fromDate = lastRunDate
// ? formatISO(startOfDay(lastRunDate), { representation: "date" })
// : null;
for (const section of sections) {
try {
const params = {
"api-key": API_KEY,
"show-fields": "all",
};
// if (fromDate) {
// params["from-date"] = fromDate;
// }
const response = await axios.get(
`https://content.guardianapis.com/${section}`,
{ params },
);
allArticlesBySection.push({
section,
articles: response.data.response.results,
});
} catch (error) {
console.error(`Error fetching articles from section ${section}:`, error);
}
}
return allArticlesBySection;
}
async function createEpub(articlesBySection) {
const filename = `guardian-${dateString}-${timeString}.epub`;
const urlToFileMap = createUrlToFileMap(articlesBySection);
// Creating custom title for the ToC
const tocTitle = `The Guardian - ${dateString} ${timeStringDisplay}`;
// const content = [
// {
// title: tocTitle,
// data: `<h1>${tocTitle}</h1>`,
// excludeFromToc: true,
// },
// ];
const content = [];
// Process each section and its articles
articlesBySection.forEach(sectionGroup => {
// Create a section header with an articles array
let sectionHeader = {
title: sectionGroup.section.toUpperCase(),
data: `<h2 style="font-weight:bold;">${sectionGroup.section.toUpperCase()}</h2>`,
excludeFromToc: true,
articles: [],
};
// Add articles to the section
sectionGroup.articles.forEach(article => {
const updatedContent = updateArticleLinks(
article.fields.body,
urlToFileMap,
);
const articleFilename = urlToFileMap[article.webUrl];
let publishDate;
try {
publishDate = format(
parseISO(article?.fields?.firstPublicationDate),
"dd MMM",
);
} catch (e) {
publishDate = formatISO(new Date());
}
const articleItem = {
title: article.webTitle,
data: updatedContent,
author: article.fields.byline,
publishedDate: publishDate,
excludeFromToc: false,
filename: articleFilename,
};
// Add article to section's articles array
sectionHeader.articles.push(articleItem);
// Also add article to the main content array for epub-gen processing
content.push(articleItem);
});
// Add the section header to the main content array
content.push(sectionHeader);
});
const __dirname = dirname(fileURLToPath(import.meta.url));
const tocTemplatePath = join(__dirname, "guardian-toc-html.ejs");
const tocNcxTemplatePath = join(__dirname, "guardian-toc-ncx.ejs");
const coverPath = join(__dirname, "guardian-cover.jpg");
const title = `The Guardian ${dateString}:${timeStringDisplay}`;
await createTextImage(
coverPath,
"The Guardian",
`${dayOfWeek} ${dateString}:${timeStringDisplay}`,
);
// EPUB options including the custom ToC template path
const options = {
title,
author: "The Guardian",
content: content,
bookId: tocTitle,
cover: coverPath,
customHtmlTocTemplatePath: tocTemplatePath,
customNcxTocTemplatePath: tocNcxTemplatePath,
};
// Creating the EPUB
try {
await new Epub(options, filename).promise;
console.log(`EPUB file created successfully: ${filename}`);
} catch (error) {
console.error("Error creating EPUB file:", error);
}
}
async function main() {
let spinner = ora("Fetching sections from Guardian API...").start();
const sections = await fetchSections();
if (sections.length === 0) {
spinner.fail("No sections fetched.");
process.exit(1);
}
spinner.succeed("Sections fetched successfully.");
const defaultSections = loadSections();
// Check for --noselect switch
const noSelect = argv.noSelect || argv.noselect;
let userSortedSections;
if (!noSelect) {
// Prompt user to select and reorder sections
const selectedSections = await selectSections(sections, defaultSections);
if (selectedSections.length === 0) {
console.log("No sections selected.");
return;
}
const sectionsArray = Object.keys(selectedSections);
const sectionsArraySortedByDefault = sortArrayByDefaultArray(
sectionsArray,
defaultSections,
);
userSortedSections = await reorderSections(sectionsArraySortedByDefault);
saveSettings({ sections: userSortedSections });
} else {
// Use default sections without prompting
userSortedSections = defaultSections;
}
spinner = ora(
`${
noSelect
? "--noselect switch detected, so skipping sections select. "
: ""
} Fetching articles from Guardian API...`,
).start();
const articlesBySection = await fetchArticles(userSortedSections);
if (articlesBySection.length > 0) {
spinner.succeed("Articles fetched successfully.");
await createEpub(articlesBySection);
} else {
spinner.fail("No articles fetched.");
}
}
main();