// epubinator
// Version:
// npm package that generates EPUB files from a URL
// 199 lines (198 loc) • 6.61 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateLink = exports.getMain = exports.removeTitle = exports.getTitle = exports.getBodyHtmlFromDom = exports.removeToc = exports.getArticle = exports.getDom = void 0;
var tslib_1 = require("tslib");
var node_fetch_1 = require("node-fetch");
var jsdom_1 = require("jsdom");
var jsdom_2 = require("./util/jsdom");
var logger_1 = require("./logger");
var url_1 = require("./url");
var ramda_1 = require("ramda");
/**
 * getDom
 *
 * Fetches `url`, parses the response body as HTML into a JSDOM and then
 * normalises the images of that document in place:
 *   1. each <picture> element is handed to replacePicturesWithImages;
 *   2. each <img> goes through linkImgSrcToCompleteLink(url) and then has its
 *      `srcset` and `loading` attributes removed;
 *   3. each <img> is handed to removeSvgImg.
 *
 * The HTTP status of the response is not inspected here; whatever body the
 * server returns is parsed.
 *
 * @param url address of the page to download
 * @returns Promise<JSDOM> resolving to the parsed, image-normalised document
 */
function getDom(url) {
// Down-levelled `async` function: tslib drives the state machine below, where
// each `case` label is the continuation after one awaited promise.
return tslib_1.__awaiter(this, void 0, void 0, function () {
var response, html, dom, pictures, images;
return tslib_1.__generator(this, function (_a) {
switch (_a.label) {
// Step 0: start the HTTP request and wait for the response.
case 0: return [4 /*yield*/, (0, node_fetch_1.default)(url)];
case 1:
// Step 1: wait for the response body as text.
response = _a.sent();
return [4 /*yield*/, response.text()];
case 2:
// Step 2: parse the markup and clean up the images.
html = _a.sent();
dom = new jsdom_1.JSDOM(html);
pictures = Array.from(dom.window.document.documentElement.querySelectorAll('picture'));
replacePicturesWithImages(pictures);
// Queried only after the <picture> replacement so the inserted <img> copies are included.
images = Array.from(dom.window.document.documentElement.querySelectorAll('img'));
// ramda's compose runs right-to-left: src values are rewritten first, attributes stripped second.
(0, ramda_1.compose)(withoutAttributes(['srcset', 'loading']), linkImgSrcToCompleteLink(url))(images);
removeSvgImg(images);
return [2 /*return*/, dom];
}
});
});
}
exports.getDom = getDom;
/**
 * Detaches every image whose `src` contains an SVG data URI
 * (`data:image/svg`, matched case-insensitively).
 *
 * @param images list of image elements; `remove()` is called on each match
 */
var removeSvgImg = function (images) {
    // No `g` flag, so the pattern is stateless and safe to reuse across images.
    var inlineSvgPattern = /data:image\/svg/i;
    images.forEach(function (candidate) {
        if (inlineSvgPattern.test(candidate.src)) {
            candidate.remove();
        }
    });
};
/**
 * Swaps each <picture> element for a deep copy of the <img> nested inside it.
 *
 * The copy is inserted immediately before the <picture>, which is then
 * removed. A <picture> that wraps no <img>, or that has no parent node, has
 * nothing that can be inserted, so it is only removed instead of raising a
 * TypeError that would abort processing of the whole page.
 *
 * @param pictures list of <picture> elements to replace
 */
var replacePicturesWithImages = function (pictures) {
    pictures.forEach(function (picture) {
        var image = picture.querySelector('img');
        var parentNode = picture.parentNode;
        // Guard both lookups: either can be null for malformed or detached markup.
        if (image && parentNode) {
            parentNode.insertBefore(image.cloneNode(true), picture);
        }
        picture.remove();
    });
};
/**
 * Builds a function that strips the given attributes from every image.
 *
 * @param attributes attribute names to remove
 * @returns function taking a list of images, calling `removeAttribute` for
 *          each name on each image, and returning that same list
 */
var withoutAttributes = function (attributes) {
    return function (images) {
        images.forEach(function (image) {
            attributes.forEach(function (attributeName) {
                image.removeAttribute(attributeName);
            });
        });
        return images;
    };
};
/**
 * getArticle
 *
 * Builds a new JSDOM from the first <article> element of `dom`, or from its
 * <body> when there is no <article>.
 *
 * @param dom source JSDOM
 * @param context optional details serialised into the error message (defaults to {})
 * @returns JSDOM object created from the selected element's outerHTML
 * @throws Error when neither an <article> nor a <body> is found
 */
function getArticle(dom, context) {
    var details = context === void 0 ? {} : context;
    var doc = (0, jsdom_2.getDocument)(dom);
    var article = doc.querySelector('article');
    if (!article) {
        article = doc.querySelector('body');
    }
    if (article) {
        return new jsdom_1.JSDOM(article.outerHTML);
    }
    throw new Error("cannot find article. describe the article explicitly ".concat(JSON.stringify(details)));
}
exports.getArticle = getArticle;
/**
 * removeToc
 *
 * Removes the element matching `#toc` or, when there is none, the first
 * <aside>, from the document of `dom`. The removal happens on the passed-in
 * document itself; the return value is a fresh JSDOM built from the
 * resulting markup.
 *
 * @param dom source JSDOM
 * @returns JSDOM created from the document's outerHTML after the removal
 */
function removeToc(dom) {
    var doc = (0, jsdom_2.getDocument)(dom);
    var toc = doc.querySelector('#toc');
    if (!toc) {
        toc = doc.querySelector('aside');
    }
    if (toc) {
        toc.remove();
    }
    return new jsdom_1.JSDOM(doc.documentElement.outerHTML);
}
exports.removeToc = removeToc;
/**
 * getBodyHtmlFromDom
 *
 * Returns the inner HTML of the <body> of `dom`, or an empty string when no
 * dom is supplied.
 *
 * @param {JSDOM} dom
 * @returns {string}
 */
function getBodyHtmlFromDom(dom) {
    if (dom) {
        var body = dom.window.document.querySelector('body');
        return body.innerHTML;
    }
    return '';
}
exports.getBodyHtmlFromDom = getBodyHtmlFromDom;
/**
 * getTitle
 *
 * Returns the inner HTML of the first <h1> in `dom`. When there is no <h1>,
 * a message naming `context.url` is written with console.log and an empty
 * string is returned.
 *
 * @param {JSDOM} dom
 * @param {ContextType} context={}
 * @returns {string}
 */
function getTitle(dom, context) {
    var details = context === void 0 ? {} : context;
    var heading = (0, jsdom_2.getDocument)(dom).querySelector('h1');
    if (heading) {
        return heading.innerHTML;
    }
    console.log("cannot find title at ".concat(details.url));
    return '';
}
exports.getTitle = getTitle;
/**
 * removeTitle
 *
 * Removes the first <h1>, if any, from the document of `dom`. The document is
 * modified in place and the same `dom` object is returned.
 *
 * @param {JSDOM} dom
 * @returns {JSDOM}
 */
function removeTitle(dom) {
    // TODO: immutability
    var heading = (0, jsdom_2.getDocument)(dom).querySelector('h1');
    if (heading) {
        heading.remove();
    }
    return dom;
}
exports.removeTitle = removeTitle;
/**
 * Finds the parent element of the first <h1> in `dom`, or of the first <h2>
 * when there is no <h1>.
 *
 * @param dom source JSDOM
 * @returns the heading's parentElement, or undefined when no heading exists
 */
function getFallbackTitleContent(dom) {
    var heading = (0, jsdom_2.getDocument)(dom).querySelector('h1');
    if (!heading) {
        heading = (0, jsdom_2.getDocument)(dom).querySelector('h2');
    }
    if (heading === null || heading === undefined) {
        return undefined;
    }
    return heading.parentElement;
}
/**
 * getMain
 *
 * Picks the main content container of `dom` and returns it as a new JSDOM.
 * Candidates are tried in this order: <main>, `.content`, `[role='main']`,
 * `#main`, the parent of the first <h1>/<h2>, and finally <body>.
 *
 * @param {JSDOM} dom
 * @param {ContextType} context={}
 * @returns {JSDOM}
 * @throws Error when none of the candidates exists
 */
function getMain(dom, context) {
    var details = context === void 0 ? {} : context;
    var doc = (0, jsdom_2.getDocument)(dom);
    var fallbackTitleContent = getFallbackTitleContent(dom);
    var selectors = ['main', '.content', "[role='main']", '#main'];
    var main = null;
    for (var i = 0; i < selectors.length && !main; i++) {
        main = doc.querySelector(selectors[i]);
    }
    if (!main) {
        // No explicit container: use the heading's parent, then the body.
        main = fallbackTitleContent || doc.querySelector('body');
    }
    if (main) {
        return new jsdom_1.JSDOM(main.outerHTML);
    }
    throw new Error("cannot find main. describe the main explicitly at ".concat(details.url, " ").concat(dom.window.document.documentElement.outerHTML));
}
exports.getMain = getMain;
/**
 * Builds a function that rewrites image sources against the page `url`.
 *
 * Every image whose `src` passes `isAbsoluteHref` gets its `src` replaced by
 * the result of `absoluteToLink({ url })(src)`; other images are left alone.
 * NOTE(review): `isAbsoluteHref` presumably matches root-relative paths such
 * as "/img/a.png" — confirm against ./url.
 *
 * @param url address of the page the images came from
 * @returns function taking a list of images, updating them in place and
 *          returning that same list
 */
var linkImgSrcToCompleteLink = function (url) {
    return function (images) {
        Array.from(images).forEach(function (image) {
            if (!(0, url_1.isAbsoluteHref)(image.src)) {
                return;
            }
            image.src = (0, url_1.absoluteToLink)({ url: url })(image.src);
        });
        return images;
    };
};
/**
 * generateLink
 *
 * Turns `link`, as found on the page at `url`, into a complete URL:
 *   - a link that already parses as a URL on its own is returned unchanged;
 *   - a link accepted by `isAbsoluteHref` is converted with `absoluteToLink`;
 *   - anything else is appended to the directory of `url`.
 * The origin of `url`, the link and `url` itself are logged on every call.
 *
 * @param {string} url address of the page containing the link
 * @param {string} link href to resolve
 * @returns {string} the resolved link, or undefined when `link` is empty
 */
function generateLink(url, link) {
    if (!link) {
        return;
    }
    var heading = (0, logger_1.info)('Generating link at');
    var originLabel = (0, logger_1.success)('origin:');
    var originValue = (0, logger_1.emphasizedInfo)((0, url_1.origin)(url));
    var linkLabel = (0, logger_1.success)('link:');
    var linkValue = (0, logger_1.emphasizedInfo)(link);
    (0, logger_1.log)(heading, originLabel, originValue, logger_1.lineBreak, linkLabel, linkValue);
    // NOTE(review): the page url is logged with the `error` formatter on every
    // call — looks like leftover debugging output; confirm before removing.
    (0, logger_1.log)((0, logger_1.error)(url));
    var parsesOnItsOwn;
    try {
        new URL(link);
        parsesOnItsOwn = true;
    }
    catch (e) {
        parsesOnItsOwn = false;
    }
    if (parsesOnItsOwn) {
        return link;
    }
    if ((0, url_1.isAbsoluteHref)(link)) {
        return (0, url_1.absoluteToLink)({ url: url })(link);
    }
    return "".concat(stripCurrentPageFromUrl(url), "/").concat(link);
}
exports.generateLink = generateLink;
/**
 * Returns the "directory" of `url`: everything before its last path segment,
 * without a trailing slash, e.g.
 * "https://example.com/a/b.html" -> "https://example.com/a".
 *
 * The URL is resolved with the URL parser rather than by splitting the raw
 * string on "/", so that
 *   - a URL without a path ("https://example.com") keeps its host instead of
 *     collapsing to "https:/";
 *   - slashes inside the query string or fragment are not mistaken for path
 *     separators.
 * Input that cannot be parsed as a hierarchical URL (for example a relative
 * path) falls back to plain string handling.
 *
 * @param url address of the current page
 * @returns the address of the containing directory, without trailing slash
 */
var stripCurrentPageFromUrl = function (url) {
    try {
        // Resolving "." against a URL yields its directory, ending in "/".
        return new URL('.', url).href.replace(/\/$/, '');
    }
    catch (e) {
        // Not a parsable base URL: drop the last "/"-separated segment instead.
        return url
            .split('/')
            .slice(0, -1)
            .join('/');
    }
};