UNPKG

cleanview

Version:

Clean the content of html articles

197 lines (196 loc) 6.29 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.clean = clean;
const valid_tags_1 = __importDefault(require("../defaults/valid-tags"));
const forbidden_classes_1 = __importDefault(require("../defaults/forbidden-classes"));
const helpers_1 = require("./helpers");

// Tag allow-list used when `options.secondTry` is set: the defaults plus <header>.
const VALID_TAGS_SECOND_TRY = [...valid_tags_1.default, "header"];

// Attribute allow-list per element type (the keys are the values returned by
// getElementType). Anything not listed is removed by keepAttributes.
// NOTE(review): keepAttributes also drops attributes whose value is falsy, so
// a valueless attribute (e.g. a bare `allowfullscreen`, if the parser reports
// it with a null/empty value) is removed even though it is listed here.
const ATTRIBUTES_TO_KEEP = {
    IMAGE: ["src", "title", "alt", "data-src", "srcset", "data-srcset"],
    LINK: ["href", "title"],
    SOURCE: ["srcset"],
    YOUTUBE: ["src", "width", "height", "allowfullscreen", "frameborder"],
    OTHER: [],
    INVALID: [],
};

// Elements that are kept by filterEmptyNodes even when they have no children.
const ALWAYS_KEPT_TAGS = ["img", "iframe", "br", "hr"];

/**
 * Cleans a parsed HTML tree.
 *
 * Runs three passes: flag nodes that live inside <pre>, filter/clean from the
 * root down, then prune emptied elements from the leaves up. Nodes are mutated
 * in place (flags, attributes, children); the filtered top-level array is
 * returned.
 *
 * @param {Array<object>} json - top-level nodes of the parsed document
 * @param {object} [options] - reads `secondTry`, `includeTags`,
 *   `includeClasses` and `forbiddenClasses`
 * @returns {Array<object>} the cleaned top-level nodes
 */
function clean(json, options) {
    options = options || {};
    json = addFlags(json, options);
    json = cleanOuterToInner(json, options);
    json = cleanInnerToOuter(json, options);
    return json;
}

/**
 * Applies every flagging pass (currently only the <pre> flag).
 */
function addFlags(json, options) {
    return addFlagForPre(json, options);
}

/**
 * Sets `insidePre = true` on every descendant of a <pre> element.
 *
 * iterateChildren invokes the callback on a child before descending into it,
 * so a flagged child passes the flag on to its own children.
 */
function addFlagForPre(json, options) {
    const flagChild = (child, _, parent) => {
        // Case-insensitive, to agree with the tag matching done by filterTags.
        if (getTagName(parent) === "pre" || parent.insidePre) {
            child.insidePre = true;
        }
        return child;
    };
    return json.map((e) => iterateChildren(e, options, flagChild));
}

/**
 * Depth-first walk that replaces each child with `func(child, options, parent)`.
 *
 * Anything that is falsy, is not a node according to helpers.isNode, or has no
 * children is returned untouched.
 *
 * @returns {object} the same `element`, with `children` reassigned
 */
function iterateChildren(element, options, func) {
    if (!element || !(0, helpers_1.isNode)(element)) {
        return element;
    }
    if (!element.children || !element.children.length) {
        return element;
    }
    element.children = element.children.map((child) => {
        const modified = func(child, options, element);
        iterateChildren(child, options, func);
        return modified;
    });
    return element;
}

/**
 * Root-to-leaf pass: drops comments, blank text, disallowed tags and forbidden
 * classes at this level, strips attributes from the survivors, then repeats
 * the same pass on their children.
 */
function cleanOuterToInner(json, options) {
    return json
        .filter((e) => filterComments(e, options))
        .filter((e) => filterSpaces(e, options))
        .filter((e) => filterTags(e, options))
        .filter((e) => filterClasses(e, options))
        .map((e) => cleanAttributes(e, options))
        .map((e) => passToChildren(e, options, cleanOuterToInner));
}

/**
 * Leaf-to-root pass: children are pruned first, so an element whose children
 * were all removed is itself seen as empty and removed.
 */
function cleanInnerToOuter(json, options) {
    return json
        .map((e) => passToChildren(e, options, cleanInnerToOuter))
        .filter((e) => filterEmptyNodes(e, options));
}

/**
 * Returns the node's tag name lower-cased, or "" when it has none.
 */
function getTagName(e) {
    return (e.tagName || "").toLowerCase();
}

/**
 * Keeps text nodes, the tags in ALWAYS_KEPT_TAGS, nodes without a `children`
 * property and elements with at least one child; rejects comments and
 * elements whose `children` array is empty.
 */
function filterEmptyNodes(e, _options) {
    if ((0, helpers_1.isComment)(e)) {
        return false;
    }
    if ((0, helpers_1.isText)(e)) {
        return true;
    }
    if (ALWAYS_KEPT_TAGS.includes(getTagName(e))) {
        return true;
    }
    if (!e.children) {
        return true;
    }
    return e.children.length > 0;
}

/**
 * Rejects comment nodes.
 */
function filterComments(e, _options) {
    return !(0, helpers_1.isComment)(e);
}

/**
 * Rejects text nodes that contain only whitespace.
 */
function filterSpaces(e, _options) {
    // do not remove spaces when inside a <pre> tag
    if (e.insidePre) {
        return true;
    }
    const blankSpace = (0, helpers_1.isText)(e) && e.content.trim() === "";
    return !blankSpace;
}

/**
 * Keeps text nodes and elements whose lower-cased tag is in the active
 * allow-list (default or second-try list, plus `options.includeTags`).
 */
function filterTags(e, options) {
    if ((0, helpers_1.isText)(e)) {
        return true;
    }
    if ((0, helpers_1.isComment)(e)) {
        return false;
    }
    const TAGS = options.secondTry ? VALID_TAGS_SECOND_TRY : valid_tags_1.default;
    const aditionalTags = options.includeTags || [];
    const tags = [...TAGS, ...aditionalTags];
    return tags.includes(getTagName(e));
}

/**
 * Rejects elements whose lower-cased `class` attribute contains any forbidden
 * fragment (built-in list plus `options.forbiddenClasses`). The match is a
 * plain substring test. `options.includeClasses` disables the filter.
 */
function filterClasses(e, options) {
    if (options.includeClasses) {
        return true;
    }
    const forbiddenClasses = options.forbiddenClasses || [];
    const FORBIDDEN = [...forbidden_classes_1.default, ...forbiddenClasses];
    const className = getClass(e);
    return !FORBIDDEN.some((forbidden) => className.indexOf(forbidden) > -1);
}

/**
 * Returns the node's `class` attribute lower-cased ("" when absent).
 */
function getClass(e) {
    return getProp(e, "class").toLowerCase();
}

/**
 * Returns the value of attribute `prop` as a string, or "" when `e` is not a
 * node, has no attributes, or the attribute is missing or has a falsy value
 * holder. The key comparison is exact (case-sensitive).
 */
function getProp(e, prop) {
    if (!(0, helpers_1.isNode)(e)) {
        return "";
    }
    if (!e.attributes) {
        return "";
    }
    const pair = e.attributes.find((a) => a.key === prop);
    return pair ? String(pair.value) : "";
}

/**
 * Replaces `e.children` with `func(e.children, options, func)` when
 * helpers.isNodeWithChildren accepts the node; otherwise returns `e` as is.
 */
function passToChildren(e, options, func) {
    if (!(0, helpers_1.isNode)(e)) {
        return e;
    }
    if ((0, helpers_1.isNodeWithChildren)(e)) {
        e.children = func(e.children, options, func);
    }
    return e;
}

/**
 * Reduces a node's attributes to the allow-list for its element type and
 * applies the per-type fix-ups. Non-node values are returned untouched.
 */
function cleanAttributes(e, _options) {
    if (!(0, helpers_1.isNode)(e)) {
        return e;
    }
    // The type must be computed first: it reads `src` from the raw attributes.
    const type = getElementType(e);
    keepAttributes(e, ATTRIBUTES_TO_KEEP[type]);
    switch (type) {
        case "INVALID":
            // make sure invalid elements don't get rendered to html
            e.tagName = "div";
            e.children = [];
            break;
        case "LINK":
            // NOTE(review): no `rel` attribute accompanies target=_blank.
            e.attributes.push({ key: "target", value: "_blank" });
            break;
        case "IMAGE":
            // Lazy-loaded images: promote data-src/data-srcset when the real
            // attribute is missing.
            mirrorAttribute(e, "data-src", "src");
            mirrorAttribute(e, "data-srcset", "srcset");
            break;
        default:
            break;
    }
    return e;
}

/**
 * Appends attribute `target` with the value of `source` when `source` has a
 * value and `target` does not.
 */
function mirrorAttribute(e, source, target) {
    const sourceValue = getProp(e, source);
    const targetValue = getProp(e, target);
    if (sourceValue && !targetValue) {
        e.attributes.push({ key: target, value: sourceValue });
    }
}

/**
 * Classifies an element into one of the ATTRIBUTES_TO_KEEP keys.
 *
 * The tag is compared lower-cased so the result agrees with filterTags, which
 * accepts tags case-insensitively.
 *
 * @returns {"IMAGE"|"LINK"|"SOURCE"|"YOUTUBE"|"INVALID"|"OTHER"}
 */
function getElementType(e) {
    const tag = getTagName(e);
    if (tag === "img") {
        return "IMAGE";
    }
    if (tag === "a") {
        return "LINK";
    }
    if (tag === "source") {
        return "SOURCE";
    }
    if (tag !== "iframe") {
        return "OTHER";
    }
    const src = getProp(e, "src");
    // TODO: add support to other platforms
    // NOTE(review): this is a substring test anywhere after the first
    // character of `src`, not a hostname check, so a URL that merely contains
    // "youtube.com" in its path or query is accepted as well.
    const isYoutube = src.indexOf("youtube.com") > 0 || src.indexOf("youtu.be") > 0;
    // if is not a youtube video, but is still an iframe, return invalid
    return isYoutube ? "YOUTUBE" : "INVALID";
}

/**
 * Rewrites `e.attributes` to contain only attributes whose lower-cased key is
 * in `list` and whose value is truthy. Keys are stored lower-cased.
 *
 * A node without an `attributes` array ends up with an empty one, so callers
 * can safely push onto it afterwards.
 */
function keepAttributes(e, list) {
    e.attributes = (e.attributes || [])
        .map((a) => ({ key: a.key.toLowerCase(), value: a.value }))
        .filter((attr) => attr.value && list.includes(attr.key));
    return e;
}