UNPKG

truncate-html

Version:
197 lines (196 loc) 6.42 kB
import cheerio from "cheerio";

/**
 * Matches one "astral-safe" character at a time: a plain BMP code unit, a
 * surrogate pair, a regional-indicator pair, or any of those followed by
 * combining marks, variation selectors, skin-tone modifiers and ZWJ-joined
 * continuations. Used with `String.prototype.match` so that truncation never
 * splits such a sequence in the middle.
 */
const astralRange = /\ud83c[\udffb-\udfff](?=\ud83c[\udffb-\udfff])|(?:[^\ud800-\udfff][\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]?|[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff]|[\ud800-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/g;

/**
 * Built-in option defaults. `length` deliberately has no default: when it is
 * missing or invalid, `truncate` returns its input untouched.
 */
const defaultOptions = {
  // remove all tags and truncate the plain text only
  stripTags: false,
  // postfix appended to the truncated string
  ellipsis: "...",
  // passed through to cheerio.load as `decodeEntities`
  decodeEntities: false,
  // when true, `length` counts words instead of characters
  byWords: false,
  // selector (or array of selectors) whose matches are removed before truncating
  excludes: "",
  // per-tag hook: return "remove", "keep", or the node whose children to walk
  customNodeStrategy: (n) => n,
  // keep the word complete when the cut falls in the middle of a word;
  // only consulted on the character-counting path (see `substr`)
  reserveLastWord: false,
  trimTheOnlyWord: false,
  // when false, every whitespace run is collapsed to a single space first;
  // even when true, continuous whitespace still counts as one character
  keepWhitespaces: false
};

/** Defaults currently in effect; replaced by `truncate.setup`. */
let userDefaults = defaultOptions;

/** Characters counted as whitespace by the truncation loop. */
const BLANK_CHARS = new Set([
  " ",
  "\f",
  "\n",
  "\r",
  "\t",
  "\v",
  "\u00A0",
  "\u2028",
  "\u2029"
]);

/**
 * Truncates HTML while keeping the markup well-formed.
 *
 * @param {string|object} html - HTML source, or an already loaded cheerio
 *   instance (which is modified in place).
 * @param {number|object} [length] - Maximum length, or the options object.
 * @param {object} [truncateOptions] - Options overriding the current defaults.
 * @returns {string} The truncated HTML (plain text when `stripTags` is set).
 *   When `html` is falsy or the resolved length is not a finite number >= 1,
 *   the input is returned unchanged (serialised if it is a cheerio instance).
 */
function truncate(html, length, truncateOptions) {
  const options = sanitizeOptions(length, truncateOptions);

  // Global isNaN is intentional: it coerces, so non-numeric strings are
  // rejected as well as NaN itself.
  if (
    !html ||
    isNaN(options.length) ||
    options.length < 1 ||
    options.length === Infinity
  ) {
    return isCheerioInstance(html) ? html.html() || "" : html;
  }

  let $;
  if (isCheerioInstance(html)) {
    $ = html;
  } else {
    $ = cheerio.load(
      `${html}`,
      { decodeEntities: options.decodeEntities },
      false
    );
  }

  const $html = $.root();
  if (options.excludes) {
    $html.find(options.excludes).remove();
  }
  if (options.stripTags) {
    return truncateText($html.text(), options);
  }

  // Depth-first walk. `options.limit` is the remaining budget shared by all
  // text nodes; once it reaches 0 every following node is removed.
  const travelChildren = function ($ele, isParentLastNode = true) {
    const contents = $ele.contents();
    const lastIdx = contents.length - 1;
    return contents.each(function (idx) {
      const nodeType = this.type;
      const node = $(this);

      if (nodeType === "text") {
        if (!options.limit) {
          node.remove();
          return;
        }
        this.data = truncateText(
          node.text(),
          options,
          isParentLastNode && idx === lastIdx
        );
        return;
      }

      if (nodeType === "tag") {
        if (!options.limit) {
          node.remove();
          return;
        }
        const strategy = options.customNodeStrategy(node);
        if (strategy === "remove") {
          node.remove();
          return;
        }
        if (strategy === "keep") {
          return;
        }
        travelChildren(strategy || node, isParentLastNode && idx === lastIdx);
        return;
      }

      // Anything that is neither text nor a tag (e.g. comments) is dropped.
      node.remove();
    });
  };

  travelChildren($html);
  return $html.html() || "";
}

/**
 * Replaces the defaults used by subsequent `truncate` calls.
 *
 * @param {object} [options] - New defaults; missing or nullish keys fall back
 *   to the built-in defaults. The object is copied, not retained or mutated.
 */
truncate.setup = function (options) {
  userDefaults = extendOptions(Object.assign({}, options), defaultOptions);
};

/**
 * Truncates one piece of text against the remaining budget in
 * `options.limit`, and decrements that budget by the amount consumed.
 *
 * @param {string} text - Text to truncate.
 * @param {object} options - Sanitized options; `limit` is mutated.
 * @param {boolean} [isLastNode] - Whether this is the last node of the
 *   document, in which case an exact fit gets no ellipsis.
 * @returns {string} The original text while budget remains afterwards,
 *   otherwise the cut text with the ellipsis appended where applicable.
 */
function truncateText(text, options, isLastNode) {
  if (!options.keepWhitespaces) {
    text = text.replace(/\s+/g, " ");
  }
  const byWords = options.byWords;
  const astralSafeCharacterArray = text.match(astralRange);
  if (!astralSafeCharacterArray) {
    return "";
  }

  const strLen = astralSafeCharacterArray.length;
  let idx = 0;
  let count = 0;
  // In word mode, start as if preceded by a blank so the first word counts.
  let prevIsBlank = byWords;
  let curIsBlank = false;

  while (idx < strLen) {
    curIsBlank = isBlank(astralSafeCharacterArray[idx++]);

    // Word mode only counts blank -> non-blank transitions.
    if (byWords && prevIsBlank === curIsBlank) continue;

    if (count === options.limit) {
      // Swallow the rest of a whitespace run that straddles the limit.
      if (prevIsBlank && curIsBlank) {
        prevIsBlank = curIsBlank;
        continue;
      }
      // Step back so `idx` is the exclusive end of the kept slice.
      --idx;
      break;
    }

    if (byWords) {
      if (!curIsBlank) ++count;
    } else {
      // Consecutive blanks count as a single character.
      if (!(curIsBlank && prevIsBlank)) ++count;
    }
    prevIsBlank = curIsBlank;
  }

  options.limit -= count;
  if (options.limit) {
    return text;
  }

  const str = byWords
    ? astralSafeCharacterArray.slice(0, idx).join("")
    : substr(astralSafeCharacterArray, idx, options);

  if (str === text) {
    return isLastNode ? text : text + options.ellipsis;
  }
  return str + options.ellipsis;
}

/**
 * Joins the first `len` characters, optionally adjusting the cut so that it
 * does not fall in the middle of a word (`options.reserveLastWord`).
 *
 * @param {string[]} astralSafeCharacterArray - Characters of the text.
 * @param {number} len - Number of characters to keep.
 * @param {object} options - Sanitized options.
 * @returns {string} The sliced (and possibly extended or trimmed) text.
 */
function substr(astralSafeCharacterArray, len, options) {
  const sliced = astralSafeCharacterArray.slice(0, len).join("");
  if (!options.reserveLastWord || astralSafeCharacterArray.length === len) {
    return sliced;
  }

  // The two characters around the cut; a non-word character on either side
  // means the cut is not inside a word.
  const boundary = astralSafeCharacterArray.slice(len - 1, len + 1).join("");
  if (/\W/.test(boundary)) {
    return sliced;
  }

  // Negative number: drop the partial trailing word instead of completing it.
  if (
    typeof options.reserveLastWord === "number" &&
    options.reserveLastWord < 0
  ) {
    const result = sliced.replace(/\w+$/, "");
    if (!(result.length === 0 && sliced.length === options.length)) {
      return result;
    }
    if (options.trimTheOnlyWord) return sliced;
  }

  // `true` allows up to 10 extra characters; a positive number sets the cap.
  const maxExceeded =
    options.reserveLastWord !== true && options.reserveLastWord > 0
      ? options.reserveLastWord
      : 10;
  const mtc = astralSafeCharacterArray.slice(len).join("").match(/(\w+)/);
  const exceeded = mtc ? mtc[1] : "";
  return sliced + exceeded.substring(0, maxExceeded);
}

/**
 * Resolves the `(length, options)` call signatures into one options object.
 * The caller's objects are never mutated.
 *
 * @param {number|object} [length] - Length, or the options object itself.
 * @param {object} [truncateOptions] - Options when `length` is a number.
 * @returns {object} Merged options, with `excludes` normalised to a selector
 *   string and `limit` initialised to `length`.
 */
function sanitizeOptions(length, truncateOptions) {
  let overrides = truncateOptions;
  switch (typeof length) {
    case "object":
      overrides = length;
      break;
    case "number":
      // typeof null is "object"; Object.assign skips null sources, so a null
      // options argument is safe here. Non-object options are discarded.
      overrides = Object.assign(
        {},
        typeof truncateOptions === "object" ? truncateOptions : null,
        { length }
      );
      break;
    default:
      break;
  }

  const options = extendOptions(
    Object.assign({}, userDefaults, overrides),
    defaultOptions
  );

  // Normalise after merging so arrays from `truncate.setup` are handled too.
  if (options.excludes) {
    const selectors = Array.isArray(options.excludes)
      ? options.excludes
      : [options.excludes];
    options.excludes = selectors.join(",");
  }

  options.limit = options.length;
  return options;
}

/**
 * Tells whether a character is treated as whitespace by the truncation loop
 * (space, form feed, line feed, carriage return, tab, vertical tab,
 * no-break space, line separator, paragraph separator).
 *
 * @param {string} char - A single character.
 * @returns {boolean} True when the character is blank.
 */
function isBlank(char) {
  return BLANK_CHARS.has(char);
}

/**
 * Fills every key of `defaultOptions2` that is null or undefined on
 * `options`. Mutates and returns `options` (a new object when nullish).
 *
 * @param {object} [options] - Object to complete.
 * @param {object} defaultOptions2 - Source of fallback values.
 * @returns {object} The completed options object.
 */
function extendOptions(options, defaultOptions2) {
  if (options == null) {
    options = {};
  }
  for (const k in defaultOptions2) {
    if (options[k] != null) {
      continue;
    }
    options[k] = defaultOptions2[k];
  }
  return options;
}

/**
 * Duck-type check for a loaded cheerio instance.
 *
 * @param {*} elem - Value to inspect.
 * @returns {*} `true` when `contains`, `html` and `parseHTML` are all
 *   present; otherwise the first falsy value encountered.
 */
function isCheerioInstance(elem) {
  return elem && elem.contains && elem.html && elem.parseHTML && true;
}

export { truncate as default };