// truncate-html
// Version:
// Truncate HTML and Keep Tags
// 196 lines (195 loc) • 6.43 kB
// JavaScript
;
const cheerio = require("cheerio");
// Global regex used by truncateText (via String.prototype.match) to split text
// into display-level characters instead of UTF-16 code units.
// One match is, in order:
//   - a skin-tone modifier (U+1F3FB-U+1F3FF) that is followed by another one, or
//   - a base unit: a non-surrogate code unit with an optional combining mark
//     (U+0300-036F, U+FE20-FE23, U+20D0-20F0), a lone combining mark, a pair of
//     regional indicators (flag), a surrogate pair, or a lone surrogate;
//   - then an optional variation selector (U+FE0E / U+FE0F);
//   - then an optional combining mark or skin-tone modifier;
//   - then any number of zero-width-joiner (U+200D) continuations of the same shape.
// The pattern cannot match the empty string, so match() yields null for "".
const astralRange = /\ud83c[\udffb-\udfff](?=\ud83c[\udffb-\udfff])|(?:[^\ud800-\udfff][\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]?|[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff]|[\ud800-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/g;
/**
 * Built-in option defaults. extendOptions copies a value from here whenever
 * the caller left that option null or undefined.
 *
 * There is intentionally no `length` entry: when no length is supplied,
 * truncate() returns its input untouched.
 */
const defaultOptions = {
  // true: drop all markup and return only the truncated text.
  stripTags: false,

  // Suffix appended at the point where the text is cut.
  ellipsis: "...",

  // Passed straight through to cheerio.load as its `decodeEntities` setting.
  decodeEntities: false,

  // true: `length` counts words; false: it counts characters.
  byWords: false,

  // Selector string (or array of selectors) whose matches are removed from
  // the document before truncation starts.
  excludes: "",

  // Hook called with each tag node. Return "remove" to delete the node,
  // "keep" to leave it untouched, or a node whose children should be
  // traversed (a falsy return falls back to the node itself).
  customNodeStrategy: (node) => node,

  // Only consulted in character mode, when the cut lands inside a word:
  // true or a positive number completes the word (up to that many extra
  // characters, 10 for `true`); a negative number drops the partial word.
  reserveLastWord: false,

  // With a negative reserveLastWord, when dropping the partial word would
  // leave nothing: true keeps the word cut at the limit instead of
  // completing it.
  trimTheOnlyWord: false,

  // false: every whitespace run is collapsed to a single space first.
  // true: whitespace is preserved, but a run still counts as one character.
  keepWhitespaces: false,
};

// Defaults currently in effect; reassigned by truncate.setup().
let userDefaults = defaultOptions;
/**
 * Truncate HTML to a maximum length while keeping the remaining markup intact.
 *
 * @param {string|object} html - HTML source, or an already loaded cheerio
 *   instance. A cheerio instance is modified in place.
 * @param {number|object} [length] - Maximum length, or an options object
 *   (see sanitizeOptions for the accepted shapes).
 * @param {object} [truncateOptions] - Options overriding the current defaults.
 * @returns {*} The truncated HTML string. When `html` is falsy, or the
 *   resolved length is NaN, below 1 or Infinity, nothing is truncated: a
 *   cheerio instance is serialised with `.html()`, anything else is returned
 *   as received.
 */
function truncate(html, length, truncateOptions) {
  const options = sanitizeOptions(length, truncateOptions);

  // Nothing to do without input or without a usable, finite, positive length.
  if (!html || isNaN(options.length) || options.length < 1 || options.length === Infinity) {
    if (!isCheerioInstance(html)) {
      return html;
    }
    return html.html() || "";
  }

  const $ = isCheerioInstance(html)
    ? html
    : cheerio.load(`${html}`, { decodeEntities: options.decodeEntities }, false);
  const $root = $.root();

  if (options.excludes) {
    $root.find(options.excludes).remove();
  }

  // Text-only mode: markup is discarded, so a single pass over the text is enough.
  if (options.stripTags) {
    return truncateText($root.text(), options);
  }

  // Depth-first walk. `options.limit` is the remaining budget and is
  // decremented by truncateText; once it reaches zero every later node is removed.
  const walk = ($parent, parentIsLast = true) => {
    const children = $parent.contents();
    const finalIndex = children.length - 1;

    return children.each((index, child) => {
      const $child = $(child);
      const isText = child.type === "text";
      const isTag = child.type === "tag";

      // Drop nodes that are neither text nor tag, and everything that comes
      // after the budget has been used up.
      if (!(isText || isTag) || !options.limit) {
        $child.remove();
        return;
      }

      // Last child of a chain of last children, i.e. the document's tail.
      const isTail = parentIsLast && index === finalIndex;

      if (isText) {
        child.data = truncateText($child.text(), options, isTail);
        return;
      }

      const strategy = options.customNodeStrategy($child);
      if (strategy === "remove") {
        $child.remove();
        return;
      }
      if (strategy === "keep") {
        return;
      }
      walk(strategy || $child, isTail);
    });
  };

  walk($root);
  return $root.html() || "";
}
/**
 * Replace the module-wide defaults used by later truncate() calls.
 *
 * Each call starts again from the built-in defaults; it does not merge with
 * the result of a previous setup() call.
 *
 * @param {object} [options] - Option overrides. Null/undefined resets to the
 *   built-in defaults. The object is copied, not modified or retained.
 */
truncate.setup = function(options) {
  // extendOptions fills missing keys in place and returns the same object.
  // Hand it a shallow copy so the caller's object is not altered and later
  // changes to it cannot leak into the defaults.
  userDefaults = extendOptions(Object.assign({}, options), defaultOptions);
};
/**
 * Cut one run of text against the budget remaining in `options.limit`.
 *
 * The budget is shared between calls: whatever this call consumes is
 * subtracted from `options.limit` before returning.
 *
 * Counting rules:
 *   - byWords: one unit per word (each switch from blank to non-blank).
 *   - otherwise: one unit per character, except that a blank directly
 *     following another blank is free.
 *
 * @param {string} text - Text to cut. Unless `options.keepWhitespaces` is
 *   set, every whitespace run is first collapsed to a single space.
 * @param {object} options - Reads keepWhitespaces, byWords and ellipsis,
 *   reads and updates limit, and is forwarded to substr in character mode.
 * @param {boolean} [isLastNode] - Truthy when no further content follows.
 * @returns {string} "" when the text yields no characters; the (collapsed)
 *   text as-is while budget remains; otherwise the kept part followed by the
 *   ellipsis. The ellipsis is left out only when nothing was cut and
 *   `isLastNode` is truthy.
 */
function truncateText(text, options, isLastNode) {
  const source = options.keepWhitespaces ? text : text.replace(/\s+/g, " ");
  const { byWords } = options;
  const chars = source.match(astralRange);
  if (!chars) {
    return "";
  }

  const total = chars.length;
  let cursor = 0;
  let consumed = 0;
  // In word mode, pretend a blank precedes the text so the first word counts.
  let blankBefore = byWords;

  while (cursor < total) {
    const blankNow = isBlank(chars[cursor]);
    cursor += 1;

    // Word mode only reacts to blank <-> non-blank transitions.
    if (byWords && blankBefore === blankNow) {
      continue;
    }

    if (consumed === options.limit) {
      if (!(blankBefore && blankNow)) {
        // First unit that no longer fits: step back so it is excluded.
        cursor -= 1;
        break;
      }
      // Still inside a blank run that was already paid for.
      blankBefore = blankNow;
      continue;
    }

    const countsTowardLimit = byWords ? !blankNow : !(blankNow && blankBefore);
    if (countsTowardLimit) {
      consumed += 1;
    }
    blankBefore = blankNow;
  }

  options.limit -= consumed;
  if (options.limit) {
    // Budget left over: the whole text fits.
    return source;
  }

  const kept = byWords
    ? chars.slice(0, cursor).join("")
    : substr(chars, cursor, options);

  if (kept !== source) {
    return kept + options.ellipsis;
  }
  return isLastNode ? source : source + options.ellipsis;
}
/**
 * Join the first `len` characters, optionally adjusting the cut so it does
 * not split a word (`options.reserveLastWord`).
 *
 * Word detection uses the regex classes \w / \W.
 *
 * @param {string[]} astralSafeCharacterArray - Text split into characters.
 * @param {number} len - Number of leading characters to keep.
 * @param {object} options - Reads reserveLastWord, trimTheOnlyWord and length.
 * @returns {string} The kept text:
 *   - reserveLastWord falsy, cut at the very end, or cut next to a non-word
 *     character: exactly the first `len` characters;
 *   - negative number: the partial trailing word is dropped, unless that
 *     would leave nothing and the kept text is exactly `options.length`
 *     long; then trimTheOnlyWord keeps the hard cut, otherwise the word is
 *     completed as below;
 *   - true / positive number: the cut word is completed with at most that
 *     many extra characters (10 for `true`).
 */
function substr(astralSafeCharacterArray, len, options) {
  const chars = astralSafeCharacterArray;
  const { reserveLastWord } = options;
  const head = chars.slice(0, len).join("");

  const cutAtEnd = chars.length === len;
  if (!reserveLastWord || cutAtEnd) {
    return head;
  }

  // The character just before the cut plus the one just after it.
  const aroundCut = chars.slice(len - 1, len + 1).join("");
  if (/\W/.test(aroundCut)) {
    // The cut already sits on a word boundary.
    return head;
  }

  const dropsPartialWord = typeof reserveLastWord === "number" && reserveLastWord < 0;
  if (dropsPartialWord) {
    const withoutTail = head.replace(/\w+$/, "");
    const nothingWouldRemain = withoutTail.length === 0 && head.length === options.length;
    if (!nothingWouldRemain) {
      return withoutTail;
    }
    if (options.trimTheOnlyWord) {
      return head;
    }
    // Otherwise fall through and complete the only word instead.
  }

  let extraBudget = 10;
  if (reserveLastWord !== true && reserveLastWord > 0) {
    extraBudget = reserveLastWord;
  }

  const wordTail = chars.slice(len).join("").match(/(\w+)/);
  if (!wordTail) {
    return head;
  }
  return head + wordTail[1].substring(0, extraBudget);
}
/**
 * Turn the flexible `(length, options)` arguments of truncate() into one
 * fully populated options object.
 *
 * Accepted call shapes:
 *   - (number, object?)  a length plus optional options; the number wins
 *                        over any `length` inside the options
 *   - (object)           options only; a second argument is ignored
 *   - (other, object?)   the options are used as given
 *
 * Caller-owned objects are never modified; the result is always a fresh
 * object.
 *
 * @param {number|object} [length] - Maximum length, or the options object.
 * @param {object|null} [truncateOptions] - Per-call option overrides.
 * @returns {object} The per-call options merged over the current user
 *   defaults and the built-in defaults, with `excludes` normalised to a
 *   comma-separated selector string and `limit` initialised to `length`.
 */
function sanitizeOptions(length, truncateOptions) {
  let requested = truncateOptions;
  if (typeof length === "object") {
    requested = length;
  } else if (typeof length === "number") {
    // Build a copy rather than writing `length` onto the caller's object.
    // Object.assign skips null/undefined sources, so a null options argument
    // (typeof null === "object") no longer crashes.
    const base = typeof truncateOptions === "object" ? truncateOptions : null;
    requested = Object.assign({}, base, { length });
  }

  const options = extendOptions(
    Object.assign({}, userDefaults, requested),
    defaultOptions
  );

  // Normalise after merging so that an `excludes` array configured through
  // the user defaults is turned into a selector string as well.
  if (options.excludes) {
    const selectors = Array.isArray(options.excludes)
      ? options.excludes
      : [options.excludes];
    options.excludes = selectors.join(",");
  }

  // `limit` is the running budget that truncateText decrements.
  options.limit = options.length;
  return options;
}
/**
 * Tell whether a single character is one of the whitespace characters that
 * truncateText treats as blank.
 *
 * Every character is written as an explicit literal or escape so that none
 * of them can be confused with, or silently turned into, a plain space.
 *
 * @param {string} char - One character (one element of the split text).
 * @returns {boolean} true for space, form feed, line feed, carriage return,
 *   tab, vertical tab, no-break space, line separator and paragraph
 *   separator; false for anything else, including "" and longer strings.
 */
function isBlank(char) {
  switch (char) {
    case " ":
    case "\f":
    case "\n":
    case "\r":
    case "\t":
    case "\v":
    case "\u00a0":
    case "\u2028":
    case "\u2029":
      return true;
    default:
      return false;
  }
}
/**
 * Fill every option that is null or undefined with its default value.
 *
 * The given object is completed in place and returned; a new object is only
 * created when `options` itself is null or undefined. Values such as false,
 * 0 or "" count as set and are kept.
 *
 * @param {object|null|undefined} options - Options to complete.
 * @param {object} defaultOptions2 - Defaults; all enumerable keys are
 *   considered, inherited ones included.
 * @returns {object} `options` (or a fresh object) with the gaps filled.
 */
function extendOptions(options, defaultOptions2) {
  const target = options == null ? {} : options;
  for (const key in defaultOptions2) {
    if (target[key] == null) {
      target[key] = defaultOptions2[key];
    }
  }
  return target;
}
/**
 * Duck-type check for a loaded cheerio API object.
 *
 * A value qualifies when it is truthy and has truthy `contains`, `html` and
 * `parseHTML` properties (presumably the helpers cheerio exposes on the
 * function returned by `load`; verify against the cheerio version in use).
 *
 * @param {*} elem - Value to inspect.
 * @returns {boolean} Always a strict boolean, never the inspected value or
 *   one of its properties.
 */
function isCheerioInstance(elem) {
  return Boolean(elem && elem.contains && elem.html && elem.parseHTML);
}
module.exports = truncate;