html-from-md
Version:
A simple TypeScript library that formats Markdown and returns formatted HTML.
214 lines (213 loc) • 8.26 kB
JavaScript
// Define a non-writable `__esModule: true` property on the CommonJS `exports`
// object. NOTE(review): by convention this is the marker emitted by
// transpilers for modules authored with ES module syntax — presumably this
// file is compiler output; confirm against the build setup.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of the module. `formatMarkdown` is a function declaration, so it
// is hoisted and can be assigned here before its definition appears below.
exports.formatMarkdown = formatMarkdown;
/**
 * Decodes a small, fixed set of HTML entities and normalises a few
 * typographic characters to their plain ASCII equivalents.
 *
 * Entities handled: `&quot;`, `&amp;`, `&lt;`, `&gt;`, `&#39;`, `&nbsp;`,
 * `&ndash;` and `&rsquo;`. Literal non-breaking spaces, en dashes and right
 * single quotes are normalised the same way as their entity forms.
 *
 * @param {string} s - Text that may contain HTML entities.
 * @returns {string} The text with the known entities replaced.
 */
function decodeHtmlEntities(s) {
    // The previous chain replaced each character with itself (e.g. `<` with
    // `<`), so no entity was ever decoded. The entity names are spelled out
    // explicitly here.
    const replacements = {
        "&quot;": '"',
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&#39;": "'",
        "&nbsp;": " ",
        "&ndash;": "-",
        "&rsquo;": "'",
    };
    return s
        // Single pass: text produced by one replacement is never rescanned,
        // so "&amp;lt;" decodes to "&lt;" rather than all the way to "<".
        .replace(/&(?:quot|amp|lt|gt|#39|nbsp|ndash|rsquo);/g, (entity) => replacements[entity])
        // Literal characters, written as escapes so the patterns survive any
        // re-encoding of this source file.
        .replace(/\u00a0/g, " ")
        .replace(/\u2013/g, "-")
        .replace(/\u2019/g, "'");
}
/**
 * Reduces a Markdown document to the sections whose heading text contains
 * (case-insensitively) at least one of the given keywords.
 *
 * A section runs from its heading up to the next heading of the same or a
 * shallower level, so nested sub-sections are kept with their parent.
 * Heading-like lines inside fenced code blocks (for example `# comment` in a
 * shell snippet) are ignored, so they neither start nor terminate a section.
 *
 * @param {string} md - Markdown source.
 * @param {string[]} [keep] - Keywords to look for in heading titles.
 * @returns {string} The kept sections joined by a blank line, or `md`
 *   unchanged when `keep` is empty or no heading matches.
 */
function keepSections(md, keep) {
    if (!keep || keep.length === 0)
        return md;
    const wanted = keep.map((k) => k.toLowerCase());
    // Spans of fenced code blocks, found with the same fence pattern the
    // renderer uses, so "heading" means the same thing in both places.
    const fencedSpans = Array.from(md.matchAll(/```(?:[\w-]+)?\n[\s\S]*?```/g), (m) => [m.index, m.index + m[0].length]);
    const isInsideFence = (pos) => fencedSpans.some(([from, to]) => pos >= from && pos < to);
    const headings = Array.from(md.matchAll(/^(#{1,6})\s+(.*)$/gm))
        .filter((m) => !isInsideFence(m.index))
        .map((m) => ({ index: m.index, level: m[1].length, title: m[2].toLowerCase() }));
    let out = "";
    let i = 0;
    while (i < headings.length) {
        const heading = headings[i];
        if (!wanted.some((w) => heading.title.includes(w))) {
            i++;
            continue;
        }
        // Skip deeper headings: they belong to the section being kept.
        let next = i + 1;
        while (next < headings.length && headings[next].level > heading.level) {
            next++;
        }
        const end = next < headings.length ? headings[next].index : md.length;
        out += md.slice(heading.index, end) + "\n";
        // Resume at the boundary heading, which may itself be wanted.
        i = next;
    }
    // Fall back to the full document when nothing matched.
    return out.trim() || md;
}
/**
 * Removes from a Markdown document every section whose heading text contains
 * (case-insensitively) at least one of the given keywords.
 *
 * A section runs from its heading up to the next heading of the same or a
 * shallower level, so nested sub-sections are removed with their parent.
 * Heading-like lines inside fenced code blocks (for example `# comment` in a
 * shell snippet) are ignored, so they neither start nor terminate a section.
 *
 * @param {string} md - Markdown source.
 * @param {string[]} [exclude] - Keywords to look for in heading titles.
 * @returns {string} The remaining Markdown, trimmed. An empty string when
 *   every section was excluded; `md` unchanged when `exclude` is empty.
 */
function excludeSections(md, exclude) {
    if (!exclude || exclude.length === 0)
        return md;
    const banned = exclude.map((e) => e.toLowerCase());
    // Spans of fenced code blocks, found with the same fence pattern the
    // renderer uses, so "heading" means the same thing in both places.
    const fencedSpans = Array.from(md.matchAll(/```(?:[\w-]+)?\n[\s\S]*?```/g), (m) => [m.index, m.index + m[0].length]);
    const isInsideFence = (pos) => fencedSpans.some(([from, to]) => pos >= from && pos < to);
    const headings = Array.from(md.matchAll(/^(#{1,6})\s+(.*)$/gm))
        .filter((m) => !isInsideFence(m.index))
        .map((m) => ({ index: m.index, level: m[1].length, title: m[2].toLowerCase() }));
    let out = "";
    let cursor = 0;
    let removedAny = false;
    let i = 0;
    while (i < headings.length) {
        const heading = headings[i];
        if (!banned.some((b) => heading.title.includes(b))) {
            i++;
            continue;
        }
        removedAny = true;
        // Keep everything between the previous cut and this heading.
        out += md.slice(cursor, heading.index);
        // Skip deeper headings: they belong to the section being removed.
        let next = i + 1;
        while (next < headings.length && headings[next].level > heading.level) {
            next++;
        }
        cursor = next < headings.length ? headings[next].index : md.length;
        // Resume at the boundary heading, which may itself be banned.
        i = next;
    }
    out += md.slice(cursor);
    const trimmed = out.trim();
    // When sections were removed and nothing is left, return the empty result.
    // Falling back to `md` here would hand back the very content the caller
    // asked to exclude.
    if (removedAny)
        return trimmed;
    return trimmed || md;
}
/**
 * Collects the inline Markdown links (`[title](http…)`) found in a document.
 *
 * Only absolute `http:` / `https:` targets are recognised. Each distinct
 * title/URL pair is reported once, in order of first appearance.
 *
 * @param {string} md - Markdown source.
 * @returns {{title: string, url: string}[]} The unique links.
 */
function extractLinks(md) {
    const linkPattern = /\[([^\]]+)\]\((https?:[^\)\s]+)\)/g;
    // A Map keeps insertion order, so the first occurrence of each pair wins.
    const uniqueLinks = new Map();
    for (const [, title, url] of md.matchAll(linkPattern)) {
        const key = `${title}|${url}`;
        if (!uniqueLinks.has(key)) {
            uniqueLinks.set(key, { title, url });
        }
    }
    return [...uniqueLinks.values()];
}
/**
 * Converts Markdown list lines (`-`, `*`, `+` bullets and `1.` numbered
 * items) into `<ul>` / `<ol>` / `<li>` markup, one tag per output line.
 *
 * Nesting is driven by leading whitespace: a more-indented item opens a
 * nested list, a less-indented item closes the deeper lists first. Task
 * items (`[ ]` / `[x]`) get a disabled checkbox. Any non-list line closes all
 * open lists and is passed through untouched.
 *
 * @param {string} md - Text to scan, split on LF or CRLF.
 * @returns {string} The text with list lines replaced, joined with "\n".
 */
function renderLists(md) {
    const lines = md.split(/\r?\n/);
    const out = [];
    // Currently open lists, outermost first; indents strictly increase.
    const stack = [];
    const closeInnermost = () => {
        const top = stack.pop();
        out.push(`</${top.type}>`);
    };
    for (const line of lines) {
        const ulMatch = line.match(/^(\s*)([-*+])\s+(.*)$/);
        const olMatch = ulMatch ? null : line.match(/^(\s*)(\d+)\.\s+(.*)$/);
        const itemMatch = ulMatch || olMatch;
        if (!itemMatch) {
            while (stack.length) {
                closeInnermost();
            }
            out.push(line);
            continue;
        }
        const indent = itemMatch[1].length;
        const type = ulMatch ? "ul" : "ol";
        let content = itemMatch[3].trim();
        const taskMatch = content.match(/^\[([ xX])\]\s+(.*)$/);
        if (taskMatch) {
            const checked = taskMatch[1].toLowerCase() === "x";
            content = `<input type="checkbox" disabled ${checked ? "checked" : ""}/> ${taskMatch[2]}`;
        }
        // Dedent: close every list nested deeper than this item. Without
        // this, an item following a nested list ended up inside that list.
        while (stack.length && stack[stack.length - 1].indent > indent) {
            closeInnermost();
        }
        // Same level but a different marker kind: end that list only, leaving
        // shallower parent lists open.
        if (stack.length && stack[stack.length - 1].indent === indent && stack[stack.length - 1].type !== type) {
            closeInnermost();
        }
        // Open a list unless the item continues one at exactly this level.
        if (!stack.length || stack[stack.length - 1].indent < indent) {
            out.push(`<${type}>`);
            stack.push({ type, indent });
        }
        out.push(`<li>${content}</li>`);
    }
    while (stack.length) {
        closeInnermost();
    }
    return out.join("\n");
}
/**
 * Converts Markdown text to an HTML string through an ordered series of regex
 * passes: fenced code, entity decoding, inline code, headings, bold,
 * images/links, blockquotes, optional GFM extras, lists, horizontal rules and
 * finally paragraph wrapping.
 *
 * The order matters. Fenced code is swapped for placeholder tokens first so
 * no later pass can touch it, and the tokens are substituted back at the end.
 *
 * Options read from `opts`:
 *   - `useImgAltText`: when truthy, images are replaced by their alt text.
 *   - `removeImages`: treated as `true` when unset; images become links whose
 *     text is the alt text (or "image"). When `false`, `<img>` tags are
 *     emitted.
 *   - `gfm`: enables task checkboxes, `~~strikethrough~~` and pipe tables.
 *
 * @param {string} mdRaw - Markdown source.
 * @param {object} [opts] - Rendering options (see above).
 * @returns {string} The generated HTML.
 */
function mdToHtml(mdRaw, opts) {
    // Escapes text for use as HTML element content. The replacements used
    // here before mapped `<` to `<` and `>` to `>`, i.e. escaped nothing.
    const escapeHtml = (text) => text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    // Prevents a double quote in a URL or alt text from closing the attribute
    // it is interpolated into.
    const escapeAttr = (value) => value.replace(/"/g, "&quot;");
    const imagePattern = /!\[([^\]]*)\]\((https?:[^\)\s]+)\)/g;
    let md = mdRaw;
    const codeBlocks = [];
    md = md.replace(/```([\w-]+)?\n([\s\S]*?)```/g, (_m, lang, code) => {
        const langClass = lang ? ` class="language-${lang}"` : "";
        const token = `@@CODE_BLOCK_${codeBlocks.length}@@`;
        codeBlocks.push(`<pre><code${langClass}>${escapeHtml(code)}</code></pre>`);
        return token;
    });
    md = decodeHtmlEntities(md);
    // Inline code
    md = md.replace(/`([^`]+)`/g, (_m, code) => `<code>${escapeHtml(code)}</code>`);
    // Headings (deepest level first so "######" is not consumed by "#")
    md = md.replace(/^######\s+(.*)$/gm, "<h6>$1</h6>");
    md = md.replace(/^#####\s+(.*)$/gm, "<h5>$1</h5>");
    md = md.replace(/^####\s+(.*)$/gm, "<h4>$1</h4>");
    md = md.replace(/^###\s+(.*)$/gm, "<h3>$1</h3>");
    md = md.replace(/^##\s+(.*)$/gm, "<h2>$1</h2>");
    md = md.replace(/^#\s+(.*)$/gm, "<h1>$1</h1>");
    // Bold
    md = md.replace(/\*\*([^*]+)\*\*/g, "<strong>$1</strong>");
    md = md.replace(/__([^_]+)__/g, "<strong>$1</strong>");
    // Images & links (images first: the link pattern would also match the
    // "[alt](url)" tail of an image)
    if (opts?.useImgAltText) {
        md = md.replace(imagePattern, (_m, alt) => alt || "");
    }
    else if (opts?.removeImages ?? true) {
        md = md.replace(imagePattern, (_m, alt, url) => `<a href="${escapeAttr(url)}" target="_blank" rel="noreferrer">${alt || "image"}</a>`);
    }
    else {
        md = md.replace(imagePattern, (_m, alt, url) => `<img alt="${escapeAttr(alt)}" src="${escapeAttr(url)}" />`);
    }
    md = md.replace(/\[([^\]]+)\]\((https?:[^\)\s]+)\)/g, (_m, text, url) => `<a href="${escapeAttr(url)}" target="_blank" rel="noreferrer">${text}</a>`);
    // Blockquotes
    md = md.replace(/^>\s?(.*)$/gm, "<blockquote>$1</blockquote>");
    // GFM small features
    if (opts?.gfm) {
        md = md.replace(/^\s*[-*+]\s+\[ \]\s+/gm, '<input type="checkbox" disabled /> ');
        md = md.replace(/^\s*[-*+]\s+\[x\]\s+/gmi, '<input type="checkbox" checked disabled /> ');
        md = md.replace(/~~(.*?)~~/g, "<del>$1</del>");
        md = md.replace(/^\|(.+)\|\s*$/gm, (m) => {
            const cells = m.split("|").slice(1, -1).map((c) => `<td>${c.trim()}</td>`).join("");
            return `<tr>${cells}</tr>`;
        });
        // Wrap each run of consecutive rows in a single table.
        md = md.replace(/(<tr>.*<\/tr>\n?)+/g, (rows) => `<table>${rows}</table>`);
    }
    md = renderLists(md);
    // Horizontal rules
    md = md.replace(/^\s*---+\s*$/gm, "<hr />");
    // Paragraphs: blocks are separated by blank lines; blocks that already
    // start with block-level markup (or are a code placeholder) stay as-is.
    md = md
        .split(/\n{2,}/)
        .map((block) => {
        const trimmed = block.trim();
        if (!trimmed)
            return "";
        if (/^@@CODE_BLOCK_\d+@@$/.test(trimmed))
            return trimmed;
        if (/^<\/?(h\d|ul|ol|li|pre|blockquote|hr|p|img|table|tr|td|del|input)/i.test(trimmed))
            return trimmed;
        if (/^<li>/i.test(trimmed) || /^\s*(?:-|\*|\+|\d+\.)\s+/.test(trimmed))
            return trimmed;
        return `<p>${trimmed}</p>`;
    })
        .join("\n");
    // The result is deliberately not passed through decodeHtmlEntities again:
    // decoding at this point would reverse the escaping applied to code and
    // attribute values above, and would decode ordinary text a second time.
    return md.replace(/@@CODE_BLOCK_(\d+)@@/g, (_, i) => codeBlocks[Number(i)]);
}
/**
 * Entry point: optionally filters a Markdown document by section, collects
 * its links and renders it to HTML.
 *
 * Section filtering is either/or: a non-empty `opts.excludeSections` takes
 * precedence, and `opts.keepSections` is only consulted otherwise. Links are
 * extracted from the filtered Markdown. When `opts.maxChars` is a positive
 * number and the HTML is longer, the HTML string is cut at that many
 * characters and "…" is appended.
 *
 * @param {string} raw - Markdown source.
 * @param {object} [opts] - Options; also forwarded to the HTML renderer.
 * @returns {{html: string, links: {title: string, url: string}[]}}
 */
function formatMarkdown(raw, opts) {
    let md = raw;
    if (opts?.excludeSections?.length > 0) {
        md = excludeSections(md, opts.excludeSections);
    }
    else if (opts?.keepSections?.length > 0) {
        md = keepSections(md, opts.keepSections);
    }
    const links = extractLinks(md);
    const rendered = mdToHtml(md, opts);
    const limit = opts?.maxChars;
    const isTooLong = limit > 0 && rendered.length > limit;
    const html = isTooLong ? rendered.slice(0, limit) + "…" : rendered;
    return { html, links };
}
;