UNPKG

html-from-md

Version:

A simple TypeScript library that converts Markdown into formatted HTML.

214 lines (213 loc) 8.26 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.formatMarkdown = formatMarkdown; function decodeHtmlEntities(s) { return s .replace(/&quot;/g, '"') .replace(/&amp;/g, "&") .replace(/&lt;/g, "<") .replace(/&gt;/g, ">") .replace(/&#39;/g, "'") .replace(/&nbsp;/g, " ") .replace(/&#8211;/g, "-") .replace(/&#8217;/g, "'"); } function keepSections(md, keep) { if (!keep || keep.length === 0) return md; const wanted = keep.map((k) => k.toLowerCase()); const regex = /^(#{1,6})\s+(.*)$/gm; let out = ""; let match; while ((match = regex.exec(md)) !== null) { const [full, hashes, title] = match; const titleLowered = title.toLowerCase(); if (wanted.some((w) => titleLowered.includes(w))) { const level = hashes.length; const start = match.index; regex.lastIndex = start + full.length; let nextMatch; let end = md.length; while ((nextMatch = regex.exec(md)) !== null) { if (nextMatch[1].length <= level) { end = nextMatch.index; break; } } out += md.slice(start, end) + "\n"; regex.lastIndex = end; } } return out.trim() || md; } function excludeSections(md, exclude) { if (!exclude || exclude.length === 0) return md; const banned = exclude.map((e) => e.toLowerCase()); const regex = /^(#{1,6})\s+(.*)$/gm; let out = ""; let lastIndex = 0; let match; while ((match = regex.exec(md)) !== null) { const [full, hashes, title] = match; const titleLowered = title.toLowerCase(); const level = hashes.length; if (banned.some((b) => titleLowered.includes(b))) { out += md.slice(lastIndex, match.index); regex.lastIndex = match.index + full.length; let nextMatch; let end = md.length; while ((nextMatch = regex.exec(md)) !== null) { if (nextMatch[1].length <= level) { end = nextMatch.index; break; } } lastIndex = end; regex.lastIndex = end; } } out += md.slice(lastIndex); return out.trim() || md; } function extractLinks(md) { const links = Array.from(md.matchAll(/\[([^\]]+)\]\((https?:[^\)\s]+)\)/g)).map((m) => ({ title: m[1], url: m[2], })); const seen = new Set(); 
return links.filter((link) => { const key = `${link.title}|${link.url}`; if (seen.has(key)) return false; seen.add(key); return true; }); } function renderLists(md) { const lines = md.split(/\r?\n/); const out = []; const stack = []; function closeToIndent(targetIndent = 0) { while (stack.length && stack[stack.length - 1].indent >= targetIndent) { const top = stack.pop(); out.push(`</${top.type}>`); } } for (let i = 0; i < lines.length; i++) { const line = lines[i]; const ulMatch = line.match(/^(\s*)([-*+])\s+(.*)$/); const olMatch = line.match(/^(\s*)(\d+)\.\s+(.*)$/); if (ulMatch || olMatch) { const indent = (ulMatch ? ulMatch[1] : olMatch[1]).length; const type = ulMatch ? "ul" : "ol"; let content = (ulMatch ? ulMatch[3] : olMatch[3]).trim(); const taskMatch = content.match(/^\[([ xX])\]\s+(.*)$/); if (taskMatch) { const checked = taskMatch[1].toLowerCase() === "x"; content = `<input type="checkbox" disabled ${checked ? "checked" : ""}/> ${taskMatch[2]}`; } if (!stack.length || indent > stack[stack.length - 1].indent || type !== stack[stack.length - 1].type) { if (stack.length && indent <= stack[stack.length - 1].indent && type !== stack[stack.length - 1].type) { closeToIndent(indent); } out.push(`<${type}>`); stack.push({ type, indent }); } out.push(`<li>${content}</li>`); continue; } if (stack.length) { closeToIndent(0); } out.push(line); } while (stack.length) { const top = stack.pop(); out.push(`</${top.type}>`); } return out.join("\n"); } function mdToHtml(mdRaw, opts) { let md = mdRaw; const codeBlocks = []; md = md.replace(/```([\w-]+)?\n([\s\S]*?)```/g, (_m, lang, code) => { const langClass = lang ? 
` class="language-${lang}"` : ""; const escapedCode = code.replace(/</g, "&lt;").replace(/>/g, "&gt;"); const token = `@@CODE_BLOCK_${codeBlocks.length}@@`; codeBlocks.push(`<pre><code${langClass}>${escapedCode}</code></pre>`); return token; }); md = decodeHtmlEntities(md); // Inline code md = md.replace(/`([^`]+)`/g, (_m, code) => `<code>${code.replace(/</g, "&lt;").replace(/>/g, "&gt;")}</code>`); // Headings md = md.replace(/^######\s+(.*)$/gm, "<h6>$1</h6>"); md = md.replace(/^#####\s+(.*)$/gm, "<h5>$1</h5>"); md = md.replace(/^####\s+(.*)$/gm, "<h4>$1</h4>"); md = md.replace(/^###\s+(.*)$/gm, "<h3>$1</h3>"); md = md.replace(/^##\s+(.*)$/gm, "<h2>$1</h2>"); md = md.replace(/^#\s+(.*)$/gm, "<h1>$1</h1>"); // Bold md = md.replace(/\*\*([^*]+)\*\*/g, "<strong>$1</strong>"); md = md.replace(/__([^_]+)__/g, "<strong>$1</strong>"); // Images & links if (opts?.useImgAltText) { md = md.replace(/!\[([^\]]*)\]\((https?:[^\)\s]+)\)/g, (_m, alt) => alt || ""); } else if (opts?.removeImages ?? true) { md = md.replace(/!\[([^\]]*)\]\((https?:[^\)\s]+)\)/g, (_m, alt, url) => `<a href="${url}" target="_blank" rel="noreferrer">${alt || "image"}</a>`); } else { md = md.replace(/!\[([^\]]*)\]\((https?:[^\)\s]+)\)/g, (_m, alt, url) => `<img alt="${alt}" src="${url}" />`); } md = md.replace(/\[([^\]]+)\]\((https?:[^\)\s]+)\)/g, (_m, text, url) => `<a href="${url}" target="_blank" rel="noreferrer">${text}</a>`); // Blockquotes md = md.replace(/^>\s?(.*)$/gm, "<blockquote>$1</blockquote>"); // GFM small features if (opts?.gfm) { md = md.replace(/^\s*[-*+]\s+\[ \]\s+/gm, '<input type="checkbox" disabled /> '); md = md.replace(/^\s*[-*+]\s+\[x\]\s+/gmi, '<input type="checkbox" checked disabled /> '); md = md.replace(/~~(.*?)~~/g, "<del>$1</del>"); md = md.replace(/^\|(.+)\|\s*$/gm, (m) => { const cells = m.split("|").slice(1, -1).map((c) => `<td>${c.trim()}</td>`).join(""); return `<tr>${cells}</tr>`; }); md = md.replace(/(<tr>.*<\/tr>\n?)+/g, (rows) => `<table>${rows}</table>`); } md 
= renderLists(md); // Horizontal rules md = md.replace(/^\s*---+\s*$/gm, "<hr />"); md = md .split(/\n{2,}/) .map((block) => { const trimmed = block.trim(); if (!trimmed) return ""; if (/^@@CODE_BLOCK_\d+@@$/.test(trimmed)) return trimmed; if (/^<\/?(h\d|ul|ol|li|pre|blockquote|hr|p|img|table|tr|td|del|input)/i.test(trimmed)) return trimmed; if (/^<li>/i.test(trimmed) || /^\s*(?:-|\*|\+|\d+\.)\s+/.test(trimmed)) return trimmed; return `<p>${trimmed}</p>`; // .replace(/\n/g, "<br/>") }) .join("\n"); md = md.replace(/@@CODE_BLOCK_(\d+)@@/g, (_, i) => codeBlocks[Number(i)]); return decodeHtmlEntities(md); } function formatMarkdown(raw, opts) { let md = raw; if (opts?.excludeSections && opts.excludeSections.length > 0) { md = excludeSections(md, opts.excludeSections); } else if (opts?.keepSections && opts.keepSections.length > 0) { md = keepSections(md, opts.keepSections); } const links = extractLinks(md); let html = mdToHtml(md, opts); if (opts?.maxChars && opts.maxChars > 0 && html.length > opts.maxChars) { html = html.slice(0, opts.maxChars) + "…"; } return { html, links }; }