UNPKG

advanced-markdown

Version:

Production-ready markdown parser with Math (KaTeX), Chemistry (mhchem), and Code Highlighting - all working together flawlessly

401 lines (400 loc) 15.7 kB
// src/index.ts
import katex from "katex";
import hljs from "highlight.js";
import "katex/contrib/mhchem";

/** Entities substituted for characters that are unsafe in HTML text and attribute values. */
const HTML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#039;"
};

/** Shortcode (including the surrounding colons) -> emoji used by the `:name:` substitution. */
const EMOJI_SHORTCODES = {
  ":smile:": "\u{1F60A}",
  ":heart:": "\u2764\uFE0F",
  ":thumbsup:": "\u{1F44D}",
  ":fire:": "\u{1F525}",
  ":rocket:": "\u{1F680}",
  ":star:": "\u2B50",
  ":check:": "\u2705",
  ":cross:": "\u274C",
  ":warning:": "\u26A0\uFE0F",
  ":info:": "\u2139\uFE0F",
  ":book:": "\u{1F4D6}",
  ":bulb:": "\u{1F4A1}",
  ":pencil:": "\u270F\uFE0F",
  ":clipboard:": "\u{1F4CB}",
  ":folder:": "\u{1F4C1}",
  ":lock:": "\u{1F512}",
  ":unlock:": "\u{1F513}",
  ":key:": "\u{1F511}",
  ":hammer:": "\u{1F528}",
  ":wrench:": "\u{1F527}",
  ":gear:": "\u2699\uFE0F",
  ":chart:": "\u{1F4CA}",
  ":mag:": "\u{1F50D}",
  ":bell:": "\u{1F514}",
  ":email:": "\u{1F4E7}",
  ":phone:": "\u{1F4DE}",
  ":calendar:": "\u{1F4C5}",
  ":clock:": "\u{1F550}",
  ":hourglass:": "\u23F3",
  ":checkmark:": "\u2713",
  ":cool:": "\u{1F60E}",
  ":tada:": "\u{1F389}"
};

/**
 * Lines produced by the block passes that must never be wrapped in `<p>`.
 * Closing blockquotes, tables (`<table>` / continuation `<tr>` lines) and
 * definition lists are included so paragraphs are not mis-nested around them.
 */
const STRUCTURAL_LINE = /^<(h[1-6]|hr|blockquote|\/blockquote|ul|\/ul|li|table|tr|dl)[\s>\/]/;

/** A line that consists solely of a fenced-code placeholder. */
const CODE_BLOCK_LINE = /^__CODEBLOCK_\d+__$/;

/**
 * Escape the five HTML-significant characters (`& < > " '`) in `text`.
 *
 * @param text String to escape.
 * @returns The escaped string.
 */
function escapeHtml(text) {
  return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/** Build a URL-fragment slug: lower-case, punctuation removed, whitespace collapsed to `-`. */
function slugify(text) {
  return text
    .toLowerCase()
    .replace(/[^\w\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-+|-+$/g, "");
}

/**
 * Split one table line into trimmed cell strings, keeping empty cells so that
 * column positions (and therefore alignments) stay stable.
 */
function splitTableRow(row) {
  return row
    .trim()
    .replace(/^\|/, "")
    .replace(/\|$/, "")
    .split("|")
    .map((cell) => cell.trim());
}

/** Map one cell of the table separator row (`:---`, `---:`, `:---:`) to a CSS text-align value. */
function alignmentOf(separatorCell) {
  const startsWithColon = separatorCell.startsWith(":");
  const endsWithColon = separatorCell.endsWith(":");
  if (startsWithColon && endsWithColon) return "center";
  if (endsWithColon) return "right";
  return "left";
}

/**
 * Render the raw text between two ``` fences as a `<pre>` block.
 * The first line is taken as the language tag when it is a single short word.
 */
function renderCodeBlock(code, enableHighlight) {
  let lang = "";
  let codeContent = code;
  const firstNewline = code.indexOf("\n");
  if (firstNewline > -1) {
    const firstLine = code.substring(0, firstNewline).trim();
    if (firstLine && firstLine.length < 20 && !/\s/.test(firstLine)) {
      lang = firstLine;
      codeContent = code.substring(firstNewline + 1);
    }
  }
  const trimmedCode = codeContent.replace(/^\n+|\n+$/g, "");

  let highlightedCode = escapeHtml(trimmedCode);
  if (enableHighlight && lang) {
    try {
      highlightedCode = hljs.highlight(trimmedCode, { language: lang, ignoreIllegals: true }).value;
    } catch (e) {
      // Highlighting failed (e.g. unknown language): keep the escaped plain text.
      highlightedCode = escapeHtml(trimmedCode);
    }
  }

  // The language tag is author-controlled text that lands inside an attribute,
  // so it is escaped to keep the markup well-formed.
  const safeLang = escapeHtml(lang);
  const languageClass = lang ? `language-${safeLang}` : "";
  const languageLabel = safeLang || "code";
  return `<pre><div class="code-header"><span class="code-language">${languageLabel}</span></div><code class="${languageClass}">${highlightedCode}</code></pre>`;
}

/**
 * Convert a markdown string to an HTML string.
 *
 * Processing order matters: code and math are cut out into placeholders first
 * so later passes cannot touch them, then block structure (headings, rules,
 * tables, definition lists, blockquotes, lists) is resolved, then inline
 * formatting, then paragraphs, and finally the placeholders are restored.
 * Apart from code, math source in error output and a few attributes, the input
 * is NOT HTML-escaped: raw HTML in the markdown passes through unchanged.
 *
 * @param markdown Markdown source text.
 * @param options Optional switches, each defaulting to `true`:
 *   `enableMath` (render `$…$` / `$$…$$` through KaTeX),
 *   `enableChemistry` (forwarded to KaTeX as its `trust` option),
 *   `enableHighlight` (highlight fenced code through highlight.js).
 * @returns The generated HTML.
 */
function parse(markdown, options = {}) {
  const { enableMath = true, enableChemistry = true, enableHighlight = true } = options;
  let html = markdown;

  // HTML comments are dropped entirely.
  html = html.replace(/<!--[\s\S]*?-->/g, "");

  // --- Protect code and math behind placeholders -------------------------
  const codeBlocks = [];
  html = html.replace(/```([\s\S]*?)```/g, (match, code) => {
    const placeholder = `__CODEBLOCK_${codeBlocks.length}__`;
    codeBlocks.push(renderCodeBlock(code, enableHighlight));
    return placeholder;
  });

  const inlineCode = [];
  html = html.replace(/`([^`\n]+?)`/g, (match, code) => {
    const placeholder = `__INLINECODE_${inlineCode.length}__`;
    inlineCode.push(`<code>${escapeHtml(code)}</code>`);
    return placeholder;
  });

  const displayMath = [];
  const inlineMath = [];
  if (enableMath) {
    html = html.replace(/\$\$([\s\S]+?)\$\$/g, (match, math) => {
      const placeholder = `__DISPLAYMATH_${displayMath.length}__`;
      displayMath.push(math);
      return placeholder;
    });
    html = html.replace(/\$([^\$\n]+?)\$/g, (match, math) => {
      const placeholder = `__INLINEMATH_${inlineMath.length}__`;
      inlineMath.push(math.trim());
      return placeholder;
    });
  }

  // Backslash escapes are parked so the escaped character is emitted verbatim
  // at the very end instead of being interpreted as markdown.
  const escapedChars = [];
  html = html.replace(/\\([\\`*_{}\[\]()#+\-.!|~])/g, (match, char) => {
    const placeholder = `__ESCAPE_${escapedChars.length}__`;
    escapedChars.push(char);
    return placeholder;
  });

  // --- Emoji shortcodes ---------------------------------------------------
  html = html.replace(/:(\w+):/g, (match) => EMOJI_SHORTCODES[match] || match);

  // --- Footnote definitions ----------------------------------------------
  // A Map (not a plain object) so ids such as "constructor" cannot resolve to
  // inherited Object.prototype members.
  const footnotes = new Map();
  const footnoteRefs = [];
  html = html.replace(/^\[\^(\w+)\]:\s*(.+)$/gim, (match, id, content) => {
    footnotes.set(id, content);
    return "";
  });

  // --- Headings -----------------------------------------------------------
  const usedSlugs = new Map();
  const renderHeading = (text, level) => {
    let slug = slugify(text);
    if (!slug) {
      slug = `heading-${usedSlugs.size + 1}`;
    }
    if (usedSlugs.has(slug)) {
      const count = usedSlugs.get(slug) + 1;
      usedSlugs.set(slug, count);
      slug = `${slug}-${count}`;
    } else {
      usedSlugs.set(slug, 1);
    }
    return `<h${level} id="${slug}">${text}</h${level}>`;
  };
  // Deepest level first, so "# x" can never match a line that starts with
  // more hashes. Duplicate-slug suffixes are therefore assigned per level.
  for (let level = 6; level >= 1; level--) {
    const headingRegex = new RegExp(`^${"#".repeat(level)} (.*$)`, "gim");
    html = html.replace(headingRegex, (match, text) => renderHeading(text, level));
  }

  // --- Horizontal rules ---------------------------------------------------
  html = html.replace(/^[ \t]*(?:-{3,}|\*{3,}|_{3,})[ \t]*$/gm, "<hr>");

  // --- Tables -------------------------------------------------------------
  const tableRegex = /^\s*(\|.+\|)[ \t]*\r?\n\s*(\|[\s:|-]+\|)[ \t]*\r?\n((?:\s*\|.+\|[ \t]*\r?\n?)+)/gm;
  html = html.replace(tableRegex, (match, header, separator, rows) => {
    const alignments = splitTableRow(separator).map(alignmentOf);
    const alignAt = (index) => alignments[index] || "left";

    const headerCells = splitTableRow(header)
      .map((cell, index) => `<th style="text-align: ${alignAt(index)}">${cell}</th>`)
      .join("");

    const rowsHtml = rows
      .trim()
      .split(/\r?\n/)
      .filter((row) => row.trim() && row.includes("|"))
      .map((row) => {
        const cells = splitTableRow(row);
        // Rows with no content at all are dropped.
        if (cells.every((cell) => cell === "")) return "";
        const cellsHtml = cells
          .map((cell, index) => `<td style="text-align: ${alignAt(index)}">${cell}</td>`)
          .join("");
        return `<tr>${cellsHtml}</tr>`;
      })
      .filter(Boolean)
      .join("\n");

    // The regex consumes the newline after the last row; put it back so the
    // following text stays on its own line.
    const trailingNewline = match.endsWith("\n") ? "\n" : "";
    return `<table><thead><tr>${headerCells}</tr></thead><tbody>${rowsHtml}</tbody></table>${trailingNewline}`;
  });

  // --- Definition lists ---------------------------------------------------
  html = html.replace(/^(.+)\n:\s+(.+)$/gm, "<dl><dt>$1</dt><dd>$2</dd></dl>");
  html = html.replace(/<\/dl>\n<dl>/g, "");

  // --- Blockquotes and lists (line by line) -------------------------------
  const processed = [];
  const listStack = [];
  let blockquoteDepth = 0;

  const closeLists = () => {
    while (listStack.length > 0) {
      listStack.pop();
      processed.push("</ul>");
    }
  };
  const closeBlockquotes = () => {
    while (blockquoteDepth > 0) {
      blockquoteDepth--;
      processed.push("</blockquote>");
    }
  };

  for (const line of html.split(/\r?\n/)) {
    // Headings and rules terminate any open list or blockquote.
    if (/^<(h[1-6]|hr)[\s>]/.test(line)) {
      closeLists();
      closeBlockquotes();
      processed.push(line);
      continue;
    }

    const blockquoteMatch = line.match(/^((?:\s*>\s*)+)(.*)$/);
    if (blockquoteMatch) {
      closeLists();
      // Nesting depth is the number of leading ">" markers.
      const depth = (blockquoteMatch[1].match(/>/g) || []).length;
      while (blockquoteDepth > depth) {
        blockquoteDepth--;
        processed.push("</blockquote>");
      }
      while (blockquoteDepth < depth) {
        processed.push("<blockquote>");
        blockquoteDepth++;
      }
      processed.push(blockquoteMatch[2]);
      continue;
    }

    const trimmedLine = line.trimStart();
    const indent = line.length - trimmedLine.length;

    const taskListMatch = trimmedLine.match(/^[-*]\s\[([ xX])\]\s(.*)$/);
    const unorderedMatch = trimmedLine.match(/^[-*]\s(.*)$/);
    // \p{Emoji} also covers ASCII digits, "#" and "*"; the lookahead keeps
    // ordinary lines such as "5 apples" from being treated as emoji bullets.
    const emojiMatch = !unorderedMatch
      ? trimmedLine.match(/^(?![\d#*])(\p{Emoji}(?:\uFE0F)?)\s+(.+)$/u)
      : null;
    const numericMatch = trimmedLine.match(/^(\d+)\.\s(.*)$/);
    const letterLowerMatch = trimmedLine.match(/^([a-z])\.\s(.*)$/);
    const letterUpperMatch = trimmedLine.match(/^([A-Z])\.\s(.*)$/);
    const romanLowerMatch = trimmedLine.match(/^(i{1,3}|iv|v|vi{0,3}|ix|x)\.\s(.*)$/i);

    const isListItem =
      taskListMatch ||
      emojiMatch ||
      numericMatch ||
      letterLowerMatch ||
      letterUpperMatch ||
      romanLowerMatch ||
      unorderedMatch;

    if (!isListItem) {
      // Blank lines keep lists/blockquotes open; any other text closes them.
      if (line.trim() !== "") {
        closeLists();
        closeBlockquotes();
      }
      processed.push(line);
      continue;
    }

    closeBlockquotes();

    let listType = "";
    let content = "";
    let itemAttrs = "";
    if (taskListMatch) {
      listType = "task";
      const checked = taskListMatch[1].toLowerCase() === "x";
      itemAttrs = ` class="task-list-item"`;
      content = `<input type="checkbox" ${checked ? "checked" : ""} disabled /><span>${taskListMatch[2]}</span>`;
    } else if (emojiMatch) {
      listType = "emoji";
      const emoji = emojiMatch[1];
      itemAttrs = ` data-emoji="${emoji}" class="emoji-list-item"`;
      content = `<span class="emoji-bullet">${emoji}</span> ${emojiMatch[2]}`;
    } else if (numericMatch) {
      listType = "decimal";
      content = numericMatch[2];
      itemAttrs = ` data-number="${numericMatch[1]}"`;
    } else if (letterLowerMatch) {
      listType = "lower-alpha";
      content = letterLowerMatch[2];
      itemAttrs = ` data-letter="${letterLowerMatch[1]}"`;
    } else if (letterUpperMatch) {
      listType = "upper-alpha";
      content = letterUpperMatch[2];
      itemAttrs = ` data-letter="${letterUpperMatch[1]}"`;
    } else if (romanLowerMatch) {
      listType = "lower-roman";
      content = romanLowerMatch[2];
      itemAttrs = ` data-roman="${romanLowerMatch[1]}"`;
    } else if (unorderedMatch) {
      listType = "disc";
      content = unorderedMatch[1];
    }

    // Close only lists that are strictly deeper than this item, so siblings
    // at the same indent share one <ul> instead of each getting their own.
    while (listStack.length > 0 && listStack[listStack.length - 1].indent > indent) {
      listStack.pop();
      processed.push("</ul>");
    }
    const current = listStack[listStack.length - 1];
    if (!current || current.indent < indent) {
      processed.push(`<ul class="list-${listType}">`);
      listStack.push({ type: listType, indent });
    } else if (current.type !== listType) {
      // Same indent but a different marker style: start a fresh list.
      processed.push("</ul>");
      listStack.pop();
      processed.push(`<ul class="list-${listType}">`);
      listStack.push({ type: listType, indent });
    }
    processed.push(`<li${itemAttrs}>${content}</li>`);
  }
  closeLists();
  closeBlockquotes();
  html = processed.join("\n");

  // --- Inline formatting --------------------------------------------------
  html = html.replace(/\*\*\*(.+?)\*\*\*/g, "<strong><em>$1</em></strong>");
  html = html.replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>");
  html = html.replace(/\*(.+?)\*/g, "<em>$1</em>");
  html = html.replace(/~~([^~]+)~~/g, "<del>$1</del>");
  html = html.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '<img src="$2" alt="$1" />');
  html = html.replace(/<(https?:\/\/[^>]+)>/g, '<a href="$1">$1</a>');
  html = html.replace(/<([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})>/g, '<a href="mailto:$1">$1</a>');
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

  // Footnote references are numbered in order of first appearance.
  html = html.replace(/\[\^(\w+)\]/g, (match, id) => {
    if (!footnoteRefs.includes(id)) {
      footnoteRefs.push(id);
    }
    const index = footnoteRefs.indexOf(id) + 1;
    return `<sup class="footnote-ref"><a href="#fn-${id}" id="fnref-${id}">[${index}]</a></sup>`;
  });

  // --- Paragraphs ---------------------------------------------------------
  const paragraphs = [];
  let currentParagraph = [];
  const flushParagraph = () => {
    if (currentParagraph.length > 0) {
      paragraphs.push("<p>" + currentParagraph.join(" ") + "</p>");
      currentParagraph = [];
    }
  };
  for (const line of html.split("\n")) {
    const trimmed = line.trim();
    const isStructural = STRUCTURAL_LINE.test(trimmed) || CODE_BLOCK_LINE.test(trimmed);
    if (isStructural || trimmed === "") {
      flushParagraph();
      if (trimmed !== "") {
        paragraphs.push(line);
      }
    } else {
      currentParagraph.push(line);
    }
  }
  flushParagraph();
  html = paragraphs.join("\n");

  // --- Restore placeholders -----------------------------------------------
  // Callback replacements are used throughout: restored content is inserted
  // verbatim ("$" has no special meaning) and is not rescanned within a pass.
  // Placeholder-looking text with no stored entry is left untouched.
  html = html.replace(/<p>__CODEBLOCK_(\d+)__<\/p>|__CODEBLOCK_(\d+)__/g, (match, wrapped, bare) => {
    const block = codeBlocks[Number(wrapped !== undefined ? wrapped : bare)];
    return block === undefined ? match : block;
  });
  html = html.replace(/__INLINECODE_(\d+)__/g, (match, index) => {
    const code = inlineCode[Number(index)];
    return code === undefined ? match : code;
  });

  // NOTE(review): `trust` is a general KaTeX option; confirm that the mhchem
  // `\ce{}` commands actually depend on it before relying on `enableChemistry`
  // as a chemistry-only switch.
  const renderMath = (source, displayMode) =>
    katex.renderToString(source, {
      displayMode,
      throwOnError: false,
      trust: enableChemistry,
      strict: false
    });

  html = html.replace(/__DISPLAYMATH_(\d+)__/g, (match, index) => {
    const math = displayMath[Number(index)];
    if (math === undefined) return match;
    try {
      return `<div class="math-display">${renderMath(math, true)}</div>`;
    } catch (e) {
      const title = escapeHtml(String(e));
      return `<div class="math-display math-error" title="KaTeX error: ${title}">` + "$$" + escapeHtml(math) + "$$" + "</div>";
    }
  });
  html = html.replace(/__INLINEMATH_(\d+)__/g, (match, index) => {
    const math = inlineMath[Number(index)];
    if (math === undefined) return match;
    try {
      return `<span class="math-inline">${renderMath(math, false)}</span>`;
    } catch (e) {
      const title = escapeHtml(String(e));
      return `<span class="math-inline math-error" title="KaTeX error: ${title}">` + "$" + escapeHtml(math) + "$" + "</span>";
    }
  });

  // Display math that ended up alone in a paragraph is unwrapped; paragraphs
  // left empty (or whitespace-only) are removed.
  html = html.replace(/<p>(<div class="math-display">[\s\S]*?<\/div>)<\/p>/g, "$1");
  html = html.replace(/<p>\s*<\/p>/g, "");

  // --- Footnote section ---------------------------------------------------
  if (footnoteRefs.length > 0) {
    const items = footnoteRefs
      .map((id) => {
        const content = footnotes.get(id) || "Missing footnote content";
        return `<li id="fn-${id}">${content} <a href="#fnref-${id}" class="footnote-backref">\u21A9</a></li>`;
      })
      .join("");
    html += `<hr><div class="footnotes"><ol>${items}</ol></div>`;
  }

  // --- Restore backslash-escaped characters (last, so they stay literal) ---
  html = html.replace(/__ESCAPE_(\d+)__/g, (match, index) => {
    const char = escapedChars[Number(index)];
    return char === undefined ? match : char;
  });

  return html;
}

export { parse };