// advanced-markdown
// Version:
// Production-ready markdown parser with Math (KaTeX), Chemistry (mhchem), and Code Highlighting - all working together flawlessly
// 401 lines (400 loc) • 15.7 kB
// JavaScript
// src/index.ts
import katex from "katex";
import hljs from "highlight.js";
import "katex/contrib/mhchem";
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * the text can be placed safely inside element content or a quoted attribute.
 *
 * @param text - Raw string to escape.
 * @returns The string with each special character replaced by its entity.
 */
function escapeHtml(text) {
  // Each key must map to its entity form; mapping a character to itself
  // (or to an unterminated quote) would leave the markup unescaped.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Shortcodes (surrounding colons included) that `parse` replaces with emoji.
 * Kept at module level so the table is built once rather than on every call.
 */
const EMOJI_SHORTCODES = {
  ":smile:": "\u{1F60A}",
  ":heart:": "\u2764\uFE0F",
  ":thumbsup:": "\u{1F44D}",
  ":fire:": "\u{1F525}",
  ":rocket:": "\u{1F680}",
  ":star:": "\u2B50",
  ":check:": "\u2705",
  ":cross:": "\u274C",
  ":warning:": "\u26A0\uFE0F",
  ":info:": "\u2139\uFE0F",
  ":book:": "\u{1F4D6}",
  ":bulb:": "\u{1F4A1}",
  ":pencil:": "\u270F\uFE0F",
  ":clipboard:": "\u{1F4CB}",
  ":folder:": "\u{1F4C1}",
  ":lock:": "\u{1F512}",
  ":unlock:": "\u{1F513}",
  ":key:": "\u{1F511}",
  ":hammer:": "\u{1F528}",
  ":wrench:": "\u{1F527}",
  ":gear:": "\u2699\uFE0F",
  ":chart:": "\u{1F4CA}",
  ":mag:": "\u{1F50D}",
  ":bell:": "\u{1F514}",
  ":email:": "\u{1F4E7}",
  ":phone:": "\u{1F4DE}",
  ":calendar:": "\u{1F4C5}",
  ":clock:": "\u{1F550}",
  ":hourglass:": "\u23F3",
  ":checkmark:": "\u2713",
  ":cool:": "\u{1F60E}",
  ":tada:": "\u{1F389}"
};

/**
 * Converts a markdown string into an HTML string.
 *
 * The conversion is a fixed sequence of regex passes. Content that later
 * passes must not touch (fenced code, inline code, math, backslash escapes)
 * is first swapped for `__NAME_n__` placeholders and substituted back at the
 * end. Raw HTML in the input is passed through unchanged, so the result must
 * be sanitised by the caller if the markdown is untrusted.
 *
 * @param markdown - Markdown source. `null`/`undefined` is treated as "".
 * @param options - Optional feature switches:
 *   - `enableMath` (default `true`): extract `$…$` / `$$…$$` and render with KaTeX.
 *   - `enableChemistry` (default `true`): forwarded to KaTeX as its `trust` option.
 *   - `enableHighlight` (default `true`): highlight fenced code with highlight.js.
 * @returns The generated HTML.
 */
function parse(markdown, options = {}) {
  const {
    enableMath = true,
    enableChemistry = true,
    enableHighlight = true
  } = options || {};
  let html = markdown == null ? "" : String(markdown);

  // --- 1. Strip HTML comments -------------------------------------------
  html = html.replace(/<!--[\s\S]*?-->/g, "");

  // --- 2. Protect fenced and inline code --------------------------------
  const codeBlocks = [];
  const inlineCode = [];
  html = html.replace(/```([\s\S]*?)```/g, (match, code) => {
    const placeholder = `__CODEBLOCK_${codeBlocks.length}__`;
    const firstNewline = code.indexOf("\n");
    let lang = "";
    let codeContent = code;
    if (firstNewline > -1) {
      // A short, whitespace-free first line is taken as the language tag.
      const firstLine = code.substring(0, firstNewline).trim();
      if (firstLine && firstLine.length < 20 && !/\s/.test(firstLine)) {
        lang = firstLine;
        codeContent = code.substring(firstNewline + 1);
      }
    }
    const trimmedCode = codeContent.replace(/^\n+|\n+$/g, "");
    // The language tag is user input that ends up inside an attribute and a
    // label, so it is escaped for HTML; highlight.js still gets the raw tag.
    const safeLang = escapeHtml(lang);
    const languageClass = lang ? `language-${safeLang}` : "";
    const languageLabel = lang ? safeLang : "code";
    let highlightedCode = escapeHtml(trimmedCode);
    if (enableHighlight && lang) {
      try {
        highlightedCode = hljs.highlight(trimmedCode, {
          language: lang,
          ignoreIllegals: true
        }).value;
      } catch (e) {
        // Highlighting failed (e.g. unknown language): keep the escaped text.
      }
    }
    codeBlocks.push(
      `<pre><div class="code-header"><span class="code-language">${languageLabel}</span></div><code class="${languageClass}">${highlightedCode}</code></pre>`
    );
    return placeholder;
  });
  html = html.replace(/`([^`\n]+?)`/g, (match, code) => {
    const placeholder = `__INLINECODE_${inlineCode.length}__`;
    inlineCode.push(`<code>${escapeHtml(code)}</code>`);
    return placeholder;
  });

  // --- 3. Protect math ---------------------------------------------------
  const displayMath = [];
  const inlineMath = [];
  if (enableMath) {
    html = html.replace(/\$\$([\s\S]+?)\$\$/g, (match, math) => {
      const placeholder = `__DISPLAYMATH_${displayMath.length}__`;
      displayMath.push(math);
      return placeholder;
    });
    html = html.replace(/\$([^\$\n]+?)\$/g, (match, math) => {
      const placeholder = `__INLINEMATH_${inlineMath.length}__`;
      inlineMath.push(math.trim());
      return placeholder;
    });
  }

  // --- 4. Protect backslash escapes --------------------------------------
  const escapedChars = [];
  html = html.replace(/\\([\\`*_{}\[\]()#+\-.!|~])/g, (match, char) => {
    escapedChars.push(char);
    return `__ESCAPE_${escapedChars.length - 1}__`;
  });

  // --- 5. Emoji shortcodes -----------------------------------------------
  html = html.replace(/:(\w+):/g, (match) => EMOJI_SHORTCODES[match] || match);

  // --- 6. Footnote definitions ---------------------------------------------
  // A Map (not a plain object) so that ids such as "constructor" cannot
  // resolve to members inherited from Object.prototype.
  const footnotes = new Map();
  const footnoteRefs = [];
  html = html.replace(/^\[\^(\w+)\]:\s*(.+)$/gim, (match, id, content) => {
    footnotes.set(id, content);
    return "";
  });

  // --- 7. Headings ---------------------------------------------------------
  const generateSlug = (text) => {
    return text.toLowerCase().replace(/[^\w\s-]/g, "").trim().replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-+|-+$/g, "");
  };
  const usedSlugs = new Map();
  const addHeadingWithId = (text, level) => {
    let slug = generateSlug(text);
    if (!slug) {
      slug = `heading-${usedSlugs.size + 1}`;
    }
    if (usedSlugs.has(slug)) {
      const count = usedSlugs.get(slug) + 1;
      usedSlugs.set(slug, count);
      slug = `${slug}-${count}`;
    } else {
      usedSlugs.set(slug, 1);
    }
    return `<h${level} id="${slug}">${text}</h${level}>`;
  };
  // Deepest level first so "## x" is never consumed by the "# " pattern.
  // This also fixes the order in which duplicate slugs are numbered.
  for (let level = 6; level >= 1; level--) {
    const headingRegex = new RegExp(`^${"#".repeat(level)} (.*$)`, "gim");
    html = html.replace(headingRegex, (match, text) => addHeadingWithId(text, level));
  }

  // --- 8. Horizontal rules -------------------------------------------------
  html = html.replace(/^[ \t]*-{3,}[ \t]*$/gm, "<hr>");
  html = html.replace(/^[ \t]*\*{3,}[ \t]*$/gm, "<hr>");
  html = html.replace(/^[ \t]*_{3,}[ \t]*$/gm, "<hr>");

  // --- 9. Tables -------------------------------------------------------------
  const tableRegex = /^\s*(\|.+\|)[ \t]*\r?\n\s*(\|[\s:|-]+\|)[ \t]*\r?\n((?:\s*\|.+\|[ \t]*\r?\n?)+)/gm;
  html = html.replace(tableRegex, (match, header, separator, rows) => {
    // Column alignment comes from the colons in the separator row.
    const alignments = separator.split("|").filter((cell) => cell.trim()).map((cell) => {
      const trimmed = cell.trim();
      if (trimmed.startsWith(":") && trimmed.endsWith(":")) return "center";
      if (trimmed.endsWith(":")) return "right";
      return "left";
    });
    const headerCells = header.split("|").filter((cell) => cell.trim()).map(
      (cell, index) => `<th style="text-align: ${alignments[index] || "left"}">${cell.trim()}</th>`
    ).join("");
    const rowsHtml = rows.trim().split(/\r?\n/).filter((row) => row.trim() && row.includes("|")).map((row) => {
      const cells = row.split("|").filter((cell) => cell.trim()).map(
        (cell, index) => `<td style="text-align: ${alignments[index] || "left"}">${cell.trim()}</td>`
      ).join("");
      return cells ? `<tr>${cells}</tr>` : "";
    }).filter(Boolean).join("\n");
    return `<table><thead><tr>${headerCells}</tr></thead><tbody>${rowsHtml}</tbody></table>`;
  });

  // --- 10. Definition lists ("term" line followed by ": definition") ----------
  html = html.replace(/^(.+)\n:\s+(.+)$/gm, "<dl><dt>$1</dt><dd>$2</dd></dl>");
  html = html.replace(/<\/dl>\n<dl>/g, "");

  // --- 11. Line pass: blockquotes and lists ------------------------------------
  const lines = html.split(/\r?\n/);
  const processed = [];
  const listStack = [];
  let blockquoteDepth = 0;
  const closeLists = () => {
    while (listStack.length > 0) {
      listStack.pop();
      processed.push("</ul>");
    }
  };
  const closeBlockquotes = () => {
    while (blockquoteDepth > 0) {
      blockquoteDepth--;
      processed.push("</blockquote>");
    }
  };
  for (const line of lines) {
    // Headings and rules produced above end any open list or quote.
    if (/^<(h[1-6]|hr)[\s>]/.test(line)) {
      closeLists();
      closeBlockquotes();
      processed.push(line);
      continue;
    }
    const blockquoteMatch = line.match(/^((?:\s*>\s*)+)(.*)$/);
    if (blockquoteMatch) {
      closeLists();
      // Nesting depth is the number of ">" markers in the prefix.
      const depth = (blockquoteMatch[1].match(/>/g) || []).length;
      while (blockquoteDepth > depth) {
        blockquoteDepth--;
        processed.push("</blockquote>");
      }
      while (blockquoteDepth < depth) {
        processed.push("<blockquote>");
        blockquoteDepth++;
      }
      processed.push(blockquoteMatch[2]);
      continue;
    }
    const indent = line.match(/^(\s*)/)[1].length;
    const trimmedLine = line.trimStart();
    const taskListMatch = trimmedLine.match(/^[-*]\s\[([ xX])\]\s(.*)$/);
    const unorderedMatch = trimmedLine.match(/^[-*]\s(.*)$/);
    // Extended_Pictographic rather than Emoji: \p{Emoji} also matches ASCII
    // digits, which turned ordinary lines such as "5 apples" into list items.
    const emojiMatch = !unorderedMatch ? trimmedLine.match(/^(\p{Extended_Pictographic}(?:\uFE0F)?)\s+(.+)$/u) : null;
    const numericMatch = trimmedLine.match(/^(\d+)\.\s(.*)$/);
    const letterLowerMatch = trimmedLine.match(/^([a-z])\.\s(.*)$/);
    const letterUpperMatch = trimmedLine.match(/^([A-Z])\.\s(.*)$/);
    const romanLowerMatch = trimmedLine.match(/^(i{1,3}|iv|v|vi{0,3}|ix|x)\.\s(.*)$/i);
    const isListItem = taskListMatch || emojiMatch || numericMatch || letterLowerMatch || letterUpperMatch || romanLowerMatch || unorderedMatch;
    if (!isListItem) {
      // Blank lines keep lists/quotes open; any other text closes them.
      if (line.trim() !== "") {
        closeLists();
        closeBlockquotes();
      }
      processed.push(line);
      continue;
    }
    closeBlockquotes();
    let listType = "";
    let content = "";
    let dataAttr = "";
    if (taskListMatch) {
      listType = "task";
      const checked = taskListMatch[1].toLowerCase() === "x";
      dataAttr = ` class="task-list-item"`;
      content = `<input type="checkbox" ${checked ? "checked" : ""} disabled /><span>${taskListMatch[2]}</span>`;
    } else if (emojiMatch) {
      listType = "emoji";
      const emoji = emojiMatch[1];
      dataAttr = ` data-emoji="${emoji}" class="emoji-list-item"`;
      content = `<span class="emoji-bullet">${emoji}</span> ${emojiMatch[2]}`;
    } else if (numericMatch) {
      listType = "decimal";
      content = numericMatch[2];
      dataAttr = ` data-number="${numericMatch[1]}"`;
    } else if (letterLowerMatch) {
      listType = "lower-alpha";
      content = letterLowerMatch[2];
      dataAttr = ` data-letter="${letterLowerMatch[1]}"`;
    } else if (letterUpperMatch) {
      listType = "upper-alpha";
      content = letterUpperMatch[2];
      dataAttr = ` data-letter="${letterUpperMatch[1]}"`;
    } else if (romanLowerMatch) {
      listType = "lower-roman";
      content = romanLowerMatch[2];
      dataAttr = ` data-roman="${romanLowerMatch[1]}"`;
    } else {
      listType = "disc";
      content = unorderedMatch[1];
    }
    // Close only lists that are MORE deeply indented than this item. Using
    // ">=" here closed the current list too, so every sibling item ended up
    // in its own <ul> and the type-switch branch below was never reached.
    while (listStack.length > 0 && listStack[listStack.length - 1].indent > indent) {
      listStack.pop();
      processed.push("</ul>");
    }
    const top = listStack[listStack.length - 1];
    if (!top || top.indent < indent) {
      // First item, or a deeper indent: open a (nested) list.
      processed.push(`<ul class="list-${listType}">`);
      listStack.push({ type: listType, indent });
    } else if (top.type !== listType) {
      // Same indent but a different marker style: start a new list.
      processed.push("</ul>");
      listStack.pop();
      processed.push(`<ul class="list-${listType}">`);
      listStack.push({ type: listType, indent });
    }
    processed.push(`<li${dataAttr}>${content}</li>`);
  }
  closeLists();
  closeBlockquotes();
  html = processed.join("\n");

  // --- 12. Inline formatting -----------------------------------------------------
  html = html.replace(/\*\*\*(.+?)\*\*\*/g, "<strong><em>$1</em></strong>");
  html = html.replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>");
  html = html.replace(/\*(.+?)\*/g, "<em>$1</em>");
  html = html.replace(/~~([^~]+)~~/g, "<del>$1</del>");
  html = html.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '<img src="$2" alt="$1" />');
  html = html.replace(/<(https?:\/\/[^>]+)>/g, '<a href="$1">$1</a>');
  html = html.replace(/<([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})>/g, '<a href="mailto:$1">$1</a>');
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');
  html = html.replace(/\[\^(\w+)\]/g, (match, id) => {
    // Footnotes are numbered by order of first reference.
    if (!footnoteRefs.includes(id)) {
      footnoteRefs.push(id);
    }
    const index = footnoteRefs.indexOf(id) + 1;
    return `<sup class="footnote-ref"><a href="#fn-${id}" id="fnref-${id}">[${index}]</a></sup>`;
  });

  // --- 13. Paragraphs ----------------------------------------------------------------
  const paragraphs = [];
  let currentParagraph = [];
  const flushParagraph = () => {
    if (currentParagraph.length > 0) {
      paragraphs.push("<p>" + currentParagraph.join(" ") + "</p>");
      currentParagraph = [];
    }
  };
  for (const line of html.split("\n")) {
    const trimmed = line.trim();
    // "</blockquote>" is listed so a quote's closing tag ends the paragraph
    // instead of being swallowed into it ("<p>text </blockquote></p>").
    const isStructural = /^<(h[1-6]|hr|blockquote|\/blockquote|ul|\/ul|li)[\s>\/]/.test(trimmed);
    if (isStructural || trimmed === "") {
      flushParagraph();
      if (trimmed !== "") {
        paragraphs.push(line);
      }
    } else {
      currentParagraph.push(line);
    }
  }
  flushParagraph();
  html = paragraphs.join("\n");

  // --- 14. Footnote section -----------------------------------------------------------
  // Appended BEFORE placeholders are restored so that inline code or math
  // inside a footnote's text is substituted back as well.
  if (footnoteRefs.length > 0) {
    const items = footnoteRefs.map((id) => {
      const content = footnotes.get(id) || "Missing footnote content";
      return `<li id="fn-${id}">${content} <a href="#fnref-${id}" class="footnote-backref">\u21A9</a></li>`;
    });
    html += `<hr><div class="footnotes"><ol>${items.join("")}</ol></div>`;
  }

  // --- 15. Restore code ------------------------------------------------------------------
  // split/join inserts the HTML literally, so "$" in code needs no escaping.
  codeBlocks.forEach((codeHtml, index) => {
    const placeholder = `__CODEBLOCK_${index}__`;
    // A fence alone in a paragraph replaces the whole <p> wrapper.
    html = html.split(`<p>${placeholder}</p>`).join(codeHtml);
    html = html.split(placeholder).join(codeHtml);
  });
  inlineCode.forEach((codeHtml, index) => {
    html = html.split(`__INLINECODE_${index}__`).join(codeHtml);
  });

  // --- 16. Render math -------------------------------------------------------------------
  // NOTE(review): KaTeX documents `trust` as the gate for commands such as
  // \href and \includegraphics; it looks unrelated to mhchem's \ce{}. Confirm
  // before relying on `enableChemistry` for it when input is untrusted.
  const renderMath = (math, displayMode) => katex.renderToString(math, {
    displayMode,
    throwOnError: false,
    trust: enableChemistry,
    strict: false
  });
  html = html.replace(/__DISPLAYMATH_(\d+)__/g, (match, index) => {
    const math = displayMath[parseInt(index, 10)];
    // Text that merely looks like a placeholder is left untouched.
    if (math === undefined) return match;
    try {
      return `<div class="math-display">${renderMath(math, true)}</div>`;
    } catch (e) {
      // The error text is escaped because it lands inside an attribute.
      return `<div class="math-display math-error" title="KaTeX error: ${escapeHtml(String(e))}">$$${escapeHtml(math)}$$</div>`;
    }
  });
  html = html.replace(/__INLINEMATH_(\d+)__/g, (match, index) => {
    const math = inlineMath[parseInt(index, 10)];
    if (math === undefined) return match;
    try {
      return `<span class="math-inline">${renderMath(math, false)}</span>`;
    } catch (e) {
      return `<span class="math-inline math-error" title="KaTeX error: ${escapeHtml(String(e))}">$${escapeHtml(math)}$</span>`;
    }
  });

  // --- 17. Tidy paragraphs -----------------------------------------------------------------
  html = html.replace(/<p>(<div class="math-display">[\s\S]*?<\/div>)<\/p>/g, "$1");
  html = html.replace(/<p>\s*<\/p>/g, "");

  // --- 18. Restore backslash-escaped characters (last, so they stay literal) ----------------
  html = html.replace(/__ESCAPE_(\d+)__/g, (match, index) => {
    const char = escapedChars[parseInt(index, 10)];
    return char === undefined ? match : char;
  });
  return html;
}
export {
parse
};