// ts-markdown-parser
// Version:
// TypeScript library that converts Markdown to HTML (with code support).
// 559 lines • 24.7 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.checkboxScript = exports.globalScript = exports.elementToHtml = exports.parseMarkdown = exports.replaceSpecialQuotes = exports.markdownTableToHTML = exports.escapeHtml = exports.replaceReferenceLinks = void 0;
const libs_1 = require("../libs");
/**
 * Classifies one line of an HTML code sample with respect to embedded
 * `<script>` / `<style>` sections, so the caller can pick the highlighter
 * language for the lines that follow.
 *
 * Result shape: `{ action, lang }`, where `lang` is the language the caller
 * should carry forward as `previous` for the next line.
 * - `open`   : the line opens a section that is still open at end of line.
 * - `close`  : the line closes a section (or opens and closes one itself).
 * - `inside` : no tag on this line; `lang` echoes `previous`.
 *
 * @param {string} line Line of source text to inspect.
 * @param {string} [previous="html"] Language in effect before this line.
 * @returns {{action: string, lang: string}} Classification of the line.
 */
const detectNonHtmlCodeBlocks = (line, previous = "html") => {
    const openScriptMatch = line.match(/<script/i);
    const closeScriptMatch = line.match(/<\/script>/i);
    const openStyleMatch = line.match(/<style/i);
    const closeStyleMatch = line.match(/<\/style>/i);
    // While inside an embedded section, any closing tag returns us to HTML.
    if (previous !== "html" && (closeScriptMatch || closeStyleMatch)) {
        return { action: "close", lang: "html" };
    }
    // A tag pair such as `<script src="a.js"></script>` opens and closes on the
    // same line; the following lines are plain HTML again, not JS/CSS.
    const closesOnSameLine = (open, close) => Boolean(open && close && close.index > open.index);
    if (openScriptMatch) {
        return closesOnSameLine(openScriptMatch, closeScriptMatch)
            ? { action: "close", lang: "html" }
            : { action: "open", lang: "js" };
    }
    if (closeScriptMatch) {
        return { action: "close", lang: "js" };
    }
    if (openStyleMatch) {
        return closesOnSameLine(openStyleMatch, closeStyleMatch)
            ? { action: "close", lang: "html" }
            : { action: "open", lang: "css" };
    }
    if (closeStyleMatch) {
        return { action: "close", lang: "css" };
    }
    // No tag on this line: stay in whatever language was already active.
    return { action: "inside", lang: typeof previous === "string" && previous ? previous : "html" };
};
/**
 * Replaces reference-style links in markdown text with inline links.
 *
 * Definitions of the form `[n]: url "title"` are collected and removed from
 * the output; every `[label][n]` usage whose `n` has a definition is rewritten
 * to `[title](url)` (the definition's title replaces the label). Usages with
 * no matching definition are left untouched.
 *
 * Fenced code blocks (lines between ``` markers) and inline code spans are
 * never modified. Definitions may appear before or after their usages.
 *
 * @param {string} markdown - The markdown text to process.
 * @returns {string} The processed markdown text with reference links replaced.
 */
const replaceReferenceLinks = (markdown) => {
    // Trailing `\s*` tolerates CRLF line endings and stray trailing spaces.
    const refDefRegex = /^\[(\d+)]:\s*(\S+)\s*"(.+)"\s*$/;
    const inlineRefRegex = /\[([^\]]+)]\[(\d+)]/g;
    const inlineCodeRegex = /`([^`]+)`/g;
    const placeholderRegex = /\{\{INLINE_CODE_(\d+)}}/g;
    const references = new Map();
    const keptLines = [];
    let insideFence = false;
    // Pass 1: collect definitions (dropping their lines) and remember which of
    // the remaining lines belong to fenced code and must stay verbatim.
    for (const line of markdown.split("\n")) {
        const isFenceMarker = line.trimStart().startsWith("```");
        if (isFenceMarker) {
            insideFence = !insideFence;
        }
        if (isFenceMarker || insideFence) {
            keptLines.push({ text: line, verbatim: true });
            continue;
        }
        const definition = refDefRegex.exec(line);
        if (definition) {
            const [, refNum, url, title] = definition;
            references.set(refNum, { title, link: url });
            continue;
        }
        keptLines.push({ text: line, verbatim: false });
    }
    // Pass 2: rewrite usages outside fenced code and inline code spans.
    const resolved = keptLines.map(({ text, verbatim }) => {
        if (verbatim) {
            return text;
        }
        // Hide inline code behind placeholders so its contents are not rewritten.
        const codeSpans = [];
        const masked = text.replace(inlineCodeRegex, (span) => {
            codeSpans.push(span);
            return `{{INLINE_CODE_${codeSpans.length - 1}}}`;
        });
        const rewritten = masked.replace(inlineRefRegex, (usage, label, refNum) => {
            const ref = references.get(refNum);
            return ref ? `[${ref.title}](${ref.link})` : usage;
        });
        // Restore the code spans; text that merely looks like a placeholder but
        // has no stored span is kept as written instead of becoming "undefined".
        return rewritten.replace(placeholderRegex, (placeholder, index) => {
            const position = parseInt(index, 10);
            return position < codeSpans.length ? codeSpans[position] : placeholder;
        });
    });
    return resolved.join("\n");
};
exports.replaceReferenceLinks = replaceReferenceLinks;
/**
 * Escapes HTML special characters that appear inside quoted spans
 * (single quotes, double quotes, or backticks) to prevent HTML injection.
 * - The delimiting quotes themselves are kept; only the text between a
 *   matching pair is converted to HTML entities.
 * - `<Image` / `<img` tags (opening or closing) are additionally neutralised
 *   anywhere in the string by escaping their leading `<`.
 *
 * Text outside quoted spans is otherwise returned unchanged.
 *
 * @param {string} html HTML string to be escaped.
 * @returns {string} Escaped HTML string.
 */
const escapeHtml = (html) => {
    const entityFor = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
        "`": "&#96;",
    };
    // A quoted span: an opening quote character, lazily up to the same closer.
    const quotedSpan = /(['"`])(.*?)\1/g;
    let escapedHtml = html.replace(quotedSpan, (match, quote, inner) => {
        // Single pass over the inner text, so `&` produced by one entity is
        // never escaped a second time.
        const escapedInner = inner.replace(/[&<>"'`]/g, (ch) => entityFor[ch]);
        return `${quote}${escapedInner}${quote}`;
    });
    // Neutralise image tags; the optional `/` is captured so closing tags keep it.
    escapedHtml = escapedHtml.replace(/<(\/?)Image/g, "&lt;$1Image");
    escapedHtml = escapedHtml.replace(/<(\/?)img/g, "&lt;$1img");
    return escapedHtml;
};
exports.escapeHtml = escapeHtml;
/**
 * Converts the lines of one markdown table into an HTML `<table>`.
 *
 * `lines[0]` is the header row, `lines[1]` the `---` separator row (ignored),
 * and every further line is a body row. Cell text is run through
 * `parseInlineStyles`. Empty cells are preserved so that later cells stay in
 * their column.
 *
 * @param {string[]} lines Lines of a single markdown table.
 * @returns {string} `<table class="md-table">` markup, or `""` when fewer than
 *   two lines are supplied.
 */
const markdownTableToHTML = (lines) => {
    if (lines.length < 2)
        return "";
    // Split one row into cells: strip the optional outer pipes first so they do
    // not create phantom cells, then keep every cell, including empty ones.
    const splitRow = (row) => {
        let inner = row.trim();
        if (inner.startsWith("|")) {
            inner = inner.slice(1);
        }
        if (inner.endsWith("|")) {
            inner = inner.slice(0, -1);
        }
        return inner.split("|").map((cell) => parseInlineStyles(cell.trim()));
    };
    const headerCells = splitRow(lines[0]);
    const thead = `<thead><tr>${headerCells.map((c) => `<th>${c}</th>`).join("")}</tr></thead>`;
    // lines[1] is the separator row and carries no content.
    const tbodyRows = lines.slice(2).map((row) => {
        const cells = splitRow(row);
        return `<tr>${cells.map((c) => `<td>${c}</td>`).join("")}</tr>`;
    });
    const tbody = `<tbody>${tbodyRows.join("")}</tbody>`;
    return `<table class="md-table">\n${thead}\n${tbody}\n</table>`;
};
exports.markdownTableToHTML = markdownTableToHTML;
/**
 * Normalises typographic single-quote characters to a plain apostrophe (').
 *
 * Characters replaced:
 * - U+2018 Left Single Quotation Mark
 * - U+2019 Right Single Quotation Mark
 * - U+201A Single Low-9 Quotation Mark
 * - U+2039 Single Left-Pointing Angle Quotation Mark
 * - U+203A Single Right-Pointing Angle Quotation Mark
 *
 * @param {string} text Text containing special single quotes.
 * @returns {string} Text with special single quotes replaced.
 */
const replaceSpecialQuotes = (text) => {
    const anySpecialSingleQuote = /[\u2018\u2019\u201A\u2039\u203A]/g;
    return text.replace(anySpecialSingleQuote, "'");
};
exports.replaceSpecialQuotes = replaceSpecialQuotes;
/**
 * Parses inline styles in Markdown text into HTML.
 * - Handles inline code, images, links, bold, italic, and blockquotes.
 *
 * Processing order matters:
 * 1. Inline code spans are converted first and their markdown-significant
 *    characters are entity-encoded, so later steps cannot reinterpret them.
 * 2. Images, then links. Links are swapped for placeholders so the bold/italic
 *    passes cannot alter their URL or text, and are restored at the end.
 * 3. Bold, italic, blockquotes.
 * 4. Backslash escapes for `*` and `_` are removed.
 *
 * NOTE(review): link/image URLs and text are inserted into the HTML without
 * escaping; callers passing untrusted markdown should sanitise the result.
 *
 * @param {string} text Markdown text to be parsed.
 * @returns {string} HTML string with inline styles converted.
 */
const parseInlineStyles = (text) => {
    // Characters that would otherwise be picked up by the passes below.
    const codeEntities = {
        "*": "&#42;",
        "_": "&#95;",
        "<": "&lt;",
        ">": "&gt;",
        "[": "&#91;",
        "]": "&#93;",
        "(": "&#40;",
        ")": "&#41;",
    };
    // Inline code: entity-encode so `*`, `_`, brackets and tags stay literal.
    let html = text.replace(/`([^`]*)`/g, (match, code) => {
        const escapedCode = code.replace(/[*_<>[\]()]/g, (ch) => codeEntities[ch]);
        return `<span class="md-inline-code">${escapedCode}</span>`;
    });
    // Images
    html = html.replace(/!\[(.*?)\]\((.*?)\)/g, '<img src="$2" alt="$1" />');
    // Extract links and temporarily replace them with a placeholder
    const links = [];
    html = html.replace(/\[(.*?)\]\((.*?)\)/g, (match, linkText, url) => {
        const placeholder = `{{LINK~${links.length}}}`;
        links.push({ placeholder, html: `<a href="${url}">${linkText}</a>` });
        return placeholder;
    });
    // Bold (the lookahead skips markers that sit inside an HTML tag)
    html = html.replace(/\*\*(?![^<]*?>)(.*?)\*\*/g, "<b>$1</b>");
    html = html.replace(/__(?![^<]*?>)(.*?)__/g, "<b>$1</b>");
    // Italic
    html = html.replace(/\*(?![^<]*?>)(.*?)\*/g, "<i>$1</i>");
    html = html.replace(/_(?![^<]*?>)(.*?)_/g, "<i>$1</i>");
    // Blockquotes
    html = html.replace(/^>\s*(.*)/gm, "<blockquote>$1</blockquote>");
    // Reinsert the links. split/join inserts the HTML literally, so `$&`, `$1`
    // etc. inside a URL are not treated as replacement patterns.
    for (const link of links) {
        html = html.split(link.placeholder).join(link.html);
    }
    // Remove backslash escape for * and _ outside of code
    html = html.replace(/\\([*_])/g, "$1");
    return html;
};
/**
 * Converts a Markdown string into an array of MarkdownElement objects.
 * - Handles YAML front matter (skipped), `---` rules, headers (h1-h5), fenced
 *   code blocks, unordered lists (with optional checkboxes), tables, and
 *   paragraphs.
 *
 * Front matter is only skipped when the document starts with `---` AND a
 * closing `---` exists; a lone leading `---` is treated as a decorative line
 * instead of swallowing the whole document.
 *
 * @param {string} markdown Markdown string to be parsed.
 * @returns {MarkdownElement[]} An array of MarkdownElement objects.
 * @throws {Error} When `markdown` is not a non-empty string.
 */
const parseMarkdown = (markdown) => {
    if (typeof markdown !== "string" || !markdown) {
        throw new Error(`Markdown string is invalid: ${typeof markdown}`);
    }
    const resolved = (0, exports.replaceReferenceLinks)(markdown);
    const lines = (0, libs_1.stripLeadingWhitespace)(resolved).split("\n");
    // Locate the closing YAML marker, if the document opens with one.
    let yamlEndLine = -1;
    if (lines[0].trim() === "---") {
        for (let j = 1; j < lines.length; j++) {
            if (lines[j].trim() === "---") {
                yamlEndLine = j;
                break;
            }
        }
    }
    // Regex for unordered list items (handles -, *, +, optional checkbox [ ] or [x])
    const ulItemRegex = /^(\s*)[-*+]\s+(\[([ xX])\]\s*)?(.*)$/;
    // One to five `#` followed by a space; the match length is the prefix to strip.
    const headerRegex = /^(#{1,5}) /;
    const elements = [];
    // Start after the front matter block when one was found.
    let i = yamlEndLine === -1 ? 0 : yamlEndLine + 1;
    while (i < lines.length) {
        const line = (0, exports.replaceSpecialQuotes)(lines[i]).trim();
        // Decorative line: triple hyphen
        if (line === "---") {
            elements.push({ type: "line", content: "" });
            i++;
            continue;
        }
        const headerMatch = headerRegex.exec(line);
        if (headerMatch) {
            // Handle Headers: strip the full "#... " prefix (including the space).
            elements.push({
                type: `h${headerMatch[1].length}`,
                content: parseInlineStyles(line.slice(headerMatch[0].length)),
            });
        }
        else if (line.startsWith("```")) {
            // Handle Code Blocks: collect raw lines up to the closing fence
            // (or end of input when the fence is never closed).
            const language = line.slice(3).trim().toLowerCase() || "txt";
            const codeLines = [];
            i++;
            while (i < lines.length && !lines[i].trim().startsWith("```")) {
                codeLines.push((0, exports.replaceSpecialQuotes)(lines[i]));
                i++;
            }
            elements.push({ type: "code", content: codeLines.join("\n"), language });
            // `i` now points at the closing fence; the shared i++ below skips it.
        }
        else if (ulItemRegex.test(line)) {
            // Handle Unordered Lists: consume every consecutive list line.
            const listItems = [];
            while (i < lines.length && ulItemRegex.test(lines[i])) {
                // Match: [indent, checkbox group, [ ] or [x], rest]
                const [, , , checkboxStatus, content] = lines[i].match(ulItemRegex);
                if (typeof checkboxStatus !== "undefined") {
                    // Checkbox
                    const checked = checkboxStatus && checkboxStatus.toLowerCase() === "x";
                    const label = parseInlineStyles(content.trim());
                    const checkbox = `<input type="checkbox" disabled${checked ? " checked" : ""}>`;
                    listItems.push(`<li class="md-checkbox">${checkbox}<span>${label}</span></li>`);
                }
                else {
                    // Normal list item
                    listItems.push(`<li>${parseInlineStyles(content.trim())}</li>`);
                }
                i++;
            }
            elements.push({ type: "ul", content: listItems.join("") });
            continue; // `i` already points past the list
        }
        else if (line.includes("|") && i + 1 < lines.length && /^\s*\|?\s*-+/.test(lines[i + 1])) {
            // Handle Tables: header row, separator row, then body rows.
            const tableLines = [lines[i], lines[i + 1]];
            i += 2;
            while (i < lines.length && lines[i].includes("|") && !lines[i].trim().startsWith("#") && lines[i].trim().length > 0) {
                tableLines.push(lines[i]);
                i++;
            }
            elements.push({ type: "table", content: (0, exports.markdownTableToHTML)(tableLines) });
            continue; // `i` already points past the table
        }
        else if (line.length > 0) {
            // Handle Paragraphs
            elements.push({ type: "p", content: parseInlineStyles(line) });
        }
        // Blank lines fall through to here and are skipped.
        i++;
    }
    return elements;
};
exports.parseMarkdown = parseMarkdown;
/**
 * Converts a MarkdownElement object to an HTML string.
 * - Handles headers (h1-h5), tables, decorative lines, code blocks, lists,
 *   list items and paragraphs; any other type yields an empty string.
 *
 * Code blocks are highlighted line by line:
 * - Python: lines between triple-quote markers are wrapped in one
 *   `md-comment` span; a line that opens and closes the markers itself is
 *   wrapped on its own.
 * - HTML: a line consisting solely of `<!--` opens a comment span that is
 *   closed on the first later line containing `-->`; lines inside
 *   `<script>` / `<style>` sections are highlighted as JS / CSS.
 * - Other languages: multi-line comments are detected with the start/end
 *   patterns returned by `getMultilineCommentRegex` (when it returns any).
 *
 * @param {MarkdownElement} element MarkdownElement object to be converted.
 * @param {{addCopyToClipboard?: boolean}} [opts] Rendering options; when
 *   `addCopyToClipboard` is truthy a "Copy" button is added to code blocks.
 * @returns {string} HTML representation of the MarkdownElement.
 */
const elementToHtml = (element, opts = {}) => {
    // `opts` is optional: a missing or null value simply disables the button.
    const addCopyToClipboard = !!(opts && opts.addCopyToClipboard);
    switch (element.type) {
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
            return `<${element.type}>${element.content}</${element.type}>\n`;
        case "table":
            return `${element.content}\n`;
        case "line":
            return `<div class="md-line"></div>\n`; // i.e. `---` decorative lines
        case "code": {
            // Without a usable language the raw content is shown un-highlighted
            // rather than being dropped.
            let highlightedCode = typeof element.content === "string" ? element.content : "";
            if (element.language && typeof element.language === "string") {
                const lines = element.content.split("\n");
                const multiLineCommentRegex = (0, libs_1.getMultilineCommentRegex)(element.language);
                const isPython = element.language === "py" || element.language === "python";
                const isHtml = element.language === "html";
                const finalLines = [];
                let inBlockComment = false;
                let pythonCommentIsOpen = false;
                let htmlCommentOpen = false;
                let previousHtmlLang = "html";
                for (const sourceLine of lines) {
                    let line = sourceLine;
                    let isBlockStart = false;
                    let isBlockEnd = false;
                    if (multiLineCommentRegex) {
                        isBlockStart = multiLineCommentRegex.start.test(line);
                        isBlockEnd = multiLineCommentRegex.end.test(line);
                    }
                    // Handle Python as a special case since opening/closing are the same
                    if (isPython) {
                        // Count the ''' / """ markers: an odd count toggles the
                        // comment state, an even count leaves it unchanged.
                        const markerCount = (line.match(/'''|"""/g) || []).length;
                        if (markerCount % 2 === 1) {
                            pythonCommentIsOpen = !pythonCommentIsOpen;
                            finalLines.push(pythonCommentIsOpen
                                ? `<span class="md-comment">${line}`
                                : `${line}</span>`);
                        }
                        else if (pythonCommentIsOpen) {
                            // Inside multi-line comment: do not highlight, just output line
                            finalLines.push(line);
                        }
                        else if (markerCount > 0) {
                            // Opens and closes on this line (e.g. a one-line docstring)
                            finalLines.push(`<span class="md-comment">${line}</span>`);
                        }
                        else {
                            // Not in multi-line comment: use highlighter for code & single-line comments
                            finalLines.push((0, libs_1.highlightCode)("py", line));
                        }
                        continue;
                    }
                    if (isHtml) {
                        if (line.trim() === "<!--") {
                            // Entity-encode the opener so the browser does not
                            // start a real HTML comment inside the <pre>.
                            line = line.replace(/<!--/g, `&lt;!--`);
                            line = '<span class="md-comment">' + line; // Open md-comment <span>
                            htmlCommentOpen = true;
                        }
                        const otherCodeResult = detectNonHtmlCodeBlocks(line, previousHtmlLang);
                        let renderedLine;
                        if (previousHtmlLang !== "html" && otherCodeResult && otherCodeResult.action === "inside") {
                            // Inside <script>/<style>: highlight as JS/CSS
                            renderedLine = (0, libs_1.highlightCode)(previousHtmlLang, line);
                        }
                        else if (!htmlCommentOpen) {
                            renderedLine = (0, libs_1.highlightCode)("html", line);
                        }
                        else {
                            // Inside an HTML comment: escape only, no highlighting
                            renderedLine = (0, exports.escapeHtml)(line);
                            if (line.includes("-->")) {
                                // Close md-comment <span>
                                htmlCommentOpen = false;
                                renderedLine = renderedLine + "</span>";
                            }
                        }
                        previousHtmlLang = otherCodeResult && typeof otherCodeResult.lang === "string"
                            ? otherCodeResult.lang
                            : "html";
                        finalLines.push(renderedLine);
                        continue;
                    }
                    // All other languages
                    if (isBlockStart && !inBlockComment) {
                        // Handle opening the block comment
                        finalLines.push(`<span class="md-comment">${line}`);
                        inBlockComment = true;
                    }
                    else if (inBlockComment && !isBlockStart && !isBlockEnd) {
                        // If we're still inside a block comment
                        finalLines.push(line);
                    }
                    else if (inBlockComment && isBlockEnd) {
                        // Close the md-comment span for multi-line comments
                        finalLines.push(`${line}</span>`);
                        inBlockComment = false;
                    }
                    else {
                        // Regular code highlighting for non-block comment lines.
                        // Every line is emitted: lines that look like a markdown
                        // table inside a code sample are code, not a table.
                        finalLines.push((0, libs_1.highlightCode)(element.language, line));
                    }
                }
                highlightedCode = finalLines.join("\n");
            }
            const copyButton = addCopyToClipboard ? `<button onclick="copyToClipboard(this)">Copy</button>` : "";
            const languageSuffix = element.language ? "-" + element.language : "";
            return [
                "",
                '<div class="md-code-container">',
                copyButton,
                `<pre><code class="md-code${languageSuffix}">${(0, exports.escapeHtml)(highlightedCode)}</code></pre>`,
                "</div>",
                "",
            ].join("\n");
        }
        case "ul":
            return `<ul>\n${element.content}\n</ul>\n`;
        case "ol":
            return `<ol>\n${element.content}\n</ol>\n`;
        case "li":
            return `<li>${(0, exports.escapeHtml)(element.content)}</li>\n`;
        case "p":
            // NOTE(review): content produced by parseMarkdown has already been
            // through parseInlineStyles once; confirm the second pass is intended.
            return `<p>${parseInlineStyles(element.content)}</p>\n`;
        default:
            return "";
    }
};
exports.elementToHtml = elementToHtml;
/**
 * Returns the inline `<script>` markup that defines `copyToClipboard(button)`,
 * the handler invoked by the "Copy" button rendered next to code blocks.
 * The returned string starts and ends with a newline.
 *
 * @returns {string} Script markup to embed once in the page.
 */
const globalScript = () => {
    const markupLines = [
        "",
        "<script>",
        "function copyToClipboard(button) {",
        "const codeBlock = button.parentElement.querySelector('code');",
        "const text = codeBlock.innerText.replace(/</g, '<').replace(/>/g, '>');",
        "navigator.clipboard.writeText(text).then(() => {",
        "button.innerText = 'Copied!';",
        "setTimeout(() => button.innerText = 'Copy', 2000);",
        "});",
        "}",
        "</script>",
        "",
    ];
    return markupLines.join("\n");
};
exports.globalScript = globalScript;
/**
 * Returns the inline `<script>` markup that, once the DOM is loaded, attaches
 * a `change` listener to every `.md-checkbox` checkbox and toggles the
 * `md-checked` class on the element following it.
 * The returned string starts and ends with a newline.
 *
 * @returns {string} Script markup to embed once in the page.
 */
const checkboxScript = () => {
    const markupLines = [
        "",
        "<script>",
        "document.addEventListener('DOMContentLoaded', function() {",
        "document.querySelectorAll('.md-checkbox input[type=\"checkbox\"]').forEach(function(cb) {",
        "cb.addEventListener('change', function() {",
        "if (cb.checked) {",
        "cb.nextElementSibling && cb.nextElementSibling.classList.add('md-checked');",
        "} else {",
        "cb.nextElementSibling && cb.nextElementSibling.classList.remove('md-checked');",
        "}",
        "});",
        "});",
        "});",
        "</script>",
        "",
    ];
    return markupLines.join("\n");
};
exports.checkboxScript = checkboxScript;
//# sourceMappingURL=markdown-parser.js.map