UNPKG

@schukai/monster

Version:

Monster is a simple library for creating fast, robust and lightweight websites.

277 lines (244 loc) 7.19 kB
/**
 * Copyright © Volker Schukai and all contributing authors, {{copyRightYear}}. All rights reserved.
 * Node module: @schukai/monster
 *
 * This source code is licensed under the GNU Affero General Public License version 3 (AGPLv3).
 * The full text of the license can be found at: https://www.gnu.org/licenses/agpl-3.0.en.html
 *
 * For those who do not wish to adhere to the AGPLv3, a commercial license is available.
 * Acquiring a commercial license allows you to use this software without complying with the AGPLv3 terms.
 * For more information about purchasing a commercial license, please contact Volker Schukai.
 *
 * SPDX-License-Identifier: AGPL-3.0
 */

export { MarkdownToHTML };

/**
 * Converts a small subset of Markdown into HTML.
 *
 * Supported block constructs: ATX headings (`#` .. `######`), ordered lists,
 * unordered lists (including `- [ ]` / `- [x]` task items), fenced code
 * blocks and single-line paragraphs. Supported inline constructs: links,
 * bold, italic and inline code.
 *
 * Options (all optional):
 *  - `taskListDisabled` (default `true`): render task checkboxes as `disabled`.
 *  - `codeHighlightClassPrefix` (default `"language-"`): prefix for the
 *    `class` attribute of fenced code blocks that declare a language.
 *  - `escapeHTML` (default `true`): escape `&`, `<` and `>` in text and code.
 */
class MarkdownToHTML {
	/**
	 * @param {string} markdown - Markdown source text.
	 * @param {object} [options] - Overrides for the default options.
	 */
	constructor(markdown, options = {}) {
		this.markdown = markdown;
		this.tokens = [];
		this.options = {
			taskListDisabled: true,
			codeHighlightClassPrefix: "language-",
			escapeHTML: true,
			...options,
		};
		this._taskId = 0; // For unique checkbox IDs in task lists
	}

	/**
	 * Tokenizes the Markdown input into a structured array of tokens.
	 *
	 * The token list is rebuilt from scratch on every call, so calling this
	 * method repeatedly never duplicates tokens. `null`/`undefined` input is
	 * treated as an empty document.
	 *
	 * @returns {Array<object>} The token array (also stored in `this.tokens`).
	 */
	tokenize() {
		// Start from a clean slate so repeated calls do not append duplicates.
		this.tokens = [];

		const source = this.markdown == null ? "" : String(this.markdown);
		// Accept both LF and CRLF; otherwise code-block lines keep a stray "\r".
		const lines = source.split(/\r?\n/);

		let inCodeBlock = false;
		let codeBuffer = [];
		let codeLang = "";
		let listBuffer = null;

		// Emits the pending list (if any) and forgets it.
		const flushList = () => {
			this._flushList(listBuffer);
			listBuffer = null;
		};

		// Returns the open list of the given type, closing a list of another type first.
		const openList = (type) => {
			if (listBuffer && listBuffer.type !== type) {
				flushList();
			}
			if (!listBuffer) {
				listBuffer = { type, items: [] };
			}
			return listBuffer;
		};

		for (const line of lines) {
			const trimmed = line.trim();

			// Detect start/end of fenced code blocks. Hyphens are allowed in the
			// language name (e.g. "objective-c"), matching safeCodeLang().
			const codeFenceMatch = trimmed.match(/^```([\w-]+)?/);
			if (codeFenceMatch) {
				if (inCodeBlock) {
					this.tokens.push({
						type: "code-block",
						content: codeBuffer.join("\n"),
						language: codeLang || null,
					});
					codeBuffer = [];
					codeLang = "";
					inCodeBlock = false;
				} else {
					flushList();
					codeLang = codeFenceMatch[1] || "";
					inCodeBlock = true;
				}
				continue;
			}

			if (inCodeBlock) {
				// Keep the untrimmed line: indentation is significant inside code.
				codeBuffer.push(line);
				continue;
			}

			// Heading (e.g. #, ##, ###, etc.)
			if (/^#{1,6}\s/.test(trimmed)) {
				flushList();
				const level = trimmed.match(/^#+/)[0].length;
				this.tokens.push({
					type: "heading",
					level,
					content: trimmed.slice(level + 1).trim(),
				});
				continue;
			}

			// Ordered list item (e.g. 1. Item)
			if (/^\d+\.\s+/.test(trimmed)) {
				openList("ordered-list").items.push(trimmed.replace(/^\d+\.\s+/, ""));
				continue;
			}

			// Unordered list item or task list (e.g. - Item, - [x] Task)
			if (/^[-+*]\s+/.test(trimmed)) {
				const list = openList("unordered-list");
				const content = trimmed.replace(/^[-+*]\s+/, "");
				const taskMatch = content.match(/^\[( |x|X)]\s+(.*)/);
				if (taskMatch) {
					list.items.push({
						type: "task",
						checked: taskMatch[1].toLowerCase() === "x",
						content: taskMatch[2],
					});
				} else {
					list.items.push(content);
				}
				continue;
			}

			// Blank line
			if (trimmed === "") {
				flushList();
				this.tokens.push({ type: "blank" });
				continue;
			}

			// Plain paragraph (one paragraph token per source line)
			flushList();
			this.tokens.push({ type: "paragraph", content: trimmed });
		}

		// Flush any remaining list or unterminated code block at EOF
		flushList();
		if (inCodeBlock && codeBuffer.length > 0) {
			this.tokens.push({
				type: "code-block",
				content: codeBuffer.join("\n"),
				language: codeLang || null,
			});
		}

		return this.tokens;
	}

	/**
	 * Pushes the given list buffer into the token stream if it holds items.
	 *
	 * @param {{type: string, items: Array}|null} listBuffer
	 */
	_flushList(listBuffer) {
		if (
			listBuffer &&
			Array.isArray(listBuffer.items) &&
			listBuffer.items.length > 0
		) {
			this.tokens.push(listBuffer);
		}
	}

	/**
	 * Escapes `&`, `<` and `>` so the text is safe as HTML *text content*.
	 * Quotes are not escaped, so the result must not be placed into an
	 * attribute value without further escaping.
	 *
	 * @param {string} text
	 * @returns {string}
	 */
	static escapeHTML(text) {
		return text
			.replace(/&/g, "&amp;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;");
	}

	/**
	 * Filters out invalid characters in code language names and limits the
	 * result to 32 characters. Non-string input yields an empty string.
	 *
	 * @param {*} lang
	 * @returns {string}
	 */
	static safeCodeLang(lang) {
		return typeof lang === "string"
			? lang.replace(/[^a-zA-Z0-9\-_]/g, "").slice(0, 32)
			: "";
	}

	/**
	 * Converts inline Markdown to HTML (inline code, links, bold, italic).
	 * Escapes HTML first when the `escapeHTML` option is enabled.
	 *
	 * Inline code and link targets are set aside before emphasis is applied,
	 * so `*` inside backticks or inside a URL is never turned into `<em>` /
	 * `<strong>`. Double quotes in link targets are encoded so a URL cannot
	 * break out of the `href` attribute.
	 *
	 * NOTE(review): URL schemes are not validated; a `javascript:` link in
	 * untrusted Markdown is emitted as-is. Callers rendering untrusted input
	 * should sanitize the resulting HTML.
	 *
	 * @param {string} text
	 * @returns {string}
	 */
	parseInline(text) {
		// NUL delimits the placeholders below, so it must not occur in the input.
		const input = text.replace(/\u0000/g, "");
		let out = this.options.escapeHTML
			? MarkdownToHTML.escapeHTML(input)
			: input;

		// Finished HTML fragments that later passes must not touch.
		const stash = [];
		const hold = (html) => `\u0000${stash.push(html) - 1}\u0000`;

		// Inline code first: its content is literal.
		out = out.replace(/`(.+?)`/g, (match, code) =>
			hold(`<code>${code}</code>`),
		);

		// Links: protect the opening tag (and thereby the URL) from emphasis
		// processing; the label stays in place so it can still be formatted.
		out = out.replace(/\[([^\]]+)]\(([^)]+)\)/g, (match, label, url) => {
			const href = url.replace(/"/g, "&quot;");
			return `${hold(`<a href="${href}">`)}${label}</a>`;
		});

		out = out.replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>"); // Bold
		out = out.replace(/\*(.+?)\*/g, "<em>$1</em>"); // Italic

		// A stashed link tag may itself contain a code placeholder, hence the recursion.
		const restore = (fragment) =>
			fragment.replace(/\u0000(\d+)\u0000/g, (match, index) => {
				const held = stash[Number(index)];
				return held === undefined ? "" : restore(held);
			});

		return restore(out);
	}

	/**
	 * Converts tokens to final HTML. Tokenizes first when no tokens exist yet.
	 *
	 * @returns {string} The rendered HTML with surrounding whitespace trimmed.
	 */
	render() {
		if (this.tokens.length === 0) {
			this.tokenize();
		}

		let html = "";
		for (const token of this.tokens) {
			switch (token.type) {
				case "heading":
					html += `<h${token.level}>${this.parseInline(token.content)}</h${token.level}>\n`;
					break;

				case "paragraph":
					html += `<p>${this.parseInline(token.content)}</p>\n`;
					break;

				case "unordered-list":
					html += "<ul>\n";
					for (const item of token.items) {
						if (typeof item === "string") {
							html += ` <li>${this.parseInline(item)}</li>\n`;
						} else if (item && item.type === "task") {
							// The counter lives on the instance so IDs stay unique
							// across every list rendered by this converter.
							this._taskId += 1;
							const inputId = `mdtask-${this._taskId}`;
							const checked = item.checked ? " checked" : "";
							const disabled = this.options.taskListDisabled
								? " disabled"
								: "";
							html += ` <li><input type="checkbox" id="${inputId}"${disabled}${checked}><label for="${inputId}"> ${this.parseInline(item.content)}</label></li>\n`;
						}
					}
					html += "</ul>\n";
					break;

				case "ordered-list":
					html += "<ol>\n";
					for (const item of token.items) {
						html += ` <li>${this.parseInline(item)}</li>\n`;
					}
					html += "</ol>\n";
					break;

				case "code-block": {
					const safeLang = MarkdownToHTML.safeCodeLang(token.language);
					const langClass = safeLang
						? ` class="${this.options.codeHighlightClassPrefix}${safeLang}"`
						: "";
					const codeContent = this.options.escapeHTML
						? MarkdownToHTML.escapeHTML(token.content)
						: token.content;
					html += `<pre><code${langClass}>${codeContent}</code></pre>\n`;
					break;
				}

				case "blank":
					html += "\n";
					break;
			}
		}

		return html.trim();
	}

	/**
	 * Convenience static method to render Markdown directly.
	 *
	 * @param {string} markdown - Markdown source text.
	 * @param {object} [options] - Same options as the constructor.
	 * @returns {string} The rendered HTML.
	 */
	static convert(markdown, options = {}) {
		const converter = new MarkdownToHTML(markdown, options);
		return converter.render();
	}
}