@schukai/monster
Version:
Monster is a simple library for creating fast, robust and lightweight websites.
277 lines (244 loc) • 7.19 kB
JavaScript
/**
* Copyright © Volker Schukai and all contributing authors, {{copyRightYear}}. All rights reserved.
* Node module: @schukai/monster
*
* This source code is licensed under the GNU Affero General Public License version 3 (AGPLv3).
* The full text of the license can be found at: https://www.gnu.org/licenses/agpl-3.0.en.html
*
* For those who do not wish to adhere to the AGPLv3, a commercial license is available.
* Acquiring a commercial license allows you to use this software without complying with the AGPLv3 terms.
* For more information about purchasing a commercial license, please contact Volker Schukai.
*
* SPDX-License-Identifier: AGPL-3.0
*/
export { MarkdownToHTML };
/**
 * Converts a small subset of Markdown into HTML.
 *
 * Supported block elements: ATX headings (`#` to `######`), ordered and
 * unordered lists, task list items (`- [ ]` / `- [x]`), fenced code blocks
 * and paragraphs. Supported inline elements: links, bold, italic and inline
 * code. Every input line is handled on its own; paragraphs are not merged
 * across lines and lists are not nested.
 */
class MarkdownToHTML {
    /**
     * @param {string} markdown - The Markdown source text.
     * @param {object} [options] - Overrides for the defaults below.
     * @param {boolean} [options.taskListDisabled=true] - Render task checkboxes with the `disabled` attribute.
     * @param {string} [options.codeHighlightClassPrefix="language-"] - Prefix for the class on `<code>` of fenced
     *   blocks. Inserted verbatim, so it must come from trusted configuration.
     * @param {boolean} [options.escapeHTML=true] - Escape HTML special characters found in the Markdown text and
     *   neutralise script-capable link targets.
     */
    constructor(markdown, options = {}) {
        this.markdown = markdown;
        this.tokens = [];
        this.options = {
            taskListDisabled: true,
            codeHighlightClassPrefix: "language-",
            escapeHTML: true,
            ...options,
        };
        this._taskId = 0; // Counter used to build unique checkbox ids in task lists
    }

    /**
     * Tokenizes the Markdown input into a structured array of tokens.
     *
     * The token list is rebuilt from scratch on every call, so calling this
     * method repeatedly does not duplicate tokens. `null`/`undefined` input is
     * treated as an empty document and both LF and CRLF line endings are
     * accepted.
     *
     * @returns {Array<object>} The token list (also stored in `this.tokens`).
     */
    tokenize() {
        this.tokens = [];

        const source = this.markdown == null ? "" : String(this.markdown);
        const lines = source.split(/\r?\n/);

        let inCodeBlock = false;
        let codeBuffer = [];
        let codeLang = "";
        let listBuffer = null;

        // Emits the pending list (if any) and forgets it.
        const flushList = () => {
            this._flushList(listBuffer);
            listBuffer = null;
        };

        // Returns the list currently being collected, starting a new one when
        // there is none or when the list kind changes (ul <-> ol).
        const currentList = (type) => {
            if (listBuffer && listBuffer.type !== type) {
                flushList();
            }
            listBuffer = listBuffer || { type, items: [] };
            return listBuffer;
        };

        // Emits the collected code lines as one token and resets the buffers.
        const pushCodeBlock = () => {
            this.tokens.push({
                type: "code-block",
                content: codeBuffer.join("\n"),
                language: codeLang || null,
            });
            codeBuffer = [];
            codeLang = "";
        };

        for (const line of lines) {
            const trimmed = line.trim();

            // A fence line toggles code-block mode; the optional word after
            // the opening fence is the language.
            const fence = trimmed.match(/^```(\w+)?/);
            if (fence) {
                if (inCodeBlock) {
                    pushCodeBlock();
                    inCodeBlock = false;
                } else {
                    flushList();
                    codeLang = fence[1] || "";
                    inCodeBlock = true;
                }
                continue;
            }

            // Inside a code block the untrimmed line is kept verbatim.
            if (inCodeBlock) {
                codeBuffer.push(line);
                continue;
            }

            // Heading (e.g. #, ##, ###, etc.)
            if (/^#{1,6}\s/.test(trimmed)) {
                flushList();
                const level = trimmed.match(/^#+/)[0].length;
                this.tokens.push({
                    type: "heading",
                    level,
                    content: trimmed.slice(level + 1).trim(),
                });
                continue;
            }

            // Ordered list item (e.g. 1. Item)
            if (/^\d+\.\s+/.test(trimmed)) {
                currentList("ordered-list").items.push(
                    trimmed.replace(/^\d+\.\s+/, ""),
                );
                continue;
            }

            // Unordered list item or task list (e.g. - Item, - [x] Task)
            if (/^[-+*]\s+/.test(trimmed)) {
                const list = currentList("unordered-list");
                const content = trimmed.replace(/^[-+*]\s+/, "");
                const taskMatch = content.match(/^\[( |x|X)]\s+(.*)/);
                if (taskMatch) {
                    list.items.push({
                        type: "task",
                        checked: taskMatch[1].toLowerCase() === "x",
                        content: taskMatch[2],
                    });
                } else {
                    list.items.push(content);
                }
                continue;
            }

            // Everything else ends a running list.
            flushList();
            if (trimmed === "") {
                this.tokens.push({ type: "blank" });
            } else {
                this.tokens.push({ type: "paragraph", content: trimmed });
            }
        }

        // Flush whatever is still pending at EOF. An unterminated fence is
        // emitted only if it collected at least one line.
        flushList();
        if (inCodeBlock && codeBuffer.length > 0) {
            pushCodeBlock();
        }
        return this.tokens;
    }

    /**
     * Pushes the given list buffer into the token stream if it has items.
     *
     * @param {?object} listBuffer - List token being collected, or `null`.
     */
    _flushList(listBuffer) {
        if (
            listBuffer &&
            Array.isArray(listBuffer.items) &&
            listBuffer.items.length > 0
        ) {
            this.tokens.push(listBuffer);
        }
    }

    /**
     * Escapes `&`, `<` and `>` so the text can be placed in HTML element
     * content without being interpreted as markup.
     *
     * Quotes are not escaped, so the result is not sufficient on its own for
     * use inside an attribute value.
     *
     * @param {string} text - Raw text.
     * @returns {string} Text with the three characters replaced by entities.
     */
    static escapeHTML(text) {
        // `&` must be handled first, otherwise the entities produced for
        // `<` and `>` would be escaped a second time.
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
    }

    /**
     * Filters out invalid characters in code language names.
     *
     * @param {*} lang - Language taken from the opening fence.
     * @returns {string} At most 32 characters of `[a-zA-Z0-9_-]`; an empty
     *   string for non-string input.
     */
    static safeCodeLang(lang) {
        return typeof lang === "string"
            ? lang.replace(/[^a-zA-Z0-9\-_]/g, "").slice(0, 32)
            : "";
    }

    /**
     * Turns a link target into a value that is safe inside `href="…"`.
     *
     * Double quotes are always encoded so the URL cannot terminate the
     * attribute. When the `escapeHTML` option is enabled the input is treated
     * as untrusted and `javascript:`, `vbscript:` and `data:` targets are
     * replaced by `#`.
     *
     * @param {string} url - Link target as captured from the Markdown text.
     * @returns {string} Attribute-safe link target.
     */
    _sanitizeHref(url) {
        if (this.options.escapeHTML) {
            // Browsers ignore whitespace and control characters in and around
            // the scheme, so drop them before comparing.
            const compact = url
                .replace(/[\u0000-\u0020\u007f-\u009f]/g, "")
                .toLowerCase();
            if (/^(?:javascript|vbscript|data):/.test(compact)) {
                return "#";
            }
        }
        return url.replace(/"/g, "&quot;");
    }

    /**
     * Converts inline Markdown to HTML (inline code, links, bold, italic).
     * Escapes HTML first when the `escapeHTML` option is enabled.
     *
     * Inline code and link targets are swapped for placeholders while bold
     * and italic are processed, so `*` inside backticks or inside a URL is
     * left alone.
     *
     * @param {string} text - One line of inline Markdown.
     * @returns {string} HTML fragment.
     */
    parseInline(text) {
        const escapeText = (value) =>
            this.options.escapeHTML ? MarkdownToHTML.escapeHTML(value) : value;

        // Finished fragments are parked here and referenced by NUL-delimited
        // index placeholders until all Markdown substitutions have run.
        const stash = [];
        const hold = (fragment) => `\u0000${stash.push(fragment) - 1}\u0000`;

        // NUL is stripped from the input so it cannot forge a placeholder.
        let out = (text == null ? "" : String(text)).replace(/\u0000/g, "");

        // Inline code binds tightest: its content is escaped but never
        // scanned for further Markdown.
        out = out.replace(/`(.+?)`/g, (match, code) =>
            hold(`<code>${escapeText(code)}</code>`),
        );

        out = escapeText(out);

        // Links. A target containing a placeholder (an inline code span) is
        // not treated as a link, which keeps markup out of the attribute.
        out = out.replace(
            /\[([^\]]+)]\(([^)\u0000]+)\)/g,
            (match, label, url) =>
                `<a href="${hold(this._sanitizeHref(url))}">${label}</a>`,
        );
        out = out.replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>"); // Bold
        out = out.replace(/\*(.+?)\*/g, "<em>$1</em>"); // Italic

        return out.replace(
            /\u0000(\d+)\u0000/g,
            (match, index) => stash[Number(index)],
        );
    }

    /**
     * Converts tokens to final HTML. Tokenizes first if no tokens exist yet.
     *
     * @returns {string} The rendered HTML with surrounding whitespace trimmed.
     */
    render() {
        if (this.tokens.length === 0) {
            this.tokenize();
        }

        let html = "";
        for (const token of this.tokens) {
            switch (token.type) {
                case "heading":
                    html += `<h${token.level}>${this.parseInline(token.content)}</h${token.level}>\n`;
                    break;
                case "paragraph":
                    html += `<p>${this.parseInline(token.content)}</p>\n`;
                    break;
                case "unordered-list":
                    html += "<ul>\n";
                    for (const item of token.items) {
                        if (typeof item === "string") {
                            html += ` <li>${this.parseInline(item)}</li>\n`;
                        } else if (item && item.type === "task") {
                            // Each checkbox gets its own id so the label can point at it.
                            this._taskId += 1;
                            const inputId = `mdtask-${this._taskId}`;
                            const checked = item.checked ? " checked" : "";
                            const disabled = this.options.taskListDisabled
                                ? " disabled"
                                : "";
                            html += ` <li><input type="checkbox" id="${inputId}"${disabled}${checked}><label for="${inputId}"> ${this.parseInline(item.content)}</label></li>\n`;
                        }
                    }
                    html += "</ul>\n";
                    break;
                case "ordered-list":
                    html += "<ol>\n";
                    for (const item of token.items) {
                        html += ` <li>${this.parseInline(item)}</li>\n`;
                    }
                    html += "</ol>\n";
                    break;
                case "code-block": {
                    const safeLang = MarkdownToHTML.safeCodeLang(token.language);
                    const langClass = safeLang
                        ? ` class="${this.options.codeHighlightClassPrefix}${safeLang}"`
                        : "";
                    const codeContent = this.options.escapeHTML
                        ? MarkdownToHTML.escapeHTML(token.content)
                        : token.content;
                    html += `<pre><code${langClass}>${codeContent}</code></pre>\n`;
                    break;
                }
                case "blank":
                    html += "\n";
                    break;
            }
        }
        return html.trim();
    }

    /**
     * Convenience static method to render Markdown directly.
     *
     * @param {string} markdown - The Markdown source text.
     * @param {object} [options] - Same options as the constructor.
     * @returns {string} The rendered HTML.
     */
    static convert(markdown, options = {}) {
        const converter = new MarkdownToHTML(markdown, options);
        return converter.render();
    }
}