marked
Version:
A markdown parser built for speed
1,365 lines (1,357 loc) • 89.2 kB
JavaScript
/**
* marked v9.0.0 - a markdown parser
* Copyright (c) 2011-2023, Christopher Jeffrey. (MIT Licensed)
* https://github.com/markedjs/marked
*/
/**
* DO NOT EDIT THIS FILE
* The code in this file is generated from files in ./src/
*/
'use strict';
/**
* Gets the original marked default options.
*/
function _getDefaults() {
return {
async: false,
breaks: false,
extensions: null,
gfm: true,
hooks: null,
pedantic: false,
renderer: null,
silent: false,
tokenizer: null,
walkTokens: null
};
}
exports.defaults = _getDefaults();
function changeDefaults(newDefaults) {
exports.defaults = newDefaults;
}
/**
* Helpers
*/
const escapeTest = /[&<>"']/;
const escapeReplace = new RegExp(escapeTest.source, 'g');
const escapeTestNoEncode = /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/;
const escapeReplaceNoEncode = new RegExp(escapeTestNoEncode.source, 'g');
const escapeReplacements = {
'&': '&',
'<': '<',
'>': '>',
'"': '"',
"'": '''
};
const getEscapeReplacement = (ch) => escapeReplacements[ch];
function escape(html, encode) {
if (encode) {
if (escapeTest.test(html)) {
return html.replace(escapeReplace, getEscapeReplacement);
}
}
else {
if (escapeTestNoEncode.test(html)) {
return html.replace(escapeReplaceNoEncode, getEscapeReplacement);
}
}
return html;
}
const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig;
function unescape(html) {
// explicitly match decimal, hex, and named HTML entities
return html.replace(unescapeTest, (_, n) => {
n = n.toLowerCase();
if (n === 'colon')
return ':';
if (n.charAt(0) === '#') {
return n.charAt(1) === 'x'
? String.fromCharCode(parseInt(n.substring(2), 16))
: String.fromCharCode(+n.substring(1));
}
return '';
});
}
const caret = /(^|[^\[])\^/g;
function edit(regex, opt) {
regex = typeof regex === 'string' ? regex : regex.source;
opt = opt || '';
const obj = {
replace: (name, val) => {
val = typeof val === 'object' && 'source' in val ? val.source : val;
val = val.replace(caret, '$1');
regex = regex.replace(name, val);
return obj;
},
getRegex: () => {
return new RegExp(regex, opt);
}
};
return obj;
}
function cleanUrl(href) {
try {
href = encodeURI(href).replace(/%25/g, '%');
}
catch (e) {
return null;
}
return href;
}
const noopTest = { exec: () => null };
function splitCells(tableRow, count) {
// ensure that every cell-delimiting pipe has a space
// before it to distinguish it from an escaped pipe
const row = tableRow.replace(/\|/g, (match, offset, str) => {
let escaped = false;
let curr = offset;
while (--curr >= 0 && str[curr] === '\\')
escaped = !escaped;
if (escaped) {
// odd number of slashes means | is escaped
// so we leave it alone
return '|';
}
else {
// add space before unescaped |
return ' |';
}
}), cells = row.split(/ \|/);
let i = 0;
// First/last cell in a row cannot be empty if it has no leading/trailing pipe
if (!cells[0].trim()) {
cells.shift();
}
if (cells.length > 0 && !cells[cells.length - 1].trim()) {
cells.pop();
}
if (count) {
if (cells.length > count) {
cells.splice(count);
}
else {
while (cells.length < count)
cells.push('');
}
}
for (; i < cells.length; i++) {
// leading or trailing whitespace is ignored per the gfm spec
cells[i] = cells[i].trim().replace(/\\\|/g, '|');
}
return cells;
}
/**
* Remove trailing 'c's. Equivalent to str.replace(/c*$/, '').
* /c*$/ is vulnerable to REDOS.
*
* @param str
* @param c
* @param invert Remove suffix of non-c chars instead. Default falsey.
*/
function rtrim(str, c, invert) {
const l = str.length;
if (l === 0) {
return '';
}
// Length of suffix matching the invert condition.
let suffLen = 0;
// Step left until we fail to match the invert condition.
while (suffLen < l) {
const currChar = str.charAt(l - suffLen - 1);
if (currChar === c && !invert) {
suffLen++;
}
else if (currChar !== c && invert) {
suffLen++;
}
else {
break;
}
}
return str.slice(0, l - suffLen);
}
function findClosingBracket(str, b) {
if (str.indexOf(b[1]) === -1) {
return -1;
}
let level = 0;
for (let i = 0; i < str.length; i++) {
if (str[i] === '\\') {
i++;
}
else if (str[i] === b[0]) {
level++;
}
else if (str[i] === b[1]) {
level--;
if (level < 0) {
return i;
}
}
}
return -1;
}
function outputLink(cap, link, raw, lexer) {
const href = link.href;
const title = link.title ? escape(link.title) : null;
const text = cap[1].replace(/\\([\[\]])/g, '$1');
if (cap[0].charAt(0) !== '!') {
lexer.state.inLink = true;
const token = {
type: 'link',
raw,
href,
title,
text,
tokens: lexer.inlineTokens(text)
};
lexer.state.inLink = false;
return token;
}
return {
type: 'image',
raw,
href,
title,
text: escape(text)
};
}
function indentCodeCompensation(raw, text) {
const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
if (matchIndentToCode === null) {
return text;
}
const indentToCode = matchIndentToCode[1];
return text
.split('\n')
.map(node => {
const matchIndentInNode = node.match(/^\s+/);
if (matchIndentInNode === null) {
return node;
}
const [indentInNode] = matchIndentInNode;
if (indentInNode.length >= indentToCode.length) {
return node.slice(indentToCode.length);
}
return node;
})
.join('\n');
}
/**
* Tokenizer
*/
class _Tokenizer {
options;
// TODO: Fix this rules type
rules;
lexer;
constructor(options) {
this.options = options || exports.defaults;
}
space(src) {
const cap = this.rules.block.newline.exec(src);
if (cap && cap[0].length > 0) {
return {
type: 'space',
raw: cap[0]
};
}
}
code(src) {
const cap = this.rules.block.code.exec(src);
if (cap) {
const text = cap[0].replace(/^ {1,4}/gm, '');
return {
type: 'code',
raw: cap[0],
codeBlockStyle: 'indented',
text: !this.options.pedantic
? rtrim(text, '\n')
: text
};
}
}
fences(src) {
const cap = this.rules.block.fences.exec(src);
if (cap) {
const raw = cap[0];
const text = indentCodeCompensation(raw, cap[3] || '');
return {
type: 'code',
raw,
lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
text
};
}
}
heading(src) {
const cap = this.rules.block.heading.exec(src);
if (cap) {
let text = cap[2].trim();
// remove trailing #s
if (/#$/.test(text)) {
const trimmed = rtrim(text, '#');
if (this.options.pedantic) {
text = trimmed.trim();
}
else if (!trimmed || / $/.test(trimmed)) {
// CommonMark requires space before trailing #s
text = trimmed.trim();
}
}
return {
type: 'heading',
raw: cap[0],
depth: cap[1].length,
text,
tokens: this.lexer.inline(text)
};
}
}
hr(src) {
const cap = this.rules.block.hr.exec(src);
if (cap) {
return {
type: 'hr',
raw: cap[0]
};
}
}
blockquote(src) {
const cap = this.rules.block.blockquote.exec(src);
if (cap) {
const text = cap[0].replace(/^ *>[ \t]?/gm, '');
const top = this.lexer.state.top;
this.lexer.state.top = true;
const tokens = this.lexer.blockTokens(text);
this.lexer.state.top = top;
return {
type: 'blockquote',
raw: cap[0],
tokens,
text
};
}
}
list(src) {
let cap = this.rules.block.list.exec(src);
if (cap) {
let bull = cap[1].trim();
const isordered = bull.length > 1;
const list = {
type: 'list',
raw: '',
ordered: isordered,
start: isordered ? +bull.slice(0, -1) : '',
loose: false,
items: []
};
bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
if (this.options.pedantic) {
bull = isordered ? bull : '[*+-]';
}
// Get next list item
const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`);
let raw = '';
let itemContents = '';
let endsWithBlankLine = false;
// Check if current bullet point can start a new List Item
while (src) {
let endEarly = false;
if (!(cap = itemRegex.exec(src))) {
break;
}
if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?)
break;
}
raw = cap[0];
src = src.substring(raw.length);
let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t) => ' '.repeat(3 * t.length));
let nextLine = src.split('\n', 1)[0];
let indent = 0;
if (this.options.pedantic) {
indent = 2;
itemContents = line.trimStart();
}
else {
indent = cap[2].search(/[^ ]/); // Find first non-space char
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
itemContents = line.slice(indent);
indent += cap[1].length;
}
let blankLine = false;
if (!line && /^ *$/.test(nextLine)) { // Items begin with at most one blank line
raw += nextLine + '\n';
src = src.substring(nextLine.length + 1);
endEarly = true;
}
if (!endEarly) {
const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`);
const hrRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`);
const fencesBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`);
const headingBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`);
// Check if following lines should be included in List Item
while (src) {
const rawLine = src.split('\n', 1)[0];
nextLine = rawLine;
// Re-align to follow commonmark nesting rules
if (this.options.pedantic) {
nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
}
// End list item if found code fences
if (fencesBeginRegex.test(nextLine)) {
break;
}
// End list item if found start of new heading
if (headingBeginRegex.test(nextLine)) {
break;
}
// End list item if found start of new bullet
if (nextBulletRegex.test(nextLine)) {
break;
}
// Horizontal rule found
if (hrRegex.test(src)) {
break;
}
if (nextLine.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
itemContents += '\n' + nextLine.slice(indent);
}
else {
// not enough indentation
if (blankLine) {
break;
}
// paragraph continuation unless last line was a different block level element
if (line.search(/[^ ]/) >= 4) { // indented code block
break;
}
if (fencesBeginRegex.test(line)) {
break;
}
if (headingBeginRegex.test(line)) {
break;
}
if (hrRegex.test(line)) {
break;
}
itemContents += '\n' + nextLine;
}
if (!blankLine && !nextLine.trim()) { // Check if current line is blank
blankLine = true;
}
raw += rawLine + '\n';
src = src.substring(rawLine.length + 1);
line = nextLine.slice(indent);
}
}
if (!list.loose) {
// If the previous item ended with a blank line, the list is loose
if (endsWithBlankLine) {
list.loose = true;
}
else if (/\n *\n *$/.test(raw)) {
endsWithBlankLine = true;
}
}
let istask = null;
let ischecked;
// Check for task list items
if (this.options.gfm) {
istask = /^\[[ xX]\] /.exec(itemContents);
if (istask) {
ischecked = istask[0] !== '[ ] ';
itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
}
}
list.items.push({
type: 'list_item',
raw,
task: !!istask,
checked: ischecked,
loose: false,
text: itemContents,
tokens: []
});
list.raw += raw;
}
// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
list.items[list.items.length - 1].raw = raw.trimEnd();
list.items[list.items.length - 1].text = itemContents.trimEnd();
list.raw = list.raw.trimEnd();
// Item child tokens handled here at end because we needed to have the final item to trim it first
for (let i = 0; i < list.items.length; i++) {
this.lexer.state.top = false;
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
if (!list.loose) {
// Check if list should be loose
const spacers = list.items[i].tokens.filter(t => t.type === 'space');
const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => /\n.*\n/.test(t.raw));
list.loose = hasMultipleLineBreaks;
}
}
// Set all items to loose if list is loose
if (list.loose) {
for (let i = 0; i < list.items.length; i++) {
list.items[i].loose = true;
}
}
return list;
}
}
html(src) {
const cap = this.rules.block.html.exec(src);
if (cap) {
const token = {
type: 'html',
block: true,
raw: cap[0],
pre: cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style',
text: cap[0]
};
return token;
}
}
def(src) {
const cap = this.rules.block.def.exec(src);
if (cap) {
const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
return {
type: 'def',
tag,
raw: cap[0],
href,
title
};
}
}
table(src) {
const cap = this.rules.block.table.exec(src);
if (cap) {
const item = {
type: 'table',
raw: cap[0],
header: splitCells(cap[1]).map(c => {
return { text: c, tokens: [] };
}),
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
};
if (item.header.length === item.align.length) {
let l = item.align.length;
let i, j, k, row;
for (i = 0; i < l; i++) {
const align = item.align[i];
if (align) {
if (/^ *-+: *$/.test(align)) {
item.align[i] = 'right';
}
else if (/^ *:-+: *$/.test(align)) {
item.align[i] = 'center';
}
else if (/^ *:-+ *$/.test(align)) {
item.align[i] = 'left';
}
else {
item.align[i] = null;
}
}
}
l = item.rows.length;
for (i = 0; i < l; i++) {
item.rows[i] = splitCells(item.rows[i], item.header.length).map(c => {
return { text: c, tokens: [] };
});
}
// parse child tokens inside headers and cells
// header child tokens
l = item.header.length;
for (j = 0; j < l; j++) {
item.header[j].tokens = this.lexer.inline(item.header[j].text);
}
// cell child tokens
l = item.rows.length;
for (j = 0; j < l; j++) {
row = item.rows[j];
for (k = 0; k < row.length; k++) {
row[k].tokens = this.lexer.inline(row[k].text);
}
}
return item;
}
}
}
lheading(src) {
const cap = this.rules.block.lheading.exec(src);
if (cap) {
return {
type: 'heading',
raw: cap[0],
depth: cap[2].charAt(0) === '=' ? 1 : 2,
text: cap[1],
tokens: this.lexer.inline(cap[1])
};
}
}
paragraph(src) {
const cap = this.rules.block.paragraph.exec(src);
if (cap) {
const text = cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1];
return {
type: 'paragraph',
raw: cap[0],
text,
tokens: this.lexer.inline(text)
};
}
}
text(src) {
const cap = this.rules.block.text.exec(src);
if (cap) {
return {
type: 'text',
raw: cap[0],
text: cap[0],
tokens: this.lexer.inline(cap[0])
};
}
}
escape(src) {
const cap = this.rules.inline.escape.exec(src);
if (cap) {
return {
type: 'escape',
raw: cap[0],
text: escape(cap[1])
};
}
}
tag(src) {
const cap = this.rules.inline.tag.exec(src);
if (cap) {
if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) {
this.lexer.state.inLink = true;
}
else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) {
this.lexer.state.inLink = false;
}
if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
this.lexer.state.inRawBlock = true;
}
else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
this.lexer.state.inRawBlock = false;
}
return {
type: 'html',
raw: cap[0],
inLink: this.lexer.state.inLink,
inRawBlock: this.lexer.state.inRawBlock,
block: false,
text: cap[0]
};
}
}
link(src) {
const cap = this.rules.inline.link.exec(src);
if (cap) {
const trimmedUrl = cap[2].trim();
if (!this.options.pedantic && /^</.test(trimmedUrl)) {
// commonmark requires matching angle brackets
if (!(/>$/.test(trimmedUrl))) {
return;
}
// ending angle bracket cannot be escaped
const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
return;
}
}
else {
// find closing parenthesis
const lastParenIndex = findClosingBracket(cap[2], '()');
if (lastParenIndex > -1) {
const start = cap[0].indexOf('!') === 0 ? 5 : 4;
const linkLen = start + cap[1].length + lastParenIndex;
cap[2] = cap[2].substring(0, lastParenIndex);
cap[0] = cap[0].substring(0, linkLen).trim();
cap[3] = '';
}
}
let href = cap[2];
let title = '';
if (this.options.pedantic) {
// split pedantic href and title
const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
if (link) {
href = link[1];
title = link[3];
}
}
else {
title = cap[3] ? cap[3].slice(1, -1) : '';
}
href = href.trim();
if (/^</.test(href)) {
if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
// pedantic allows starting angle bracket without ending angle bracket
href = href.slice(1);
}
else {
href = href.slice(1, -1);
}
}
return outputLink(cap, {
href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
title: title ? title.replace(this.rules.inline._escapes, '$1') : title
}, cap[0], this.lexer);
}
}
reflink(src, links) {
let cap;
if ((cap = this.rules.inline.reflink.exec(src))
|| (cap = this.rules.inline.nolink.exec(src))) {
let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
link = links[link.toLowerCase()];
if (!link) {
const text = cap[0].charAt(0);
return {
type: 'text',
raw: text,
text
};
}
return outputLink(cap, link, cap[0], this.lexer);
}
}
emStrong(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.emStrong.lDelim.exec(src);
if (!match)
return;
// _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
if (match[3] && prevChar.match(/[\p{L}\p{N}]/u))
return;
const nextChar = match[1] || match[2] || '';
if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
// unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below)
const lLength = [...match[0]].length - 1;
let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
endReg.lastIndex = 0;
// Clip maskedSrc to same section of string as src (move to lexer?)
maskedSrc = maskedSrc.slice(-1 * src.length + lLength);
while ((match = endReg.exec(maskedSrc)) != null) {
rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6];
if (!rDelim)
continue; // skip single * in __abc*abc__
rLength = [...rDelim].length;
if (match[3] || match[4]) { // found another Left Delim
delimTotal += rLength;
continue;
}
else if (match[5] || match[6]) { // either Left or Right Delim
if (lLength % 3 && !((lLength + rLength) % 3)) {
midDelimTotal += rLength;
continue; // CommonMark Emphasis Rules 9-10
}
}
delimTotal -= rLength;
if (delimTotal > 0)
continue; // Haven't found enough closing delimiters
// Remove extra characters. *a*** -> *a*
rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
const raw = [...src].slice(0, lLength + match.index + rLength + 1).join('');
// Create `em` if smallest delimiter has odd char count. *a***
if (Math.min(lLength, rLength) % 2) {
const text = raw.slice(1, -1);
return {
type: 'em',
raw,
text,
tokens: this.lexer.inlineTokens(text)
};
}
// Create 'strong' if smallest delimiter has even char count. **a***
const text = raw.slice(2, -2);
return {
type: 'strong',
raw,
text,
tokens: this.lexer.inlineTokens(text)
};
}
}
}
codespan(src) {
const cap = this.rules.inline.code.exec(src);
if (cap) {
let text = cap[2].replace(/\n/g, ' ');
const hasNonSpaceChars = /[^ ]/.test(text);
const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
text = text.substring(1, text.length - 1);
}
text = escape(text, true);
return {
type: 'codespan',
raw: cap[0],
text
};
}
}
br(src) {
const cap = this.rules.inline.br.exec(src);
if (cap) {
return {
type: 'br',
raw: cap[0]
};
}
}
del(src) {
const cap = this.rules.inline.del.exec(src);
if (cap) {
return {
type: 'del',
raw: cap[0],
text: cap[2],
tokens: this.lexer.inlineTokens(cap[2])
};
}
}
autolink(src) {
const cap = this.rules.inline.autolink.exec(src);
if (cap) {
let text, href;
if (cap[2] === '@') {
text = escape(cap[1]);
href = 'mailto:' + text;
}
else {
text = escape(cap[1]);
href = text;
}
return {
type: 'link',
raw: cap[0],
text,
href,
tokens: [
{
type: 'text',
raw: text,
text
}
]
};
}
}
url(src) {
let cap;
if (cap = this.rules.inline.url.exec(src)) {
let text, href;
if (cap[2] === '@') {
text = escape(cap[0]);
href = 'mailto:' + text;
}
else {
// do extended autolink path validation
let prevCapZero;
do {
prevCapZero = cap[0];
cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
} while (prevCapZero !== cap[0]);
text = escape(cap[0]);
if (cap[1] === 'www.') {
href = 'http://' + cap[0];
}
else {
href = cap[0];
}
}
return {
type: 'link',
raw: cap[0],
text,
href,
tokens: [
{
type: 'text',
raw: text,
text
}
]
};
}
}
inlineText(src) {
const cap = this.rules.inline.text.exec(src);
if (cap) {
let text;
if (this.lexer.state.inRawBlock) {
text = cap[0];
}
else {
text = escape(cap[0]);
}
return {
type: 'text',
raw: cap[0],
text
};
}
}
}
/**
* Block-Level Grammar
*/
// Not all rules are defined in the object literal
// @ts-expect-error
const block = {
newline: /^(?: *(?:\n|$))+/,
code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
html: '^ {0,3}(?:' // optional indentation
+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
+ '|comment[^\\n]*(\\n+|$)' // (2)
+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
+ '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
+ '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
+ ')',
def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
table: noopTest,
lheading: /^((?:(?!^bull ).|\n(?!\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
// regex template, placeholders will be replaced according to different paragraph
// interruption rules of commonmark and the original markdown spec:
_paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
text: /^[^\n]+/
};
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
block.def = edit(block.def)
.replace('label', block._label)
.replace('title', block._title)
.getRegex();
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.listItemStart = edit(/^( *)(bull) */)
.replace('bull', block.bullet)
.getRegex();
block.list = edit(block.list)
.replace(/bull/g, block.bullet)
.replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
.replace('def', '\\n+(?=' + block.def.source + ')')
.getRegex();
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
+ '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
+ '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
+ '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
+ '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
+ '|track|ul';
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
block.html = edit(block.html, 'i')
.replace('comment', block._comment)
.replace('tag', block._tag)
.replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
.getRegex();
block.lheading = edit(block.lheading)
.replace(/bull/g, block.bullet) // lists can interrupt
.getRegex();
block.paragraph = edit(block._paragraph)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
.replace('|table', '')
.replace('blockquote', ' {0,3}>')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
.getRegex();
block.blockquote = edit(block.blockquote)
.replace('paragraph', block.paragraph)
.getRegex();
/**
* Normal Block Grammar
*/
block.normal = { ...block };
/**
* GFM Block Grammar
*/
block.gfm = {
...block.normal,
table: '^ *([^\\n ].*\\|.*)\\n' // Header
+ ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align
+ '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
};
block.gfm.table = edit(block.gfm.table)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('blockquote', ' {0,3}>')
.replace('code', ' {4}[^\\n]')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
.getRegex();
block.gfm.paragraph = edit(block._paragraph)
.replace('hr', block.hr)
.replace('heading', ' {0,3}#{1,6} ')
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
.replace('table', block.gfm.table) // interrupt paragraphs with table
.replace('blockquote', ' {0,3}>')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
.getRegex();
/**
* Pedantic grammar (original John Gruber's loose markdown specification)
*/
block.pedantic = {
...block.normal,
html: edit('^ *(?:comment *(?:\\n|\\s*$)'
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
+ '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
.replace('comment', block._comment)
.replace(/tag/g, '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
+ '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
+ '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
.getRegex(),
def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
heading: /^(#{1,6})(.*)(?:\n+|$)/,
fences: noopTest,
lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
paragraph: edit(block.normal._paragraph)
.replace('hr', block.hr)
.replace('heading', ' *#{1,6} *[^\n]')
.replace('lheading', block.lheading)
.replace('blockquote', ' {0,3}>')
.replace('|fences', '')
.replace('|list', '')
.replace('|html', '')
.getRegex()
};
/**
* Inline-Level Grammar
*/
// Not all rules are defined in the object literal
// @ts-expect-error
const inline = {
escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
url: noopTest,
tag: '^comment'
+ '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
+ '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
+ '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
+ '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
+ '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>',
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(ref)\]/,
nolink: /^!?\[(ref)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
emStrong: {
lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/,
// (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
// | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a
rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/,
rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _
},
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
del: noopTest,
text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
punctuation: /^((?![*_])[\spunctuation])/
};
// list of unicode punctuation marks, plus any missing characters from CommonMark spec
inline._punctuation = '\\p{P}$+<=>`^|~';
inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex();
// sequences em should skip over [title](link), `code`, <html>
inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g;
inline.anyPunctuation = /\\[punct]/g;
inline._escapes = /\\([punct])/g;
inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
inline.emStrong.lDelim = edit(inline.emStrong.lDelim, 'u')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline.anyPunctuation = edit(inline.anyPunctuation, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline._escapes = edit(inline._escapes, 'gu')
.replace(/punct/g, inline._punctuation)
.getRegex();
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
inline.autolink = edit(inline.autolink)
.replace('scheme', inline._scheme)
.replace('email', inline._email)
.getRegex();
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
inline.tag = edit(inline.tag)
.replace('comment', inline._comment)
.replace('attribute', inline._attribute)
.getRegex();
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;
inline.link = edit(inline.link)
.replace('label', inline._label)
.replace('href', inline._href)
.replace('title', inline._title)
.getRegex();
inline.reflink = edit(inline.reflink)
.replace('label', inline._label)
.replace('ref', block._label)
.getRegex();
inline.nolink = edit(inline.nolink)
.replace('ref', block._label)
.getRegex();
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
.replace('reflink', inline.reflink)
.replace('nolink', inline.nolink)
.getRegex();
/**
* Normal Inline Grammar
*/
inline.normal = { ...inline };
/**
* Pedantic Inline Grammar
*/
inline.pedantic = {
...inline.normal,
strong: {
start: /^__|\*\*/,
middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
endAst: /\*\*(?!\*)/g,
endUnd: /__(?!_)/g
},
em: {
start: /^_|\*/,
middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
endAst: /\*(?!\*)/g,
endUnd: /_(?!_)/g
},
link: edit(/^!?\[(label)\]\((.*?)\)/)
.replace('label', inline._label)
.getRegex(),
reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
.replace('label', inline._label)
.getRegex()
};
/**
* GFM Inline Grammar
*/
inline.gfm = {
...inline.normal,
escape: edit(inline.escape).replace('])', '~|])').getRegex(),
_extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
_backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/,
del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
};
inline.gfm.url = edit(inline.gfm.url, 'i')
.replace('email', inline.gfm._extended_email)
.getRegex();
/**
* GFM + Line Breaks Inline Grammar
*/
inline.breaks = {
...inline.gfm,
br: edit(inline.br).replace('{2,}', '*').getRegex(),
text: edit(inline.gfm.text)
.replace('\\b_', '\\b_| {2,}\\n')
.replace(/\{2,\}/g, '*')
.getRegex()
};
/**
* Block Lexer
*/
class _Lexer {
tokens;
options;
state;
tokenizer;
inlineQueue;
constructor(options) {
// TokenList cannot be created in one go
// @ts-expect-error
this.tokens = [];
this.tokens.links = Object.create(null);
this.options = options || exports.defaults;
this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
this.tokenizer = this.options.tokenizer;
this.tokenizer.options = this.options;
this.tokenizer.lexer = this;
this.inlineQueue = [];
this.state = {
inLink: false,
inRawBlock: false,
top: true
};
const rules = {
block: block.normal,
inline: inline.normal
};
if (this.options.pedantic) {
rules.block = block.pedantic;
rules.inline = inline.pedantic;
}
else if (this.options.gfm) {
rules.block = block.gfm;
if (this.options.breaks) {
rules.inline = inline.breaks;
}
else {
rules.inline = inline.gfm;
}
}
this.tokenizer.rules = rules;
}
/**
* Expose Rules
*/
static get rules() {
return {
block,
inline
};
}
/**
* Static Lex Method
*/
static lex(src, options) {
const lexer = new _Lexer(options);
return lexer.lex(src);
}
/**
* Static Lex Inline Method
*/
static lexInline(src, options) {
const lexer = new _Lexer(options);
return lexer.inlineTokens(src);
}
/**
* Preprocessing
*/
lex(src) {
src = src
.replace(/\r\n|\r/g, '\n');
this.blockTokens(src, this.tokens);
let next;
while (next = this.inlineQueue.shift()) {
this.inlineTokens(next.src, next.tokens);
}
return this.tokens;
}
blockTokens(src, tokens = []) {
if (this.options.pedantic) {
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
}
else {
src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
return leading + ' '.repeat(tabs.length);
});
}
let token;
let lastToken;
let cutSrc;
let lastParagraphClipped;
while (src) {
if (this.options.extensions
&& this.options.extensions.block
&& this.options.extensions.block.some((extTokenizer) => {
if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
src = src.substring(token.raw.length);
tokens.push(token);
return true;
}
return false;
})) {
continue;
}
// newline
if (token = this.tokenizer.space(src)) {
src = src.substring(token.raw.length);
if (token.raw.length === 1 && tokens.length > 0) {
// if there's a single \n as a spacer, it's terminating the last line,
// so move it there so that we don't get unecessary paragraph tags
tokens[tokens.length - 1].raw += '\n';
}
else {
tokens.push(token);
}
continue;
}
// code
if (token = this.tokenizer.code(src)) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
}
else {
tokens.push(token);
}
continue;
}
// fences
if (token = this.tokenizer.fences(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// heading
if (token = this.tokenizer.heading(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// hr
if (token = this.tokenizer.hr(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// blockquote
if (token = this.tokenizer.blockquote(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// list
if (token = this.tokenizer.list(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// html
if (token = this.tokenizer.html(src)) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
// def
if (token = this.tokenizer.def(src)) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];