UNPKG

marked

Version:

A markdown parser built for speed

1,365 lines (1,357 loc) 89.2 kB
/** * marked v9.0.0 - a markdown parser * Copyright (c) 2011-2023, Christopher Jeffrey. (MIT Licensed) * https://github.com/markedjs/marked */ /** * DO NOT EDIT THIS FILE * The code in this file is generated from files in ./src/ */ 'use strict'; /** * Gets the original marked default options. */ function _getDefaults() { return { async: false, breaks: false, extensions: null, gfm: true, hooks: null, pedantic: false, renderer: null, silent: false, tokenizer: null, walkTokens: null }; } exports.defaults = _getDefaults(); function changeDefaults(newDefaults) { exports.defaults = newDefaults; } /** * Helpers */ const escapeTest = /[&<>"']/; const escapeReplace = new RegExp(escapeTest.source, 'g'); const escapeTestNoEncode = /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/; const escapeReplaceNoEncode = new RegExp(escapeTestNoEncode.source, 'g'); const escapeReplacements = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }; const getEscapeReplacement = (ch) => escapeReplacements[ch]; function escape(html, encode) { if (encode) { if (escapeTest.test(html)) { return html.replace(escapeReplace, getEscapeReplacement); } } else { if (escapeTestNoEncode.test(html)) { return html.replace(escapeReplaceNoEncode, getEscapeReplacement); } } return html; } const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig; function unescape(html) { // explicitly match decimal, hex, and named HTML entities return html.replace(unescapeTest, (_, n) => { n = n.toLowerCase(); if (n === 'colon') return ':'; if (n.charAt(0) === '#') { return n.charAt(1) === 'x' ? String.fromCharCode(parseInt(n.substring(2), 16)) : String.fromCharCode(+n.substring(1)); } return ''; }); } const caret = /(^|[^\[])\^/g; function edit(regex, opt) { regex = typeof regex === 'string' ? regex : regex.source; opt = opt || ''; const obj = { replace: (name, val) => { val = typeof val === 'object' && 'source' in val ? val.source : val; val = val.replace(caret, '$1'); regex = regex.replace(name, val); return obj; }, getRegex: () => { return new RegExp(regex, opt); } }; return obj; } function cleanUrl(href) { try { href = encodeURI(href).replace(/%25/g, '%'); } catch (e) { return null; } return href; } const noopTest = { exec: () => null }; function splitCells(tableRow, count) { // ensure that every cell-delimiting pipe has a space // before it to distinguish it from an escaped pipe const row = tableRow.replace(/\|/g, (match, offset, str) => { let escaped = false; let curr = offset; while (--curr >= 0 && str[curr] === '\\') escaped = !escaped; if (escaped) { // odd number of slashes means | is escaped // so we leave it alone return '|'; } else { // add space before unescaped | return ' |'; } }), cells = row.split(/ \|/); let i = 0; // First/last cell in a row cannot be empty if it has no leading/trailing pipe if (!cells[0].trim()) { cells.shift(); } if (cells.length > 0 && !cells[cells.length - 1].trim()) { cells.pop(); } if (count) { if (cells.length > count) { cells.splice(count); } else { while (cells.length < count) cells.push(''); } } for (; i < cells.length; i++) { // leading or trailing whitespace is ignored per the gfm spec cells[i] = cells[i].trim().replace(/\\\|/g, '|'); } return cells; } /** * Remove trailing 'c's. Equivalent to str.replace(/c*$/, ''). * /c*$/ is vulnerable to REDOS. * * @param str * @param c * @param invert Remove suffix of non-c chars instead. Default falsey. */ function rtrim(str, c, invert) { const l = str.length; if (l === 0) { return ''; } // Length of suffix matching the invert condition. let suffLen = 0; // Step left until we fail to match the invert condition. while (suffLen < l) { const currChar = str.charAt(l - suffLen - 1); if (currChar === c && !invert) { suffLen++; } else if (currChar !== c && invert) { suffLen++; } else { break; } } return str.slice(0, l - suffLen); } function findClosingBracket(str, b) { if (str.indexOf(b[1]) === -1) { return -1; } let level = 0; for (let i = 0; i < str.length; i++) { if (str[i] === '\\') { i++; } else if (str[i] === b[0]) { level++; } else if (str[i] === b[1]) { level--; if (level < 0) { return i; } } } return -1; } function outputLink(cap, link, raw, lexer) { const href = link.href; const title = link.title ? escape(link.title) : null; const text = cap[1].replace(/\\([\[\]])/g, '$1'); if (cap[0].charAt(0) !== '!') { lexer.state.inLink = true; const token = { type: 'link', raw, href, title, text, tokens: lexer.inlineTokens(text) }; lexer.state.inLink = false; return token; } return { type: 'image', raw, href, title, text: escape(text) }; } function indentCodeCompensation(raw, text) { const matchIndentToCode = raw.match(/^(\s+)(?:```)/); if (matchIndentToCode === null) { return text; } const indentToCode = matchIndentToCode[1]; return text .split('\n') .map(node => { const matchIndentInNode = node.match(/^\s+/); if (matchIndentInNode === null) { return node; } const [indentInNode] = matchIndentInNode; if (indentInNode.length >= indentToCode.length) { return node.slice(indentToCode.length); } return node; }) .join('\n'); } /** * Tokenizer */ class _Tokenizer { options; // TODO: Fix this rules type rules; lexer; constructor(options) { this.options = options || exports.defaults; } space(src) { const cap = this.rules.block.newline.exec(src); if (cap && cap[0].length > 0) { return { type: 'space', raw: cap[0] }; } } code(src) { const cap = this.rules.block.code.exec(src); if (cap) { const text = cap[0].replace(/^ {1,4}/gm, ''); return { type: 'code', raw: cap[0], codeBlockStyle: 'indented', text: !this.options.pedantic ? rtrim(text, '\n') : text }; } } fences(src) { const cap = this.rules.block.fences.exec(src); if (cap) { const raw = cap[0]; const text = indentCodeCompensation(raw, cap[3] || ''); return { type: 'code', raw, lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2], text }; } } heading(src) { const cap = this.rules.block.heading.exec(src); if (cap) { let text = cap[2].trim(); // remove trailing #s if (/#$/.test(text)) { const trimmed = rtrim(text, '#'); if (this.options.pedantic) { text = trimmed.trim(); } else if (!trimmed || / $/.test(trimmed)) { // CommonMark requires space before trailing #s text = trimmed.trim(); } } return { type: 'heading', raw: cap[0], depth: cap[1].length, text, tokens: this.lexer.inline(text) }; } } hr(src) { const cap = this.rules.block.hr.exec(src); if (cap) { return { type: 'hr', raw: cap[0] }; } } blockquote(src) { const cap = this.rules.block.blockquote.exec(src); if (cap) { const text = cap[0].replace(/^ *>[ \t]?/gm, ''); const top = this.lexer.state.top; this.lexer.state.top = true; const tokens = this.lexer.blockTokens(text); this.lexer.state.top = top; return { type: 'blockquote', raw: cap[0], tokens, text }; } } list(src) { let cap = this.rules.block.list.exec(src); if (cap) { let bull = cap[1].trim(); const isordered = bull.length > 1; const list = { type: 'list', raw: '', ordered: isordered, start: isordered ? +bull.slice(0, -1) : '', loose: false, items: [] }; bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`; if (this.options.pedantic) { bull = isordered ? bull : '[*+-]'; } // Get next list item const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`); let raw = ''; let itemContents = ''; let endsWithBlankLine = false; // Check if current bullet point can start a new List Item while (src) { let endEarly = false; if (!(cap = itemRegex.exec(src))) { break; } if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?) break; } raw = cap[0]; src = src.substring(raw.length); let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t) => ' '.repeat(3 * t.length)); let nextLine = src.split('\n', 1)[0]; let indent = 0; if (this.options.pedantic) { indent = 2; itemContents = line.trimStart(); } else { indent = cap[2].search(/[^ ]/); // Find first non-space char indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent itemContents = line.slice(indent); indent += cap[1].length; } let blankLine = false; if (!line && /^ *$/.test(nextLine)) { // Items begin with at most one blank line raw += nextLine + '\n'; src = src.substring(nextLine.length + 1); endEarly = true; } if (!endEarly) { const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`); const hrRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`); const fencesBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`); const headingBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`); // Check if following lines should be included in List Item while (src) { const rawLine = src.split('\n', 1)[0]; nextLine = rawLine; // Re-align to follow commonmark nesting rules if (this.options.pedantic) { nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' '); } // End list item if found code fences if (fencesBeginRegex.test(nextLine)) { break; } // End list item if found start of new heading if (headingBeginRegex.test(nextLine)) { break; } // End list item if found start of new bullet if (nextBulletRegex.test(nextLine)) { break; } // Horizontal rule found if (hrRegex.test(src)) { break; } if (nextLine.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible itemContents += '\n' + nextLine.slice(indent); } else { // not enough indentation if (blankLine) { break; } // paragraph continuation unless last line was a different block level element if (line.search(/[^ ]/) >= 4) { // indented code block break; } if (fencesBeginRegex.test(line)) { break; } if (headingBeginRegex.test(line)) { break; } if (hrRegex.test(line)) { break; } itemContents += '\n' + nextLine; } if (!blankLine && !nextLine.trim()) { // Check if current line is blank blankLine = true; } raw += rawLine + '\n'; src = src.substring(rawLine.length + 1); line = nextLine.slice(indent); } } if (!list.loose) { // If the previous item ended with a blank line, the list is loose if (endsWithBlankLine) { list.loose = true; } else if (/\n *\n *$/.test(raw)) { endsWithBlankLine = true; } } let istask = null; let ischecked; // Check for task list items if (this.options.gfm) { istask = /^\[[ xX]\] /.exec(itemContents); if (istask) { ischecked = istask[0] !== '[ ] '; itemContents = itemContents.replace(/^\[[ xX]\] +/, ''); } } list.items.push({ type: 'list_item', raw, task: !!istask, checked: ischecked, loose: false, text: itemContents, tokens: [] }); list.raw += raw; } // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic list.items[list.items.length - 1].raw = raw.trimEnd(); list.items[list.items.length - 1].text = itemContents.trimEnd(); list.raw = list.raw.trimEnd(); // Item child tokens handled here at end because we needed to have the final item to trim it first for (let i = 0; i < list.items.length; i++) { this.lexer.state.top = false; list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []); if (!list.loose) { // Check if list should be loose const spacers = list.items[i].tokens.filter(t => t.type === 'space'); const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => /\n.*\n/.test(t.raw)); list.loose = hasMultipleLineBreaks; } } // Set all items to loose if list is loose if (list.loose) { for (let i = 0; i < list.items.length; i++) { list.items[i].loose = true; } } return list; } } html(src) { const cap = this.rules.block.html.exec(src); if (cap) { const token = { type: 'html', block: true, raw: cap[0], pre: cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style', text: cap[0] }; return token; } } def(src) { const cap = this.rules.block.def.exec(src); if (cap) { const tag = cap[1].toLowerCase().replace(/\s+/g, ' '); const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : ''; const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3]; return { type: 'def', tag, raw: cap[0], href, title }; } } table(src) { const cap = this.rules.block.table.exec(src); if (cap) { const item = { type: 'table', raw: cap[0], header: splitCells(cap[1]).map(c => { return { text: c, tokens: [] }; }), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [] }; if (item.header.length === item.align.length) { let l = item.align.length; let i, j, k, row; for (i = 0; i < l; i++) { const align = item.align[i]; if (align) { if (/^ *-+: *$/.test(align)) { item.align[i] = 'right'; } else if (/^ *:-+: *$/.test(align)) { item.align[i] = 'center'; } else if (/^ *:-+ *$/.test(align)) { item.align[i] = 'left'; } else { item.align[i] = null; } } } l = item.rows.length; for (i = 0; i < l; i++) { item.rows[i] = splitCells(item.rows[i], item.header.length).map(c => { return { text: c, tokens: [] }; }); } // parse child tokens inside headers and cells // header child tokens l = item.header.length; for (j = 0; j < l; j++) { item.header[j].tokens = this.lexer.inline(item.header[j].text); } // cell child tokens l = item.rows.length; for (j = 0; j < l; j++) { row = item.rows[j]; for (k = 0; k < row.length; k++) { row[k].tokens = this.lexer.inline(row[k].text); } } return item; } } } lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, text: cap[1], tokens: this.lexer.inline(cap[1]) }; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { const text = cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) : cap[1]; return { type: 'paragraph', raw: cap[0], text, tokens: this.lexer.inline(text) }; } } text(src) { const cap = this.rules.block.text.exec(src); if (cap) { return { type: 'text', raw: cap[0], text: cap[0], tokens: this.lexer.inline(cap[0]) }; } } escape(src) { const cap = this.rules.inline.escape.exec(src); if (cap) { return { type: 'escape', raw: cap[0], text: escape(cap[1]) }; } } tag(src) { const cap = this.rules.inline.tag.exec(src); if (cap) { if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) { this.lexer.state.inLink = true; } else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) { this.lexer.state.inLink = false; } if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { this.lexer.state.inRawBlock = true; } else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { this.lexer.state.inRawBlock = false; } return { type: 'html', raw: cap[0], inLink: this.lexer.state.inLink, inRawBlock: this.lexer.state.inRawBlock, block: false, text: cap[0] }; } } link(src) { const cap = this.rules.inline.link.exec(src); if (cap) { const trimmedUrl = cap[2].trim(); if (!this.options.pedantic && /^</.test(trimmedUrl)) { // commonmark requires matching angle brackets if (!(/>$/.test(trimmedUrl))) { return; } // ending angle bracket cannot be escaped const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\'); if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) { return; } } else { // find closing parenthesis const lastParenIndex = findClosingBracket(cap[2], '()'); if (lastParenIndex > -1) { const start = cap[0].indexOf('!') === 0 ? 5 : 4; const linkLen = start + cap[1].length + lastParenIndex; cap[2] = cap[2].substring(0, lastParenIndex); cap[0] = cap[0].substring(0, linkLen).trim(); cap[3] = ''; } } let href = cap[2]; let title = ''; if (this.options.pedantic) { // split pedantic href and title const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); if (link) { href = link[1]; title = link[3]; } } else { title = cap[3] ? cap[3].slice(1, -1) : ''; } href = href.trim(); if (/^</.test(href)) { if (this.options.pedantic && !(/>$/.test(trimmedUrl))) { // pedantic allows starting angle bracket without ending angle bracket href = href.slice(1); } else { href = href.slice(1, -1); } } return outputLink(cap, { href: href ? href.replace(this.rules.inline._escapes, '$1') : href, title: title ? title.replace(this.rules.inline._escapes, '$1') : title }, cap[0], this.lexer); } } reflink(src, links) { let cap; if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { let link = (cap[2] || cap[1]).replace(/\s+/g, ' '); link = links[link.toLowerCase()]; if (!link) { const text = cap[0].charAt(0); return { type: 'text', raw: text, text }; } return outputLink(cap, link, cap[0], this.lexer); } } emStrong(src, maskedSrc, prevChar = '') { let match = this.rules.inline.emStrong.lDelim.exec(src); if (!match) return; // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return; const nextChar = match[1] || match[2] || ''; if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) { // unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below) const lLength = [...match[0]].length - 1; let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0; const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd; endReg.lastIndex = 0; // Clip maskedSrc to same section of string as src (move to lexer?) maskedSrc = maskedSrc.slice(-1 * src.length + lLength); while ((match = endReg.exec(maskedSrc)) != null) { rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6]; if (!rDelim) continue; // skip single * in __abc*abc__ rLength = [...rDelim].length; if (match[3] || match[4]) { // found another Left Delim delimTotal += rLength; continue; } else if (match[5] || match[6]) { // either Left or Right Delim if (lLength % 3 && !((lLength + rLength) % 3)) { midDelimTotal += rLength; continue; // CommonMark Emphasis Rules 9-10 } } delimTotal -= rLength; if (delimTotal > 0) continue; // Haven't found enough closing delimiters // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); const raw = [...src].slice(0, lLength + match.index + rLength + 1).join(''); // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { const text = raw.slice(1, -1); return { type: 'em', raw, text, tokens: this.lexer.inlineTokens(text) }; } // Create 'strong' if smallest delimiter has even char count. **a*** const text = raw.slice(2, -2); return { type: 'strong', raw, text, tokens: this.lexer.inlineTokens(text) }; } } } codespan(src) { const cap = this.rules.inline.code.exec(src); if (cap) { let text = cap[2].replace(/\n/g, ' '); const hasNonSpaceChars = /[^ ]/.test(text); const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text); if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) { text = text.substring(1, text.length - 1); } text = escape(text, true); return { type: 'codespan', raw: cap[0], text }; } } br(src) { const cap = this.rules.inline.br.exec(src); if (cap) { return { type: 'br', raw: cap[0] }; } } del(src) { const cap = this.rules.inline.del.exec(src); if (cap) { return { type: 'del', raw: cap[0], text: cap[2], tokens: this.lexer.inlineTokens(cap[2]) }; } } autolink(src) { const cap = this.rules.inline.autolink.exec(src); if (cap) { let text, href; if (cap[2] === '@') { text = escape(cap[1]); href = 'mailto:' + text; } else { text = escape(cap[1]); href = text; } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } url(src) { let cap; if (cap = this.rules.inline.url.exec(src)) { let text, href; if (cap[2] === '@') { text = escape(cap[0]); href = 'mailto:' + text; } else { // do extended autolink path validation let prevCapZero; do { prevCapZero = cap[0]; cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; } while (prevCapZero !== cap[0]); text = escape(cap[0]); if (cap[1] === 'www.') { href = 'http://' + cap[0]; } else { href = cap[0]; } } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } inlineText(src) { const cap = this.rules.inline.text.exec(src); if (cap) { let text; if (this.lexer.state.inRawBlock) { text = cap[0]; } else { text = escape(cap[0]); } return { type: 'text', raw: cap[0], text }; } } } /** * Block-Level Grammar */ // Not all rules are defined in the object literal // @ts-expect-error const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/, hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4) + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5) + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6) + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/, table: noopTest, lheading: /^((?:(?!^bull ).|\n(?!\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph // interruption rules of commonmark and the original markdown spec: _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/, text: /^[^\n]+/ }; block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/; block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; block.def = edit(block.def) .replace('label', block._label) .replace('title', block._title) .getRegex(); block.bullet = /(?:[*+-]|\d{1,9}[.)])/; block.listItemStart = edit(/^( *)(bull) */) .replace('bull', block.bullet) .getRegex(); block.list = edit(block.list) .replace(/bull/g, block.bullet) .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + '|track|ul'; block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/; block.html = edit(block.html, 'i') .replace('comment', block._comment) .replace('tag', block._tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); block.lheading = edit(block.lheading) .replace(/bull/g, block.bullet) // lists can interrupt .getRegex(); block.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('|table', '') .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); block.blockquote = edit(block.blockquote) .replace('paragraph', block.paragraph) .getRegex(); /** * Normal Block Grammar */ block.normal = { ...block }; /** * GFM Block Grammar */ block.gfm = { ...block.normal, table: '^ *([^\\n ].*\\|.*)\\n' // Header + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }; block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); block.gfm.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('table', block.gfm.table) // interrupt paragraphs with table .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); /** * Pedantic grammar (original John Gruber's loose markdown specification) */ block.pedantic = { ...block.normal, html: edit('^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') .replace('comment', block._comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest, lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/, paragraph: edit(block.normal._paragraph) .replace('hr', block.hr) .replace('heading', ' *#{1,6} *[^\n]') .replace('lheading', block.lheading) .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') .replace('|html', '') .getRegex() }; /** * Inline-Level Grammar */ // Not all rules are defined in the object literal // @ts-expect-error const inline = { escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noopTest, tag: '^comment' + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?> + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html> + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/, reflink: /^!?\[(label)\]\[(ref)\]/, nolink: /^!?\[(ref)\](?:\[\])?/, reflinkSearch: 'reflink|nolink(?!\\()', emStrong: { lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. // | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/, rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, del: noopTest, text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/, punctuation: /^((?![*_])[\spunctuation])/ }; // list of unicode punctuation marks, plus any missing characters from CommonMark spec inline._punctuation = '\\p{P}$+<=>`^|~'; inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [title](link), `code`, <html> inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g; inline.anyPunctuation = /\\[punct]/g; inline._escapes = /\\([punct])/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); inline.emStrong.lDelim = edit(inline.emStrong.lDelim, 'u') .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline.anyPunctuation = edit(inline.anyPunctuation, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline._escapes = edit(inline._escapes, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; inline.autolink = edit(inline.autolink) .replace('scheme', inline._scheme) .replace('email', inline._email) .getRegex(); inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; inline.tag = edit(inline.tag) .replace('comment', inline._comment) .replace('attribute', inline._attribute) .getRegex(); inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/; inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; inline.link = edit(inline.link) .replace('label', inline._label) .replace('href', inline._href) .replace('title', inline._title) .getRegex(); inline.reflink = edit(inline.reflink) .replace('label', inline._label) .replace('ref', block._label) .getRegex(); inline.nolink = edit(inline.nolink) .replace('ref', block._label) .getRegex(); inline.reflinkSearch = edit(inline.reflinkSearch, 'g') .replace('reflink', inline.reflink) .replace('nolink', inline.nolink) .getRegex(); /** * Normal Inline Grammar */ inline.normal = { ...inline }; /** * Pedantic Inline Grammar */ inline.pedantic = { ...inline.normal, strong: { start: /^__|\*\*/, middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, endAst: /\*\*(?!\*)/g, endUnd: /__(?!_)/g }, em: { start: /^_|\*/, middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, endAst: /\*(?!\*)/g, endUnd: /_(?!_)/g }, link: edit(/^!?\[(label)\]\((.*?)\)/) .replace('label', inline._label) .getRegex(), reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', inline._label) .getRegex() }; /** * GFM Inline Grammar */ inline.gfm = { ...inline.normal, escape: edit(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/, del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/, text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/ }; inline.gfm.url = edit(inline.gfm.url, 'i') .replace('email', inline.gfm._extended_email) .getRegex(); /** * GFM + Line Breaks Inline Grammar */ inline.breaks = { ...inline.gfm, br: edit(inline.br).replace('{2,}', '*').getRegex(), text: edit(inline.gfm.text) .replace('\\b_', '\\b_| {2,}\\n') .replace(/\{2,\}/g, '*') .getRegex() }; /** * Block Lexer */ class _Lexer { tokens; options; state; tokenizer; inlineQueue; constructor(options) { // TokenList cannot be created in one go // @ts-expect-error this.tokens = []; this.tokens.links = Object.create(null); this.options = options || exports.defaults; this.options.tokenizer = this.options.tokenizer || new _Tokenizer(); this.tokenizer = this.options.tokenizer; this.tokenizer.options = this.options; this.tokenizer.lexer = this; this.inlineQueue = []; this.state = { inLink: false, inRawBlock: false, top: true }; const rules = { block: block.normal, inline: inline.normal }; if (this.options.pedantic) { rules.block = block.pedantic; rules.inline = inline.pedantic; } else if (this.options.gfm) { rules.block = block.gfm; if (this.options.breaks) { rules.inline = inline.breaks; } else { rules.inline = inline.gfm; } } this.tokenizer.rules = rules; } /** * Expose Rules */ static get rules() { return { block, inline }; } /** * Static Lex Method */ static lex(src, options) { const lexer = new _Lexer(options); return lexer.lex(src); } /** * Static Lex Inline Method */ static lexInline(src, options) { const lexer = new _Lexer(options); return lexer.inlineTokens(src); } /** * Preprocessing */ lex(src) { src = src .replace(/\r\n|\r/g, '\n'); this.blockTokens(src, this.tokens); let next; while (next = this.inlineQueue.shift()) { this.inlineTokens(next.src, next.tokens); } return this.tokens; } blockTokens(src, tokens = []) { if (this.options.pedantic) { src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); } else { src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { return leading + ' '.repeat(tabs.length); }); } let token; let lastToken; let cutSrc; let lastParagraphClipped; while (src) { if (this.options.extensions && this.options.extensions.block && this.options.extensions.block.some((extTokenizer) => { if (token = extTokenizer.call({ lexer: this }, src, tokens)) { src = src.substring(token.raw.length); tokens.push(token); return true; } return false; })) { continue; } // newline if (token = this.tokenizer.space(src)) { src = src.substring(token.raw.length); if (token.raw.length === 1 && tokens.length > 0) { // if there's a single \n as a spacer, it's terminating the last line, // so move it there so that we don't get unecessary paragraph tags tokens[tokens.length - 1].raw += '\n'; } else { tokens.push(token); } continue; } // code if (token = this.tokenizer.code(src)) { src = src.substring(token.raw.length); lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text; } else { tokens.push(token); } continue; } // fences if (token = this.tokenizer.fences(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // heading if (token = this.tokenizer.heading(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // hr if (token = this.tokenizer.hr(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // blockquote if (token = this.tokenizer.blockquote(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // list if (token = this.tokenizer.list(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // html if (token = this.tokenizer.html(src)) { src = src.substring(token.raw.length); tokens.push(token); continue; } // def if (token = this.tokenizer.def(src)) { src = src.substring(token.raw.length); lastToken = tokens[tokens.length - 1];