UNPKG

marked

Version:

A markdown parser built for speed

1,157 lines (1,151 loc) 108 kB
/** * marked v15.0.11 - a markdown parser * Copyright (c) 2011-2025, Christopher Jeffrey. (MIT Licensed) * https://github.com/markedjs/marked */ /** * DO NOT EDIT THIS FILE * The code in this file is generated from files in ./src/ */ (function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.marked = {})); })(this, (function (exports) { 'use strict'; /** * Gets the original marked default options. */ function _getDefaults() { return { async: false, breaks: false, extensions: null, gfm: true, hooks: null, pedantic: false, renderer: null, silent: false, tokenizer: null, walkTokens: null, }; } exports.defaults = _getDefaults(); function changeDefaults(newDefaults) { exports.defaults = newDefaults; } const noopTest = { exec: () => null }; function edit(regex, opt = '') { let source = typeof regex === 'string' ? regex : regex.source; const obj = { replace: (name, val) => { let valSource = typeof val === 'string' ? val : val.source; valSource = valSource.replace(other.caret, '$1'); source = source.replace(name, valSource); return obj; }, getRegex: () => { return new RegExp(source, opt); }, }; return obj; } const other = { codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm, outputLinkReplace: /\\([\[\]])/g, indentCodeCompensation: /^(\s+)(?:```)/, beginningSpace: /^\s+/, endingHash: /#$/, startingSpaceChar: /^ /, endingSpaceChar: / $/, nonSpaceChar: /[^ ]/, newLineCharGlobal: /\n/g, tabCharGlobal: /\t/g, multipleSpaceGlobal: /\s+/g, blankLine: /^[ \t]*$/, doubleBlankLine: /\n[ \t]*\n[ \t]*$/, blockquoteStart: /^ {0,3}>/, blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g, blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm, listReplaceTabs: /^\t+/, listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g, listIsTask: /^\[[ xX]\] /, listReplaceTask: /^\[[ xX]\] +/, anyLine: /\n.*\n/, hrefBrackets: /^<(.*)>$/, tableDelimiter: /[:|]/, tableAlignChars: /^\||\| *$/g, tableRowBlankLine: /\n[ \t]*$/, tableAlignRight: /^ *-+: *$/, tableAlignCenter: /^ *:-+: *$/, tableAlignLeft: /^ *:-+ *$/, startATag: /^<a /i, endATag: /^<\/a>/i, startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i, endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i, startAngleBracket: /^</, endAngleBracket: />$/, pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/, unicodeAlphaNumeric: /[\p{L}\p{N}]/u, escapeTest: /[&<>"']/, escapeReplace: /[&<>"']/g, escapeTestNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/, escapeReplaceNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g, unescapeTest: /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig, caret: /(^|[^\[])\^/g, percentDecode: /%25/g, findPipe: /\|/g, splitPipe: / \|/, slashPipe: /\\\|/g, carriageReturn: /\r\n|\r/g, spaceLine: /^ +$/gm, notSpaceStart: /^\S*/, endingNewline: /\n$/, listItemRegex: (bull) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`), nextBulletRegex: (indent) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`), hrRegex: (indent) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`), fencesBeginRegex: (indent) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`), headingBeginRegex: (indent) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`), htmlBeginRegex: (indent) => new RegExp(`^ {0,${Math.min(3, indent - 1)}}<(?:[a-z].*>|!--)`, 'i'), }; /** * Block-Level Grammar */ const newline = /^(?:[ \t]*(?:\n|$))+/; const blockCode = /^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/; const fences = /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/; const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/; const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/; const bullet = /(?:[*+-]|\d{1,9}[.)])/; const lheadingCore = /^(?!bull |blockCode|fences|blockquote|heading|html|table)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html|table))+?)\n {0,3}(=+|-+) *(?:\n+|$)/; const lheading = edit(lheadingCore) .replace(/bull/g, bullet) // lists can interrupt .replace(/blockCode/g, /(?: {4}| {0,3}\t)/) // indented code blocks can interrupt .replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt .replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt .replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt .replace(/html/g, / {0,3}<[^\n>]+>\n/) // block html can interrupt .replace(/\|table/g, '') // table not in commonmark .getRegex(); const lheadingGfm = edit(lheadingCore) .replace(/bull/g, bullet) // lists can interrupt .replace(/blockCode/g, /(?: {4}| {0,3}\t)/) // indented code blocks can interrupt .replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt .replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt .replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt .replace(/html/g, / {0,3}<[^\n>]+>\n/) // block html can interrupt .replace(/table/g, / {0,3}\|?(?:[:\- ]*\|)+[\:\- ]*\n/) // table can interrupt .getRegex(); const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/; const blockText = /^[^\n]+/; const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/; const def = edit(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/) .replace('label', _blockLabel) .replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/) .getRegex(); const list = edit(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/) .replace(/bull/g, bullet) .getRegex(); const _tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title' + '|tr|track|ul'; const _comment = /<!--(?:-?>|[\s\S]*?(?:-->|$))/; const html = edit('^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4) + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5) + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6) + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag + ')', 'i') .replace('comment', _comment) .replace('tag', _tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); const paragraph = edit(_paragraph) .replace('hr', hr) .replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('|lheading', '') // setext headings don't interrupt commonmark paragraphs .replace('|table', '') .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', _tag) // pars can be interrupted by type (6) html blocks .getRegex(); const blockquote = edit(/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/) .replace('paragraph', paragraph) .getRegex(); /** * Normal Block Grammar */ const blockNormal = { blockquote, code: blockCode, def, fences, heading, hr, html, lheading, list, newline, paragraph, table: noopTest, text: blockText, }; /** * GFM Block Grammar */ const gfmTable = edit('^ *([^\\n ].*)\\n' // Header + ' {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)' // Align + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)') // Cells .replace('hr', hr) .replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('blockquote', ' {0,3}>') .replace('code', '(?: {4}| {0,3}\t)[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', _tag) // tables can be interrupted by type (6) html blocks .getRegex(); const blockGfm = { ...blockNormal, lheading: lheadingGfm, table: gfmTable, paragraph: edit(_paragraph) .replace('hr', hr) .replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('|lheading', '') // setext headings don't interrupt commonmark paragraphs .replace('table', gfmTable) // interrupt paragraphs with table .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', _tag) // pars can be interrupted by type (6) html blocks .getRegex(), }; /** * Pedantic grammar (original John Gruber's loose markdown specification) */ const blockPedantic = { ...blockNormal, html: edit('^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') .replace('comment', _comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest, // fences not supported lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/, paragraph: edit(_paragraph) .replace('hr', hr) .replace('heading', ' *#{1,6} *[^\n]') .replace('lheading', lheading) .replace('|table', '') .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') .replace('|html', '') .replace('|tag', '') .getRegex(), }; /** * Inline-Level Grammar */ const escape$1 = /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/; const inlineCode = /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/; const br = /^( {2,}|\\)\n(?!\s*$)/; const inlineText = /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/; // list of unicode punctuation marks, plus any missing characters from CommonMark spec const _punctuation = /[\p{P}\p{S}]/u; const _punctuationOrSpace = /[\s\p{P}\p{S}]/u; const _notPunctuationOrSpace = /[^\s\p{P}\p{S}]/u; const punctuation = edit(/^((?![*_])punctSpace)/, 'u') .replace(/punctSpace/g, _punctuationOrSpace).getRegex(); // GFM allows ~ inside strong and em for strikethrough const _punctuationGfmStrongEm = /(?!~)[\p{P}\p{S}]/u; const _punctuationOrSpaceGfmStrongEm = /(?!~)[\s\p{P}\p{S}]/u; const _notPunctuationOrSpaceGfmStrongEm = /(?:[^\s\p{P}\p{S}]|~)/u; // sequences em should skip over [title](link), `code`, <html> const blockSkip = /\[[^[\]]*?\]\((?:\\.|[^\\\(\)]|\((?:\\.|[^\\\(\)])*\))*\)|`[^`]*?`|<[^<>]*?>/g; const emStrongLDelimCore = /^(?:\*+(?:((?!\*)punct)|[^\s*]))|^_+(?:((?!_)punct)|([^\s_]))/; const emStrongLDelim = edit(emStrongLDelimCore, 'u') .replace(/punct/g, _punctuation) .getRegex(); const emStrongLDelimGfm = edit(emStrongLDelimCore, 'u') .replace(/punct/g, _punctuationGfmStrongEm) .getRegex(); const emStrongRDelimAstCore = '^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)' // Skip orphan inside strong + '|[^*]+(?=[^*])' // Consume to delim + '|(?!\\*)punct(\\*+)(?=[\\s]|$)' // (1) #*** can only be a Right Delimiter + '|notPunctSpace(\\*+)(?!\\*)(?=punctSpace|$)' // (2) a***#, a*** can only be a Right Delimiter + '|(?!\\*)punctSpace(\\*+)(?=notPunctSpace)' // (3) #***a, ***a can only be Left Delimiter + '|[\\s](\\*+)(?!\\*)(?=punct)' // (4) ***# can only be Left Delimiter + '|(?!\\*)punct(\\*+)(?!\\*)(?=punct)' // (5) #***# can be either Left or Right Delimiter + '|notPunctSpace(\\*+)(?=notPunctSpace)'; // (6) a***a can be either Left or Right Delimiter const emStrongRDelimAst = edit(emStrongRDelimAstCore, 'gu') .replace(/notPunctSpace/g, _notPunctuationOrSpace) .replace(/punctSpace/g, _punctuationOrSpace) .replace(/punct/g, _punctuation) .getRegex(); const emStrongRDelimAstGfm = edit(emStrongRDelimAstCore, 'gu') .replace(/notPunctSpace/g, _notPunctuationOrSpaceGfmStrongEm) .replace(/punctSpace/g, _punctuationOrSpaceGfmStrongEm) .replace(/punct/g, _punctuationGfmStrongEm) .getRegex(); // (6) Not allowed for _ const emStrongRDelimUnd = edit('^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)' // Skip orphan inside strong + '|[^_]+(?=[^_])' // Consume to delim + '|(?!_)punct(_+)(?=[\\s]|$)' // (1) #___ can only be a Right Delimiter + '|notPunctSpace(_+)(?!_)(?=punctSpace|$)' // (2) a___#, a___ can only be a Right Delimiter + '|(?!_)punctSpace(_+)(?=notPunctSpace)' // (3) #___a, ___a can only be Left Delimiter + '|[\\s](_+)(?!_)(?=punct)' // (4) ___# can only be Left Delimiter + '|(?!_)punct(_+)(?!_)(?=punct)', 'gu') // (5) #___# can be either Left or Right Delimiter .replace(/notPunctSpace/g, _notPunctuationOrSpace) .replace(/punctSpace/g, _punctuationOrSpace) .replace(/punct/g, _punctuation) .getRegex(); const anyPunctuation = edit(/\\(punct)/, 'gu') .replace(/punct/g, _punctuation) .getRegex(); const autolink = edit(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/) .replace('scheme', /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/) .replace('email', /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/) .getRegex(); const _inlineComment = edit(_comment).replace('(?:-->|$)', '-->').getRegex(); const tag = edit('^comment' + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?> + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html> + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>') // CDATA section .replace('comment', _inlineComment) .replace('attribute', /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/) .getRegex(); const _inlineLabel = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; const link = edit(/^!?\[(label)\]\(\s*(href)(?:(?:[ \t]*(?:\n[ \t]*)?)(title))?\s*\)/) .replace('label', _inlineLabel) .replace('href', /<(?:\\.|[^\n<>\\])+>|[^ \t\n\x00-\x1f]*/) .replace('title', /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/) .getRegex(); const reflink = edit(/^!?\[(label)\]\[(ref)\]/) .replace('label', _inlineLabel) .replace('ref', _blockLabel) .getRegex(); const nolink = edit(/^!?\[(ref)\](?:\[\])?/) .replace('ref', _blockLabel) .getRegex(); const reflinkSearch = edit('reflink|nolink(?!\\()', 'g') .replace('reflink', reflink) .replace('nolink', nolink) .getRegex(); /** * Normal Inline Grammar */ const inlineNormal = { _backpedal: noopTest, // only used for GFM url anyPunctuation, autolink, blockSkip, br, code: inlineCode, del: noopTest, emStrongLDelim, emStrongRDelimAst, emStrongRDelimUnd, escape: escape$1, link, nolink, punctuation, reflink, reflinkSearch, tag, text: inlineText, url: noopTest, }; /** * Pedantic Inline Grammar */ const inlinePedantic = { ...inlineNormal, link: edit(/^!?\[(label)\]\((.*?)\)/) .replace('label', _inlineLabel) .getRegex(), reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', _inlineLabel) .getRegex(), }; /** * GFM Inline Grammar */ const inlineGfm = { ...inlineNormal, emStrongRDelimAst: emStrongRDelimAstGfm, emStrongLDelim: emStrongLDelimGfm, url: edit(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, 'i') .replace('email', /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/) .getRegex(), _backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/, del: /^(~~?)(?=[^\s~])((?:\\.|[^\\])*?(?:\\.|[^\s~\\]))\1(?=[^~]|$)/, text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/, }; /** * GFM + Line Breaks Inline Grammar */ const inlineBreaks = { ...inlineGfm, br: edit(br).replace('{2,}', '*').getRegex(), text: edit(inlineGfm.text) .replace('\\b_', '\\b_| {2,}\\n') .replace(/\{2,\}/g, '*') .getRegex(), }; /** * exports */ const block = { normal: blockNormal, gfm: blockGfm, pedantic: blockPedantic, }; const inline = { normal: inlineNormal, gfm: inlineGfm, breaks: inlineBreaks, pedantic: inlinePedantic, }; /** * Helpers */ const escapeReplacements = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;', }; const getEscapeReplacement = (ch) => escapeReplacements[ch]; function escape(html, encode) { if (encode) { if (other.escapeTest.test(html)) { return html.replace(other.escapeReplace, getEscapeReplacement); } } else { if (other.escapeTestNoEncode.test(html)) { return html.replace(other.escapeReplaceNoEncode, getEscapeReplacement); } } return html; } function cleanUrl(href) { try { href = encodeURI(href).replace(other.percentDecode, '%'); } catch { return null; } return href; } function splitCells(tableRow, count) { // ensure that every cell-delimiting pipe has a space // before it to distinguish it from an escaped pipe const row = tableRow.replace(other.findPipe, (match, offset, str) => { let escaped = false; let curr = offset; while (--curr >= 0 && str[curr] === '\\') escaped = !escaped; if (escaped) { // odd number of slashes means | is escaped // so we leave it alone return '|'; } else { // add space before unescaped | return ' |'; } }), cells = row.split(other.splitPipe); let i = 0; // First/last cell in a row cannot be empty if it has no leading/trailing pipe if (!cells[0].trim()) { cells.shift(); } if (cells.length > 0 && !cells.at(-1)?.trim()) { cells.pop(); } if (count) { if (cells.length > count) { cells.splice(count); } else { while (cells.length < count) cells.push(''); } } for (; i < cells.length; i++) { // leading or trailing whitespace is ignored per the gfm spec cells[i] = cells[i].trim().replace(other.slashPipe, '|'); } return cells; } /** * Remove trailing 'c's. Equivalent to str.replace(/c*$/, ''). * /c*$/ is vulnerable to REDOS. * * @param str * @param c * @param invert Remove suffix of non-c chars instead. Default falsey. */ function rtrim(str, c, invert) { const l = str.length; if (l === 0) { return ''; } // Length of suffix matching the invert condition. let suffLen = 0; // Step left until we fail to match the invert condition. while (suffLen < l) { const currChar = str.charAt(l - suffLen - 1); if (currChar === c && true) { suffLen++; } else { break; } } return str.slice(0, l - suffLen); } function findClosingBracket(str, b) { if (str.indexOf(b[1]) === -1) { return -1; } let level = 0; for (let i = 0; i < str.length; i++) { if (str[i] === '\\') { i++; } else if (str[i] === b[0]) { level++; } else if (str[i] === b[1]) { level--; if (level < 0) { return i; } } } if (level > 0) { return -2; } return -1; } function outputLink(cap, link, raw, lexer, rules) { const href = link.href; const title = link.title || null; const text = cap[1].replace(rules.other.outputLinkReplace, '$1'); lexer.state.inLink = true; const token = { type: cap[0].charAt(0) === '!' ? 'image' : 'link', raw, href, title, text, tokens: lexer.inlineTokens(text), }; lexer.state.inLink = false; return token; } function indentCodeCompensation(raw, text, rules) { const matchIndentToCode = raw.match(rules.other.indentCodeCompensation); if (matchIndentToCode === null) { return text; } const indentToCode = matchIndentToCode[1]; return text .split('\n') .map(node => { const matchIndentInNode = node.match(rules.other.beginningSpace); if (matchIndentInNode === null) { return node; } const [indentInNode] = matchIndentInNode; if (indentInNode.length >= indentToCode.length) { return node.slice(indentToCode.length); } return node; }) .join('\n'); } /** * Tokenizer */ class _Tokenizer { options; rules; // set by the lexer lexer; // set by the lexer constructor(options) { this.options = options || exports.defaults; } space(src) { const cap = this.rules.block.newline.exec(src); if (cap && cap[0].length > 0) { return { type: 'space', raw: cap[0], }; } } code(src) { const cap = this.rules.block.code.exec(src); if (cap) { const text = cap[0].replace(this.rules.other.codeRemoveIndent, ''); return { type: 'code', raw: cap[0], codeBlockStyle: 'indented', text: !this.options.pedantic ? rtrim(text, '\n') : text, }; } } fences(src) { const cap = this.rules.block.fences.exec(src); if (cap) { const raw = cap[0]; const text = indentCodeCompensation(raw, cap[3] || '', this.rules); return { type: 'code', raw, lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2], text, }; } } heading(src) { const cap = this.rules.block.heading.exec(src); if (cap) { let text = cap[2].trim(); // remove trailing #s if (this.rules.other.endingHash.test(text)) { const trimmed = rtrim(text, '#'); if (this.options.pedantic) { text = trimmed.trim(); } else if (!trimmed || this.rules.other.endingSpaceChar.test(trimmed)) { // CommonMark requires space before trailing #s text = trimmed.trim(); } } return { type: 'heading', raw: cap[0], depth: cap[1].length, text, tokens: this.lexer.inline(text), }; } } hr(src) { const cap = this.rules.block.hr.exec(src); if (cap) { return { type: 'hr', raw: rtrim(cap[0], '\n'), }; } } blockquote(src) { const cap = this.rules.block.blockquote.exec(src); if (cap) { let lines = rtrim(cap[0], '\n').split('\n'); let raw = ''; let text = ''; const tokens = []; while (lines.length > 0) { let inBlockquote = false; const currentLines = []; let i; for (i = 0; i < lines.length; i++) { // get lines up to a continuation if (this.rules.other.blockquoteStart.test(lines[i])) { currentLines.push(lines[i]); inBlockquote = true; } else if (!inBlockquote) { currentLines.push(lines[i]); } else { break; } } lines = lines.slice(i); const currentRaw = currentLines.join('\n'); const currentText = currentRaw // precede setext continuation with 4 spaces so it isn't a setext .replace(this.rules.other.blockquoteSetextReplace, '\n $1') .replace(this.rules.other.blockquoteSetextReplace2, ''); raw = raw ? `${raw}\n${currentRaw}` : currentRaw; text = text ? `${text}\n${currentText}` : currentText; // parse blockquote lines as top level tokens // merge paragraphs if this is a continuation const top = this.lexer.state.top; this.lexer.state.top = true; this.lexer.blockTokens(currentText, tokens, true); this.lexer.state.top = top; // if there is no continuation then we are done if (lines.length === 0) { break; } const lastToken = tokens.at(-1); if (lastToken?.type === 'code') { // blockquote continuation cannot be preceded by a code block break; } else if (lastToken?.type === 'blockquote') { // include continuation in nested blockquote const oldToken = lastToken; const newText = oldToken.raw + '\n' + lines.join('\n'); const newToken = this.blockquote(newText); tokens[tokens.length - 1] = newToken; raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw; text = text.substring(0, text.length - oldToken.text.length) + newToken.text; break; } else if (lastToken?.type === 'list') { // include continuation in nested list const oldToken = lastToken; const newText = oldToken.raw + '\n' + lines.join('\n'); const newToken = this.list(newText); tokens[tokens.length - 1] = newToken; raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw; text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw; lines = newText.substring(tokens.at(-1).raw.length).split('\n'); continue; } } return { type: 'blockquote', raw, tokens, text, }; } } list(src) { let cap = this.rules.block.list.exec(src); if (cap) { let bull = cap[1].trim(); const isordered = bull.length > 1; const list = { type: 'list', raw: '', ordered: isordered, start: isordered ? +bull.slice(0, -1) : '', loose: false, items: [], }; bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`; if (this.options.pedantic) { bull = isordered ? bull : '[*+-]'; } // Get next list item const itemRegex = this.rules.other.listItemRegex(bull); let endsWithBlankLine = false; // Check if current bullet point can start a new List Item while (src) { let endEarly = false; let raw = ''; let itemContents = ''; if (!(cap = itemRegex.exec(src))) { break; } if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?) break; } raw = cap[0]; src = src.substring(raw.length); let line = cap[2].split('\n', 1)[0].replace(this.rules.other.listReplaceTabs, (t) => ' '.repeat(3 * t.length)); let nextLine = src.split('\n', 1)[0]; let blankLine = !line.trim(); let indent = 0; if (this.options.pedantic) { indent = 2; itemContents = line.trimStart(); } else if (blankLine) { indent = cap[1].length + 1; } else { indent = cap[2].search(this.rules.other.nonSpaceChar); // Find first non-space char indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent itemContents = line.slice(indent); indent += cap[1].length; } if (blankLine && this.rules.other.blankLine.test(nextLine)) { // Items begin with at most one blank line raw += nextLine + '\n'; src = src.substring(nextLine.length + 1); endEarly = true; } if (!endEarly) { const nextBulletRegex = this.rules.other.nextBulletRegex(indent); const hrRegex = this.rules.other.hrRegex(indent); const fencesBeginRegex = this.rules.other.fencesBeginRegex(indent); const headingBeginRegex = this.rules.other.headingBeginRegex(indent); const htmlBeginRegex = this.rules.other.htmlBeginRegex(indent); // Check if following lines should be included in List Item while (src) { const rawLine = src.split('\n', 1)[0]; let nextLineWithoutTabs; nextLine = rawLine; // Re-align to follow commonmark nesting rules if (this.options.pedantic) { nextLine = nextLine.replace(this.rules.other.listReplaceNesting, ' '); nextLineWithoutTabs = nextLine; } else { nextLineWithoutTabs = nextLine.replace(this.rules.other.tabCharGlobal, ' '); } // End list item if found code fences if (fencesBeginRegex.test(nextLine)) { break; } // End list item if found start of new heading if (headingBeginRegex.test(nextLine)) { break; } // End list item if found start of html block if (htmlBeginRegex.test(nextLine)) { break; } // End list item if found start of new bullet if (nextBulletRegex.test(nextLine)) { break; } // Horizontal rule found if (hrRegex.test(nextLine)) { break; } if (nextLineWithoutTabs.search(this.rules.other.nonSpaceChar) >= indent || !nextLine.trim()) { // Dedent if possible itemContents += '\n' + nextLineWithoutTabs.slice(indent); } else { // not enough indentation if (blankLine) { break; } // paragraph continuation unless last line was a different block level element if (line.replace(this.rules.other.tabCharGlobal, ' ').search(this.rules.other.nonSpaceChar) >= 4) { // indented code block break; } if (fencesBeginRegex.test(line)) { break; } if (headingBeginRegex.test(line)) { break; } if (hrRegex.test(line)) { break; } itemContents += '\n' + nextLine; } if (!blankLine && !nextLine.trim()) { // Check if current line is blank blankLine = true; } raw += rawLine + '\n'; src = src.substring(rawLine.length + 1); line = nextLineWithoutTabs.slice(indent); } } if (!list.loose) { // If the previous item ended with a blank line, the list is loose if (endsWithBlankLine) { list.loose = true; } else if (this.rules.other.doubleBlankLine.test(raw)) { endsWithBlankLine = true; } } let istask = null; let ischecked; // Check for task list items if (this.options.gfm) { istask = this.rules.other.listIsTask.exec(itemContents); if (istask) { ischecked = istask[0] !== '[ ] '; itemContents = itemContents.replace(this.rules.other.listReplaceTask, ''); } } list.items.push({ type: 'list_item', raw, task: !!istask, checked: ischecked, loose: false, text: itemContents, tokens: [], }); list.raw += raw; } // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic const lastItem = list.items.at(-1); if (lastItem) { lastItem.raw = lastItem.raw.trimEnd(); lastItem.text = lastItem.text.trimEnd(); } else { // not a list since there were no items return; } list.raw = list.raw.trimEnd(); // Item child tokens handled here at end because we needed to have the final item to trim it first for (let i = 0; i < list.items.length; i++) { this.lexer.state.top = false; list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []); if (!list.loose) { // Check if list should be loose const spacers = list.items[i].tokens.filter(t => t.type === 'space'); const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => this.rules.other.anyLine.test(t.raw)); list.loose = hasMultipleLineBreaks; } } // Set all items to loose if list is loose if (list.loose) { for (let i = 0; i < list.items.length; i++) { list.items[i].loose = true; } } return list; } } html(src) { const cap = this.rules.block.html.exec(src); if (cap) { const token = { type: 'html', block: true, raw: cap[0], pre: cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style', text: cap[0], }; return token; } } def(src) { const cap = this.rules.block.def.exec(src); if (cap) { const tag = cap[1].toLowerCase().replace(this.rules.other.multipleSpaceGlobal, ' '); const href = cap[2] ? cap[2].replace(this.rules.other.hrefBrackets, '$1').replace(this.rules.inline.anyPunctuation, '$1') : ''; const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3]; return { type: 'def', tag, raw: cap[0], href, title, }; } } table(src) { const cap = this.rules.block.table.exec(src); if (!cap) { return; } if (!this.rules.other.tableDelimiter.test(cap[2])) { // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading return; } const headers = splitCells(cap[1]); const aligns = cap[2].replace(this.rules.other.tableAlignChars, '').split('|'); const rows = cap[3]?.trim() ? cap[3].replace(this.rules.other.tableRowBlankLine, '').split('\n') : []; const item = { type: 'table', raw: cap[0], header: [], align: [], rows: [], }; if (headers.length !== aligns.length) { // header and align columns must be equal, rows can be different. return; } for (const align of aligns) { if (this.rules.other.tableAlignRight.test(align)) { item.align.push('right'); } else if (this.rules.other.tableAlignCenter.test(align)) { item.align.push('center'); } else if (this.rules.other.tableAlignLeft.test(align)) { item.align.push('left'); } else { item.align.push(null); } } for (let i = 0; i < headers.length; i++) { item.header.push({ text: headers[i], tokens: this.lexer.inline(headers[i]), header: true, align: item.align[i], }); } for (const row of rows) { item.rows.push(splitCells(row, item.header.length).map((cell, i) => { return { text: cell, tokens: this.lexer.inline(cell), header: false, align: item.align[i], }; })); } return item; } lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, text: cap[1], tokens: this.lexer.inline(cap[1]), }; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { const text = cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) : cap[1]; return { type: 'paragraph', raw: cap[0], text, tokens: this.lexer.inline(text), }; } } text(src) { const cap = this.rules.block.text.exec(src); if (cap) { return { type: 'text', raw: cap[0], text: cap[0], tokens: this.lexer.inline(cap[0]), }; } } escape(src) { const cap = this.rules.inline.escape.exec(src); if (cap) { return { type: 'escape', raw: cap[0], text: cap[1], }; } } tag(src) { const cap = this.rules.inline.tag.exec(src); if (cap) { if (!this.lexer.state.inLink && this.rules.other.startATag.test(cap[0])) { this.lexer.state.inLink = true; } else if (this.lexer.state.inLink && this.rules.other.endATag.test(cap[0])) { this.lexer.state.inLink = false; } if (!this.lexer.state.inRawBlock && this.rules.other.startPreScriptTag.test(cap[0])) { this.lexer.state.inRawBlock = true; } else if (this.lexer.state.inRawBlock && this.rules.other.endPreScriptTag.test(cap[0])) { this.lexer.state.inRawBlock = false; } return { type: 'html', raw: cap[0], inLink: this.lexer.state.inLink, inRawBlock: this.lexer.state.inRawBlock, block: false, text: cap[0], }; } } link(src) { const cap = this.rules.inline.link.exec(src); if (cap) { const trimmedUrl = cap[2].trim(); if (!this.options.pedantic && this.rules.other.startAngleBracket.test(trimmedUrl)) { // commonmark requires matching angle brackets if (!(this.rules.other.endAngleBracket.test(trimmedUrl))) { return; } // ending angle bracket cannot be escaped const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\'); if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) { return; } } else { // find closing parenthesis const lastParenIndex = findClosingBracket(cap[2], '()'); if (lastParenIndex === -2) { // more open parens than closed return; } if (lastParenIndex > -1) { const start = cap[0].indexOf('!') === 0 ? 5 : 4; const linkLen = start + cap[1].length + lastParenIndex; cap[2] = cap[2].substring(0, lastParenIndex); cap[0] = cap[0].substring(0, linkLen).trim(); cap[3] = ''; } } let href = cap[2]; let title = ''; if (this.options.pedantic) { // split pedantic href and title const link = this.rules.other.pedanticHrefTitle.exec(href); if (link) { href = link[1]; title = link[3]; } } else { title = cap[3] ? cap[3].slice(1, -1) : ''; } href = href.trim(); if (this.rules.other.startAngleBracket.test(href)) { if (this.options.pedantic && !(this.rules.other.endAngleBracket.test(trimmedUrl))) { // pedantic allows starting angle bracket without ending angle bracket href = href.slice(1); } else { href = href.slice(1, -1); } } return outputLink(cap, { href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href, title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title, }, cap[0], this.lexer, this.rules); } } reflink(src, links) { let cap; if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { const linkString = (cap[2] || cap