UNPKG

marked

Version:

A markdown parser built for speed

318 lines (277 loc) 12.3 kB
import { noopTest, edit } from './helpers.js'; /** * Block-Level Grammar */ export const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/, hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4) + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5) + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6) + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/, table: noopTest, lheading: /^((?:(?!^bull ).|\n(?!\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph // interruption rules of commonmark and the original markdown spec: _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/, text: /^[^\n]+/ }; block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/; block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; block.def = edit(block.def) .replace('label', block._label) .replace('title', block._title) .getRegex(); block.bullet = /(?:[*+-]|\d{1,9}[.)])/; block.listItemStart = edit(/^( *)(bull) */) .replace('bull', block.bullet) .getRegex(); block.list = edit(block.list) .replace(/bull/g, block.bullet) .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + '|track|ul'; block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/; block.html = edit(block.html, 'i') .replace('comment', block._comment) .replace('tag', block._tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); block.lheading = edit(block.lheading) .replace(/bull/g, block.bullet) // lists can interrupt .getRegex(); block.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('|table', '') .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); block.blockquote = edit(block.blockquote) .replace('paragraph', block.paragraph) .getRegex(); /** * Normal Block Grammar */ block.normal = { ...block }; /** * GFM Block Grammar */ block.gfm = { ...block.normal, table: '^ *([^\\n ].*\\|.*)\\n' // Header + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }; block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); block.gfm.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('table', block.gfm.table) // interrupt paragraphs with table .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); /** * Pedantic grammar (original John Gruber's loose markdown specification) */ block.pedantic = { ...block.normal, html: edit( '^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') .replace('comment', block._comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest, // fences not supported lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/, paragraph: edit(block.normal._paragraph) .replace('hr', block.hr) .replace('heading', ' *#{1,6} *[^\n]') .replace('lheading', block.lheading) .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') .replace('|html', '') .getRegex() }; /** * Inline-Level Grammar */ export const inline = { escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noopTest, tag: '^comment' + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?> + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html> + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/, reflink: /^!?\[(label)\]\[(ref)\]/, nolink: /^!?\[(ref)\](?:\[\])?/, reflinkSearch: 'reflink|nolink(?!\\()', emStrong: { lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. // | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/, rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, del: noopTest, text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/, punctuation: /^((?![*_])[\spunctuation])/ }; // list of unicode punctuation marks, plus any missing characters from CommonMark spec inline._punctuation = '\\p{P}$+<=>`^|~'; inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [title](link), `code`, <html> inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g; inline.anyPunctuation = /\\[punct]/g; inline._escapes = /\\([punct])/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); inline.emStrong.lDelim = edit(inline.emStrong.lDelim, 'u') .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline.anyPunctuation = edit(inline.anyPunctuation, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline._escapes = edit(inline._escapes, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; inline.autolink = edit(inline.autolink) .replace('scheme', inline._scheme) .replace('email', inline._email) .getRegex(); inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; inline.tag = edit(inline.tag) .replace('comment', inline._comment) .replace('attribute', inline._attribute) .getRegex(); inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/; inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; inline.link = edit(inline.link) .replace('label', inline._label) .replace('href', inline._href) .replace('title', inline._title) .getRegex(); inline.reflink = edit(inline.reflink) .replace('label', inline._label) .replace('ref', block._label) .getRegex(); inline.nolink = edit(inline.nolink) .replace('ref', block._label) .getRegex(); inline.reflinkSearch = edit(inline.reflinkSearch, 'g') .replace('reflink', inline.reflink) .replace('nolink', inline.nolink) .getRegex(); /** * Normal Inline Grammar */ inline.normal = { ...inline }; /** * Pedantic Inline Grammar */ inline.pedantic = { ...inline.normal, strong: { start: /^__|\*\*/, middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, endAst: /\*\*(?!\*)/g, endUnd: /__(?!_)/g }, em: { start: /^_|\*/, middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, endAst: /\*(?!\*)/g, endUnd: /_(?!_)/g }, link: edit(/^!?\[(label)\]\((.*?)\)/) .replace('label', inline._label) .getRegex(), reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', inline._label) .getRegex() }; /** * GFM Inline Grammar */ inline.gfm = { ...inline.normal, escape: edit(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/, del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/, text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/ }; inline.gfm.url = edit(inline.gfm.url, 'i') .replace('email', inline.gfm._extended_email) .getRegex(); /** * GFM + Line Breaks Inline Grammar */ inline.breaks = { ...inline.gfm, br: edit(inline.br).replace('{2,}', '*').getRegex(), text: edit(inline.gfm.text) .replace('\\b_', '\\b_| {2,}\\n') .replace(/\{2,\}/g, '*') .getRegex() };