/**
 * marked - a markdown parser
 * Copyright (c) 2011-2022, Christopher Jeffrey. (MIT Licensed)
 * https://github.com/markedjs/marked
 */

/**
 * DO NOT EDIT THIS FILE
 * The code in this file is generated from files in ./src/
 */

function getDefaults() {
  return {
    baseUrl: null,
    breaks: false,
    extensions: null,
    gfm: true,
    headerIds: true,
    headerPrefix: "",
    highlight: null,
    langPrefix: "language-",
    mangle: true,
    pedantic: false,
    renderer: null,
    sanitize: false,
    sanitizer: null,
    silent: false,
    smartLists: false,
    smartypants: false,
    tokenizer: null,
    walkTokens: null,
    xhtml: false,
  }
}

let defaults = getDefaults()

function changeDefaults(newDefaults) {
  defaults = newDefaults
}

/**
 * Helpers
 */
const escapeTest = /[&<>"']/
const escapeReplace = /[&<>"']/g
const escapeTestNoEncode = /[<>"']|&(?!#?\w+;)/
const escapeReplaceNoEncode = /[<>"']|&(?!#?\w+;)/g
const escapeReplacements = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
}
const getEscapeReplacement = ch => escapeReplacements[ch]

function escape(html, encode) {
  if (encode) {
    if (escapeTest.test(html)) {
      return html.replace(escapeReplace, getEscapeReplacement)
    }
  } else {
    if (escapeTestNoEncode.test(html)) {
      return html.replace(escapeReplaceNoEncode, getEscapeReplacement)
    }
  }
  return html
}

const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi

/**
 * @param {string} html
 */
function unescape(html) {
  // explicitly match decimal, hex, and named HTML entities
  return html.replace(unescapeTest, (_, n) => {
    n = n.toLowerCase()
    if (n === "colon") return ":"
    if (n.charAt(0) === "#") {
      return n.charAt(1) === "x"
        ? String.fromCharCode(parseInt(n.substring(2), 16))
        : String.fromCharCode(+n.substring(1))
    }
    return ""
  })
}

const caret = /(^|[^\[])\^/g

/**
 * @param {string | RegExp} regex
 * @param {string} opt
 */
function edit(regex, opt) {
  regex = typeof regex === "string" ? regex : regex.source
  opt = opt || ""
  const obj = {
    replace: (name, val) => {
      val = val.source || val
      val = val.replace(caret, "$1")
      regex = regex.replace(name, val)
      return obj
    },
    getRegex: () => {
      return new RegExp(regex, opt)
    },
  }
  return obj
}
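
// Usage sketch (illustrative only; `listItem` is a hypothetical name, not part
// of this bundle): the grammar tables below compose regexes by swapping
// placeholder words for sub-patterns, e.g.
//
//   const listItem = edit(/^(bull) .+/, "g")
//     .replace("bull", /[*+-]/) // substitute the `bull` placeholder
//     .getRegex()               // -> /^([*+-]) .+/g
//
// `replace` strips anchoring carets from the injected pattern (via `caret`),
// so an anchored fragment can be embedded mid-expression; carets that open a
// character class like [^x] are preserved.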
const nonWordAndColonTest = /[^\w:]/g
const originIndependentUrl = /^$|^[a-z][a-z0-9+.-]*:|^[?#]/i

/**
 * @param {boolean} sanitize
 * @param {string} base
 * @param {string} href
 */
function cleanUrl(sanitize, base, href) {
  if (sanitize) {
    let prot
    try {
      prot = decodeURIComponent(unescape(href))
        .replace(nonWordAndColonTest, "")
        .toLowerCase()
    } catch (e) {
      return null
    }
    if (
      prot.indexOf("javascript:") === 0 ||
      prot.indexOf("vbscript:") === 0 ||
      prot.indexOf("data:") === 0
    ) {
      return null
    }
  }
  if (base && !originIndependentUrl.test(href)) {
    href = resolveUrl(base, href)
  }
  try {
    href = encodeURI(href).replace(/%25/g, "%")
  } catch (e) {
    return null
  }
  return href
}

const baseUrls = {}
const justDomain = /^[^:]+:\/*[^/]*$/
const protocol = /^([^:]+:)[\s\S]*$/
const domain = /^([^:]+:\/*[^/]*)[\s\S]*$/

/**
 * @param {string} base
 * @param {string} href
 */
function resolveUrl(base, href) {
  if (!baseUrls[" " + base]) {
    // we can ignore everything in base after the last slash of its path component,
    // but we might need to add _that_
    // https://tools.ietf.org/html/rfc3986#section-3
    if (justDomain.test(base)) {
      baseUrls[" " + base] = base + "/"
    } else {
      baseUrls[" " + base] = rtrim(base, "/", true)
    }
  }
  base = baseUrls[" " + base]
  const relativeBase = base.indexOf(":") === -1

  if (href.substring(0, 2) === "//") {
    if (relativeBase) {
      return href
    }
    return base.replace(protocol, "$1") + href
  } else if (href.charAt(0) === "/") {
    if (relativeBase) {
      return href
    }
    return base.replace(domain, "$1") + href
  } else {
    return base + href
  }
}

const noopTest = { exec: function noopTest() {} }

function merge(obj) {
  let i = 1,
    target,
    key
  for (; i < arguments.length; i++) {
    target = arguments[i]
    for (key in target) {
      if (Object.prototype.hasOwnProperty.call(target, key)) {
        obj[key] = target[key]
      }
    }
  }
  return obj
}

function splitCells(tableRow, count) {
  // ensure that every cell-delimiting pipe has a space
  // before it to distinguish it from an escaped pipe
  const row = tableRow.replace(/\|/g, (match, offset, str) => {
      let escaped = false,
        curr = offset
      while (--curr >= 0 && str[curr] === "\\") escaped = !escaped
      if (escaped) {
        // odd number of slashes means | is escaped
        // so we leave it alone
        return "|"
      } else {
        // add space before unescaped |
        return " |"
      }
    }),
    cells = row.split(/ \|/)
  let i = 0

  // First/last cell in a row cannot be empty if it has no leading/trailing pipe
  if (!cells[0].trim()) {
    cells.shift()
  }
  if (cells.length > 0 && !cells[cells.length - 1].trim()) {
    cells.pop()
  }

  if (cells.length > count) {
    cells.splice(count)
  } else {
    while (cells.length < count) cells.push("")
  }

  for (; i < cells.length; i++) {
    // leading or trailing whitespace is ignored per the gfm spec
    cells[i] = cells[i].trim().replace(/\\\|/g, "|")
  }
  return cells
}
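
// Expected behavior sketch (illustrative values, not taken from the source):
//   splitCells("| a | b |", 3)   // -> ["a", "b", ""]   (padded to `count`)
//   splitCells("a \\| b | c", 2) // -> ["a | b", "c"]   (escaped pipe kept)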
/**
 * Remove trailing 'c's. Equivalent to str.replace(/c*$/, '').
 * /c*$/ is vulnerable to REDOS.
 *
 * @param {string} str
 * @param {string} c
 * @param {boolean} invert Remove suffix of non-c chars instead. Default falsey.
 */
function rtrim(str, c, invert) {
  const l = str.length
  if (l === 0) {
    return ""
  }

  // Length of suffix matching the invert condition.
  let suffLen = 0

  // Step left until we fail to match the invert condition.
  while (suffLen < l) {
    const currChar = str.charAt(l - suffLen - 1)
    if (currChar === c && !invert) {
      suffLen++
    } else if (currChar !== c && invert) {
      suffLen++
    } else {
      break
    }
  }

  return str.slice(0, l - suffLen)
}

function findClosingBracket(str, b) {
  if (str.indexOf(b[1]) === -1) {
    return -1
  }
  const l = str.length
  let level = 0,
    i = 0
  for (; i < l; i++) {
    if (str[i] === "\\") {
      i++
    } else if (str[i] === b[0]) {
      level++
    } else if (str[i] === b[1]) {
      level--
      if (level < 0) {
        return i
      }
    }
  }
  return -1
}

function checkSanitizeDeprecation(opt) {
  if (opt && opt.sanitize && !opt.silent) {
    console.warn(
      "marked(): sanitize and sanitizer parameters are deprecated since version 0.7.0, should not be used and will be removed in the future. Read more here: https://marked.js.org/#/USING_ADVANCED.md#options"
    )
  }
}

// copied from https://stackoverflow.com/a/5450113/806777
/**
 * @param {string} pattern
 * @param {number} count
 */
function repeatString(pattern, count) {
  if (count < 1) {
    return ""
  }
  let result = ""
  while (count > 1) {
    if (count & 1) {
      result += pattern
    }
    count >>= 1
    pattern += pattern
  }
  return result + pattern
}

function outputLink(cap, link, raw, lexer) {
  const href = link.href
  const title = link.title ? escape(link.title) : null
  const text = cap[1].replace(/\\([\[\]])/g, "$1")

  if (cap[0].charAt(0) !== "!") {
    lexer.state.inLink = true
    const token = {
      type: "link",
      raw,
      href,
      title,
      text,
      tokens: lexer.inlineTokens(text, []),
    }
    lexer.state.inLink = false
    return token
  }
  return {
    type: "image",
    raw,
    href,
    title,
    text: escape(text),
  }
}

function indentCodeCompensation(raw, text) {
  const matchIndentToCode = raw.match(/^(\s+)(?:```)/)

  if (matchIndentToCode === null) {
    return text
  }

  const indentToCode = matchIndentToCode[1]

  return text
    .split("\n")
    .map(node => {
      const matchIndentInNode = node.match(/^\s+/)
      if (matchIndentInNode === null) {
        return node
      }

      const [indentInNode] = matchIndentInNode

      if (indentInNode.length >= indentToCode.length) {
        return node.slice(indentToCode.length)
      }

      return node
    })
    .join("\n")
}

/**
 * Tokenizer
 */
class Tokenizer {
  constructor(options) {
    this.options = options || defaults
  }

  space(src) {
    const cap = this.rules.block.newline.exec(src)
    if (cap && cap[0].length > 0) {
      return {
        type: "space",
        raw: cap[0],
      }
    }
  }

  code(src) {
    const cap = this.rules.block.code.exec(src)
    if (cap) {
      const text = cap[0].replace(/^ {1,4}/gm, "")
      return {
        type: "code",
        raw: cap[0],
        codeBlockStyle: "indented",
        text: !this.options.pedantic ? rtrim(text, "\n") : text,
      }
    }
  }

  fences(src) {
    const cap = this.rules.block.fences.exec(src)
    if (cap) {
      const raw = cap[0]
      const text = indentCodeCompensation(raw, cap[3] || "")

      return {
        type: "code",
        raw,
        lang: cap[2] ? cap[2].trim() : cap[2],
        text,
      }
    }
  }

  heading(src) {
    const cap = this.rules.block.heading.exec(src)
    if (cap) {
      let text = cap[2].trim()

      // remove trailing #s
      if (/#$/.test(text)) {
        const trimmed = rtrim(text, "#")
        if (this.options.pedantic) {
          text = trimmed.trim()
        } else if (!trimmed || / $/.test(trimmed)) {
          // CommonMark requires space before trailing #s
          text = trimmed.trim()
        }
      }

      const token = {
        type: "heading",
        raw: cap[0],
        depth: cap[1].length,
        text,
        tokens: [],
      }
      this.lexer.inline(token.text, token.tokens)
      return token
    }
  }

  hr(src) {
    const cap = this.rules.block.hr.exec(src)
    if (cap) {
      return {
        type: "hr",
        raw: cap[0],
      }
    }
  }

  blockquote(src) {
    const cap = this.rules.block.blockquote.exec(src)
    if (cap) {
      const text = cap[0].replace(/^ *>[ \t]?/gm, "")

      return {
        type: "blockquote",
        raw: cap[0],
        tokens: this.lexer.blockTokens(text, []),
        text,
      }
    }
  }

  list(src) {
    let cap = this.rules.block.list.exec(src)
    if (cap) {
      let raw,
        istask,
        ischecked,
        indent,
        i,
        blankLine,
        endsWithBlankLine,
        line,
        nextLine,
        rawLine,
        itemContents,
        endEarly
      let bull = cap[1].trim()
      const isordered = bull.length > 1

      const list = {
        type: "list",
        raw: "",
        ordered: isordered,
        start: isordered ? +bull.slice(0, -1) : "",
        loose: false,
        items: [],
      }

      bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`

      if (this.options.pedantic) {
        bull = isordered ? bull : "[*+-]"
      }

      // Get next list item
      const itemRegex = new RegExp(
        `^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`
      )

      // Check if current bullet point can start a new List Item
      while (src) {
        endEarly = false
        if (!(cap = itemRegex.exec(src))) {
          break
        }

        if (this.rules.block.hr.test(src)) {
          // End list if bullet was actually HR (possibly move into itemRegex?)
          break
        }

        raw = cap[0]
        src = src.substring(raw.length)

        line = cap[2].split("\n", 1)[0]
        nextLine = src.split("\n", 1)[0]

        if (this.options.pedantic) {
          indent = 2
          itemContents = line.trimLeft()
        } else {
          indent = cap[2].search(/[^ ]/) // Find first non-space char
          indent = indent > 4 ? 1 : indent // Treat indented code blocks (> 4 spaces) as having only 1 indent
          itemContents = line.slice(indent)
          indent += cap[1].length
        }

        blankLine = false

        if (!line && /^ *$/.test(nextLine)) {
          // Items begin with at most one blank line
          raw += nextLine + "\n"
          src = src.substring(nextLine.length + 1)
          endEarly = true
        }

        if (!endEarly) {
          const nextBulletRegex = new RegExp(
            `^ {0,${Math.min(
              3,
              indent - 1
            )}}(?:[*+-]|\\d{1,9}[.)])((?: [^\\n]*)?(?:\\n|$))`
          )
          const hrRegex = new RegExp(
            `^ {0,${Math.min(
              3,
              indent - 1
            )}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`
          )

          // Check if following lines should be included in List Item
          while (src) {
            rawLine = src.split("\n", 1)[0]
            line = rawLine

            // Re-align to follow commonmark nesting rules
            if (this.options.pedantic) {
              line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, "  ")
            }

            // End list item if found start of new bullet
            if (nextBulletRegex.test(line)) {
              break
            }

            // Horizontal rule found
            if (hrRegex.test(src)) {
              break
            }

            if (line.search(/[^ ]/) >= indent || !line.trim()) {
              // Dedent if possible
              itemContents += "\n" + line.slice(indent)
            } else if (!blankLine) {
              // Until blank line, item doesn't need indentation
              itemContents += "\n" + line
            } else {
              // Otherwise, improper indentation ends this item
              break
            }

            if (!blankLine && !line.trim()) {
              // Check if current line is blank
              blankLine = true
            }

            raw += rawLine + "\n"
            src = src.substring(rawLine.length + 1)
          }
        }

        if (!list.loose) {
          // If the previous item ended with a blank line, the list is loose
          if (endsWithBlankLine) {
            list.loose = true
          } else if (/\n *\n *$/.test(raw)) {
            endsWithBlankLine = true
          }
        }

        // Check for task list items
        if (this.options.gfm) {
          istask = /^\[[ xX]\] /.exec(itemContents)
          if (istask) {
            ischecked = istask[0] !== "[ ] "
            itemContents = itemContents.replace(/^\[[ xX]\] +/, "")
          }
        }

        list.items.push({
          type: "list_item",
          raw,
          task: !!istask,
          checked: ischecked,
          loose: false,
          text: itemContents,
        })

        list.raw += raw
      }

      // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
      list.items[list.items.length - 1].raw = raw.trimRight()
      list.items[list.items.length - 1].text = itemContents.trimRight()
      list.raw = list.raw.trimRight()

      const l = list.items.length

      // Item child tokens handled here at end because we needed to have the final item to trim it first
      for (i = 0; i < l; i++) {
        this.lexer.state.top = false
        list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, [])
        const spacers = list.items[i].tokens.filter(t => t.type === "space")
        const hasMultipleLineBreaks = spacers.every(t => {
          const chars = t.raw.split("")
          let lineBreaks = 0
          for (const char of chars) {
            if (char === "\n") {
              lineBreaks += 1
            }
            if (lineBreaks > 1) {
              return true
            }
          }
          return false
        })

        if (!list.loose && spacers.length && hasMultipleLineBreaks) {
          // Having a single line break doesn't mean a list is loose. A single line break is terminating the last list item
          list.loose = true
          list.items[i].loose = true
        }
      }

      return list
    }
  }
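
  // Token shape sketch (illustrative): for GFM input "- [x] done\n- [ ] todo",
  // `list` returns roughly
  //   { type: "list", ordered: false, loose: false, items: [
  //       { type: "list_item", task: true, checked: true,  text: "done", ... },
  //       { type: "list_item", task: true, checked: false, text: "todo", ... } ] }
  // (`raw` and the nested `tokens` arrays are omitted here for brevity).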
  html(src) {
    const cap = this.rules.block.html.exec(src)
    if (cap) {
      const token = {
        type: "html",
        raw: cap[0],
        pre:
          !this.options.sanitizer &&
          (cap[1] === "pre" || cap[1] === "script" || cap[1] === "style"),
        text: cap[0],
      }
      if (this.options.sanitize) {
        token.type = "paragraph"
        token.text = this.options.sanitizer
          ? this.options.sanitizer(cap[0])
          : escape(cap[0])
        token.tokens = []
        this.lexer.inline(token.text, token.tokens)
      }
      return token
    }
  }

  def(src) {
    const cap = this.rules.block.def.exec(src)
    if (cap) {
      if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1)
      const tag = cap[1].toLowerCase().replace(/\s+/g, " ")
      return {
        type: "def",
        tag,
        raw: cap[0],
        href: cap[2],
        title: cap[3],
      }
    }
  }

  table(src) {
    const cap = this.rules.block.table.exec(src)
    if (cap) {
      const item = {
        type: "table",
        header: splitCells(cap[1]).map(c => {
          return { text: c }
        }),
        align: cap[2].replace(/^ *|\| *$/g, "").split(/ *\| */),
        rows:
          cap[3] && cap[3].trim()
            ? cap[3].replace(/\n[ \t]*$/, "").split("\n")
            : [],
      }

      if (item.header.length === item.align.length) {
        item.raw = cap[0]

        let l = item.align.length
        let i, j, k, row
        for (i = 0; i < l; i++) {
          if (/^ *-+: *$/.test(item.align[i])) {
            item.align[i] = "right"
          } else if (/^ *:-+: *$/.test(item.align[i])) {
            item.align[i] = "center"
          } else if (/^ *:-+ *$/.test(item.align[i])) {
            item.align[i] = "left"
          } else {
            item.align[i] = null
          }
        }

        l = item.rows.length
        for (i = 0; i < l; i++) {
          item.rows[i] = splitCells(item.rows[i], item.header.length).map(c => {
            return { text: c }
          })
        }

        // parse child tokens inside headers and cells

        // header child tokens
        l = item.header.length
        for (j = 0; j < l; j++) {
          item.header[j].tokens = []
          this.lexer.inline(item.header[j].text, item.header[j].tokens)
        }

        // cell child tokens
        l = item.rows.length
        for (j = 0; j < l; j++) {
          row = item.rows[j]
          for (k = 0; k < row.length; k++) {
            row[k].tokens = []
            this.lexer.inline(row[k].text, row[k].tokens)
          }
        }

        return item
      }
    }
  }

  lheading(src) {
    const cap = this.rules.block.lheading.exec(src)
    if (cap) {
      const token = {
        type: "heading",
        raw: cap[0],
        depth: cap[2].charAt(0) === "=" ? 1 : 2,
        text: cap[1],
        tokens: [],
      }
      this.lexer.inline(token.text, token.tokens)
      return token
    }
  }

  paragraph(src) {
    const cap = this.rules.block.paragraph.exec(src)
    if (cap) {
      const token = {
        type: "paragraph",
        raw: cap[0],
        text:
          cap[1].charAt(cap[1].length - 1) === "\n"
            ? cap[1].slice(0, -1)
            : cap[1],
        tokens: [],
      }
      this.lexer.inline(token.text, token.tokens)
      return token
    }
  }

  text(src) {
    const cap = this.rules.block.text.exec(src)
    if (cap) {
      const token = {
        type: "text",
        raw: cap[0],
        text: cap[0],
        tokens: [],
      }
      this.lexer.inline(token.text, token.tokens)
      return token
    }
  }

  escape(src) {
    const cap = this.rules.inline.escape.exec(src)
    if (cap) {
      return {
        type: "escape",
        raw: cap[0],
        text: escape(cap[1]),
      }
    }
  }

  tag(src) {
    const cap = this.rules.inline.tag.exec(src)
    if (cap) {
      if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) {
        this.lexer.state.inLink = true
      } else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) {
        this.lexer.state.inLink = false
      }
      if (
        !this.lexer.state.inRawBlock &&
        /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])
      ) {
        this.lexer.state.inRawBlock = true
      } else if (
        this.lexer.state.inRawBlock &&
        /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])
      ) {
        this.lexer.state.inRawBlock = false
      }

      return {
        type: this.options.sanitize ? "text" : "html",
        raw: cap[0],
        inLink: this.lexer.state.inLink,
        inRawBlock: this.lexer.state.inRawBlock,
        text: this.options.sanitize
          ? this.options.sanitizer
            ? this.options.sanitizer(cap[0])
            : escape(cap[0])
          : cap[0],
      }
    }
  }

  link(src) {
    const cap = this.rules.inline.link.exec(src)
    if (cap) {
      const trimmedUrl = cap[2].trim()
      if (!this.options.pedantic && /^</.test(trimmedUrl)) {
        // commonmark requires matching angle brackets
        if (!/>$/.test(trimmedUrl)) {
          return
        }

        // ending angle bracket cannot be escaped
        const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), "\\")
        if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
          return
        }
      } else {
        // find closing parenthesis
        const lastParenIndex = findClosingBracket(cap[2], "()")
        if (lastParenIndex > -1) {
          const start = cap[0].indexOf("!") === 0 ? 5 : 4
          const linkLen = start + cap[1].length + lastParenIndex
          cap[2] = cap[2].substring(0, lastParenIndex)
          cap[0] = cap[0].substring(0, linkLen).trim()
          cap[3] = ""
        }
      }
      let href = cap[2]
      let title = ""
      if (this.options.pedantic) {
        // split pedantic href and title
        const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href)

        if (link) {
          href = link[1]
          title = link[3]
        }
      } else {
        title = cap[3] ? cap[3].slice(1, -1) : ""
      }

      href = href.trim()
      if (/^</.test(href)) {
        if (this.options.pedantic && !/>$/.test(trimmedUrl)) {
          // pedantic allows starting angle bracket without ending angle bracket
          href = href.slice(1)
        } else {
          href = href.slice(1, -1)
        }
      }
      return outputLink(
        cap,
        {
          href: href ? href.replace(this.rules.inline._escapes, "$1") : href,
          title: title
            ? title.replace(this.rules.inline._escapes, "$1")
            : title,
        },
        cap[0],
        this.lexer
      )
    }
  }

  reflink(src, links) {
    let cap
    if (
      (cap = this.rules.inline.reflink.exec(src)) ||
      (cap = this.rules.inline.nolink.exec(src))
    ) {
      let link = (cap[2] || cap[1]).replace(/\s+/g, " ")
      link = links[link.toLowerCase()]
      if (!link || !link.href) {
        const text = cap[0].charAt(0)
        return {
          type: "text",
          raw: text,
          text,
        }
      }
      return outputLink(cap, link, cap[0], this.lexer)
    }
  }

  emStrong(src, maskedSrc, prevChar = "") {
    let match = this.rules.inline.emStrong.lDelim.exec(src)
    if (!match) return

    // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
    if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return

    const nextChar = match[1] || match[2] || ""

    if (
      !nextChar ||
      (nextChar &&
        (prevChar === "" || this.rules.inline.punctuation.exec(prevChar)))
    ) {
      const lLength = match[0].length - 1
      let rDelim,
        rLength,
        delimTotal = lLength,
        midDelimTotal = 0

      const endReg =
        match[0][0] === "*"
          ? this.rules.inline.emStrong.rDelimAst
          : this.rules.inline.emStrong.rDelimUnd
      endReg.lastIndex = 0

      // Clip maskedSrc to same section of string as src (move to lexer?)
      maskedSrc = maskedSrc.slice(-1 * src.length + lLength)

      while ((match = endReg.exec(maskedSrc)) != null) {
        rDelim =
          match[1] || match[2] || match[3] || match[4] || match[5] || match[6]

        if (!rDelim) continue // skip single * in __abc*abc__

        rLength = rDelim.length

        if (match[3] || match[4]) {
          // found another Left Delim
          delimTotal += rLength
          continue
        } else if (match[5] || match[6]) {
          // either Left or Right Delim
          if (lLength % 3 && !((lLength + rLength) % 3)) {
            midDelimTotal += rLength
            continue // CommonMark Emphasis Rules 9-10
          }
        }

        delimTotal -= rLength

        if (delimTotal > 0) continue // Haven't found enough closing delimiters

        // Remove extra characters. *a*** -> *a*
        rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal)

        // Create `em` if smallest delimiter has odd char count. *a***
        if (Math.min(lLength, rLength) % 2) {
          const text = src.slice(1, lLength + match.index + rLength)
          return {
            type: "em",
            raw: src.slice(0, lLength + match.index + rLength + 1),
            text,
            tokens: this.lexer.inlineTokens(text, []),
          }
        }

        // Create 'strong' if smallest delimiter has even char count. **a***
        const text = src.slice(2, lLength + match.index + rLength - 1)
        return {
          type: "strong",
          raw: src.slice(0, lLength + match.index + rLength + 1),
          text,
          tokens: this.lexer.inlineTokens(text, []),
        }
      }
    }
  }

  codespan(src) {
    const cap = this.rules.inline.code.exec(src)
    if (cap) {
      let text = cap[2].replace(/\n/g, " ")
      const hasNonSpaceChars = /[^ ]/.test(text)
      const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text)
      if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
        text = text.substring(1, text.length - 1)
      }
      text = escape(text, true)
      return {
        type: "codespan",
        raw: cap[0],
        text,
      }
    }
  }

  br(src) {
    const cap = this.rules.inline.br.exec(src)
    if (cap) {
      return {
        type: "br",
        raw: cap[0],
      }
    }
  }

  del(src) {
    const cap = this.rules.inline.del.exec(src)
    if (cap) {
      return {
        type: "del",
        raw: cap[0],
        text: cap[2],
        tokens: this.lexer.inlineTokens(cap[2], []),
      }
    }
  }

  autolink(src, mangle) {
    const cap = this.rules.inline.autolink.exec(src)
    if (cap) {
      let text, href
      if (cap[2] === "@") {
        text = escape(this.options.mangle ? mangle(cap[1]) : cap[1])
        href = "mailto:" + text
      } else {
        text = escape(cap[1])
        href = text
      }

      return {
        type: "link",
        raw: cap[0],
        text,
        href,
        tokens: [
          {
            type: "text",
            raw: text,
            text,
          },
        ],
      }
    }
  }

  url(src, mangle) {
    let cap
    if ((cap = this.rules.inline.url.exec(src))) {
      let text, href
      if (cap[2] === "@") {
        text = escape(this.options.mangle ? mangle(cap[0]) : cap[0])
        href = "mailto:" + text
      } else {
        // do extended autolink path validation
        let prevCapZero
        do {
          prevCapZero = cap[0]
          cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]
        } while (prevCapZero !== cap[0])
        text = escape(cap[0])
        if (cap[1] === "www.") {
          href = "http://" + text
        } else {
          href = text
        }
      }
      return {
        type: "link",
        raw: cap[0],
        text,
        href,
        tokens: [
          {
            type: "text",
            raw: text,
            text,
          },
        ],
      }
    }
  }

  inlineText(src, smartypants) {
    const cap = this.rules.inline.text.exec(src)
    if (cap) {
      let text
      if (this.lexer.state.inRawBlock) {
        text = this.options.sanitize
          ? this.options.sanitizer
            ? this.options.sanitizer(cap[0])
            : escape(cap[0])
          : cap[0]
      } else {
        text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0])
      }
      return {
        type: "text",
        raw: cap[0],
        text,
      }
    }
  }
}
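
// Wiring note (sketch): tokenizer methods assume the Lexer has attached
// `this.lexer` and `this.rules` (see the Lexer constructor below), so calling
// e.g. `new Tokenizer().heading("# Hi")` in isolation throws on the undefined
// `this.rules`; tokenizers are only invoked through a Lexer.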

/**
 * Block-Level Grammar
 */
const block = {
  newline: /^(?: *(?:\n|$))+/,
  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  fences:
    /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
  hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
  html:
    "^ {0,3}(?:" + // optional indentation
    "<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)" + // (1)
    "|comment[^\\n]*(\\n+|$)" + // (2)
    "|<\\?[\\s\\S]*?(?:\\?>\\n*|$)" + // (3)
    "|<![A-Z][\\s\\S]*?(?:>\\n*|$)" + // (4)
    "|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)" + // (5)
    "|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (6)
    "|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (7) open tag
    "|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)" + // (7) closing tag
    ")",
  def: /^ {0,3}\[(label)\]: *(?:\n *)?<?([^\s>]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
  table: noopTest,
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // regex template, placeholders will be replaced according to different paragraph
  // interruption rules of commonmark and the original markdown spec:
  _paragraph:
    /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
  text: /^[^\n]+/,
}

block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/
block.def = edit(block.def)
  .replace("label", block._label)
  .replace("title", block._title)
  .getRegex()

block.bullet = /(?:[*+-]|\d{1,9}[.)])/
block.listItemStart = edit(/^( *)(bull) */)
  .replace("bull", block.bullet)
  .getRegex()

block.list = edit(block.list)
  .replace(/bull/g, block.bullet)
  .replace(
    "hr",
    "\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))"
  )
  .replace("def", "\\n+(?=" + block.def.source + ")")
  .getRegex()

block._tag =
  "address|article|aside|base|basefont|blockquote|body|caption" +
  "|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption" +
  "|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe" +
  "|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option" +
  "|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr" +
  "|track|ul"
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/
block.html = edit(block.html, "i")
  .replace("comment", block._comment)
  .replace("tag", block._tag)
  .replace(
    "attribute",
    / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/
  )
  .getRegex()

block.paragraph = edit(block._paragraph)
  .replace("hr", block.hr)
  .replace("heading", " {0,3}#{1,6} ")
  .replace("|lheading", "") // setext headings don't interrupt commonmark paragraphs
  .replace("|table", "")
  .replace("blockquote", " {0,3}>")
  .replace("fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n")
  .replace("list", " {0,3}(?:[*+-]|1[.)]) ") // only lists starting from 1 can interrupt
  .replace(
    "html",
    "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"
  )
  .replace("tag", block._tag) // pars can be interrupted by type (6) html blocks
  .getRegex()

block.blockquote = edit(block.blockquote)
  .replace("paragraph", block.paragraph)
  .getRegex()

/**
 * Normal Block Grammar
 */
block.normal = merge({}, block)
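
// The grammar variants that follow (gfm, pedantic) are shallow copies of the
// `normal` snapshot above with selected rules swapped out; the Lexer
// constructor picks one table per parse based on `options.pedantic` /
// `options.gfm`.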

/**
 * GFM Block Grammar
 */
block.gfm = merge({}, block.normal, {
  table:
    "^ *([^\\n ].*\\|.*)\\n" + // Header
    " {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?" + // Align
    "(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)", // Cells
})

block.gfm.table = edit(block.gfm.table)
  .replace("hr", block.hr)
  .replace("heading", " {0,3}#{1,6} ")
  .replace("blockquote", " {0,3}>")
  .replace("code", " {4}[^\\n]")
  .replace("fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n")
  .replace("list", " {0,3}(?:[*+-]|1[.)]) ") // only lists starting from 1 can interrupt
  .replace(
    "html",
    "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"
  )
  .replace("tag", block._tag) // tables can be interrupted by type (6) html blocks
  .getRegex()

block.gfm.paragraph = edit(block._paragraph)
  .replace("hr", block.hr)
  .replace("heading", " {0,3}#{1,6} ")
  .replace("|lheading", "") // setext headings don't interrupt commonmark paragraphs
  .replace("table", block.gfm.table) // interrupt paragraphs with table
  .replace("blockquote", " {0,3}>")
  .replace("fences", " {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n")
  .replace("list", " {0,3}(?:[*+-]|1[.)]) ") // only lists starting from 1 can interrupt
  .replace(
    "html",
    "</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)"
  )
  .replace("tag", block._tag) // pars can be interrupted by type (6) html blocks
  .getRegex()

/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 */
block.pedantic = merge({}, block.normal, {
  html: edit(
    "^ *(?:comment *(?:\\n|\\s*$)" +
      "|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)" + // closed tag
      "|<tag(?:\"[^\"]*\"|'[^']*'|\\s[^'\"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))"
  )
    .replace("comment", block._comment)
    .replace(
      /tag/g,
      "(?!(?:" +
        "a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub" +
        "|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)" +
        "\\b)\\w+(?!:|[^\\w\\s@]*@)\\b"
    )
    .getRegex(),
  def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
  heading: /^(#{1,6})(.*)(?:\n+|$)/,
  fences: noopTest, // fences not supported
  paragraph: edit(block.normal._paragraph)
    .replace("hr", block.hr)
    .replace("heading", " *#{1,6} *[^\n]")
    .replace("lheading", block.lheading)
    .replace("blockquote", " {0,3}>")
    .replace("|fences", "")
    .replace("|list", "")
    .replace("|html", "")
    .getRegex(),
})
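
// Sanity sketch (illustrative): with the composed tables,
//   block.heading.exec("## Title\n")
// yields ["## Title\n", "##", " Title"], which the `heading` tokenizer turns
// into { type: "heading", depth: 2, text: "Title", ... } after trimming.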

/**
 * Inline-Level Grammar
 */
const inline = {
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  url: noopTest,
  tag:
    "^comment" +
    "|^</[a-zA-Z][\\w:-]*\\s*>" + // closing tag
    "|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>" + // open tag
    "|^<\\?[\\s\\S]*?\\?>" + // processing instruction, e.g. <?php ?>
    "|^<![a-zA-Z]+\\s[\\s\\S]*?>" + // declaration, e.g. <!DOCTYPE html>
    "|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>", // CDATA section
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(ref)\]/,
  nolink: /^!?\[(ref)\](?:\[\])?/,
  reflinkSearch: "reflink|nolink(?!\\()",
  emStrong: {
    lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
    // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
    // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
    rDelimAst:
      /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
    rDelimUnd:
      /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/, // ^- Not allowed for _
  },
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  br: /^( {2,}|\\)\n(?!\s*$)/,
  del: noopTest,
  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  punctuation: /^([\spunctuation])/,
}

// list of punctuation marks from CommonMark spec
// without * and _ to handle the different emphasis markers * and _
inline._punctuation = "!\"#$%&'()+\\-.,/:;<=>?@\\[\\]`^{|}~"
inline.punctuation = edit(inline.punctuation)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex()

// sequences em should skip over [title](link), `code`, <html>
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g
inline.escapedEmSt = /\\\*|\\_/g

inline._comment = edit(block._comment).replace("(?:-->|$)", "-->").getRegex()

inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
  .replace(/punct/g, inline._punctuation)
  .getRegex()

inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, "g")
  .replace(/punct/g, inline._punctuation)
  .getRegex()
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, "g")
  .replace(/punct/g, inline._punctuation)
  .getRegex()

inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/
inline._email =
  /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/
inline.autolink = edit(inline.autolink)
  .replace("scheme", inline._scheme)
  .replace("email", inline._email)
  .getRegex()

inline._attribute =
  /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/

inline.tag = edit(inline.tag)
  .replace("comment", inline._comment)
  .replace("attribute", inline._attribute)
  .getRegex()

inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/

inline.link = edit(inline.link)
  .replace("label", inline._label)
  .replace("href", inline._href)
  .replace("title", inline._title)
  .getRegex()

inline.reflink = edit(inline.reflink)
  .replace("label", inline._label)
  .replace("ref", block._label)
  .getRegex()

inline.nolink = edit(inline.nolink).replace("ref", block._label).getRegex()

inline.reflinkSearch = edit(inline.reflinkSearch, "g")
  .replace("reflink", inline.reflink)
  .replace("nolink", inline.nolink)
  .getRegex()

/**
 * Normal Inline Grammar
 */
inline.normal = merge({}, inline)

/**
 * Pedantic Inline Grammar
 */
inline.pedantic = merge({}, inline.normal, {
  strong: {
    start: /^__|\*\*/,
    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    endAst: /\*\*(?!\*)/g,
    endUnd: /__(?!_)/g,
  },
  em: {
    start: /^_|\*/,
    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
    endAst: /\*(?!\*)/g,
    endUnd: /_(?!_)/g,
  },
  link: edit(/^!?\[(label)\]\((.*?)\)/)
    .replace("label", inline._label)
    .getRegex(),
  reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace("label", inline._label)
    .getRegex(),
})

/**
 * GFM Inline Grammar
 */
inline.gfm = merge({}, inline.normal, {
  escape: edit(inline.escape).replace("])", "~|])").getRegex(),
  _extended_email:
    /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
  url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
  _backpedal:
    /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
  del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
  text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/,
})

inline.gfm.url = edit(inline.gfm.url, "i")
  .replace("email", inline.gfm._extended_email)
  .getRegex()

/**
 * GFM + Line Breaks Inline Grammar
 */
inline.breaks = merge({}, inline.gfm, {
  br: edit(inline.br).replace("{2,}", "*").getRegex(),
  text: edit(inline.gfm.text)
    .replace("\\b_", "\\b_| {2,}\\n")
    .replace(/\{2,\}/g, "*")
    .getRegex(),
})

/**
 * smartypants text replacement
 * @param {string} text
 */
function smartypants(text) {
  return (
    text
      // em-dashes
      .replace(/---/g, "\u2014")
      // en-dashes
      .replace(/--/g, "\u2013")
      // opening singles
      .replace(/(^|[-\u2014/(\[{"\s])'/g, "$1\u2018")
      // closing singles & apostrophes
      .replace(/'/g, "\u2019")
      // opening doubles
      .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, "$1\u201c")
      // closing doubles
      .replace(/"/g, "\u201d")
      // ellipses
      .replace(/\.{3}/g, "\u2026")
  )
}

/**
 * mangle email addresses
 * @param {string} text
 */
function mangle(text) {
  let out = "",
    i,
    ch

  const l = text.length
  for (i = 0; i < l; i++) {
    ch = text.charCodeAt(i)
    if (Math.random() > 0.5) {
      ch = "x" + ch.toString(16)
    }
    out += "&#" + ch + ";"
  }

  return out
}
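
// Example transforms (illustrative): smartypants('"hi" -- there...') returns
// "\u201chi\u201d \u2013 there\u2026" (curly quotes, en dash, ellipsis).
// `mangle` output is intentionally randomized (Math.random), so mangled
// email entities differ between runs.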

/**
 * Block Lexer
 */
class Lexer {
  constructor(options) {
    this.tokens = []
    this.tokens.links = Object.create(null)
    this.options = options || defaults
    this.options.tokenizer = this.options.tokenizer || new Tokenizer()
    this.tokenizer = this.options.tokenizer
    this.tokenizer.options = this.options
    this.tokenizer.lexer = this
    this.inlineQueue = []
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true,
    }

    const rules = {
      block: block.normal,
      inline: inline.normal,
    }

    if (this.options.pedantic) {
      rules.block = block.pedantic
      rules.inline = inline.pedantic
    } else if (this.options.gfm) {
      rules.block = block.gfm
      if (this.options.breaks) {
        rules.inline = inline.breaks
      } else {
        rules.inline = inline.gfm
      }
    }
    this.tokenizer.rules = rules
  }

  /**
   * Expose Rules
   */
  static get rules() {
    return {
      block,
      inline,
    }
  }

  /**
   * Static Lex Method
   */
  static lex(src, options) {
    const lexer = new Lexer(options)
    return lexer.lex(src)
  }

  /**
   * Static Lex Inline Method
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options)
    return lexer.inlineTokens(src)
  }

  /**
   * Preprocessing
   */
  lex(src) {
    src = src.replace(/\r\n|\r/g, "\n")

    this.blockTokens(src, this.tokens)

    let next
    while ((next = this.inlineQueue.shift())) {
      this.inlineTokens(next.src, next.tokens)
    }

    return this.tokens
  }

  /**
   * Lexing
   */
  blockTokens(src, tokens = []) {
    if (this.options.pedantic) {
      src = src.replace(/\t/g, "    ").replace(/^ +$/gm, "")
    } else {
      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
        return leading + "    ".repeat(tabs.length)
      })
    }

    let token, lastToken, cutSrc, lastParagraphClipped

    while (src) {
      if (
        this.options.extensions &&
        this.options.extensions.block &&
        this.options.extensions.block.some(extTokenizer => {
          if ((token = extTokenizer.call({ lexer: this }, src, tokens))) {
            src = src.substring(token.raw.length)
            tokens.push(token)
            return true
          }
          return false
        })
      ) {
        continue
      }

      // newline
      if ((token = this.tokenizer.space(src))) {
        src = src.substring(token.raw.length)
        if (token.raw.length === 1 && tokens.length > 0) {
          // if there's a single \n as a spacer, it's terminating the last line,
          // so move it there so that we don't get unnecessary paragraph tags
          tokens[tokens.length - 1].raw += "\n"
        } else {
          tokens.push(token)
        }
        continue
      }

      // code
      if ((token = this.tokenizer.code(src))) {
        src = src.substring(token.raw.length)
        lastToken = tokens[tokens.length - 1]
        // An indented code block cannot interrupt a paragraph.
        if (
          lastToken &&
          (lastToken.type === "paragraph" || lastToken.type === "text")
        ) {
          lastToken.raw += "\n" + token.raw
          lastToken.text += "\n" + token.text
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text
        } else {
          tokens.push(token)
        }
        continue
      }

      // fences
      if ((token = this.tokenizer.fences(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // heading
      if ((token = this.tokenizer.heading(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // hr
      if ((token = this.tokenizer.hr(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // blockquote
      if ((token = this.tokenizer.blockquote(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // list
      if ((token = this.tokenizer.list(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // html
      if ((token = this.tokenizer.html(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // def
      if ((token = this.tokenizer.def(src))) {
        src = src.substring(token.raw.length)
        lastToken = tokens[tokens.length - 1]
        if (
          lastToken &&
          (lastToken.type === "paragraph" || lastToken.type === "text")
        ) {
          lastToken.raw += "\n" + token.raw
          lastToken.text += "\n" + token.raw
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text
        } else if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title,
          }
        }
        continue
      }

      // table (gfm)
      if ((token = this.tokenizer.table(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // lheading
      if ((token = this.tokenizer.lheading(src))) {
        src = src.substring(token.raw.length)
        tokens.push(token)
        continue
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity
        const tempSrc = src.slice(1)
        let tempStart
        this.options.extensions.startBlock.forEach(function (getStartIndex) {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc)
          if (typeof tempStart === "number" && tempStart >= 0) {
            startIndex = Math.min(startIndex, tempStart)
          }
        })
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1)
        }
      }
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1]
        if (lastParagraphClipped && lastToken.type === "paragraph") {
          lastToken.raw += "\n" + token.raw
          lastToken.text += "\n" + token.text
          this.inlineQueue.pop()
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text
        } else {
          tokens.push(token)
        }
        lastParagraphClipped = cutSrc.length !== src.length
        src = src.substring(token.raw.length)
        continue
      }

      // text
      if ((token = this.tokenizer.text(src))) {
        src = src.substring(token.raw.length)
        lastToken = tokens[tokens.length - 1]
        if (lastToken && lastToken.type === "text") {
          lastToken.raw += "\n" + token.raw
          lastToken.text += "\n" + token.text
          this.inlineQueue.pop()
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text
        } else {
          tokens.push(token)
        }
        continue
      }

      if (src) {
        const errMsg = "Infinite loop on byte: " + src.charCodeAt(0)
        if (this.options.silent) {
          console.error(errMsg)
          break
        } else {
          throw new Error(errMsg)
        }
      }
    }

    this.state.top = true
    return tokens
  }

  inline(src, tokens) {
    this.inlineQueue.push({ src, tokens })
  }

  /**
   * Lexing/Compiling
   */
  inlineTokens(src, tokens = []) {
    let token, lastToken, cutSrc

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src
    let match
    let keepPrevChar, prevChar

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links)
      if (links.length > 0) {
        while (
          (match =
            this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null
        ) {
          if (
            links.includes(match[0].slice(match[0].lastIndexOf("[") + 1, -1))
          ) {
            maskedSrc =
              maskedSrc.slice(0, match.index) +
              "[" +
              repeatString("a", match[0].length - 2) +
              "]" +
              maskedSrc.slice(
                this.tokenizer.rules.inline.reflinkSearch.lastIndex
              )
          }
        }
      }
    }
    // Mask out other blocks
    while (
      (match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null
    ) {
      mas