UNPKG

markdown-it

Version:

Markdown-it - modern pluggable markdown parser.

1,835 lines (1,708 loc) 163 kB
'use strict';

var mdurl = require('mdurl');
var ucmicro = require('uc.micro');
var entities = require('entities');
var LinkifyIt = require('linkify-it');
var punycode = require('punycode.js');

// Wrap a CommonJS export in a frozen, prototype-less namespace object.
// Every own enumerable key except `default` is exposed through a getter
// (an existing accessor descriptor is reused as is), and `default` points
// at the wrapped module itself.
function _interopNamespaceDefault(e) {
  var n = Object.create(null);
  if (e) {
    Object.keys(e).forEach(function (k) {
      if (k !== 'default') {
        var d = Object.getOwnPropertyDescriptor(e, k);
        Object.defineProperty(n, k, d.get ? d : {
          enumerable: true,
          get: function () { return e[k]; }
        });
      }
    });
  }
  n.default = e;
  return Object.freeze(n);
}

var mdurl__namespace = /*#__PURE__*/_interopNamespaceDefault(mdurl);
var ucmicro__namespace = /*#__PURE__*/_interopNamespaceDefault(ucmicro);

// Utilities
//

// Return the `[object Xxx]` tag of a value.
function _class(obj) { return Object.prototype.toString.call(obj); }

// True for string primitives and String objects.
function isString(obj) { return _class(obj) === '[object String]'; }

const _hasOwnProperty = Object.prototype.hasOwnProperty;

// Own-property check that does not depend on `object.hasOwnProperty`.
function has(object, key) {
  return _hasOwnProperty.call(object, key);
}

// Merge objects
//
// Copies own enumerable keys of every following argument into `obj`
// (later sources win) and returns `obj`. Falsy sources are skipped;
// any other non-object source throws a TypeError.
//
function assign(obj /* from1, from2, from3, ... */) {
  const sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (source) {
    if (!source) { return; }

    if (typeof source !== 'object') {
      // Separate the value from the message with a space.
      throw new TypeError(source + ' must be object');
    }

    Object.keys(source).forEach(function (key) {
      obj[key] = source[key];
    });
  });

  return obj;
}

// Remove element from array and put another array at those position.
// Useful for some operations with tokens
//
// Returns a new array in which the element at `pos` is replaced by the
// elements of `newElements`; `src` itself is not modified.
function arrayReplaceAt(src, pos, newElements) {
  return [].concat(src.slice(0, pos), newElements, src.slice(pos + 1));
}

// Check whether code point `c` is accepted as the value of a numeric
// character reference.
function isValidEntityCode(c) {
  /* eslint no-bitwise:0 */
  // broken sequence
  if (c >= 0xD800 && c <= 0xDFFF) { return false; }
  // never used
  if (c >= 0xFDD0 && c <= 0xFDEF) { return false; }
  if ((c & 0xFFFF) === 0xFFFF || (c & 0xFFFF) === 0xFFFE) { return false; }
  // control codes
  if (c >= 0x00 && c <= 0x08) { return false; }
  if (c === 0x0B) { return false; }
  if (c >= 0x0E && c <= 0x1F) { return false; }
  if (c >= 0x7F && c <= 0x9F) { return false; }
  // out of range
  if (c > 0x10FFFF) { return false; }
  return true;
}

// Convert a code point to a string, emitting a surrogate pair for
// values above 0xFFFF.
function fromCodePoint(c) {
  /* eslint no-bitwise:0 */
  if (c > 0xffff) {
    c -= 0x10000;
    const surrogate1 = 0xd800 + (c >> 10);
    const surrogate2 = 0xdc00 + (c & 0x3ff);

    return String.fromCharCode(surrogate1, surrogate2);
  }
  return String.fromCharCode(c);
}

const UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])/g;
const ENTITY_RE = /&([a-z#][a-z0-9]{1,31});/gi;
const UNESCAPE_ALL_RE = new RegExp(UNESCAPE_MD_RE.source + '|' + ENTITY_RE.source, 'gi');

const DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i;

// Replacement callback for one `&name;` match. `name` is the text between
// `&` and `;`. Numeric references are decoded locally (and left untouched
// when the code point is not valid); everything else is handed to
// `entities.decodeHTML`. The original match is returned when nothing decodes.
function replaceEntityPattern(match, name) {
  if (name.charCodeAt(0) === 0x23 /* # */ && DIGITAL_ENTITY_TEST_RE.test(name)) {
    const code = name[1].toLowerCase() === 'x'
      ? parseInt(name.slice(2), 16)
      : parseInt(name.slice(1), 10);

    if (isValidEntityCode(code)) {
      return fromCodePoint(code);
    }

    return match;
  }

  const decoded = entities.decodeHTML(match);
  if (decoded !== match) {
    return decoded;
  }

  return match;
}

/* function replaceEntities(str) {
  if (str.indexOf('&') < 0) { return str; }

  return str.replace(ENTITY_RE, replaceEntityPattern);
} */

// Remove the backslash from backslash-escaped markdown punctuation.
function unescapeMd(str) {
  if (str.indexOf('\\') < 0) { return str; }
  return str.replace(UNESCAPE_MD_RE, '$1');
}

// Resolve both backslash escapes and `&...;` entities in one pass.
function unescapeAll(str) {
  // Fast path: nothing to unescape.
  if (str.indexOf('\\') < 0 && str.indexOf('&') < 0) { return str; }

  return str.replace(UNESCAPE_ALL_RE, function (match, escaped, entity) {
    if (escaped) { return escaped; }
    return replaceEntityPattern(match, entity);
  });
}

const HTML_ESCAPE_TEST_RE = /[&<>"]/;
const HTML_ESCAPE_REPLACE_RE = /[&<>"]/g;
const HTML_REPLACEMENTS = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;'
};

function replaceUnsafeChar(ch) {
  return HTML_REPLACEMENTS[ch];
}

// Escape `&`, `<`, `>` and `"` for safe inclusion in HTML text/attributes.
function escapeHtml(str) {
  if (HTML_ESCAPE_TEST_RE.test(str)) {
    return str.replace(HTML_ESCAPE_REPLACE_RE, replaceUnsafeChar);
  }
  return str;
}

const REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;

// Backslash-escape regular expression metacharacters in `str`.
function escapeRE(str) {
  return str.replace(REGEXP_ESCAPE_RE, '\\$&');
}

// True for tab (0x09) and space (0x20) char codes.
function isSpace(code) {
  switch (code) {
    case 0x09:
    case 0x20:
      return true;
  }
  return false;
}

// Zs (unicode class) || [\t\f\v\r\n]
function isWhiteSpace(code) {
  if (code >= 0x2000 && code <= 0x200A) { return true; }
  switch (code) {
    case 0x09: // \t
    case 0x0A: // \n
    case 0x0B: // \v
    case 0x0C: // \f
    case 0x0D: // \r
    case 0x20:
    case 0xA0:
    case 0x1680:
    case 0x202F:
    case 0x205F:
    case 0x3000:
      return true;
  }
  return false;
}

/* eslint-disable max-len */

// Currently without astral characters support.
function isPunctChar(ch) {
  return ucmicro__namespace.P.test(ch) || ucmicro__namespace.S.test(ch);
}

// Markdown ASCII punctuation characters.
//
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
// Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
//
// `ch` is a char code (number), not a string.
function isMdAsciiPunct(ch) {
  switch (ch) {
    case 0x21/* ! */:
    case 0x22/* " */:
    case 0x23/* # */:
    case 0x24/* $ */:
    case 0x25/* % */:
    case 0x26/* & */:
    case 0x27/* ' */:
    case 0x28/* ( */:
    case 0x29/* ) */:
    case 0x2A/* * */:
    case 0x2B/* + */:
    case 0x2C/* , */:
    case 0x2D/* - */:
    case 0x2E/* . */:
    case 0x2F/* / */:
    case 0x3A/* : */:
    case 0x3B/* ; */:
    case 0x3C/* < */:
    case 0x3D/* = */:
    case 0x3E/* > */:
    case 0x3F/* ? */:
    case 0x40/* @ */:
    case 0x5B/* [ */:
    case 0x5C/* \ */:
    case 0x5D/* ] */:
    case 0x5E/* ^ */:
    case 0x5F/* _ */:
    case 0x60/* ` */:
    case 0x7B/* { */:
    case 0x7C/* | */:
    case 0x7D/* } */:
    case 0x7E/* ~ */:
      return true;
    default:
      return false;
  }
}

// Helper to unify [reference labels].
//
// Returns the label trimmed, with whitespace runs collapsed to one space,
// and case-folded to upper case.
//
function normalizeReference(str) {
  // Trim and collapse whitespace
  //
  str = str.trim().replace(/\s+/g, ' ');

  // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
  // fixed in v12 (couldn't find any details).
  //
  // So treat this one as a special case
  // (remove this when node v10 is no longer supported).
  //
  if ('ẞ'.toLowerCase() === 'Ṿ') {
    str = str.replace(/ẞ/g, 'ß');
  }

  // .toLowerCase().toUpperCase() should get rid of all differences
  // between letter variants.
  //
  // Simple .toLowerCase() doesn't normalize 125 code points correctly,
  // and .toUpperCase doesn't normalize 6 of them (list of exceptions:
  // İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
  // uppercased versions).
  //
  // Here's an example showing how it happens. Let's take the greek letter theta:
  // uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
  //
  // Unicode entries:
  // 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8;
  // 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
  // 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
  // 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8;
  //
  // Case-insensitive comparison should treat all of them as equivalent.
  //
  // But .toLowerCase() doesn't change ϑ (it's already lowercase),
  // and .toUpperCase() doesn't change ϴ (already uppercase).
  //
  // Applying first lower then upper case normalizes any character:
  // '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
  //
  // Note: this is equivalent to unicode case folding; unicode normalization
  // is a different step that is not required here.
  //
  // Final result should be uppercased, because it's later stored in an object
  // (this avoid a conflict with Object.prototype members,
  // most notably, `__proto__`)
  //
  return str.toLowerCase().toUpperCase();
}

// Re-export libraries commonly used in both markdown-it and its plugins,
// so plugins won't have to depend on them explicitly, which reduces their
// bundled size (e.g. a browser build).
//
const lib = {
  mdurl: mdurl__namespace,
  ucmicro: ucmicro__namespace
};

var utils = /*#__PURE__*/Object.freeze({
  __proto__: null,
  arrayReplaceAt: arrayReplaceAt,
  assign: assign,
  escapeHtml: escapeHtml,
  escapeRE: escapeRE,
  fromCodePoint: fromCodePoint,
  has: has,
  isMdAsciiPunct: isMdAsciiPunct,
  isPunctChar: isPunctChar,
  isSpace: isSpace,
  isString: isString,
  isValidEntityCode: isValidEntityCode,
  isWhiteSpace: isWhiteSpace,
  lib: lib,
  normalizeReference: normalizeReference,
  unescapeAll: unescapeAll,
  unescapeMd: unescapeMd
});

// Parse link label
//
// this function assumes that first character ("[") already matches;
// returns the end of the label
//
// Returns -1 when no closing "]" is found, or when `disableNested` is set
// and a "[" that starts a longer token is met. `state.pos` is restored
// before returning.
//
function parseLinkLabel(state, start, disableNested) {
  let level, found, marker, prevPos;

  const max = state.posMax;
  const oldPos = state.pos;

  state.pos = start + 1;
  level = 1;

  while (state.pos < max) {
    marker = state.src.charCodeAt(state.pos);
    if (marker === 0x5D /* ] */) {
      level--;
      if (level === 0) {
        found = true;
        break;
      }
    }

    prevPos = state.pos;
    state.md.inline.skipToken(state);
    if (marker === 0x5B /* [ */) {
      if (prevPos === state.pos - 1) {
        // increase level if we find text `[`, which is not a part of any token
        level++;
      } else if (disableNested) {
        state.pos = oldPos;
        return -1;
      }
    }
  }

  let labelEnd = -1;

  if (found) {
    labelEnd = state.pos;
  }

  // restore old state
  state.pos = oldPos;

  return labelEnd;
}

// Parse link destination
//
// Scans `str` from `start` up to (not including) `max` and returns
// `{ ok, pos, str }`: `ok` tells whether a destination was found, `pos` is
// the position right after it and `str` is the unescaped destination.
//
function parseLinkDestination(str, start, max) {
  let code;
  let pos = start;

  const result = {
    ok: false,
    pos: 0,
    str: ''
  };

  if (str.charCodeAt(pos) === 0x3C /* < */) {
    pos++;
    while (pos < max) {
      code = str.charCodeAt(pos);
      if (code === 0x0A /* \n */) { return result; }
      if (code === 0x3C /* < */) { return result; }
      if (code === 0x3E /* > */) {
        result.pos = pos + 1;
        result.str = unescapeAll(str.slice(start + 1, pos));
        result.ok = true;
        return result;
      }
      if (code === 0x5C /* \ */ && pos + 1 < max) {
        // skip the escaped character
        pos += 2;
        continue;
      }

      pos++;
    }

    // no closing '>'
    return result;
  }

  // this should be ... } else { ... branch

  let level = 0;
  while (pos < max) {
    code = str.charCodeAt(pos);

    if (code === 0x20) { break; }

    // ascii control characters
    if (code < 0x20 || code === 0x7F) { break; }

    if (code === 0x5C /* \ */ && pos + 1 < max) {
      if (str.charCodeAt(pos + 1) === 0x20) { break; }
      pos += 2;
      continue;
    }

    if (code === 0x28 /* ( */) {
      level++;
      // give up on too deeply nested parentheses
      if (level > 32) { return result; }
    }

    if (code === 0x29 /* ) */) {
      if (level === 0) { break; }
      level--;
    }

    pos++;
  }

  if (start === pos) { return result; }
  if (level !== 0) { return result; }

  result.str = unescapeAll(str.slice(start, pos));
  result.pos = pos;
  result.ok = true;
  return result;
}

// Parse link title
//
// Parse link title within `str` in [start, max] range,
// or continue previous parsing if `prev_state` is defined (equal to result of last execution).
//
function parseLinkTitle(str, start, max, prev_state) {
  let code;
  let pos = start;

  const state = {
    // if `true`, this is a valid link title
    ok: false,
    // if `true`, this link can be continued on the next line
    can_continue: false,
    // if `ok`, it's the position of the first character after the closing marker
    pos: 0,
    // if `ok`, it's the unescaped title
    str: '',
    // expected closing marker character code
    marker: 0
  };

  if (prev_state) {
    // this is a continuation of a previous parseLinkTitle call on the next line,
    // used in reference links only
    state.str = prev_state.str;
    state.marker = prev_state.marker;
  } else {
    if (pos >= max) { return state; }

    let marker = str.charCodeAt(pos);
    if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return state; }

    start++;
    pos++;

    // if opening marker is "(", switch it to closing marker ")"
    if (marker === 0x28) { marker = 0x29; }

    state.marker = marker;
  }

  while (pos < max) {
    code = str.charCodeAt(pos);
    if (code === state.marker) {
      state.pos = pos + 1;
      state.str += unescapeAll(str.slice(start, pos));
      state.ok = true;
      return state;
    } else if (code === 0x28 /* ( */ && state.marker === 0x29 /* ) */) {
      return state;
    } else if (code === 0x5C /* \ */ && pos + 1 < max) {
      // skip the escaped character
      pos++;
    }

    pos++;
  }

  // no closing marker found, but this link title may continue on the next line (for references)
  state.can_continue = true;
  state.str += unescapeAll(str.slice(start, pos));
  return state;
}

// Just a shortcut for bulk export

var helpers = /*#__PURE__*/Object.freeze({
  __proto__: null,
  parseLinkDestination: parseLinkDestination,
  parseLinkLabel: parseLinkLabel,
  parseLinkTitle: parseLinkTitle
});

/**
 * class Renderer
 *
 * Generates HTML from parsed token stream. Each instance has independent
 * copy of rules. Those can be rewritten with ease. Also, you can add new
 * rules if you create plugin and adds new token types.
 **/

const default_rules = {};

default_rules.code_inline = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];

  return '<code' + slf.renderAttrs(token) + '>' +
          escapeHtml(token.content) +
          '</code>';
};

default_rules.code_block = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];

  return '<pre' + slf.renderAttrs(token) + '><code>' +
          escapeHtml(token.content) +
          '</code></pre>\n';
};

default_rules.fence = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];
  const info = token.info ? unescapeAll(token.info).trim() : '';
  let langName = '';
  let langAttrs = '';

  if (info) {
    // The capturing group keeps the separators in the result, so the
    // language name is element 0 and the attributes start at element 2.
    const arr = info.split(/(\s+)/g);
    langName = arr[0];
    langAttrs = arr.slice(2).join('');
  }

  let highlighted;
  if (options.highlight) {
    highlighted = options.highlight(token.content, langName, langAttrs) || escapeHtml(token.content);
  } else {
    highlighted = escapeHtml(token.content);
  }

  // The highlighter already produced the wrapper; emit it untouched.
  if (highlighted.indexOf('<pre') === 0) {
    return highlighted + '\n';
  }

  // If language exists, inject class gently, without modifying original token.
  // May be, one day we will add .deepClone() for token and simplify this part, but
  // now we prefer to keep things local.
  if (info) {
    const i = token.attrIndex('class');
    const tmpAttrs = token.attrs ? token.attrs.slice() : [];

    if (i < 0) {
      tmpAttrs.push(['class', options.langPrefix + langName]);
    } else {
      tmpAttrs[i] = tmpAttrs[i].slice();
      tmpAttrs[i][1] += ' ' + options.langPrefix + langName;
    }

    // Fake token just to render attributes
    const tmpToken = {
      attrs: tmpAttrs
    };

    return `<pre><code${slf.renderAttrs(tmpToken)}>${highlighted}</code></pre>\n`;
  }

  return `<pre><code${slf.renderAttrs(token)}>${highlighted}</code></pre>\n`;
};

default_rules.image = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];

  // "alt" attr MUST be set, even if empty. Because it's mandatory and
  // should be placed on proper position for tests.
  //
  // Replace content with actual value
  const altIdx = token.attrIndex('alt');
  const altText = slf.renderInlineAsText(token.children, options, env);

  if (altIdx < 0) {
    // A token without an "alt" attribute would otherwise make the
    // assignment below index `attrs[-1]` and throw; add the attribute.
    token.attrPush(['alt', altText]);
  } else {
    token.attrs[altIdx][1] = altText;
  }

  return slf.renderToken(tokens, idx, options);
};

default_rules.hardbreak = function (tokens, idx, options /*, env */) {
  return options.xhtmlOut ? '<br />\n' : '<br>\n';
};
default_rules.softbreak = function (tokens, idx, options /*, env */) {
  return options.breaks ? (options.xhtmlOut ? '<br />\n' : '<br>\n') : '\n';
};

default_rules.text = function (tokens, idx /*, options, env */) {
  return escapeHtml(tokens[idx].content);
};

default_rules.html_block = function (tokens, idx /*, options, env */) {
  return tokens[idx].content;
};
default_rules.html_inline = function (tokens, idx /*, options, env */) {
  return tokens[idx].content;
};

/**
 * new Renderer()
 *
 * Creates new [[Renderer]] instance and fill [[Renderer#rules]] with defaults.
 **/
function Renderer() {
  /**
   * Renderer#rules -> Object
   *
   * Contains render rules for tokens. Can be updated and extended.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')();
   *
   * md.renderer.rules.strong_open  = function () { return '<b>'; };
   * md.renderer.rules.strong_close = function () { return '</b>'; };
   *
   * var result = md.renderInline(...);
   * ```
   *
   * Each rule is called as independent static function with fixed signature:
   *
   * ```javascript
   * function my_token_render(tokens, idx, options, env, renderer) {
   *   // ...
   *   return renderedHTML;
   * }
   * ```
   *
   * See [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.mjs)
   * for more details and examples.
   **/
  this.rules = assign({}, default_rules);
}

/**
 * Renderer.renderAttrs(token) -> String
 *
 * Render token attributes to string.
 **/
Renderer.prototype.renderAttrs = function renderAttrs(token) {
  let i, l, result;

  if (!token.attrs) { return ''; }

  result = '';

  for (i = 0, l = token.attrs.length; i < l; i++) {
    result += ' ' + escapeHtml(token.attrs[i][0]) + '="' + escapeHtml(token.attrs[i][1]) + '"';
  }

  return result;
};

/**
 * Renderer.renderToken(tokens, idx, options) -> String
 * - tokens (Array): list of tokens
 * - idx (Number): token index to render
 * - options (Object): params of parser instance
 *
 * Default token renderer. Can be overridden by custom function
 * in [[Renderer#rules]].
 **/
Renderer.prototype.renderToken = function renderToken(tokens, idx, options) {
  const token = tokens[idx];
  let result = '';

  // Tight list paragraphs
  if (token.hidden) {
    return '';
  }

  // Insert a newline between hidden paragraph and subsequent opening
  // block-level tag.
  //
  // For example, here we should insert a newline before blockquote:
  //  - a
  //    >
  //
  if (token.block && token.nesting !== -1 && idx && tokens[idx - 1].hidden) {
    result += '\n';
  }

  // Add token name, e.g. `<img`
  result += (token.nesting === -1 ? '</' : '<') + token.tag;

  // Encode attributes, e.g. `<img src="foo"`
  result += this.renderAttrs(token);

  // Add a slash for self-closing tags, e.g. `<img src="foo" /`
  if (token.nesting === 0 && options.xhtmlOut) {
    result += ' /';
  }

  // Check if we need to add a newline after this tag
  let needLf = false;
  if (token.block) {
    needLf = true;

    if (token.nesting === 1) {
      if (idx + 1 < tokens.length) {
        const nextToken = tokens[idx + 1];

        if (nextToken.type === 'inline' || nextToken.hidden) {
          // Block-level tag containing an inline tag.
          //
          needLf = false;
        } else if (nextToken.nesting === -1 && nextToken.tag === token.tag) {
          // Opening tag + closing tag of the same type. E.g. `<li></li>`.
          //
          needLf = false;
        }
      }
    }
  }

  result += needLf ? '>\n' : '>';

  return result;
};

/**
 * Renderer.renderInline(tokens, options, env) -> String
 * - tokens (Array): list on block tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * The same as [[Renderer.render]], but for single token of `inline` type.
 **/
Renderer.prototype.renderInline = function (tokens, options, env) {
  let result = '';
  const rules = this.rules;

  for (let i = 0, len = tokens.length; i < len; i++) {
    const type = tokens[i].type;

    if (typeof rules[type] !== 'undefined') {
      result += rules[type](tokens, i, options, env, this);
    } else {
      result += this.renderToken(tokens, i, options);
    }
  }

  return result;
};
/** internal
 * Renderer.renderInlineAsText(tokens, options, env) -> String
 * - tokens (Array): list on block tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * Special kludge for image `alt` attributes to conform CommonMark spec.
 * Don't try to use it! Spec requires to show `alt` content with stripped markup,
 * instead of simple escaping.
 **/
Renderer.prototype.renderInlineAsText = function (tokens, options, env) {
  let result = '';

  for (let i = 0, len = tokens.length; i < len; i++) {
    switch (tokens[i].type) {
      case 'text':
        result += tokens[i].content;
        break;
      case 'image':
        result += this.renderInlineAsText(tokens[i].children, options, env);
        break;
      case 'html_inline':
      case 'html_block':
        result += tokens[i].content;
        break;
      case 'softbreak':
      case 'hardbreak':
        result += '\n';
        break;
      // all other tokens are skipped
    }
  }

  return result;
};

/**
 * Renderer.render(tokens, options, env) -> String
 * - tokens (Array): list on block tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * Takes token stream and generates HTML. Probably, you will never need to call
 * this method directly.
 **/
Renderer.prototype.render = function (tokens, options, env) {
  let result = '';
  const rules = this.rules;

  for (let i = 0, len = tokens.length; i < len; i++) {
    const type = tokens[i].type;

    if (type === 'inline') {
      result += this.renderInline(tokens[i].children, options, env);
    } else if (typeof rules[type] !== 'undefined') {
      result += rules[type](tokens, i, options, env, this);
    } else {
      result += this.renderToken(tokens, i, options, env);
    }
  }

  return result;
};

/**
 * class Ruler
 *
 * Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
 * [[MarkdownIt#inline]] to manage sequences of functions (rules):
 *
 * - keep rules in defined order
 * - assign the name to each rule
 * - enable/disable rules
 * - add/replace rules
 * - allow assign rules to additional named chains (in the same)
 * - caching lists of active rules
 *
 * You will not need use this class directly until write plugins. For simple
 * rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
 * [[MarkdownIt.use]].
 **/

/**
 * new Ruler()
 **/
function Ruler() {
  // List of added rules. Each element is:
  //
  // {
  //   name: XXX,
  //   enabled: Boolean,
  //   fn: Function(),
  //   alt: [ name2, name3 ]
  // }
  //
  this.__rules__ = [];

  // Cached rule chains.
  //
  // First level - chain name, '' for default.
  // Second level - digital anchor for fast filtering by charcodes.
  //
  this.__cache__ = null;
}

// Helper methods, should not be used directly

// Find rule index by name
//
Ruler.prototype.__find__ = function (name) {
  for (let i = 0; i < this.__rules__.length; i++) {
    if (this.__rules__[i].name === name) {
      return i;
    }
  }
  return -1;
};

// Build rules lookup cache
//
// The default chain ('') receives every enabled rule; each named chain
// receives only the enabled rules that list it in `alt`.
//
Ruler.prototype.__compile__ = function () {
  const self = this;
  const chains = [''];

  // collect unique names
  self.__rules__.forEach(function (rule) {
    if (!rule.enabled) { return; }

    rule.alt.forEach(function (altName) {
      if (chains.indexOf(altName) < 0) {
        chains.push(altName);
      }
    });
  });

  self.__cache__ = {};

  chains.forEach(function (chain) {
    self.__cache__[chain] = [];
    self.__rules__.forEach(function (rule) {
      if (!rule.enabled) { return; }

      if (chain && rule.alt.indexOf(chain) < 0) { return; }

      self.__cache__[chain].push(rule.fn);
    });
  });
};
/**
 * Ruler.at(name, fn [, options])
 * - name (String): rule name to replace.
 * - fn (Function): new rule function.
 * - options (Object): new rule options (not mandatory).
 *
 * Replace rule by name with new function & options. Throws error if name not
 * found.
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * Replace existing typographer replacement rule with new one:
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.core.ruler.at('replacements', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.at = function (name, fn, options) {
  const index = this.__find__(name);
  const opt = options || {};

  if (index === -1) { throw new Error('Parser rule not found: ' + name); }

  this.__rules__[index].fn = fn;
  this.__rules__[index].alt = opt.alt || [];
  // invalidate compiled chains
  this.__cache__ = null;
};

/**
 * Ruler.before(beforeName, ruleName, fn [, options])
 * - beforeName (String): new rule will be added before this one.
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Add new rule to chain before one with given name. See also
 * [[Ruler.after]], [[Ruler.push]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.block.ruler.before('paragraph', 'my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.before = function (beforeName, ruleName, fn, options) {
  const index = this.__find__(beforeName);
  const opt = options || {};

  if (index === -1) { throw new Error('Parser rule not found: ' + beforeName); }

  this.__rules__.splice(index, 0, {
    name: ruleName,
    enabled: true,
    fn,
    alt: opt.alt || []
  });

  // invalidate compiled chains
  this.__cache__ = null;
};
/**
 * Ruler.after(afterName, ruleName, fn [, options])
 * - afterName (String): new rule will be added after this one.
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Add new rule to chain after one with given name. See also
 * [[Ruler.before]], [[Ruler.push]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.inline.ruler.after('text', 'my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.after = function (afterName, ruleName, fn, options) {
  const index = this.__find__(afterName);
  const opt = options || {};

  if (index === -1) { throw new Error('Parser rule not found: ' + afterName); }

  this.__rules__.splice(index + 1, 0, {
    name: ruleName,
    enabled: true,
    fn,
    alt: opt.alt || []
  });

  // invalidate compiled chains
  this.__cache__ = null;
};

/**
 * Ruler.push(ruleName, fn [, options])
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Push new rule to the end of chain. See also
 * [[Ruler.before]], [[Ruler.after]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.core.ruler.push('my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.push = function (ruleName, fn, options) {
  const opt = options || {};

  this.__rules__.push({
    name: ruleName,
    enabled: true,
    fn,
    alt: opt.alt || []
  });

  // invalidate compiled chains
  this.__cache__ = null;
};
/**
 * Ruler.enable(list [, ignoreInvalid]) -> Array
 * - list (String|Array): list of rule names to enable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable rules with given names. If any rule name not found - throw Error.
 * Errors can be disabled by second param.
 *
 * Returns list of found rule names (if no exception happened).
 *
 * See also [[Ruler.disable]], [[Ruler.enableOnly]].
 **/
Ruler.prototype.enable = function (list, ignoreInvalid) {
  if (!Array.isArray(list)) { list = [list]; }

  const result = [];

  // Search by name and enable
  list.forEach(function (name) {
    const idx = this.__find__(name);

    if (idx < 0) {
      if (ignoreInvalid) { return; }
      throw new Error('Rules manager: invalid rule name ' + name);
    }
    this.__rules__[idx].enabled = true;
    result.push(name);
  }, this);

  // invalidate compiled chains
  this.__cache__ = null;
  return result;
};

/**
 * Ruler.enableOnly(list [, ignoreInvalid])
 * - list (String|Array): list of rule names to enable (whitelist).
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable rules with given names, and disable everything else. If any rule name
 * not found - throw Error. Errors can be disabled by second param.
 *
 * See also [[Ruler.disable]], [[Ruler.enable]].
 **/
Ruler.prototype.enableOnly = function (list, ignoreInvalid) {
  if (!Array.isArray(list)) { list = [list]; }

  this.__rules__.forEach(function (rule) { rule.enabled = false; });

  this.enable(list, ignoreInvalid);
};
/**
 * Ruler.disable(list [, ignoreInvalid]) -> Array
 * - list (String|Array): list of rule names to disable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Disable rules with given names. If any rule name not found - throw Error.
 * Errors can be disabled by second param.
 *
 * Returns list of found rule names (if no exception happened).
 *
 * See also [[Ruler.enable]], [[Ruler.enableOnly]].
 **/
Ruler.prototype.disable = function (list, ignoreInvalid) {
  if (!Array.isArray(list)) { list = [list]; }

  const result = [];

  // Search by name and disable
  list.forEach(function (name) {
    const idx = this.__find__(name);

    if (idx < 0) {
      if (ignoreInvalid) { return; }
      throw new Error('Rules manager: invalid rule name ' + name);
    }
    this.__rules__[idx].enabled = false;
    result.push(name);
  }, this);

  // invalidate compiled chains
  this.__cache__ = null;
  return result;
};

/**
 * Ruler.getRules(chainName) -> Array
 *
 * Return array of active functions (rules) for given chain name. It analyzes
 * rules configuration, compiles caches if not exists and returns result.
 *
 * Default chain name is `''` (empty string). It can't be skipped. That's
 * done intentionally, to keep signature monomorphic for high speed.
 **/
Ruler.prototype.getRules = function (chainName) {
  if (this.__cache__ === null) {
    this.__compile__();
  }

  // Chain can be empty, if rules disabled. But we still have to return Array.
  return this.__cache__[chainName] || [];
};

// Token class

/**
 * class Token
 **/

/**
 * new Token(type, tag, nesting)
 *
 * Create new token and fill passed properties.
 **/
function Token(type, tag, nesting) {
  /**
   * Token#type -> String
   *
   * Type of the token (string, e.g. "paragraph_open")
   **/
  this.type = type;

  /**
   * Token#tag -> String
   *
   * html tag name, e.g. "p"
   **/
  this.tag = tag;

  /**
   * Token#attrs -> Array
   *
   * Html attributes. Format: `[ [ name1, value1 ], [ name2, value2 ] ]`
   **/
  this.attrs = null;

  /**
   * Token#map -> Array
   *
   * Source map info. Format: `[ line_begin, line_end ]`
   **/
  this.map = null;

  /**
   * Token#nesting -> Number
   *
   * Level change (number in {-1, 0, 1} set), where:
   *
   * -  `1` means the tag is opening
   * -  `0` means the tag is self-closing
   * - `-1` means the tag is closing
   **/
  this.nesting = nesting;

  /**
   * Token#level -> Number
   *
   * nesting level, the same as `state.level`
   **/
  this.level = 0;

  /**
   * Token#children -> Array
   *
   * An array of child nodes (inline and img tokens)
   **/
  this.children = null;

  /**
   * Token#content -> String
   *
   * In a case of self-closing tag (code, html, fence, etc.),
   * it has contents of this tag.
   **/
  this.content = '';

  /**
   * Token#markup -> String
   *
   * '*' or '_' for emphasis, fence string for fence, etc.
   **/
  this.markup = '';

  /**
   * Token#info -> String
   *
   * Additional information:
   *
   * - Info string for "fence" tokens
   * - The value "auto" for autolink "link_open" and "link_close" tokens
   * - The string value of the item marker for ordered-list "list_item_open" tokens
   **/
  this.info = '';

  /**
   * Token#meta -> Object
   *
   * A place for plugins to store an arbitrary data
   **/
  this.meta = null;

  /**
   * Token#block -> Boolean
   *
   * True for block-level tokens, false for inline tokens.
   * Used in renderer to calculate line breaks
   **/
  this.block = false;

  /**
   * Token#hidden -> Boolean
   *
   * If it's true, ignore this element when rendering. Used for tight lists
   * to hide paragraphs.
   **/
  this.hidden = false;
}

/**
 * Token.attrIndex(name) -> Number
 *
 * Search attribute index by name. Returns -1 when the token has no
 * attributes or none is named `name`.
 **/
Token.prototype.attrIndex = function attrIndex(name) {
  if (!this.attrs) { return -1; }

  const attrs = this.attrs;

  for (let i = 0, len = attrs.length; i < len; i++) {
    if (attrs[i][0] === name) { return i; }
  }
  return -1;
};
/**
 * Token.attrPush(attrData)
 *
 * Add `[ name, value ]` attribute to list. Init attrs if necessary
 **/
Token.prototype.attrPush = function attrPush(attrData) {
  if (this.attrs) {
    this.attrs.push(attrData);
  } else {
    this.attrs = [attrData];
  }
};

/**
 * Token.attrSet(name, value)
 *
 * Set `name` attribute to `value`. Override old value if exists.
 **/
Token.prototype.attrSet = function attrSet(name, value) {
  const idx = this.attrIndex(name);
  const attrData = [name, value];

  if (idx < 0) {
    this.attrPush(attrData);
  } else {
    this.attrs[idx] = attrData;
  }
};

/**
 * Token.attrGet(name)
 *
 * Get the value of attribute `name`, or null if it does not exist.
 **/
Token.prototype.attrGet = function attrGet(name) {
  const idx = this.attrIndex(name);
  let value = null;
  if (idx >= 0) {
    value = this.attrs[idx][1];
  }
  return value;
};

/**
 * Token.attrJoin(name, value)
 *
 * Join value to existing attribute via space. Or create new attribute if not
 * exists. Useful to operate with token classes.
 **/
Token.prototype.attrJoin = function attrJoin(name, value) {
  const idx = this.attrIndex(name);

  if (idx < 0) {
    this.attrPush([name, value]);
  } else {
    this.attrs[idx][1] = this.attrs[idx][1] + ' ' + value;
  }
};

// Core state object
//

function StateCore(src, md, env) {
  this.src = src;
  this.env = env;
  this.tokens = [];
  this.inlineMode = false;
  this.md = md; // link to parser instance
}

// re-export Token class to use in core rules
StateCore.prototype.Token = Token;

// Normalize input string

// https://spec.commonmark.org/0.29/#line-ending
const NEWLINES_RE = /\r\n?|\n/g;
const NULL_RE = /\0/g;

// Core rule: rewrite `state.src` with `\n` line endings and U+FFFD in
// place of NULL characters.
function normalize(state) {
  let str;

  // Normalize newlines
  str = state.src.replace(NEWLINES_RE, '\n');

  // Replace NULL characters
  str = str.replace(NULL_RE, '\uFFFD');

  state.src = str;
}

// Core rule: in inline mode wrap the whole source into a single `inline`
// token, otherwise run the block parser over it.
function block(state) {
  let token;

  if (state.inlineMode) {
    token = new state.Token('inline', '', 0);
    token.content = state.src;
    token.map = [0, 1];
    token.children = [];
    state.tokens.push(token);
  } else {
    state.md.block.parse(state.src, state.md, state.env, state.tokens);
  }
}

// Core rule: run the inline parser on the content of every `inline` token.
function inline(state) {
  const tokens = state.tokens;

  // Parse inlines
  for (let i = 0, l = tokens.length; i < l; i++) {
    const tok = tokens[i];
    if (tok.type === 'inline') {
      state.md.inline.parse(tok.content, state.md, state.env, tok.children);
    }
  }
}

// Replace link-like texts with link nodes.
//
// Currently restricted by `md.validateLink()` to http/https/ftp
//

function isLinkOpen$1(str) {
  return /^<a[>\s]/i.test(str);
}
function isLinkClose$1(str) {
  return /^<\/a\s*>/i.test(str);
}

// Core rule: when `options.linkify` is set, split `text` tokens that contain
// link-like substrings into text / link_open / text / link_close sequences.
function linkify$1(state) {
  const blockTokens = state.tokens;

  if (!state.md.options.linkify) { return; }

  for (let j = 0, l = blockTokens.length; j < l; j++) {
    if (blockTokens[j].type !== 'inline' ||
        !state.md.linkify.pretest(blockTokens[j].content)) {
      continue;
    }

    let tokens = blockTokens[j].children;

    let htmlLinkLevel = 0;

    // We scan from the end, to keep position when new tags added.
    // Use reversed logic in links start/end match
    for (let i = tokens.length - 1; i >= 0; i--) {
      const currentToken = tokens[i];

      // Skip content of markdown links
      if (currentToken.type === 'link_close') {
        i--;
        // `i >= 0` keeps the scan inside the array when no matching
        // opening token precedes this `link_close`.
        while (i >= 0 && tokens[i].level !== currentToken.level && tokens[i].type !== 'link_open') {
          i--;
        }
        continue;
      }

      // Skip content of html tag links
      if (currentToken.type === 'html_inline') {
        if (isLinkOpen$1(currentToken.content) && htmlLinkLevel > 0) {
          htmlLinkLevel--;
        }
        if (isLinkClose$1(currentToken.content)) {
          htmlLinkLevel++;
        }
      }
      if (htmlLinkLevel > 0) { continue; }

      if (currentToken.type === 'text' && state.md.linkify.test(currentToken.content)) {
        const text = currentToken.content;
        let links = state.md.linkify.match(text);

        // Now split string to nodes
        const nodes = [];
        let level = currentToken.level;
        let lastPos = 0;

        // forbid escape sequence at the start of the string,
        // this avoids http\://example.com/ from being linkified as
        // http:<a href="//example.com/">//example.com/</a>
        if (links.length > 0 &&
            links[0].index === 0 &&
            i > 0 &&
            tokens[i - 1].type === 'text_special') {
          links = links.slice(1);
        }

        for (let ln = 0; ln < links.length; ln++) {
          const url = links[ln].url;
          const fullUrl = state.md.normalizeLink(url);
          if (!state.md.validateLink(fullUrl)) { continue; }

          let urlText = links[ln].text;

          // Linkifier might send raw hostnames like "example.com", where url
          // starts with domain name. So we prepend http:// in those cases,
          // and remove it afterwards.
          //
          if (!links[ln].schema) {
            urlText = state.md.normalizeLinkText('http://' + urlText).replace(/^http:\/\//, '');
          } else if (links[ln].schema === 'mailto:' && !/^mailto:/i.test(urlText)) {
            urlText = state.md.normalizeLinkText('mailto:' + urlText).replace(/^mailto:/, '');
          } else {
            urlText = state.md.normalizeLinkText(urlText);
          }

          const pos = links[ln].index;

          // plain text between the previous link and this one
          if (pos > lastPos) {
            const token = new state.Token('text', '', 0);
            token.content = text.slice(lastPos, pos);
            token.level = level;
            nodes.push(token);
          }

          const token_o = new state.Token('link_open', 'a', 1);
          token_o.attrs = [['href', fullUrl]];
          token_o.level = level++;
          token_o.markup = 'linkify';
          token_o.info = 'auto';
          nodes.push(token_o);

          const token_t = new state.Token('text', '', 0);
          token_t.content = urlText;
          token_t.level = level;
          nodes.push(token_t);

          const token_c = new state.Token('link_close', 'a', -1);
          token_c.level = --level;
          token_c.markup = 'linkify';
          token_c.info = 'auto';
          nodes.push(token_c);

          lastPos = links[ln].lastIndex;
        }

        // plain text after the last link
        if (lastPos < text.length) {
          const token = new state.Token('text', '', 0);
          token.content = text.slice(lastPos);
          token.level = level;
          nodes.push(token);
        }

        // replace current node
        blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes);
      }
    }
  }
}

// Simple typographic replacements
//
// (c) (C) → ©
// (tm) (TM) → ™
// (r) (R) → ®
// +- → ±
// ... → … (also ?.... → ?.., !.... → !..)
// ???????? → ???, !!!!!
// → !!!, `,,` → `,`
// -- → &ndash;, --- → &mdash;
//
// TODO:
// - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
// - multiplications 2 x 4 -> 2 × 4

const RARE_RE = /\+-|\.\.|\?\?\?\?|!!!!|,,|--/;

// Workaround for phantomjs - need regex without /g flag,
// or root check will fail every second time
const SCOPED_ABBR_TEST_RE = /\((c|tm|r)\)/i;
const SCOPED_ABBR_RE = /\((c|tm|r)\)/ig;
const SCOPED_ABBR = {
  c: '©',
  r: '®',
  tm: '™'
};

// Replacer callback for SCOPED_ABBR_RE: maps the captured abbreviation
// (case-insensitively) to its symbol.
function replaceFn(match, name) {
  const key = name.toLowerCase();
  return SCOPED_ABBR[key];
}

// Replace (c), (tm), (r) in every text token that is not inside an
// auto-generated link. Tokens are visited from last to first, so an 'auto'
// link_close raises the counter and the matching link_open lowers it.
function replace_scoped(inlineTokens) {
  let autolinkDepth = 0;

  for (let idx = inlineTokens.length - 1; idx >= 0; idx--) {
    const tok = inlineTokens[idx];

    if (tok.type === 'text') {
      if (!autolinkDepth) {
        tok.content = tok.content.replace(SCOPED_ABBR_RE, replaceFn);
      }
    } else if (tok.type === 'link_open') {
      if (tok.info === 'auto') {
        autolinkDepth--;
      }
    } else if (tok.type === 'link_close') {
      if (tok.info === 'auto') {
        autolinkDepth++;
      }
    }
  }
}

// Apply the "rare" replacements (+-, dot runs, repeated ?/!, repeated commas,
// dashes) to every text token that is not inside an auto-generated link.
// Same reverse traversal and counter convention as replace_scoped.
function replace_rare(inlineTokens) {
  let autolinkDepth = 0;

  for (let idx = inlineTokens.length - 1; idx >= 0; idx--) {
    const tok = inlineTokens[idx];

    if (tok.type === 'text') {
      if (autolinkDepth || !RARE_RE.test(tok.content)) {
        continue;
      }

      let text = tok.content;
      text = text.replace(/\+-/g, '±');
      // .., ..., ....... -> …
      // but ?..... & !..... -> ?.. & !..
      text = text.replace(/\.{2,}/g, '…');
      text = text.replace(/([?!])…/g, '$1..');
      text = text.replace(/([?!]){4,}/g, '$1$1$1');
      text = text.replace(/,{2,}/g, ',');
      // em-dash
      text = text.replace(/(^|[^-])---(?=[^-]|$)/mg, '$1\u2014');
      // en-dash
      text = text.replace(/(^|\s)--(?=\s|$)/mg, '$1\u2013');
      text = text.replace(/(^|[^-\s])--(?=[^-\s]|$)/mg, '$1\u2013');
      tok.content = text;
    } else if (tok.type === 'link_open') {
      if (tok.info === 'auto') {
        autolinkDepth--;
      }
    } else if (tok.type === 'link_close') {
      if (tok.info === 'auto') {
        autolinkDepth++;
      }
    }
  }
}

// Core rule: when `options.typographer` is enabled, run the scoped and rare
// replacements over the children of every 'inline' token whose raw content
// passes the corresponding quick test.
function replace(state) {
  if (!state.md.options.typographer) {
    return;
  }

  for (let blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
    const blockToken = state.tokens[blkIdx];
    if (blockToken.type !== 'inline') {
      continue;
    }

    if (SCOPED_ABBR_TEST_RE.test(blockToken.content)) {
      replace_scoped(blockToken.children);
    }
    if (RARE_RE.test(blockToken.content)) {
      replace_rare(blockToken.children);
    }
  }
}

// Convert straight quotation marks to typographic ones
//

const QUOTE_TEST_RE = /['"]/;
const QUOTE_RE = /['"]/g;
const APOSTROPHE = '\u2019'; /* ’ */

// Return `str` with the single UTF-16 unit at `index` replaced by `ch`
// (`ch` may be longer or shorter than one unit).
function replaceAt(str, index, ch) {
  const head = str.slice(0, index);
  const tail = str.slice(index + 1);
  return head + ch + tail;
}

// Walk the inline tokens of one block and replace straight quotes in text
// tokens with the configured typographic quotes (`state.md.options.quotes`)
// or with an apostrophe. `stack` holds the still-unmatched opening quotes.
function process_inlines(tokens, state) {
  const stack = [];

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    const thisLevel = token.level;

    // Forget pending openers that sit at a deeper level than this token.
    let keep = stack.length;
    while (keep > 0 && !(stack[keep - 1].level <= thisLevel)) {
      keep--;
    }
    stack.length = keep;

    if (token.type !== 'text') {
      continue;
    }

    let text = token.content;
    let pos = 0;
    let max = text.length;

    /* eslint no-labels:0,block-scoped-var:0 */
    OUTER: while (pos < max) {
      QUOTE_RE.lastIndex = pos;
      const found = QUOTE_RE.exec(text);
      if (!found) {
        break;
      }

      const quotePos = found.index;
      const isSingle = found[0] === "'";
      pos = quotePos + 1;

      // Find previous character,
      // default to space if it's the beginning of the line
      //
      let lastChar = 0x20;
      if (quotePos >= 1) {
        lastChar = text.charCodeAt(quotePos - 1);
      } else {
        for (let k = i - 1; k >= 0; k--) {
          const sibling = tokens[k];
          if (sibling.type === 'softbreak' || sibling.type === 'hardbreak') {
            break; // lastChar defaults to 0x20
          }
          if (!sibling.content) {
            continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'
          }
          lastChar = sibling.content.charCodeAt(sibling.content.length - 1);
          break;
        }
      }

      // Find next character,
      // default to space if it's the end of the line
      //
      let nextChar = 0x20;
      if (pos < max) {
        nextChar = text.charCodeAt(pos);
      } else {
        for (let k = i + 1; k < tokens.length; k++) {
          const sibling = tokens[k];
          if (sibling.type === 'softbreak' || sibling.type === 'hardbreak') {
            break; // nextChar defaults to 0x20
          }
          if (!sibling.content) {
            continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'
          }
          nextChar = sibling.content.charCodeAt(0);
          break;
        }
      }

      const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar));
      const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar));
      const isLastWhiteSpace = isWhiteSpace(lastChar);
      const isNextWhiteSpace = isWhiteSpace(nextChar);

      // A quote cannot open before whitespace, nor before punctuation unless
      // it is itself preceded by whitespace or punctuation; closing is the
      // mirror image of that rule.
      let canOpen = !isNextWhiteSpace &&
        !(isNextPunctChar && !(isLastWhiteSpace || isLastPunctChar));
      let canClose = !isLastWhiteSpace &&
        !(isLastPunctChar && !(isNextWhiteSpace || isNextPunctChar));

      if (nextChar === 0x22 /* " */ && found[0] === '"' &&
          lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {
        // special case: 1"" - count first quote as an inch
        canOpen = false;
        canClose = false;
      }

      if (canOpen && canClose) {
        // Replace quotes in the middle of punctuation sequence, but not
        // in the middle of the words, i.e.:
        //
        // 1. foo " bar " baz - not replaced
        // 2. foo-"-bar-"-baz - replaced
        // 3. foo"bar"baz     - not replaced
        //
        canOpen = isLastPunctChar;
        canClose = isNextPunctChar;
      }

      if (!canOpen && !canClose) {
        // middle of word
        if (isSingle) {
          token.content = replaceAt(token.content, quotePos, APOSTROPHE);
        }
        continue;
      }

      if (canClose) {
        // this could be a closing quote, rewind the stack to get a match
        for (let s = stack.length - 1; s >= 0; s--) {
          const opener = stack[s];
          if (opener.level < thisLevel) {
            break;
          }
          if (opener.single !== isSingle || opener.level !== thisLevel) {
            continue;
          }

          const quotes = state.md.options.quotes;
          const openQuote = isSingle ? quotes[2] : quotes[0];
          const closeQuote = isSingle ? quotes[3] : quotes[1];

          // replace token.content *before* tokens[opener.token].content,
          // because, if they are pointing at the same token, replaceAt
          // could mess up indices when quote length != 1
          token.content = replaceAt(token.content, quotePos, closeQuote);
          tokens[opener.token].content = replaceAt(
            tokens[opener.token].content, opener.pos, openQuote);

          // Keep the scan position aligned with the rewritten text
          pos += closeQuote.length - 1;
          if (opener.token === i) {
            pos += openQuote.length - 1;
          }

          text = token.content;
          max = text.length;

          // Drop the matched opener and everything pushed after it
          stack.length = s;
          continue OUTER;
        }
      }

      if (canOpen) {
        stack.push({
          token: i,
          pos: quotePos,
          single: isSingle,
          level: thisLevel
        });
      } else if (canClose && isSingle) {
        token.content = replaceAt(token.content, quotePos, APOSTROPHE);
      }
    }
  }
}

// Core rule: when `options.typographer` is enabled, run quote replacement on
// the children of every 'inline' token whose raw content contains a quote.
function smartquotes(state) {
  /* eslint max-depth:0 */
  if (!state.md.options.typographer) {
    return;
  }

  for (let blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
    const blockToken = state.tokens[blkIdx];
    if (blockToken.type === 'inline' && QUOTE_TEST_RE.test(blockToken.content)) {
      process_inlines(blockToken.children, state);
    }
  }
}

// Join raw text tokens with the rest of the text
//
// This is set as a separate rule to provide an opportunity for plugins
// to run text replacements after text join, but before escape join.
//
// For example, `\:)` shouldn't be replaced with an emoji.
// function text_join(state) { let curr, last; const blockTokens = state.tokens; const l = blockTokens.length; for (let j = 0; j < l; j++) { if (blockTokens[j].type !== 'inline') continue; const tokens = blockTokens[j].children; const max = tokens.length; for (curr = 0; curr < max; curr++) { if (tokens[curr].type === 'text_special') { tokens[curr].type = 'text'; } } for (curr = last = 0; curr < max; curr++) { if (tokens[curr].type === 'text' && curr + 1 < max && tokens[curr + 1].type === 'text') { // collapse two adjacent text nodes tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content; } else { if (curr !== last) { tokens[last] = tokens[curr]; } last++; } } if (curr !== last) { tokens.length = last; } } } /** internal * class Core * * Top-level rules executor. Glues block/inline parsers and does intermediate * transformations. **/ const _rules$2 = [['normalize', normalize], ['block', block], ['inline', inline], ['linkify', linkify$1], ['replacements', replace], ['smartquotes', smartquotes], // `text_join` finds `text_special` tokens (for escape sequences) // and joins them with the rest of