// markdown-it
// Version:
// Markdown-it - modern pluggable markdown parser.
// 1,835 lines (1,708 loc) • 163 kB
// JavaScript
'use strict';
var mdurl = require('mdurl');
var ucmicro = require('uc.micro');
var entities = require('entities');
var LinkifyIt = require('linkify-it');
var punycode = require('punycode.js');
// Build a frozen, prototype-less namespace object for a required module.
//
// Every own enumerable key of `e` except `default` is mirrored onto the
// namespace: accessor properties are copied as-is, data properties are
// exposed through a live getter that reads from `e`. The module itself is
// stored under `default`.
function _interopNamespaceDefault(e) {
  const namespace = Object.create(null);
  if (e) {
    for (const key of Object.keys(e)) {
      if (key === 'default') {
        continue;
      }
      const descriptor = Object.getOwnPropertyDescriptor(e, key);
      const mirrored = descriptor.get
        ? descriptor
        : {
          enumerable: true,
          get: function () { return e[key]; }
        };
      Object.defineProperty(namespace, key, mirrored);
    }
  }
  namespace.default = e;
  return Object.freeze(namespace);
}
var mdurl__namespace = /*#__PURE__*/_interopNamespaceDefault(mdurl);
var ucmicro__namespace = /*#__PURE__*/_interopNamespaceDefault(ucmicro);
// Utilities
//
// Return the internal class tag of a value, e.g. "[object String]".
function _class(obj) {
  const describe = Object.prototype.toString;
  return describe.call(obj);
}

// True for string primitives and String wrapper objects.
function isString(obj) {
  const tag = _class(obj);
  return tag === '[object String]';
}
const _hasOwnProperty = Object.prototype.hasOwnProperty;

// Own-property check that works even when `object` shadows or lacks
// its own `hasOwnProperty` (e.g. objects created with a null prototype).
function has(object, key) {
  return Reflect.apply(_hasOwnProperty, object, [key]);
}
// Merge objects
//
// Shallow-copies the own enumerable properties of every source into `obj`
// (later sources win) and returns `obj`. Falsy sources are skipped.
//
// - obj: target object, mutated in place.
// - sources: zero or more objects to copy from.
//
// Throws TypeError when a truthy source is not an object.
function assign(obj, ...sources) {
  for (const source of sources) {
    if (!source) {
      continue;
    }
    if (typeof source !== 'object') {
      // The original message lacked the separating space ("foomust be object").
      throw new TypeError(String(source) + ' must be object');
    }
    for (const key of Object.keys(source)) {
      obj[key] = source[key];
    }
  }
  return obj;
}
// Return a new array in which the element at `pos` is replaced by the
// contents of `newElements`. The source array is left untouched.
// Handy when one token has to be swapped for several.
function arrayReplaceAt(src, pos, newElements) {
  const before = src.slice(0, pos);
  const after = src.slice(pos + 1);
  const merged = [];
  return merged.concat(before, newElements, after);
}
// Decide whether a numeric character reference may be decoded to the
// code point `c`. Returns false for the rejected ranges listed below and
// true for everything else.
function isValidEntityCode(c) {
  /* eslint no-bitwise:0 */
  const rejected =
    // broken sequence (UTF-16 surrogates)
    (c >= 0xD800 && c <= 0xDFFF) ||
    // never used
    (c >= 0xFDD0 && c <= 0xFDEF) ||
    // last two code points of every 64K plane
    (c & 0xFFFF) === 0xFFFF || (c & 0xFFFF) === 0xFFFE ||
    // control codes
    (c >= 0x00 && c <= 0x08) ||
    c === 0x0B ||
    (c >= 0x0E && c <= 0x1F) ||
    (c >= 0x7F && c <= 0x9F) ||
    // out of range
    c > 0x10FFFF;
  return !rejected;
}
// Convert a code point to a string, encoding values above 0xFFFF as a
// UTF-16 surrogate pair.
function fromCodePoint(c) {
  /* eslint no-bitwise:0 */
  const needsSurrogates = c > 0xffff;
  if (!needsSurrogates) {
    return String.fromCharCode(c);
  }
  const offset = c - 0x10000;
  const high = 0xd800 + (offset >> 10);
  const low = 0xdc00 + (offset & 0x3ff);
  return String.fromCharCode(high, low);
}
// A backslash followed by one ASCII punctuation character; group 1 is the
// escaped character.
const UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])/g;
// `&name;` where the name starts with a letter or `#` and continues with
// 1-31 alphanumerics (case-insensitive); group 1 is the name.
const ENTITY_RE = /&([a-z#][a-z0-9]{1,31});/gi;
// Either of the two patterns above in a single pass: group 1 is the escaped
// character, group 2 is the entity name.
const UNESCAPE_ALL_RE = new RegExp(UNESCAPE_MD_RE.source + '|' + ENTITY_RE.source, 'gi');
// Numeric entity name: `#` followed by 1-8 decimal digits, or `#x` followed
// by 1-8 hex digits.
const DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i;
// Decode a single entity.
//
// - match: the full entity text, e.g. "&amp;" or "&#x41;"
// - name: the part between "&" and ";"
//
// Numeric entities are decoded locally when they denote a valid code
// point; everything else is handed to `entities.decodeHTML`. The original
// text is returned whenever nothing could be decoded.
function replaceEntityPattern(match, name) {
  const isNumeric = name.charCodeAt(0) === 0x23 /* # */ && DIGITAL_ENTITY_TEST_RE.test(name);
  if (!isNumeric) {
    const decoded = entities.decodeHTML(match);
    return decoded !== match ? decoded : match;
  }
  const isHex = name[1].toLowerCase() === 'x';
  const code = isHex ? parseInt(name.slice(2), 16) : parseInt(name.slice(1), 10);
  return isValidEntityCode(code) ? fromCodePoint(code) : match;
}
/* function replaceEntities(str) {
if (str.indexOf('&') < 0) { return str; }
return str.replace(ENTITY_RE, replaceEntityPattern);
} */
// Strip the backslash from backslash-escaped ASCII punctuation.
// Strings without any backslash are returned unchanged without running
// the regular expression.
function unescapeMd(str) {
  const hasBackslash = str.indexOf('\\') >= 0;
  return hasBackslash ? str.replace(UNESCAPE_MD_RE, '$1') : str;
}
// Resolve both backslash escapes and entities in one pass.
// Strings containing neither "\" nor "&" are returned unchanged.
function unescapeAll(str) {
  const hasBackslash = str.indexOf('\\') >= 0;
  const hasAmpersand = str.indexOf('&') >= 0;
  if (!hasBackslash && !hasAmpersand) {
    return str;
  }
  return str.replace(UNESCAPE_ALL_RE, (match, escaped, entity) => (
    escaped ? escaped : replaceEntityPattern(match, entity)
  ));
}
// Characters that must not appear verbatim in HTML text or in
// double-quoted attribute values.
const HTML_ESCAPE_TEST_RE = /[&<>"]/;
const HTML_ESCAPE_REPLACE_RE = /[&<>"]/g;
// Each unsafe character maps to its entity. The table previously mapped
// every character to itself, which turned escapeHtml() into a no-op.
const HTML_REPLACEMENTS = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;'
};

// Replacer callback: look up the entity for one unsafe character.
function replaceUnsafeChar(ch) {
  return HTML_REPLACEMENTS[ch];
}

// Escape `&`, `<`, `>` and `"` in `str`.
// Input without any of these characters is returned unchanged.
function escapeHtml(str) {
  if (HTML_ESCAPE_TEST_RE.test(str)) {
    return str.replace(HTML_ESCAPE_REPLACE_RE, replaceUnsafeChar);
  }
  return str;
}
// Characters that carry special meaning inside a regular expression.
const REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;

// Prefix every regexp metacharacter in `str` with a backslash so the
// result can be embedded in a RegExp source and match literally.
function escapeRE(str) {
  return str.replace(REGEXP_ESCAPE_RE, (metaChar) => '\\' + metaChar);
}
// True when `code` is the char code of a tab (0x09) or a space (0x20).
function isSpace(code) {
  return code === 0x09 || code === 0x20;
}
// Zs (unicode class) || [\t\f\v\r\n]
//
// True when `code` is the char code of a Unicode space separator or of
// one of the ASCII whitespace controls.
function isWhiteSpace(code) {
  // U+2000..U+200A
  if (code >= 0x2000 && code <= 0x200A) {
    return true;
  }
  const isAsciiWhitespace =
    code === 0x09 || // \t
    code === 0x0A || // \n
    code === 0x0B || // \v
    code === 0x0C || // \f
    code === 0x0D || // \r
    code === 0x20;
  if (isAsciiWhitespace) {
    return true;
  }
  return code === 0xA0 ||
    code === 0x1680 ||
    code === 0x202F ||
    code === 0x205F ||
    code === 0x3000;
}
/* eslint-disable max-len */
// Currently without astral characters support.
//
// True when the one-character string `ch` matches the `P` or the `S`
// pattern exported by uc.micro.
function isPunctChar(ch) {
  if (ucmicro__namespace.P.test(ch)) {
    return true;
  }
  return ucmicro__namespace.S.test(ch);
}
// Markdown ASCII punctuation characters.
//
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
// Not the same set as unicode punctuation, which lacks some ASCII chars.
//
// Char codes of the 32 characters listed above.
const MD_ASCII_PUNCT_CODES = new Set(
  Array.from('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~', (symbol) => symbol.charCodeAt(0))
);

// True when `ch` (a char code, not a string) is Markdown ASCII punctuation.
function isMdAsciiPunct(ch) {
  return MD_ASCII_PUNCT_CODES.has(ch);
}
// Helper to unify [reference labels].
//
// Produces the key under which a reference label is stored and looked up:
// surrounding whitespace is trimmed, inner whitespace runs collapse to a
// single space, and letter case is folded.
function normalizeReference(str) {
  // Trim and collapse whitespace.
  let label = str.trim().replace(/\s+/g, ' ');

  // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
  // fixed in v12 (couldn't find any details), so this letter is handled
  // explicitly (remove this when node v10 is no longer supported).
  const lowercasesSharpSWrongly = 'ẞ'.toLowerCase() === 'Ṿ';
  if (lowercasesSharpSWrongly) {
    label = label.replace(/ẞ/g, 'ß');
  }

  // Lowercasing and then uppercasing removes all differences between
  // letter variants; either step alone is not enough. Take the Greek
  // letter theta: uppercase U+0398 (Θ) and U+03F4 (ϴ), lowercase
  // U+03B8 (θ) and U+03D1 (ϑ). .toLowerCase() leaves ϑ alone (already
  // lowercase) and .toUpperCase() leaves ϴ alone (already uppercase), but
  //
  //   '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
  //
  // This is equivalent to unicode case folding; unicode normalization is
  // a separate step that is not required here.
  //
  // The result is uppercased on purpose: it is later used as an object
  // key, and uppercase avoids clashes with Object.prototype members,
  // most notably `__proto__`.
  const lowered = label.toLowerCase();
  return lowered.toUpperCase();
}
// Re-export libraries commonly used in both markdown-it and its plugins,
// so plugins won't have to depend on them explicitly, which reduces their
// bundled size (e.g. a browser build).
//
// Both entries are the frozen namespace wrappers built from the `mdurl`
// and `uc.micro` modules required at the top of this file.
//
const lib = {
mdurl: mdurl__namespace,
ucmicro: ucmicro__namespace
};
// Frozen, prototype-less bundle of the utility functions defined above.
var utils = /*#__PURE__*/Object.freeze({
  __proto__: null,
  arrayReplaceAt,
  assign,
  escapeHtml,
  escapeRE,
  fromCodePoint,
  has,
  isMdAsciiPunct,
  isPunctChar,
  isSpace,
  isString,
  isValidEntityCode,
  isWhiteSpace,
  lib,
  normalizeReference,
  unescapeAll,
  unescapeMd
});
// Parse link label
//
// Assumes the character at `start` is the opening "[".
//
// - state: inline parser state; `state.pos` is moved while scanning and
//   restored before returning.
// - start: index of the opening bracket.
// - disableNested: when truthy, give up as soon as a "[" turns out to
//   begin a longer token.
//
// Returns the index of the matching "]" or -1 when there is none.
function parseLinkLabel(state, start, disableNested) {
  const savedPos = state.pos;
  const limit = state.posMax;
  let depth = 1;
  let closed = false;

  state.pos = start + 1;
  while (state.pos < limit) {
    const ch = state.src.charCodeAt(state.pos);
    if (ch === 0x5D /* ] */) {
      depth -= 1;
      if (depth === 0) {
        closed = true;
        break;
      }
    }

    const before = state.pos;
    state.md.inline.skipToken(state);
    if (ch !== 0x5B /* [ */) {
      continue;
    }
    if (before === state.pos - 1) {
      // a literal `[` that is not part of any token opens a nested level
      depth += 1;
    } else if (disableNested) {
      state.pos = savedPos;
      return -1;
    }
  }

  const labelEnd = closed ? state.pos : -1;
  // restore old state
  state.pos = savedPos;
  return labelEnd;
}
// Parse link destination
//
// Scans `str` from `start` up to (not including) `max` and returns
// `{ ok, pos, str }`: `ok` tells whether a destination was found, `pos` is
// the index right after it and `str` is its unescaped text. On failure the
// result is `{ ok: false, pos: 0, str: '' }`.
function parseLinkDestination(str, start, max) {
  const result = {
    ok: false,
    pos: 0,
    str: ''
  };

  // Form 1: <destination in angle brackets>
  if (str.charCodeAt(start) === 0x3C /* < */) {
    let i = start + 1;
    while (i < max) {
      const ch = str.charCodeAt(i);
      if (ch === 0x0A /* \n */ || ch === 0x3C /* < */) {
        return result;
      }
      if (ch === 0x3E /* > */) {
        result.pos = i + 1;
        result.str = unescapeAll(str.slice(start + 1, i));
        result.ok = true;
        return result;
      }
      // a backslash escape consumes the following character too
      i += (ch === 0x5C /* \ */ && i + 1 < max) ? 2 : 1;
    }
    // no closing '>'
    return result;
  }

  // Form 2: bare destination with balanced parentheses
  let depth = 0;
  let cursor = start;
  while (cursor < max) {
    const ch = str.charCodeAt(cursor);
    // space or ascii control character ends the destination
    if (ch <= 0x20 || ch === 0x7F) {
      break;
    }
    if (ch === 0x5C /* \ */ && cursor + 1 < max) {
      if (str.charCodeAt(cursor + 1) === 0x20) {
        break;
      }
      cursor += 2;
      continue;
    }
    if (ch === 0x28 /* ( */) {
      depth += 1;
      if (depth > 32) {
        return result;
      }
    }
    if (ch === 0x29 /* ) */) {
      if (depth === 0) {
        break;
      }
      depth -= 1;
    }
    cursor += 1;
  }

  // nothing consumed, or parentheses left open
  if (cursor === start || depth !== 0) {
    return result;
  }
  result.str = unescapeAll(str.slice(start, cursor));
  result.pos = cursor;
  result.ok = true;
  return result;
}
// Parse link title
//
// Parse a link title within `str` in the [start, max) range, or continue a
// previous parse when `prev_state` (the result of the last call) is given.
//
// The returned object has:
// - ok: `true` when a complete title was read
// - can_continue: `true` when the closing marker is still missing and the
//   title may go on in the next line
// - pos: if `ok`, index of the first character after the closing marker
// - str: the unescaped title text collected so far
// - marker: char code of the expected closing marker
function parseLinkTitle(str, start, max, prev_state) {
  const state = {
    ok: false,
    can_continue: false,
    pos: 0,
    str: '',
    marker: 0
  };
  let contentStart = start;

  if (prev_state) {
    // continuation of a previous parseLinkTitle call on the next line,
    // used in reference links only
    state.str = prev_state.str;
    state.marker = prev_state.marker;
  } else {
    if (start >= max) {
      return state;
    }
    const opener = str.charCodeAt(start);
    const isQuote = opener === 0x22 /* " */ || opener === 0x27 /* ' */;
    if (!isQuote && opener !== 0x28 /* ( */) {
      return state;
    }
    // an opening "(" is closed by ")", quotes close themselves
    state.marker = opener === 0x28 ? 0x29 : opener;
    contentStart = start + 1;
  }

  let cursor = contentStart;
  while (cursor < max) {
    const ch = str.charCodeAt(cursor);
    if (ch === state.marker) {
      state.pos = cursor + 1;
      state.str += unescapeAll(str.slice(contentStart, cursor));
      state.ok = true;
      return state;
    }
    if (ch === 0x28 /* ( */ && state.marker === 0x29 /* ) */) {
      return state;
    }
    if (ch === 0x5C /* \ */ && cursor + 1 < max) {
      // skip the escaped character
      cursor += 1;
    }
    cursor += 1;
  }

  // no closing marker found, but this link title may continue on the next line (for references)
  state.can_continue = true;
  state.str += unescapeAll(str.slice(contentStart, cursor));
  return state;
}
// Frozen, prototype-less bundle of the link parsing helpers, for bulk export.
var helpers = /*#__PURE__*/Object.freeze({
  __proto__: null,
  parseLinkDestination,
  parseLinkLabel,
  parseLinkTitle
});
/**
* class Renderer
*
* Generates HTML from a parsed token stream. Each instance has an independent
* copy of the rules, which can be rewritten with ease. You can also add new
* rules if you create a plugin that adds new token types.
**/
// Built-in render rules, keyed by token type. Every rule has the signature
// (tokens, idx, options, env, slf) and returns an HTML string; `slf` is the
// renderer the rule is running for.
const default_rules = {};

// Inline code span.
default_rules.code_inline = function (tokens, idx, options, env, slf) {
  const current = tokens[idx];
  return `<code${slf.renderAttrs(current)}>${escapeHtml(current.content)}</code>`;
};

// Indented code block.
default_rules.code_block = function (tokens, idx, options, env, slf) {
  const current = tokens[idx];
  return `<pre${slf.renderAttrs(current)}><code>${escapeHtml(tokens[idx].content)}</code></pre>\n`;
};

// Fenced code block. The first word of the info string is the language
// name, the rest are language attributes; both are passed to
// `options.highlight` when it is set.
default_rules.fence = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];
  const info = token.info ? unescapeAll(token.info).trim() : '';
  let langName = '';
  let langAttrs = '';
  if (info) {
    const pieces = info.split(/(\s+)/g);
    langName = pieces[0];
    langAttrs = pieces.slice(2).join('');
  }

  const highlighted = options.highlight
    ? options.highlight(token.content, langName, langAttrs) || escapeHtml(token.content)
    : escapeHtml(token.content);

  // The highlighter produced its own wrapper: emit it untouched.
  if (highlighted.indexOf('<pre') === 0) {
    return highlighted + '\n';
  }

  // If a language exists, inject the class gently, without modifying the
  // original token: attributes are rendered from a copy.
  let attrSource = token;
  if (info) {
    const classPos = token.attrIndex('class');
    const attrsCopy = token.attrs ? token.attrs.slice() : [];
    if (classPos < 0) {
      attrsCopy.push(['class', options.langPrefix + langName]);
    } else {
      attrsCopy[classPos] = attrsCopy[classPos].slice();
      attrsCopy[classPos][1] += ' ' + options.langPrefix + langName;
    }
    // Fake token just to render attributes
    attrSource = { attrs: attrsCopy };
  }
  return `<pre><code${slf.renderAttrs(attrSource)}>${highlighted}</code></pre>\n`;
};

// Image. The "alt" attribute MUST be set, even if empty: it is mandatory
// and has to keep its position for tests. Its value is replaced here with
// the children rendered as plain text.
default_rules.image = function (tokens, idx, options, env, slf) {
  const token = tokens[idx];
  const altAttr = token.attrs[token.attrIndex('alt')];
  altAttr[1] = slf.renderInlineAsText(token.children, options, env);
  return slf.renderToken(tokens, idx, options);
};

// Hard line break.
default_rules.hardbreak = function (tokens, idx, options /*, env */) {
  if (options.xhtmlOut) {
    return '<br />\n';
  }
  return '<br>\n';
};

// Soft line break: a plain newline unless `options.breaks` is set.
default_rules.softbreak = function (tokens, idx, options /*, env */) {
  if (!options.breaks) {
    return '\n';
  }
  return options.xhtmlOut ? '<br />\n' : '<br>\n';
};

// Plain text, HTML-escaped.
default_rules.text = function (tokens, idx /*, options, env */) {
  const current = tokens[idx];
  return escapeHtml(current.content);
};

// Raw HTML block, emitted as-is.
default_rules.html_block = function (tokens, idx /*, options, env */) {
  const current = tokens[idx];
  return current.content;
};

// Raw inline HTML, emitted as-is.
default_rules.html_inline = function (tokens, idx /*, options, env */) {
  const current = tokens[idx];
  return current.content;
};
/**
 * new Renderer()
 *
 * Creates new [[Renderer]] instance and fills [[Renderer#rules]] with defaults.
 **/
function Renderer() {
  /**
   * Renderer#rules -> Object
   *
   * Contains render rules for tokens. Can be updated and extended.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')();
   *
   * md.renderer.rules.strong_open  = function () { return '<b>'; };
   * md.renderer.rules.strong_close = function () { return '</b>'; };
   *
   * var result = md.renderInline(...);
   * ```
   *
   * Each rule is called as independent static function with fixed signature:
   *
   * ```javascript
   * function my_token_render(tokens, idx, options, env, renderer) {
   *   // ...
   *   return renderedHTML;
   * }
   * ```
   *
   * See [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.mjs)
   * for more details and examples.
   **/
  this.rules = assign({}, default_rules);
}

/**
 * Renderer.renderAttrs(token) -> String
 *
 * Render token attributes to string. Returns an empty string when the
 * token has no attributes; otherwise every `[ name, value ]` pair becomes
 * ` name="value"` with both parts HTML-escaped.
 **/
Renderer.prototype.renderAttrs = function renderAttrs(token) {
  if (!token.attrs) {
    return '';
  }
  let rendered = '';
  for (let n = 0, count = token.attrs.length; n < count; n++) {
    const pair = token.attrs[n];
    rendered += ' ' + escapeHtml(pair[0]) + '="' + escapeHtml(pair[1]) + '"';
  }
  return rendered;
};

/**
 * Renderer.renderToken(tokens, idx, options) -> String
 * - tokens (Array): list of tokens
 * - idx (Number): token index to render
 * - options (Object): params of parser instance
 *
 * Default token renderer. Can be overridden by custom function
 * in [[Renderer#rules]].
 **/
Renderer.prototype.renderToken = function renderToken(tokens, idx, options) {
  const token = tokens[idx];

  // Tight list paragraphs
  if (token.hidden) {
    return '';
  }

  let html = '';

  // Insert a newline between hidden paragraph and subsequent opening
  // block-level tag.
  //
  // For example, here we should insert a newline before blockquote:
  //  - a
  //    >
  //
  const followsHidden = token.block && token.nesting !== -1 && idx && tokens[idx - 1].hidden;
  if (followsHidden) {
    html += '\n';
  }

  // Add token name, e.g. `<img`
  const opener = token.nesting === -1 ? '</' : '<';
  html += opener + token.tag;

  // Encode attributes, e.g. `<img src="foo"`
  html += this.renderAttrs(token);

  // Add a slash for self-closing tags, e.g. `<img src="foo" /`
  if (token.nesting === 0 && options.xhtmlOut) {
    html += ' /';
  }

  // Block tokens are followed by a newline, except for an opening tag
  // whose content starts right away.
  let needLf = false;
  if (token.block) {
    needLf = true;
    if (token.nesting === 1 && idx + 1 < tokens.length) {
      const next = tokens[idx + 1];
      // Block-level tag containing an inline tag.
      const wrapsInline = next.type === 'inline' || next.hidden;
      // Opening tag + closing tag of the same type. E.g. `<li></li>`.
      const closedAtOnce = next.nesting === -1 && next.tag === token.tag;
      if (wrapsInline || closedAtOnce) {
        needLf = false;
      }
    }
  }

  return html + (needLf ? '>\n' : '>');
};

/**
 * Renderer.renderInline(tokens, options, env) -> String
 * - tokens (Array): list of inline tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * The same as [[Renderer.render]], but for single token of `inline` type.
 **/
Renderer.prototype.renderInline = function (tokens, options, env) {
  const rules = this.rules;
  let html = '';
  for (let i = 0, len = tokens.length; i < len; i++) {
    const type = tokens[i].type;
    const hasRule = typeof rules[type] !== 'undefined';
    html += hasRule
      ? rules[type](tokens, i, options, env, this)
      : this.renderToken(tokens, i, options);
  }
  return html;
};

/** internal
 * Renderer.renderInlineAsText(tokens, options, env) -> String
 * - tokens (Array): list of inline tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * Special kludge for image `alt` attributes to conform CommonMark spec.
 * Don't try to use it! Spec requires to show `alt` content with stripped markup,
 * instead of simple escaping.
 **/
Renderer.prototype.renderInlineAsText = function (tokens, options, env) {
  let text = '';
  for (let i = 0, len = tokens.length; i < len; i++) {
    const current = tokens[i];
    const type = current.type;
    if (type === 'text' || type === 'html_inline' || type === 'html_block') {
      text += current.content;
    } else if (type === 'image') {
      text += this.renderInlineAsText(current.children, options, env);
    } else if (type === 'softbreak' || type === 'hardbreak') {
      text += '\n';
    }
    // all other tokens are skipped
  }
  return text;
};

/**
 * Renderer.render(tokens, options, env) -> String
 * - tokens (Array): list of block tokens to render
 * - options (Object): params of parser instance
 * - env (Object): additional data from parsed input (references, for example)
 *
 * Takes token stream and generates HTML. Probably, you will never need to call
 * this method directly.
 **/
Renderer.prototype.render = function (tokens, options, env) {
  const rules = this.rules;
  let html = '';
  for (let i = 0, len = tokens.length; i < len; i++) {
    const type = tokens[i].type;
    if (type === 'inline') {
      html += this.renderInline(tokens[i].children, options, env);
      continue;
    }
    if (typeof rules[type] !== 'undefined') {
      html += rules[type](tokens, i, options, env, this);
      continue;
    }
    html += this.renderToken(tokens, i, options, env);
  }
  return html;
};
/**
* class Ruler
*
* Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
* [[MarkdownIt#inline]] to manage sequences of functions (rules):
*
* - keep rules in defined order
* - assign the name to each rule
* - enable/disable rules
* - add/replace rules
* - allow assigning rules to additional named chains (in the same ruler)
* - caching lists of active rules
*
* You will not need to use this class directly unless you write plugins. For
* simple rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
* [[MarkdownIt.use]].
**/
/**
 * new Ruler()
 **/
function Ruler() {
  // List of added rules. Each element is:
  //
  // {
  //   name: XXX,
  //   enabled: Boolean,
  //   fn: Function(),
  //   alt: [ name2, name3 ]
  // }
  //
  this.__rules__ = [];

  // Cached rule chains, `null` until the first lookup after a change.
  //
  // First level - chain name, '' for default.
  // Second level - list of functions of the enabled rules in that chain.
  //
  this.__cache__ = null;
}

// Helper methods, should not be used directly

// Build the record stored in `__rules__` for a newly added rule.
function _rulerEntry(ruleName, fn, options) {
  const opt = options || {};
  return {
    name: ruleName,
    enabled: true,
    fn,
    alt: opt.alt || []
  };
}

// Shared body of Ruler.enable / Ruler.disable: set the `enabled` flag of
// the named rules and return the names that were found. The cache is
// dropped only when no error was thrown.
function _rulerToggle(ruler, list, ignoreInvalid, enabled) {
  const names = Array.isArray(list) ? list : [list];
  const found = [];
  names.forEach((name) => {
    const idx = ruler.__find__(name);
    if (idx < 0) {
      if (ignoreInvalid) {
        return;
      }
      throw new Error('Rules manager: invalid rule name ' + name);
    }
    ruler.__rules__[idx].enabled = enabled;
    found.push(name);
  });
  ruler.__cache__ = null;
  return found;
}

// Find rule index by name, -1 when there is no such rule
//
Ruler.prototype.__find__ = function (name) {
  return this.__rules__.findIndex((rule) => rule.name === name);
};

// Build rules lookup cache
//
Ruler.prototype.__compile__ = function () {
  const activeRules = this.__rules__.filter((rule) => rule.enabled);

  // collect unique chain names, the default chain '' comes first
  const chainNames = [''];
  activeRules.forEach((rule) => {
    rule.alt.forEach((altName) => {
      if (chainNames.indexOf(altName) === -1) {
        chainNames.push(altName);
      }
    });
  });

  const cache = {};
  chainNames.forEach((chain) => {
    // the default chain takes every active rule, a named chain only
    // those that list it in `alt`
    cache[chain] = activeRules
      .filter((rule) => !(chain && rule.alt.indexOf(chain) < 0))
      .map((rule) => rule.fn);
  });
  this.__cache__ = cache;
};

/**
 * Ruler.at(name, fn [, options])
 * - name (String): rule name to replace.
 * - fn (Function): new rule function.
 * - options (Object): new rule options (not mandatory).
 *
 * Replace rule by name with new function & options. Throws error if name not
 * found.
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * Replace existing typographer replacement rule with new one:
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.core.ruler.at('replacements', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.at = function (name, fn, options) {
  const position = this.__find__(name);
  if (position === -1) {
    throw new Error('Parser rule not found: ' + name);
  }
  const target = this.__rules__[position];
  target.fn = fn;
  target.alt = (options || {}).alt || [];
  this.__cache__ = null;
};

/**
 * Ruler.before(beforeName, ruleName, fn [, options])
 * - beforeName (String): new rule will be added before this one.
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Add new rule to chain before one with given name. Throws error if
 * `beforeName` is not found. See also [[Ruler.after]], [[Ruler.push]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.block.ruler.before('paragraph', 'my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.before = function (beforeName, ruleName, fn, options) {
  const position = this.__find__(beforeName);
  if (position === -1) {
    throw new Error('Parser rule not found: ' + beforeName);
  }
  this.__rules__.splice(position, 0, _rulerEntry(ruleName, fn, options));
  this.__cache__ = null;
};

/**
 * Ruler.after(afterName, ruleName, fn [, options])
 * - afterName (String): new rule will be added after this one.
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Add new rule to chain after one with given name. Throws error if
 * `afterName` is not found. See also [[Ruler.before]], [[Ruler.push]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.inline.ruler.after('text', 'my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.after = function (afterName, ruleName, fn, options) {
  const position = this.__find__(afterName);
  if (position === -1) {
    throw new Error('Parser rule not found: ' + afterName);
  }
  this.__rules__.splice(position + 1, 0, _rulerEntry(ruleName, fn, options));
  this.__cache__ = null;
};

/**
 * Ruler.push(ruleName, fn [, options])
 * - ruleName (String): name of added rule.
 * - fn (Function): rule function.
 * - options (Object): rule options (not mandatory).
 *
 * Push new rule to the end of chain. See also
 * [[Ruler.before]], [[Ruler.after]].
 *
 * ##### Options:
 *
 * - __alt__ - array with names of "alternate" chains.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')();
 *
 * md.core.ruler.push('my_rule', function replace(state) {
 *   //...
 * });
 * ```
 **/
Ruler.prototype.push = function (ruleName, fn, options) {
  this.__rules__.push(_rulerEntry(ruleName, fn, options));
  this.__cache__ = null;
};

/**
 * Ruler.enable(list [, ignoreInvalid]) -> Array
 * - list (String|Array): list of rule names to enable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable rules with given names. If any rule name not found - throw Error.
 * Errors can be disabled by second param.
 *
 * Returns list of found rule names (if no exception happened).
 *
 * See also [[Ruler.disable]], [[Ruler.enableOnly]].
 **/
Ruler.prototype.enable = function (list, ignoreInvalid) {
  return _rulerToggle(this, list, ignoreInvalid, true);
};

/**
 * Ruler.enableOnly(list [, ignoreInvalid])
 * - list (String|Array): list of rule names to enable (whitelist).
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable rules with given names, and disable everything else. If any rule name
 * not found - throw Error. Errors can be disabled by second param.
 *
 * See also [[Ruler.disable]], [[Ruler.enable]].
 **/
Ruler.prototype.enableOnly = function (list, ignoreInvalid) {
  const names = Array.isArray(list) ? list : [list];
  for (const rule of this.__rules__) {
    rule.enabled = false;
  }
  this.enable(names, ignoreInvalid);
};

/**
 * Ruler.disable(list [, ignoreInvalid]) -> Array
 * - list (String|Array): list of rule names to disable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Disable rules with given names. If any rule name not found - throw Error.
 * Errors can be disabled by second param.
 *
 * Returns list of found rule names (if no exception happened).
 *
 * See also [[Ruler.enable]], [[Ruler.enableOnly]].
 **/
Ruler.prototype.disable = function (list, ignoreInvalid) {
  return _rulerToggle(this, list, ignoreInvalid, false);
};

/**
 * Ruler.getRules(chainName) -> Array
 *
 * Return array of active functions (rules) for given chain name. It analyzes
 * rules configuration, compiles caches if not exists and returns result.
 *
 * Default chain name is `''` (empty string). It can't be skipped. That's
 * done intentionally, to keep signature monomorphic for high speed.
 **/
Ruler.prototype.getRules = function (chainName) {
  if (this.__cache__ === null) {
    this.__compile__();
  }
  // Chain can be empty, if rules disabled. But we still have to return Array.
  const chain = this.__cache__[chainName];
  return chain || [];
};
// Token class

/**
 * class Token
 **/

/**
 * new Token(type, tag, nesting)
 *
 * Create new token and fill passed properties. All other properties
 * start with the defaults set below.
 **/
function Token(type, tag, nesting) {
  /**
   * Token#type -> String
   *
   * Type of the token (string, e.g. "paragraph_open")
   **/
  this.type = type;

  /**
   * Token#tag -> String
   *
   * html tag name, e.g. "p"
   **/
  this.tag = tag;

  /**
   * Token#attrs -> Array
   *
   * Html attributes. Format: `[ [ name1, value1 ], [ name2, value2 ] ]`
   **/
  this.attrs = null;

  /**
   * Token#map -> Array
   *
   * Source map info. Format: `[ line_begin, line_end ]`
   **/
  this.map = null;

  /**
   * Token#nesting -> Number
   *
   * Level change (number in {-1, 0, 1} set), where:
   *
   * -  `1` means the tag is opening
   * -  `0` means the tag is self-closing
   * - `-1` means the tag is closing
   **/
  this.nesting = nesting;

  /**
   * Token#level -> Number
   *
   * nesting level, the same as `state.level`
   **/
  this.level = 0;

  /**
   * Token#children -> Array
   *
   * An array of child nodes (inline and img tokens)
   **/
  this.children = null;

  /**
   * Token#content -> String
   *
   * In a case of self-closing tag (code, html, fence, etc.),
   * it has contents of this tag.
   **/
  this.content = '';

  /**
   * Token#markup -> String
   *
   * '*' or '_' for emphasis, fence string for fence, etc.
   **/
  this.markup = '';

  /**
   * Token#info -> String
   *
   * Additional information:
   *
   * - Info string for "fence" tokens
   * - The value "auto" for autolink "link_open" and "link_close" tokens
   * - The string value of the item marker for ordered-list "list_item_open" tokens
   **/
  this.info = '';

  /**
   * Token#meta -> Object
   *
   * A place for plugins to store an arbitrary data
   **/
  this.meta = null;

  /**
   * Token#block -> Boolean
   *
   * True for block-level tokens, false for inline tokens.
   * Used in renderer to calculate line breaks
   **/
  this.block = false;

  /**
   * Token#hidden -> Boolean
   *
   * If it's true, ignore this element when rendering. Used for tight lists
   * to hide paragraphs.
   **/
  this.hidden = false;
}

/**
 * Token.attrIndex(name) -> Number
 *
 * Search attribute index by name. Returns -1 when the token has no
 * attributes or none of them is called `name`.
 **/
Token.prototype.attrIndex = function attrIndex(name) {
  const pairs = this.attrs;
  if (!pairs) {
    return -1;
  }
  return pairs.findIndex((pair) => pair[0] === name);
};

/**
 * Token.attrPush(attrData)
 *
 * Add `[ name, value ]` attribute to list. Init attrs if necessary
 **/
Token.prototype.attrPush = function attrPush(attrData) {
  if (!this.attrs) {
    this.attrs = [attrData];
    return;
  }
  this.attrs.push(attrData);
};

/**
 * Token.attrSet(name, value)
 *
 * Set `name` attribute to `value`. Override old value if exists.
 **/
Token.prototype.attrSet = function attrSet(name, value) {
  const position = this.attrIndex(name);
  const pair = [name, value];
  if (position >= 0) {
    this.attrs[position] = pair;
    return;
  }
  this.attrPush(pair);
};

/**
 * Token.attrGet(name)
 *
 * Get the value of attribute `name`, or null if it does not exist.
 **/
Token.prototype.attrGet = function attrGet(name) {
  const position = this.attrIndex(name);
  return position >= 0 ? this.attrs[position][1] : null;
};

/**
 * Token.attrJoin(name, value)
 *
 * Join value to existing attribute via space. Or create new attribute if not
 * exists. Useful to operate with token classes.
 **/
Token.prototype.attrJoin = function attrJoin(name, value) {
  const position = this.attrIndex(name);
  if (position < 0) {
    this.attrPush([name, value]);
    return;
  }
  const pair = this.attrs[position];
  pair[1] = pair[1] + ' ' + value;
};
// Core state object
//
// Holds everything the core rules share while processing one input:
// - src: the source string
// - md: the parser instance
// - env: the environment object handed on to the block and inline parsers
//
function StateCore(src, md, env) {
this.src = src;
this.env = env;
// output token list, starts empty
this.tokens = [];
// when true, the core `block` rule wraps `src` in a single inline token
// instead of running the block parser
this.inlineMode = false;
this.md = md; // link to parser instance
}
// re-export Token class to use in core rules
StateCore.prototype.Token = Token;
// Normalize input string
// https://spec.commonmark.org/0.29/#line-ending
const NEWLINES_RE = /\r\n?|\n/g;
const NULL_RE = /\0/g;
// Core rule: rewrite `state.src` in place so that every line ending
// (CRLF, lone CR or LF) becomes LF and every NULL character becomes U+FFFD.
function normalize(state) {
  state.src = state.src
    .replace(NEWLINES_RE, '\n')
    .replace(NULL_RE, '\uFFFD');
}
// Core rule: produce the block-level token stream.
//
// In inline mode the whole source becomes a single `inline` token; otherwise
// the work is delegated to the block parser, which appends to `state.tokens`.
function block(state) {
  if (!state.inlineMode) {
    state.md.block.parse(state.src, state.md, state.env, state.tokens);
    return;
  }
  const inlineToken = new state.Token('inline', '', 0);
  inlineToken.content = state.src;
  inlineToken.map = [0, 1];
  inlineToken.children = [];
  state.tokens.push(inlineToken);
}
// Core rule: run the inline parser over every `inline` block token,
// letting it fill that token's `children` array.
function inline(state) {
  const blockTokens = state.tokens;
  const count = blockTokens.length;
  let idx = 0;
  while (idx < count) {
    const current = blockTokens[idx++];
    if (current.type !== 'inline') {
      continue;
    }
    state.md.inline.parse(current.content, state.md, state.env, current.children);
  }
}
// Replace link-like texts with link nodes.
//
// Currently restricted by `md.validateLink()` to http/https/ftp
//
// True when `str` begins with an opening `<a` tag, i.e. `<a` followed by
// `>` or whitespace (case-insensitive).
function isLinkOpen$1(str) {
  const openingAnchor = /^<a[>\s]/i;
  return openingAnchor.test(str);
}
// True when `str` begins with a closing `</a>` tag; whitespace before the
// `>` is tolerated (case-insensitive).
function isLinkClose$1(str) {
  const closingAnchor = /^<\/a\s*>/i;
  return closingAnchor.test(str);
}
// Core rule: split text tokens around link-like substrings and wrap each
// match in `link_open` / `text` / `link_close` tokens.
//
// - Does nothing unless `state.md.options.linkify` is truthy.
// - Only `inline` block tokens whose content passes `md.linkify.pretest()`
//   are examined.
// - Text inside markdown links (`link_open` ... `link_close`) and inside raw
//   HTML `<a>...</a>` pairs is left untouched.
// - Generated link tokens carry `markup = 'linkify'` and `info = 'auto'`.
//
// The children array of each affected block token is replaced by a new array.
function linkify$1(state) {
  const blockTokens = state.tokens;
  if (!state.md.options.linkify) {
    return;
  }
  for (let j = 0, l = blockTokens.length; j < l; j++) {
    if (blockTokens[j].type !== 'inline' || !state.md.linkify.pretest(blockTokens[j].content)) {
      continue;
    }
    let tokens = blockTokens[j].children;
    // Number of raw HTML `</a>` tags seen (scanning backwards) that have not
    // yet been balanced by their `<a ...>`.
    let htmlLinkLevel = 0;
    // We scan from the end, to keep positions valid when new tokens are added.
    // Because of that, link start/end matching uses reversed logic.
    for (let i = tokens.length - 1; i >= 0; i--) {
      const currentToken = tokens[i];
      // Skip content of markdown links
      if (currentToken.type === 'link_close') {
        i--;
        // Walk back to the opener. The `i >= 0` bound keeps an unbalanced
        // `link_close` (no earlier opener) from indexing before the start of
        // the array and throwing.
        while (i >= 0 && tokens[i].level !== currentToken.level && tokens[i].type !== 'link_open') {
          i--;
        }
        continue;
      }
      // Skip content of html tag links
      if (currentToken.type === 'html_inline') {
        if (isLinkOpen$1(currentToken.content) && htmlLinkLevel > 0) {
          htmlLinkLevel--;
        }
        if (isLinkClose$1(currentToken.content)) {
          htmlLinkLevel++;
        }
      }
      if (htmlLinkLevel > 0) {
        continue;
      }
      if (currentToken.type === 'text' && state.md.linkify.test(currentToken.content)) {
        const text = currentToken.content;
        let links = state.md.linkify.match(text);
        // Now split string to nodes
        const nodes = [];
        let level = currentToken.level;
        let lastPos = 0;
        // forbid escape sequence at the start of the string,
        // this avoids http\://example.com/ from being linkified as
        // http:<a href="//example.com/">//example.com/</a>
        if (links.length > 0 && links[0].index === 0 && i > 0 && tokens[i - 1].type === 'text_special') {
          links = links.slice(1);
        }
        for (let ln = 0; ln < links.length; ln++) {
          const url = links[ln].url;
          const fullUrl = state.md.normalizeLink(url);
          if (!state.md.validateLink(fullUrl)) {
            // Rejected links stay part of the surrounding plain text because
            // `lastPos` is not advanced.
            continue;
          }
          let urlText = links[ln].text;
          // Linkifier might send raw hostnames like "example.com", where url
          // starts with domain name. So we prepend http:// in those cases,
          // and remove it afterwards.
          //
          if (!links[ln].schema) {
            urlText = state.md.normalizeLinkText('http://' + urlText).replace(/^http:\/\//, '');
          } else if (links[ln].schema === 'mailto:' && !/^mailto:/i.test(urlText)) {
            urlText = state.md.normalizeLinkText('mailto:' + urlText).replace(/^mailto:/, '');
          } else {
            urlText = state.md.normalizeLinkText(urlText);
          }
          const pos = links[ln].index;
          // Plain text between the previous match (or string start) and this one
          if (pos > lastPos) {
            const token = new state.Token('text', '', 0);
            token.content = text.slice(lastPos, pos);
            token.level = level;
            nodes.push(token);
          }
          const token_o = new state.Token('link_open', 'a', 1);
          token_o.attrs = [['href', fullUrl]];
          token_o.level = level++;
          token_o.markup = 'linkify';
          token_o.info = 'auto';
          nodes.push(token_o);
          const token_t = new state.Token('text', '', 0);
          token_t.content = urlText;
          token_t.level = level;
          nodes.push(token_t);
          const token_c = new state.Token('link_close', 'a', -1);
          token_c.level = --level;
          token_c.markup = 'linkify';
          token_c.info = 'auto';
          nodes.push(token_c);
          lastPos = links[ln].lastIndex;
        }
        // Trailing plain text after the last accepted match
        if (lastPos < text.length) {
          const token = new state.Token('text', '', 0);
          token.content = text.slice(lastPos);
          token.level = level;
          nodes.push(token);
        }
        // replace current node
        blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes);
      }
    }
  }
}
// Simple typographic replacements
//
// (c) (C) → ©
// (tm) (TM) → ™
// (r) (R) → ®
// +- → ±
// ... → … (also ?.... → ?.., !.... → !..)
// ???????? → ???, !!!!! → !!!, `,,` → `,`
// -- → –, --- → —
//
// TODO:
// - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
// - multiplications 2 x 4 -> 2 × 4
// Cheap pre-check: does the text contain any sequence the "rare"
// replacements could touch?
const RARE_RE = /\+-|\.\.|\?\?\?\?|!!!!|,,|--/;
// Workaround for phantomjs - the test regex must not carry the /g flag,
// otherwise the check fails every second time.
const SCOPED_ABBR_TEST_RE = /\((c|tm|r)\)/i;
const SCOPED_ABBR_RE = /\((c|tm|r)\)/ig;
// Lower-cased abbreviation -> replacement symbol
const SCOPED_ABBR = {
  c: '©',
  r: '®',
  tm: '™'
};
// Replacer callback for SCOPED_ABBR_RE: `name` is the captured abbreviation,
// looked up case-insensitively.
function replaceFn(match, name) {
  const key = name.toLowerCase();
  return SCOPED_ABBR[key];
}
// Replace (c), (tm), (r) abbreviations in every text token, except text
// that sits inside an auto-generated link (`info === 'auto'`).
//
// Tokens are visited back to front, so a `link_close` is met before its
// `link_open`: closing raises the depth counter, opening lowers it.
function replace_scoped(inlineTokens) {
  let autolinkDepth = 0;
  let idx = inlineTokens.length;
  while (idx-- > 0) {
    const tok = inlineTokens[idx];
    switch (tok.type) {
      case 'text':
        if (autolinkDepth === 0) {
          tok.content = tok.content.replace(SCOPED_ABBR_RE, replaceFn);
        }
        break;
      case 'link_open':
        if (tok.info === 'auto') {
          autolinkDepth--;
        }
        break;
      case 'link_close':
        if (tok.info === 'auto') {
          autolinkDepth++;
        }
        break;
      default:
        break;
    }
  }
}
// Apply the "rare" typographic replacements (plus-minus, ellipsis, repeated
// punctuation, en/em dashes) to every text token that is not inside an
// auto-generated link (`info === 'auto'`).
//
// Tokens are visited back to front, so a `link_close` is met before its
// `link_open`: closing raises the depth counter, opening lowers it.
function replace_rare(inlineTokens) {
  let autolinkDepth = 0;
  let idx = inlineTokens.length;
  while (idx-- > 0) {
    const tok = inlineTokens[idx];
    switch (tok.type) {
      case 'text':
        if (autolinkDepth === 0 && RARE_RE.test(tok.content)) {
          let out = tok.content.replace(/\+-/g, '±');
          // .., ..., ....... -> …
          out = out.replace(/\.{2,}/g, '…');
          // but ?..... & !..... -> ?.. & !..
          out = out.replace(/([?!])…/g, '$1..');
          // runs of four or more ? or ! shrink to three
          out = out.replace(/([?!]){4,}/g, '$1$1$1');
          // repeated commas collapse to one
          out = out.replace(/,{2,}/g, ',');
          // em-dash
          out = out.replace(/(^|[^-])---(?=[^-]|$)/mg, '$1\u2014');
          // en-dash
          out = out.replace(/(^|\s)--(?=\s|$)/mg, '$1\u2013');
          out = out.replace(/(^|[^-\s])--(?=[^-\s]|$)/mg, '$1\u2013');
          tok.content = out;
        }
        break;
      case 'link_open':
        if (tok.info === 'auto') {
          autolinkDepth--;
        }
        break;
      case 'link_close':
        if (tok.info === 'auto') {
          autolinkDepth++;
        }
        break;
      default:
        break;
    }
  }
}
// Core rule: typographic replacements.
//
// Active only when `state.md.options.typographer` is truthy. Each `inline`
// block token is pre-checked against the cheap test regexes and, on a hit,
// its children are handed to the matching replacement pass.
function replace(state) {
  if (!state.md.options.typographer) {
    return;
  }
  let blkIdx = state.tokens.length;
  while (--blkIdx >= 0) {
    const blockToken = state.tokens[blkIdx];
    if (blockToken.type !== 'inline') {
      continue;
    }
    if (SCOPED_ABBR_TEST_RE.test(blockToken.content)) {
      replace_scoped(blockToken.children);
    }
    if (RARE_RE.test(blockToken.content)) {
      replace_rare(blockToken.children);
    }
  }
}
// Convert straight quotation marks to typographic ones
//
const QUOTE_TEST_RE = /['"]/;
const QUOTE_RE = /['"]/g;
const APOSTROPHE = '\u2019'; /* ’ */
// Return a copy of `str` in which the single character at `index` is
// replaced by the string `ch` (which may be longer than one character).
function replaceAt(str, index, ch) {
  const head = str.slice(0, index);
  const tail = str.slice(index + 1);
  return head + ch + tail;
}
// Replace straight quotes in the text tokens of one inline token list.
//
// - `tokens`: children of an `inline` block token; their `content` is
//   rewritten in place.
// - `state`: only `state.md.options.quotes` is read here; indices 0/1 are
//   used as the opening/closing double quote, 2/3 as the opening/closing
//   single quote.
//
// Candidate opening quotes are kept on `stack` as
// `{ token, pos, single, level }` until a matching closing quote of the same
// kind and nesting level is found. Single quotes that cannot open (mid-word
// or closing-only without a match) are turned into APOSTROPHE.
function process_inlines(tokens, state) {
let j;
const stack = [];
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
const thisLevel = tokens[i].level;
// Drop pending openers that were recorded at a deeper nesting level than
// the current token: keep everything up to the last entry whose level is
// <= thisLevel.
for (j = stack.length - 1; j >= 0; j--) {
if (stack[j].level <= thisLevel) {
break;
}
}
stack.length = j + 1;
if (token.type !== 'text') {
continue;
}
let text = token.content;
let pos = 0;
let max = text.length;
/* eslint no-labels:0,block-scoped-var:0 */
OUTER: while (pos < max) {
// QUOTE_RE is a global regex, so exec() starts searching at lastIndex.
QUOTE_RE.lastIndex = pos;
const t = QUOTE_RE.exec(text);
if (!t) {
break;
}
let canOpen = true;
let canClose = true;
// Continue the next search right after this quote.
pos = t.index + 1;
const isSingle = t[0] === "'";
// Find previous character,
// default to space if it's the beginning of the line
//
let lastChar = 0x20;
if (t.index - 1 >= 0) {
lastChar = text.charCodeAt(t.index - 1);
} else {
// Quote is the first character of this token: look at the end of the
// nearest preceding token that has content.
for (j = i - 1; j >= 0; j--) {
if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // lastChar defaults to 0x20
if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'
lastChar = tokens[j].content.charCodeAt(tokens[j].content.length - 1);
break;
}
}
// Find next character,
// default to space if it's the end of the line
//
let nextChar = 0x20;
if (pos < max) {
nextChar = text.charCodeAt(pos);
} else {
// Quote is the last character of this token: look at the start of the
// nearest following token that has content.
for (j = i + 1; j < tokens.length; j++) {
if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // nextChar defaults to 0x20
if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'
nextChar = tokens[j].content.charCodeAt(0);
break;
}
}
const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar));
const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar));
const isLastWhiteSpace = isWhiteSpace(lastChar);
const isNextWhiteSpace = isWhiteSpace(nextChar);
// A quote cannot open when followed by whitespace, or when followed by
// punctuation unless it is itself preceded by whitespace/punctuation.
if (isNextWhiteSpace) {
canOpen = false;
} else if (isNextPunctChar) {
if (!(isLastWhiteSpace || isLastPunctChar)) {
canOpen = false;
}
}
// Mirror rule for closing: not after whitespace, and after punctuation
// only when followed by whitespace/punctuation.
if (isLastWhiteSpace) {
canClose = false;
} else if (isLastPunctChar) {
if (!(isNextWhiteSpace || isNextPunctChar)) {
canClose = false;
}
}
if (nextChar === 0x22 /* " */ && t[0] === '"') {
if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {
// special case: 1"" - count first quote as an inch
canClose = canOpen = false;
}
}
if (canOpen && canClose) {
// Replace quotes in the middle of punctuation sequence, but not
// in the middle of the words, i.e.:
//
// 1. foo " bar " baz - not replaced
// 2. foo-"-bar-"-baz - replaced
// 3. foo"bar"baz - not replaced
//
canOpen = isLastPunctChar;
canClose = isNextPunctChar;
}
if (!canOpen && !canClose) {
// middle of word
if (isSingle) {
token.content = replaceAt(token.content, t.index, APOSTROPHE);
}
// `text` is not refreshed here; APOSTROPHE is a single character, so the
// positions used by the following searches are unaffected.
continue;
}
if (canClose) {
// this could be a closing quote, rewind the stack to get a match
for (j = stack.length - 1; j >= 0; j--) {
let item = stack[j];
// Entries below the current level end the search.
if (stack[j].level < thisLevel) {
break;
}
if (item.single === isSingle && stack[j].level === thisLevel) {
item = stack[j];
let openQuote;
let closeQuote;
if (isSingle) {
openQuote = state.md.options.quotes[2];
closeQuote = state.md.options.quotes[3];
} else {
openQuote = state.md.options.quotes[0];
closeQuote = state.md.options.quotes[1];
}
// replace token.content *before* tokens[item.token].content,
// because, if they are pointing at the same token, replaceAt
// could mess up indices when quote length != 1
token.content = replaceAt(token.content, t.index, closeQuote);
tokens[item.token].content = replaceAt(tokens[item.token].content, item.pos, openQuote);
// Shift the scan position by however much the replacements lengthened
// the current token's text.
pos += closeQuote.length - 1;
if (item.token === i) {
pos += openQuote.length - 1;
}
text = token.content;
max = text.length;
// Discard the matched opener and every entry stacked above it.
stack.length = j;
continue OUTER;
}
}
}
if (canOpen) {
// Remember this quote as a possible opener.
stack.push({
token: i,
pos: t.index,
single: isSingle,
level: thisLevel
});
} else if (canClose && isSingle) {
// Closing-only single quote with no matching opener: treat as apostrophe.
token.content = replaceAt(token.content, t.index, APOSTROPHE);
}
}
}
}
// Core rule: smart quotes.
//
// Active only when `state.md.options.typographer` is truthy. Every `inline`
// block token whose content contains a straight quote has its children
// processed by `process_inlines`.
function smartquotes(state) {
  /* eslint max-depth:0 */
  if (!state.md.options.typographer) {
    return;
  }
  let blkIdx = state.tokens.length;
  while (--blkIdx >= 0) {
    const blockToken = state.tokens[blkIdx];
    const hasQuotes = blockToken.type === 'inline' && QUOTE_TEST_RE.test(blockToken.content);
    if (hasQuotes) {
      process_inlines(blockToken.children, state);
    }
  }
}
// Join raw text tokens with the rest of the text
//
// This is kept as a separate rule so that plugins get a chance to run their
// text replacements before escaped characters (`text_special` tokens) are
// merged into ordinary text.
//
// For example, `\:)` shouldn't be replaced with an emoji.
//
function text_join(state) {
  const blocks = state.tokens;
  const blockCount = blocks.length;
  for (let b = 0; b < blockCount; b++) {
    const blockToken = blocks[b];
    if (blockToken.type !== 'inline') continue;
    const children = blockToken.children;
    const total = children.length;

    // Pass 1: escaped characters become plain text tokens.
    for (let k = 0; k < total; k++) {
      const child = children[k];
      if (child.type === 'text_special') {
        child.type = 'text';
      }
    }

    // Pass 2: fold each run of adjacent text tokens into the last token of
    // the run, compacting the survivors towards the front of the array.
    let write = 0;
    for (let read = 0; read < total; read++) {
      const current = children[read];
      const next = read + 1 < total ? children[read + 1] : null;
      if (current.type === 'text' && next !== null && next.type === 'text') {
        // collapse two adjacent text nodes
        next.content = current.content + next.content;
        continue;
      }
      if (read !== write) {
        children[write] = current;
      }
      write++;
    }

    // Cut off the slots left over after compaction.
    if (write !== total) {
      children.length = write;
    }
  }
}
/** internal
* class Core
*
* Top-level rules executor. Glues block/inline parsers and does intermediate
* transformations.
**/
const _rules$2 = [['normalize', normalize], ['block', block], ['inline', inline], ['linkify', linkify$1], ['replacements', replace], ['smartquotes', smartquotes],
// `text_join` finds `text_special` tokens (for escape sequences)
// and joins them with the rest of