/*
 * mdast
 * Version: 2.2.2
 * Markdown processor powered by plugins
 * 2,125 lines (1,818 loc) • 65.6 kB
 * JavaScript
 */
/**
* @author Titus Wormer
* @copyright 2015 Titus Wormer
* @license MIT
* @module mdast:parse
* @version 2.2.2
* @fileoverview Parse a markdown document into an
* abstract syntax tree.
*/
'use strict';
/* eslint-env commonjs */
/*
* Dependencies.
*/
var he = require('he');
var repeat = require('repeat-string');
var trim = require('trim');
var trimTrailingLines = require('trim-trailing-lines');
var extend = require('extend.js');
var utilities = require('./utilities.js');
var defaultExpressions = require('./expressions.js');
var defaultOptions = require('./defaults.js').parse;
/*
* Methods.
*/
var raise = utilities.raise;
var clean = utilities.clean;
var validate = utilities.validate;
var normalize = utilities.normalizeIdentifier;
var arrayPush = [].push;
/*
* Characters.
*/
var AT_SIGN = '@';
var CARET = '^';
var EQUALS = '=';
var EXCLAMATION_MARK = '!';
var MAILTO_PROTOCOL = 'mailto:';
var NEW_LINE = '\n';
var SPACE = ' ';
var TAB = '\t';
var EMPTY = '';
var LT = '<';
var GT = '>';
var BRACKET_OPEN = '[';
/*
* Types.
*/
var BLOCK = 'block';
var INLINE = 'inline';
var HORIZONTAL_RULE = 'horizontalRule';
var HTML = 'html';
var YAML = 'yaml';
var TABLE = 'table';
var TABLE_CELL = 'tableCell';
var TABLE_HEADER = 'tableHeader';
var TABLE_ROW = 'tableRow';
var PARAGRAPH = 'paragraph';
var TEXT = 'text';
var CODE = 'code';
var LIST = 'list';
var LIST_ITEM = 'listItem';
var FOOTNOTE_DEFINITION = 'footnoteDefinition';
var HEADING = 'heading';
var BLOCKQUOTE = 'blockquote';
var LINK = 'link';
var IMAGE = 'image';
var FOOTNOTE = 'footnote';
var ESCAPE = 'escape';
var STRONG = 'strong';
var EMPHASIS = 'emphasis';
var DELETE = 'delete';
var INLINE_CODE = 'inlineCode';
var BREAK = 'break';
var ROOT = 'root';
/**
 * Decode the entities in `value` by delegating to
 * `he.decode`.
 *
 * @example
 *   decode('&amp;', eat); // '&'
 *   decode('&#x26;', eat); // '&'
 *
 * @param {string} value - Text which may contain entities.
 * @param {function(string)} eat - Its `file.fail` and `now`
 *   are used to report a decoding failure.
 * @return {string|undefined} - Decoded text, or `undefined`
 *   when `he.decode` threw and `eat.file.fail` returned
 *   instead of throwing itself.
 */
function decode(value, eat) {
    var decoded;

    try {
        decoded = he.decode(value);
    } catch (error) {
        /* Report the failure at the current position. */
        eat.file.fail(error, eat.now());
    }

    return decoded;
}
/**
 * Factory to de-escape a value, based on an expression
 * at `key` in `scope`.
 *
 * Only a leading `^` anchor is removed from the
 * expression's source before it is made global; carets
 * elsewhere in the pattern (such as in a negated class
 * `[^...]`) are left untouched.
 *
 * @example
 *   var expressions = {escape: /^\\(a)/};
 *   var descape = descapeFactory(expressions, 'escape');
 *   descape('\\a'); // 'a'
 *
 * @param {Object} scope - Map of expressions.
 * @param {string} key - Key in `scope` at which the
 *   non-global expression exists.
 * @return {function(string): string} - Function which
 *   takes a value and returns its unescaped version.
 */
function descapeFactory(scope, key) {
    var cachedSource;
    var cachedGlobal;

    /**
     * Get the global counterpart of the expression
     * currently at `key` in `scope`. The result is cached
     * and only rebuilt when `scope[key]` is replaced by a
     * different expression object.
     *
     * @private
     * @return {RegExp}
     */
    function generate() {
        var current = scope[key];

        if (current !== cachedSource) {
            cachedSource = current;
            cachedGlobal = new RegExp(
                current.source.replace(/^\^/, EMPTY), 'g'
            );
        }

        return cachedGlobal;
    }

    /**
     * De-escape a string: every match of the expression is
     * replaced by its first capture group.
     *
     * @param {string} value - Escaped string.
     * @return {string} - Unescaped string.
     */
    function descape(value) {
        return value.replace(generate(), '$1');
    }

    return descape;
}
/*
* Tab size.
*/
var TAB_SIZE = 4;
/*
* Expressions.
*/
var EXPRESSION_RIGHT_ALIGNMENT = /^[ \t]*-+:[ \t]*$/;
var EXPRESSION_CENTER_ALIGNMENT = /^[ \t]*:-+:[ \t]*$/;
var EXPRESSION_LEFT_ALIGNMENT = /^[ \t]*:-+[ \t]*$/;
var EXPRESSION_TABLE_FENCE = /^[ \t]*|\|[ \t]*$/g;
var EXPRESSION_TABLE_BORDER = /[ \t]*\|[ \t]*/;
var EXPRESSION_BLOCK_QUOTE = /^[ \t]*>[ \t]?/gm;
var EXPRESSION_BULLET = /^([ \t]*)([*+-]|\d+[.)])( {1,4}(?! )| |\t)([^\n]*)/;
var EXPRESSION_PEDANTIC_BULLET = /^([ \t]*)([*+-]|\d+[.)])([ \t]+)/;
var EXPRESSION_INITIAL_INDENT = /^( {1,4}|\t)?/gm;
var EXPRESSION_INITIAL_TAB = /^( {4}|\t)?/gm;
var EXPRESSION_HTML_LINK_OPEN = /^<a /i;
var EXPRESSION_HTML_LINK_CLOSE = /^<\/a>/i;
var EXPRESSION_LOOSE_LIST_ITEM = /\n\n(?!\s*$)/;
var EXPRESSION_TASK_ITEM = /^\[([\ \t]|x|X)\][\ \t]/;
/*
* A map of characters, and their column length,
* which can be used as indentation.
*
* A space counts for one column (`SPACE.length`), a tab
* for `TAB_SIZE` columns. `getIndent` stops scanning at
* the first character which is not a key of this map.
*/
var INDENTATION_CHARACTERS = {};
INDENTATION_CHARACTERS[SPACE] = SPACE.length;
INDENTATION_CHARACTERS[TAB] = TAB_SIZE;
/**
 * Measure the indentation at the start of a line.
 *
 * The result holds the total indent in columns, and
 * `stops`: a map from each column reached to the index
 * of the character which reached it. Characters with a
 * width above one (tabs) snap to a multiple of their
 * width.
 *
 * @example
 *   getIndent('  foo');
 *   // {indent: 2, stops: {1: 0, 2: 1}}
 *
 *   getIndent('\tfoo');
 *   // {indent: 4, stops: {4: 0}}
 *
 *   getIndent('  \tfoo');
 *   // {indent: 4, stops: {1: 0, 2: 1, 4: 2}}
 *
 *   getIndent('\t  foo')
 *   // {indent: 6, stops: {4: 0, 5: 1, 6: 2}}
 *
 * @param {string} value - Indented line.
 * @return {Object} - `indent` and `stops`.
 */
function getIndent(value) {
    var stops = {};
    var column = 0;
    var offset;
    var character;
    var width;

    for (offset = 0; ; offset++) {
        character = value.charAt(offset);

        if (!(character in INDENTATION_CHARACTERS)) {
            break;
        }

        width = INDENTATION_CHARACTERS[character];
        column += width;

        /* Wide characters align to the previous multiple of their width. */
        if (width > 1) {
            column -= column % width;
        }

        stops[column] = offset;
    }

    return {
        'indent': column,
        'stops': stops
    };
}
/**
* Remove the minimum indent from every line in `value`.
* Supports both tab, spaced, and mixed indentation (as
* well as possible).
*
* @example
* removeIndentation(' foo'); // 'foo'
* removeIndentation(' foo', 2); // ' foo'
* removeIndentation('\tfoo', 2); // ' foo'
* removeIndentation(' foo\n bar'); // ' foo\n bar'
*
* @param {string} value
* @param {number?} [maximum] - Maximum indentation
* to remove.
* @return {string} - Unindented `value`.
*/
function removeIndentation(value, maximum) {
var values = value.split(NEW_LINE);
var position = values.length + 1;
var minIndent = Infinity;
var matrix = [];
var index;
var indentation;
var stops;
var padding;
values.unshift(repeat(SPACE, maximum) + EXCLAMATION_MARK);
while (position--) {
indentation = getIndent(values[position]);
matrix[position] = indentation.stops;
if (trim(values[position]).length === 0) {
continue;
}
if (indentation.indent) {
if (indentation.indent > 0 && indentation.indent < minIndent) {
minIndent = indentation.indent;
}
} else {
minIndent = Infinity;
break;
}
}
if (minIndent !== Infinity) {
position = values.length;
while (position--) {
stops = matrix[position];
index = minIndent;
while (index && !(index in stops)) {
index--;
}
if (
trim(values[position]).length !== 0 &&
minIndent &&
index !== minIndent
) {
padding = TAB;
} else {
padding = EMPTY;
}
values[position] = padding + values[position].slice(
index in stops ? stops[index] + 1 : 0
);
}
}
values.shift();
return values.join(NEW_LINE);
}
/**
 * Make sure every line in `value` starts with at least
 * `indent` spaces, by prepending the spaces which are
 * missing. Tabs are not treated as indentation.
 *
 * @example
 *   ensureIndentation('foo', 2); // '  foo'
 *   ensureIndentation(' foo', 4); // '    foo'
 *
 * @param {string} value
 * @param {number} indent - The maximum amount of
 *   spacing to insert.
 * @return {string} - Indented `value`.
 */
function ensureIndentation(value, indent) {
    var lines = value.split(NEW_LINE);
    var total = lines.length;
    var row;
    var line;
    var spaces;

    for (row = 0; row < total; row++) {
        line = lines[row];
        spaces = 0;

        /* Count initial spaces, but no further than `indent`. */
        while (spaces < indent && line.charAt(spaces) === SPACE) {
            spaces++;
        }

        if (spaces < indent) {
            lines[row] = repeat(SPACE, indent - spaces) + line;
        }
    }

    return lines.join(NEW_LINE);
}
/**
 * Map the cells of a table's alignment row to alignment
 * names.
 *
 * @example
 *   getAlignment([':-', ':-:', '-:', '--']);
 *   // ['left', 'center', 'right', null];
 *
 * @param {Array.<string>} cells
 * @return {Array.<string?>} - Per cell `'left'`,
 *   `'center'`, `'right'`, or `null`.
 */
function getAlignment(cells) {
    var alignments = [];
    var total = cells.length;
    var position;

    /**
     * Get the alignment of a single cell.
     *
     * @param {string} cell
     * @return {string?}
     */
    function alignmentOf(cell) {
        if (EXPRESSION_RIGHT_ALIGNMENT.test(cell)) {
            return 'right';
        }

        if (EXPRESSION_CENTER_ALIGNMENT.test(cell)) {
            return 'center';
        }

        if (EXPRESSION_LEFT_ALIGNMENT.test(cell)) {
            return 'left';
        }

        return null;
    }

    for (position = 0; position < total; position++) {
        alignments[position] = alignmentOf(cells[position]);
    }

    return alignments;
}
/**
 * Construct a state `toggler`: a function which, invoked
 * as a method of a context, sets `key` on that context
 * to the inverse of `state`, and returns a function
 * which puts back the value `key` had before.
 *
 * @example
 *   var context = {};
 *   var key = 'foo';
 *   var val = true;
 *   context[key] = val;
 *   context.enter = stateToggler(key, val);
 *   context[key]; // true
 *   var exit = context.enter();
 *   context[key]; // false
 *   var nested = context.enter();
 *   context[key]; // false
 *   nested();
 *   context[key]; // false
 *   exit();
 *   context[key]; // true
 *
 * @param {string} key - Property to toggle.
 * @param {boolean} state - Its default state.
 * @return {function(): function()} - Enter.
 */
function stateToggler(key, state) {
    /**
     * Enter the state on the context this is invoked on.
     *
     * @return {Function} - Exit state.
     */
    return function enter() {
        var context = this;
        var previous = context[key];

        context[key] = !state;

        /**
         * Restore the value from before entering.
         */
        return function exit() {
            context[key] = previous;
        };
    };
}
/**
 * Construct a state toggler which doesn't toggle:
 * entering changes nothing, and neither does the
 * returned exit function.
 *
 * @example
 *   var context = {};
 *   var key = 'foo';
 *   var val = true;
 *   context[key] = val;
 *   context.enter = noopToggler();
 *   context[key]; // true
 *   var exit = context.enter();
 *   context[key]; // true
 *   exit();
 *   context[key]; // true
 *
 * @return {function(): function()} - Enter.
 */
function noopToggler() {
    /* A single no-operation, shared by every `enter` call. */
    var exit = function () {};

    /**
     * @return {Function} - The no-operation exit.
     */
    return function enter() {
        return exit;
    };
}
/*
 * Merge functions, by the type of node they can merge.
 * Each receives two adjacent siblings of that type and
 * returns either `prev` (when `node` was merged into it)
 * or `node` (when no merge took place).
 */
var MERGEABLE_NODES = {};

/**
 * Merge two text nodes by appending the value of `node`
 * to the value of `prev`.
 *
 * @param {Object} prev - Preceding sibling.
 * @param {Object} node - Following sibling.
 * @return {Object} - `prev`.
 */
MERGEABLE_NODES.text = function (prev, node) {
    prev.value = prev.value + node.value;

    return prev;
};

/**
 * Merge two blockquotes by appending the children of
 * `node` to those of `prev`. Nothing is merged in
 * CommonMark mode.
 *
 * @param {Object} prev - Preceding sibling.
 * @param {Object} node - Following sibling.
 * @return {Object} - `prev`, or `node` in CommonMark mode.
 */
MERGEABLE_NODES.blockquote = function (prev, node) {
    if (!this.options.commonmark) {
        prev.children = prev.children.concat(node.children);

        return prev;
    }

    return node;
};

/**
 * Merge two lists by appending the children of `node` to
 * those of `prev`. Lists are only merged when the current
 * bullet is set, equals the previous bullet, and is a
 * single character.
 *
 * @param {Object} prev - Preceding sibling.
 * @param {Object} node - Following sibling.
 * @return {Object} - `prev`, or `node` when the lists
 *   were not merged.
 */
MERGEABLE_NODES.list = function (prev, node) {
    var bullet = this.currentBullet;
    var isSameList = bullet &&
        bullet === this.previousBullet &&
        bullet.length === 1;

    if (isSameList) {
        prev.children = prev.children.concat(node.children);

        return prev;
    }

    return node;
};
/**
 * Tokenise line feeds. When more than one character is
 * given, `currentBullet` and `previousBullet` are unset,
 * which prevents the lists around it from being merged.
 *
 * @example
 *   tokenizeNewline(eat, '\n\n');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Lines.
 */
function tokenizeNewline(eat, $0) {
    var self = this;
    var isMultiple = $0.length > 1;

    if (isMultiple) {
        self.previousBullet = null;
        self.currentBullet = null;
    }

    eat($0);
}
/**
 * Tokenise an indented code block: trailing lines are
 * trimmed, the result is eaten, and up to `TAB_SIZE`
 * columns of indent are removed from the content.
 *
 * @example
 *   tokenizeCode(eat, '\tfoo');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole code.
 * @return {Node} - `code` node.
 */
function tokenizeCode(eat, $0) {
    var self = this;
    var value = trimTrailingLines($0);
    var add = eat(value);
    var content = removeIndentation(value, TAB_SIZE);

    return add(self.renderCodeBlock(content, null, eat));
}
/**
 * Tokenise a fenced code block.
 *
 * @example
 *   var $0 = '```js\nfoo()\n```';
 *   tokenizeFences(eat, $0, '', '```', '`', 'js', 'foo()\n');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole code.
 * @param {string} $1 - Initial spacing.
 * @param {string} $2 - Initial fence.
 * @param {string} $3 - Fence marker.
 * @param {string} $4 - Programming language flag.
 * @param {string} $5 - Content.
 * @return {Node} - `code` node.
 */
function tokenizeFences(eat, $0, $1, $2, $3, $4, $5) {
    var self = this;
    var value = trimTrailingLines($0);
    var content = $5;
    var size;
    var add;

    /*
     * When the initial fence was indented, the same amount
     * of white space is removed from the content. As the
     * content may be indented less than the fence, it is
     * first padded to at least that amount.
     */
    if ($1) {
        size = $1.length;
        content = removeIndentation(
            ensureIndentation(content, size), size
        );
    }

    add = eat(value);

    return add(self.renderCodeBlock(content, $4, eat));
}
/**
 * Tokenise an ATX-style heading. The position handed to
 * `renderHeading` is moved past the initial spacing, the
 * hashes, and the internal spacing, so that it points at
 * the content.
 *
 * @example
 *   tokenizeHeading(eat, ' # foo', ' ', '#', ' ', 'foo');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole heading.
 * @param {string} $1 - Initial spacing.
 * @param {string} $2 - Hashes.
 * @param {string} $3 - Internal spacing.
 * @param {string} $4 - Content.
 * @return {Node} - `heading` node.
 */
function tokenizeHeading(eat, $0, $1, $2, $3, $4) {
    var self = this;
    var position = eat.now();
    var prefix = $1 + $2 + ($3 || '');
    var add;

    position.column += prefix.length;
    add = eat($0);

    return add(self.renderHeading($4, $2.length, position));
}
/**
 * Tokenise a Setext-style heading. An underline of
 * `EQUALS` results in depth 1, any other marker in
 * depth 2.
 *
 * @example
 *   tokenizeLineHeading(eat, 'foo\n===', '', 'foo', '=');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole heading.
 * @param {string} $1 - Initial spacing.
 * @param {string} $2 - Content.
 * @param {string} $3 - Underline marker.
 * @return {Node} - `heading` node.
 */
function tokenizeLineHeading(eat, $0, $1, $2, $3) {
    var self = this;
    var position = eat.now();
    var depth = 2;
    var add;

    if ($3 === EQUALS) {
        depth = 1;
    }

    /* Point at the content, past the initial spacing. */
    position.column += $1.length;
    add = eat($0);

    return add(self.renderHeading($2, depth, position));
}
/**
 * Tokenise a horizontal rule.
 *
 * @example
 *   tokenizeHorizontalRule(eat, '***');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole rule.
 * @return {Node} - `horizontalRule` node.
 */
function tokenizeHorizontalRule(eat, $0) {
    var add = eat($0);

    return add(this.renderVoid(HORIZONTAL_RULE));
}
/**
 * Tokenise a blockquote. The markers at the start of
 * each line are removed from the content, and the length
 * of each removed marker is reported to the indent
 * function for the blockquote's first line.
 *
 * @example
 *   tokenizeBlockquote(eat, '> Foo');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole blockquote.
 * @return {Node} - `blockquote` node.
 */
function tokenizeBlockquote(eat, $0) {
    var self = this;
    var start = eat.now();
    var offset = self.indent(start.line);
    var quote = trimTrailingLines($0);
    var add = eat(quote);
    var content;

    /**
     * Report the length of a marker and remove it.
     *
     * @param {string} marker
     * @return {string} - Empty string.
     */
    function stripMarker(marker) {
        offset(marker.length);

        return '';
    }

    content = quote.replace(EXPRESSION_BLOCK_QUOTE, stripMarker);

    return add(self.renderBlockquote(content, start));
}
/**
* Tokenise a list.
*
* @example
* tokenizeList(eat, '- Foo', '', '-');
*
* @param {function(string)} eat
* @param {string} $0 - Whole list.
* @param {string} $1 - Indent.
* @param {string} $2 - Bullet.
* @return {Node} - `list` node.
*/
function tokenizeList(eat, $0, $1, $2) {
var self = this;
var firstBullet = $2;
var value = trimTrailingLines($0);
var matches = value.match(self.rules.item);
var length = matches.length;
var index = 0;
var isLoose = false;
var now;
var bullet;
var item;
var enterTop;
var exitBlockquote;
var node;
var indent;
var size;
var position;
var end;
/*
* Determine if all list-items belong to the
* same list.
*/
if (!self.options.pedantic) {
while (++index < length) {
bullet = self.rules.bullet.exec(matches[index])[0];
if (
firstBullet !== bullet &&
(
firstBullet.length === 1 && bullet.length === 1 ||
bullet.charAt(bullet.length - 1) !==
firstBullet.charAt(firstBullet.length - 1)
)
) {
matches = matches.slice(0, index);
matches[index - 1] = trimTrailingLines(matches[index - 1]);
length = matches.length;
break;
}
}
}
if (self.options.commonmark) {
index = -1;
while (++index < length) {
item = matches[index];
indent = self.rules.indent.exec(item);
indent = indent[1] + repeat(SPACE, indent[2].length) + indent[3];
size = getIndent(indent).indent;
position = indent.length;
end = item.length;
while (++position < end) {
if (
item.charAt(position) === NEW_LINE &&
item.charAt(position - 1) === NEW_LINE &&
getIndent(item.slice(position + 1)).indent < size
) {
matches[index] = item.slice(0, position - 1);
matches = matches.slice(0, index + 1);
length = matches.length;
break;
}
}
}
}
self.previousBullet = self.currentBullet;
self.currentBullet = firstBullet;
index = -1;
node = eat(matches.join(NEW_LINE)).reset(
self.renderList([], firstBullet)
);
enterTop = self.exitTop();
exitBlockquote = self.enterBlockquote();
while (++index < length) {
item = matches[index];
now = eat.now();
item = eat(item)(self.renderListItem(item, now), node);
if (item.loose) {
isLoose = true;
}
if (index !== length - 1) {
eat(NEW_LINE);
}
}
node.loose = isLoose;
enterTop();
exitBlockquote();
return node;
}
/**
 * Tokenise HTML, without its trailing lines.
 *
 * @example
 *   tokenizeHtml(eat, '<span>foo</span>');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole HTML.
 * @return {Node} - `html` node.
 */
function tokenizeHtml(eat, $0) {
    var value = trimTrailingLines($0);
    var add = eat(value);

    return add(this.renderRaw(HTML, value));
}
/**
 * Tokenise a definition. Angle brackets around the URL
 * are removed; the title and the URL are de-escaped and
 * decoded.
 *
 * @example
 *   var $0 = '[foo]: http://example.com "Example Domain"';
 *   var $1 = 'foo';
 *   var $2 = 'http://example.com';
 *   var $3 = 'Example Domain';
 *   tokenizeDefinition(eat, $0, $1, $2, $3);
 *
 * @property {boolean} onlyAtTop
 * @property {boolean} notInBlockquote
 * @param {function(string)} eat
 * @param {string} $0 - Whole definition.
 * @param {string} $1 - Key.
 * @param {string} $2 - URL.
 * @param {string} $3 - Title.
 * @return {Node} - `definition` node.
 */
function tokenizeDefinition(eat, $0, $1, $2, $3) {
    var self = this;
    var link = $2;
    var last = link.length - 1;
    var add;
    var node;

    /* Unwrap `<url>`. */
    if (link.charAt(0) === LT && link.charAt(last) === GT) {
        link = link.slice(1, -1);
    }

    add = eat($0);

    node = {
        'type': 'definition'
    };
    node.identifier = normalize($1);

    if ($3) {
        node.title = decode(self.descape($3), eat);
    } else {
        node.title = null;
    }

    node.link = decode(self.descape(link), eat);

    return add(node);
}
tokenizeDefinition.onlyAtTop = true;
tokenizeDefinition.notInBlockquote = true;
/**
 * Tokenise YAML front matter. The node's value is the
 * content without trailing lines, or an empty string
 * when there is no content.
 *
 * @example
 *   var $0 = '---\nfoo: bar\n---';
 *   var $1 = 'foo: bar';
 *   tokenizeYAMLFrontMatter(eat, $0, $1);
 *
 * @property {boolean} onlyAtStart
 * @param {function(string)} eat
 * @param {string} $0 - Whole front matter.
 * @param {string} $1 - Content.
 * @return {Node} - `yaml` node.
 */
function tokenizeYAMLFrontMatter(eat, $0, $1) {
    var add = eat($0);
    var content = EMPTY;

    if ($1) {
        content = trimTrailingLines($1);
    }

    return add(this.renderRaw(YAML, content));
}
tokenizeYAMLFrontMatter.onlyAtStart = true;
/**
 * Tokenise a footnote definition. An initial tab or four
 * spaces is removed from each line of the value, and the
 * length of each removal is reported to the indent
 * function for the definition's first line.
 *
 * @example
 *   var $0 = '[foo]: Bar.';
 *   var $1 = '[foo]';
 *   var $2 = 'foo';
 *   var $3 = 'Bar.';
 *   tokenizeFootnoteDefinition(eat, $0, $1, $2, $3);
 *
 * @property {boolean} onlyAtTop
 * @property {boolean} notInBlockquote
 * @param {function(string)} eat
 * @param {string} $0 - Whole definition.
 * @param {string} $1 - Whole key.
 * @param {string} $2 - Key.
 * @param {string} $3 - Whole value.
 * @return {Node} - `footnoteDefinition` node.
 */
function tokenizeFootnoteDefinition(eat, $0, $1, $2, $3) {
    var self = this;
    var start = eat.now();
    var offset = self.indent(start.line);
    var content;
    var add;

    /**
     * Report the length of a match and remove it.
     *
     * @param {string} match
     * @return {string} - Empty string.
     */
    function stripIndent(match) {
        offset(match.length);

        return EMPTY;
    }

    content = $3.replace(EXPRESSION_INITIAL_TAB, stripIndent);

    /* Point at the value, past the key. */
    start.column += $1.length;
    add = eat($0);

    return add(
        self.renderFootnoteDefinition(normalize($2), content, start)
    );
}
tokenizeFootnoteDefinition.onlyAtTop = true;
tokenizeFootnoteDefinition.notInBlockquote = true;
/**
* Tokenise a table.
*
* The header row and every body row are eaten character
* by character through `renderRow`, so that each cell is
* eaten (and thus positioned) on its own. The alignment
* row is eaten as a whole and only used to compute
* `node.align`.
*
* @example
* var $0 = ' | foo |\n | --- |\n | bar |';
* var $1 = ' | foo |';
* var $2 = '| foo |';
* var $3 = ' | --- |';
* var $4 = '| --- |';
* var $5 = ' | bar |';
* tokenizeTable(eat, $0, $1, $2, $3, $4, $5);
*
* @property {boolean} onlyAtTop
* @param {function(string)} eat
* @param {string} $0 - Whole table.
* @param {string} $1 - Whole heading.
* @param {string} $2 - Trimmed heading.
* @param {string} $3 - Whole alignment.
* @param {string} $4 - Trimmed alignment.
* @param {string} $5 - Rows.
* @return {Node} - `table` node.
*/
function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) {
var self = this;
var length;
var index;
var node;
$0 = trimTrailingLines($0);
node = eat($0).reset({
'type': TABLE,
'align': [],
'children': []
});
/**
* Eat a row of type `type`, and each of its cells, and
* add the row to the table.
*
* @param {string} type - Type of the returned node,
* such as `tableHeader` or `tableRow`.
* @param {string} value - Row, including initial and
* final fences.
*/
function renderRow(type, value) {
var row = eat(value).reset(self.renderParent(type, []), node);
/*
* One more than the number of characters: the final
* iteration sees an empty `character`, which is handled
* as the end of the last cell.
*/
var length = value.length + 1;
var index = -1;
/* White space (and pipes inside code) seen after the cell's content, which may still turn out to be part of it. */
var queue = '';
/* Content of the current cell. */
var cell = '';
/* Whether no pipe or content character was handled yet in this row. */
var preamble = true;
var count;
/* Size of the grave-accent run which opened inline code, or falsey outside of code. */
var opening;
var character;
var subvalue;
var now;
while (++index < length) {
character = value.charAt(index);
/*
* White space inside a cell is queued; white space
* before a cell's content is eaten right away.
*/
if (character === '\t' || character === ' ') {
if (cell) {
queue += character;
} else {
eat(character);
}
continue;
}
if (character === '|' || character === '') {
if (preamble) {
/* The row's initial fence. */
eat(character);
} else {
/* A pipe inside inline code does not end the cell. */
if (character && opening) {
// cell += queue + character;
queue += character;
continue;
}
if ((cell || character) && !preamble) {
subvalue = cell;
/*
* More than one queued character: all are eaten
* with the cell, except (when at a pipe) for the
* last, which is eaten with the pipe below.
*/
if (queue.length > 1) {
if (character) {
subvalue += queue.slice(0, queue.length - 1);
queue = queue.charAt(queue.length - 1);
} else {
subvalue += queue;
queue = '';
}
}
now = eat.now();
eat(subvalue)(
self.renderInline(TABLE_CELL, cell, now), row
);
}
eat(queue + character);
queue = '';
cell = '';
}
} else {
/* Content: what was queued is part of the cell after all. */
if (queue) {
cell += queue;
queue = '';
}
cell += character;
/* A backslash which is not the final character takes the next character with it. */
if (character === '\\' && index !== length - 2) {
cell += value.charAt(index + 1);
index++;
}
/*
* Count a run of grave accents. The first run opens
* inline code; a later run of at least that size
* closes it.
*/
if (character === '`') {
count = 1;
while (value.charAt(index + 1) === character) {
cell += character;
index++;
count++;
}
if (!opening) {
opening = count;
} else if (count >= opening) {
opening = 0;
}
}
}
preamble = false;
}
}
/*
* Add the table's header.
*/
renderRow(TABLE_HEADER, $1);
eat(NEW_LINE);
/*
* Add the table's alignment.
*/
eat($3);
$4 = $4
.replace(EXPRESSION_TABLE_FENCE, EMPTY)
.split(EXPRESSION_TABLE_BORDER);
node.align = getAlignment($4);
/*
* Add the table rows to table's children.
*/
$5 = trimTrailingLines($5).split(NEW_LINE);
index = -1;
length = $5.length;
while (++index < length) {
renderRow(TABLE_ROW, $5[index]);
/* Eat the line feed between two rows. */
if (index !== length - 1) {
eat(NEW_LINE);
}
}
return node;
}
tokenizeTable.onlyAtTop = true;
/**
 * Tokenise a paragraph. A value which is empty after
 * trimming is eaten without creating a node.
 *
 * @example
 *   tokenizeParagraph(eat, 'Foo.');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole paragraph.
 * @return {Node?} - `paragraph` node, when the node does
 *   not just contain white space.
 */
function tokenizeParagraph(eat, $0) {
    var self = this;
    var start = eat.now();
    var isBlank = trim($0) === EMPTY;
    var value;
    var add;

    if (isBlank) {
        eat($0);

        return null;
    }

    value = trimTrailingLines($0);
    add = eat(value);

    return add(self.renderInline(PARAGRAPH, value, start));
}
/**
 * Tokenise text.
 *
 * @example
 *   tokenizeText(eat, 'foo');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole text.
 * @return {Node} - `text` node.
 */
function tokenizeText(eat, $0) {
    var add = eat($0);

    return add(this.renderRaw(TEXT, $0));
}
/**
 * Create a code-block node. The language flag, when
 * given, is de-escaped and decoded; trailing lines are
 * removed from the value.
 *
 * @example
 *   renderCodeBlock('foo()', 'js', eat);
 *
 * @param {string?} [value] - Code.
 * @param {string?} [language] - Optional language flag.
 * @param {Function} eat
 * @return {Object} - `code` node.
 */
function renderCodeBlock(value, language, eat) {
    var lang = null;

    if (language) {
        lang = decode(this.descape(language), eat);
    }

    return {
        'type': CODE,
        'lang': lang,
        'value': trimTrailingLines(value || EMPTY)
    };
}
/**
 * Create a list node. A list is ordered when its bullet
 * is longer than one character; `start` is the number
 * the bullet starts with, or `null` when it does not
 * start with a number. `loose` is left at `null`, to be
 * set later.
 *
 * @example
 *   var children = [renderListItem('- foo')];
 *   renderList(children, '-');
 *
 * @param {Array.<Object>} children - Children.
 * @param {string} bullet - First bullet.
 * @return {Object} - `list` node.
 */
function renderList(children, bullet) {
    var first = parseInt(bullet, 10);

    return {
        'type': LIST,
        'ordered': bullet.length > 1,
        'start': isNaN(first) ? null : first,
        'loose': null,
        'children': children
    };
}
/**
 * Clean a list-item using overly simple mechanics: the
 * bullet is removed first, and then the initial indent
 * of every line. The length of everything removed is
 * reported to an indent function for the item's first
 * line; a fresh one is requested for each of the two
 * steps.
 *
 * @example
 *   renderPedanticListItem('- _foo_', now());
 *
 * @param {string} value - List-item.
 * @param {Object} position - List-item location.
 * @return {string} - Cleaned `value`.
 */
function renderPedanticListItem(value, position) {
    var self = this;

    /**
     * Remove all matches of `expression` from `input`,
     * reporting the length of each match.
     *
     * @param {string} input
     * @param {RegExp} expression
     * @return {string}
     */
    function strip(input, expression) {
        var offset = self.indent(position.line);

        return input.replace(expression, function (match) {
            offset(match.length);

            return EMPTY;
        });
    }

    return strip(
        strip(value, EXPRESSION_PEDANTIC_BULLET),
        EXPRESSION_INITIAL_INDENT
    );
}
/**
 * Clean a list-item using sane mechanics: the bullet is
 * removed from the first line, and the indent the bullet
 * took up is removed from the following lines. The
 * length of everything removed is reported to the indent
 * function for the item's first line.
 *
 * @example
 *   renderNormalListItem('- _foo_', now());
 *
 * @param {string} value - List-item.
 * @param {Object} position - List-item location.
 * @return {string} - Cleaned `value`.
 */
function renderNormalListItem(value, position) {
    var self = this;
    var offset = self.indent(position.line);
    var marker;
    var firstLine;
    var placeholder;
    var original;
    var stripped;
    var count;
    var line;

    /*
     * Swap the bullet for white space of (about) the same
     * size, and remember both the bullet and the rest of
     * the first line.
     */
    value = value.replace(
        EXPRESSION_BULLET,
        function (whole, initial, symbol, spacing, content) {
            marker = initial + symbol + spacing;
            firstLine = content;

            /*
             * Bullets for numbers below ten, in a marker of odd
             * length, count for one extra space, so that
             * the lines which follow may indent with one
             * space more.
             */
            if (Number(symbol) < 10 && marker.length % 2 === 1) {
                symbol = SPACE + symbol;
            }

            placeholder = initial + repeat(SPACE, symbol.length) + spacing;

            return placeholder + firstLine;
        }
    );

    original = value.split(NEW_LINE);
    stripped = removeIndentation(
        value, getIndent(placeholder).indent
    ).split(NEW_LINE);

    /*
     * The placeholder could cause more to be removed from
     * the first line than should be, so the first line is
     * set to what followed the bullet.
     */
    stripped[0] = firstLine;

    offset(marker.length);

    count = original.length;

    for (line = 1; line < count; line++) {
        offset(original[line].length - stripped[line].length);
    }

    return stripped.join(NEW_LINE);
}
/*
 * The two functions which can clean list-items, by the
 * (stringified) value of the `pedantic` option.
 */
var LIST_ITEM_MAP = {
    'true': renderPedanticListItem,
    'false': renderNormalListItem
};
/**
 * Create a list-item node. The value is first cleaned by
 * the function in `LIST_ITEM_MAP` for the `pedantic`
 * option. In GFM mode, an initial task marker is removed
 * and exposed as `checked` (`null` when there is none).
 *
 * @example
 *   renderListItem('- _foo_', now());
 *
 * @param {Object} value - List-item.
 * @param {Object} position - List-item location.
 * @return {Object} - `listItem` node.
 */
function renderListItem(value, position) {
    var self = this;
    var settings = self.options;
    var checked = null;
    var match;
    var size;
    var node;

    value = LIST_ITEM_MAP[settings.pedantic].apply(self, arguments);

    if (settings.gfm) {
        match = value.match(EXPRESSION_TASK_ITEM);

        if (match) {
            size = match[0].length;
            checked = match[1].toLowerCase() === 'x';

            /* Account for the removed marker on the first line. */
            self.indent(position.line)(size);
            value = value.slice(size);
        }
    }

    node = {
        'type': LIST_ITEM
    };

    /* Loose: a blank line inside the item, or a final line feed. */
    node.loose = EXPRESSION_LOOSE_LIST_ITEM.test(value) ||
        value.charAt(value.length - 1) === NEW_LINE;

    if (settings.gfm) {
        node.checked = checked;
    }

    node.children = self.tokenizeBlock(value, position);

    return node;
}
/**
 * Create a footnote-definition node. Its content is
 * tokenised as block content, between entering and
 * exiting the blockquote state.
 *
 * @example
 *   renderFootnoteDefinition('1', '_foo_', now());
 *
 * @param {string} identifier - Unique reference.
 * @param {string} value - Contents
 * @param {Object} position - Definition location.
 * @return {Object} - `footnoteDefinition` node.
 */
function renderFootnoteDefinition(identifier, value, position) {
    var self = this;
    var exit = self.enterBlockquote();
    var children = self.tokenizeBlock(value, position);

    exit();

    return {
        'type': FOOTNOTE_DEFINITION,
        'identifier': identifier,
        'children': children
    };
}
/**
 * Create a heading node, with its content tokenised as
 * inline content.
 *
 * @example
 *   renderHeading('_foo_', 1, now());
 *
 * @param {string} value - Content.
 * @param {number} depth - Heading depth.
 * @param {Object} position - Heading content location.
 * @return {Object} - `heading` node
 */
function renderHeading(value, depth, position) {
    var children = this.tokenizeInline(value, position);

    return {
        'type': HEADING,
        'depth': depth,
        'children': children
    };
}
/**
 * Create a blockquote node. Its content is tokenised as
 * block content, between entering and exiting the
 * blockquote state.
 *
 * @example
 *   renderBlockquote('_foo_', now());
 *
 * @param {string} value - Content.
 * @param {Object} now - Position.
 * @return {Object} - `blockquote` node.
 */
function renderBlockquote(value, now) {
    var self = this;
    var exit = self.enterBlockquote();
    var children = self.tokenizeBlock(value, now);

    exit();

    return {
        'type': BLOCKQUOTE,
        'children': children
    };
}
/**
 * Create a node which has neither children nor a value.
 *
 * @example
 *   renderVoid('horizontalRule');
 *
 * @param {string} type - Node type.
 * @return {Object} - Node of type `type`.
 */
function renderVoid(type) {
    var node = {};

    node.type = type;

    return node;
}
/**
 * Create a node which holds the given children.
 *
 * @example
 *   renderParent('paragraph', []);
 *
 * @param {string} type - Node type.
 * @param {Array.<Object>} children - Child nodes.
 * @return {Object} - Node of type `type`.
 */
function renderParent(type, children) {
    var node = {};

    node.type = type;
    node.children = children;

    return node;
}
/**
 * Create a node which holds the given value.
 *
 * @example
 *   renderRaw('inlineCode', 'foo()');
 *
 * @param {string} type - Node type.
 * @param {string} value - Contents.
 * @return {Object} - Node of type `type`.
 */
function renderRaw(type, value) {
    var node = {};

    node.type = type;
    node.value = value;

    return node;
}
/**
 * Create a link or an image node, between entering and
 * exiting the link state. A link receives `href` and its
 * text tokenised as inline content (`children`); an
 * image receives `src` and its text, de-escaped and
 * decoded, as `alt`.
 *
 * @example
 *   renderLink(true, 'example.com', 'example', 'Example Domain', now(), eat);
 *   renderLink(false, 'fav.ico', 'example', 'Example Domain', now(), eat);
 *
 * @param {boolean} isLink - Whether linking to a document
 *   or an image.
 * @param {string} href - URI reference.
 * @param {string} text - Content.
 * @param {string?} title - Title.
 * @param {Object} position - Location of link.
 * @param {function(string)} eat
 * @return {Object} - `link` or `image` node.
 */
function renderLink(isLink, href, text, title, position, eat) {
    var self = this;
    var exit = self.enterLink();
    var node = {
        'type': isLink ? LINK : IMAGE
    };
    var target;

    if (title) {
        node.title = decode(self.descape(title), eat);
    } else {
        node.title = null;
    }

    target = decode(href, eat);

    if (!isLink) {
        node.src = target;
        node.alt = text ? decode(self.descape(text), eat) : null;
    } else {
        node.href = target;
        node.children = self.tokenizeInline(text, position);
    }

    exit();

    return node;
}
/**
 * Create a footnote node, with its content tokenised as
 * inline content.
 *
 * @example
 *   renderFootnote('_foo_', now());
 *
 * @param {string} value - Contents.
 * @param {Object} position - Location of footnote.
 * @return {Object} - `footnote` node.
 */
function renderFootnote(value, position) {
    var self = this;

    return self.renderInline(FOOTNOTE, value, position);
}
/**
 * Create a parent node of which the children are the
 * result of tokenising `value` as inline content.
 *
 * @example
 *   renderInline('strong', '_foo_', now());
 *
 * @param {string} type - Node type.
 * @param {string} value - Contents.
 * @param {Object} position - Location of node.
 * @return {Object} - Node of type `type`.
 */
function renderInline(type, value, position) {
    var self = this;
    var children = self.tokenizeInline(value, position);

    return self.renderParent(type, children);
}
/**
 * Create a parent node of which the children are the
 * result of tokenising `value` as block content.
 *
 * @example
 *   renderBlock('blockquote', 'Foo.', now());
 *
 * @param {string} type - Node type.
 * @param {string} value - Contents.
 * @param {Object} position - Location of node.
 * @return {Object} - Node of type `type`.
 */
function renderBlock(type, value, position) {
    var self = this;
    var children = self.tokenizeBlock(value, position);

    return self.renderParent(type, children);
}
/**
 * Tokenise an escape sequence. The node's value is the
 * escaped character, without the escape itself.
 *
 * @example
 *   tokenizeEscape(eat, '\\a', 'a');
 *
 * @param {function(string)} eat
 * @param {string} $0 - Whole escape.
 * @param {string} $1 - Escaped character.
 * @return {Node} - `escape` node.
 */
function tokenizeEscape(eat, $0, $1) {
    var add = eat($0);

    return add(this.renderRaw(ESCAPE, $1));
}
/**
 * Tokenise a URL in angle brackets.
 *
 * For e-mail addresses (`$2` is `@`), the link's `href`
 * always carries the `mailto:` protocol, whereas the
 * link's text never does.
 *
 * The `escape` inline tokenizer is disabled while the
 * link is rendered. It is restored afterwards, also when
 * rendering throws, so that a failure cannot leave
 * `inlineTokenizers` without its `escape` tokenizer.
 *
 * @example
 *   tokenizeAutoLink(eat, '<http://foo.bar>', 'http://foo.bar', '');
 *
 * @property {boolean} notInLink
 * @param {function(string)} eat
 * @param {string} $0 - Whole link.
 * @param {string} $1 - URL.
 * @param {string?} [$2] - Protocol or at.
 * @return {Node} - `link` node.
 */
function tokenizeAutoLink(eat, $0, $1, $2) {
    var self = this;
    var href = $1;
    var text = $1;
    var now = eat.now();
    /* The content starts after the opening angle bracket. */
    var offset = 1;
    var size = MAILTO_PROTOCOL.length;
    var tokenize;

    if ($2 === AT_SIGN) {
        if (text.slice(0, size).toLowerCase() !== MAILTO_PROTOCOL) {
            href = MAILTO_PROTOCOL + text;
        } else {
            /* The text starts after the protocol. */
            text = text.slice(size);
            offset += size;
        }
    }

    now.column += offset;

    /*
     * Temporarily remove support for escapes in autolinks.
     */
    tokenize = self.inlineTokenizers.escape;
    self.inlineTokenizers.escape = null;

    try {
        return eat($0)(self.renderLink(true, href, text, null, now, eat));
    } finally {
        self.inlineTokenizers.escape = tokenize;
    }
}
tokenizeAutoLink.notInLink = true;
/**
 * Tokenise a literal URL found in text.
 *
 * The matched value is used as both the target and the
 * text of the resulting link; no title is set.
 *
 * @example
 *   tokenizeURL(eat, 'http://foo.bar');
 *
 * @property {boolean} notInLink
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole link.
 * @return {Node} - `link` node.
 */
function tokenizeURL(eat, $0) {
    /* Capture the position before the value is eaten. */
    var start = eat.now();
    var add = eat($0);

    return add(this.renderLink(true, $0, $0, null, start, eat));
}
tokenizeURL.notInLink = true;
/**
 * Tokenise an HTML tag.
 *
 * Also tracks whether the parser is inside an HTML link:
 * outside a link, a tag matching
 * `EXPRESSION_HTML_LINK_OPEN` sets `inLink`; inside a
 * link, a tag matching `EXPRESSION_HTML_LINK_CLOSE`
 * clears it.
 *
 * @example
 *   tokenizeTag(eat, '<span foo="bar">');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Content.
 * @return {Node} - `html` node.
 */
function tokenizeTag(eat, $0) {
    var self = this;

    if (self.inLink) {
        if (EXPRESSION_HTML_LINK_CLOSE.test($0)) {
            self.inLink = false;
        }
    } else if (EXPRESSION_HTML_LINK_OPEN.test($0)) {
        self.inLink = true;
    }

    return eat($0)(self.renderRaw(HTML, $0));
}
/**
 * Tokenise a link or an image.
 *
 * Nothing is eaten, and `null` is returned, for a link
 * found while already inside a link. Images are always
 * tokenised.
 *
 * @example
 *   tokenizeLink(
 *     eat, '![foo](fav.ico "Foo Domain")', '![', 'foo', null,
 *     'fav.ico', 'Foo Domain'
 *   );
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole link.
 * @param {string} $1 - Prefix.
 * @param {string} $2 - Text.
 * @param {string?} $3 - URL wrapped in angle braces.
 * @param {string?} $4 - Literal URL.
 * @param {string?} $5 - Title wrapped in single or double
 *   quotes.
 * @param {string?} [$6] - Title wrapped in double quotes.
 * @param {string?} [$7] - Title wrapped in parentheses.
 * @return {Node?} - `link` node, `image` node, or `null`.
 */
function tokenizeLink(eat, $0, $1, $2, $3, $4, $5, $6, $7) {
    var self = this;
    var isLink = $1 === BRACKET_OPEN;
    var url = $4 || $3 || '';
    var title = $7 || $6 || $5;
    var start;

    /* Links cannot be nested in links. */
    if (isLink && self.inLink) {
        return null;
    }

    /* The content starts after the prefix. */
    start = eat.now();
    start.column += $1.length;

    return eat($0)(self.renderLink(
        isLink, self.descape(url), $2, title, start, eat
    ));
}
/**
 * Tokenise a reference link, image, or footnote;
 * shortcut reference link, or footnote.
 *
 * A reference is treated as a footnote only when the
 * `footnotes` option is on, the reference is a shortcut,
 * and the identifier starts with a caret. A footnote
 * whose identifier contains a space is rendered as an
 * inline `footnote` node instead of a reference.
 *
 * @example
 *   tokenizeReference(eat, '[foo]', '[', 'foo');
 *   tokenizeReference(eat, '[foo][]', '[', 'foo', '');
 *   tokenizeReference(eat, '[foo][bar]', '[', 'foo', 'bar');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole link.
 * @param {string} $1 - Prefix.
 * @param {string} $2 - Content; also used as identifier
 *   when `$3` is missing or empty.
 * @param {string} $3 - Explicit identifier.
 * @return {Node?} - `linkReference`, `imageReference`, or
 *   `footnoteReference`.  Returns null when this is a link
 *   reference, but we're already in a link.
 */
function tokenizeReference(eat, $0, $1, $2, $3) {
    var self = this;
    var content = $2;
    var identifier = $3 || $2;
    var type = $1 === BRACKET_OPEN ? 'link' : 'image';
    var startsWithCaret = self.options.footnotes &&
        identifier.charAt(0) === CARET;
    var start = eat.now();
    var referenceType = 'full';
    var node;
    var exit;

    if ($3 === undefined) {
        referenceType = 'shortcut';
    } else if ($3 === '') {
        referenceType = 'collapsed';
    }

    /* Only shortcut references can be footnotes. */
    if (startsWithCaret && referenceType === 'shortcut') {
        /* Drop the caret. */
        identifier = identifier.substr(1);

        if (identifier.indexOf(SPACE) !== -1) {
            return eat($0)(self.renderFootnote(identifier, eat.now()));
        }

        type = 'footnote';
    }

    if (type === 'link' && self.inLink) {
        return null;
    }

    /* The content starts after the prefix. */
    start.column += $1.length;

    node = {
        'type': type + 'Reference',
        'identifier': normalize(identifier)
    };

    if (type === 'link') {
        node.referenceType = referenceType;

        exit = self.enterLink();
        node.children = self.tokenizeInline(content, start);
        exit();
    } else if (type === 'image') {
        node.referenceType = referenceType;
        node.alt = decode(self.descape(content), eat);
    }

    return eat($0)(node);
}
/**
 * Tokenise strong emphasis.
 *
 * Nothing is eaten when the content is empty or consists
 * of white space only.
 *
 * @example
 *   tokenizeStrong(eat, '**foo**', '**', 'foo');
 *   tokenizeStrong(eat, '__foo__', null, null, '__', 'foo');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole emphasis.
 * @param {string?} $1 - Marker.
 * @param {string?} $2 - Content.
 * @param {string?} [$3] - Marker.
 * @param {string?} [$4] - Content.
 * @return {Node?} - `strong` node, when not empty.
 */
function tokenizeStrong(eat, $0, $1, $2, $3, $4) {
    var start = eat.now();
    var content = $2 || $4;
    var add;

    if (trim(content) === EMPTY) {
        return null;
    }

    /* The content starts after the two marker characters. */
    start.column += 2;

    add = eat($0);

    return add(this.renderInline(STRONG, content, start));
}
/**
 * Tokenise slight emphasis.
 *
 * Nothing is eaten when the content is empty, consists of
 * white space only, or starts or ends with the marker
 * character.
 *
 * @example
 *   tokenizeEmphasis(eat, '*foo*', '*', 'foo');
 *   tokenizeEmphasis(eat, '_foo_', null, null, '_', 'foo');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole emphasis.
 * @param {string?} $1 - Marker.
 * @param {string?} $2 - Content.
 * @param {string?} [$3] - Marker.
 * @param {string?} [$4] - Content.
 * @return {Node?} - `emphasis` node, when not empty.
 */
function tokenizeEmphasis(eat, $0, $1, $2, $3, $4) {
    var start = eat.now();
    var marker = $1 || $3;
    var content = $2 || $4;
    var lastIndex;

    if (trim(content) === EMPTY) {
        return null;
    }

    lastIndex = content.length - 1;

    if (
        content.charAt(0) === marker ||
        content.charAt(lastIndex) === marker
    ) {
        return null;
    }

    /* The content starts after the marker character. */
    start.column += 1;

    return eat($0)(this.renderInline(EMPHASIS, content, start));
}
/**
 * Tokenise a deletion.
 *
 * @example
 *   tokenizeDeletion(eat, '~~foo~~', 'foo');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole deletion.
 * @param {string} $1 - Content.
 * @return {Node} - `delete` node.
 */
function tokenizeDeletion(eat, $0, $1) {
    var start = eat.now();
    var add;

    /* The content starts two columns after the deletion starts. */
    start.column += 2;

    add = eat($0);

    return add(this.renderInline(DELETE, $1, start));
}
/**
 * Tokenise inline code.
 *
 * The content is trimmed before it is stored on the
 * node; missing content results in an empty value.
 *
 * @example
 *   tokenizeInlineCode(eat, '`foo()`', '`', 'foo()');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole code.
 * @param {string} $1 - Initial markers.
 * @param {string} $2 - Content.
 * @return {Node} - `inlineCode` node.
 */
function tokenizeInlineCode(eat, $0, $1, $2) {
    var add = eat($0);
    var code = trim($2 || '');

    return add(this.renderRaw(INLINE_CODE, code));
}
/**
 * Tokenise a break.
 *
 * @example
 *   tokenizeBreak(eat, '  \n');
 *
 * @this {Parser}
 * @param {function(string)} eat
 * @param {string} $0 - Whole break.
 * @return {Node} - `break` node.
 */
function tokenizeBreak(eat, $0) {
    var add = eat($0);

    return add(this.renderVoid(BREAK));
}
/**
 * Construct a new parser.
 *
 * Sets the initial parse state, gives the instance its
 * own copies of the expression rules and the options
 * found on the prototype, and applies `options` through
 * `setOptions`.
 *
 * @example
 *   var parser = new Parser(new VFile('Foo'));
 *
 * @constructor
 * @class {Parser}
 * @param {VFile} file - File to parse.
 * @param {Object?} [options] - Passed to
 *   `Parser#setOptions()`.
 */
function Parser(file, options) {
    var self = this;

    self.file = file;

    /* Initial state. */
    self.inLink = false;
    self.atTop = true;
    self.atStart = true;
    self.inBlockquote = false;

    /* Per-instance copy, extended by `setOptions`. */
    self.rules = extend({}, self.expressions.rules);
    self.descape = descapeFactory(self.rules, 'escape');

    /* Copy the inherited options before applying settings. */
    self.options = extend({}, self.options);
    self.setOptions(options);
}
/**
 * Set options.  Settings missing from `options` default
 * to their previously set value.
 *
 * For each enabled setting, the expressions registered
 * under its name are merged into the active rules.
 * Combined expressions are merged as well when `gfm` is
 * enabled together with `breaks` or `commonmark`.  In
 * `commonmark` mode, `enterBlockquote` is replaced on the
 * instance by the result of `noopToggler()`.
 *
 * @example
 *   var parser = new Parser();
 *   parser.setOptions({gfm: true});
 *
 * @this {Parser}
 * @throws {Error} - When an option is invalid.
 * @param {Object?} [options] - Parse settings.
 * @return {Parser} - `self`.
 */
Parser.prototype.setOptions = function (options) {
    var self = this;
    var available = self.expressions;
    var active = self.rules;
    var previous = self.options;
    var settings = options;
    var name;

    if (settings === null || settings === undefined) {
        settings = {};
    } else if (typeof settings === 'object') {
        /* Copy, so the given object is not modified. */
        settings = extend({}, settings);
    } else {
        raise(settings, 'options');
    }

    self.options = settings;

    for (name in defaultOptions) {
        validate.boolean(settings, name, previous[name]);

        if (settings[name]) {
            extend(active, available[name]);
        }
    }

    if (settings.gfm) {
        if (settings.breaks) {
            extend(active, available.breaksGFM);
        }

        if (settings.commonmark) {
            extend(active, available.commonmarkGFM);
        }
    }

    if (settings.commonmark) {
        self.enterBlockquote = noopToggler();
    }

    return self;
};
/*
 * Expose `defaults`.
 *
 * Kept on the prototype; the constructor copies
 * `self.options` into a fresh object for each instance,
 * so these defaults themselves are not modified by
 * `setOptions`.
 */
Parser.prototype.options = defaultOptions;
/*
 * Expose `expressions`.
 *
 * Kept on the prototype; the constructor copies
 * `expressions.rules` into a per-instance `rules`
 * object, which `setOptions` extends with the
 * expressions of each enabled setting.
 */
Parser.prototype.expressions = defaultExpressions;
/**
 * Factory to track indentation for each line corresponding
 * to the given `start` and the number of invocations.
 *
 * @this {Parser}
 * @param {number} start - Starting line.
 * @return {function(offset)} - Indenter.
 */
Parser.prototype.indent = function (start) {
    var self = this;
    var line = start;

    /**
     * Indenter which increments the global offset,
     * starting at the bound line, and further incrementing
     * each line for each invocation.
     *
     * The offsets are looked up on the parser at call
     * time, and a line without an entry counts as `0`.
     *
     * @example
     *   indenter(2)
     *
     * @param {number} offset - Number to increment the
     *   offset.
     */
    return function indenter(offset) {
        var current = self.offset[line] || 0;

        self.offset[line] = current + offset;

        /* The next invocation targets the next line. */
        line++;
    };
};
/**
 * Parse the bound file.
 *
 * The file is cast to a string, cleaned, and tokenised
 * as block content into a `root` node.  When the
 * `position` option is on, the root starts at 1:1 and
 * ends at `self.eof`, or at its own start when `eof` is
 * not set.
 *
 * @example
 *   new Parser(new File('_Foo_.')).parse();
 *
 * @this {Parser}
 * @return {Object} - `root` node.
 */
Parser.prototype.parse = function () {
    var self = this;
    var contents = clean(String(self.file));
    var root;
    var start;

    /*
     * Add an `offset` matrix, used to keep track of
     * syntax and white space indentation per line.
     */
    self.offset = {};

    root = self.renderBlock(ROOT, contents);

    if (!self.options.position) {
        return root;
    }

    start = {
        'line': 1,
        'column': 1
    };

    root.position = {
        'start': start
    };

    root.position.end = self.eof || start;

    return root;
};
/*
 * Enter and exit helpers.
 *
 * Each helper is created by `stateToggler` for one state
 * property of the parser.  As used in this file, calling
 * a helper (for example `enterLink()`) returns a function
 * which is invoked once the nested content is tokenised.
 *
 * The second argument equals the initial value the
 * constructor assigns to that property -- presumably the
 * value restored on exit; confirm against `stateToggler`.
 *
 * Note that `setOptions` replaces `enterBlockquote` on the
 * instance with `noopToggler()` in `commonmark` mode.
 */
Parser.prototype.enterLink = stateToggler('inLink', false);
Parser.prototype.exitTop = stateToggler('atTop', true);
Parser.prototype.exitStart = stateToggler('atStart', true);
Parser.prototype.enterBlockquote = stateToggler('inBlockquote', false);
/*
 * Expose helpers.
 *
 * The render functions are attached to the prototype so
 * that tokenizers, which are invoked with the parser as
 * `this`, can reach them (for example `this.renderRaw`
 * or `this.renderLink`).
 */
Parser.prototype.renderRaw = renderRaw;
Parser.prototype.renderVoid = renderVoid;
Parser.prototype.renderParent = renderParent;
Parser.prototype.renderInline = renderInline;
Parser.prototype.renderBlock = renderBlock;
Parser.prototype.renderLink = renderLink;
Parser.prototype.renderCodeBlock = renderCodeBlock;
Parser.prototype.renderBlockquote = renderBlockquote;
Parser.prototype.renderList = renderList;
Parser.prototype.renderListItem = renderListItem;
Parser.prototype.renderFootnoteDefinition = renderFootnoteDefinition;
Parser.prototype.renderHeading = renderHeading;
Parser.prototype.renderFootnote = renderFootnote;
/**
* Construct a tokenizer. This creates both
* `tokenizeInline` and `tokenizeBlock`.
*
* @example
* Parser.prototype.tokenizeInline = tokenizeFactory('inline');
*
* @param {string} type - Name of parser, used to find
* its expressions (`%sMethods`) and tokenizers
* (`%Tokenizers`).
* @return {function(string, Object?): Array.<Object>}
*/
function tokenizeFactory(type) {
/**
* Tokenizer for a bound `type`
*
* @example
* parser = new Parser();
* parser.tokenizeInline('_foo_');
*
* @param {string}