UNPKG

mdast

Version:

Markdown processor powered by plugins

2,125 lines (1,818 loc) 65.6 kB
/** * @author Titus Wormer * @copyright 2015 Titus Wormer * @license MIT * @module mdast:parse * @version 2.2.2 * @fileoverview Parse a markdown document into an * abstract syntax tree. */ 'use strict'; /* eslint-env commonjs */ /* * Dependencies. */ var he = require('he'); var repeat = require('repeat-string'); var trim = require('trim'); var trimTrailingLines = require('trim-trailing-lines'); var extend = require('extend.js'); var utilities = require('./utilities.js'); var defaultExpressions = require('./expressions.js'); var defaultOptions = require('./defaults.js').parse; /* * Methods. */ var raise = utilities.raise; var clean = utilities.clean; var validate = utilities.validate; var normalize = utilities.normalizeIdentifier; var arrayPush = [].push; /* * Characters. */ var AT_SIGN = '@'; var CARET = '^'; var EQUALS = '='; var EXCLAMATION_MARK = '!'; var MAILTO_PROTOCOL = 'mailto:'; var NEW_LINE = '\n'; var SPACE = ' '; var TAB = '\t'; var EMPTY = ''; var LT = '<'; var GT = '>'; var BRACKET_OPEN = '['; /* * Types. */ var BLOCK = 'block'; var INLINE = 'inline'; var HORIZONTAL_RULE = 'horizontalRule'; var HTML = 'html'; var YAML = 'yaml'; var TABLE = 'table'; var TABLE_CELL = 'tableCell'; var TABLE_HEADER = 'tableHeader'; var TABLE_ROW = 'tableRow'; var PARAGRAPH = 'paragraph'; var TEXT = 'text'; var CODE = 'code'; var LIST = 'list'; var LIST_ITEM = 'listItem'; var FOOTNOTE_DEFINITION = 'footnoteDefinition'; var HEADING = 'heading'; var BLOCKQUOTE = 'blockquote'; var LINK = 'link'; var IMAGE = 'image'; var FOOTNOTE = 'footnote'; var ESCAPE = 'escape'; var STRONG = 'strong'; var EMPHASIS = 'emphasis'; var DELETE = 'delete'; var INLINE_CODE = 'inlineCode'; var BREAK = 'break'; var ROOT = 'root'; /** * Wrapper around he's `decode` function. * * @example * decode('&amp;'); // '&' * decode('&amp'); // '&' * * @param {string} value * @param {function(string)} eat * @return {string} * @throws {Error} - When `eat.file.quiet` is not `true`. 
* However, by default `he` does not throw on incorrect * encoded entities, but when * `he.decode.options.strict: true`, they occur on * entities with a missing closing semi-colon. */ function decode(value, eat) { try { return he.decode(value); } catch (exception) { eat.file.fail(exception, eat.now()); } } /** * Factory to de-escape a value, based on an expression * at `key` in `scope`. * * @example * var expressions = {escape: /\\(a)/} * var descape = descapeFactory(expressions, 'escape'); * * @param {Object} scope - Map of expressions. * @param {string} key - Key in `map` at which the * non-global expression exists. * @return {function(string): string} - Function which * takes a value and returns its unescaped version. */ function descapeFactory(scope, key) { var globalExpression; var expression; /** * Private method to get a global expression * from the expression at `key` in `scope`. * This method is smart about not recreating * the expressions every time. * * @private * @return {RegExp} */ function generate() { if (scope[key] !== globalExpression) { globalExpression = scope[key]; expression = new RegExp( scope[key].source.replace(CARET, EMPTY), 'g' ); } return expression; } /** * De-escape a string using the expression at `key` * in `scope`. * * @example * var expressions = {escape: /\\(a)/} * var descape = descapeFactory(expressions, 'escape'); * descape('\a'); // 'a' * * @param {string} value - Escaped string. * @return {string} - Unescaped string. */ function descape(value) { return value.replace(generate(), '$1'); } return descape; } /* * Tab size. */ var TAB_SIZE = 4; /* * Expressions. */ var EXPRESSION_RIGHT_ALIGNMENT = /^[ \t]*-+:[ \t]*$/; var EXPRESSION_CENTER_ALIGNMENT = /^[ \t]*:-+:[ \t]*$/; var EXPRESSION_LEFT_ALIGNMENT = /^[ \t]*:-+[ \t]*$/; var EXPRESSION_TABLE_FENCE = /^[ \t]*|\|[ \t]*$/g; var EXPRESSION_TABLE_BORDER = /[ \t]*\|[ \t]*/; var EXPRESSION_BLOCK_QUOTE = /^[ \t]*>[ \t]?/gm; var EXPRESSION_BULLET = /^([ \t]*)([*+-]|\d+[.)])( {1,4}(?! 
)| |\t)([^\n]*)/; var EXPRESSION_PEDANTIC_BULLET = /^([ \t]*)([*+-]|\d+[.)])([ \t]+)/; var EXPRESSION_INITIAL_INDENT = /^( {1,4}|\t)?/gm; var EXPRESSION_INITIAL_TAB = /^( {4}|\t)?/gm; var EXPRESSION_HTML_LINK_OPEN = /^<a /i; var EXPRESSION_HTML_LINK_CLOSE = /^<\/a>/i; var EXPRESSION_LOOSE_LIST_ITEM = /\n\n(?!\s*$)/; var EXPRESSION_TASK_ITEM = /^\[([\ \t]|x|X)\][\ \t]/; /* * A map of characters, and their column length, * which can be used as indentation. */ var INDENTATION_CHARACTERS = {}; INDENTATION_CHARACTERS[SPACE] = SPACE.length; INDENTATION_CHARACTERS[TAB] = TAB_SIZE; /** * Gets indentation information for a line. * * @example * getIndent(' foo'); * // {indent: 2, stops: {1: 0, 2: 1}} * * getIndent('\tfoo'); * // {indent: 4, stops: {4: 0}} * * getIndent(' \tfoo'); * // {indent: 4, stops: {1: 0, 2: 1, 4: 2}} * * getIndent('\t foo') * // {indent: 6, stops: {4: 0, 5: 1, 6: 2}} * * @param {string} value - Indented line. * @return {Object} */ function getIndent(value) { var index = 0; var indent = 0; var character = value.charAt(index); var stops = {}; var size; while (character in INDENTATION_CHARACTERS) { size = INDENTATION_CHARACTERS[character]; indent += size; if (size > 1) { indent = Math.floor(indent / size) * size; } stops[indent] = index; character = value.charAt(++index); } return { 'indent': indent, 'stops': stops }; } /** * Remove the minimum indent from every line in `value`. * Supports both tab, spaced, and mixed indentation (as * well as possible). * * @example * removeIndentation(' foo'); // 'foo' * removeIndentation(' foo', 2); // ' foo' * removeIndentation('\tfoo', 2); // ' foo' * removeIndentation(' foo\n bar'); // ' foo\n bar' * * @param {string} value * @param {number?} [maximum] - Maximum indentation * to remove. * @return {string} - Unindented `value`. 
*/ function removeIndentation(value, maximum) { var values = value.split(NEW_LINE); var position = values.length + 1; var minIndent = Infinity; var matrix = []; var index; var indentation; var stops; var padding; values.unshift(repeat(SPACE, maximum) + EXCLAMATION_MARK); while (position--) { indentation = getIndent(values[position]); matrix[position] = indentation.stops; if (trim(values[position]).length === 0) { continue; } if (indentation.indent) { if (indentation.indent > 0 && indentation.indent < minIndent) { minIndent = indentation.indent; } } else { minIndent = Infinity; break; } } if (minIndent !== Infinity) { position = values.length; while (position--) { stops = matrix[position]; index = minIndent; while (index && !(index in stops)) { index--; } if ( trim(values[position]).length !== 0 && minIndent && index !== minIndent ) { padding = TAB; } else { padding = EMPTY; } values[position] = padding + values[position].slice( index in stops ? stops[index] + 1 : 0 ); } } values.shift(); return values.join(NEW_LINE); } /** * Ensure that `value` is at least indented with * `indent` spaces. Does not support tabs. Does support * multiple lines. * * @example * ensureIndentation('foo', 2); // ' foo' * ensureIndentation(' foo', 4); // ' foo' * * @param {string} value * @param {number} indent - The maximum amount of * spacing to insert. * @return {string} - indented `value`. */ function ensureIndentation(value, indent) { var values = value.split(NEW_LINE); var length = values.length; var index = -1; var line; var position; while (++index < length) { line = values[index]; position = -1; while (++position < indent) { if (line.charAt(position) !== SPACE) { values[index] = repeat(SPACE, indent - position) + line; break; } } } return values.join(NEW_LINE); } /** * Get the alignment from a table rule. 
* * @example * getAlignment([':-', ':-:', '-:', '--']); * // ['left', 'center', 'right', null]; * * @param {Array.<string>} cells * @return {Array.<string?>} */ function getAlignment(cells) { var results = []; var index = -1; var length = cells.length; var alignment; while (++index < length) { alignment = cells[index]; if (EXPRESSION_RIGHT_ALIGNMENT.test(alignment)) { results[index] = 'right'; } else if (EXPRESSION_CENTER_ALIGNMENT.test(alignment)) { results[index] = 'center'; } else if (EXPRESSION_LEFT_ALIGNMENT.test(alignment)) { results[index] = 'left'; } else { results[index] = null; } } return results; } /** * Construct a state `toggler`: a function which inverses * `property` in context based on its current value. * The by `toggler` returned function restores that value. * * @example * var context = {}; * var key = 'foo'; * var val = true; * context[key] = val; * context.enter = stateToggler(key, val); * context[key]; // true * var exit = context.enter(); * context[key]; // false * var nested = context.enter(); * context[key]; // false * nested(); * context[key]; // false * exit(); * context[key]; // true * * @param {string} key - Property to toggle. * @param {boolean} state - It's default state. * @return {function(): function()} - Enter. */ function stateToggler(key, state) { /** * Construct a toggler for the bound `key`. * * @return {Function} - Exit state. */ function enter() { var self = this; var current = self[key]; self[key] = !state; /** * State canceler, cancels the state, if allowed. */ function exit() { self[key] = current; } return exit; } return enter; } /** * Construct a state toggler which doesn't toggle. * * @example * var context = {}; * var key = 'foo'; * var val = true; * context[key] = val; * context.enter = noopToggler(); * context[key]; // true * var exit = context.enter(); * context[key]; // true * exit(); * context[key]; // true * * @return {function(): function()} - Enter. */ function noopToggler() { /** * No-operation. 
*/ function exit() {} /** * @return {Function} */ function enter() { return exit; } return enter; } /* * Define nodes of a type which can be merged. */ var MERGEABLE_NODES = {}; /** * Merge two text nodes: `node` into `prev`. * * @param {Object} prev - Preceding sibling. * @param {Object} node - Following sibling. * @return {Object} - `prev`. */ MERGEABLE_NODES.text = function (prev, node) { prev.value += node.value; return prev; }; /** * Merge two blockquotes: `node` into `prev`, unless in * CommonMark mode. * * @param {Object} prev - Preceding sibling. * @param {Object} node - Following sibling. * @return {Object} - `prev`, or `node` in CommonMark mode. */ MERGEABLE_NODES.blockquote = function (prev, node) { if (this.options.commonmark) { return node; } prev.children = prev.children.concat(node.children); return prev; }; /** * Merge two lists: `node` into `prev`. Knows, about * which bullets were used. * * @param {Object} prev - Preceding sibling. * @param {Object} node - Following sibling. * @return {Object} - `prev`, or `node` when the lists are * of different types (a different bullet is used). */ MERGEABLE_NODES.list = function (prev, node) { if ( !this.currentBullet || this.currentBullet !== this.previousBullet || this.currentBullet.length !== 1 ) { return node; } prev.children = prev.children.concat(node.children); return prev; }; /** * Tokenise a line. Unsets `currentBullet` and * `previousBullet` if more than one lines are found, thus * preventing lists from merging when they use different * bullets. * * @example * tokenizeNewline(eat, '\n\n'); * * @param {function(string)} eat * @param {string} $0 - Lines. */ function tokenizeNewline(eat, $0) { if ($0.length > 1) { this.currentBullet = null; this.previousBullet = null; } eat($0); } /** * Tokenise an indented code block. * * @example * tokenizeCode(eat, '\tfoo'); * * @param {function(string)} eat * @param {string} $0 - Whole code. * @return {Node} - `code` node. 
*/ function tokenizeCode(eat, $0) { $0 = trimTrailingLines($0); return eat($0)(this.renderCodeBlock( removeIndentation($0, TAB_SIZE), null, eat) ); } /** * Tokenise a fenced code block. * * @example * var $0 = '```js\nfoo()\n```'; * tokenizeFences(eat, $0, '', '```', '`', 'js', 'foo()\n'); * * @param {function(string)} eat * @param {string} $0 - Whole code. * @param {string} $1 - Initial spacing. * @param {string} $2 - Initial fence. * @param {string} $3 - Fence marker. * @param {string} $4 - Programming language flag. * @param {string} $5 - Content. * @return {Node} - `code` node. */ function tokenizeFences(eat, $0, $1, $2, $3, $4, $5) { $0 = trimTrailingLines($0); /* * If the initial fence was preceded by spaces, * exdent that amount of white space from the code * block. Because it's possible that the code block * is exdented, we first have to ensure at least * those spaces are available. */ if ($1) { $5 = removeIndentation(ensureIndentation($5, $1.length), $1.length); } return eat($0)(this.renderCodeBlock($5, $4, eat)); } /** * Tokenise an ATX-style heading. * * @example * tokenizeHeading(eat, ' # foo', ' ', '#', ' ', 'foo'); * * @param {function(string)} eat * @param {string} $0 - Whole heading. * @param {string} $1 - Initial spacing. * @param {string} $2 - Hashes. * @param {string} $3 - Internal spacing. * @param {string} $4 - Content. * @return {Node} - `heading` node. */ function tokenizeHeading(eat, $0, $1, $2, $3, $4) { var now = eat.now(); now.column += ($1 + $2 + ($3 || '')).length; return eat($0)(this.renderHeading($4, $2.length, now)); } /** * Tokenise a Setext-style heading. * * @example * tokenizeLineHeading(eat, 'foo\n===', '', 'foo', '='); * * @param {function(string)} eat * @param {string} $0 - Whole heading. * @param {string} $1 - Initial spacing. * @param {string} $2 - Content. * @param {string} $3 - Underline marker. * @return {Node} - `heading` node. 
*/ function tokenizeLineHeading(eat, $0, $1, $2, $3) { var now = eat.now(); now.column += $1.length; return eat($0)(this.renderHeading($2, $3 === EQUALS ? 1 : 2, now)); } /** * Tokenise a horizontal rule. * * @example * tokenizeHorizontalRule(eat, '***'); * * @param {function(string)} eat * @param {string} $0 - Whole rule. * @return {Node} - `horizontalRule` node. */ function tokenizeHorizontalRule(eat, $0) { return eat($0)(this.renderVoid(HORIZONTAL_RULE)); } /** * Tokenise a blockquote. * * @example * tokenizeBlockquote(eat, '> Foo'); * * @param {function(string)} eat * @param {string} $0 - Whole blockquote. * @return {Node} - `blockquote` node. */ function tokenizeBlockquote(eat, $0) { var now = eat.now(); var indent = this.indent(now.line); var value = trimTrailingLines($0); var add = eat(value); value = value.replace(EXPRESSION_BLOCK_QUOTE, function (prefix) { indent(prefix.length); return ''; }); return add(this.renderBlockquote(value, now)); } /** * Tokenise a list. * * @example * tokenizeList(eat, '- Foo', '', '-'); * * @param {function(string)} eat * @param {string} $0 - Whole list. * @param {string} $1 - Indent. * @param {string} $2 - Bullet. * @return {Node} - `list` node. */ function tokenizeList(eat, $0, $1, $2) { var self = this; var firstBullet = $2; var value = trimTrailingLines($0); var matches = value.match(self.rules.item); var length = matches.length; var index = 0; var isLoose = false; var now; var bullet; var item; var enterTop; var exitBlockquote; var node; var indent; var size; var position; var end; /* * Determine if all list-items belong to the * same list. 
*/ if (!self.options.pedantic) { while (++index < length) { bullet = self.rules.bullet.exec(matches[index])[0]; if ( firstBullet !== bullet && ( firstBullet.length === 1 && bullet.length === 1 || bullet.charAt(bullet.length - 1) !== firstBullet.charAt(firstBullet.length - 1) ) ) { matches = matches.slice(0, index); matches[index - 1] = trimTrailingLines(matches[index - 1]); length = matches.length; break; } } } if (self.options.commonmark) { index = -1; while (++index < length) { item = matches[index]; indent = self.rules.indent.exec(item); indent = indent[1] + repeat(SPACE, indent[2].length) + indent[3]; size = getIndent(indent).indent; position = indent.length; end = item.length; while (++position < end) { if ( item.charAt(position) === NEW_LINE && item.charAt(position - 1) === NEW_LINE && getIndent(item.slice(position + 1)).indent < size ) { matches[index] = item.slice(0, position - 1); matches = matches.slice(0, index + 1); length = matches.length; break; } } } } self.previousBullet = self.currentBullet; self.currentBullet = firstBullet; index = -1; node = eat(matches.join(NEW_LINE)).reset( self.renderList([], firstBullet) ); enterTop = self.exitTop(); exitBlockquote = self.enterBlockquote(); while (++index < length) { item = matches[index]; now = eat.now(); item = eat(item)(self.renderListItem(item, now), node); if (item.loose) { isLoose = true; } if (index !== length - 1) { eat(NEW_LINE); } } node.loose = isLoose; enterTop(); exitBlockquote(); return node; } /** * Tokenise HTML. * * @example * tokenizeHtml(eat, '<span>foo</span>'); * * @param {function(string)} eat * @param {string} $0 - Whole HTML. * @return {Node} - `html` node. */ function tokenizeHtml(eat, $0) { $0 = trimTrailingLines($0); return eat($0)(this.renderRaw(HTML, $0)); } /** * Tokenise a definition. 
* * @example * var $0 = '[foo]: http://example.com "Example Domain"'; * var $1 = 'foo'; * var $2 = 'http://example.com'; * var $3 = 'Example Domain'; * tokenizeDefinition(eat, $0, $1, $2, $3); * * @property {boolean} onlyAtTop * @property {boolean} notInBlockquote * @param {function(string)} eat * @param {string} $0 - Whole definition. * @param {string} $1 - Key. * @param {string} $2 - URL. * @param {string} $3 - Title. * @return {Node} - `definition` node. */ function tokenizeDefinition(eat, $0, $1, $2, $3) { var link = $2; /* * Remove angle-brackets from `link`. */ if (link.charAt(0) === LT && link.charAt(link.length - 1) === GT) { link = link.slice(1, -1); } return eat($0)({ 'type': 'definition', 'identifier': normalize($1), 'title': $3 ? decode(this.descape($3), eat) : null, 'link': decode(this.descape(link), eat) }); } tokenizeDefinition.onlyAtTop = true; tokenizeDefinition.notInBlockquote = true; /** * Tokenise YAML front matter. * * @example * var $0 = '---\nfoo: bar\n---'; * var $1 = 'foo: bar'; * tokenizeYAMLFrontMatter(eat, $0, $1); * * @property {boolean} onlyAtStart * @param {function(string)} eat * @param {string} $0 - Whole front matter. * @param {string} $1 - Content. * @return {Node} - `yaml` node. */ function tokenizeYAMLFrontMatter(eat, $0, $1) { return eat($0)(this.renderRaw(YAML, $1 ? trimTrailingLines($1) : EMPTY)); } tokenizeYAMLFrontMatter.onlyAtStart = true; /** * Tokenise a footnote definition. * * @example * var $0 = '[foo]: Bar.'; * var $1 = '[foo]'; * var $2 = 'foo'; * var $3 = 'Bar.'; * tokenizeFootnoteDefinition(eat, $0, $1, $2, $3); * * @property {boolean} onlyAtTop * @property {boolean} notInBlockquote * @param {function(string)} eat * @param {string} $0 - Whole definition. * @param {string} $1 - Whole key. * @param {string} $2 - Key. * @param {string} $3 - Whole value. * @return {Node} - `footnoteDefinition` node. 
*/ function tokenizeFootnoteDefinition(eat, $0, $1, $2, $3) { var self = this; var now = eat.now(); var indent = self.indent(now.line); $3 = $3.replace(EXPRESSION_INITIAL_TAB, function (value) { indent(value.length); return EMPTY; }); now.column += $1.length; return eat($0)(self.renderFootnoteDefinition(normalize($2), $3, now)); } tokenizeFootnoteDefinition.onlyAtTop = true; tokenizeFootnoteDefinition.notInBlockquote = true; /** * Tokenise a table. * * @example * var $0 = ' | foo |\n | --- |\n | bar |'; * var $1 = ' | foo |'; * var $2 = '| foo |'; * var $3 = ' | --- |'; * var $4 = '| --- |'; * var $5 = ' | bar |'; * tokenizeTable(eat, $0, $1, $2, $3, $4, $5); * * @property {boolean} onlyAtTop * @param {function(string)} eat * @param {string} $0 - Whole table. * @param {string} $1 - Whole heading. * @param {string} $2 - Trimmed heading. * @param {string} $3 - Whole alignment. * @param {string} $4 - Trimmed alignment. * @param {string} $5 - Rows. * @return {Node} - `table` node. */ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { var self = this; var length; var index; var node; $0 = trimTrailingLines($0); node = eat($0).reset({ 'type': TABLE, 'align': [], 'children': [] }); /** * Eat a row of type `type`. * * @param {string} type - Type of the returned node, * such as `tableHeader` or `tableRow`. * @param {string} value - Row, including initial and * final fences. 
*/ function renderRow(type, value) { var row = eat(value).reset(self.renderParent(type, []), node); var length = value.length + 1; var index = -1; var queue = ''; var cell = ''; var preamble = true; var count; var opening; var character; var subvalue; var now; while (++index < length) { character = value.charAt(index); if (character === '\t' || character === ' ') { if (cell) { queue += character; } else { eat(character); } continue; } if (character === '|' || character === '') { if (preamble) { eat(character); } else { if (character && opening) { // cell += queue + character; queue += character; continue; } if ((cell || character) && !preamble) { subvalue = cell; if (queue.length > 1) { if (character) { subvalue += queue.slice(0, queue.length - 1); queue = queue.charAt(queue.length - 1); } else { subvalue += queue; queue = ''; } } now = eat.now(); eat(subvalue)( self.renderInline(TABLE_CELL, cell, now), row ); } eat(queue + character); queue = ''; cell = ''; } } else { if (queue) { cell += queue; queue = ''; } cell += character; if (character === '\\' && index !== length - 2) { cell += value.charAt(index + 1); index++; } if (character === '`') { count = 1; while (value.charAt(index + 1) === character) { cell += character; index++; count++; } if (!opening) { opening = count; } else if (count >= opening) { opening = 0; } } } preamble = false; } } /* * Add the table's header. */ renderRow(TABLE_HEADER, $1); eat(NEW_LINE); /* * Add the table's alignment. */ eat($3); $4 = $4 .replace(EXPRESSION_TABLE_FENCE, EMPTY) .split(EXPRESSION_TABLE_BORDER); node.align = getAlignment($4); /* * Add the table rows to table's children. */ $5 = trimTrailingLines($5).split(NEW_LINE); index = -1; length = $5.length; while (++index < length) { renderRow(TABLE_ROW, $5[index]); if (index !== length - 1) { eat(NEW_LINE); } } return node; } tokenizeTable.onlyAtTop = true; /** * Tokenise a paragraph node. 
* * @example * tokenizeParagraph(eat, 'Foo.'); * * @param {function(string)} eat * @param {string} $0 - Whole paragraph. * @return {Node?} - `paragraph` node, when the node does * not just contain white space. */ function tokenizeParagraph(eat, $0) { var now = eat.now(); if (trim($0) === EMPTY) { eat($0); return null; } $0 = trimTrailingLines($0); return eat($0)(this.renderInline(PARAGRAPH, $0, now)); } /** * Tokenise a text node. * * @example * tokenizeText(eat, 'foo'); * * @param {function(string)} eat * @param {string} $0 - Whole text. * @return {Node} - `text` node. */ function tokenizeText(eat, $0) { return eat($0)(this.renderRaw(TEXT, $0)); } /** * Create a code-block node. * * @example * renderCodeBlock('foo()', 'js', now()); * * @param {string?} [value] - Code. * @param {string?} [language] - Optional language flag. * @param {Function} eat * @return {Object} - `code` node. */ function renderCodeBlock(value, language, eat) { return { 'type': CODE, 'lang': language ? decode(this.descape(language), eat) : null, 'value': trimTrailingLines(value || EMPTY) }; } /** * Create a list node. * * @example * var children = [renderListItem('- foo')]; * renderList(children, '-'); * * @param {string} children - Children. * @param {string} bullet - First bullet. * @return {Object} - `list` node. */ function renderList(children, bullet) { var start = parseInt(bullet, 10); if (start !== start) { start = null; } /* * `loose` should be added later. */ return { 'type': LIST, 'ordered': bullet.length > 1, 'start': start, 'loose': null, 'children': children }; } /** * Create a list-item using overly simple mechanics. * * @example * renderPedanticListItem('- _foo_', now()); * * @param {string} value - List-item. * @param {Object} position - List-item location. * @return {string} - Cleaned `value`. 
*/ function renderPedanticListItem(value, position) { var self = this; var indent = self.indent(position.line); /** * A simple replacer which removed all matches, * and adds their length to `offset`. * * @param {string} $0 * @return {string} */ function replacer($0) { indent($0.length); return EMPTY; } /* * Remove the list-item's bullet. */ value = value.replace(EXPRESSION_PEDANTIC_BULLET, replacer); /* * The initial line was also matched by the below, so * we reset the `line`. */ indent = self.indent(position.line); return value.replace(EXPRESSION_INITIAL_INDENT, replacer); } /** * Create a list-item using sane mechanics. * * @example * renderNormalListItem('- _foo_', now()); * * @param {string} value - List-item. * @param {Object} position - List-item location. * @return {string} - Cleaned `value`. */ function renderNormalListItem(value, position) { var self = this; var indent = self.indent(position.line); var bullet; var rest; var lines; var trimmedLines; var index; var length; var max; /* * Remove the list-item's bullet. */ value = value.replace(EXPRESSION_BULLET, function ($0, $1, $2, $3, $4) { bullet = $1 + $2 + $3; rest = $4; /* * Make sure that the first nine numbered list items * can indent with an extra space. That is, when * the bullet did not receive an extra final space. */ if (Number($2) < 10 && bullet.length % 2 === 1) { $2 = SPACE + $2; } max = $1 + repeat(SPACE, $2.length) + $3; return max + rest; }); lines = value.split(NEW_LINE); trimmedLines = removeIndentation( value, getIndent(max).indent ).split(NEW_LINE); /* * We replaced the initial bullet with something * else above, which was used to trick * `removeIndentation` into removing some more * characters when possible. However, that could * result in the initial line to be stripped more * than it should be. 
*/ trimmedLines[0] = rest; indent(bullet.length); index = 0; length = lines.length; while (++index < length) { indent(lines[index].length - trimmedLines[index].length); } return trimmedLines.join(NEW_LINE); } /* * A map of two functions which can create list items. */ var LIST_ITEM_MAP = {}; LIST_ITEM_MAP.true = renderPedanticListItem; LIST_ITEM_MAP.false = renderNormalListItem; /** * Create a list-item node. * * @example * renderListItem('- _foo_', now()); * * @param {Object} value - List-item. * @param {Object} position - List-item location. * @return {Object} - `listItem` node. */ function renderListItem(value, position) { var self = this; var checked = null; var node; var task; var indent; value = LIST_ITEM_MAP[self.options.pedantic].apply(self, arguments); if (self.options.gfm) { task = value.match(EXPRESSION_TASK_ITEM); if (task) { indent = task[0].length; checked = task[1].toLowerCase() === 'x'; self.indent(position.line)(indent); value = value.slice(indent); } } node = { 'type': LIST_ITEM, 'loose': EXPRESSION_LOOSE_LIST_ITEM.test(value) || value.charAt(value.length - 1) === NEW_LINE }; if (self.options.gfm) { node.checked = checked; } node.children = self.tokenizeBlock(value, position); return node; } /** * Create a footnote-definition node. * * @example * renderFootnoteDefinition('1', '_foo_', now()); * * @param {string} identifier - Unique reference. * @param {string} value - Contents * @param {Object} position - Definition location. * @return {Object} - `footnoteDefinition` node. */ function renderFootnoteDefinition(identifier, value, position) { var self = this; var exitBlockquote = self.enterBlockquote(); var node; node = { 'type': FOOTNOTE_DEFINITION, 'identifier': identifier, 'children': self.tokenizeBlock(value, position) }; exitBlockquote(); return node; } /** * Create a heading node. * * @example * renderHeading('_foo_', 1, now()); * * @param {string} value - Content. * @param {number} depth - Heading depth. 
* @param {Object} position - Heading content location. * @return {Object} - `heading` node */ function renderHeading(value, depth, position) { return { 'type': HEADING, 'depth': depth, 'children': this.tokenizeInline(value, position) }; } /** * Create a blockquote node. * * @example * renderBlockquote('_foo_', eat); * * @param {string} value - Content. * @param {Object} now - Position. * @return {Object} - `blockquote` node. */ function renderBlockquote(value, now) { var self = this; var exitBlockquote = self.enterBlockquote(); var node = { 'type': BLOCKQUOTE, 'children': this.tokenizeBlock(value, now) }; exitBlockquote(); return node; } /** * Create a void node. * * @example * renderVoid('horizontalRule'); * * @param {string} type - Node type. * @return {Object} - Node of type `type`. */ function renderVoid(type) { return { 'type': type }; } /** * Create a parent. * * @example * renderParent('paragraph', '_foo_'); * * @param {string} type - Node type. * @param {Array.<Object>} children - Child nodes. * @return {Object} - Node of type `type`. */ function renderParent(type, children) { return { 'type': type, 'children': children }; } /** * Create a raw node. * * @example * renderRaw('inlineCode', 'foo()'); * * @param {string} type - Node type. * @param {string} value - Contents. * @return {Object} - Node of type `type`. */ function renderRaw(type, value) { return { 'type': type, 'value': value }; } /** * Create a link node. * * @example * renderLink(true, 'example.com', 'example', 'Example Domain', now(), eat); * renderLink(false, 'fav.ico', 'example', 'Example Domain', now(), eat); * * @param {boolean} isLink - Whether linking to a document * or an image. * @param {string} href - URI reference. * @param {string} text - Content. * @param {string?} title - Title. * @param {Object} position - Location of link. * @param {function(string)} eat * @return {Object} - `link` or `image` node. 
*/ function renderLink(isLink, href, text, title, position, eat) { var self = this; var exitLink = self.enterLink(); var node; node = { 'type': isLink ? LINK : IMAGE, 'title': title ? decode(self.descape(title), eat) : null }; href = decode(href, eat); if (isLink) { node.href = href; node.children = self.tokenizeInline(text, position); } else { node.src = href; node.alt = text ? decode(self.descape(text), eat) : null; } exitLink(); return node; } /** * Create a footnote node. * * @example * renderFootnote('_foo_', now()); * * @param {string} value - Contents. * @param {Object} position - Location of footnote. * @return {Object} - `footnote` node. */ function renderFootnote(value, position) { return this.renderInline(FOOTNOTE, value, position); } /** * Add a node with inline content. * * @example * renderInline('strong', '_foo_', now()); * * @param {string} type - Node type. * @param {string} value - Contents. * @param {Object} position - Location of node. * @return {Object} - Node of type `type`. */ function renderInline(type, value, position) { return this.renderParent(type, this.tokenizeInline(value, position)); } /** * Add a node with block content. * * @example * renderBlock('blockquote', 'Foo.', now()); * * @param {string} type - Node type. * @param {string} value - Contents. * @param {Object} position - Location of node. * @return {Object} - Node of type `type`. */ function renderBlock(type, value, position) { return this.renderParent(type, this.tokenizeBlock(value, position)); } /** * Tokenise an escape sequence. * * @example * tokenizeEscape(eat, '\\a', 'a'); * * @param {function(string)} eat * @param {string} $0 - Whole escape. * @param {string} $1 - Escaped character. * @return {Node} - `escape` node. */ function tokenizeEscape(eat, $0, $1) { return eat($0)(this.renderRaw(ESCAPE, $1)); } /** * Tokenise a URL in carets. 
* * @example * tokenizeAutoLink(eat, '<http://foo.bar>', 'http://foo.bar', ''); * * @property {boolean} notInLink * @param {function(string)} eat * @param {string} $0 - Whole link. * @param {string} $1 - URL. * @param {string?} [$2] - Protocol or at. * @return {Node} - `link` node. */ function tokenizeAutoLink(eat, $0, $1, $2) { var self = this; var href = $1; var text = $1; var now = eat.now(); var offset = 1; var tokenize; var node; if ($2 === AT_SIGN) { if ( text.substr(0, MAILTO_PROTOCOL.length).toLowerCase() !== MAILTO_PROTOCOL ) { href = MAILTO_PROTOCOL + text; } else { text = text.substr(MAILTO_PROTOCOL.length); offset += MAILTO_PROTOCOL.length; } } now.column += offset; /* * Temporarily remove support for escapes in autolinks. */ tokenize = self.inlineTokenizers.escape; self.inlineTokenizers.escape = null; node = eat($0)(self.renderLink(true, href, text, null, now, eat)); self.inlineTokenizers.escape = tokenize; return node; } tokenizeAutoLink.notInLink = true; /** * Tokenise a URL in text. * * @example * tokenizeURL(eat, 'http://foo.bar'); * * @property {boolean} notInLink * @param {function(string)} eat * @param {string} $0 - Whole link. * @return {Node} - `link` node. */ function tokenizeURL(eat, $0) { var now = eat.now(); return eat($0)(this.renderLink(true, $0, $0, null, now, eat)); } tokenizeURL.notInLink = true; /** * Tokenise an HTML tag. * * @example * tokenizeTag(eat, '<span foo="bar">'); * * @param {function(string)} eat * @param {string} $0 - Content. * @return {Node} - `html` node. */ function tokenizeTag(eat, $0) { var self = this; if (!self.inLink && EXPRESSION_HTML_LINK_OPEN.test($0)) { self.inLink = true; } else if (self.inLink && EXPRESSION_HTML_LINK_CLOSE.test($0)) { self.inLink = false; } return eat($0)(self.renderRaw(HTML, $0)); } /** * Tokenise a link. * * @example * tokenizeLink( * eat, '![foo](fav.ico "Favicon")', '![', 'foo', null, * 'fav.ico', 'Foo Domain' * ); * * @param {function(string)} eat * @param {string} $0 - Whole link. 
* @param {string} $1 - Prefix. * @param {string} $2 - Text. * @param {string?} $3 - URL wrapped in angle braces. * @param {string?} $4 - Literal URL. * @param {string?} $5 - Title wrapped in single or double * quotes. * @param {string?} [$6] - Title wrapped in double quotes. * @param {string?} [$7] - Title wrapped in parentheses. * @return {Node?} - `link` node, `image` node, or `null`. */ function tokenizeLink(eat, $0, $1, $2, $3, $4, $5, $6, $7) { var isLink = $1 === BRACKET_OPEN; var href = $4 || $3 || ''; var title = $7 || $6 || $5; var now; if (!isLink || !this.inLink) { now = eat.now(); now.column += $1.length; return eat($0)(this.renderLink( isLink, this.descape(href), $2, title, now, eat )); } return null; } /** * Tokenise a reference link, image, or footnote; * shortcut reference link, or footnote. * * @example * tokenizeReference(eat, '[foo]', '[', 'foo'); * tokenizeReference(eat, '[foo][]', '[', 'foo', ''); * tokenizeReference(eat, '[foo][bar]', '[', 'foo', 'bar'); * * @param {function(string)} eat * @param {string} $0 - Whole link. * @param {string} $1 - Prefix. * @param {string} $2 - identifier. * @param {string} $3 - Content. * @return {Node?} - `linkReference`, `imageReference`, or * `footnoteReference`. Returns null when this is a link * reference, but we're already in a link. */ function tokenizeReference(eat, $0, $1, $2, $3) { var self = this; var text = $2; var identifier = $3 || $2; var type = $1 === BRACKET_OPEN ? 
'link' : 'image'; var isFootnote = self.options.footnotes && identifier.charAt(0) === CARET; var now = eat.now(); var referenceType; var node; var exitLink; if ($3 === undefined) { referenceType = 'shortcut'; } else if ($3 === '') { referenceType = 'collapsed'; } else { referenceType = 'full'; } if (referenceType !== 'shortcut') { isFootnote = false; } if (isFootnote) { identifier = identifier.substr(1); } if (isFootnote) { if (identifier.indexOf(SPACE) !== -1) { return eat($0)(self.renderFootnote(identifier, eat.now())); } else { type = 'footnote'; } } if (self.inLink && type === 'link') { return null; } now.column += $1.length; node = { 'type': type + 'Reference', 'identifier': normalize(identifier) }; if (type === 'link' || type === 'image') { node.referenceType = referenceType; } if (type === 'link') { exitLink = self.enterLink(); node.children = self.tokenizeInline(text, now); exitLink(); } else if (type === 'image') { node.alt = decode(self.descape(text), eat); } return eat($0)(node); } /** * Tokenise strong emphasis. * * @example * tokenizeStrong(eat, '**foo**', '**', 'foo'); * tokenizeStrong(eat, '__foo__', null, null, '__', 'foo'); * * @param {function(string)} eat * @param {string} $0 - Whole emphasis. * @param {string?} $1 - Marker. * @param {string?} $2 - Content. * @param {string?} [$3] - Marker. * @param {string?} [$4] - Content. * @return {Node?} - `strong` node, when not empty. */ function tokenizeStrong(eat, $0, $1, $2, $3, $4) { var now = eat.now(); var value = $2 || $4; if (trim(value) === EMPTY) { return null; } now.column += 2; return eat($0)(this.renderInline(STRONG, value, now)); } /** * Tokenise slight emphasis. * * @example * tokenizeEmphasis(eat, '*foo*', '*', 'foo'); * tokenizeEmphasis(eat, '_foo_', null, null, '_', 'foo'); * * @param {function(string)} eat * @param {string} $0 - Whole emphasis. * @param {string?} $1 - Marker. * @param {string?} $2 - Content. * @param {string?} [$3] - Marker. * @param {string?} [$4] - Content. 
* @return {Node?} - `emphasis` node, when not empty. */ function tokenizeEmphasis(eat, $0, $1, $2, $3, $4) { var now = eat.now(); var marker = $1 || $3; var value = $2 || $4; if ( trim(value) === EMPTY || value.charAt(0) === marker || value.charAt(value.length - 1) === marker ) { return null; } now.column += 1; return eat($0)(this.renderInline(EMPHASIS, value, now)); } /** * Tokenise a deletion. * * @example * tokenizeDeletion(eat, '~~foo~~', '~~', 'foo'); * * @param {function(string)} eat * @param {string} $0 - Whole deletion. * @param {string} $1 - Content. * @return {Node} - `delete` node. */ function tokenizeDeletion(eat, $0, $1) { var now = eat.now(); now.column += 2; return eat($0)(this.renderInline(DELETE, $1, now)); } /** * Tokenise inline code. * * @example * tokenizeInlineCode(eat, '`foo()`', '`', 'foo()'); * * @param {function(string)} eat * @param {string} $0 - Whole code. * @param {string} $1 - Initial markers. * @param {string} $2 - Content. * @return {Node} - `inlineCode` node. */ function tokenizeInlineCode(eat, $0, $1, $2) { return eat($0)(this.renderRaw(INLINE_CODE, trim($2 || ''))); } /** * Tokenise a break. * * @example * tokenizeBreak(eat, ' \n'); * * @param {function(string)} eat * @param {string} $0 * @return {Node} - `break` node. */ function tokenizeBreak(eat, $0) { return eat($0)(this.renderVoid(BREAK)); } /** * Construct a new parser. * * @example * var parser = new Parser(new VFile('Foo')); * * @constructor * @class {Parser} * @param {VFile} file - File to parse. * @param {Object?} [options] - Passed to * `Parser#setOptions()`. */ function Parser(file, options) { var self = this; var rules = extend({}, self.expressions.rules); self.file = file; self.inLink = false; self.atTop = true; self.atStart = true; self.inBlockquote = false; self.rules = rules; self.descape = descapeFactory(rules, 'escape'); self.options = extend({}, self.options); self.setOptions(options); } /** * Set options. Does not overwrite previously set * options. 
* * @example * var parser = new Parser(); * parser.setOptions({gfm: true}); * * @this {Parser} * @throws {Error} - When an option is invalid. * @param {Object?} [options] - Parse settings. * @return {Parser} - `self`. */ Parser.prototype.setOptions = function (options) { var self = this; var expressions = self.expressions; var rules = self.rules; var current = self.options; var key; if (options === null || options === undefined) { options = {}; } else if (typeof options === 'object') { options = extend({}, options); } else { raise(options, 'options'); } self.options = options; for (key in defaultOptions) { validate.boolean(options, key, current[key]); if (options[key]) { extend(rules, expressions[key]); } } if (options.gfm && options.breaks) { extend(rules, expressions.breaksGFM); } if (options.gfm && options.commonmark) { extend(rules, expressions.commonmarkGFM); } if (options.commonmark) { self.enterBlockquote = noopToggler(); } return self; }; /* * Expose `defaults`. */ Parser.prototype.options = defaultOptions; /* * Expose `expressions`. */ Parser.prototype.expressions = defaultExpressions; /** * Factory to track indentation for each line corresponding * to the given `start` and the number of invocations. * * @param {number} start - Starting line. * @return {function(offset)} - Indenter. */ Parser.prototype.indent = function (start) { var self = this; var line = start; /** * Intender which increments the global offset, * starting at the bound line, and further incrementing * each line for each invocation. * * @example * indenter(2) * * @param {number} offset - Number to increment the * offset. */ function indenter(offset) { self.offset[line] = (self.offset[line] || 0) + offset; line++; } return indenter; }; /** * Parse the bound file. * * @example * new Parser(new File('_Foo_.')).parse(); * * @this {Parser} * @return {Object} - `root` node. 
*/
Parser.prototype.parse = function () {
    var self = this;

    /*
     * The file is stringified and passed through `clean`
     * before anything is tokenised.
     */
    var value = clean(String(self.file));
    var node;

    /*
     * Reset the `offset` map, which `Parser#indent()`
     * writes to, keyed by line, to keep track of
     * syntax and white space indentation per line.
     */

    self.offset = {};

    /*
     * The whole document is block content of a `root`
     * node; no position is passed.
     */
    node = self.renderBlock(ROOT, value);

    if (self.options.position) {
        /*
         * The root starts at the first line and column.
         */
        node.position = {
            'start': {
                'line': 1,
                'column': 1
            }
        };

        /*
         * Without `self.eof`, `end` is the very same
         * object as `start` (not a copy).
         */
        node.position.end = self.eof || node.position.start;
    }

    return node;
};

/*
 * Enter and exit helpers.
 *
 * Each is created by `stateToggler` (defined elsewhere in
 * this file) for the named parser flag; the second
 * argument equals the value the constructor initialises
 * that flag with.
 */

Parser.prototype.enterLink = stateToggler('inLink', false);
Parser.prototype.exitTop = stateToggler('atTop', true);
Parser.prototype.exitStart = stateToggler('atStart', true);
Parser.prototype.enterBlockquote = stateToggler('inBlockquote', false);

/*
 * Expose the render helpers as methods, so tokenizers can
 * reach them through the parser they are invoked on.
 */

Parser.prototype.renderRaw = renderRaw;
Parser.prototype.renderVoid = renderVoid;
Parser.prototype.renderParent = renderParent;
Parser.prototype.renderInline = renderInline;
Parser.prototype.renderBlock = renderBlock;
Parser.prototype.renderLink = renderLink;
Parser.prototype.renderCodeBlock = renderCodeBlock;
Parser.prototype.renderBlockquote = renderBlockquote;
Parser.prototype.renderList = renderList;
Parser.prototype.renderListItem = renderListItem;
Parser.prototype.renderFootnoteDefinition = renderFootnoteDefinition;
Parser.prototype.renderHeading = renderHeading;
Parser.prototype.renderFootnote = renderFootnote;

/**
 * Construct a tokenizer.  This creates both
 * `tokenizeInline` and `tokenizeBlock`.
 *
 * @example
 *   Parser.prototype.tokenizeInline = tokenizeFactory('inline');
 *
 * @param {string} type - Name of parser, used to find
 *   its expressions (`%sMethods`) and tokenizers
 *   (`%sTokenizers`).
 * @return {function(string, Object?): Array.<Object>}
 */
function tokenizeFactory(type) {
    /**
     * Tokenizer for a bound `type`
     *
     * @example
     *   parser = new Parser();
     *   parser.tokenizeInline('_foo_');
     *
     * @param {string}