UNPKG

markdown-it-multimd-table-ext

Version:

Multimarkdown table syntax plugin for markdown-it markdown parser

547 lines (490 loc) 19.9 kB
'use strict'; var DFA = require('./lib/dfa.js'); module.exports = function multimd_table_plugin(md, options) { var defaults = { multiline: false, rowspan: false, headerless: false, multibody: true, autolabel: true }; options = md.utils.assign({}, defaults, options || {}); /** * @param {*} state * @param {number} line * @returns {[ number[], boolean[] ]} [ bounds, vlines ] */ function scan_bound_indices(state, line) { /* * Naming convention of positional variables * - list-item * ·········longtext······\n * ^head ^start ^end ^max */ var start = state.bMarks[line] + state.sCount[line], head = state.bMarks[line] + state.blkIndent, end = state.skipSpacesBack(state.eMarks[line], head), bounds = [], vlines = [], c, pos, posjump, escape = false, code = false, serial = 0; /* Scan for valid pipe character position */ for (pos = start; pos < end; pos++) { c = state.src.charCodeAt(pos); switch (c) { case 0x5c /* \ */: escape = true; break; case 0x60 /* ` */: posjump = state.skipChars(pos, 0x60) - 1; /* make \` closes the code sequence, but not open it; the reason is that `\` is correct code block */ /* eslint-disable-next-line brace-style */ if (posjump > pos) { if (!code) { if (serial === 0) { serial = posjump - pos; } else if (serial === posjump - pos) { serial = 0; } } pos = posjump; } else if (code || (!escape && !serial)) { code = !code; } escape = false; break; case 0x7c /* | */: case 0x2016 /* ‖ */: if (!code && !escape) { bounds.push(pos); vlines.push(c === 0x2016); } escape = false; break; default: escape = false; break; } } if (bounds.length === 0) return [ bounds, bounds ]; /* Pad in newline characters on last and this line */ if (bounds[0] > head) { bounds.unshift(head - 1); vlines.unshift(false); } if (bounds[bounds.length - 1] < end - 1) { bounds.push(end); vlines.push(false); } return [ bounds, vlines ]; } /** * @param {*} state * @param {boolean} silent * @param {number} line * @returns {{ text: string, label: string } | boolean } */ function table_caption(state, silent, line) { var meta = { text: null, label: null }, start = state.bMarks[line] + state.sCount[line], max = state.eMarks[line], /* A non-greedy qualifier allows the label to be matched */ capRE = /^\[(.+?)\](\[([^\[\]]+)\])?\s*$/, matches = state.src.slice(start, max).match(capRE); if (!matches) { return false; } if (silent) { return true; } meta.text = matches[1]; if (!options.autolabel && !matches[2]) { return meta; } meta.label = matches[2] || matches[1]; meta.label = meta.label.toLowerCase().replace(/\W+/g, ''); return meta; } /** * @param {*} state * @param {boolean} silent * @param {number} line * @returns {{ bounds: number[], multiline: boolean, vlines: null } | boolean } */ function table_row(state, silent, line) { var meta = { bounds: null, multiline: null, vlines: null }, lineinfo = scan_bound_indices(state, line), bounds, start, pos, oldMax; bounds = lineinfo[0]; if (bounds.length < 2) { return false; } if (silent) { return true; } meta.bounds = bounds; meta.vlines = lineinfo[1]; /* Multiline. Scan boundaries again since it's very complicated */ if (options.multiline) { start = state.bMarks[line] + state.sCount[line]; pos = state.eMarks[line] - 1; /* where backslash should be */ meta.multiline = (state.src.charCodeAt(pos) === 0x5C/* \ */); if (meta.multiline) { oldMax = state.eMarks[line]; state.eMarks[line] = state.skipSpacesBack(pos, start); meta.bounds = scan_bound_indices(state, line); meta.vlines = meta.bounds[1]; meta.bounds = meta.bounds[0]; state.eMarks[line] = oldMax; } } return meta; } /** * @param {*} state * @param {boolean} silent * @param {number} line * @returns {{ aligns: string[], valigns: string[], wraps: boolean[], * wrapscompact: boolean[], vlines: boolean[] } | boolean } */ function table_separator(state, silent, line) { var lineinfo = scan_bound_indices(state, line), bounds = lineinfo[0], meta = { aligns: [], valigns: [], wraps: [], wrapscompact:[], vlines: lineinfo[1] }, sepRE = /^:?(\^|v)?(-+|=+):?\+?-?$/, c, text, align, first, wraps; /* Only separator needs to check indents */ if (state.sCount[line] - state.blkIndent >= 4) { return false; } if (bounds.length === 0) { return false; } for (c = 0; c < bounds.length - 1; c++) { text = state.src.slice(bounds[c] + 1, bounds[c + 1]).trim(); if (!sepRE.test(text)) { return false; } wraps = text.charCodeAt(text.length - 1) === 0x2B/* + */ || (text.length >= 2 && text.charCodeAt(text.length - 2) === 0x2B/* + */); meta.wraps.push(wraps); meta.wrapscompact.push(wraps && text.charCodeAt(text.length - 1) === 0x2D/* - */); first = text.charCodeAt(0); align = (Number(first === 0x3A/* : */) << 4) | Number(text.charCodeAt(text.length - 1 - meta.wraps[c]) === 0x3A); switch (align) { case 0x00: meta.aligns.push(''); break; case 0x01: meta.aligns.push('right'); break; case 0x10: meta.aligns.push('left'); break; case 0x11: meta.aligns.push('center'); break; } align = text.charCodeAt((first === 0x3A/* : */) ? 1 : 0); if (align === 0x5E/* ^ */) { meta.valigns.push('top'); } else if (align === 0x76/* v */) { meta.valigns.push('bottom'); } else { meta.valigns.push(''); } } if (silent) { return true; } return meta; } /** * @param {*} state * @param {boolean} silent * @param {number} line * @returns {boolean} */ function table_empty(state, silent, line) { return state.isEmpty(line); } /** * @param {*} state * @param {boolean} silent * @param {number} line * @returns {boolean} */ function table_line(state, silent, line) { var linetext = state.src.slice(state.bMarks[line], state.eMarks[line]).trim(), // check if the line is all '-' or '=' chars, allowing for optional '|' and '‖' chars lineRE = /^[-=\|‖]? ?[-=]+[ -=\|‖]*$/; return lineRE.test(linetext); } /** * @param {*} state * @param {number} startLine * @param {number} endLine * @param {boolean} silent * @returns {boolean} */ function table(state, startLine, endLine, silent) { /* * Regex pseudo code for table: * caption? header+ separator (data+ empty line)* data+ caption? * * We use DFA to emulate this plugin. Types with lower precedence are * set-minus from all the formers. Noted that separator should have higher * precedence than header or data. * | state | caption separator header line data empty | --> lower precedence * | 0x101000 | 1 0 1 0 0 0 | */ var tableDFA = new DFA(), grp = 0x10, mtr = -1, alignOverrideRE = /^\[(:-|<|-:|>|-|<>|:-:|><)?[ ,]?(v|\^|=)?\] ?(.*)$/, match, token, tableToken, trToken, colspan, leftToken, rowspan, upTokens = [], tableLines, tgroupLines, tag, text, textTrimmed, range, r, c, b, t, halign, valign, styleParts, blockState; if (startLine + 2 > endLine) { return false; } /** * First pass: validate and collect info into table token. IR is stored in * markdown-it `token.meta` to be pushed later. table/tr open tokens are * generated here. */ tableToken = new state.Token('table_open', 'table', 1); tableToken.meta = { sep: null, cap: null, tr: [] }; tableDFA.set_highest_alphabet(0x100000); tableDFA.set_initial_state(0x101000 /* cap/head */); tableDFA.set_accept_states([ 0x100010 /* cap/data */, 0x100110 /* cap/data/line */, 0x100111 /* cap/data/line/empty */, 0x000000 /* end */ ]); tableDFA.set_match_alphabets({ /* cap */ 0x100000: table_caption.bind(this, state, true), /* sep */ 0x010000: table_separator.bind(this, state, true), /* head */ 0x001000: table_row.bind(this, state, true), /* line */ 0x000100: table_line.bind(this, state, true), /* data */ 0x000010: table_row.bind(this, state, true), /* empty */ 0x000001: table_empty.bind(this, state, true) }); tableDFA.set_transitions({ /* cap/head: { cap -> head, head -> sep/head } */ 0x101000: { 0x100000: 0x001000, 0x001000: 0x011000 }, /* head: { head -> sep/head } */ 0x001000: { 0x001000: 0x011000 }, /* sep/head: { sep -> cap/data, head -> sep/head } */ 0x011000: { 0x010000: 0x100010, 0x001000: 0x011000 }, /* cap/data: { cap -> end, data -> cap/data/line/empty } */ 0x100010: { 0x100000: 0x000000, 0x000010: 0x100111 }, /* cap/data/line/empty: { cap -> end, data -> cap/data/line/empty, line -> cap/data, empty -> cap/data } */ 0x100111: { 0x100000: 0x000000, 0x000010: 0x100111, 0x000100: 0x100010, 0x000001: 0x100010 } }); if (options.headerless) { tableDFA.set_initial_state(0x111000 /* cap/sep/head */); tableDFA.update_transition(0x111000, /* cap/sep/head: { cap -> sep/head, sep -> cap/data, head -> sep/head } */ { 0x100000: 0x011000, 0x010000: 0x100010, 0x001000: 0x011000 } ); trToken = new state.Token('tr_placeholder', 'tr', 0); trToken.meta = Object(); // avoid trToken.meta.grp throws exception } if (!options.multibody) { tableDFA.update_transition(0x100010, /* cap/data: { cap -> end, data -> cap/data/line } */ { 0x100000: 0x000000, 0x000010: 0x100110 } // disallow empty line ); tableDFA.update_transition(0x100110, /* cap/data/line: { cap -> end, data -> cap/data/line, line -> cap/data } */ { 0x100000: 0x000000, 0x000010: 0x100110, 0x000100: 0x100010 } ); } /* Don't mix up DFA `_state` and markdown-it `state` */ tableDFA.set_actions(function (_line, _state, _type) { // console.log(_line, _state.toString(16), _type.toString(16)) // for test switch (_type) { case 0x100000: // caption if (tableToken.meta.cap) { break; } tableToken.meta.cap = table_caption(state, false, _line); tableToken.meta.cap.map = [ _line, _line + 1 ]; tableToken.meta.cap.first = (_line === startLine); break; case 0x010000: // separator tableToken.meta.sep = table_separator(state, false, _line); tableToken.meta.sep.map = [ _line, _line + 1 ]; trToken.meta.grp |= 0x01; // previously assigned at case 0x001010 grp = 0x10; break; case 0x001000: // header case 0x000010: // data trToken = new state.Token('tr_open', 'tr', 1); trToken.map = [ _line, _line + 1 ]; trToken.meta = table_row(state, false, _line); trToken.meta.type = _type; trToken.meta.grp = grp; grp = 0x00; tableToken.meta.tr.push(trToken); /* Multiline. Merge trTokens as an entire multiline trToken */ if (options.multiline) { if (trToken.meta.multiline && mtr < 0) { /* Start line of multiline row. mark this trToken */ mtr = tableToken.meta.tr.length - 1; } else if (!trToken.meta.multiline && mtr >= 0) { /* End line of multiline row. merge forward until the marked trToken */ token = tableToken.meta.tr[mtr]; token.meta.mbounds = tableToken.meta.tr .slice(mtr).map(function (tk) { return tk.meta.bounds; }); token.map[1] = trToken.map[1]; tableToken.meta.tr = tableToken.meta.tr.slice(0, mtr + 1); mtr = -1; } } break; case 0x000100: // line trToken.meta.lineBelow = true; trToken.meta.grp |= 0x01; grp = 0x10; break; case 0x000001: // empty trToken.meta.grp |= 0x01; grp = 0x10; break; } }); if (tableDFA.execute(startLine, endLine) === false) { return false; } // if (!tableToken.meta.sep) { return false; } // always evaluated true if (!tableToken.meta.tr.length) { return false; } // false under headerless corner case if (silent) { return true; } /* Last data row cannot be detected. not stored to trToken outside? */ tableToken.meta.tr[tableToken.meta.tr.length - 1].meta.grp |= 0x01; /** * Second pass: actually push the tokens into `state.tokens`. * thead/tbody/th/td open tokens and all closed tokens are generated here; * thead/tbody are generally called tgroup; td/th are generally called tcol. */ tableToken.map = tableLines = [ startLine, 0 ]; tableToken.block = true; tableToken.level = state.level++; state.tokens.push(tableToken); if (tableToken.meta.cap) { token = state.push('caption_open', 'caption', 1); token.map = tableToken.meta.cap.map; var attrs = []; var capSide = tableToken.meta.cap.first ? 'top' : 'bottom'; /* Null is possible when disabled the option autolabel */ if (tableToken.meta.cap.label !== null) { attrs.push([ 'id', tableToken.meta.cap.label ]); } /* Add caption-side inline-CSS to <caption> tag, if caption is below the markdown table. */ if (capSide !== 'top') { attrs.push([ 'style', 'caption-side: ' + capSide ]); } token.attrs = attrs; token = state.push('inline', '', 0); token.content = tableToken.meta.cap.text; token.map = tableToken.meta.cap.map; token.children = []; token = state.push('caption_close', 'caption', -1); } for (r = 0; r < tableToken.meta.tr.length; r++) { leftToken = new state.Token('td_th_placeholder', '', 0); /* Push in thead/tbody and tr open tokens */ trToken = tableToken.meta.tr[r]; // console.log(trToken.meta); // for test if (trToken.meta.grp & 0x10) { tag = (trToken.meta.type === 0x001000) ? 'thead' : 'tbody'; token = state.push(tag + '_open', tag, 1); token.map = tgroupLines = [ trToken.map[0], 0 ]; // array ref upTokens = []; } trToken.block = true; trToken.level = state.level++; state.tokens.push(trToken); /* Push in th/td tokens */ for (c = 0; c < trToken.meta.bounds.length - 1; c++) { range = [ trToken.meta.bounds[c] + 1, trToken.meta.bounds[c + 1] ]; text = state.src.slice.apply(state.src, range); if (text === '') { colspan = leftToken.attrGet('colspan'); leftToken.attrSet('colspan', colspan === null ? 2 : colspan + 1); continue; } textTrimmed = text.trim(); if (options.rowspan && upTokens[c] && textTrimmed === '^^') { rowspan = upTokens[c].attrGet('rowspan'); upTokens[c].attrSet('rowspan', rowspan === null ? 2 : rowspan + 1); leftToken = new state.Token('td_th_placeholder', '', 0); continue; } tag = (trToken.meta.type === 0x001000) ? 'th' : 'td'; token = state.push(tag + '_open', tag, 1); token.map = trToken.map; token.attrs = []; halign = tableToken.meta.sep.aligns[c]; valign = tableToken.meta.sep.valigns[c]; match = alignOverrideRE.exec(textTrimmed); if (match) { if (match[1]) { // halign switch (match[1]) { case ':-': case '<': halign = 'left'; break; case '-:': case '>': halign = 'right'; break; case ':-:': case '><': halign = 'center'; break; case '-': case '<>': default: halign = ''; break; } } if (match[2]) { // valign switch (match[2]) { case '^': valign = 'top'; break; case 'v': valign = 'bottom'; break; case '=': default: valign = 'middle'; break; } } // text text = match[3]; } styleParts = []; if (halign) { styleParts.push('text-align:' + halign); } if (valign) { styleParts.push('vertical-align:' + valign); } token.meta = { halign: halign, valign: valign }; if (tableToken.meta.sep.vlines[c]) { styleParts.push('border-left:1px solid'); } if (tableToken.meta.sep.vlines[c + 1]) { styleParts.push('border-right:1px solid'); } if (trToken.meta.lineBelow) { styleParts.push('border-bottom:1px solid'); } if (styleParts.length) { token.attrs.push([ 'style', styleParts.join(';') ]); } if (tableToken.meta.sep.wraps[c]) { token.attrs.push([ 'class', 'extend' ]); } leftToken = upTokens[c] = token; /* Multiline. Join the text and feed into markdown-it blockParser. */ if (options.multiline && trToken.meta.multiline && trToken.meta.mbounds) { // Pad the text with empty lines to ensure the line number mapping is correct text = new Array(trToken.map[0]).fill('').concat([ text.trimRight() ]); for (b = 1; b < trToken.meta.mbounds.length; b++) { /* Line with N bounds has cells indexed from 0 to N-2 */ if (c > trToken.meta.mbounds[b].length - 2) { continue; } range = [ trToken.meta.mbounds[b][c] + 1, trToken.meta.mbounds[b][c + 1] ]; text.push(state.src.slice.apply(state.src, range).trimRight()); } blockState = new state.md.block.State(text.join('\n'), state.md, state.env, []); blockState.level = trToken.level + 1; // Start tokenizing from the actual content (trToken.map[0]) state.md.block.tokenize(blockState, trToken.map[0], blockState.lineMax); for (t = 0; t < blockState.tokens.length; t++) { state.tokens.push(blockState.tokens[t]); } } else { token = state.push('inline', '', 0); token.content = text.trim(); token.map = trToken.map; token.level = trToken.level + 1; token.children = []; } token = state.push(tag + '_close', tag, -1); } /* Push in tr and thead/tbody closed tokens */ state.push('tr_close', 'tr', -1); if (trToken.meta.grp & 0x01) { tag = (trToken.meta.type === 0x001000) ? 'thead' : 'tbody'; token = state.push(tag + '_close', tag, -1); tgroupLines[1] = trToken.map[1]; } } tableLines[1] = Math.max( tgroupLines[1], tableToken.meta.sep.map[1], tableToken.meta.cap ? tableToken.meta.cap.map[1] : -1 ); token = state.push('table_close', 'table', -1); state.line = tableLines[1]; return true; } md.block.ruler.at('table', table, { alt: [ 'paragraph', 'reference' ] }); }; /* vim: set ts=2 sw=2 et: */