mathpix-markdown-it
Version:
Mathpix-markdown-it is an open-source implementation of the mathpix-markdown spec written in TypeScript. It relies on the following open-source libraries: MathJax v3 (to render math with SVGs) and markdown-it (for standard Markdown parsing).
587 lines • 26.2 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.renderTabularInline = exports.renderInlineTokenBlock = void 0;
var tslib_1 = require("tslib");
var tsv_1 = require("../common/tsv");
var csv_1 = require("../common/csv");
var table_markdown_1 = require("../common/table-markdown");
var parse_mmd_element_1 = require("../../helpers/parse-mmd-element");
var common_1 = require("../highlight/common");
var render_table_cell_content_1 = require("../common/render-table-cell-content");
var list_markers_1 = require("../common/list-markers");
/**
 * Token names that form the table skeleton itself (table/tbody/row/cell open and close).
 * Any token whose `token`/`type` is not in this set is rendered as cell content.
 */
var TABLE_TOKENS = new Set([
    'table_open',
    'table_close',
    'tbody_open',
    'tbody_close',
    'tr_open',
    'tr_close',
    'td_open',
    'td_close',
]);
/**
 * Concatenates `chunk` onto the final entry of `lines`, mutating the array in place.
 * A falsy chunk is ignored; when `lines` is empty the chunk becomes its first entry.
 *
 * @param lines - Mutable array of line strings.
 * @param chunk - Text to append to the last line.
 */
var appendToLastLine = function (lines, chunk) {
    if (chunk) {
        var lastIndex = lines.length - 1;
        if (lastIndex < 0) {
            lines.push(chunk);
        }
        else {
            lines[lastIndex] += chunk;
        }
    }
};
/**
 * Guarantees that `lines` ends with a blank (empty or whitespace-only) entry.
 * An empty string is pushed when the array is empty or when its last entry
 * contains any non-whitespace character; otherwise the array is left untouched.
 *
 * @param lines - Array of lines representing a multi-line cell value (mutated in place).
 */
var ensureTrailingEmptyLine = function (lines) {
    if (lines.length === 0 || lines[lines.length - 1].trim()) {
        lines.push('');
    }
};
/**
 * Builds the TSV representation of one table cell from its lines.
 *
 * Rules, in order:
 * - The lines are joined with '\n' (a missing `lines` is treated as an empty list).
 * - If the joined text contains a double quote (`"`), the lines are joined with single
 *   spaces instead and returned as-is, to avoid broken TSV/Excel output.
 * - For nested tables (`isSubTable`), the newline-joined text is returned unquoted.
 * - Otherwise the text is wrapped in double quotes only when it contains a newline or tab.
 *
 * @param lines - Cell content split into lines.
 * @param isSubTable - Whether the cell belongs to a nested table context.
 * @returns TSV-ready string for a single table cell.
 */
var formatTsvCell = function (lines, isSubTable) {
    var parts = lines === null || lines === undefined ? [] : lines;
    var multiline = parts.join('\n');
    if (multiline.indexOf('"') !== -1) {
        return parts.join(' ');
    }
    // Newlines and tabs are the characters that need protecting in spreadsheets/parsers.
    if (isSubTable || !/[\n\t]/.test(multiline)) {
        return multiline;
    }
    return '"' + multiline + '"';
};
/**
 * Builds the CSV representation of one table cell: its lines joined with '\n'.
 * A missing (`null`/`undefined`) `lines` value yields an empty string.
 *
 * @param lines - Cell content split into lines.
 * @returns CSV-ready string for a single table cell.
 */
var formatCsvCell = function (lines) {
    if (lines === null || lines === undefined) {
        return '';
    }
    return lines.join('\n');
};
/**
 * Reads an attribute value from a token's `attrs` list of `[name, value]` pairs.
 *
 * @param token - Token whose `attrs` array is searched.
 * @param name - Attribute name to look up.
 * @returns The value of the first matching pair, or '' when `name` is falsy,
 *          the token has no `attrs`, or no pair matches.
 */
var tokenAttrGet = function (token, name) {
    if (!name || !token.attrs) {
        return '';
    }
    var pairs = token.attrs;
    for (var i = 0; i < pairs.length; i++) {
        if (pairs[i][0] === name) {
            return pairs[i][1];
        }
    }
    return '';
};
/**
 * Writes an attribute on a token's `attrs` list of `[name, value]` pairs (in place).
 * The first pair with a matching name is overwritten; otherwise a new pair is appended.
 * The `attrs` array is created when the token does not have one. A falsy `name` is a no-op.
 *
 * @param token - Token to mutate.
 * @param name - Attribute name.
 * @param value - Attribute value to store.
 */
var tokenAttrSet = function (token, name, value) {
    if (!name) {
        return;
    }
    if (!token.attrs) {
        token.attrs = [[name, value]];
        return;
    }
    var pairs = token.attrs;
    for (var i = 0; i < pairs.length; i++) {
        if (pairs[i][0] === name) {
            pairs[i][1] = value;
            return;
        }
    }
    pairs.push([name, value]);
};
/**
 * Renders one token that is not part of the table skeleton into the accumulators
 * of the cell currently being built.
 *
 * Three kinds of token are distinguished:
 * - `tabular` / `tabular_inline`: the nested table is rendered with `renderInlineTokenBlock`
 *   (as a sub-table). Its HTML is appended to `acc.result`, its Markdown rows are appended to
 *   `acc.cellMd` separated by `<br>`, and its TSV/CSV output is placed on a fresh line of the
 *   cell's TSV/CSV buffers.
 * - Tokens with children: rendered through `renderTableCellContent`; every accumulator
 *   (HTML, TSV, CSV, Markdown, smoothed) receives the corresponding output.
 * - Leaf tokens: rendered with `slf.renderInline`, then passed to
 *   `handleListTokensForCellMarkdown` for list-specific stitching.
 *
 * @param token - Token to render (expected to be outside the core table token set).
 * @param ctx - Rendering context; `options`, `env`, `slf` and `highlight` are read here and the
 *              whole object is forwarded to the list handler.
 * @param acc - Mutable accumulators for the current cell
 *              (`result`, `cellMd`, `cellSmoothed`, `cellTsvLines`, `cellCsvLines`).
 */
var renderNonTableTokenIntoCell = function (token, ctx, acc) {
    var options = ctx.options, env = ctx.env, slf = ctx.slf, highlight = ctx.highlight;
    var hasToken = token !== null && token !== undefined;
    var tokenType = hasToken ? token.type : undefined;
    if (tokenType === 'tabular' || tokenType === 'tabular_inline') {
        // Nested table: always rendered as a sub-table.
        var nested = (0, exports.renderInlineTokenBlock)(token.children, options, env, slf, true, highlight);
        acc.result += nested.table;
        var nestedMd = nested.tableMd;
        if (Array.isArray(nestedMd) && nestedMd.length) {
            // Separate the nested table from Markdown already present in the cell.
            if (acc.cellMd !== null && acc.cellMd !== undefined && acc.cellMd.trim()) {
                acc.cellMd += '<br>';
            }
            var mdRows = nestedMd.map(function (row) {
                return typeof row === 'string' ? row : row.join(' ');
            });
            acc.cellMd += mdRows.join(' <br> ');
        }
        if (nested.tsv) {
            ensureTrailingEmptyLine(acc.cellTsvLines);
            appendToLastLine(acc.cellTsvLines, (0, tsv_1.TsvJoin)(nested.tsv, options));
        }
        if (nested.csv) {
            ensureTrailingEmptyLine(acc.cellCsvLines);
            appendToLastLine(acc.cellCsvLines, (0, csv_1.CsvJoin)(nested.csv, options, true));
        }
        return;
    }
    var children = hasToken ? token.children : undefined;
    var childCount = children === null || children === undefined ? undefined : children.length;
    if (childCount) {
        // Composite token: delegate to the shared cell-content renderer.
        var rendered = (0, render_table_cell_content_1.renderTableCellContent)(token, true, options, env, slf);
        acc.result += rendered.content;
        appendToLastLine(acc.cellTsvLines, rendered.tsv);
        appendToLastLine(acc.cellCsvLines, rendered.csv);
        acc.cellMd += rendered.tableMd;
        acc.cellSmoothed += rendered.tableSmoothed;
        return;
    }
    // Leaf token: plain inline render, then list-related Markdown stitching.
    acc.result += slf.renderInline([token], options, env);
    handleListTokensForCellMarkdown(token, ctx, acc);
};
/**
 * Applies Markdown/TSV/CSV "stitching" rules for LaTeX list tokens when rendering table cells.
 * The list content itself is not rendered here; this handler only injects separators (`<br>`),
 * indentation and list markers so that list structure survives inside a single table cell.
 *
 * Handled token types:
 * - `itemize_list_open` / `enumerate_list_open`: adds `<br>` separators depending on the
 *   previous token and the list nesting level (read from the tokens' `prentLevel` property).
 * - `latex_list_item_open`: starts a fresh TSV/CSV line and writes indentation plus the item
 *   marker into the Markdown, TSV, CSV and smoothed accumulators.
 * - `latex_list_item_close`: adds a `<br>` unless the previous token closed a list.
 * - `itemize_list_close` / `enumerate_list_close` and every other type: no-op.
 *
 * @param token - Current token being processed.
 * @param ctx - Render context containing the token stream (`tokens`), the current index (`idx`)
 *              and the renderer dependencies (`options`, `env`, `slf`).
 * @param acc - Mutable accumulators for the current cell
 *              (`cellMd`, `cellSmoothed`, `cellTsvLines`, `cellCsvLines`).
 */
var handleListTokensForCellMarkdown = function (token, ctx, acc) {
    var tokens = ctx.tokens, idx = ctx.idx, options = ctx.options, env = ctx.env, slf = ctx.slf;
    var prevToken = idx > 0 ? tokens[idx - 1] : null;
    var tokenType = token === null || token === undefined ? undefined : token.type;
    var prevType = prevToken === null || prevToken === undefined ? undefined : prevToken.type;
    // Appends `<br>` to the cell Markdown. If the last non-blank character is a backslash,
    // a space is inserted first so the backslash does not sit directly in front of the tag.
    var addBr = function () {
        var md = acc.cellMd;
        var last = md.length - 1;
        while (last >= 0 && (md[last] === ' ' || md[last] === '\t')) {
            last--;
        }
        if (last >= 0 && md[last] === '\\') {
            acc.cellMd += ' ';
        }
        acc.cellMd += '<br>';
    };
    if (!tokenType) {
        return;
    }
    if (tokenType === 'itemize_list_open' || tokenType === 'enumerate_list_open') {
        var level = token.prentLevel !== null && token.prentLevel !== undefined ? token.prentLevel : 0;
        var prevLevel = prevToken && prevToken.prentLevel !== null && prevToken.prentLevel !== undefined
            ? prevToken.prentLevel
            : 0;
        // Add a break after a paragraph boundary.
        if (prevType === 'paragraph_close') {
            addBr();
        }
        // Add a break before nested lists unless we are right after a list item close.
        if (prevToken && prevType !== 'latex_list_item_close' && level > 0) {
            addBr();
        }
        // Add a break between top-level lists.
        var prevIsListClose = prevType === 'enumerate_list_close' || prevType === 'itemize_list_close';
        if (prevIsListClose && prevLevel === 0) {
            addBr();
        }
        return;
    }
    if (tokenType === 'latex_list_item_open') {
        // Add a break if a list item starts right after a paragraph. This goes through addBr()
        // so the trailing-backslash guard applies here exactly as for every other break.
        if (prevType === 'paragraph_close') {
            addBr();
            acc.cellSmoothed += ' ';
        }
        var isEnumerate = token.parentType === 'enumerate';
        // Ensure list items always start on a fresh TSV/CSV line.
        ensureTrailingEmptyLine(acc.cellTsvLines);
        ensureTrailingEmptyLine(acc.cellCsvLines);
        var meta = token.meta === null || token.meta === undefined ? {} : token.meta;
        var rawLevel = isEnumerate ? meta.enumerateLevel : meta.itemizeLevel;
        // Math.max(1, undefined) is NaN, so a missing level falls back to the top level (1)
        // instead of propagating NaN into the indent loop and the marker helpers.
        var listLevel = Math.max(1, rawLevel) || 1;
        var mdPrefix = '';
        var tsvPrefix = '';
        var csvPrefix = '';
        // Indent nested list items. The Markdown indent uses non-breaking spaces, written as
        // escapes so they stay visible in source and cannot be lost to whitespace normalization.
        for (var depth = 1; depth < listLevel; depth++) {
            mdPrefix += '\u00A0\u00A0';
            tsvPrefix += ' ';
            csvPrefix += ' ';
        }
        var markerMd = '';
        // TSV/CSV markers always start with a space, so they are never empty below.
        var markerTsv = ' ';
        var markerCsv = ' ';
        // If the token provides a custom marker, use it; otherwise default to plain markers.
        if (Object.prototype.hasOwnProperty.call(token, 'marker')) {
            if (token.markerTokens !== null && token.markerTokens !== undefined && token.markerTokens.length > 0) {
                // Avoid mutating the original token: render marker tokens via a shallow copy.
                var markerToken = tslib_1.__assign(tslib_1.__assign({}, token), { children: token.markerTokens });
                var markerRender = (0, render_table_cell_content_1.renderTableCellContent)(markerToken, true, options, env, slf);
                markerMd = markerRender.tableMd !== null && markerRender.tableMd !== undefined ? markerRender.tableMd : '';
                markerTsv += markerRender.tsv !== null && markerRender.tsv !== undefined ? markerRender.tsv : '';
                markerCsv += markerRender.csv !== null && markerRender.csv !== undefined ? markerRender.csv : '';
            }
            else {
                var customMarker = token.marker !== null && token.marker !== undefined ? token.marker : '';
                markerMd = customMarker;
                markerTsv += customMarker;
                markerCsv += customMarker;
            }
        }
        else {
            var enumerateIndex = meta.enumerateIndex !== null && meta.enumerateIndex !== undefined ? meta.enumerateIndex : 1;
            var plainMarker = isEnumerate
                ? (0, list_markers_1.getEnumeratePlainMarker)(Math.max(1, enumerateIndex), listLevel)
                : (0, list_markers_1.getItemizePlainMarker)(listLevel);
            markerMd = plainMarker;
            markerTsv += plainMarker;
            markerCsv += plainMarker;
        }
        if (markerMd) {
            mdPrefix += "".concat(markerMd, " ");
        }
        if (markerTsv) {
            tsvPrefix += markerTsv + ' ';
        }
        if (markerCsv) {
            csvPrefix += markerCsv + ' ';
        }
        acc.cellMd += mdPrefix;
        appendToLastLine(acc.cellTsvLines, tsvPrefix);
        appendToLastLine(acc.cellCsvLines, csvPrefix);
        acc.cellSmoothed += markerMd ? "".concat(markerMd, " ") : '';
        return;
    }
    if (tokenType === 'latex_list_item_close') {
        // Add a break between list items unless the list ends immediately after the item.
        if (prevType !== 'itemize_list_close' && prevType !== 'enumerate_list_close') {
            addBr();
        }
        return;
    }
    // List close tokens (and anything else) need no stitching: list close is handled by the
    // item-close logic and by the surrounding tokens.
};
/**
 * Renders a token stream representing an HTML table (or LaTeX tabular)
 * into HTML markup and parallel TSV/CSV/Markdown/"smoothed" table representations.
 * Also handles nested tabular blocks and list tokens inside table cells.
 *
 * Tokens are inspected through `token.token` / `token.type` (kind), `token.tag` (HTML tag name),
 * `token.n` (-1 emits a closing tag, 0 is treated as self-closing, 1 as an opening tag),
 * `token.hidden` (skipped entirely) and `token.latex` (alignment, read from the first
 * `table_open`/`tbody_open` token that is seen while `align` is still empty).
 *
 * Side effects: `td_open` tokens may get their `style`/`class` attributes rewritten in place
 * (diagonal-box background, highlight styling, `td_empty` for pptx output), and a
 * `slashbox`/`backslashbox` child token gets `meta.isBlock = true`.
 *
 * @param tokens - Token stream to render.
 * @param options - Renderer options (pptx/docx/xhtml, etc.).
 * @param env - Rendering environment.
 * @param slf - Markdown-it renderer instance.
 * @param isSubTable - Whether the current table is nested inside another table cell.
 * @param highlight - Optional highlight metadata applied to table cells.
 * @returns Rendered table outputs in multiple formats:
 *          `{ table, tsv, csv, tableMd, tableSmoothed, align }`, where `table` is the HTML string
 *          and the other table fields are arrays of rows (each row an array of cell strings).
 */
var renderInlineTokenBlock = function (tokens, options, env, slf, isSubTable, highlight) {
var _a, _b, _c;
if (isSubTable === void 0) { isSubTable = false; }
if (highlight === void 0) { highlight = null; }
var nextToken, result = '', needLf = false;
// Table-level accumulators: one entry (row array) per table row.
var arrTsv = [];
var arrCsv = [];
var arrMd = [];
var arrSmoothed = [];
// Row-level accumulators: one entry per cell of the row being built.
var arrRow = [];
var arrRowCsv = [];
var arrRowMd = [];
var arrRowSmoothed = [];
// Cell-level accumulators for the cell being built.
var cellTsvLines = [''];
var cellCsvLines = [''];
var cellMd = '';
var cellSmoothed = '';
var align = '';
// colspan / mr: colspan and rowspan attributes of the current cell (0 when absent).
// rowspan: sparse array indexed by column position; each entry is
// [remaining rows still covered, colspan of the spanning cell].
var colspan = 0, rowspan = [], mr = 0;
var numCol = 0;
// Shared context handed to the non-table token renderer; `idx` is refreshed on every iteration.
var ctx = { tokens: tokens, idx: 0, options: options, env: env, slf: slf, highlight: highlight };
for (var idx = 0; idx < tokens.length; idx++) {
ctx.idx = idx;
var token = tokens[idx];
if (token.hidden) {
continue;
}
// A non-closing token that follows a hidden token starts on a new line.
if (token.n !== -1 && idx && tokens[idx - 1].hidden) {
result += '\n';
}
// A new table/tbody resets every table- and row-level accumulator.
if (token.token === 'table_open' || token.token === 'tbody_open') {
arrTsv = [];
arrCsv = [];
arrMd = [];
arrSmoothed = [];
arrRow = [];
arrRowCsv = [];
arrRowMd = [];
arrRowSmoothed = [];
if (!align) {
align = token.latex;
}
}
if (token.token === 'tr_open') {
arrRow = [];
arrRowCsv = [];
arrRowMd = [];
arrRowSmoothed = [];
}
if (token.token === 'tr_close') {
// The row arrays are stored by reference, so the padding below still lands in the stored row.
arrTsv.push(arrRow);
arrCsv.push(arrRowCsv);
arrMd.push(arrRowMd);
arrSmoothed.push(arrRowSmoothed);
var l = arrRow && arrRow.length > 0 ? arrRow.length : 0;
var l2 = rowspan && rowspan.length > 0 ? rowspan.length : 0;
// The row is shorter than the rowspan table: for every recorded span at or beyond the row's
// length, append empty placeholder cells (one per spanned column when the stored colspan
// is > 1, otherwise a single one) and decrement that span's remaining-row counter.
if (l < l2) {
for (var k = l; k < l2; k++) {
if (rowspan[k]) {
if (rowspan[k][1] && rowspan[k][1] > 1) {
for (var i = 0; i < rowspan[k][1]; i++) {
arrRow.push('');
arrRowCsv.push('');
arrRowMd.push('');
arrRowSmoothed.push('');
}
}
else {
arrRow.push('');
arrRowCsv.push('');
arrRowMd.push('');
arrRowSmoothed.push('');
}
rowspan[k][0] -= 1;
}
}
}
}
if (token.token === 'td_open') {
var nextToken_1 = tokens[idx + 1];
var nextToken2 = tokens[idx + 2];
// Cell whose only content is a single slashbox/backslashbox child: mark the child as a block
// and draw the diagonal as a gradient background on the cell itself.
if ((nextToken2 === null || nextToken2 === void 0 ? void 0 : nextToken2.token) === 'td_close' &&
((_a = nextToken_1 === null || nextToken_1 === void 0 ? void 0 : nextToken_1.children) === null || _a === void 0 ? void 0 : _a.length) === 1 &&
['slashbox', 'backslashbox'].includes(nextToken_1.children[0].type)) {
var diagBoxToken = nextToken_1.children[0];
diagBoxToken.meta = tslib_1.__assign(tslib_1.__assign({}, diagBoxToken.meta), { isBlock: true });
var dir = diagBoxToken.type === 'backslashbox' ? 'left' : 'right';
var styles = tokenAttrGet(token, 'style');
styles += 'background-size: 100% 100%;';
styles += 'vertical-align: middle;';
styles += "background-image: linear-gradient(to bottom ".concat(dir, ", transparent calc(50% - 0.5px), black 50%, black 50%, transparent calc(50% + 0.5px));");
tokenAttrSet(token, 'style', styles);
}
// Start a fresh cell and read its span attributes (0 when the attribute is missing).
cellTsvLines = [''];
cellCsvLines = [''];
cellMd = '';
cellSmoothed = '';
colspan = tokenAttrGet(token, 'colspan');
colspan = colspan ? Number(colspan) : 0;
mr = tokenAttrGet(token, 'rowspan');
mr = mr ? Number(mr) : 0;
// Highlighted tables: prepend the highlight style and set the highlight class on the cell.
if (highlight) {
var styles = tokenAttrGet(token, 'style');
var dataAttrsStyle = (0, common_1.getStyleFromHighlight)(highlight);
tokenAttrSet(token, 'style', dataAttrsStyle + styles);
tokenAttrSet(token, 'class', 'mmd-highlight');
}
}
if (token.token === 'td_close') {
var l = arrRow && arrRow.length > 0 ? arrRow.length : 0;
var l2 = rowspan && rowspan.length > 0 ? rowspan.length : 0;
// Before storing this cell, emit placeholders for consecutive columns (starting at the row's
// current length) that still have an active row span; stop at the first column without one.
if (l < l2) {
for (var k = l; k < l2; k++) {
if (rowspan[k] && rowspan[k][0] && rowspan[k][0] > 0) {
if (rowspan[k] && rowspan[k][1] && rowspan[k][1] > 1) {
for (var i = 0; i < rowspan[k][1]; i++) {
arrRow.push('');
arrRowCsv.push('');
arrRowMd.push('');
arrRowSmoothed.push('');
}
}
else {
arrRow.push('');
arrRowCsv.push('');
arrRowMd.push('');
arrRowSmoothed.push('');
}
if (rowspan[k] && rowspan[k][0]) {
rowspan[k][0] -= 1;
}
}
else {
break;
}
}
}
l = arrRow && arrRow.length > 0 ? arrRow.length : 0;
// Both branches store the finished cell. Only the second one also pads the columns covered by
// the cell's colspan and registers the cell's own rowspan at the cell's column index.
if (!mr && rowspan[l] && rowspan[l][0] > 0) {
arrRow.push(formatTsvCell(cellTsvLines, isSubTable));
arrRowCsv.push(formatCsvCell(cellCsvLines));
arrRowMd.push(cellMd);
arrRowSmoothed.push(cellSmoothed);
}
else {
arrRow.push(formatTsvCell(cellTsvLines, isSubTable));
arrRowCsv.push(formatCsvCell(cellCsvLines));
arrRowMd.push(cellMd);
arrRowSmoothed.push(cellSmoothed);
if (colspan && colspan > 1) {
for (var i = 0; i < colspan - 1; i++) {
arrRow.push('');
arrRowCsv.push('');
arrRowMd.push('');
arrRowSmoothed.push('');
}
}
if (mr && mr > 1) {
rowspan[l] = [mr - 1, colspan];
}
}
colspan = 0;
}
// Inline content: rendered through renderTableCellContent when the token has children,
// otherwise its raw `content` is rendered as a single text token and reused for every format.
if (token.token === 'inline' || token.type === 'inline') {
var content = '';
if (token.children) {
var cellRender = (0, render_table_cell_content_1.renderTableCellContent)(token, true, options, env, slf);
content += cellRender.content;
appendToLastLine(cellTsvLines, cellRender.tsv);
appendToLastLine(cellCsvLines, cellRender.csv);
cellMd += cellRender.tableMd;
cellSmoothed += cellRender.tableSmoothed;
}
else {
content = slf.renderInline([{ type: 'text', content: token.content }], options, env);
appendToLastLine(cellTsvLines, content);
appendToLastLine(cellCsvLines, content);
cellMd += content;
cellSmoothed += content;
}
result += content;
continue;
}
// Anything outside the table skeleton (nested tabular, lists, ...) is delegated; the local
// accumulators are passed through a temporary object and copied back afterwards.
if (!TABLE_TOKENS.has(token.token) && !TABLE_TOKENS.has(token.type)) {
var acc = {
result: result,
cellMd: cellMd,
cellSmoothed: cellSmoothed,
cellTsvLines: cellTsvLines,
cellCsvLines: cellCsvLines,
};
renderNonTableTokenIntoCell(token, ctx, acc);
result = acc.result;
cellMd = acc.cellMd;
cellSmoothed = acc.cellSmoothed;
cellTsvLines = acc.cellTsvLines;
cellCsvLines = acc.cellCsvLines;
continue;
}
// From here on the token is a table-skeleton token: emit its HTML tag.
var tokenTag = token.tag;
var sizerTr = '';
// pptx output: build an extra "sizer" row with one placeholder cell per column of the tbody
// (column count read from the `data_num_col` attribute); it is emitted right after the tag.
if (options === null || options === void 0 ? void 0 : options.forPptx) {
if (token.tag === 'tbody') {
numCol = tokenAttrGet(token, 'data_num_col');
numCol = numCol ? Number(numCol) : 0;
if (numCol) {
sizerTr += '<tr class="tr-sizer">';
for (var i = 0; i < numCol; i++) {
sizerTr += '<td class="td_empty">x</td>';
}
sizerTr += '</tr>';
}
}
}
// Add token name, e.g. `<img`
result += (token.n === -1 ? '</' : '<') + tokenTag;
// pptx output: an empty cell (td_open directly followed by td_close) gets the `td_empty` class.
if ((options === null || options === void 0 ? void 0 : options.forPptx) && token.token === 'td_open' && ((_b = tokens[idx + 1]) === null || _b === void 0 ? void 0 : _b.token) === 'td_close') {
var className = tokenAttrGet(token, 'class');
className += className ? ' ' : '';
className += 'td_empty';
tokenAttrSet(token, 'class', className);
}
// Encode attributes, e.g. `<img src="foo"`
result += slf.renderAttrs(token);
// Add a slash for self-closing tags, e.g. `<img src="foo" /`
if (token.n === 0 && options.xhtmlOut) {
result += ' /';
}
// Check if we need to add a newline after this tag
needLf = true;
if (token.n === 1) {
if (idx + 1 < tokens.length) {
nextToken = tokens[idx + 1];
if (nextToken.token === 'inline' || nextToken.hidden) {
// Block-level tag containing an inline tag.
//
needLf = false;
}
else if (nextToken.n === -1 && nextToken.tag === token.tag) {
// Opening tag + closing tag of the same type. E.g. `<li></li>`.
//
needLf = false;
}
}
}
result += needLf ? '>\n' : '>';
// pptx output: emit the sizer row built above and put an `x` placeholder into empty cells.
if (options === null || options === void 0 ? void 0 : options.forPptx) {
if (sizerTr) {
result += sizerTr;
}
if (token.token === 'td_open' && ((_c = tokens[idx + 1]) === null || _c === void 0 ? void 0 : _c.token) === 'td_close') {
result += 'x';
}
}
}
return {
table: result,
tsv: arrTsv,
csv: arrCsv,
tableMd: arrMd,
tableSmoothed: arrSmoothed,
align: align
};
};
exports.renderInlineTokenBlock = renderInlineTokenBlock;
/**
 * Renders an inline tabular token to an HTML wrapper containing the requested formats.
 *
 * The table is rendered once with `renderInlineTokenBlock`; the resulting TSV, CSV, Markdown and
 * smoothed data are stored on the token (`token.tsv`, `token.csv`, `token.tableMd`,
 * `token.tableSmoothed`). The returned markup is a `<div class="inline-tabular">` (plus
 * `sub-table` for nested tables) holding, in this order, the table HTML and hidden `<tsv>`,
 * `<table-markdown>` and `<csv>` elements, each only when enabled.
 *
 * Output flags are read from `options.outMath`; a missing `outMath` is treated as `{}` so the
 * defaults apply: `include_tsv`, `include_csv`, `include_table_markdown` = false,
 * `include_table_html` = true.
 *
 * @param a - Not used by this function.
 * @param token - Tabular token; reads `children`, `isSubTable`, `highlights` and is mutated
 *                with the rendered data described above.
 * @param options - Renderer options, including `outMath`.
 * @param env - Rendering environment.
 * @param slf - Renderer instance forwarded to `renderInlineTokenBlock`.
 * @returns The wrapper HTML, or '' when every output format is disabled.
 */
var renderTabularInline = function (a, token, options, env, slf) {
    // Fall back to an empty object so the defaults below apply instead of throwing.
    var outMath = options.outMath || {};
    var include_tsv = outMath.include_tsv === void 0 ? false : outMath.include_tsv;
    var include_csv = outMath.include_csv === void 0 ? false : outMath.include_csv;
    var include_table_markdown = outMath.include_table_markdown === void 0 ? false : outMath.include_table_markdown;
    var include_table_html = outMath.include_table_html === void 0 ? true : outMath.include_table_html;
    if (!include_tsv && !include_csv && !include_table_html && !include_table_markdown) {
        return '';
    }
    // Only the first highlight entry is applied to the table cells.
    var highlights = token.highlights;
    var highlight = highlights !== null && highlights !== undefined && highlights.length ? highlights[0] : null;
    var data = (0, exports.renderInlineTokenBlock)(token.children, options, env, slf, token.isSubTable, highlight);
    token.tsv = data.tsv;
    token.csv = data.csv;
    token.tableMd = data.tableMd;
    token.tableSmoothed = data.tableSmoothed;
    var className = 'inline-tabular';
    className += token.isSubTable ? ' sub-table' : '';
    var tabular = include_table_html ? data.table : '';
    var tsv = include_tsv && token.tsv
        ? "<tsv style=\"display: none;\">".concat((0, parse_mmd_element_1.formatSource)((0, tsv_1.TsvJoin)(token.tsv, options), true), "</tsv>")
        : '';
    var tableMd = include_table_markdown && token.tableMd
        ? "<table-markdown style=\"display: none;\">".concat((0, parse_mmd_element_1.formatSource)((0, table_markdown_1.tableMarkdownJoin)(data.tableMd, data.align), true), "</table-markdown>")
        : '';
    var csv = include_csv && token.csv
        ? "<csv style=\"display: none;\">".concat((0, parse_mmd_element_1.formatSource)((0, csv_1.CsvJoin)(token.csv, options), true), "</csv>")
        : '';
    return "<div class=\"".concat(className, "\">").concat(tabular).concat(tsv).concat(tableMd).concat(csv, "</div>");
};
exports.renderTabularInline = renderTabularInline;
//# sourceMappingURL=render-tabular.js.map