UNPKG

mathpix-markdown-it

Version:

Mathpix-markdown-it is an open source implementation of the mathpix-markdown spec written in TypeScript. It relies on the following open source libraries: MathJax v3 (to render math with SVGs) and markdown-it (for standard Markdown parsing).

339 lines 15.8 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.convertMathToHtml = exports.endCacheBypass = exports.beginCacheBypass = exports.cleanupMathCache = exports.initMathCache = void 0; var tslib_1 = require("tslib"); var mathjax_1 = require("../../mathjax/"); var utils_1 = require("../utils"); var labels_1 = require("./labels"); var consts_1 = require("../../helpers/consts"); var utils_2 = require("../../helpers/utils"); var consts_2 = require("./consts"); var parse_mmd_element_1 = require("../../helpers/parse-mmd-element"); var RE_MJX_ASSISTIVE_ID = /<mjx-assistive-mml[^>]*\bid="([^"]+)"/; var MATHPIX_ENV_KEY = '__mathpix'; /** Called from init_math_cache hook at the start of every md.parse(). */ var initMathCache = function (state) { if (!state.env) state.env = {}; state.env[MATHPIX_ENV_KEY] = { inlineCache: new Map(), displayCache: new Map(), cacheBypass: 0, }; }; exports.initMathCache = initMathCache; /** Called from cleanup_math_cache hook at the end of every md.parse(). * The cache is for within-parse dedup only; after parse it's dead weight * on env (holds MathJax html/svg strings for every unique expression). */ var cleanupMathCache = function (state) { var _a; var mx = (_a = state === null || state === void 0 ? void 0 : state.env) === null || _a === void 0 ? void 0 : _a[MATHPIX_ENV_KEY]; if (!mx) { return; } mx.inlineCache.clear(); mx.displayCache.clear(); state.env[MATHPIX_ENV_KEY] = null; }; exports.cleanupMathCache = cleanupMathCache; var getMathpixEnv = function (state) { var _a; return ((_a = state === null || state === void 0 ? void 0 : state.env) === null || _a === void 0 ? void 0 : _a[MATHPIX_ENV_KEY]) || null; }; /** Begin a section where cache must not be used (options.outMath is temporarily mutated). */ var beginCacheBypass = function (state) { var mx = getMathpixEnv(state); if (mx) mx.cacheBypass++; }; exports.beginCacheBypass = beginCacheBypass; /** End a cache-bypass section. 
*/ var endCacheBypass = function (state) { var mx = getMathpixEnv(state); if (mx && mx.cacheBypass > 0) mx.cacheBypass--; }; exports.endCacheBypass = endCacheBypass; /** * Returns true when token already contains MathML input (display or inline). * These tokens use a separate MathJax path: TypesetMathML(). */ var isMathMLToken = function (token) { return token.type === 'display_mathML' || token.type === 'inline_mathML'; }; // Memoized per-outMath spread used by skipMathToHtml path — avoid 49K clones in hot path. var skipSvgOutMathCache = new WeakMap(); var getOutMathWithSvgOff = function (outMath) { var cached = skipSvgOutMathCache.get(outMath); if (cached) return cached; var fresh = tslib_1.__assign(tslib_1.__assign({}, outMath), { include_svg: false }); skipSvgOutMathCache.set(outMath, fresh); return fresh; }; /** * Applies typesetting output to a token in place. * Invariant: mathData.svg is dropped at parse time unless `options.highlights` * is non-empty — mutating `options.highlights` between parse and render will * NOT re-populate it. Highlight render path re-computes svg in convertMathToHtmlWithHighlight. */ var applyTypesetResultToToken = function (token, res, options) { var _a, _b; var skipMathEquation = !!((_a = options === null || options === void 0 ? void 0 : options.outMath) === null || _a === void 0 ? void 0 : _a.skipMathToHtml); if (res.html != null && !skipMathEquation) { token.mathEquation = res.html; } if (res.data != null) { var keepSvgInMathData = !!((_b = options === null || options === void 0 ? void 0 : options.highlights) === null || _b === void 0 ? 
void 0 : _b.length); if (keepSvgInMathData || res.data.svg == null) { token.mathData = res.data; } else { var _c = res.data, svg = _c.svg, rest = tslib_1.__rest(_c, ["svg"]); token.mathData = rest; } token.widthEx = res.data.widthEx; token.heightEx = res.data.heightEx; } // Optional fields (present only when requested via outMath options) if (res.ascii != null) { token.ascii = res.ascii; } if (res.linear != null) { token.linear = res.linear; } if (res.ascii_tsv != null) { token.ascii_tsv = res.ascii_tsv; } if (res.ascii_csv != null) { token.ascii_csv = res.ascii_csv; } if (res.ascii_md != null) { token.ascii_md = res.ascii_md; } if (res.labels != null) { token.labels = res.labels; } }; /** * Builds outMath options for AsciiMath/TSV/CSV/Markdown extraction. * Note: "tableTo*" options are enabled only for specific environments. */ var buildAsciiOutMath = function (outMath, token) { var shouldFlatten = consts_1.envArraysShouldBeFlattenInTSV.includes(token.math_env); return Object.assign({}, outMath, { optionAscii: { showStyle: false, extraBrackets: true, // We flatten arrays only for specific environments; otherwise output is noisy/unusable. tableToTsv: (outMath === null || outMath === void 0 ? void 0 : outMath.include_tsv) && shouldFlatten, tableToCsv: (outMath === null || outMath === void 0 ? void 0 : outMath.include_csv) && shouldFlatten, tableToMd: (outMath === null || outMath === void 0 ? void 0 : outMath.include_table_markdown) && shouldFlatten, isSubTable: token.isSubTable, // keep these defaults centralized in ./consts tsv_separators: tslib_1.__assign({}, consts_2.tsvSeparatorsDef), csv_separators: tslib_1.__assign({}, consts_2.csvSeparatorsDef), md_separators: tslib_1.__assign({}, consts_2.mdSeparatorsDef), }, }); }; /** * Returns { html, data } for the requested output_format. 
* - output_format === 'latex': return original latex string (not MathJax HTML) * - output_format === 'mathml': we typically don't need extra SVG metrics * - default: return MathJax HTML + data (metrics) */ var buildFormatOutputs = function (params) { var outputFormat = params.outputFormat, inputLatex = params.inputLatex, renderedHtml = params.renderedHtml, renderedData = params.renderedData; var html = outputFormat === 'latex' ? (0, parse_mmd_element_1.formatSource)(inputLatex !== null && inputLatex !== void 0 ? inputLatex : '') : renderedHtml; var data = (outputFormat === 'latex' || outputFormat === 'mathml') ? null : renderedData; return { html: html, data: data }; }; /** * Typesets a single token according to its type and options: * - MathML tokens -> TypesetMathML() * - tokens requesting Ascii extraction -> TypesetSvgAndAscii() * - default -> Typeset() * * Performance note: for 'mathml' and 'latex' output formats, MathJax still runs * the full SVG rendering pipeline internally (the SVG is discarded by renderByFormat). * A future optimization could add a fast path that skips SVG generation for these formats. */ var typesetMathForToken = function (params) { var _a, _b, _c, _d, _e; var state = params.state, token = params.token, math = params.math, isBlock = params.isBlock, beginNumber = params.beginNumber, containerWidth = params.containerWidth, options = params.options; var outputFormat = (_a = options.outMath) === null || _a === void 0 ? void 0 : _a.output_format; // 1) MathML tokens: MathJax input is MathML (not TeX). if (isMathMLToken(token)) { var typeset_1 = mathjax_1.MathJax.TypesetMathML(math, { display: true, metric: { cwidth: containerWidth }, outMath: options.outMath, accessibility: options.accessibility, renderingErrors: options.renderingErrors, }); return { html: typeset_1.html, // For mathml output, SVG metrics are irrelevant — null out data. data: outputFormat === 'mathml' ? 
null : typeset_1.data, }; } mathjax_1.MathJax.Reset(beginNumber); // Reset is important for equation numbering stability across tokens. // 2) Per-parse cache lookup for simple inline/display math. // Numbered equations (equation_math*) are never cached — they advance the equation counter. // Ascii-extraction tokens have side-effect options and are also excluded. // Cache is bypassed when options.outMath is temporarily mutated (e.g. SetItemizeLevelTokens). var mx = getMathpixEnv(state); var cache = mx && mx.cacheBypass === 0 ? (isBlock ? mx.displayCache : mx.inlineCache) : null; var isCacheable = !token.return_asciimath && (token.type === 'inline_math' || token.type === 'display_math'); if (isCacheable && cache) { var cached = cache.get(math); if (cached) { var hitHtml = cached.html || ''; var hitSvg = (_b = cached.data) === null || _b === void 0 ? void 0 : _b.svg; if (cached._mjxId) { var freshId = mathjax_1.MathJax.nextAssistiveId(); hitHtml = hitHtml.split(cached._mjxId).join(freshId); if (hitSvg) hitSvg = hitSvg.split(cached._mjxId).join(freshId); } var hitData = cached.data ? 
tslib_1.__assign(tslib_1.__assign({}, cached.data), { svg: hitSvg }) : cached.data; var _mjxId = cached._mjxId, rest = tslib_1.__rest(cached, ["_mjxId"]); var fmt_1 = buildFormatOutputs({ outputFormat: outputFormat, inputLatex: token.inputLatex, renderedHtml: hitHtml, renderedData: hitData, }); return tslib_1.__assign(tslib_1.__assign(tslib_1.__assign(tslib_1.__assign({}, rest), { html: hitHtml, data: hitData }), fmt_1), { _labelsRegistered: true }); } } // 3) AsciiMath extraction requested if (token.return_asciimath) { var typeset_2 = mathjax_1.MathJax.TypesetSvgAndAscii(math, { display: isBlock, metric: { cwidth: containerWidth }, outMath: buildAsciiOutMath(options.outMath, token), mathJax: options.mathJax, accessibility: options.accessibility, renderingErrors: options.renderingErrors, }); var fmt_2 = buildFormatOutputs({ outputFormat: outputFormat, inputLatex: token.inputLatex, renderedHtml: typeset_2.html, renderedData: typeset_2.data, }); return tslib_1.__assign(tslib_1.__assign({}, fmt_2), { ascii: typeset_2.ascii, linear: typeset_2.linear, ascii_tsv: typeset_2.ascii_tsv, ascii_csv: typeset_2.ascii_csv, ascii_md: typeset_2.ascii_md, labels: typeset_2.labels }); } // 4) Default TeX typesetting. skipMathToHtml → force include_svg off; memoized clone keeps hot path allocation-free. var outMathForTypeset = ((_c = options.outMath) === null || _c === void 0 ? void 0 : _c.skipMathToHtml) && ((_d = options.outMath) === null || _d === void 0 ? void 0 : _d.include_svg) !== false ? 
getOutMathWithSvgOff(options.outMath) : options.outMath; var typeset = mathjax_1.MathJax.Typeset(math, { display: isBlock, metric: { cwidth: containerWidth }, outMath: outMathForTypeset, mathJax: options.mathJax, forDocx: options.forDocx, forPptx: options.forPptx, accessibility: options.accessibility, nonumbers: options.nonumbers, renderingErrors: options.renderingErrors, }); var rawResult = { html: typeset.html, data: typeset.data, ascii: typeset.ascii, linear: typeset.linear, ascii_tsv: typeset.ascii_tsv, ascii_csv: typeset.ascii_csv, ascii_md: typeset.ascii_md, labels: typeset.labels, }; if (isCacheable && cache) { var mjxIdMatch = (_e = rawResult.html) === null || _e === void 0 ? void 0 : _e.match(RE_MJX_ASSISTIVE_ID); var entry = tslib_1.__assign(tslib_1.__assign({}, rawResult), { data: rawResult.data ? tslib_1.__assign({}, rawResult.data) : rawResult.data, labels: rawResult.labels ? tslib_1.__assign({}, rawResult.labels) : rawResult.labels }); if (mjxIdMatch && mjxIdMatch[1]) entry._mjxId = mjxIdMatch[1]; cache.set(math, entry); } var fmt = buildFormatOutputs({ outputFormat: outputFormat, inputLatex: token.inputLatex, renderedHtml: rawResult.html || '', renderedData: rawResult.data, }); return tslib_1.__assign(tslib_1.__assign({}, rawResult), fmt); }; /** * Converts a math token into HTML and attaches MathJax metadata to the token. * Also extracts equation labels and stores them in the shared labels list. */ var convertMathToHtml = function (state, token, options) { var math = token.content; var beginNumber = mathjax_1.MathJax.GetLastEquationNumber() + 1; try { var containerWidth = (options === null || options === void 0 ? void 0 : options.width) && options.width > 0 ? 
options.width : (0, utils_1.getWidthFromDocument)(1200); var res = typesetMathForToken({ state: state, token: token, math: math, isBlock: token.type !== 'inline_math', beginNumber: beginNumber, containerWidth: containerWidth, options: options, }); applyTypesetResultToToken(token, res, options); // After typesetting, equation counter may have advanced. var number = mathjax_1.MathJax.GetLastEquationNumber(); // Collect labels (e.g. \label{...}) so we can resolve refs later. // On cache hit the labels were already parsed & registered on first miss; // re-running inline.parse + addIntoLabelsList is idempotent for the same // key+content, so we skip the loop and only recompute idLabels. var idLabels = ''; if (token.labels) { var labelKeys = Object.keys(token.labels); idLabels = labelKeys.length ? encodeURIComponent(labelKeys.join('_')) : ''; if (!res._labelsRegistered) { for (var key in token.labels) { var tagContent = token.labels[key].tag; // Parse label content as inline markdown-it tokens // so we can render it consistently in UI. var tagChildrenTokens = []; state.md.inline.parse(tagContent, state.md, state.env, tagChildrenTokens); (0, labels_1.addIntoLabelsList)({ key: key, id: idLabels, tag: tagContent, tagId: token.labels[key].id, tagChildrenTokens: tagChildrenTokens, type: labels_1.eLabelType.equation }); } } } token.idLabels = idLabels; token.number = number; token.begin_number = beginNumber; // attrNumber is "current equation number range" used by render rules token.attrNumber = beginNumber >= number ? number.toString() : "".concat(beginNumber, ",").concat(number); return token; } catch (e) { console.error('ERROR [convertMathToHtml] MathJax =>', e.message, e); (0, utils_2.formatMathJaxError)(e, math, 'convertMathToHtml'); token.error = { message: e.message, error: e }; return token; } }; exports.convertMathToHtml = convertMathToHtml; //# sourceMappingURL=convert-math-to-html.js.map