UNPKG

mathpix-markdown-it

Version:

Mathpix-markdown-it is an open source implementation of the mathpix-markdown spec written in Typescript. It relies on the following open source libraries: MathJax v3 (to render math with SVGs), markdown-it (for standard Markdown parsing)

223 lines 9.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.validateMathMLShallow = void 0; var tslib_1 = require("tslib"); var VALIDATION_REASON = { EMPTY: 'empty', TOO_LARGE: 'too_large', DOCTYPE: 'doctype', SCRIPT: 'script', JS_URI: 'js_uri', EVENT_ATTR: 'event_attr', NO_MATH: 'no_math', UNCLOSED_MATH: 'unclosed_math', BAD_XMLNS: 'bad_xmlns', BAD_TAG: 'bad_tag', UNKNOWN_TAG: 'unknown_tag', MISMATCH: 'mismatch', TOO_MANY_NODES: 'too_many_nodes', TOO_DEEP: 'too_deep', ROOT_NOT_MATH: 'root_not_math', TEXT_OUTSIDE_TEXT_CONTAINER: 'text_outside_text_container', UNCLOSED_COMMENT: 'unclosed_comment', UNCLOSED_CDATA: 'unclosed_cdata', UNCLOSED_PI: 'unclosed_pi', UNCLOSED_DECL: 'unclosed_decl', UNCLOSED_QUOTE: 'unclosed_quote', UNCLOSED_TAG: 'unclosed_tag', UNCLOSED_TAGS: 'unclosed_tags' }; var DEFAULT_VALIDATION_LIMITS = { maxBytes: 1000000, maxDepth: 80, maxNodes: 50000 }; var MATHML_XMLNS = 'http://www.w3.org/1998/Math/MathML'; // Allowed MathML tags that MathJax definitely understands (presentation + semantics) var MATHML_ALLOWED_TAGS = new Set([ 'math', 'mrow', 'mi', 'mn', 'mo', 'ms', 'mtext', 'mspace', 'mfrac', 'msqrt', 'mroot', 'msub', 'msup', 'msubsup', 'munder', 'mover', 'munderover', 'mmultiscripts', 'mprescripts', 'none', 'mtable', 'mtr', 'mlabeledtr', 'mtd', 'mstyle', 'merror', 'mpadded', 'mphantom', 'menclose', 'mfenced', 'semantics', 'annotation', 'annotation-xml' ]); // Tags that allow a visible text node inside var MATHML_TEXT_CONTAINERS = new Set(['mi', 'mn', 'mo', 'mtext', 'ms']); var MATHML_ANNOTATION_TAGS = new Set(['annotation', 'annotation-xml']); var HAZARD_DOCTYPE_RE = /<\!DOCTYPE/i; var HAZARD_SCRIPT_RE = /<script\b/i; var HAZARD_JS_URI_RE = /javascript:/i; var HAZARD_EVENT_ATTR_RE = /\son[a-z]+\s*=/i; var OPEN_MATH_TAG_RE = /<math\b[^>]*>/i; var XMLNS_ATTR_RE = /\bxmlns\s*=\s*"([^"]*)"/i; // Single "wide" tokenizer: comments / CDATA / tags / text var MATHML_TOKEN_RE = /<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>|<\/?[A-Za-z][\w:.-]*\b[^>]*>|[^<]+/g; var MATH_OPEN = '<math'; var END_MATH = '</math>'; var hasUnclosedAll = function (hay, open, close) { var from = 0; for (;;) { var i = hay.indexOf(open, from); if (i < 0) return false; var j = hay.indexOf(close, i + open.length); if (j < 0) return true; from = j + close.length; } }; /** * Shallow & fast MathML pre-validation for MathJax (SVG). * * - Extracts the first `<math>…</math>` fragment. * - Blocks obvious hazards (doctype, script, javascript: URLs, event handlers). * - Ensures balanced tags with a tiny stack (not a full XML parser). * - Allows visible text only inside `mi/mn/mo/mtext/ms`. * - Ignores content inside `<annotation>` / `<annotation-xml>`. * * Not a full XML/DTD validator. Intended to be extremely fast and safe before * passing MathML to MathJax. O(N) over the `<math>` fragment, no allocations * beyond a small stack. */ var validateMathMLShallow = function (source, limits) { var _a, _b, _c; if (limits === void 0) { limits = {}; } var _d = tslib_1.__assign(tslib_1.__assign({}, DEFAULT_VALIDATION_LIMITS), limits), maxBytes = _d.maxBytes, maxDepth = _d.maxDepth, maxNodes = _d.maxNodes; if (typeof source !== 'string' || !source) { return { ok: false, reason: VALIDATION_REASON.EMPTY }; } if (source.length > maxBytes) { return { ok: false, reason: VALIDATION_REASON.TOO_LARGE }; } // Let's take the first <math>…</math> var startIndex = source.indexOf(MATH_OPEN); if (startIndex < 0) { return { ok: false, reason: VALIDATION_REASON.NO_MATH }; } var endIndex = source.indexOf(END_MATH, startIndex); if (endIndex < 0) { return { ok: false, reason: VALIDATION_REASON.UNCLOSED_MATH }; } var mathFragment = source.slice(startIndex, endIndex + END_MATH.length); // preflight on "stuck" sections if (mathFragment.includes('<!--') && hasUnclosedAll(mathFragment, '<!--', '-->')) { return { ok: false, reason: VALIDATION_REASON.UNCLOSED_COMMENT }; } if (mathFragment.includes('<![CDATA[') && hasUnclosedAll(mathFragment, '<![CDATA[', ']]>')) { return { ok: false, reason: VALIDATION_REASON.UNCLOSED_CDATA }; } if (mathFragment.includes('<?') && hasUnclosedAll(mathFragment, '<?', '?>')) { return { ok: false, reason: VALIDATION_REASON.UNCLOSED_PI }; } // simple declaration <!FOO …> if (/\<![A-Z]/.test(mathFragment)) { var i = mathFragment.search(/\<![A-Z]/); if (i >= 0 && mathFragment.indexOf('>', i + 2) < 0) return { ok: false, reason: VALIDATION_REASON.UNCLOSED_DECL }; } if (HAZARD_DOCTYPE_RE.test(mathFragment)) return { ok: false, reason: VALIDATION_REASON.DOCTYPE }; if (HAZARD_SCRIPT_RE.test(mathFragment)) return { ok: false, reason: VALIDATION_REASON.SCRIPT }; if (HAZARD_JS_URI_RE.test(mathFragment)) return { ok: false, reason: VALIDATION_REASON.JS_URI }; if (HAZARD_EVENT_ATTR_RE.test(mathFragment)) return { ok: false, reason: VALIDATION_REASON.EVENT_ATTR }; // Check xmlns at root (optionally strict) var rootOpenTag = ((_a = OPEN_MATH_TAG_RE.exec(mathFragment)) === null || _a === void 0 ? void 0 : _a[0]) || ''; var rootXmlns = (_b = XMLNS_ATTR_RE.exec(rootOpenTag)) === null || _b === void 0 ? void 0 : _b[1]; if (rootXmlns && rootXmlns !== MATHML_XMLNS) { return { ok: false, reason: VALIDATION_REASON.BAD_XMLNS, extra: rootXmlns }; } var tagStack = []; var nodeCount = 0; var seenMathOpen = false; var seenMathClose = false; var annotationDepth = 0; //inside <annotation> / <annotation-xml> skip the content MATHML_TOKEN_RE.lastIndex = 0; for (var m = void 0; (m = MATHML_TOKEN_RE.exec(mathFragment));) { var token = m[0]; // Comments if (token.startsWith('<!--')) continue; // CDATA as text (outside annotations - only in text containers) if (token.startsWith('<![CDATA[')) { if (annotationDepth > 0) continue; var parent_1 = tagStack[tagStack.length - 1]; if (!MATHML_TEXT_CONTAINERS.has(parent_1)) { return { ok: false, reason: VALIDATION_REASON.TEXT_OUTSIDE_TEXT_CONTAINER }; } continue; } // Tags if (token[0] === '<') { var isClosingTag = token[1] === '/'; var isSelfClosing = !isClosingTag && /\/>$/.test(token); var rawName = (_c = /^<\/?\s*([A-Za-z][\w:.-]*)/.exec(token)) === null || _c === void 0 ? void 0 : _c[1]; if (!rawName) return { ok: false, reason: VALIDATION_REASON.BAD_TAG }; // remove namespace prefix (mml:mi -> mi), convert to lower var localName = rawName.toLowerCase().replace(/^[a-z][\w-]*:/, ''); // accounting of annotation zones var isAnno = MATHML_ANNOTATION_TAGS.has(localName); if (!isClosingTag && isAnno && !isSelfClosing) annotationDepth++; else if (isClosingTag && isAnno && annotationDepth) annotationDepth--; // outside annotations - only known MathML tags if (annotationDepth === 0 && !MATHML_ALLOWED_TAGS.has(localName)) { return { ok: false, reason: VALIDATION_REASON.UNKNOWN_TAG, extra: localName }; } if (isClosingTag) { var top_1 = tagStack.pop(); if (!top_1 || top_1 !== localName) { return { ok: false, reason: VALIDATION_REASON.MISMATCH, extra: "".concat(top_1 || 'none', "!=").concat(localName) }; } if (localName === 'math' && tagStack.length === 0) seenMathClose = true; } else if (!isSelfClosing) { tagStack.push(localName); nodeCount++; if (!seenMathOpen) { if (localName !== 'math') return { ok: false, reason: VALIDATION_REASON.ROOT_NOT_MATH }; seenMathOpen = true; } if (nodeCount > maxNodes) return { ok: false, reason: VALIDATION_REASON.TOO_MANY_NODES }; if (tagStack.length > maxDepth) return { ok: false, reason: VALIDATION_REASON.TOO_DEEP }; } else { // self-closing nodeCount++; if (nodeCount > maxNodes) return { ok: false, reason: VALIDATION_REASON.TOO_MANY_NODES }; if (!seenMathOpen && localName !== 'math') return { ok: false, reason: VALIDATION_REASON.ROOT_NOT_MATH }; if (!seenMathOpen && localName === 'math') { seenMathOpen = true; seenMathClose = true; } } continue; } // Text between tags: only in text containers (outside annotations) if (annotationDepth > 0) continue; if (!token.trim()) continue; var parent_2 = tagStack[tagStack.length - 1]; if (!MATHML_TEXT_CONTAINERS.has(parent_2)) { return { ok: false, reason: VALIDATION_REASON.TEXT_OUTSIDE_TEXT_CONTAINER }; } } if (!seenMathOpen) return { ok: false, reason: VALIDATION_REASON.NO_MATH }; if (!seenMathClose || tagStack.length) return { ok: false, reason: VALIDATION_REASON.UNCLOSED_TAGS }; return { ok: true }; }; exports.validateMathMLShallow = validateMathMLShallow; //# sourceMappingURL=validate-mathML.js.map