UNPKG

mathml2omml

Version:
1,665 lines (1,577 loc) 68 kB
// Generated using scripts/write-decode-map.ts
// Packed decode trie for the XML named entities. Each UTF-16 code unit of the
// string literal becomes one Uint16 trie cell; the value cells (\u4026, \u4027,
// \u403e, \u403c, \u4022) carry the code points of & ' > < and ".
const xmlDecodeTree = /* #__PURE__ */ new Uint16Array(
// prettier-ignore
/* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
    .split("")
    .map((c) => c.charCodeAt(0)));

// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
// Scratch variable used by the `fromCodePoint` initializer below.
var _a;
/**
 * Replacement table consulted by `replaceCodePoint`: numeric references to
 * the listed code points are emitted as the mapped code point instead.
 */
const decodeMap = new Map([
    // A reference to code point 0 becomes U+FFFD (65533).
    [0, 65533],
    // C1 Unicode control character reference replacements
    [128, 8364],
    [130, 8218],
    [131, 402],
    [132, 8222],
    [133, 8230],
    [134, 8224],
    [135, 8225],
    [136, 710],
    [137, 8240],
    [138, 352],
    [139, 8249],
    [140, 338],
    [142, 381],
    [145, 8216],
    [146, 8217],
    [147, 8220],
    [148, 8221],
    [149, 8226],
    [150, 8211],
    [151, 8212],
    [152, 732],
    [153, 8482],
    [154, 353],
    [155, 8250],
    [156, 339],
    [158, 382],
    [159, 376],
]);
/**
 * Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
 *
 * The native implementation is used when present; otherwise the fallback
 * splits code points above 65535 into a surrogate pair by hand.
 */
const fromCodePoint = 
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
(_a = String.fromCodePoint) !== null && _a !== void 0 ? _a : function (codePoint) {
    let output = "";
    if (codePoint > 65535) {
        // Emit the high surrogate first, then reduce `codePoint` to the low surrogate.
        codePoint -= 65536;
        output += String.fromCharCode(((codePoint >>> 10) & 1023) | 55296);
        codePoint = 56320 | (codePoint & 1023);
    }
    output += String.fromCharCode(codePoint);
    return output;
};
/**
 * Replace the given code point with a replacement character if it is a
 * surrogate or is outside the valid range. Otherwise return the code
 * point unchanged.
 *
 * @param codePoint The numeric value of a character reference.
 * @returns 65533 (U+FFFD) for surrogates (55296-57343) and values above
 *     1114111, the `decodeMap` entry if one exists, else `codePoint`.
 */
function replaceCodePoint(codePoint) {
    var _a;
    if ((codePoint >= 55296 && codePoint <= 57343) || codePoint > 1114111) {
        return 65533;
    }
    return (_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint;
}
/** Character codes the decoder compares against (two-way numeric enum). */
var CharCodes;
(function (CharCodes) {
    CharCodes[CharCodes["NUM"] = 35] = "NUM";
    CharCodes[CharCodes["SEMI"] = 59] = "SEMI";
    CharCodes[CharCodes["EQUALS"] = 61] = "EQUALS";
    CharCodes[CharCodes["ZERO"] = 48] = "ZERO";
    CharCodes[CharCodes["NINE"] = 57] = "NINE";
    CharCodes[CharCodes["LOWER_A"] = 97] = "LOWER_A";
    CharCodes[CharCodes["LOWER_F"] = 102] = "LOWER_F";
    CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X";
    CharCodes[CharCodes["LOWER_Z"] = 122] = "LOWER_Z";
    CharCodes[CharCodes["UPPER_A"] = 65] = "UPPER_A";
    CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
    CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
})(CharCodes || (CharCodes = {}));
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
const TO_LOWER_BIT = 32;
/** Bit masks used to unpack one 16-bit cell of the decode trie. */
var BinTrieFlags;
(function (BinTrieFlags) {
    // 49152 = 0xC000: the top two bits; consumers shift the masked value right by 14.
    BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
    // 16256 = 0x3F80: bits 7-13; consumers shift the masked value right by 7.
    BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
    // 127 = 0x7F: the low seven bits.
    BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
})(BinTrieFlags || (BinTrieFlags = {}));
/** Returns true when `code` is the char code of an ASCII digit `0`-`9`. */
function isNumber(code) {
    return code >= CharCodes.ZERO && code <= CharCodes.NINE;
}
/**
 * Returns true when `code` is one of the letters `A`-`F` or `a`-`f`.
 * Digits are not covered here; callers combine this with `isNumber`.
 */
function isHexadecimalCharacter(code) {
    return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) ||
        (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F));
}
/** Returns true when `code` is an ASCII letter (either case) or an ASCII digit. */
function isAsciiAlphaNumeric(code) {
    return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) ||
        (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) ||
        isNumber(code));
}
/**
 * Checks if the given character is a valid end character for an entity in an attribute.
 *
 * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 */
function isEntityInAttributeInvalidEnd(code) {
    return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
}
/** States of the `EntityDecoder` state machine (two-way numeric enum). */
var EntityDecoderState;
(function (EntityDecoderState) {
    EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart";
    EntityDecoderState[EntityDecoderState["NumericStart"] = 1] = "NumericStart";
    EntityDecoderState[EntityDecoderState["NumericDecimal"] = 2] = "NumericDecimal";
    EntityDecoderState[EntityDecoderState["NumericHex"] = 3] = "NumericHex";
    EntityDecoderState[EntityDecoderState["NamedEntity"] = 4] = "NamedEntity";
})(EntityDecoderState || (EntityDecoderState = {}));
/** Controls which entity terminations the decoder accepts. */
var DecodingMode;
(function (DecodingMode) {
    /** Entities in text nodes that can end with any character. */
    DecodingMode[DecodingMode["Legacy"] = 0] = "Legacy";
    /** Only allow entities terminated with a semicolon. */
    DecodingMode[DecodingMode["Strict"] = 1] = "Strict";
    /** Entities in attributes have limitations on ending characters. */
    DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute";
})(DecodingMode || (DecodingMode = {}));
/**
 * Token decoder with support of writing partial entities.
 *
 * Call `startEntity` before each entity, feed input with `write` (which may
 * be called repeatedly while it returns -1), and call `end` once the input
 * is exhausted.
 */
class EntityDecoder {
    constructor(
    /** The tree used to decode entities. */
    decodeTree, 
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For multi-byte named entities, this will be called multiple times,
     * with the second codepoint, and the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of bytes consumed by the decoder.
     */
    emitCodePoint, 
    /** An object that is used to produce errors. */
    errors) {
        this.decodeTree = decodeTree;
        this.emitCodePoint = emitCodePoint;
        this.errors = errors;
        /** The current state of the decoder. */
        this.state = EntityDecoderState.EntityStart;
        /** Characters that were consumed while parsing an entity. */
        this.consumed = 1;
        /**
         * The result of the entity.
         *
         * Either the decode-tree index of the last matched named entity, or
         * the accumulated code point of a numeric entity.
         */
        this.result = 0;
        /** The current index in the decode tree. */
        this.treeIndex = 0;
        /** The number of characters that were consumed in excess. */
        this.excess = 1;
        /** The mode in which the decoder is operating. */
        this.decodeMode = DecodingMode.Strict;
    }
    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The `DecodingMode` to use for the next entity.
     */
    startEntity(decodeMode) {
        this.decodeMode = decodeMode;
        this.state = EntityDecoderState.EntityStart;
        this.result = 0;
        this.treeIndex = 0;
        this.excess = 1;
        this.consumed = 1;
    }
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input, offset) {
        switch (this.state) {
            case EntityDecoderState.EntityStart: {
                // A leading `#` selects the numeric path; anything else is a named entity.
                if (input.charCodeAt(offset) === CharCodes.NUM) {
                    this.state = EntityDecoderState.NumericStart;
                    this.consumed += 1;
                    return this.stateNumericStart(input, offset + 1);
                }
                this.state = EntityDecoderState.NamedEntity;
                return this.stateNamedEntity(input, offset);
            }
            case EntityDecoderState.NumericStart: {
                return this.stateNumericStart(input, offset);
            }
            case EntityDecoderState.NumericDecimal: {
                return this.stateNumericDecimal(input, offset);
            }
            case EntityDecoderState.NumericHex: {
                return this.stateNumericHex(input, offset);
            }
            case EntityDecoderState.NamedEntity: {
                return this.stateNamedEntity(input, offset);
            }
        }
    }
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericStart(input, offset) {
        if (offset >= input.length) {
            return -1;
        }
        // OR-ing in TO_LOWER_BIT makes the comparison accept both `x` and `X`.
        if ((input.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
            this.state = EntityDecoderState.NumericHex;
            this.consumed += 1;
            return this.stateNumericHex(input, offset + 1);
        }
        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(input, offset);
    }
    /**
     * Folds the digits in `input[start, end)` into `this.result` and adds
     * their count to `this.consumed`. Does nothing when the range is empty.
     *
     * @param input The string containing the digits.
     * @param start Index of the first digit.
     * @param end Index one past the last digit.
     * @param base The radix of the digits (callers pass 10 or 16).
     */
    addToNumericResult(input, start, end, base) {
        if (start !== end) {
            const digitCount = end - start;
            // Shift the previously accumulated value left by `digitCount` digits
            // so that partial writes combine into one number.
            this.result =
                this.result * Math.pow(base, digitCount) +
                    Number.parseInt(input.substr(start, digitCount), base);
            this.consumed += digitCount;
        }
    }
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericHex(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char) || isHexadecimalCharacter(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 16);
                // 3 = the characters `&#x` that precede the first digit.
                return this.emitNumericEntity(char, 3);
            }
        }
        // Ran out of input: keep what was parsed so a later write can continue.
        this.addToNumericResult(input, startIndex, offset, 16);
        return -1;
    }
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericDecimal(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 10);
                // 2 = the characters `&#` that precede the first digit.
                return this.emitNumericEntity(char, 2);
            }
        }
        // Ran out of input: keep what was parsed so a later write can continue.
        this.addToNumericResult(input, startIndex, offset, 10);
        return -1;
    }
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *   entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *   consumed. Used to validate that at least one digit
     *   was consumed.
     * @returns The number of characters that were consumed.
     */
    emitNumericEntity(lastCp, expectedLength) {
        var _a;
        // Ensure we consumed at least one digit.
        if (this.consumed <= expectedLength) {
            (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
            return 0;
        }
        // Figure out if this is a legit end of the entity
        if (lastCp === CharCodes.SEMI) {
            this.consumed += 1;
        }
        else if (this.decodeMode === DecodingMode.Strict) {
            // Strict mode rejects references that lack the closing semicolon.
            return 0;
        }
        this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
        if (this.errors) {
            if (lastCp !== CharCodes.SEMI) {
                this.errors.missingSemicolonAfterCharacterReference();
            }
            this.errors.validateNumericCharacterReference(this.result);
        }
        return this.consumed;
    }
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNamedEntity(input, offset) {
        const { decodeTree } = this;
        let current = decodeTree[this.treeIndex];
        // The mask is the number of bytes of the value, including the current byte.
        let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
        for (; offset < input.length; offset++, this.excess++) {
            const char = input.charCodeAt(offset);
            this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
            if (this.treeIndex < 0) {
                // No branch for this character: either nothing matched so far
                // (return 0) or fall back to the last complete match.
                return this.result === 0 ||
                    // If we are parsing an attribute
                    (this.decodeMode === DecodingMode.Attribute &&
                        // We shouldn't have consumed any characters after the entity,
                        (valueLength === 0 ||
                            // And there should be no invalid characters.
                            isEntityInAttributeInvalidEnd(char)))
                    ? 0
                    : this.emitNotTerminatedNamedEntity();
            }
            current = decodeTree[this.treeIndex];
            valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
            // If the branch is a value, store it and continue
            if (valueLength !== 0) {
                // If the entity is terminated by a semicolon, we are done.
                if (char === CharCodes.SEMI) {
                    return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
                }
                // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
                if (this.decodeMode !== DecodingMode.Strict) {
                    this.result = this.treeIndex;
                    this.consumed += this.excess;
                    this.excess = 0;
                }
            }
        }
        return -1;
    }
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    emitNotTerminatedNamedEntity() {
        var _a;
        const { result, decodeTree } = this;
        const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
        this.emitNamedEntityData(result, valueLength, this.consumed);
        (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
        return this.consumed;
    }
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    emitNamedEntityData(result, valueLength, consumed) {
        const { decodeTree } = this;
        // A length of 1 means the value shares the node's cell (flag bits
        // masked off); otherwise the value starts in the following cell.
        this.emitCodePoint(valueLength === 1
            ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
            : decodeTree[result + 1], consumed);
        if (valueLength === 3) {
            // For multi-byte values, we need to emit the second byte.
            this.emitCodePoint(decodeTree[result + 2], consumed);
        }
        return consumed;
    }
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end() {
        var _a;
        switch (this.state) {
            case EntityDecoderState.NamedEntity: {
                // Emit a named entity if we have one.
                return this.result !== 0 &&
                    (this.decodeMode !== DecodingMode.Attribute ||
                        this.result === this.treeIndex)
                    ? this.emitNotTerminatedNamedEntity()
                    : 0;
            }
            // Otherwise, emit a numeric entity if we have one.
            case EntityDecoderState.NumericDecimal: {
                return this.emitNumericEntity(0, 2);
            }
            case EntityDecoderState.NumericHex: {
                return this.emitNumericEntity(0, 3);
            }
            case EntityDecoderState.NumericStart: {
                (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
                return 0;
            }
            case EntityDecoderState.EntityStart: {
                // Return 0 if we have no entity.
                return 0;
            }
        }
    }
}
/**
 * Creates a function that decodes entities in a string.
 *
 * @param decodeTree The decode tree.
 * @returns A function that decodes entities in a string.
*/ function getDecoder(decodeTree) { let returnValue = ""; const decoder = new EntityDecoder(decodeTree, (data) => (returnValue += fromCodePoint(data))); return function decodeWithTrie(input, decodeMode) { let lastIndex = 0; let offset = 0; while ((offset = input.indexOf("&", offset)) >= 0) { returnValue += input.slice(lastIndex, offset); decoder.startEntity(decodeMode); const length = decoder.write(input, // Skip the "&" offset + 1); if (length < 0) { lastIndex = offset + decoder.end(); break; } lastIndex = offset + length; // If `length` is 0, skip the current `&` and continue. offset = length === 0 ? lastIndex + 1 : lastIndex; } const result = returnValue + input.slice(lastIndex); // Make sure we don't keep a reference to the final string. returnValue = ""; return result; }; } /** * Determines the branch of the current node that is taken given the current * character. This function is used to traverse the trie. * * @param decodeTree The trie. * @param current The current node. * @param nodeIdx The index right after the current node and its value. * @param char The current character. * @returns The index of the next node, or -1 if no branch is taken. */ function determineBranch(decodeTree, current, nodeIndex, char) { const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7; const jumpOffset = current & BinTrieFlags.JUMP_TABLE; // Case 1: Single branch encoded in jump offset if (branchCount === 0) { return jumpOffset !== 0 && char === jumpOffset ? nodeIndex : -1; } // Case 2: Multiple branches encoded in jump table if (jumpOffset) { const value = char - jumpOffset; return value < 0 || value >= branchCount ? -1 : decodeTree[nodeIndex + value] - 1; } // Case 3: Multiple branches encoded in dictionary // Binary search for the character. 
let lo = nodeIndex; let hi = lo + branchCount - 1; while (lo <= hi) { const mid = (lo + hi) >>> 1; const midValue = decodeTree[mid]; if (midValue < char) { lo = mid + 1; } else if (midValue > char) { hi = mid - 1; } else { return decodeTree[mid + branchCount]; } } return -1; } const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree); /** * Decodes an XML string, requiring all entities to be terminated by a semicolon. * * @param xmlString The string to decode. * @returns The decoded string. */ function decodeXML(xmlString) { return xmlDecoder(xmlString, DecodingMode.Strict); } const attrRE = /\s([^'"/\s><]+?)[\s/>]|([^\s=]+)=\s?(".*?"|'.*?')/g; function stringify$1(tag) { const res = { type: 'tag', name: '', voidElement: false, attribs: {}, children: [] }; const tagMatch = tag.match(/<\/?([^\s]+?)[/\s>]/); if (tagMatch) { res.name = tagMatch[1]; if (tag.charAt(tag.length - 2) === '/') { res.voidElement = true; } // handle comment tag if (res.name.startsWith('!--')) { const endIndex = tag.indexOf('-->'); return { type: 'comment', comment: endIndex !== -1 ? 
tag.slice(4, endIndex) : '' } } } const reg = new RegExp(attrRE); let result = null; for (;;) { result = reg.exec(tag); if (result === null) { break } if (!result[0].trim()) { continue } if (result[1]) { const attr = result[1].trim(); let arr = [attr, '']; if (attr.indexOf('=') > -1) { arr = attr.split('='); } res.attribs[arr[0]] = arr[1]; reg.lastIndex--; } else if (result[2]) { res.attribs[result[2]] = result[3].trim().substring(1, result[3].length - 1); } } return res } const tagRE = /<[a-zA-Z0-9\-!/](?:"[^"]*"|'[^']*'|[^'">])*>/g; const whitespaceRE = /^\s*$/; const textContainerNames = ['mtext', 'mi', 'mn', 'mo', 'ms']; function parse(html, options = {}) { const result = []; const arr = []; let current; let level = -1; html.replace(tagRE, (tag, index) => { const isOpen = tag.charAt(1) !== '/'; const isComment = tag.startsWith('<!--'); const start = index + tag.length; const nextChar = html.charAt(start); let parent; if (isComment) { const comment = stringify$1(tag); // if we're at root, push new base node if (level < 0) { result.push(comment); return result } parent = arr[level]; parent.children.push(comment); return result } if (isOpen) { level++; current = stringify$1(tag); if (current.type === 'tag' && options.components?.[current.name]) { current.type = 'component'; } if ( textContainerNames.includes(current.name) && !current.voidElement && nextChar && nextChar !== '<' ) { const data = html.slice(start, html.indexOf('<', start)).trim(); current.children.push({ type: 'text', data: options.disableDecode ? data : decodeXML(data) }); } // if we're at root, push new base node if (level === 0) { result.push(current); } parent = arr[level - 1]; if (parent) { parent.children.push(current); } arr[level] = current; } if (!isOpen || current.voidElement) { if (level > -1 && (current.voidElement || current.name === tag.slice(2, -1))) { level--; // move current up a level to match the end tag current = level === -1 ? 
result : arr[level]; } if ( level > -1 && textContainerNames.includes[arr[level].name] && nextChar !== '<' && nextChar ) { // trailing text node parent = arr[level].children; // calculate correct end of the content slice in case there's // no tag after the text node. const end = html.indexOf('<', start); let data = html.slice(start, end === -1 ? undefined : end); // if a node is nothing but whitespace, collapse it as the spec states: // https://www.w3.org/TR/html4/struct/text.html#h-9.1 if (whitespaceRE.test(data)) { data = ' '; } // don't add whitespace-only text nodes if they would be trailing text nodes // or if they would be leading whitespace-only text nodes: // * end > -1 indicates this is not a trailing text node // * leading node is when level is -1 and parent has length 0 if ((end > -1 && level + parent.length >= 0) || data !== ' ') { parent.push({ type: 'text', data: options.disableDecode ? data : decodeXML(data) }); } } } }); return result } function attrString(attribs) { const buff = []; for (const key in attribs) { buff.push(`${key}="${attribs[key]}"`); } if (!buff.length) { return '' } return ` ${buff.join(' ')}` } function stringify(buff, doc) { switch (doc.type) { case 'text': return buff + doc.data case 'tag': { const voidElement = doc.voidElement || (!doc.children.length && doc.attribs['xml:space'] !== 'preserve'); buff += `<${doc.name}${doc.attribs ? attrString(doc.attribs) : ''}${voidElement ? 
'/>' : '>'}`; if (voidElement) { return buff } return `${buff + doc.children.reduce(stringify, '')}</${doc.name}>` } case 'comment': buff += `<!--${doc.comment}-->`; return buff } } function stringifyDoc(doc) { return doc.reduce((token, rootEl) => token + stringify('', rootEl), '') } function math(element, targetParent, previousSibling, nextSibling, ancestors) { targetParent.name = 'm:oMath'; targetParent.attribs = { 'xmlns:m': 'http://schemas.openxmlformats.org/officeDocument/2006/math', 'xmlns:w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' }; targetParent.type = 'tag'; targetParent.children = []; return targetParent } function semantics(element, targetParent, previousSibling, nextSibling, ancestors) { // Ignore as default behavior return targetParent } function menclose(element, targetParent, previousSibling, nextSibling, ancestors) { const type = element.attribs?.notation?.split(' ')[0] || 'longdiv'; const targetElement = { type: 'tag', name: 'm:e', attribs: {}, children: [] }; if (type === 'longdiv') { targetParent.children.push({ type: 'tag', name: 'm:rad', attribs: {}, children: [ { type: 'tag', name: 'm:radPr', attribs: {}, children: [{ type: 'tag', name: 'm:degHide', attribs: { 'm:val': 'on' }, children: [] }] }, { type: 'tag', name: 'm:deg', attribs: {}, children: [] }, targetElement ] }); } else { const hide = { t: { type: 'tag', name: 'm:hideTop', attribs: { 'm:val': 'on' }, children: [] }, b: { type: 'tag', name: 'm:hideBot', attribs: { 'm:val': 'on' }, children: [] }, l: { type: 'tag', name: 'm:hideLeft', attribs: { 'm:val': 'on' }, children: [] }, r: { type: 'tag', name: 'm:hideRight', attribs: { 'm:val': 'on' }, children: [] } }; const borderBoxPr = { type: 'tag', name: 'm:borderBoxPr', attribs: {}, children: [] }; const containerElement = { type: 'tag', name: 'm:borderBox', attribs: {}, children: [] }; switch (type) { case 'actuarial': case 'radical': case 'box': containerElement.children = [targetElement]; break case 'left': 
case 'roundedbox': borderBoxPr.children = [hide.t, hide.b, hide.r]; containerElement.children = [borderBoxPr, targetElement]; break case 'right': case 'circle': borderBoxPr.children = [hide.t, hide.b, hide.l]; containerElement.children = [borderBoxPr, targetElement]; break case 'top': borderBoxPr.children = [hide.b, hide.l, hide.r]; containerElement.children = [borderBoxPr, targetElement]; break case 'bottom': borderBoxPr.children = [hide.t, hide.l, hide.r]; containerElement.children = [borderBoxPr, targetElement]; break case 'updiagonalstrike': borderBoxPr.children = [ hide.t, hide.b, hide.l, hide.r, { type: 'tag', name: 'm:strikeBLTR', attribs: { 'm:val': 'on' }, children: [] } ]; containerElement.children = [borderBoxPr, targetElement]; break case 'downdiagonalstrike': borderBoxPr.children = [ hide.t, hide.b, hide.l, hide.r, { type: 'tag', name: 'm:strikeTLBR', attribs: { 'm:val': 'on' }, children: [] } ]; containerElement.children = [borderBoxPr, targetElement]; break case 'verticalstrike': borderBoxPr.children = [ hide.t, hide.b, hide.l, hide.r, { type: 'tag', name: 'm:strikeV', attribs: { 'm:val': 'on' }, children: [] } ]; containerElement.children = [borderBoxPr, targetElement]; break case 'horizontalstrike': borderBoxPr.children = [ hide.t, hide.b, hide.l, hide.r, { type: 'tag', name: 'm:strikeH', attribs: { 'm:val': 'on' }, children: [] } ]; containerElement.children = [borderBoxPr, targetElement]; break default: borderBoxPr.children = [hide.t, hide.b, hide.l, hide.r]; containerElement.children = [borderBoxPr, targetElement]; break } targetParent.children.push(containerElement); } return targetElement } function mfrac(element, targetParent, previousSibling, nextSibling, ancestors) { if (element.children.length !== 2) { // treat as mrow return targetParent } const numerator = element.children[0]; const denumerator = element.children[1]; const numeratorTarget = { name: 'm:num', type: 'tag', attribs: {}, children: [] }; const denumeratorTarget = { name: 
'm:den', type: 'tag', attribs: {}, children: [] }; ancestors = [...ancestors]; ancestors.unshift(element); walker(numerator, numeratorTarget, false, false, ancestors); walker(denumerator, denumeratorTarget, false, false, ancestors); const fracType = element.attribs?.linethickness === '0' ? 'noBar' : 'bar'; targetParent.children.push({ type: 'tag', name: 'm:f', attribs: {}, children: [ { type: 'tag', name: 'm:fPr', attribs: {}, children: [ { type: 'tag', name: 'm:type', attribs: { 'm:val': fracType }, children: [] } ] }, numeratorTarget, denumeratorTarget ] }); // Don't iterate over children in the usual way. } function mglyph(element, targetParent, previousSibling, nextSibling, ancestors) { // No support in omml. Output alt text. if (element.attribs?.alt) { targetParent.children.push({ type: 'text', data: element.attribs.alt }); } } function mmultiscripts(element, targetParent, previousSibling, nextSibling, ancestors) { if (element.children.length === 0) { // Don't use return } const base = element.children[0]; const postSubs = []; const postSupers = []; const preSubs = []; const preSupers = []; const children = element.children.slice(1); let dividerFound = false; children.forEach((child, index) => { if (child.name === 'mprescripts') { dividerFound = true; } else if (child.name !== 'none') { if (index % 2) { if (dividerFound) { preSubs.push(child); } else { postSupers.push(child); } } else { if (dividerFound) { preSupers.push(child); } else { postSubs.push(child); } } } }); ancestors = [...ancestors]; ancestors.unshift(element); const tempTarget = { children: [] }; walker(base, tempTarget, false, false, ancestors); let topTarget = tempTarget.children[0]; if (postSubs.length || postSupers.length) { const subscriptTarget = { name: 'm:sub', type: 'tag', attribs: {}, children: [] }; postSubs.forEach((subscript) => walker(subscript, subscriptTarget, false, false, ancestors)); const superscriptTarget = { name: 'm:sup', type: 'tag', attribs: {}, children: [] }; 
postSupers.forEach((superscript) => walker(superscript, superscriptTarget, false, false, ancestors) ); const topPostTarget = { type: 'tag', attribs: {}, children: [ { type: 'tag', name: 'm:e', attribs: {}, children: [topTarget] } ] }; if (postSubs.length && postSupers.length) { topPostTarget.name = 'm:sSubSup'; topPostTarget.children.push(subscriptTarget); topPostTarget.children.push(superscriptTarget); } else if (postSubs.length) { topPostTarget.name = 'm:sSub'; topPostTarget.children.push(subscriptTarget); } else { topPostTarget.name = 'm:sSup'; topPostTarget.children.push(superscriptTarget); } topTarget = topPostTarget; } if (preSubs.length || preSupers.length) { const preSubscriptTarget = { name: 'm:sub', type: 'tag', attribs: {}, children: [] }; preSubs.forEach((subscript) => walker(subscript, preSubscriptTarget, false, false, ancestors)); const preSuperscriptTarget = { name: 'm:sup', type: 'tag', attribs: {}, children: [] }; preSupers.forEach((superscript) => walker(superscript, preSuperscriptTarget, false, false, ancestors) ); const topPreTarget = { name: 'm:sPre', type: 'tag', attribs: {}, children: [ { name: 'm:e', type: 'tag', attribs: {}, children: [topTarget] }, preSubscriptTarget, preSuperscriptTarget ] }; topTarget = topPreTarget; } targetParent.children.push(topTarget); // Don't iterate over children in the usual way. 
} function mrow(element, targetParent, previousSibling, nextSibling, ancestors) { if (previousSibling.isNary) { const targetSibling = targetParent.children[targetParent.children.length - 1]; return targetSibling.children[targetSibling.children.length - 1] } // Ignore as default behavior return targetParent } function mspace(element, targetParent, previousSibling, nextSibling, ancestors) { targetParent.children.push({ name: 'm:r', type: 'tag', attribs: {}, children: [ { name: 'm:t', type: 'tag', attribs: { 'xml:space': 'preserve' }, children: [ { type: 'text', data: ' ' } ] } ] }); } function msqrt(element, targetParent, previousSibling, nextSibling, ancestors) { const targetElement = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; targetParent.children.push({ name: 'm:rad', type: 'tag', attribs: {}, children: [ { name: 'm:radPr', type: 'tag', attribs: {}, children: [ { name: 'm:degHide', type: 'tag', attribs: { 'm:val': 'on' }, children: [] } ] }, { name: 'm:deg', type: 'tag', attribs: {}, children: [] }, targetElement ] }); return targetElement } function mstyle(element, targetParent, previousSibling, nextSibling, ancestors) { // Ignore as default behavior return targetParent } function getTextContent(node, trim = true) { let returnString = ''; if (node.type === 'text') { let text = node.data.replace(/[\u2062]|[\u200B]/g, ''); if (trim) { text = text.trim(); } returnString += text; } else if (node.children) { node.children.forEach((subNode) => { returnString += getTextContent(subNode, trim); }); } return returnString } const NARY_REGEXP = /^[\u220f-\u2211]|[\u2229-\u2233]|[\u22c0-\u22c3]$/; const GROW_REGEXP = /^\u220f|\u2211|[\u2229-\u222b]|\u222e|\u222f|\u2232|\u2233|[\u22c0-\u22c3]$/; function getNary(node) { // Check if node contains only a nary operator. 
const text = getTextContent(node); if (NARY_REGEXP.test(text)) { return text } return false } function getNaryTarget(naryChar, element, type, subHide = false, supHide = false) { const stretchy = element.attribs?.stretchy; const grow = stretchy === 'true' ? '1' : stretchy === 'false' ? '0' : GROW_REGEXP.test(naryChar) ? '1' : '0'; return { type: 'tag', name: 'm:nary', attribs: {}, children: [ { type: 'tag', name: 'm:naryPr', attribs: {}, children: [ { type: 'tag', name: 'm:chr', attribs: { 'm:val': naryChar }, children: [] }, { type: 'tag', name: 'm:limLoc', attribs: { 'm:val': type }, children: [] }, { type: 'tag', name: 'm:grow', attribs: { 'm:val': grow }, children: [] }, { type: 'tag', name: 'm:subHide', attribs: { 'm:val': subHide ? 'on' : 'off' }, children: [] }, { type: 'tag', name: 'm:supHide', attribs: { 'm:val': supHide ? 'on' : 'off' }, children: [] } ] } ] } } function addScriptlevel(target, ancestors) { const scriptlevel = ancestors.find((ancestor) => ancestor.attribs?.scriptlevel)?.attribs ?.scriptlevel; if (['0', '1', '2'].includes(scriptlevel)) { target.children.unshift({ type: 'tag', name: 'm:argPr', attribs: {}, children: [ { type: 'tag', name: 'm:scrLvl', attribs: { 'm:val': scriptlevel }, children: [] } ] }); } } function msub(element, targetParent, previousSibling, nextSibling, ancestors) { // Subscript if (element.children.length !== 2) { // treat as mrow return targetParent } ancestors = [...ancestors]; ancestors.unshift(element); const base = element.children[0]; const subscript = element.children[1]; let topTarget; // // m:nAry // // Conditions: // 1. base text must be nary operator // 2. 
no accents const naryChar = getNary(base); if ( naryChar && element.attribs?.accent?.toLowerCase() !== 'true' && element.attribs?.accentunder?.toLowerCase() !== 'true' ) { topTarget = getNaryTarget(naryChar, element, 'subSup', false, true); element.isNary = true; } else { const baseTarget = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; walker(base, baseTarget, false, false, ancestors); topTarget = { type: 'tag', name: 'm:sSub', attribs: {}, children: [ { type: 'tag', name: 'm:sSubPr', attribs: {}, children: [ { type: 'tag', name: 'm:ctrlPr', attribs: {}, children: [] } ] }, baseTarget ] }; } const subscriptTarget = { name: 'm:sub', type: 'tag', attribs: {}, children: [] }; walker(subscript, subscriptTarget, false, false, ancestors); topTarget.children.push(subscriptTarget); if (element.isNary) { topTarget.children.push({ type: 'tag', name: 'm:sup', attribs: {}, children: [] }); topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] }); } targetParent.children.push(topTarget); // Don't iterate over children in the usual way. } function msubsup(element, targetParent, previousSibling, nextSibling, ancestors) { // Sub + superscript if (element.children.length !== 3) { // treat as mrow return targetParent } ancestors = [...ancestors]; ancestors.unshift(element); const base = element.children[0]; const subscript = element.children[1]; const superscript = element.children[2]; let topTarget; // // m:nAry // // Conditions: // 1. base text must be nary operator // 2. 
no accents const naryChar = getNary(base); if ( naryChar && element.attribs?.accent?.toLowerCase() !== 'true' && element.attribs?.accentunder?.toLowerCase() !== 'true' ) { topTarget = getNaryTarget(naryChar, element, 'subSup'); element.isNary = true; } else { // fallback: m:sSubSup const baseTarget = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; walker(base, baseTarget, false, false, ancestors); topTarget = { type: 'tag', name: 'm:sSubSup', attribs: {}, children: [ { type: 'tag', name: 'm:sSubSupPr', attribs: {}, children: [ { type: 'tag', name: 'm:ctrlPr', attribs: {}, children: [] } ] }, baseTarget ] }; } const subscriptTarget = { name: 'm:sub', type: 'tag', attribs: {}, children: [] }; const superscriptTarget = { name: 'm:sup', type: 'tag', attribs: {}, children: [] }; walker(subscript, subscriptTarget, false, false, ancestors); walker(superscript, superscriptTarget, false, false, ancestors); topTarget.children.push(subscriptTarget); topTarget.children.push(superscriptTarget); if (element.isNary) { topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] }); } targetParent.children.push(topTarget); // Don't iterate over children in the usual way. } function msup(element, targetParent, previousSibling, nextSibling, ancestors) { // Superscript if (element.children.length !== 2) { // treat as mrow return targetParent } ancestors = [...ancestors]; ancestors.unshift(element); const base = element.children[0]; const superscript = element.children[1]; let topTarget; // // m:nAry // // Conditions: // 1. base text must be nary operator // 2. 
no accents const naryChar = getNary(base); if ( naryChar && element.attribs?.accent?.toLowerCase() !== 'true' && element.attribs?.accentunder?.toLowerCase() !== 'true' ) { topTarget = getNaryTarget(naryChar, element, 'subSup', true); element.isNary = true; topTarget.children.push({ type: 'tag', name: 'm:sub' }); } else { const baseTarget = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; walker(base, baseTarget, false, false, ancestors); topTarget = { type: 'tag', name: 'm:sSup', attribs: {}, children: [ { type: 'tag', name: 'm:sSupPr', attribs: {}, children: [ { type: 'tag', name: 'm:ctrlPr', attribs: {}, children: [] } ] }, baseTarget ] }; } const superscriptTarget = { name: 'm:sup', type: 'tag', attribs: {}, children: [] }; walker(superscript, superscriptTarget, false, false, ancestors); topTarget.children.push(superscriptTarget); if (element.isNary) { topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] }); } targetParent.children.push(topTarget); // Don't iterate over children in the usual way. 
} function mtable(element, targetParent, previousSibling, nextSibling, ancestors) { const cellsPerRowCount = Math.max(...element.children.map((row) => row.children.length)); const targetElement = { name: 'm:m', type: 'tag', attribs: {}, children: [ { name: 'm:mPr', type: 'tag', attribs: {}, children: [ { name: 'm:baseJc', type: 'tag', attribs: { 'm:val': 'center' }, children: [] }, { name: 'm:plcHide', type: 'tag', attribs: { 'm:val': 'on' }, children: [] }, { name: 'm:mcs', type: 'tag', attribs: {}, children: [ { name: 'm:mc', type: 'tag', attribs: {}, children: [ { name: 'm:mcPr', type: 'tag', attribs: {}, children: [ { name: 'm:count', type: 'tag', attribs: { 'm:val': cellsPerRowCount.toString() }, children: [] }, { name: 'm:mcJc', type: 'tag', attribs: { 'm:val': 'center' }, children: [] } ] } ] } ] } ] } ] }; targetParent.children.push(targetElement); return targetElement } function mtd(element, targetParent, previousSibling, nextSibling, ancestors) { // table cell const targetElement = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; targetParent.children.push(targetElement); return targetElement } function mtr(element, targetParent, previousSibling, nextSibling, ancestors) { // table row const targetElement = { name: 'm:mr', type: 'tag', attribs: {}, children: [] }; targetParent.children.push(targetElement); return targetElement } function munderover(element, targetParent, previousSibling, nextSibling, ancestors) { // Munderover if (element.children.length !== 3) { // treat as mrow return targetParent } ancestors = [...ancestors]; ancestors.unshift(element); const base = element.children[0]; const underscript = element.children[1]; const overscript = element.children[2]; // // m:nAry // // Conditions: // 1. base text must be nary operator // 2. 
no accents const naryChar = getNary(base); if ( naryChar && element.attributes?.accent?.toLowerCase() !== 'true' && element.attributes?.accentunder?.toLowerCase() !== 'true' ) { const topTarget = getNaryTarget(naryChar, element, 'undOvr'); element.isNary = true; const subscriptTarget = { name: 'm:sub', type: 'tag', attribs: {}, children: [] }; const superscriptTarget = { name: 'm:sup', type: 'tag', attribs: {}, children: [] }; walker(underscript, subscriptTarget, false, false, ancestors); walker(overscript, superscriptTarget, false, false, ancestors); topTarget.children.push(subscriptTarget); topTarget.children.push(superscriptTarget); topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] }); targetParent.children.push(topTarget); return } // Fallback: m:limUpp()m:limlow const baseTarget = { name: 'm:e', type: 'tag', attribs: {}, children: [] }; walker(base, baseTarget, false, false, ancestors); const underscriptTarget = { name: 'm:lim', type: 'tag', attribs: {}, children: [] }; const overscriptTarget = { name: 'm:lim', type: 'tag', attribs: {}, children: [] }; walker(underscript, underscriptTarget, false, false, ancestors); walker(overscript, overscriptTarget, false, false, ancestors); targetParent.children.push({ type: 'tag', name: 'm:limUpp', attribs: {}, children: [ { type: 'tag', name: 'm:e', attribs: {}, children: [ { type: 'tag', name: 'm:limLow', attribs: {}, children: [baseTarget, underscriptTarget] } ] }, overscriptTarget ] }); // Don't iterate over children in the usual way. 
} function getStyle(element, ancestors, previousStyle = {}) { const elAttributes = element.attribs || {}; const color = elAttributes.mathcolor || ancestors.find( (element) => element.name === 'mstyle' && element.attribs && element.attribs.color )?.attribs.color || ''; // const minsize = parseFloat(elAttributes.scriptminsize || ancestors.find(element => element.name === 'mstyle' && element.attribs && element.attribs.scriptminsize)?.attribs.scriptminsize || '8pt') // const sizemultiplier = parseFloat(elAttributes.scriptsizemultiplier || ancestors.find(element => element.name === 'mstyle' && element.attribs && element.attribs.scriptsizemultiplier)?.attribs.scriptsizemultiplier || '0.71') const size = elAttributes.mathsize || ancestors.find( (element) => element.name === 'mstyle' && element.attribs && element.attribs.mathsize )?.attribs.mathsize || ''; const scriptlevel = elAttributes.scriptlevel || ancestors.find( (element) => element.name === 'mstyle' && element.attribs && element.attribs.scriptlevel )?.attribs.scriptlevel || ''; const background = elAttributes.mathbackground || ancestors.find( (element) => elemen