mathml2omml
Version:
a MathML to OMML converter
1,665 lines (1,577 loc) • 68 kB
JavaScript
// Generated using scripts/write-decode-map.ts
// Decode tree handed to getDecoder() below; every UTF-16 code unit of the
// literal becomes one 16-bit slot of the typed array.
const xmlDecodeTree = /* #__PURE__ */ Uint16Array.from(
  // prettier-ignore
  "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
    .split(""),
  (unit) => unit.charCodeAt(0));
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
var _a;
/**
 * Substitutions applied to numeric character references: the referenced
 * code point (key) is replaced by the mapped code point (value).
 */
const decodeMap = new Map([
  [0x00, 0xfffd],
  // C1 Unicode control character reference replacements
  [0x80, 0x20ac],
  [0x82, 0x201a],
  [0x83, 0x0192],
  [0x84, 0x201e],
  [0x85, 0x2026],
  [0x86, 0x2020],
  [0x87, 0x2021],
  [0x88, 0x02c6],
  [0x89, 0x2030],
  [0x8a, 0x0160],
  [0x8b, 0x2039],
  [0x8c, 0x0152],
  [0x8e, 0x017d],
  [0x91, 0x2018],
  [0x92, 0x2019],
  [0x93, 0x201c],
  [0x94, 0x201d],
  [0x95, 0x2022],
  [0x96, 0x2013],
  [0x97, 0x2014],
  [0x98, 0x02dc],
  [0x99, 0x2122],
  [0x9a, 0x0161],
  [0x9b, 0x203a],
  [0x9c, 0x0153],
  [0x9e, 0x017e],
  [0x9f, 0x0178],
]);
/**
 * `String.fromCodePoint` when the runtime provides it; otherwise a fallback
 * that converts a single Unicode code point into a string, emitting a
 * surrogate pair for code points above 0xFFFF.
 */
const fromCodePoint =
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
  String.fromCodePoint ??
  function (codePoint) {
    if (codePoint <= 0xffff) {
      return "" + String.fromCharCode(codePoint);
    }
    // Split the supplementary-plane offset into its high and low 10-bit halves.
    const supplementary = codePoint - 0x10000;
    const highSurrogate = ((supplementary >>> 10) & 0x3ff) | 0xd800;
    const lowSurrogate = 0xdc00 | (supplementary & 0x3ff);
    return String.fromCharCode(highSurrogate) + String.fromCharCode(lowSurrogate);
  };
/**
 * Sanitises a decoded code point.
 *
 * Surrogates (0xD800-0xDFFF) and values above 0x10FFFF become U+FFFD;
 * any other value is looked up in `decodeMap` and returned unchanged when
 * no replacement is registered.
 */
function replaceCodePoint(codePoint) {
  const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
  if (isSurrogate || codePoint > 0x10ffff) {
    return 0xfffd;
  }
  const replacement = decodeMap.get(codePoint);
  return replacement ?? codePoint;
}
/** Character codes the decoder compares against, with reverse (code -> name) lookups. */
var CharCodes;
(function (CharCodes) {
  const members = [
    ["NUM", 35],
    ["SEMI", 59],
    ["EQUALS", 61],
    ["ZERO", 48],
    ["NINE", 57],
    ["LOWER_A", 97],
    ["LOWER_F", 102],
    ["LOWER_X", 120],
    ["LOWER_Z", 122],
    ["UPPER_A", 65],
    ["UPPER_F", 70],
    ["UPPER_Z", 90],
  ];
  for (const [name, code] of members) {
    CharCodes[name] = code;
    CharCodes[code] = name;
  }
})(CharCodes || (CharCodes = {}));
/** OR-ing this bit into an upper-case ASCII letter's code yields the lower-case letter. */
const TO_LOWER_BIT = 0x20;
/** Bit masks used to pick the fields out of a decode-tree node. */
var BinTrieFlags;
(function (BinTrieFlags) {
  const masks = [
    ["VALUE_LENGTH", 0xc000],
    ["BRANCH_LENGTH", 0x3f80],
    ["JUMP_TABLE", 0x007f],
  ];
  for (const [name, mask] of masks) {
    BinTrieFlags[name] = mask;
    BinTrieFlags[mask] = name;
  }
})(BinTrieFlags || (BinTrieFlags = {}));
/** True when `code` is the char code of an ASCII digit `0`-`9`. */
function isNumber(code) {
  const atLeastZero = code >= CharCodes.ZERO;
  return atLeastZero && code <= CharCodes.NINE;
}
/** True when `code` is the char code of a hex letter (`A`-`F` or `a`-`f`); digits are not covered here. */
function isHexadecimalCharacter(code) {
  const isUpperHex = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F;
  if (isUpperHex) {
    return true;
  }
  return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
/** True when `code` is the char code of an ASCII letter or digit. */
function isAsciiAlphaNumeric(code) {
  if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
    return true;
  }
  if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
    return true;
  }
  return isNumber(code);
}
/**
 * Tells whether `code` is a character that must NOT follow an unterminated
 * entity inside an attribute value: `=` or any ASCII letter/digit.
 *
 * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 */
function isEntityInAttributeInvalidEnd(code) {
  if (code === CharCodes.EQUALS) {
    return true;
  }
  return isAsciiAlphaNumeric(code);
}
/** States of the EntityDecoder state machine, with reverse (number -> name) lookups. */
var EntityDecoderState;
(function (EntityDecoderState) {
  const stateNames = [
    "EntityStart",
    "NumericStart",
    "NumericDecimal",
    "NumericHex",
    "NamedEntity",
  ];
  stateNames.forEach((name, ordinal) => {
    EntityDecoderState[name] = ordinal;
    EntityDecoderState[ordinal] = name;
  });
})(EntityDecoderState || (EntityDecoderState = {}));
/** Modes that control how an entity is allowed to end. */
var DecodingMode;
(function (DecodingMode) {
  const modes = [
    /** Entities in text nodes that can end with any character. */
    "Legacy",
    /** Only allow entities terminated with a semicolon. */
    "Strict",
    /** Entities in attributes have limitations on ending characters. */
    "Attribute",
  ];
  modes.forEach((name, ordinal) => {
    DecodingMode[name] = ordinal;
    DecodingMode[ordinal] = name;
  });
})(DecodingMode || (DecodingMode = {}));
/**
 * Token decoder with support of writing partial entities.
 *
 * The decoder is a small state machine: `startEntity` resets it, `write`
 * feeds it input (possibly in several chunks) and `end` flushes whatever is
 * pending once no more input will arrive.
 */
class EntityDecoder {
  constructor(
    /** The tree used to decode entities. */
    decodeTree,
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For multi-byte named entities, this will be called multiple times,
     * with the second codepoint, and the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of bytes consumed by the decoder.
     */
    emitCodePoint,
    /** An object that is used to produce errors. */
    errors) {
    this.decodeTree = decodeTree;
    this.emitCodePoint = emitCodePoint;
    this.errors = errors;
    /** The current state of the decoder. */
    this.state = EntityDecoderState.EntityStart;
    /**
     * Characters that were consumed while parsing an entity.
     * Starts at 1 — presumably to account for the leading `&`, which callers skip.
     */
    this.consumed = 1;
    /**
     * The result of the entity.
     *
     * Either the result index of a named entity, or the codepoint of a
     * numeric entity.
     */
    this.result = 0;
    /** The current index in the decode tree. */
    this.treeIndex = 0;
    /** The number of characters that were consumed in excess. */
    this.excess = 1;
    /** The mode in which the decoder is operating. */
    this.decodeMode = DecodingMode.Strict;
  }
  /** Resets the instance to make it reusable. */
  startEntity(decodeMode) {
    this.decodeMode = decodeMode;
    this.state = EntityDecoderState.EntityStart;
    this.result = 0;
    this.treeIndex = 0;
    this.excess = 1;
    this.consumed = 1;
  }
  /**
   * Write an entity to the decoder. This can be called multiple times with partial entities.
   * If the entity is incomplete, the decoder will return -1.
   *
   * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
   * entity is incomplete, and resume when the next string is written.
   *
   * @param input The string containing the entity (or a continuation of the entity).
   * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
   * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
   */
  write(input, offset) {
    switch (this.state) {
      case EntityDecoderState.EntityStart: {
        if (input.charCodeAt(offset) === CharCodes.NUM) {
          this.state = EntityDecoderState.NumericStart;
          // Count the `#`.
          this.consumed += 1;
          return this.stateNumericStart(input, offset + 1);
        }
        this.state = EntityDecoderState.NamedEntity;
        return this.stateNamedEntity(input, offset);
      }
      case EntityDecoderState.NumericStart: {
        return this.stateNumericStart(input, offset);
      }
      case EntityDecoderState.NumericDecimal: {
        return this.stateNumericDecimal(input, offset);
      }
      case EntityDecoderState.NumericHex: {
        return this.stateNumericHex(input, offset);
      }
      case EntityDecoderState.NamedEntity: {
        return this.stateNamedEntity(input, offset);
      }
    }
  }
  /**
   * Switches between the numeric decimal and hexadecimal states.
   *
   * Equivalent to the `Numeric character reference state` in the HTML spec.
   *
   * @param input The string containing the entity (or a continuation of the entity).
   * @param offset The current offset.
   * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
   */
  stateNumericStart(input, offset) {
    if (offset >= input.length) {
      return -1;
    }
    // Setting TO_LOWER_BIT makes the comparison match both `x` and `X`.
    if ((input.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
      this.state = EntityDecoderState.NumericHex;
      // Count the `x`.
      this.consumed += 1;
      return this.stateNumericHex(input, offset + 1);
    }
    this.state = EntityDecoderState.NumericDecimal;
    return this.stateNumericDecimal(input, offset);
  }
  /**
   * Appends the digits `input[start..end)` to the numeric result accumulated
   * so far and counts them as consumed. Does nothing for an empty range.
   *
   * @param input The string containing the digits.
   * @param start Index of the first digit.
   * @param end Index just past the last digit.
   * @param base The radix of the digits (10 or 16 at the call sites in this class).
   */
  addToNumericResult(input, start, end, base) {
    if (start !== end) {
      const digitCount = end - start;
      // Shift the previously parsed digits left by `digitCount` places before
      // adding the new chunk, so entities split across writes still add up.
      this.result =
        this.result * base ** digitCount +
        Number.parseInt(input.slice(start, end), base);
      this.consumed += digitCount;
    }
  }
  /**
   * Parses a hexadecimal numeric entity.
   *
   * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
   *
   * @param input The string containing the entity (or a continuation of the entity).
   * @param offset The current offset.
   * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
   */
  stateNumericHex(input, offset) {
    const startIndex = offset;
    while (offset < input.length) {
      const char = input.charCodeAt(offset);
      if (isNumber(char) || isHexadecimalCharacter(char)) {
        offset += 1;
      }
      else {
        this.addToNumericResult(input, startIndex, offset, 16);
        // 3 = initial count of 1, plus `#`, plus `x`; more means digits were seen.
        return this.emitNumericEntity(char, 3);
      }
    }
    // Ran out of input: remember the digits seen so far and ask for more.
    this.addToNumericResult(input, startIndex, offset, 16);
    return -1;
  }
  /**
   * Parses a decimal numeric entity.
   *
   * Equivalent to the `Decimal character reference state` in the HTML spec.
   *
   * @param input The string containing the entity (or a continuation of the entity).
   * @param offset The current offset.
   * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
   */
  stateNumericDecimal(input, offset) {
    const startIndex = offset;
    while (offset < input.length) {
      const char = input.charCodeAt(offset);
      if (isNumber(char)) {
        offset += 1;
      }
      else {
        this.addToNumericResult(input, startIndex, offset, 10);
        // 2 = initial count of 1, plus `#`; more means digits were seen.
        return this.emitNumericEntity(char, 2);
      }
    }
    // Ran out of input: remember the digits seen so far and ask for more.
    this.addToNumericResult(input, startIndex, offset, 10);
    return -1;
  }
  /**
   * Validate and emit a numeric entity.
   *
   * Implements the logic from the `Hexadecimal character reference start
   * state` and `Numeric character reference end state` in the HTML spec.
   *
   * @param lastCp The last code point of the entity. Used to see if the
   *               entity was terminated with a semicolon.
   * @param expectedLength The minimum number of characters that should be
   *               consumed. Used to validate that at least one digit
   *               was consumed.
   * @returns The number of characters that were consumed.
   */
  emitNumericEntity(lastCp, expectedLength) {
    // Ensure we consumed at least one digit.
    if (this.consumed <= expectedLength) {
      this.errors?.absenceOfDigitsInNumericCharacterReference(this.consumed);
      return 0;
    }
    // Figure out if this is a legit end of the entity
    if (lastCp === CharCodes.SEMI) {
      this.consumed += 1;
    }
    else if (this.decodeMode === DecodingMode.Strict) {
      return 0;
    }
    this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
    if (this.errors) {
      if (lastCp !== CharCodes.SEMI) {
        this.errors.missingSemicolonAfterCharacterReference();
      }
      this.errors.validateNumericCharacterReference(this.result);
    }
    return this.consumed;
  }
  /**
   * Parses a named entity.
   *
   * Equivalent to the `Named character reference state` in the HTML spec.
   *
   * @param input The string containing the entity (or a continuation of the entity).
   * @param offset The current offset.
   * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
   */
  stateNamedEntity(input, offset) {
    const { decodeTree } = this;
    let current = decodeTree[this.treeIndex];
    // The mask is the number of bytes of the value, including the current byte.
    let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
    for (; offset < input.length; offset++, this.excess++) {
      const char = input.charCodeAt(offset);
      this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
      if (this.treeIndex < 0) {
        return this.result === 0 ||
          // If we are parsing an attribute
          (this.decodeMode === DecodingMode.Attribute &&
            // We shouldn't have consumed any characters after the entity,
            (valueLength === 0 ||
              // And there should be no invalid characters.
              isEntityInAttributeInvalidEnd(char)))
          ? 0
          : this.emitNotTerminatedNamedEntity();
      }
      current = decodeTree[this.treeIndex];
      valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
      // If the branch is a value, store it and continue
      if (valueLength !== 0) {
        // If the entity is terminated by a semicolon, we are done.
        if (char === CharCodes.SEMI) {
          return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
        }
        // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
        if (this.decodeMode !== DecodingMode.Strict) {
          this.result = this.treeIndex;
          this.consumed += this.excess;
          this.excess = 0;
        }
      }
    }
    return -1;
  }
  /**
   * Emit a named entity that was not terminated with a semicolon.
   *
   * @returns The number of characters consumed.
   */
  emitNotTerminatedNamedEntity() {
    const { result, decodeTree } = this;
    const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
    this.emitNamedEntityData(result, valueLength, this.consumed);
    this.errors?.missingSemicolonAfterCharacterReference();
    return this.consumed;
  }
  /**
   * Emit a named entity.
   *
   * @param result The index of the entity in the decode tree.
   * @param valueLength The number of bytes in the entity.
   * @param consumed The number of characters consumed.
   *
   * @returns The number of characters consumed.
   */
  emitNamedEntityData(result, valueLength, consumed) {
    const { decodeTree } = this;
    // A length of 1 means the value is stored in the node itself, next to the
    // length bits; otherwise it is read from the slot(s) after the node.
    this.emitCodePoint(valueLength === 1
      ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
      : decodeTree[result + 1], consumed);
    if (valueLength === 3) {
      // For multi-byte values, we need to emit the second byte.
      this.emitCodePoint(decodeTree[result + 2], consumed);
    }
    return consumed;
  }
  /**
   * Signal to the parser that the end of the input was reached.
   *
   * Remaining data will be emitted and relevant errors will be produced.
   *
   * @returns The number of characters consumed.
   */
  end() {
    switch (this.state) {
      case EntityDecoderState.NamedEntity: {
        // Emit a named entity if we have one.
        return this.result !== 0 &&
          (this.decodeMode !== DecodingMode.Attribute ||
            this.result === this.treeIndex)
          ? this.emitNotTerminatedNamedEntity()
          : 0;
      }
      // Otherwise, emit a numeric entity if we have one.
      case EntityDecoderState.NumericDecimal: {
        return this.emitNumericEntity(0, 2);
      }
      case EntityDecoderState.NumericHex: {
        return this.emitNumericEntity(0, 3);
      }
      case EntityDecoderState.NumericStart: {
        this.errors?.absenceOfDigitsInNumericCharacterReference(this.consumed);
        return 0;
      }
      case EntityDecoderState.EntityStart: {
        // Return 0 if we have no entity.
        return 0;
      }
    }
  }
}
/**
 * Builds a string decoder around one shared `EntityDecoder`.
 *
 * The returned function copies the text between entities verbatim and lets
 * the decoder append the decoded code points in between. The output buffer
 * lives in the closure and is cleared before each call returns.
 *
 * @param decodeTree The decode tree.
 * @returns A function `(input, decodeMode) => string` that decodes entities in a string.
 */
function getDecoder(decodeTree) {
  let returnValue = "";
  const decoder = new EntityDecoder(decodeTree, (data) => (returnValue += fromCodePoint(data)));
  return function decodeWithTrie(input, decodeMode) {
    let copiedUpTo = 0;
    let ampIndex = input.indexOf("&", 0);
    while (ampIndex >= 0) {
      // Copy the literal text that precedes this "&".
      returnValue += input.slice(copiedUpTo, ampIndex);
      decoder.startEntity(decodeMode);
      // Skip the "&"
      const consumed = decoder.write(input, ampIndex + 1);
      if (consumed < 0) {
        // Input ended inside the entity: flush what the decoder has and stop.
        copiedUpTo = ampIndex + decoder.end();
        break;
      }
      copiedUpTo = ampIndex + consumed;
      // If nothing was consumed, skip the current `&` and continue after it.
      const searchFrom = consumed === 0 ? copiedUpTo + 1 : copiedUpTo;
      ampIndex = input.indexOf("&", searchFrom);
    }
    const decoded = returnValue + input.slice(copiedUpTo);
    // Make sure we don't keep a reference to the final string.
    returnValue = "";
    return decoded;
  };
}
/**
 * Follows one edge of the decode trie: given the current node and the next
 * input character, finds the index of the child node.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
function determineBranch(decodeTree, current, nodeIndex, char) {
  const jumpBase = current & BinTrieFlags.JUMP_TABLE;
  const fanOut = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
  // Case 1: Single branch encoded in jump offset
  if (fanOut === 0) {
    if (jumpBase === 0 || char !== jumpBase) {
      return -1;
    }
    return nodeIndex;
  }
  // Case 2: Multiple branches encoded in jump table
  if (jumpBase) {
    const slot = char - jumpBase;
    if (slot < 0 || slot >= fanOut) {
      return -1;
    }
    return decodeTree[nodeIndex + slot] - 1;
  }
  // Case 3: Multiple branches encoded in dictionary
  // Binary search for the character among the `fanOut` keys; the matching
  // destination sits `fanOut` slots after its key.
  let low = nodeIndex;
  let high = low + fanOut - 1;
  while (low <= high) {
    const middle = (low + high) >>> 1;
    const candidate = decodeTree[middle];
    if (candidate < char) {
      low = middle + 1;
      continue;
    }
    if (candidate > char) {
      high = middle - 1;
      continue;
    }
    return decodeTree[middle + fanOut];
  }
  return -1;
}
// Decoder bound to the XML decode tree; created once and reused by decodeXML().
const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
/**
 * Decodes the entities of an XML string in strict mode, i.e. only entities
 * terminated by a semicolon are replaced.
 *
 * @param xmlString The string to decode.
 * @returns The decoded string.
 */
function decodeXML(xmlString) {
  const decoded = xmlDecoder(xmlString, DecodingMode.Strict);
  return decoded;
}
// Two alternatives: a bare attribute (capture 1), or key=quoted-value (captures 2 and 3).
const attrRE = /\s([^'"/\s><]+?)[\s/>]|([^\s=]+)=\s?(".*?"|'.*?')/g;
/**
 * Turns the source text of a single tag into a node object.
 *
 * Returns `{ type: 'comment', comment }` for a tag whose name starts with
 * `!--`; otherwise returns a `{ type: 'tag', name, voidElement, attribs,
 * children }` node whose `attribs` are filled from the tag text.
 *
 * @param tag The raw tag text, e.g. `<mi mathvariant="bold">`.
 * @returns The node describing the tag.
 */
function stringify$1(tag) {
  const node = {
    type: 'tag',
    name: '',
    voidElement: false,
    attribs: {},
    children: []
  };
  const nameMatch = tag.match(/<\/?([^\s]+?)[/\s>]/);
  if (nameMatch) {
    node.name = nameMatch[1];
    // A "/" right before the closing ">" marks a self-closing tag.
    node.voidElement = tag.charAt(tag.length - 2) === '/';
    // handle comment tag
    if (node.name.startsWith('!--')) {
      const commentEnd = tag.indexOf('-->');
      const comment = commentEnd === -1 ? '' : tag.slice(4, commentEnd);
      return { type: 'comment', comment };
    }
  }
  // Work on a private copy so the shared global regex keeps no scan state.
  const matcher = new RegExp(attrRE);
  let hit = matcher.exec(tag);
  while (hit !== null) {
    const [whole, bare, key, quoted] = hit;
    if (whole.trim()) {
      if (bare) {
        const attr = bare.trim();
        const pair = attr.indexOf('=') > -1 ? attr.split('=') : [attr, ''];
        node.attribs[pair[0]] = pair[1];
        // Step back one character so the delimiter that ended this match can
        // start the next one.
        matcher.lastIndex--;
      } else if (key) {
        // Strip the surrounding quotes from the value.
        node.attribs[key] = quoted.trim().substring(1, quoted.length - 1);
      }
    }
    hit = matcher.exec(tag);
  }
  return node;
}
// Matches one whole tag from "<" to ">"; quoted attribute values are consumed
// as a unit, so a ">" inside quotes does not end the tag.
const tagRE = /<[a-zA-Z0-9\-!/](?:"[^"]*"|'[^']*'|[^'">])*>/g;
// Matches strings that are empty or consist of whitespace only.
const whitespaceRE = /^\s*$/;
// Element names that parse() treats as text containers when collecting text nodes.
const textContainerNames = ['mtext', 'mi', 'mn', 'mo', 'ms'];
/**
 * Parses markup into a tree of `tag`, `text` and `comment` nodes.
 *
 * Text is only kept inside the elements listed in `textContainerNames`:
 * directly after their opening tag, and after a child element closes inside
 * them.
 *
 * @param html The markup to parse.
 * @param options Optional settings:
 *   - `components`: object keyed by tag name; matching tags get type `component`.
 *   - `disableDecode`: when truthy, text is stored without entity decoding.
 * @returns The array of root nodes.
 */
function parse(html, options = {}) {
  const result = [];
  // Stack of open elements: arr[level] is the element open at that depth.
  const arr = [];
  let current;
  let level = -1;
  // Iterate over a private copy of the regex so scanning always starts at 0.
  for (const match of html.matchAll(new RegExp(tagRE))) {
    const tag = match[0];
    const isOpen = tag.charAt(1) !== '/';
    const isComment = tag.startsWith('<!--');
    const start = match.index + tag.length;
    const nextChar = html.charAt(start);
    if (isComment) {
      const comment = stringify$1(tag);
      // if we're at root, push new base node
      if (level < 0) {
        result.push(comment);
      } else {
        arr[level].children.push(comment);
      }
      continue;
    }
    if (isOpen) {
      level++;
      current = stringify$1(tag);
      if (current.type === 'tag' && options.components?.[current.name]) {
        current.type = 'component';
      }
      if (
        textContainerNames.includes(current.name) &&
        !current.voidElement &&
        nextChar &&
        nextChar !== '<'
      ) {
        // When no further "<" exists, take the text up to the end of the
        // input instead of slicing to index -1 (which dropped a character).
        const textEnd = html.indexOf('<', start);
        const data = html.slice(start, textEnd === -1 ? undefined : textEnd).trim();
        current.children.push({
          type: 'text',
          data: options.disableDecode ? data : decodeXML(data)
        });
      }
      // if we're at root, push new base node
      if (level === 0) {
        result.push(current);
      }
      const parent = arr[level - 1];
      if (parent) {
        parent.children.push(current);
      }
      arr[level] = current;
    }
    if (!isOpen || current.voidElement) {
      if (level > -1 && (current.voidElement || current.name === tag.slice(2, -1))) {
        level--;
        // move current up a level to match the end tag
        current = level === -1 ? result : arr[level];
      }
      if (
        level > -1 &&
        // Must be a call: indexing the method (`includes[...]`) was always
        // undefined, which disabled this whole branch.
        textContainerNames.includes(arr[level].name) &&
        nextChar !== '<' &&
        nextChar
      ) {
        // trailing text node
        const siblings = arr[level].children;
        // calculate correct end of the content slice in case there's
        // no tag after the text node.
        const end = html.indexOf('<', start);
        let data = html.slice(start, end === -1 ? undefined : end);
        // if a node is nothing but whitespace, collapse it as the spec states:
        // https://www.w3.org/TR/html4/struct/text.html#h-9.1
        if (whitespaceRE.test(data)) {
          data = ' ';
        }
        // don't add whitespace-only text nodes if they would be trailing text nodes
        // or if they would be leading whitespace-only text nodes:
        //  * end > -1 indicates this is not a trailing text node
        //  * leading node is when level is -1 and parent has length 0
        if ((end > -1 && level + siblings.length >= 0) || data !== ' ') {
          siblings.push({
            type: 'text',
            data: options.disableDecode ? data : decodeXML(data)
          });
        }
      }
    }
  }
  return result;
}
/**
 * Serialises an attribute map as ` key="value"` pairs.
 *
 * @param attribs Object whose enumerable keys are attribute names.
 * @returns The pairs, each preceded by a space, or '' when there are none.
 *   Values are inserted as-is, without escaping.
 */
function attrString(attribs) {
  let serialized = '';
  for (const name in attribs) {
    serialized += ` ${name}="${attribs[name]}"`;
  }
  return serialized;
}
/**
 * Reducer that appends the serialisation of one node to `buff`.
 *
 * - `text` nodes append their data as-is.
 * - `tag` nodes are written self-closing when flagged `voidElement`, or when
 *   they have no children and are not marked `xml:space="preserve"`;
 *   otherwise their children are serialised recursively.
 * - `comment` nodes are wrapped in `<!--` / `-->`.
 * - Any other node type is skipped, so the accumulator always stays a string.
 *
 * @param buff The markup produced so far.
 * @param doc The node to serialise.
 * @returns `buff` with the node appended.
 */
function stringify(buff, doc) {
  switch (doc.type) {
    case 'text':
      return buff + doc.data;
    case 'tag': {
      // `attribs` is optional (the opening-tag line below guards it too), so
      // read `xml:space` defensively.
      const voidElement =
        doc.voidElement || (!doc.children.length && doc.attribs?.['xml:space'] !== 'preserve');
      buff += `<${doc.name}${doc.attribs ? attrString(doc.attribs) : ''}${voidElement ? '/>' : '>'}`;
      if (voidElement) {
        return buff;
      }
      return `${buff + doc.children.reduce(stringify, '')}</${doc.name}>`;
    }
    case 'comment':
      buff += `<!--${doc.comment}-->`;
      return buff;
    default:
      // Unknown node types used to fall through and return `undefined`, which
      // turned the rest of the reduce() output into "undefined…".
      return buff;
  }
}
/**
 * Serialises a list of root nodes by concatenating the output of
 * `stringify` for each of them.
 *
 * @param doc Array of root nodes.
 * @returns The concatenated markup.
 */
function stringifyDoc(doc) {
  let markup = '';
  doc.forEach((rootEl) => {
    markup = markup + stringify('', rootEl);
  });
  return markup;
}
/**
 * Handler for `<math>`: turns `targetParent` itself into an empty `m:oMath`
 * root element carrying the `m` and `w` namespace declarations.
 *
 * @returns The same `targetParent` object, after resetting its name,
 *   attributes, type and children.
 */
function math(element, targetParent, previousSibling, nextSibling, ancestors) {
  Object.assign(targetParent, {
    name: 'm:oMath',
    attribs: {
      'xmlns:m': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
      'xmlns:w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    },
    type: 'tag',
    children: []
  });
  return targetParent;
}
/**
 * Handler for `<semantics>`: produces no output of its own.
 *
 * @returns `targetParent` unchanged — presumably so the caller writes this
 *   element's children straight into the parent (confirm against the walker).
 */
function semantics(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Ignore as default behavior
  const passThroughTarget = targetParent;
  return passThroughTarget;
}
/**
 * Handler for `<menclose>`.
 *
 * Only the first word of the `notation` attribute is used (`longdiv` when
 * the attribute is missing or empty). `longdiv` is emitted as an `m:rad`
 * with the degree hidden; every other notation becomes an `m:borderBox`
 * whose `m:borderBoxPr` hides sides and/or adds a strike line.
 *
 * @returns The new `m:e` element that receives the enclosed content.
 */
function menclose(element, targetParent, previousSibling, nextSibling, ancestors) {
  const notation = element.attribs?.notation?.split(' ')[0] || 'longdiv';
  const emptyTag = (name) => ({ type: 'tag', name, attribs: {}, children: [] });
  const switchedOn = (name) => ({ type: 'tag', name, attribs: { 'm:val': 'on' }, children: [] });
  const targetElement = emptyTag('m:e');

  if (notation === 'longdiv') {
    const radicalProps = emptyTag('m:radPr');
    radicalProps.children = [switchedOn('m:degHide')];
    const radical = emptyTag('m:rad');
    radical.children = [radicalProps, emptyTag('m:deg'), targetElement];
    targetParent.children.push(radical);
    return targetElement;
  }

  // Notations drawn as a complete box: no property element at all.
  const fullBoxNotations = ['actuarial', 'radical', 'box'];
  // Notations that keep some sides: value lists the sides to hide.
  const hiddenSidesByNotation = new Map([
    ['left', ['Top', 'Bot', 'Right']],
    ['roundedbox', ['Top', 'Bot', 'Right']],
    ['right', ['Top', 'Bot', 'Left']],
    ['circle', ['Top', 'Bot', 'Left']],
    ['top', ['Bot', 'Left', 'Right']],
    ['bottom', ['Top', 'Left', 'Right']]
  ]);
  // Strike notations: all sides hidden plus the named strike element.
  const strikeByNotation = new Map([
    ['updiagonalstrike', 'm:strikeBLTR'],
    ['downdiagonalstrike', 'm:strikeTLBR'],
    ['verticalstrike', 'm:strikeV'],
    ['horizontalstrike', 'm:strikeH']
  ]);
  const allSides = ['Top', 'Bot', 'Left', 'Right'];

  const containerElement = emptyTag('m:borderBox');
  if (fullBoxNotations.includes(notation)) {
    containerElement.children = [targetElement];
  } else {
    // Unknown notations fall back to hiding every side.
    const sides = hiddenSidesByNotation.get(notation) ?? allSides;
    const borderBoxPr = emptyTag('m:borderBoxPr');
    borderBoxPr.children = sides.map((side) => switchedOn(`m:hide${side}`));
    if (strikeByNotation.has(notation)) {
      borderBoxPr.children.push(switchedOn(strikeByNotation.get(notation)));
    }
    containerElement.children = [borderBoxPr, targetElement];
  }
  targetParent.children.push(containerElement);
  return targetElement;
}
/**
 * Handler for `<mfrac>`: emits an `m:f` fraction with `m:num` and `m:den`.
 *
 * With anything other than exactly two children, nothing is emitted and
 * `targetParent` is returned. Otherwise both children are converted here via
 * `walker` and the function returns `undefined`. The bar type is `noBar`
 * when `linethickness` is exactly `'0'`, else `bar`.
 */
function mfrac(element, targetParent, previousSibling, nextSibling, ancestors) {
  if (element.children.length !== 2) {
    // treat as mrow
    return targetParent;
  }
  const emptySlot = (name) => ({ name, type: 'tag', attribs: {}, children: [] });
  const [numerator, denominator] = element.children;
  const numeratorTarget = emptySlot('m:num');
  const denominatorTarget = emptySlot('m:den');
  // The fraction becomes the nearest ancestor of both parts.
  const lineage = [element, ...ancestors];
  walker(numerator, numeratorTarget, false, false, lineage);
  walker(denominator, denominatorTarget, false, false, lineage);

  const barType = element.attribs?.linethickness === '0' ? 'noBar' : 'bar';
  const typeProp = { type: 'tag', name: 'm:type', attribs: { 'm:val': barType }, children: [] };
  const fractionProps = { type: 'tag', name: 'm:fPr', attribs: {}, children: [typeProp] };
  targetParent.children.push({
    type: 'tag',
    name: 'm:f',
    attribs: {},
    children: [fractionProps, numeratorTarget, denominatorTarget]
  });
  // Don't iterate over children in the usual way.
}
/**
 * Handler for `<mglyph>`: OMML has no counterpart, so a non-empty `alt`
 * attribute is appended to `targetParent` as a text node; otherwise nothing
 * is emitted.
 */
function mglyph(element, targetParent, previousSibling, nextSibling, ancestors) {
  // No support in omml. Output alt text.
  const altText = element.attribs?.alt;
  if (!altText) {
    return;
  }
  targetParent.children.push({ type: 'text', data: altText });
}
/**
 * Handler for `<mmultiscripts>`.
 *
 * The first child is the base. The remaining children alternate between
 * subscript and superscript; once an `mprescripts` child is seen, the
 * following ones are pre-scripts. `none` children are ignored.
 *
 * Post-scripts wrap the base in `m:sSubSup`, `m:sSub` or `m:sSup`;
 * pre-scripts then wrap that result in `m:sPre`. The element's children are
 * converted here via `walker`, and nothing is returned.
 */
function mmultiscripts(element, targetParent, previousSibling, nextSibling, ancestors) {
  if (element.children.length === 0) {
    // Don't use
    return;
  }
  const [base] = element.children;
  const scripts = element.children.slice(1);
  const postSubs = [];
  const postSupers = [];
  const preSubs = [];
  const preSupers = [];
  let seenPrescripts = false;
  scripts.forEach((child, position) => {
    if (child.name === 'mprescripts') {
      seenPrescripts = true;
      return;
    }
    if (child.name === 'none') {
      return;
    }
    // Position parity decides sub vs. super; the divider flips post to pre.
    let bucket;
    if (position % 2) {
      bucket = seenPrescripts ? preSubs : postSupers;
    } else {
      bucket = seenPrescripts ? preSupers : postSubs;
    }
    bucket.push(child);
  });

  const lineage = [element, ...ancestors];
  const emptyTag = (name) => ({ name, type: 'tag', attribs: {}, children: [] });
  // Creates the named container and converts every node into it.
  const convertInto = (name, nodes) => {
    const container = emptyTag(name);
    for (const node of nodes) {
      walker(node, container, false, false, lineage);
    }
    return container;
  };

  const tempTarget = {
    children: []
  };
  walker(base, tempTarget, false, false, lineage);
  let topTarget = tempTarget.children[0];

  if (postSubs.length || postSupers.length) {
    const subscriptTarget = convertInto('m:sub', postSubs);
    const superscriptTarget = convertInto('m:sup', postSupers);
    const wrappedBase = emptyTag('m:e');
    wrappedBase.children = [topTarget];
    let postName;
    let postScripts;
    if (postSubs.length && postSupers.length) {
      postName = 'm:sSubSup';
      postScripts = [subscriptTarget, superscriptTarget];
    } else if (postSubs.length) {
      postName = 'm:sSub';
      postScripts = [subscriptTarget];
    } else {
      postName = 'm:sSup';
      postScripts = [superscriptTarget];
    }
    topTarget = {
      type: 'tag',
      attribs: {},
      children: [wrappedBase, ...postScripts],
      name: postName
    };
  }

  if (preSubs.length || preSupers.length) {
    const preSubscriptTarget = convertInto('m:sub', preSubs);
    const preSuperscriptTarget = convertInto('m:sup', preSupers);
    const wrappedBase = emptyTag('m:e');
    wrappedBase.children = [topTarget];
    topTarget = {
      name: 'm:sPre',
      type: 'tag',
      attribs: {},
      children: [wrappedBase, preSubscriptTarget, preSuperscriptTarget]
    };
  }
  targetParent.children.push(topTarget);
  // Don't iterate over children in the usual way.
}
/**
 * Handler for `<mrow>`.
 *
 * When the previous sibling is flagged `isNary`, returns the last child of
 * the last element already in `targetParent`, so the row's content is placed
 * there. Otherwise the row is transparent and `targetParent` is returned.
 */
function mrow(element, targetParent, previousSibling, nextSibling, ancestors) {
  if (!previousSibling.isNary) {
    // Ignore as default behavior
    return targetParent;
  }
  const siblings = targetParent.children;
  const lastSibling = siblings[siblings.length - 1];
  const slots = lastSibling.children;
  return slots[slots.length - 1];
}
/**
 * Handler for `<mspace>`: appends a run (`m:r`) whose text (`m:t`, marked
 * `xml:space="preserve"`) is one space character. No attribute of the
 * source element is read. Returns nothing.
 */
function mspace(element, targetParent, previousSibling, nextSibling, ancestors) {
  const spaceText = { type: 'text', data: ' ' };
  const textElement = {
    name: 'm:t',
    type: 'tag',
    attribs: { 'xml:space': 'preserve' },
    children: [spaceText]
  };
  const run = { name: 'm:r', type: 'tag', attribs: {}, children: [textElement] };
  targetParent.children.push(run);
}
/**
 * Handler for `<msqrt>`: appends an `m:rad` with a hidden, empty degree.
 *
 * @returns The radical's `m:e` element, which receives the radicand.
 */
function msqrt(element, targetParent, previousSibling, nextSibling, ancestors) {
  const emptyTag = (name) => ({ name, type: 'tag', attribs: {}, children: [] });
  const targetElement = emptyTag('m:e');

  const hideDegree = { name: 'm:degHide', type: 'tag', attribs: { 'm:val': 'on' }, children: [] };
  const radicalProps = emptyTag('m:radPr');
  radicalProps.children.push(hideDegree);

  const radical = emptyTag('m:rad');
  radical.children.push(radicalProps, emptyTag('m:deg'), targetElement);

  targetParent.children.push(radical);
  return targetElement;
}
/**
 * Handler for `<mstyle>`: produces no output of its own.
 *
 * @returns `targetParent` unchanged — presumably so the caller writes this
 *   element's children straight into the parent (confirm against the walker).
 */
function mstyle(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Ignore as default behavior
  const passThroughTarget = targetParent;
  return passThroughTarget;
}
/**
 * Collects the text of a node and all of its descendants.
 *
 * U+2062 and U+200B are removed from every text node. With `trim` enabled
 * (the default) each text node is trimmed individually before joining.
 *
 * @param node A text node (`type === 'text'`) or a node with `children`.
 * @param trim Whether to trim each text node.
 * @returns The concatenated text; '' for a non-text node without children.
 */
function getTextContent(node, trim = true) {
  if (node.type === 'text') {
    const visible = node.data.replace(/[\u2062]|[\u200B]/g, '');
    return trim ? visible.trim() : visible;
  }
  if (!node.children) {
    return '';
  }
  return node.children.reduce((collected, subNode) => collected + getTextContent(subNode, trim), '');
}
// Matches text that consists of exactly one n-ary operator character.
// The alternatives are grouped so `^` and `$` apply to all of them; without
// the group, text merely containing an operator from the middle range passed.
const NARY_REGEXP = /^(?:[\u220f-\u2211]|[\u2229-\u2233]|[\u22c0-\u22c3])$/;
// Matches exactly one n-ary operator whose `m:grow` value defaults to '1'
// when the source element has no explicit `stretchy` attribute.
const GROW_REGEXP = /^(?:\u220f|\u2211|[\u2229-\u222b]|\u222e|\u222f|\u2232|\u2233|[\u22c0-\u22c3])$/;
/**
 * Tests the trimmed text content of `node` against `NARY_REGEXP`.
 *
 * @param node The node to inspect.
 * @returns The node's text when it matches, otherwise `false`.
 */
function getNary(node) {
  // Check if node contains only a nary operator.
  const operatorText = getTextContent(node);
  return NARY_REGEXP.test(operatorText) ? operatorText : false;
}
/**
 * Builds an `m:nary` element that holds only its `m:naryPr` properties.
 *
 * `m:grow` follows an explicit `stretchy` attribute (`'true'` gives '1',
 * `'false'` gives '0'); otherwise it is '1' only when `naryChar` matches
 * `GROW_REGEXP`.
 *
 * @param naryChar The operator character, written to `m:chr`.
 * @param element The source element; only `attribs.stretchy` is read.
 * @param type The value written to `m:limLoc`.
 * @param subHide Whether `m:subHide` is 'on' (default 'off').
 * @param supHide Whether `m:supHide` is 'on' (default 'off').
 * @returns The new `m:nary` element.
 */
function getNaryTarget(naryChar, element, type, subHide = false, supHide = false) {
  const stretchy = element.attribs?.stretchy;
  let grow;
  if (stretchy === 'true') {
    grow = '1';
  } else if (stretchy === 'false') {
    grow = '0';
  } else {
    grow = GROW_REGEXP.test(naryChar) ? '1' : '0';
  }
  const property = (name, value) => ({
    type: 'tag',
    name,
    attribs: { 'm:val': value },
    children: []
  });
  const naryProps = {
    type: 'tag',
    name: 'm:naryPr',
    attribs: {},
    children: [
      property('m:chr', naryChar),
      property('m:limLoc', type),
      property('m:grow', grow),
      property('m:subHide', subHide ? 'on' : 'off'),
      property('m:supHide', supHide ? 'on' : 'off')
    ]
  };
  return { type: 'tag', name: 'm:nary', attribs: {}, children: [naryProps] };
}
/**
 * Prepends an `m:argPr` / `m:scrLvl` property to `target` when the first
 * entry of `ancestors` with a truthy `scriptlevel` attribute has the value
 * '0', '1' or '2'. Any other value, or no such ancestor, leaves `target`
 * untouched.
 *
 * @param target The element whose `children` may receive the property.
 * @param ancestors The list searched, in order, for a `scriptlevel`.
 */
function addScriptlevel(target, ancestors) {
  const carrier = ancestors.find((ancestor) => ancestor.attribs?.scriptlevel);
  const scriptlevel = carrier?.attribs?.scriptlevel;
  if (!['0', '1', '2'].includes(scriptlevel)) {
    return;
  }
  const levelProp = {
    type: 'tag',
    name: 'm:scrLvl',
    attribs: { 'm:val': scriptlevel },
    children: []
  };
  target.children.unshift({
    type: 'tag',
    name: 'm:argPr',
    attribs: {},
    children: [levelProp]
  });
}
/**
 * Handler for `<msub>` (subscript).
 *
 * With anything other than exactly two children, nothing is emitted and
 * `targetParent` is returned. Otherwise:
 *  - if the base is an n-ary operator (per `getNary`) and neither `accent`
 *    nor `accentunder` is 'true', an `m:nary` is emitted with the
 *    superscript hidden, followed by empty `m:sup` and `m:e` slots, and the
 *    source element is flagged `isNary`;
 *  - otherwise an `m:sSub` with the converted base is emitted.
 * Children are converted here via `walker`; that path returns `undefined`.
 */
function msub(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Subscript
  if (element.children.length !== 2) {
    // treat as mrow
    return targetParent;
  }
  const lineage = [element, ...ancestors];
  const [base, subscript] = element.children;
  const emptyTag = (name) => ({ name, type: 'tag', attribs: {}, children: [] });
  //
  // m:nAry
  //
  // Conditions:
  // 1. base text must be nary operator
  // 2. no accents
  const naryChar = getNary(base);
  const hasAccent =
    element.attribs?.accent?.toLowerCase() === 'true' ||
    element.attribs?.accentunder?.toLowerCase() === 'true';
  let topTarget;
  if (naryChar && !hasAccent) {
    topTarget = getNaryTarget(naryChar, element, 'subSup', false, true);
    element.isNary = true;
  } else {
    const baseTarget = emptyTag('m:e');
    walker(base, baseTarget, false, false, lineage);
    const scriptProps = {
      type: 'tag',
      name: 'm:sSubPr',
      attribs: {},
      children: [{ type: 'tag', name: 'm:ctrlPr', attribs: {}, children: [] }]
    };
    topTarget = {
      type: 'tag',
      name: 'm:sSub',
      attribs: {},
      children: [scriptProps, baseTarget]
    };
  }
  const subscriptTarget = emptyTag('m:sub');
  walker(subscript, subscriptTarget, false, false, lineage);
  topTarget.children.push(subscriptTarget);
  if (element.isNary) {
    // Empty superscript and operand slots complete the n-ary element.
    topTarget.children.push(
      { type: 'tag', name: 'm:sup', attribs: {}, children: [] },
      { type: 'tag', name: 'm:e', attribs: {}, children: [] }
    );
  }
  targetParent.children.push(topTarget);
  // Don't iterate over children in the usual way.
}
function msubsup(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Sub + superscript handler. Expects exactly three children:
  // [base, subscript, superscript]. With any other child count the parent
  // target is returned unchanged ("treat as mrow").
  if (element.children.length !== 3) {
    return targetParent;
  }

  const emptyTag = (name, children = []) => ({ type: 'tag', name, attribs: {}, children });
  const hasFlag = (attr) => element.attribs?.[attr]?.toLowerCase() === 'true';

  // The element itself becomes the nearest ancestor for everything below it.
  const lineage = [element, ...ancestors];
  const [base, subscript, superscript] = element.children;

  // m:nary is used when the base text is an n-ary operator and the element
  // is not flagged as an accent; otherwise fall back to m:sSubSup.
  const naryChar = getNary(base);
  let topTarget;
  if (naryChar && !hasFlag('accent') && !hasFlag('accentunder')) {
    topTarget = getNaryTarget(naryChar, element, 'subSup');
    element.isNary = true;
  } else {
    const baseTarget = emptyTag('m:e');
    walker(base, baseTarget, false, false, lineage);
    topTarget = emptyTag('m:sSubSup', [
      emptyTag('m:sSubSupPr', [emptyTag('m:ctrlPr')]),
      baseTarget
    ]);
  }

  const subscriptTarget = emptyTag('m:sub');
  const superscriptTarget = emptyTag('m:sup');
  walker(subscript, subscriptTarget, false, false, lineage);
  walker(superscript, superscriptTarget, false, false, lineage);
  topTarget.children.push(subscriptTarget, superscriptTarget);

  if (element.isNary) {
    // m:nary additionally gets an empty m:e placeholder after its limits.
    topTarget.children.push(emptyTag('m:e'));
  }

  targetParent.children.push(topTarget);
  // No return value: the children have been handled here already.
}
function msup(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Superscript handler. Expects exactly two children: [base, superscript].
  // With any other child count the parent target is returned unchanged
  // ("treat as mrow").
  //
  // element      - source element being converted
  // targetParent - output node whose `children` receives the result
  // ancestors    - source ancestors, nearest first; the element is prepended
  //                before the list is handed to walker
  // previousSibling / nextSibling are not used here.
  if (element.children.length !== 2) {
    // treat as mrow
    return targetParent;
  }
  const lineage = [element, ...ancestors];
  const base = element.children[0];
  const superscript = element.children[1];
  let topTarget;
  //
  // m:nAry
  //
  // Conditions:
  // 1. base text must be nary operator
  // 2. no accents
  const naryChar = getNary(base);
  if (
    naryChar &&
    element.attribs?.accent?.toLowerCase() !== 'true' &&
    element.attribs?.accentunder?.toLowerCase() !== 'true'
  ) {
    // Only an upper limit exists, so the subscript slot is hidden.
    topTarget = getNaryTarget(naryChar, element, 'subSup', true);
    element.isNary = true;
    // Empty m:sub placeholder. It carries `attribs` and `children` like every
    // other node built in this file, so consumers that read those fields do
    // not hit `undefined`.
    topTarget.children.push({ type: 'tag', name: 'm:sub', attribs: {}, children: [] });
  } else {
    // fallback: m:sSup
    const baseTarget = {
      name: 'm:e',
      type: 'tag',
      attribs: {},
      children: []
    };
    walker(base, baseTarget, false, false, lineage);
    topTarget = {
      type: 'tag',
      name: 'm:sSup',
      attribs: {},
      children: [
        {
          type: 'tag',
          name: 'm:sSupPr',
          attribs: {},
          children: [
            {
              type: 'tag',
              name: 'm:ctrlPr',
              attribs: {},
              children: []
            }
          ]
        },
        baseTarget
      ]
    };
  }
  const superscriptTarget = {
    name: 'm:sup',
    type: 'tag',
    attribs: {},
    children: []
  };
  walker(superscript, superscriptTarget, false, false, lineage);
  topTarget.children.push(superscriptTarget);
  if (element.isNary) {
    // m:nary additionally gets an empty m:e placeholder after its limits.
    topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] });
  }
  targetParent.children.push(topTarget);
  // Don't iterate over children in the usual way.
}
function mtable(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Table handler: appends an m:m matrix node to targetParent.children and
  // returns it so that rows can be added to it.
  //
  // element      - source table element; its children are treated as rows
  // targetParent - output node whose `children` receives the matrix
  // previousSibling / nextSibling / ancestors are not used here.
  //
  // The column count is the widest row. Children without a `children` array
  // (e.g. text nodes) count as zero cells instead of throwing, and the result
  // is floored at 1 so that an empty table does not produce "-Infinity".
  // (OMML's m:count is an integer in the range 1..255.)
  const cellsPerRowCount = element.children.reduce(
    (widest, row) => Math.max(widest, row?.children?.length ?? 0),
    1
  );
  const valueTag = (name, value) => ({
    name,
    type: 'tag',
    attribs: { 'm:val': value },
    children: []
  });
  const targetElement = {
    name: 'm:m',
    type: 'tag',
    attribs: {},
    children: [
      {
        name: 'm:mPr',
        type: 'tag',
        attribs: {},
        children: [
          valueTag('m:baseJc', 'center'),
          valueTag('m:plcHide', 'on'),
          {
            name: 'm:mcs',
            type: 'tag',
            attribs: {},
            children: [
              {
                name: 'm:mc',
                type: 'tag',
                attribs: {},
                children: [
                  {
                    name: 'm:mcPr',
                    type: 'tag',
                    attribs: {},
                    children: [
                      valueTag('m:count', cellsPerRowCount.toString()),
                      valueTag('m:mcJc', 'center')
                    ]
                  }
                ]
              }
            ]
          }
        ]
      }
    ]
  };
  targetParent.children.push(targetElement);
  return targetElement;
}
function mtd(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Table cell: appends a fresh, empty m:e node to targetParent.children and
  // returns that node. Only targetParent is used.
  const cell = { name: 'm:e', type: 'tag', attribs: {}, children: [] };
  targetParent.children.push(cell);
  return cell;
}
function mtr(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Table row: appends a fresh, empty m:mr node to targetParent.children and
  // returns that node. Only targetParent is used.
  const row = { name: 'm:mr', type: 'tag', attribs: {}, children: [] };
  targetParent.children.push(row);
  return row;
}
function munderover(element, targetParent, previousSibling, nextSibling, ancestors) {
  // Munderover handler. Expects exactly three children:
  // [base, underscript, overscript]. With any other child count the parent
  // target is returned unchanged ("treat as mrow").
  //
  // element      - source element being converted
  // targetParent - output node whose `children` receives the result
  // ancestors    - source ancestors, nearest first; the element is prepended
  //                before the list is handed to walker
  // previousSibling / nextSibling are not used here.
  if (element.children.length !== 3) {
    // treat as mrow
    return targetParent;
  }
  const lineage = [element, ...ancestors];
  const base = element.children[0];
  const underscript = element.children[1];
  const overscript = element.children[2];
  //
  // m:nAry
  //
  // Conditions:
  // 1. base text must be nary operator
  // 2. no accents
  const naryChar = getNary(base);
  // Attributes live on `attribs` (as in msub / msup / msubsup); reading
  // `attributes` always yielded undefined, so the accent guard never fired.
  if (
    naryChar &&
    element.attribs?.accent?.toLowerCase() !== 'true' &&
    element.attribs?.accentunder?.toLowerCase() !== 'true'
  ) {
    const topTarget = getNaryTarget(naryChar, element, 'undOvr');
    element.isNary = true;
    const subscriptTarget = {
      name: 'm:sub',
      type: 'tag',
      attribs: {},
      children: []
    };
    const superscriptTarget = {
      name: 'm:sup',
      type: 'tag',
      attribs: {},
      children: []
    };
    walker(underscript, subscriptTarget, false, false, lineage);
    walker(overscript, superscriptTarget, false, false, lineage);
    topTarget.children.push(subscriptTarget);
    topTarget.children.push(superscriptTarget);
    // Empty m:e placeholder after the limits.
    topTarget.children.push({ type: 'tag', name: 'm:e', attribs: {}, children: [] });
    targetParent.children.push(topTarget);
    return;
  }
  // Fallback: m:limUpp whose m:e wraps an m:limLow around the base.
  const baseTarget = {
    name: 'm:e',
    type: 'tag',
    attribs: {},
    children: []
  };
  walker(base, baseTarget, false, false, lineage);
  const underscriptTarget = {
    name: 'm:lim',
    type: 'tag',
    attribs: {},
    children: []
  };
  const overscriptTarget = {
    name: 'm:lim',
    type: 'tag',
    attribs: {},
    children: []
  };
  walker(underscript, underscriptTarget, false, false, lineage);
  walker(overscript, overscriptTarget, false, false, lineage);
  targetParent.children.push({
    type: 'tag',
    name: 'm:limUpp',
    attribs: {},
    children: [
      {
        type: 'tag',
        name: 'm:e',
        attribs: {},
        children: [
          {
            type: 'tag',
            name: 'm:limLow',
            attribs: {},
            children: [baseTarget, underscriptTarget]
          }
        ]
      },
      overscriptTarget
    ]
  });
  // Don't iterate over children in the usual way.
}
function getStyle(element, ancestors, previousStyle = {}) {
const elAttributes = element.attribs || {};
const color =
elAttributes.mathcolor ||
ancestors.find(
(element) => element.name === 'mstyle' && element.attribs && element.attribs.color
)?.attribs.color ||
'';
// const minsize = parseFloat(elAttributes.scriptminsize || ancestors.find(element => element.name === 'mstyle' && element.attribs && element.attribs.scriptminsize)?.attribs.scriptminsize || '8pt')
// const sizemultiplier = parseFloat(elAttributes.scriptsizemultiplier || ancestors.find(element => element.name === 'mstyle' && element.attribs && element.attribs.scriptsizemultiplier)?.attribs.scriptsizemultiplier || '0.71')
const size =
elAttributes.mathsize ||
ancestors.find(
(element) => element.name === 'mstyle' && element.attribs && element.attribs.mathsize
)?.attribs.mathsize ||
'';
const scriptlevel =
elAttributes.scriptlevel ||
ancestors.find(
(element) => element.name === 'mstyle' && element.attribs && element.attribs.scriptlevel
)?.attribs.scriptlevel ||
'';
const background =
elAttributes.mathbackground ||
ancestors.find(
(element) => elemen