/*
 * @wiris/mathtype-viewer
 * Version: (unspecified)
 * The MathType viewer renders all MathML and LaTeX-like formulas in a webpage using the MathType rendering engine.
 * 195 lines (171 loc) • 7.03 kB
 * text/typescript
 */
import { Properties } from "./properties";
import { showImage, createImage, mathml2accessible } from "./services";
import MathML from "@wiris/mathtype-html-integration-devkit/src/mathml";
/**
* Data obtained when rendering image. Data needed to set the formula image parameters.
*
* The dimension fields are numeric values carried as strings; `setImageProperties`
* converts them with unary `+` before using them.
*/
interface FormulaData {
/** Rendered image markup; it is URI-encoded into an `image/svg+xml` data URL used as the image `src`. */
content: string;
/** Baseline as a numeric string; it is subtracted from `height` to compute the image's vertical alignment in px. */
baseline: string;
/** Image height as a numeric string; dimensions are only applied to the image when this is greater than 0. */
height: string;
/** Image width as a numeric string; assigned to the image together with `height`. */
width: string;
/** Alternative text; when missing or empty, `setImageProperties` requests it through `mathml2accessible`. */
alt?: string;
}
/**
 * Matches one safe MathML formula, i.e. a `«math … «/math»` span.
 *
 * `[\s\S]` (instead of `.`) lets a formula span several lines, and the pattern is
 * deliberately NOT global so that `test()` keeps no `lastIndex` state between calls.
 */
const SAFE_MATHML_PATTERN = /«math[\s\S]*?«\/math»/;

/**
 * Look for text nodes that contain safe MathML («math» … «/math») formulas.
 * @param root - Any DOM element that can contain MathML.
 * @returns The text nodes under `root` whose value contains at least one safe MathML
 * formula, in the order the node iterator yields them.
 */
function findSafeMathMLTextNodes(root: HTMLElement): Node[] {
  // Only text nodes are visited; the filter keeps those holding a complete «math»…«/math» span.
  const nodeIterator = document.createNodeIterator(root, NodeFilter.SHOW_TEXT, (node) =>
    SAFE_MATHML_PATTERN.test(node.nodeValue ?? "") ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT,
  );

  const safeNodes: Node[] = [];
  let currentNode: Node | null;
  while ((currentNode = nodeIterator.nextNode()) !== null) {
    safeNodes.push(currentNode);
  }
  return safeNodes;
}
/**
 * Parse the DOM looking for safe MathML («math» … «/math») text and replace each such text node
 * with the markup obtained by decoding it, so that real `<math>` elements end up in the document.
 *
 * Text nodes located inside any element matched by `ignored_mathml_containers` are left untouched.
 *
 * NOTE(review): the decoded text is parsed as HTML through `createContextualFragment`, so the
 * whole text node (not only the «math» span) is interpreted as markup — confirm that the content
 * reaching this function is trusted.
 *
 * @param root - Any DOM element that can contain MathML.
 * @param ignored_mathml_containers - CSS selector list of containers to skip; `null` or a blank
 * string means that nothing is ignored.
 */
function decodeSafeMathML(root: HTMLElement, ignored_mathml_containers: string | null) {
  const safeNodes = findSafeMathMLTextNodes(root);
  if (safeNodes.length === 0) {
    return;
  }

  // `querySelectorAll` throws on an empty selector and would coerce `null` to the selector "null",
  // so the DOM is only queried when a real selector was configured.
  const selector = ignored_mathml_containers?.trim() ?? "";
  const blackListedNodes = selector === "" ? null : root.querySelectorAll(selector);

  for (const safeNode of safeNodes) {
    if (blackListedNodes !== null && blackListedNodes.length > 0 && isNodeBlacklisted(safeNode, blackListedNodes)) {
      continue;
    }

    const mathml = MathML.safeXmlDecode(safeNode.textContent ?? "");

    // Swap the text node for the parsed markup in a single DOM operation.
    const fragment = document.createRange().createContextualFragment(mathml);
    safeNode.parentNode?.replaceChild(fragment, safeNode);
  }
}
/**
 * Tells whether a node lives inside (or is) one of the blacklisted elements.
 *
 * @param node - The node to check.
 * @param blackListedNodes - The list of blacklisted elements.
 * @returns True as soon as one blacklisted element contains the node, false when none does.
 */
function isNodeBlacklisted(node: Node, blackListedNodes: NodeListOf<Element>): boolean {
  for (const container of Array.from(blackListedNodes)) {
    if (container.contains(node)) {
      return true;
    }
  }
  return false;
}
/**
 * Parse the DOM looking for <math> elements and replace them with the corresponding rendered images within the given element.
 *
 * Nothing happens unless `properties.viewer` is `"image"` or `"mathml"`. Safe MathML text is decoded
 * into real `<math>` elements first; then every `<math>` element is rendered, one request at a time,
 * and swapped for the resulting image. A formula that fails to render is logged and left in place,
 * and the remaining formulas are still processed.
 *
 * @param properties - Properties of the viewer.
 * @param root - Any DOM element that can contain MathML.
 */
export async function renderMathML(properties: Properties, root: HTMLElement): Promise<void> {
  if (properties.viewer !== "image" && properties.viewer !== "mathml") {
    return;
  }

  decodeSafeMathML(root, properties.ignored_containers);

  // Snapshot the live collection: elements are replaced while iterating.
  const mathElements = Array.from(root.getElementsByTagName("math"));

  for (const mathElement of mathElements) {
    const mml = serializeHtmlToXml(mathElement.outerHTML);
    try {
      let imgSource;

      // Transform mml to img.
      if (properties.wirispluginperformance === "true") {
        imgSource = await showImage(
          mml,
          properties.lang,
          properties.editorServicesRoot,
          properties.editorServicesExtension,
        );
      } else {
        imgSource = await createImage(
          mml,
          properties.lang,
          properties.editorServicesRoot,
          properties.editorServicesExtension,
        );
      }

      // Set img properties.
      const img = await setImageProperties(properties, imgSource, mml);

      // Replace the MathML for the generated formula image.
      mathElement.parentNode?.replaceChild(img, mathElement);
    } catch (error) {
      // Keep the underlying error: the failure may come from the rendering service
      // rather than from the MathML itself.
      console.error(`Cannot render ${mml}: invalid MathML format.`, error);
    }
  }
}
/**
 * Builds the `<img>` element for a rendered formula, carrying all MathType properties.
 *
 * The alternative text is only set when the MathML contains none of the `corruptMathML` tokens;
 * if `data.alt` is missing it is requested through `mathml2accessible` and stored back on `data`.
 *
 * @param properties - Properties of the viewer.
 * @param data - Object containing image values.
 * @param mml - The MathML of the formula image being created.
 * @returns Formula image.
 */
async function setImageProperties(properties: Properties, data: FormulaData, mml: string): Promise<HTMLImageElement> {
  const image = document.createElement("img");

  // The rendered svg travels inline as an encoded data URL.
  image.src = `data:image/svg+xml;charset=utf8,${encodeURIComponent(data.content)}`;

  image.setAttribute(properties.wiriseditormathmlattribute, mml);
  image.setAttribute("class", "Wirisformula");
  image.setAttribute("role", "math");

  // Dimensions are applied only when the render reported a positive height.
  const height = +data.height;
  if (height > 0) {
    image.style.verticalAlign = `-${height - +data.baseline}px`;
    image.height = height;
    image.width = +data.width;
  }

  // No alt text for formulas containing tokens that have no accessible translation.
  const hasUntranslatableToken = corruptMathML.some((token) => mml.includes(token));
  if (hasUntranslatableToken) {
    return image;
  }

  try {
    if (!data.alt) {
      const accessible = await mathml2accessible(
        mml,
        properties.lang,
        properties.editorServicesRoot,
        properties.editorServicesExtension,
      );
      data.alt = accessible.text;
    }
    image.alt = data.alt ?? "";
  } catch {
    image.alt = "Alternative text not available";
  }

  return image;
}
/**
* ! --------------- IMPORTANT CONTEXT --------------- !
*
* 1. **Attribute Context**: When the HTML parser encounters entities within attribute values, it automatically decodes them. So, `&lt;` becomes `<` in your `open` attribute.
* 2. **Text Context**: However, within text nodes (the content between tags), entities are preserved to prevent potential issues with HTML syntax. For example, if `<` were not encoded as `&lt;` in a text node, it could be mistaken for the start of a new tag.
*
* So, when you call `outerHTML`, it generates a string representation of the element's opening tag (including attributes), content, and closing tag. The attributes have their entities decoded, but the text nodes keep their entities encoded.
* This is standard behavior for HTML parsers and is not specific to any particular browser or JavaScript environment.
*/
/**
 * Converts text representing HTML markup into text representing the same markup as XML
 * (encoding/decoding HTML entities when needed): the text is parsed into a document
 * fragment and that fragment is serialized with `XMLSerializer`.
 * @param text - The string encoded as HTML.
 * @returns The same string encoded as XML.
 */
export function serializeHtmlToXml(text: string): string {
  const fragment = document.createRange().createContextualFragment(text);
  return new XMLSerializer().serializeToString(fragment);
}
// Set of MathML tags and characters which don't have an accessible text associated
// and can not be translated or transformed to LaTeX.
// The white square brackets (U+27E6, U+27E7) are listed both as literal characters and as
// numeric character references (hexadecimal and decimal), so either spelling is detected.
const corruptMathML: readonly string[] = [
  "⟦",
  "&#x27E6;",
  "&#10214;",
  "⟧",
  "&#x27E7;",
  "&#10215;",
  "mscarries",
  "mscarry",
  "msgroup",
  "mstack",
  "msline",
  "msrow",
];