@wiris/mathtype-html-integration-devkit
Version:
Allows integrating MathType Web into any JavaScript HTML WYSIWYG rich text editor.
539 lines (482 loc) • 20.9 kB
JavaScript
import Util from "./util";
import Latex from "./latex";
import MathML from "./mathml";
import Image from "./image";
import Accessibility from "./accessibility";
import ServiceProvider from "./serviceprovider";
import Configuration from "./configuration";
import Constants from "./constants";
// eslint-disable-next-line no-unused-vars
import md5 from "./md5";
/**
 * @classdesc
 * This class represents a MathML parser. Converts MathML into formulas depending on the
 * image format (SVG, PNG, base64) and the save mode (XML, safeXML, Image) configured
 * in the backend.
 */
export default class Parser {
  /**
   * Converts a MathML string to an img element.
   * @param {Document} creator - Document object to call createElement method.
   * @param {string} mathml - MathML code
   * @param {Object} wirisProperties - object containing WIRIS custom properties. It is
   * copied, never modified.
   * @param {string} language - custom language for accessibility.
   * @returns {HTMLImageElement|null} the formula image corresponding to initial MathML string.
   * In performance mode, null is returned when the fallback showimage call (or the parsing
   * of its response) throws.
   * @static
   */
  static mathmlToImgObject(creator, mathml, wirisProperties, language) {
    const imgObject = creator.createElement("img");
    imgObject.align = "middle";
    imgObject.style.maxWidth = "none";
    // Take into account the backend config: editor parameters are the defaults and the
    // caller's properties override them. The spread builds a fresh object.
    const data = { ...Configuration.get("editorParameters"), ...(wirisProperties || {}) };
    data.mml = mathml;
    data.lang = language;
    // Request metrics of the generated image.
    data.metrics = "true";
    data.centerbaseline = "false";
    // Full base64 method (edit & save).
    if (Configuration.get("saveMode") === "base64" && Configuration.get("base64savemode") === "default") {
      data.base64 = true;
    }
    // Render js params: _wrs_int_wirisProperties contains some js render params.
    // Since MathML can support render params, js params should be send only to editor.
    imgObject.className = Configuration.get("imageClassName");
    if (mathml.indexOf('class="') !== -1) {
      // We check here if the MathML has been created from a customEditor (such chemistry)
      // to add custom editor name attribute to img object (if necessary).
      let customEditorName = mathml.substring(mathml.indexOf('class="') + 'class="'.length, mathml.length);
      customEditorName = customEditorName.substring(0, customEditorName.indexOf('"'));
      // Drops the first four characters of the class value.
      // NOTE(review): presumably a fixed "wrs_" prefix - confirm against the editor output.
      customEditorName = customEditorName.substring(4, customEditorName.length);
      imgObject.setAttribute(Configuration.get("imageCustomEditorName"), customEditorName);
    }
    // Performance enabled.
    if (
      Configuration.get("wirisPluginPerformance") &&
      (Configuration.get("saveMode") === "xml" || Configuration.get("saveMode") === "safeXml")
    ) {
      // First try the cached image (request by formula md5).
      let result = JSON.parse(Parser.createShowImageSrc(data, language));
      if (result.status === "warning") {
        // POST call.
        // if the mathml is malformed, this function will throw an exception.
        try {
          result = JSON.parse(ServiceProvider.getService("showimage", data));
        } catch (e) {
          return null;
        }
      }
      // Unwrap the payload: the service response nests the image data under "result".
      ({ result } = result);
      if (result.format === "png") {
        imgObject.src = `data:image/png;base64,${result.content}`;
      } else {
        imgObject.src = `data:image/svg+xml;charset=utf8,${Util.urlEncode(result.content)}`;
      }
      imgObject.setAttribute(Configuration.get("imageMathmlAttribute"), MathML.safeXmlEncode(mathml));
      Image.setImgSize(imgObject, result.content, true);
      if (Configuration.get("enableAccessibility")) {
        if (typeof result.alt === "undefined") {
          imgObject.alt = Accessibility.mathMLToAccessible(mathml, language, data);
        } else {
          imgObject.alt = result.alt;
        }
      }
    } else {
      const result = Parser.createImageSrc(mathml, data);
      imgObject.setAttribute(Configuration.get("imageMathmlAttribute"), MathML.safeXmlEncode(mathml));
      imgObject.src = result;
      Image.setImgSize(
        imgObject,
        result,
        Configuration.get("saveMode") === "base64" && Configuration.get("base64savemode") === "default",
      );
      if (Configuration.get("enableAccessibility")) {
        imgObject.alt = Accessibility.mathMLToAccessible(mathml, language, data);
      }
    }
    // Parser.observer only exists when MutationObserver is available (see end of file).
    if (typeof Parser.observer !== "undefined") {
      Parser.observer.observe(imgObject);
    }
    // Role math https://www.w3.org/TR/wai-aria/roles#math.
    imgObject.setAttribute("role", "math");
    return imgObject;
  }

  /**
   * Returns the source to showimage service by calling createimage service. The
   * output of the createimage service is a URL path pointing to showimage service.
   * This method is called when performance is disabled.
   * @param {string} mathml - MathML code. Not read here: the MathML travels inside data.
   * @param {Object} data - data object containing service parameters. Its base64 flag is
   * set in place when the full base64 save mode is configured.
   * @returns {string} the showimage path.
   */
  static createImageSrc(mathml, data) {
    // Full base64 method (edit & save).
    if (Configuration.get("saveMode") === "base64" && Configuration.get("base64savemode") === "default") {
      data.base64 = true;
    }
    let result = ServiceProvider.getService("createimage", data);
    if (result.indexOf("@BASE@") !== -1) {
      // Replacing '@BASE@' with the base URL of createimage
      // (the service path without its last segment).
      const baseParts = ServiceProvider.getServicePath("createimage").split("/");
      baseParts.pop();
      result = result.split("@BASE@").join(baseParts.join("/"));
    }
    return result;
  }

  /**
   * Parses initial HTML code. If the HTML contains data generated by WIRIS,
   * this data would be converted as following:
   * <pre>
   * MathML code: Image containing the corresponding MathML formulas.
   * MathML code with LaTeX annotation : LaTeX string.
   * </pre>
   * @param {string} code - HTML code containing MathML data.
   * @param {string} language - language to create image alt text.
   * @returns {string} HTML code with the original MathML converted into LaTeX and images.
   */
  static initParse(code, language) {
    /* Note: The code inside this function has been inverted.
    If you invert again the code then you cannot use correctly LaTeX
    in Moodle.
    */
    const savedModeCode = Parser.initParseSaveMode(code, language);
    return Parser.initParseEditMode(savedModeCode);
  }

  /**
   * Parses initial HTML code depending on the save mode. Transforms all MathML
   * occurrences for its correspondent image or LaTeX.
   * @param {string} code - HTML code to be parsed
   * @param {string} language - language to create image alt text.
   * @returns {string} HTML code parsed.
   */
  static initParseSaveMode(code, language) {
    if (Configuration.get("saveMode")) {
      // Converting XML to tags.
      code = Latex.parseMathmlToLatex(code, Constants.safeXmlCharacters);
      code = Latex.parseMathmlToLatex(code, Constants.xmlCharacters);
      code = Parser.parseMathmlToImg(code, Constants.safeXmlCharacters, language);
      code = Parser.parseMathmlToImg(code, Constants.xmlCharacters, language);
      if (Configuration.get("saveMode") === "base64" && Configuration.get("base64savemode") === "image") {
        code = Parser.codeImgTransform(code, "base642showimage");
      }
    }
    return code;
  }

  /**
   * Parses initial HTML code depending on the edit mode.
   * If 'latex' parseMode is enabled all formula images whose MathML contains an annotation
   * with encoding='LaTeX' will be converted into a LaTeX string ($$...$$) instead of an image.
   * Images whose MathML attribute or LaTeX annotation is not properly terminated are left
   * untouched.
   * @param {string} code - HTML code containing MathML.
   * @returns {string} parsed HTML code.
   */
  static initParseEditMode(code) {
    if (Configuration.get("parseModes").indexOf("latex") === -1) {
      return code;
    }
    const token = 'encoding="LaTeX">';

    // Returns the raw LaTeX annotation stored in the img code, or null when the image
    // carries no complete LaTeX annotation.
    const extractLatex = (imgCode) => {
      // The MathML lives in the configured attribute; fall back to the alt attribute.
      let attributeToken = ` ${Configuration.get("imageMathmlAttribute")}="`;
      let valueStart = imgCode.indexOf(attributeToken);
      if (valueStart === -1) {
        attributeToken = ' alt="';
        valueStart = imgCode.indexOf(attributeToken);
      }
      if (valueStart === -1) {
        return null;
      }
      valueStart += attributeToken.length;
      const valueEnd = imgCode.indexOf('"', valueStart);
      if (valueEnd === -1) {
        // Unterminated attribute: substring(valueStart, -1) would return unrelated text.
        return null;
      }
      const mathml = Util.htmlSanitize(MathML.safeXmlDecode(imgCode.substring(valueStart, valueEnd)));
      const tokenPosition = mathml.indexOf(token);
      if (tokenPosition === -1) {
        return null;
      }
      const latexStart = tokenPosition + token.length;
      const latexEnd = mathml.indexOf("</annotation>", latexStart);
      if (latexEnd === -1) {
        // Unterminated annotation: never emit the MathML prefix as if it were LaTeX.
        return null;
      }
      return mathml.substring(latexStart, latexEnd);
    };

    const imgList = Util.getElementsByNameFromString(code, "img", true);
    // While replacing images with latex, the indexes of the found images changes
    // respecting the original code, so this carry is needed.
    let carry = 0;
    for (let i = 0; i < imgList.length; i += 1) {
      const imgStart = imgList[i].start + carry;
      const imgEnd = imgList[i].end + carry;
      const imgCode = code.substring(imgStart, imgEnd);
      // Only formula images (marked with the configured class) are candidates.
      if (imgCode.indexOf(` class="${Configuration.get("imageClassName")}"`) !== -1) {
        const latex = extractLatex(imgCode);
        if (latex !== null) {
          const replaceText = `$$${Util.htmlEntitiesDecode(latex)}$$`;
          code = code.substring(0, imgStart) + replaceText + code.substring(imgEnd);
          carry += replaceText.length - (imgList[i].end - imgList[i].start);
        }
      }
    }
    return code;
  }

  /**
   * Parses end HTML code. The end HTML code is HTML code with embedded images
   * or LaTeX formulas created with MathType. <br>
   * By default this method converts the formula images and LaTeX strings in MathML. <br>
   * If image mode is enabled the images will not be converted into MathML. For further information see {@link https://docs.wiris.com/mathtype/en/mathtype-integrations/mathtype-web-interface-features/full-mathml-mode---wirisplugins-js.html}.
   * @param {string} code - HTML to be parsed
   * @returns {string} the HTML code parsed.
   */
  static endParse(code) {
    // Transform LaTeX occurrences to MathML elements.
    const codeEndParsedEditMode = Parser.endParseEditMode(code);
    // Transform img elements to MathML elements.
    return Parser.endParseSaveMode(codeEndParsedEditMode);
  }

  /**
   * Parses end HTML code depending on the edit mode.
   * - LaTeX is an enabled parse mode, all LaTeX occurrences ($$...$$) will be converted into MathML.
   * A "$$" without a closing "$$" is copied to the output unchanged.
   * @param {string} code - HTML code to be parsed.
   * @returns {string} HTML code parsed.
   */
  static endParseEditMode(code) {
    // Converting LaTeX to MathML.
    if (Configuration.get("parseModes").indexOf("latex") !== -1) {
      let output = "";
      let endPosition = 0;
      let startPosition = code.indexOf("$$");
      while (startPosition !== -1) {
        output += code.substring(endPosition, startPosition);
        endPosition = code.indexOf("$$", startPosition + 2);
        if (endPosition !== -1) {
          // Before, it was a condition here to execute the next codelines
          // 'latex.indexOf('<') == -1'.
          // We don't know why it was used, but seems to have a conflict with
          // latex formulas that contains '<'.
          const latex = code.substring(startPosition + 2, endPosition);
          const decodedLatex = Util.htmlEntitiesDecode(latex);
          let mathml = Util.htmlSanitize(Latex.getMathMLFromLatex(decodedLatex, true));
          if (!Configuration.get("saveHandTraces")) {
            // Remove hand traces.
            mathml = MathML.removeAnnotation(mathml, "application/json");
          }
          output += mathml;
          endPosition += 2;
        } else {
          // No closing delimiter: keep the "$$" literally and continue after it.
          output += "$$";
          endPosition = startPosition + 2;
        }
        startPosition = code.indexOf("$$", endPosition);
      }
      output += code.substring(endPosition, code.length);
      code = output;
    }
    return code;
  }

  /**
   * Parses end HTML code depending on the save mode. Converts all
   * images into the element determined by the save mode:
   * - xml: Parses images formulas into MathML.
   * - safeXml: Parses images formulas into safeMathML
   * - base64: Parses images into base64 images (only when base64savemode is 'image').
   * - image: Parse images into images (no parsing)
   * @param {string} code - HTML code to be parsed
   * @returns {string} HTML code parsed.
   */
  static endParseSaveMode(code) {
    const savemode = Configuration.get("saveMode");
    const base64savemode = Configuration.get("base64savemode");
    // xml and safeXml share the transform; codeImgTransform picks the encoding itself.
    if (savemode === "safeXml" || savemode === "xml") {
      return Parser.codeImgTransform(code, "img2mathml");
    }
    if (savemode === "base64" && base64savemode === "image") {
      return Parser.codeImgTransform(code, "img264");
    }
    return code;
  }

  /**
   * Auxiliar function that builds the data object to send to the showimage endpoint
   * @param {Object} data - object containing showimage service parameters.
   * @param {string} language - string containing the language of the formula.
   * @returns {Object} object with the data to send to showimage: every entry of data except
   * 'mml', plus 'formula' (md5 of the render parameters), 'lang' and 'version'.
   */
  static createShowImageSrcData(data, language) {
    // Only these parameters take part in the md5 that identifies the formula.
    const dataMd5 = {};
    const renderParams = [
      "mml",
      "color",
      "centerbaseline",
      "zoom",
      "dpi",
      "fontSize",
      "fontFamily",
      "defaultStretchy",
      "backgroundColor",
      "format",
    ];
    renderParams.forEach((param) => {
      if (typeof data[param] !== "undefined") {
        dataMd5[param] = data[param];
      }
    });
    // Data variables to get.
    const dataObject = {};
    Object.keys(data).forEach((key) => {
      // We don't need mathml in this request we try to get cached.
      // Only need the formula md5 calculated before.
      if (key !== "mml") {
        dataObject[key] = data[key];
      }
    });
    dataObject.formula = com.wiris.js.JsPluginTools.md5encode(Util.propertiesToString(dataMd5));
    dataObject.lang = typeof language === "undefined" ? "en" : language;
    dataObject.version = Configuration.get("version");
    return dataObject;
  }

  /**
   * Returns the result to call showimage service with the formula md5 as parameter.
   * The result could be:
   * - {'status' : warning'} : The image associated to the MathML md5 is not in cache.
   * - {'status' : 'ok' ...} : The image associated to the MathML md5 is in cache.
   * @param {Object} data - object containing showimage service parameters.
   * @param {string} language - string containing the language of the formula.
   * @returns {string} the showimage response exactly as returned by the service provider;
   * callers in this class pass it to JSON.parse.
   */
  static createShowImageSrc(data, language) {
    // Reference the class explicitly (like every other internal call) so the method
    // keeps working when it is invoked detached from Parser.
    const dataObject = Parser.createShowImageSrcData(data, language);
    return ServiceProvider.getService("showimage", Util.httpBuildQuery(dataObject), true);
  }

  /**
   * Transform html img tags inside a html code to mathml, base64 img tags (i.e with base64 on src)
   * or showimage img tags (i.e with showimage.php on src).
   * If an img tag is never closed, the code from that tag onwards is returned unchanged.
   * @param {string} code - HTML code
   * @param {string} mode - base642showimage or img2mathml or img264 transform.
   * @returns {string} html - code transformed.
   */
  static codeImgTransform(code, mode) {
    let output = "";
    let endPosition = 0;
    const pattern = /<img/gi;
    const patternLength = pattern.source.length;
    while (pattern.test(code)) {
      const startPosition = pattern.lastIndex - patternLength;
      output += code.substring(endPosition, startPosition);
      // Look for the '>' closing the tag, skipping over quoted attribute values.
      let i = startPosition + 1;
      while (i < code.length && endPosition <= startPosition) {
        const character = code.charAt(i);
        if (character === '"' || character === "'") {
          const characterNextPosition = code.indexOf(character, i + 1);
          if (characterNextPosition === -1) {
            i = code.length; // End while.
          } else {
            i = characterNextPosition;
          }
        } else if (character === ">") {
          endPosition = i + 1;
        }
        i += 1;
      }
      // endPosition only moves past startPosition when the closing '>' was found. Using
      // '<=' also covers an unclosed tag at index 0 or right after the previous tag.
      if (endPosition <= startPosition) {
        // The img tag is stripped.
        output += code.substring(startPosition, code.length);
        return output;
      }
      // Resume the search after this tag so that an "<img" inside one of its quoted
      // attribute values is not treated as a new tag.
      pattern.lastIndex = endPosition;
      let imgCode = code.substring(startPosition, endPosition);
      const imgObject = Util.createObject(imgCode);
      let xmlCode = imgObject.getAttribute(Configuration.get("imageMathmlAttribute"));
      let convertToXml;
      let convertToSafeXml;
      if (mode === "base642showimage") {
        // Fall back to the alt attribute when the MathML attribute is missing.
        if (xmlCode == null) {
          xmlCode = imgObject.getAttribute("alt");
        }
        xmlCode = MathML.safeXmlDecode(xmlCode);
        imgCode = Parser.mathmlToImgObject(document, xmlCode, null, null);
        output += Util.createObjectCode(imgCode);
      } else if (mode === "img2mathml") {
        if (Configuration.get("saveMode")) {
          if (Configuration.get("saveMode") === "safeXml") {
            convertToXml = true;
            convertToSafeXml = true;
          } else if (Configuration.get("saveMode") === "xml") {
            convertToXml = true;
            convertToSafeXml = false;
          }
        }
        output += Util.getWIRISImageOutput(imgCode, convertToXml, convertToSafeXml);
      } else if (mode === "img264") {
        if (xmlCode == null) {
          xmlCode = imgObject.getAttribute("alt");
        }
        xmlCode = MathML.safeXmlDecode(xmlCode);
        const properties = {};
        properties.base64 = "true";
        imgCode = Parser.mathmlToImgObject(document, xmlCode, properties, null);
        // Metrics.
        Image.setImgSize(imgCode, imgCode.src, true);
        output += Util.createObjectCode(imgCode);
      }
    }
    output += code.substring(endPosition, code.length);
    return output;
  }

  /**
   * Converts all occurrences of MathML to the corresponding image.
   * The content is returned unchanged when it contains no MathML, or when the name of the
   * image MathML attribute appears anywhere in it.
   * @param {string} content - string with valid MathML code.
   * The MathML code doesn't contain semantics.
   * @param {Constants} characters - Constant object containing xmlCharacters
   * or safeXmlCharacters relation.
   * @param {string} language - a valid language code
   * in order to generate formula accessibility.
   * @returns {string} The input string with all the MathML
   * occurrences replaced by the corresponding image.
   */
  static parseMathmlToImg(content, characters, language) {
    const mathTagBegin = `${characters.tagOpener}math`;
    const mathTagEnd = `${characters.tagOpener}/math${characters.tagCloser}`;
    let start = content.indexOf(mathTagBegin);
    if (start === -1) {
      return content;
    }
    // Avoid WIRIS images to be parsed: when the attribute is present no formula is
    // converted and the content is copied verbatim, so return it directly.
    // NOTE(review): this disables conversion for the whole content, not just for the MathML
    // inside the image - confirm that this is intended.
    if (content.indexOf(Configuration.get("imageMathmlAttribute")) !== -1) {
      return content;
    }
    let output = "";
    let end = 0;
    while (start !== -1) {
      output += content.substring(end, start);
      const closingTagPosition = content.indexOf(mathTagEnd, start);
      // An unterminated formula extends to the end of the content.
      end = closingTagPosition === -1 ? content.length : closingTagPosition + mathTagEnd.length;
      if (!MathML.isMathmlInAttribute(content, start)) {
        let mathml = content.substring(start, end);
        mathml =
          characters.id === Constants.safeXmlCharacters.id
            ? MathML.safeXmlDecode(mathml)
            : MathML.mathMLEntities(mathml);
        output += Util.createObjectCode(Parser.mathmlToImgObject(document, mathml, null, language));
      } else {
        // MathML inside an attribute value is kept as it is.
        output += content.substring(start, end);
      }
      start = content.indexOf(mathTagBegin, end);
    }
    output += content.substring(end, content.length);
    return output;
  }
}
// Keeps the formula class on WIRIS images: when a class attribute change removes it,
// the observer below writes it back. Only set up where MutationObserver exists.
if (typeof MutationObserver !== "undefined") {
  // Restores the formula class on a single mutation record, if it was dropped.
  const restoreFormulaClass = (record) => {
    // Only react when the previous class value was exactly the formula class...
    if (record.oldValue !== Configuration.get("imageClassName")) {
      return;
    }
    // ...the changed attribute is the class...
    if (record.attributeName !== "class") {
      return;
    }
    // ...and the new class value no longer contains it.
    if (record.target.className.indexOf(Configuration.get("imageClassName")) !== -1) {
      return;
    }
    record.target.className = Configuration.get("imageClassName");
  };
  const nativeObserver = new MutationObserver((records) => {
    records.forEach((record) => {
      restoreFormulaClass(record);
    });
  });
  // Parser.observer inherits from the real observer so observe() can be overridden
  // on the wrapper while still delegating to the native implementation.
  Parser.observer = Object.create(nativeObserver);
  Parser.observer.Config = { attributes: true, attributeOldValue: true };
  // We use own default config.
  Parser.observer.observe = function (node) {
    const wrapped = Object.getPrototypeOf(this);
    wrapped.observe(node, this.Config);
  };
}