UNPKG

@wiris/mathtype-html-integration-devkit

Version:

Allows integrating MathType Web into any JavaScript HTML WYSIWYG rich text editor.

446 lines (400 loc) 18.5 kB
import Constants from "./constants";
import Util from "./util";

/**
 * @classdesc
 * Collection of static, string-based helpers to inspect, encode/decode and
 * transform MathML markup.
 */
export default class MathML {
  /**
   * Checks whether the MathML found at position i lives inside an attribute
   * value of an `<img` tag.
   * @param {string} content - a string containing MathML code.
   * @param {number} i - search index (start of the MathML inside content).
   * @return {boolean} true if it is inside an attribute. false otherwise.
   */
  static isMathmlInAttribute(content, i) {
    // The text preceding `i` is reversed so that one regex anchored at the
    // start can walk backwards from the MathML to the tag opening; that is
    // why the tag name appears as "gmi<" ("<img" reversed).
    // Expanded pattern:
    // ^['"]\s*=\s*[\w-]+(\s*("[^"]*"|'[^']*')\s*=\s*[\w-]+\s*)*\s+gmi<
    const mathAtt = "['\"][\\s]*=[\\s]*[\\w-]+"; // "=att OR '=att
    const attContent = "\"[^\"]*\"|'[^']*'"; // "blabla" OR 'blabla'
    const att = `[\\s]*(${attContent})[\\s]*=[\\s]*[\\w-]+[\\s]*`; // "blabla"=att OR 'blabla'=att
    // Zero or more preceding attributes. The group must not be wrapped in
    // literal quotes, otherwise tags with several attributes never match.
    const atts = `(${att})*`; // "blabla"=att1 "blabla"=att2
    const regex = `^${mathAtt}${atts}[\\s]+gmi<`; // "=att "blabla"=att1 "blabla"=att2 gmi<
    const expression = new RegExp(regex);

    const reversed = content.substring(0, i).split("").reverse().join("");
    return expression.test(reversed);
  }

  /**
   * Decodes an encoded MathML with standard XML tags.
   * We use these entities because IE doesn't support html entities
   * on its attributes sometimes.
   * @param {string} input - string to be decoded.
   * @return {string} decoded string.
   */
  static safeXmlDecode(input) {
    const replaceEach = (text, search, replacement) => text.split(search).join(replacement);

    const safeEntities = Constants.safeXmlCharactersEntities;
    const safe = Constants.safeXmlCharacters;
    const xml = Constants.xmlCharacters;

    // Step 1: entity-encoded safe characters -> safe characters.
    let decoded = input;
    decoded = replaceEach(decoded, safeEntities.tagOpener, safe.tagOpener);
    decoded = replaceEach(decoded, safeEntities.tagCloser, safe.tagCloser);
    decoded = replaceEach(decoded, safeEntities.doubleQuote, safe.doubleQuote);
    // Added to fix problem due to import from 1.9.x.
    decoded = replaceEach(decoded, safeEntities.realDoubleQuote, safe.realDoubleQuote);

    // Step 2 (Blackboard only): swap the "bad" element strings for the good ones.
    const { ltElement, gtElement, ampElement } = Constants.safeBadBlackboardCharacters;
    if ("_wrs_blackboard" in window && window._wrs_blackboard) {
      const good = Constants.safeGoodBlackboardCharacters;
      decoded = replaceEach(decoded, ltElement, good.ltElement);
      decoded = replaceEach(decoded, gtElement, good.gtElement);
      decoded = replaceEach(decoded, ampElement, good.ampElement);
    }

    // Step 3: safe characters -> real XML characters. Order is preserved
    // from the original implementation.
    decoded = replaceEach(decoded, safe.tagOpener, xml.tagOpener);
    decoded = replaceEach(decoded, safe.tagCloser, xml.tagCloser);
    decoded = replaceEach(decoded, safe.doubleQuote, xml.doubleQuote);
    decoded = replaceEach(decoded, safe.ampersand, xml.ampersand);
    decoded = replaceEach(decoded, safe.quote, xml.quote);

    // Step 4: retro-compatibility pass. Older content used "$" instead of "&"
    // to start an entity; the current standard character is "§".
    let returnValue = "";
    let currentEntity = null; // null: outside an entity; string: text seen after "$".
    for (let i = 0; i < decoded.length; i += 1) {
      const character = decoded.charAt(i);
      if (currentEntity === null) {
        if (character === "$") {
          currentEntity = "";
        } else {
          returnValue += character;
        }
      } else if (character === ";") {
        returnValue += `&${currentEntity}`;
        currentEntity = null;
        // NOTE(review): because of the spaces inside the pattern below it can
        // never match a one-character string, so this branch is unreachable
        // and "$name;" sequences are left untouched. Enabling it would change
        // how existing "$...;" text is decoded - confirm the intended legacy
        // format before touching it.
      } else if (character.match(/([a-zA-Z0-9#._-] | '-')/)) {
        // Character is part of an entity.
        currentEntity += character;
      } else {
        returnValue += `$${currentEntity}`; // Is not an entity.
        currentEntity = null;
        i -= 1; // Parse again the current character.
      }
    }
    // Flush a "$..." run left open at the end of the input so it is not lost.
    if (currentEntity !== null) {
      returnValue += `$${currentEntity}`;
    }
    return returnValue;
  }

  /**
   * Encodes a MathML with standard XML tags to a MathML encoded with safe XML tags.
   * We use these entities because IE doesn't support html entities on its attributes sometimes.
   * @param {string} input - input string to be encoded
   * @returns {string} encoded string.
   */
  static safeXmlEncode(input) {
    const xml = Constants.xmlCharacters;
    const safe = Constants.safeXmlCharacters;
    // Replacement order is significant and kept as in the original code.
    const keys = ["tagOpener", "tagCloser", "doubleQuote", "ampersand", "quote"];
    return keys.reduce((encoded, key) => encoded.split(xml[key]).join(safe[key]), input);
  }

  /**
   * Converts special symbols (> 128) to entities and replaces all textual
   * entities by its number entities.
   * @param {string} mathml - MathML string containing - or not - special symbols
   * @returns {string} MathML with all textual entities replaced.
   */
  static mathMLEntities(mathml) {
    // Only syntactically valid character references are given to the HTML
    // parser; anything else between "&" and ";" could contain markup.
    const entityPattern = /^&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);$/;

    let toReturn = "";
    for (let i = 0; i < mathml.length; i += 1) {
      const character = mathml.charAt(i);
      const codePoint = mathml.codePointAt(i);

      if (codePoint > 128) {
        toReturn += `&#${codePoint};`;
        // Code points above 0xFFFF occupy two UTF-16 units: skip the second one.
        if (codePoint > 0xffff) {
          i += 1;
        }
      } else if (character === "&") {
        const end = mathml.indexOf(";", i + 1);
        const candidate = end >= 0 ? mathml.substring(i, end + 1) : "";

        if (entityPattern.test(candidate)) {
          // Let the browser resolve the reference.
          const container = document.createElement("span");
          container.innerHTML = candidate;
          const decodedText = container.textContent || container.innerText;
          if (decodedText === candidate) {
            // Unknown reference: the parser left it untouched, keep it verbatim.
            toReturn += candidate;
          } else {
            toReturn += `&#${Util.fixedCharCodeAt(decodedText, 0)};`;
          }
          i = end;
        } else {
          // Not a character reference: keep the ampersand and continue with
          // the next character so no text is lost.
          toReturn += character;
        }
      } else {
        toReturn += character;
      }
    }
    return toReturn;
  }

  /**
   * Add a custom editor name with the prefix wrs_ to a MathML class attribute.
   * The MathML is returned unchanged when it does not start with "<math" or
   * when the string "class" already appears anywhere in it.
   * @param {string} mathml - a MathML string created with a custom editor, like chemistry.
   * @param {string} customEditor - custom editor name.
   * @returns {string} MathML string with his class containing the editor toolbar string.
   */
  static addCustomEditorClassAttribute(mathml, customEditor) {
    if (!mathml.startsWith("<math") || mathml.includes("class")) {
      return mathml;
    }
    const closeIndex = mathml.indexOf(">");
    if (closeIndex === -1) {
      return mathml;
    }
    // For a self-closing tag the attribute goes before the "/".
    const isSelfClosing = mathml.charAt(closeIndex - 1) === "/";
    const insertAt = isSelfClosing ? closeIndex - 1 : closeIndex;
    return `${mathml.substring(0, insertAt)} class="wrs_${customEditor}"${mathml.substring(insertAt)}`;
  }

  /**
   * Remove a custom editor name from the MathML class attribute.
   * @param {string} mathml - a MathML string.
   * @param {string} customEditor - custom editor name.
   * @returns {string} The input MathML without customEditor name in his class.
   */
  static removeCustomEditorClassAttribute(mathml, customEditor) {
    const editorClass = `wrs_${customEditor}`;

    // Discard MathML without the specified class.
    if (!mathml.includes("class") || !mathml.includes(editorClass)) {
      return mathml;
    }

    // Trivial case: the class attribute holds only the editor name, so the
    // whole attribute is removed - together with its leading space if any.
    const attribute = `class="${editorClass}"`;
    if (mathml.includes(` ${attribute}`)) {
      return mathml.replace(` ${attribute}`, "");
    }
    if (mathml.includes(attribute)) {
      return mathml.replace(attribute, "");
    }

    // Non trivial case: the class attribute contains other names too; only
    // the first occurrence of the editor name is removed.
    return mathml.replace(editorClass, "");
  }

  /**
   * Adds annotation tag in MathML element.
   * @param {String} mathml - valid MathML.
   * @param {String} content - value to put inside annotation tag.
   * @param {String} annotationEncoding - annotation encoding.
   * @returns {String} - 'mathml' with an annotation that contains
   * 'content' and encoding 'encoding'.
   */
  static addAnnotation(mathml, content, annotationEncoding) {
    const annotation = `<annotation encoding="${annotationEncoding}">${content}</annotation>`;

    // If it contains an annotation, it also contains a semantics tag: the
    // new annotation is appended right before </semantics>.
    if (mathml.indexOf("<annotation") !== -1) {
      const closeSemanticsIndex = mathml.indexOf("</semantics>");
      return `${mathml.substring(0, closeSemanticsIndex)}${annotation}${mathml.substring(closeSemanticsIndex)}`;
    }

    if (MathML.isEmpty(mathml)) {
      const endIndexInline = mathml.indexOf("/>");
      const endIndexNonInline = mathml.indexOf(">");
      // When the first ">" belongs to "/>" the tag is self-closing and the
      // "/" has to be dropped before opening the element.
      const isInline = endIndexInline !== -1 && endIndexNonInline === endIndexInline + 1;
      const endIndex = isInline ? endIndexInline : endIndexNonInline;
      return `${mathml.substring(0, endIndex)}><semantics>${annotation}</semantics></math>`;
    }

    // Non-empty MathML: wrap the existing content in <semantics><mrow>.
    const beginMathMLContent = mathml.indexOf(">") + 1;
    const endMathmlContent = mathml.lastIndexOf("</math>");
    const mathmlContent = mathml.substring(beginMathMLContent, endMathmlContent);
    return `${mathml.substring(0, beginMathMLContent)}<semantics><mrow>${mathmlContent}</mrow>${annotation}</semantics></math>`; // eslint-disable-line max-len
  }

  /**
   * Removes specific annotation tag in MathML element.
   * When the removed annotation is the only one, the semantics tag is
   * removed as well.
   * @param {String} mathml - valid MathML.
   * @param {String} annotationEncoding - annotation encoding to remove.
   * @returns {String} - 'mathml' without the annotation encoding specified.
   */
  static removeAnnotation(mathml, annotationEncoding) {
    const openAnnotationTag = `<annotation encoding="${annotationEncoding}">`;
    const closeAnnotationTag = "</annotation>";

    const startAnnotationIndex = mathml.indexOf(openAnnotationTag);
    if (startAnnotationIndex === -1) {
      return mathml;
    }

    // Look for any other annotation besides the one being removed.
    let otherAnnotationFound = false;
    let annotationIndex = mathml.indexOf("<annotation");
    while (annotationIndex !== -1 && !otherAnnotationFound) {
      otherAnnotationFound = annotationIndex !== startAnnotationIndex;
      annotationIndex = mathml.indexOf("<annotation", annotationIndex + 1);
    }

    if (!otherAnnotationFound) {
      return MathML.removeSemantics(mathml);
    }

    // Cut out only the requested annotation element.
    const closeIndex = mathml.indexOf(closeAnnotationTag, startAnnotationIndex);
    const endAnnotationIndex = closeIndex + closeAnnotationTag.length;
    return mathml.substring(0, startAnnotationIndex) + mathml.substring(endAnnotationIndex);
  }

  /**
   * Removes semantics tag to mathml.
   * When using Hand to create formulas, it adds the mrow tag due to the
   * semantics one, this one is also removed.
   * @param {string} mathml - MathML string.
   * @returns {string} - 'mathml' without semantics tag.
   */
  static removeSemantics(mathml) {
    // An `<mrow>` found right after the `<semantics>` starting tag is removed as well.
    const semanticsStartingTagRegex = /<semantics>\s*?(<mrow>)?/gm;
    // A `</mrow>` found right before the first `<annotation` is removed as well,
    // alongside the annotation(s) and the `</semantics>` closing tag.
    const semanticsEndingTagRegex = /(<\/mrow>)?\s*<annotation[\W\w]*?<\/semantics>/gm;

    return mathml.replace(semanticsStartingTagRegex, "").replace(semanticsEndingTagRegex, "");
  }

  /**
   * Removes semantics tag to element that contains mathml written with the
   * safe XML characters (« and »).
   * When using Hand to create formulas, it adds the mrow tag due to the
   * semantics one, this one is also removed.
   * @param {string} element - Inner HTML text string.
   * @returns {string} - 'mathml' without semantics tag.
   */
  static removeSafeXMLSemantics(element) {
    // An `«mrow»` found right after the `«semantics»` starting tag is removed as well.
    const semanticsSafeStartingTagRegex = /«semantics»\s*?(«mrow»)?/gm;
    // A `«/mrow»` found right before the first `«annotation` is removed as well,
    // alongside the annotation(s) and the `«/semantics»` closing tag.
    const semanticsSafeEndingTagRegex = /(«\/mrow»)?\s*«annotation[\W\w]*?«\/semantics»/gm;

    return element.replace(semanticsSafeStartingTagRegex, "").replace(semanticsSafeEndingTagRegex, "");
  }

  /**
   * Transforms all xml mathml occurrences that contain semantics to the same
   * xml mathml occurrences without semantics.
   * @param {string} text - string that can contain xml mathml occurrences.
   * @param {Constants} [characters] - Constant object containing xmlCharacters
   * or safeXmlCharacters relation.
   * xmlCharacters by default.
   * @returns {string} - 'text' with all xml mathml occurrences without annotation tag.
   */
  static removeSemanticsOcurrences(text, characters = Constants.xmlCharacters) {
    const { tagOpener, tagCloser } = characters;
    const mathTagStart = `${tagOpener}math`;
    const mathTagEnd = `${tagOpener}/math${tagCloser}`;
    const semanticsTagStart = `${tagOpener}semantics${tagCloser}`;
    const annotationTagStart = `${tagOpener}annotation encoding=`;

    let output = "";
    let copiedUpTo = 0; // Everything before this index is already in `output`.
    let start = text.indexOf(mathTagStart);

    while (start !== -1) {
      const closeIndex = text.indexOf(mathTagEnd, start);
      if (closeIndex === -1) {
        // No closing math tag ahead: nothing else can be rewritten.
        break;
      }

      const semanticsIndex = text.indexOf(semanticsTagStart, start);
      const annotationIndex = semanticsIndex === -1
        ? -1
        : text.indexOf(annotationTagStart, semanticsIndex);

      // The semantics/annotation pair must belong to the element that ends at
      // `closeIndex`; otherwise a later formula would be consumed here and
      // the text in between would be emitted twice.
      if (annotationIndex !== -1 && annotationIndex < closeIndex) {
        // Pending text plus the math opening tag.
        output += text.substring(copiedUpTo, semanticsIndex);
        // Content between <semantics> and the first annotation.
        output += text.substring(semanticsIndex + semanticsTagStart.length, annotationIndex);
        output += mathTagEnd;
        copiedUpTo = closeIndex + mathTagEnd.length;
        start = text.indexOf(mathTagStart, copiedUpTo);
      } else {
        // Nothing to strip in this occurrence; it is copied verbatim later.
        start = text.indexOf(mathTagStart, start + mathTagStart.length);
      }
    }

    output += text.substring(copiedUpTo);
    return output;
  }

  /**
   * Returns true if a MathML contains a certain class.
   * Only the text between the first "class" occurrence and the next ">" is
   * inspected.
   * @param {string} mathML - input MathML.
   * @param {string} className - className.
   * @returns {boolean} true if the input MathML contains the input class.
   * false otherwise.
   * @static
   */
  static containClass(mathML, className) {
    const classIndex = mathML.indexOf("class");
    if (classIndex === -1) {
      return false;
    }
    const classTagEndIndex = mathML.indexOf(">", classIndex);
    const classTag = mathML.substring(classIndex, classTagEndIndex);
    return classTag.indexOf(className) !== -1;
  }

  /**
   * Returns true if mathml is empty. Otherwise, false.
   * @param {string} mathml - valid MathML with standard XML tags.
   * @returns {boolean} - true if mathml is empty. Otherwise, false.
   */
  static isEmpty(mathml) {
    // MathML can have the shape '<math></math>' or '<math />'.
    const firstCloseTagIndex = mathml.indexOf(">");
    const firstCloseTagInlineIndex = mathml.indexOf("/>");

    // Second shape: the first ">" is the one of "/>", so the tag is
    // self-closing and therefore empty.
    if (firstCloseTagInlineIndex !== -1 && firstCloseTagInlineIndex === firstCloseTagIndex - 1) {
      return true;
    }

    // First shape: empty when the closing math tag (optionally namespaced)
    // starts right after the opening tag.
    const mathTagEndMatch = /<\/(.+:)?math>/.exec(mathml);
    if (mathTagEndMatch) {
      return firstCloseTagIndex + 1 === mathTagEndMatch.index;
    }
    return false;
  }

  /**
   * Encodes html entities inside properties.
   * @param {String} mathml - valid MathML with standard XML tags.
   * @returns {String} - 'mathml' with property entities encoded.
   */
  static encodeProperties(mathml) {
    // Matches every name="value" pair, e.g. in
    // <math propertyOne="somethingOne"><children propertyTwo="somethingTwo"></children></math>.
    const propertyRegex = /\w+=".*?"/g;

    return mathml.replace(propertyRegex, (match) => {
      const quoteIndex = match.indexOf('"');
      const prefix = match.substring(0, quoteIndex + 1); // name="
      const propertyValue = match.substring(quoteIndex + 1, match.length - 1);
      return `${prefix}${Util.htmlEntities(propertyValue)}"`;
    });
  }
}