UNPKG

mathlive

Version:

Render and edit beautifully typeset math

github.com/arnog/mathlive

1,492 lines (1,313 loc) • 82 kB

JavaScript

/**
 * This module parses and outputs an Abstract Syntax Tree representing the
 * formula using the {@tutorial MASTON} format.
 *
 * To use it, use the {@linkcode MathAtom#toAST MathAtom.toAST()} method.
 * @module addons/maston
 * @private
 */

import Lexer from '../core/lexer.js';
import MathAtom from '../core/mathAtom.js';
import ParserModule from '../core/parser.js';
import Definitions from '../core/definitions.js';

// The CANONICAL_NAMES table maps a LaTeX command (e.g. '\\times') or a
// Unicode character (e.g. '÷') to its "canonical name" (e.g. '*', '/').
// It is consulted first by getCanonicalName(); inputs that are not listed
// here fall back to the symbol definitions or to the command name itself.
const CANONICAL_NAMES = {
    // CONSTANTS
    '\\imaginaryI': '\u2148',
    '\\imaginaryJ': '\u2149',
    '\\pi': 'π',
    '\\exponentialE': '\u212f',

    // ARITHMETIC
    '﹢': '+', // SMALL PLUS SIGN
    '＋': '+', // FULL WIDTH PLUS SIGN
    '−': '-', // MINUS SIGN
    '-': '-', // HYPHEN-MINUS
    '﹣': '-', // SMALL HYPHEN-MINUS
    '－': '-', // FULLWIDTH HYPHEN-MINUS
    '\\times': '*',
    '\\cdot': '*',
    '⨉': '*', // N-ARY TIMES OPERATOR U+
    '️✖': '*', // MULTIPLICATION SYMBOL
    '️×': '*', // MULTIPLICATION SIGN
    '.': '*',
    '÷': '/', // DIVISION SIGN
    // '/': '/', // SOLIDUS
    '⁄': '/', // FRACTION SLASH
    '／': '/', // FULLWIDTH SOLIDUS
    '!': 'factorial',
    '\\mp': 'minusplus', // MINUS-PLUS SIGN
    '\\ne': '!=',
    '\\coloneq': ':=',
    '\\questeq': '?=',
    '\\approx': 'approx',
    '\\cong': 'congruent',
    '\\sim': 'similar',
    '\\equiv': 'equiv',
    '\\pm': 'plusminus', // PLUS-MINUS SIGN

    '\\land': 'and',
    '\\wedge': 'and',
    '\\lor': 'or',
    '\\vee': 'or',
    '\\oplus': 'xor',
    '\\veebar': 'xor',
    '\\lnot': 'not',
    '\\neg': 'not',

    '\\exists': 'exists',
    '\\nexists': '!exists',
    '\\forall': 'forAll',
    '\\backepsilon': 'suchThat',
    '\\therefore': 'therefore',
    '\\because': 'because',

    '\\nabla': 'nabla',
    '\\circ': 'circle',
    // '\\oplus': 'oplus',
    '\\ominus': 'ominus',
    '\\odot': 'odot',
    '\\otimes': 'otimes',

    '\\zeta': 'Zeta',
    '\\Gamma': 'Gamma',
    '\\min': 'min',
    '\\max': 'max',
    '\\mod': 'mod',
    '\\lim': 'lim', // BIG OP
    '\\sum': 'sum',
    '\\prod': 'prod',
    '\\int': 'integral',
    '\\iint': 'integral2',
    '\\iiint': 'integral3',

    '\\Re': 'Re',
    '\\gothicCapitalR': 'Re',
    '\\Im': 'Im',
    '\\gothicCapitalI': 'Im',

    '\\binom': 'nCr',

    '\\partial': 'partial',
    '\\differentialD': 'differentialD',
    '\\capitalDifferentialD': 'capitalDifferentialD',
    '\\Finv': 'Finv',
    '\\Game': 'Game',
    '\\wp': 'wp',
    '\\ast': 'ast',
    '\\star': 'star',
    '\\asymp': 'asymp',

    // Function domain, limits
    '\\to': 'to', // Looks like \rightarrow
    '\\gets': 'gets', // Looks like \leftarrow

    // Logic
    '\\rightarrow': 'shortLogicalImplies',
    '\\leftarrow': 'shortLogicalImpliedBy',
    '\\leftrightarrow': 'shortLogicalEquivalent',
    '\\longrightarrow': 'logicalImplies',
    '\\longleftarrow': 'logicalImpliedBy',
    '\\longleftrightarrow': 'logicalEquivalent',

    // Metalogic
    '\\Rightarrow': 'shortImplies',
    '\\Leftarrow': 'shortImpliedBy',
    '\\Leftrightarrow': 'shortEquivalent',

    '\\implies': 'implies',
    '\\Longrightarrow': 'implies',
    '\\impliedby': 'impliedBy',
    '\\Longleftarrow': 'impliedBy',
    '\\iff': 'equivalent',
    '\\Longleftrightarrow': 'equivalent',
}

// The OP_NAME table maps a canonical name to a function name
const OP_NAME = {
    '+': 'add',
    '*': 'multiply',
    '-': 'subtract',
    '/': 'divide',
    '=': 'equal',
    ':=': 'assign',
    '!=': 'ne',
    '?=': 'questeq',
    'approx': 'approx',
    'congruent': 'congruent',
    'similar': 'similar',
    'equiv': 'equiv',
    '<': 'lt',
    '>': 'gt',
    '<=': 'le',
    '>=': 'ge',
    '≤': 'le',
    '≥': 'ge',
    '>>': 'gg',
    '<<': 'll',
    '**': 'pow',
    '++': 'increment',
    '--': 'decrement',
}

// The FUNCTION_TEMPLATE table maps a canonical name to a LaTeX template
//
// In a template, '%0' and '%1' are positional arguments and a bare '%' is a
// placeholder for an argument list (isFunction() uses the presence of a '%'
// to decide that a name is a function).
// NOTE(review): '%_' and '%^' presumably stand for the subscript and the
// superscript of the function; the code that expands the templates is not
// part of this table, confirm there.
const FUNCTION_TEMPLATE = {
    'equal': '%0 = %1',
    'ne': '%0 \\ne %1',
    'questeq': '%0 \\questeq %1',
    'approx': '%0 \\approx %1',
    'congruent': '%0 \\cong %1',
    'similar': '%0 \\sim %1',
    'equiv': '%0 \\equiv %1',
    'assign': '%0 := %1',
    'lt': '%0 < %1',
    'gt': '%0 > %1',
    'le': '%0 \\le %1',
    'ge': '%0 \\ge %1',

    // TRIGONOMETRY
    'sin': '\\sin%_%^ %0',
    'cos': '\\cos%_%^ %0',
    'tan': '\\tan%_%^ %0',
    'cot': '\\cot%_%^ %0',
    'sec': '\\sec%_%^ %0',
    'csc': '\\csc%_%^ %0',

    'sinh': '\\sinh %0',
    'cosh': '\\cosh %0',
    'tanh': '\\tanh %0',
    'csch': '\\csch %0',
    'sech': '\\sech %0',
    'coth': '\\coth %0',

    'arcsin': '\\arcsin %0',
    'arccos': '\\arccos %0',
    'arctan': '\\arctan %0',
    'arccot': '\\arcctg %0', // Check
    'arcsec': '\\arcsec %0',
    'arccsc': '\\arccsc %0',

    'arsinh': '\\arsinh %0',
    'arcosh': '\\arcosh %0',
    'artanh': '\\artanh %0',
    'arcsch': '\\arcsch %0',
    'arsech': '\\arsech %0',
    'arcoth': '\\arcoth %0',

    // LOGARITHMS
    'ln': '\\ln%_%^ %', // Natural logarithm
    'log': '\\log%_%^ %', // General logarithm, e.g. log_10
    'lg': '\\lg %', // Common, base-10, logarithm
    'lb': '\\lb %', // Binary, base-2, logarithm

    // Big operator
    'sum': '\\sum%_%^ %0',
    'prod': '\\prod%_%^ %0',

    // OTHER
    'Zeta': '\\zeta%_%^ %', // Riemann Zeta function
    'Gamma': '\\Gamma %', // Gamma function, such that Gamma(n) = (n - 1)!
    'min': '\\min%_%^ %',
    'max': '\\max%_%^ %',
    'mod': '\\mod%_%^ %',
    'lim': '\\lim%_%^ %', // BIG OP
    'binom': '\\binom %',
    'nabla': '\\nabla %',
    'curl': '\\nabla\\times %0',
    'div': '\\nabla\\cdot %0',
    'floor': '\\lfloor %0 \\rfloor%_%^',
    'ceil': '\\lceil %0 \\rceil%_%^',
    'abs': '\\left| %0 \\right|%_%^',
    'norm': '\\lVert %0 \\rVert%_%^',
    'ucorner': '\\ulcorner %0 \\urcorner%_%^',
    'lcorner': '\\llcorner %0 \\lrcorner%_%^',
    'angle': '\\langle %0 \\rangle%_%^',
    'group': '\\lgroup %0 \\rgroup%_%^',
    'moustache':'\\lmoustache %0 \\rmoustache%_%^',
    'brace': '\\lbrace %0 \\rbrace%_%^',
    'sqrt[]': '\\sqrt[%^]{%0}', // Template used when there's an index
    'sqrt': '\\sqrt{%0}',
    'lcm': '\\operatorname{lcm}%',
    'gcd': '\\operatorname{gcd}%',
    'erf': '\\operatorname{erf}%',
    'erfc': '\\operatorname{erfc}%',
    'randomReal': '\\operatorname{randomReal}%',
    'randomInteger': '\\operatorname{randomInteger}%',

    // Logic operators
    'and': '%0 \\land %1',
    'or': '%0 \\lor %1',
    'xor': '%0 \\oplus %1',
    'not': '%0 \\lnot %1',

    // Other operators
    'circle': '%0 \\circ %1',
    'ast': '%0 \\ast %1',
    'star': '%0 \\star %1',
    'asymp': '%0 \\asymp %1',
    '/': '\\frac{%0}{%1}',
    'Re': '\\Re{%0}',
    'Im': '\\Im{%0}',
    'factorial': '%0!',
    'factorial2': '%0!!',
}

// From www.w3.org/TR/MathML3/appendixc.html
// The keys of OP_PRECEDENCE are "canonical names"
// (the values of the CANONICAL_NAMES table above,
e.g. "?=") // Those are different from the latex names (e.g. \\questeq) // and from the function names (e.g. "questeq") const OP_PRECEDENCE = { 'degree': 880, 'nabla': 740, 'curl': 740, // not in MathML 'partial': 740, 'differentialD': 740, // not in MathML 'capitalDifferentialD': 740, // not in MathML '**': 720, // not in MathML 'odot': 710, // Logical not 'not': 680, // Division 'div': 660, // division sign 'solidus': 660, '/': 660, 'setminus': 650, // \setminus, \smallsetminus '%': 640, 'otimes': 410, // Set operators 'union': 350, // \cup 'intersection': 350, // \cap // Multiplication, division and modulo '*': 390, 'ast': 390, '.': 390, 'oplus': 300, // also logical XOR... @todo 'ominus': 300, // Addition '+': 275, '-': 275, '+-': 275, // \pm '-+': 275, // \mp // Most circled-ops are 265 'circle': 265, 'circledast': 265, 'circledcirc': 265, 'star': 265, // Different from ast // Range '..': 263, // Not in MathML // Unit conversion 'to': 262, // Not in MathLM 'in': 262, // Not in MathML '|': 261, // Not in MathML (bind is the |_ operator) // Relational 'congruent': 265, 'equiv': 260, // MathML: "identical to" '=': 260, '!=': 255, '?=': 255, 'similar': 250, // tilde operator in MathML 'approx': 247, '<': 245, '>': 243, '>=': 242, '≥': 242, '<=': 241, // Set operator 'complement': 240, 'subset': 240, // \subset 'superset': 240, // \supset // @todo and equality and neg operators 'elementof': 240, // \in '!elementof': 240, // \notin // 'exists': 230, '!exists': 230, 'forall': 230, // Logical operators 'and': 200, 'xor': 195, // MathML had 190 'or': 190, // Note: 'not' is 680 // center, low, diag, vert ellipsis 150 // Composition/sequence 'suchThat': 110, // \backepsilon ':': 100, // '..': 100, // '...': 100, // Conditional (?:) // Assignment 'assign': 80, ':=': 80, // MathML had 260 (same with U+2254 COLON EQUALS) 'therefore': 70, 'because': 70, // Arrows // Note: MathML had 270 for the arrows, but this // would not work for (a = b => b = a) // See also 
https://en.wikipedia.org/wiki/Logical_connective#Order_of_precedence // for a suggested precedence (note that in this page lower precedence // has the opposite meaning as what we use) 'shortLogicalImplies': 52, // -> 'shortImplies': 51, // => 'logicalImplies': 50, // --> 'implies': 49, // ==> 'shortLogicalImpliedBy': 48,// <- 'shortImpliedBy': 47, // <= 'logicalImpliedBy': 46, // <-- 'impliedBy': 45, // <== 'shortLogicalEquivalent':44,// <-> 'shortEquivalent': 43, // <=> 'logicalEquivalent':42, // <--> 'equivalent': 41, // <==> ',': 40, ';': 30 } function getArg(ast, index) { return Array.isArray(ast.arg) ? ast.arg[index] : undefined; } /** * Given a canonical name, return its precedence * @param {string} canonicalName, for example "and" * @return {number} * @private */ function getPrecedence(canonicalName) { return canonicalName ? (OP_PRECEDENCE[canonicalName] || -1) : -1; } function getAssociativity(canonicalName) { if (/=|=>/.test(canonicalName)) { return 'right'; } return 'left'; } /** * * @param {string} name function canonical name * @return {string} * @private */ function getLatexTemplateForFunction(name) { let result = FUNCTION_TEMPLATE[name]; if (!result) { result = name.length > 1 ? '\\operatorname{' + name + '}%^%_ %' : (name + '%^%_ %'); } return result; } /** * * @param {string} name symbol name, e.g. 
"alpha" * @return {string} * @private */ function getLatexForSymbol(name) { let result = FUNCTION_TEMPLATE[name]; if (result) { return result.replace('%1', '').replace('%0', '').replace('%', ''); } if (name.length > 1) { const info = Definitions.getInfo('\\' + name, 'math'); if (info && (!info.fontFamily || info.fontFamily === 'cmr' || info.fontFamily === 'ams')) { result = '\\' + name; } } if (!result) { result = Definitions.unicodeStringToLatex('math', name); } return result; } function isFunction(canonicalName) { if (canonicalName === 'f' || canonicalName === 'g') return true; const t = FUNCTION_TEMPLATE[canonicalName]; if (!t) return false; // A plain "%" is a placeholder for an argument list, indicating a function if (/%[^01_^]?/.test(t)) return true; return false; } /** * * @param {string} latex, for example '\\times' * @return {string} the canonical name for the input, for example '*' * @private */ function getCanonicalName(latex) { latex = (latex || '').trim(); let result = CANONICAL_NAMES[latex]; if (!result) { if (/^\\[^{}]+$/.test(latex)) { const info = Definitions.getInfo(latex, 'math', {}); if (info) { result = info.value || latex.slice(1); } else { result = latex.slice(1); } } else { result = latex; } } return result; } /** * Return the operator precedence of the atom * or -1 if not an operator * @param {object} atom * @return {number} * @private */ function opPrec(atom) { if (!atom) return null; const name = getCanonicalName(getString(atom)); const result = [getPrecedence(name), getAssociativity(name)]; if (result[0] <= 0) return null return result; } function isOperator(atom) { return opPrec(atom) !== null; } const DELIM_FUNCTION = { '\\lfloor\\rfloor': 'floor', '\\lceil\\rceil': 'ceil', '\\vert\\vert': 'abs', '\\lvert\\rvert': 'abs', '||': 'abs', '\\Vert\\Vert': 'norm', '\\lVert\\rVert': 'norm', '\\ulcorner\\urcorner': 'ucorner', '\\llcorner\\lrcorner': 'lcorner', '\\langle\\rangle': 'angle', '\\lgroup\\rgroup': 'group', '\\lmoustache\\rmoustache': 
'moustache', '\\lbrace\\rbrace': 'brace' } const POSTFIX_FUNCTION = { '!': 'factorial', '\\dag': 'dagger', '\\dagger': 'dagger', '\\ddagger': 'dagger2', '\\maltese': 'maltese', '\\backprime': 'backprime', '\\backdoubleprime': 'backprime2', '\\prime': 'prime', '\\doubleprime': 'prime2', '\\$': '$', '\\%': '%', '\\_': '_', '\\degree': 'degree' } const ASSOCIATIVE_FUNCTION = { '+': 'add', '-': 'add', // Subtraction is add(), but it's // handled specifically so that the // argument is negated '*': 'multiply', '=': 'equal', ',': 'list', ';': 'list2', 'and': 'and', 'or': 'or', 'xor': 'xor', 'union': 'union', // shortLogicalEquivalent and logicalEquivalent map to the same function // they mean the same thing, but have a difference precedence. 'shortLogicalEquivalent': 'logicalEquivalent', // logical equivalent, iff, biconditional logical connective 'logicalEquivalent': 'logicalEquivalent', // same // shortEquivalent and equivalent map to the same function // they mean the same thing, but have a difference precedence. 'shortEquivalent': 'equivalent', // metalogic equivalent 'equivalent': 'equivalent', // same } const SUPER_ASSOCIATIVE_FUNCTION = { ',': 'list', ';': 'list2' } function getString(atom) { if (Array.isArray(atom)) { let result = ''; for (const subAtom of atom) { result += getString(subAtom); } return result; } if (atom.latex && !/^\\math(op|bin|rel|open|punct|ord|inner)/.test(atom.latex)) { return atom.latex.trim(); } if (atom.type === 'leftright') { return ''; } if (typeof atom.body === 'string') { return atom.body; } if (Array.isArray(atom.body)) { let result = ''; for (const subAtom of atom.body) { result += getString(subAtom); } return result; } return ''; } /** * * @param {object} expr - Abstract Syntax Tree object * @return {string} A string, the symbol, or undefined * @private */ function asSymbol(node) { return typeof node.sym === 'string' ? 
(getLatexForSymbol(node.sym) || node.sym) : ''; } /** * * @param {object} node - Abstract Syntax Tree node * @return {number} A JavaScript number, the value of the AST or NaN * @private * @private */ function asMachineNumber(node) { return parseFloat(node.num); } function isNumber(node) { return typeof node === 'object' && typeof node.num !== 'undefined'; } function numberRe(node) { let result = 0; if (isNumber(node)) { if (typeof node.num === 'object') { result = typeof node.num.re !== 'undefined' ? parseFloatToPrecision(node.num.re) : 0; } else { result = parseFloat(node.num); } } return result; } function numberIm(node) { let result = 0; if (isNumber(node)) { if (typeof node.num === 'object') { result = typeof node.num.im !== 'undefined' ? parseFloatToPrecision(node.num.im) : 0; } } return result; } function isComplexWithRealAndImaginary(node) { return numberRe(node) !== 0 && numberIm(node) !== 0; } function hasSup(node) { return node && typeof node.sup !== 'undefined'; } function hasSub(node) { return node && typeof node.sub !== 'undefined'; } /** * Return true if the current atom is of the specified type and value. 
* @param {object} expr * @param {string} type * @param {string} value * @private */ function isAtom(expr, type, value) { let result = false; const atom = expr.atoms[expr.index]; if (atom && atom.type === type) { if (value === undefined) { result = true; } else { result = getString(atom) === value; } } return result; } /** * * @param {string} functionName * @param {object} params * @private */ function wrapFn(functionName, ...params) { const result = { fn: functionName }; if (params) { const args = []; for (const arg of params) { if (arg) args.push(arg); } if (args.length > 0) result.arg = args; } return result; } function wrapNum(num) { if (typeof num === 'number') { return {num: num.toString() } } else if (typeof num === 'string') { return {num: num} } else if (typeof num === 'object') { // This is a complex number console.assert(typeof num.re === 'string' || typeof num.im === 'string'); return {num: num}; } return undefined; } /** * Return the negative of the expression. Usually { fn:'negate', arg } * but for numbers, the negated number * @param {object} node * @private */ function negate(node) { if (isNumber(node)) { const re = numberRe(node); const im = numberIm(node); if (im !== 0) { if (re !== 0) { node.num.re = (-re).toString(); } node.num.im = (-im).toString(); } else { node.num = (-re).toString(); } return node; } return wrapFn('negate', node); } function nextIsSupsub(expr) { const atom = expr.atoms[expr.index + 1]; return atom && atom.type === 'msubsup'; } /** * Parse for a possible sup/sub attached directly to the current atom * or to a following 'msubsup' atom. * After the call, the index points to the next atom to process. * @param {object} expr * @private */ function parseSupsub(expr, options) { let atom = expr.atoms[expr.index]; // Is there a supsub directly on this atom? 
if (atom && (typeof atom.superscript !== 'undefined' || typeof atom.subscript !== 'undefined')) { // Move to the following atom expr.index += 1; } else { atom = null; } // If this atom didn't have a sup/sub, // is the following atom a subsup atom? if (!atom) { atom = expr.atoms[expr.index + 1]; if (!atom || atom.type !== 'msubsup' || !(atom.superscript || atom.subscript)) { atom = null; } else { // Yes. Skip the current atom and the supsub expr.index += 2; } } if (atom) { if (typeof atom.subscript !== 'undefined') { expr.ast.sub = parse(atom.subscript, options); } if (typeof atom.superscript !== 'undefined') { if (atom.type === 'msubsup') { if (/['\u2032]|\\prime/.test(getString(atom.superscript))) { expr.index += 1; atom = expr.atoms[expr.index + 1]; if (atom && atom.type === 'msubsup' && /['\u2032]|\\prime/.test(getString(atom.superscript))) { expr.ast.sup = {sym: '\u2033'}; // DOUBLE-PRIME } else { expr.ast.sup = {sym: '\u2032'}; // PRIME expr.index -= 1; } } else if (/['\u2033]|\\doubleprime/.test(getString(atom.superscript))) { expr.ast.sup = {sym: '\u2033'}; // DOUBLE-PRIME } else if (expr.ast) { expr.ast.sup = parse(atom.superscript, options); } } else { expr.ast.sup = parse(atom.superscript, options); } } } else { // Didn't find a supsup either on this atom and there was no 'msubsup' // Time to move on to the next atom. expr.index += 1; } return expr; } /** * Parse postfix operators, such as "!" 
(factorial) * @private */ function parsePostfix(expr, options) { const lhs = expr.ast; if (nextIsDigraph(expr, '!!')) { expr.index += 1; expr.ast = wrapFn('factorial2', lhs); expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); return expr; } if (nextIsDigraph(expr, '++')) { expr.index += 1; expr.ast = wrapFn('increment', lhs); expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); return expr; } if (nextIsDigraph(expr, '--')) { expr.index += 1; expr.ast = wrapFn('decrement', lhs); expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); return expr; } const atom = expr.atoms[expr.index]; if (atom && atom.latex && POSTFIX_FUNCTION[atom.latex.trim()]) { expr.ast = wrapFn(POSTFIX_FUNCTION[atom.latex.trim()], lhs); expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } return expr; } /** * Delimiters can be expressed: * - as a matching pair of regular characters: '(a)' * - a as 'leftright' expression: '\left(a\right)' * - as a matching pair of 'sizeddelim': '\Bigl(a\Bigr) * * Note that the '\delim' command is only used for delimiters in the middle * of a \left\right pair and not to represent pair-matched delimiters. 
* * This function handles all three cases * * @private */ function parseDelim(expr, ldelim, rdelim, options) { expr.index = expr.index || 0; if (expr.atoms.length === 0 || expr.index >= expr.atoms.length) { expr.ast = undefined; return expr; } const savedPrec = expr.minPrec; expr.minPrec = 0; let atom = expr.atoms[expr.index]; if (!ldelim) { // If we didn't expect a specific delimiter, parse any delimiter // and return it as a function application let pairedDelim = true; if (atom.type === 'mopen') { ldelim = atom.latex.trim(); rdelim = Definitions.RIGHT_DELIM[ldelim]; } else if (atom.type === 'sizeddelim') { ldelim = atom.delim; rdelim = Definitions.RIGHT_DELIM[ldelim]; } else if (atom.type === 'leftright') { pairedDelim = false; ldelim = atom.leftDelim; rdelim = atom.rightDelim; // If we have an unclosed smart fence, assume the right delim is // matching the left delim if (rdelim === '?') rdelim = Definitions.RIGHT_DELIM[ldelim]; } else if (atom.type === 'textord') { ldelim = atom.latex.trim(); rdelim = Definitions.RIGHT_DELIM[ldelim]; } if (ldelim && rdelim) { if (ldelim === '|' && rdelim === '|') { // Check if this could be a ||x|| instead of |x| const atom = expr.atoms[expr.index + 1]; if (atom && atom.type === 'textord' && atom.latex === '|') { // Yes, it's a ||x|| ldelim = '\\lVert'; rdelim = '\\rVert'; } } expr = parseDelim(expr, ldelim, rdelim); if (expr) { if (pairedDelim) expr.index += 1; expr.ast = { fn: DELIM_FUNCTION[ldelim + rdelim] || (ldelim + rdelim), arg: [expr.ast]}; expr.minPrec = savedPrec; return expr; } } return undefined; } if (atom.type === 'mopen' && getString(atom) === ldelim) { expr.index += 1; // Skip the open delim expr = parseExpression(expr, options); atom = expr.atoms[expr.index]; if (atom && atom.type === 'mclose' && getString(atom) === rdelim) { if (nextIsSupsub(expr)) { // Wrap in a group if we have an upcoming superscript or subscript expr.ast = {group: expr.ast}; } expr = parseSupsub(expr, options); expr = parsePostfix(expr, 
options); } // TODO: else, syntax error? } else if (atom.type === 'textord' && getString(atom) === ldelim) { expr.index += 1; // Skip the open delim expr = parseExpression(expr, options); atom = expr.atoms[expr.index]; if (atom && atom.type === 'textord' && getString(atom) === rdelim) { expr.index += 1; expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } // TODO: else, syntax error? } else if (ldelim === '\\lVert' && atom.type === 'textord' && atom.latex === '|') { atom = expr.atoms[expr.index + 1]; if (atom && atom.type === 'textord' && atom.latex === '|') { // This is an opening || expr.index += 2; // Skip the open delim expr = parseExpression(expr, options); atom = expr.atoms[expr.index]; const atom2 = expr.atoms[expr.index + 1]; if (atom && atom.type === 'textord' && atom.latex === '|' && atom2 && atom2.type === 'textord' && atom2.latex === '|') { // This was a closing || expr.index += 2; expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } } } else if (atom.type === 'sizeddelim' && atom.delim === ldelim) { expr.index += 1; // Skip the open delim expr = parseExpression(expr, options); atom = expr.atoms[expr.index]; if (atom && atom.type === 'sizeddelim' && atom.delim === rdelim) { expr.index += 1; expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } // TODO: else, syntax error? } else if (atom.type === 'leftright' && atom.leftDelim === ldelim && (atom.rightDelim === '?' 
|| atom.rightDelim === rdelim)) { // This atom type includes the content of the parenthetical expression // in its body expr.ast = parse(atom.body, options); if (nextIsSupsub(expr)) { // Wrap in a group if we have an upcoming superscript or subscript expr.ast = {group: expr.ast}; } expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } else { return undefined; } expr.minPrec = savedPrec; return expr; } function nextIsDigraph(expr, digraph) { expr.index = expr.index || 0; if (expr.atoms.length <= 1 || expr.index >= expr.atoms.length - 1) { return false; } return digraph === getString(expr.atoms[expr.index]) + getString(expr.atoms[expr.index + 1]); } /** * Some symbols are made up of two consecutive characters. * Handle them here. Return undefined if not a digraph. * TODO: other digraphs: * := * ++ * ** * =: * °C U+2103 * °F U+2109 * @private * */ function parseDigraph(expr) { expr.index = expr.index || 0; if (expr.atoms.length <= 1 || expr.index >= expr.atoms.length - 1) { return undefined; } if (isAtom(expr, 'textord', '\\nabla')) { expr.index += 1; if (isAtom(expr, 'mbin', '\\times')) { expr.index += 1; expr.ast = 'curl'; // divergence return expr; } else if (isAtom(expr, 'mbin', '\\cdot')) { expr.index += 1; expr.ast = 'div'; return expr; } expr.index -= 1; } else { const digraph = expr.atoms[expr.index].latex + expr.atoms[expr.index + 1].latex; const result = /^(>=|<=|>>|<<|:=|!=|\*\*|\+\+|--)$/.test(digraph) ? 
digraph : ''; if (result) { expr.index += 1; } return result; } return undefined; } function parsePrimary(expr, options) { // <primary> := ('-'|'+) <primary> | <number> | // '(' <expression> ')' | <symbol> | <text> (<expression>) expr.index = expr.index || 0; expr.ast = undefined; if (expr.atoms.length === 0 || expr.index >= expr.atoms.length) { return expr; } let atom = expr.atoms[expr.index]; const val = getCanonicalName(getString(atom)); const digraph = parseDigraph(expr); if (digraph) { expr.ast = wrapFn(expr.ast, parsePrimary(expr, options).ast); } else if (atom.type === 'root') { expr.index = 0; expr.atoms = atom.body; return parsePrimary(expr, options); } else if (atom.type === 'mbin' && val === '-') { // Prefix - sign expr.index += 1; // Skip the '-' symbol expr = parsePrimary(expr, options); expr.ast = negate(expr.ast); } else if (atom.type === 'mbin' && val === '+') { // Prefix + sign expr.index += 1; // Skip the '+' symbol expr = parsePrimary(expr, options); expr.ast = wrapFn('add', expr.ast); } else if (atom.type === 'mord' && /^[0-9.]$/.test(atom.latex)) { // Looks like a number let num = ''; let done = false; let pat = /^[0-9.eEdD]$/; while (expr.index < expr.atoms.length && !done && (isAtom(expr, 'spacing') || ( ( isAtom(expr, 'mord') || isAtom(expr, 'mpunct', ',') || isAtom(expr, 'mbin') ) && pat.test(expr.atoms[expr.index].latex) ) ) ) { if (expr.atoms[expr.index].type === 'spacing') { expr.index += 1; } else if (typeof expr.atoms[expr.index].superscript !== 'undefined' || typeof expr.atoms[expr.index].subscript !== 'undefined') { done = true; } else { let digit = expr.atoms[expr.index].latex; if (digit === 'd' || digit === 'D') { digit = 'e'; pat = /^[0-9+-.]$/; } else if (digit === 'e' || digit === 'E') { if (nextIsSupsub(expr)) { digit = ''; expr.index -= 1; done = true; } else { digit = 'E'; pat = /^[0-9+-.]$/ } } else if (pat === /^[0-9+-.]$/) { pat = /^[0-9]$/; } num += digit === ',' ? '' : digit; expr.index += 1; } } expr.ast = num ? 
wrapNum(num) : undefined; // This was a number. Is it followed by a fraction, e.g. 2 1/2 atom = expr.atoms[expr.index]; if (atom && atom.type === 'genfrac' && !isNaN(expr.ast.num)) { // Add an invisible plus, i.e. 2 1/2 = 2 + 1/2 const lhs = expr.ast; expr = parsePrimary(expr, options); expr.ast = wrapFn('add', lhs, expr.ast); } if (atom && atom.type === 'group' && atom.latex && atom.latex.startsWith('\\nicefrac')) { // \nicefrac macro, add an invisible plus const lhs = expr.ast; expr = parsePrimary(expr, options); expr.ast = wrapFn('add', lhs, expr.ast); } if (atom && atom.type === 'msubsup') { expr = parseSupsub(expr, options); } expr = parsePostfix(expr, options); } else if (atom.type === 'genfrac' || atom.type === 'surd') { // A fraction or a square/cube root expr.ast = atom.toAST(options); expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } else if (atom.type === 'mord' || atom.type === 'mbin') { // A 'mord' but not a number: either an identifier ('x') or // a function ('\\Zeta') if (isFunction(val) && !isOperator(atom)) { // A function expr.ast = { fn: val }; expr = parseSupsub(expr, options); const fn = expr.ast; const arg = parsePrimary(expr, options).ast; if (arg && /^(list0|list|list2)$/.test(arg.fn)) { fn.arg = fn.arg ? fn.arg.arg : undefined; } else if (arg) { fn.arg = [arg] } expr.ast = fn; } else { // An identifier expr.ast = atom.toAST(options); if (expr.ast.sym === 'ⅈ') { // It's 'i', the imaginary unit expr.ast = wrapNum({im: "1"}); } expr = parseSupsub(expr); } expr = parsePostfix(expr, options); } else if (atom.type === 'textord') { // Note that 'textord' can also be operators, and are handled as such // in parseExpression() if (!isOperator(atom)) { // This doesn't look like a textord operator if (!Definitions.RIGHT_DELIM[atom.latex ? 
atom.latex.trim() : atom.body]) { // Not an operator, not a fence, it's a symbol or a function if (isFunction(val)) { // It's a function expr.ast = { fn: val }; expr = parseSupsub(expr, options); const fn = expr.ast; expr.index += 1; // Skip the function name fn.arg = [parsePrimary(expr, options).ast]; expr.ast = fn; expr = parsePostfix(expr, options); } else { // It was a symbol... expr.ast = atom.toAST(options); if (typeof atom.superscript === 'undefined') { expr.index += 1; } expr = parseSupsub(expr, options); expr = parsePostfix(expr, options); } } } } else if (atom.type === 'mop') { // Could be a function or an operator. if ((/^\\(mathop|operatorname|operatorname\*)/.test(atom.latex) || isFunction(val)) && !isOperator(atom)) { expr.ast = { fn: /^\\(mathop|operatorname|operatorname\*)/.test(atom.latex) ? atom.body : val}; expr = parseSupsub(expr, options); if (hasSup(expr.ast)) { // There was an exponent with the function. // This may be an inverse function const INVERSE_FUNCTION = { 'sin' : 'arcsin', 'cos': 'arccos', 'tan': 'arctan', 'cot': 'arccot', 'sec': 'arcsec', 'csc': 'arccsc', 'sinh': 'arsinh', 'cosh': 'arcosh', 'tanh': 'artanh', 'csch': 'arcsch', 'sech': 'arsech', 'coth': 'arcoth' }; if (asMachineNumber(expr.ast.sup) === -1 && INVERSE_FUNCTION[val]) { expr.ast = wrapFn(INVERSE_FUNCTION[val], parsePrimary(expr, options).ast); } else { // Keep the exponent, add the argument const fn = expr.ast; fn.arg = [parsePrimary(expr, options).ast]; expr.ast = fn; } } else { const fn = expr.ast; const arg = parsePrimary(expr, options).ast; if (arg && /^(list0|list|list2)$/.test(arg.fn)) { fn.arg = arg.arg; } else if (arg) { fn.arg = [arg] } expr.ast = fn; } } } else if (atom.type === 'array') { expr.index += 1; expr.ast = atom.toAST(options); } else if (atom.type === 'group') { expr.index += 1; expr.ast = atom.toAST(options); } else if (atom.type === 'mclose') { return expr; } else if (atom.type === 'error') { expr.index += 1; expr.ast = { error: atom.latex }; 
return expr; } if (expr.ast === undefined) { // Parse either a group of paren, and return their content as the result // or a pair of delimiters, and return them as a function applied // to their content, i.e. "|x|" -> {fn: "||", arg: "x"} const delim = parseDelim(expr, '(', ')', options) || parseDelim(expr, null, null, options); if (delim) { expr = delim; } else if (!isOperator(atom)) { // This is not an operator (if it is, it may be an operator // dealing with an empty lhs. It's possible. // Couldn't interpret the expression. Output an error. if (atom.type === 'placeholder') { // Default value for a placeholder is 0 // (except for the denominator of a 'genfrac') expr.ast = wrapNum(0); } else { expr.ast = {text: '?'}; expr.ast.error = 'Unexpected token ' + "'" + atom.type + "'"; if (atom.latex) { expr.ast.latex = atom.latex; } else if (atom.body && atom.toLatex) { expr.ast.latex = atom.toLatex(); } } expr.index += 1; // Skip the unexpected token, and attempt to continue } } atom = expr.atoms[expr.index]; if (atom && (atom.type === 'mord' || atom.type === 'surd' || atom.type === 'mop' || atom.type === 'mopen' || atom.type === 'sizeddelim' || atom.type === 'leftright')) { if (atom.type === 'sizeddelim') { for (const d in Definitions.RIGHT_DELIM) { if (atom.delim === Definitions.RIGHT_DELIM[d]) { // This is (most likely) a closing delim, exit. // There are ambiguous cases, for example |x|y|z|. 
expr.index += 1; return expr; } } } if ((atom.type === 'mord' || atom.type === 'textord' || atom.type === 'mop') && isOperator(atom)) { // It's actually an operator return expr; } const lhs = expr.ast; expr.ast = {}; expr = parsePrimary(expr, options); if (expr && expr.ast && lhs) { if (isFunction(lhs.fn) && typeof lhs.arg === 'undefined' || (Array.isArray(lhs.arg) && lhs.arg.length === 0)) { // A function with no arguments followed by a list -> // the list becomes the argument to the function if (expr.ast.fn === 'list2' || expr.ast.fn === 'list') { expr.ast = wrapFn(lhs.fn, expr.ast.arg); } else { // A function "f(x)" or "√x" followed by something else: // implicit multiply expr.ast = wrapFn('multiply', lhs, expr.ast); } } else { // Invisible times, e.g. '2x' if (expr.ast.fn === 'multiply') { expr.ast.arg.unshift(lhs); } else if (numberIm(lhs) === 0 && numberRe(lhs) !== 0 && numberIm(expr.ast) === 1 && numberRe(expr.ast) === 0) { // Imaginary number, i.e. "3i" expr.ast = wrapNum({im: numberRe(lhs).toString()}); } else { expr.ast = wrapFn('multiply', lhs, expr.ast); } } } else { expr.ast = lhs; } } return expr; } /** * Given an atom or an array of atoms, return their AST representation as * an object. * @param {object} expr An expressions, including expr.atoms, expr.index, * expr.minPrec the minimum precedence that this parser should parse * before returning; expr.lhs (optional); expr.ast, the resulting AST. 
* @return {object} the expr object, updated * @private */ function parseExpression(expr, options) { expr.index = expr.index || 0; expr.ast = undefined; if (expr.atoms.length === 0 || expr.index >= expr.atoms.length) return expr; expr.minPrec = expr.minPrec || 0; let lhs = parsePrimary(expr, options).ast; let done = false; const minPrec = expr.minPrec; while (!done) { const atom = expr.atoms[expr.index]; const digraph = parseDigraph(expr); done = !atom || atom.mode === 'text' || (!digraph && !isOperator(atom)); let prec, assoc; if (!done) { [prec, assoc] = digraph ? [getPrecedence(digraph), getAssociativity(digraph)] : opPrec(atom); done = prec < minPrec } if (!done) { const opName = digraph || getCanonicalName(getString(atom)); if (assoc === 'left') { expr.minPrec = prec + 1; } else { expr.minPrec = prec; } expr.index += 1; if (opName === '|') { if (typeof atom.subscript !== 'undefined' || (expr.atoms[expr.index] && typeof expr.atoms[expr.index].subscript !== 'undefined' && expr.atoms[expr.index].type === 'msubsup') ) { // Bind is a special function. It doesn't have a rhs, and // its argument is a subscript. expr.ast = {}; const sub_arg = parseSupsub(expr, options).ast.sub; lhs = wrapFn('bind', lhs); if (sub_arg && sub_arg.fn === 'equal' && lhs.arg) { // This is a subscript of the form "x=..." 
lhs.arg.push(getArg(sub_arg, 0)); lhs.arg.push(getArg(sub_arg, 1)); } else if (sub_arg && lhs.arg && (sub_arg.fn === 'list' || sub_arg.fn === 'list2')) { // Form: "x=0;n=3;z=5" let currentSym = {sym: "x"}; for (let i = 0; i < sub_arg.arg.length; i++) { if (sub_arg.arg[i].fn === 'equal') { currentSym = getArg(sub_arg.arg[i], 0); lhs.arg.push(currentSym); lhs.arg.push(getArg(sub_arg.arg[i], 1)); } else { lhs.arg.push(currentSym); lhs.arg.push(sub_arg.arg[i]); } } } else if (sub_arg) { // Default identifier if none provided lhs.arg.push({sym: "x"}); lhs.arg.push(sub_arg); } } else { // That was a "|", but not with a subscript after, so // it's the end of the expression, might be a right fence. done = true; } } else { const rhs = parseExpression(expr, options).ast; // Some operators (',' and ';' for example) convert into a function // even if there's only two arguments. They're super associative... let fn = SUPER_ASSOCIATIVE_FUNCTION[opName]; if (fn && lhs && lhs.fn !== fn) { // Only promote them if the lhs is not already the same function. // If it is, we'll combine it below. lhs = wrapFn(fn, lhs); } // Promote subtraction to an addition if (opName === '-') { if (lhs && lhs.arg && lhs.fn === 'add') { // add(x,y) - z -> add(x, y, -z) if (rhs !== undefined) lhs.arg.push(negate(rhs)); } else if (lhs && lhs.fn === 'subtract') { // x-y - z -> add(x, -y, -z) lhs = wrapFn('add', getArg(lhs, 0), negate(getArg(lhs, 1)), negate(rhs)); } else if (isNumber(lhs) && !hasSup(lhs) && isNumber(rhs) && !hasSup(rhs) && (typeof rhs.num.re === 'undefined' || rhs.num.re === '0') && typeof rhs.num.im !== 'undefined') { lhs = {num: { re: lhs.num, im: (-parseFloat(rhs.num.im)).toString() }}; } else { lhs = wrapFn('subtract', lhs, rhs); } } else { // Is there a function (e.g. 'add') implementing the // associative version of this operator (e.g. '+')? 
fn = ASSOCIATIVE_FUNCTION[opName]; if (fn === 'add' && lhs && lhs.fn === 'subtract') { // subtract(x, y) + z -> add(x, -y, z) lhs = wrapFn('add', getArg(lhs, 0),