UNPKG

mathlive

Version:

Render and edit beautifully typeset math

github.com/arnog/mathlive

1,202 lines (1,181 loc) • 55.4 kB

JavaScript

/** * @module core/parser * @private */ import Definitions from './definitions.js'; import Color from './color.js'; import FontMetrics from './fontMetrics.js'; import Lexer from './lexer.js'; import MathAtomModule from './mathAtom.js'; const MathAtom = MathAtomModule.MathAtom; /** * A parser transforms a list of tokens into a list of MathAtom. * * @param {Token[]} tokens - An array of tokens generated by the lexer. * @param {Object.<string>} [args] - An optional list of arguments. `#n` tokens will be * substituted with the corresponding element in the args object. This is used * when parsing macros. * @class module:core/parser#Parser * @property {Object.<string, string>} [macros] - Optional macro definitions. * @property {Token[]} tokens - An array of tokens generated by the lexer. * @property {Object.<string>} args - Optional arguments to substitute the `#` token. * @property {Object.<string, string>} macros - A dictionary of objects, index by the name of * the macro, with the following keys: * * args: an integer, the number of arguments, default 0. They can be referenced as #0, * #1, #2... inside the definition of the macro * * def: a string, the definition of the macro, which can reference other macros * @property {number} index - The current token to be parsed: index in `this.tokens` * @property {MathAtom[]} mathList - Accumulated result of the parsing by * `parseAtom()` * @property {object} style - The font, weight, color, etc.. to apply to the * upcoming tokens * @property {string} parseMode - The parse mode indicates the syntax rules to * use to parse the upcoming tokens. 
* Valid values include: * - `'math'`: spaces are ignored, math functions are allowed * - `'text'`: spaces are accounted for, math functions are ignored * - `'string'` * - `'color'`: color name, hex value: `'#fff'`, `'#a0a0a0'` * - `'number'`: `+/-12.56` * - `'dimen'`: `'25mu'`, `'2pt'` * - `'skip'`: `'25mu plus 2em minus fiLll'`, `'2pt'` * - `'colspec'`: formating of a column in tabular environment, e.g. `'r@{.}l'` * @property {boolean} tabularMode - When in tabular mode, `'&'` is interpreted as * a column separator and `'\'` as a row separator. Used for matrixes, etc... * @property {number} endCount - Counter to prevent deadlock. If `end()` is * called too many times (1,000) in a row for the same token, bail. * @private */ class Parser { constructor(tokens, args, macros) { this.tokens = tokens; this.index = 0; this.args = args; this.macros = macros; this.mathList = []; this.style = {}; this.parseMode = 'math'; this.tabularMode = false; this.endCount = 0; } swapMathList(newMathList) { const result = this.mathList; this.mathList = newMathList || []; return result; } swapParseMode(mode) { const result = this.parseMode; this.parseMode = mode; return result; } /** * True if we've reached the end of the token stream. * @method module:core/parser#Parser#end * @private */ end() { // To prevent a deadlock, count how many times end() is called without the // index advancing. If it happens more than 1,000 times in a row, // assume something is broken and pretend the stream is finished. this.endCount++; return this.index >= this.tokens.length || this.endCount > 1000; } get() { this.endCount = 0; return this.index < this.tokens.length ? this.tokens[this.index++] : null; } peek(offset) { const index = this.index + (offset ? offset : 0); return index < this.tokens.length ? this.tokens[index] : null; } /** * Return the last atom of the math list * If there isn't one, insert a `msubsup` and return it. 
* @method module:core/parser#Parser#lastMathAtom * @private */ lastMathAtom() { const lastType = this.mathList.length === 0 ? 'none' : this.mathList[this.mathList.length - 1].type; if (lastType !== 'mop' && lastType !== 'msubsup') { // ZERO WIDTH SPACE const lastAtom = new MathAtom(this.parseMode, 'msubsup', '\u200b'); lastAtom.attributes = { 'aria-hidden': true }; this.mathList.push(lastAtom); } return this.mathList[this.mathList.length - 1]; } /** * @param {string} type * @return {boolean} True if the next token is of the specified type * @method module:core/parser#Parser#hasToken * @private */ hasToken(type) { const index = this.index; return index < this.tokens.length ? this.tokens[index].type === type : false; } /** * @param {string} [value] * @return {boolean} True if the next token is of type `'literal` and has the * specified value. If `value` is empty, return true if the token is of type * `'literal'` * @method Parser#hasLiteral * @private */ hasLiteral(value) { const index = this.index; return index < this.tokens.length ? this.tokens[index].type === 'literal' && (!value || this.tokens[index].value === value) : false; } /** * @param {RegEx} pattern * @return {boolean} True if the next token is of type `'literal` and matches * the specified regular expression pattern. * @method module:core/parser#Parser#hasLiteralPattern * @private */ hasLiteralPattern(pattern) { return this.hasToken('literal') && pattern.test(this.tokens[this.index].value); } hasCommand(command) { console.assert(command === '\\' || command.charAt(0) !== '\\', 'hasCommand() does not require a \\'); const index = this.index; return index < this.tokens.length ? 
this.tokens[index].type === 'command' && this.tokens[index].value === command : false; } hasInfixCommand() { const index = this.index; if (index < this.tokens.length && this.tokens[index].type === 'command') { const info = Definitions.getInfo('\\' + this.tokens[index].value, this.parseMode, this.macros); return info && info.infix; } return false; } hasColumnSeparator() { const index = this.index; return this.tabularMode && index < this.tokens.length ? this.tokens[index].type === 'literal' && this.tokens[index].value === '&' : false; } hasRowSeparator() { const index = this.index; return this.tabularMode && index < this.tokens.length ? this.tokens[index].type === 'command' && (this.tokens[index].value === '\\' || this.tokens[index].value === 'cr') : false; } parseColumnSeparator() { if (this.hasColumnSeparator()) { this.index++; return true; } return false; } /* * Return the appropriate value for a placeholder, either a default * one, or if a value was provided for #? via args, that value. */ placeholder() { if (this.args && typeof this.args['?'] === 'string') { // If there is a specific value defined for the placeholder, // use it. 
return parseTokens(Lexer.tokenize(this.args['?']), this.parseMode, null, this.macros); } // U+2753 = BLACK QUESTION MARK ORNAMENT const result = new MathAtom(this.parseMode, 'placeholder', '?', this.style); result.captureSelection = true; return [result]; } hasImplicitCommand(commands) { if (this.index < this.tokens.length) { const token = this.tokens[this.index]; if (token.type === 'command') { return commands.includes(token.value); } } return false; } parseRowSeparator() { if (this.hasRowSeparator()) { this.index++; return true; } return false; } /** * @param {string} type * @method module:core/parser#Parser#parseToken * @private */ parseToken(type) { if (this.hasToken(type)) { this.index++; return true; } return false; } skipWhitespace() { let found = false; while (this.hasToken('space')) { this.index++; found = true; } return found; } skipUntilToken(type) { while (!this.end() && !this.parseToken(type)) { this.get(); } } parseCommand(command) { if (this.hasCommand(command)) { this.index++; return true; } return false; } parseLiteral(literal) { if (this.hasLiteral(literal)) { this.index++; return true; } return false; } parseFiller() { let skipped = false; let done = false; do { const skippedSpace = this.skipWhitespace(); const skippedRelax = this.parseCommand('relax'); skipped = skipped || skippedSpace || skippedRelax; done = !skippedSpace && !skippedRelax; } while (!done); return skipped; } /** * Keywords are used to specify dimensions, and for various other * syntactic constructs. Unlike commands, they are not case sensitive. 
* There are 25 keywords: * at by bp cc cm dd depth em ex fil fill filll height in minus * mm mu pc plus pt sp spread to true width * * TeX: 8212 * @param {string} keyword * @return {boolean} true if the expected keyword is present * @method module:core/parser#Parser#parseKeyword * @private */ parseKeyword(keyword) { const savedIndex = this.index; let done = this.end(); let value = ''; while (!done) { const token = this.get(); if (token.type === 'literal') { value += token.value; } done = this.end() || token.type !== 'literal' || value.length >= keyword.length; } const hasKeyword = keyword.toUpperCase() === value.toUpperCase(); if (!hasKeyword) { this.index = savedIndex; } return hasKeyword; } /** * Return a sequence of characters as a string. * i.e. 'abcd' returns 'abcd'. * Terminates on the first non-character encountered * e.g. '{', '}' etc... * Will also terminate on ']' * @return {string} * @method module:core/parser#Parser#scanString * @private */ scanString() { let result = ''; let done = this.end(); while (!done) { if (this.hasLiteral(']')) { done = true; } else if (this.hasToken('literal')) { result += this.get().value; } else if (this.skipWhitespace()) { result += ' '; } else if (this.hasToken('command')) { const token = this.get(); if (token.value === 'space') { // The 'space' command is the ~ // which can be used for example in operator names, i.e. // \operatorname{lim~inf}. It's interpreted as a nbs result += '\u00a0'; // NO-BREAK SPACE } else { // TeX will give a 'Missing \endcsname inserted' error // if it encounters any command when expecting a string. // We're a bit more lax. result += token.value; } } else { done = true; } done = done || this.end(); } return result; } /** * Return a CSS color (#rrggbb) * @method module:core/parser#Parser#scanColor * @private */ scanColor() { return Color.stringToColor(this.scanString()); } /** * Return as a number a group of characters representing a * numerical quantity. 
* * From TeX:8695 (scan_int): * An integer number can be preceded by any number of spaces and `\.+' or * `\.-' signs. Then comes either a decimal constant (i.e., radix 10), an * octal constant (i.e., radix 8, preceded by~\.\'), a hexadecimal constant * (radix 16, preceded by~\."), an alphabetic constant (preceded by~\.\`), or * an internal variable. * @return {number} * @method Parser#scanNumber * @private */ scanNumber(isInteger) { const negative = this.parseLiteral('-'); // Optional (ignorable) '+' sign if (!negative) this.parseLiteral('+'); this.skipWhitespace(); isInteger = !!isInteger; let radix = 10; let digits = /[0-9]/; if (this.parseLiteral("'")) { // Apostrophe indicates an octal value radix = 8; digits = /[0-7]/; isInteger = true; } else if (this.parseLiteral('"') || this.parseLiteral('x')) { // Double-quote indicates a hex value // The 'x' prefix notation for the hexadecimal numbers is a MathJax extension. // For example: 'x3a' radix = 16; // Hex digits have to be upper-case digits = /[0-9A-F]/; isInteger = true; } let value = ''; while (this.hasLiteralPattern(digits)) { value += this.get().value; } // Parse the fractional part, if applicable if (!isInteger && (this.parseLiteral('.') || this.parseLiteral(','))) { value += '.'; while (this.hasLiteralPattern(digits)) { value += this.get().value; } } const result = isInteger ? parseInt(value, radix) : parseFloat(value); return negative ? -result : result; } /** * Return as a floating point number a dimension in pt (1 em = 10 pt) * * See TeX:8831 * @todo: note that some units depend on the font (em, ex). So it might be * better to return a dimen struct with the value + unit and resolve * later when we have a font context.... 
* @return {number} * @method module:core/parser#Parser#scanDimen * @private */ scanDimen() { const value = this.scanNumber(false); this.skipWhitespace(); let result; if (this.parseKeyword('pt')) { result = FontMetrics.toEm(value, 'pt'); } else if (this.parseKeyword('mm')) { result = FontMetrics.toEm(value, 'mm'); } else if (this.parseKeyword('cm')) { result = FontMetrics.toEm(value, 'cm'); } else if (this.parseKeyword('ex')) { result = FontMetrics.toEm(value, 'ex'); } else if (this.parseKeyword('px')) { result = FontMetrics.toEm(value, 'px'); } else if (this.parseKeyword('em')) { result = FontMetrics.toEm(value, 'em'); } else if (this.parseKeyword('bp')) { result = FontMetrics.toEm(value, 'bp'); } else if (this.parseKeyword('dd')) { result = FontMetrics.toEm(value, 'dd'); } else if (this.parseKeyword('pc')) { result = FontMetrics.toEm(value, 'pc'); } else if (this.parseKeyword('in')) { result = FontMetrics.toEm(value, 'in'); } else if (this.parseKeyword('mu')) { result = FontMetrics.toEm(value, 'mu'); } else { // If the units are missing, TeX assumes 'pt' result = FontMetrics.toEm(value, 'pt'); } return result; } scanSkip() { const result = this.scanDimen(); // We parse, but ignore the optional 'plus' and 'minus' // arguments. 
this.skipWhitespace(); // 'plus', optionally followed by 'minus' // ('minus' cannot come before 'plus') // dimen or 'hfill' if (this.parseKeyword('plus')) { // @todo there could also be a \hFilLlL command here this.scanDimen(); } this.skipWhitespace(); if (this.parseKeyword('minus')) { // @todo there could also be a \hFilLlL command here this.scanDimen(); } return result; } scanColspec() { this.skipWhitespace(); const result = []; while (!this.end() && !(this.hasToken('}') || this.hasLiteral(']'))) { if (this.hasLiteral()) { const literal = this.get().value; if ('lcr'.includes(literal)) { result.push({ align: literal }); } else if (literal === '|') { result.push({ rule: true }); } else if (literal === '@') { if (this.parseToken('{')) { const savedParsemode = this.swapParseMode('math'); result.push({ gap: this.scanImplicitGroup(token => token.type === '}') }); this.swapParseMode(savedParsemode); } this.parseToken('}'); } } } return result; } /** * Parse a `\(...\)` or `\[...\]` sequence * @return {MathAtom} group for the sequence or null * @method module:core/parser#Parser#scanModeSet * @private */ scanModeSet() { let final; if (this.parseCommand('(')) final = ')'; if (!final && this.parseCommand('[')) final = ']'; if (!final) return null; const savedParsemode = this.swapParseMode('math'); const result = new MathAtom('math', 'group'); result.mathstyle = final === ')' ? 'textstyle' : 'displaystyle'; result.body = this.scanImplicitGroup(token => token.type === 'command' && token.value === final); this.parseCommand(final); this.swapParseMode(savedParsemode); if (!result.body || result.body.length === 0) return null; return result; } /** * Parse a `$...$` or `$$...$$` sequence * @method Parser#scanModeShift * @private */ scanModeShift() { if (!this.hasToken('$') && !this.hasToken('$$')) return null; const final = this.get().type; const result = new MathAtom('math', 'group'); result.mathstyle = final === '$' ? 
'textstyle' : 'displaystyle'; result.latexOpen = result.mathstyle === 'textstyle' ? '$' : '$$'; result.latexClose = result.latexOpen; const savedParsemode = this.swapParseMode('math'); result.body = this.scanImplicitGroup(token => token.type === final); this.parseToken(final); this.swapParseMode(savedParsemode); if (!result.body || result.body.length === 0) return null; return result; } /** * Parse a \begin{env}...\end{end} sequence * @method module:core/parser#Parser#scanEnvironment * @private */ scanEnvironment() { // An environment starts with a \begin command if (!this.parseCommand('begin')) return null; // The \begin command is immediately followed by the environment // name, as a string argument const envName = this.scanArg('string'); const env = Definitions.getEnvironmentInfo(envName); // If the environment has some arguments, parse them const args = []; if (env && env.params) { for (const param of env.params) { // Parse an argument if (param.optional) { // If it's not present, return the default argument value const arg = this.scanOptionalArg(param.type); // args.push(arg ? arg : param.defaultValue); @todo defaultvalue args.push(arg); } else { // If it's not present, scanArg returns null, // but push it on the list of arguments anyway. // The null value will be interpreted as unspecified // optional value by the command parse function. 
args.push(this.scanArg(param.type)); } } } // Some environments change the mode const savedMode = this.parseMode; const savedTabularMode = this.tabularMode; const savedMathList = this.swapMathList([]); // @todo: since calling scanImplicitGroup(), may not need to save/restore the mathlist this.tabularMode = env.tabular; const array = []; const rowGaps = []; let row = []; let done = false; do { done = this.end(); if (!done && this.parseCommand('end')) { done = this.scanArg('string') === envName; } if (!done) { if (this.parseColumnSeparator()) { row.push(this.swapMathList([])); } else if (this.parseRowSeparator()) { row.push(this.swapMathList([])); let gap = 0; this.skipWhitespace(); if (this.parseLiteral('[')) { gap = this.scanDimen(); this.skipWhitespace(); this.parseLiteral(']'); } rowGaps.push(gap || 0); array.push(row); row = []; } else { this.mathList = this.mathList.concat(this.scanImplicitGroup()); } } } while (!done); row.push(this.swapMathList([])); if (row.length > 0) array.push(row); const newMathList = this.swapMathList(savedMathList); // If we're in tabular mode, we should end up with an empty mathlist console.assert(!this.tabularMode || newMathList.length === 0, 'Leftover atoms in tabular mode'); this.parseMode = savedMode; this.tabularMode = savedTabularMode; if (!env.tabular && newMathList.length === 0) return null; if (env.tabular && array.length === 0) return null; const result = new MathAtom(this.parseMode, 'array', newMathList, env.parser ? env.parser(envName, args, array) : {}); result.array = array; result.rowGaps = rowGaps; result.env = {...env}; result.env.name = envName; return result; } /** * Parse a sequence terminated with a group end marker, such as * `}`, `\end`, `&`, etc... * * Returns an array of atoms or an empty array if the sequence * terminates right away. 
*
     * @param {function(Token):boolean} [done] A predicate indicating if a
     * token signals the end of an implicit group
     * @return {MathAtom[]}
     * @method module:core/parser#Parser#scanImplicitGroup
     * @private
     */
    scanImplicitGroup(done) {
        // An implicit group is a sequence of atoms that terminates with
        // a `'}'`, `'&'`, `'\'`, `'\cr'` or `'\end'` or the end of the stream
        // The style in effect on entry is restored before returning, so style
        // changes made inside the group do not leak out of it.
        const savedStyle = this.style;
        if (!done) {
            // Default terminator, used when the caller does not provide one
            done = token => token.type === '}' ||
                (token.type === 'literal' && token.value === '&') ||
                (token.type === 'command' && /^(end|cr|\\)$/.test(token.value));
        }
        // To handle infix operators, we'll keep track of their prefix
        // (tokens coming before them)
        let infix = null; // A token
        let prefix = null; // A mathlist
        // The atoms of the group are accumulated in a fresh math list
        const savedMathlist = this.swapMathList([]);
        while (!this.end() && !done(this.peek())) {
            if (this.hasImplicitCommand(['displaystyle', 'textstyle', 'scriptstyle', 'scriptscriptstyle'])) {
                // Implicit math style commands such as \displaystyle, \textstyle...
                // Note these commands switch to math mode and a specific size
                // \textstyle is the mathstyle used for inlinemath, not for text
                this.parseMode = 'math';
                const atom = new MathAtom('math', 'mathstyle');
                atom.mathstyle = this.get().value;
                this.mathList.push(atom);
            } else if (this.hasInfixCommand() && !infix) {
                // The next token is an infix and we have not seen one yet
                // (there can be only one infix command per implicit group).
                infix = this.get();
                // Save the math list so far and start a new one
                prefix = this.swapMathList([]);
            } else {
                this.parseAtom();
            }
        }
        let result;
        if (infix) {
            // What was accumulated since the infix command is its suffix.
            // This also restores the caller's math list.
            const suffix = this.swapMathList(savedMathlist);
            // The current parseMode, this.parseMode, may no longer have the value
            // it had when we encountered the infix. However, since all infix are
            // only defined in 'math' mode, we can use the 'math' constant
            // for the parseMode
            const info = Definitions.getInfo('\\' + infix.value, 'math', this.macros);
            if (info) {
                result = [new MathAtom(this.parseMode, info.type,
                    // Fall back to the name of the command when the
                    // definition has no `value`
                    info.value || infix.value,
                    // The prefix and suffix are the two arguments of the infix
                    info.parse ? info.parse('\\' + infix.value, [prefix, suffix]) : null)];
            } else {
                // No definition found in math mode: emit a plain `mop` atom
                result = [new MathAtom(this.parseMode, 'mop', infix.value)];
            }
        } else {
            // No infix: the group is the accumulated list. This also restores
            // the caller's math list.
            result = this.swapMathList(savedMathlist);
        }
        this.style = savedStyle;
        return result;
    }

    /**
     * Parse a group enclosed in a pair of braces: `{...}`.
     *
     * Return either a group MathAtom or null if not a group.
     *
     * Return a group MathAtom with an empty body if an empty
     * group (i.e. `{}`).
     * @return {MathAtom}
     * @method module:core/parser#Parser#scanGroup
     * @private
     */
    scanGroup() {
        if (!this.parseToken('{')) return null;
        const result = new MathAtom(this.parseMode, 'group');
        result.body = this.scanImplicitGroup(token => token.type === '}');
        // Consume the closing brace, if present
        this.parseToken('}');
        result.latexOpen = '{';
        result.latexClose = '}';
        return result;
    }

    /**
     * If the next non-whitespace token is the literal `(`, parse up to the
     * matching `)` and return the content as a `'leftright'` atom.
     *
     * @return {MathAtom} The atom, or null if there was no open paren. If the
     * stream ends before the matching `)`, the right delimiter is `'?'`.
     */
    scanSmartFence() {
        this.skipWhitespace();
        if (!this.parseLiteral('(')) return null;
        // We've found an open paren... Convert to a `\mleft...\mright`
        const result = new MathAtom(this.parseMode, 'leftright');
        result.leftDelim = '(';
        result.inner = false; // It's a `\mleft`, not a `\left`
        const savedMathList = this.swapMathList([]);
        // Track the nesting of parentheses: the fence closes on the `)` that
        // brings the level back to 0. That `)` is not parsed as an atom.
        let nestLevel = 1;
        while (!this.end() && nestLevel !== 0) {
            if (this.hasLiteral('(')) nestLevel += 1;
            if (this.hasLiteral(')')) nestLevel -= 1;
            if (nestLevel !== 0) this.parseAtom();
        }
        if (nestLevel === 0) this.parseLiteral(')');
        result.rightDelim = nestLevel === 0 ? ')' : '?';
        result.body = this.swapMathList(savedMathList);
        return result;
    }

    /**
     * Scan a delimiter, e.g.
'(', '|', '\vert', '\ulcorner' * * @return {string} The delimiter (as a character or command) or null * @memberof Parser * @method module:core/parser#Parser#scanDelim * @private */ scanDelim() { this.skipWhitespace(); const token = this.get(); if (!token) return null; let delim = '.'; if (token.type === 'command') { delim = '\\' + token.value; } else if (token.type === 'literal') { delim = token.value; } const info = Definitions.getInfo(delim, 'math', this.macros); if (!info) return null; if (info.type === 'mopen' || info.type === 'mclose') { return delim; } // Some symbols are not of type mopen/mclose, but are still // valid delimiters... // '?' is a special delimiter used as a 'placeholder' // (when the closing delimiter is displayed greyed out) if (/^(\?|\||<|>|\\vert|\\Vert|\\\||\\surd|\\uparrow|\\downarrow|\\Uparrow|\\Downarrow|\\updownarrow|\\Updownarrow|\\mid|\\mvert|\\mVert)$/.test(delim)) { return delim; } return null; } /** * Parse a `/left.../right` sequence. * * Note: the `/middle` command can occur multiple times inside a * `/left.../right` sequence, and is handled separately. 
*
     * Return either an atom of type `'leftright'` or null
     * @return {MathAtom}
     * @method module:core/parser#Parser#scanLeftRight
     * @private
     */
    scanLeftRight() {
        if (this.parseCommand('right') || this.parseCommand('mright')) {
            // We have an unbalanced left/right (there's a \right, but no \left)
            const result = new MathAtom(this.parseMode, 'leftright');
            result.rightDelim = this.scanDelim() || '.';
            return result;
        }
        const savedStyle = this.style;
        // `\left` is closed by `\right`, `\mleft` by `\mright`
        let close = 'right';
        if (!this.parseCommand('left')) {
            if (!this.parseCommand('mleft')) return null;
            close = 'mright';
        }
        const leftDelim = this.scanDelim() || '.';
        const savedMathList = this.swapMathList([]);
        while (!this.end() && !this.parseCommand(close)) {
            this.parseAtom();
        }
        // Style changes made inside the fence do not leak out of it
        this.style = savedStyle;
        // If we've reached the end and there was no `\right` or
        // there isn't a valid delimiter after `\right`, we'll
        // consider the `\right` missing and leave the `rightDelim` unset
        // (`scanDelim()` returns null in both cases)
        const rightDelim = this.scanDelim();
        const result = new MathAtom(this.parseMode, 'leftright');
        result.leftDelim = leftDelim;
        result.rightDelim = rightDelim;
        // `inner` is true for `\left...\right`, false for `\mleft...\mright`
        result.inner = close === 'right';
        result.body = this.swapMathList(savedMathList);
        return result;
    }

    /**
     * Parse a subscript/superscript: `^` and `_`.
     *
     * Modify the last atom accordingly.
     *
     * @return {boolean} True if a superscript, subscript or prime was
     * attached to the last atom
     * @method module:core/parser#Parser#parseSupSub
     * @private
     */
    parseSupSub() {
        // No sup/sub in text or command mode.
        if (this.parseMode !== 'math') return false;
        // Apply the subscript/superscript to the last render atom.
        // If none is present (beginning of the mathlist, i.e. `{^2}`,
        // an empty atom will be created, equivalent to `{{}^2}`
        let result = false;
        while (this.hasLiteral('^') || this.hasLiteral('_') || this.hasLiteral("'")) {
            // Name of the atom property to update
            let supsub;
            if (this.hasLiteral('^')) {
                supsub = 'superscript';
            } else if (this.hasLiteral('_')) {
                supsub = 'subscript';
            }
            if (this.parseLiteral('^') || this.parseLiteral('_')) {
                const arg = this.scanArg();
                if (arg) {
                    // Append to any existing superscript/subscript
                    const atom = this.lastMathAtom();
                    atom[supsub] = atom[supsub] || [];
                    atom[supsub] = atom[supsub].concat(arg);
                    result = true;
                }
            } else if (this.parseLiteral("'")) {
                // A single quote (prime) is actually equivalent to a
                // '^{\prime}'
                const atom = this.lastMathAtom();
                atom.superscript = atom.superscript || [];
                atom.superscript.push(new MathAtom(atom.parseMode, 'mord', '\u2032'));
                result = true;
            }
        }
        return result;
    }

    /**
     * Parse a `\limits` or `\nolimits` command.
     *
     * This will change the placement of limits to be either above or below
     * (if `\limits`) or in the superscript/subscript position (if `\nolimits`).
     *
     * This overrides the calculation made for the placement, which is usually
     * dependent on the displaystyle (`inlinemath` prefers `\nolimits`, while
     * `displaymath` prefers `\limits`).
     * @return {boolean} True if one of the two commands was consumed
     * @method module:core/parser#Parser#parseLimits
     * @private
     */
    parseLimits() {
        // Note: technically, \limits and \nolimits are only applicable
        // after an operator. However, we apply them in all cases. They
        // will simply be ignored when not applicable (i.e. on a literal)
        // which is actually consistent with TeX.
        if (this.parseCommand('limits')) {
            const lastAtom = this.lastMathAtom();
            lastAtom.limits = 'limits';
            // Record that the limits was set through an explicit command
            // so we can generate the appropriate LaTeX later
            lastAtom.explicitLimits = true;
            return true;
        }
        if (this.parseCommand('nolimits')) {
            const lastAtom = this.lastMathAtom();
            lastAtom.limits = 'nolimits';
            // Record that the limits was set through an explicit command
            // so we can generate the appropriate LaTeX later
            lastAtom.explicitLimits = true;
            return true;
        }
        return false;
    }

    /**
     * Parse an optional argument, i.e. an argument enclosed in `[...]`.
     *
     * @param {string} [parseMode] The mode to parse the argument in. If
     * omitted or `'auto'`, the current parse mode is used.
     * @return {*} null if the next non-whitespace token is not `[`. Otherwise
     * the scanned value if it is truthy, else the list of atoms that were
     * parsed (an empty list when a non-math mode yields a falsy value such
     * as `0` or `''`).
     */
    scanOptionalArg(parseMode) {
        parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
        this.skipWhitespace();
        if (!this.parseLiteral('[')) return null;
        const savedParseMode = this.parseMode;
        this.parseMode = parseMode;
        const savedMathlist = this.swapMathList();
        let result;
        while (!this.end() && !this.parseLiteral(']')) {
            if (parseMode === 'string') {
                result = this.scanString();
            } else if (parseMode === 'number') {
                result = this.scanNumber();
            } else if (parseMode === 'dimen') {
                result = this.scanDimen();
            } else if (parseMode === 'skip') {
                result = this.scanSkip();
            } else if (parseMode === 'colspec') {
                result = this.scanColspec();
            } else if (parseMode === 'color') {
                // '#ffffff' is used when the color is not recognized
                result = this.scanColor() || '#ffffff';
            } else if (parseMode === 'bbox') {
                // The \bbox command takes a very particular argument:
                // a comma delimited list of up to three arguments:
                // a color, a dimension and a string.
                // Split the string by comma delimited sub-strings, ignoring commas
                // that may be inside (). For example "x, rgb(a, b, c)" would return
                // ['x', 'rgb(a, b, c)']
                const list = this.scanString().toLowerCase().trim().split(/,(?![^(]*\)(?:(?:[^(]*\)){2})*[^"]*$)/);
                for (const elem of list) {
                    // Each element is, in order of precedence, a color, a
                    // dimension (the padding) or a `border: ...` value.
                    // Anything else is ignored.
                    const color = Color.stringToColor(elem);
                    if (color) {
                        result = result || {};
                        result.backgroundcolor = color;
                    } else {
                        const m = elem.match(/^\s*([0-9.]+)\s*([a-z][a-z])/);
                        if (m) {
                            result = result || {};
                            result.padding = FontMetrics.toEm(m[1], m[2]);
                        } else {
                            const m = elem.match(/^\s*border\s*:\s*(.*)/);
                            if (m) {
                                result = result || {};
                                result.border = m[1];
                            }
                        }
                    }
                }
            } else {
                console.assert(parseMode === 'math', 'Unexpected parse mode: "' + parseMode + '"');
                this.mathList = this.mathList.concat(this.scanImplicitGroup(token => token.type === 'literal' && token.value === ']'));
            }
        }
        this.parseMode = savedParseMode;
        const mathList = this.swapMathList(savedMathlist);
        return result ? result : mathList;
    }

    /**
     * Parse a math field, an argument to a function.
     *
     * An argument can either be a single atom or
     * a sequence of atoms enclosed in braces.
     *
     * @param {string} [parseMode] Temporarily overrides the parser parsemode. For
     * example: `'dimen'`, `'color'`, `'text'`, etc... If omitted or `'auto'`,
     * the current parse mode is used.
     * @method module:core/parser#Parser#scanArg
     * @private
     */
    scanArg(parseMode) {
        parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
        this.parseFiller();
        let result;
        // An argument (which is called a 'math field' in TeX)
        // could be a single character or symbol, as in `\frac12`
        // Note that ``\frac\sqrt{-1}\alpha\beta`` is equivalent to
        // ``\frac{\sqrt}{-1}{\beta}``
        if (!this.parseToken('{')) {
            if (parseMode === 'delim') {
                return this.scanDelim() || '.';
            } else if (/^(math|text)$/.test(parseMode)) {
                // Parse a single atom.
                const savedParseMode = this.parseMode;
                this.parseMode = parseMode;
                const atom = this.scanToken();
                this.parseMode = savedParseMode;
                if (Array.isArray(atom)) return atom;
                return atom ? [atom] : null;
            }
            // In the other parse modes, an argument without an opening brace
            // is scanned below like a braced one
        }
        // If this is a param token, substitute it with the
        // (optional) argument passed to the parser
        if (this.hasToken('#')) {
            const paramToken = this.get();
            this.skipUntilToken('}');
            if (paramToken.value === '?') {
                return this.placeholder();
            }
            if (this.args) {
                // No value for this parameter, but a value for '#?': use the
                // placeholder
                if (this.args[paramToken.value] === undefined && this.args['?'] !== undefined) {
                    return this.placeholder();
                }
                return this.args[paramToken.value] || null;
            }
            return null;
        }
        const savedParseMode = this.parseMode;
        this.parseMode = parseMode;
        const savedMathList = this.swapMathList([]);
        // In the non-math modes, scan a value, then skip to just after the
        // next closing brace
        if (parseMode === 'string') {
            result = this.scanString();
            this.skipUntilToken('}');
        } else if (parseMode === 'number') {
            result = this.scanNumber();
            this.skipUntilToken('}');
        } else if (parseMode === 'dimen') {
            result = this.scanDimen();
            this.skipUntilToken('}');
        } else if (parseMode === 'skip') {
            result = this.scanSkip();
            this.skipUntilToken('}');
        } else if (parseMode === 'colspec') {
            result = this.scanColspec();
            this.skipUntilToken('}');
        } else if (parseMode === 'color') {
            // '#ffffff' is used when the color is not recognized
            result = this.scanColor() || '#ffffff';
            this.skipUntilToken('}');
        } else if (parseMode === 'delim') {
            result = this.scanDelim() || '.';
            this.skipUntilToken('}');
        } else {
            console.assert(/^(math|text)$/.test(parseMode), 'Unexpected parse mode: "' + parseMode + '"');
            // Accumulate implicit groups until the closing brace (or the end
            // of the stream) is reached
            do {
                this.mathList = this.mathList.concat(this.scanImplicitGroup());
            } while (!this.parseToken('}') && !this.end());
        }
        this.parseMode = savedParseMode;
        const mathList = this.swapMathList(savedMathList);
        // A truthy scanned value wins over the list of parsed atoms
        return result ?
result : mathList; } /** * @return {MathAtom[]|MathAtom} * @method module:core/parser#Parser#scanToken * @private */ scanToken() { const token = this.get(); if (!token) return null; let result = null; if (token.type === 'space') { if (this.parseMode === 'text') { result = new MathAtom('text', '', ' ', this.style); } } else if (token.type === 'placeholder') { // RENDER PLACEHOLDER result = new MathAtom(this.parseMode, 'placeholder', token.value); result.captureSelection = true; } else if (token.type === 'command') { // RENDER COMMAND if (token.value === 'placeholder') { result = new MathAtom(this.parseMode, 'placeholder', this.scanArg('string'), this.style); result.captureSelection = true; } else if (token.value === 'char') { // \char has a special syntax and requires a non-braced integer // argument let codepoint = Math.floor(this.scanNumber(true)); if (!isFinite(codepoint) || codepoint < 0 || codepoint > 0x10FFFF) { codepoint = 0x2753; // BLACK QUESTION MARK } result = new MathAtom(this.parseMode, this.parseMode === 'math' ? 'mord' : '', String.fromCodePoint(codepoint)); result.latex = '{\\char"' + ('000000' + codepoint.toString(16)).toUpperCase().substr(-6) + '}'; } else if (token.value === 'hskip' || token.value === 'kern') { // \hskip and \kern have a special syntax and requires a non-braced // 'skip' argument const width = this.scanSkip(); if (isFinite(width)) { result = new MathAtom(this.parseMode, 'spacing', null, this.style); result.width = width; } result .latex = '\\' + token.value; } else { result = this.scanMacro(token.value); if (!result) { const info = Definitions.getInfo('\\' + token.value, this.parseMode, this.macros); const args = []; let argString = ''; // Parse the arguments // let mandatoryParamsCount = 0; // If explicitGroup is not empty, an explicit group is expected // to follow the command and will be parsed *after* the // command has been processed. 
// This is used for commands such as \textcolor{color}{content} // that need to apply the color to the content *after* the // style has been changed. // In definitions, this is indicated with a parameter type // of 'auto*' let explicitGroup = ''; if (info && info.params) { for (const param of info.params) { // Parse an argument if (param.optional) { // If it's not present, return the default argument value const arg = this.scanOptionalArg(param.type); // args.push(arg ? arg : param.defaultValue); @todo defaultvalue args.push(arg); } else if (param.type.endsWith('*')) { explicitGroup = param.type.slice(0, -1); } else { // mandatoryParamsCount += 1; // If it's not present, scanArg returns null. // Add a placeholder instead. const arg = this.scanArg(param.type); if (arg && arg.length === 1 && arg[0].type === 'placeholder' && param.placeholder) { arg[0].value = param.placeholder; } if (arg) { args.push(arg); } else if (param.placeholder) { const placeholder = new MathAtom(this.parseMode, 'placeholder', param.placeholder); placeholder.captureSelection = true; args.push([placeholder]); } else { args.push(this.placeholder()); } if (param.type !== 'math' && typeof arg === 'string') { argString += arg } } } } if (info && !info.infix) { // Infix commands should be handled in scanImplicitGroup // If we find an infix command here, it's a syntax error // (second infix command in an implicit group) and should be ignored. // Create the MathAtom. // If a parse function is present, invoke it with the arguments, // and pass the result to be appended by the constructor. if (info.parse) { const attributes = info.parse('\\' + token.value, args); if (!attributes.type) { // No type provided -> the parse function will modify // the current style rather than create a new Atom. 
const savedMode = this.parseMode; if (attributes.mode) { // Change to 'text' (or 'math') mode if necessary this.parseMode = attributes.mode; delete attributes.mode; } // If an explicit group is expected, process it now if (explicitGroup) { // Create a temporary style const saveStyle = this.style; this.style = {...this.style, ...attributes}; result = this.scanArg(explicitGroup); this.style = saveStyle; } else { // Merge the new style info with the current style this.style = {...this.style, ...attributes}; } this.parseMode = savedMode; } else { result = new MathAtom(this.parseMode, info.type, explicitGroup ? this.scanArg(explicitGroup) : null, {...this.style, ...attributes}); } } else { const style = {...this.style}; if (info.baseFontFamily) style.baseFontFamily = info.baseFontFamily; result = new MathAtom(this.parseMode, info.type || 'mop', info.value || token.value, style); if (info.skipBoundary) { result.skipBoundary = true; } } if (result && !/^(llap|rlap|class|cssId)$/.test(token.value)) { result.latex = '\\' + token.value; if (argString /*|| mandatoryParamsCount > 0*/) { result.latex += '{' + argString + '}' } if (result.isFunction && this.smartFence) { // The atom was a function that may be followed by // an argument, like `\sin(` const smartFence = this.scanSmartFence(); if (smartFence) { result = [result, smartFence]; } } } } if (!info) { // An unknown command result = new MathAtom(this.parseMode, 'error', '\\' + token.value); result.latex = '\\' + token.value; } } } } else if (token.type === 'literal') { const info = Definitions.getInfo(token.value, this.parseMode, this.macros); if (info) { const style = {...this.style}; if (info.baseFontFamily) style.baseFontFamily = info.baseFontFamily; result = new MathAtom(this.parseMode, info.type, info.value || token.value, style); if (info.isFunction) { result.isFunction = true; } } else { result = new MathAtom(this.parseMode, this.parseMode === 'math' ? 
'mord' : '', token.value, this.style); } result.latex = Definitions.matchCodepoint(this.parseMode, token.value.codePointAt(0)); if (info && info.isFunction && this.smartFence) { // The atom was a function that may be followed by // an argument, like `f(`.