mathlive
Version:
Render and edit beautifully typeset math
1,202 lines (1,181 loc) • 55.4 kB
JavaScript
/**
* @module core/parser
* @private
*/
import Definitions from './definitions.js';
import Color from './color.js';
import FontMetrics from './fontMetrics.js';
import Lexer from './lexer.js';
import MathAtomModule from './mathAtom.js';
const MathAtom = MathAtomModule.MathAtom;
/**
* A parser transforms a list of tokens into a list of MathAtom.
*
* @param {Token[]} tokens - An array of tokens generated by the lexer.
* @param {Object.<string>} [args] - An optional list of arguments. `#n` tokens will be
* substituted with the corresponding element in the args object. This is used
* when parsing macros.
* @class module:core/parser#Parser
* @property {Object.<string, string>} [macros] - Optional macro definitions.
* @property {Token[]} tokens - An array of tokens generated by the lexer.
* @property {Object.<string>} args - Optional arguments to substitute the `#` token.
* @property {Object.<string, string>} macros - A dictionary of objects, index by the name of
* the macro, with the following keys:
* * args: an integer, the number of arguments, default 0. They can be referenced as #0,
* #1, #2... inside the definition of the macro
* * def: a string, the definition of the macro, which can reference other macros
* @property {number} index - The current token to be parsed: index in `this.tokens`
* @property {MathAtom[]} mathList - Accumulated result of the parsing by
* `parseAtom()`
* @property {object} style - The font, weight, color, etc.. to apply to the
* upcoming tokens
* @property {string} parseMode - The parse mode indicates the syntax rules to
* use to parse the upcoming tokens.
* Valid values include:
* - `'math'`: spaces are ignored, math functions are allowed
* - `'text'`: spaces are accounted for, math functions are ignored
* - `'string'`
* - `'color'`: color name, hex value: `'#fff'`, `'#a0a0a0'`
* - `'number'`: `+/-12.56`
* - `'dimen'`: `'25mu'`, `'2pt'`
* - `'skip'`: `'25mu plus 2em minus fiLll'`, `'2pt'`
* - `'colspec'`: formatting of a column in tabular environment, e.g. `'r@{.}l'`
* @property {boolean} tabularMode - When in tabular mode, `'&'` is interpreted as
* a column separator and `'\'` as a row separator. Used for matrixes, etc...
* @property {number} endCount - Counter to prevent deadlock. If `end()` is
* called too many times (1,000) in a row for the same token, bail.
* @private
*/
class Parser {
constructor(tokens, args, macros) {
this.tokens = tokens;
this.index = 0;
this.args = args;
this.macros = macros;
this.mathList = [];
this.style = {};
this.parseMode = 'math';
this.tabularMode = false;
this.endCount = 0;
}
swapMathList(newMathList) {
const result = this.mathList;
this.mathList = newMathList || [];
return result;
}
swapParseMode(mode) {
const result = this.parseMode;
this.parseMode = mode;
return result;
}
/**
* True if we've reached the end of the token stream.
* @method module:core/parser#Parser#end
* @private
*/
end() {
// To prevent a deadlock, count how many times end() is called without the
// index advancing. If it happens more than 1,000 times in a row,
// assume something is broken and pretend the stream is finished.
this.endCount++;
return this.index >= this.tokens.length || this.endCount > 1000;
}
get() {
this.endCount = 0;
return this.index < this.tokens.length ? this.tokens[this.index++] : null;
}
peek(offset) {
const index = this.index + (offset ? offset : 0);
return index < this.tokens.length ? this.tokens[index] : null;
}
/**
* Return the last atom of the math list if it is of type `mop` or `msubsup`.
* Otherwise, append an empty `msubsup` atom to the math list and return it.
* @method module:core/parser#Parser#lastMathAtom
* @private
*/
lastMathAtom() {
const lastType = this.mathList.length === 0 ? 'none' :
this.mathList[this.mathList.length - 1].type;
if (lastType !== 'mop' && lastType !== 'msubsup') {
// ZERO WIDTH SPACE
const lastAtom = new MathAtom(this.parseMode, 'msubsup', '\u200b');
lastAtom.attributes = { 'aria-hidden': true };
this.mathList.push(lastAtom);
}
return this.mathList[this.mathList.length - 1];
}
/**
* @param {string} type
* @return {boolean} True if the next token is of the specified type
* @method module:core/parser#Parser#hasToken
* @private
*/
hasToken(type) {
const index = this.index;
return index < this.tokens.length ?
this.tokens[index].type === type : false;
}
/**
* @param {string} [value]
* @return {boolean} True if the next token is of type `'literal'` and has the
* specified value. If `value` is empty, return true if the token is of type
* `'literal'`
* @method Parser#hasLiteral
* @private
*/
hasLiteral(value) {
const index = this.index;
return index < this.tokens.length ?
this.tokens[index].type === 'literal' &&
(!value || this.tokens[index].value === value) : false;
}
/**
* @param {RegExp} pattern
* @return {boolean} True if the next token is of type `'literal'` and matches
* the specified regular expression pattern.
* @method module:core/parser#Parser#hasLiteralPattern
* @private
*/
hasLiteralPattern(pattern) {
return this.hasToken('literal') &&
pattern.test(this.tokens[this.index].value);
}
hasCommand(command) {
console.assert(command === '\\' || command.charAt(0) !== '\\', 'hasCommand() does not require a \\');
const index = this.index;
return index < this.tokens.length ?
this.tokens[index].type === 'command' &&
this.tokens[index].value === command : false;
}
hasInfixCommand() {
const index = this.index;
if (index < this.tokens.length &&
this.tokens[index].type === 'command') {
const info = Definitions.getInfo('\\' + this.tokens[index].value, this.parseMode, this.macros);
return info && info.infix;
}
return false;
}
hasColumnSeparator() {
const index = this.index;
return this.tabularMode && index < this.tokens.length ?
this.tokens[index].type === 'literal' &&
this.tokens[index].value === '&' : false;
}
hasRowSeparator() {
const index = this.index;
return this.tabularMode && index < this.tokens.length ?
this.tokens[index].type === 'command' &&
(this.tokens[index].value === '\\' ||
this.tokens[index].value === 'cr') : false;
}
parseColumnSeparator() {
if (this.hasColumnSeparator()) {
this.index++;
return true;
}
return false;
}
/*
* Return the appropriate value for a placeholder, either a default
* one, or if a value was provided for #? via args, that value.
*/
placeholder() {
if (this.args && typeof this.args['?'] === 'string') {
// If there is a specific value defined for the placeholder,
// use it.
return parseTokens(Lexer.tokenize(this.args['?']), this.parseMode, null, this.macros);
}
// U+2753 = BLACK QUESTION MARK ORNAMENT
const result = new MathAtom(this.parseMode, 'placeholder', '?', this.style);
result.captureSelection = true;
return [result];
}
hasImplicitCommand(commands) {
if (this.index < this.tokens.length) {
const token = this.tokens[this.index];
if (token.type === 'command') {
return commands.includes(token.value);
}
}
return false;
}
parseRowSeparator() {
if (this.hasRowSeparator()) {
this.index++;
return true;
}
return false;
}
/**
* @param {string} type
* @method module:core/parser#Parser#parseToken
* @private
*/
parseToken(type) {
if (this.hasToken(type)) {
this.index++;
return true;
}
return false;
}
skipWhitespace() {
let found = false;
while (this.hasToken('space')) {
this.index++;
found = true;
}
return found;
}
skipUntilToken(type) {
while (!this.end() && !this.parseToken(type)) {
this.get();
}
}
parseCommand(command) {
if (this.hasCommand(command)) {
this.index++;
return true;
}
return false;
}
parseLiteral(literal) {
if (this.hasLiteral(literal)) {
this.index++;
return true;
}
return false;
}
parseFiller() {
let skipped = false;
let done = false;
do {
const skippedSpace = this.skipWhitespace();
const skippedRelax = this.parseCommand('relax');
skipped = skipped || skippedSpace || skippedRelax;
done = !skippedSpace && !skippedRelax;
} while (!done);
return skipped;
}
/**
* Keywords are used to specify dimensions, and for various other
* syntactic constructs. Unlike commands, they are not case sensitive.
* There are 25 keywords:
* at by bp cc cm dd depth em ex fil fill filll height in minus
* mm mu pc plus pt sp spread to true width
*
* TeX: 8212
* @param {string} keyword
* @return {boolean} true if the expected keyword is present
* @method module:core/parser#Parser#parseKeyword
* @private
*/
parseKeyword(keyword) {
const savedIndex = this.index;
let done = this.end();
let value = '';
while (!done) {
const token = this.get();
if (token.type === 'literal') {
value += token.value;
}
done = this.end() || token.type !== 'literal' ||
value.length >= keyword.length;
}
const hasKeyword = keyword.toUpperCase() === value.toUpperCase();
if (!hasKeyword) {
this.index = savedIndex;
}
return hasKeyword;
}
/**
* Return a sequence of characters as a string.
* i.e. 'abcd' returns 'abcd'.
* Terminates on the first non-character encountered
* e.g. '{', '}' etc...
* Will also terminate on ']'
* @return {string}
* @method module:core/parser#Parser#scanString
* @private
*/
scanString() {
let result = '';
let done = this.end();
while (!done) {
if (this.hasLiteral(']')) {
done = true;
} else if (this.hasToken('literal')) {
result += this.get().value;
} else if (this.skipWhitespace()) {
result += ' ';
} else if (this.hasToken('command')) {
const token = this.get();
if (token.value === 'space') {
// The 'space' command is the ~
// which can be used for example in operator names, i.e.
// \operatorname{lim~inf}. It's interpreted as a nbs
result += '\u00a0'; // NO-BREAK SPACE
} else {
// TeX will give a 'Missing \endcsname inserted' error
// if it encounters any command when expecting a string.
// We're a bit more lax.
result += token.value;
}
} else {
done = true;
}
done = done || this.end();
}
return result;
}
/**
* Return a CSS color (#rrggbb)
* @method module:core/parser#Parser#scanColor
* @private
*/
scanColor() {
return Color.stringToColor(this.scanString());
}
/**
* Return as a number a group of characters representing a
* numerical quantity.
*
* From TeX:8695 (scan_int):
* An integer number can be preceded by any number of spaces and `\.+' or
* `\.-' signs. Then comes either a decimal constant (i.e., radix 10), an
* octal constant (i.e., radix 8, preceded by~\.\'), a hexadecimal constant
* (radix 16, preceded by~\."), an alphabetic constant (preceded by~\.\`), or
* an internal variable.
* @return {number}
* @method Parser#scanNumber
* @private
*/
scanNumber(isInteger) {
const negative = this.parseLiteral('-');
// Optional (ignorable) '+' sign
if (!negative) this.parseLiteral('+');
this.skipWhitespace();
isInteger = !!isInteger;
let radix = 10;
let digits = /[0-9]/;
if (this.parseLiteral("'")) {
// Apostrophe indicates an octal value
radix = 8;
digits = /[0-7]/;
isInteger = true;
} else if (this.parseLiteral('"') || this.parseLiteral('x')) {
// Double-quote indicates a hex value
// The 'x' prefix notation for the hexadecimal numbers is a MathJax extension.
// For example: 'x3a'
radix = 16;
// Hex digits have to be upper-case
digits = /[0-9A-F]/;
isInteger = true;
}
let value = '';
while (this.hasLiteralPattern(digits)) {
value += this.get().value;
}
// Parse the fractional part, if applicable
if (!isInteger && (this.parseLiteral('.') || this.parseLiteral(','))) {
value += '.';
while (this.hasLiteralPattern(digits)) {
value += this.get().value;
}
}
const result = isInteger ? parseInt(value, radix) : parseFloat(value);
return negative ? -result : result;
}
/**
* Return as a floating point number a dimension in pt (1 em = 10 pt)
*
* See TeX:8831
* @todo: note that some units depend on the font (em, ex). So it might be
* better to return a dimen struct with the value + unit and resolve
* later when we have a font context....
* @return {number}
* @method module:core/parser#Parser#scanDimen
* @private
*/
scanDimen() {
const value = this.scanNumber(false);
this.skipWhitespace();
let result;
if (this.parseKeyword('pt')) {
result = FontMetrics.toEm(value, 'pt');
} else if (this.parseKeyword('mm')) {
result = FontMetrics.toEm(value, 'mm');
} else if (this.parseKeyword('cm')) {
result = FontMetrics.toEm(value, 'cm');
} else if (this.parseKeyword('ex')) {
result = FontMetrics.toEm(value, 'ex');
} else if (this.parseKeyword('px')) {
result = FontMetrics.toEm(value, 'px');
} else if (this.parseKeyword('em')) {
result = FontMetrics.toEm(value, 'em');
} else if (this.parseKeyword('bp')) {
result = FontMetrics.toEm(value, 'bp');
} else if (this.parseKeyword('dd')) {
result = FontMetrics.toEm(value, 'dd');
} else if (this.parseKeyword('pc')) {
result = FontMetrics.toEm(value, 'pc');
} else if (this.parseKeyword('in')) {
result = FontMetrics.toEm(value, 'in');
} else if (this.parseKeyword('mu')) {
result = FontMetrics.toEm(value, 'mu');
} else {
// If the units are missing, TeX assumes 'pt'
result = FontMetrics.toEm(value, 'pt');
}
return result;
}
scanSkip() {
const result = this.scanDimen();
// We parse, but ignore the optional 'plus' and 'minus'
// arguments.
this.skipWhitespace();
// 'plus', optionally followed by 'minus'
// ('minus' cannot come before 'plus')
// dimen or 'hfill'
if (this.parseKeyword('plus')) {
// @todo there could also be a \hFilLlL command here
this.scanDimen();
}
this.skipWhitespace();
if (this.parseKeyword('minus')) {
// @todo there could also be a \hFilLlL command here
this.scanDimen();
}
return result;
}
scanColspec() {
this.skipWhitespace();
const result = [];
while (!this.end() && !(this.hasToken('}') || this.hasLiteral(']'))) {
if (this.hasLiteral()) {
const literal = this.get().value;
if ('lcr'.includes(literal)) {
result.push({ align: literal });
} else if (literal === '|') {
result.push({ rule: true });
} else if (literal === '@') {
if (this.parseToken('{')) {
const savedParsemode = this.swapParseMode('math');
result.push({ gap: this.scanImplicitGroup(token => token.type === '}') });
this.swapParseMode(savedParsemode);
}
this.parseToken('}');
}
}
}
return result;
}
/**
* Parse a `\(...\)` or `\[...\]` sequence
* @return {MathAtom} group for the sequence or null
* @method module:core/parser#Parser#scanModeSet
* @private
*/
scanModeSet() {
let final;
if (this.parseCommand('(')) final = ')';
if (!final && this.parseCommand('[')) final = ']';
if (!final) return null;
const savedParsemode = this.swapParseMode('math');
const result = new MathAtom('math', 'group');
result.mathstyle = final === ')' ? 'textstyle' : 'displaystyle';
result.body = this.scanImplicitGroup(token => token.type === 'command' && token.value === final);
this.parseCommand(final);
this.swapParseMode(savedParsemode);
if (!result.body || result.body.length === 0) return null;
return result;
}
/**
* Parse a `$...$` or `$$...$$` sequence
* @method Parser#scanModeShift
* @private
*/
scanModeShift() {
if (!this.hasToken('$') && !this.hasToken('$$')) return null;
const final = this.get().type;
const result = new MathAtom('math', 'group');
result.mathstyle = final === '$' ? 'textstyle' : 'displaystyle';
result.latexOpen = result.mathstyle === 'textstyle' ? '$' : '$$';
result.latexClose = result.latexOpen;
const savedParsemode = this.swapParseMode('math');
result.body = this.scanImplicitGroup(token => token.type === final);
this.parseToken(final);
this.swapParseMode(savedParsemode);
if (!result.body || result.body.length === 0) return null;
return result;
}
/**
* Parse a \begin{env}...\end{env} sequence
* @method module:core/parser#Parser#scanEnvironment
* @private
*/
scanEnvironment() {
// Returns an 'array' MathAtom describing the environment, or null when
// there is no `\begin` at the current position or one of the emptiness
// checks at the end of this method fails.
// An environment starts with a \begin command
if (!this.parseCommand('begin')) return null;
// The \begin command is immediately followed by the environment
// name, as a string argument
const envName = this.scanArg('string');
const env = Definitions.getEnvironmentInfo(envName);
// If the environment has some arguments, parse them
const args = [];
if (env && env.params) {
for (const param of env.params) {
// Parse an argument
if (param.optional) {
// If it's not present, return the default argument value
const arg = this.scanOptionalArg(param.type);
// args.push(arg ? arg : param.defaultValue); @todo defaultvalue
args.push(arg);
} else {
// If it's not present, scanArg returns null,
// but push it on the list of arguments anyway.
// The null value will be interpreted as unspecified
// optional value by the command parse function.
args.push(this.scanArg(param.type));
}
}
}
// Some environments change the mode
const savedMode = this.parseMode;
const savedTabularMode = this.tabularMode;
const savedMathList = this.swapMathList([]);
// @todo: since calling scanImplicitGroup(), may not need to save/restore the mathlist
// NOTE(review): `env` is guarded with `env && ...` above but dereferenced
// unconditionally from here on; presumably getEnvironmentInfo() always
// returns an object -- confirm.
this.tabularMode = env.tabular;
// `array` collects the completed rows, `row` the cells of the row being
// parsed, and `rowGaps` one gap value per row separator encountered.
const array = [];
const rowGaps = [];
let row = [];
let done = false;
do {
done = this.end();
// An `\end{name}` terminates the environment only if `name` matches the
// name given to `\begin`. Otherwise the `\end` and its argument have
// been consumed and parsing continues.
if (!done && this.parseCommand('end')) {
done = this.scanArg('string') === envName;
}
if (!done) {
if (this.parseColumnSeparator()) {
// End of a cell: the atoms accumulated so far become the cell content.
row.push(this.swapMathList([]));
} else if (this.parseRowSeparator()) {
// End of a row: close the current cell, then the row itself.
row.push(this.swapMathList([]));
let gap = 0;
this.skipWhitespace();
// A row separator may be followed by an optional `[dimen]` gap.
if (this.parseLiteral('[')) {
gap = this.scanDimen();
this.skipWhitespace();
this.parseLiteral(']');
}
rowGaps.push(gap || 0);
array.push(row);
row = [];
} else {
this.mathList = this.mathList.concat(this.scanImplicitGroup());
}
}
} while (!done);
// Close the last cell. Since a cell is always pushed here, `row` is never
// empty at this point and the last row is always added to `array`.
row.push(this.swapMathList([]));
if (row.length > 0) array.push(row);
// NOTE(review): the list swapped out here is the empty list installed two
// statements above, so `newMathList` is always empty and the
// `!env.tabular` check below always returns null -- confirm this is
// intended.
const newMathList = this.swapMathList(savedMathList);
// If we're in tabular mode, we should end up with an empty mathlist
console.assert(!this.tabularMode || newMathList.length === 0, 'Leftover atoms in tabular mode');
this.parseMode = savedMode;
this.tabularMode = savedTabularMode;
if (!env.tabular && newMathList.length === 0) return null;
if (env.tabular && array.length === 0) return null;
// The environment's own parser, if any, is given the name, the arguments
// and the rows, and its result is passed as the last MathAtom argument.
const result = new MathAtom(this.parseMode, 'array', newMathList,
env.parser ? env.parser(envName, args, array) : {});
result.array = array;
result.rowGaps = rowGaps;
// Shallow copy of the environment info, so that setting the name below
// does not modify the object returned by getEnvironmentInfo().
result.env = {...env};
result.env.name = envName;
return result;
}
/**
* Parse a sequence terminated with a group end marker, such as
* `}`, `\end`, `&`, etc...
*
* Returns an array of atoms or an empty array if the sequence
* terminates right away.
*
* @param {function(Token):boolean} [done] A predicate indicating if a
* token signals the end of an implicit group
* @return {MathAtom[]}
* @method module:core/parser#Parser#scanImplicitGroup
* @private
*/
scanImplicitGroup(done) {
// An implicit group is a sequence of atoms that terminates with
// a `'}'`, `'&'`, `'\'`, `'\cr'` or `'\end'` or the end of the stream
const savedStyle = this.style;
if (!done) {
done = token => token.type === '}' ||
(token.type === 'literal' && token.value === '&') ||
(token.type === 'command' && /^(end|cr|\\)$/.test(token.value));
}
// To handle infix operators, we'll keep track of their prefix
// (tokens coming before them)
let infix = null; // A token
let prefix = null; // A mathlist
const savedMathlist = this.swapMathList([]);
// if (this.index >= this.tokens.length) return true;
// const token = this.tokens[this.index];
while (!this.end() && !done(this.peek())) {
if (this.hasImplicitCommand(['displaystyle', 'textstyle', 'scriptstyle', 'scriptscriptstyle'])) {
// Implicit math style commands such as \displaystyle, \textstyle...
// Note these commands switch to math mode and a specific size
// \textsize is the mathstyle used for inlinemath, not for text
this.parseMode = 'math';
const atom = new MathAtom('math', 'mathstyle');
atom.mathstyle = this.get().value;
this.mathList.push(atom);
} else if (this.hasInfixCommand() && !infix) {
// The next token is an infix and we have not seen one yet
// (there can be only one infix command per implicit group).
infix = this.get();
// Save the math list so far and start a new one
prefix = this.swapMathList([]);
} else {
this.parseAtom();
}
}
let result;
if (infix) {
const suffix = this.swapMathList(savedMathlist);
// The current parseMode, this.parseMode, may no longer have the value
// it had when we encountered the infix. However, since all infix are
// only defined in 'math' mode, we can use the 'math' constant
// for the parseMode
const info = Definitions.getInfo('\\' + infix.value, 'math', this.macros);
if (info) {
result = [new MathAtom(this.parseMode,
info.type,
info.value || infix.value, // Functions don't have
info.parse ? info.parse('\\' + infix.value, [prefix, suffix]) :
null)];
} else {
result = [new MathAtom(this.parseMode, 'mop', infix.value)];
}
} else {
result = this.swapMathList(savedMathlist);
}
this.style = savedStyle;
return result;
}
/**
* Parse a group enclosed in a pair of braces: `{...}`.
*
* Return either a group MathAtom or null if not a group.
*
* Return a group MathAtom with an empty body if an empty
* group (i.e. `{}`).
* @return {MathAtom}
* @method module:core/parser#Parser#scanGroup
* @private
*/
scanGroup() {
if (!this.parseToken('{')) return null;
const result = new MathAtom(this.parseMode, 'group');
result.body = this.scanImplicitGroup(token => token.type === '}');
this.parseToken('}');
result.latexOpen = '{';
result.latexClose = '}';
return result;
}
scanSmartFence() {
this.skipWhitespace();
if (!this.parseLiteral('(')) return null;
// We've found an open paren... Convert to a `\mleft...\mright`
const result = new MathAtom(this.parseMode, 'leftright');
result.leftDelim = '(';
result.inner = false; // It's a `\mleft`, not a `\left`
const savedMathList = this.swapMathList([]);
let nestLevel = 1;
while (!this.end() && nestLevel !== 0) {
if (this.hasLiteral('(')) nestLevel += 1;
if (this.hasLiteral(')')) nestLevel -= 1;
if (nestLevel !== 0) this.parseAtom();
}
if (nestLevel === 0) this.parseLiteral(')');
result.rightDelim = nestLevel === 0 ? ')' : '?';
result.body = this.swapMathList(savedMathList);
return result;
}
/**
* Scan a delimiter, e.g. '(', '|', '\vert', '\ulcorner'
*
* @return {string} The delimiter (as a character or command) or null
* @memberof Parser
* @method module:core/parser#Parser#scanDelim
* @private
*/
scanDelim() {
this.skipWhitespace();
const token = this.get();
if (!token) return null;
let delim = '.';
if (token.type === 'command') {
delim = '\\' + token.value;
} else if (token.type === 'literal') {
delim = token.value;
}
const info = Definitions.getInfo(delim, 'math', this.macros);
if (!info) return null;
if (info.type === 'mopen' || info.type === 'mclose') {
return delim;
}
// Some symbols are not of type mopen/mclose, but are still
// valid delimiters...
// '?' is a special delimiter used as a 'placeholder'
// (when the closing delimiter is displayed greyed out)
if (/^(\?|\||<|>|\\vert|\\Vert|\\\||\\surd|\\uparrow|\\downarrow|\\Uparrow|\\Downarrow|\\updownarrow|\\Updownarrow|\\mid|\\mvert|\\mVert)$/.test(delim)) {
return delim;
}
return null;
}
/**
* Parse a `\left...\right` sequence.
*
* Note: the `\middle` command can occur multiple times inside a
* `\left...\right` sequence, and is handled separately.
*
* Return either an atom of type `'leftright'` or null
* @return {MathAtom}
* @method module:core/parser#Parser#scanLeftRight
* @private
*/
scanLeftRight() {
if (this.parseCommand('right') || this.parseCommand('mright')) {
// We have an unbalanced left/right (there's a \right, but no \left)
const result = new MathAtom(this.parseMode, 'leftright');
result.rightDelim = this.scanDelim() || '.';
return result;
}
const savedStyle = this.style;
let close = 'right';
if (!this.parseCommand('left')) {
if (!this.parseCommand('mleft')) return null;
close = 'mright';
}
const leftDelim = this.scanDelim() || '.';
const savedMathList = this.swapMathList([]);
while (!this.end() && !this.parseCommand(close)) {
this.parseAtom();
}
this.style = savedStyle;
// If we've reached the end and there was no `\right` or
// there isn't a valid delimiter after `\right`, we'll
// consider the `\right` missing and set the `rightDelim` to undefined
const rightDelim = this.scanDelim();
const result = new MathAtom(this.parseMode, 'leftright');
result.leftDelim = leftDelim;
result.rightDelim = rightDelim;
result.inner = close === 'right';
result.body = this.swapMathList(savedMathList);
return result;
}
/**
* Parse a subscript/superscript: `^` and `_`.
*
* Modify the last atom accordingly.
*
* @return {boolean} True if at least one superscript, subscript or prime was applied
* @method module:core/parser#Parser#parseSupSub
* @private
*/
parseSupSub() {
// No sup/sub in text or command mode.
if (this.parseMode !== 'math') return false;
// Apply the subscript/superscript to the last render atom.
// If none is present (beginning of the mathlist, i.e. `{^2}`,
// an empty atom will be created, equivalent to `{{}^2}`
let result = false;
while (this.hasLiteral('^') || this.hasLiteral('_') || this.hasLiteral("'")) {
let supsub;
if (this.hasLiteral('^')) {
supsub = 'superscript';
} else if (this.hasLiteral('_')) {
supsub = 'subscript';
}
if (this.parseLiteral('^') || this.parseLiteral('_')) {
const arg = this.scanArg();
if (arg) {
const atom = this.lastMathAtom();
atom[supsub] = atom[supsub] || [];
atom[supsub] = atom[supsub].concat(arg);
result = true;
}
} else if (this.parseLiteral("'")) {
// A single quote (prime) is actually equivalent to a
// '^{\prime}'
const atom = this.lastMathAtom();
atom.superscript = atom.superscript || [];
atom.superscript.push(new MathAtom(atom.parseMode, 'mord', '\u2032'));
result = true;
}
}
return result;
}
/**
* Parse a `\limits` or `\nolimits` command.
*
* This will change the placement of limits to be either above or below
* (if `\limits`) or in the superscript/subscript position (if `\nolimits`).
*
* This overrides the calculation made for the placement, which is usually
* dependent on the displaystyle (`inlinemath` prefers `\nolimits`, while
* `displaymath` prefers `\limits`).
* @method module:core/parser#Parser#parseLimits
* @private
*/
parseLimits() {
// Note: technically, \limits and \nolimits are only applicable
// after an operator. However, we apply them in all cases. They
// will simply be ignored when not applicable (i.e. on a literal)
// which is actually consistent with TeX.
if (this.parseCommand('limits')) {
const lastAtom = this.lastMathAtom();
lastAtom.limits = 'limits';
// Record that the limits was set through an explicit command
// so we can generate the appropriate LaTeX later
lastAtom.explicitLimits = true;
return true;
}
if (this.parseCommand('nolimits')) {
const lastAtom = this.lastMathAtom();
lastAtom.limits = 'nolimits';
// Record that the limits was set through an explicit command
// so we can generate the appropriate LaTeX later
lastAtom.explicitLimits = true;
return true;
}
return false;
}
scanOptionalArg(parseMode) {
parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
this.skipWhitespace();
if (!this.parseLiteral('[')) return null;
const savedParseMode = this.parseMode;
this.parseMode = parseMode;
const savedMathlist = this.swapMathList();
let result;
while (!this.end() && !this.parseLiteral(']')) {
if (parseMode === 'string') {
result = this.scanString();
} else if (parseMode === 'number') {
result = this.scanNumber();
} else if (parseMode === 'dimen') {
result = this.scanDimen();
} else if (parseMode === 'skip') {
result = this.scanSkip();
} else if (parseMode === 'colspec') {
result = this.scanColspec();
} else if (parseMode === 'color') {
result = this.scanColor() || '#ffffff';
} else if (parseMode === 'bbox') {
// The \bbox command takes a very particular argument:
// a comma delimited list of up to three arguments:
// a color, a dimension and a string.
// Split the string by comma delimited sub-strings, ignoring commas
// that may be inside (). For example"x, rgb(a, b, c)" would return
// ['x', 'rgb(a, b, c)']
const list = this.scanString().toLowerCase().trim().split(/,(?![^(]*\)(?:(?:[^(]*\)){2})*[^"]*$)/);
for (const elem of list) {
const color = Color.stringToColor(elem);
if (color) {
result = result || {};
result.backgroundcolor = color;
} else {
const m = elem.match(/^\s*([0-9.]+)\s*([a-z][a-z])/);
if (m) {
result = result || {};
result.padding = FontMetrics.toEm(m[1], m[2]);
} else {
const m = elem.match(/^\s*border\s*:\s*(.*)/);
if (m) {
result = result || {};
result.border = m[1];
}
}
}
}
} else {
console.assert(parseMode === 'math', 'Unexpected parse mode: "' + parseMode + '"');
this.mathList = this.mathList.concat(this.scanImplicitGroup(token => token.type === 'literal' && token.value === ']'));
}
}
this.parseMode = savedParseMode;
const mathList = this.swapMathList(savedMathlist);
return result ? result : mathList;
}
/**
* Parse a math field, an argument to a function.
*
* An argument can either be a single atom or
* a sequence of atoms enclosed in braces.
*
* @param {string} [parseMode] Temporarily overrides the parser parsemode. For
* example: `'dimension'`, `'color'`, `'text'`, etc...
* @method module:core/parser#Parser#scanArg
* @private
*/
scanArg(parseMode) {
parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
this.parseFiller();
let result;
// An argument (which is called a 'math field' in TeX)
// could be a single character or symbol, as in `\frac12`
// Note that ``\frac\sqrt{-1}\alpha\beta`` is equivalent to
// ``\frac{\sqrt}{-1}{\beta}``
if (!this.parseToken('{')) {
if (parseMode === 'delim') {
return this.scanDelim() || '.';
} else if (/^(math|text)$/.test(parseMode)) {
// Parse a single atom.
const savedParseMode = this.parseMode;
this.parseMode = parseMode;
const atom = this.scanToken();
this.parseMode = savedParseMode;
if (Array.isArray(atom)) return atom;
return atom ? [atom] : null;
}
}
// If this is a param token, substitute it with the
// (optional) argument passed to the parser
if (this.hasToken('#')) {
const paramToken = this.get();
this.skipUntilToken('}');
if (paramToken.value === '?') {
return this.placeholder();
}
if (this.args) {
if (this.args[paramToken.value] === undefined &&
this.args['?'] !== undefined) {
return this.placeholder();
}
return this.args[paramToken.value] || null;
}
return null;
}
const savedParseMode = this.parseMode;
this.parseMode = parseMode;
const savedMathList = this.swapMathList([]);
if (parseMode === 'string') {
result = this.scanString();
this.skipUntilToken('}');
} else if (parseMode === 'number') {
result = this.scanNumber();
this.skipUntilToken('}');
} else if (parseMode === 'dimen') {
result = this.scanDimen();
this.skipUntilToken('}');
} else if (parseMode === 'skip') {
result = this.scanSkip();
this.skipUntilToken('}');
} else if (parseMode === 'colspec') {
result = this.scanColspec();
this.skipUntilToken('}');
} else if (parseMode === 'color') {
result = this.scanColor() || '#ffffff';
this.skipUntilToken('}');
} else if (parseMode === 'delim') {
result = this.scanDelim() || '.';
this.skipUntilToken('}');
} else {
console.assert(/^(math|text)$/.test(parseMode), 'Unexpected parse mode: "' + parseMode + '"');
do {
this.mathList = this.mathList.concat(this.scanImplicitGroup());
} while (!this.parseToken('}') && !this.end());
}
this.parseMode = savedParseMode;
const mathList = this.swapMathList(savedMathList);
return result ? result : mathList;
}
/**
* @return {MathAtom[]|MathAtom}
* @method module:core/parser#Parser#scanToken
* @private
*/
scanToken() {
const token = this.get();
if (!token) return null;
let result = null;
if (token.type === 'space') {
if (this.parseMode === 'text') {
result = new MathAtom('text', '', ' ', this.style);
}
} else if (token.type === 'placeholder') {
// RENDER PLACEHOLDER
result = new MathAtom(this.parseMode, 'placeholder', token.value);
result.captureSelection = true;
} else if (token.type === 'command') {
// RENDER COMMAND
if (token.value === 'placeholder') {
result = new MathAtom(this.parseMode, 'placeholder', this.scanArg('string'), this.style);
result.captureSelection = true;
} else if (token.value === 'char') {
// \char has a special syntax and requires a non-braced integer
// argument
let codepoint = Math.floor(this.scanNumber(true));
if (!isFinite(codepoint) || codepoint < 0 || codepoint > 0x10FFFF) {
codepoint = 0x2753; // BLACK QUESTION MARK
}
result = new MathAtom(this.parseMode,
this.parseMode === 'math' ? 'mord' : '',
String.fromCodePoint(codepoint));
result.latex = '{\\char"' +
('000000' + codepoint.toString(16)).toUpperCase().substr(-6) + '}';
} else if (token.value === 'hskip' || token.value === 'kern') {
// \hskip and \kern have a special syntax and require a non-braced
// 'skip' argument
const width = this.scanSkip();
if (isFinite(width)) {
result = new MathAtom(this.parseMode, 'spacing', null, this.style);
result.width = width;
}
result .latex = '\\' + token.value;
} else {
result = this.scanMacro(token.value);
if (!result) {
const info = Definitions.getInfo('\\' + token.value, this.parseMode, this.macros);
const args = [];
let argString = '';
// Parse the arguments
// let mandatoryParamsCount = 0;
// If explicitGroup is not empty, an explicit group is expected
// to follow the command and will be parsed *after* the
// command has been processed.
// This is used for commands such as \textcolor{color}{content}
// that need to apply the color to the content *after* the
// style has been changed.
// In definitions, this is indicated with a parameter type
// of 'auto*'
let explicitGroup = '';
if (info && info.params) {
for (const param of info.params) {
// Parse an argument
if (param.optional) {
// If it's not present, return the default argument value
const arg = this.scanOptionalArg(param.type);
// args.push(arg ? arg : param.defaultValue); @todo defaultvalue
args.push(arg);
} else if (param.type.endsWith('*')) {
explicitGroup = param.type.slice(0, -1);
} else {
// mandatoryParamsCount += 1;
// If it's not present, scanArg returns null.
// Add a placeholder instead.
const arg = this.scanArg(param.type);
if (arg && arg.length === 1 &&
arg[0].type === 'placeholder' && param.placeholder) {
arg[0].value = param.placeholder;
}
if (arg) {
args.push(arg);
} else if (param.placeholder) {
const placeholder = new MathAtom(this.parseMode, 'placeholder', param.placeholder);
placeholder.captureSelection = true;
args.push([placeholder]);
} else {
args.push(this.placeholder());
}
if (param.type !== 'math' && typeof arg === 'string') {
argString += arg
}
}
}
}
if (info && !info.infix) {
// Infix commands should be handled in scanImplicitGroup
// If we find an infix command here, it's a syntax error
// (second infix command in an implicit group) and should be ignored.
// Create the MathAtom.
// If a parse function is present, invoke it with the arguments,
// and pass the result to be appended by the constructor.
if (info.parse) {
const attributes = info.parse('\\' + token.value, args);
if (!attributes.type) {
// No type provided -> the parse function will modify
// the current style rather than create a new Atom.
const savedMode = this.parseMode;
if (attributes.mode) {
// Change to 'text' (or 'math') mode if necessary
this.parseMode = attributes.mode;
delete attributes.mode;
}
// If an explicit group is expected, process it now
if (explicitGroup) {
// Create a temporary style
const saveStyle = this.style;
this.style = {...this.style, ...attributes};
result = this.scanArg(explicitGroup);
this.style = saveStyle;
} else {
// Merge the new style info with the current style
this.style = {...this.style, ...attributes};
}
this.parseMode = savedMode;
} else {
result = new MathAtom(this.parseMode,
info.type,
explicitGroup ? this.scanArg(explicitGroup) : null,
{...this.style, ...attributes});
}
} else {
const style = {...this.style};
if (info.baseFontFamily) style.baseFontFamily = info.baseFontFamily;
result = new MathAtom(this.parseMode,
info.type || 'mop',
info.value || token.value,
style);
if (info.skipBoundary) {
result.skipBoundary = true;
}
}
if (result && !/^(llap|rlap|class|cssId)$/.test(token.value)) {
result.latex = '\\' + token.value;
if (argString /*|| mandatoryParamsCount > 0*/) {
result.latex += '{' + argString + '}'
}
if (result.isFunction && this.smartFence) {
// The atom was a function that may be followed by
// an argument, like `\sin(`
const smartFence = this.scanSmartFence();
if (smartFence) {
result = [result, smartFence];
}
}
}
}
if (!info) {
// An unknown command
result = new MathAtom(this.parseMode,
'error', '\\' + token.value);
result.latex = '\\' + token.value;
}
}
}
} else if (token.type === 'literal') {
const info = Definitions.getInfo(token.value, this.parseMode, this.macros);
if (info) {
const style = {...this.style};
if (info.baseFontFamily) style.baseFontFamily = info.baseFontFamily;
result = new MathAtom(this.parseMode,
info.type,
info.value || token.value,
style);
if (info.isFunction) {
result.isFunction = true;
}
} else {
result = new MathAtom(this.parseMode,
this.parseMode === 'math' ? 'mord' : '',
token.value, this.style);
}
result.latex = Definitions.matchCodepoint(this.parseMode,
token.value.codePointAt(0));
if (info && info.isFunction && this.smartFence) {
// The atom was a function that may be followed by
// an argument, like `f(`.