@jupyterlab/rendermime
Version:
JupyterLab - RenderMime
201 lines (192 loc) • 6.86 kB
text/typescript
/* -----------------------------------------------------------------------------
| Copyright (c) Jupyter Development Team.
| Distributed under the terms of the Modified BSD License.
|----------------------------------------------------------------------------*/
// Some magic for deferring mathematical expressions to MathJax
// by hiding them from the Markdown parser.
// Some of the code here is adapted with permission from Davide Cervone
// under the terms of the Apache2 license governing the MathJax project.
// Other minor modifications are also due to StackExchange and are used with
// permission.
const inline = '$'; // the inline math delimiter
// MATHSPLIT contains the pattern for math delimiters and special symbols
// needed for searching for math in the text input.
const MATHSPLIT =
/(\$\$?|\\(?:begin|end)\{[a-z]*\*?\}|\\[{}$]|[{}]|(?:\n\s*)+|@@\d+@@|\\\\(?:\(|\)|\[|\]))/i;
/**
* Break up the text into its component parts and search
* through them for math delimiters, braces, linebreaks, etc.
* Math delimiters must match and braces must balance.
* Don't allow math to pass through a double linebreak
* (which will be a paragraph).
*/
export function removeMath(text: string): { text: string; math: string[] } {
const math: string[] = []; // stores math strings for later
let start: number | null = null;
let end: string | null = null;
let last: number | null = null;
let braces: number = 0;
let deTilde: (text: string) => string;
// Except for extreme edge cases, this should catch precisely those pieces of the markdown
// source that will later be turned into code spans. While MathJax will not TeXify code spans,
// we still have to consider them at this point; the following issue has happened several times:
//
// `$foo` and `$bar` are variables. --> <code>$foo ` and `$bar</code> are variables.
const hasCodeSpans = text.includes('`') || text.includes('~~~');
if (hasCodeSpans) {
text = text
.replace(/~/g, '~T')
// note: the `fence` (three or more consecutive tildes or backticks)
// can be followed by an `info string` but this cannot include backticks,
// see specification: https://spec.commonmark.org/0.30/#info-string
.replace(
/^(?<fence>`{3,}|(~T){3,})[^`\n]*\n([\s\S]*?)^\k<fence>`*$/gm,
wholematch => wholematch.replace(/\$/g, '~D')
)
.replace(/(^|[^\\])(`+)([^\n]*?[^`\n])\2(?!`)/gm, wholematch =>
wholematch.replace(/\$/g, '~D')
);
deTilde = (text: string) => {
return text.replace(/~([TD])/g, (wholematch, character) =>
character === 'T' ? '~' : inline
);
};
} else {
deTilde = (text: string) => {
return text;
};
}
let blocks = text.replace(/\r\n?/g, '\n').split(MATHSPLIT);
for (let i = 1, m = blocks.length; i < m; i += 2) {
const block = blocks[i];
if (block.charAt(0) === '@') {
//
// Things that look like our math markers will get
// stored and then retrieved along with the math.
//
blocks[i] = '@@' + math.length + '@@';
math.push(block);
} else if (start !== null) {
//
// If we are in math, look for the end delimiter,
// but don't go past double line breaks, and
// and balance braces within the math.
//
if (block === end) {
if (braces) {
last = i;
} else {
blocks = processMath(start, i, deTilde, math, blocks);
start = null;
end = null;
last = null;
}
} else if (block.match(/\n.*\n/)) {
if (last !== null) {
i = last;
blocks = processMath(start, i, deTilde, math, blocks);
}
start = null;
end = null;
last = null;
braces = 0;
} else if (block === '{') {
braces++;
} else if (block === '}' && braces) {
braces--;
}
} else {
//
// Look for math start delimiters and when
// found, set up the end delimiter.
//
if (block === inline || block === '$$') {
start = i;
end = block;
braces = 0;
} else if (block === '\\\\(' || block === '\\\\[') {
start = i;
end = block.slice(-1) === '(' ? '\\\\)' : '\\\\]';
braces = 0;
} else if (block.substr(1, 5) === 'begin') {
start = i;
end = '\\end' + block.substr(6);
braces = 0;
}
}
}
if (start !== null && last !== null) {
blocks = processMath(start, last, deTilde, math, blocks);
start = null;
end = null;
last = null;
}
return { text: deTilde(blocks.join('')), math };
}
/**
* Put back the math strings that were saved,
* and clear the math array (no need to keep it around).
*/
export function replaceMath(text: string, math: string[]): string {
/**
* Replace a math placeholder with its corresponding group.
* The math delimiters "\\(", "\\[", "\\)" and "\\]" are replaced
* removing one backslash in order to be interpreted correctly by MathJax.
*/
const process = (match: string, n: number): string => {
let group = math[n];
if (
group.substr(0, 3) === '\\\\(' &&
group.substr(group.length - 3) === '\\\\)'
) {
group = '\\(' + group.substring(3, group.length - 3) + '\\)';
} else if (
group.substr(0, 3) === '\\\\[' &&
group.substr(group.length - 3) === '\\\\]'
) {
group = '\\[' + group.substring(3, group.length - 3) + '\\]';
}
return group;
};
// Replace all the math group placeholders in the text
// with the saved strings.
return text.replace(/@@(\d+)@@/g, process);
}
/**
* Process math blocks.
*
* The math is in blocks i through j, so
* collect it into one block and clear the others.
* Replace &, <, and > by named entities.
* For IE, put <br> at the ends of comments since IE removes \n.
* Clear the current math positions and store the index of the
* math, then push the math string onto the storage array.
* The preProcess function is called on all blocks if it has been passed in
*/
function processMath(
i: number,
j: number,
preProcess: (input: string) => string,
math: string[],
blocks: string[]
): string[] {
let block = blocks
.slice(i, j + 1)
.join('')
.replace(/&/g, '&') // use HTML entity for &
.replace(/</g, '<') // use HTML entity for <
.replace(/>/g, '>'); // use HTML entity for >
if (navigator && navigator.appName === 'Microsoft Internet Explorer') {
block = block.replace(/(%[^\n]*)\n/g, '$1<br/>\n');
}
while (j > i) {
blocks[j] = '';
j--;
}
blocks[i] = '@@' + math.length + '@@'; // replace the current block text with a unique tag to find later
if (preProcess) {
block = preProcess(block);
}
math.push(block);
return blocks;
}