UNPKG

mathjax-parser

Version:

Find & replace the MathJax delimiters in an HTML string

296 lines (237 loc) 9.43 kB
/**
 * Finds MathJax delimiters (e.g. `$...$`, `\(...\)`, `$$...$$`, `\[...\]`) in an
 * HTML string and replaces the opening and closing delimiter of every matched
 * pair with configurable replacement strings.
 *
 * The input is loaded into a detached `<body>` element so the DOM can be walked.
 * Within every run of adjacent text / `<br>` sibling nodes the text nodes are
 * scanned for delimiter pairs; a pair may therefore open in one text node and
 * close in a later one of the same run. Child nodes of every non-text node are
 * processed recursively in the same way.
 *
 * NOTE(review): replacements are written into text nodes through `nodeValue`,
 * so markup contained in a replacement string is presumably HTML-escaped when
 * the result is serialized via `innerHTML` - confirm this is intended.
 */
class MathjaxParser {
  /** DOM `nodeType` value of a text node. */
  private static readonly TEXT_NODE = 3;

  /** Configuration used by the most recent `parse` call. */
  private config: MathjaxParserConfig = MathjaxParser.createDefaultConfig();

  /**
   * Replaces all matched math delimiters in `inputHtml`.
   *
   * @param inputHtml HTML string to process.
   * @param config Delimiters and replacements; a default config is used when omitted.
   * @returns The serialized HTML after replacement.
   */
  public parse = (inputHtml: string, config?: MathjaxParserConfig): ParserResponse => {
    this.config = config || MathjaxParser.createDefaultConfig();

    // Create a DOM element in order to walk the node tree.
    const body: HTMLElement = document.createElement('body');
    body.innerHTML = inputHtml;

    // Use the effective config (not the raw argument) so that the defaults
    // apply when the caller passes no config.
    this.processNodeList(body.childNodes, this.buildDelimiterArray(this.config));

    return { outputHtml: body.innerHTML };
  };

  /** Builds a fresh default configuration object. */
  private static createDefaultConfig(): MathjaxParserConfig {
    return {
      inlineMath: [['$', '$'], ['\\(', '\\)']],
      displayMath: [['$$', '$$'], ['\\[', '\\]']],
      inlineMathReplacement: ['XXX', 'XXX'],
      displayMathReplacement: ['YYY', 'YYY']
    };
  }

  /** Opening delimiter of a group (empty string when missing). */
  private static startOf(delimiterGroup: DelimiterGroup): string {
    return delimiterGroup.group[0] || '';
  }

  /** Closing delimiter of a group (empty string when missing). */
  private static endOf(delimiterGroup: DelimiterGroup): string {
    return delimiterGroup.group[1] || '';
  }

  /**
   * Flattens the inline and display delimiter pairs into one ordered list.
   *
   * A new group is inserted in front of the first existing group whose start
   * delimiter is contained in the new start delimiter, so that e.g. `$$` is
   * tried before `$`. Groups with an empty start or end delimiter are skipped
   * because they could never be matched meaningfully.
   */
  private buildDelimiterArray(config: MathjaxParserConfig): DelimiterGroup[] {
    const delimiterArray: DelimiterGroup[] = [];

    const insert = (group: string[], type: MathType): void => {
      const start = group[0];
      const end = group[1];
      if (!start || !end) {
        return;
      }
      let insertAt = 0;
      for (const existing of delimiterArray) {
        if (start.indexOf(MathjaxParser.startOf(existing)) > -1) {
          break;
        }
        ++insertAt;
      }
      delimiterArray.splice(insertAt, 0, { group: group, type: type });
    };

    config.inlineMath.forEach(group => insert(group, 'inline'));
    config.displayMath.forEach(group => insert(group, 'display'));

    return delimiterArray;
  }

  /** Processes every text/br run of `nodeList`, then recurses into non-text children. */
  private processNodeList(nodeList: NodeList, delimiterArray: DelimiterGroup[]): void {
    this.findAdjacentTextOrBrNodes(nodeList).forEach((textOrBrNodeSet: MyRange<number>) => {
      this.iterateMath(delimiterArray, textOrBrNodeSet, nodeList);
    });

    // Process children; text nodes have none.
    for (let i = 0; i < nodeList.length; i++) {
      const node = nodeList[i];
      if (node && node.nodeType !== MathjaxParser.TEXT_NODE) {
        this.processNodeList(node.childNodes, delimiterArray);
      }
    }
  }

  /** True when the non-empty `delimiter` occurs in `text` exactly at `idx`. */
  private isMatchingIndex(text: string, idx: number, delimiter: string): boolean {
    return delimiter.length > 0 && text.slice(idx, idx + delimiter.length) === delimiter;
  }

  /**
   * Scans the text nodes in the half-open node range `[start, end)` for delimiter
   * pairs and replaces every completed pair.
   */
  private iterateMath(delimiterArray: DelimiterGroup[], textOrBrNodeSet: MyRange<number>, nodeList: NodeList): void {
    const state: CurrentState = { matchedDelimiterSets: [] };

    for (let nodeNumber = textOrBrNodeSet.start; nodeNumber < textOrBrNodeSet.end; nodeNumber++) {
      const node = nodeList[nodeNumber];
      // Only text nodes carry delimiters; <br> nodes merely keep the run together.
      if (node && node.nodeType === MathjaxParser.TEXT_NODE) {
        this.processIndices(node.textContent || '', state, delimiterArray, nodeNumber);
      }
    }

    this.cleanOccurrences(state.matchedDelimiterSets);
    this.replaceMatches(state.matchedDelimiterSets, nodeList);
  }

  /**
   * Replaces all matched sets, last match first, so that the recorded string
   * indices of earlier matches stay valid while the node values change.
   */
  private replaceMatches(matchedDelimiterSets: DelimiterMatchSet[], nodeList: NodeList): void {
    for (let i = matchedDelimiterSets.length - 1; i >= 0; i--) {
      const delimiterSet = matchedDelimiterSets[i];
      if (delimiterSet) {
        this.replaceStartAndEndOfMatchedSet(delimiterSet, nodeList);
      }
    }
  }

  /**
   * Walks `textContent` and records delimiter matches in `state`.
   *
   * While no set is open, every index is tested against the start delimiters in
   * `delimiterArray` order (first match wins). While a set is open, only the end
   * delimiter of the group that opened it is looked for.
   */
  private processIndices(textContent: string, state: CurrentState, delimiterArray: DelimiterGroup[], nodeNumber: number): void {
    const sets = state.matchedDelimiterSets;
    let idx = 0;

    while (idx < textContent.length) {
      const lastSet = sets.length > 0 ? sets[sets.length - 1] : undefined;

      if (!lastSet || lastSet.end) {
        // Everything so far is closed: look for a new opening delimiter.
        let matchedGroup: DelimiterGroup | undefined;
        for (const delimiterGroup of delimiterArray) {
          if (this.isMatchingIndex(textContent, idx, MathjaxParser.startOf(delimiterGroup))) {
            matchedGroup = delimiterGroup;
            break;
          }
        }

        if (matchedGroup) {
          state.lastMatchedGroup = matchedGroup;
          //TODO: correct escapes for $ special case...
          MathjaxParser.pushStart(sets, nodeNumber, idx, matchedGroup);
          idx += MathjaxParser.startOf(matchedGroup).length;
        } else {
          ++idx;
        }
      } else {
        // A start was matched but its end has not been found yet.
        const openGroup = lastSet.start.delimiterGroup;
        const endDelimiter = MathjaxParser.endOf(openGroup);
        if (this.isMatchingIndex(textContent, idx, endDelimiter)) {
          MathjaxParser.pushEnd(sets, nodeNumber, idx, openGroup);
          idx += endDelimiter.length;
        } else {
          ++idx;
        }
      }
    }
  }

  /** Replaces both delimiters of one set; the end goes first so the start index stays valid. */
  private replaceStartAndEndOfMatchedSet(delimiterSet: DelimiterMatchSet, nodeList: NodeList): void {
    if (delimiterSet.end) {
      this.replaceDelimiters(nodeList, delimiterSet.end);
    }
    this.replaceDelimiters(nodeList, delimiterSet.start);
  }

  /** Drops a trailing set that was opened but never closed. */
  private cleanOccurrences(occurrences: DelimiterMatchSet[]): void {
    const last = occurrences.length > 0 ? occurrences[occurrences.length - 1] : undefined;
    if (last && !last.end) {
      occurrences.pop();
    }
  }

  /** Swaps one matched delimiter in its text node for the configured replacement. */
  private replaceDelimiters(nodeList: NodeList, delimiterMatch: DelimiterMatch): void {
    const node = nodeList[delimiterMatch.nodeNumber];
    if (!node) {
      return;
    }

    const group = delimiterMatch.delimiterGroup;
    const oldDelimiter = delimiterMatch.isStart ? MathjaxParser.startOf(group) : MathjaxParser.endOf(group);
    const replacements = group.type === 'inline'
      ? this.config.inlineMathReplacement
      : this.config.displayMathReplacement;
    const replacement = replacements[delimiterMatch.isStart ? 0 : 1];

    const nodeVal = node.nodeValue || '';
    // text before the delimiter + replacement + text after the delimiter
    node.nodeValue =
      nodeVal.slice(0, delimiterMatch.index) +
      replacement +
      nodeVal.slice(delimiterMatch.index + oldDelimiter.length);
  }

  /** Opens a new set with the given start match and no end yet. */
  private static pushStart(matchedDelimiterSets: DelimiterMatchSet[], nodeNumber: number, idx: number, delimiterGroup: DelimiterGroup): void {
    matchedDelimiterSets.push({
      start: {
        nodeNumber: nodeNumber,
        index: idx,
        delimiterGroup: delimiterGroup,
        isStart: true
      },
      end: undefined
    });
  }

  /** Records the end match on the most recently opened set. */
  private static pushEnd(matchedDelimiterSets: DelimiterMatchSet[], nodeNumber: number, idx: number, delimiterGroup: DelimiterGroup): void {
    const openSet = matchedDelimiterSets[matchedDelimiterSets.length - 1];
    if (!openSet) {
      return;
    }
    openSet.end = {
      nodeNumber: nodeNumber,
      index: idx,
      delimiterGroup: delimiterGroup,
      isStart: false
    };
  }

  /**
   * Returns the half-open index ranges of consecutive text / `<br>` siblings.
   *
   * Example: `hello <br> world <span>bla</span> that's cool`
   * yields `[{start: 0, end: 3}, {start: 4, end: 5}]`.
   */
  private findAdjacentTextOrBrNodes(nodeList: NodeList): MyRange<number>[] {
    const ranges: MyRange<number>[] = [];
    let current: MyRange<number> | undefined;

    for (let i = 0; i < nodeList.length; i++) {
      const node = nodeList[i];
      if (node && this.isTextOrBrNode(node)) {
        if (current) {
          // Adjacent to the running range: extend it.
          current.end = i + 1;
        } else {
          current = { start: i, end: i + 1 };
          ranges.push(current);
        }
      } else {
        // Any other node ends the running range.
        current = undefined;
      }
    }

    return ranges;
  }

  /** True for text nodes and `<br>` elements. */
  private isTextOrBrNode(node: Node): boolean {
    return node.nodeType === MathjaxParser.TEXT_NODE || node.nodeName === 'BR';
  }
}

/** Result of `MathjaxParser.parse`. */
interface ParserResponse {
  outputHtml: string;
}

/** Generic start/end pair. */
interface MyRange<T> {
  start: T;
  end: T;
}

/** One matched delimiter: which node, at which string index, and of which group. */
interface DelimiterMatch {
  nodeNumber: number;
  index: number;
  isStart: boolean;
  delimiterGroup: DelimiterGroup;
}

/** A matched opening delimiter together with its closing delimiter, once found. */
interface DelimiterMatchSet {
  start: DelimiterMatch;
  end?: DelimiterMatch;
}

interface MathjaxParserConfig {
  inlineMath: string[][]; //e.g. [['$','$'],['\\(','\\)']],
  displayMath: string[][]; //e.g. [['$$','$$'],['\\[','\\]']],
  inlineMathReplacement: string[]; //e.g. ['<span class="inline-math">', '</span>']
  displayMathReplacement: string[]; // e.g. ['<span class="display-math">','</span>']
}

/** A `[start, end]` delimiter pair tagged with the kind of math it encloses. */
interface DelimiterGroup {
  group: string[];
  type: MathType;
}

/** Scan state shared across the text nodes of one text/br run. */
interface CurrentState {
  matchedDelimiterSets: DelimiterMatchSet[];
  lastMatchedGroup?: DelimiterGroup;
}

type MathType = 'inline' | 'display';