UNPKG

document-highlighter

Version:
239 lines (196 loc) 8.07 kB
'use strict'; var highlightText = require('./text.js'); var mergeDefaultOptions = require('./options.js'); /** * Elements which can be nested inside our inline separators. * Elements not in this list are implicitly "block level". */ var inlineElements = ["b", "big", "i", "small", "tt,", "abbr", "acronym", "cite", "code", "dfn", "em", "kbd", "strong", "samp", "var,", "a", "bdo", "br", "img", "map", "object", "q", "span", "sub", "sup,", "button", "input", "label", "select", "textarea"]; /** * Elements which are inline, but after which we'd still like to insert a space. * We don't want to convert "lol<br>wut" to "lolwut" but to "lol wut". */ var spaceNeededElements = ["br", "sup", "sub", "img"]; // Elements which must not be highlighted, no matter what, under penalty of ruining the document. var noContentElements = ["style", "head", "script", "svg"]; // Build a data-hash for faster access (elements["strong"] === "inline", faster than elements.indexOf("strong") !== "-1") var elements = {}; inlineElements.forEach(function(inline) { elements[inline] = "inline"; }); noContentElements.forEach(function(noContent) { elements[noContent] = "noContent"; }); var isSpaceNeeded = {}; spaceNeededElements.forEach(function(element) { isSpaceNeeded[element] = true; }); module.exports = function highlightHtml(html, query, options) { options = mergeDefaultOptions(options); // ----- Generate HTML mapping // This array contains the list of mappings from the text representation to the HTML. // // [ { t: 0, h: 2, l: 3, c: false, s: 0, b: false }, // { t: 6, h: 12, l: 4, c: true, s: 0, b: false }, // { t: 16, h: 26, l: 4, c: true, s: 1, b: true } ] // // (see below, mappings.push, for explanations on keys) // (keys are very short for memory benefits, probably) var mappings = []; // Let's build the raw text representation, and keep track of the match between text-index and html-index. var text = ''; var i = 0; while(true) { var startIndex = html.indexOf('<', i); var markupElement; var shift = 0; var isBlock; if(startIndex === -1) { // We're done. Add the remainder from the html buffer and break out. text += html.substr(i); break; } // We got a new markup element var endIndex = html.indexOf('>', startIndex); if(endIndex === -1) { throw new Error("Invalid HTML markup."); } // Add content to text, until element starts text += html.substr(i, startIndex - i); var isClosingElement = (html[startIndex + 1] === '/'); if(isClosingElement) { // 2 == "</".length markupElement = html.substr(startIndex + 2, endIndex - startIndex - 2); isBlock = (elements[markupElement] !== "inline"); } else /* !closingElement*/ { // 1 === "<".length; markupElement = html.substr(startIndex + 1, endIndex - startIndex - 1); // Skip any attributes or self-closing '/>' var matches = markupElement.match(/([a-z-])+/gi); if(matches && matches[0]) { markupElement = matches[0]; } else { markupElement = "unknown-tag"; } isBlock = (elements[markupElement] !== "inline"); if(elements[markupElement] === "noContent") { // We need to find the closing matching markup element and remove all the content between from the generated text var matcher = markupElement + ">"; var matchingClosing = html.indexOf(markupElement + ">", endIndex); // We can't find a closing item? Sadly, we can't do much. if(matchingClosing !== -1) { endIndex = matchingClosing + matcher.length; } } } // At the end of a block element (and some inline elements as well), // add a space separator. // This avoids generating Something</div><div>Lol to "SomethingLol". if((isBlock && isClosingElement) || isSpaceNeeded[markupElement]) { text += " "; shift += 1; } // t: insert index in raw text // h: tag end index in HTML // l: length of skipped HTML // c: is a closing tag // s: amount by which HTML is displaced with respect to text // b: whether or not this element is block-level mappings.push({ t: text.length, h: endIndex + shift, l: endIndex - startIndex + 1, c: isClosingElement, s: shift, b: isBlock }); i = endIndex + 1; } // ----- Highlight raw text var highlightedText = highlightText(text, query, options); /** * Map text indices to html indices, according to our `mapping` array * @param {Integer} Start of the string to be highlighted, in plain-text indexing * @param {Integer} End of the string to be highlighted, in plain-text indexing * @return {Array} An array of pairs (start, end) at which we should insert our highlight markup, in HTML indexing */ var getMapping = function(textStartIndex, textEndIndex) { var ret = []; var htmlStartDelta = 0; var htmlEndDelta = 0; for(var i = 0; i < mappings.length; i += 1) { var mapping = mappings[i]; // Stop end-mapping when going after endIndex if(textEndIndex <= mapping.t) { break; } // Stop start-mapping when going after startIndex if(mapping.t < textStartIndex) { htmlStartDelta += mapping.l; } else { // We have an element defined after the start of our highlight and before the end (e.g. <span>something *our </span> highlight*), we need to generate multiple indices (<span>something *our* </span> *highlight*) if(!mapping.c && mappings[i + 1] && mappings[i + 1].c && textEndIndex >= mappings[i + 1].t && !mapping.b) { // Special case: the element is totally included in our highlight, so we can continue like nothing happened. // E.g. *Eat <strong>drink</strong> and be merry* i += 1; htmlEndDelta += mapping.l; mapping = mappings[i]; } else { // Create a new mapping unless it would be empty // It starts at the beginning of the highlight // and ends right before the start of the tag var subMappingStart = textStartIndex + htmlStartDelta; var subMappingEnd = mapping.h - (mapping.l + mapping.s) + 1; if(subMappingEnd - subMappingStart > 0) { ret.push({ start: subMappingStart, end: subMappingEnd }); } // Update new startDelta htmlStartDelta = mapping.h - textStartIndex + 1; } } htmlStartDelta -= mapping.s; htmlEndDelta += mapping.l - mapping.s; } ret.push({ start: textStartIndex + htmlStartDelta, end: textEndIndex + htmlEndDelta }); return ret; }; // ----- Rebuild highlight in HTML, using index informations /** * This global delta tracks the length of all our additions to * the text (multiple options.before and options.after were inserted). * We must shift the mappings by that much. */ var globalIndexDelta = 0; highlightedText.indices.forEach(function(index) { // Map text-index to HTML-index var htmlIndices = getMapping(index.startIndex, index.endIndex); for(var i = 0; i < htmlIndices.length; i += 1) { var htmlIndex = htmlIndices[i]; // Add indices from previous highlights htmlIndex.start += globalIndexDelta; htmlIndex.end += globalIndexDelta; var before = html.substr(0, htmlIndex.start); var content = html.substr(htmlIndex.start, htmlIndex.end - htmlIndex.start); var after = html.substr(htmlIndex.end); // Update HTML with mapping var highlightBefore = (i === 0 ? options.before : options.beforeSecond); var highlightAfter = (i === 0 ? options.after : options.afterSecond); html = before + highlightBefore + content + highlightAfter + after; globalIndexDelta += highlightBefore.length + highlightAfter.length; } }); return { html: html, text: highlightedText.text.trim() }; };