UNPKG

highlight.js

Version:

Syntax highlighting with language autodetection.

678 lines (605 loc) 19.8 kB
/*
Constructor for a highlighting engine instance.

Call it with `new`: the public API (highlight, highlightAuto, highlightBlock,
configure, registerLanguage, ...) and the shared regexps/modes used by language
definitions are attached to `this`. Languages, aliases and options are private
to the instance.
*/
var Highlight = function() {

  /* Utility functions */

  /* Escapes the characters that are significant in HTML text content.
     Note: quotes are NOT escaped here; attribute values need extra care. */
  function escape(value) {
    return value.replace(/&/gm, '&amp;').replace(/</gm, '&lt;').replace(/>/gm, '&gt;');
  }

  /* Lower-cased node name of a DOM node. */
  function tag(node) {
    return node.nodeName.toLowerCase();
  }

  /* Truthy only when `re` is set and matches `lexeme` at position 0. */
  function testRe(re, lexeme) {
    var match = re && re.exec(lexeme);
    return match && match.index === 0;
  }

  /* Plain text of a block: text nodes are concatenated, <br> becomes '\n'.
     With options.useBR, literal newlines inside text nodes are dropped. */
  function blockText(block) {
    return Array.prototype.map.call(block.childNodes, function(node) {
      if (node.nodeType === 3) {
        return options.useBR ? node.nodeValue.replace(/\n/g, '') : node.nodeValue;
      }
      if (tag(node) === 'br') {
        return '\n';
      }
      return blockText(node);
    }).join('');
  }

  /* First class name on the block or its parent (with an optional "language-"
     prefix removed) that is a registered language/alias or 'no-highlight'. */
  function blockLanguage(block) {
    var classes = (block.className + ' ' + (block.parentNode ? block.parentNode.className : '')).split(/\s+/);
    classes = classes.map(function(c) {return c.replace(/^language-/, '');});
    return classes.filter(function(c) {return getLanguage(c) || c === 'no-highlight';})[0];
  }

  /* Shallow copy of `parent` with the properties of `obj` (if given) laid on top. */
  function inherit(parent, obj) {
    var result = {};
    var key;
    for (key in parent)
      result[key] = parent[key];
    if (obj)
      for (key in obj)
        result[key] = obj[key];
    return result;
  }

  /* Stream merging */

  /* Flattens the element structure under `node` into a list of
     {event: 'start'|'stop', offset, node} records, where offset is the
     position in the block's text (a <br> counts as one character). */
  function nodeStream(node) {
    var result = [];
    (function _nodeStream(node, offset) {
      for (var child = node.firstChild; child; child = child.nextSibling) {
        if (child.nodeType === 3)
          offset += child.nodeValue.length;
        else if (tag(child) === 'br')
          offset += 1;
        else if (child.nodeType === 1) {
          result.push({
            event: 'start',
            offset: offset,
            node: child
          });
          offset = _nodeStream(child, offset);
          result.push({
            event: 'stop',
            offset: offset,
            node: child
          });
        }
      }
      return offset;
    })(node, 0);
    return result;
  }

  /* Interleaves the original markup stream with the highlighted one over the
     text `value` and returns the resulting HTML string. Both stream arrays
     are consumed (emptied) in the process. */
  function mergeStreams(original, highlighted, value) {
    var processed = 0;
    var result = '';
    var nodeStack = [];

    function selectStream() {
      if (!original.length || !highlighted.length) {
        return original.length ? original : highlighted;
      }
      if (original[0].offset !== highlighted[0].offset) {
        return (original[0].offset < highlighted[0].offset) ? original : highlighted;
      }

      /*
      To avoid starting the stream just before it should stop the order is
      ensured that original always starts first and closes last:

      if (event1 == 'start' && event2 == 'start')
        return original;
      if (event1 == 'start' && event2 == 'stop')
        return highlighted;
      if (event1 == 'stop' && event2 == 'start')
        return original;
      if (event1 == 'stop' && event2 == 'stop')
        return highlighted;

      ... which is collapsed to:
      */
      return highlighted[0].event === 'start' ? original : highlighted;
    }

    function open(node) {
      /* escape() leaves '"' alone, so it is encoded separately: the value is
         emitted inside a double-quoted attribute. */
      function attr_str(a) {
        return ' ' + a.nodeName + '="' + escape(a.value).replace(/"/g, '&quot;') + '"';
      }
      result += '<' + tag(node) + Array.prototype.map.call(node.attributes, attr_str).join('') + '>';
    }

    function close(node) {
      result += '</' + tag(node) + '>';
    }

    function render(event) {
      (event.event === 'start' ? open : close)(event.node);
    }

    while (original.length || highlighted.length) {
      var stream = selectStream();
      result += escape(value.substr(processed, stream[0].offset - processed));
      processed = stream[0].offset;
      if (stream === original) {
        /*
        On any opening or closing tag of the original markup we first close
        the entire highlighted node stack, then render the original tag along
        with all the following original tags at the same offset and then
        reopen all the tags on the highlighted stack.
        */
        nodeStack.reverse().forEach(close);
        do {
          render(stream.splice(0, 1)[0]);
          stream = selectStream();
        } while (stream === original && stream.length && stream[0].offset === processed);
        nodeStack.reverse().forEach(open);
      } else {
        if (stream[0].event === 'start') {
          nodeStack.push(stream[0].node);
        } else {
          nodeStack.pop();
        }
        render(stream.splice(0, 1)[0]);
      }
    }
    return result + escape(value.substr(processed));
  }

  /* Initialization */

  /* Compiles a language definition in place: keyword strings become lookup
     tables, begin/end/illegal patterns become RegExp objects and every mode
     gets a combined `terminators` regexp. Already compiled modes are skipped. */
  function compileLanguage(language) {

    function reStr(re) {
      return (re && re.source) || re;
    }

    function langRe(value, global) {
      return RegExp(
        reStr(value),
        'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '')
      );
    }

    function compileMode(mode, parent) {
      if (mode.compiled)
        return;
      mode.compiled = true;

      mode.keywords = mode.keywords || mode.beginKeywords;
      if (mode.keywords) {
        var compiled_keywords = {};

        /* Adds every "word" or "word|relevance" item of `str` under `className`. */
        var flatten = function(className, str) {
          if (language.case_insensitive) {
            str = str.toLowerCase();
          }
          str.split(' ').forEach(function(kw) {
            var pair = kw.split('|');
            compiled_keywords[pair[0]] = [className, pair[1] ? Number(pair[1]) : 1];
          });
        };

        if (typeof mode.keywords === 'string') {
          // a plain string lists generic keywords
          flatten('keyword', mode.keywords);
        } else {
          Object.keys(mode.keywords).forEach(function (className) {
            flatten(className, mode.keywords[className]);
          });
        }
        mode.keywords = compiled_keywords;
      }
      mode.lexemesRe = langRe(mode.lexemes || /\b[A-Za-z0-9_]+\b/, true);

      if (parent) {
        if (mode.beginKeywords) {
          mode.begin = mode.beginKeywords.split(' ').join('|');
        }
        if (!mode.begin)
          mode.begin = /\B|\b/;
        mode.beginRe = langRe(mode.begin);
        if (!mode.end && !mode.endsWithParent)
          mode.end = /\B|\b/;
        if (mode.end)
          mode.endRe = langRe(mode.end);
        mode.terminator_end = reStr(mode.end) || '';
        if (mode.endsWithParent && parent.terminator_end)
          mode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end;
      }
      if (mode.illegal)
        mode.illegalRe = langRe(mode.illegal);
      if (mode.relevance === undefined)
        mode.relevance = 1;
      if (!mode.contains) {
        mode.contains = [];
      }
      var expanded_contains = [];
      mode.contains.forEach(function(c) {
        if (c.variants) {
          c.variants.forEach(function(v) {expanded_contains.push(inherit(c, v));});
        } else {
          expanded_contains.push(c === 'self' ? mode : c);
        }
      });
      mode.contains = expanded_contains;
      mode.contains.forEach(function(c) {compileMode(c, mode);});

      if (mode.starts) {
        compileMode(mode.starts, parent);
      }

      var terminators =
        mode.contains.map(function(c) {
          return c.beginKeywords ? '\\.?\\b(' + c.begin + ')\\b\\.?' : c.begin;
        })
        .concat([mode.terminator_end])
        .concat([mode.illegal])
        .map(reStr)
        .filter(Boolean);
      mode.terminators = terminators.length ? langRe(terminators.join('|'), true) : {exec: function(s) {return null;}};

      mode.continuation = {};
    }

    compileMode(language);
  }

  /*
  Core highlighting function. Accepts a language name, or an alias, and a
  string with the code to highlight. Returns an object with the following
  properties:

  - relevance (int)
  - value (an HTML string with highlighting markup)
  - language (the name that was passed in)
  - top (the mode that was current at the end of the input)

  When an illegal lexeme is met and `ignore_illegals` is falsy, only
  {relevance: 0, value: <escaped input>} is returned.
  Throws an Error for an unknown language name.
  */
  function highlight(name, value, ignore_illegals, continuation) {

    function subMode(lexeme, mode) {
      for (var i = 0; i < mode.contains.length; i++) {
        if (testRe(mode.contains[i].beginRe, lexeme)) {
          return mode.contains[i];
        }
      }
    }

    function endOfMode(mode, lexeme) {
      if (testRe(mode.endRe, lexeme)) {
        return mode;
      }
      if (mode.endsWithParent) {
        return endOfMode(mode.parent, lexeme);
      }
    }

    function isIllegal(lexeme, mode) {
      return !ignore_illegals && testRe(mode.illegalRe, lexeme);
    }

    function keywordMatch(mode, match) {
      var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
      /* The keyword table is a plain object filled with arbitrary keyword
         names, so the prototype method is called explicitly: a keyword named
         "hasOwnProperty" must not shadow it. */
      return Object.prototype.hasOwnProperty.call(mode.keywords, match_str) && mode.keywords[match_str];
    }

    function buildSpan(classname, insideSpan, leaveOpen, noPrefix) {
      var classPrefix = noPrefix ? '' : options.classPrefix,
          openSpan    = '<span class="' + classPrefix,
          closeSpan   = leaveOpen ? '' : '</span>';

      openSpan += classname + '">';

      return openSpan + insideSpan + closeSpan;
    }

    function processKeywords() {
      if (!top.keywords)
        return escape(mode_buffer);
      var result = '';
      var last_index = 0;
      /* Lexemes are searched in the raw text and each piece is escaped on
         output. Searching the escaped text would treat the "lt", "gt" and
         "amp" of HTML entities as lexemes. */
      top.lexemesRe.lastIndex = 0;
      var match = top.lexemesRe.exec(mode_buffer);
      while (match) {
        result += escape(mode_buffer.substr(last_index, match.index - last_index));
        var keyword_match = keywordMatch(top, match);
        if (keyword_match) {
          relevance += keyword_match[1];
          result += buildSpan(keyword_match[0], escape(match[0]));
        } else {
          result += escape(match[0]);
        }
        last_index = top.lexemesRe.lastIndex;
        match = top.lexemesRe.exec(mode_buffer);
      }
      return result + escape(mode_buffer.substr(last_index));
    }

    function processSubLanguage() {
      if (top.subLanguage && !languages[top.subLanguage]) {
        return escape(mode_buffer);
      }
      var result = top.subLanguage ? highlight(top.subLanguage, mode_buffer, true, top.continuation.top) : highlightAuto(mode_buffer);
      // Counting embedded language score towards the host language may be disabled
      // with zeroing the containing mode relevance. Usecase in point is Markdown that
      // allows XML everywhere and makes every XML snippet to have a much larger Markdown
      // score.
      if (top.relevance > 0) {
        relevance += result.relevance;
      }
      if (top.subLanguageMode === 'continuous') {
        top.continuation.top = result.top;
      }
      return buildSpan(result.language, result.value, false, true);
    }

    function processBuffer() {
      return top.subLanguage !== undefined ? processSubLanguage() : processKeywords();
    }

    function startNewMode(mode, lexeme) {
      var markup = mode.className ? buildSpan(mode.className, '', true) : '';
      if (mode.returnBegin) {
        result += markup;
        mode_buffer = '';
      } else if (mode.excludeBegin) {
        result += escape(lexeme) + markup;
        mode_buffer = '';
      } else {
        result += markup;
        mode_buffer = lexeme;
      }
      /* The active mode is a child object of the definition so that `parent`
         can be attached without touching the shared mode definition. */
      top = Object.create(mode, {parent: {value: top}});
    }

    /* Consumes the text before a terminator (`buffer`) and the terminator
       itself (`lexeme`); returns how many characters of the lexeme were used. */
    function processLexeme(buffer, lexeme) {

      mode_buffer += buffer;
      if (lexeme === undefined) {
        result += processBuffer();
        return 0;
      }

      var new_mode = subMode(lexeme, top);
      if (new_mode) {
        result += processBuffer();
        startNewMode(new_mode, lexeme);
        return new_mode.returnBegin ? 0 : lexeme.length;
      }

      var end_mode = endOfMode(top, lexeme);
      if (end_mode) {
        var origin = top;
        if (!(origin.returnEnd || origin.excludeEnd)) {
          mode_buffer += lexeme;
        }
        result += processBuffer();
        do {
          if (top.className) {
            result += '</span>';
          }
          relevance += top.relevance;
          top = top.parent;
        } while (top !== end_mode.parent);
        if (origin.excludeEnd) {
          result += escape(lexeme);
        }
        mode_buffer = '';
        if (end_mode.starts) {
          startNewMode(end_mode.starts, '');
        }
        return origin.returnEnd ? 0 : lexeme.length;
      }

      if (isIllegal(lexeme, top))
        throw new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '<unnamed>') + '"');

      /*
      Parser should not reach this point as all types of lexemes should be
      caught earlier, but if it does due to some bug make sure it advances at
      least one character forward to prevent infinite looping.
      */
      mode_buffer += lexeme;
      return lexeme.length || 1;
    }

    var language = getLanguage(name);
    if (!language) {
      throw new Error('Unknown language: "' + name + '"');
    }

    compileLanguage(language);
    var top = continuation || language;
    var result = '';
    var current;
    /* When resuming from a continuation, reopen the spans of its mode chain. */
    for (current = top; current !== language; current = current.parent) {
      if (current.className) {
        result = buildSpan(current.className, result, true);
      }
    }
    var mode_buffer = '';
    var relevance = 0;
    try {
      var match, count, index = 0;
      while (true) {
        top.terminators.lastIndex = index;
        match = top.terminators.exec(value);
        if (!match)
          break;
        count = processLexeme(value.substr(index, match.index - index), match[0]);
        index = match.index + count;
      }
      processLexeme(value.substr(index));
      // close dangling modes
      for (current = top; current.parent; current = current.parent) {
        if (current.className) {
          result += '</span>';
        }
      }
      return {
        relevance: relevance,
        value: result,
        language: name,
        top: top
      };
    } catch (e) {
      /* Guarded so that a thrown value without a message is rethrown as is
         instead of being masked by a TypeError. */
      if (e && e.message && e.message.indexOf('Illegal') !== -1) {
        return {
          relevance: 0,
          value: escape(value)
        };
      } else {
        throw e;
      }
    }
  }

  /*
  Highlighting with language detection. Accepts a string with the code to
  highlight and an optional list of language names to try (defaults to
  options.languages, then to all registered languages). Returns an object
  with the following properties:

  - language (detected language)
  - relevance (int)
  - value (an HTML string with highlighting markup)
  - second_best (object with the same structure for second-best heuristically
    detected language, may be absent)
  */
  function highlightAuto(text, languageSubset) {
    languageSubset = languageSubset || options.languages || Object.keys(languages);
    var result = {
      relevance: 0,
      value: escape(text)
    };
    var second_best = result;
    languageSubset.forEach(function(name) {
      if (!getLanguage(name)) {
        return;
      }
      var current = highlight(name, text, false);
      current.language = name;
      if (current.relevance > second_best.relevance) {
        second_best = current;
      }
      if (current.relevance > result.relevance) {
        second_best = result;
        result = current;
      }
    });
    if (second_best.language) {
      result.second_best = second_best;
    }
    return result;
  }

  /*
  Post-processing of the highlighted markup:

  - replace TABs with something more useful
  - replace real line-breaks with '<br>' for non-pre containers
  */
  function fixMarkup(value) {
    if (options.tabReplace) {
      /* Only TABs at the start of a line (possibly mixed with tags) are replaced. */
      value = value.replace(/^((<[^>]+>|\t)+)/gm, function(match, p1, offset, s) {
        return p1.replace(/\t/g, options.tabReplace);
      });
    }
    if (options.useBR) {
      value = value.replace(/\n/g, '<br>');
    }
    return value;
  }

  /*
  Applies highlighting to a DOM node containing code. Accepts the DOM node;
  its innerHTML and className are updated in place and the detection outcome
  is stored on it as `result` (and `second_best` when available).
  Does nothing for blocks marked 'no-highlight'.
  */
  function highlightBlock(block) {
    var text = blockText(block);
    var language = blockLanguage(block);
    if (language === 'no-highlight')
      return;
    var result = language ? highlight(language, text, true) : highlightAuto(text);
    var original = nodeStream(block);
    if (original.length) {
      /* The block had markup of its own: parse the highlighted HTML and
         merge both tag streams over the same text. */
      var pre = document.createElementNS('http://www.w3.org/1999/xhtml', 'pre');
      pre.innerHTML = result.value;
      result.value = mergeStreams(original, nodeStream(pre), text);
    }
    result.value = fixMarkup(result.value);

    block.innerHTML = result.value;
    block.className += ' hljs ' + (!language && result.language || '');
    block.result = {
      language: result.language,
      re: result.relevance
    };
    if (result.second_best) {
      block.second_best = {
        language: result.second_best.language,
        re: result.second_best.relevance
      };
    }
  }

  var options = {
    classPrefix: 'hljs-',
    tabReplace: null,
    useBR: false,
    languages: undefined
  };

  /*
  Updates highlight.js global options with values passed in the form of an object
  */
  function configure(user_options) {
    options = inherit(options, user_options);
  }

  /*
  Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
  Runs at most once; later calls return immediately.
  */
  function initHighlighting() {
    if (initHighlighting.called)
      return;
    initHighlighting.called = true;

    var blocks = document.querySelectorAll('pre code');
    Array.prototype.forEach.call(blocks, highlightBlock);
  }

  /*
  Attaches highlighting to the page load event.
  */
  function initHighlightingOnLoad() {
    addEventListener('DOMContentLoaded', initHighlighting, false);
    addEventListener('load', initHighlighting, false);
  }

  var languages = {};
  var aliases = {};

  /* Registers a language under `name`. `language` is a factory that is called
     with the receiver (`this`) and returns the language definition. */
  function registerLanguage(name, language) {
    var lang = languages[name] = language(this);
    if (lang.aliases) {
      lang.aliases.forEach(function(alias) {aliases[alias] = name;});
    }
  }

  /* Looks a language definition up by name or alias. */
  function getLanguage(name) {
    return languages[name] || languages[aliases[name]];
  }

  /* Interface definition */

  this.highlight = highlight;
  this.highlightAuto = highlightAuto;
  this.fixMarkup = fixMarkup;
  this.highlightBlock = highlightBlock;
  this.configure = configure;
  this.initHighlighting = initHighlighting;
  this.initHighlightingOnLoad = initHighlightingOnLoad;
  this.registerLanguage = registerLanguage;
  this.getLanguage = getLanguage;
  this.inherit = inherit;

  // Common regexps
  this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
  this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
  this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
  // 0x..., 0..., decimal, float
  this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)';
  // 0b...
  this.BINARY_NUMBER_RE = '\\b(0b[01]+)';
  this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';

  // Common modes
  this.BACKSLASH_ESCAPE = {
    begin: '\\\\[\\s\\S]', relevance: 0
  };
  this.APOS_STRING_MODE = {
    className: 'string',
    begin: '\'', end: '\'',
    illegal: '\\n',
    contains: [this.BACKSLASH_ESCAPE]
  };
  this.QUOTE_STRING_MODE = {
    className: 'string',
    begin: '"', end: '"',
    illegal: '\\n',
    contains: [this.BACKSLASH_ESCAPE]
  };
  this.C_LINE_COMMENT_MODE = {
    className: 'comment',
    begin: '//', end: '$'
  };
  this.C_BLOCK_COMMENT_MODE = {
    className: 'comment',
    begin: '/\\*', end: '\\*/'
  };
  this.HASH_COMMENT_MODE = {
    className: 'comment',
    begin: '#', end: '$'
  };
  this.NUMBER_MODE = {
    className: 'number',
    begin: this.NUMBER_RE,
    relevance: 0
  };
  this.C_NUMBER_MODE = {
    className: 'number',
    begin: this.C_NUMBER_RE,
    relevance: 0
  };
  this.BINARY_NUMBER_MODE = {
    className: 'number',
    begin: this.BINARY_NUMBER_RE,
    relevance: 0
  };
  this.REGEXP_MODE = {
    className: 'regexp',
    begin: /\//, end: /\/[gim]*/,
    illegal: /\n/,
    contains: [
      this.BACKSLASH_ESCAPE,
      {
        begin: /\[/, end: /\]/,
        relevance: 0,
        contains: [this.BACKSLASH_ESCAPE]
      }
    ]
  };
  this.TITLE_MODE = {
    className: 'title',
    begin: this.IDENT_RE,
    relevance: 0
  };
  this.UNDERSCORE_TITLE_MODE = {
    className: 'title',
    begin: this.UNDERSCORE_IDENT_RE,
    relevance: 0
  };
};

/* Export for CommonJS; skipped where `module` does not exist (plain script
   tag, ES module) so that loading the file there does not throw. */
if (typeof module !== 'undefined' && module.exports) {
  module.exports = Highlight;
}