UNPKG

highlight.js

Version:

Syntax highlighting with language autodetection.

242 lines (238 loc) 7 kB
/*
Language: HTML, XML
Website: https://www.w3.org/XML/
Category: common, web
Audit: 2020
*/

/**
 * Builds the highlight.js language definition for HTML and XML.
 *
 * From the `hljs` argument this function reads `hljs.regex` (`concat`,
 * `optional`, `lookahead`, `either`), `hljs.inherit`, `hljs.COMMENT`,
 * `hljs.APOS_STRING_MODE` and `hljs.QUOTE_STRING_MODE`.
 *
 * It returns a plain object with the language name, its aliases, the
 * `case_insensitive` / `unicodeRegex` flags and the ordered list of
 * top-level modes (`contains`).
 *
 * @type LanguageFn
 */
function xml(hljs) {
  const regex = hljs.regex;

  // XML names can have the following additional letters: https://www.w3.org/TR/xml/#NT-NameChar
  // OTHER_NAME_CHARS = /[:\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]/;
  // Element names start with NAME_START_CHAR followed by optional other Unicode letters,
  // ASCII digits, hyphens, underscores, and periods.
  // Spec-exact patterns, kept for reference only:
  // const XML_IDENT_RE = /[A-Z_a-z:\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]+/;
  // const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);
  // however, to cater for performance and more Unicode support rely simply on the Unicode letter class

  // Tag name: a letter or underscore, an optional "prefix:" part, then the local name.
  const TAG_NAME_RE = regex.concat(
    /[\p{L}_]/u,
    regex.optional(/[\p{L}0-9_.-]*:/u),
    /[\p{L}0-9_.-]*/u
  );

  // Attribute name: any run of letters, digits, '.', '_', ':' and '-'.
  const XML_IDENT_RE = /[\p{L}0-9._:-]+/u;

  // Character references: named (&amp;, &frac12;), decimal (&#38;) and hex (&#x26;).
  // The named form allows digits after the first letter so that references
  // such as &frac12; or &sup2; are recognised as well.
  const XML_ENTITIES = {
    className: 'symbol',
    begin: /&[a-z][a-z0-9]*;|&#[0-9]+;|&#x[a-f0-9]+;/
  };

  // Keywords inside "<!...>" declarations; the mode opens on whitespace.
  // NOTE(review): the digit class below is 1-9, so a '0' ends the keyword
  // match. Confirm whether 0-9 was intended.
  const XML_META_KEYWORDS = {
    begin: /\s/,
    contains: [
      {
        className: 'keyword',
        begin: /#?[a-z_][a-z1-9_-]+/,
        illegal: /\n/
      }
    ]
  };

  // Same keyword mode, but delimited by parentheses instead of whitespace.
  const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
    begin: /\(/,
    end: /\)/
  });

  // Quoted strings inside declarations, reported with the 'string' class.
  const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { className: 'string' });
  const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { className: 'string' });

  // Everything between a tag name and the closing bracket:
  // attribute names and their (quoted or bare) values.
  const TAG_INTERNALS = {
    endsWithParent: true,
    illegal: /</,
    relevance: 0,
    contains: [
      {
        className: 'attr',
        begin: XML_IDENT_RE,
        relevance: 0
      },
      {
        begin: /=\s*/,
        relevance: 0,
        contains: [
          {
            className: 'string',
            endsParent: true,
            variants: [
              // double-quoted value
              {
                begin: /"/,
                end: /"/,
                contains: [ XML_ENTITIES ]
              },
              // single-quoted value
              {
                begin: /'/,
                end: /'/,
                contains: [ XML_ENTITIES ]
              },
              // unquoted value
              { begin: /[^\s"'=<>`]+/ }
            ]
          }
        ]
      }
    ]
  };

  return {
    name: 'HTML, XML',
    aliases: [
      'html',
      'xhtml',
      'rss',
      'atom',
      'xjb',
      'xsd',
      'xsl',
      'plist',
      'wsf',
      'svg'
    ],
    case_insensitive: true,
    unicodeRegex: true,
    contains: [
      // "<!name ...>" declarations, optionally with a bracketed internal subset
      {
        className: 'meta',
        begin: /<![a-z]/,
        end: />/,
        relevance: 10,
        contains: [
          XML_META_KEYWORDS,
          QUOTE_META_STRING_MODE,
          APOS_META_STRING_MODE,
          XML_META_PAR_KEYWORDS,
          {
            begin: /\[/,
            end: /\]/,
            contains: [
              {
                className: 'meta',
                begin: /<![a-z]/,
                end: />/,
                contains: [
                  XML_META_KEYWORDS,
                  XML_META_PAR_KEYWORDS,
                  QUOTE_META_STRING_MODE,
                  APOS_META_STRING_MODE
                ]
              }
            ]
          }
        ]
      },
      hljs.COMMENT(
        /<!--/,
        /-->/,
        { relevance: 10 }
      ),
      // CDATA section
      {
        begin: /<!\[CDATA\[/,
        end: /\]\]>/,
        relevance: 10
      },
      XML_ENTITIES,
      // xml processing instructions
      {
        className: 'meta',
        end: /\?>/,
        variants: [
          {
            begin: /<\?xml/,
            relevance: 10,
            contains: [ QUOTE_META_STRING_MODE ]
          },
          { begin: /<\?[a-z][a-z0-9]+/ }
        ]
      },
      {
        className: 'tag',
        /*
        The lookahead pattern (?=...) ensures that 'begin' only matches
        '<style' as a single word, followed by a whitespace or an
        ending bracket.
        */
        begin: /<style(?=\s|>)/,
        end: />/,
        keywords: { name: 'style' },
        contains: [ TAG_INTERNALS ],
        starts: {
          end: /<\/style>/,
          returnEnd: true,
          subLanguage: [
            'css',
            'xml'
          ]
        }
      },
      {
        className: 'tag',
        // Same lookahead as for '<style': match '<script' only as a whole
        // word followed by whitespace or an ending bracket.
        begin: /<script(?=\s|>)/,
        end: />/,
        keywords: { name: 'script' },
        contains: [ TAG_INTERNALS ],
        starts: {
          end: /<\/script>/,
          returnEnd: true,
          subLanguage: [
            'javascript',
            'handlebars',
            'xml'
          ]
        }
      },
      // we need this for now for JSX
      {
        className: 'tag',
        begin: /<>|<\/>/
      },
      // open tag
      {
        className: 'tag',
        begin: regex.concat(
          /</,
          regex.lookahead(regex.concat(
            TAG_NAME_RE,
            // <tag/>
            // <tag>
            // <tag ...
            regex.either(/\/>/, />/, /\s/)
          ))
        ),
        end: /\/?>/,
        contains: [
          {
            className: 'name',
            begin: TAG_NAME_RE,
            relevance: 0,
            starts: TAG_INTERNALS
          }
        ]
      },
      // close tag
      {
        className: 'tag',
        begin: regex.concat(
          /<\//,
          regex.lookahead(regex.concat(
            TAG_NAME_RE, />/
          ))
        ),
        contains: [
          {
            className: 'name',
            begin: TAG_NAME_RE,
            relevance: 0
          },
          {
            begin: />/,
            relevance: 0,
            endsParent: true
          }
        ]
      }
    ]
  };
}

module.exports = xml;