UNPKG

refractor

Version:

Lightweight, robust, elegant virtual syntax highlighting using Prism

github.com/wooorm/refractor

wooorm/refractor

769 lines (687 loc) • 22.9 kB

JavaScript

// @ts-nocheck

// This is a slimmed down version of `prism-core.js`, to remove globals,
// document, workers, `util.encode`, `Token.stringify`

// Private helper vars
// NOTE(review): `lang` is not referenced by the code below (the `lang` in
// `languages.extend` is a separate local).
var lang = /(?:^|\s)lang(?:uage)?-([\w-]+)(?=\s|$)/i
var uniqueId = 0

// The grammar object for plaintext
var plainTextGrammar = {}

var _ = {
  /**
   * A namespace for utility methods.
   *
   * All functions in this namespace that are not explicitly marked as _public_ are for __internal use only__ and may
   * change or disappear at any time.
   *
   * @namespace
   * @memberof Prism
   */
  util: {
    /**
     * Returns the name of the type of the given value.
     *
     * @param {any} o
     * @returns {string}
     * @example
     * type(null)      === 'Null'
     * type(undefined) === 'Undefined'
     * type(123)       === 'Number'
     * type('foo')     === 'String'
     * type(true)      === 'Boolean'
     * type([1, 2])    === 'Array'
     * type({})        === 'Object'
     * type(String)    === 'Function'
     * type(/abc+/)    === 'RegExp'
     */
    type: function (o) {
      // `Object.prototype.toString` yields `[object Xyz]`; strip the wrapper.
      return Object.prototype.toString.call(o).slice(8, -1)
    },

    /**
     * Returns a unique number for the given object. Later calls will still return the same number.
     *
     * The number is stored on the object itself as a non-enumerable, non-writable `__id` property.
     *
     * @param {Object} obj
     * @returns {number}
     */
    objId: function (obj) {
      if (!obj['__id']) {
        Object.defineProperty(obj, '__id', {value: ++uniqueId})
      }
      return obj['__id']
    },

    /**
     * Creates a deep clone of the given object.
     *
     * The main intended use of this function is to clone language definitions.
     * Only plain objects and arrays are copied; every other value (regular
     * expressions, functions, primitives, ...) is returned as is.
     *
     * @param {T} o
     * @param {Record<number, any>} [visited] Clones made so far, keyed by `objId`; lets cyclic structures terminate.
     * @returns {T}
     * @template T
     */
    clone: function deepClone(o, visited) {
      visited = visited || {}

      var clone
      var id
      switch (_.util.type(o)) {
        case 'Object':
          id = _.util.objId(o)
          if (visited[id]) {
            return visited[id]
          }
          clone = /** @type {Record<string, any>} */ ({})
          // Register before recursing so self-references resolve to the clone.
          visited[id] = clone

          for (var key in o) {
            if (o.hasOwnProperty(key)) {
              clone[key] = deepClone(o[key], visited)
            }
          }

          return /** @type {any} */ (clone)

        case 'Array':
          id = _.util.objId(o)
          if (visited[id]) {
            return visited[id]
          }
          clone = []
          visited[id] = clone
          ;/** @type {Array} */ (/** @type {any} */ (o)).forEach(
            function (v, i) {
              clone[i] = deepClone(v, visited)
            }
          )

          return /** @type {any} */ (clone)

        default:
          return o
      }
    }
  },

  /**
   * This namespace contains all currently loaded languages and some helper functions to create and modify languages.
   *
   * @namespace
   * @memberof Prism
   * @public
   */
  languages: {
    /**
     * The grammar for plain, unformatted text.
     */
    plain: plainTextGrammar,
    plaintext: plainTextGrammar,
    text: plainTextGrammar,
    txt: plainTextGrammar,

    /**
     * Creates a deep copy of the language with the given id and appends the given tokens.
     *
     * If a token in `redef` also appears in the copied language, then the existing token in the copied language
     * will be overwritten at its original position.
     *
     * ## Best practices
     *
     * Since the position of overwriting tokens (token in `redef` that overwrite tokens in the copied language)
     * doesn't matter, they can technically be in any order. However, this can be confusing to others that are trying
     * to understand the language definition because, normally, the order of tokens matters in Prism grammars.
     *
     * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
     * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
     *
     * @param {string} id The id of the language to extend. This has to be a key in `Prism.languages`.
     * @param {Grammar} redef The new tokens to append.
     * @returns {Grammar} The new language created.
     * @public
     * @example
     * Prism.languages['css-with-colors'] = Prism.languages.extend('css', {
     *     // Prism.languages.css already has a 'comment' token, so this token will overwrite CSS' 'comment' token
     *     // at its original position
     *     'comment': { ... },
     *     // CSS doesn't have a 'color' token, so this token will be appended
     *     'color': /\b(?:red|green|blue)\b/
     * });
     */
    extend: function (id, redef) {
      var lang = _.util.clone(_.languages[id])

      for (var key in redef) {
        lang[key] = redef[key]
      }

      return lang
    },

    /**
     * Inserts tokens _before_ another token in a language definition or any other grammar.
     *
     * ## Usage
     *
     * This helper method makes it easy to modify existing languages. For example, the CSS language definition
     * not only defines CSS highlighting for CSS documents, but also needs to define highlighting for CSS embedded
     * in HTML through `<style>` elements. To do this, it needs to modify `Prism.languages.markup` and add the
     * appropriate tokens. However, `Prism.languages.markup` is a regular JavaScript object literal, so if you do
     * this:
     *
     * ```js
     * Prism.languages.markup.style = {
     *     // token
     * };
     * ```
     *
     * then the `style` token will be added (and processed) at the end. `insertBefore` allows you to insert tokens
     * before existing tokens. For the CSS example above, you would use it like this:
     *
     * ```js
     * Prism.languages.insertBefore('markup', 'cdata', {
     *     'style': {
     *         // token
     *     }
     * });
     * ```
     *
     * ## Special cases
     *
     * If the grammars of `inside` and `insert` have tokens with the same name, the tokens in `inside`'s grammar
     * will be ignored.
     *
     * This behavior can be used to insert tokens after `before`:
     *
     * ```js
     * Prism.languages.insertBefore('markup', 'comment', {
     *     'comment': Prism.languages.markup.comment,
     *     // tokens after 'comment'
     * });
     * ```
     *
     * ## Limitations
     *
     * The main problem `insertBefore` has to solve is iteration order. Since ES2015, the iteration order for object
     * properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
     * differently when keys are deleted and re-inserted. So `insertBefore` can't be implemented by temporarily
     * deleting properties which is necessary to insert at arbitrary positions.
     *
     * To solve this problem, `insertBefore` doesn't actually insert the given tokens into the target object.
     * Instead, it will create a new object and replace all references to the target object with the new one. This
     * can be done without temporarily deleting properties, so the iteration order is well-defined.
     *
     * However, only references that can be reached from `Prism.languages` or `insert` will be replaced. I.e. if
     * you hold the target object in a variable, then the value of the variable will not change.
     *
     * ```js
     * var oldMarkup = Prism.languages.markup;
     * var newMarkup = Prism.languages.insertBefore('markup', 'comment', { ... });
     *
     * assert(oldMarkup !== Prism.languages.markup);
     * assert(newMarkup === Prism.languages.markup);
     * ```
     *
     * @param {string} inside The property of `root` (e.g. a language id in `Prism.languages`) that contains the
     * object to be modified.
     * @param {string} before The key to insert before.
     * @param {Grammar} insert An object containing the key-value pairs to be inserted.
     * @param {Object<string, any>} [root] The object containing `inside`, i.e. the object that contains the
     * object to be modified.
     *
     * Defaults to `Prism.languages`.
     * @returns {Grammar} The new grammar object.
     * @public
     */
    insertBefore: function (inside, before, insert, root) {
      root = root || /** @type {any} */ (_.languages)
      var grammar = root[inside]
      /** @type {Grammar} */
      var ret = {}

      for (var token in grammar) {
        if (grammar.hasOwnProperty(token)) {
          if (token == before) {
            for (var newToken in insert) {
              if (insert.hasOwnProperty(newToken)) {
                ret[newToken] = insert[newToken]
              }
            }
          }

          // Do not insert tokens which also occur in insert. See #1525
          if (!insert.hasOwnProperty(token)) {
            ret[token] = grammar[token]
          }
        }
      }

      var old = root[inside]
      root[inside] = ret

      // Update references in other language definitions
      // (`DFS` invokes the callback with `this` set to the owning object).
      _.languages.DFS(_.languages, function (key, value) {
        if (value === old && key != inside) {
          this[key] = ret
        }
      })

      return ret
    },

    /**
     * Traverse a language definition with Depth First Search.
     *
     * `callback` is called as `callback.call(owner, key, value, type)` for every own enumerable property, before
     * descending into plain objects and arrays. Containers are visited once, tracked by `objId` in `visited`.
     *
     * @param {Object<string, any>} o The object to traverse.
     * @param {(this: any, key: string, value: any, type: string) => void} callback
     * @param {string | null} [type] Passed to the callback instead of the key when set (used for array members).
     * @param {Record<number, boolean>} [visited]
     */
    DFS: function DFS(o, callback, type, visited) {
      visited = visited || {}

      var objId = _.util.objId

      for (var i in o) {
        if (o.hasOwnProperty(i)) {
          callback.call(o, i, o[i], type || i)

          var property = o[i]
          var propertyType = _.util.type(property)

          if (propertyType === 'Object' && !visited[objId(property)]) {
            visited[objId(property)] = true
            DFS(property, callback, null, visited)
          } else if (propertyType === 'Array' && !visited[objId(property)]) {
            visited[objId(property)] = true
            DFS(property, callback, i, visited)
          }
        }
      }
    }
  },

  plugins: {},

  /**
   * Low-level function, only use if you know what you’re doing. It accepts a string of text as input
   * and the language definitions to use, and returns a string with the HTML produced.
   *
   * The following hooks will be run:
   * 1. `before-tokenize`
   * 2. `after-tokenize`
   * 3. `wrap`: On each {@link Token}.
   *
   * NOTE(review): this calls `_.util.encode` and `Token.stringify`, neither of which is defined in this file
   * (see the header comment). Presumably the consumer attaches both before calling `highlight`; without them this
   * function throws a `TypeError` after tokenizing. Confirm against the importing module.
   *
   * @param {string} text A string with the code to be highlighted.
   * @param {Grammar} grammar An object containing the tokens to use.
   *
   * Usually a language definition like `Prism.languages.markup`.
   * @param {string} language The name of the language definition passed to `grammar`.
   * @returns {string} The highlighted HTML.
   * @throws {Error} When no grammar is set after the `before-tokenize` hooks ran.
   * @memberof Prism
   * @public
   * @example
   * Prism.highlight('var foo = true;', Prism.languages.javascript, 'javascript');
   */
  highlight: function (text, grammar, language) {
    var env = {
      code: text,
      grammar: grammar,
      language: language
    }
    _.hooks.run('before-tokenize', env)
    if (!env.grammar) {
      throw new Error('The language "' + env.language + '" has no grammar.')
    }
    env.tokens = _.tokenize(env.code, env.grammar)
    _.hooks.run('after-tokenize', env)
    return Token.stringify(_.util.encode(env.tokens), env.language)
  },

  /**
   * This is the heart of Prism, and the most low-level function you can use. It accepts a string of text as input
   * and the language definitions to use, and returns an array with the tokenized code.
   *
   * When the language definition includes nested tokens, the function is called recursively on each of these tokens.
   *
   * This method could be useful in other contexts as well, as a very crude parser.
   *
   * Note that a `rest` property on `grammar` is merged into `grammar` and then deleted, so the grammar object
   * passed in is mutated.
   *
   * @param {string} text A string with the code to be highlighted.
   * @param {Grammar} grammar An object containing the tokens to use.
   *
   * Usually a language definition like `Prism.languages.markup`.
   * @returns {TokenStream} An array of strings and tokens, a token stream.
   * @memberof Prism
   * @public
   * @example
   * let code = `var foo = 0;`;
   * let tokens = Prism.tokenize(code, Prism.languages.javascript);
   * tokens.forEach(token => {
   *     if (token instanceof Prism.Token && token.type === 'number') {
   *         console.log(`Found numeric literal: ${token.content}`);
   *     }
   * });
   */
  tokenize: function (text, grammar) {
    var rest = grammar.rest
    if (rest) {
      for (var token in rest) {
        grammar[token] = rest[token]
      }

      delete grammar.rest
    }

    var tokenList = new LinkedList()
    addAfter(tokenList, tokenList.head, text)

    matchGrammar(text, tokenList, grammar, tokenList.head, 0)

    return toArray(tokenList)
  },

  /**
   * @namespace
   * @memberof Prism
   * @public
   */
  hooks: {
    all: {},

    /**
     * Adds the given callback to the list of callbacks for the given hook.
     *
     * The callback will be invoked when the hook it is registered for is run.
     * Hooks are usually directly run by a highlight function but you can also run hooks yourself.
     *
     * One callback function can be registered to multiple hooks and the same hook multiple times.
     *
     * @param {string} name The name of the hook.
     * @param {HookCallback} callback The callback function which is given environment variables.
     * @public
     */
    add: function (name, callback) {
      var hooks = _.hooks.all

      hooks[name] = hooks[name] || []

      hooks[name].push(callback)
    },

    /**
     * Runs a hook invoking all registered callbacks with the given environment variables.
     *
     * Callbacks will be invoked synchronously and in the order in which they were registered.
     *
     * @param {string} name The name of the hook.
     * @param {Object<string, any>} env The environment variables of the hook passed to all callbacks registered.
     * @public
     */
    run: function (name, env) {
      var callbacks = _.hooks.all[name]

      if (!callbacks || !callbacks.length) {
        return
      }

      for (var i = 0, callback; (callback = callbacks[i++]); ) {
        callback(env)
      }
    }
  },

  Token: Token
}

// Typescript note:
// The following can be used to import the Token type in JSDoc:
//
//   @typedef {InstanceType<import("./prism-core")["Token"]>} Token

/**
 * Creates a new token.
 *
 * @param {string} type See {@link Token#type type}
 * @param {string | TokenStream} content See {@link Token#content content}
 * @param {string|string[]} [alias] The alias(es) of the token.
 * @param {string} [matchedStr=""] A copy of the full string this token was created from.
 * @class
 * @global
 * @public
 */
function Token(type, content, alias, matchedStr) {
  /**
   * The type of the token.
   *
   * This is usually the key of a pattern in a {@link Grammar}.
   *
   * @type {string}
   * @see GrammarToken
   * @public
   */
  this.type = type
  /**
   * The strings or tokens contained by this token.
   *
   * This will be a token stream if the pattern matched also defined an `inside` grammar.
   *
   * @type {string | TokenStream}
   * @public
   */
  this.content = content
  /**
   * The alias(es) of the token.
   *
   * @type {string|string[]}
   * @see GrammarToken
   * @public
   */
  this.alias = alias
  // Length of the full string this token was created from (0 when omitted).
  // `matchGrammar` relies on it to advance its text position past tokens.
  this.length = (matchedStr || '').length | 0
}

/**
 * A token stream is an array of strings and {@link Token Token} objects.
 *
 * Token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
 * them.
 *
 * 1. No adjacent strings.
 * 2. No empty strings.
 *
 *    The only exception here is the token stream that only contains the empty string and nothing else.
 *
 * @typedef {Array<string | Token>} TokenStream
 * @global
 * @public
 */

/**
 * Runs `pattern` against `text` starting at `pos` and, when `lookbehind` is
 * set, strips the text captured by group 1 from the front of the match.
 *
 * @param {RegExp} pattern
 * @param {number} pos
 * @param {string} text
 * @param {boolean} lookbehind
 * @returns {RegExpExecArray | null}
 */
function matchPattern(pattern, pos, text, lookbehind) {
  // Only has an effect for global/sticky patterns.
  pattern.lastIndex = pos
  var match = pattern.exec(text)
  if (match && lookbehind && match[1]) {
    // change the match to remove the text matched by the Prism lookbehind group
    var lookbehindLength = match[1].length
    match.index += lookbehindLength
    match[0] = match[0].slice(lookbehindLength)
  }
  return match
}

/**
 * Applies every pattern of `grammar` to the string nodes of `tokenList`
 * (after `startNode`), replacing matched text with `Token` nodes in place.
 *
 * @param {string} text
 * @param {LinkedList<string | Token>} tokenList
 * @param {any} grammar
 * @param {LinkedListNode<string | Token>} startNode
 * @param {number} startPos
 * @param {RematchOptions} [rematch]
 * @returns {void}
 * @private
 *
 * @typedef RematchOptions
 * @property {string} cause
 * @property {number} reach
 */
function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) {
  for (var token in grammar) {
    if (!grammar.hasOwnProperty(token) || !grammar[token]) {
      continue
    }

    var patterns = grammar[token]
    patterns = Array.isArray(patterns) ? patterns : [patterns]

    for (var j = 0; j < patterns.length; ++j) {
      // A rematch only re-runs the patterns that precede the one causing it.
      if (rematch && rematch.cause == token + ',' + j) {
        return
      }

      var patternObj = patterns[j]
      var inside = patternObj.inside
      var lookbehind = !!patternObj.lookbehind
      var greedy = !!patternObj.greedy
      var alias = patternObj.alias

      if (greedy && !patternObj.pattern.global) {
        // Without the global flag, lastIndex won't work.
        // Read the flags from the `flags` property instead of parsing them out
        // of `toString()`: a character class of known flags silently drops
        // *all* flags as soon as one unknown flag (e.g. `d` or `v`) is present.
        var flags = patternObj.pattern.flags
        patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g')
      }

      /** @type {RegExp} */
      var pattern = patternObj.pattern || patternObj

      for (
        // iterate the token list and keep track of the current token/string position
        var currentNode = startNode.next, pos = startPos;
        currentNode !== tokenList.tail;
        pos += currentNode.value.length, currentNode = currentNode.next
      ) {
        if (rematch && pos >= rematch.reach) {
          break
        }

        var str = currentNode.value

        if (tokenList.length > text.length) {
          // Something went terribly wrong, ABORT, ABORT!
          return
        }

        if (str instanceof Token) {
          continue
        }

        var removeCount = 1 // this is the to parameter of removeBetween
        var match

        if (greedy) {
          match = matchPattern(pattern, pos, text, lookbehind)
          if (!match || match.index >= text.length) {
            break
          }

          var from = match.index
          var to = match.index + match[0].length
          var p = pos

          // find the node that contains the match
          p += currentNode.value.length
          while (from >= p) {
            currentNode = currentNode.next
            p += currentNode.value.length
          }
          // adjust pos (and p)
          p -= currentNode.value.length
          pos = p

          // if the current node is a Token, then the match starts inside another Token, which is invalid
          if (currentNode.value instanceof Token) {
            continue
          }

          // find the last node which is affected by this match
          for (
            var k = currentNode;
            k !== tokenList.tail && (p < to || typeof k.value === 'string');
            k = k.next
          ) {
            removeCount++
            p += k.value.length
          }
          removeCount--

          // replace with the new match
          str = text.slice(pos, p)
          match.index -= pos
        } else {
          match = matchPattern(pattern, 0, str, lookbehind)
          if (!match) {
            continue
          }
        }

        // eslint-disable-next-line no-redeclare
        var from = match.index
        var matchStr = match[0]
        var before = str.slice(0, from)
        var after = str.slice(from + matchStr.length)

        var reach = pos + str.length
        if (rematch && reach > rematch.reach) {
          rematch.reach = reach
        }

        var removeFrom = currentNode.prev

        if (before) {
          removeFrom = addAfter(tokenList, removeFrom, before)
          pos += before.length
        }

        removeRange(tokenList, removeFrom, removeCount)

        var wrapped = new Token(
          token,
          inside ? _.tokenize(matchStr, inside) : matchStr,
          alias,
          matchStr
        )
        currentNode = addAfter(tokenList, removeFrom, wrapped)

        if (after) {
          addAfter(tokenList, currentNode, after)
        }

        if (removeCount > 1) {
          // at least one Token object was removed, so we have to do some rematching
          // this can only happen if the current pattern is greedy

          /** @type {RematchOptions} */
          var nestedRematch = {
            cause: token + ',' + j,
            reach: reach
          }
          matchGrammar(
            text,
            tokenList,
            grammar,
            currentNode.prev,
            pos,
            nestedRematch
          )

          // the reach might have been extended because of the rematching
          if (rematch && nestedRematch.reach > rematch.reach) {
            rematch.reach = nestedRematch.reach
          }
        }
      }
    }
  }
}

/**
 * @typedef LinkedListNode
 * @property {T} value
 * @property {LinkedListNode<T> | null} prev The previous node.
 * @property {LinkedListNode<T> | null} next The next node.
 * @template T
 * @private
 */

/**
 * A doubly linked list with sentinel `head` and `tail` nodes; `length` counts
 * only the nodes between them.
 *
 * @template T
 * @private
 */
function LinkedList() {
  /** @type {LinkedListNode<T>} */
  var head = {value: null, prev: null, next: null}
  /** @type {LinkedListNode<T>} */
  var tail = {value: null, prev: head, next: null}
  head.next = tail

  /** @type {LinkedListNode<T>} */
  this.head = head
  /** @type {LinkedListNode<T>} */
  this.tail = tail
  this.length = 0
}

/**
 * Adds a new node with the given value to the list.
 *
 * @param {LinkedList<T>} list
 * @param {LinkedListNode<T>} node
 * @param {T} value
 * @returns {LinkedListNode<T>} The added node.
 * @template T
 */
function addAfter(list, node, value) {
  // assumes that node != list.tail && values.length >= 0
  var next = node.next

  var newNode = {value: value, prev: node, next: next}
  node.next = newNode
  next.prev = newNode
  list.length++

  return newNode
}

/**
 * Removes `count` nodes after the given node. The given node will not be removed.
 *
 * Stops early at the tail, so fewer than `count` nodes may be removed.
 *
 * @param {LinkedList<T>} list
 * @param {LinkedListNode<T>} node
 * @param {number} count
 * @template T
 */
function removeRange(list, node, count) {
  var next = node.next
  for (var i = 0; i < count && next !== list.tail; i++) {
    next = next.next
  }
  node.next = next
  next.prev = node
  // `i` is the number of nodes actually skipped.
  list.length -= i
}

/**
 * Collects the values of all nodes between the sentinels, in order.
 *
 * @param {LinkedList<T>} list
 * @returns {T[]}
 * @template T
 */
function toArray(list) {
  var array = []
  var node = list.head.next
  while (node !== list.tail) {
    array.push(node.value)
    node = node.next
  }
  return array
}

export const Prism = _