UNPKG

@torchlight-api/torchlight-cli

Version:

A CLI for Torchlight - the syntax highlighting API

205 lines (165 loc) • 6.04 kB

JavaScript

import path from 'path' import torchlight from '../torchlight.js' import Block from '../block.js' import cheerio from 'cheerio' import chokidar from 'chokidar' import log from '../support/log.js' import fs from 'fs-extra' import { bus, FILE_WATCHING_COMPLETE } from '../support/bus.js' export default function (torchlight, options) { options = { input: torchlight.config('highlight.input', ''), output: torchlight.config('highlight.output', ''), include: torchlight.config('highlight.includeGlobs', ['**/*.htm', '**/*.html']), exclude: torchlight.config('highlight.excludePatterns', ['/node_modules/', '/vendor/']), watch: false, ...options } if (options.watch) { log.info(` *************************************** * Torchlight is watching files... * *************************************** `) } const input = path.resolve(options.input) const output = path.resolve(options.output || options.input) const watcher = chokidar.watch(normalizeStringedArray(options.include), { cwd: input, ignored: path => normalizeStringedArray(options.exclude).some(s => path.includes(s)), ignoreInitial: false }) watcher.on('all', (event, file) => { if (event !== 'add' && event !== 'change') { return } log.info('Highlighting %s', file) const source = fs.readFileSync(path.join(input, file), 'utf-8') highlight(torchlight, source).then(highlighted => { const destination = path.join(output, file) fs.ensureFileSync(destination) if (highlighted === fs.readFileSync(destination, 'utf-8')) { return } log.info('Writing to %s', destination) fs.writeFileSync(destination, highlighted, 'utf-8') }) }) watcher.on('ready', function () { if (!options.watch) { watcher.close().then(() => bus.emit(FILE_WATCHING_COMPLETE)) } }) } function normalizeStringedArray (value) { return (typeof value === 'string' ? value.split(',') : value).filter(x => x) } function highlight (torchlight, source) { let highlighted = source const $ = cheerio.load(source, { sourceCodeLocationInfo: true }, false) const blocks = [] // Search for blocks that have not already been processed. $('pre:not([data-torchlight-processed])').each((index, pre) => { const $pre = $(pre) $pre.children('code').each((index, code) => { const $code = $(code) const block = new Block({ // Using `text()` will re-encode entities like &lgt; code: $code.text(), language: decipherLanguage($pre, $code) }) // Add our class placeholder as a class, so that we don't overwrite // any classes that are already there. $pre.addClass(block.placeholder('class')) // Add a fake style that we can replace later. $pre.css(block.placeholder('style'), '0') // Store the raw code as the developer wrote it, so we can re-highlight // it later if we need to, or allow it to be copied to clipboard. const raw = `<textarea data-torchlight-original='true' style='display: none !important;'>${$code.html()}</textarea>` // Add the placeholder inside the code tag. $code.html(block.placeholder('highlighted') + raw) // Give the developer an opportunity to add things to the placeholder // element. Like copy to clipboard buttons, language indicators, etc. if (torchlight.config('modifyPlaceholderElement')) { torchlight.config('modifyPlaceholderElement')($, $pre, $code, block) } blocks.push(block) }) // Add the options hash that this block will be highlighted with. $pre.attr('data-torchlight-processed', torchlight.configHash()) // Cut out the *exact* pre element as it is in the file. Cheerio converts // single quotes to double, normalizes whitespace, and otherwise "cleans // up" the parsed document, so we can't simply modify the Cheerio dom and // write it back to disk. Instead we're going to surgically cut out the // pre tag and all its contents without touching anything else around it. const pristinePreElement = source.substring( pre.sourceCodeLocation.startOffset, pre.sourceCodeLocation.endOffset ) // Swap out the original tag with the outerHTML of our modified tag. highlighted = highlighted.replace(pristinePreElement, $.html($pre)) }) if (!blocks.length) { return Promise.resolve(source) } return torchlight.highlight(blocks).then(() => { blocks.forEach(block => { const swap = { [block.placeholder('class')]: block.classes ?? '', [block.placeholder('style') + ': 0;']: block.styles ?? '', [block.placeholder('highlighted')]: block.highlighted } Object.keys(swap).forEach(key => { highlighted = highlighted.replace(key, () => swap[key]) }) }) return highlighted }) } /** * Given a <pre> element, figure out what language it is. * * @param $pre * @return {string} */ function decipherLanguage ($pre, $code) { const custom = torchlight.config('highlight.decipherLanguageFromElement') // Let the developer add their own deciphering mechanism. if (custom) { const lang = custom($pre) if (lang) { return lang } } const langs = [ // Look first at the code element. ...decipherFromElement($code), // And then the pre element. ...decipherFromElement($pre) ] return langs.length ? langs[0] : 'text' } /** * Given any element, figure out what language it might be. * * @param $el * @return {*[]} */ function decipherFromElement ($el) { if (!$el) { return [] } const classes = ($el.attr('class') || '') .split(' ') // These classes are commonly used to denote code languages. .filter(c => c.startsWith('language-') || c.startsWith('lang-')) .map(c => c.replace('language-', '').replace('lang-', '')) return [ // Data attributes get highest priority. $el.data()?.language, $el.data()?.lang, ...classes ].filter(l => l) }