UNPKG

@scalar/code-highlight

Version:

Central methods and themes for code highlighting in Scalar projects

229 lines (228 loc) 7.96 kB
import rehypeExternalLinks from 'rehype-external-links';
import rehypeFormat from 'rehype-format';
import rehypeParse from 'rehype-parse';
import rehypeRaw from 'rehype-raw';
import rehypeSanitize, { defaultSchema } from 'rehype-sanitize';
import rehypeStringify from 'rehype-stringify';
import remarkGfm from 'remark-gfm';
import remarkParse from 'remark-parse';
import remarkRehype from 'remark-rehype';
import remarkStringify from 'remark-stringify';
import { unified } from 'unified';
import { SKIP, visit } from 'unist-util-visit';
import { standardLanguages } from '../languages/index.js';
import { rehypeAlert } from '../rehype-alert/index.js';
import { rehypeHighlight } from '../rehype-highlight/index.js';

/**
 * Type-guard to check if a node is a heading.
 *
 * @param node - AST node to inspect.
 * @returns `true` when the node has type `heading` and carries both `depth` and `children`.
 */
export const isHeading = (node) => {
  return node.type === 'heading' && 'depth' in node && 'children' in node;
};

/**
 * Plugin to transform nodes in a Markdown AST.
 *
 * Does nothing unless both `options.transform` and `options.type` are set.
 *
 * @param options - `{ transform, type }`: the callback to run and the node type to run it on.
 */
const transformNodes = (options) => (tree) => {
  if (!options?.transform || !options?.type) {
    return;
  }

  visit(tree, options.type, (node) => {
    options.transform(node);

    // SKIP: do not descend into the children of a node that was just transformed
    return SKIP;
  });
};

/** HTML tags whose direct text children are re-parsed as inline markdown. */
const TAGS_WITH_INLINE_MARKDOWN = new Set(['dd', 'dt', 'li', 'p', 'summary', 'td', 'th']);

/** Cheap pre-check: only text containing one of these characters is re-parsed. */
const MAY_CONTAIN_INLINE_MARKDOWN = /[`*_\[~]/;

/**
 * Preserve HTML-like text in inline markdown by turning mdast `html` nodes into text nodes.
 */
const preserveHtmlLikeText = () => (tree) => {
  visit(tree, 'html', (node, index, parent) => {
    const hasChildList = Boolean(parent) && 'children' in parent && Array.isArray(parent.children);

    if (typeof index !== 'number' || !hasChildList) {
      return;
    }

    parent.children[index] = {
      type: 'text',
      value: node.value ?? '',
    };
  });
};

/** Markdown (with GFM) to hast, used for re-parsing text found inside raw HTML. */
const inlineMarkdownProcessor = unified().use(remarkParse).use(remarkGfm).use(preserveHtmlLikeText).use(remarkRehype);

/** Parses an HTML fragment string into a hast tree. */
const htmlFragmentParser = unified().use(rehypeParse, { fragment: true });

/** Serializes a hast tree back into an HTML string. */
const htmlFragmentStringifier = unified().use(rehypeStringify);

/**
 * Parse inline markdown and return children from the generated paragraph.
 *
 * @param value - Text to parse as markdown.
 * @returns The children of the single generated `<p>` element, or an empty array when the
 * input does not produce exactly one paragraph.
 */
const extractInlineChildrenFromMarkdown = (value) => {
  const tree = inlineMarkdownProcessor.runSync(inlineMarkdownProcessor.parse(value));

  if (tree.children.length !== 1) {
    return [];
  }

  const [paragraph] = tree.children;

  if (!paragraph || paragraph.type !== 'element' || paragraph.tagName !== 'p') {
    return [];
  }

  return paragraph.children;
};

/**
 * Return the whitespace at both ends of a string.
 *
 * Only space, tab and line endings are considered: this is the whitespace the markdown
 * parser strips from the start and end of a paragraph.
 *
 * @param value - Text to inspect.
 * @returns `{ leading, trailing }` whitespace runs (either may be an empty string).
 */
const getOuterWhitespace = (value) => {
  const isWhitespace = (char) => char === ' ' || char === '\t' || char === '\n' || char === '\r';

  let start = 0;
  while (start < value.length && isWhitespace(value[start])) {
    start += 1;
  }

  let end = value.length;
  while (end > start && isWhitespace(value[end - 1])) {
    end -= 1;
  }

  return { leading: value.slice(0, start), trailing: value.slice(end) };
};

/**
 * Re-parses text nodes in selected HTML tags so inline markdown works in tags like <p>.
 */
const transformInlineMarkdownInHtml = () => (tree) => {
  visit(tree, 'element', (node) => {
    if (!TAGS_WITH_INLINE_MARKDOWN.has(node.tagName)) {
      return;
    }

    node.children = node.children.flatMap((child) => {
      if (child.type !== 'text' || !MAY_CONTAIN_INLINE_MARKDOWN.test(child.value)) {
        return [child];
      }

      const markdownChildren = extractInlineChildrenFromMarkdown(child.value);

      if (!markdownChildren.length) {
        return [child];
      }

      // Paragraph parsing drops leading and trailing whitespace. Put it back, otherwise the
      // text is glued to its sibling elements (`<strong>bold</strong> and *x*` → `boldand`).
      const { leading, trailing } = getOuterWhitespace(child.value);

      return [
        ...(leading ? [{ type: 'text', value: leading }] : []),
        ...markdownChildren,
        ...(trailing ? [{ type: 'text', value: trailing }] : []),
      ];
    });
  });
};

/**
 * Rewrites raw HTML strings so inline markdown parsing is only applied to raw HTML input.
 */
const transformInlineMarkdownInRawHtml = () => (tree) => {
  visit(tree, 'raw', (node) => {
    if (typeof node.value !== 'string' || !MAY_CONTAIN_INLINE_MARKDOWN.test(node.value)) {
      return;
    }

    // Parse the raw HTML, transform its text nodes, then write the HTML back as a string
    const htmlFragmentTree = htmlFragmentParser.parse(node.value);
    transformInlineMarkdownInHtml()(htmlFragmentTree);
    node.value = htmlFragmentStringifier.stringify(htmlFragmentTree);
  });
};

/**
 * Take a Markdown string and generate HTML from it.
 *
 * @param markdown - Markdown source.
 * @param options - Optional settings:
 *   - `removeTags`: tag names to remove from the list of allowed tags.
 *   - `allowTags`: tag names to allow in addition to the default sanitize schema.
 *   - `transform` / `transformType`: callback and node type for a custom Markdown AST transform.
 * @returns The generated HTML string.
 */
export function htmlFromMarkdown(markdown, options) {
  // Add permitted tags and remove stripped ones
  const removeTags = options?.removeTags ?? [];
  const tagNames = [...(defaultSchema.tagNames ?? []), ...(options?.allowTags ?? [])].filter(
    (t) => !removeTags.includes(t),
  );

  const html = unified()
    // Parses markdown
    .use(remarkParse)
    // Support autolink literals, footnotes, strikethrough, tables and tasklists
    .use(remarkGfm)
    .use(transformNodes, {
      transform: options?.transform,
      type: options?.transformType,
    })
    // Allows any HTML tags
    .use(remarkRehype, { allowDangerousHtml: true })
    // Adds GitHub alerts
    .use(rehypeAlert)
    // Parse inline markdown only inside raw HTML fragments, not normal markdown output
    .use(transformInlineMarkdownInRawHtml)
    // Creates an HTML AST
    .use(rehypeRaw)
    // Removes disallowed tags
    .use(rehypeSanitize, {
      ...defaultSchema,
      // Don't prefix the heading ids
      clobberPrefix: '',
      // Makes it even more strict
      tagNames,
      attributes: {
        ...defaultSchema.attributes,
        abbr: ['title'],
        // Allow all class names while preserving the existing default attributes
        '*': [...(defaultSchema.attributes?.['*'] ?? []), 'className'],
      },
      // Strip content of dangerous elements, not just the tags
      strip: ['script', 'style', 'object', 'embed', 'form'],
    })
    // Syntax highlighting
    .use(rehypeHighlight, {
      languages: standardLanguages,
      // Enable auto detection
      detect: true,
    })
    // Adds target="_blank" to external links
    .use(rehypeExternalLinks, { target: '_blank' })
    // Formats the HTML
    .use(rehypeFormat)
    // Converts the HTML AST to a string
    .use(rehypeStringify)
    // Run the pipeline
    .processSync(markdown);

  return html.toString();
}

/**
 * Create a Markdown AST from a string.
 */
function getMarkdownAst(markdown) {
  return unified().use(remarkParse).use(remarkGfm).parse(markdown);
}

/**
 * Find all headings in a Markdown document.
 *
 * Headings of every level are returned; headings without any text are left out.
 *
 * @param markdown - Markdown source.
 * @param depth - Fallback depth, used only for a heading node that has no `depth` of its own.
 * @returns A list of `{ depth, value }` entries in document order.
 */
export function getHeadings(markdown, depth = 1) {
  const tree = getMarkdownAst(markdown);
  const nodes = [];

  visit(tree, 'heading', (node) => {
    const text = textFromNode(node);

    if (text) {
      nodes.push({ depth: node.depth ?? depth, value: text });
    }
  });

  return nodes;
}

/**
 * Extract plain text from a Markdown AST node (recursively).
 *
 * Handles headings with nested phrasing content such as links, and includes the content
 * of inline code so that a heading like "Use `foo`" does not lose part of its text.
 *
 * @param node - AST node to read.
 * @returns The concatenated text, or an empty string when the node has none.
 */
export function textFromNode(node) {
  // `inlineCode` keeps its content in `value` and has no children
  if (node.type === 'text' || node.type === 'inlineCode') {
    return node.value ?? '';
  }

  if ('children' in node && Array.isArray(node.children)) {
    return node.children.map((child) => textFromNode(child)).join('');
  }

  return '';
}

/**
 * Return multiple Markdown documents. Every heading should be its own document.
 *
 * @param markdown - Markdown source.
 * @returns Markdown strings: one per heading and one per run of nodes between headings.
 */
export function splitContent(markdown) {
  const tree = getMarkdownAst(markdown);

  /** Sections */
  const sections = [];

  /** Nodes inside a section */
  let nodes = [];

  for (const node of tree.children ?? []) {
    // Anything that is not a heading is added to the current section
    if (node.type !== 'heading') {
      nodes.push(node);
      continue;
    }

    // A heading closes the current section and becomes a section of its own
    if (nodes.length) {
      sections.push(nodes);
    }

    sections.push([node]);
    nodes = [];
  }

  // Add any remaining nodes
  if (nodes.length) {
    sections.push(nodes);
  }

  return sections.map((section) => createDocument(section));
}

/**
 * Use remark to create a Markdown document from a list of nodes.
 *
 * @param nodes - Markdown AST nodes that become the children of a new root node.
 * @returns The Markdown string without surrounding whitespace.
 */
function createDocument(nodes) {
  // Create the Markdown string
  const markdown = unified().use(remarkStringify).use(remarkGfm).stringify({
    type: 'root',
    children: nodes,
  });

  // Remove the whitespace
  return markdown.trim();
}