@scalar/code-highlight
Version:
Central methods and themes for code highlighting in Scalar projects
229 lines (228 loc) • 7.96 kB
JavaScript
import rehypeExternalLinks from 'rehype-external-links';
import rehypeFormat from 'rehype-format';
import rehypeParse from 'rehype-parse';
import rehypeRaw from 'rehype-raw';
import rehypeSanitize, { defaultSchema } from 'rehype-sanitize';
import rehypeStringify from 'rehype-stringify';
import remarkGfm from 'remark-gfm';
import remarkParse from 'remark-parse';
import remarkRehype from 'remark-rehype';
import remarkStringify from 'remark-stringify';
import { unified } from 'unified';
import { SKIP, visit } from 'unist-util-visit';
import { standardLanguages } from '../languages/index.js';
import { rehypeAlert } from '../rehype-alert/index.js';
import { rehypeHighlight } from '../rehype-highlight/index.js';
/**
 * Type guard for Markdown heading nodes.
 *
 * A node qualifies when its `type` is `'heading'` and it owns (or inherits)
 * both a `depth` and a `children` property.
 *
 * @param {object} node - AST node to inspect.
 * @returns {boolean} `true` when the node has the shape of a heading.
 */
export const isHeading = (node) => {
  if (node.type !== 'heading') {
    return false;
  }
  return ['depth', 'children'].every((key) => key in node);
};
/**
 * Plugin that passes every node of a given type in a Markdown AST to a callback.
 *
 * The plugin does nothing unless both `options.transform` and `options.type`
 * are truthy. The callback's return value is ignored, so it is expected to
 * change the node in place.
 *
 * @param {{ transform?: Function, type?: string }} [options] - Callback and node type to match.
 * @returns {(tree: object) => void} Transformer that walks the tree.
 */
const transformNodes = (options, ..._ignored) => (tree) => {
  const isConfigured = Boolean(options?.transform && options?.type);
  if (!isConfigured) {
    return;
  }
  visit(tree, options.type, (node) => {
    if (options.transform) {
      options.transform(node);
    }
    // Returning SKIP tells the visitor not to descend into this node's children
    return SKIP;
  });
};
// HTML tags whose direct text children are re-parsed as inline markdown
const TAGS_WITH_INLINE_MARKDOWN = new Set([
  'dd',
  'dt',
  'li',
  'p',
  'summary',
  'td',
  'th',
]);
// Cheap pre-check: text without any of these characters is never re-parsed
const MAY_CONTAIN_INLINE_MARKDOWN = /[`*_\[~]/;
/**
 * Plugin that replaces every mdast `html` node with a `text` node holding the
 * same value, so HTML-like input inside inline markdown is kept as text.
 *
 * Nodes that have no numeric index or no parent with a `children` array are
 * left untouched.
 *
 * @returns {(tree: object) => void} Transformer that rewrites the tree in place.
 */
const preserveHtmlLikeText = () => (tree) => {
  visit(tree, 'html', (htmlNode, position, container) => {
    const isReplaceable =
      typeof position === 'number' &&
      container &&
      'children' in container &&
      Array.isArray(container.children);
    if (!isReplaceable) {
      return;
    }
    const replacement = { type: 'text', value: htmlNode.value ?? '' };
    container.children[position] = replacement;
  });
};
// Shared processors, built once at module load and reused for every call.

// Markdown (with GFM) -> HTML AST, keeping HTML-like input as plain text
const inlineMarkdownProcessor = unified()
  .use(remarkParse)
  .use(remarkGfm)
  .use(preserveHtmlLikeText)
  .use(remarkRehype);
// HTML string -> HTML AST, parsed as a fragment
const htmlFragmentParser = unified().use(rehypeParse, { fragment: true });
// HTML AST -> HTML string
const htmlFragmentStringifier = unified().use(rehypeStringify);
/**
 * Run a string through the inline markdown processor and return the children
 * of the resulting paragraph.
 *
 * @param {string} value - Text that may contain inline markdown.
 * @returns {object[]} The paragraph's children, or an empty array when the
 * result is anything other than exactly one `<p>` element.
 */
const extractInlineChildrenFromMarkdown = (value) => {
  const markdownTree = inlineMarkdownProcessor.parse(value);
  const { children } = inlineMarkdownProcessor.runSync(markdownTree);
  const [first] = children;
  const isSingleParagraph =
    children.length === 1 && first?.type === 'element' && first.tagName === 'p';
  return isSingleParagraph ? first.children : [];
};
/**
 * Re-parses text nodes in selected HTML tags so inline markdown works in tags like <p>.
 *
 * Only direct text children of the tags in TAGS_WITH_INLINE_MARKDOWN are
 * considered, and only when they contain a character that could start inline
 * markdown. A text node is kept as-is when re-parsing yields nothing.
 *
 * Markdown drops the whitespace at the start and end of a paragraph. Without
 * compensation, `**bold** ` directly followed by an `<a>` element would lose
 * the separating space, so the outer whitespace of the original text is put
 * back as text nodes around the generated children.
 *
 * @returns {(tree: object) => void} Transformer that rewrites the tree in place.
 */
const transformInlineMarkdownInHtml = () => (tree) => {
  // Spaces, tabs and line endings are what markdown strips around a paragraph
  const isStripped = (character) =>
    character === ' ' || character === '\t' || character === '\r' || character === '\n';
  // Linear scan instead of a regex to stay cheap on long whitespace runs
  const splitOuterWhitespace = (value) => {
    let start = 0;
    while (start < value.length && isStripped(value[start])) {
      start += 1;
    }
    let end = value.length;
    while (end > start && isStripped(value[end - 1])) {
      end -= 1;
    }
    return { leading: value.slice(0, start), trailing: value.slice(end) };
  };
  const toTextNodes = (value) => (value ? [{ type: 'text', value }] : []);
  visit(tree, 'element', (node) => {
    if (!TAGS_WITH_INLINE_MARKDOWN.has(node.tagName)) {
      return;
    }
    node.children = node.children.flatMap((child) => {
      if (child.type !== 'text' || !MAY_CONTAIN_INLINE_MARKDOWN.test(child.value)) {
        return [child];
      }
      const markdownChildren = extractInlineChildrenFromMarkdown(child.value);
      if (!markdownChildren.length) {
        return [child];
      }
      const { leading, trailing } = splitOuterWhitespace(child.value);
      return [...toTextNodes(leading), ...markdownChildren, ...toTextNodes(trailing)];
    });
  });
};
/**
 * Rewrites raw HTML strings so inline markdown parsing is only applied to raw HTML input.
 *
 * Each `raw` node whose string value may contain inline markdown is parsed as
 * an HTML fragment, transformed, and serialized back.
 *
 * The node is only overwritten when the transform changed the serialized
 * output. A raw node can hold just part of an element, for example the lone
 * opening tag `<a href="https://example.com/foo_bar">` of inline HTML.
 * Parsing and re-serializing such a piece closes the element, which would
 * detach the content that follows it, so untouched fragments keep their
 * original string.
 *
 * @returns {(tree: object) => void} Transformer that rewrites the tree in place.
 */
const transformInlineMarkdownInRawHtml = () => (tree) => {
  const applyInlineMarkdown = transformInlineMarkdownInHtml();
  visit(tree, 'raw', (node) => {
    if (typeof node.value !== 'string' || !MAY_CONTAIN_INLINE_MARKDOWN.test(node.value)) {
      return;
    }
    const htmlFragmentTree = htmlFragmentParser.parse(node.value);
    // Serialize before and after so parser normalization alone never counts as a change
    const before = htmlFragmentStringifier.stringify(htmlFragmentTree);
    applyInlineMarkdown(htmlFragmentTree);
    const after = htmlFragmentStringifier.stringify(htmlFragmentTree);
    if (after !== before) {
      node.value = after;
    }
  });
};
/**
 * Take a Markdown string and generate sanitized, highlighted HTML from it.
 *
 * @param {string} markdown - Markdown source.
 * @param {object} [options]
 * @param {string[]} [options.allowTags] - Tag names allowed in addition to the sanitizer defaults.
 * @param {string[]} [options.removeTags] - Tag names removed from the allowed list.
 * @param {Function} [options.transform] - Callback applied to Markdown AST nodes of `transformType`.
 * @param {string} [options.transformType] - Node type passed to `transform`.
 * @returns {string} The generated HTML.
 */
export function htmlFromMarkdown(markdown, options) {
  // Allowed tags: sanitizer defaults plus `allowTags`, minus `removeTags`
  const removeTags = options?.removeTags ?? [];
  const candidateTags = [...(defaultSchema.tagNames ?? []), ...(options?.allowTags ?? [])];
  const tagNames = candidateTags.filter((tag) => !removeTags.includes(tag));

  const sanitizeSchema = {
    ...defaultSchema,
    // Don't prefix the heading ids
    clobberPrefix: '',
    // Restrict the output to the computed tag list
    tagNames,
    attributes: {
      ...defaultSchema.attributes,
      abbr: ['title'],
      // Allow all class names while preserving the existing default attributes
      '*': [...(defaultSchema.attributes?.['*'] ?? []), 'className'],
    },
    // Strip content of dangerous elements, not just the tags
    strip: ['script', 'style', 'object', 'embed', 'form'],
  };

  // The order of the steps matters: sanitizing runs on the parsed raw HTML and
  // before syntax highlighting.
  const processor = unified()
    // Markdown string -> Markdown AST, with GFM extensions
    .use(remarkParse)
    .use(remarkGfm)
    // Caller-supplied node transform
    .use(transformNodes, {
      transform: options?.transform,
      type: options?.transformType,
    })
    // Markdown AST -> HTML AST, keeping embedded HTML as raw nodes
    .use(remarkRehype, { allowDangerousHtml: true })
    // GitHub alerts
    .use(rehypeAlert)
    // Inline markdown inside raw HTML fragments only, not normal markdown output
    .use(transformInlineMarkdownInRawHtml)
    // Raw HTML strings -> HTML AST nodes
    .use(rehypeRaw)
    // Remove disallowed tags and attributes
    .use(rehypeSanitize, sanitizeSchema)
    // Syntax highlighting with auto detection
    .use(rehypeHighlight, {
      languages: standardLanguages,
      detect: true,
    })
    // Add target="_blank" to external links
    .use(rehypeExternalLinks, { target: '_blank' })
    // Format, then serialize the HTML AST
    .use(rehypeFormat)
    .use(rehypeStringify);

  return processor.processSync(markdown).toString();
}
/**
 * Parse a Markdown string (with GFM extensions) into a Markdown AST.
 *
 * @param {string} markdown - Markdown source.
 * @returns {object} Root node of the parsed tree.
 */
function getMarkdownAst(markdown) {
  const parser = unified().use(remarkParse).use(remarkGfm);
  return parser.parse(markdown);
}
/**
 * Collect the headings of a Markdown string.
 *
 * Headings of every level are returned; headings without any text are left
 * out. `depth` does not filter anything: it is only the fallback level for a
 * heading node that has no `depth` of its own.
 *
 * @param {string} markdown - Markdown source.
 * @param {number} [depth=1] - Fallback level for heading nodes without a depth.
 * @returns {{ depth: number, value: string }[]} Headings in visiting order.
 */
export function getHeadings(markdown, depth = 1) {
  const headings = [];
  const collect = (heading) => {
    const value = textFromNode(heading);
    if (!value) {
      return;
    }
    headings.push({ depth: heading.depth ?? depth, value });
  };
  visit(getMarkdownAst(markdown), 'heading', collect);
  return headings;
}
/**
 * Extract plain text from a Markdown AST node (recursively).
 *
 * Handles headings with nested phrasing content such as links and emphasis.
 * Inline code counts as text too, so a heading like "Using `fetch`" yields
 * "Using fetch" instead of losing the code part.
 *
 * @param {object} node - Markdown AST node.
 * @returns {string} Concatenated text of the node and its descendants; an
 * empty string for nodes without text and for missing or non-object input.
 */
export function textFromNode(node) {
  if (!node || typeof node !== 'object') {
    return '';
  }
  // Both node types keep their text in `value`
  if (node.type === 'text' || node.type === 'inlineCode') {
    return node.value ?? '';
  }
  if ('children' in node && Array.isArray(node.children)) {
    return node.children.map((child) => textFromNode(child)).join('');
  }
  return '';
}
/**
 * Split a Markdown string into multiple Markdown documents.
 *
 * Every top-level heading becomes a document of its own, and each run of
 * top-level nodes between headings (or before the first / after the last one)
 * becomes one document.
 *
 * @param {string} markdown - Markdown source.
 * @returns {string[]} The documents, in source order.
 */
export function splitContent(markdown) {
  const { children } = getMarkdownAst(markdown);
  /** Finished sections, each a list of nodes */
  const sections = [];
  /** Non-heading nodes collected since the last heading */
  let pending = [];
  const flushPending = () => {
    if (pending.length > 0) {
      sections.push(pending);
      pending = [];
    }
  };
  for (const node of children ?? []) {
    if (node.type === 'heading') {
      // Close the running section, then give the heading its own section
      flushPending();
      sections.push([node]);
    } else {
      pending.push(node);
    }
  }
  // Whatever followed the last heading
  flushPending();
  return sections.map((section) => createDocument(section));
}
/**
 * Serialize a list of Markdown AST nodes into one Markdown string.
 *
 * The nodes are wrapped in a root node, stringified with GFM support, and the
 * surrounding whitespace is trimmed.
 *
 * @param {object[]} nodes - Nodes that become the children of the document root.
 * @returns {string} The trimmed Markdown string.
 */
function createDocument(nodes) {
  const root = { type: 'root', children: nodes };
  const serializer = unified().use(remarkStringify).use(remarkGfm);
  return serializer.stringify(root).trim();
}