UNPKG

@humanspeak/svelte-markdown

Version:

Markdown and HTML renderer for Svelte 5 — built for rendering streaming AI agent output from Claude Code, ChatGPT, and agentic workflows. XSS-safe defaults, streaming-aware sanitization, token caching, TypeScript types, and Svelte 5 runes.

171 lines (170 loc) 7.56 kB
/** Token type emitted for inline math (`\(...\)`, opt-in `$...$`). */
export const INLINE_KATEX_TOKEN = 'inlineKatex';

/** Token type emitted for block math (`\[...\]`, `$$...$$`, AMS environments). */
export const BLOCK_KATEX_TOKEN = 'blockKatex';

const AMS_ENVIRONMENTS = ['equation', 'align', 'alignat', 'gather', 'CD'];

// `\\*` in the JS string becomes `\*` in the regex source, which matches a
// literal `*` (the AMS un-numbered variant). Without the escape, `equation*`
// in a regex would mean "equatio + zero-or-more n", which silently fails to
// match `\begin{equation*}`.
const AMS_NAMES = AMS_ENVIRONMENTS.flatMap((name) => [name, `${name}\\*`]).join('|');

// `\s*` (instead of `[ \t]*\n`) lets these rules match both the canonical
// own-line form (`$$\nx\n$$`) and the single-line form (`$$x$$`) that LLMs
// emit constantly. Without this, `$$x = \frac{...}{2a}$$` survives as
// paragraph text because the inline tokenizer also rejects `$$` openers.
const blockBracketRule = /^\\\[\s*([\s\S]+?)\s*\\\](?:\n|$)/;
const blockDollarRule = /^\$\$\s*([\s\S]+?)\s*\$\$(?:\n|$)/;

// `\1` forces `\end{...}` to name the same environment as `\begin{...}`.
const blockAmsRule = new RegExp(
    `^\\\\begin\\{(${AMS_NAMES})\\}[\\s\\S]+?\\\\end\\{\\1\\}(?:\\n|$)?`
);

const inlineParenRule = /^\\\(([\s\S]+?)\\\)/;

// Mirrors the "standard" rule from upstream marked-katex-extension but
// extends the boundary class with `)`, `]`, `}` so expressions like
// `$0$)`, `$x$]`, `$x$}` (closing math right before a closing bracket)
// still match. Currency strings like `$5,000 across $42` remain unmatched
// because digits after the closing `$` aren't in any boundary class.
//
// The trailing escapes are the full-width CJK punctuation marks
// U+FF1F `?`, U+FF01 `!`, U+3002 `。`, U+FF0C `,`, U+FF1A `:` from the
// upstream rule. They are written as `\uXXXX` escapes so they cannot be
// folded into their ASCII look-alikes by tooling that normalizes text.
const inlineDollarRule =
    /^\$(?!\$)((?:\\.|[^\\\n$])+?)\$(?=[\s?!.,:)\]}\uFF1F\uFF01\u3002\uFF0C\uFF1A]|$)/;

/**
 * Finds the smallest index at which any of `needles` occurs in `src`.
 *
 * @param {string} src - Text to search.
 * @param {string[]} needles - Literal substrings to look for.
 * @returns {number | undefined} The earliest match index, or `undefined`
 *   when none of the needles occurs in `src`.
 */
const earliestIndex = (src, needles) => {
    let best = -1;
    for (const needle of needles) {
        const index = src.indexOf(needle);
        if (index !== -1 && (best === -1 || index < best)) best = index;
    }
    return best === -1 ? undefined : best;
};

/**
 * Builds a KaTeX token object in the shape both tokenizers return.
 *
 * @param {string} type - Token type (`INLINE_KATEX_TOKEN` or `BLOCK_KATEX_TOKEN`).
 * @param {string} raw - The exact source text consumed by the match.
 * @param {string} text - The TeX source; surrounding whitespace is trimmed.
 * @param {boolean} displayMode - `true` for block math, `false` for inline.
 * @returns {{ type: string, raw: string, text: string, displayMode: boolean }}
 */
const katexToken = (type, raw, text, displayMode) => ({
    type,
    raw,
    text: text.trim(),
    displayMode
});

/**
 * Creates a marked extension that tokenizes KaTeX math expressions into
 * custom `inlineKatex` and `blockKatex` tokens.
 *
 * Default delimiter set (mirrors KaTeX's own `auto-render` defaults):
 *
 * | Delimiter pair | Level | `displayMode` |
 * |---|---|---|
 * | `\(...\)` | inline | `false` |
 * | `\[...\]` (own-line **or** single-line) | block | `true` |
 * | `$$...$$` (own-line **or** single-line) | block | `true` |
 * | `\begin{equation}...\end{equation}` and other AMS envs | block | `true` |
 *
 * Both `\[x\]` and `\[\nx\n\]` parse as block math; same for `$$x$$` and the
 * own-line `$$\nx\n$$` form. LLMs overwhelmingly emit the single-line form,
 * so accepting both keeps the extension drop-in compatible with their output
 * without losing the canonical own-line shape.
 *
 * Supported AMS environments: `equation`, `align`, `alignat`, `gather`, `CD`,
 * plus their starred variants (e.g. `equation*`).
 *
 * `$...$` inline is **off** by default — KaTeX itself opts out of single-
 * dollar inline because of currency-string clashes (`$5,000` etc.). Pass
 * `{ singleDollarInline: true }` to enable it; when enabled, the closing `$`
 * must be followed by whitespace, punctuation, a closing bracket, or the end
 * of input (a rule adapted from upstream `marked-katex-extension`), so
 * typical currency strings such as `$5,000 across $42` still won't match.
 *
 * @example
 * ```svelte
 * <script lang="ts">
 *   import SvelteMarkdown from '@humanspeak/svelte-markdown'
 *   import { markedKatex, KatexRenderer } from '@humanspeak/svelte-markdown/extensions'
 *
 *   const renderers = { inlineKatex: KatexRenderer, blockKatex: KatexRenderer }
 * </script>
 *
 * <SvelteMarkdown
 *   source={markdown}
 *   extensions={[markedKatex()]}
 *   {renderers}
 * />
 * ```
 *
 * Pair with `KatexRenderer` from the same subpath, or supply your own
 * component that accepts `{ text: string; displayMode?: boolean }`.
 *
 * @param options - {@link MarkedKatexOptions}
 * @returns A `MarkedExtension` containing one block-level and one inline tokenizer
 */
export function markedKatex(options = {}) {
    const { singleDollarInline = false } = options;

    // The token-cache hash serializes functions via `fn.toString()`, which
    // can't see option values that live in a closure (our tokenizers'
    // source code is identical regardless of `singleDollarInline`). This
    // marker makes the option visible to JSON.stringify so two
    // `markedKatex({ ... })` calls with different options produce
    // different cache keys — without it, toggling the option at runtime
    // returns stale tokens. `MarkedExtension` doesn't declare arbitrary
    // fields, but Marked.use() shallow-spreads our object into
    // `defaults`, so the marker survives.
    const ext = {
        _humanspeakKatexConfig: JSON.stringify({ singleDollarInline }),
        extensions: [
            {
                name: BLOCK_KATEX_TOKEN,
                level: 'block',
                start(src) {
                    return earliestIndex(src, ['\\[', '$$', '\\begin{']);
                },
                tokenizer(src) {
                    // `\[...\]` is tried before `$$...$$`; both capture the
                    // TeX body in group 1.
                    const delimited = src.match(blockBracketRule) || src.match(blockDollarRule);
                    if (delimited) {
                        return katexToken(BLOCK_KATEX_TOKEN, delimited[0], delimited[1], true);
                    }
                    const ams = src.match(blockAmsRule);
                    if (ams) {
                        // KaTeX parses `\begin{...}...\end{...}` natively, so
                        // pass the entire matched string through as `text`
                        // (trimming drops the optional trailing newline).
                        return katexToken(BLOCK_KATEX_TOKEN, ams[0], ams[0], true);
                    }
                    return undefined;
                }
            },
            {
                name: INLINE_KATEX_TOKEN,
                level: 'inline',
                start(src) {
                    const needles = singleDollarInline ? ['\\(', '$'] : ['\\('];
                    return earliestIndex(src, needles);
                },
                tokenizer(src) {
                    const paren = src.match(inlineParenRule);
                    if (paren) {
                        return katexToken(INLINE_KATEX_TOKEN, paren[0], paren[1], false);
                    }
                    if (singleDollarInline) {
                        const dollar = src.match(inlineDollarRule);
                        if (dollar) {
                            return katexToken(INLINE_KATEX_TOKEN, dollar[0], dollar[1], false);
                        }
                    }
                    return undefined;
                }
            }
        ]
    };
    return ext;
}