@humanspeak/svelte-markdown
Version:
Markdown and HTML renderer for Svelte 5 — built for rendering streaming AI agent output from Claude Code, ChatGPT, and agentic workflows. XSS-safe defaults, streaming-aware sanitization, token caching, TypeScript types, and Svelte 5 runes.
105 lines (104 loc) • 3.74 kB
JavaScript
/**
* URL and HTML attribute sanitization utilities for XSS prevention.
*
* These functions are applied in the Parser before tokens reach any
* renderer component or snippet, ensuring custom renderers cannot
* bypass sanitization.
*
* @see https://github.com/humanspeak/svelte-markdown/issues/272
* @packageDocumentation
*/
/**
 * Protocols considered safe for href/src attributes.
 * Entries keep the trailing colon because they are matched against `URL#protocol`.
 */
const SAFE_PROTOCOLS = new Set(['http:', 'https:', 'mailto:', 'tel:']);
/**
 * URL attributes in HTML that should be run through the sanitizer.
 * Covers standard attributes that can trigger navigation or resource loading.
 *
 * Names are stored lowercase; attribute names are lowercased before lookup.
 * `xlink:href` is the SVG (XLink) spelling of `href` and navigates the same
 * way, so it must receive the same protocol check.
 */
const URL_ATTRIBUTES = new Set([
    'href',
    'src',
    'action',
    'formaction',
    'cite',
    'data',
    'poster',
    'xlink:href'
]);
/** Fast-path: most URLs are http/https — avoid `new URL()` for these. */
const SAFE_PREFIX_RE = /^https?:/i;
/** Run of whitespace at the very start of a URL; removed before any protocol check. */
const LEADING_WS_RE = /^\s+/;
/** First character of a relative reference: `#`, `/`, `?` or `.`. */
const RELATIVE_RE = /^[#/?.]/;
/**
 * Sanitizes a URL against a protocol allowlist.
 *
 * Allows `http:`, `https:`, `mailto:`, `tel:`, and relative URLs
 * (starting with `/`, `#`, `?`, `.`, or containing no protocol). Blocks
 * everything else including `javascript:`, `data:`, `vbscript:`, etc.
 *
 * Handles mixed-case protocols and leading whitespace. Leading whitespace
 * is removed from the value that is returned.
 *
 * Non-string input (for example `true` for a valueless attribute) is
 * blocked instead of throwing, so the sanitizer fails closed.
 *
 * The `context` parameter provides the token type and HTML tag name,
 * enabling per-element policies in custom overrides. This default
 * implementation does not read it.
 *
 * @param {string} url - Candidate URL taken from markdown or an HTML attribute.
 * @param {unknown} [_context] - Ignored here; accepted for signature parity with custom sanitizers.
 * @returns {string} The URL without leading whitespace when allowed, otherwise `''`.
 */
export const defaultSanitizeUrl = (url, _context) => {
    // Only non-empty strings can be URLs. Rejecting other types up front
    // avoids a TypeError from `.replace` on booleans, numbers or objects.
    if (typeof url !== 'string' || url === '') {
        return '';
    }
    const trimmed = url.replace(LEADING_WS_RE, '');
    // Relative URLs are safe: #anchor, /path, ?query, ./relative, ../parent
    if (RELATIVE_RE.test(trimmed)) {
        return trimmed;
    }
    // No colon means no protocol — safe relative URL
    if (!trimmed.includes(':')) {
        return trimmed;
    }
    // Fast-path for http/https — avoids new URL() allocation
    if (SAFE_PREFIX_RE.test(trimmed)) {
        return trimmed;
    }
    try {
        // The base lets relative references that merely contain a colon
        // (e.g. `foo/bar:baz`) parse; they then report the base's `http:`.
        // `protocol` comes back lowercased, so the Set lookup is case-safe.
        const parsed = new URL(trimmed, 'http://localhost');
        if (SAFE_PROTOCOLS.has(parsed.protocol)) {
            return trimmed;
        }
    }
    catch {
        // Malformed URL — block it
    }
    return '';
};
/**
 * Identity URL sanitizer: hands every URL back exactly as it was received.
 *
 * Pass it as `sanitizeUrl` to switch URL sanitization off completely:
 * ```svelte
 * <SvelteMarkdown source={markdown} sanitizeUrl={unsanitizedUrl} />
 * ```
 *
 * @param url - URL to pass through.
 * @returns The same `url` value, untouched.
 */
export const unsanitizedUrl = (url) => {
    return url;
};
/**
 * Identity attribute sanitizer: returns the attribute object it was given,
 * without copying or filtering it.
 *
 * Pass it as `sanitizeAttributes` to switch attribute sanitization off completely:
 * ```svelte
 * <SvelteMarkdown source={markdown} sanitizeAttributes={unsanitizedAttributes} />
 * ```
 *
 * @param attributes - Attribute object to pass through.
 * @returns The very same `attributes` reference.
 */
export const unsanitizedAttributes = (attributes) => {
    return attributes;
};
/**
 * Sanitizes an HTML attribute object by:
 * 1. Removing all event handler attributes (`on*`) and `srcdoc`
 * 2. Running URL-bearing attributes through the URL sanitizer and
 *    dropping them when the sanitizer returns an empty/falsy value
 *
 * Attribute names are matched case-insensitively; surviving attributes
 * keep their original key casing.
 *
 * The `context` parameter provides the HTML tag name, enabling
 * per-element policies in custom overrides (e.g. stricter rules
 * for `<iframe>` than `<a>`). It is forwarded unchanged to `sanitizeUrl`.
 *
 * Returns a new object; does not mutate the input.
 *
 * @param {Record<string, unknown> | null | undefined} attributes - Raw attributes; `null`/`undefined` is treated as "no attributes".
 * @param {unknown} context - Forwarded as the second argument to `sanitizeUrl`.
 * @param {(url: any, context: any) => string} [sanitizeUrl] - URL sanitizer; defaults to `defaultSanitizeUrl` so omitting it fails closed instead of throwing.
 * @returns {Record<string, unknown>} A fresh object containing only the attributes that passed.
 */
export const defaultSanitizeAttributes = (attributes, context, sanitizeUrl = defaultSanitizeUrl) => {
    const result = {};
    // Nothing to sanitize — avoids Object.entries throwing on null/undefined.
    if (attributes == null) {
        return result;
    }
    for (const [key, value] of Object.entries(attributes)) {
        const lower = key.toLowerCase();
        // Strip event handlers (onclick, onerror, onload, etc.)
        // Strip srcdoc — allows arbitrary HTML/script execution in iframes
        if (lower.startsWith('on') || lower === 'srcdoc') {
            continue;
        }
        // Sanitize URL-bearing attributes; a blocked URL removes the attribute entirely
        if (URL_ATTRIBUTES.has(lower)) {
            const sanitized = sanitizeUrl(value, context);
            if (sanitized) {
                result[key] = sanitized;
            }
            continue;
        }
        result[key] = value;
    }
    return result;
};