UNPKG

@limetech/lime-elements

Version:
248 lines (247 loc) • 9.75 kB
import { unified } from "unified";
import rehypeParse from "rehype-parse";
import rehypeSanitize, { defaultSchema } from "rehype-sanitize";
import rehypeStringify from "rehype-stringify";
import { visit } from "unist-util-visit";

// Image MIME types accepted for embedded `data:` image sources.
const allowedMimeTypes = new Set([
    'image/png',
    'image/jpeg',
    'image/jpg',
    'image/gif',
    'image/webp',
    'image/bmp',
    'image/x-icon',
    'image/vnd.microsoft.icon',
]);

// Upper bound on how many times the CSS filter is re-applied while looking for
// a stable result. Ordinary CSS settles after one or two passes.
const MAX_CSS_SANITIZE_PASSES = 10;

// Well-known script-capable CSS constructs (mostly relevant in legacy engines).
const DANGEROUS_CSS_PATTERNS = [
    /behavior\s*:/gi,
    /expression\s*\(/gi,
    /-moz-binding\s*:/gi,
    /vbscript\s*:/gi,
    /javascript\s*:/gi,
];

// `@import` rules, with or without a `url(...)` wrapper.
const CSS_IMPORT_PATTERN = /@import\s+(?:url\([^)]{0,2000}\)|[^;]{0,2000});?/gi;

// `url(...)` tokens; the capture groups are the double-quoted, single-quoted
// and unquoted forms of the URL, in that order.
const CSS_URL_PATTERN = /url\(\s{0,50}(?:"([^"]{0,2000})"|'([^']{0,2000})'|([^"')\s]{1,2000}))\s{0,50}\)/gi;

/**
 * Sanitizes email HTML to prevent XSS and other security issues while
 * preserving the original visual appearance (layout, colors, fonts, etc.).
 *
 * This differs from the markdown sanitizer (`sanitizeHTML`) in that:
 * - **All inline CSS is preserved** (no style property filtering).
 * - Dangerous CSS properties like `behavior`, `expression`, `-moz-binding` are removed.
 * - Standard dangerous tags/attributes are blocked (script, event handlers, javascript: URLs).
 *
 * @param html - The HTML string to sanitize (typically an email body).
 * @returns The sanitized HTML string.
 */
export async function sanitizeEmailHTML(html) {
    const file = await unified()
        .use(rehypeParse)
        .use(rehypeSanitize, emailSanitizationSchema)
        .use(() => {
            return (tree) => {
                visit(tree, 'element', (node) => {
                    sanitizeDangerousCss(node);
                    sanitizeDangerousUrls(node);
                });
            };
        })
        .use(rehypeStringify)
        .process(html);
    return file.toString();
}

// Base src protocols from defaultSchema, extended with 'data' below.
const defaultSrcProtocols = defaultSchema.protocols?.src ?? [];

/**
 * Rehype-sanitize schema that allows all standard HTML elements and attributes
 * needed for rich email rendering, including `style`.
 *
 * Hoisted to module scope since the schema has no runtime dependencies and
 * doesn't need to be reconstructed on every sanitization call.
 */
const emailSanitizationSchema = {
    ...defaultSchema,
    // Disable the 'user-content-' prefix that rehype-sanitize adds to
    // id and name attributes. Email HTML uses ids for internal anchor
    // links (href="#section") that must resolve without a prefix.
    clobberPrefix: '',
    protocols: {
        ...defaultSchema.protocols,
        // Email bodies often embed images as data URLs. We allow `data:` here,
        // but still validate the MIME type in `sanitizeDangerousUrls`.
        src: [...defaultSrcProtocols, 'data'],
    },
    attributes: {
        ...defaultSchema.attributes,
        table: [
            ...(defaultSchema.attributes.table ?? []),
            // Email HTML often relies on these legacy attributes.
            // rehype-parse converts to camelCase HAST properties.
            'cellPadding',
            'cellSpacing',
            'border',
            'dir',
            'width',
            'height',
        ],
        font: ['color', 'size', 'face'],
        meta: ['charset', 'content', 'name'],
        colgroup: [...(defaultSchema.attributes.colgroup ?? []), 'span'],
        col: [...(defaultSchema.attributes.col ?? []), 'width', 'span'],
        '*': [
            ...(defaultSchema.attributes['*'] ?? []),
            'style', // Allow inline styles on all elements
            // NOTE: rehype/parse maps `class` to the HAST property name
            // `className`, which is what rehype-sanitize checks.
            'className',
            'id', // Allow id for anchors/internal navigation
            // Used to store remote image URLs without loading them immediately.
            'dataRemoteSrc',
        ],
    },
    // Allow common email-specific tags
    tagNames: [
        ...(defaultSchema.tagNames ?? []),
        // Allow full-document HTML emails. These tags won't render as text,
        // but keeping them avoids their contents being surfaced as plain text.
        'html',
        'head',
        'body',
        'title',
        'meta',
        // Preserve embedded email CSS.
        'style',
        // Preserve table column sizing when using <colgroup>/<col>.
        'colgroup',
        'col',
        'center', // Deprecated but widely used in email
        'font', // Deprecated but widely used in email
    ],
};

/**
 * Validates and normalizes potentially dangerous URL attributes.
 *
 * - On every element, a `dataRemoteSrc` property that arrived with the input is
 *   kept only when it is an http/https URL; anything else is removed. The
 *   property is allow-listed in the schema, so without this check a value
 *   written by the email author would pass through unvalidated.
 * - On `<img>`, `src` is kept only for safe embedded `data:image/*` URLs.
 *   Remote http/https sources are moved to `dataRemoteSrc`; every other
 *   `src` is stripped.
 *
 * @param node - The HTML element node to sanitize (mutated in place).
 */
function sanitizeDangerousUrls(node) {
    if (!node?.tagName) {
        return;
    }
    const properties = node.properties;
    if (!properties) {
        return;
    }

    // Re-validate any pre-existing stored remote URL with the same rule that is
    // applied to values this function writes itself.
    if (properties.dataRemoteSrc !== undefined) {
        const existing = properties.dataRemoteSrc;
        const validated = typeof existing === 'string'
            ? getRemoteEmailImageSrc(existing)
            : undefined;
        if (validated) {
            properties.dataRemoteSrc = validated;
        }
        else {
            delete properties.dataRemoteSrc;
        }
    }

    if (node.tagName !== 'img') {
        return;
    }
    const src = properties.src;
    if (typeof src !== 'string') {
        return;
    }

    const safeSrc = getSafeEmailImageSrc(src);
    if (safeSrc) {
        properties.src = safeSrc;
        delete properties.dataRemoteSrc;
        return;
    }

    const remoteSrc = getRemoteEmailImageSrc(src);
    if (remoteSrc) {
        // Avoid loading remote images by default. Store the URL so the viewer can
        // opt-in and restore it later.
        properties.dataRemoteSrc = remoteSrc;
        delete properties.src;
        return;
    }

    // Keep the <img> but strip the source.
    delete properties.src;
}

/**
 * Returns a safe image `src` for email rendering.
 *
 * Only permits embedded `data:` URLs with an allow-listed image MIME type.
 *
 * @param src - The raw `src` attribute value.
 * @returns A safe `src` to keep, or `undefined` to strip it.
 */
function getSafeEmailImageSrc(src) {
    const trimmedSrc = src.trim();
    if (!trimmedSrc) {
        return undefined;
    }
    // Only allow embedded images. Loading remote images has privacy implications
    // (tracking pixels) and may leak network details.
    if (!trimmedSrc.toLowerCase().startsWith('data:')) {
        return undefined;
    }
    const mimeType = getDataUrlMimeType(trimmedSrc);
    if (!mimeType || !allowedMimeTypes.has(mimeType)) {
        return undefined;
    }
    return trimmedSrc;
}

/**
 * Returns a safe remote image URL to keep for later opt-in loading.
 *
 * @param src - The raw `src` attribute value.
 * @returns The trimmed URL if it starts with `http://` or `https://`
 * (case-insensitive), otherwise `undefined`.
 */
function getRemoteEmailImageSrc(src) {
    const trimmedSrc = src.trim();
    const lower = trimmedSrc.toLowerCase();
    if (lower.startsWith('http://') || lower.startsWith('https://')) {
        return trimmedSrc;
    }
    return undefined;
}

/**
 * Extracts the MIME type from a `data:` URL.
 *
 * @param dataUrl - A `data:` URL string.
 * @returns The lower-cased MIME type if present, otherwise `undefined`.
 */
function getDataUrlMimeType(dataUrl) {
    // data:[<mime type>][;charset=<charset>][;base64],<data>
    const match = /^data:([^;,]+)(?:;charset=[^;,]+)?(?:;base64)?,/i.exec(dataUrl);
    const mimeType = match?.[1]?.toLowerCase();
    return mimeType || undefined;
}

/**
 * Removes dangerous constructs from inline CSS (style attributes and `<style>` tags).
 *
 * @param node - The HTML element node to sanitize (mutated in place).
 */
function sanitizeDangerousCss(node) {
    if (!node?.tagName) {
        return;
    }
    if (node.properties?.style && typeof node.properties.style === 'string') {
        node.properties.style = stripDangerousCss(node.properties.style);
    }
    if (node.tagName === 'style' && Array.isArray(node.children)) {
        for (const child of node.children) {
            if (child?.type === 'text' && typeof child.value === 'string') {
                child.value = stripDangerousCss(child.value);
            }
        }
    }
}

/**
 * Tells whether a URL found inside a CSS `url(...)` token may be kept.
 *
 * `data:`, `cid:` and fragment (`#...`) references are accepted, as are
 * relative references. Protocol-relative URLs (`//host/...`) and any other
 * explicit scheme are rejected.
 *
 * @param value - The URL text taken from inside `url(...)`.
 * @returns `true` when the URL may stay in the CSS.
 */
function isSafeCssUrl(value) {
    const normalized = value.trim().toLowerCase();
    if (normalized.startsWith('data:') ||
        normalized.startsWith('cid:') ||
        normalized.startsWith('#')) {
        return true;
    }
    return (!normalized.startsWith('//') &&
        !/^[a-z][a-z0-9+.-]*:/i.test(normalized));
}

/**
 * Applies one filtering pass: removes the dangerous tokens, removes `@import`
 * rules and blanks out every `url(...)` whose target is not considered safe.
 *
 * @param css - A CSS string.
 * @returns The CSS after a single pass.
 */
function stripDangerousCssOnce(css) {
    let cleaned = css;
    for (const pattern of DANGEROUS_CSS_PATTERNS) {
        cleaned = cleaned.replaceAll(pattern, '');
    }
    cleaned = cleaned.replaceAll(CSS_IMPORT_PATTERN, '');
    cleaned = cleaned.replaceAll(CSS_URL_PATTERN, (match, first, second, third) => {
        const value = (first ?? second ?? third ?? '').trim();
        return isSafeCssUrl(value) ? match : 'url("")';
    });
    return cleaned;
}

/**
 * Removes common script-capable CSS constructs.
 *
 * Minimal defensive filtering. We preserve styling for fidelity, but drop
 * well-known script-capable constructs (mostly relevant in legacy engines).
 *
 * Removing a token can splice the surrounding text into a new dangerous token
 * (`behabehavior:vior:` collapses to `behavior:`), so the pass is repeated
 * until the output stops changing. If it has not settled after
 * `MAX_CSS_SANITIZE_PASSES` passes, an empty string is returned instead of
 * CSS that may still contain a dangerous construct.
 *
 * @param css - A CSS string from a style attribute or `<style>` tag.
 * @returns The CSS with dangerous constructs removed.
 */
function stripDangerousCss(css) {
    let cleaned = css;
    for (let pass = 0; pass < MAX_CSS_SANITIZE_PASSES; pass += 1) {
        const next = stripDangerousCssOnce(cleaned);
        if (next === cleaned) {
            return next;
        }
        cleaned = next;
    }
    // Still changing after the pass budget: fail closed.
    return '';
}