UNPKG

dompurify

Version:

DOMPurify is a DOM-only, super-fast, uber-tolerant XSS sanitizer for HTML, MathML and SVG. It runs as JavaScript and works in all modern browsers, as well as in Node.js (via jsdom). DOMPurify is written by security people who have vast background in web attacks and XSS.

1,494 lines (1,329 loc) 95 kB
import type { Config, UseProfilesConfig } from './config';
import type { DOMPurify, HooksMap, HookFunction, WindowLike } from './types';
import * as TAGS from './tags.js';
import * as ATTRS from './attrs.js';
import * as EXPRESSIONS from './regexp.js';
import {
  addToSet,
  clone,
  entries,
  freeze,
  seal,
  arrayForEach,
  arrayIsArray,
  arrayLastIndexOf,
  arrayPop,
  arrayPush,
  arraySplice,
  stringMatch,
  stringReplace,
  stringToLowerCase,
  stringToString,
  stringIndexOf,
  stringTrim,
  regExpTest,
  isRegex,
  typeErrorCreate,
  lookupGetter,
  create,
  objectHasOwnProperty,
  stringifyValue,
} from './utils.js';

export type { Config } from './config';

export type {
  DOMPurify,
  RemovedElement,
  RemovedAttribute,
  HookName,
  NodeHook,
  ElementHook,
  DocumentFragmentHook,
  UponSanitizeElementHook,
  UponSanitizeAttributeHook,
  UponSanitizeElementHookEvent,
  UponSanitizeAttributeHookEvent,
  WindowLike,
} from './types';

declare const VERSION: string;

/**
 * Numeric `Node.nodeType` values keyed by a readable name.
 */
// https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
const NODE_TYPE = {
  element: 1,
  attribute: 2,
  text: 3,
  cdataSection: 4,
  entityReference: 5, // Deprecated
  entityNode: 6, // Deprecated
  processingInstruction: 7,
  comment: 8,
  document: 9,
  documentType: 10,
  documentFragment: 11,
  notation: 12, // Deprecated
};

/**
 * Returns the ambient `window`, or `null` when no global `window` is defined.
 *
 * @returns the global window object or null
 */
const getGlobal = function (): WindowLike {
  return typeof window === 'undefined' ? null : window;
};

/**
 * Creates a no-op policy for internal use only.
 * Don't export this function outside this module!
 * @param trustedTypes The policy factory.
 * @param purifyHostElement The Script element used to load DOMPurify (to determine policy name suffix).
 * @return The policy created (or null, if Trusted Types
 * are not supported or creating the policy failed).
 */
const _createTrustedTypesPolicy = function (
  trustedTypes: TrustedTypePolicyFactory,
  purifyHostElement: HTMLScriptElement
) {
  // `typeof null` is 'object', so a falsy factory must be rejected up front;
  // otherwise a `null` factory would throw on the property access below
  // instead of yielding the documented `null` result.
  if (
    !trustedTypes ||
    typeof trustedTypes !== 'object' ||
    typeof trustedTypes.createPolicy !== 'function'
  ) {
    return null;
  }

  // Allow the callers to control the unique policy name
  // by adding a data-tt-policy-suffix to the script element with the DOMPurify.
  // Policy creation with duplicate names throws in Trusted Types.
  let suffix = null;
  const ATTR_NAME = 'data-tt-policy-suffix';
  if (purifyHostElement && purifyHostElement.hasAttribute(ATTR_NAME)) {
    suffix = purifyHostElement.getAttribute(ATTR_NAME);
  }

  // An empty or missing suffix yields the bare name 'dompurify'.
  const policyName = 'dompurify' + (suffix ? '#' + suffix : '');

  try {
    // Both callbacks return their input unchanged (no-op policy).
    return trustedTypes.createPolicy(policyName, {
      createHTML(html) {
        return html;
      },
      createScriptURL(scriptUrl) {
        return scriptUrl;
      },
    });
  } catch (_) {
    // Policy creation failed (most likely another DOMPurify script has
    // already run). Skip creating the policy, as this will only cause errors
    // if TT are enforced.
    console.warn(
      'TrustedTypes policy ' + policyName + ' could not be created.'
    );
    return null;
  }
};

/**
 * Builds a fresh hooks map: one empty array per hook entry point.
 *
 * @returns a new map; no arrays are shared between calls
 */
const _createHooksMap = function (): HooksMap {
  return {
    afterSanitizeAttributes: [],
    afterSanitizeElements: [],
    afterSanitizeShadowDOM: [],
    beforeSanitizeAttributes: [],
    beforeSanitizeElements: [],
    beforeSanitizeShadowDOM: [],
    uponSanitizeAttribute: [],
    uponSanitizeElement: [],
    uponSanitizeShadowNode: [],
  };
};

/**
 * Resolve a set-valued configuration option: a fresh set built from
 * cfg[key] when it is an own array property (seeded with a clone of
 * options.base when given, case-normalized via options.transform),
 * the fallback set otherwise.
 *
 * @param cfg the cloned, prototype-free configuration object
 * @param key the configuration property to read
 * @param fallback the set to use when the option is absent or not an array
 * @param options transform and optional base set to merge into
 * @returns the resolved set
 */
const _resolveSetOption = function (
  cfg: Config,
  key: keyof Config,
  fallback: Record<string, boolean>,
  options: {
    transform: Parameters<typeof addToSet>[2];
    base?: Record<string, boolean>;
  }
): Record<string, boolean> {
  // Note: the fallback is returned as-is (not cloned).
  return objectHasOwnProperty(cfg, key) && arrayIsArray(cfg[key])
    ? addToSet(
        options.base ? clone(options.base) : {},
        cfg[key] as readonly unknown[],
        options.transform
      )
    : fallback;
};

/**
 * Creates a DOMPurify instance bound to the given window-like object.
 *
 * The returned value is itself callable: invoking it with another window-like
 * object creates a new instance for that window. When the supplied window has
 * no usable `document` / `Element`, the instance is returned early with
 * `isSupported` set to `false`.
 *
 * @param window the window-like object to bind to; defaults to `getGlobal()`
 * @returns the DOMPurify instance
 */
function createDOMPurify(window: WindowLike = getGlobal()): DOMPurify {
  const DOMPurify: DOMPurify = (root: WindowLike) => createDOMPurify(root);

  DOMPurify.version = VERSION;

  DOMPurify.removed = [];

  if (
    !window ||
    !window.document ||
    window.document.nodeType !== NODE_TYPE.document ||
    !window.Element
  ) {
    // Not running in a browser, provide a factory function
    // so that you can pass your own Window
    DOMPurify.isSupported = false;

    return DOMPurify;
  }

  let { document } = window;

  // `document` may be re-pointed later; keep the one that came with `window`.
  const originalDocument = document;
  const currentScript: HTMLScriptElement =
    originalDocument.currentScript as HTMLScriptElement;
  const {
    DocumentFragment,
    HTMLTemplateElement,
    Node,
    Element,
    NodeFilter,
    NamedNodeMap = window.NamedNodeMap || (window as any).MozNamedAttrMap,
    HTMLFormElement,
    DOMParser,
    trustedTypes,
  } = window;

  const ElementPrototype = Element.prototype;

  // Accessors looked up once on Element.prototype and cached for later use.
  const cloneNode = lookupGetter(ElementPrototype, 'cloneNode');
  const remove = lookupGetter(ElementPrototype, 'remove');
  const getNextSibling = lookupGetter(ElementPrototype, 'nextSibling');
  const getChildNodes = lookupGetter(ElementPrototype, 'childNodes');
  const getParentNode = lookupGetter(ElementPrototype, 'parentNode');
  const getShadowRoot = lookupGetter(ElementPrototype, 'shadowRoot');
  const getAttributes = lookupGetter(ElementPrototype, 'attributes');
// Cached `nodeType` / `nodeName` getters taken from Node.prototype; both are
// `null` when the supplied window exposes no `Node` or no `Node.prototype`.
const getNodeType =
  Node && Node.prototype ? lookupGetter(Node.prototype, 'nodeType') : null;
const getNodeName =
  Node && Node.prototype ? lookupGetter(Node.prototype, 'nodeName') : null;

// As per issue #47, the web-components registry is inherited by a
// new document created via createHTMLDocument. As per the spec
// (http://w3c.github.io/webcomponents/spec/custom/#creating-and-passing-registries)
// a new empty registry is used when creating a template contents owner
// document, so we use that as our parent document to ensure nothing
// is inherited.
if (typeof HTMLTemplateElement === 'function') {
  const template = document.createElement('template');
  if (template.content && template.content.ownerDocument) {
    document = template.content.ownerDocument;
  }
}

// The currently active Trusted Types policy (undefined until a configuration
// is parsed) and the "empty markup" value, initially a plain empty string.
let trustedTypesPolicy;
let emptyHTML = '';

// The instance's own internal Trusted Types policy. Unlike a caller-supplied
// `TRUSTED_TYPES_POLICY`, this is created at most once — Trusted Types throws
// on duplicate policy names — and is the only policy allowed to persist
// across configurations and survive `clearConfig()`.
let defaultTrustedTypesPolicy;
let defaultTrustedTypesPolicyResolved = false;

// Tracks whether we are already inside a call to the configured Trusted Types
// policy (`createHTML` or `createScriptURL`). If a supplied policy callback
// itself calls `DOMPurify.sanitize` (the cause of #1422), `sanitize` would
// re-enter the policy and recurse until the stack overflows. We detect that
// re-entry and throw a clear, actionable error instead. The guard is shared
// across both callbacks, because either one re-entering `sanitize` triggers
// the same unbounded recursion.
let IN_TRUSTED_TYPES_POLICY = 0;

/**
 * Throws when called while a policy callback is executing, i.e. while the
 * `IN_TRUSTED_TYPES_POLICY` counter is above zero. Does nothing otherwise.
 */
const _assertNotInTrustedTypesPolicy = function (): void {
  if (IN_TRUSTED_TYPES_POLICY > 0) {
    throw typeErrorCreate(
      'A configured TRUSTED_TYPES_POLICY callback (createHTML or ' +
        'createScriptURL) must not call DOMPurify.sanitize, as that causes ' +
        'infinite recursion. Do not pass a policy whose callbacks wrap ' +
        'DOMPurify as TRUSTED_TYPES_POLICY; see the "DOMPurify and Trusted ' +
        'Types" section of the README.'
    );
  }
};

/**
 * Calls the active policy's `createHTML` under the re-entrancy guard.
 * The counter is decremented in `finally`, so it is restored even when the
 * policy callback throws.
 *
 * @param html the markup to pass to the policy
 * @returns whatever the policy's `createHTML` returns
 */
const _createTrustedHTML = function (html: string): string {
  _assertNotInTrustedTypesPolicy();
  IN_TRUSTED_TYPES_POLICY++;
  try {
    return trustedTypesPolicy.createHTML(html);
  } finally {
    IN_TRUSTED_TYPES_POLICY--;
  }
};

/**
 * Calls the active policy's `createScriptURL` under the same re-entrancy
 * guard as `_createTrustedHTML`.
 *
 * @param scriptUrl the URL to pass to the policy
 * @returns whatever the policy's `createScriptURL` returns
 */
const _createTrustedScriptURL = function (scriptUrl: string): string {
  _assertNotInTrustedTypesPolicy();
  IN_TRUSTED_TYPES_POLICY++;
  try {
    return trustedTypesPolicy.createScriptURL(scriptUrl);
  } finally {
    IN_TRUSTED_TYPES_POLICY--;
  }
};

// Lazily resolve (and cache) the instance's internal default policy.
// Resolution is attempted at most once: a successful `createPolicy` cannot be
// repeated (Trusted Types throws on duplicate names), and a failed or
// unsupported attempt must not be retried on every parse.
const _getDefaultTrustedTypesPolicy = function () {
  if (!defaultTrustedTypesPolicyResolved) {
    defaultTrustedTypesPolicy = _createTrustedTypesPolicy(
      trustedTypes,
      currentScript
    );
    // Set even when creation returned null, so the attempt is never repeated.
    defaultTrustedTypesPolicyResolved = true;
  }

  return defaultTrustedTypesPolicy;
};

// Taken from `document`, which by now may be the template's owner document.
const {
  implementation,
  createNodeIterator,
  createDocumentFragment,
  getElementsByTagName,
} = document;
// `importNode` is deliberately taken from the original document.
const { importNode } = originalDocument;

let hooks = _createHooksMap();

/**
 * Expose whether this browser supports running the full DOMPurify.
 */
DOMPurify.isSupported =
  typeof entries === 'function' &&
  typeof getParentNode === 'function' &&
  implementation &&
  implementation.createHTMLDocument !== undefined;

const {
  MUSTACHE_EXPR,
  ERB_EXPR,
  TMPLIT_EXPR,
  DATA_ATTR,
  ARIA_ATTR,
  IS_SCRIPT_OR_DATA,
  ATTR_WHITESPACE,
  CUSTOM_ELEMENT,
} = EXPRESSIONS;

// Mutable: replaced per configuration by `ALLOWED_URI_REGEXP`.
let { IS_ALLOWED_URI } = EXPRESSIONS;

/**
 * We consider the elements and attributes below to be safe. Ideally
 * don't add any new ones but feel free to remove unwanted ones.
 */

/* allowed element names */
let ALLOWED_TAGS = null;
const DEFAULT_ALLOWED_TAGS = addToSet({}, [
  ...TAGS.html,
  ...TAGS.svg,
  ...TAGS.svgFilters,
  ...TAGS.mathMl,
  ...TAGS.text,
]);

/* Allowed attribute names */
let ALLOWED_ATTR = null;
const DEFAULT_ALLOWED_ATTR = addToSet({}, [
  ...ATTRS.html,
  ...ATTRS.svg,
  ...ATTRS.mathMl,
  ...ATTRS.xml,
]);

/*
 * Configure how DOMPurify should handle custom elements and their attributes as well as customized built-in elements.
 * @property {RegExp|Function|null} tagNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any custom elements)
 * @property {RegExp|Function|null} attributeNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any attributes not on the allow list)
 * @property {boolean} allowCustomizedBuiltInElements allow custom elements derived from built-ins if they pass CUSTOM_ELEMENT_HANDLING.tagNameCheck. Default: `false`.
 */
let CUSTOM_ELEMENT_HANDLING = Object.seal(
  create(null, {
    tagNameCheck: {
      writable: true,
      configurable: false,
      enumerable: true,
      value: null,
    },
    attributeNameCheck: {
      writable: true,
      configurable: false,
      enumerable: true,
      value: null,
    },
    allowCustomizedBuiltInElements: {
      writable: true,
      configurable: false,
      enumerable: true,
      value: false,
    },
  })
);

/* Explicitly forbidden tags (overrides ALLOWED_TAGS/ADD_TAGS) */
let FORBID_TAGS = null;

/* Explicitly forbidden attributes (overrides ALLOWED_ATTR/ADD_ATTR) */
let FORBID_ATTR = null;

/* Config object to store ADD_TAGS/ADD_ATTR functions (when used as functions) */
const EXTRA_ELEMENT_HANDLING = Object.seal(
  create(null, {
    tagCheck: {
      writable: true,
      configurable: false,
      enumerable: true,
      value: null,
    },
    attributeCheck: {
      writable: true,
      configurable: false,
      enumerable: true,
      value: null,
    },
  })
);

/* Decide if ARIA attributes are okay */
let ALLOW_ARIA_ATTR = true;

/* Decide if custom data attributes are okay */
let ALLOW_DATA_ATTR = true;

/* Decide if unknown protocols are okay */
let ALLOW_UNKNOWN_PROTOCOLS = false;

/* Decide if self-closing tags in attributes are allowed.
 * Usually removed due to a mXSS issue in jQuery 3.0 */
let ALLOW_SELF_CLOSE_IN_ATTR = true;

/* Output should be safe for common template engines.
 * This means, DOMPurify removes data attributes, mustaches and ERB
 */
let SAFE_FOR_TEMPLATES = false;

/* Output should be safe even for XML used within HTML and alike.
 * This means, DOMPurify removes comments when containing risky content.
 */
let SAFE_FOR_XML = true;

/* Decide if document with <html>... should be returned */
let WHOLE_DOCUMENT = false;

/* Track whether config is already set on this instance of DOMPurify. */
let SET_CONFIG = false;

/* Pristine allowlist bindings captured at setConfig() time. On the
 * persistent-config path sanitize() restores the sets from these before
 * the per-walk hook clone-guard, so a hook's in-call widening cannot
 * carry across calls. Null until setConfig() is called; reset by
 * clearConfig(). */
let SET_CONFIG_ALLOWED_TAGS = null;
let SET_CONFIG_ALLOWED_ATTR = null;

/* Decide if all elements (e.g. style, script) must be children of
 * document.body. By default, browsers might move them to document.head */
let FORCE_BODY = false;

/* Decide if a DOM `HTMLBodyElement` should be returned, instead of a html
 * string (or a TrustedHTML object if Trusted Types are supported).
 * If `WHOLE_DOCUMENT` is enabled a `HTMLHtmlElement` will be returned instead
 */
let RETURN_DOM = false;

/* Decide if a DOM `DocumentFragment` should be returned, instead of a html
 * string (or a TrustedHTML object if Trusted Types are supported) */
let RETURN_DOM_FRAGMENT = false;

/* Try to return a Trusted Type object instead of a string, return a string in
 * case Trusted Types are not supported */
let RETURN_TRUSTED_TYPE = false;

/* Output should be free from DOM clobbering attacks?
 * This sanitizes markups named with colliding, clobberable built-in DOM APIs.
 */
let SANITIZE_DOM = true;

/* Achieve full DOM Clobbering protection by isolating the namespace of named
 * properties and JS variables, mitigating attacks that abuse the HTML/DOM spec rules.
 *
 * HTML/DOM spec rules that enable DOM Clobbering:
 *   - Named Access on Window (§7.3.3)
 *   - DOM Tree Accessors (§3.1.5)
 *   - Form Element Parent-Child Relations (§4.10.3)
 *   - Iframe srcdoc / Nested WindowProxies (§4.8.5)
 *   - HTMLCollection (§4.2.10.2)
 *
 * Namespace isolation is implemented by prefixing `id` and `name` attributes
 * with a constant string, i.e., `user-content-`
 */
let SANITIZE_NAMED_PROPS = false;
const SANITIZE_NAMED_PROPS_PREFIX = 'user-content-';

/* Keep element content when removing element? */
let KEEP_CONTENT = true;

/* If a `Node` is passed to sanitize(), then performs sanitization in-place instead
 * of importing it into a new Document and returning a sanitized copy */
let IN_PLACE = false;

/* Allow usage of profiles like html, svg and mathMl */
let USE_PROFILES: UseProfilesConfig | false = {};

/* Tags to ignore content of when KEEP_CONTENT is true */
let FORBID_CONTENTS = null;
const DEFAULT_FORBID_CONTENTS = addToSet({}, [
  'annotation-xml',
  'audio',
  'colgroup',
  'desc',
  'foreignobject',
  'head',
  'iframe',
  'math',
  'mi',
  'mn',
  'mo',
  'ms',
  'mtext',
  'noembed',
  'noframes',
  'noscript',
  'plaintext',
  'script',
  // <selectedcontent> mirrors the selected <option>'s subtree, cloned by
  // the UA (customizable <select>) — including any on* handlers — and the
  // engine re-mirrors synchronously whenever a removal changes which
  // option/selectedcontent is current, even inside DOMPurify's inert
  // DOMParser document. Hoisting its children on removal re-inserts a fresh
  // mirror target ahead of the walk, which the engine refills, looping
  // forever (DoS) and amplifying output. Dropping its content on removal
  // (rather than hoisting) breaks that cascade; the content is a duplicate
  // of the option, which is sanitized on its own. See campaign-3 F1/F6.
  'selectedcontent',
  'style',
  'svg',
  'template',
  'thead',
  'title',
  'video',
  'xmp',
]);

/* Tags that are safe for data: URIs */
let DATA_URI_TAGS = null;
const DEFAULT_DATA_URI_TAGS = addToSet({}, [
  'audio',
  'video',
  'img',
  'source',
  'image',
  'track',
]);

/* Attributes safe for values like "javascript:" */
let URI_SAFE_ATTRIBUTES = null;
const DEFAULT_URI_SAFE_ATTRIBUTES = addToSet({}, [
  'alt',
  'class',
  'for',
  'id',
  'label',
  'name',
  'pattern',
  'placeholder',
  'role',
  'summary',
  'title',
  'value',
  'style',
  'xmlns',
]);

const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
/* Document namespace */
let NAMESPACE = HTML_NAMESPACE;
let IS_EMPTY_INPUT = false;

/* Allowed XHTML+XML namespaces */
let ALLOWED_NAMESPACES = null;
// Namespace URIs are stored verbatim (`stringToString`), not lowercased.
const DEFAULT_ALLOWED_NAMESPACES = addToSet(
  {},
  [MATHML_NAMESPACE, SVG_NAMESPACE, HTML_NAMESPACE],
  stringToString
);

// Frozen default list plus the mutable working set derived from it.
const DEFAULT_MATHML_TEXT_INTEGRATION_POINTS = freeze([
  'mi',
  'mo',
  'mn',
  'ms',
  'mtext',
]);
let MATHML_TEXT_INTEGRATION_POINTS = addToSet(
  {},
  DEFAULT_MATHML_TEXT_INTEGRATION_POINTS
);

const DEFAULT_HTML_INTEGRATION_POINTS = freeze(['annotation-xml']);
let HTML_INTEGRATION_POINTS = addToSet({}, DEFAULT_HTML_INTEGRATION_POINTS);

// Certain elements are allowed in both SVG and HTML
// namespace. We need to specify them explicitly
// so that they don't get erroneously deleted from
// HTML namespace.
const COMMON_SVG_AND_HTML_ELEMENTS = addToSet({}, [ 'title', 'style', 'font', 'a', 'script', ]); /* Parsing of strict XHTML documents */ let PARSER_MEDIA_TYPE: null | DOMParserSupportedType = null; const SUPPORTED_PARSER_MEDIA_TYPES = ['application/xhtml+xml', 'text/html']; const DEFAULT_PARSER_MEDIA_TYPE = 'text/html'; let transformCaseFunc: null | Parameters<typeof addToSet>[2] = null; /* Keep a reference to config to pass to hooks */ let CONFIG: Config | null = null; /* Ideally, do not touch anything below this line */ /* ______________________________________________ */ const formElement = document.createElement('form'); const isRegexOrFunction = function ( testValue: unknown ): testValue is Function | RegExp { return testValue instanceof RegExp || testValue instanceof Function; }; /** * _parseConfig * * @param cfg optional config literal */ // eslint-disable-next-line complexity const _parseConfig = function (cfg: Config = {}): void { if (CONFIG && CONFIG === cfg) { return; } /* Shield configuration object from tampering */ if (!cfg || typeof cfg !== 'object') { cfg = {}; } /* Shield configuration object from prototype pollution */ cfg = clone(cfg); PARSER_MEDIA_TYPE = // eslint-disable-next-line unicorn/prefer-includes SUPPORTED_PARSER_MEDIA_TYPES.indexOf(cfg.PARSER_MEDIA_TYPE) === -1 ? DEFAULT_PARSER_MEDIA_TYPE : cfg.PARSER_MEDIA_TYPE; // HTML tags and attributes are not case-sensitive, converting to lowercase. Keeping XHTML as is. transformCaseFunc = PARSER_MEDIA_TYPE === 'application/xhtml+xml' ? 
stringToString : stringToLowerCase; /* Set configuration parameters */ ALLOWED_TAGS = _resolveSetOption( cfg, 'ALLOWED_TAGS', DEFAULT_ALLOWED_TAGS, { transform: transformCaseFunc } ); ALLOWED_ATTR = _resolveSetOption( cfg, 'ALLOWED_ATTR', DEFAULT_ALLOWED_ATTR, { transform: transformCaseFunc } ); ALLOWED_NAMESPACES = _resolveSetOption( cfg, 'ALLOWED_NAMESPACES', DEFAULT_ALLOWED_NAMESPACES, { transform: stringToString } ); URI_SAFE_ATTRIBUTES = _resolveSetOption( cfg, 'ADD_URI_SAFE_ATTR', DEFAULT_URI_SAFE_ATTRIBUTES, { transform: transformCaseFunc, base: DEFAULT_URI_SAFE_ATTRIBUTES } ); DATA_URI_TAGS = _resolveSetOption( cfg, 'ADD_DATA_URI_TAGS', DEFAULT_DATA_URI_TAGS, { transform: transformCaseFunc, base: DEFAULT_DATA_URI_TAGS } ); FORBID_CONTENTS = _resolveSetOption( cfg, 'FORBID_CONTENTS', DEFAULT_FORBID_CONTENTS, { transform: transformCaseFunc } ); FORBID_TAGS = _resolveSetOption(cfg, 'FORBID_TAGS', clone({}), { transform: transformCaseFunc, }); FORBID_ATTR = _resolveSetOption(cfg, 'FORBID_ATTR', clone({}), { transform: transformCaseFunc, }); USE_PROFILES = objectHasOwnProperty(cfg, 'USE_PROFILES') ? cfg.USE_PROFILES && typeof cfg.USE_PROFILES === 'object' ? 
clone(cfg.USE_PROFILES) : cfg.USE_PROFILES : false; ALLOW_ARIA_ATTR = cfg.ALLOW_ARIA_ATTR !== false; // Default true ALLOW_DATA_ATTR = cfg.ALLOW_DATA_ATTR !== false; // Default true ALLOW_UNKNOWN_PROTOCOLS = cfg.ALLOW_UNKNOWN_PROTOCOLS || false; // Default false ALLOW_SELF_CLOSE_IN_ATTR = cfg.ALLOW_SELF_CLOSE_IN_ATTR !== false; // Default true SAFE_FOR_TEMPLATES = cfg.SAFE_FOR_TEMPLATES || false; // Default false SAFE_FOR_XML = cfg.SAFE_FOR_XML !== false; // Default true WHOLE_DOCUMENT = cfg.WHOLE_DOCUMENT || false; // Default false RETURN_DOM = cfg.RETURN_DOM || false; // Default false RETURN_DOM_FRAGMENT = cfg.RETURN_DOM_FRAGMENT || false; // Default false RETURN_TRUSTED_TYPE = cfg.RETURN_TRUSTED_TYPE || false; // Default false FORCE_BODY = cfg.FORCE_BODY || false; // Default false SANITIZE_DOM = cfg.SANITIZE_DOM !== false; // Default true SANITIZE_NAMED_PROPS = cfg.SANITIZE_NAMED_PROPS || false; // Default false KEEP_CONTENT = cfg.KEEP_CONTENT !== false; // Default true IN_PLACE = cfg.IN_PLACE || false; // Default false IS_ALLOWED_URI = isRegex(cfg.ALLOWED_URI_REGEXP) ? cfg.ALLOWED_URI_REGEXP : EXPRESSIONS.IS_ALLOWED_URI; // Default regexp NAMESPACE = typeof cfg.NAMESPACE === 'string' ? cfg.NAMESPACE : HTML_NAMESPACE; // Default HTML namespace MATHML_TEXT_INTEGRATION_POINTS = objectHasOwnProperty(cfg, 'MATHML_TEXT_INTEGRATION_POINTS') && cfg.MATHML_TEXT_INTEGRATION_POINTS && typeof cfg.MATHML_TEXT_INTEGRATION_POINTS === 'object' ? clone(cfg.MATHML_TEXT_INTEGRATION_POINTS) : addToSet({}, DEFAULT_MATHML_TEXT_INTEGRATION_POINTS); // Default built-in map HTML_INTEGRATION_POINTS = objectHasOwnProperty(cfg, 'HTML_INTEGRATION_POINTS') && cfg.HTML_INTEGRATION_POINTS && typeof cfg.HTML_INTEGRATION_POINTS === 'object' ? 
clone(cfg.HTML_INTEGRATION_POINTS) : addToSet({}, DEFAULT_HTML_INTEGRATION_POINTS); // Default built-in map const customElementHandling = objectHasOwnProperty(cfg, 'CUSTOM_ELEMENT_HANDLING') && cfg.CUSTOM_ELEMENT_HANDLING && typeof cfg.CUSTOM_ELEMENT_HANDLING === 'object' ? clone(cfg.CUSTOM_ELEMENT_HANDLING) : create(null); CUSTOM_ELEMENT_HANDLING = create(null); if ( objectHasOwnProperty(customElementHandling, 'tagNameCheck') && isRegexOrFunction(customElementHandling.tagNameCheck) ) { CUSTOM_ELEMENT_HANDLING.tagNameCheck = customElementHandling.tagNameCheck; // Default undefined } if ( objectHasOwnProperty(customElementHandling, 'attributeNameCheck') && isRegexOrFunction(customElementHandling.attributeNameCheck) ) { CUSTOM_ELEMENT_HANDLING.attributeNameCheck = customElementHandling.attributeNameCheck; // Default undefined } if ( objectHasOwnProperty( customElementHandling, 'allowCustomizedBuiltInElements' ) && typeof customElementHandling.allowCustomizedBuiltInElements === 'boolean' ) { CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements = customElementHandling.allowCustomizedBuiltInElements; // Default undefined } seal(CUSTOM_ELEMENT_HANDLING); if (SAFE_FOR_TEMPLATES) { ALLOW_DATA_ATTR = false; } if (RETURN_DOM_FRAGMENT) { RETURN_DOM = true; } /* Parse profile info */ if (USE_PROFILES) { ALLOWED_TAGS = addToSet({}, TAGS.text); ALLOWED_ATTR = create(null); if (USE_PROFILES.html === true) { addToSet(ALLOWED_TAGS, TAGS.html); addToSet(ALLOWED_ATTR, ATTRS.html); } if (USE_PROFILES.svg === true) { addToSet(ALLOWED_TAGS, TAGS.svg); addToSet(ALLOWED_ATTR, ATTRS.svg); addToSet(ALLOWED_ATTR, ATTRS.xml); } if (USE_PROFILES.svgFilters === true) { addToSet(ALLOWED_TAGS, TAGS.svgFilters); addToSet(ALLOWED_ATTR, ATTRS.svg); addToSet(ALLOWED_ATTR, ATTRS.xml); } if (USE_PROFILES.mathMl === true) { addToSet(ALLOWED_TAGS, TAGS.mathMl); addToSet(ALLOWED_ATTR, ATTRS.mathMl); addToSet(ALLOWED_ATTR, ATTRS.xml); } } /* Always reset function-based ADD_TAGS / ADD_ATTR checks to 
prevent * leaking across calls when switching from function to array config */ EXTRA_ELEMENT_HANDLING.tagCheck = null; EXTRA_ELEMENT_HANDLING.attributeCheck = null; /* Merge configuration parameters */ if (objectHasOwnProperty(cfg, 'ADD_TAGS')) { if (typeof cfg.ADD_TAGS === 'function') { EXTRA_ELEMENT_HANDLING.tagCheck = cfg.ADD_TAGS; } else if (arrayIsArray(cfg.ADD_TAGS)) { if (ALLOWED_TAGS === DEFAULT_ALLOWED_TAGS) { ALLOWED_TAGS = clone(ALLOWED_TAGS); } addToSet(ALLOWED_TAGS, cfg.ADD_TAGS, transformCaseFunc); } } if (objectHasOwnProperty(cfg, 'ADD_ATTR')) { if (typeof cfg.ADD_ATTR === 'function') { EXTRA_ELEMENT_HANDLING.attributeCheck = cfg.ADD_ATTR; } else if (arrayIsArray(cfg.ADD_ATTR)) { if (ALLOWED_ATTR === DEFAULT_ALLOWED_ATTR) { ALLOWED_ATTR = clone(ALLOWED_ATTR); } addToSet(ALLOWED_ATTR, cfg.ADD_ATTR, transformCaseFunc); } } if ( objectHasOwnProperty(cfg, 'ADD_URI_SAFE_ATTR') && arrayIsArray(cfg.ADD_URI_SAFE_ATTR) ) { addToSet(URI_SAFE_ATTRIBUTES, cfg.ADD_URI_SAFE_ATTR, transformCaseFunc); } if ( objectHasOwnProperty(cfg, 'FORBID_CONTENTS') && arrayIsArray(cfg.FORBID_CONTENTS) ) { if (FORBID_CONTENTS === DEFAULT_FORBID_CONTENTS) { FORBID_CONTENTS = clone(FORBID_CONTENTS); } addToSet(FORBID_CONTENTS, cfg.FORBID_CONTENTS, transformCaseFunc); } if ( objectHasOwnProperty(cfg, 'ADD_FORBID_CONTENTS') && arrayIsArray(cfg.ADD_FORBID_CONTENTS) ) { if (FORBID_CONTENTS === DEFAULT_FORBID_CONTENTS) { FORBID_CONTENTS = clone(FORBID_CONTENTS); } addToSet(FORBID_CONTENTS, cfg.ADD_FORBID_CONTENTS, transformCaseFunc); } /* Add #text in case KEEP_CONTENT is set to true */ if (KEEP_CONTENT) { ALLOWED_TAGS['#text'] = true; } /* Add html, head and body to ALLOWED_TAGS in case WHOLE_DOCUMENT is true */ if (WHOLE_DOCUMENT) { addToSet(ALLOWED_TAGS, ['html', 'head', 'body']); } /* Add tbody to ALLOWED_TAGS in case tables are permitted, see #286, #365 */ if (ALLOWED_TAGS.table) { addToSet(ALLOWED_TAGS, ['tbody']); delete FORBID_TAGS.tbody; } // Re-derive the active Trusted Types 
policy from this configuration on // every parse. The active policy must never be sticky closure state that // outlives the config that set it: a caller-supplied policy left in place // after `clearConfig()` — or after a later call that supplied none, or // `TRUSTED_TYPES_POLICY: null` — could sign a subsequent "default" // `RETURN_TRUSTED_TYPE` result with a foreign, possibly unsafe policy. // See GHSA-vxr8-fq34-vvx9. if (cfg.TRUSTED_TYPES_POLICY) { if (typeof cfg.TRUSTED_TYPES_POLICY.createHTML !== 'function') { throw typeErrorCreate( 'TRUSTED_TYPES_POLICY configuration option must provide a "createHTML" hook.' ); } if (typeof cfg.TRUSTED_TYPES_POLICY.createScriptURL !== 'function') { throw typeErrorCreate( 'TRUSTED_TYPES_POLICY configuration option must provide a "createScriptURL" hook.' ); } // A caller-supplied policy applies to this configuration only. const previousTrustedTypesPolicy = trustedTypesPolicy; trustedTypesPolicy = cfg.TRUSTED_TYPES_POLICY; // Sign local variables required by `sanitize`. If the supplied policy's // `createHTML` is circular (i.e. it calls `DOMPurify.sanitize`), this // throws via the re-entrancy guard. Restore the previous policy first so // the instance is not left in a poisoned state. See #1422. try { emptyHTML = _createTrustedHTML(''); } catch (error) { trustedTypesPolicy = previousTrustedTypesPolicy; throw error; } } else if (cfg.TRUSTED_TYPES_POLICY === null) { // Explicit opt-out for this call: perform no Trusted Types signing and // create nothing (so a strict `trusted-types` CSP that disallows a // `dompurify` policy can still call `sanitize` from inside its own // policy — see #1422). Resetting to `undefined` rather than a sticky // `null` also drops any previously retained caller policy, so it cannot // resurface on a later call, while still allowing the next config-less // call to restore the internal default policy. See GHSA-vxr8-fq34-vvx9. 
trustedTypesPolicy = undefined; emptyHTML = ''; } else { // No policy supplied: keep the currently active policy if one is set — a // previously supplied policy is intentionally sticky across config-less // calls — otherwise fall back to the instance's own internal policy, // created at most once. (A policy supplied for a *single* call still // lingers by design; what must not linger is a policy whose configuration // has been torn down via `clearConfig()`, which restores the default.) if (trustedTypesPolicy === undefined) { trustedTypesPolicy = _getDefaultTrustedTypesPolicy(); } // Sign internal variables only when a policy is active. A falsy policy // (Trusted Types unsupported, creation failed, or an explicit opt-out) // leaves `emptyHTML` as a plain string, so we never call `.createHTML` on // a non-policy and throw. See #1422. if (trustedTypesPolicy && typeof emptyHTML === 'string') { emptyHTML = _createTrustedHTML(''); } } // Prevent further manipulation of configuration. // Not available in IE8, Safari 5, etc. if (freeze) { freeze(cfg); } CONFIG = cfg; }; /* Keep track of all possible SVG and MathML tags * so that we can perform the namespace checks * correctly. */ const ALL_SVG_TAGS = addToSet({}, [ ...TAGS.svg, ...TAGS.svgFilters, ...TAGS.svgDisallowed, ]); const ALL_MATHML_TAGS = addToSet({}, [ ...TAGS.mathMl, ...TAGS.mathMlDisallowed, ]); /** * Namespace rules for an element in the SVG namespace. * * @param tagName the element's lowercase tag name * @param parent the (possibly simulated) parent node * @param parentTagName the parent's lowercase tag name * @returns true if a spec-compliant parser could produce this element */ const _checkSvgNamespace = function ( tagName: string, parent: { namespaceURI?: string }, parentTagName: string ): boolean { // The only way to switch from HTML namespace to SVG // is via <svg>. If it happens via any other tag, then // it should be killed. 
if (parent.namespaceURI === HTML_NAMESPACE) { return tagName === 'svg'; } // The only way to switch from MathML to SVG is via <svg> // if the parent is either <annotation-xml> or a MathML // text integration point. if (parent.namespaceURI === MATHML_NAMESPACE) { return ( tagName === 'svg' && (parentTagName === 'annotation-xml' || MATHML_TEXT_INTEGRATION_POINTS[parentTagName]) ); } // We only allow elements that are defined in SVG // spec. All others are disallowed in SVG namespace. return Boolean(ALL_SVG_TAGS[tagName]); }; /** * Namespace rules for an element in the MathML namespace. * * @param tagName the element's lowercase tag name * @param parent the (possibly simulated) parent node * @param parentTagName the parent's lowercase tag name * @returns true if a spec-compliant parser could produce this element */ const _checkMathMlNamespace = function ( tagName: string, parent: { namespaceURI?: string }, parentTagName: string ): boolean { // The only way to switch from HTML namespace to MathML // is via <math>. If it happens via any other tag, then // it should be killed. if (parent.namespaceURI === HTML_NAMESPACE) { return tagName === 'math'; } // The only way to switch from SVG to MathML is via // <math> and HTML integration points if (parent.namespaceURI === SVG_NAMESPACE) { return tagName === 'math' && HTML_INTEGRATION_POINTS[parentTagName]; } // We only allow elements that are defined in MathML // spec. All others are disallowed in MathML namespace. return Boolean(ALL_MATHML_TAGS[tagName]); }; /** * Namespace rules for an element in the HTML namespace. 
*
 * @param tagName the element's lowercase tag name
 * @param parent the (possibly simulated) parent node
 * @param parentTagName the parent's lowercase tag name
 * @returns true if a spec-compliant parser could produce this element
 */
const _checkHtmlNamespace = function (
  tagName: string,
  parent: { namespaceURI?: string },
  parentTagName: string
): boolean {
  // Entering HTML from an SVG parent is legal only through an
  // HTML integration point.
  const blockedBySvgParent =
    parent.namespaceURI === SVG_NAMESPACE &&
    !HTML_INTEGRATION_POINTS[parentTagName];
  if (blockedBySvgParent) {
    return false;
  }

  // Entering HTML from a MathML parent is legal only through a
  // MathML text integration point.
  const blockedByMathMlParent =
    parent.namespaceURI === MATHML_NAMESPACE &&
    !MATHML_TEXT_INTEGRATION_POINTS[parentTagName];
  if (blockedByMathMlParent) {
    return false;
  }

  // MathML-only tag names must never show up in the HTML namespace.
  if (ALL_MATHML_TAGS[tagName]) {
    return false;
  }

  // SVG-only tag names are rejected as well, unless the name is one
  // that SVG and HTML legitimately share.
  return COMMON_SVG_AND_HTML_ELEMENTS[tagName] || !ALL_SVG_TAGS[tagName];
};

/**
 * Decides whether an element sits in a namespace that a spec-compliant
 * parser could have produced for it, given its parent.
 *
 * @param element a DOM element whose namespace is being checked
 * @returns false if the element has a namespace that a spec-compliant
 * parser would never return, true otherwise.
 */
const _checkValidNamespace = function (element: Element): boolean {
  let parent = getParentNode(element);

  // In JSDOM, if we're inside shadow DOM, then parentNode
  // can be null. We just simulate parent in this case.
  if (!parent || !parent.tagName) {
    parent = { namespaceURI: NAMESPACE, tagName: 'template' };
  }

  const tagName = stringToLowerCase(element.tagName);
  const parentTagName = stringToLowerCase(parent.tagName);
  const elementNamespace = element.namespaceURI;

  // Namespaces outside the configured allow-list are rejected outright.
  if (!ALLOWED_NAMESPACES[elementNamespace]) {
    return false;
  }

  switch (elementNamespace) {
    case SVG_NAMESPACE:
      return _checkSvgNamespace(tagName, parent, parentTagName);

    case MATHML_NAMESPACE:
      return _checkMathMlNamespace(tagName, parent, parentTagName);

    case HTML_NAMESPACE:
      return _checkHtmlNamespace(tagName, parent, parentTagName);

    default:
      // For XHTML and XML documents that support custom namespaces
      if (
        PARSER_MEDIA_TYPE === 'application/xhtml+xml' &&
        ALLOWED_NAMESPACES[elementNamespace]
      ) {
        return true;
      }

      // The code should never reach this place (this means
      // that the element somehow got namespace that is not
      // HTML, SVG, MathML or allowed via ALLOWED_NAMESPACES).
      // Return false just in case.
      return false;
  }
};

/**
 * _forceRemove
 *
 * Records `node` in `DOMPurify.removed` and detaches it from its parent.
 * Throws when the regular detach fails and the node ends up without a
 * parent (see the comment in the body).
 *
 * @param node a DOM node
 */
const _forceRemove = function (node: Node): void {
  // The removal is recorded before any detach attempt is made.
  arrayPush(DOMPurify.removed, { element: node });

  try {
    // eslint-disable-next-line unicorn/prefer-dom-node-remove
    getParentNode(node).removeChild(node);
    return;
  } catch (_) {
    /* Regular detach failed; continue with the fail-closed path below */
  }

  /* The normal detach failed — this is reached for a parentless node
     (getParentNode() is null, so .removeChild throws). Element.prototype
     .remove() is itself a spec no-op on a parentless node, so a recorded
     "removal" would otherwise hand the caller back an intact,
     payload-bearing node (e.g. a detached IN_PLACE root the mXSS canary or
     the style-with-element-child rule decided to kill).

     We fail closed by throwing — exactly as a clobbered root does at the
     IN_PLACE entry — instead of trying to "neutralize" the node through
     its own methods. Neutralizing would require getAttributeNames() /
     removeAttribute() on the node, both of which a <form> root can clobber
     via a named child (and _isClobbered does not even probe
     getAttributeNames), so that step could be silently defeated and leave
     the payload intact. Throwing touches only the cached, clobber-safe
     remove() and getParentNode(). Generalizes GHSA-r47g-fvhr-h676
     (clobbered-form root) to every root-kill reason. REPORT-3.

     A normally-removed in-tree node never gets here: it has a parent,
     removeChild() succeeds, and the function has already returned. */
  remove(node);
  if (getParentNode(node)) {
    return;
  }

  throw typeErrorCreate(
    'a node selected for removal could not be detached from its tree ' +
      'and cannot be safely returned; refusing to sanitize in place'
  );
};

/**
 * _neutralizeRoot
 *
 * Fail-closed teardown of an in-place root after the sanitize walk aborts
 * (campaign-3 F2). An internal throw mid-walk — e.g. a page-registered
 * custom element's reaction detaches a node so `_forceRemove`'s deliberate
 * parentless guard throws, or any other re-entrant engine mutation — would
 * otherwise leave the caller's *live* tree half-sanitized, with everything
 * after the abort point still carrying its handlers. There is no safe way
 * to resume the walk (the tree mutated under us), so we strip the root bare:
 * remove every child and every attribute, then let the caller's catch see
 * the original error. Clobber-safe (cached `remove`/`childNodes`/`attributes`
 * getters; the root was already clobber-pre-flighted at the IN_PLACE entry).
*
 * @param root the in-place root to empty
 */
const _neutralizeRoot = function (root: Node): void {
  const childNodes = getChildNodes(root);
  if (childNodes) {
    // Copy the children before detaching any of them, so the list being
    // iterated cannot shift while nodes are removed.
    const snapshot: Node[] = [];
    arrayForEach(childNodes, (child) => {
      arrayPush(snapshot, child);
    });

    arrayForEach(snapshot, (child) => {
      try {
        remove(child);
      } catch (_) {
        /* Best-effort teardown; the fallback below retries the detach */
      }

      // Fallback for a child the cached `remove` could not detach.
      // NOTE(review): presumably this matters for non-element children
      // (text, comments) if `remove` is taken from Element.prototype —
      // confirm against its definition. Without this step such a child
      // would silently stay in the "neutralized" root.
      try {
        if (getParentNode(child) === root) {
          root.removeChild(child);
        }
      } catch (_) {
        /* Still best effort: nothing safer is left to try for this child */
      }
    });
  }

  const attributes = getAttributes(root);
  if (attributes) {
    // Walk backwards: removing an attribute shrinks the collection.
    for (let i = attributes.length - 1; i >= 0; --i) {
      const attribute = attributes[i];
      const name = attribute && attribute.name;
      if (typeof name === 'string') {
        try {
          (root as Element).removeAttribute(name);
        } catch (_) {
          /* Clobbered removeAttribute — ignore (fail-closed best effort) */
        }
      }
    }
  }
};

/**
 * _removeAttribute
 *
 * Records the attribute in `DOMPurify.removed` and removes it from the
 * element. If reading the attribute node throws, the removal is still
 * recorded, with `attribute: null`.
 *
 * The `is` attribute gets extra handling after the removal attempt: when a
 * DOM or fragment is returned the whole element is force-removed, otherwise
 * the attribute value is blanked. Failures of that extra step are ignored.
 *
 * @param name an Attribute name
 * @param element a DOM node
 */
const _removeAttribute = function (name: string, element: Element): void {
  try {
    arrayPush(DOMPurify.removed, {
      attribute: element.getAttributeNode(name),
      from: element,
    });
  } catch (_) {
    arrayPush(DOMPurify.removed, {
      attribute: null,
      from: element,
    });
  }

  element.removeAttribute(name);

  // We void attribute values for unremovable "is" attributes
  if (name === 'is') {
    if (RETURN_DOM || RETURN_DOM_FRAGMENT) {
      try {
        _forceRemove(element);
      } catch (_) {
        /* Deliberately ignored: removal of the element is best effort here */
      }
    } else {
      try {
        element.setAttribute(name, '');
      } catch (_) {
        /* Deliberately ignored: blanking the value is best effort here */
      }
    }
  }
};

/**
 * _stripDisallowedAttributes
 *
 * Removes every attribute the active configuration does not allow from a
 * single element, using the same allowlist as the main attribute pass (so
 * `on*` handlers go, but no `/^on/` blocklist is introduced). Used only to
 * neutralise nodes that are being discarded from an in-place tree.
*
 * @param element the element to strip
 */
const _stripDisallowedAttributes = function (element: Element): void {
  const attributes = getAttributes(element);
  if (!attributes) {
    return;
  }

  // Walk backwards: removing an attribute shrinks the collection.
  for (let i = attributes.length - 1; i >= 0; --i) {
    const attribute = attributes[i];
    const name = attribute && attribute.name;
    // Skip entries without a usable name and attributes on the allowlist.
    if (typeof name !== 'string' || ALLOWED_ATTR[transformCaseFunc(name)]) {
      continue;
    }

    try {
      element.removeAttribute(name);
    } catch (_) {
      /* Clobbered removeAttribute on a doomed node — ignore */
    }
  }
};

/**
 * _neutralizeSubtree
 *
 * Completes the audit-5 F1 fix across every removal path. The KEEP_CONTENT
 * move-hoist neutralises only disallowed-tag removals; clobber, mXSS-canary,
 * namespace, comment, processing-instruction and KEEP_CONTENT:false removals
 * all drop their subtree wholesale via `_forceRemove`. On the IN_PLACE path
 * those dropped nodes are detached from the caller's LIVE tree but a
 * handler-bearing original among them (an `<img onerror>`/`<video>` that was
 * loading) keeps its queued resource event, which fires in page scope after
 * sanitize returns. This walks a removed subtree and strips every attribute
 * the active configuration does not allow — so `on*` handlers are cancelled
 * through the SAME allowlist that governs kept nodes, not a separate `/^on/`
 * blocklist. Run synchronously before sanitize returns, i.e. before any
 * queued event can fire. Hook-free by design: these nodes leave the output,
 * so firing attribute hooks for them would be surprising. Clobber-safe reads;
 * a doomed clobbered node may shadow `removeAttribute` (its own attributes are
 * irrelevant — it is discarded — while its non-clobbered descendants, e.g.
 * the `<img>`, are reached and scrubbed).
 *
 * The traversal is an explicit-stack depth-first walk. The stack is driven
 * through the module's `arrayPop`/`arrayPush` helpers, like every other
 * array operation in this file, instead of calling `pop`/`push` on the
 * array instance.
 *
 * @param root the root of a removed subtree to neutralise
 */
const _neutralizeSubtree = function (root: Node): void {
  const stack: Node[] = [root];
  while (stack.length > 0) {
    const node = arrayPop(stack);
    const nodeType = getNodeType
      ? getNodeType(node)
      : (node as any).nodeType;
    if (nodeType === NODE_TYPE.element) {
      _stripDisallowedAttributes(node as Element);
    }

    const childNodes = getChildNodes(node);
    if (childNodes) {
      // Pushed last-to-first so children are popped in document order.
      for (let i = childNodes.length - 1; i >= 0; --i) {
        arrayPush(stack, childNodes[i]);
      }
    }
  }
};

/**
 * _initDocument
 *
 * Parses the dirty markup into a fresh document and returns the node the
 * sanitizer should work on.
 *
 * @param dirty - a string of dirty markup
 * @return a DOM, filled with the dirty markup
 */
const _initDocument = function (dirty: string): Document {
  /* Create a HTML document */
  let doc = null;
  let leadingWhitespace = null;

  if (FORCE_BODY) {
    dirty = '<remove></remove>' + dirty;
  } else {
    /* If FORCE_BODY isn't used, leading whitespace needs to be preserved manually */
    const matches = stringMatch(dirty, /^[\r\n\t ]+/);
    leadingWhitespace = matches && matches[0];
  }

  if (
    PARSER_MEDIA_TYPE === 'application/xhtml+xml' &&
    NAMESPACE === HTML_NAMESPACE
  ) {
    // Root of XHTML doc must contain xmlns declaration (see https://www.w3.org/TR/xhtml1/normative.html#strict)
    dirty =
      '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>' +
      dirty +
      '</body></html>';
  }

  const dirtyPayload = trustedTypesPolicy
    ? _createTrustedHTML(dirty)
    : dirty;

  /*
   * Use the DOMParser API by default and fall back later if need be.
   * DOMParser does not work for SVG input with multiple root elements,
   * so it is only used for the HTML namespace.
   */
  if (NAMESPACE === HTML_NAMESPACE) {
    try {
      doc = new DOMParser().parseFromString(dirtyPayload, PARSER_MEDIA_TYPE);
    } catch (_) {}
  }

  /* Fall back to implementation.createDocument when DOMParser was not
     used or did not produce a document element */
  if (!doc || !doc.documentElement) {
    doc = implementation.createDocument(NAMESPACE, 'template', null);
    try {
      doc.documentElement.innerHTML = IS_EMPTY_INPUT
        ? emptyHTML
        : dirtyPayload;
    } catch (_) {
      // Syntax error if dirtyPayload is invalid xml
    }
  }

  const body = doc.body || doc.documentElement;

  // Re-insert the leading whitespace captured above as the first child.
  if (dirty && leadingWhitespace) {
    body.insertBefore(
      document.createTextNode(leadingWhitespace),
      body.childNodes[0] || null
    );
  }

  /* Work on whole document or just its body */
  if (NAMESPACE === HTML_NAMESPACE) {
    return getElementsByTagName.call(
      doc,
      WHOLE_DOCUMENT ? 'html' : 'body'
    )[0];
  }

  return WHOLE_DOCUMENT ? doc.documentElement : body;
};

/**
 * Creates a NodeIterator object that you can use to traverse filtered lists of nodes or elements in a document.
 * The iterator shows elements, comments, text, processing instructions
 * and CDATA sections.
 *
 * @param root The root element or node to start traversing on.
 * @return The created NodeIterator
 */
const _createNodeIterator = function (root: Node): NodeIterator {
  return createNodeIterator.call(
    root.ownerDocument || root,
    root,
    // eslint-disable-next-line no-bitwise
    NodeFilter.SHOW_ELEMENT |
      NodeFilter.SHOW_COMMENT |
      NodeFilter.SHOW_TEXT |
      NodeFilter.SHOW_PROCESSING_INSTRUCTION |
      NodeFilter.SHOW_CDATA_SECTION,
    null
  );
};

/**
 * Replace template expression syntax (mustache, ERB, template
 * literal) with a space; shared by all SAFE_FOR_TEMPLATES scrub
 * sites. Order matters: mustache, then ERB, then template literal.
 *
 * @param value the string to scrub
 * @returns the scrubbed string
 */
const _stripTemplateExpressions = function (value: string): string {
  value = stringReplace(value, MUSTACHE_EXPR, ' ');
  value = stringReplace(value, ERB_EXPR, ' ');
  value = stringReplace(value, TMPLIT_EXPR, ' ');
  return value;
};

/**
 * Strip template-engine expressions ({{...}}, ${...}, <%...%>) from the
 * character data of an element subtree. Used as the final safety net for
 * SAFE_FOR_TEMPLATES on every DOM-returning code path so that expressions
 * which only form after text-node normalization (e.g. fragments split across
 * stripped elements) cannot survive into a template-evaluating framework.
*
 * Walks text/comment/CDATA/processing-instruction nodes and mutates `.data`
 * in place rather than round-tripping through innerHTML. This preserves
 * descendant node references (important for IN_PLACE callers), avoids a
 * serialize/reparse cycle, and reads literal character data — which means
 * `<%...%>` in text content matches the ERB regex against its real bytes
 * instead of the HTML-entity-escaped form innerHTML would produce.
 *
 * Attribute values are not visited here; SAFE_FOR_TEMPLATES handling for
 * attributes is performed during the per-node `_sanitizeAttributes` pass.
 *
 * @param node The root element whose character data should be scrubbed.
 */
const _scrubTemplateExpressions = function (node: Element): void {
  // Merge adjacent text nodes first, so expressions that were split
  // across several nodes are seen as a single string.
  node.normalize();

  // eslint-disable-next-line no-bitwise
  const characterDataMask =
    NodeFilter.SHOW_TEXT |
    NodeFilter.SHOW_COMMENT |
    NodeFilter.SHOW_CDATA_SECTION |
    NodeFilter.SHOW_PROCESSING_INSTRUCTION;

  const iterator = createNodeIterator.call(
    node.ownerDocument || node,
    node,
    characterDataMask,
    null
  );

  // Rewrite the character data of every matching node in place.
  for (
    let textual = iterator.nextNode() as CharacterData | null;
    textual;
    textual = iterator.nextNode() as CharacterData | null
  ) {
    textual.data = _stripTemplateExpressions(textual.data);
  }

  // NodeIterator does not descend into <template>.content per the DOM spec,
  // so we must explicitly recurse into each template's content fragment,
  // mirroring the approach used by _sanitizeShadowDOM.
  const nestedTemplates = node.querySelectorAll?.('template');
  if (!nestedTemplates) {
    return;
  }

  arrayForEach(nestedTemplates, (tmpl: HTMLTemplateElement) => {
    const content = tmpl.content;
    if (_isDocumentFragment(content)) {
      _scrubTemplateExpressions(content as unknown as Element);
    }
  });
};

/**
 * _isClobbered
 *
 * Detect DOM-clobbering on HTMLFormElement nodes. Form is the only HTML
 * interface with [LegacyOverrideBuiltIns]; a descendant element with a
 * `name` attribute matching a prototype prope