UNPKG

dompurify

Version:

DOMPurify is a DOM-only, super-fast, uber-tolerant XSS sanitizer for HTML, MathML and SVG. It's written in JavaScript and works in all modern browsers (Safari, Opera (15+), Internet Explorer (10+), Firefox and Chrome - as well as almost anything else usin

1,552 lines (1,370 loc) 71 kB
/* eslint-disable @typescript-eslint/indent */ import type { TrustedHTML, TrustedTypesWindow, } from 'trusted-types/lib/index.js'; import type { Config, UseProfilesConfig } from './config'; import * as TAGS from './tags.js'; import * as ATTRS from './attrs.js'; import * as EXPRESSIONS from './regexp.js'; import { addToSet, clone, entries, freeze, arrayForEach, arrayIsArray, arrayLastIndexOf, arrayPop, arrayPush, arraySplice, stringMatch, stringReplace, stringToLowerCase, stringToString, stringIndexOf, stringTrim, regExpTest, isRegex, typeErrorCreate, lookupGetter, create, objectHasOwnProperty, stringifyValue, } from './utils.js'; export type { Config } from './config'; declare const VERSION: string; // https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType const NODE_TYPE = { element: 1, attribute: 2, text: 3, cdataSection: 4, entityReference: 5, // Deprecated entityNode: 6, // Deprecated progressingInstruction: 7, comment: 8, document: 9, documentType: 10, documentFragment: 11, notation: 12, // Deprecated }; const getGlobal = function (): WindowLike { return typeof window === 'undefined' ? null : window; }; /** * Creates a no-op policy for internal use only. * Don't export this function outside this module! * @param trustedTypes The policy factory. * @param purifyHostElement The Script element used to load DOMPurify (to determine policy name suffix). * @return The policy created (or null, if Trusted Types * are not supported or creating the policy failed). */ const _createTrustedTypesPolicy = function ( trustedTypes: TrustedTypePolicyFactory, purifyHostElement: HTMLScriptElement ) { if ( typeof trustedTypes !== 'object' || typeof trustedTypes.createPolicy !== 'function' ) { return null; } // Allow the callers to control the unique policy name // by adding a data-tt-policy-suffix to the script element with the DOMPurify. // Policy creation with duplicate names throws in Trusted Types. let suffix = null; const ATTR_NAME = 'data-tt-policy-suffix'; if (purifyHostElement && purifyHostElement.hasAttribute(ATTR_NAME)) { suffix = purifyHostElement.getAttribute(ATTR_NAME); } const policyName = 'dompurify' + (suffix ? '#' + suffix : ''); try { return trustedTypes.createPolicy(policyName, { createHTML(html) { return html; }, createScriptURL(scriptUrl) { return scriptUrl; }, }); } catch (_) { // Policy creation failed (most likely another DOMPurify script has // already run). Skip creating the policy, as this will only cause errors // if TT are enforced. console.warn( 'TrustedTypes policy ' + policyName + ' could not be created.' ); return null; } }; const _createHooksMap = function (): HooksMap { return { afterSanitizeAttributes: [], afterSanitizeElements: [], afterSanitizeShadowDOM: [], beforeSanitizeAttributes: [], beforeSanitizeElements: [], beforeSanitizeShadowDOM: [], uponSanitizeAttribute: [], uponSanitizeElement: [], uponSanitizeShadowNode: [], }; }; function createDOMPurify(window: WindowLike = getGlobal()): DOMPurify { const DOMPurify: DOMPurify = (root: WindowLike) => createDOMPurify(root); DOMPurify.version = VERSION; DOMPurify.removed = []; if ( !window || !window.document || window.document.nodeType !== NODE_TYPE.document || !window.Element ) { // Not running in a browser, provide a factory function // so that you can pass your own Window DOMPurify.isSupported = false; return DOMPurify; } let { document } = window; const originalDocument = document; const currentScript: HTMLScriptElement = originalDocument.currentScript as HTMLScriptElement; const { DocumentFragment, HTMLTemplateElement, Node, Element, NodeFilter, NamedNodeMap = window.NamedNodeMap || (window as any).MozNamedAttrMap, HTMLFormElement, DOMParser, trustedTypes, } = window; const ElementPrototype = Element.prototype; const cloneNode = lookupGetter(ElementPrototype, 'cloneNode'); const remove = lookupGetter(ElementPrototype, 'remove'); const getNextSibling = lookupGetter(ElementPrototype, 'nextSibling'); const getChildNodes = lookupGetter(ElementPrototype, 'childNodes'); const getParentNode = lookupGetter(ElementPrototype, 'parentNode'); const getNodeType = Node && Node.prototype ? lookupGetter(Node.prototype, 'nodeType') : null; // As per issue #47, the web-components registry is inherited by a // new document created via createHTMLDocument. As per the spec // (http://w3c.github.io/webcomponents/spec/custom/#creating-and-passing-registries) // a new empty registry is used when creating a template contents owner // document, so we use that as our parent document to ensure nothing // is inherited. if (typeof HTMLTemplateElement === 'function') { const template = document.createElement('template'); if (template.content && template.content.ownerDocument) { document = template.content.ownerDocument; } } let trustedTypesPolicy; let emptyHTML = ''; const { implementation, createNodeIterator, createDocumentFragment, getElementsByTagName, } = document; const { importNode } = originalDocument; let hooks = _createHooksMap(); /** * Expose whether this browser supports running the full DOMPurify. */ DOMPurify.isSupported = typeof entries === 'function' && typeof getParentNode === 'function' && implementation && implementation.createHTMLDocument !== undefined; const { MUSTACHE_EXPR, ERB_EXPR, TMPLIT_EXPR, DATA_ATTR, ARIA_ATTR, IS_SCRIPT_OR_DATA, ATTR_WHITESPACE, CUSTOM_ELEMENT, } = EXPRESSIONS; let { IS_ALLOWED_URI } = EXPRESSIONS; /** * We consider the elements and attributes below to be safe. Ideally * don't add any new ones but feel free to remove unwanted ones. */ /* allowed element names */ let ALLOWED_TAGS = null; const DEFAULT_ALLOWED_TAGS = addToSet({}, [ ...TAGS.html, ...TAGS.svg, ...TAGS.svgFilters, ...TAGS.mathMl, ...TAGS.text, ]); /* Allowed attribute names */ let ALLOWED_ATTR = null; const DEFAULT_ALLOWED_ATTR = addToSet({}, [ ...ATTRS.html, ...ATTRS.svg, ...ATTRS.mathMl, ...ATTRS.xml, ]); /* * Configure how DOMPurify should handle custom elements and their attributes as well as customized built-in elements. * @property {RegExp|Function|null} tagNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any custom elements) * @property {RegExp|Function|null} attributeNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any attributes not on the allow list) * @property {boolean} allowCustomizedBuiltInElements allow custom elements derived from built-ins if they pass CUSTOM_ELEMENT_HANDLING.tagNameCheck. Default: `false`. */ let CUSTOM_ELEMENT_HANDLING = Object.seal( create(null, { tagNameCheck: { writable: true, configurable: false, enumerable: true, value: null, }, attributeNameCheck: { writable: true, configurable: false, enumerable: true, value: null, }, allowCustomizedBuiltInElements: { writable: true, configurable: false, enumerable: true, value: false, }, }) ); /* Explicitly forbidden tags (overrides ALLOWED_TAGS/ADD_TAGS) */ let FORBID_TAGS = null; /* Explicitly forbidden attributes (overrides ALLOWED_ATTR/ADD_ATTR) */ let FORBID_ATTR = null; /* Config object to store ADD_TAGS/ADD_ATTR functions (when used as functions) */ const EXTRA_ELEMENT_HANDLING = Object.seal( create(null, { tagCheck: { writable: true, configurable: false, enumerable: true, value: null, }, attributeCheck: { writable: true, configurable: false, enumerable: true, value: null, }, }) ); /* Decide if ARIA attributes are okay */ let ALLOW_ARIA_ATTR = true; /* Decide if custom data attributes are okay */ let ALLOW_DATA_ATTR = true; /* Decide if unknown protocols are okay */ let ALLOW_UNKNOWN_PROTOCOLS = false; /* Decide if self-closing tags in attributes are allowed. * Usually removed due to a mXSS issue in jQuery 3.0 */ let ALLOW_SELF_CLOSE_IN_ATTR = true; /* Output should be safe for common template engines. * This means, DOMPurify removes data attributes, mustaches and ERB */ let SAFE_FOR_TEMPLATES = false; /* Output should be safe even for XML used within HTML and alike. * This means, DOMPurify removes comments when containing risky content. */ let SAFE_FOR_XML = true; /* Decide if document with <html>... should be returned */ let WHOLE_DOCUMENT = false; /* Track whether config is already set on this instance of DOMPurify. */ let SET_CONFIG = false; /* Decide if all elements (e.g. style, script) must be children of * document.body. By default, browsers might move them to document.head */ let FORCE_BODY = false; /* Decide if a DOM `HTMLBodyElement` should be returned, instead of a html * string (or a TrustedHTML object if Trusted Types are supported). * If `WHOLE_DOCUMENT` is enabled a `HTMLHtmlElement` will be returned instead */ let RETURN_DOM = false; /* Decide if a DOM `DocumentFragment` should be returned, instead of a html * string (or a TrustedHTML object if Trusted Types are supported) */ let RETURN_DOM_FRAGMENT = false; /* Try to return a Trusted Type object instead of a string, return a string in * case Trusted Types are not supported */ let RETURN_TRUSTED_TYPE = false; /* Output should be free from DOM clobbering attacks? * This sanitizes markups named with colliding, clobberable built-in DOM APIs. */ let SANITIZE_DOM = true; /* Achieve full DOM Clobbering protection by isolating the namespace of named * properties and JS variables, mitigating attacks that abuse the HTML/DOM spec rules. * * HTML/DOM spec rules that enable DOM Clobbering: * - Named Access on Window (§7.3.3) * - DOM Tree Accessors (§3.1.5) * - Form Element Parent-Child Relations (§4.10.3) * - Iframe srcdoc / Nested WindowProxies (§4.8.5) * - HTMLCollection (§4.2.10.2) * * Namespace isolation is implemented by prefixing `id` and `name` attributes * with a constant string, i.e., `user-content-` */ let SANITIZE_NAMED_PROPS = false; const SANITIZE_NAMED_PROPS_PREFIX = 'user-content-'; /* Keep element content when removing element? */ let KEEP_CONTENT = true; /* If a `Node` is passed to sanitize(), then performs sanitization in-place instead * of importing it into a new Document and returning a sanitized copy */ let IN_PLACE = false; /* Allow usage of profiles like html, svg and mathMl */ let USE_PROFILES: UseProfilesConfig | false = {}; /* Tags to ignore content of when KEEP_CONTENT is true */ let FORBID_CONTENTS = null; const DEFAULT_FORBID_CONTENTS = addToSet({}, [ 'annotation-xml', 'audio', 'colgroup', 'desc', 'foreignobject', 'head', 'iframe', 'math', 'mi', 'mn', 'mo', 'ms', 'mtext', 'noembed', 'noframes', 'noscript', 'plaintext', 'script', 'style', 'svg', 'template', 'thead', 'title', 'video', 'xmp', ]); /* Tags that are safe for data: URIs */ let DATA_URI_TAGS = null; const DEFAULT_DATA_URI_TAGS = addToSet({}, [ 'audio', 'video', 'img', 'source', 'image', 'track', ]); /* Attributes safe for values like "javascript:" */ let URI_SAFE_ATTRIBUTES = null; const DEFAULT_URI_SAFE_ATTRIBUTES = addToSet({}, [ 'alt', 'class', 'for', 'id', 'label', 'name', 'pattern', 'placeholder', 'role', 'summary', 'title', 'value', 'style', 'xmlns', ]); const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'; const SVG_NAMESPACE = 'http://www.w3.org/2000/svg'; const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'; /* Document namespace */ let NAMESPACE = HTML_NAMESPACE; let IS_EMPTY_INPUT = false; /* Allowed XHTML+XML namespaces */ let ALLOWED_NAMESPACES = null; const DEFAULT_ALLOWED_NAMESPACES = addToSet( {}, [MATHML_NAMESPACE, SVG_NAMESPACE, HTML_NAMESPACE], stringToString ); let MATHML_TEXT_INTEGRATION_POINTS = addToSet({}, [ 'mi', 'mo', 'mn', 'ms', 'mtext', ]); let HTML_INTEGRATION_POINTS = addToSet({}, ['annotation-xml']); // Certain elements are allowed in both SVG and HTML // namespace. We need to specify them explicitly // so that they don't get erroneously deleted from // HTML namespace. const COMMON_SVG_AND_HTML_ELEMENTS = addToSet({}, [ 'title', 'style', 'font', 'a', 'script', ]); /* Parsing of strict XHTML documents */ let PARSER_MEDIA_TYPE: null | DOMParserSupportedType = null; const SUPPORTED_PARSER_MEDIA_TYPES = ['application/xhtml+xml', 'text/html']; const DEFAULT_PARSER_MEDIA_TYPE = 'text/html'; let transformCaseFunc: null | Parameters<typeof addToSet>[2] = null; /* Keep a reference to config to pass to hooks */ let CONFIG: Config | null = null; /* Ideally, do not touch anything below this line */ /* ______________________________________________ */ const formElement = document.createElement('form'); const isRegexOrFunction = function ( testValue: unknown ): testValue is Function | RegExp { return testValue instanceof RegExp || testValue instanceof Function; }; /** * _parseConfig * * @param cfg optional config literal */ // eslint-disable-next-line complexity const _parseConfig = function (cfg: Config = {}): void { if (CONFIG && CONFIG === cfg) { return; } /* Shield configuration object from tampering */ if (!cfg || typeof cfg !== 'object') { cfg = {}; } /* Shield configuration object from prototype pollution */ cfg = clone(cfg); PARSER_MEDIA_TYPE = // eslint-disable-next-line unicorn/prefer-includes SUPPORTED_PARSER_MEDIA_TYPES.indexOf(cfg.PARSER_MEDIA_TYPE) === -1 ? DEFAULT_PARSER_MEDIA_TYPE : cfg.PARSER_MEDIA_TYPE; // HTML tags and attributes are not case-sensitive, converting to lowercase. Keeping XHTML as is. transformCaseFunc = PARSER_MEDIA_TYPE === 'application/xhtml+xml' ? stringToString : stringToLowerCase; /* Set configuration parameters */ ALLOWED_TAGS = objectHasOwnProperty(cfg, 'ALLOWED_TAGS') && arrayIsArray(cfg.ALLOWED_TAGS) ? addToSet({}, cfg.ALLOWED_TAGS, transformCaseFunc) : DEFAULT_ALLOWED_TAGS; ALLOWED_ATTR = objectHasOwnProperty(cfg, 'ALLOWED_ATTR') && arrayIsArray(cfg.ALLOWED_ATTR) ? addToSet({}, cfg.ALLOWED_ATTR, transformCaseFunc) : DEFAULT_ALLOWED_ATTR; ALLOWED_NAMESPACES = objectHasOwnProperty(cfg, 'ALLOWED_NAMESPACES') && arrayIsArray(cfg.ALLOWED_NAMESPACES) ? addToSet({}, cfg.ALLOWED_NAMESPACES, stringToString) : DEFAULT_ALLOWED_NAMESPACES; URI_SAFE_ATTRIBUTES = objectHasOwnProperty(cfg, 'ADD_URI_SAFE_ATTR') && arrayIsArray(cfg.ADD_URI_SAFE_ATTR) ? addToSet( clone(DEFAULT_URI_SAFE_ATTRIBUTES), cfg.ADD_URI_SAFE_ATTR, transformCaseFunc ) : DEFAULT_URI_SAFE_ATTRIBUTES; DATA_URI_TAGS = objectHasOwnProperty(cfg, 'ADD_DATA_URI_TAGS') && arrayIsArray(cfg.ADD_DATA_URI_TAGS) ? addToSet( clone(DEFAULT_DATA_URI_TAGS), cfg.ADD_DATA_URI_TAGS, transformCaseFunc ) : DEFAULT_DATA_URI_TAGS; FORBID_CONTENTS = objectHasOwnProperty(cfg, 'FORBID_CONTENTS') && arrayIsArray(cfg.FORBID_CONTENTS) ? addToSet({}, cfg.FORBID_CONTENTS, transformCaseFunc) : DEFAULT_FORBID_CONTENTS; FORBID_TAGS = objectHasOwnProperty(cfg, 'FORBID_TAGS') && arrayIsArray(cfg.FORBID_TAGS) ? addToSet({}, cfg.FORBID_TAGS, transformCaseFunc) : clone({}); FORBID_ATTR = objectHasOwnProperty(cfg, 'FORBID_ATTR') && arrayIsArray(cfg.FORBID_ATTR) ? addToSet({}, cfg.FORBID_ATTR, transformCaseFunc) : clone({}); USE_PROFILES = objectHasOwnProperty(cfg, 'USE_PROFILES') ? cfg.USE_PROFILES && typeof cfg.USE_PROFILES === 'object' ? clone(cfg.USE_PROFILES) : cfg.USE_PROFILES : false; ALLOW_ARIA_ATTR = cfg.ALLOW_ARIA_ATTR !== false; // Default true ALLOW_DATA_ATTR = cfg.ALLOW_DATA_ATTR !== false; // Default true ALLOW_UNKNOWN_PROTOCOLS = cfg.ALLOW_UNKNOWN_PROTOCOLS || false; // Default false ALLOW_SELF_CLOSE_IN_ATTR = cfg.ALLOW_SELF_CLOSE_IN_ATTR !== false; // Default true SAFE_FOR_TEMPLATES = cfg.SAFE_FOR_TEMPLATES || false; // Default false SAFE_FOR_XML = cfg.SAFE_FOR_XML !== false; // Default true WHOLE_DOCUMENT = cfg.WHOLE_DOCUMENT || false; // Default false RETURN_DOM = cfg.RETURN_DOM || false; // Default false RETURN_DOM_FRAGMENT = cfg.RETURN_DOM_FRAGMENT || false; // Default false RETURN_TRUSTED_TYPE = cfg.RETURN_TRUSTED_TYPE || false; // Default false FORCE_BODY = cfg.FORCE_BODY || false; // Default false SANITIZE_DOM = cfg.SANITIZE_DOM !== false; // Default true SANITIZE_NAMED_PROPS = cfg.SANITIZE_NAMED_PROPS || false; // Default false KEEP_CONTENT = cfg.KEEP_CONTENT !== false; // Default true IN_PLACE = cfg.IN_PLACE || false; // Default false IS_ALLOWED_URI = isRegex(cfg.ALLOWED_URI_REGEXP) ? cfg.ALLOWED_URI_REGEXP : EXPRESSIONS.IS_ALLOWED_URI; // Default regexp NAMESPACE = typeof cfg.NAMESPACE === 'string' ? cfg.NAMESPACE : HTML_NAMESPACE; // Default HTML namespace MATHML_TEXT_INTEGRATION_POINTS = objectHasOwnProperty(cfg, 'MATHML_TEXT_INTEGRATION_POINTS') && cfg.MATHML_TEXT_INTEGRATION_POINTS && typeof cfg.MATHML_TEXT_INTEGRATION_POINTS === 'object' ? clone(cfg.MATHML_TEXT_INTEGRATION_POINTS) : addToSet({}, ['mi', 'mo', 'mn', 'ms', 'mtext']); // Default built-in map HTML_INTEGRATION_POINTS = objectHasOwnProperty(cfg, 'HTML_INTEGRATION_POINTS') && cfg.HTML_INTEGRATION_POINTS && typeof cfg.HTML_INTEGRATION_POINTS === 'object' ? clone(cfg.HTML_INTEGRATION_POINTS) : addToSet({}, ['annotation-xml']); // Default built-in map const customElementHandling = objectHasOwnProperty(cfg, 'CUSTOM_ELEMENT_HANDLING') && cfg.CUSTOM_ELEMENT_HANDLING && typeof cfg.CUSTOM_ELEMENT_HANDLING === 'object' ? clone(cfg.CUSTOM_ELEMENT_HANDLING) : create(null); CUSTOM_ELEMENT_HANDLING = create(null); if ( objectHasOwnProperty(customElementHandling, 'tagNameCheck') && isRegexOrFunction(customElementHandling.tagNameCheck) ) { CUSTOM_ELEMENT_HANDLING.tagNameCheck = customElementHandling.tagNameCheck; // Default undefined } if ( objectHasOwnProperty(customElementHandling, 'attributeNameCheck') && isRegexOrFunction(customElementHandling.attributeNameCheck) ) { CUSTOM_ELEMENT_HANDLING.attributeNameCheck = customElementHandling.attributeNameCheck; // Default undefined } if ( objectHasOwnProperty( customElementHandling, 'allowCustomizedBuiltInElements' ) && typeof customElementHandling.allowCustomizedBuiltInElements === 'boolean' ) { CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements = customElementHandling.allowCustomizedBuiltInElements; // Default undefined } if (SAFE_FOR_TEMPLATES) { ALLOW_DATA_ATTR = false; } if (RETURN_DOM_FRAGMENT) { RETURN_DOM = true; } /* Parse profile info */ if (USE_PROFILES) { ALLOWED_TAGS = addToSet({}, TAGS.text); ALLOWED_ATTR = create(null); if (USE_PROFILES.html === true) { addToSet(ALLOWED_TAGS, TAGS.html); addToSet(ALLOWED_ATTR, ATTRS.html); } if (USE_PROFILES.svg === true) { addToSet(ALLOWED_TAGS, TAGS.svg); addToSet(ALLOWED_ATTR, ATTRS.svg); addToSet(ALLOWED_ATTR, ATTRS.xml); } if (USE_PROFILES.svgFilters === true) { addToSet(ALLOWED_TAGS, TAGS.svgFilters); addToSet(ALLOWED_ATTR, ATTRS.svg); addToSet(ALLOWED_ATTR, ATTRS.xml); } if (USE_PROFILES.mathMl === true) { addToSet(ALLOWED_TAGS, TAGS.mathMl); addToSet(ALLOWED_ATTR, ATTRS.mathMl); addToSet(ALLOWED_ATTR, ATTRS.xml); } } /* Always reset function-based ADD_TAGS / ADD_ATTR checks to prevent * leaking across calls when switching from function to array config */ EXTRA_ELEMENT_HANDLING.tagCheck = null; EXTRA_ELEMENT_HANDLING.attributeCheck = null; /* Merge configuration parameters */ if (objectHasOwnProperty(cfg, 'ADD_TAGS')) { if (typeof cfg.ADD_TAGS === 'function') { EXTRA_ELEMENT_HANDLING.tagCheck = cfg.ADD_TAGS; } else if (arrayIsArray(cfg.ADD_TAGS)) { if (ALLOWED_TAGS === DEFAULT_ALLOWED_TAGS) { ALLOWED_TAGS = clone(ALLOWED_TAGS); } addToSet(ALLOWED_TAGS, cfg.ADD_TAGS, transformCaseFunc); } } if (objectHasOwnProperty(cfg, 'ADD_ATTR')) { if (typeof cfg.ADD_ATTR === 'function') { EXTRA_ELEMENT_HANDLING.attributeCheck = cfg.ADD_ATTR; } else if (arrayIsArray(cfg.ADD_ATTR)) { if (ALLOWED_ATTR === DEFAULT_ALLOWED_ATTR) { ALLOWED_ATTR = clone(ALLOWED_ATTR); } addToSet(ALLOWED_ATTR, cfg.ADD_ATTR, transformCaseFunc); } } if ( objectHasOwnProperty(cfg, 'ADD_URI_SAFE_ATTR') && arrayIsArray(cfg.ADD_URI_SAFE_ATTR) ) { addToSet(URI_SAFE_ATTRIBUTES, cfg.ADD_URI_SAFE_ATTR, transformCaseFunc); } if ( objectHasOwnProperty(cfg, 'FORBID_CONTENTS') && arrayIsArray(cfg.FORBID_CONTENTS) ) { if (FORBID_CONTENTS === DEFAULT_FORBID_CONTENTS) { FORBID_CONTENTS = clone(FORBID_CONTENTS); } addToSet(FORBID_CONTENTS, cfg.FORBID_CONTENTS, transformCaseFunc); } if ( objectHasOwnProperty(cfg, 'ADD_FORBID_CONTENTS') && arrayIsArray(cfg.ADD_FORBID_CONTENTS) ) { if (FORBID_CONTENTS === DEFAULT_FORBID_CONTENTS) { FORBID_CONTENTS = clone(FORBID_CONTENTS); } addToSet(FORBID_CONTENTS, cfg.ADD_FORBID_CONTENTS, transformCaseFunc); } /* Add #text in case KEEP_CONTENT is set to true */ if (KEEP_CONTENT) { ALLOWED_TAGS['#text'] = true; } /* Add html, head and body to ALLOWED_TAGS in case WHOLE_DOCUMENT is true */ if (WHOLE_DOCUMENT) { addToSet(ALLOWED_TAGS, ['html', 'head', 'body']); } /* Add tbody to ALLOWED_TAGS in case tables are permitted, see #286, #365 */ if (ALLOWED_TAGS.table) { addToSet(ALLOWED_TAGS, ['tbody']); delete FORBID_TAGS.tbody; } if (cfg.TRUSTED_TYPES_POLICY) { if (typeof cfg.TRUSTED_TYPES_POLICY.createHTML !== 'function') { throw typeErrorCreate( 'TRUSTED_TYPES_POLICY configuration option must provide a "createHTML" hook.' ); } if (typeof cfg.TRUSTED_TYPES_POLICY.createScriptURL !== 'function') { throw typeErrorCreate( 'TRUSTED_TYPES_POLICY configuration option must provide a "createScriptURL" hook.' ); } // Overwrite existing TrustedTypes policy. trustedTypesPolicy = cfg.TRUSTED_TYPES_POLICY; // Sign local variables required by `sanitize`. emptyHTML = trustedTypesPolicy.createHTML(''); } else { // Uninitialized policy, attempt to initialize the internal dompurify policy. if (trustedTypesPolicy === undefined) { trustedTypesPolicy = _createTrustedTypesPolicy( trustedTypes, currentScript ); } // If creating the internal policy succeeded sign internal variables. if (trustedTypesPolicy !== null && typeof emptyHTML === 'string') { emptyHTML = trustedTypesPolicy.createHTML(''); } } // Prevent further manipulation of configuration. // Not available in IE8, Safari 5, etc. if (freeze) { freeze(cfg); } CONFIG = cfg; }; /* Keep track of all possible SVG and MathML tags * so that we can perform the namespace checks * correctly. */ const ALL_SVG_TAGS = addToSet({}, [ ...TAGS.svg, ...TAGS.svgFilters, ...TAGS.svgDisallowed, ]); const ALL_MATHML_TAGS = addToSet({}, [ ...TAGS.mathMl, ...TAGS.mathMlDisallowed, ]); /** * @param element a DOM element whose namespace is being checked * @returns Return false if the element has a * namespace that a spec-compliant parser would never * return. Return true otherwise. */ const _checkValidNamespace = function (element: Element): boolean { let parent = getParentNode(element); // In JSDOM, if we're inside shadow DOM, then parentNode // can be null. We just simulate parent in this case. if (!parent || !parent.tagName) { parent = { namespaceURI: NAMESPACE, tagName: 'template', }; } const tagName = stringToLowerCase(element.tagName); const parentTagName = stringToLowerCase(parent.tagName); if (!ALLOWED_NAMESPACES[element.namespaceURI]) { return false; } if (element.namespaceURI === SVG_NAMESPACE) { // The only way to switch from HTML namespace to SVG // is via <svg>. If it happens via any other tag, then // it should be killed. if (parent.namespaceURI === HTML_NAMESPACE) { return tagName === 'svg'; } // The only way to switch from MathML to SVG is via` // svg if parent is either <annotation-xml> or MathML // text integration points. if (parent.namespaceURI === MATHML_NAMESPACE) { return ( tagName === 'svg' && (parentTagName === 'annotation-xml' || MATHML_TEXT_INTEGRATION_POINTS[parentTagName]) ); } // We only allow elements that are defined in SVG // spec. All others are disallowed in SVG namespace. return Boolean(ALL_SVG_TAGS[tagName]); } if (element.namespaceURI === MATHML_NAMESPACE) { // The only way to switch from HTML namespace to MathML // is via <math>. If it happens via any other tag, then // it should be killed. if (parent.namespaceURI === HTML_NAMESPACE) { return tagName === 'math'; } // The only way to switch from SVG to MathML is via // <math> and HTML integration points if (parent.namespaceURI === SVG_NAMESPACE) { return tagName === 'math' && HTML_INTEGRATION_POINTS[parentTagName]; } // We only allow elements that are defined in MathML // spec. All others are disallowed in MathML namespace. return Boolean(ALL_MATHML_TAGS[tagName]); } if (element.namespaceURI === HTML_NAMESPACE) { // The only way to switch from SVG to HTML is via // HTML integration points, and from MathML to HTML // is via MathML text integration points if ( parent.namespaceURI === SVG_NAMESPACE && !HTML_INTEGRATION_POINTS[parentTagName] ) { return false; } if ( parent.namespaceURI === MATHML_NAMESPACE && !MATHML_TEXT_INTEGRATION_POINTS[parentTagName] ) { return false; } // We disallow tags that are specific for MathML // or SVG and should never appear in HTML namespace return ( !ALL_MATHML_TAGS[tagName] && (COMMON_SVG_AND_HTML_ELEMENTS[tagName] || !ALL_SVG_TAGS[tagName]) ); } // For XHTML and XML documents that support custom namespaces if ( PARSER_MEDIA_TYPE === 'application/xhtml+xml' && ALLOWED_NAMESPACES[element.namespaceURI] ) { return true; } // The code should never reach this place (this means // that the element somehow got namespace that is not // HTML, SVG, MathML or allowed via ALLOWED_NAMESPACES). // Return false just in case. return false; }; /** * _forceRemove * * @param node a DOM node */ const _forceRemove = function (node: Node): void { arrayPush(DOMPurify.removed, { element: node }); try { // eslint-disable-next-line unicorn/prefer-dom-node-remove getParentNode(node).removeChild(node); } catch (_) { remove(node); } }; /** * _removeAttribute * * @param name an Attribute name * @param element a DOM node */ const _removeAttribute = function (name: string, element: Element): void { try { arrayPush(DOMPurify.removed, { attribute: element.getAttributeNode(name), from: element, }); } catch (_) { arrayPush(DOMPurify.removed, { attribute: null, from: element, }); } element.removeAttribute(name); // We void attribute values for unremovable "is" attributes if (name === 'is') { if (RETURN_DOM || RETURN_DOM_FRAGMENT) { try { _forceRemove(element); } catch (_) {} } else { try { element.setAttribute(name, ''); } catch (_) {} } } }; /** * _initDocument * * @param dirty - a string of dirty markup * @return a DOM, filled with the dirty markup */ const _initDocument = function (dirty: string): Document { /* Create a HTML document */ let doc = null; let leadingWhitespace = null; if (FORCE_BODY) { dirty = '<remove></remove>' + dirty; } else { /* If FORCE_BODY isn't used, leading whitespace needs to be preserved manually */ const matches = stringMatch(dirty, /^[\r\n\t ]+/); leadingWhitespace = matches && matches[0]; } if ( PARSER_MEDIA_TYPE === 'application/xhtml+xml' && NAMESPACE === HTML_NAMESPACE ) { // Root of XHTML doc must contain xmlns declaration (see https://www.w3.org/TR/xhtml1/normative.html#strict) dirty = '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>' + dirty + '</body></html>'; } const dirtyPayload = trustedTypesPolicy ? trustedTypesPolicy.createHTML(dirty) : dirty; /* * Use the DOMParser API by default, fallback later if needs be * DOMParser not work for svg when has multiple root element. */ if (NAMESPACE === HTML_NAMESPACE) { try { doc = new DOMParser().parseFromString(dirtyPayload, PARSER_MEDIA_TYPE); } catch (_) {} } /* Use createHTMLDocument in case DOMParser is not available */ if (!doc || !doc.documentElement) { doc = implementation.createDocument(NAMESPACE, 'template', null); try { doc.documentElement.innerHTML = IS_EMPTY_INPUT ? emptyHTML : dirtyPayload; } catch (_) { // Syntax error if dirtyPayload is invalid xml } } const body = doc.body || doc.documentElement; if (dirty && leadingWhitespace) { body.insertBefore( document.createTextNode(leadingWhitespace), body.childNodes[0] || null ); } /* Work on whole document or just its body */ if (NAMESPACE === HTML_NAMESPACE) { return getElementsByTagName.call( doc, WHOLE_DOCUMENT ? 'html' : 'body' )[0]; } return WHOLE_DOCUMENT ? doc.documentElement : body; }; /** * Creates a NodeIterator object that you can use to traverse filtered lists of nodes or elements in a document. * * @param root The root element or node to start traversing on. * @return The created NodeIterator */ const _createNodeIterator = function (root: Node): NodeIterator { return createNodeIterator.call( root.ownerDocument || root, root, // eslint-disable-next-line no-bitwise NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT | NodeFilter.SHOW_TEXT | NodeFilter.SHOW_PROCESSING_INSTRUCTION | NodeFilter.SHOW_CDATA_SECTION, null ); }; /** * Strip template-engine expressions ({{...}}, ${...}, <%...%>) from the * character data of an element subtree. Used as the final safety net for * SAFE_FOR_TEMPLATES on every DOM-returning code path so that expressions * which only form after text-node normalization (e.g. fragments split across * stripped elements) cannot survive into a template-evaluating framework. * * Walks text/comment/CDATA/processing-instruction nodes and mutates `.data` * in place rather than round-tripping through innerHTML. This preserves * descendant node references (important for IN_PLACE callers), avoids a * serialize/reparse cycle, and reads literal character data — which means * `<%...%>` in text content matches the ERB regex against its real bytes * instead of the HTML-entity-escaped form innerHTML would produce. * * Attribute values are not visited here; SAFE_FOR_TEMPLATES handling for * attributes is performed during the per-node `_sanitizeAttributes` pass. * * @param node The root element whose character data should be scrubbed. */ const _scrubTemplateExpressions = function (node: Element): void { node.normalize(); const walker = createNodeIterator.call( node.ownerDocument || node, node, // eslint-disable-next-line no-bitwise NodeFilter.SHOW_TEXT | NodeFilter.SHOW_COMMENT | NodeFilter.SHOW_CDATA_SECTION | NodeFilter.SHOW_PROCESSING_INSTRUCTION, null ); let currentNode = walker.nextNode() as CharacterData | null; while (currentNode) { let data = currentNode.data; arrayForEach([MUSTACHE_EXPR, ERB_EXPR, TMPLIT_EXPR], (expr: RegExp) => { data = stringReplace(data, expr, ' '); }); currentNode.data = data; currentNode = walker.nextNode() as CharacterData | null; } }; /** * _isClobbered * * @param element element to check for clobbering attacks * @return true if clobbered, false if safe */ const _isClobbered = function (element: Element): boolean { return ( element instanceof HTMLFormElement && (typeof element.nodeName !== 'string' || typeof element.textContent !== 'string' || typeof element.removeChild !== 'function' || !(element.attributes instanceof NamedNodeMap) || typeof element.removeAttribute !== 'function' || typeof element.setAttribute !== 'function' || typeof element.namespaceURI !== 'string' || typeof element.insertBefore !== 'function' || typeof element.hasChildNodes !== 'function') ); }; /** * Checks whether the given object is a DOM node, including nodes that * originate from a different window/realm (e.g. an iframe's * contentDocument). The previous `value instanceof Node` check was * realm-bound: nodes from a different window failed it, causing * sanitize() to silently stringify them and reset IN_PLACE to false, * returning the original node unsanitized. See GHSA-4w3q-35jp-p934. * * Implementation: call the cached `nodeType` getter from Node.prototype * directly on the value. This bypasses any clobbered instance property * (e.g. a child element named "nodeType") and works across realms * because the WebIDL `nodeType` getter reads an internal slot that * every real Node has, regardless of which window minted it. * * @param value object to check whether it's a DOM node * @return true if value is a DOM node from any realm */ const _isNode = function (value: unknown): value is Node { if (!getNodeType || typeof value !== 'object' || value === null) { return false; } try { return typeof getNodeType(value) === 'number'; } catch (_) { return false; } }; function _executeHooks<T extends HookFunction>( hooks: HookFunction[], currentNode: Parameters<T>[0], data: Parameters<T>[1] ): void { arrayForEach(hooks, (hook: T) => { hook.call(DOMPurify, currentNode, data, CONFIG); }); } /** * _sanitizeElements * * @protect nodeName * @protect textContent * @protect removeChild * @param currentNode to check for permission to exist * @return true if node was killed, false if left alive */ const _sanitizeElements = function (currentNode: any): boolean { let content = null; /* Execute a hook if present */ _executeHooks(hooks.beforeSanitizeElements, currentNode, null); /* Check if element is clobbered or can clobber */ if (_isClobbered(currentNode)) { _forceRemove(currentNode); return true; } /* Now let's check the element's type and name */ const tagName = transformCaseFunc(currentNode.nodeName); /* Execute a hook if present */ _executeHooks(hooks.uponSanitizeElement, currentNode, { tagName, allowedTags: ALLOWED_TAGS, }); /* Detect mXSS attempts abusing namespace confusion */ if ( SAFE_FOR_XML && currentNode.hasChildNodes() && !_isNode(currentNode.firstElementChild) && regExpTest(/<[/\w!]/g, currentNode.innerHTML) && regExpTest(/<[/\w!]/g, currentNode.textContent) ) { _forceRemove(currentNode); return true; } /* Remove risky CSS construction leading to mXSS */ if ( SAFE_FOR_XML && currentNode.namespaceURI === HTML_NAMESPACE && tagName === 'style' && _isNode(currentNode.firstElementChild) ) { _forceRemove(currentNode); return true; } /* Remove any occurrence of processing instructions */ if (currentNode.nodeType === NODE_TYPE.progressingInstruction) { _forceRemove(currentNode); return true; } /* Remove any kind of possibly harmful comments */ if ( SAFE_FOR_XML && currentNode.nodeType === NODE_TYPE.comment && regExpTest(/<[/\w]/g, currentNode.data) ) { _forceRemove(currentNode); return true; } /* Remove element if anything forbids its presence */ if ( FORBID_TAGS[tagName] || (!( EXTRA_ELEMENT_HANDLING.tagCheck instanceof Function && EXTRA_ELEMENT_HANDLING.tagCheck(tagName) ) && !ALLOWED_TAGS[tagName]) ) { /* Check if we have a custom element to handle */ if (!FORBID_TAGS[tagName] && _isBasicCustomElement(tagName)) { if ( CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.tagNameCheck, tagName) ) { return false; } if ( CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.tagNameCheck(tagName) ) { return false; } } /* Keep content except for bad-listed elements */ if (KEEP_CONTENT && !FORBID_CONTENTS[tagName]) { const parentNode = getParentNode(currentNode) || currentNode.parentNode; const childNodes = getChildNodes(currentNode) || currentNode.childNodes; if (childNodes && parentNode) { const childCount = childNodes.length; for (let i = childCount - 1; i >= 0; --i) { const childClone = cloneNode(childNodes[i], true); parentNode.insertBefore(childClone, getNextSibling(currentNode)); } } } _forceRemove(currentNode); return true; } /* Check whether element has a valid namespace */ if (currentNode instanceof Element && !_checkValidNamespace(currentNode)) { _forceRemove(currentNode); return true; } /* Make sure that older browsers don't get fallback-tag mXSS */ if ( (tagName === 'noscript' || tagName === 'noembed' || tagName === 'noframes') && regExpTest(/<\/no(script|embed|frames)/i, currentNode.innerHTML) ) { _forceRemove(currentNode); return true; } /* Sanitize element content to be template-safe */ if (SAFE_FOR_TEMPLATES && currentNode.nodeType === NODE_TYPE.text) { /* Get the element's text content */ content = currentNode.textContent; arrayForEach([MUSTACHE_EXPR, ERB_EXPR, TMPLIT_EXPR], (expr: RegExp) => { content = stringReplace(content, expr, ' '); }); if (currentNode.textContent !== content) { arrayPush(DOMPurify.removed, { element: currentNode.cloneNode() }); currentNode.textContent = content; } } /* Execute a hook if present */ _executeHooks(hooks.afterSanitizeElements, currentNode, null); return false; }; /** * _isValidAttribute * * @param lcTag Lowercase tag name of containing element. * @param lcName Lowercase attribute name. * @param value Attribute value. * @return Returns true if `value` is valid, otherwise false. */ // eslint-disable-next-line complexity const _isValidAttribute = function ( lcTag: string, lcName: string, value: string ): boolean { /* FORBID_ATTR must always win, even if ADD_ATTR predicate would allow it */ if (FORBID_ATTR[lcName]) { return false; } /* Make sure attribute cannot clobber */ if ( SANITIZE_DOM && (lcName === 'id' || lcName === 'name') && (value in document || value in formElement) ) { return false; } const nameIsPermitted = ALLOWED_ATTR[lcName] || (EXTRA_ELEMENT_HANDLING.attributeCheck instanceof Function && EXTRA_ELEMENT_HANDLING.attributeCheck(lcName, lcTag)); /* Allow valid data-* attributes: At least one character after "-" (https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes) XML-compatible (https://html.spec.whatwg.org/multipage/infrastructure.html#xml-compatible and http://www.w3.org/TR/xml/#d0e804) We don't need to check the value; it's always URI safe. */ if ( ALLOW_DATA_ATTR && !FORBID_ATTR[lcName] && regExpTest(DATA_ATTR, lcName) ) { // This attribute is safe } else if (ALLOW_ARIA_ATTR && regExpTest(ARIA_ATTR, lcName)) { // This attribute is safe /* Otherwise, check the name is permitted */ } else if (!nameIsPermitted || FORBID_ATTR[lcName]) { if ( // First condition does a very basic check if a) it's basically a valid custom element tagname AND // b) if the tagName passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.tagNameCheck // and c) if the attribute name passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.attributeNameCheck (_isBasicCustomElement(lcTag) && ((CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.tagNameCheck, lcTag)) || (CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.tagNameCheck(lcTag))) && ((CUSTOM_ELEMENT_HANDLING.attributeNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.attributeNameCheck, lcName)) || (CUSTOM_ELEMENT_HANDLING.attributeNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.attributeNameCheck(lcName, lcTag)))) || // Alternative, second condition checks if it's an `is`-attribute, AND // the value passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.tagNameCheck (lcName === 'is' && CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements && ((CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.tagNameCheck, value)) || (CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.tagNameCheck(value)))) ) { // If user has supplied a regexp or function in CUSTOM_ELEMENT_HANDLING.tagNameCheck, we need to also allow derived custom elements using the same tagName test. // Additionally, we need to allow attributes passing the CUSTOM_ELEMENT_HANDLING.attributeNameCheck user has configured, as custom elements can define these at their own discretion. } else { return false; } /* Check value is safe. First, is attr inert? If so, is safe */ } else if (URI_SAFE_ATTRIBUTES[lcName]) { // This attribute is safe /* Check no script, data or unknown possibly unsafe URI unless we know URI values are safe for that attribute */ } else if ( regExpTest(IS_ALLOWED_URI, stringReplace(value, ATTR_WHITESPACE, '')) ) { // This attribute is safe /* Keep image data URIs alive if src/xlink:href is allowed */ /* Further prevent gadget XSS for dynamically built script tags */ } else if ( (lcName === 'src' || lcName === 'xlink:href' || lcName === 'href') && lcTag !== 'script' && stringIndexOf(value, 'data:') === 0 && DATA_URI_TAGS[lcTag] ) { // This attribute is safe /* Allow unknown protocols: This provides support for links that are handled by protocol handlers which may be unknown ahead of time, e.g. fb:, spotify: */ } else if ( ALLOW_UNKNOWN_PROTOCOLS && !regExpTest(IS_SCRIPT_OR_DATA, stringReplace(value, ATTR_WHITESPACE, '')) ) { // This attribute is safe /* Check for binary attributes */ } else if (value) { return false; } else { // Binary attributes are safe at this point /* Anything else, presume unsafe, do not add it back */ } return true; }; /* Names the HTML spec reserves from valid-custom-element-name; these must * never be treated as basic custom elements even when a permissive * CUSTOM_ELEMENT_HANDLING.tagNameCheck is configured. */ const RESERVED_CUSTOM_ELEMENT_NAMES = addToSet({}, [ 'annotation-xml', 'color-profile', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src', 'font-face-uri', 'missing-glyph', ]); /** * _isBasicCustomElement * checks if at least one dash is included in tagName, and it's not the first char * for more sophisticated checking see https://github.com/sindresorhus/validate-element-name * * @param tagName name of the tag of the node to sanitize * @returns Returns true if the tag name meets the basic criteria for a custom element, otherwise false. */ const _isBasicCustomElement = function (tagName: string): boolean { return ( !RESERVED_CUSTOM_ELEMENT_NAMES[stringToLowerCase(tagName)] && regExpTest(CUSTOM_ELEMENT, tagName) ); }; /** * _sanitizeAttributes * * @protect attributes * @protect nodeName * @protect removeAttribute * @protect setAttribute * * @param currentNode to sanitize */ const _sanitizeAttributes = function (currentNode: Element): void { /* Execute a hook if present */ _executeHooks(hooks.beforeSanitizeAttributes, currentNode, null); const { attributes } = currentNode; /* Check if we have attributes; if not we might have a text node */ if (!attributes || _isClobbered(currentNode)) { return; } const hookEvent = { attrName: '', attrValue: '', keepAttr: true, allowedAttributes: ALLOWED_ATTR, forceKeepAttr: undefined, }; let l = attributes.length; /* Go backwards over all attributes; safely remove bad ones */ while (l--) { const attr = attributes[l]; const { name, namespaceURI, value: attrValue } = attr; const lcName = transformCaseFunc(name); const initValue = attrValue; let value = name === 'value' ? initValue : stringTrim(initValue); /* Execute a hook if present */ hookEvent.attrName = lcName; hookEvent.attrValue = value; hookEvent.keepAttr = true; hookEvent.forceKeepAttr = undefined; // Allows developers to see this is a property they can set _executeHooks(hooks.uponSanitizeAttribute, currentNode, hookEvent); value = hookEvent.attrValue; /* Full DOM Clobbering protection via namespace isolation, * Prefix id and name attributes with `user-content-` */ if ( SANITIZE_NAMED_PROPS && (lcName === 'id' || lcName === 'name') && stringIndexOf(value, SANITIZE_NAMED_PROPS_PREFIX) !== 0 ) { // Remove the attribute with this value _removeAttribute(name, currentNode); // Prefix the value and later re-create the attribute with the sanitized value value = SANITIZE_NAMED_PROPS_PREFIX + value; } // Else: already prefixed, leave the attribute alone — the prefix is // itself the clobbering protection, and re-applying it is incorrect. /* Work around a security issue with comments inside attributes */ if ( SAFE_FOR_XML && regExpTest( /((--!?|])>)|<\/(style|script|title|xmp|textarea|noscript|iframe|noembed|noframes)/i, value ) ) { _removeAttribute(name, currentNode); continue; } /* Make sure we cannot easily use animated hrefs, even if animations are allowed */ if (lcName === 'attributename' && stringMatch(value, 'href')) { _removeAttribute(name, currentNode); continue; } /* Did the hooks approve of the attribute? */ if (hookEvent.forceKeepAttr) { continue; } /* Did the hooks approve of the attribute? */ if (!hookEvent.keepAttr) { _removeAttribute(name, currentNode); con