UNPKG

@diplodoc/transform

Version:

A simple transformer of text in YFM (Yandex Flavored Markdown) to HTML

731 lines 17.8 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.sanitize = exports.sanitizeStyles = exports.defaultOptions = exports.defaultParseOptions = void 0; const sanitize_html_1 = __importDefault(require("sanitize-html")); // @ts-ignore const cssfilter_1 = __importDefault(require("cssfilter")); const cheerio = __importStar(require("cheerio")); const css_1 = __importDefault(require("css")); const log_1 = __importDefault(require("./log")); const htmlTags = [ 'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi', 'bdo', 'big', 'blink', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'content', 'data', 'datalist', 'dd', 'decorator', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'element', 'em', 'fieldset', 'figcaption', 'figure', 'font', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'main', 'map', 'mark', 'marquee', 'menu', 'menuitem', 'meter', 'nav', 'nobr', 'ol', 'optgroup', 'option', 'output', 'p', 'picture', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'select', 'shadow', 'small', 'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'tr', 'track', 'tt', 'u', 'ul', 'var', 'video', 'wbr', 'iframe', 'style', ]; const svgTags = [ 'svg', 'altglyph', 'altglyphdef', 'altglyphitem', 'animatecolor', 'animatemotion', 'animatetransform', 'circle', 'clippath', 'defs', 'desc', 'ellipse', 'filter', 'font', 'g', 'glyph', 'glyphref', 'hkern', 'image', 'line', 'lineargradient', 'marker', 'mask', 'metadata', 'mpath', 'path', 'pattern', 'polygon', 'polyline', 'radialgradient', 'rect', 'stop', 'switch', 'symbol', 'text', 'textpath', 'title', 'tref', 'tspan', 'view', 'vkern', 'animate', 'use', ]; const htmlAttrs = [ 'accept', 'action', 'align', 'alt', 'autocapitalize', 'autocomplete', 'autopictureinpicture', 'autoplay', 'background', 'bgcolor', 'border', 'capture', 'cellpadding', 'cellspacing', 'checked', 'cite', 'class', 'clear', 'color', 'cols', 'colspan', 'controls', 'controlslist', 'coords', 'crossorigin', 'datetime', 'decoding', 'default', 'dir', 'disabled', 'disablepictureinpicture', 'disableremoteplayback', 'download', 'draggable', 'enctype', 'enterkeyhint', 'face', 'for', 'headers', 'height', 'hidden', 'high', 'href', 'hreflang', 'id', 'inputmode', 'integrity', 'ismap', 'kind', 'label', 'lang', 'list', 'loading', 'loop', 'low', 'max', 'maxlength', 'media', 'method', 'min', 'minlength', 'multiple', 'muted', 'name', 'nonce', 'noshade', 'novalidate', 'nowrap', 'open', 'optimum', 'pattern', 'placeholder', 'playsinline', 'poster', 'preload', 'pubdate', 'radiogroup', 'readonly', 'rel', 'required', 'rev', 'reversed', 'role', 'rows', 'rowspan', 'spellcheck', 'scope', 'selected', 'shape', 'size', 'sizes', 'span', 'srclang', 'start', 'src', 'srcset', 'step', 'style', 'summary', 'tabindex', 'title', 'translate', 'type', 'usemap', 'valign', 'value', 'width', 'xmlns', 'slot', 'frameborder', 'scrolling', 'allow', 'target', 'attributeName', 'aria-hidden', 'referrerpolicy', 'aria-describedby', 'data-*', 'wide-content', 'sticky-header', ]; const svgAttrs = [ 'viewBox', 'accent-height', 'accumulate', 'additive', 'alignment-baseline', 'ascent', 'attributename', 'attributetype', 'azimuth', 'basefrequency', 'baseline-shift', 'begin', 'bias', 'by', 'class', 'clip', 'clippathunits', 'clip-path', 'clip-rule', 'color', 'color-interpolation', 'color-interpolation-filters', 'color-profile', 'color-rendering', 'cx', 'cy', 'd', 'dx', 'dy', 'diffuseconstant', 'direction', 'display', 'divisor', 'dur', 'edgemode', 'elevation', 'end', 'fill', 'fill-opacity', 'fill-rule', 'filter', 'filterunits', 'flood-color', 'flood-opacity', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'glyphref', 'gradientunits', 'gradienttransform', 'height', 'href', 'id', 'image-rendering', 'in', 'in2', 'k', 'k1', 'k2', 'k3', 'k4', 'kerning', 'keypoints', 'keysplines', 'keytimes', 'lang', 'lengthadjust', 'letter-spacing', 'kernelmatrix', 'kernelunitlength', 'lighting-color', 'local', 'marker-end', 'marker-mid', 'marker-start', 'markerheight', 'markerunits', 'markerwidth', 'maskcontentunits', 'maskunits', 'max', 'mask', 'media', 'method', 'mode', 'min', 'name', 'numoctaves', 'offset', 'operator', 'opacity', 'order', 'orient', 'orientation', 'origin', 'overflow', 'paint-order', 'path', 'pathlength', 'patterncontentunits', 'patterntransform', 'patternunits', 'points', 'preservealpha', 'preserveaspectratio', 'primitiveunits', 'r', 'rx', 'ry', 'radius', 'refx', 'refy', 'repeatcount', 'repeatdur', 'restart', 'result', 'rotate', 'scale', 'seed', 'shape-rendering', 'specularconstant', 'specularexponent', 'spreadmethod', 'startoffset', 'stddeviation', 'stitchtiles', 'stop-color', 'stop-opacity', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke', 'stroke-width', 'style', 'surfacescale', 'systemlanguage', 'tabindex', 'targetx', 'targety', 'transform', 'text-anchor', 'text-decoration', 'text-rendering', 'textlength', 'type', 'u1', 'u2', 'unicode', 'values', 'viewbox', 'visibility', 'version', 'vert-adv-y', 'vert-origin-x', 'vert-origin-y', 'width', 'word-spacing', 'wrap', 'writing-mode', 'xchannelselector', 'ychannelselector', 'x', 'x1', 'x2', 'xmlns', 'y', 'y1', 'y2', 'z', 'zoomandpan', 'from', 'to', 'xlink:href', 'use', ]; const defaultCssWhitelist = Object.assign(Object.assign({}, cssfilter_1.default.whiteList), { '--method': true }); const yfmHtmlAttrs = ['note-type', 'term-key']; const allowedTags = Array.from(new Set([...htmlTags, ...svgTags, ...sanitize_html_1.default.defaults.allowedTags])); const allowedAttributes = Array.from(new Set([...htmlAttrs, ...svgAttrs, ...yfmHtmlAttrs])); // For hrefs within "use" only allow local links to ids that start with "#" const useTagTransformer = (tagName, attribs) => { const cleanHref = (href) => { if (href.startsWith('#')) { return href; } else { return null; } }; const cleanAttrs = (attrs) => { const HREF_ATTRIBUTES = ['xlink:href', 'href']; return Object.fromEntries(Object.entries(attrs) .map(([key, value]) => { if (HREF_ATTRIBUTES.includes(key)) { return [key, cleanHref(value)]; } return [key, value]; }) .filter(([_, value]) => value !== null)); }; return { tagName, attribs: cleanAttrs(attribs), }; }; exports.defaultParseOptions = { lowerCaseAttributeNames: false, }; exports.defaultOptions = Object.assign(Object.assign({}, sanitize_html_1.default.defaults), { allowedTags, allowedAttributes: Object.assign(Object.assign({}, sanitize_html_1.default.defaults.allowedAttributes), { '*': allowedAttributes }), allowedSchemesAppliedToAttributes: [ ...sanitize_html_1.default.defaults.allowedSchemesAppliedToAttributes, 'xlink:href', 'from', 'to', ], allowVulnerableTags: true, parser: exports.defaultParseOptions, cssWhiteList: defaultCssWhitelist, transformTags: { use: useTagTransformer, } }); // dangerous patterns const DANGEROUS_TAGS_RE = /<\s*(script|iframe|object|embed|svg|img|video|audio|link|meta|base|form|style|template|math|foreignobject)\b/i; const CLOSE_STYLE_RE = /<\s*\/\s*style/i; const DANGEROUS_URL_RE = /url\s*\(\s*['"]?\s*(?:javascript:|vbscript:|data\s*:\s*(?:text\/html|application\/xhtml\+xml|image\/svg\+xml))/i; const IE_EXPR_RE = /expression\s*\(/i; const IE_BEHAVIOR_RE = /behavior\s*:/i; const MOZ_BINDING_RE = /-moz-binding\s*:/i; const AT_RULES_RE = /@(?:import|charset|namespace)\b/i; const COMMENTS_RE = /\/\*[^]*?\*\//g; // CSS comments: /* ... */ // control characters (C0/C1) and BiDi override characters that can hide malicious content const CTRL_BIDI_RE = new RegExp([ String.raw `[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]`, String.raw `[\u202A-\u202E\u2066-\u2069]`, // BiDi overrides ].join('|'), 'g'); const SAFE_VALUE_FAST_CHECK_RE = /[<&\\/]|@|url\s*\(|expression|behavior|-moz-binding/i; // backslash (CSS escapes), ampersand (HTML entities), BiDi overrides const FAST_PATH_RE = /[\\&\u202A-\u202E\u2066-\u2069]/; // combined regex for decoding CSS escapes and HTML entities const RE_DECODE = new RegExp([ String.raw `\\([0-9A-Fa-f]{1,6})\s?`, String.raw `&#x([0-9A-Fa-f]{1,6});`, String.raw `&#(\d{1,7});`, String.raw `&([a-zA-Z][a-zA-Z0-9]{1,31});`, // HTML named entities: &lt; or &amp; → '<' or '&' ].join('|'), 'g'); const htmlEntities = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&', newline: '\n', tab: '\t', colon: ':', sol: '/', lpar: '(', rpar: ')', }; // Decodes a single escaped or encoded token function decodeToken(whole, cssHex, htmlHex, htmlDec, named) { var _a; if (cssHex) { return String.fromCodePoint(parseInt(cssHex, 16) || 0); } if (htmlHex) { return String.fromCodePoint(parseInt(htmlHex, 16) || 0); } if (htmlDec) { return String.fromCodePoint(parseInt(htmlDec, 10) || 0); } if (named) { const rep = (_a = htmlEntities[named]) !== null && _a !== void 0 ? _a : htmlEntities[named.toLowerCase()]; if (rep) { return rep; } } return whole; } // Normalize CSS value by decoding HTML entities and CSS escapes function normalizeCssValue(value) { let normalized = String(value !== null && value !== void 0 ? value : ''); // early-exit if no special chars if (!FAST_PATH_RE.test(normalized)) { return normalized; } // 1. remove CSS comments to prevent hiding escapes inside /* ... */ // 2. strip control characters and BiDi overrides // 3. decode all CSS escapes and HTML entities in one pass normalized = normalized .replace(COMMENTS_RE, '') .replace(CTRL_BIDI_RE, '') .replace(RE_DECODE, decodeToken); // unicode normalization (NFKC) to prevent homograph attacks try { normalized = normalized.normalize('NFKC'); } catch (_) { // silent fail: logging the value could expose sensitive data } return normalized; } // checks if a CSS value is safe from XSS attacks function isSafeCssValue(property, value) { const prop = property.toLowerCase(); const isContentProperty = prop === 'content'; // normalize first to prevent bypasses via comments/escapes const normalized = normalizeCssValue(value); // early-exit for trivial safe values if (!SAFE_VALUE_FAST_CHECK_RE.test(normalized)) { return true; } // сheck if normalized value looks like an HTML tag const looksLikeTag = /<[^>]{0,128}>/i.test(normalized); const dangerousPatterns = [ looksLikeTag && CLOSE_STYLE_RE, !isContentProperty && looksLikeTag && DANGEROUS_TAGS_RE, DANGEROUS_URL_RE, IE_EXPR_RE, IE_BEHAVIOR_RE, MOZ_BINDING_RE, AT_RULES_RE, // @import, @charset, @namespace ].filter(Boolean); return !dangerousPatterns.some((pattern) => pattern.test(normalized)); } function sanitizeStyleTags(dom, cssWhiteList) { const styleTags = dom('style'); styleTags.each((_index, element) => { const styleText = dom(element).text(); try { const parsedCSS = css_1.default.parse(styleText); if (!parsedCSS.stylesheet) { return; } parsedCSS.stylesheet.rules = parsedCSS.stylesheet.rules.filter((rule) => rule.type === 'rule'); parsedCSS.stylesheet.rules.forEach((rule) => { if (!rule.declarations) { return; } rule.declarations = rule.declarations.filter((declaration) => { if (!declaration.property || !declaration.value) { return false; } const prop = String(declaration.property).toLowerCase(); const val = String(declaration.value); if (!isSafeCssValue(prop, val)) { return false; } const isWhiteListed = Boolean(cssWhiteList[prop]); if (isWhiteListed) { declaration.value = cssfilter_1.default.safeAttrValue(prop, val); } if (!declaration.value) { return false; } return isWhiteListed; }); }); dom(element).text(css_1.default.stringify(parsedCSS)); } catch (error) { dom(element).remove(); const errorMessage = error instanceof Error ? error.message : `${error}`; log_1.default.info(errorMessage); } }); } function sanitizeStyleAttrs(dom, cssWhiteList) { const options = { whiteList: cssWhiteList, }; const cssSanitizer = new cssfilter_1.default.FilterCSS(options); dom('*').each((_index, element) => { const styleAttrValue = dom(element).attr('style'); if (!styleAttrValue) { return; } dom(element).attr('style', cssSanitizer.process(styleAttrValue)); }); } function sanitizeStyles(html, options) { const cssWhiteList = options.cssWhiteList || {}; const $ = cheerio.load(html); sanitizeStyleTags($, cssWhiteList); sanitizeStyleAttrs($, cssWhiteList); const styles = $('head').html() || ''; const content = $('body').html() || ''; return styles + content; } exports.sanitizeStyles = sanitizeStyles; function sanitize(html, options, additionalOptions) { var _a; const sanitizeOptions = options || exports.defaultOptions; if (additionalOptions === null || additionalOptions === void 0 ? void 0 : additionalOptions.cssWhiteList) { sanitizeOptions.cssWhiteList = Object.assign(Object.assign({}, sanitizeOptions.cssWhiteList), additionalOptions.cssWhiteList); } const needToSanitizeStyles = !((_a = sanitizeOptions.disableStyleSanitizer) !== null && _a !== void 0 ? _a : false); const modifiedHtml = needToSanitizeStyles ? sanitizeStyles(html, sanitizeOptions) : html; return (0, sanitize_html_1.default)(modifiedHtml, sanitizeOptions); } exports.sanitize = sanitize; exports.default = sanitize; //# sourceMappingURL=sanitize.js.map