@diplodoc/transform
Version:
A simple transformer of text in YFM (Yandex Flavored Markdown) to HTML
731 lines • 17.8 kB
JavaScript
;
// Module-interop helper (looks like standard TypeScript compiler output): exposes
// property `k` of module object `m` on object `o` under the name `k2` (`k2` defaults to `k`).
// An already defined `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Use a forwarding getter when `m` has no own descriptor for `k`, when the descriptor is an
// accessor and `m` is not flagged __esModule, or when it is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when Object.create is unavailable: plain value copy.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the `default` property of `o`
// (an enumerable defined property when Object.create exists, a plain assignment otherwise).
// An already defined `this.__setModuleDefault` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Module-interop helper used for namespace-style imports.
// Returns `mod` unchanged when it is flagged __esModule; otherwise builds a new object that
// re-exposes every own property of `mod` except "default" and sets `default` to `mod` itself.
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
// Module-interop helper used for default imports: returns `mod` unchanged when it is flagged
// __esModule, otherwise wraps it as `{ default: mod }` so callers can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.sanitize = exports.sanitizeStyles = exports.defaultOptions = exports.defaultParseOptions = void 0;
const sanitize_html_1 = __importDefault(require("sanitize-html"));
// @ts-ignore
const cssfilter_1 = __importDefault(require("cssfilter"));
const cheerio = __importStar(require("cheerio"));
const css_1 = __importDefault(require("css"));
const log_1 = __importDefault(require("./log"));
// HTML element names that are merged into `allowedTags` (together with `svgTags` and the
// sanitize-html default tag list). Note that 'iframe' and 'style' are part of this list.
const htmlTags = [
'a',
'abbr',
'acronym',
'address',
'area',
'article',
'aside',
'audio',
'b',
'bdi',
'bdo',
'big',
'blink',
'blockquote',
'body',
'br',
'button',
'canvas',
'caption',
'center',
'cite',
'code',
'col',
'colgroup',
'content',
'data',
'datalist',
'dd',
'decorator',
'del',
'details',
'dfn',
'dialog',
'dir',
'div',
'dl',
'dt',
'element',
'em',
'fieldset',
'figcaption',
'figure',
'font',
'footer',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'head',
'header',
'hgroup',
'hr',
'html',
'i',
'img',
'input',
'ins',
'kbd',
'label',
'legend',
'li',
'main',
'map',
'mark',
'marquee',
'menu',
'menuitem',
'meter',
'nav',
'nobr',
'ol',
'optgroup',
'option',
'output',
'p',
'picture',
'pre',
'progress',
'q',
'rp',
'rt',
'ruby',
's',
'samp',
'section',
'select',
'shadow',
'small',
'source',
'spacer',
'span',
'strike',
'strong',
'sub',
'summary',
'sup',
'table',
'tbody',
'td',
'template',
'textarea',
'tfoot',
'th',
'thead',
'time',
'tr',
'track',
'tt',
'u',
'ul',
'var',
'video',
'wbr',
'iframe',
'style',
];
// SVG element names (written in lower case) that are merged into `allowedTags`.
// 'font' also appears in `htmlTags`; duplicates are removed where the lists are merged.
const svgTags = [
'svg',
'altglyph',
'altglyphdef',
'altglyphitem',
'animatecolor',
'animatemotion',
'animatetransform',
'circle',
'clippath',
'defs',
'desc',
'ellipse',
'filter',
'font',
'g',
'glyph',
'glyphref',
'hkern',
'image',
'line',
'lineargradient',
'marker',
'mask',
'metadata',
'mpath',
'path',
'pattern',
'polygon',
'polyline',
'radialgradient',
'rect',
'stop',
'switch',
'symbol',
'text',
'textpath',
'title',
'tref',
'tspan',
'view',
'vkern',
'animate',
'use',
];
// HTML attribute names that are merged into `allowedAttributes`, which the default options
// register under the '*' key (i.e. for every tag). The list ends with a 'data-*' wildcard
// entry and the custom names 'wide-content' and 'sticky-header'.
const htmlAttrs = [
'accept',
'action',
'align',
'alt',
'autocapitalize',
'autocomplete',
'autopictureinpicture',
'autoplay',
'background',
'bgcolor',
'border',
'capture',
'cellpadding',
'cellspacing',
'checked',
'cite',
'class',
'clear',
'color',
'cols',
'colspan',
'controls',
'controlslist',
'coords',
'crossorigin',
'datetime',
'decoding',
'default',
'dir',
'disabled',
'disablepictureinpicture',
'disableremoteplayback',
'download',
'draggable',
'enctype',
'enterkeyhint',
'face',
'for',
'headers',
'height',
'hidden',
'high',
'href',
'hreflang',
'id',
'inputmode',
'integrity',
'ismap',
'kind',
'label',
'lang',
'list',
'loading',
'loop',
'low',
'max',
'maxlength',
'media',
'method',
'min',
'minlength',
'multiple',
'muted',
'name',
'nonce',
'noshade',
'novalidate',
'nowrap',
'open',
'optimum',
'pattern',
'placeholder',
'playsinline',
'poster',
'preload',
'pubdate',
'radiogroup',
'readonly',
'rel',
'required',
'rev',
'reversed',
'role',
'rows',
'rowspan',
'spellcheck',
'scope',
'selected',
'shape',
'size',
'sizes',
'span',
'srclang',
'start',
'src',
'srcset',
'step',
'style',
'summary',
'tabindex',
'title',
'translate',
'type',
'usemap',
'valign',
'value',
'width',
'xmlns',
'slot',
'frameborder',
'scrolling',
'allow',
'target',
'attributeName',
'aria-hidden',
'referrerpolicy',
'aria-describedby',
'data-*',
'wide-content',
'sticky-header',
];
// SVG attribute names that are merged into `allowedAttributes`.
// Both 'viewBox' and 'viewbox' are listed — presumably because the default parser options
// keep attribute-name casing (lowerCaseAttributeNames: false); confirm before removing either.
const svgAttrs = [
'viewBox',
'accent-height',
'accumulate',
'additive',
'alignment-baseline',
'ascent',
'attributename',
'attributetype',
'azimuth',
'basefrequency',
'baseline-shift',
'begin',
'bias',
'by',
'class',
'clip',
'clippathunits',
'clip-path',
'clip-rule',
'color',
'color-interpolation',
'color-interpolation-filters',
'color-profile',
'color-rendering',
'cx',
'cy',
'd',
'dx',
'dy',
'diffuseconstant',
'direction',
'display',
'divisor',
'dur',
'edgemode',
'elevation',
'end',
'fill',
'fill-opacity',
'fill-rule',
'filter',
'filterunits',
'flood-color',
'flood-opacity',
'font-family',
'font-size',
'font-size-adjust',
'font-stretch',
'font-style',
'font-variant',
'font-weight',
'fx',
'fy',
'g1',
'g2',
'glyph-name',
'glyphref',
'gradientunits',
'gradienttransform',
'height',
'href',
'id',
'image-rendering',
'in',
'in2',
'k',
'k1',
'k2',
'k3',
'k4',
'kerning',
'keypoints',
'keysplines',
'keytimes',
'lang',
'lengthadjust',
'letter-spacing',
'kernelmatrix',
'kernelunitlength',
'lighting-color',
'local',
'marker-end',
'marker-mid',
'marker-start',
'markerheight',
'markerunits',
'markerwidth',
'maskcontentunits',
'maskunits',
'max',
'mask',
'media',
'method',
'mode',
'min',
'name',
'numoctaves',
'offset',
'operator',
'opacity',
'order',
'orient',
'orientation',
'origin',
'overflow',
'paint-order',
'path',
'pathlength',
'patterncontentunits',
'patterntransform',
'patternunits',
'points',
'preservealpha',
'preserveaspectratio',
'primitiveunits',
'r',
'rx',
'ry',
'radius',
'refx',
'refy',
'repeatcount',
'repeatdur',
'restart',
'result',
'rotate',
'scale',
'seed',
'shape-rendering',
'specularconstant',
'specularexponent',
'spreadmethod',
'startoffset',
'stddeviation',
'stitchtiles',
'stop-color',
'stop-opacity',
'stroke-dasharray',
'stroke-dashoffset',
'stroke-linecap',
'stroke-linejoin',
'stroke-miterlimit',
'stroke-opacity',
'stroke',
'stroke-width',
'style',
'surfacescale',
'systemlanguage',
'tabindex',
'targetx',
'targety',
'transform',
'text-anchor',
'text-decoration',
'text-rendering',
'textlength',
'type',
'u1',
'u2',
'unicode',
'values',
'viewbox',
'visibility',
'version',
'vert-adv-y',
'vert-origin-x',
'vert-origin-y',
'width',
'word-spacing',
'wrap',
'writing-mode',
'xchannelselector',
'ychannelselector',
'x',
'x1',
'x2',
'xmlns',
'y',
'y1',
'y2',
'z',
'zoomandpan',
'from',
'to',
'xlink:href',
'use',
];
// CSS property whitelist: everything cssfilter allows by default plus the custom '--method' property.
const defaultCssWhitelist = Object.assign({}, cssfilter_1.default.whiteList, { '--method': true });
// Attribute names specific to YFM markup.
const yfmHtmlAttrs = ['note-type', 'term-key'];
// De-duplicated union of the local tag lists and the sanitize-html default tag list.
const allowedTags = [
    ...new Set([...htmlTags, ...svgTags, ...sanitize_html_1.default.defaults.allowedTags]),
];
// De-duplicated union of the HTML, SVG and YFM attribute lists.
const allowedAttributes = [...new Set([...htmlAttrs, ...svgAttrs, ...yfmHtmlAttrs])];
// Tag transformer for <use>: `href` / `xlink:href` survive only when they are local
// references (value starts with "#"); any other link value removes the attribute.
// Attributes whose value is null are dropped as well; everything else passes through.
const useTagTransformer = (tagName, attribs) => {
    const linkAttributeNames = ['xlink:href', 'href'];
    const keptPairs = [];
    for (const [name, value] of Object.entries(attribs)) {
        const isLink = linkAttributeNames.includes(name);
        if (isLink && !value.startsWith('#')) {
            // non-local link target: omit the attribute entirely
            continue;
        }
        if (value === null) {
            continue;
        }
        keptPairs.push([name, value]);
    }
    return { tagName, attribs: Object.fromEntries(keptPairs) };
};
exports.defaultParseOptions = {
lowerCaseAttributeNames: false,
};
exports.defaultOptions = Object.assign(Object.assign({}, sanitize_html_1.default.defaults), { allowedTags, allowedAttributes: Object.assign(Object.assign({}, sanitize_html_1.default.defaults.allowedAttributes), { '*': allowedAttributes }), allowedSchemesAppliedToAttributes: [
...sanitize_html_1.default.defaults.allowedSchemesAppliedToAttributes,
'xlink:href',
'from',
'to',
], allowVulnerableTags: true, parser: exports.defaultParseOptions, cssWhiteList: defaultCssWhitelist, transformTags: {
use: useTagTransformer,
} });
// Dangerous patterns: each of these is tested against a normalized CSS value in isSafeCssValue.
// Opening of an HTML/SVG tag that must not appear inside a CSS value.
const DANGEROUS_TAGS_RE = /<\s*(script|iframe|object|embed|svg|img|video|audio|link|meta|base|form|style|template|math|foreignobject)\b/i;
// Start of a closing </style> tag.
const CLOSE_STYLE_RE = /<\s*\/\s*style/i;
// url(...) whose target uses javascript:, vbscript:, or a data: URI with an HTML/XHTML/SVG media type.
const DANGEROUS_URL_RE = /url\s*\(\s*['"]?\s*(?:javascript:|vbscript:|data\s*:\s*(?:text\/html|application\/xhtml\+xml|image\/svg\+xml))/i;
// Legacy Internet Explorer constructs: expression(...) and the behavior property.
const IE_EXPR_RE = /expression\s*\(/i;
const IE_BEHAVIOR_RE = /behavior\s*:/i;
// Legacy Mozilla -moz-binding property.
const MOZ_BINDING_RE = /-moz-binding\s*:/i;
// At-rules that are rejected inside a value.
const AT_RULES_RE = /@(?:import|charset|namespace)\b/i;
const COMMENTS_RE = /\/\*[^]*?\*\//g; // CSS comments: /* ... */
// C0 control characters (except TAB, LF and CR) plus DEL, and BiDi override/isolate characters
// that can hide malicious content. C1 controls (U+0080-U+009F) are not covered by this pattern.
const CTRL_BIDI_RE = new RegExp([
String.raw `[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]`,
String.raw `[\u202A-\u202E\u2066-\u2069]`, // BiDi overrides
].join('|'), 'g');
// Cheap pre-filter for isSafeCssValue: a normalized value that does not match this
// is accepted immediately, without running the dangerous-pattern checks.
const SAFE_VALUE_FAST_CHECK_RE = /[<&\\/]|@|url\s*\(|expression|behavior|-moz-binding/i;
// Pre-check used by normalizeCssValue: a value that does not match is returned untouched.
// It therefore has to match everything the stripping/decoding steps act on:
// backslash (CSS escapes), ampersand (HTML entities), the opening of a CSS comment,
// C0 control characters / DEL (same ranges as CTRL_BIDI_RE) and BiDi overrides.
// Without the comment and control-character alternatives, inputs such as
// "exp/**/ression(" or "java\u0001script:" skipped normalization entirely.
// Values that differ only by Unicode compatibility forms still take the fast path, as before.
const FAST_PATH_RE = /[\\&\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u202A-\u202E\u2066-\u2069]|\/\*/;
// combined regex for decoding CSS escapes and HTML entities
// Exactly one of the four capture groups is set per match; their order matches the
// parameters of decodeToken (cssHex, htmlHex, htmlDec, named), which is used as the replacer.
const RE_DECODE = new RegExp([
String.raw `\\([0-9A-Fa-f]{1,6})\s?`,
String.raw `&#x([0-9A-Fa-f]{1,6});`,
String.raw `&#(\d{1,7});`,
String.raw `&([a-zA-Z][a-zA-Z0-9]{1,31});`, // HTML named entities: &lt; or &amp; → '<' or '&'
].join('|'), 'g');
// Named HTML entities that decodeToken resolves (entity name → character).
// decodeToken also retries the lookup with the lower-cased name, so the keys here are lower case.
// Any entity name not listed is left undecoded.
const htmlEntities = {
lt: '<',
gt: '>',
quot: '"',
apos: "'",
amp: '&',
newline: '\n',
tab: '\t',
colon: ':',
sol: '/',
lpar: '(',
rpar: ')',
};
// Decodes a single escaped or encoded token.
// Written as a String.prototype.replace callback for RE_DECODE: `whole` is the full match and
// exactly one of the remaining arguments is set, depending on which alternative matched:
//   cssHex  - hex digits of a CSS escape (\41)
//   htmlHex - hex digits of an HTML numeric reference (&#x41;)
//   htmlDec - decimal digits of an HTML numeric reference (&#65;)
//   named   - name of an HTML named entity (&lt;)
// Returns the decoded character(s), or `whole` unchanged when the token is not recognised.
function decodeToken(whole, cssHex, htmlHex, htmlDec, named) {
    const MAX_CODE_POINT = 0x10ffff;
    const fromDigits = (digits, radix) => {
        const codePoint = parseInt(digits, radix) || 0;
        // String.fromCodePoint throws RangeError above U+10FFFF; six hex digits or seven
        // decimal digits can exceed that, so map such values to U+FFFD instead of throwing.
        return codePoint > MAX_CODE_POINT ? '\uFFFD' : String.fromCodePoint(codePoint);
    };
    if (cssHex) {
        return fromDigits(cssHex, 16);
    }
    if (htmlHex) {
        return fromDigits(htmlHex, 16);
    }
    if (htmlDec) {
        return fromDigits(htmlDec, 10);
    }
    if (named) {
        // Own-property lookup only: names such as "constructor" or "toString" must not
        // resolve to members inherited from Object.prototype.
        const hasOwn = (key) => Object.prototype.hasOwnProperty.call(htmlEntities, key);
        const key = hasOwn(named) ? named : named.toLowerCase();
        const replacement = hasOwn(key) ? htmlEntities[key] : undefined;
        if (replacement) {
            return replacement;
        }
    }
    return whole;
}
// Produces a canonical form of a CSS value for safety checks: comments and control/BiDi
// characters are removed, CSS escapes and HTML entities are decoded, and the result is
// NFKC-normalized. null/undefined become ''; other inputs are converted with String().
function normalizeCssValue(value) {
    const raw = value === null || value === undefined ? '' : String(value);
    // nothing to strip or decode: hand the string back untouched
    if (!FAST_PATH_RE.test(raw)) {
        return raw;
    }
    // order matters: comments first (they may hide escapes), then control/BiDi
    // characters, then a single decoding pass over escapes and entities
    const withoutComments = raw.replace(COMMENTS_RE, '');
    const withoutControls = withoutComments.replace(CTRL_BIDI_RE, '');
    const decoded = withoutControls.replace(RE_DECODE, decodeToken);
    try {
        // NFKC folds compatibility forms (homograph protection)
        return decoded.normalize('NFKC');
    }
    catch (_) {
        // deliberately silent: logging the value could expose sensitive data
        return decoded;
    }
}
// Decides whether a CSS value is free of the known XSS patterns.
// `property` is the CSS property name (compared case-insensitively); `value` is normalized
// first so that comments and escapes cannot hide a pattern. Returns true when the value is
// considered safe, false otherwise.
function isSafeCssValue(property, value) {
    // the `content` property may legitimately carry tag-like text
    const allowsTagLikeText = property.toLowerCase() === 'content';
    const text = normalizeCssValue(value);
    // cheap pre-filter: nothing suspicious at all
    if (!SAFE_VALUE_FAST_CHECK_RE.test(text)) {
        return true;
    }
    // check if the normalized value looks like an HTML tag
    if (/<[^>]{0,128}>/i.test(text)) {
        if (CLOSE_STYLE_RE.test(text)) {
            return false;
        }
        if (!allowsTagLikeText && DANGEROUS_TAGS_RE.test(text)) {
            return false;
        }
    }
    // patterns that are rejected for every property
    const alwaysRejected = [
        DANGEROUS_URL_RE,
        IE_EXPR_RE,
        IE_BEHAVIOR_RE,
        MOZ_BINDING_RE,
        AT_RULES_RE, // @import, @charset, @namespace
    ];
    return !alwaysRejected.some((pattern) => pattern.test(text));
}
// Rewrites the text of every <style> element so that only plain rules with safe,
// whitelisted declarations remain. `dom` is the loaded document selector function and
// `cssWhiteList` maps allowed CSS property names to truthy values.
// A <style> element whose processing throws is removed and the error message is logged.
function sanitizeStyleTags(dom, cssWhiteList) {
    // Decides whether a declaration stays; a kept declaration gets its filtered value.
    const keepDeclaration = (declaration) => {
        if (!declaration.property || !declaration.value) {
            return false;
        }
        const name = String(declaration.property).toLowerCase();
        const rawValue = String(declaration.value);
        if (!isSafeCssValue(name, rawValue)) {
            return false;
        }
        if (!cssWhiteList[name]) {
            return false;
        }
        declaration.value = cssfilter_1.default.safeAttrValue(name, rawValue);
        // the filter may reduce the value to nothing; drop the declaration then
        return Boolean(declaration.value);
    };
    dom('style').each((_index, element) => {
        const styleText = dom(element).text();
        try {
            const parsedCSS = css_1.default.parse(styleText);
            const sheet = parsedCSS.stylesheet;
            if (sheet) {
                // keep ordinary rules only (at-rules, comments, etc. are discarded)
                sheet.rules = sheet.rules.filter((rule) => rule.type === 'rule');
                for (const rule of sheet.rules) {
                    if (rule.declarations) {
                        rule.declarations = rule.declarations.filter((declaration) => keepDeclaration(declaration));
                    }
                }
                dom(element).text(css_1.default.stringify(parsedCSS));
            }
        }
        catch (error) {
            dom(element).remove();
            const errorMessage = error instanceof Error ? error.message : `${error}`;
            log_1.default.info(errorMessage);
        }
    });
}
// Runs every non-empty inline `style` attribute in the document through a cssfilter
// FilterCSS instance configured with `cssWhiteList`, and writes the result back.
// Elements without a style attribute (or with an empty one) are left untouched.
function sanitizeStyleAttrs(dom, cssWhiteList) {
    const styleFilter = new cssfilter_1.default.FilterCSS({ whiteList: cssWhiteList });
    dom('*').each((_index, element) => {
        const inlineStyle = dom(element).attr('style');
        if (inlineStyle) {
            dom(element).attr('style', styleFilter.process(inlineStyle));
        }
    });
}
// Sanitizes CSS inside an HTML string: <style> elements first, then inline style attributes,
// both against `options.cssWhiteList` (an empty whitelist when the option is missing).
// Returns the inner HTML of <head> followed by the inner HTML of <body> of the loaded document.
function sanitizeStyles(html, options) {
    const whiteList = options.cssWhiteList || {};
    const doc = cheerio.load(html);
    sanitizeStyleTags(doc, whiteList);
    sanitizeStyleAttrs(doc, whiteList);
    const parts = ['head', 'body'].map((selector) => doc(selector).html() || '');
    return parts[0] + parts[1];
}
exports.sanitizeStyles = sanitizeStyles;
// Sanitizes an HTML string.
//   html              - markup to sanitize
//   options           - sanitizer options; exports.defaultOptions is used when omitted
//   additionalOptions - optional extras; `cssWhiteList` is merged over options.cssWhiteList
// Unless `disableStyleSanitizer` is set in the options, CSS in <style> elements and style
// attributes is sanitized first; the result is then passed through sanitize-html.
// Returns the sanitized HTML string.
function sanitize(html, options, additionalOptions) {
    const baseOptions = options || exports.defaultOptions;
    const extraCssWhiteList = additionalOptions === null || additionalOptions === undefined
        ? undefined
        : additionalOptions.cssWhiteList;
    // Merge into a copy: writing the merged whitelist back onto `baseOptions` would change the
    // caller's object (or the shared default options) and leak the extras into later calls.
    const sanitizeOptions = extraCssWhiteList
        ? Object.assign({}, baseOptions, {
            cssWhiteList: Object.assign({}, baseOptions.cssWhiteList, extraCssWhiteList),
        })
        : baseOptions;
    const needToSanitizeStyles = !sanitizeOptions.disableStyleSanitizer;
    const modifiedHtml = needToSanitizeStyles ? sanitizeStyles(html, sanitizeOptions) : html;
    return (0, sanitize_html_1.default)(modifiedHtml, sanitizeOptions);
}
exports.sanitize = sanitize;
exports.default = sanitize;
//# sourceMappingURL=sanitize.js.map