UNPKG

marked-sanitizer-github

Version:
364 lines 11.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.SanitizeConfig = exports.SanitizeWhitelist = void 0; const url_1 = require("url"); const path_1 = require("path"); const htmlparser2_1 = require("htmlparser2"); const he_1 = require("he"); const voidElements = require("html-void-elements"); const VOID_ELEMENTS = new Set(voidElements); const RE_EXTRACT_TAG_NAME = /<\/([^>]+)>/; var HowToSanitize; (function (HowToSanitize) { HowToSanitize[HowToSanitize["Escape"] = 0] = "Escape"; HowToSanitize[HowToSanitize["Remove"] = 1] = "Remove"; HowToSanitize[HowToSanitize["DoNothing"] = 2] = "DoNothing"; HowToSanitize[HowToSanitize["EscapeWithoutPush"] = 3] = "EscapeWithoutPush"; })(HowToSanitize || (HowToSanitize = {})); class SanitizeWhitelist { constructor() { this.ELEMENTS = new Set([ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'h8', 'br', 'b', 'i', 'strong', 'em', 'a', 'pre', 'code', 'img', 'tt', 'div', 'ins', 'del', 'sup', 'sub', 'p', 'ol', 'ul', 'table', 'thead', 'tbody', 'tfoot', 'blockquote', 'dl', 'dt', 'dd', 'kbd', 'q', 'samp', 'var', 'hr', 'ruby', 'rt', 'rp', 'li', 'tr', 'td', 'th', 's', 'strike', 'summary', 'details', ]); this.REMOVE_CONTENTS = ['script']; this.ATTRIBUTES = { a: ['href'], img: ['src', 'longdesc'], div: ['itemscope', 'itemtype'], blockquote: ['cite'], del: ['cite'], ins: ['cite'], q: ['cite'], '*': new Set([ 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'hreflang', 'hspace', 'ismap', 'label', 'lang', 'maxlength', 'media', 'method', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'start', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width', 'itemprop', ]), }; // Note: Relative path also should be allowed this.PROTOCOLS = { a: { href: ['http', 'https', 'mailto', 'github-windows', 'github-mac'], }, blockquote: { cite: ['http', 'https'], }, del: { cite: ['http', 'https'], }, ins: { cite: ['http', 'https'], }, q: { cite: ['http', 'https'], }, img: { src: ['http', 'https'], longdesc: ['http', 'https'], }, }; } } exports.SanitizeWhitelist = SanitizeWhitelist; class SanitizeConfig { constructor() { this.LIST = ['ul', 'ol']; this.LIST_ITEM = 'li'; this.TABLE_ITEMS = ['tr', 'td', 'th']; this.TABLE = 'table'; this.TABLE_SECTIONS = ['thead', 'tbody', 'tfoot']; this.whitelist = new SanitizeWhitelist(); } } exports.SanitizeConfig = SanitizeConfig; class SanitizeState { constructor() { this.config = new SanitizeConfig(); this.onDetectedBroken = null; this.broken = false; this.tagStack = []; this.parser = new htmlparser2_1.Parser({ onopentag: (name, attrs) => { this.parsed = { name, attrs }; }, oncomment: () => { this.parsed = null; }, }); } reset() { this.tagStack = []; this.broken = false; } isInUse() { return this.broken || this.tagStack.length !== 0; } isBroken() { return this.broken; } getSanitizer() { return this.sanitize.bind(this); } sanitize(tag) { if (this.broken) { return he_1.escape(tag); } if (tag.startsWith('</')) { return this.sanitizeCloseTag(tag); } else { return this.sanitizeOpenTag(tag); } } itsBroken(msg, tag) { if (this.broken) { // Already broken return; } this.broken = true; if (this.onDetectedBroken !== null) { this.onDetectedBroken(msg, tag); } } sanitizeCloseTag(tag) { const matched = tag.match(RE_EXTRACT_TAG_NAME); if (matched === null) { this.itsBroken(`Closing HTML tag is broken: '${tag}'`, tag); return he_1.escape(tag); } const tagName = matched[1].toLowerCase(); if (VOID_ELEMENTS.has(tagName)) { return ''; } if (!this.config.whitelist.ELEMENTS.has(tagName)) { // If tag name is not allowed, it is always escaped return he_1.escape(tag); } // Note: This check must be done after above void element check because tag history // stack is empty when a void element is at toplevel and with a closing tag. if (this.tagStack.length === 0) { this.itsBroken('Extra closing HTML tags in the document', tag); return he_1.escape(tag); } // Check top const [name, how] = this.tagStack[this.tagStack.length - 1]; if (tagName !== name) { this.itsBroken(`Open/Closing HTML tag mismatch: </${name}> v.s. ${tag}`, tag); return he_1.escape(tag); } // Pop this.tagStack.pop(); switch (how) { case HowToSanitize.Remove: return ''; case HowToSanitize.Escape: return he_1.escape(tag); case HowToSanitize.DoNothing: return tag; case HowToSanitize.EscapeWithoutPush: throw new Error('NEVER REACH HERE'); } } sanitizeOpenTag(tag) { const elem = this.parseOpenTag(tag); switch (elem) { case null: // null means comment return ''; case undefined: this.itsBroken(`Failed to parse open HTML tag: '${tag}'`, tag); return he_1.escape(tag); default: break; } const how = this.howToSanitize(elem); if (how !== HowToSanitize.EscapeWithoutPush && !tag.endsWith('/>') && !VOID_ELEMENTS.has(elem.name)) { // Note: If it's not an empty element, push history stack // Note: If the element is void element, we don't push it to tag hisotry stack. // On sanitizeCloseTag(), closing tags for void elements are simply skipped and they // never appear in converted HTML document. this.tagStack.push([elem.name, how]); } switch (how) { case HowToSanitize.Remove: return ''; case HowToSanitize.Escape: case HowToSanitize.EscapeWithoutPush: return he_1.escape(tag); case HowToSanitize.DoNothing: return tag; } } parseOpenTag(tag) { this.parser.reset(); this.parser.write(tag); this.parser.end(); const parsed = this.parsed; this.parsed = undefined; return parsed; } howToSanitize(elem) { // Top-level <li> elements are removed because they can break out of // containing markup. if (elem.name === this.config.LIST_ITEM && this.tagStack.every(([name, _]) => this.config.LIST.indexOf(name) === -1)) { return HowToSanitize.Remove; } // Table child elements that are not contained by a <table> are removed. if ((this.config.TABLE_SECTIONS.indexOf(elem.name) !== -1 || this.config.TABLE_ITEMS.indexOf(elem.name) !== -1) && this.tagStack.every(([name, _]) => this.config.TABLE !== name)) { return HowToSanitize.Remove; } const wl = this.config.whitelist; // Check allowed (non escaped) elements if (!wl.ELEMENTS.has(elem.name)) { return HowToSanitize.EscapeWithoutPush; } // TODO: Check elements should be removed (not escaped, but just removed) // It's hard to remove content of the element with current markedjs implementation. const allowedAttrs = wl.ATTRIBUTES[elem.name]; for (const attr of Object.keys(elem.attrs)) { // Check allowed attributes const isAllowedOwnAttr = allowedAttrs !== undefined && allowedAttrs.indexOf(attr) !== -1; const isAllowedAllAttr = wl.ATTRIBUTES['*'].has(attr); if (!isAllowedOwnAttr && !isAllowedAllAttr) { return HowToSanitize.Escape; } // Check allowed protocols (e.g. 'href' of <a/>) if (elem.name in wl.PROTOCOLS && attr in wl.PROTOCOLS[elem.name]) { const value = elem.attrs[attr]; try { const u = new url_1.URL(value); const protocol = u.protocol.slice(0, -1); // Omit last ':' const allowedProtocols = wl.PROTOCOLS[elem.name][attr]; if (allowedProtocols.every(p => p !== protocol)) { return HowToSanitize.Escape; } } catch (_) { // Not a URL if (path_1.isAbsolute(value)) { return HowToSanitize.Escape; } } } } return HowToSanitize.DoNothing; } } exports.default = SanitizeState; //# sourceMappingURL=index.js.map