UNPKG

ng-sanitize

Version:

angular-sanitize module without using the entire angular.js

github.com/gdi2290/ngSanitize

gdi2290/ngSanitize

490 lines (429 loc) • 16.7 kB

JavaScript

!function(root, factory) { // Set up ngSanitize appropriately for the environment. Start with AMD. if (typeof define === 'function' && define.amd) { define(['exports'], function(exports) { // Export global even in AMD case in case this script is loaded with // others that may still expect a global ngSanitize. root.ngSanitize = factory(root, exports); }); // Next for Node.js or CommonJS. jQuery may not be needed as a module. } else if (typeof exports !== 'undefined') { factory(root, exports); // Finally, as a browser global. } else { root.ngSanitize = factory(root, {}); } }(this, function(root, ngSanitize) { 'use strict'; ngSanitize.aHrefSanitizationWhitelist = aHrefSanitizationWhitelist; ngSanitize.imgSrcSanitizationWhitelist = imgSrcSanitizationWhitelist; ngSanitize.ngSanitize = ngSanitize; var _aHrefSanitizationWhitelist = /^\s*(https?|ftp|mailto|tel|file):/; var _imgSrcSanitizationWhitelist = /^\s*(https?|ftp|file):|data:image\//; var toString = Object.prototype.toString; var slice = Array.prototype.slice; var urlParsingNode = document.createElement('a'); var msie = int((/msie (\d+)/.exec(lowercase(navigator.userAgent)) || [])[1]); if (isNaN(msie)) { msie = int((/trident\/.*; rv:(\d+)/.exec(lowercase(navigator.userAgent)) || [])[1]); } function imgSrcSanitizationWhitelist(regexp) { if (isDefined(regexp)) { _imgSrcSanitizationWhitelist = regexp; return this; } return _imgSrcSanitizationWhitelist; } function aHrefSanitizationWhitelist(regexp) { if (isDefined(regexp)) { _aHrefSanitizationWhitelist = regexp; return this; } return _aHrefSanitizationWhitelist; } function ngSanitize(html) { var buf = []; htmlParser(html, htmlSanitizeWriter(buf, function(uri, isImage) { return !/^unsafe/.test(sanitizeUri(uri, isImage)); })); return buf.join(''); } function noop() {} function isString(value){ return typeof value === 'string'; } function lowercase(string){ return isString(string) ? string.toLowerCase() : string; } function isDefined(value){ return typeof value !== 'undefined'; } function int(str) { return parseInt(str, 10); } function isFunction(value){ return typeof value === 'function'; } function isWindow(obj) { return obj && obj.document && obj.location && obj.alert && obj.setInterval; } function isArray(value) { return toString.call(value) === '[object Array]'; } function sliceArgs(args, startIndex) { return slice.call(args, startIndex || 0); } function isArrayLike(obj) { if (obj === null || isWindow(obj)) { return false; } var length = obj.length; if (obj.nodeType === 1 && length) { return true; } return isString(obj) || isArray(obj) || length === 0 || typeof length === 'number' && length > 0 && (length - 1) in obj; } function bind(self, fn) { var curryArgs = arguments.length > 2 ? sliceArgs(arguments, 2) : []; if (isFunction(fn) && !(fn instanceof RegExp)) { return curryArgs.length ? function() { return arguments.length ? fn.apply(self, curryArgs.concat(slice.call(arguments, 0))) : fn.apply(self, curryArgs); } : function() { return arguments.length ? fn.apply(self, arguments) : fn.call(self); }; } else { // in IE, native methods are not functions so they cannot be bound (note: they don't need to be) return fn; } } function forEach(obj, iterator, context) { var key; if (obj) { if (isFunction(obj)){ for (key in obj) { // Need to check if hasOwnProperty exists, // as on IE8 the result of querySelectorAll is an object without a hasOwnProperty function if (key != 'prototype' && key != 'length' && key != 'name' && (!obj.hasOwnProperty || obj.hasOwnProperty(key))) { iterator.call(context, obj[key], key); } } } else if (obj.forEach && obj.forEach !== forEach) { obj.forEach(iterator, context); } else if (isArrayLike(obj)) { for (key = 0; key < obj.length; key++) iterator.call(context, obj[key], key); } else { for (key in obj) { if (obj.hasOwnProperty(key)) { iterator.call(context, obj[key], key); } } } } return obj; } function extend(dst) { forEach(arguments, function(obj){ if (obj !== dst) { forEach(obj, function(value, key){ dst[key] = value; }); } }); return dst; } function makeMap(str) { var obj = {}, items = str.split(','), i; for (i = 0; i < items.length; i++) obj[items[i]] = true; return obj; } function urlResolve(url, base) { var href = url; if (msie) { // Normalize before parse. Refer Implementation Notes on why this is // done in two steps on IE. urlParsingNode.setAttribute('href', href); href = urlParsingNode.href; } urlParsingNode.setAttribute('href', href); // urlParsingNode provides the UrlUtils interface - http://url.spec.whatwg.org/#urlutils return { href: urlParsingNode.href, protocol: urlParsingNode.protocol ? urlParsingNode.protocol.replace(/:$/, '') : '', host: urlParsingNode.host, search: urlParsingNode.search ? urlParsingNode.search.replace(/^\?/, '') : '', hash: urlParsingNode.hash ? urlParsingNode.hash.replace(/^#/, '') : '', hostname: urlParsingNode.hostname, port: urlParsingNode.port, pathname: (urlParsingNode.pathname.charAt(0) === '/') ? urlParsingNode.pathname : '/' + urlParsingNode.pathname }; } function sanitizeUri(uri, isImage) { var regex = isImage ? _imgSrcSanitizationWhitelist : _aHrefSanitizationWhitelist; var normalizedVal; // NOTE: urlResolve() doesn't support IE < 8 so we don't sanitize for that case. if (!msie || msie >= 8 ) { normalizedVal = urlResolve(uri).href; if (normalizedVal !== '' && !normalizedVal.match(regex)) { return 'unsafe:'+normalizedVal; } } return uri; } function sanitizeText(chars) { var buf = []; var writer = htmlSanitizeWriter(buf, noop); writer.chars(chars); return buf.join(''); } // Regular Expressions for parsing tags and attributes var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/, ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g, BEGIN_TAG_REGEXP = /^</, BEGING_END_TAGE_REGEXP = /^<\s*\//, COMMENT_REGEXP = //g, DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i, CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g, // Match everything outside of normal chars and " (quote character) NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Good source of info about elements and attributes // http://dev.w3.org/html5/spec/Overview.html#semantics // http://simon.html5.org/html-elements // Safe Void Elements - HTML5 // http://dev.w3.org/html5/spec/Overview.html#void-elements var voidElements = makeMap('area,br,col,hr,img,wbr'); // Elements that you can, intentionally, leave open (and which close themselves) // http://dev.w3.org/html5/spec/Overview.html#optional-tags var optionalEndTagBlockElements = makeMap('colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr'), optionalEndTagInlineElements = makeMap('rp,rt'), optionalEndTagElements = extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements); // Safe Block Elements - HTML5 var blockElements = extend({}, optionalEndTagBlockElements, makeMap('address,article,' + 'aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,' + 'h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul')); // Inline Elements - HTML5 var inlineElements = extend({}, optionalEndTagInlineElements, makeMap('a,abbr,acronym,b,' + 'bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,' + 'samp,small,span,strike,strong,sub,sup,time,tt,u,var')); // Special Elements (can contain anything) var specialElements = makeMap('script,style'); var validElements = extend({}, voidElements, blockElements, inlineElements, optionalEndTagElements); //Attributes that have href and hence need to be sanitized var uriAttrs = makeMap('background,cite,href,longdesc,src,usemap'); var validAttrs = extend({}, uriAttrs, makeMap( 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+ 'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+ 'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+ 'scope,scrolling,shape,size,span,start,summary,target,title,type,'+ 'valign,value,vspace,width')); /** * @example * htmlParser(htmlString, { * start: function(tag, attrs, unary) {}, * end: function(tag) {}, * chars: function(text) {}, * comment: function(text) {} * }); * * @param {string} html string * @param {object} handler */ function htmlParser( html, handler ) { var index, chars, match, stack = [], last = html; stack.last = function() { return stack[ stack.length - 1 ]; }; while ( html ) { chars = true; // Make sure we're not in a script or style element if ( !stack.last() || !specialElements[ stack.last() ] ) { // Comment if ( html.indexOf('', index) === index) { if (handler.comment) handler.comment( html.substring( 4, index ) ); html = html.substring( index + 3 ); chars = false; } // DOCTYPE } else if ( DOCTYPE_REGEXP.test(html) ) { match = html.match( DOCTYPE_REGEXP ); if ( match ) { html = html.replace( match[0], ''); chars = false; } // end tag } else if ( BEGING_END_TAGE_REGEXP.test(html) ) { match = html.match( END_TAG_REGEXP ); if ( match ) { html = html.substring( match[0].length ); match[0].replace( END_TAG_REGEXP, parseEndTag ); chars = false; } // start tag } else if ( BEGIN_TAG_REGEXP.test(html) ) { match = html.match( START_TAG_REGEXP ); if ( match ) { html = html.substring( match[0].length ); match[0].replace( START_TAG_REGEXP, parseStartTag ); chars = false; } } if ( chars ) { index = html.indexOf('<'); var text = index < 0 ? html : html.substring( 0, index ); html = index < 0 ? '' : html.substring( index ); if (handler.chars) { handler.chars( decodeEntities(text) ); } } } else { html = html.replace(new RegExp('(.*)<\\s*\\/\\s*' + stack.last() + '[^>]*>', 'i'), function(all, text){ text = text.replace(COMMENT_REGEXP, '$1').replace(CDATA_REGEXP, '$1'); if (handler.chars) { handler.chars( decodeEntities(text) ); } return ''; }); parseEndTag( '', stack.last() ); } if ( html == last ) { throw new Error('badparse', 'The sanitizer was unable to parse the following block ' + 'of html: {0}', html); } last = html; } // Clean up any remaining tags parseEndTag(); function parseStartTag( tag, tagName, rest, unary ) { tagName = lowercase(tagName); if ( blockElements[ tagName ] ) { while ( stack.last() && inlineElements[ stack.last() ] ) { parseEndTag( '', stack.last() ); } } if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) { parseEndTag( '', tagName ); } unary = voidElements[ tagName ] || !!unary; if ( !unary ) stack.push( tagName ); var attrs = {}; rest.replace(ATTR_REGEXP, function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) { var value = doubleQuotedValue || singleQuotedValue || unquotedValue || ''; attrs[name] = decodeEntities(value); }); if (handler.start) { handler.start( tagName, attrs, unary ); } } function parseEndTag( tag, tagName ) { var pos = 0, i; tagName = lowercase(tagName); if ( tagName ) // Find the closest opened tag of the same type for ( pos = stack.length - 1; pos >= 0; pos-- ) if ( stack[ pos ] == tagName ) break; if ( pos >= 0 ) { // Close all the open elements, up the stack for ( i = stack.length - 1; i >= pos; i-- ) if (handler.end) { handler.end( stack[ i ] ); } // Remove the open elements from the stack stack.length = pos; } } } var hiddenPre = document.createElement('pre'); var spaceRe = /^(\s*)([\s\S]*?)(\s*)$/; /** * decodes all entities into regular string * @param value * @returns {string} A string with decoded entities. */ function decodeEntities(value) { if (!value) { return ''; } // Note: IE8 does not preserve spaces at the start/end of innerHTML // so we must capture them and reattach them afterward var parts = spaceRe.exec(value); var spaceBefore = parts[1]; var spaceAfter = parts[3]; var content = parts[2]; if (content) { hiddenPre.innerHTML=content.replace(/</g,'<'); // innerText depends on styling as it doesn't display hidden elements. // Therefore, it's better to use textContent not to cause unnecessary // reflows. However, IE<9 don't support textContent so the innerText // fallback is necessary. content = 'textContent' in hiddenPre ? hiddenPre.textContent : hiddenPre.innerText; } return spaceBefore + content + spaceAfter; } /** * Escapes all potentially dangerous characters, so that the * resulting string can be safely inserted into attribute or * element text. * @param value * @returns {string} escaped text */ function encodeEntities(value) { return value. replace(/&/g, '&'). replace(NON_ALPHANUMERIC_REGEXP, function(value){ return '&#' + value.charCodeAt(0) + ';'; }). replace(/</g, '<'). replace(/>/g, '>'); } /** * create an HTML/XML writer which writes to buffer * @param {Array} buf use buf.jain('') to get out sanitized html string * @returns {object} in the form of { * start: function(tag, attrs, unary) {}, * end: function(tag) {}, * chars: function(text) {}, * comment: function(text) {} * } */ function htmlSanitizeWriter(buf, uriValidator){ var ignore = false; var out = bind(buf, buf.push); return { start: function(tag, attrs, unary){ tag = lowercase(tag); if (!ignore && specialElements[tag]) { ignore = tag; } if (!ignore && validElements[tag] === true) { out('<'); out(tag); forEach(attrs, function(value, key){ var lkey = lowercase(key); var isImage = (tag === 'img' && lkey === 'src') || (lkey === 'background'); if (validAttrs[lkey] === true && (uriAttrs[lkey] !== true || uriValidator(value, isImage))) { out(' '); out(key); out('="'); out(encodeEntities(value)); out('"'); } }); out(unary ? '/>' : '>'); } }, end: function(tag){ tag = lowercase(tag); if (!ignore && validElements[tag] === true) { out('</'); out(tag); out('>'); } if (tag == ignore) { ignore = false; } }, chars: function(chars){ if (!ignore) { out(encodeEntities(chars)); } } }; } return ngSanitize; });