UNPKG

@thenja/html-parser

Version:
5 lines 11.1 kB
/*! * Html-Parser v1.1.3 * (c) 2022 Nathan Anderson */
/**
 * Thenja HTML parser, published as `<global>.Thenja.HtmlParser`.
 *
 * The published object exposes `ELEMENT_TYPES`, `TAG_TYPES` and the
 * `HtmlParser` class. `HtmlParser#parse` turns an HTML string into a tree of
 * plain node objects, `HtmlParser#reverse` turns such a tree back into a
 * string and `HtmlParser#clean` strips empty text nodes / empty tags.
 *
 * The global object is `window` when it exists; otherwise `self`,
 * `globalThis` or `global` is used so the bundle can also be loaded outside a
 * browser main thread.
 */
(function (globalRoot) {
  "use strict";

  /* ------------------------------------------------------------------ */
  /* Constants                                                          */
  /* ------------------------------------------------------------------ */

  /** Values of the `type` property of produced nodes. */
  var ELEMENT_TYPES = { TAG: "tag", TEXT: "text", COMMENT: "comment" };

  /** Lower-case names of tags that never have children or an end tag. */
  var EMPTY_TAGS = {
    area: 1,
    base: 1,
    basefont: 1,
    br: 1,
    col: 1,
    frame: 1,
    hr: 1,
    img: 1,
    input: 1,
    isindex: 1,
    link: 1,
    meta: 1,
    param: 1,
    embed: 1
  };

  /** Parser modes stored in `state.mode`. */
  var MODE_TYPES = { TEXT: "text", TAG: "tag", STYLE: "style", SCRIPT: "script" };

  /** Values of the `tagType` property of tag nodes. */
  var TAG_TYPES = {
    EMPTY: "empty",
    DEFAULT: "default",
    SCRIPT: "script",
    STYLE: "style",
    COMMENT: "comment"
  };

  /** Quote kinds tracked while looking for the end of a start tag. */
  var QUOTE_TYPES = { SINGLE: 1, DOUBLE: 2 };

  var hasOwn = Object.prototype.hasOwnProperty;

  // No "g" flag: used with String#search / RegExp#test, so no lastIndex state.
  var WHITESPACE_PATTERN = /[ \t\n\r]/;
  var QUOTE_THEN_SLASH_AT_END = /["']\/$/;

  /* ------------------------------------------------------------------ */
  /* Utility helpers                                                    */
  /* ------------------------------------------------------------------ */

  /** Trims `text` and removes every space, tab, LF and CR inside it. */
  function removeWhitespace(text) {
    return text.trim().replace(/[ \t\n\r]/g, "");
  }

  /** True when `ch` is a space, tab, LF or CR. */
  function isWhitespace(ch) {
    return ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
  }

  /** True for empty/missing text and for text made only of whitespace. */
  function textOnlyContainsWhitespace(text) {
    if (!text) {
      return true;
    }
    for (var i = 0; i < text.length; i++) {
      if (!isWhitespace(text[i])) {
        return false;
      }
    }
    return true;
  }

  /** True when `ch` is an ASCII letter. */
  function isLetter(ch) {
    return (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
  }

  /** Index of the first space/tab/LF/CR in `text`, or -1. */
  function indexOfWhitespace(text) {
    return text.search(WHITESPACE_PATTERN);
  }

  /** Forwards `error` to the parser's error callback when one is set. */
  function reportError(parser, error) {
    if (parser.errorCb) {
      parser.errorCb(error);
    }
  }

  /**
   * Removes the temporary `parentElement` back-reference from `node` and all
   * of its still-open ancestors. Used when the input ends while elements are
   * unclosed, so the returned tree carries no circular references.
   */
  function releaseOpenElements(node) {
    while (node) {
      var parent = node.parentElement;
      delete node.parentElement;
      node = parent;
    }
  }

  /* ------------------------------------------------------------------ */
  /* Attribute parsing                                                  */
  /* ------------------------------------------------------------------ */

  /**
   * True while `value` starts with a quote character that has not been
   * closed yet. A value consisting of just the opening quote is still open.
   */
  function isInsideOpenQuote(value) {
    var first = value[0];
    if (first !== "'" && first !== '"') {
      return false;
    }
    return !(value.length > 1 && value[value.length - 1] === first);
  }

  /**
   * Parses the attribute portion of a start tag (`a="1" b c='2'`).
   * Values keep their surrounding quotes; attributes without a value map to
   * `null`.
   */
  function parseAttributeText(text) {
    var attributes = {};
    var isReadingValue = false;
    var name = "";
    var value = "";

    for (var i = 0; i < text.length; i++) {
      var ch = text.charAt(i);
      if (!isReadingValue) {
        if (ch === "=") {
          isReadingValue = true;
        } else if (ch === "/") {
          // A slash between attributes (e.g. the one in "<br />") is ignored.
        } else if (isWhitespace(ch)) {
          if (name) {
            attributes[name] = null;
            name = "";
          }
        } else {
          name += ch;
        }
      } else if (isWhitespace(ch) && !isInsideOpenQuote(value)) {
        // Whitespace outside quotes terminates the value.
        attributes[name] = value;
        isReadingValue = false;
        name = "";
        value = "";
      } else {
        value += ch;
      }
    }

    if (name) {
      attributes[name] = value ? value : null;
    }
    return attributes;
  }

  /**
   * Extracts and parses the attributes of a complete start tag string such
   * as `<a href="x">`. Returns `{}` when the tag has no attribute section.
   */
  function parseAttributes(tagText) {
    var whitespacePos = indexOfWhitespace(tagText);
    var closePos = tagText.lastIndexOf(">");
    if (whitespacePos < 0 || whitespacePos >= closePos) {
      return {};
    }
    var attributeText = tagText.substring(whitespacePos, closePos).trim();
    // `<img src="a"/>`: the slash directly after a closing quote belongs to
    // the tag, not to the attribute value.
    if (QUOTE_THEN_SLASH_AT_END.test(attributeText)) {
      attributeText = attributeText.substring(0, attributeText.length - 1);
    }
    return parseAttributeText(attributeText);
  }

  /** Serialises an attribute map produced by `parseAttributes`. */
  function stringifyAttributes(attributes) {
    var text = "";
    for (var key in attributes) {
      if (attributes[key] === null) {
        text += " " + key;
      } else {
        text += " " + key + "=" + attributes[key];
      }
    }
    return text.trim();
  }

  /* ------------------------------------------------------------------ */
  /* Cleaning                                                           */
  /* ------------------------------------------------------------------ */

  /** Removes whitespace-only text nodes from `nodes` and all descendants. */
  function removeEmptyTextNodes(nodes) {
    // Walk backwards so splicing does not disturb the pending indices.
    for (var i = nodes.length - 1; i >= 0; i--) {
      var node = nodes[i];
      if (node.type === ELEMENT_TYPES.TEXT && textOnlyContainsWhitespace(node.data)) {
        nodes.splice(i, 1);
      } else if (node.type === ELEMENT_TYPES.TAG && node.children && node.children.length > 0) {
        removeEmptyTextNodes(node.children);
      }
    }
  }

  /** Removes childless `default` tags from `nodes` and all descendants. */
  function removeEmptyTagNodes(nodes) {
    for (var i = nodes.length - 1; i >= 0; i--) {
      var node = nodes[i];
      if (node.type !== ELEMENT_TYPES.TAG) {
        continue;
      }
      // Children first: a tag whose children all get removed is empty too.
      if (node.children && node.children.length > 0) {
        removeEmptyTagNodes(node.children);
      }
      var hasNoChildren = !node.children || node.children.length <= 0;
      if (node.tagType === TAG_TYPES.DEFAULT && hasNoChildren) {
        nodes.splice(i, 1);
      }
    }
  }

  /* ------------------------------------------------------------------ */
  /* HtmlParser                                                         */
  /* ------------------------------------------------------------------ */

  /**
   * Stateful HTML parser. Create one instance and call `parse`, `reverse`
   * or `clean`.
   */
  function HtmlParser() {
    this.errorCb = null;
    this.addNodeCb = null;
    this.stringifyNodeCb = null;
    // Trampoline bookkeeping used by `_parse`.
    this.parseLoopActive = false;
    this.hasPendingStep = false;
    this.pendingParent = null;
  }

  /** Resets the parse state to an empty document in text mode. */
  HtmlParser.prototype.reset = function () {
    this.state = {
      mode: MODE_TYPES.TEXT,
      html: "",
      currentPos: 0,
      output: []
    };
  };

  /**
   * Notifies `addNodeCb` (if set) and appends `node` to `parentNode`'s
   * children, or to the top-level output when there is no parent.
   */
  HtmlParser.prototype.addNodeElement = function (node, parentNode) {
    if (this.addNodeCb) {
      this.addNodeCb(node, parentNode);
    }
    if (parentNode) {
      if (!parentNode.children) {
        parentNode.children = [];
      }
      parentNode.children.push(node);
    } else {
      this.state.output.push(node);
    }
  };

  /** Creates a text node holding `text`. */
  HtmlParser.prototype.createTextNode = function (text) {
    return { type: ELEMENT_TYPES.TEXT, data: text };
  };

  /** Creates a comment node holding `text`. */
  HtmlParser.prototype.createCommentNode = function (text) {
    return { type: ELEMENT_TYPES.COMMENT, data: text };
  };

  /**
   * Creates a tag node from the text of a start tag (`<name attrs>`).
   * The name ends at the first whitespace or `>`, whichever comes first, or
   * at the end of the text when the tag is unterminated.
   */
  HtmlParser.prototype.createTagNode = function (tagText) {
    var whitespacePos = indexOfWhitespace(tagText);
    var closePos = tagText.indexOf(">");
    var nameEnd = closePos > -1 ? closePos : tagText.length;
    if (whitespacePos > -1 && whitespacePos < nameEnd) {
      nameEnd = whitespacePos;
    }
    var name = removeWhitespace(tagText.substring(1, nameEnd));
    // `<br/>`: the self-closing slash is not part of the tag name.
    if (name.length > 1 && name.charAt(name.length - 1) === "/") {
      name = name.substring(0, name.length - 1);
    }
    return {
      type: ELEMENT_TYPES.TAG,
      tagType: this.getTagType(name),
      name: name,
      attributes: this.parseAttributes(tagText),
      children: []
    };
  };

  /** Maps a tag name (any case) to one of `TAG_TYPES`. */
  HtmlParser.prototype.getTagType = function (name) {
    var lowerName = name.toLowerCase();
    // Own-property check so names such as "constructor" are not matched
    // through Object.prototype.
    if (hasOwn.call(EMPTY_TAGS, lowerName)) {
      return TAG_TYPES.EMPTY;
    }
    if (lowerName === TAG_TYPES.STYLE) {
      return TAG_TYPES.STYLE;
    }
    if (lowerName === TAG_TYPES.SCRIPT) {
      return TAG_TYPES.SCRIPT;
    }
    if (lowerName === TAG_TYPES.COMMENT) {
      return TAG_TYPES.COMMENT;
    }
    return TAG_TYPES.DEFAULT;
  };

  /**
   * Finds the first start tag (`<` + letter), end tag (`</`) or comment
   * (`<!--`) in `text`.
   * @returns {{type: string, pos?: number}} `type` is "start", "end",
   *   "comment" or "no-tag"; `pos` is the index of the `<`.
   */
  HtmlParser.prototype.getNextTag = function (text) {
    // Every construct begins with "<", so jump from one "<" to the next.
    var pos = text.indexOf("<");
    while (pos > -1) {
      var next = text[pos + 1];
      if (next && isLetter(next)) {
        return { type: "start", pos: pos };
      }
      if (next === "/") {
        return { type: "end", pos: pos };
      }
      if (text.slice(pos, pos + 4) === "<!--") {
        return { type: "comment", pos: pos };
      }
      pos = text.indexOf("<", pos + 1);
    }
    return { type: "no-tag" };
  };

  /**
   * Text-mode step: dispatches on the next tag found in the unparsed input,
   * or emits the remaining input as a final text node.
   */
  HtmlParser.prototype.parseText = function (parentNode) {
    var remaining = this.state.html.substring(this.state.currentPos);
    var nextTag = this.getNextTag(remaining);
    switch (nextTag.type) {
      case "start":
        this.handleStartTagInText(nextTag, remaining, parentNode);
        break;
      case "end":
        this.handleEndTagInText(nextTag, remaining, parentNode);
        break;
      case "comment":
        this.handleCommentInText(nextTag, remaining, parentNode);
        break;
      default:
        if (remaining.length > 0) {
          this.addNodeElement(this.createTextNode(remaining), parentNode);
        }
        // End of input: drop back-references of elements left unclosed.
        releaseOpenElements(parentNode);
    }
  };

  /** Emits the text before a start tag and switches to tag mode. */
  HtmlParser.prototype.handleStartTagInText = function (tag, text, parentNode) {
    if (tag.pos > 0) {
      this.addNodeElement(this.createTextNode(text.substring(0, tag.pos)), parentNode);
    }
    this.state.mode = MODE_TYPES.TAG;
    this.state.currentPos = this.state.currentPos + tag.pos;
    this._parse(parentNode);
  };

  /**
   * Emits the text before an end tag, validates the end tag against the
   * open element and continues parsing in that element's parent.
   * Problems are reported through the error callback; parsing continues.
   */
  HtmlParser.prototype.handleEndTagInText = function (tag, text, parentNode) {
    if (tag.pos > 0) {
      this.addNodeElement(this.createTextNode(text.substring(0, tag.pos)), parentNode);
    }

    var closePos = text.indexOf(">", tag.pos);
    var isTerminated = closePos > -1;
    // An unterminated end tag consumes the rest of the input; otherwise the
    // position would not advance and parsing would never finish.
    var endPos = isTerminated ? closePos + 1 : text.length;
    var endTagName = text
      .substring(tag.pos, endPos)
      .replace("</", "")
      .replace(">", "")
      .trim();

    if (!isTerminated) {
      reportError(this, new Error("End tag (" + endTagName + ") is missing its closing >"));
    }

    if (!parentNode) {
      // Stray end tag at top level: report it, skip it, keep parsing.
      reportError(this, new Error("No start tag for end tag: " + endTagName));
      this.state.mode = MODE_TYPES.TEXT;
      this.state.currentPos = this.state.currentPos + endPos;
      this._parse(null);
      return;
    }

    if (parentNode.name.toLowerCase() !== endTagName.toLowerCase()) {
      reportError(
        this,
        Error("Start tag (" + parentNode.name + ") and end tag (" + endTagName + ") do not match")
      );
    }

    this.state.mode = MODE_TYPES.TEXT;
    this.state.currentPos = this.state.currentPos + endPos;
    var grandParent = parentNode.parentElement;
    delete parentNode.parentElement;
    this._parse(grandParent);
  };

  /**
   * Emits the text before a comment and the comment itself. A comment
   * without `-->` is reported and extends to the end of the input.
   */
  HtmlParser.prototype.handleCommentInText = function (tag, text, parentNode) {
    if (tag.pos > 0) {
      this.addNodeElement(this.createTextNode(text.substring(0, tag.pos)), parentNode);
    }

    var dataStart = tag.pos + 4;
    // Search from inside the opener so "<!-->" and "<!--->" close at once,
    // and so a "-->" in the preceding text is never picked up.
    var closePos = text.indexOf("--\x3e", tag.pos + 2);
    var data;
    var consumed;
    if (closePos < 0) {
      reportError(this, new Error("Comment does not have an end tag"));
      data = text.substring(dataStart);
      consumed = text.length;
    } else {
      data = closePos > dataStart ? text.substring(dataStart, closePos) : "";
      consumed = closePos + 3;
    }

    this.addNodeElement(this.createCommentNode(data), parentNode);
    this.state.mode = MODE_TYPES.TEXT;
    this.state.currentPos = this.state.currentPos + consumed;
    this._parse(parentNode);
  };

  /**
   * Tag-mode step: consumes one start tag, adds its node and continues
   * either in the same parent (empty tags) or inside the new element.
   */
  HtmlParser.prototype.parseTag = function (parentNode) {
    var remaining = this.state.html.substring(this.state.currentPos);
    var tagLength = this.findPositionOfClosingTag(remaining) + 1;
    var node = this.createTagNode(remaining.substring(0, tagLength));

    this.state.currentPos = this.state.currentPos + tagLength;
    this.addNodeElement(node, parentNode);
    this.state.mode = MODE_TYPES.TEXT;

    if (node.tagType === TAG_TYPES.EMPTY) {
      this._parse(parentNode);
      return;
    }
    if (node.tagType === TAG_TYPES.STYLE) {
      this.state.mode = MODE_TYPES.STYLE;
    } else if (node.tagType === TAG_TYPES.SCRIPT) {
      this.state.mode = MODE_TYPES.SCRIPT;
    }
    // Temporary back-reference; removed again when the element is closed.
    node.parentElement = parentNode;
    this._parse(node);
  };

  /**
   * Returns the index of the `>` that ends the start tag at the beginning
   * of `text`, skipping any `>` inside quoted attribute values. When the
   * tag is never closed the index of the last character is returned.
   */
  HtmlParser.prototype.findPositionOfClosingTag = function (text) {
    var closePos = text.indexOf(">");
    if (closePos < 0) {
      return text.length - 1;
    }
    // Only whitespace before the first ">" can start an attribute section.
    var whitespacePos = indexOfWhitespace(text.substring(0, closePos));
    if (whitespacePos < 0) {
      return closePos;
    }

    var openQuote = null;
    for (var pos = whitespacePos + 1; pos < text.length; pos++) {
      var ch = text[pos];
      if (ch === ">" && openQuote === null) {
        return pos;
      }
      if (ch === "'") {
        if (openQuote === QUOTE_TYPES.SINGLE) {
          openQuote = null;
        } else if (openQuote === null) {
          openQuote = QUOTE_TYPES.SINGLE;
        }
      } else if (ch === '"') {
        if (openQuote === QUOTE_TYPES.DOUBLE) {
          openQuote = null;
        } else if (openQuote === null) {
          openQuote = QUOTE_TYPES.DOUBLE;
        }
      }
    }
    return text.length - 1;
  };

  /** Parses the attributes of a start tag string into a plain object. */
  HtmlParser.prototype.parseAttributes = function (tagText) {
    return parseAttributes(tagText);
  };

  /**
   * Script/style-mode step: everything up to `endTag` becomes a single text
   * child of `node`. A missing end tag is reported and the rest of the
   * input is taken as the content.
   */
  HtmlParser.prototype.parseScript = function (node, endTag) {
    var remaining = this.state.html.substring(this.state.currentPos);
    var endPos = remaining.indexOf(endTag);
    var content;
    var consumed;
    if (endPos < 0) {
      reportError(this, new Error("Script (" + node.name + " ) does not have an end tag"));
      content = remaining;
      consumed = remaining.length;
    } else {
      content = remaining.substring(0, endPos);
      consumed = endPos + endTag.length;
    }

    this.addNodeElement(this.createTextNode(content), node);
    this.state.currentPos = this.state.currentPos + consumed;
    this.state.mode = MODE_TYPES.TEXT;
    var parentNode = node.parentElement;
    delete node.parentElement;
    this._parse(parentNode);
  };

  /** Runs the single step selected by the current mode. */
  function runParseStep(parser, parentNode) {
    switch (parser.state.mode) {
      case MODE_TYPES.TEXT:
        parser.parseText(parentNode);
        break;
      case MODE_TYPES.TAG:
        parser.parseTag(parentNode);
        break;
      case MODE_TYPES.STYLE:
        parser.parseScript(parentNode, "</style>");
        break;
      case MODE_TYPES.SCRIPT:
        parser.parseScript(parentNode, "<\/script>");
        break;
    }
  }

  /**
   * Parses from the current position with `parentNode` as the open element.
   *
   * Every step handler finishes by calling `_parse` for the next step.
   * Instead of nesting those calls, a call made while the loop is running
   * only records the next parent and returns, and the outermost call keeps
   * looping. Stack depth therefore stays constant however many nodes the
   * document has.
   */
  HtmlParser.prototype._parse = function (parentNode) {
    if (this.parseLoopActive) {
      this.pendingParent = parentNode;
      this.hasPendingStep = true;
      return;
    }

    this.parseLoopActive = true;
    try {
      var currentParent = parentNode;
      var hasStep = true;
      while (hasStep) {
        this.hasPendingStep = false;
        runParseStep(this, currentParent);
        hasStep = this.hasPendingStep;
        currentParent = this.pendingParent;
      }
    } finally {
      // Also reached when a user callback throws, so the parser is reusable.
      this.parseLoopActive = false;
      this.hasPendingStep = false;
      this.pendingParent = null;
    }
  };

  /**
   * Parses `html` into an array of top-level nodes.
   * @param {string} html Markup to parse; null/undefined is treated as "".
   * @param {function(Error)=} errorCb Called for each recoverable problem.
   * @param {function(Object, Object)=} addNodeCb Called with (node, parent)
   *   just before a node is attached.
   * @returns {Array<Object>} The parsed top-level nodes.
   */
  HtmlParser.prototype.parse = function (html, errorCb, addNodeCb) {
    this.errorCb = errorCb || null;
    this.addNodeCb = addNodeCb || null;
    this.reset();
    this.state.html = html === null || html === undefined ? "" : String(html);
    this._parse(null);
    return this.state.output;
  };

  /**
   * Serialises a node array back into an HTML string.
   * @param {Array<Object>} nodes Nodes as produced by `parse`.
   * @param {function(Object)=} stringifyNodeCb Called with every node just
   *   before it is serialised.
   * @returns {string}
   */
  HtmlParser.prototype.reverse = function (nodes, stringifyNodeCb) {
    this.stringifyNodeCb = stringifyNodeCb || null;
    return this.reverseNodes(0, nodes, "");
  };

  /**
   * Appends the serialisation of `nodes[startIndex..]` to `html` and
   * returns the result. Siblings are handled in a loop; only nesting
   * recurses.
   */
  HtmlParser.prototype.reverseNodes = function (startIndex, nodes, html) {
    for (var i = startIndex; i < nodes.length; i++) {
      var node = nodes[i];
      if (this.stringifyNodeCb) {
        this.stringifyNodeCb(node);
      }

      if (node.type === ELEMENT_TYPES.TEXT) {
        html += node.data;
      } else if (node.type === ELEMENT_TYPES.COMMENT) {
        html += "\x3c!--" + node.data + "--\x3e";
      } else {
        var attributeText = stringifyAttributes(node.attributes);
        if (attributeText.length > 0) {
          attributeText = " " + attributeText;
        }
        if (node.tagType === TAG_TYPES.EMPTY) {
          html += "<" + node.name + attributeText + " />";
        } else {
          html += "<" + node.name + attributeText + ">";
          if (node.children && node.children.length > 0) {
            html += this.reverseNodes(0, node.children, "");
          }
          html += "</" + node.name + ">";
        }
      }
    }
    return html;
  };

  /**
   * Removes empty content from a node array in place and returns it.
   * @param {Array<Object>} nodes Nodes as produced by `parse`.
   * @param {{removeEmptyTags?: boolean, removeEmptyTextNodes?: boolean}=}
   *   options Both options default to true.
   * @returns {Array<Object>} The same `nodes` array.
   */
  HtmlParser.prototype.clean = function (nodes, options) {
    var settings = options || {};
    var shouldRemoveEmptyTags =
      settings.removeEmptyTags === undefined || settings.removeEmptyTags;
    var shouldRemoveEmptyText =
      settings.removeEmptyTextNodes === undefined || settings.removeEmptyTextNodes;

    if (shouldRemoveEmptyText) {
      removeEmptyTextNodes(nodes);
    }
    if (shouldRemoveEmptyTags) {
      removeEmptyTagNodes(nodes);
    }
    return nodes;
  };

  /* ------------------------------------------------------------------ */
  /* Publication                                                        */
  /* ------------------------------------------------------------------ */

  var api = {
    ELEMENT_TYPES: ELEMENT_TYPES,
    TAG_TYPES: TAG_TYPES,
    HtmlParser: HtmlParser
  };
  // Kept so consumers that probe for an ES-module namespace still work.
  Object.defineProperty(api, "__esModule", { value: true });

  globalRoot.Thenja = globalRoot.Thenja || {};
  globalRoot.Thenja.HtmlParser = api;
})(
  typeof window !== "undefined"
    ? window
    : typeof self !== "undefined"
    ? self
    : typeof globalThis !== "undefined"
    ? globalThis
    : typeof global !== "undefined"
    ? global
    : {}
);