UNPKG

tag-soup

Version:

The fastest pure JS SAX/DOM XML/HTML parser.

1 lines 10.5 kB
"use strict";Object.defineProperty(exports,"__esModule",{value:!0});var e=require("tslib"),t=require("tokenizer-dsl"),n=require("@smikhalevski/object-pool"),r=require("speedy-entities"),a=function(e){return 32===e||9===e||13===e||10===e},o=function(e){return e>=97&&e<=122||e>=65&&e<=90||95===e||58===e||e>=192&&e<=214||e>=216&&e<=246||e>=248&&e<=767||e>=880&&e<=893||e>=895&&e<=8191||e>=8204&&e<=8205||e>=8304&&e<=8591||e>=11264&&e<=12271||e>=12289&&e<=55295||e>=63744&&e<=64975||e>=65008&&e<=65533||e>=65536&&e<=983039},i=function(e){return 32===e||9===e||13===e||10===e||47===e},u=function(e){return 32===e||9===e||13===e||10===e||47===e||62===e},s=function(e){return 32===e||9===e||13===e||10===e||47===e||62===e||61===e},l=function(e){return 32===e||9===e||13===e||10===e||62===e},d=t.until(t.text("<")),c=t.until(t.text(">"),{inclusive:!0}),f=t.char(o),T=t.until(t.char(u),{openEnded:!0,endOffset:1}),p=t.seq(t.text("<"),f,T),g=t.seq(t.text("</"),f,T),m=t.until(t.char(s),{openEnded:!0}),h=t.all(t.char(i)),v=t.all(t.char(a)),E=t.seq(v,t.text("="),v),b=t.seq(t.text('"'),t.until(t.text('"'),{inclusive:!0,openEnded:!0,endOffset:1})),x=t.seq(t.text("'"),t.until(t.text("'"),{inclusive:!0,openEnded:!0,endOffset:1})),C=t.until(t.char(l),{openEnded:!0}),_=t.seq(t.text("\x3c!--"),t.until(t.text("--\x3e"),{inclusive:!0,openEnded:!0,endOffset:3})),k=t.seq(t.text("<!"),t.until(t.text(">"),{inclusive:!0,openEnded:!0,endOffset:1})),N=t.seq(t.text("<?"),t.until(t.text("?>"),{inclusive:!0,openEnded:!0,endOffset:2})),O=t.seq(t.text("<![CDATA["),t.until(t.text("]]>"),{inclusive:!0,openEnded:!0,endOffset:3})),S=t.seq(t.text("<!DOCTYPE",{caseInsensitive:!0}),t.until(t.text(">"),{inclusive:!0,openEnded:!0,endOffset:1}));function y(e,t,n,r,a,o){for(var i=a.attributeTokenPool,u=o.decodeAttribute,s=o.renameAttribute,l=e.length,d=0;t<l;){var c=h(e,t),f=m(e,c);if(f===c)break;var T=r[d]=i.take(),p=e.substring(c,f);T.rawName=p,T.name=null!=s?s(p):p,T.nameStart=T.start=n+c,T.nameEnd=n+f;var g=void 0,v=void 0,_=-1,k=-1,N=!1;-1!==(f=E(e,c=f))&&(g=v=null,-1===(f=b(e,c=f))&&(f=x(e,c)),-1!==f?(_=c+1,k=f-1,N=!0,c=Math.min(f,l)):(f=C(e,c))!==c&&(_=c,k=f,c=f),-1!==_&&(g=e.substring(_,k),v=null!=u?u(g):g,_+=n,k+=n)),T.rawValue=g,T.value=v,T.valueStart=_,T.valueEnd=k,T.quoted=N,T.end=n+c,++d,t=c}for(var O=d;O<r.length;++O)r[O]=void 0;return r.length=d,t}function A(e,t,n,r,a,o){for(var i,u,s=r.startTagTokenPool,l=r.endTagToken,f=r.dataToken,T=a.cdataEnabled,m=a.processingInstructionsEnabled,h=a.selfClosingEnabled,v=a.decodeText,E=a.renameTag,b=a.checkCdataTag,x=o.startTag,C=o.endTag,A=o.text,I=o.comment,P=o.processingInstruction,M=o.cdata,q=o.doctype,D=-1,R=0,G=!0,U=e.length,X=0;X<U;){if(-1===D){if(-1===(F=d(e,X))&&(F=U)&&t)break;if(F!==X){D=X,R=X=F;continue}}if(G&&-1!==(u=p(e,X))){var j=s.take(),H=j.attributes,V=X+1,L=u,Y=e.substring(V,L),B=null!=E?E(Y):Y;if(u=y(e,u,n,H,r,a),-1===(F=c(e,u)))return X;var z=h&&F-u>=2&&47===e.charCodeAt(F-2)||!1;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),j.rawName=Y,j.name=B,j.selfClosing=z,j.start=n+X,j.end=n+F,j.nameStart=n+V,j.nameEnd=n+L,z||(i=B,G=!(null==b?void 0:b(j))),X=F,null==x||x(j)}else{if(-1!==(u=g(e,X))){V=X+2,L=u,Y=e.substring(V,L),B=null!=E?E(Y):Y;if(G||i===B){if(G=!0,-1===(F=c(e,u)))return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),C&&(l.rawName=Y,l.name=B,l.start=n+X,l.end=n+F,l.nameStart=n+V,l.nameEnd=n+L,C(l)),X=F;continue}}if(G){var F=void 0;if(F=u=_(e,X),-1!==u){if(u>U&&t)return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X=w(e,n,8,f,I,X,u,4,3,v);continue}if(F=u=S(e,X),-1!==u){if(u>U&&t)return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X=w(e,n,10,f,q,X,u,9,1);continue}if(-1!==(u=O(e,X))){if(u>U&&t)return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X=T?w(e,n,4,f,M,X,u,9,3):w(e,n,8,f,I,X,u,2,1);continue}if(-1!==(u=N(e,X))){if(u>U&&t)return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X=m?w(e,n,7,f,P,X,u,2,2):w(e,n,8,f,I,X,u,1,1);continue}if(-1!==(u=k(e,X))){if(u>U&&t)return X;-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X=T?Math.min(u,U):w(e,n,8,f,I,X,u,2,1,v);continue}}if(-1===D&&(D=X),-1===(R=d(e,X+1))){R=U;break}X=R}}return t?-1!==D?D:X:(-1!==D&&(w(e,n,3,f,A,D,R,0,0,v),D=-1),X)}function w(e,t,n,r,a,o,i,u,s,l){var d=e.length,c=Math.min(i,d);if(!a)return c;var f=o+u,T=Math.min(i-s,d),p=e.substring(f,T);return r.tokenType=n,r.rawData=p,r.data=null!=l?l(p):p,r.start=t+o,r.end=t+c,r.dataStart=t+f,r.dataEnd=t+T,a(r),c}function I(){var t=e.__assign({},this);if(1===t.tokenType)for(var n=t.attributes=e.__assign({},t.attributes),r=0;r<n.length;++r)n[r]=e.__assign({},n[r]);return t}function P(){return{tokenType:1,name:"",rawName:"",attributes:{length:0},selfClosing:!1,start:0,end:0,nameStart:0,nameEnd:0,clone:I}}function M(){return{tokenType:101,name:"",rawName:"",start:0,end:0,nameStart:0,nameEnd:0,clone:I}}function q(){return{tokenType:3,data:"",rawData:"",start:0,end:0,dataStart:0,dataEnd:0,clone:I}}function D(){return{tokenType:2,name:"",rawName:"",value:"",rawValue:"",quoted:!1,start:0,end:0,nameStart:0,nameEnd:0,valueStart:0,valueEnd:0,clone:I}}function R(t,r){var a=e.__assign({},r),o="",i=0,u={startTagTokenPool:new n.ObjectPool(P),attributeTokenPool:new n.ObjectPool(D),endTagToken:M(),dataToken:q()},s=G(t,u,a),l=function(){var e;o="",i=0,null===(e=s.reset)||void 0===e||e.call(s)};return{write:function(e){e||(e="");var t=A(o+=e,!0,i,u,a,s);o=o.substr(t),i+=t},parse:function(e){var t;e||(e="");var n=A(o+=e,!1,i,u,a,s);null===(t=s.sourceEnd)||void 0===t||t.call(s,i+n),l()},reset:l}}function G(t,n,r){var a=t.startTag,o=t.endTag,i=t.reset,u=t.sourceEnd,s=n.startTagTokenPool,l=n.attributeTokenPool,d=r.checkVoidTag,c=r.endsAncestorAt,f=M(),T=e.__assign({},t),p={length:0},g=function(e){s.release(e);for(var t=0;t<e.attributes.length;++t)l.release(e.attributes[t])};if(!a&&!o)return T.startTag=g,T;var m=function(e){for(var t=e;t<p.length;++t)g(p[t]),p[t]=void 0;p.length=e},h=function(e,t){if(!(e%1!=0||e<0||e>=p.length))if(o){for(var n=p.length-1;n>=e;--n)f.rawName=p[n].rawName,f.name=p[n].name,f.start=f.end=t,f.nameStart=f.nameEnd=-1,o(f);m(e)}else m(e)};return T.startTag=function(e){e.selfClosing||(e.selfClosing=(null==d?void 0:d(e))||!1),null!=c&&0!==p.length&&h(c(p,e),e.start),null==a||a(e),e.selfClosing?g(e):p[p.length++]=e},T.endTag=function(e){for(var t=p.length-1;t>=0;--t)if(p[t].name===e.name){h(t+1,e.start),null==o||o(e),g(p[t]),p.length=t;break}},T.sourceEnd=function(e){h(0,e),null==u||u(e)},T.reset=function(){m(0),null==i||i()},T}function U(e,t){var n=[],r=R(X(n,e,(function(e){return n.push(e)})),t),a=function(){r.reset(),n=[]};return{write:function(e){return r.write(e),n},parse:function(e){r.parse(e);var t=n;return a(),t},reset:a}}function X(e,t,n){var r=t.element,a=t.containerEnd,o=t.appendChild,i=t.text,u=t.document,s=t.comment,l=t.processingInstruction,d=t.cdata,c=t.sourceEnd,f=t.reset,T={length:0};if("function"!=typeof r)throw new Error("Missing element factory");if("function"!=typeof o)throw new Error("Missing appendChild callback");var p=function(e){0!==T.length?o(T[T.length-1],e):n(e)},g=function(e){return null!=e?function(t){return p(e(t))}:void 0};return{startTag:function(e){var t=r(e);p(t),e.selfClosing||(T[T.length++]=t)},endTag:function(e){--T.length,null==a||a(T[T.length],e)},doctype:function(t){if(u&&0===e.length){var n=u(t);p(n),T[T.length++]=n}},text:g(i),processingInstruction:g(l),cdata:g(d),comment:g(s),sourceEnd:c,reset:function(){T.length=0,null==f||f()}}}function j(t,n){return R(t,e.__assign(e.__assign({},H),n))}var H={cdataEnabled:!0,processingInstructionsEnabled:!0,selfClosingEnabled:!0,decodeText:r.decodeXml,decodeAttribute:r.decodeXml};function V(t,n){return U(t||L,e.__assign(e.__assign({},H),n))}var L={element:function(e){for(var t=Object.create(null),n=0;n<e.attributes.length;n++){var r=e.attributes[n];t[r.name]=r.value}return{nodeType:1,parent:null,tagName:e.name,attributes:t,selfClosing:e.selfClosing,children:[],start:e.start,end:e.end}},appendChild:function(e,t){t.parent=e,e.children.push(t)},containerEnd:function(e,t){e.end=t.end},document:function(e){return{nodeType:9,parent:null,doctype:e.data,children:[],start:e.start,end:e.end}},text:function(e){return Y(3,e)},processingInstruction:function(e){return Y(7,e)},cdata:function(e){return Y(4,e)},comment:function(e){return Y(8,e)}};function Y(e,t){return{nodeType:e,data:t.data,parent:null,start:t.start,end:t.end}}function B(t,n){return R(t,e.__assign(e.__assign({},z),n))}var z={decodeText:r.decodeHtml,decodeAttribute:r.decodeHtml,renameTag:F,renameAttribute:F,checkCdataTag:J,checkVoidTag:K,endsAncestorAt:Q};function F(e){return e.toLowerCase()}function J(e){return Z.has(e.name)}function K(e){return W.has(e.name)}function Q(e,t){var n=te.get(t.name);if(n)for(var r=e.length-1;r>=0;--r)if(n.has(e[r].name))return r;return-1}var W=ne("area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr"),Z=ne("script style textarea"),$=ne("input option optgroup select button datalist textarea"),ee=ne("p"),te=re({tr:ne("tr th td"),th:ne("th"),td:ne("thead th td"),body:ne("head link script"),li:ne("li"),option:ne("option"),optgroup:ne("optgroup option"),dd:ne("dt dd"),dt:ne("dt dd"),select:$,input:$,output:$,button:$,datalist:$,textarea:$,p:ee,h1:ee,h2:ee,h3:ee,h4:ee,h5:ee,h6:ee,address:ee,article:ee,aside:ee,blockquote:ee,details:ee,div:ee,dl:ee,fieldset:ee,figcaption:ee,figure:ee,footer:ee,form:ee,header:ee,hr:ee,main:ee,nav:ee,ol:ee,pre:ee,section:ee,table:ee,ul:ee,rt:ne("rt rp"),rp:ne("rt rp"),tbody:ne("thead tbody"),tfoot:ne("thead tbody")});function ne(e){return new Set(e.split(" "))}function re(e){return new Map(Object.entries(e))}function ae(t,n){return U(t||L,e.__assign(e.__assign({},z),n))}exports.NodeType=void 0,function(e){e[e.ELEMENT=1]="ELEMENT",e[e.TEXT=3]="TEXT",e[e.PROCESSING_INSTRUCTION=7]="PROCESSING_INSTRUCTION",e[e.CDATA_SECTION=4]="CDATA_SECTION",e[e.DOCUMENT=9]="DOCUMENT",e[e.COMMENT=8]="COMMENT"}(exports.NodeType||(exports.NodeType={})),exports.TokenType=void 0,function(e){e[e.START_TAG=1]="START_TAG",e[e.END_TAG=101]="END_TAG",e[e.ATTRIBUTE=2]="ATTRIBUTE",e[e.DOCTYPE=10]="DOCTYPE",e[e.TEXT=3]="TEXT",e[e.CDATA_SECTION=4]="CDATA_SECTION",e[e.PROCESSING_INSTRUCTION=7]="PROCESSING_INSTRUCTION",e[e.COMMENT=8]="COMMENT"}(exports.TokenType||(exports.TokenType={})),exports.createDomParser=U,exports.createHtmlDomParser=ae,exports.createHtmlSaxParser=B,exports.createSaxParser=R,exports.createXmlDomParser=V,exports.createXmlSaxParser=j,exports.domHandler=L,exports.htmlParserOptions=z,exports.xmlParserOptions=H;