UNPKG

@desertnet/html-parser

Version:

HTML parser and non-strict validator

81 lines (69 loc) 17.7 kB
import HTMLNode, {NodeType} from './HTMLNode'
import HTMLParseError from '../HTMLParseError'

/**
 * HTML entity node.
 *
 * Records the value of the entity token(s) it is given and attaches an
 * HTMLParseError to itself when a named entity is not in the list of known
 * entity names defined at the bottom of this module.
 */
export default class EntityNode extends HTMLNode {
  constructor () {
    super();

    /**
     * Value of the most recent token that was neither "entityStart" nor
     * "entityEnd".
     * @private
     * @type {string}
     */
    this._entity = "";
  }

  /**
   * @override
   */
  get type () {
    return NodeType.ENTITY;
  }

  /**
   * Lookup table of known named entities, built on first access from the
   * space-separated name list in `_namedEntitiesData`.
   * @private
   * @type {Map.<string,boolean>}
   */
  static get namedEntitiesMap () {
    if (_namedEntitiesMap === null) {
      _namedEntitiesMap = new Map(_namedEntitiesData.split(/ /).map(e => [e, true]));
      _namedEntitiesData = null; // Allow garbage collection of 16KB string
    }
    return _namedEntitiesMap;
  }

  /**
   * Tests whether a name is one of the known named entities.
   * The comparison is case-sensitive, since the name is used directly as the
   * map key.
   * @private
   * @param {string} name Entity name without the leading "&" or trailing ";".
   * @returns {boolean} true when the name is a known entity name.
   */
  static isValidNamedEntity (name) {
    // Previously this returned `get(name) === undefined`, i.e. true for
    // *unknown* names, which contradicted the method's name.
    return EntityNode.namedEntitiesMap.has(name);
  }

  /**
   * @override
   */
  get canHaveChildren () {
    return false;
  }

  /**
   * @override
   * @return {string}
   */
  toString () {
    return "&(" + this._entity + ");";
  }

  /**
   * Passes the token to the superclass, remembers its value when it is not an
   * "entityStart"/"entityEnd" token, and records a parse error on this node
   * when a "named" token's value is not a known entity name.
   * @override
   */
  addToken (token) {
    super.addToken(token);

    if (token.type !== "entityStart" && token.type !== "entityEnd") {
      this._entity = token.value;
    }

    if (token.type === "named" && !EntityNode.isValidNamedEntity(token.value)) {
      const error = new HTMLParseError();
      error.message = `Invalid HTML entity name for "&${token.value};".`;
      error.addToken(token);
      // addError is not defined in this class; presumably inherited from HTMLNode.
      this.addError(error);
    }
  }
}

// Lazily populated by EntityNode.namedEntitiesMap.
let _namedEntitiesMap = null

// Space-separated list of entity names. Split across several literals purely
// for source layout; the concatenated value is a single space-separated string.
// Set to null once the map has been built.
let _namedEntitiesData =
  'Aacute aacute Abreve abreve ac acd acE Acirc acirc acute Acy acy AElig aelig af Afr afr Agrave agrave alefsym aleph Alpha alpha Amacr amacr amalg amp AMP andand And and andd andslope andv ang ange angle angmsdaa angmsdab angmsdac angmsdad angmsdae angmsdaf angmsdag angmsdah angmsd angrt angrtvb angrtvbd angsph angst angzarr Aogon aogon Aopf aopf apacir ap apE ape apid apos ApplyFunction approx approxeq Aring aring Ascr ascr Assign ast asymp asympeq Atilde atilde Auml auml awconint awint backcong backepsilon backprime backsim backsimeq Backslash Barv barvee barwed Barwed barwedge bbrk bbrktbrk bcong Bcy bcy bdquo becaus because Because bemptyv bepsi bernou ' +
  'Bernoullis Beta beta beth between Bfr bfr bigcap bigcirc bigcup bigodot bigoplus bigotimes bigsqcup bigstar bigtriangledown bigtriangleup biguplus bigvee bigwedge bkarow blacklozenge blacksquare blacktriangle blacktriangledown blacktriangleleft blacktriangleright blank blk12 blk14 blk34 block bne bnequiv bNot bnot Bopf bopf bot bottom bowtie boxbox boxdl boxdL boxDl boxDL boxdr boxdR boxDr boxDR boxh boxH boxhd boxHd boxhD boxHD boxhu boxHu boxhU boxHU boxminus boxplus boxtimes boxul boxuL boxUl boxUL boxur boxuR boxUr boxUR boxv boxV boxvh boxvH boxVh boxVH boxvl boxvL boxVl boxVL boxvr boxvR boxVr boxVR bprime breve Breve brvbar bscr Bscr bsemi bsim bsime bsolb bsol bsolhsub bull bullet bump bumpE bumpe Bumpeq bumpeq Cacute cacute capand capbrcup capcap cap Cap capcup capdot CapitalDifferentialD caps caret caron Cayleys ccaps Ccaron ccaron Ccedil ccedil Ccirc ccirc Cconint ccups ccupssm Cdot cdot cedil Cedilla cemptyv cent centerdot CenterDot cfr Cfr CHcy chcy check checkmark Chi chi circ circeq circlearrowleft circlearrowright circledast circledcirc circleddash CircleDot circledR circledS CircleMinus CirclePlus CircleTimes cir cirE cire cirfnint cirmid cirscir ClockwiseContourIntegral CloseCurlyDoubleQuote CloseCurlyQuote clubs clubsuit colon Colon Colone colone coloneq comma commat comp compfn complement complexes cong congdot Congruent conint Conint ContourIntegral copf Copf coprod Coproduct copy COPY copysr CounterClockwiseContourIntegral crarr cross Cross Cscr cscr csub csube csup csupe ctdot cudarrl cudarrr cuepr cuesc cularr cularrp cupbrcap cupcap CupCap cup Cup cupcup cupdot cupor cups curarr curarrm curlyeqprec curlyeqsucc curlyvee curlywedge curren curvearrowleft curvearrowright cuvee cuwed cwconint cwint cylcty dagger Dagger daleth darr Darr dArr dash Dashv dashv dbkarow dblac Dcaron dcaron Dcy dcy ddagger ddarr DD dd DDotrahd ddotseq deg Del Delta delta demptyv dfisht Dfr dfr dHar dharl dharr DiacriticalAcute DiacriticalDot DiacriticalDoubleAcute ' +
  'DiacriticalGrave DiacriticalTilde diam diamond Diamond diamondsuit diams die DifferentialD digamma disin div divide divideontimes divonx DJcy djcy dlcorn dlcrop dollar Dopf dopf Dot dot DotDot doteq doteqdot DotEqual dotminus dotplus dotsquare doublebarwedge DoubleContourIntegral DoubleDot DoubleDownArrow DoubleLeftArrow DoubleLeftRightArrow DoubleLeftTee DoubleLongLeftArrow DoubleLongLeftRightArrow DoubleLongRightArrow DoubleRightArrow DoubleRightTee DoubleUpArrow DoubleUpDownArrow DoubleVerticalBar DownArrowBar downarrow DownArrow Downarrow DownArrowUpArrow DownBreve downdownarrows downharpoonleft downharpoonright DownLeftRightVector DownLeftTeeVector DownLeftVectorBar DownLeftVector DownRightTeeVector DownRightVectorBar DownRightVector DownTeeArrow DownTee drbkarow drcorn drcrop Dscr dscr DScy dscy dsol Dstrok dstrok dtdot dtri dtrif duarr duhar dwangle DZcy dzcy dzigrarr Eacute eacute easter Ecaron ecaron Ecirc ecirc ecir ecolon Ecy ecy eDDot Edot edot eDot ee efDot Efr efr eg Egrave egrave egs egsdot el Element elinters ell els elsdot Emacr emacr empty emptyset EmptySmallSquare emptyv EmptyVerySmallSquare emsp13 emsp14 emsp ENG eng ensp Eogon eogon Eopf eopf epar eparsl eplus epsi Epsilon epsilon epsiv eqcirc eqcolon eqsim eqslantgtr eqslantless Equal equals EqualTilde equest Equilibrium equiv equivDD eqvparsl erarr erDot escr Escr esdot Esim esim Eta eta ETH eth Euml euml euro excl exist Exists expectation exponentiale ExponentialE fallingdotseq Fcy fcy female ffilig fflig ffllig Ffr ffr filig FilledSmallSquare FilledVerySmallSquare fjlig flat fllig fltns fnof Fopf fopf forall ForAll fork forkv Fouriertrf fpartint frac12 frac13 frac14 frac15 frac16 frac18 frac23 frac25 frac34 frac35 frac38 frac45 frac56 frac58 frac78 frasl frown fscr Fscr gacute Gamma gamma Gammad gammad gap Gbreve gbreve Gcedil Gcirc gcirc Gcy gcy Gdot gdot ge gE gEl gel geq geqq geqslant gescc ges gesdot gesdoto gesdotol gesl gesles Gfr gfr gg Gg ggg gimel GJcy gjcy gla gl glE glj gnap ' +
  'gnapprox gne gnE gneq gneqq gnsim Gopf gopf grave GreaterEqual GreaterEqualLess GreaterFullEqual GreaterGreater GreaterLess GreaterSlantEqual GreaterTilde Gscr gscr gsim gsime gsiml gtcc gtcir gt GT Gt gtdot gtlPar gtquest gtrapprox gtrarr gtrdot gtreqless gtreqqless gtrless gtrsim gvertneqq gvnE Hacek hairsp half hamilt HARDcy hardcy harrcir harr hArr harrw Hat hbar Hcirc hcirc hearts heartsuit hellip hercon hfr Hfr HilbertSpace hksearow hkswarow hoarr homtht hookleftarrow hookrightarrow hopf Hopf horbar HorizontalLine hscr Hscr hslash Hstrok hstrok HumpDownHump HumpEqual hybull hyphen Iacute iacute ic Icirc icirc Icy icy Idot IEcy iecy iexcl iff ifr Ifr Igrave igrave ii iiiint iiint iinfin iiota IJlig ijlig Imacr imacr image ImaginaryI imagline imagpart imath Im imof imped Implies incare in infin infintie inodot intcal int Int integers Integral intercal Intersection intlarhk intprod InvisibleComma InvisibleTimes IOcy iocy Iogon iogon Iopf iopf Iota iota iprod iquest iscr Iscr isin isindot isinE isins isinsv isinv it Itilde itilde Iukcy iukcy Iuml iuml Jcirc jcirc Jcy jcy Jfr jfr jmath Jopf jopf Jscr jscr Jsercy jsercy Jukcy jukcy Kappa kappa kappav Kcedil kcedil Kcy kcy Kfr kfr kgreen KHcy khcy KJcy kjcy Kopf kopf Kscr kscr lAarr Lacute lacute laemptyv lagran Lambda lambda lang Lang langd langle lap Laplacetrf laquo larrb larrbfs larr Larr lArr larrfs larrhk larrlp larrpl larrsim larrtl latail lAtail lat late lates lbarr lBarr lbbrk lbrace lbrack lbrke lbrksld lbrkslu Lcaron lcaron Lcedil lcedil lceil lcub Lcy lcy ldca ldquo ldquor ldrdhar ldrushar ldsh le lE LeftAngleBracket LeftArrowBar leftarrow LeftArrow Leftarrow LeftArrowRightArrow leftarrowtail LeftCeiling LeftDoubleBracket LeftDownTeeVector LeftDownVectorBar LeftDownVector LeftFloor leftharpoondown leftharpoonup leftleftarrows leftrightarrow LeftRightArrow Leftrightarrow leftrightarrows leftrightharpoons leftrightsquigarrow LeftRightVector LeftTeeArrow LeftTee LeftTeeVector leftthreetimes LeftTriangleBar ' +
  'LeftTriangle LeftTriangleEqual LeftUpDownVector LeftUpTeeVector LeftUpVectorBar LeftUpVector LeftVectorBar LeftVector lEg leg leq leqq leqslant lescc les lesdot lesdoto lesdotor lesg lesges lessapprox lessdot lesseqgtr lesseqqgtr LessEqualGreater LessFullEqual LessGreater lessgtr LessLess lesssim LessSlantEqual LessTilde lfisht lfloor Lfr lfr lg lgE lHar lhard lharu lharul lhblk LJcy ljcy llarr ll Ll llcorner Lleftarrow llhard lltri Lmidot lmidot lmoustache lmoust lnap lnapprox lne lnE lneq lneqq lnsim loang loarr lobrk longleftarrow LongLeftArrow Longleftarrow longleftrightarrow LongLeftRightArrow Longleftrightarrow longmapsto longrightarrow LongRightArrow Longrightarrow looparrowleft looparrowright lopar Lopf lopf loplus lotimes lowast lowbar LowerLeftArrow LowerRightArrow loz lozenge lozf lpar lparlt lrarr lrcorner lrhar lrhard lrm lrtri lsaquo lscr Lscr lsh Lsh lsim lsime lsimg lsqb lsquo lsquor Lstrok lstrok ltcc ltcir lt LT Lt ltdot lthree ltimes ltlarr ltquest ltri ltrie ltrif ltrPar lurdshar luruhar lvertneqq lvnE macr male malt maltese Map map mapsto mapstodown mapstoleft mapstoup marker mcomma Mcy mcy mdash mDDot measuredangle MediumSpace Mellintrf Mfr mfr mho micro midast midcir mid middot minusb minus minusd minusdu MinusPlus mlcp mldr mnplus models Mopf mopf mp mscr Mscr mstpos Mu mu multimap mumap nabla Nacute nacute nang nap napE napid napos napprox natural naturals natur nbsp nbump nbumpe ncap Ncaron ncaron Ncedil ncedil ncong ncongdot ncup Ncy ncy ndash nearhk nearr neArr nearrow ne nedot NegativeMediumSpace NegativeThickSpace NegativeThinSpace NegativeVeryThinSpace nequiv nesear nesim NestedGreaterGreater NestedLessLess NewLine nexist nexists Nfr nfr ngE nge ngeq ngeqq ngeqslant nges nGg ngsim nGt ngt ngtr nGtv nharr nhArr nhpar ni nis nisd niv NJcy njcy nlarr nlArr nldr nlE nle nleftarrow nLeftarrow nleftrightarrow nLeftrightarrow nleq nleqq nleqslant nles nless nLl nlsim nLt nlt nltri nltrie nLtv nmid NoBreak NonBreakingSpace nopf Nopf Not not ' +
  'NotCongruent NotCupCap NotDoubleVerticalBar NotElement NotEqual NotEqualTilde NotExists NotGreater NotGreaterEqual NotGreaterFullEqual NotGreaterGreater NotGreaterLess NotGreaterSlantEqual NotGreaterTilde NotHumpDownHump NotHumpEqual notin notindot notinE notinva notinvb notinvc NotLeftTriangleBar NotLeftTriangle NotLeftTriangleEqual NotLess NotLessEqual NotLessGreater NotLessLess NotLessSlantEqual NotLessTilde NotNestedGreaterGreater NotNestedLessLess notni notniva notnivb notnivc NotPrecedes NotPrecedesEqual NotPrecedesSlantEqual NotReverseElement NotRightTriangleBar NotRightTriangle NotRightTriangleEqual NotSquareSubset NotSquareSubsetEqual NotSquareSuperset NotSquareSupersetEqual NotSubset NotSubsetEqual NotSucceeds NotSucceedsEqual NotSucceedsSlantEqual NotSucceedsTilde NotSuperset NotSupersetEqual NotTilde NotTildeEqual NotTildeFullEqual NotTildeTilde NotVerticalBar nparallel npar nparsl npart npolint npr nprcue nprec npreceq npre nrarrc nrarr nrArr nrarrw nrightarrow nRightarrow nrtri nrtrie nsc nsccue nsce Nscr nscr nshortmid nshortparallel nsim nsime nsimeq nsmid nspar nsqsube nsqsupe nsub nsubE nsube nsubset nsubseteq nsubseteqq nsucc nsucceq nsup nsupE nsupe nsupset nsupseteq nsupseteqq ntgl Ntilde ntilde ntlg ntriangleleft ntrianglelefteq ntriangleright ntrianglerighteq Nu nu num numero numsp nvap nvdash nvDash nVdash nVDash nvge nvgt nvHarr nvinfin nvlArr nvle nvlt nvltrie nvrArr nvrtrie nvsim nwarhk nwarr nwArr nwarrow nwnear Oacute oacute oast Ocirc ocirc ocir Ocy ocy odash Odblac odblac odiv odot odsold OElig oelig ofcir Ofr ofr ogon Ograve ograve ogt ohbar ohm oint olarr olcir olcross oline olt Omacr omacr Omega omega Omicron omicron omid ominus Oopf oopf opar OpenCurlyDoubleQuote OpenCurlyQuote operp oplus orarr Or or ord order orderof ordf ordm origof oror orslope orv oS Oscr oscr Oslash oslash osol Otilde otilde otimesas Otimes otimes Ouml ouml ovbar OverBar OverBrace OverBracket OverParenthesis para parallel par parsim parsl part PartialD Pcy ' +
  'pcy percnt period permil perp pertenk Pfr pfr Phi phi phiv phmmat phone Pi pi pitchfork piv planck planckh plankv plusacir plusb pluscir plus plusdo plusdu pluse PlusMinus plusmn plussim plustwo pm Poincareplane pointint popf Popf pound prap Pr pr prcue precapprox prec preccurlyeq Precedes PrecedesEqual PrecedesSlantEqual PrecedesTilde preceq precnapprox precneqq precnsim pre prE precsim prime Prime primes prnap prnE prnsim prod Product profalar profline profsurf prop Proportional Proportion propto prsim prurel Pscr pscr Psi psi puncsp Qfr qfr qint qopf Qopf qprime Qscr qscr quaternions quatint quest questeq quot QUOT rAarr race Racute racute radic raemptyv rang Rang rangd range rangle raquo rarrap rarrb rarrbfs rarrc rarr Rarr rArr rarrfs rarrhk rarrlp rarrpl rarrsim Rarrtl rarrtl rarrw ratail rAtail ratio rationals rbarr rBarr RBarr rbbrk rbrace rbrack rbrke rbrksld rbrkslu Rcaron rcaron Rcedil rcedil rceil rcub Rcy rcy rdca rdldhar rdquo rdquor rdsh real realine realpart reals Re rect reg REG ReverseElement ReverseEquilibrium ReverseUpEquilibrium rfisht rfloor rfr Rfr rHar rhard rharu rharul Rho rho rhov RightAngleBracket RightArrowBar rightarrow RightArrow Rightarrow RightArrowLeftArrow rightarrowtail RightCeiling RightDoubleBracket RightDownTeeVector RightDownVectorBar RightDownVector RightFloor rightharpoondown rightharpoonup rightleftarrows rightleftharpoons rightrightarrows rightsquigarrow RightTeeArrow RightTee RightTeeVector rightthreetimes RightTriangleBar RightTriangle RightTriangleEqual RightUpDownVector RightUpTeeVector RightUpVectorBar RightUpVector RightVectorBar RightVector ring risingdotseq rlarr rlhar rlm rmoustache rmoust rnmid roang roarr robrk ropar ropf Ropf roplus rotimes RoundImplies rpar rpargt rppolint rrarr Rrightarrow rsaquo rscr Rscr rsh Rsh rsqb rsquo rsquor rthree rtimes rtri rtrie rtrif rtriltri RuleDelayed ruluhar rx Sacute sacute sbquo scap Scaron scaron Sc sc sccue sce scE Scedil scedil Scirc scirc scnap scnE scnsim scpolint ' +
  'scsim Scy scy sdotb sdot sdote searhk searr seArr searrow sect semi seswar setminus setmn sext Sfr sfr sfrown sharp SHCHcy shchcy SHcy shcy ShortDownArrow ShortLeftArrow shortmid shortparallel ShortRightArrow ShortUpArrow shy Sigma sigma sigmaf sigmav sim simdot sime simeq simg simgE siml simlE simne simplus simrarr slarr SmallCircle smallsetminus smashp smeparsl smid smile smt smte smtes SOFTcy softcy solbar solb sol Sopf sopf spades spadesuit spar sqcap sqcaps sqcup sqcups Sqrt sqsub sqsube sqsubset sqsubseteq sqsup sqsupe sqsupset sqsupseteq square Square SquareIntersection SquareSubset SquareSubsetEqual SquareSuperset SquareSupersetEqual SquareUnion squarf squ squf srarr Sscr sscr ssetmn ssmile sstarf Star star starf straightepsilon straightphi strns sub Sub subdot subE sube subedot submult subnE subne subplus subrarr subset Subset subseteq subseteqq SubsetEqual subsetneq subsetneqq subsim subsub subsup succapprox succ succcurlyeq Succeeds SucceedsEqual SucceedsSlantEqual SucceedsTilde succeq succnapprox succneqq succnsim succsim SuchThat sum Sum sung sup1 sup2 sup3 sup Sup supdot supdsub supE supe supedot Superset SupersetEqual suphsol suphsub suplarr supmult supnE supne supplus supset Supset supseteq supseteqq supsetneq supsetneqq supsim supsub supsup swarhk swarr swArr swarrow swnwar szlig Tab target Tau tau tbrk Tcaron tcaron Tcedil tcedil Tcy tcy tdot telrec Tfr tfr there4 therefore Therefore Theta theta thetasym thetav thickapprox thicksim ThickSpace ThinSpace thinsp thkap thksim THORN thorn tilde Tilde TildeEqual TildeFullEqual TildeTilde timesbar timesb times timesd tint toea topbot topcir top Topf topf topfork tosa tprime trade TRADE triangle triangledown triangleleft trianglelefteq triangleq triangleright trianglerighteq tridot trie triminus TripleDot triplus trisb tritime trpezium Tscr tscr TScy tscy TSHcy tshcy Tstrok tstrok twixt twoheadleftarrow twoheadrightarrow Uacute uacute uarr Uarr uArr Uarrocir Ubrcy ubrcy Ubreve ubreve Ucirc ucirc Ucy ucy ' +
  'udarr Udblac udblac udhar ufisht Ufr ufr Ugrave ugrave uHar uharl uharr uhblk ulcorn ulcorner ulcrop ultri Umacr umacr uml UnderBar UnderBrace UnderBracket UnderParenthesis Union UnionPlus Uogon uogon Uopf uopf UpArrowBar uparrow UpArrow Uparrow UpArrowDownArrow updownarrow UpDownArrow Updownarrow UpEquilibrium upharpoonleft upharpoonright uplus UpperLeftArrow UpperRightArrow upsi Upsi upsih Upsilon upsilon UpTeeArrow UpTee upuparrows urcorn urcorner urcrop Uring uring urtri Uscr uscr utdot Utilde utilde utri utrif uuarr Uuml uuml uwangle vangrt varepsilon varkappa varnothing varphi varpi varpropto varr vArr varrho varsigma varsubsetneq varsubsetneqq varsupsetneq varsupsetneqq vartheta vartriangleleft vartriangleright vBar Vbar vBarv Vcy vcy vdash vDash Vdash VDash Vdashl veebar vee Vee veeeq vellip verbar Verbar vert Vert VerticalBar VerticalLine VerticalSeparator VerticalTilde VeryThinSpace Vfr vfr vltri vnsub vnsup Vopf vopf vprop vrtri Vscr vscr vsubnE vsubne vsupnE vsupne Vvdash vzigzag Wcirc wcirc wedbar wedge Wedge wedgeq weierp Wfr wfr Wopf wopf wp wr wreath Wscr wscr xcap xcirc xcup xdtri Xfr xfr xharr xhArr Xi xi xlarr xlArr xmap xnis xodot Xopf xopf xoplus xotime xrarr xrArr Xscr xscr xsqcup xuplus xutri xvee xwedge Yacute yacute YAcy yacy Ycirc ycirc Ycy ycy yen Yfr yfr YIcy yicy Yopf yopf Yscr yscr YUcy yucy yuml Yuml Zacute zacute Zcaron zcaron Zcy zcy Zdot zdot zeetrf ZeroWidthSpace Zeta zeta zfr Zfr ZHcy zhcy zigrarr zopf Zopf Zscr zscr zwj zwnj'