// tiny-html-lexer
// A tiny HTML5 lexer
// Viewer metadata (version not given): 158 lines (149 loc) • 6.56 kB, JavaScript
/**
 * HTML fragments exported as `samples`.
 *
 * Every non-empty entry is one string that starts with a short label
 * (for example 'charref:', 'comment:' or 'slashes:') followed by markup.
 * Empty-string entries sit between the groups below; they are real array
 * elements, so they count towards `samples.length`.
 *
 * NOTE(review): several 'charref' entries hold literal characters
 * (e.g. ¬, ∉, ф) where the label suggests a character reference; confirm
 * against the upstream fixtures that these were not decoded by accident.
 */
var samples = [
  // Character references, in data and in attribute values.
  'charref: named & in data',
  'charref: named non-terminated & in data',
  'charref: named non-terminated &a in data',
  'charref: hexadecimal ೌ in data',
  'charref: hexadecimal non-terminated ೌ in data',
  'charref: decimal ф in data',
  'charref: decimal non-terminated n in data',
  'charref: special <input value=asda¬(></input>',
  'charref: special <input value=asda¬-></input>',
  'charref: special <input value=asda¬*=c></input>',
  'charref: special <input value=asda¬=c></input>',
  'charref: special <input value="asda¬it; I tell you"></input>',
  'charref: non-special <input value=asda¬in*=c></input>',
  'charref: non-special <input value=asda¬in=c></input>',
  'charref: non-special <input value=asda∉=c></input>',
  'charref: special ¬(',
  'charref: special ¬-',
  'charref: special ¬*=c in data',
  'charref: special ¬=c in data',
  'charref: special ¬it; I tell you, in data',
  'charref: special ∉ I tell you, in data',
  'charref: non-special ¬in*=c in data',
  'charref: non-special ¬in=c in data',
  'charref: non-special ∉=c in data',
  'charref: named <input value="you & me"/> in attribute',
  'charref: named <input value=\'you & me\'/> in attribute',
  'charref: named <input value=youme /> in attribute',
  'charref: named <input value=&me /> in attribute',
  'charref: named <input value=& attr=val /> in attribute',
  'charref: named <input value=&o attr=val /> in attribute',
  'charref: bogus <input value="you &# am me"/> in attribute',
  'charref: bogus <input value=\'you &# amp me\'/> in attribute',
  'charref: bogus <input value=you&x ampme /> in attribute',
  'charref: ampHash &# such',
  '',

  // textarea / script / plaintext content and malformed tags.
  'rcdata <textarea> asdf & & <textareaNot </textarea> and more',
  'rcdata2 <textarea> asdf & & </textarea( and not ending> it',
  'rcdata3 <textarea> asdf & & </textarea/ and ending> it, see <span>yes</span>',
  'rcdata5 <textarea/> asdf & & and NOT ending < it, see <span>yes</span>',
  'rcdata4 <textarea> asdf & & </textarea and ending> it',
  'rawtext <script> asdf & <span> </scriptNot </script> and more',
  'rawtext2 <script> asdf & <span> </script( and> not ending it <span>',
  'rawtext3 <script> asdf & <span> </script/ and> ending it <span> see',
  'rawtext4 <script> asdf & <span> </script and ending it <span> see',
  'script <!doctype html>hello <script><!-- asdf</script> thus',
  'nonalpha tag This is not a <ém attr>tag</ém>',
  'double open tag A double less than sign <<div attr>content</div>',
  'bad end tag <div style=color:blue> This is blue </ div> And this too!',
  'closePlaintext hi <plaintext>asd<as &ap, </plaintext> cannot be ended',
  '',

  // Comments and bogus comments.
  'comment: <!weird markup declaration> and such',
  'comment: <!> and such',
  'comment: <?> and such',
  'comment: </> and such',
  'comment: <!-> and such',
  'comment: <?-> and such',
  'comment: <!-> and such',
  'comment: <!--> and such',
  'comment: <?--> and such',
  'comment: <!--> and such',
  'comment: <!--!> and such',
  'comment: <!--> and such',
  'comment: <!-> and such',
  'comment: <!---!> and such',
  'comment: <!----!> and such',
  'comment: <!-- with -> within --> and subsequent data',
  'comment: <!-- with bogus end -> and subsequent data',
  'comment: <!-- Comment with -- double dash within --> and subsequent data',
  'comment: <!-- Comment with --!- weird stuff within --> and subsequent data',
  'comment: <!-- Comment with strange end --!> and subsequent data',
  'bogus comment: <! with end !@> and subsequent data',
  'bogus comment: </ with end !@> and subsequent data',
  'bogus comment: <? with end !@> and subsequent data',
  'bogus comment: <!- with end -> and subsequent data',
  '',

  // Attribute oddities, ordinary markup and unescaped ampersands.
  'missing space attribues connected <div name="a"name="b" >',
  'nonalpha attribute weird template tag <div {name="a" name="b" >',
  'normalHtml This is <span class = "s1">html</span> Yeah!',
  'unescaped ampersand data & such',
  'unescaped ampersand Hash data &# such',
  'unescaped ampersand HashEx data &#x such',
  'unescaped ampersand HashExZed data &#xz such',
  '',

  // Slashes, equals signs and missing or invalid attribute values.
  'slashes: <span/>',
  'slashes: <span name=foo//>',
  'slashes: <div//>',
  'slashes: <div/foo/bar//>',
  'slashes: <span//>',
  'slashes: <span />',
  'slashes: <span <>',
  'slashes: <span //>',
  'slashes: <span / />',
  'slashes: <span/////>',
  'slashes: <span/////name////=/blabla>',
  'slashes: <span / attr >foo bar</span>',
  'slashes: <span name=/ >asdf',
  'slashes: <span name=/>asdf',
  'slashes: <span name=// />asdf',
  'slashes: <span name= / />asdf',
  'weirdEquals <span attr = / asd >content</span>',
  'weirdEquals2 <span attr = @ asd >content</span>',
  'weirdEquals3 <span attr /= asd >content</span>',
  'weirdEquals4 <span attr @= asd >content</span>',
  'missingValue <span name=>asdf',
  'invalidAttributeValue1 <div class= =at >',
  'invalidAttributeValue2 <div class= <at >',
  'invalidAttributeValue3 <div class= `at >'
];
/**
 * Inputs exported as `EOFSamples`.
 *
 * Each entry is a single string: a label (mostly of the form
 * '<name> state ...') followed by markup that stops part-way through a
 * construct, e.g. inside a tag, an attribute value, a comment or a
 * character reference.
 *
 * NOTE(review): the 'hexDigits' and 'decimalCharRef' entries end right after
 * 'data ' here; confirm against the upstream fixtures that no character
 * reference was lost from them.
 */
var EOFSamples = [
  // Data and tags.
  'data state eof in da',
  'tagOpen state eof in <',
  'tagName state eof in <d',
  'selfClosingStartTag state in <div /',
  'endTagOpen state in </a',

  // Attributes.
  'beforeAttributeName state <div ',
  'attributeName state <div at',
  'afterAttributeName state <div attr ',
  'beforeAttributeValue state <div attr =',
  'attributeValueDoubleQuoted state <div attr="te',
  'attributeValueSingleQuoted state <div attr=\'te',
  'attributeValueUnquoted state <div attr=te',
  'afterAttributeValueQuoted state <div attr="test"',

  // Markup declarations and comments.
  'markupDeclarationOpen state a markup decl <!',
  'selfClosingTag state An eof after a / <span /',
  'commentStart state a comment start <!--',
  'commentStartDash state a comment start dash <!---',
  'comment state a comment <!-- hello th',
  'commentEndDash state a comment end dash <!-- hello th -',
  'commentEnd state a comment end <!-- hello th --',
  'commentEndBang state a comment end bang <!-- hello th --!',
  'bogusComment state <! bogus comment',

  // Character references.
  'charRefIn_ state data &',
  'numericCharRef state data &#',
  'hexadecimalCharRef state data &#x',
  'hexDigits state data ',
  'decimalCharRef state data ',
  'namedCharRef state data &name',
  'namedCharRefInAttr state <span attr="asd&a&b c">text</span>',
  'namedCharRefInData state named charref in data asd&a&b cde',

  // script / plaintext content.
  'rawtext state eof in raw text <script> funct',
  'plaintext state eof in raw text <plaintext> asdf',
  'rawtextLessThanSign state eof in raw text less than sign <script> if (i<',
  'rawtextEndTagOpen state eof in raw text end tag open <script> asdf </'
];
module.exports =
{ samples: samples
, EOFSamples: EOFSamples
}