UNPKG

html-metadata

Version:

Scrapes metadata of several different standards

170 lines (142 loc) 8.2 kB
<html lang="en" dir="ltr">
<!--
  Turtle Article containing capitALised tags to test case sensitivity.

  Test fixture: element names, attribute names and metadata keys below use
  mixed/upper case on purpose. Do not normalise the casing, the tag endings
  or the self-closing span; they are part of what is being exercised.
  Each metadata standard has its own labelled group of tags.
-->
<head prefix="OG: http://OGp.me/ns# fb: http://OGp.me/ns/fb# Article: http://OGp.me/ns/Article#">
  <meta charset="utf-8">
  <Title>Turtles are AWESOME!!1 | Awesome Turtles Website</Title>

  <!--General metadata: plain meta name/content pairs and link rel/href pairs-->
  <meta name="Author" content="Turtle Lvr">
  <meta name="Robots" content="we welcome our robot overlords"/>
  <meta name="Description" content="Exposition on the awesomeness of turtles"/>
  <meta name="Keywords" content="turtles, are, awesome" />
  <!--Icons: each rel appears twice, once with a sizes value and once with sizes=""-->
  <link rel="APPle-touch-icon" href="turtleapple.png" sizes="72x72">
  <link rel="IcOn" href="turtle.png" sizes="18x18" type="image/png">
  <link rel="APPle-touch-icon" href="turtleapple2.png" sizes="">
  <link rel="IcOn" href="turtle2.png" sizes="" type="image/png">
  <link rel="CanonicAL" href="http://example.com/turtles" />
  <link rel="Publisher" href="https://mediawiki.org"/>
  <link rel="Author" href="http://examples.com/turtlelvr"/>
  <link rel="Shortlink" href="http://example.com/c" />

  <!--Open Graph-->
  <meta property="OG:Locale" content="en_US" />
  <meta property="OG:Type" content="Article" />
  <meta property="OG:Title" content="Turtles are AWESOME!!1" />
  <meta property="OG:Description" content="Exposition on the awesomeness of turtles" />
  <meta property="OG:Url" content="http://example.com" />
  <meta property="OG:Site_Name" content="Awesome Turtles Website" />
  <!--Image subproperty tags with no root -->
  <meta property="OG:Image:Width" content="666" /> <!--Ignored-->
  <meta property="OG:Image:height" content="666" /> <!--Ignored-->
  <!--First image: root tag followed by its subproperties-->
  <meta property="OG:Image" content="http://example.com/turtle.jpg" />
  <meta property="OG:Image:Secure_Url" content="https://secure.example.com/turtle.jpg" />
  <meta property="OG:Image:Type" content="Image/jpeg" />
  <meta property="OG:Image:Width" content="400" />
  <meta property="OG:Image:Width" content="666" /> <!--Ignored-->
  <meta property="OG:Image:height" content="300" />
  <!--Second image root-->
  <meta property="OG:Image" content="http://example.com/shell.jpg" />
  <!--Interrupt Image tags with Audio tags-->
  <meta property="OG:Audio" content="http://example.com/sound.mp3" />
  <meta property="OG:Audio:Secure_Url" content="https://secure.example.com/sound.mp3" />
  <meta property="OG:Audio:Type" content="Audio/mpeg" />
  <!--End interruption with Audio tags -->
  <meta property="OG:Image:Width" content="200" />
  <meta property="OG:Image:Height" content="150" />
  <!--Invalid subproperty-->
  <meta property="OG:Cat:Meow" content="purr" /><!--Ignored-->

  <!--Article vertical: Tag and Section are each repeated three times-->
  <meta property="Article:Tag" content="turtles" />
  <meta property="Article:Tag" content="are" />
  <meta property="Article:Tag" content="awesome" />
  <meta property="Article:Section" content="Turtles are tough" />
  <meta property="Article:Section" content="Turtles are flawless" />
  <meta property="Article:Section" content="Turtles are cute" />
  <meta property="Article:Published_time" content="2012-02-04T12:00:00+00:00" />
  <meta property="Article:Modified_time" content="2015-01-14T19:14:27+00:00" />
  <meta property="Article:Author" content="http://examples.com/turtlelvr" />
  <meta property="Article:Publisher" content="http://mediawiki.org" />

  <!--AL-->
  <meta property="AL:Ios:Url" content="turtle://">
  <meta property="AL:Ios:App_Store_Id" content="000">
  <meta property="AL:Android:Url" content="turtle://">
  <meta property="AL:Android:Package" content="superturtleArticle.Androidapp">
  <meta property="AL:Web:Url" content="http://example.com/">
  <!--NOTE(review): "Should_Falback" looks like a misspelling of "should_fallback"; left unchanged because expected test output may depend on this key. Confirm against the tests.-->
  <meta property="AL:Web:Should_Falback" content="true">

  <!--Twitter: Creator is given twice with different values-->
  <meta name="Twitter:Card" content="summary">
  <meta name="Twitter:Site" content="@Turtlessssssssss">
  <meta name="Twitter:Creator" content="@Turtlessssssssss">
  <meta name="Twitter:Creator" content="@Turtlezzzzzzzzzz">
  <meta name="Twitter:Url" content="http://www.example.com/turtles">
  <meta name="Twitter:Title" content="Turtles are AWESOME!!1">
  <meta name="Twitter:Description" content="Exposition on the awesomeness of turtles">
  <meta name="Twitter:Image" content="http://example.com/turtles.jpg">
  <meta name="Twitter:Image:Alt" content="It's a bunch of turtles!">
  <meta name="Twitter:App:Url:Iphone" content="turtle://">
  <meta name="Twitter:App:Id:Iphone" content="000">
  <meta name="Twitter:App:Url:Googleplay" content="turtle://">
  <meta name="Twitter:App:Id:Googleplay" content="superturtlearticle.androidapp">

  <!--BE Press-->
  <meta name="Bepress_Citation_Series_Title" content="Turtles" >
  <meta name="Bepress_Citation_Author" content="Turtle Lvr" />
  <meta name="Bepress_Citation_Author_Institution" content="Mediawiki" />
  <meta name="Bepress_Citation_Title" content="Turtles are AWESOME!!1" >
  <meta name="Bepress_Citation_Date" content="2012" />
  <meta name="Bepress_Citation_Pdf_Url" content="http://www.example.com/turtlelvr/pdf" />
  <meta name="Bepress_Citation_Abstract_Html_Url" content="http://www.example.com/turtlelvr" />
  <meta name="Bepress_Citation_Publisher" content="Turtles Society" />
  <meta name="Bepress_Citation_Online_Date" content="2012/02/04" />

  <!--Dublin Core-->
  <meta name="DC.Title" content="Turtles are AWESOME!!1" >
  <meta name="DC.Creator" content="http://www.example.com/turtlelvr" >
  <meta name="DC.Description" content="Exposition on the awesomeness of turtles" >
  <meta name="DC.Date" content="2012-02-04 12:00:00" >
  <meta name="DC.Type" content="Text.Article" >

  <!--EPrints-->
  <meta name="Eprints.Title" content="Turtles are AWESOME!!1" >
  <meta name="Eprints.Creators_Name" content="http://www.example.com/turtlelvr" >
  <meta name="Eprints.Abstract" content="Exposition on the awesomeness of turtles" >
  <meta name="Eprints.Datestamp" content="2012-02-04 12:00:00" >
  <meta name="Eprints.Type" content="Article" >

  <!--Highwire Press-->
  <meta name="Citation_Journal_Title" content="Turtles" >
  <meta name="Citation_Issn" content="1234-5678" >
  <meta name="Citation_Doi" content="10.1000/123" >
  <meta name="Citation_Publication_Date" content="2012-02-04" >
  <!--Title value kept on a single line so it matches the other title fields in this fixture-->
  <meta name="Citation_Title" content="Turtles are AWESOME!!1" >
  <meta name="Citation_Author" content="Turtle Lvr" />
  <meta name="Citation_Author_Institution" content="Mediawiki" />
  <meta name="Citation_Volume" content="150" />
  <meta name="Citation_Issue" content="1" />
  <meta name="Citation_Firstpage" content="123" />
  <meta name="Citation_Lastpage" content="456" />
  <meta name="Citation_Publisher" content="Turtles Society" />
  <meta name="Citation_Abstract" content="Exposition on the awesomeness of turtles." />

  <!--PRISM-->
  <meta name="Prism.Issn" content="1234-5678" >
  <meta name="Prism.PublicationName" content="Turtles Society" >
  <meta name="Prism.PublicationDate" content="2012-02-04" >
  <meta name="Prism.StartingPage" content="123" >
  <meta name="Prism.Copyright" content="2012 Turtles Society" >
  <meta name="Prism.RightsAgent" content="permissions@turtles.com" >
  <meta name="Prism.Url" content="https://www.turtles.com" >
  <meta name="Prism.Doi" content="10.1000/123" >
</head>
<body>
  <!--COINS: the citation is carried as an HTML-escaped query string in the Title attribute of a span with class Z3988-->
  <span class="Z3988" Title="ctx_ver=Z39.88-2004&amp;RFT_id=info%3Adoi%2Fhttp%3A%2F%2Fdx.doi.org%2F10.5555%2F12345678&amp;rfr_id=info%3Asid%2Fcrossref.org%3Asearch&amp;RFT.aTitle=Toward+a+Unified+Theory+of+High-Energy+Metaphysics%3A+Silly+String+Theory&amp;RFT.jTitle=Journal+of+Psychoceramics&amp;RFT.Date=2008&amp;RFT.Volume=5&amp;RFT.issue=11&amp;RFT.Spage=1&amp;RFT.Epage=3&amp;RFT.Aufirst=Josiah&amp;RFT.Aulast=Carberry&amp;RFT_vAL_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;RFT.Genre=Article&amp;RFT.Au=Josiah+Carberry"></span>

  <!--Schema.org Microdata: Case sensitive-->
  <div itemscope>
    <h1 itemprop="headline">Turtles are AWESOME!!1</h1> <!--Uses text from inside tags-->
    <span itemprop="wordCount" content="10" /> <!--Self-closing tag-->
    <span itemprop="author" content="Turtle Lvr">Turtle H8r</span> <!--Prefers content attr to text inside tags-->
    <a href="http://www.archive.org/turtlearticle" itemprop="archivedAt">Turtle Article Archive</a> <!--Uses href and not text between tags-->
  </div>

  <!-- Since keys may be case-sensitive in JSON-LD, take the keys as-is -->
  <script type="application/ld+json">{ "@context": "http://schema.org", "@type": "Organization", "url": "https://www.turtles.com" } </script>
</body>
</html>