<!--
html-metadata
Version:
Scrapes metadata of several different standards
170 lines (142 loc) • 8.2 kB
HTML
-->
<html lang="en" dir="ltr">
<!--
Turtle Article containing capitALised tags to test case sensitivity
-->
<head prefix="OG: http://OGp.me/ns# fb: http://OGp.me/ns/fb# Article: http://OGp.me/ns/Article#">
<!--General page metadata: charset, title, named meta tags and link relations. Tag names, name values and rel values use mixed case on purpose for the case-sensitivity test.-->
<meta charset="utf-8">
<Title>Turtles are AWESOME!!1 | Awesome Turtles Website</Title>
<meta name="Author" content="Turtle Lvr">
<meta name="Robots" content="we welcome our robot overlords"/>
<meta name="Description" content="Exposition on the awesomeness of turtles"/>
<meta name="Keywords" content="turtles, are, awesome" />
<!--Icon links: the first pair declares explicit sizes; the second pair repeats each rel with an empty sizes attribute-->
<link rel="APPle-touch-icon" href="turtleapple.png" sizes="72x72">
<link rel="IcOn" href="turtle.png" sizes="18x18" type="image/png">
<link rel="APPle-touch-icon" href="turtleapple2.png" sizes="">
<link rel="IcOn" href="turtle2.png" sizes="" type="image/png">
<!--Other link relations: canonical URL, publisher, author and shortlink-->
<link rel="CanonicAL" href="http://example.com/turtles" />
<link rel="Publisher" href="https://mediawiki.org"/>
<link rel="Author" href="http://examples.com/turtlelvr"/>
<link rel="Shortlink" href="http://example.com/c" />
<!--Open Graph: meta tags whose property attribute uses the OG prefix, written in mixed case-->
<meta property="OG:Locale" content="en_US" />
<meta property="OG:Type" content="Article" />
<meta property="OG:Title" content="Turtles are AWESOME!!1" />
<meta property="OG:Description" content="Exposition on the awesomeness of turtles" />
<meta property="OG:Url" content="http://example.com" />
<meta property="OG:Site_Name" content="Awesome Turtles Website" />
<!--Image subproperty tags that appear before any OG:Image root tag-->
<meta property="OG:Image:Width" content="666" /> <!--Ignored: no OG:Image root yet-->
<meta property="OG:Image:height" content="666" /> <!--Ignored: no OG:Image root yet-->
<!--First image root, followed by its own subproperties-->
<meta property="OG:Image" content="http://example.com/turtle.jpg" />
<meta property="OG:Image:Secure_Url" content="https://secure.example.com/turtle.jpg" />
<meta property="OG:Image:Type" content="Image/jpeg" />
<meta property="OG:Image:Width" content="400" />
<meta property="OG:Image:Width" content="666" /> <!--Ignored (presumably because Width was already given for this image; confirm against the parser)-->
<meta property="OG:Image:height" content="300" />
<!--Second image root-->
<meta property="OG:Image" content="http://example.com/shell.jpg" />
<!--Interrupt Image tags with Audio tags-->
<meta property="OG:Audio" content="http://example.com/sound.mp3" />
<meta property="OG:Audio:Secure_Url" content="https://secure.example.com/sound.mp3" />
<meta property="OG:Audio:Type" content="Audio/mpeg" />
<!--End interruption with Audio tags; the Image subproperties below follow the Audio tags rather than directly following an OG:Image tag-->
<meta property="OG:Image:Width" content="200" />
<meta property="OG:Image:Height" content="150" />
<!--Invalid subproperty: OG:Cat has no root tag anywhere in this file-->
<meta property="OG:Cat:Meow" content="purr" /><!--Ignored-->
<!--Article vertical: Article-prefixed properties; Tag and Section are each repeated three times with different values-->
<meta property="Article:Tag" content="turtles" />
<meta property="Article:Tag" content="are" />
<meta property="Article:Tag" content="awesome" />
<meta property="Article:Section" content="Turtles are tough" />
<meta property="Article:Section" content="Turtles are flawless" />
<meta property="Article:Section" content="Turtles are cute" />
<!--Single-valued Article properties: timestamps, author and publisher-->
<meta property="Article:Published_time" content="2012-02-04T12:00:00+00:00" />
<meta property="Article:Modified_time" content="2015-01-14T19:14:27+00:00" />
<meta property="Article:Author" content="http://examples.com/turtlelvr" />
<meta property="Article:Publisher" content="http://mediawiki.org" />
<!--AL (presumably App Links): AL-prefixed properties grouped by platform (Ios, Android, Web)-->
<meta property="AL:Ios:Url" content="turtle://">
<meta property="AL:Ios:App_Store_Id" content="000">
<meta property="AL:Android:Url" content="turtle://">
<meta property="AL:Android:Package" content="superturtleArticle.Androidapp">
<meta property="AL:Web:Url" content="http://example.com/">
<!--NOTE(review): App Links spells this property should_fallback; confirm whether the single "l" in Should_Falback is intentional before changing it-->
<meta property="AL:Web:Should_Falback" content="true">
<!--Twitter: these tags use the name attribute (not property) with a Twitter prefix-->
<meta name="Twitter:Card" content="summary">
<meta name="Twitter:Site" content="@Turtlessssssssss">
<!--Twitter:Creator appears twice with different values-->
<meta name="Twitter:Creator" content="@Turtlessssssssss">
<meta name="Twitter:Creator" content="@Turtlezzzzzzzzzz">
<meta name="Twitter:Url" content="http://www.example.com/turtles">
<meta name="Twitter:Title" content="Turtles are AWESOME!!1">
<meta name="Twitter:Description" content="Exposition on the awesomeness of turtles">
<meta name="Twitter:Image" content="http://example.com/turtles.jpg">
<meta name="Twitter:Image:Alt" content="It's a bunch of turtles!">
<!--App tags: Url and Id for the Iphone and Googleplay targets-->
<meta name="Twitter:App:Url:Iphone" content="turtle://">
<meta name="Twitter:App:Id:Iphone" content="000">
<meta name="Twitter:App:Url:Googleplay" content="turtle://">
<meta name="Twitter:App:Id:Googleplay" content="superturtlearticle.androidapp">
<!--BE Press: meta names prefixed with Bepress_Citation_; the tags mix the ">" and "/>" closing styles-->
<meta name="Bepress_Citation_Series_Title" content="Turtles" >
<meta name="Bepress_Citation_Author" content="Turtle Lvr" />
<meta name="Bepress_Citation_Author_Institution" content="Mediawiki" />
<meta name="Bepress_Citation_Title" content="Turtles are AWESOME!!1" >
<meta name="Bepress_Citation_Date" content="2012" />
<meta name="Bepress_Citation_Pdf_Url" content="http://www.example.com/turtlelvr/pdf" />
<meta name="Bepress_Citation_Abstract_Html_Url" content="http://www.example.com/turtlelvr" />
<meta name="Bepress_Citation_Publisher" content="Turtles Society" />
<meta name="Bepress_Citation_Online_Date" content="2012/02/04" />
<!--Dublin Core: meta names prefixed with "DC." (dot-separated)-->
<meta name="DC.Title" content="Turtles are AWESOME!!1" >
<meta name="DC.Creator" content="http://www.example.com/turtlelvr" >
<meta name="DC.Description" content="Exposition on the awesomeness of turtles" >
<meta name="DC.Date" content="2012-02-04 12:00:00" >
<meta name="DC.Type" content="Text.Article" >
<!--EPrints: meta names prefixed with "Eprints." (dot-separated)-->
<meta name="Eprints.Title" content="Turtles are AWESOME!!1" >
<meta name="Eprints.Creators_Name" content="http://www.example.com/turtlelvr" >
<meta name="Eprints.Abstract" content="Exposition on the awesomeness of turtles" >
<meta name="Eprints.Datestamp" content="2012-02-04 12:00:00" >
<meta name="Eprints.Type" content="Article" >
<!--Highwire Press: meta names prefixed with Citation_ (underscore-separated); the tags mix the ">" and "/>" closing styles-->
<meta name="Citation_Journal_Title" content="Turtles" >
<meta name="Citation_Issn" content="1234-5678" >
<meta name="Citation_Doi" content="10.1000/123" >
<meta name="Citation_Publication_Date" content="2012-02-04" >
<meta name="Citation_Title" content="Turtles are AWESOME!!1" >
<meta name="Citation_Author" content="Turtle Lvr" />
<meta name="Citation_Author_Institution" content="Mediawiki" />
<meta name="Citation_Volume" content="150" />
<meta name="Citation_Issue" content="1" />
<meta name="Citation_Firstpage" content="123" />
<meta name="Citation_Lastpage" content="456" />
<meta name="Citation_Publisher" content="Turtles Society" />
<meta name="Citation_Abstract" content="Exposition on the awesomeness of turtles." />
<!--PRISM: meta names prefixed with "Prism."; the part after the dot is written in PascalCase here (e.g. PublicationName)-->
<meta name="Prism.Issn" content="1234-5678" >
<meta name="Prism.PublicationName" content="Turtles Society" >
<meta name="Prism.PublicationDate" content="2012-02-04" >
<meta name="Prism.StartingPage" content="123" >
<meta name="Prism.Copyright" content="2012 Turtles Society" >
<meta name="Prism.RightsAgent" content="permissions@turtles.com" >
<meta name="Prism.Url" content="https://www.turtles.com" >
<meta name="Prism.Doi" content="10.1000/123" >
</head>
<body>
<!--COinS: an empty span with class Z3988 whose Title attribute holds a URL-encoded string of key=value pairs joined by "&"; the attribute name and several keys (RFT_id, RFT.*) are written in mixed case-->
<span class="Z3988" Title="ctx_ver=Z39.88-2004&RFT_id=info%3Adoi%2Fhttp%3A%2F%2Fdx.doi.org%2F10.5555%2F12345678&rfr_id=info%3Asid%2Fcrossref.org%3Asearch&RFT.aTitle=Toward+a+Unified+Theory+of+High-Energy+Metaphysics%3A+Silly+String+Theory&RFT.jTitle=Journal+of+Psychoceramics&RFT.Date=2008&RFT.Volume=5&RFT.issue=11&RFT.Spage=1&RFT.Epage=3&RFT.Aufirst=Josiah&RFT.Aulast=Carberry&RFT_vAL_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&RFT.Genre=Article&RFT.Au=Josiah+Carberry"></span>
<!--Schema.org Microdata: case sensitive, so the itemprop names below keep their usual spelling (headline, wordCount, author, archivedAt) instead of being re-capitalised-->
<div itemscope>
<h1 itemprop="headline">Turtles are AWESOME!!1</h1> <!--Value comes from the text inside the tags-->
<span itemprop="wordCount" content="10" /> <!--Deliberately self-closing span; value is in the content attribute-->
<span itemprop="author" content="Turtle Lvr">Turtle H8r</span> <!--Content attribute is preferred over the text inside the tags-->
<a href="http://www.archive.org/turtlearticle" itemprop="archivedAt">Turtle Article Archive</a> <!--Value comes from href, not from the text between the tags-->
</div>
<!-- JSON-LD: since keys may be case-sensitive in JSON-LD, the keys in this script are taken as-is and are not re-capitalised like the tags elsewhere in this file -->
<script type="application/ld+json">{
"@context": "http://schema.org",
"@type": "Organization",
"url": "https://www.turtles.com"
}
</script>
</body>
</html>