html-metadata
Version:
Scrapes metadata of several different standards
181 lines (151 loc) • 8.34 kB
HTML
<html lang="en" dir="ltr">
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">
<meta charset="utf-8">
<!--
  Metadata sample page. The head describes one article ("Turtles are AWESOME!!1")
  in several metadata vocabularies, one commented section per vocabulary.
  The prefix attribute on the head element declares the og, fb and article namespaces.
  NOTE(review): this looks like a scraper test fixture. Several tags are duplicated,
  out of order or malformed and are marked "Ignored" inline; tag order and exact
  values are presumably asserted on elsewhere, so confirm before normalising anything.
-->
<title>Turtles are AWESOME!!1 | Awesome Turtles Website</title>
<!-- General HTML metadata: named meta tags -->
<meta name="author" content="Turtle Lvr">
<meta name="robots" content="we welcome our robot overlords"/>
<meta name="description" content="Exposition on the awesomeness of turtles"/>
<meta name="keywords" content="turtles, are, awesome" />
<!-- Icons: two entries per rel value; the second of each pair has an empty sizes attribute -->
<link rel="apple-touch-icon" href="turtleapple.png" sizes="72x72">
<link rel="icon" href="turtle.png" sizes="18x18" type="image/png">
<link rel="apple-touch-icon" href="turtleapple2.png" sizes="">
<link rel="icon" href="turtle2.png" sizes="" type="image/png">
<!-- Link relations. NOTE(review): the author link below (and article:author further down)
     uses the host examples.com, unlike example.com everywhere else; confirm this is intended. -->
<link rel="canonical" href="http://example.com/turtles" />
<link rel="publisher" href="https://mediawiki.org"/>
<link rel="author" href="http://examples.com/turtlelvr"/>
<link rel="shortlink" href="http://example.com/c" />
<!-- Open Graph: basic properties -->
<meta property="og:locale" content="en_US" />
<meta property="og:type" content="article" />
<meta property="og:title" content="Turtles are AWESOME!!1" />
<meta property="og:description" content="Exposition on the awesomeness of turtles" />
<meta property="og:url" content="http://example.com" />
<meta property="og:site_name" content="Awesome Turtles Website" />
<!-- Image subproperty tags with no root: these two appear before any og:image tag -->
<meta property="og:image:width" content="666" /> <!--Ignored-->
<meta property="og:image:height" content="666" /> <!--Ignored-->
<!-- First image (turtle.jpg) followed by its subproperties; the second og:image:width is a duplicate -->
<meta property="og:image" content="http://example.com/turtle.jpg" />
<meta property="og:image:secure_url" content="https://secure.example.com/turtle.jpg" />
<meta property="og:image:type" content="image/jpeg" />
<meta property="og:image:width" content="400" />
<meta property="og:image:width" content="666" /> <!--Ignored-->
<meta property="og:image:height" content="300" />
<!-- Second image (shell.jpg); its width/height come only after the audio tags below -->
<meta property="og:image" content="http://example.com/shell.jpg" />
<!-- Interrupt image tags with audio tags -->
<meta property="og:audio" content="http://example.com/sound.mp3" />
<meta property="og:audio:secure_url" content="https://secure.example.com/sound.mp3" />
<meta property="og:audio:type" content="audio/mpeg" />
<!-- End interruption with audio tags.
     NOTE(review): the next two tags presumably still belong to the shell.jpg image above; confirm against the parser. -->
<meta property="og:image:width" content="200" />
<meta property="og:image:height" content="150" />
<!-- Invalid subproperty: og:cat has no root tag anywhere in this document -->
<meta property="og:cat:meow" content="purr" /><!--Ignored-->
<!-- Article vertical: article:tag and article:section are each repeated three times -->
<meta property="article:tag" content="turtles" />
<meta property="article:tag" content="are" />
<meta property="article:tag" content="awesome" />
<meta property="article:section" content="Turtles are tough" />
<meta property="article:section" content="Turtles are flawless" />
<meta property="article:section" content="Turtles are cute" />
<meta property="article:published_time" content="2012-02-04T12:00:00+00:00" />
<meta property="article:modified_time" content="2015-01-14T19:14:27+00:00" />
<meta property="article:author" content="http://examples.com/turtlelvr" />
<meta property="article:publisher" content="http://mediawiki.org" />
<!-- AL (al: prefixed properties) for ios, android and web targets -->
<meta property="al:ios:url" content="turtle://">
<meta property="al:ios:app_store_id" content="000">
<meta property="al:android:url" content="turtle://">
<meta property="al:android:package" content="superturtlearticle.androidapp">
<meta property="al:web:url" content="http://example.com/">
<meta property="al:web:should_fallback" content="true">
<!-- Twitter: uses the name attribute (not property); twitter:creator appears twice with different values -->
<meta name="twitter:card" content="summary">
<meta name="twitter:site" content="@Turtlessssssssss">
<meta name="twitter:creator" content="@Turtlessssssssss">
<meta name="twitter:creator" content="@Turtlezzzzzzzzzz">
<meta name="twitter:url" content="http://www.example.com/turtles">
<meta name="twitter:title" content="Turtles are AWESOME!!1">
<meta name="twitter:description" content="Exposition on the awesomeness of turtles">
<meta name="twitter:image" content="http://example.com/turtles.jpg">
<meta name="twitter:image:alt" content="It's a bunch of turtles!">
<meta name="twitter:app:url:iphone" content="turtle://">
<meta name="twitter:app:id:iphone" content="000">
<meta name="twitter:app:url:googleplay" content="turtle://">
<meta name="twitter:app:id:googleplay" content="superturtlearticle.androidapp">
<!-- BE Press: bepress_citation_* names -->
<meta name="bepress_citation_series_title" content="Turtles" >
<meta name="bepress_citation_author" content="Turtle Lvr" />
<meta name="bepress_citation_author_institution" content="Mediawiki" />
<meta name="bepress_citation_title" content="Turtles are AWESOME!!1" >
<meta name="bepress_citation_date" content="2012" />
<meta name="bepress_citation_pdf_url" content="http://www.example.com/turtlelvr/pdf" />
<meta name="bepress_citation_abstract_html_url" content="http://www.example.com/turtlelvr" />
<meta name="bepress_citation_publisher" content="Turtles Society" />
<meta name="bepress_citation_online_date" content="2012/02/04" />
<!-- Dublin Core: dc.* names; dc.creator holds a URL here rather than a personal name -->
<meta name="dc.title" content="Turtles are AWESOME!!1" >
<meta name="dc.creator" content="http://www.example.com/turtlelvr" >
<meta name="dc.description" content="Exposition on the awesomeness of turtles" >
<meta name="dc.date" content="2012-02-04 12:00:00" >
<meta name="dc.type" content="Text.Article" >
<!-- EPrints: eprints.* names, carrying the same values as the Dublin Core section apart from the type -->
<meta name="eprints.title" content="Turtles are AWESOME!!1" >
<meta name="eprints.creators_name" content="http://www.example.com/turtlelvr" >
<meta name="eprints.abstract" content="Exposition on the awesomeness of turtles" >
<meta name="eprints.datestamp" content="2012-02-04 12:00:00" >
<meta name="eprints.type" content="article" >
<!-- Highwire Press: citation_* names; the abstract ends with a full stop, unlike the other description values -->
<meta name="citation_journal_title" content="Turtles" >
<meta name="citation_issn" content="1234-5678" >
<meta name="citation_doi" content="10.1000/123" >
<meta name="citation_publication_date" content="2012-02-04" >
<meta name="citation_title" content="Turtles are AWESOME!!1" >
<meta name="citation_author" content="Turtle Lvr" />
<meta name="citation_author_institution" content="Mediawiki" />
<meta name="citation_volume" content="150" />
<meta name="citation_issue" content="1" />
<meta name="citation_firstpage" content="123" />
<meta name="citation_lastpage" content="456" />
<meta name="citation_publisher" content="Turtles Society" />
<meta name="citation_abstract" content="Exposition on the awesomeness of turtles." />
<!-- PRISM: prism.* names; ISSN, DOI, date and starting page repeat the Highwire Press values above -->
<meta name="prism.issn" content="1234-5678" >
<meta name="prism.publicationName" content="Turtles Society" >
<meta name="prism.publicationDate" content="2012-02-04" >
<meta name="prism.startingPage" content="123" >
<meta name="prism.copyright" content="2012 Turtles Society" >
<meta name="prism.rightsAgent" content="permissions@turtles.com" >
<meta name="prism.url" content="https://www.turtles.com" >
<meta name="prism.doi" content="10.1000/123" >
</head>
<body>
<!-- COinS: an empty span with class Z3988 whose title attribute carries a URL-encoded
     key/value string (ctx_ver=Z39.88-2004). It describes a different work
     (a Journal of Psychoceramics article by Josiah Carberry) than the turtle metadata in the head. -->
<span class="Z3988" title="ctx_ver=Z39.88-2004&rft_id=info%3Adoi%2Fhttp%3A%2F%2Fdx.doi.org%2F10.5555%2F12345678&rfr_id=info%3Asid%2Fcrossref.org%3Asearch&rft.atitle=Toward+a+Unified+Theory+of+High-Energy+Metaphysics%3A+Silly+String+Theory&rft.jtitle=Journal+of+Psychoceramics&rft.date=2008&rft.volume=5&rft.issue=11&rft.spage=1&rft.epage=3&rft.aufirst=Josiah&rft.aulast=Carberry&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.au=Josiah+Carberry"></span>
<!-- Schema.org Microdata: one itemscope with four itemprop children, each supplying its value
     in a different way (element text, content attribute, content attribute plus text, href).
     NOTE(review): the wordCount span is written as self-closing although span is not a void
     element; this looks intentional, so do not "fix" it without checking the consuming tests. -->
<div itemscope>
<h1 itemprop="headline">Turtles are AWESOME!!1</h1> <!--Uses text from inside tags-->
<span itemprop="wordCount" content="10" /> <!--Self-closing tag-->
<span itemprop="author" content="Turtle Lvr">Turtle H8r</span> <!--Prefers content attr to text inside tags-->
<a href="http://www.archive.org/turtlearticle" itemprop="archivedAt">Turtle Article Archive</a> <!--Uses href and not text between tags-->
</div>
<!-- Valid JSON-LD: a single schema.org Organization object with a url -->
<script type="application/ld+json">{
"@context": "http://schema.org",
"@type": "Organization",
"url": "https://www.turtles.com"
}
</script>
<!-- Invalid JSON-LD: ignored. The object below is not valid JSON: there is no comma after
     the "@id" member, and "potentialAction" is followed by "/" instead of ":".
     Leave it broken; it presumably exercises the parse-failure path. -->
<script type="application/ld+json">
{
"@id": "https://www.turtles.com/"
"potentialAction" / {
"@type": "ViewAction",
"target": "android-app://com.turtles/"
},
"@type": "WebPage",
"@context": "http://schema.org"
}
</script>
</body>
</html>